From db365a917ebf9f427905f4df2d472c11d07d848c Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 21 Jan 2026 11:26:36 +0000 Subject: [PATCH 1/2] chore[array]: use mask array scalar fn over compute function Signed-off-by: Joe Isaacs --- .../src/decimal_byte_parts/compute/mask.rs | 7 ++-- encodings/zigzag/src/compute/mod.rs | 7 ++-- .../src/arrays/chunked/compute/mask.rs | 32 +++++++++++++++---- .../src/arrays/extension/compute/mask.rs | 7 ++-- vortex-array/src/builtins.rs | 6 ++-- vortex-array/src/expr/exprs/mask.rs | 7 ++-- vortex-buffer/src/bit/buf.rs | 5 +++ vortex-buffer/src/bit/buf_mut.rs | 8 +++++ 8 files changed, 58 insertions(+), 21 deletions(-) diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mask.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mask.rs index 1ad0d959dff..2dc5a702f9f 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mask.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mask.rs @@ -2,9 +2,10 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use vortex_array::ArrayRef; +use vortex_array::IntoArray; +use vortex_array::builtins::ArrayBuiltins; use vortex_array::compute::MaskKernel; use vortex_array::compute::MaskKernelAdapter; -use vortex_array::compute::mask; use vortex_array::register_kernel; use vortex_error::VortexResult; use vortex_mask::Mask; @@ -14,8 +15,8 @@ use crate::DecimalBytePartsVTable; impl MaskKernel for DecimalBytePartsVTable { fn mask(&self, array: &DecimalBytePartsArray, mask_array: &Mask) -> VortexResult { - DecimalBytePartsArray::try_new(mask(&array.msp, mask_array)?, *array.decimal_dtype()) - .map(|a| a.to_array()) + let masked = array.msp.clone().mask(mask_array.clone().into_array())?; + DecimalBytePartsArray::try_new(masked, *array.decimal_dtype()).map(|a| a.to_array()) } } diff --git a/encodings/zigzag/src/compute/mod.rs b/encodings/zigzag/src/compute/mod.rs index 8bfd79b1f57..60147c4fc94 100644 --- a/encodings/zigzag/src/compute/mod.rs +++ b/encodings/zigzag/src/compute/mod.rs @@ -6,6 +6,7 @@ mod cast; use vortex_array::Array; use vortex_array::ArrayRef; use vortex_array::IntoArray; +use vortex_array::builtins::ArrayBuiltins; use vortex_array::compute::FilterKernel; use vortex_array::compute::FilterKernelAdapter; use vortex_array::compute::MaskKernel; @@ -13,7 +14,6 @@ use vortex_array::compute::MaskKernelAdapter; use vortex_array::compute::TakeKernel; use vortex_array::compute::TakeKernelAdapter; use vortex_array::compute::filter; -use vortex_array::compute::mask; use vortex_array::compute::take; use vortex_array::register_kernel; use vortex_error::VortexResult; @@ -42,7 +42,10 @@ register_kernel!(TakeKernelAdapter(ZigZagVTable).lift()); impl MaskKernel for ZigZagVTable { fn mask(&self, array: &ZigZagArray, filter_mask: &Mask) -> VortexResult { - let encoded = mask(array.encoded(), filter_mask)?; + let encoded = array + .encoded() + .clone() + .mask(filter_mask.clone().into_array())?; Ok(ZigZagArray::try_new(encoded)?.into_array()) } } diff --git a/vortex-array/src/arrays/chunked/compute/mask.rs b/vortex-array/src/arrays/chunked/compute/mask.rs index ce75689ac98..ab14d69c5d3 100644 --- a/vortex-array/src/arrays/chunked/compute/mask.rs +++ b/vortex-array/src/arrays/chunked/compute/mask.rs @@ -2,6 +2,8 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use itertools::Itertools as _; +use vortex_buffer::BitBuffer; +use vortex_buffer::BitBufferMut; use vortex_dtype::DType; use vortex_error::VortexResult; use vortex_mask::AllOr; @@ -15,15 +17,18 @@ use super::filter::find_chunk_idx; use crate::Array; use crate::ArrayRef; use crate::IntoArray; +use crate::arrays::BoolArray; use crate::arrays::ChunkedArray; use crate::arrays::ChunkedVTable; use crate::arrays::ConstantArray; use crate::arrays::chunked::compute::filter::FILTER_SLICES_SELECTIVITY_THRESHOLD; +use crate::builtins::ArrayBuiltins; use crate::compute::MaskKernel; use crate::compute::MaskKernelAdapter; use crate::compute::cast; use crate::compute::mask; use crate::register_kernel; +use crate::validity::Validity; impl MaskKernel for ChunkedVTable { fn mask(&self, array: &ChunkedArray, mask: &Mask) -> VortexResult { @@ -54,15 +59,21 @@ fn mask_indices( ) -> VortexResult> { let mut new_chunks = Vec::with_capacity(array.nchunks()); let mut current_chunk_id = 0; - let mut chunk_indices = Vec::new(); + let mut chunk_indices = Vec::::new(); let chunk_offsets = array.chunk_offsets(); for &set_index in indices { let (chunk_id, index) = find_chunk_idx(set_index, &chunk_offsets); if chunk_id != current_chunk_id { - let chunk = array.chunk(current_chunk_id); - let masked_chunk = mask(chunk, &Mask::from_indices(chunk.len(), chunk_indices))?; + let chunk = array.chunk(current_chunk_id).clone(); + let chunk_len = chunk.len(); + let mask = BoolArray::new( + BitBuffer::from_indices(chunk_len, &chunk_indices), + Validity::NonNullable, + ) + .into_array(); + let masked_chunk = chunk.mask(mask.clone())?; // Advance the chunk forward, reset the chunk indices buffer. chunk_indices = Vec::new(); new_chunks.push(masked_chunk); @@ -70,8 +81,8 @@ fn mask_indices( while current_chunk_id < chunk_id { // Chunks that are not affected by the mask, must still be casted to the correct dtype. - let chunk = array.chunk(current_chunk_id); - new_chunks.push(cast(chunk, new_dtype)?); + let chunk = array.chunk(current_chunk_id).cast(new_dtype.clone())?; + new_chunks.push(chunk); current_chunk_id += 1; } } @@ -80,8 +91,15 @@ fn mask_indices( } if !chunk_indices.is_empty() { - let chunk = array.chunk(current_chunk_id); - let masked_chunk = mask(chunk, &Mask::from_indices(chunk.len(), chunk_indices))?; + let chunk = array.chunk(current_chunk_id).clone(); + let chunk_len = chunk.len(); + let masked_chunk = chunk.mask( + BoolArray::new( + BitBufferMut::from_indices(chunk_len, &chunk_indices).freeze(), + Validity::NonNullable, + ) + .into_array(), + )?; new_chunks.push(masked_chunk); current_chunk_id += 1; } diff --git a/vortex-array/src/arrays/extension/compute/mask.rs b/vortex-array/src/arrays/extension/compute/mask.rs index 2260c2d6dbb..a06ade40c3e 100644 --- a/vortex-array/src/arrays/extension/compute/mask.rs +++ b/vortex-array/src/arrays/extension/compute/mask.rs @@ -11,14 +11,17 @@ use crate::ArrayRef; use crate::IntoArray; use crate::arrays::ExtensionArray; use crate::arrays::ExtensionVTable; +use crate::builtins::ArrayBuiltins; use crate::compute::MaskKernel; use crate::compute::MaskKernelAdapter; -use crate::compute::{self}; use crate::register_kernel; impl MaskKernel for ExtensionVTable { fn mask(&self, array: &ExtensionArray, mask_array: &Mask) -> VortexResult { - let masked_storage = compute::mask(array.storage(), mask_array)?; + let masked_storage = array + .storage() + .clone() + .mask(mask_array.clone().into_array())?; if masked_storage.dtype().nullability() == array.ext_dtype().storage_dtype().nullability() { Ok(ExtensionArray::new(array.ext_dtype().clone(), masked_storage).into_array()) } else { diff --git a/vortex-array/src/builtins.rs b/vortex-array/src/builtins.rs index c6f9cd0fa47..e0744dbf151 100644 --- a/vortex-array/src/builtins.rs +++ b/vortex-array/src/builtins.rs @@ -81,7 +81,7 @@ pub trait ArrayBuiltins: Sized { /// Mask the array using the given boolean mask. /// The resulting array's validity is the intersection of the original array's validity /// and the mask's validity. - fn mask(&self, mask: &ArrayRef) -> VortexResult; + fn mask(self, mask: ArrayRef) -> VortexResult; /// Boolean negation. fn not(&self) -> VortexResult; @@ -105,8 +105,8 @@ impl ArrayBuiltins for ArrayRef { .optimize() } - fn mask(&self, mask: &ArrayRef) -> VortexResult { - Mask.try_new_array(self.len(), EmptyOptions, [self.clone(), mask.clone()])? + fn mask(self, mask: ArrayRef) -> VortexResult { + Mask.try_new_array(self.len(), EmptyOptions, [self, mask])? .optimize() } diff --git a/vortex-array/src/expr/exprs/mask.rs b/vortex-array/src/expr/exprs/mask.rs index 150064a3b36..146c8ff7b1c 100644 --- a/vortex-array/src/expr/exprs/mask.rs +++ b/vortex-array/src/expr/exprs/mask.rs @@ -2,7 +2,6 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::fmt::Formatter; -use std::ops::Not; use vortex_dtype::DType; use vortex_dtype::Nullability; @@ -17,8 +16,8 @@ use vortex_vector::ScalarOps; use vortex_vector::VectorMutOps; use vortex_vector::VectorOps; -use crate::Array; use crate::ArrayRef; +use crate::ToCanonical; use crate::expr::Arity; use crate::expr::ChildName; use crate::expr::EmptyOptions; @@ -96,9 +95,9 @@ impl VTable for Mask { let child = expr.child(0).evaluate(scope)?; // Invert the validity mask - we want to set values to null where validity is false. - let inverted_mask = child.validity_mask().not(); + let mask = expr.child(1).evaluate(scope)?.to_bool().into_bit_buffer(); - crate::compute::mask(&child, &inverted_mask) + crate::compute::mask(&child, &vortex_mask::Mask::from_buffer(mask)) } fn execute(&self, _options: &Self::Options, args: ExecutionArgs) -> VortexResult { diff --git a/vortex-buffer/src/bit/buf.rs b/vortex-buffer/src/bit/buf.rs index c1472b6d1d5..558c5ccf34a 100644 --- a/vortex-buffer/src/bit/buf.rs +++ b/vortex-buffer/src/bit/buf.rs @@ -118,6 +118,11 @@ impl BitBuffer { } } + /// Create a bit buffer of `len` with `indices` set as true. + pub fn from_indices(len: usize, indices: &[usize]) -> BitBuffer { + BitBufferMut::from_indices(len, indices).freeze() + } + /// Create a new empty `BitBuffer`. pub fn empty() -> Self { Self::new_set(0) diff --git a/vortex-buffer/src/bit/buf_mut.rs b/vortex-buffer/src/bit/buf_mut.rs index c94d3027399..b9e1f63a78d 100644 --- a/vortex-buffer/src/bit/buf_mut.rs +++ b/vortex-buffer/src/bit/buf_mut.rs @@ -124,6 +124,14 @@ impl BitBufferMut { } } + /// Create a bit buffer of `len` with `indices` set as true. + pub fn from_indices(len: usize, indices: &[usize]) -> BitBufferMut { + let mut buf = BitBufferMut::new_unset(len); + // TODO(ngates): for dense indices, we can do better by collecting into u64s. + indices.iter().for_each(|&idx| buf.set(idx)); + buf + } + /// Invokes `f` with indexes `0..len` collecting the boolean results into a new `BitBufferMut` pub fn collect_bool bool>(len: usize, mut f: F) -> Self { let mut buffer = BufferMut::with_capacity(len.div_ceil(64) * 8); From 40699b1a393ccbfe9736d3fd455eb8fde5aa485e Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Wed, 21 Jan 2026 13:16:27 +0000 Subject: [PATCH 2/2] wip Signed-off-by: Joe Isaacs --- .../src/decimal_byte_parts/compute/mask.rs | 5 ++--- encodings/zigzag/src/compute/mod.rs | 7 ++----- vortex-array/src/arrays/chunked/compute/mask.rs | 11 +++++++---- vortex-array/src/arrays/extension/compute/mask.rs | 8 +++----- vortex-array/src/expr/exprs/mask.rs | 6 ++++-- 5 files changed, 18 insertions(+), 19 deletions(-) diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mask.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mask.rs index 2dc5a702f9f..04a91c5de17 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mask.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/compute/mask.rs @@ -2,10 +2,9 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use vortex_array::ArrayRef; -use vortex_array::IntoArray; -use vortex_array::builtins::ArrayBuiltins; use vortex_array::compute::MaskKernel; use vortex_array::compute::MaskKernelAdapter; +use vortex_array::compute::mask; use vortex_array::register_kernel; use vortex_error::VortexResult; use vortex_mask::Mask; @@ -15,7 +14,7 @@ use crate::DecimalBytePartsVTable; impl MaskKernel for DecimalBytePartsVTable { fn mask(&self, array: &DecimalBytePartsArray, mask_array: &Mask) -> VortexResult { - let masked = array.msp.clone().mask(mask_array.clone().into_array())?; + let masked = mask(&array.msp, mask_array)?; DecimalBytePartsArray::try_new(masked, *array.decimal_dtype()).map(|a| a.to_array()) } } diff --git a/encodings/zigzag/src/compute/mod.rs b/encodings/zigzag/src/compute/mod.rs index 60147c4fc94..8bfd79b1f57 100644 --- a/encodings/zigzag/src/compute/mod.rs +++ b/encodings/zigzag/src/compute/mod.rs @@ -6,7 +6,6 @@ mod cast; use vortex_array::Array; use vortex_array::ArrayRef; use vortex_array::IntoArray; -use vortex_array::builtins::ArrayBuiltins; use vortex_array::compute::FilterKernel; use vortex_array::compute::FilterKernelAdapter; use vortex_array::compute::MaskKernel; @@ -14,6 +13,7 @@ use vortex_array::compute::MaskKernelAdapter; use vortex_array::compute::TakeKernel; use vortex_array::compute::TakeKernelAdapter; use vortex_array::compute::filter; +use vortex_array::compute::mask; use vortex_array::compute::take; use vortex_array::register_kernel; use vortex_error::VortexResult; @@ -42,10 +42,7 @@ register_kernel!(TakeKernelAdapter(ZigZagVTable).lift()); impl MaskKernel for ZigZagVTable { fn mask(&self, array: &ZigZagArray, filter_mask: &Mask) -> VortexResult { - let encoded = array - .encoded() - .clone() - .mask(filter_mask.clone().into_array())?; + let encoded = mask(array.encoded(), filter_mask)?; Ok(ZigZagArray::try_new(encoded)?.into_array()) } } diff --git a/vortex-array/src/arrays/chunked/compute/mask.rs b/vortex-array/src/arrays/chunked/compute/mask.rs index ab14d69c5d3..034c3f84297 100644 --- a/vortex-array/src/arrays/chunked/compute/mask.rs +++ b/vortex-array/src/arrays/chunked/compute/mask.rs @@ -14,7 +14,6 @@ use vortex_scalar::Scalar; use super::filter::ChunkFilter; use super::filter::chunk_filters; use super::filter::find_chunk_idx; -use crate::Array; use crate::ArrayRef; use crate::IntoArray; use crate::arrays::BoolArray; @@ -68,12 +67,15 @@ fn mask_indices( if chunk_id != current_chunk_id { let chunk = array.chunk(current_chunk_id).clone(); let chunk_len = chunk.len(); + // chunk_indices contains indices to null out, but chunk.mask() expects + // mask=true to mean "retain". So we create a mask with bits set at indices + // to null, then invert it to get mask=true at indices to retain. let mask = BoolArray::new( - BitBuffer::from_indices(chunk_len, &chunk_indices), + !BitBuffer::from_indices(chunk_len, &chunk_indices), Validity::NonNullable, ) .into_array(); - let masked_chunk = chunk.mask(mask.clone())?; + let masked_chunk = chunk.mask(mask)?; // Advance the chunk forward, reset the chunk indices buffer. chunk_indices = Vec::new(); new_chunks.push(masked_chunk); @@ -93,9 +95,10 @@ fn mask_indices( if !chunk_indices.is_empty() { let chunk = array.chunk(current_chunk_id).clone(); let chunk_len = chunk.len(); + // Same inversion as above: invert the mask so mask=true means "retain" let masked_chunk = chunk.mask( BoolArray::new( - BitBufferMut::from_indices(chunk_len, &chunk_indices).freeze(), + !BitBufferMut::from_indices(chunk_len, &chunk_indices).freeze(), Validity::NonNullable, ) .into_array(), diff --git a/vortex-array/src/arrays/extension/compute/mask.rs b/vortex-array/src/arrays/extension/compute/mask.rs index a06ade40c3e..9c2b2607ec0 100644 --- a/vortex-array/src/arrays/extension/compute/mask.rs +++ b/vortex-array/src/arrays/extension/compute/mask.rs @@ -11,17 +11,15 @@ use crate::ArrayRef; use crate::IntoArray; use crate::arrays::ExtensionArray; use crate::arrays::ExtensionVTable; -use crate::builtins::ArrayBuiltins; use crate::compute::MaskKernel; use crate::compute::MaskKernelAdapter; +use crate::compute::mask; use crate::register_kernel; impl MaskKernel for ExtensionVTable { fn mask(&self, array: &ExtensionArray, mask_array: &Mask) -> VortexResult { - let masked_storage = array - .storage() - .clone() - .mask(mask_array.clone().into_array())?; + // Use compute::mask directly since mask_array has compute::mask semantics (true=null) + let masked_storage = mask(array.storage(), mask_array)?; if masked_storage.dtype().nullability() == array.ext_dtype().storage_dtype().nullability() { Ok(ExtensionArray::new(array.ext_dtype().clone(), masked_storage).into_array()) } else { diff --git a/vortex-array/src/expr/exprs/mask.rs b/vortex-array/src/expr/exprs/mask.rs index 146c8ff7b1c..f24e2347ae9 100644 --- a/vortex-array/src/expr/exprs/mask.rs +++ b/vortex-array/src/expr/exprs/mask.rs @@ -94,10 +94,12 @@ impl VTable for Mask { ) -> VortexResult { let child = expr.child(0).evaluate(scope)?; - // Invert the validity mask - we want to set values to null where validity is false. + // The expr::Mask semantics are: mask=true means retain, mask=false means null. + // But compute::mask has: mask=true means null, mask=false means retain. + // So we need to invert the mask before passing to compute::mask. let mask = expr.child(1).evaluate(scope)?.to_bool().into_bit_buffer(); - crate::compute::mask(&child, &vortex_mask::Mask::from_buffer(mask)) + crate::compute::mask(&child, &vortex_mask::Mask::from_buffer(!mask)) } fn execute(&self, _options: &Self::Options, args: ExecutionArgs) -> VortexResult {