From e5c120a1b5cdde623ac54c5e4135cb3a376bd6bc Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 23 Jan 2026 22:38:14 +0000 Subject: [PATCH] fix[fsst]: execute_parent empty validity non empty filter Signed-off-by: Joe Isaacs --- encodings/fsst/src/kernel.rs | 163 ++++++++++++++++++++++++++++++++++- 1 file changed, 160 insertions(+), 3 deletions(-) diff --git a/encodings/fsst/src/kernel.rs b/encodings/fsst/src/kernel.rs index 4e839bb11bc..43aa4f1ad62 100644 --- a/encodings/fsst/src/kernel.rs +++ b/encodings/fsst/src/kernel.rs @@ -142,9 +142,7 @@ fn fsst_decode + AsPrimitive>( } } Mask::AllFalse(_) => { - // Nothing to decompress - unsafe { uncompressed.set_len(0) }; - return (Buffer::empty(), uncompressed.freeze()); + // Nothing to decompress - all values are null with length 0 } Mask::Values(values) => { for (filtered_idx, (idx, is_valid)) in filter_mask @@ -191,3 +189,162 @@ fn fsst_decode + AsPrimitive>( (views.freeze(), uncompressed) } + +#[cfg(test)] +mod tests { + use std::sync::LazyLock; + + use vortex_array::Array; + use vortex_array::ArrayRef; + use vortex_array::Canonical; + use vortex_array::IntoArray; + use vortex_array::VortexSessionExecute; + use vortex_array::arrays::FilterArray; + use vortex_array::arrays::builder::VarBinBuilder; + use vortex_array::assert_arrays_eq; + use vortex_array::compute::filter; + use vortex_array::session::ArraySession; + use vortex_dtype::DType; + use vortex_dtype::Nullability; + use vortex_error::VortexResult; + use vortex_mask::Mask; + use vortex_session::VortexSession; + + use crate::FSSTVTable; + use crate::fsst_compress; + use crate::fsst_train_compressor; + + static SESSION: LazyLock = + LazyLock::new(|| VortexSession::empty().with::()); + + fn build_test_fsst_array() -> ArrayRef { + let mut builder = VarBinBuilder::::with_capacity(10); + builder.append_value(b"hello world"); + builder.append_value(b"foo bar baz"); + builder.append_value(b"testing fsst compression"); + builder.append_value(b"another string here"); + builder.append_value(b"the quick brown fox"); + builder.append_value(b"jumps over the lazy dog"); + builder.append_value(b"abcdefghijklmnop"); + builder.append_value(b"qrstuvwxyz"); + builder.append_value(b"0123456789"); + builder.append_value(b"final string"); + let input = builder.finish(DType::Utf8(Nullability::NonNullable)); + + let compressor = fsst_train_compressor(&input); + fsst_compress(input, &compressor).into_array() + } + + #[test] + fn test_fsst_filter_simple() -> VortexResult<()> { + let fsst_array = build_test_fsst_array(); + assert!(fsst_array.is::()); + assert_eq!(fsst_array.len(), 10); + + // Filter 1/5 elements (every 5th element: indices 0 and 5) + let mask = Mask::from_iter([ + true, false, false, false, false, true, false, false, false, false, + ]); + + // Create FilterArray and execute + let filter_array = FilterArray::new(fsst_array.clone(), mask.clone()).into_array(); + let mut ctx = SESSION.create_execution_ctx(); + let result = filter_array.execute::(&mut ctx)?; + + // Compare with filtering the canonical VarBinView + let expected = filter(&fsst_array, &mask)?; + + assert_eq!(result.len(), 2); + assert_arrays_eq!(result.into_array(), expected); + Ok(()) + } + + #[test] + fn test_fsst_filter_every_other() -> VortexResult<()> { + let fsst_array = build_test_fsst_array(); + + // Filter every other element + let mask = Mask::from_iter([ + true, false, true, false, true, false, true, false, true, false, + ]); + + let filter_array = FilterArray::new(fsst_array.clone(), mask.clone()).into_array(); + let mut ctx = SESSION.create_execution_ctx(); + let result = filter_array.execute::(&mut ctx)?; + + let expected = filter(&fsst_array, &mask)?; + + assert_eq!(result.len(), 5); + assert_arrays_eq!(result.into_array(), expected); + Ok(()) + } + + #[test] + fn issues_6034_test_fsst_filter_with_nulls_and_special_chars() -> VortexResult<()> { + // + // Test case with special characters and nulls + // Values: ["", "", "", "", "", "", "", "", "", "", "", ",", "A<<<<<<<", "", "", "", "", null, null, null, null, null, null] + // Mask: only the last element is selected (true at index 22) + let mut builder = VarBinBuilder::::with_capacity(23); + // 11 empty strings + for _ in 0..11 { + builder.append_value(b""); + } + // "," + builder.append_value(b","); + // "A<<<<<<<" + builder.append_value(b"A<<<<<<<"); + // 4 more empty strings + for _ in 0..4 { + builder.append_value(b""); + } + // 6 nulls + for _ in 0..6 { + builder.append_null(); + } + let input = builder.finish(DType::Utf8(Nullability::Nullable)); + + let compressor = fsst_train_compressor(&input); + let fsst_array: ArrayRef = fsst_compress(input.clone(), &compressor).into_array(); + + // Filter: only select the last element (index 22) + let mut mask = vec![false; 22]; + mask.push(true); + let mask = Mask::from_iter(mask); + + let filter_array = FilterArray::new(fsst_array.clone(), mask.clone()).into_array(); + let mut ctx = SESSION.create_execution_ctx(); + let result = filter_array.execute::(&mut ctx)?; + + let expected = filter(input.as_ref(), &mask)?; + + assert_eq!(result.len(), 1); + assert_arrays_eq!(result.into_array(), expected); + Ok(()) + } + + #[test] + fn filter_only_null() -> VortexResult<()> { + let mut builder = VarBinBuilder::::with_capacity(3); + builder.append_null(); + builder.append_value(b"A"); + builder.append_null(); + + let input = builder.finish(DType::Utf8(Nullability::Nullable)); + + let compressor = fsst_train_compressor(&input); + let fsst_array: ArrayRef = fsst_compress(input.clone(), &compressor).into_array(); + + let mask = Mask::from_iter([true, false, true]); + + let filter_array = FilterArray::new(fsst_array.clone(), mask.clone()).into_array(); + let mut ctx = SESSION.create_execution_ctx(); + let result = filter_array.execute::(&mut ctx)?; + + let expected = filter(input.as_ref(), &mask)?; + + assert_eq!(result.len(), 2); + assert_arrays_eq!(result.into_array(), expected); + Ok(()) + } +}