Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 160 additions & 3 deletions encodings/fsst/src/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,7 @@ fn fsst_decode<S: IntegerPType + AsPrimitive<usize> + AsPrimitive<u32>>(
}
}
Mask::AllFalse(_) => {
// Nothing to decompress
unsafe { uncompressed.set_len(0) };
return (Buffer::empty(), uncompressed.freeze());
// Nothing to decompress - all values are null with length 0
}
Mask::Values(values) => {
for (filtered_idx, (idx, is_valid)) in filter_mask
Expand Down Expand Up @@ -191,3 +189,162 @@ fn fsst_decode<S: IntegerPType + AsPrimitive<usize> + AsPrimitive<u32>>(

(views.freeze(), uncompressed)
}

#[cfg(test)]
mod tests {
use std::sync::LazyLock;

use vortex_array::Array;
use vortex_array::ArrayRef;
use vortex_array::Canonical;
use vortex_array::IntoArray;
use vortex_array::VortexSessionExecute;
use vortex_array::arrays::FilterArray;
use vortex_array::arrays::builder::VarBinBuilder;
use vortex_array::assert_arrays_eq;
use vortex_array::compute::filter;
use vortex_array::session::ArraySession;
use vortex_dtype::DType;
use vortex_dtype::Nullability;
use vortex_error::VortexResult;
use vortex_mask::Mask;
use vortex_session::VortexSession;

use crate::FSSTVTable;
use crate::fsst_compress;
use crate::fsst_train_compressor;

/// Session shared by every test in this module.
///
/// Built lazily, on first access, from `VortexSession::empty().with::<ArraySession>()`;
/// the tests use it only to obtain an execution context via `create_execution_ctx()`.
static SESSION: LazyLock<VortexSession> =
LazyLock::new(|| VortexSession::empty().with::<ArraySession>());

/// Builds the ten-string, non-nullable UTF-8 fixture used by the filter tests and returns
/// it FSST-compressed with a compressor trained on those same strings.
fn build_test_fsst_array() -> ArrayRef {
    let samples: [&[u8]; 10] = [
        b"hello world",
        b"foo bar baz",
        b"testing fsst compression",
        b"another string here",
        b"the quick brown fox",
        b"jumps over the lazy dog",
        b"abcdefghijklmnop",
        b"qrstuvwxyz",
        b"0123456789",
        b"final string",
    ];

    let mut strings = VarBinBuilder::<i32>::with_capacity(samples.len());
    for sample in samples {
        strings.append_value(sample);
    }
    let uncompressed = strings.finish(DType::Utf8(Nullability::NonNullable));

    // Train on the fixture itself, then compress it with the resulting symbol table.
    let trained = fsst_train_compressor(&uncompressed);
    fsst_compress(uncompressed, &trained).into_array()
}

/// Executing a `FilterArray` that keeps rows 0 and 5 of the FSST fixture must agree with
/// the `filter` compute function applied to the same array and mask.
#[test]
fn test_fsst_filter_simple() -> VortexResult<()> {
    let compressed = build_test_fsst_array();
    assert!(compressed.is::<FSSTVTable>());
    assert_eq!(compressed.len(), 10);

    // Keep one row in five: only indices 0 and 5 are selected.
    let selection = Mask::from_iter((0..10).map(|row| row % 5 == 0));

    // Wrap the FSST array in a FilterArray and execute it down to a canonical array.
    let mut exec_ctx = SESSION.create_execution_ctx();
    let actual = FilterArray::new(compressed.clone(), selection.clone())
        .into_array()
        .execute::<Canonical>(&mut exec_ctx)?;

    // Reference result: `filter` applied directly to the FSST array with the same mask.
    let reference = filter(&compressed, &selection)?;

    assert_eq!(actual.len(), 2);
    assert_arrays_eq!(actual.into_array(), reference);
    Ok(())
}

/// Executing a `FilterArray` that keeps the even-indexed rows of the FSST fixture must
/// agree with the `filter` compute function applied to the same array and mask.
#[test]
fn test_fsst_filter_every_other() -> VortexResult<()> {
    let compressed = build_test_fsst_array();

    // Alternate true/false starting with true: rows 0, 2, 4, 6 and 8 are selected.
    let selection = Mask::from_iter((0..10).map(|row| row % 2 == 0));

    let mut exec_ctx = SESSION.create_execution_ctx();
    let actual = FilterArray::new(compressed.clone(), selection.clone())
        .into_array()
        .execute::<Canonical>(&mut exec_ctx)?;

    // Reference result: `filter` applied directly to the FSST array with the same mask.
    let reference = filter(&compressed, &selection)?;

    assert_eq!(actual.len(), 5);
    assert_arrays_eq!(actual.into_array(), reference);
    Ok(())
}

/// Test for issue 6034: a nullable column of mostly empty strings, two short strings with
/// special characters and a run of trailing nulls, filtered down to its last (null) row.
#[test]
fn issues_6034_test_fsst_filter_with_nulls_and_special_chars() -> VortexResult<()> {
    // 23 rows in total: 11 empty strings, ",", "A<<<<<<<", 4 empty strings, then 6 nulls.
    let empty: &[u8] = b"";
    let rows: Vec<Option<&[u8]>> = [
        vec![Some(empty); 11],
        vec![Some(b",".as_slice()), Some(b"A<<<<<<<".as_slice())],
        vec![Some(empty); 4],
        vec![None; 6],
    ]
    .concat();
    let row_count = rows.len();

    let mut strings = VarBinBuilder::<i32>::with_capacity(row_count);
    for row in rows {
        if let Some(bytes) = row {
            strings.append_value(bytes);
        } else {
            strings.append_null();
        }
    }
    let input = strings.finish(DType::Utf8(Nullability::Nullable));

    let compressor = fsst_train_compressor(&input);
    let fsst_array: ArrayRef = fsst_compress(input.clone(), &compressor).into_array();

    // Select only the final row (index 22), which is one of the nulls.
    let last_row = row_count - 1;
    let selection = Mask::from_iter((0..row_count).map(|row| row == last_row));

    let mut exec_ctx = SESSION.create_execution_ctx();
    let actual = FilterArray::new(fsst_array.clone(), selection.clone())
        .into_array()
        .execute::<Canonical>(&mut exec_ctx)?;

    // Reference result: the same mask applied to the uncompressed input.
    let reference = filter(input.as_ref(), &selection)?;

    assert_eq!(actual.len(), 1);
    assert_arrays_eq!(actual.into_array(), reference);
    Ok(())
}

/// Filtering a nullable FSST array so that only its null rows survive must match the same
/// filter applied to the uncompressed input.
#[test]
fn filter_only_null() -> VortexResult<()> {
    // null, "A", null
    let rows: [Option<&[u8]>; 3] = [None, Some(b"A".as_slice()), None];

    let mut strings = VarBinBuilder::<i32>::with_capacity(rows.len());
    for row in rows {
        if let Some(bytes) = row {
            strings.append_value(bytes);
        } else {
            strings.append_null();
        }
    }
    let input = strings.finish(DType::Utf8(Nullability::Nullable));

    let compressor = fsst_train_compressor(&input);
    let fsst_array: ArrayRef = fsst_compress(input.clone(), &compressor).into_array();

    // Keep exactly the null rows, i.e. the mask [true, false, true].
    let selection = Mask::from_iter(rows.iter().map(Option::is_none));

    let mut exec_ctx = SESSION.create_execution_ctx();
    let actual = FilterArray::new(fsst_array.clone(), selection.clone())
        .into_array()
        .execute::<Canonical>(&mut exec_ctx)?;

    // Reference result: the same mask applied to the uncompressed input.
    let reference = filter(input.as_ref(), &selection)?;

    assert_eq!(actual.len(), 2);
    assert_arrays_eq!(actual.into_array(), reference);
    Ok(())
}
}
Loading