diff --git a/encodings/runend/Cargo.toml b/encodings/runend/Cargo.toml
index 88cb70ef1a5..7817d65e216 100644
--- a/encodings/runend/Cargo.toml
+++ b/encodings/runend/Cargo.toml
@@ -48,3 +48,7 @@ harness = false
 [[bench]]
 name = "run_end_compress"
 harness = false
+
+[[bench]]
+name = "run_end_decode"
+harness = false
diff --git a/encodings/runend/benches/run_end_decode.rs b/encodings/runend/benches/run_end_decode.rs
new file mode 100644
index 00000000000..06ceac4186d
--- /dev/null
+++ b/encodings/runend/benches/run_end_decode.rs
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+#![allow(clippy::unwrap_used, clippy::cast_possible_truncation)]
+
+use std::fmt;
+
+use divan::Bencher;
+use vortex_array::arrays::BoolArray;
+use vortex_array::arrays::PrimitiveArray;
+use vortex_array::compute::warm_up_vtables;
+use vortex_array::validity::Validity;
+use vortex_buffer::BitBuffer;
+use vortex_buffer::BufferMut;
+use vortex_runend::decompress_bool::runend_decode_bools;
+
+fn main() {
+    warm_up_vtables();
+    divan::main();
+}
+
+/// Distribution types for bool benchmarks
+#[derive(Clone, Copy)]
+enum BoolDistribution {
+    /// Alternating true/false (50/50)
+    Alternating,
+    /// Mostly true (90% true runs)
+    MostlyTrue,
+    /// Mostly false (90% false runs)
+    MostlyFalse,
+    /// All true
+    AllTrue,
+    /// All false
+    AllFalse,
+}
+
+impl fmt::Display for BoolDistribution {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            BoolDistribution::Alternating => write!(f, "alternating"),
+            BoolDistribution::MostlyTrue => write!(f, "mostly_true"),
+            BoolDistribution::MostlyFalse => write!(f, "mostly_false"),
+            BoolDistribution::AllTrue => write!(f, "all_true"),
+            BoolDistribution::AllFalse => write!(f, "all_false"),
+        }
+    }
+}
+
+#[derive(Clone, Copy)]
+struct BoolBenchArgs {
+    total_length: usize,
+    avg_run_length: usize,
+    distribution: BoolDistribution,
+}
+
+impl fmt::Display for BoolBenchArgs {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{}_{}_{}",
+            self.total_length, self.avg_run_length, self.distribution
+        )
+    }
+}
+
+/// Builds run ends (every run is `avg_run_length` long; the last may be shorter) and run values.
+fn create_bool_test_data(
+    total_length: usize,
+    avg_run_length: usize,
+    distribution: BoolDistribution,
+) -> (PrimitiveArray, BoolArray) {
+    let mut ends = BufferMut::<u32>::with_capacity(total_length / avg_run_length + 1);
+    let mut values = Vec::with_capacity(total_length / avg_run_length + 1);
+
+    let mut pos = 0usize;
+    let mut run_index = 0usize;
+
+    while pos < total_length {
+        let run_len = avg_run_length.min(total_length - pos);
+        pos += run_len;
+        ends.push(pos as u32);
+
+        let val = match distribution {
+            BoolDistribution::Alternating => run_index % 2 == 0,
+            BoolDistribution::MostlyTrue => run_index % 10 != 0, // 90% true
+            BoolDistribution::MostlyFalse => run_index % 10 == 0, // 10% true (90% false)
+            BoolDistribution::AllTrue => true,
+            BoolDistribution::AllFalse => false,
+        };
+        values.push(val);
+        run_index += 1;
+    }
+
+    (
+        PrimitiveArray::new(ends.freeze(), Validity::NonNullable),
+        BoolArray::from(BitBuffer::from(values)),
+    )
+}
+
+// Medium size: 10k elements with various run lengths and distributions
+const BOOL_ARGS: &[BoolBenchArgs] = &[
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 2,
+        distribution: BoolDistribution::Alternating,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 10,
+        distribution: BoolDistribution::Alternating,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 100,
+        distribution: BoolDistribution::Alternating,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 1000,
+        distribution: BoolDistribution::Alternating,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 2,
+        distribution: BoolDistribution::MostlyTrue,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 10,
+        distribution: BoolDistribution::MostlyTrue,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 100,
+        distribution: BoolDistribution::MostlyTrue,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 1000,
+        distribution: BoolDistribution::MostlyTrue,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 2,
+        distribution: BoolDistribution::MostlyFalse,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 10,
+        distribution: BoolDistribution::MostlyFalse,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 100,
+        distribution: BoolDistribution::MostlyFalse,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 1000,
+        distribution: BoolDistribution::MostlyFalse,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 2,
+        distribution: BoolDistribution::AllTrue,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 10,
+        distribution: BoolDistribution::AllTrue,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 100,
+        distribution: BoolDistribution::AllTrue,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 1000,
+        distribution: BoolDistribution::AllTrue,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 2,
+        distribution: BoolDistribution::AllFalse,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 10,
+        distribution: BoolDistribution::AllFalse,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 100,
+        distribution: BoolDistribution::AllFalse,
+    },
+    BoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 1000,
+        distribution: BoolDistribution::AllFalse,
+    },
+];
+
+/// Benchmarks non-nullable decoding; `with_inputs` supplies the clones the decoder consumes.
+#[divan::bench(args = BOOL_ARGS)]
+fn decode_bool(bencher: Bencher, args: BoolBenchArgs) {
+    let BoolBenchArgs {
+        total_length,
+        avg_run_length,
+        distribution,
+    } = args;
+    let (ends, values) = create_bool_test_data(total_length, avg_run_length, distribution);
+    bencher
+        .with_inputs(|| (ends.clone(), values.clone()))
+        .bench_values(|(ends, values)| runend_decode_bools(ends, values, 0, total_length));
+}
+
+/// Validity distribution for nullable benchmarks
+#[derive(Clone, Copy)]
+enum ValidityDistribution {
+    /// 90% valid
+    MostlyValid,
+    /// 50% valid
+    HalfValid,
+    /// 10% valid
+    MostlyNull,
+}
+
+impl fmt::Display for ValidityDistribution {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            ValidityDistribution::MostlyValid => write!(f, "mostly_valid"),
+            ValidityDistribution::HalfValid => write!(f, "half_valid"),
+            ValidityDistribution::MostlyNull => write!(f, "mostly_null"),
+        }
+    }
+}
+
+#[derive(Clone, Copy)]
+struct NullableBoolBenchArgs {
+    total_length: usize,
+    avg_run_length: usize,
+    distribution: BoolDistribution,
+    validity: ValidityDistribution,
+}
+
+impl fmt::Display for NullableBoolBenchArgs {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{}_{}_{}_{}",
+            self.total_length, self.avg_run_length, self.distribution, self.validity
+        )
+    }
+}
+
+/// Like `create_bool_test_data`, but each run also gets a validity bit chosen by `validity`.
+fn create_nullable_bool_test_data(
+    total_length: usize,
+    avg_run_length: usize,
+    distribution: BoolDistribution,
+    validity: ValidityDistribution,
+) -> (PrimitiveArray, BoolArray) {
+    let mut ends = BufferMut::<u32>::with_capacity(total_length / avg_run_length + 1);
+    let mut values = Vec::with_capacity(total_length / avg_run_length + 1);
+    let mut validity_bits = Vec::with_capacity(total_length / avg_run_length + 1);
+
+    let mut pos = 0usize;
+    let mut run_index = 0usize;
+
+    while pos < total_length {
+        let run_len = avg_run_length.min(total_length - pos);
+        pos += run_len;
+        ends.push(pos as u32);
+
+        let val = match distribution {
+            BoolDistribution::Alternating => run_index % 2 == 0,
+            BoolDistribution::MostlyTrue => run_index % 10 != 0,
+            BoolDistribution::MostlyFalse => run_index % 10 == 0,
+            BoolDistribution::AllTrue => true,
+            BoolDistribution::AllFalse => false,
+        };
+        values.push(val);
+
+        let is_valid = match validity {
+            ValidityDistribution::MostlyValid => run_index % 10 != 0,
+            ValidityDistribution::HalfValid => run_index % 2 == 0,
+            ValidityDistribution::MostlyNull => run_index % 10 == 0,
+        };
+        validity_bits.push(is_valid);
+
+        run_index += 1;
+    }
+
+    (
+        PrimitiveArray::new(ends.freeze(), Validity::NonNullable),
+        BoolArray::new(
+            BitBuffer::from(values),
+            Validity::from(BitBuffer::from(validity_bits)),
+        ),
+    )
+}
+
+const NULLABLE_BOOL_ARGS: &[NullableBoolBenchArgs] = &[
+    // Alternating with different validity
+    NullableBoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 10,
+        distribution: BoolDistribution::Alternating,
+        validity: ValidityDistribution::MostlyValid,
+    },
+    NullableBoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 10,
+        distribution: BoolDistribution::Alternating,
+        validity: ValidityDistribution::HalfValid,
+    },
+    NullableBoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 10,
+        distribution: BoolDistribution::Alternating,
+        validity: ValidityDistribution::MostlyNull,
+    },
+    // MostlyTrue with different validity
+    NullableBoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 10,
+        distribution: BoolDistribution::MostlyTrue,
+        validity: ValidityDistribution::MostlyValid,
+    },
+    NullableBoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 10,
+        distribution: BoolDistribution::MostlyTrue,
+        validity: ValidityDistribution::HalfValid,
+    },
+    NullableBoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 10,
+        distribution: BoolDistribution::MostlyTrue,
+        validity: ValidityDistribution::MostlyNull,
+    },
+    // Different run lengths with MostlyValid
+    NullableBoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 2,
+        distribution: BoolDistribution::Alternating,
+        validity: ValidityDistribution::MostlyValid,
+    },
+    NullableBoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 100,
+        distribution: BoolDistribution::Alternating,
+        validity: ValidityDistribution::MostlyValid,
+    },
+    NullableBoolBenchArgs {
+        total_length: 10_000,
+        avg_run_length: 1000,
+        distribution: BoolDistribution::Alternating,
+        validity: ValidityDistribution::MostlyValid,
+    },
+];
+
+/// Benchmarks nullable decoding; `with_inputs` supplies the clones the decoder consumes.
+#[divan::bench(args = NULLABLE_BOOL_ARGS)]
+fn decode_bool_nullable(bencher: Bencher, args: NullableBoolBenchArgs) {
+    let NullableBoolBenchArgs {
+        total_length,
+        avg_run_length,
+        distribution,
+        validity,
+    } = args;
+    let (ends, values) =
+        create_nullable_bool_test_data(total_length, avg_run_length, distribution, validity);
+    bencher
+        .with_inputs(|| (ends.clone(), values.clone()))
+        .bench_values(|(ends, values)| runend_decode_bools(ends, values, 0, total_length));
+}
diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs
index 9d18bf4cd3b..fe610e663c1 100644
--- a/encodings/runend/src/array.rs
+++ b/encodings/runend/src/array.rs
@@ -39,9 +39,9 @@ use vortex_error::vortex_ensure;
 use vortex_error::vortex_panic;
 use vortex_scalar::PValue;
 
-use crate::compress::runend_decode_bools;
 use crate::compress::runend_decode_primitive;
 use crate::compress::runend_encode;
+use crate::decompress_bool::runend_decode_bools;
 use crate::kernel::PARENT_KERNELS;
 use crate::rules::RULES;
 
diff --git a/encodings/runend/src/compress.rs b/encodings/runend/src/compress.rs
index 8d8af3a828f..72ee62ce0a7 100644
--- a/encodings/runend/src/compress.rs
+++ b/encodings/runend/src/compress.rs
@@ -186,24 +186,6 @@ pub fn runend_decode_primitive(
     }))
 }
 
-pub fn runend_decode_bools(
-    ends: PrimitiveArray,
-    values: BoolArray,
-    offset: usize,
-    length: usize,
-) -> VortexResult<BoolArray> {
-    let validity_mask = values.validity_mask()?;
-    Ok(match_each_unsigned_integer_ptype!(ends.ptype(), |E| {
-        runend_decode_typed_bool(
-            trimmed_ends_iter(ends.as_slice::<E>(), offset, length),
-            &values.to_bit_buffer(),
-            validity_mask,
-            values.dtype().nullability(),
-            length,
-        )
-    }))
-}
-
 pub fn runend_decode_typed_primitive(
     run_ends: impl Iterator,
     values: &[T],
@@ -263,47 +245,6 @@ pub fn runend_decode_typed_primitive(
     }
 }
 
-pub fn runend_decode_typed_bool(
-    run_ends: impl Iterator<Item = usize>,
-    values: &BitBuffer,
-    values_validity: Mask,
-    values_nullability: Nullability,
-    length: usize,
-) -> BoolArray {
-    match values_validity {
-        Mask::AllTrue(_) => {
-            let mut decoded = BitBufferMut::with_capacity(length);
-            for (end, value) in run_ends.zip_eq(values.iter()) {
-                decoded.append_n(value, end - decoded.len());
-            }
-            BoolArray::new(decoded.freeze(), values_nullability.into())
-        }
-        Mask::AllFalse(_) => BoolArray::new(BitBuffer::new_unset(length), Validity::AllInvalid),
-        Mask::Values(mask) => {
-            let mut decoded = BitBufferMut::with_capacity(length);
-            let mut decoded_validity = BitBufferMut::with_capacity(length);
-            for (end, value) in run_ends.zip_eq(
-                values
-                    .iter()
-                    .zip(mask.bit_buffer().iter())
-                    .map(|(v, is_valid)| is_valid.then_some(v)),
-            ) {
-                match value {
-                    None => {
-                        decoded_validity.append_n(false, end - decoded.len());
-                        decoded.append_n(false, end - decoded.len());
-                    }
-                    Some(value) => {
-                        decoded_validity.append_n(true, end - decoded.len());
-                        decoded.append_n(value, end - decoded.len());
-                    }
-                }
-            }
-            BoolArray::new(decoded.freeze(), Validity::from(decoded_validity.freeze()))
-        }
-    }
-}
-
 #[cfg(test)]
 mod test {
     use vortex_array::ToCanonical;
diff --git a/encodings/runend/src/compute/compare.rs b/encodings/runend/src/compute/compare.rs
index 4161898af61..7b6eb9cc1f1 100644
--- a/encodings/runend/src/compute/compare.rs
+++ b/encodings/runend/src/compute/compare.rs
@@ -15,7 +15,7 @@ use vortex_error::VortexResult;
 
 use crate::RunEndArray;
 use crate::RunEndVTable;
-use crate::compress::runend_decode_bools;
+use crate::decompress_bool::runend_decode_bools;
 
 impl CompareKernel for RunEndVTable {
     fn compare(
diff --git a/encodings/runend/src/decompress_bool.rs b/encodings/runend/src/decompress_bool.rs
new file mode 100644
index 00000000000..4df188d8b8f
--- /dev/null
+++ b/encodings/runend/src/decompress_bool.rs
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Optimized run-end decoding for boolean arrays.
+//!
+//! Uses an adaptive strategy that pre-fills the buffer with the majority value
+//! (0s or 1s) and only fills the minority runs, minimizing work for skewed distributions.
+
+use itertools::Itertools;
+use vortex_array::arrays::BoolArray;
+use vortex_array::arrays::PrimitiveArray;
+use vortex_array::validity::Validity;
+use vortex_buffer::BitBuffer;
+use vortex_buffer::BitBufferMut;
+use vortex_dtype::Nullability;
+use vortex_dtype::match_each_unsigned_integer_ptype;
+use vortex_error::VortexResult;
+use vortex_mask::Mask;
+
+use crate::iter::trimmed_ends_iter;
+
+/// Decodes run-end encoded boolean values into a flat `BoolArray`.
+pub fn runend_decode_bools(
+    ends: PrimitiveArray,
+    values: BoolArray,
+    offset: usize,
+    length: usize,
+) -> VortexResult<BoolArray> {
+    let validity = values.validity_mask()?;
+    Ok(match_each_unsigned_integer_ptype!(ends.ptype(), |E| {
+        runend_decode_typed_bool(
+            trimmed_ends_iter(ends.as_slice::<E>(), offset, length),
+            &values.to_bit_buffer(),
+            validity,
+            values.dtype().nullability(),
+            length,
+        )
+    }))
+}
+
+/// Decodes run-end encoded boolean values using an adaptive strategy.
+///
+/// The strategy counts true vs false runs and chooses the optimal approach:
+/// - If more true runs: pre-fill with 1s, clear false runs
+/// - If more false runs: pre-fill with 0s, fill true runs
+///
+/// This minimizes work for skewed distributions (e.g., sparse validity masks).
+pub fn runend_decode_typed_bool(
+    run_ends: impl Iterator<Item = usize>,
+    values: &BitBuffer,
+    values_validity: Mask,
+    values_nullability: Nullability,
+    length: usize,
+) -> BoolArray {
+    match values_validity {
+        Mask::AllTrue(_) => decode_bool_non_nullable(run_ends, values, values_nullability, length),
+        Mask::AllFalse(_) => BoolArray::new(BitBuffer::new_unset(length), Validity::AllInvalid),
+        Mask::Values(mask) => decode_bool_nullable(run_ends, values, mask.bit_buffer(), length),
+    }
+}
+
+/// Decodes run-end encoded booleans when every run value is valid (no validity buffer needed).
+fn decode_bool_non_nullable(
+    run_ends: impl Iterator<Item = usize>,
+    values: &BitBuffer,
+    nullability: Nullability,
+    length: usize,
+) -> BoolArray {
+    // Adaptive strategy: choose based on which value is more common
+    // If more runs have true values, pre-fill with 1s and clear false runs
+    // If more runs have false values, pre-fill with 0s and fill true runs
+    let true_count = values.true_count();
+    let false_count = values.len() - true_count;
+
+    if true_count > false_count {
+        // More true runs - pre-fill with 1s and clear false runs
+        let mut decoded = BitBufferMut::new_set(length);
+        let decoded_bytes = decoded.as_mut_slice();
+        let mut current_pos = 0usize;
+
+        for (end, value) in run_ends.zip_eq(values.iter()) {
+            // Only clear when value is false (true is already 1)
+            if end > current_pos && !value {
+                fill_bits_false(decoded_bytes, current_pos, end);
+            }
+            current_pos = end;
+        }
+        BoolArray::new(decoded.freeze(), nullability.into())
+    } else {
+        // More or equal false runs - pre-fill with 0s and fill true runs
+        let mut decoded = BitBufferMut::new_unset(length);
+        let decoded_bytes = decoded.as_mut_slice();
+        let mut current_pos = 0usize;
+
+        for (end, value) in run_ends.zip_eq(values.iter()) {
+            // Only fill when value is true (false is already 0)
+            if end > current_pos && value {
+                fill_bits_true(decoded_bytes, current_pos, end);
+            }
+            current_pos = end;
+        }
+        BoolArray::new(decoded.freeze(), nullability.into())
+    }
+}
+
+/// Decodes run-end encoded booleans when some run values are null, also building validity.
+fn decode_bool_nullable(
+    run_ends: impl Iterator<Item = usize>,
+    values: &BitBuffer,
+    validity_mask: &BitBuffer,
+    length: usize,
+) -> BoolArray {
+    let true_count = values.true_count();
+    let false_count = values.len() - true_count;
+
+    // Counts include null slots: a cheap proxy for the valid true/false counts (heuristic only).
+    if true_count > false_count {
+        // More true runs - pre-fill with 1s and clear false/null runs
+        let mut decoded = BitBufferMut::new_set(length);
+        let mut decoded_validity = BitBufferMut::new_unset(length);
+        let decoded_bytes = decoded.as_mut_slice();
+        let validity_bytes = decoded_validity.as_mut_slice();
+        let mut current_pos = 0usize;
+
+        for (end, value) in run_ends.zip_eq(
+            values
+                .iter()
+                .zip(validity_mask.iter())
+                .map(|(v, is_valid)| is_valid.then_some(v)),
+        ) {
+            if end > current_pos {
+                match value {
+                    None => {
+                        // Null: clear decoded bits, validity stays false
+                        fill_bits_false(decoded_bytes, current_pos, end);
+                    }
+                    Some(v) => {
+                        // Valid: set validity bits to true
+                        fill_bits_true(validity_bytes, current_pos, end);
+                        // Clear decoded bits if value is false
+                        if !v {
+                            fill_bits_false(decoded_bytes, current_pos, end);
+                        }
+                    }
+                }
+                current_pos = end;
+            }
+        }
+        BoolArray::new(decoded.freeze(), Validity::from(decoded_validity.freeze()))
+    } else {
+        // More or equal false runs - pre-fill with 0s and fill true runs
+        let mut decoded = BitBufferMut::new_unset(length);
+        let mut decoded_validity = BitBufferMut::new_unset(length);
+        let decoded_bytes = decoded.as_mut_slice();
+        let validity_bytes = decoded_validity.as_mut_slice();
+        let mut current_pos = 0usize;
+
+        for (end, value) in run_ends.zip_eq(
+            values
+                .iter()
+                .zip(validity_mask.iter())
+                .map(|(v, is_valid)| is_valid.then_some(v)),
+        ) {
+            if end > current_pos {
+                match value {
+                    None => {
+                        // Validity stays false (already 0), decoded stays false
+                    }
+                    Some(v) => {
+                        // Set validity bits to true
+                        fill_bits_true(validity_bytes, current_pos, end);
+                        // Set decoded bits if value is true
+                        if v {
+                            fill_bits_true(decoded_bytes, current_pos, end);
+                        }
+                    }
+                }
+                current_pos = end;
+            }
+        }
+        BoolArray::new(decoded.freeze(), Validity::from(decoded_validity.freeze()))
+    }
+}
+
+/// Fills bits in range [start, end) to true using byte-level operations.
+/// Only sets bits inside the range; bits outside it are left untouched.
+#[inline(always)]
+fn fill_bits_true(slice: &mut [u8], start: usize, end: usize) {
+    if start >= end {
+        return;
+    }
+
+    let start_byte = start / 8;
+    let start_bit = start % 8;
+    let end_byte = end / 8;
+    let end_bit = end % 8;
+
+    if start_byte == end_byte {
+        // All bits in same byte
+        // The range is 1..=7 bits wide here, so the mask (at most 0x7F) always fits in a u8
+        #[allow(clippy::cast_possible_truncation)]
+        let mask = ((1u16 << (end_bit - start_bit)) - 1) as u8;
+        slice[start_byte] |= mask << start_bit;
+    } else {
+        // First partial byte
+        if start_bit != 0 {
+            slice[start_byte] |= !((1u8 << start_bit) - 1);
+        }
+
+        // Middle bytes (bulk memset to 0xFF)
+        let fill_start = if start_bit != 0 {
+            start_byte + 1
+        } else {
+            start_byte
+        };
+        if fill_start < end_byte {
+            slice[fill_start..end_byte].fill(0xFF);
+        }
+
+        // Last partial byte
+        if end_bit != 0 {
+            slice[end_byte] |= (1u8 << end_bit) - 1;
+        }
+    }
+}
+
+/// Clears bits in range [start, end) to false using byte-level operations.
+/// Only clears bits inside the range; bits outside it are left untouched.
+#[inline(always)]
+fn fill_bits_false(slice: &mut [u8], start: usize, end: usize) {
+    if start >= end {
+        return;
+    }
+
+    let start_byte = start / 8;
+    let start_bit = start % 8;
+    let end_byte = end / 8;
+    let end_bit = end % 8;
+
+    if start_byte == end_byte {
+        // Range lies within one byte - build a mask with 0s only at the bits to clear
+        #[allow(clippy::cast_possible_truncation)]
+        let mask = ((1u16 << (end_bit - start_bit)) - 1) as u8;
+        slice[start_byte] &= !(mask << start_bit);
+    } else {
+        // First partial byte - keep the bits below start_bit, clear the rest
+        if start_bit != 0 {
+            slice[start_byte] &= (1u8 << start_bit) - 1;
+        }
+
+        // Middle bytes (bulk memset to 0x00)
+        let fill_start = if start_bit != 0 {
+            start_byte + 1
+        } else {
+            start_byte
+        };
+        if fill_start < end_byte {
+            slice[fill_start..end_byte].fill(0x00);
+        }
+
+        // Last partial byte - clear the bits below end_bit, keep the rest
+        if end_bit != 0 {
+            slice[end_byte] &= !((1u8 << end_bit) - 1);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use vortex_array::arrays::BoolArray;
+    use vortex_array::arrays::PrimitiveArray;
+    use vortex_array::assert_arrays_eq;
+    use vortex_buffer::BitBuffer;
+    use vortex_error::VortexResult;
+
+    use super::runend_decode_bools;
+
+    #[test]
+    fn decode_bools_alternating() -> VortexResult<()> {
+        // Runs T, F, T ending at 2, 5, 10: [T, T, F, F, F, T, T, T, T, T]
+        let ends = PrimitiveArray::from_iter([2u32, 5, 10]);
+        let values = BoolArray::from(BitBuffer::from(vec![true, false, true]));
+        let decoded = runend_decode_bools(ends, values, 0, 10)?;
+
+        let expected = BoolArray::from(BitBuffer::from(vec![
+            true, true, false, false, false, true, true, true, true, true,
+        ]));
+        assert_arrays_eq!(decoded, expected);
+        Ok(())
+    }
+
+    #[test]
+    fn decode_bools_mostly_true() -> VortexResult<()> {
+        // Mostly true with a single-bit false run: [T, T, T, T, T, F, T, T, T, T]
+        let ends = PrimitiveArray::from_iter([5u32, 6, 10]);
+        let values = BoolArray::from(BitBuffer::from(vec![true, false, true]));
+        let decoded = runend_decode_bools(ends, values, 0, 10)?;
+
+        let expected = BoolArray::from(BitBuffer::from(vec![
+            true, true, true, true, true, false, true, true, true, true,
+        ]));
+        assert_arrays_eq!(decoded, expected);
+        Ok(())
+    }
+
+    #[test]
+    fn decode_bools_mostly_false() -> VortexResult<()> {
+        // Mostly false with a single-bit true run: [F, F, F, F, F, T, F, F, F, F]
+        let ends = PrimitiveArray::from_iter([5u32, 6, 10]);
+        let values = BoolArray::from(BitBuffer::from(vec![false, true, false]));
+        let decoded = runend_decode_bools(ends, values, 0, 10)?;
+
+        let expected = BoolArray::from(BitBuffer::from(vec![
+            false, false, false, false, false, true, false, false, false, false,
+        ]));
+        assert_arrays_eq!(decoded, expected);
+        Ok(())
+    }
+
+    #[test]
+    fn decode_bools_all_true_single_run() -> VortexResult<()> {
+        let ends = PrimitiveArray::from_iter([10u32]);
+        let values = BoolArray::from(BitBuffer::from(vec![true]));
+        let decoded = runend_decode_bools(ends, values, 0, 10)?;
+
+        let expected = BoolArray::from(BitBuffer::from(vec![
+            true, true, true, true, true, true, true, true, true, true,
+        ]));
+        assert_arrays_eq!(decoded, expected);
+        Ok(())
+    }
+
+    #[test]
+    fn decode_bools_all_false_single_run() -> VortexResult<()> {
+        let ends = PrimitiveArray::from_iter([10u32]);
+        let values = BoolArray::from(BitBuffer::from(vec![false]));
+        let decoded = runend_decode_bools(ends, values, 0, 10)?;
+
+        let expected = BoolArray::from(BitBuffer::from(vec![
+            false, false, false, false, false, false, false, false, false, false,
+        ]));
+        assert_arrays_eq!(decoded, expected);
+        Ok(())
+    }
+
+    #[test]
+    fn decode_bools_with_offset() -> VortexResult<()> {
+        // Offset 2, length 6: [T, T, F, F, F, T, T, T, T, T] -> slice [2..8] = [F, F, F, T, T, T]
+        let ends = PrimitiveArray::from_iter([2u32, 5, 10]);
+        let values = BoolArray::from(BitBuffer::from(vec![true, false, true]));
+        let decoded = runend_decode_bools(ends, values, 2, 6)?;
+
+        let expected =
+            BoolArray::from(BitBuffer::from(vec![false, false, false, true, true, true]));
+        assert_arrays_eq!(decoded, expected);
+        Ok(())
+    }
+}
diff --git a/encodings/runend/src/lib.rs b/encodings/runend/src/lib.rs
index 589b16e2c65..5be018b036d 100644
--- a/encodings/runend/src/lib.rs
+++ b/encodings/runend/src/lib.rs
@@ -13,6 +13,7 @@ mod array;
 mod arrow;
 pub mod compress;
 mod compute;
+pub mod decompress_bool;
 mod iter;
 mod kernel;
 mod ops;