From 067e7dd9510cba8d8e72b36a8f354ad8d0248f5a Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Fri, 13 Feb 2026 14:31:58 -0500 Subject: [PATCH] fix RLE encoding positive/negative float zeros Signed-off-by: Andrew Duffy --- .../fastlanes/src/rle/array/rle_compress.rs | 29 ++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/encodings/fastlanes/src/rle/array/rle_compress.rs b/encodings/fastlanes/src/rle/array/rle_compress.rs index cbc877d0cfb..37eb1860388 100644 --- a/encodings/fastlanes/src/rle/array/rle_compress.rs +++ b/encodings/fastlanes/src/rle/array/rle_compress.rs @@ -6,6 +6,7 @@ use arrayref::array_ref; use fastlanes::RLE; use vortex_array::IntoArray; use vortex_array::ToCanonical; +use vortex_array::arrays::NativeValue; use vortex_array::arrays::PrimitiveArray; use vortex_array::validity::Validity; use vortex_array::vtable::ValidityHelper; @@ -31,13 +32,14 @@ impl RLEArray { fn rle_encode_typed(array: &PrimitiveArray) -> VortexResult where T: NativePType + RLE, + NativeValue: RLE, { let values = array.as_slice::(); let len = values.len(); let padded_len = len.next_multiple_of(FL_CHUNK_SIZE); // Allocate capacity up to the next multiple of chunk size. - let mut values_buf = BufferMut::::with_capacity(padded_len); + let mut values_buf = BufferMut::>::with_capacity(padded_len); let mut indices_buf = BufferMut::::with_capacity(padded_len); // Pre-allocate for one offset per chunk. @@ -50,8 +52,11 @@ where let mut chunks = values.chunks_exact(FL_CHUNK_SIZE); let mut process_chunk = |chunk_start_idx: usize, input: &[T; FL_CHUNK_SIZE]| { - // SAFETY: `MaybeUninit` and `T` have the same layout. - let rle_vals: &mut [T] = + // SAFETY: NativeValue is repr(transparent) + let input: &[NativeValue; FL_CHUNK_SIZE] = unsafe { std::mem::transmute(input) }; + + // SAFETY: `MaybeUninit>` and `NativeValue` have the same layout. + let rle_vals: &mut [NativeValue] = unsafe { std::mem::transmute(&mut values_uninit[value_count_acc..][..FL_CHUNK_SIZE]) }; // SAFETY: `MaybeUninit` and `u16` have the same layout. @@ -62,7 +67,7 @@ where // returned from `T::encode` are relative to the chunk. values_idx_offsets.push(value_count_acc as u64); - let value_count = T::encode( + let value_count = NativeValue::::encode( input, array_mut_ref![rle_vals, 0, FL_CHUNK_SIZE], array_mut_ref![rle_idxs, 0, FL_CHUNK_SIZE], @@ -92,6 +97,9 @@ where indices_buf.set_len(padded_len); } + // SAFETY: NativeValue is repr(transparent) to T. + let values_buf = unsafe { values_buf.transmute::().freeze() }; + RLEArray::try_new( values_buf.into_array(), PrimitiveArray::new(indices_buf.freeze(), padded_validity(array)).into_array(), @@ -254,4 +262,17 @@ mod tests { let expected = PrimitiveArray::new(values, primitive.validity().clone()); assert_arrays_eq!(decoded, expected); } + + // Regression test: RLE compression properly supports decoding pos/neg zeros + // See + #[rstest] + #[case(vec![f16::ZERO, f16::NEG_ZERO])] + #[case(vec![0f32, -0f32])] + #[case(vec![0f64, -0f64])] + fn test_float_zeros(#[case] values: Vec) { + let primitive = PrimitiveArray::from_iter(values); + let rle = RLEArray::encode(&primitive).unwrap(); + let decoded = rle.to_primitive(); + assert_arrays_eq!(primitive, decoded); + } }