diff --git a/encodings/fsst/public-api.lock b/encodings/fsst/public-api.lock index 260e14d7f09..3744b50e522 100644 --- a/encodings/fsst/public-api.lock +++ b/encodings/fsst/public-api.lock @@ -22,7 +22,7 @@ pub type vortex_fsst::FSST::ArrayData = vortex_fsst::FSSTData pub type vortex_fsst::FSST::OperationsVTable = vortex_fsst::FSST -pub type vortex_fsst::FSST::ValidityVTable = vortex_array::array::vtable::validity::ValidityVTableFromChild +pub type vortex_fsst::FSST::ValidityVTable = vortex_fsst::FSST pub fn vortex_fsst::FSST::append_to_builder(array: vortex_array::array::view::ArrayView<'_, Self>, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<()> @@ -52,9 +52,9 @@ impl vortex_array::array::vtable::operations::OperationsVTable, index: usize, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult -impl vortex_array::array::vtable::validity::ValidityChild for vortex_fsst::FSST +impl vortex_array::array::vtable::validity::ValidityVTable for vortex_fsst::FSST -pub fn vortex_fsst::FSST::validity_child(array: vortex_array::array::view::ArrayView<'_, vortex_fsst::FSST>) -> vortex_array::array::erased::ArrayRef +pub fn vortex_fsst::FSST::validity(array: vortex_array::array::view::ArrayView<'_, vortex_fsst::FSST>) -> vortex_error::VortexResult impl vortex_array::arrays::dict::take::TakeExecute for vortex_fsst::FSST diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index f09f0bf9dda..ee59d79a8a5 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -37,8 +37,8 @@ use vortex_array::serde::ArrayChildren; use vortex_array::validity::Validity; use vortex_array::vtable; use vortex_array::vtable::VTable; -use vortex_array::vtable::ValidityChild; -use vortex_array::vtable::ValidityVTableFromChild; +use vortex_array::vtable::ValidityVTable; +use vortex_array::vtable::child_to_validity; use vortex_array::vtable::validity_to_child; use vortex_buffer::Buffer; use vortex_buffer::ByteBuffer; @@ -98,7 +98,7 @@ impl ArrayEq for FSSTData { impl VTable for FSST { type ArrayData = FSSTData; type OperationsVTable = Self; - type ValidityVTable = ValidityVTableFromChild; + type ValidityVTable = Self; fn id(&self) -> ArrayId { Self::ID @@ -319,9 +319,8 @@ pub(crate) const SLOT_NAMES: [&str; NUM_SLOTS] = pub struct FSSTData { symbols: Buffer, symbol_lengths: Buffer, + // TODO(joe): this was broken by a previous pr. This will not be updated if a slot is replaced. codes: VarBinArray, - /// NOTE(ngates): this === codes, but is stored as an ArrayRef so we can return &ArrayRef! - codes_array: ArrayRef, /// Memoized compressor used for push-down of compute by compressing the RHS. compressor: Arc Compressor + Send>>>, @@ -505,13 +504,10 @@ impl FSSTData { Compressor::rebuild_from(symbols2.as_slice(), symbol_lengths2.as_slice()) }) as Box Compressor + Send>)); - let codes_array = codes.clone().into_array(); - Self { symbols, symbol_lengths, codes, - codes_array, compressor, } } @@ -577,9 +573,12 @@ pub trait FSSTArrayExt: TypedArrayRef { impl> FSSTArrayExt for T {} -impl ValidityChild for FSST { - fn validity_child(array: ArrayView<'_, FSST>) -> ArrayRef { - array.codes_array.clone() +impl ValidityVTable for FSST { + fn validity(array: ArrayView<'_, FSST>) -> VortexResult { + Ok(child_to_validity( + &array.slots()[CODES_VALIDITY_SLOT], + array.dtype().nullability(), + )) } } diff --git a/encodings/pco/public-api.lock b/encodings/pco/public-api.lock index 47e76a743bc..3444d0df2a8 100644 --- a/encodings/pco/public-api.lock +++ b/encodings/pco/public-api.lock @@ -22,7 +22,7 @@ pub type vortex_pco::Pco::ArrayData = vortex_pco::PcoData pub type vortex_pco::Pco::OperationsVTable = vortex_pco::Pco -pub type vortex_pco::Pco::ValidityVTable = vortex_array::array::vtable::validity::ValidityVTableFromValiditySliceHelper +pub type vortex_pco::Pco::ValidityVTable = vortex_pco::Pco pub fn vortex_pco::Pco::buffer(array: vortex_array::array::view::ArrayView<'_, Self>, idx: usize) -> vortex_array::buffer::BufferHandle @@ -42,12 +42,16 @@ pub fn vortex_pco::Pco::serialize(array: vortex_array::array::view::ArrayView<'_ pub fn vortex_pco::Pco::slot_name(_array: vortex_array::array::view::ArrayView<'_, Self>, idx: usize) -> alloc::string::String -pub fn vortex_pco::Pco::validate(&self, data: &vortex_pco::PcoData, dtype: &vortex_array::dtype::DType, len: usize, _slots: &[core::option::Option]) -> vortex_error::VortexResult<()> +pub fn vortex_pco::Pco::validate(&self, data: &vortex_pco::PcoData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> impl vortex_array::array::vtable::operations::OperationsVTable for vortex_pco::Pco pub fn vortex_pco::Pco::scalar_at(array: vortex_array::array::view::ArrayView<'_, vortex_pco::Pco>, index: usize, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult +impl vortex_array::array::vtable::validity::ValidityVTable for vortex_pco::Pco + +pub fn vortex_pco::Pco::validity(array: vortex_array::array::view::ArrayView<'_, vortex_pco::Pco>) -> vortex_error::VortexResult + impl vortex_array::arrays::slice::SliceReduce for vortex_pco::Pco pub fn vortex_pco::Pco::slice(array: vortex_array::array::view::ArrayView<'_, Self>, range: core::ops::range::Range) -> vortex_error::VortexResult> @@ -82,7 +86,7 @@ pub struct vortex_pco::PcoData impl vortex_pco::PcoData -pub fn vortex_pco::PcoData::decompress(&self, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult +pub fn vortex_pco::PcoData::decompress(&self, unsliced_validity: &vortex_array::validity::Validity, ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult pub fn vortex_pco::PcoData::from_array(array: vortex_array::array::erased::ArrayRef, level: usize, nums_per_page: usize) -> vortex_error::VortexResult @@ -92,9 +96,9 @@ pub fn vortex_pco::PcoData::is_empty(&self) -> bool pub fn vortex_pco::PcoData::len(&self) -> usize -pub fn vortex_pco::PcoData::new(chunk_metas: alloc::vec::Vec, pages: alloc::vec::Vec, ptype: vortex_array::dtype::ptype::PType, metadata: vortex_pco::PcoMetadata, len: usize, validity: vortex_array::validity::Validity) -> Self +pub fn vortex_pco::PcoData::new(chunk_metas: alloc::vec::Vec, pages: alloc::vec::Vec, ptype: vortex_array::dtype::ptype::PType, metadata: vortex_pco::PcoMetadata, len: usize) -> Self -pub fn vortex_pco::PcoData::validate(&self, dtype: &vortex_array::dtype::DType, len: usize) -> vortex_error::VortexResult<()> +pub fn vortex_pco::PcoData::validate(&self, dtype: &vortex_array::dtype::DType, len: usize, validity: &vortex_array::validity::Validity) -> vortex_error::VortexResult<()> impl core::clone::Clone for vortex_pco::PcoData @@ -104,10 +108,6 @@ impl core::fmt::Debug for vortex_pco::PcoData pub fn vortex_pco::PcoData::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result -impl vortex_array::array::vtable::validity::ValiditySliceHelper for vortex_pco::PcoData - -pub fn vortex_pco::PcoData::unsliced_validity_and_slice(&self) -> (&vortex_array::validity::Validity, usize, usize) - impl vortex_array::hash::ArrayEq for vortex_pco::PcoData pub fn vortex_pco::PcoData::array_eq(&self, other: &Self, precision: vortex_array::hash::Precision) -> bool diff --git a/encodings/pco/src/array.rs b/encodings/pco/src/array.rs index 5cd9c615145..66209a16edd 100644 --- a/encodings/pco/src/array.rs +++ b/encodings/pco/src/array.rs @@ -42,8 +42,8 @@ use vortex_array::validity::Validity; use vortex_array::vtable; use vortex_array::vtable::OperationsVTable; use vortex_array::vtable::VTable; -use vortex_array::vtable::ValiditySliceHelper; -use vortex_array::vtable::ValidityVTableFromValiditySliceHelper; +use vortex_array::vtable::ValidityVTable; +use vortex_array::vtable::child_to_validity; use vortex_array::vtable::validity_to_child; use vortex_buffer::BufferMut; use vortex_buffer::ByteBuffer; @@ -83,7 +83,6 @@ vtable!(Pco, Pco, PcoData); impl ArrayHash for PcoData { fn array_hash(&self, state: &mut H, precision: Precision) { - self.unsliced_validity.array_hash(state, precision); self.unsliced_n_rows.hash(state); self.slice_start.hash(state); self.slice_stop.hash(state); @@ -99,10 +98,7 @@ impl ArrayHash for PcoData { impl ArrayEq for PcoData { fn array_eq(&self, other: &Self, precision: Precision) -> bool { - if !self - .unsliced_validity - .array_eq(&other.unsliced_validity, precision) - || self.unsliced_n_rows != other.unsliced_n_rows + if self.unsliced_n_rows != other.unsliced_n_rows || self.slice_start != other.slice_start || self.slice_stop != other.slice_stop || self.chunk_metas.len() != other.chunk_metas.len() @@ -128,7 +124,7 @@ impl VTable for Pco { type ArrayData = PcoData; type OperationsVTable = Self; - type ValidityVTable = ValidityVTableFromValiditySliceHelper; + type ValidityVTable = Self; fn id(&self) -> ArrayId { Self::ID @@ -139,9 +135,10 @@ impl VTable for Pco { data: &PcoData, dtype: &DType, len: usize, - _slots: &[Option], + slots: &[Option], ) -> VortexResult<()> { - data.validate(dtype, len) + let validity = child_to_validity(&slots[0], dtype.nullability()); + data.validate(dtype, len, &validity) } fn nbuffers(array: ArrayView<'_, Self>) -> usize { @@ -206,14 +203,7 @@ impl VTable for Pco { vortex_ensure!(pages.len() == expected_n_pages); let slots = vec![validity_to_child(&validity, len)]; - let data = PcoData::new( - chunk_metas, - pages, - dtype.as_ptype(), - metadata, - len, - validity, - ); + let data = PcoData::new(chunk_metas, pages, dtype.as_ptype(), metadata, len); Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) } @@ -222,7 +212,14 @@ impl VTable for Pco { } fn execute(array: Array, ctx: &mut ExecutionCtx) -> VortexResult { - Ok(ExecutionResult::done(array.decompress(ctx)?.into_array())) + let unsliced_validity = + child_to_validity(&array.as_ref().slots()[0], array.dtype().nullability()); + Ok(ExecutionResult::done( + array + .data() + .decompress(&unsliced_validity, ctx)? + .into_array(), + )) } fn reduce_parent( @@ -273,13 +270,14 @@ pub struct Pco; impl Pco { pub const ID: ArrayId = ArrayId::new_ref("vortex.pco"); - pub(crate) fn try_new(dtype: DType, data: PcoData) -> VortexResult { + pub(crate) fn try_new( + dtype: DType, + data: PcoData, + validity: Validity, + ) -> VortexResult { let len = data.len(); - data.validate(&dtype, len)?; - let slots = vec![validity_to_child( - &data.unsliced_validity, - data.unsliced_n_rows, - )]; + data.validate(&dtype, len, &validity)?; + let slots = vec![validity_to_child(&validity, data.unsliced_n_rows())]; Ok(unsafe { Array::from_parts_unchecked(ArrayParts::new(Pco, dtype, len, data).with_slots(slots)) }) @@ -292,8 +290,9 @@ impl Pco { values_per_page: usize, ) -> VortexResult { let dtype = parray.dtype().clone(); + let validity = parray.validity()?; let data = PcoData::from_primitive(parray, level, values_per_page)?; - Self::try_new(dtype, data) + Self::try_new(dtype, data, validity) } } @@ -307,14 +306,13 @@ pub struct PcoData { pub(crate) pages: Vec, pub(crate) metadata: PcoMetadata, ptype: PType, - pub(crate) unsliced_validity: Validity, unsliced_n_rows: usize, slice_start: usize, slice_stop: usize, } impl PcoData { - pub fn validate(&self, dtype: &DType, len: usize) -> VortexResult<()> { + pub fn validate(&self, dtype: &DType, len: usize, validity: &Validity) -> VortexResult<()> { let _ = number_type_from_ptype(self.ptype); vortex_ensure!( dtype.as_ptype() == self.ptype, @@ -323,9 +321,9 @@ impl PcoData { dtype.as_ptype() ); vortex_ensure!( - dtype.nullability() == self.unsliced_validity.nullability(), + dtype.nullability() == validity.nullability(), "expected nullability {}, got {}", - self.unsliced_validity.nullability(), + validity.nullability(), dtype.nullability() ); vortex_ensure!( @@ -340,7 +338,7 @@ impl PcoData { "expected len {len}, got {}", self.slice_stop - self.slice_start ); - if let Some(validity_len) = self.unsliced_validity.maybe_len() { + if let Some(validity_len) = validity.maybe_len() { vortex_ensure!( validity_len == self.unsliced_n_rows, "expected validity len {}, got {}", @@ -373,14 +371,12 @@ impl PcoData { ptype: PType, metadata: PcoMetadata, len: usize, - validity: Validity, ) -> Self { Self { chunk_metas, pages, metadata, ptype, - unsliced_validity: validity, unsliced_n_rows: len, slice_start: 0, slice_stop: len, @@ -464,7 +460,6 @@ impl PcoData { parray.dtype().as_ptype(), metadata, parray.len(), - parray.validity()?, )) } @@ -478,33 +473,36 @@ impl PcoData { Self::from_primitive(&parray, level, nums_per_page) } - pub fn decompress(&self, ctx: &mut ExecutionCtx) -> VortexResult { + pub fn decompress( + &self, + unsliced_validity: &Validity, + ctx: &mut ExecutionCtx, + ) -> VortexResult { // To start, we figure out which chunks and pages we need to decompress, and with // what value offset into the first such page. let number_type = number_type_from_ptype(self.ptype); let values_byte_buffer = match_number_enum!( number_type, NumberType => { - self.decompress_values_typed::(ctx)? + self.decompress_values_typed::(unsliced_validity, ctx)? } ); Ok(PrimitiveArray::from_values_byte_buffer( values_byte_buffer, self.ptype, - self.unsliced_validity - .slice(self.slice_start..self.slice_stop)?, + unsliced_validity.slice(self.slice_start..self.slice_stop)?, self.slice_stop - self.slice_start, )) } fn decompress_values_typed( &self, + unsliced_validity: &Validity, ctx: &mut ExecutionCtx, ) -> VortexResult { // To start, we figure out what range of values we need to decompress. - let slice_value_indices = self - .unsliced_validity + let slice_value_indices = unsliced_validity .execute_mask(self.unsliced_n_rows, ctx)? .valid_counts_for_indices(&[self.slice_start, self.slice_stop]); let slice_value_start = slice_value_indices[0]; @@ -605,9 +603,10 @@ impl PcoData { } } -impl ValiditySliceHelper for PcoData { - fn unsliced_validity_and_slice(&self) -> (&Validity, usize, usize) { - (&self.unsliced_validity, self.slice_start, self.slice_stop) +impl ValidityVTable for Pco { + fn validity(array: ArrayView<'_, Pco>) -> VortexResult { + let unsliced_validity = child_to_validity(&array.slots()[0], array.dtype().nullability()); + unsliced_validity.slice(array.slice_start()..array.slice_stop()) } } @@ -618,9 +617,10 @@ impl OperationsVTable for Pco { _ctx: &mut ExecutionCtx, ) -> VortexResult { let mut ctx = LEGACY_SESSION.create_execution_ctx(); + let unsliced_validity = child_to_validity(&array.slots()[0], array.dtype().nullability()); array ._slice(index, index + 1) - .decompress(&mut ctx)? + .decompress(&unsliced_validity, &mut ctx)? .into_array() .scalar_at(0) } diff --git a/encodings/pco/src/compute/cast.rs b/encodings/pco/src/compute/cast.rs index 877535bbd97..21598191fec 100644 --- a/encodings/pco/src/compute/cast.rs +++ b/encodings/pco/src/compute/cast.rs @@ -6,6 +6,7 @@ use vortex_array::ArrayView; use vortex_array::IntoArray; use vortex_array::dtype::DType; use vortex_array::scalar_fn::fns::cast::CastReduce; +use vortex_array::vtable::child_to_validity; use vortex_error::VortexResult; use crate::Pco; @@ -24,10 +25,10 @@ impl CastReduce for Pco { // PCO supports: F16, F32, F64, I16, I32, I64, U16, U32, U64 if array.dtype().eq_ignore_nullability(dtype) { // Create a new validity with the target nullability - let new_validity = array - .unsliced_validity - .clone() - .cast_nullability(dtype.nullability(), array.len())?; + let unsliced_validity = + child_to_validity(&array.slots()[0], array.dtype().nullability()); + let new_validity = + unsliced_validity.cast_nullability(dtype.nullability(), array.len())?; let data = PcoData::new( array.chunk_metas.clone(), @@ -35,11 +36,12 @@ impl CastReduce for Pco { dtype.as_ptype(), array.metadata.clone(), array.unsliced_n_rows(), - new_validity, ) ._slice(array.slice_start(), array.slice_stop()); - return Ok(Some(Pco::try_new(dtype.clone(), data)?.into_array())); + return Ok(Some( + Pco::try_new(dtype.clone(), data, new_validity)?.into_array(), + )); } // For other casts (e.g., numeric type changes), decode to canonical and let PrimitiveArray handle it diff --git a/encodings/pco/src/slice.rs b/encodings/pco/src/slice.rs index a53a1e39898..2f341f989b7 100644 --- a/encodings/pco/src/slice.rs +++ b/encodings/pco/src/slice.rs @@ -7,14 +7,21 @@ use vortex_array::ArrayRef; use vortex_array::ArrayView; use vortex_array::IntoArray; use vortex_array::arrays::slice::SliceReduce; +use vortex_array::vtable::child_to_validity; use vortex_error::VortexResult; use crate::Pco; impl SliceReduce for Pco { fn slice(array: ArrayView<'_, Self>, range: Range) -> VortexResult> { + let unsliced_validity = child_to_validity(&array.slots()[0], array.dtype().nullability()); Ok(Some( - Pco::try_new(array.dtype().clone(), array._slice(range.start, range.end))?.into_array(), + Pco::try_new( + array.dtype().clone(), + array._slice(range.start, range.end), + unsliced_validity, + )? + .into_array(), )) } } diff --git a/encodings/pco/src/test.rs b/encodings/pco/src/test.rs index 28c4cdbf830..2112da6a222 100644 --- a/encodings/pco/src/test.rs +++ b/encodings/pco/src/test.rs @@ -22,6 +22,7 @@ use vortex_array::serde::SerializedArray; use vortex_array::session::ArraySession; use vortex_array::session::ArraySessionExt; use vortex_array::validity::Validity; +use vortex_array::vtable::child_to_validity; use vortex_buffer::Buffer; use vortex_buffer::BufferMut; use vortex_error::VortexExpect; @@ -49,7 +50,11 @@ fn test_compress_decompress() { // check full decompression works let mut ctx = LEGACY_SESSION.create_execution_ctx(); - let decompressed = compressed.decompress(&mut ctx).unwrap(); + let unsliced_validity = child_to_validity( + &compressed.as_ref().slots()[0], + compressed.dtype().nullability(), + ); + let decompressed = compressed.decompress(&unsliced_validity, &mut ctx).unwrap(); assert_arrays_eq!(decompressed, PrimitiveArray::from_iter(data)); // check slicing works @@ -72,7 +77,11 @@ fn test_compress_decompress_small() { assert_arrays_eq!(compressed, expected); let mut ctx = LEGACY_SESSION.create_execution_ctx(); - let decompressed = compressed.decompress(&mut ctx).unwrap(); + let unsliced_validity = child_to_validity( + &compressed.as_ref().slots()[0], + compressed.dtype().nullability(), + ); + let decompressed = compressed.decompress(&unsliced_validity, &mut ctx).unwrap(); assert_arrays_eq!(decompressed, expected); } @@ -82,7 +91,11 @@ fn test_empty() { let array = PrimitiveArray::from_iter(data.clone()); let compressed = Pco::from_primitive(&array, 3, 100).unwrap(); let mut ctx = LEGACY_SESSION.create_execution_ctx(); - let primitive = compressed.decompress(&mut ctx).unwrap(); + let unsliced_validity = child_to_validity( + &compressed.as_ref().slots()[0], + compressed.dtype().nullability(), + ); + let primitive = compressed.decompress(&unsliced_validity, &mut ctx).unwrap(); assert_arrays_eq!(primitive, PrimitiveArray::from_iter(data)); } @@ -99,6 +112,7 @@ fn test_validity_and_multiple_chunks_and_pages() { let compression_level = 3; let values_per_chunk = 33; let values_per_page = 10; + let validity = array.validity().unwrap(); let compressed = Pco::try_new( array.dtype().clone(), PcoData::from_primitive_with_values_per_chunk( @@ -108,6 +122,7 @@ fn test_validity_and_multiple_chunks_and_pages() { values_per_page, ) .unwrap(), + validity, ) .vortex_expect("PcoData is always valid"); diff --git a/encodings/zstd/benches/listview_rebuild.rs b/encodings/zstd/benches/listview_rebuild.rs index 55690cdccae..82b16c392fa 100644 --- a/encodings/zstd/benches/listview_rebuild.rs +++ b/encodings/zstd/benches/listview_rebuild.rs @@ -18,9 +18,14 @@ fn rebuild_naive(bencher: Bencher) { let dudes = VarBinViewArray::from_iter_str(["Washington", "Adams", "Jefferson", "Madison"]) .into_array(); let dtype = dudes.dtype().clone(); - let dudes = Zstd::try_new(dtype, ZstdData::from_array(dudes, 9, 1024).unwrap()) - .unwrap() - .into_array(); + let validity = dudes.validity().unwrap(); + let dudes = Zstd::try_new( + dtype, + ZstdData::from_array(dudes, 9, 1024).unwrap(), + validity, + ) + .unwrap() + .into_array(); let offsets = std::iter::repeat_n(0u32, 1024) .collect::>() diff --git a/encodings/zstd/public-api.lock b/encodings/zstd/public-api.lock index e3ce21c73c1..3413dd1c75b 100644 --- a/encodings/zstd/public-api.lock +++ b/encodings/zstd/public-api.lock @@ -14,7 +14,7 @@ pub fn vortex_zstd::Zstd::from_var_bin_view(vbv: &vortex_array::arrays::varbinvi pub fn vortex_zstd::Zstd::from_var_bin_view_without_dict(vbv: &vortex_array::arrays::varbinview::vtable::VarBinViewArray, level: i32, values_per_frame: usize) -> vortex_error::VortexResult -pub fn vortex_zstd::Zstd::try_new(dtype: vortex_array::dtype::DType, data: vortex_zstd::ZstdData) -> vortex_error::VortexResult +pub fn vortex_zstd::Zstd::try_new(dtype: vortex_array::dtype::DType, data: vortex_zstd::ZstdData, validity: vortex_array::validity::Validity) -> vortex_error::VortexResult impl core::clone::Clone for vortex_zstd::Zstd @@ -30,7 +30,7 @@ pub type vortex_zstd::Zstd::ArrayData = vortex_zstd::ZstdData pub type vortex_zstd::Zstd::OperationsVTable = vortex_zstd::Zstd -pub type vortex_zstd::Zstd::ValidityVTable = vortex_array::array::vtable::validity::ValidityVTableFromValiditySliceHelper +pub type vortex_zstd::Zstd::ValidityVTable = vortex_zstd::Zstd pub fn vortex_zstd::Zstd::buffer(array: vortex_array::array::view::ArrayView<'_, Self>, idx: usize) -> vortex_array::buffer::BufferHandle @@ -50,12 +50,16 @@ pub fn vortex_zstd::Zstd::serialize(array: vortex_array::array::view::ArrayView< pub fn vortex_zstd::Zstd::slot_name(_array: vortex_array::array::view::ArrayView<'_, Self>, idx: usize) -> alloc::string::String -pub fn vortex_zstd::Zstd::validate(&self, data: &Self::ArrayData, dtype: &vortex_array::dtype::DType, len: usize, _slots: &[core::option::Option]) -> vortex_error::VortexResult<()> +pub fn vortex_zstd::Zstd::validate(&self, data: &Self::ArrayData, dtype: &vortex_array::dtype::DType, len: usize, slots: &[core::option::Option]) -> vortex_error::VortexResult<()> impl vortex_array::array::vtable::operations::OperationsVTable for vortex_zstd::Zstd pub fn vortex_zstd::Zstd::scalar_at(array: vortex_array::array::view::ArrayView<'_, vortex_zstd::Zstd>, index: usize, _ctx: &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult +impl vortex_array::array::vtable::validity::ValidityVTable for vortex_zstd::Zstd + +pub fn vortex_zstd::Zstd::validity(array: vortex_array::array::view::ArrayView<'_, vortex_zstd::Zstd>) -> vortex_error::VortexResult + impl vortex_array::arrays::slice::SliceReduce for vortex_zstd::Zstd pub fn vortex_zstd::Zstd::slice(array: vortex_array::array::view::ArrayView<'_, Self>, range: core::ops::range::Range) -> vortex_error::VortexResult> @@ -108,15 +112,15 @@ pub fn vortex_zstd::ZstdData::from_var_bin_view(vbv: &vortex_array::arrays::varb pub fn vortex_zstd::ZstdData::from_var_bin_view_without_dict(vbv: &vortex_array::arrays::varbinview::vtable::VarBinViewArray, level: i32, values_per_frame: usize) -> vortex_error::VortexResult -pub fn vortex_zstd::ZstdData::into_parts(self) -> vortex_zstd::ZstdDataParts +pub fn vortex_zstd::ZstdData::into_parts(self, validity: vortex_array::validity::Validity) -> vortex_zstd::ZstdDataParts pub fn vortex_zstd::ZstdData::is_empty(&self) -> bool pub fn vortex_zstd::ZstdData::len(&self) -> usize -pub fn vortex_zstd::ZstdData::new(dictionary: core::option::Option, frames: alloc::vec::Vec, metadata: vortex_zstd::ZstdMetadata, n_rows: usize, validity: vortex_array::validity::Validity) -> Self +pub fn vortex_zstd::ZstdData::new(dictionary: core::option::Option, frames: alloc::vec::Vec, metadata: vortex_zstd::ZstdMetadata, n_rows: usize) -> Self -pub fn vortex_zstd::ZstdData::validate(&self, dtype: &vortex_array::dtype::DType, len: usize) -> vortex_error::VortexResult<()> +pub fn vortex_zstd::ZstdData::validate(&self, dtype: &vortex_array::dtype::DType, len: usize, validity: &vortex_array::validity::Validity) -> vortex_error::VortexResult<()> impl core::clone::Clone for vortex_zstd::ZstdData @@ -126,10 +130,6 @@ impl core::fmt::Debug for vortex_zstd::ZstdData pub fn vortex_zstd::ZstdData::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result -impl vortex_array::array::vtable::validity::ValiditySliceHelper for vortex_zstd::ZstdData - -pub fn vortex_zstd::ZstdData::unsliced_validity_and_slice(&self) -> (&vortex_array::validity::Validity, usize, usize) - impl vortex_array::hash::ArrayEq for vortex_zstd::ZstdData pub fn vortex_zstd::ZstdData::array_eq(&self, other: &Self, precision: vortex_array::hash::Precision) -> bool diff --git a/encodings/zstd/src/array.rs b/encodings/zstd/src/array.rs index f2378e52e5f..4ed08ccad1d 100644 --- a/encodings/zstd/src/array.rs +++ b/encodings/zstd/src/array.rs @@ -37,8 +37,8 @@ use vortex_array::validity::Validity; use vortex_array::vtable; use vortex_array::vtable::OperationsVTable; use vortex_array::vtable::VTable; -use vortex_array::vtable::ValiditySliceHelper; -use vortex_array::vtable::ValidityVTableFromValiditySliceHelper; +use vortex_array::vtable::ValidityVTable; +use vortex_array::vtable::child_to_validity; use vortex_array::vtable::validity_to_child; use vortex_buffer::Alignment; use vortex_buffer::Buffer; @@ -96,7 +96,6 @@ impl ArrayHash for ZstdData { for frame in &self.frames { frame.array_hash(state, precision); } - self.unsliced_validity.array_hash(state, precision); self.unsliced_n_rows.hash(state); self.slice_start.hash(state); self.slice_stop.hash(state); @@ -120,9 +119,7 @@ impl ArrayEq for ZstdData { return false; } } - self.unsliced_validity - .array_eq(&other.unsliced_validity, precision) - && self.unsliced_n_rows == other.unsliced_n_rows + self.unsliced_n_rows == other.unsliced_n_rows && self.slice_start == other.slice_start && self.slice_stop == other.slice_stop } @@ -132,7 +129,7 @@ impl VTable for Zstd { type ArrayData = ZstdData; type OperationsVTable = Self; - type ValidityVTable = ValidityVTableFromValiditySliceHelper; + type ValidityVTable = Self; fn id(&self) -> ArrayId { Self::ID @@ -143,9 +140,10 @@ impl VTable for Zstd { data: &Self::ArrayData, dtype: &DType, len: usize, - _slots: &[Option], + slots: &[Option], ) -> VortexResult<()> { - data.validate(dtype, len) + let validity = child_to_validity(&slots[0], dtype.nullability()); + data.validate(dtype, len, &validity) } fn nbuffers(array: ArrayView<'_, Self>) -> usize { @@ -219,13 +217,7 @@ impl VTable for Zstd { }; let slots = vec![validity_to_child(&validity, len)]; - let data = ZstdData::new( - dictionary_buffer, - compressed_buffers, - metadata, - len, - validity, - ); + let data = ZstdData::new(dictionary_buffer, compressed_buffers, metadata, len); Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots)) } @@ -234,7 +226,11 @@ impl VTable for Zstd { } fn execute(array: Array, ctx: &mut ExecutionCtx) -> VortexResult { - Zstd::decompress(&array, ctx)? + let unsliced_validity = + child_to_validity(&array.as_ref().slots()[0], array.dtype().nullability()); + array + .data() + .decompress(array.dtype(), &unsliced_validity, ctx)? .execute::(ctx) .map(ExecutionResult::done) } @@ -254,13 +250,10 @@ pub struct Zstd; impl Zstd { pub const ID: ArrayId = ArrayId::new_ref("vortex.zstd"); - pub fn try_new(dtype: DType, data: ZstdData) -> VortexResult { + pub fn try_new(dtype: DType, data: ZstdData, validity: Validity) -> VortexResult { let len = data.len(); - data.validate(&dtype, len)?; - let slots = vec![validity_to_child( - &data.unsliced_validity, - data.unsliced_n_rows, - )]; + data.validate(&dtype, len, &validity)?; + let slots = vec![validity_to_child(&validity, data.unsliced_n_rows())]; Ok(unsafe { Array::from_parts_unchecked(ArrayParts::new(Zstd, dtype, len, data).with_slots(slots)) }) @@ -272,9 +265,11 @@ impl Zstd { level: i32, values_per_frame: usize, ) -> VortexResult { + let validity = vbv.validity()?; Self::try_new( vbv.dtype().clone(), ZstdData::from_var_bin_view_without_dict(vbv, level, values_per_frame)?, + validity, ) } @@ -284,9 +279,11 @@ impl Zstd { level: i32, values_per_frame: usize, ) -> VortexResult { + let validity = parray.validity()?; Self::try_new( parray.dtype().clone(), ZstdData::from_primitive(parray, level, values_per_frame)?, + validity, ) } @@ -296,14 +293,20 @@ impl Zstd { level: i32, values_per_frame: usize, ) -> VortexResult { + let validity = vbv.validity()?; Self::try_new( vbv.dtype().clone(), ZstdData::from_var_bin_view(vbv, level, values_per_frame)?, + validity, ) } pub fn decompress(array: &ZstdArray, ctx: &mut ExecutionCtx) -> VortexResult { - array.data().decompress(array.dtype(), ctx) + let unsliced_validity = + child_to_validity(&array.as_ref().slots()[0], array.dtype().nullability()); + array + .data() + .decompress(array.dtype(), &unsliced_validity, ctx) } } @@ -316,7 +319,6 @@ pub struct ZstdData { pub(crate) dictionary: Option, pub(crate) frames: Vec, pub(crate) metadata: ZstdMetadata, - pub(crate) unsliced_validity: Validity, unsliced_n_rows: usize, slice_start: usize, slice_stop: usize, @@ -436,20 +438,18 @@ impl ZstdData { frames: Vec, metadata: ZstdMetadata, n_rows: usize, - validity: Validity, ) -> Self { Self { dictionary, frames, metadata, - unsliced_validity: validity, unsliced_n_rows: n_rows, slice_start: 0, slice_stop: n_rows, } } - pub fn validate(&self, dtype: &DType, len: usize) -> VortexResult<()> { + pub fn validate(&self, dtype: &DType, len: usize, validity: &Validity) -> VortexResult<()> { vortex_ensure!( matches!( dtype, @@ -475,7 +475,7 @@ impl ZstdData { self.slice_stop - self.slice_start, len ); - if let Some(validity_len) = self.unsliced_validity.maybe_len() { + if let Some(validity_len) = validity.maybe_len() { vortex_ensure!( validity_len == self.unsliced_n_rows, "Validity length {} does not match unsliced row count {}", @@ -672,13 +672,7 @@ impl ZstdData { frames: frame_metas, }; - Ok(ZstdData::new( - dictionary, - frames, - metadata, - parray.len(), - parray.validity()?, - )) + Ok(ZstdData::new(dictionary, frames, metadata, parray.len())) } /// Creates a ZstdArray from a VarBinView array. @@ -760,13 +754,7 @@ impl ZstdData { .try_into()?, frames: frame_metas, }; - Ok(ZstdData::new( - dictionary, - frames, - metadata, - vbv.len(), - vbv.validity()?, - )) + Ok(ZstdData::new(dictionary, frames, metadata, vbv.len())) } pub fn from_canonical( @@ -802,13 +790,17 @@ impl ZstdData { } } - fn decompress(&self, dtype: &DType, ctx: &mut ExecutionCtx) -> VortexResult { + fn decompress( + &self, + dtype: &DType, + unsliced_validity: &Validity, + ctx: &mut ExecutionCtx, + ) -> VortexResult { // To start, we figure out which frames we need to decompress, and with // what row offset into the first such frame. let byte_width = Self::byte_width(dtype); let slice_n_rows = self.slice_stop - self.slice_start; - let slice_value_indices = self - .unsliced_validity + let slice_value_indices = unsliced_validity .execute_mask(self.unsliced_n_rows, ctx)? .valid_counts_for_indices(&[self.slice_start, self.slice_stop]); @@ -875,9 +867,7 @@ impl ZstdData { let decompressed = decompressed.freeze(); // Last, we slice the exact values requested out of the decompressed data. - let mut slice_validity = self - .unsliced_validity - .slice(self.slice_start..self.slice_stop)?; + let mut slice_validity = unsliced_validity.slice(self.slice_start..self.slice_stop)?; // NOTE: this block handles setting the output type when the validity and DType disagree. // @@ -982,12 +972,12 @@ impl ZstdData { self.slice_stop == self.slice_start } - pub fn into_parts(self) -> ZstdDataParts { + pub fn into_parts(self, validity: Validity) -> ZstdDataParts { ZstdDataParts { dictionary: self.dictionary, frames: self.frames, metadata: self.metadata, - validity: self.unsliced_validity, + validity, n_rows: self.unsliced_n_rows, slice_start: self.slice_start, slice_stop: self.slice_stop, @@ -1007,9 +997,10 @@ impl ZstdData { } } -impl ValiditySliceHelper for ZstdData { - fn unsliced_validity_and_slice(&self) -> (&Validity, usize, usize) { - (&self.unsliced_validity, self.slice_start, self.slice_stop) +impl ValidityVTable for Zstd { + fn validity(array: ArrayView<'_, Zstd>) -> VortexResult { + let unsliced_validity = child_to_validity(&array.slots()[0], array.dtype().nullability()); + unsliced_validity.slice(array.slice_start()..array.slice_stop()) } } @@ -1020,11 +1011,11 @@ impl OperationsVTable for Zstd { _ctx: &mut ExecutionCtx, ) -> VortexResult { let mut ctx = LEGACY_SESSION.create_execution_ctx(); - let sliced = Zstd::try_new( - array.dtype().clone(), - array.data().with_slice(index, index + 1), - )?; - Zstd::decompress(&sliced, &mut ctx)?.scalar_at(0) + let unsliced_validity = child_to_validity(&array.slots()[0], array.dtype().nullability()); + let sliced = array.data().with_slice(index, index + 1); + sliced + .decompress(array.dtype(), &unsliced_validity, &mut ctx)? + .scalar_at(0) } } diff --git a/encodings/zstd/src/compute/cast.rs b/encodings/zstd/src/compute/cast.rs index 4ad636fe88a..2beb94a1a4e 100644 --- a/encodings/zstd/src/compute/cast.rs +++ b/encodings/zstd/src/compute/cast.rs @@ -8,7 +8,7 @@ use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::scalar_fn::fns::cast::CastReduce; use vortex_array::validity::Validity; -use vortex_array::vtable::ValiditySliceHelper; +use vortex_array::vtable::child_to_validity; use vortex_error::VortexResult; use crate::Zstd; @@ -33,6 +33,8 @@ impl CastReduce for Zstd { } (Nullability::NonNullable, Nullability::Nullable) => { // nonnull => null, trivial cast by altering the validity + let unsliced_validity = + child_to_validity(&array.slots()[0], array.dtype().nullability()); Ok(Some( Zstd::try_new( dtype.clone(), @@ -41,8 +43,8 @@ impl CastReduce for Zstd { array.frames.clone(), array.metadata.clone(), array.unsliced_n_rows(), - array.unsliced_validity.clone(), ), + unsliced_validity, )? .into_array() .slice(array.slice_start()..array.slice_stop())?, @@ -50,8 +52,10 @@ impl CastReduce for Zstd { } (Nullability::Nullable, Nullability::NonNullable) => { // null => non-null works if there are no nulls in the sliced range + let unsliced_validity = + child_to_validity(&array.slots()[0], array.dtype().nullability()); let has_nulls = !matches!( - array.sliced_validity()?, + unsliced_validity.slice(array.slice_start()..array.slice_stop())?, Validity::AllValid | Validity::NonNullable ); @@ -69,8 +73,8 @@ impl CastReduce for Zstd { array.frames.clone(), array.metadata.clone(), array.unsliced_n_rows(), - array.unsliced_validity.clone(), ), + unsliced_validity, )? .into_array() .slice(array.slice_start()..array.slice_stop())?, diff --git a/encodings/zstd/src/slice.rs b/encodings/zstd/src/slice.rs index 2f74d40072c..6b6906a1628 100644 --- a/encodings/zstd/src/slice.rs +++ b/encodings/zstd/src/slice.rs @@ -7,16 +7,19 @@ use vortex_array::ArrayRef; use vortex_array::ArrayView; use vortex_array::IntoArray; use vortex_array::arrays::slice::SliceReduce; +use vortex_array::vtable::child_to_validity; use vortex_error::VortexResult; use crate::Zstd; impl SliceReduce for Zstd { fn slice(array: ArrayView<'_, Self>, range: Range) -> VortexResult> { + let unsliced_validity = child_to_validity(&array.slots()[0], array.dtype().nullability()); Ok(Some( Zstd::try_new( array.dtype().clone(), array.data().with_slice(range.start, range.end), + unsliced_validity, )? .into_array(), )) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 0e198a1cdc0..84b3fb9513d 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -18846,12 +18846,6 @@ impl vortex_array::ValidityVTable for vortex_array:: pub fn vortex_array::ValidityVTableFromChildSliceHelper::validity(array: vortex_array::ArrayView<'_, V>) -> vortex_error::VortexResult -pub struct vortex_array::vtable::ValidityVTableFromValiditySliceHelper - -impl vortex_array::ValidityVTable for vortex_array::ValidityVTableFromValiditySliceHelper where ::ArrayData: vortex_array::ValiditySliceHelper - -pub fn vortex_array::ValidityVTableFromValiditySliceHelper::validity(array: vortex_array::ArrayView<'_, V>) -> vortex_error::VortexResult - pub trait vortex_array::vtable::ArrayPlugin: 'static + core::marker::Send + core::marker::Sync pub fn vortex_array::vtable::ArrayPlugin::deserialize(&self, dtype: &vortex_array::dtype::DType, len: usize, metadata: &[u8], buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren, session: &vortex_session::VortexSession) -> vortex_error::VortexResult @@ -20822,12 +20816,6 @@ pub fn vortex_array::vtable::ValidityChildSliceHelper::sliced_child_array(&self) pub fn vortex_array::vtable::ValidityChildSliceHelper::unsliced_child_and_slice(&self) -> (&vortex_array::ArrayRef, usize, usize) -pub trait vortex_array::vtable::ValiditySliceHelper - -pub fn vortex_array::vtable::ValiditySliceHelper::sliced_validity(&self) -> vortex_error::VortexResult - -pub fn vortex_array::vtable::ValiditySliceHelper::unsliced_validity_and_slice(&self) -> (&vortex_array::validity::Validity, usize, usize) - pub trait vortex_array::vtable::ValidityVTable pub fn vortex_array::vtable::ValidityVTable::validity(array: vortex_array::ArrayView<'_, V>) -> vortex_error::VortexResult @@ -20912,10 +20900,6 @@ impl vortex_array::ValidityVTable for vortex_array:: pub fn vortex_array::ValidityVTableFromChildSliceHelper::validity(array: vortex_array::ArrayView<'_, V>) -> vortex_error::VortexResult -impl vortex_array::ValidityVTable for vortex_array::ValidityVTableFromValiditySliceHelper where ::ArrayData: vortex_array::ValiditySliceHelper - -pub fn vortex_array::ValidityVTableFromValiditySliceHelper::validity(array: vortex_array::ArrayView<'_, V>) -> vortex_error::VortexResult - impl vortex_array::ValidityVTable for vortex_array::ValidityVTableFromChild where V: vortex_array::ValidityChild + vortex_array::VTable pub fn vortex_array::ValidityVTableFromChild::validity(array: vortex_array::ArrayView<'_, V>) -> vortex_error::VortexResult @@ -22330,12 +22314,6 @@ impl vortex_array::ValidityVTable for vortex_array:: pub fn vortex_array::ValidityVTableFromChildSliceHelper::validity(array: vortex_array::ArrayView<'_, V>) -> vortex_error::VortexResult -pub struct vortex_array::ValidityVTableFromValiditySliceHelper - -impl vortex_array::ValidityVTable for vortex_array::ValidityVTableFromValiditySliceHelper where ::ArrayData: vortex_array::ValiditySliceHelper - -pub fn vortex_array::ValidityVTableFromValiditySliceHelper::validity(array: vortex_array::ArrayView<'_, V>) -> vortex_error::VortexResult - pub static vortex_array::LEGACY_SESSION: std::sync::lazy_lock::LazyLock pub trait vortex_array::ArrayEq @@ -24808,12 +24786,6 @@ pub fn vortex_array::ValidityChildSliceHelper::sliced_child_array(&self) -> vort pub fn vortex_array::ValidityChildSliceHelper::unsliced_child_and_slice(&self) -> (&vortex_array::ArrayRef, usize, usize) -pub trait vortex_array::ValiditySliceHelper - -pub fn vortex_array::ValiditySliceHelper::sliced_validity(&self) -> vortex_error::VortexResult - -pub fn vortex_array::ValiditySliceHelper::unsliced_validity_and_slice(&self) -> (&vortex_array::validity::Validity, usize, usize) - pub trait vortex_array::ValidityVTable pub fn vortex_array::ValidityVTable::validity(array: vortex_array::ArrayView<'_, V>) -> vortex_error::VortexResult @@ -24898,10 +24870,6 @@ impl vortex_array::ValidityVTable for vortex_array:: pub fn vortex_array::ValidityVTableFromChildSliceHelper::validity(array: vortex_array::ArrayView<'_, V>) -> vortex_error::VortexResult -impl vortex_array::ValidityVTable for vortex_array::ValidityVTableFromValiditySliceHelper where ::ArrayData: vortex_array::ValiditySliceHelper - -pub fn vortex_array::ValidityVTableFromValiditySliceHelper::validity(array: vortex_array::ArrayView<'_, V>) -> vortex_error::VortexResult - impl vortex_array::ValidityVTable for vortex_array::ValidityVTableFromChild where V: vortex_array::ValidityChild + vortex_array::VTable pub fn vortex_array::ValidityVTableFromChild::validity(array: vortex_array::ArrayView<'_, V>) -> vortex_error::VortexResult diff --git a/vortex-array/src/array/vtable/validity.rs b/vortex-array/src/array/vtable/validity.rs index 350ef5d3ab7..9f2b2989e52 100644 --- a/vortex-array/src/array/vtable/validity.rs +++ b/vortex-array/src/array/vtable/validity.rs @@ -17,28 +17,6 @@ pub trait ValidityVTable { fn validity(array: ArrayView<'_, V>) -> VortexResult; } -/// An implementation of the [`ValidityVTable`] for arrays that hold an unsliced validity -/// and a slice into it. -pub struct ValidityVTableFromValiditySliceHelper; - -pub trait ValiditySliceHelper { - fn unsliced_validity_and_slice(&self) -> (&Validity, usize, usize); - - fn sliced_validity(&self) -> VortexResult { - let (unsliced_validity, start, stop) = self.unsliced_validity_and_slice(); - unsliced_validity.slice(start..stop) - } -} - -impl ValidityVTable for ValidityVTableFromValiditySliceHelper -where - V::ArrayData: ValiditySliceHelper, -{ - fn validity(array: ArrayView<'_, V>) -> VortexResult { - array.data().sliced_validity() - } -} - /// An implementation of the [`ValidityVTable`] for arrays that delegate validity entirely /// to a child array. pub struct ValidityVTableFromChild; diff --git a/vortex-cuda/benches/zstd_cuda.rs b/vortex-cuda/benches/zstd_cuda.rs index d3e49d03c48..a5d285d2d2a 100644 --- a/vortex-cuda/benches/zstd_cuda.rs +++ b/vortex-cuda/benches/zstd_cuda.rs @@ -13,6 +13,7 @@ use cudarc::driver::DevicePtrMut; use cudarc::driver::sys::CUevent_flags; use futures::executor::block_on; use vortex::array::arrays::VarBinViewArray; +use vortex::array::vtable::child_to_validity; use vortex::encodings::zstd::Zstd; use vortex::encodings::zstd::ZstdArray; use vortex::encodings::zstd::ZstdDataParts; @@ -140,7 +141,13 @@ fn benchmark_zstd_cuda_decompress(c: &mut Criterion) { for _ in 0..iters { let ZstdDataParts { frames, metadata, .. - } = zstd_array.clone().into_data().into_parts(); + } = { + let validity = child_to_validity( + &zstd_array.as_ref().slots()[0], + zstd_array.dtype().nullability(), + ); + zstd_array.clone().into_data().into_parts(validity) + }; let exec = block_on(zstd_kernel_prepare(frames, &metadata, &mut cuda_ctx)) .vortex_expect("kernel setup failed"); let kernel_time = block_on(execute_zstd_kernel(exec, &mut cuda_ctx)) diff --git a/vortex-cuda/src/kernel/encodings/zstd.rs b/vortex-cuda/src/kernel/encodings/zstd.rs index d1b3b9b4423..77ade13a6b0 100644 --- a/vortex-cuda/src/kernel/encodings/zstd.rs +++ b/vortex-cuda/src/kernel/encodings/zstd.rs @@ -20,6 +20,7 @@ use vortex::array::arrays::varbinview::BinaryView; use vortex::array::arrays::varbinview::build_views::MAX_BUFFER_LEN; use vortex::array::buffer::BufferHandle; use vortex::array::buffer::DeviceBuffer; +use vortex::array::vtable::child_to_validity; use vortex::buffer::Alignment; use vortex::buffer::Buffer; use vortex::buffer::ByteBuffer; @@ -214,6 +215,7 @@ impl CudaExecute for ZstdExecutor { async fn decode_zstd(array: ZstdArray, ctx: &mut CudaExecutionCtx) -> VortexResult { let dtype = array.dtype().clone(); + let validity = child_to_validity(&array.as_ref().slots()[0], dtype.nullability()); let ZstdDataParts { frames, metadata, @@ -222,7 +224,7 @@ async fn decode_zstd(array: ZstdArray, ctx: &mut CudaExecutionCtx) -> VortexResu dictionary, slice_start, slice_stop, - } = array.into_data().into_parts(); + } = array.into_data().into_parts(validity); // nvCOMP doesn't support ZSTD dictionaries. if dictionary.is_some() { diff --git a/vortex/benches/single_encoding_throughput.rs b/vortex/benches/single_encoding_throughput.rs index 5e27475a90f..90053072bd1 100644 --- a/vortex/benches/single_encoding_throughput.rs +++ b/vortex/benches/single_encoding_throughput.rs @@ -329,9 +329,11 @@ fn bench_zstd_compress_u32(bencher: Bencher) { fn bench_zstd_decompress_u32(bencher: Bencher) { let (uint_array, ..) = setup_primitive_arrays(); let dtype = uint_array.dtype().clone(); + let validity = uint_array.validity().unwrap(); let compressed = Zstd::try_new( dtype, ZstdData::from_array(uint_array.into_array(), 3, 8192).unwrap(), + validity, ) .unwrap() .into_array(); @@ -414,9 +416,11 @@ fn bench_zstd_decompress_string(bencher: Bencher) { let varbinview_arr = VarBinViewArray::from_iter_str(gen_varbin_words(NUM_VALUES as usize, 0.00005)); let dtype = varbinview_arr.dtype().clone(); + let validity = varbinview_arr.validity().unwrap(); let compressed = Zstd::try_new( dtype, ZstdData::from_array(varbinview_arr.clone().into_array(), 3, 8192).unwrap(), + validity, ) .unwrap() .into_array();