Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions vortex-array/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ rand_distr = { workspace = true }
rstest = { workspace = true }
serde_json = { workspace = true }
serde_test = { workspace = true }
test-with = { workspace = true }
vortex-array = { path = ".", features = ["_test-harness", "table-display"] }

[[bench]]
Expand Down
301 changes: 301 additions & 0 deletions vortex-array/src/arrays/varbinview/build_views.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,44 @@ pub fn build_views<P: NativePType + AsPrimitive<usize>>(
) -> (Vec<ByteBuffer>, Buffer<BinaryView>) {
let mut views = BufferMut::<BinaryView>::with_capacity(lens.len());

// Common case: the whole decoded heap fits within a single buffer, so no rollover can occur
// (`bytes.len()` is the total decoded size and therefore an upper bound on every offset). This
// lets the hot loop drop the per-element rollover branch and construct reference views inline,
// avoiding the out-of-line `BinaryView::make_view` call for the common long-string case.
if bytes.len() <= max_buffer_len {
let data = bytes.as_slice();
let mut offset = 0usize;
// Write directly into the reserved spare capacity rather than `push_unchecked`. The latter
// advances the backing buffer's length on every call, which the optimizer cannot prove is
// loop-invariant, so it reloads and rewrites the output cursor through the stack each
// iteration. Writing into the spare slice keeps the cursor in a register and the length is
// set once after the loop.
let spare = views.spare_capacity_mut();
for (slot, &len) in spare.iter_mut().zip(lens) {
let len = len.as_();
let value = &data[offset..offset + len];
let view = if len > BinaryView::MAX_INLINED_SIZE {
let mut prefix = [0u8; 4];
prefix.copy_from_slice(&value[..4]);
BinaryView::new_ref(len.as_(), prefix, start_buf_index, offset.as_())
} else {
BinaryView::make_view(value, start_buf_index, offset.as_())
Copy link
Copy Markdown
Contributor

@0ax1 0ax1 May 29, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

offset.as_() can implicitly truncate?

};
slot.write(view);
offset += len;
}
// SAFETY: the loop initialized exactly `lens.len()` contiguous views (`spare` has at least
// `lens.len()` slots, and `zip` stops at the shorter operand).
unsafe { views.set_len(lens.len()) };

let buffers = if bytes.is_empty() {
Vec::new()
} else {
vec![bytes.freeze()]
};
return (buffers, views.freeze());
}

let mut buffers = Vec::new();
let mut buf_index = start_buf_index;

Expand Down Expand Up @@ -66,12 +104,275 @@ pub fn build_views<P: NativePType + AsPrimitive<usize>>(

#[cfg(test)]
mod tests {
use rstest::rstest;
use vortex_buffer::ByteBuffer;
use vortex_buffer::ByteBufferMut;

use crate::arrays::varbinview::BinaryView;
use crate::arrays::varbinview::build_views::MAX_BUFFER_LEN;
use crate::arrays::varbinview::build_views::build_views;

/// Concatenate `values` into a single byte heap and return it alongside the per-element lengths,
/// matching the `(bytes, lens)` inputs that `build_views` consumes.
fn flatten(values: &[&[u8]]) -> (ByteBufferMut, Vec<u32>) {
let mut bytes = ByteBufferMut::empty();
let mut lens = Vec::with_capacity(values.len());
for v in values {
bytes.extend_from_slice(v);
lens.push(u32::try_from(v.len()).unwrap());
}
(bytes, lens)
}

/// Reconstruct the logical value behind each view by dereferencing it through the output
/// buffers. The first buffer corresponds to `start_buf_index`, so buffer indices are rebased by
/// that amount. This is the core correctness invariant: regardless of which code path built the
/// views, every view must point back at its original bytes.
fn reconstruct(
buffers: &[ByteBuffer],
views: &[BinaryView],
start_buf_index: u32,
) -> Vec<Vec<u8>> {
views
.iter()
.map(|view| {
if view.is_inlined() {
view.as_inlined().value().to_vec()
} else {
let r = view.as_view();
let buf = &buffers[(r.buffer_index - start_buf_index) as usize];
buf[r.as_range()].to_vec()
}
})
.collect()
}

/// The single-buffer fast path (`bytes.len() <= max_buffer_len`) must reproduce every input
/// value exactly, emit a single output buffer holding the untouched heap, and reference only
/// `start_buf_index`. We cover a spread of value sets that mix inlined (<= 12 bytes) and
/// reference (> 12 bytes) lengths, including the 12/13 byte inline boundary, empty values, and a
/// fully-inlined set.
#[rstest]
#[case::mixed(&[b"a".as_slice(), b"this is a long reference value", b"short", b"another long value here!!"])]
#[case::inline_boundary(&[&[b'x'; 12] as &[u8], &[b'y'; 13], &[b'z'; 12], &[b'w'; 13]])]
#[case::all_inlined(&[b"".as_slice(), b"a", b"bb", b"ccc", b"dddddddddddd"])]
#[case::all_reference(&[&[b'a'; 100] as &[u8], &[b'b'; 50], &[b'c'; 4096]])]
#[case::empty_values_interleaved(&[b"".as_slice(), b"a long value that is referenced", b"", b"", b"trailing long reference value"])]
#[case::single_long(&[&[7u8; 1 << 16] as &[u8]])]
fn fast_path_roundtrip(#[case] values: &[&[u8]]) {
let (bytes, lens) = flatten(values);
let total = bytes.len();
let start_buf_index = 3;

// `max_buffer_len` strictly greater than the heap forces the single-buffer fast path.
let (buffers, views) = build_views(start_buf_index, total + 1, bytes, &lens);

assert_eq!(views.len(), values.len());
if total == 0 {
assert!(buffers.is_empty(), "empty heap must not allocate a buffer");
} else {
assert_eq!(buffers.len(), 1, "whole heap must stay in one buffer");
// The fast path freezes the input heap unchanged.
let concatenated: Vec<u8> = values.concat();
assert_eq!(buffers[0].as_slice(), concatenated.as_slice());
}
for view in views.iter() {
if !view.is_inlined() {
assert_eq!(view.as_view().buffer_index, start_buf_index);
}
}

let expected: Vec<Vec<u8>> = values.iter().map(|v| v.to_vec()).collect();
assert_eq!(reconstruct(&buffers, &views, start_buf_index), expected);
}

/// Offsets and sizes are written into the `u32` `Ref` fields via `as_` truncation, so we must
/// confirm they stay correct once the running offset grows well past the 16-bit range (i.e. is
/// not narrowed to a smaller width). A ~9 MiB heap pushes offsets above 2^23 while remaining far
/// below `MAX_BUFFER_LEN`; each value encodes its index in its first bytes so a misplaced offset
/// would reconstruct the wrong bytes.
#[test]
fn fast_path_large_offsets() {
const N: usize = 9000;
const LEN: usize = 1000;
// The final offset is (N - 1) * LEN, which must exceed 2^23 to be a meaningful check.
const _: () = assert!((N - 1) * LEN > (1 << 23));

let values: Vec<Vec<u8>> = (0..N)
.map(|i| {
let mut v = vec![0u8; LEN];
v[..4].copy_from_slice(&u32::try_from(i).unwrap().to_le_bytes());
v
})
.collect();
let refs: Vec<&[u8]> = values.iter().map(|v| v.as_slice()).collect();

let (bytes, lens) = flatten(&refs);
let total = bytes.len();

let (buffers, views) = build_views(0, total + 1, bytes, &lens);

assert_eq!(buffers.len(), 1);
// The recorded offset must equal the cumulative byte position, exactly, for every view.
for (i, view) in views.iter().enumerate() {
let r = view.as_view();
assert_eq!(r.offset as usize, i * LEN, "wrong offset for view {i}");
assert_eq!(r.size as usize, LEN);
}
assert_eq!(reconstruct(&buffers, &views, 0), values);
}

/// The fast path is taken when `bytes.len() <= max_buffer_len`, so equality at the boundary must
/// still produce a single buffer (not roll over to the slow path).
#[test]
fn fast_path_taken_at_exact_boundary() {
let (bytes, lens) =
flatten(&[b"this value is definitely long", b"and so is this one here"]);
let total = bytes.len();

let (buffers, views) = build_views(0, total, bytes, &lens);

assert_eq!(
buffers.len(),
1,
"len == max_buffer_len must stay on fast path"
);
assert_eq!(views.len(), 2);
}

/// For the same logical data, the fast path (single buffer) and the slow rollover path must
/// reconstruct identical values. Driving the slow path with a small `max_buffer_len` forces
/// buffer splitting while leaving the recovered values unchanged.
#[test]
fn fast_and_slow_paths_agree() {
let values: &[&[u8]] = &[
b"first long reference value",
b"tiny",
b"second long reference value!!",
b"third looooong reference value",
];
let expected: Vec<Vec<u8>> = values.iter().map(|v| v.to_vec()).collect();

let (fast_bytes, lens) = flatten(values);
let total = fast_bytes.len();
let (fast_buffers, fast_views) = build_views(0, total + 1, fast_bytes, &lens);
assert_eq!(fast_buffers.len(), 1);
assert_eq!(reconstruct(&fast_buffers, &fast_views, 0), expected);

// Force the rollover path: a small cap (>= the longest value) that the total heap exceeds.
let longest = values.iter().map(|v| v.len()).max().unwrap();
let (slow_bytes, _) = flatten(values);
let (slow_buffers, slow_views) = build_views(0, longest, slow_bytes, &lens);
assert!(
slow_buffers.len() > 1,
"small cap should split into many buffers"
);
assert_eq!(reconstruct(&slow_buffers, &slow_views, 0), expected);

// Same logical contents regardless of how the heap was partitioned.
assert_eq!(
reconstruct(&fast_buffers, &fast_views, 0),
reconstruct(&slow_buffers, &slow_views, 0)
);
}

/// Empty input must yield no buffers and no views, exercising the `bytes.is_empty()` branch.
#[test]
fn fast_path_empty_input() {
let lens: Vec<u32> = Vec::new();
let (buffers, views) = build_views(0, 1024, ByteBufferMut::empty(), &lens);
assert!(buffers.is_empty());
assert!(views.is_empty());
}

/// The fast path must produce views byte-identical to the value-inspecting `make_view`, which is
/// what the slow path uses. This pins the inline/reference decision and field layout.
#[test]
fn fast_path_matches_make_view() {
let values: &[&[u8]] = &[b"inline", b"this is a long reference value", b""];
let (bytes, lens) = flatten(values);
let total = bytes.len();
let (_buffers, views) = build_views(0, total + 1, bytes, &lens);

let expected = [
BinaryView::make_view(b"inline", 0, 0),
BinaryView::make_view(b"this is a long reference value", 0, 6),
BinaryView::make_view(b"", 0, 36),
];
assert_eq!(views.as_slice(), &expected);
}

// TODO(someone): ideally CI would run this in release mode as well, since debug builds make the
// ~2.25 GiB allocation and fill loop substantially slower.
/// Slow regression for the single-buffer fast-path guard. The fast path is only valid when the
/// whole heap fits in one buffer (`bytes.len() <= max_buffer_len`); once the heap exceeds
/// [`MAX_BUFFER_LEN`] (`i32::MAX`, ~2.0 GiB) `build_views` must roll the heap into multiple
/// buffers, resetting the per-buffer offset, so no view references an offset past the
/// `i32`-bounded buffer limit.
///
/// We build a heap just past `i32::MAX` and assert it rolls over into more than one buffer, that
/// no buffer exceeds `MAX_BUFFER_LEN`, and that values straddling the rollover boundary (where
/// the second buffer's offsets restart from zero) reconstruct exactly. If the guard regressed and
/// the fast path swallowed the whole heap, it would emit a single >2 GiB buffer with offsets past
/// `i32::MAX`, which the buffer-count and buffer-size assertions catch.
///
/// Allocates ~2.25 GiB, so it is gated to CI and skipped when `VORTEX_SKIP_SLOW_TESTS` is set:
///
/// ```text
/// CI=1 cargo test --release -p vortex-array build_views_offsets_overflow
/// ```
///
/// [`MAX_BUFFER_LEN`]: super::MAX_BUFFER_LEN
#[test_with::env(CI)]
#[test_with::no_env(VORTEX_SKIP_SLOW_TESTS)]
fn build_views_offsets_overflow_i32() {
const STRING_LEN: usize = 64 * 1024;
// Comfortably past MAX_BUFFER_LEN (`i32::MAX` ~= 2.0 GiB) so the heap must roll over.
const TOTAL_BYTES: usize = (1usize << 31) + (256 << 20); // ~2.25 GiB
const N: usize = TOTAL_BYTES / STRING_LEN;

// Each value's first 8 bytes encode its row index, so a misrouted offset is detectable.
let nth_string = |i: usize| {
let mut s = vec![b'x'; STRING_LEN];
s[..8].copy_from_slice(&(i as u64).to_le_bytes());
s
};

let mut bytes = ByteBufferMut::with_capacity(N * STRING_LEN);
let mut value = vec![b'x'; STRING_LEN];
for i in 0..N {
value[..8].copy_from_slice(&(i as u64).to_le_bytes());
bytes.extend_from_slice(&value);
}

let lens = vec![u32::try_from(STRING_LEN).unwrap(); N];
let (buffers, views) = build_views(0, MAX_BUFFER_LEN, bytes, &lens);

assert_eq!(views.len(), N);
assert!(
buffers.len() >= 2,
"heap exceeding MAX_BUFFER_LEN must roll over into multiple buffers, got {}",
buffers.len()
);
for (i, b) in buffers.iter().enumerate() {
assert!(
b.len() <= MAX_BUFFER_LEN,
"buffer {i} of {} bytes exceeds MAX_BUFFER_LEN",
b.len()
);
}

// The boundary row is the first whose offset would cross MAX_BUFFER_LEN on the fast path.
let boundary = MAX_BUFFER_LEN / STRING_LEN;
for i in [0, boundary - 1, boundary, boundary + 1, N / 2, N - 1] {
let view = &views[i];
let r = view.as_view();
let got = &buffers[r.buffer_index as usize][r.as_range()];
assert_eq!(got, nth_string(i).as_slice(), "value mismatch at row {i}");
assert_eq!(r.size as usize, STRING_LEN);
}
}

#[test]
fn test_to_canonical_large() {
// We are testing generating views for raw data that should look like
Expand Down
49 changes: 49 additions & 0 deletions vortex-array/src/arrays/varbinview/view.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,27 @@ impl BinaryView {
Self { le_bytes: [0; 16] }
}

/// Create a reference view directly from its components, without inspecting the value.
///
/// `size` must be greater than [`MAX_INLINED_SIZE`], and `prefix` must hold the first four
/// bytes of the value. This is the fast path for bulk view construction where the caller has
/// already established that the value is too long to inline; it assembles the 16-byte view as a
/// single `u128` so the compiler can emit one wide store per view.
///
/// [`MAX_INLINED_SIZE`]: Self::MAX_INLINED_SIZE
#[inline]
pub fn new_ref(size: u32, prefix: [u8; 4], buffer_index: u32, offset: u32) -> Self {
debug_assert!(size as usize > Self::MAX_INLINED_SIZE);
// Matches the little-endian field order of `Ref` (size, prefix, buffer_index, offset),
// consistent with `le_bytes` and the `From<u128>`/`as_u128` representation.
Self::from(
u128::from(size)
| (u128::from(u32::from_le_bytes(prefix)) << 32)
| (u128::from(buffer_index) << 64)
| (u128::from(offset) << 96),
)
}

/// Create a new inlined binary view
///
/// # Panics
Expand Down Expand Up @@ -278,3 +299,31 @@ impl fmt::Debug for BinaryView {
s.finish()
}
}

#[cfg(test)]
#[expect(
clippy::cast_possible_truncation,
reason = "test values are bounded well within the target types"
)]
mod tests {
use super::*;

#[test]
fn new_ref_matches_make_view() {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any more test cases you can come up with? Is the change sufficiently covered?

// `new_ref` assembles the reference view as a `u128`; it must be byte-identical to the
// value-inspecting `make_view` for any value longer than the inline limit.
for &len in &[13usize, 20, 255, 4096] {
let value: Vec<u8> = (0..len).map(|i| (i % 251) as u8).collect();
let prefix = [value[0], value[1], value[2], value[3]];
let made = BinaryView::make_view(&value, 7, 42);
let built = BinaryView::new_ref(len as u32, prefix, 7, 42);
assert_eq!(made.as_u128(), built.as_u128(), "mismatch at len {len}");
assert!(!built.is_inlined());
let r = built.as_view();
assert_eq!(r.size, len as u32);
assert_eq!(r.prefix, prefix);
assert_eq!(r.buffer_index, 7);
assert_eq!(r.offset, 42);
}
}
}
Loading