Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions vortex-buffer/benches/vortex_bitbuffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,17 @@ fn bitwise_and_arrow_buffer(bencher: Bencher, length: usize) {
.bench_refs(|(a, b)| &a.0 & &b.0);
}

/// Benchmarks `BitBuffer & &BitBuffer` with an owned left-hand side.
///
/// Every iteration receives a newly built `BitBuffer` as the left operand, so nothing else holds
/// a reference to it and `bitwise_binary_op_lhs_owned` can take its in-place (zero-allocation)
/// fast path. Read the numbers next to `bitwise_and_vortex_buffer`, the reference-LHS variant,
/// which always allocates a result buffer.
#[divan::bench(args = INPUT_SIZE)]
fn bitand_owned_lhs_vortex_buffer(bencher: Bencher, length: usize) {
    // The right operand is shared by reference across all iterations.
    let rhs = BitBuffer::from_iter((0..length).map(|i| i % 3 == 0));
    // Input generator: builds the owned left operand outside the timed section.
    let fresh_lhs = || BitBuffer::from_iter((0..length).map(|i| i % 2 == 0));

    bencher.with_inputs(fresh_lhs).bench_values(|lhs| lhs & &rhs);
}

#[divan::bench(args = INPUT_SIZE)]
fn bitwise_or_vortex_buffer(bencher: Bencher, length: usize) {
let a = BitBuffer::from_iter((0..length).map(|i| i % 2 == 0));
Expand Down
99 changes: 57 additions & 42 deletions vortex-buffer/src/bit/buf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ use crate::bit::collect_bool_word;
use crate::bit::count_ones::count_ones;
use crate::bit::get_bit_unchecked;
use crate::bit::ops::bitwise_binary_op;
use crate::bit::ops::bitwise_binary_op_lhs_owned;
use crate::bit::ops::bitwise_unary_op;
use crate::bit::ops::bitwise_unary_op_copy;
use crate::bit::select::bit_select;
use crate::buffer;

Expand Down Expand Up @@ -59,6 +61,29 @@ impl PartialEq for BitBuffer {
return false;
}

if self.len == 0 {
return true;
}

// Fast path: both byte-aligned and same length — direct byte comparison.
if self.offset == 0 && other.offset == 0 {
let full_bytes = self.len / 8;
let self_bytes = &self.buffer.as_slice()[..full_bytes];
let other_bytes = &other.buffer.as_slice()[..full_bytes];
if self_bytes != other_bytes {
return false;
}
// Compare remaining bits in the last partial byte.
let rem = self.len % 8;
if rem != 0 {
let mask = (1u8 << rem) - 1;
let a = self.buffer.as_slice()[full_bytes] & mask;
let b = other.buffer.as_slice()[full_bytes] & mask;
return a == b;
}
return true;
}

self.chunks()
.iter_padded()
.zip(other.chunks().iter_padded())
Expand Down Expand Up @@ -315,6 +340,7 @@ impl BitBuffer {
}

/// Returns how many bits are set, as computed by `count_ones` over the backing bytes using
/// this buffer's `offset` and `len`.
#[inline]
pub fn true_count(&self) -> usize {
    let bytes = self.buffer.as_slice();
    count_ones(bytes, self.offset, self.len)
}
Expand All @@ -330,6 +356,7 @@ impl BitBuffer {
}

/// Returns how many bits are unset: the buffer length minus
/// [`true_count`](Self::true_count).
#[inline]
pub fn false_count(&self) -> usize {
    let set_bits = self.true_count();
    self.len - set_bits
}
Expand All @@ -353,12 +380,14 @@ impl BitBuffer {
pub fn sliced(&self) -> Self {
if self.offset.is_multiple_of(8) {
return Self::new(
self.buffer.slice(self.offset / 8..self.len.div_ceil(8)),
self.buffer
.slice(self.offset / 8..(self.offset + self.len).div_ceil(8)),
self.len,
);
}

bitwise_unary_op(self.clone(), |a| a)
// Allocate directly rather than clone + identity op which would fail try_into_mut.
bitwise_unary_op_copy(self, |a| a)
}
}

Expand Down Expand Up @@ -402,7 +431,7 @@ impl BitOr for BitBuffer {

#[inline]
fn bitor(self, rhs: Self) -> Self::Output {
BitOr::bitor(&self, &rhs)
bitwise_binary_op_lhs_owned(self, &rhs, |a, b| a | b)
}
}

Expand All @@ -420,7 +449,7 @@ impl BitOr<&BitBuffer> for BitBuffer {

#[inline]
fn bitor(self, rhs: &BitBuffer) -> Self::Output {
(&self).bitor(rhs)
bitwise_binary_op_lhs_owned(self, rhs, |a, b| a | b)
}
}

Expand All @@ -447,7 +476,7 @@ impl BitAnd<&BitBuffer> for BitBuffer {

#[inline]
fn bitand(self, rhs: &BitBuffer) -> Self::Output {
(&self).bitand(rhs)
bitwise_binary_op_lhs_owned(self, rhs, |a, b| a & b)
}
}

Expand All @@ -456,7 +485,7 @@ impl BitAnd<BitBuffer> for BitBuffer {

#[inline]
fn bitand(self, rhs: BitBuffer) -> Self::Output {
(&self).bitand(&rhs)
bitwise_binary_op_lhs_owned(self, &rhs, |a, b| a & b)
}
}

Expand All @@ -465,7 +494,9 @@ impl Not for &BitBuffer {

#[inline]
fn not(self) -> Self::Output {
!self.clone()
// Allocate directly rather than clone+try_into_mut, which always fails
// since the clone shares the Arc with the original reference.
bitwise_unary_op_copy(self, |a| !a)
}
}

Expand All @@ -492,7 +523,7 @@ impl BitXor<&BitBuffer> for BitBuffer {

#[inline]
fn bitxor(self, rhs: &BitBuffer) -> Self::Output {
(&self).bitxor(rhs)
bitwise_binary_op_lhs_owned(self, rhs, |a, b| a ^ b)
}
}

Expand All @@ -505,6 +536,11 @@ impl BitBuffer {
bitwise_binary_op(self, rhs, |a, b| a & !b)
}

/// Consuming counterpart of [`bitand_not`](Self::bitand_not): computes `self & !rhs`.
///
/// Because `self` is taken by value, the work is delegated to
/// `bitwise_binary_op_lhs_owned`, which can mutate in-place when possible.
pub fn into_bitand_not(self, rhs: &BitBuffer) -> BitBuffer {
    bitwise_binary_op_lhs_owned(self, rhs, |lhs_bits, rhs_bits| lhs_bits & !rhs_bits)
}

/// Iterate through bits in a buffer.
///
/// # Arguments
Expand All @@ -524,44 +560,23 @@ impl BitBuffer {
return;
}

let is_bit_set = |byte: u8, bit_idx: usize| (byte & (1 << bit_idx)) != 0;
let bit_offset = self.offset % 8;
let mut buffer_ptr = unsafe { self.buffer.as_ptr().add(self.offset / 8) };
let mut callback_idx = 0;

// Handle incomplete first byte.
if bit_offset > 0 {
let bits_in_first_byte = (8 - bit_offset).min(total_bits);
let byte = unsafe { *buffer_ptr };

for bit_idx in 0..bits_in_first_byte {
f(callback_idx, is_bit_set(byte, bit_offset + bit_idx));
callback_idx += 1;
}

buffer_ptr = unsafe { buffer_ptr.add(1) };
}

// Process complete bytes.
let complete_bytes = (total_bits - callback_idx) / 8;
for _ in 0..complete_bytes {
let byte = unsafe { *buffer_ptr };
// Process in 64-bit chunks for better ILP and fewer loop iterations.
let chunks = self.chunks();
let chunks_count = total_bits / 64;
let remainder = total_bits % 64;

for bit_idx in 0..8 {
f(callback_idx, is_bit_set(byte, bit_idx));
callback_idx += 1;
for (chunk_idx, chunk) in chunks.iter().enumerate() {
let base = chunk_idx * 64;
for bit_idx in 0..64 {
f(base + bit_idx, (chunk >> bit_idx) & 1 == 1);
}
buffer_ptr = unsafe { buffer_ptr.add(1) };
}

// Handle remaining bits at the end.
let remaining_bits = total_bits - callback_idx;
if remaining_bits > 0 {
let byte = unsafe { *buffer_ptr };

for bit_idx in 0..remaining_bits {
f(callback_idx, is_bit_set(byte, bit_idx));
callback_idx += 1;
if remainder != 0 {
let rem_chunk = chunks.remainder_bits();
let base = chunks_count * 64;
for bit_idx in 0..remainder {
f(base + bit_idx, (rem_chunk >> bit_idx) & 1 == 1);
}
}
}
Expand Down
90 changes: 67 additions & 23 deletions vortex-buffer/src/bit/buf_mut.rs
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ impl BitBufferMut {
/// Set the bit at `index` to the given boolean value.
///
/// This operation is checked so if `index` exceeds the buffer length, this will panic.
#[inline]
pub fn set_to(&mut self, index: usize, value: bool) {
if value {
self.set(index);
Expand All @@ -288,6 +289,7 @@ impl BitBufferMut {
/// # Safety
///
/// The caller must ensure that `index` does not exceed the largest bit index in the backing buffer.
#[inline]
pub unsafe fn set_to_unchecked(&mut self, index: usize, value: bool) {
if value {
// SAFETY: checked by caller
Expand All @@ -301,6 +303,7 @@ impl BitBufferMut {
/// Set a position to `true`.
///
/// This operation is checked so if `index` exceeds the buffer length, this will panic.
#[inline]
pub fn set(&mut self, index: usize) {
assert!(index < self.len, "index {index} exceeds len {}", self.len);

Expand Down Expand Up @@ -373,12 +376,21 @@ impl BitBufferMut {
return;
}

let new_len_bytes = (self.offset + len).div_ceil(8);
let end_bit = self.offset + len;
let new_len_bytes = end_bit.div_ceil(8);
self.buffer.truncate(new_len_bytes);
self.len = len;

// Clear stale bits in the final partial byte so the "bits beyond len are zero" invariant
// holds. `append_false` (and `append_buffer`) rely on it to avoid a read-modify-write.
if !end_bit.is_multiple_of(8) {
let keep = (1u8 << (end_bit % 8)) - 1;
self.buffer.as_mut_slice()[new_len_bytes - 1] &= keep;
}
}

/// Append a new boolean into the bit buffer, incrementing the length.
#[inline]
pub fn append(&mut self, value: bool) {
if value {
self.append_true()
Expand All @@ -388,6 +400,7 @@ impl BitBufferMut {
}

/// Append a new true value to the buffer.
#[inline]
pub fn append_true(&mut self) {
let bit_pos = self.offset + self.len;
let byte_pos = bit_pos / 8;
Expand All @@ -404,21 +417,18 @@ impl BitBufferMut {
}

/// Append a new false value to the buffer.
#[inline]
pub fn append_false(&mut self) {
let bit_pos = self.offset + self.len;
let byte_pos = bit_pos / 8;
let bit_in_byte = bit_pos % 8;

// Ensure buffer has enough bytes
// Ensure buffer has enough bytes (pushed as 0x00, so bit is already unset).
if byte_pos >= self.buffer.len() {
self.buffer.push(0u8);
}

// Bit is already 0 if we just pushed a new byte, otherwise ensure it's unset
if bit_in_byte != 0 {
self.buffer.as_mut_slice()[byte_pos] &= !(1 << bit_in_byte);
}

// The bit is guaranteed to be 0: new bytes are zero-initialized, and
// existing bytes have this bit unset (it's beyond the current length).
self.len += 1;
}

Expand Down Expand Up @@ -487,24 +497,39 @@ impl BitBufferMut {
let end_bit_pos = start_bit_pos + bit_len;
let required_bytes = end_bit_pos.div_ceil(8);

// Ensure buffer has enough bytes
// Ensure buffer has enough bytes, zero-initialized for OR-based writes.
if required_bytes > self.buffer.len() {
self.buffer.push_n(0x00, required_bytes - self.buffer.len());
}

// Use bitvec for efficient bit copying
let self_slice = self
.buffer
.as_mut_slice()
.view_bits_mut::<bitvec::prelude::Lsb0>();
let other_slice = buffer
.inner()
.as_slice()
.view_bits::<bitvec::prelude::Lsb0>();

// Copy from source buffer (accounting for its offset) to destination (accounting for our offset + len)
let source_range = buffer.offset()..buffer.offset() + bit_len;
self_slice[start_bit_pos..end_bit_pos].copy_from_bitslice(&other_slice[source_range]);
let dst_bit_offset = start_bit_pos % 8;
let src_bit_offset = buffer.offset();

if dst_bit_offset == 0 && src_bit_offset == 0 {
// Both byte-aligned: use memcpy for full bytes, then mask the tail.
let dst_byte = start_bit_pos / 8;
let src_bytes = buffer.inner().as_slice();
let full_bytes = bit_len / 8;
self.buffer.as_mut_slice()[dst_byte..dst_byte + full_bytes]
.copy_from_slice(&src_bytes[..full_bytes]);
let rem = bit_len % 8;
if rem != 0 {
let mask = (1u8 << rem) - 1;
self.buffer.as_mut_slice()[dst_byte + full_bytes] |= src_bytes[full_bytes] & mask;
}
} else {
// Use bitvec for unaligned bit copying.
let self_slice = self
.buffer
.as_mut_slice()
.view_bits_mut::<bitvec::prelude::Lsb0>();
let other_slice = buffer
.inner()
.as_slice()
.view_bits::<bitvec::prelude::Lsb0>();
let source_range = src_bit_offset..src_bit_offset + bit_len;
self_slice[start_bit_pos..end_bit_pos].copy_from_bitslice(&other_slice[source_range]);
}

self.len += bit_len;
}
Expand Down Expand Up @@ -611,7 +636,8 @@ impl FromIterator<bool> for BitBufferMut {
}
}

// Append the remaining items (as we do not know how many more there are).
// Append any remaining items one at a time, as we do not know how many more there are.
// (`append` is already a single branch + bit set, see `append_true`/`append_false`.)
for v in iter {
buf.append(v);
}
Expand Down Expand Up @@ -659,6 +685,24 @@ mod tests {
assert!(bools.value(9));
}

#[test]
fn append_false_after_truncate_reads_back_false() {
    // Regression test for the `append_false` fast path, which relies on the bits past `len`
    // already being zero. Truncating into the middle of a byte must not leave a stale bit
    // behind that a later `append_false` would expose as `true`.
    let mut builder = BitBufferMut::new_set(16);
    builder.truncate(12);
    builder.append_false();
    builder.append_true();

    let frozen = builder.freeze();
    assert_eq!(frozen.len(), 14);

    // Index 12 is the bit written by `append_false`, index 13 the one from `append_true`.
    let appended_false = frozen.value(12);
    assert!(
        !appended_false,
        "appended false must read back false after truncate"
    );
    assert!(frozen.value(13));
}

#[test]
fn test_reserve_ensures_len_plus_additional() {
// This test documents the fix for the bug where reserve was incorrectly
Expand Down
Loading
Loading