From 7522675a576a4050b6086b72ef52fbbbdd7825e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?=
Date: Fri, 3 Apr 2026 16:07:39 +0200
Subject: [PATCH] Compare length + prefix together in ArrowBytesViewMap find

For strings > 12 bytes, compare the first 8 bytes of the StringView
(length + 4-byte prefix) as a single u64 instead of only comparing the
4-byte prefix. This rejects length mismatches earlier and is simpler.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 datafusion/physical-expr-common/src/binary_view_map.rs | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/datafusion/physical-expr-common/src/binary_view_map.rs b/datafusion/physical-expr-common/src/binary_view_map.rs
index abc3e28f82627..c372161f9bc0f 100644
--- a/datafusion/physical-expr-common/src/binary_view_map.rs
+++ b/datafusion/physical-expr-common/src/binary_view_map.rs
@@ -310,10 +310,8 @@ where
                     return header.view == view_u128;
                 }
 
-                // For larger strings: first compare the 4-byte prefix
-                let stored_prefix = (header.view >> 32) as u32;
-                let input_prefix = (view_u128 >> 32) as u32;
-                if stored_prefix != input_prefix {
+                // For larger strings: compare length + 4-byte prefix together
+                if (header.view as u64) != (view_u128 as u64) {
                     return false;
                 }
 