From 62bc4a42c0fb6cc8f5b38ee4ae5a5a70bc0c7076 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20M=C3=BCller?= Date: Mon, 18 May 2026 08:55:02 +0200 Subject: [PATCH 1/2] fix: return error instead of capacity overflow panic in generate_series --- datafusion/functions-nested/src/range.rs | 40 ++++++++++++++----- .../test_files/array/array_range.slt | 14 +++++++ 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/datafusion/functions-nested/src/range.rs b/datafusion/functions-nested/src/range.rs index 72450cb56be6b..c44c78e51e7ee 100644 --- a/datafusion/functions-nested/src/range.rs +++ b/datafusion/functions-nested/src/range.rs @@ -329,7 +329,7 @@ impl Range { step, self.include_upper_bound, &mut values, - ); + )?; offsets.push(values.len() as i32); valid.append_non_null(); } @@ -538,6 +538,25 @@ fn retrieve_range_args( Some((start, stop, step)) } +/// Upper bound on the number of `i64` elements a range may materialize at +/// once. `Vec::reserve` panics with "capacity overflow" when the requested +/// allocation exceeds `isize::MAX` bytes, so cap the count at the same limit +/// and return an error rather than panicking on user-supplied SQL. +const MAX_RANGE_ELEMENTS: u64 = isize::MAX as u64 / size_of::<i64>() as u64; + +/// Reserve space for `count` more elements, returning an error when the +/// resulting allocation would exceed what `Vec` can hold. +fn reserve_range_capacity(values: &mut Vec<i64>, count: u64) -> Result<()> { + if count > MAX_RANGE_ELEMENTS { + return exec_err!( + "Range too large to materialize: would produce {count} elements \ + (max {MAX_RANGE_ELEMENTS})" + ); + } + values.reserve(count as usize); + Ok(()) +} + /// Generate integer range values directly into the provided buffer. 
#[inline] fn generate_range_values( @@ -546,9 +565,9 @@ fn generate_range_values( step: i64, include_upper: bool, values: &mut Vec<i64>, -) { +) -> Result<()> { if !include_upper && start == stop { - return; + return Ok(()); } if step > 0 { @@ -558,11 +577,10 @@ fn generate_range_values( stop.saturating_sub(1) }; if start > limit { - return; + return Ok(()); } - let count = - (start.abs_diff(limit) / step.unsigned_abs()).saturating_add(1) as usize; - values.reserve(count); + let count = (start.abs_diff(limit) / step.unsigned_abs()).saturating_add(1); + reserve_range_capacity(values, count)?; let mut current = start; while current <= limit { values.push(current); @@ -578,11 +596,10 @@ fn generate_range_values( stop.saturating_add(1) }; if start < limit { - return; + return Ok(()); } - let count = - (start.abs_diff(limit) / step.unsigned_abs()).saturating_add(1) as usize; - values.reserve(count); + let count = (start.abs_diff(limit) / step.unsigned_abs()).saturating_add(1); + reserve_range_capacity(values, count)?; let mut current = start; while current >= limit { values.push(current); @@ -592,6 +609,7 @@ fn generate_range_values( } } } + Ok(()) } fn parse_tz(tz: &Option<&str>) -> Result { diff --git a/datafusion/sqllogictest/test_files/array/array_range.slt b/datafusion/sqllogictest/test_files/array/array_range.slt index a55ec4a657790..b2a39634ef2a8 100644 --- a/datafusion/sqllogictest/test_files/array/array_range.slt +++ b/datafusion/sqllogictest/test_files/array/array_range.slt @@ -38,6 +38,13 @@ select range(5), query error DataFusion error: Execution error: step can't be 0 for function range\(start \[, stop, step\]\) select range(1, 1, 0); +# Range too large to materialize should error instead of panicking +query error DataFusion error: Execution error: Range too large to materialize +select range(0, 9223372036854775807); + +query error DataFusion error: Execution error: Range too large to materialize +select range(9223372036854775807); + # Test range with big steps 
query ???? select @@ -352,6 +359,13 @@ select generate_series(1, 1, 0); query error DataFusion error: Execution error: Interval argument to generate_series must not be 0 select generate_series(TIMESTAMP '2000-01-02', TIMESTAMP '2000-01-01', INTERVAL '0' MINUTE); +# Range too large to materialize should error instead of panicking +query error DataFusion error: Execution error: Range too large to materialize +select generate_series(0, 9223372036854775807); + +query error DataFusion error: Execution error: Range too large to materialize +select generate_series(-9223372036854775808, 9223372036854775807); + # Test generate_series with big steps query ???? select From 0b7a8642db4c48a52f0cc664e627276f5a6049ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20M=C3=BCller?= Date: Mon, 18 May 2026 12:18:31 +0200 Subject: [PATCH 2/2] refactor: use try_reserve instead of custom size check --- datafusion/functions-nested/src/range.rs | 27 +++++++++++------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/datafusion/functions-nested/src/range.rs b/datafusion/functions-nested/src/range.rs index c44c78e51e7ee..65d9244ecdd4c 100644 --- a/datafusion/functions-nested/src/range.rs +++ b/datafusion/functions-nested/src/range.rs @@ -538,23 +538,20 @@ fn retrieve_range_args( Some((start, stop, step)) } -/// Upper bound on the number of `i64` elements a range may materialize at -/// once. `Vec::reserve` panics with "capacity overflow" when the requested -/// allocation exceeds `isize::MAX` bytes, so cap the count at the same limit -/// and return an error rather than panicking on user-supplied SQL. -const MAX_RANGE_ELEMENTS: u64 = isize::MAX as u64 / size_of::<i64>() as u64; - /// Reserve space for `count` more elements, returning an error when the -/// resulting allocation would exceed what `Vec` can hold. +/// allocation would overflow `Vec`'s capacity limit or the allocator +/// rejects it, rather than panicking on user-supplied SQL. 
fn reserve_range_capacity(values: &mut Vec<i64>, count: u64) -> Result<()> { - if count > MAX_RANGE_ELEMENTS { - return exec_err!( - "Range too large to materialize: would produce {count} elements \ - (max {MAX_RANGE_ELEMENTS})" - ); - } - values.reserve(count as usize); - Ok(()) + let count_usize = usize::try_from(count).map_err(|_| { + exec_datafusion_err!( + "Range too large to materialize: would produce {count} elements" + ) + })?; + values.try_reserve(count_usize).map_err(|e| { + exec_datafusion_err!( + "Range too large to materialize: failed to allocate {count} elements: {e}" + ) + }) } /// Generate integer range values directly into the provided buffer.