diff --git a/datafusion/functions-nested/src/range.rs b/datafusion/functions-nested/src/range.rs
index 72450cb56be6b..65d9244ecdd4c 100644
--- a/datafusion/functions-nested/src/range.rs
+++ b/datafusion/functions-nested/src/range.rs
@@ -329,7 +329,7 @@ impl Range {
                         step,
                         self.include_upper_bound,
                         &mut values,
-                    );
+                    )?;
                     offsets.push(values.len() as i32);
                     valid.append_non_null();
                 }
@@ -538,6 +538,22 @@ fn retrieve_range_args(
     Some((start, stop, step))
 }
 
+/// Reserve space for `count` more elements, returning an error when the
+/// allocation would overflow `Vec`'s capacity limit or the allocator
+/// rejects it, rather than panicking on user-supplied SQL.
+fn reserve_range_capacity(values: &mut Vec<i64>, count: u64) -> Result<()> {
+    let count_usize = usize::try_from(count).map_err(|_| {
+        exec_datafusion_err!(
+            "Range too large to materialize: would produce {count} elements"
+        )
+    })?;
+    values.try_reserve(count_usize).map_err(|e| {
+        exec_datafusion_err!(
+            "Range too large to materialize: failed to allocate {count} elements: {e}"
+        )
+    })
+}
+
 /// Generate integer range values directly into the provided buffer.
 #[inline]
 fn generate_range_values(
@@ -546,9 +562,9 @@ fn generate_range_values(
     step: i64,
     include_upper: bool,
     values: &mut Vec<i64>,
-) {
+) -> Result<()> {
     if !include_upper && start == stop {
-        return;
+        return Ok(());
     }
 
     if step > 0 {
@@ -558,11 +574,10 @@ fn generate_range_values(
             stop.saturating_sub(1)
         };
         if start > limit {
-            return;
+            return Ok(());
         }
-        let count =
-            (start.abs_diff(limit) / step.unsigned_abs()).saturating_add(1) as usize;
-        values.reserve(count);
+        let count = (start.abs_diff(limit) / step.unsigned_abs()).saturating_add(1);
+        reserve_range_capacity(values, count)?;
         let mut current = start;
         while current <= limit {
             values.push(current);
@@ -578,11 +593,10 @@ fn generate_range_values(
             stop.saturating_add(1)
         };
         if start < limit {
-            return;
+            return Ok(());
         }
-        let count =
-            (start.abs_diff(limit) / step.unsigned_abs()).saturating_add(1) as usize;
-        values.reserve(count);
+        let count = (start.abs_diff(limit) / step.unsigned_abs()).saturating_add(1);
+        reserve_range_capacity(values, count)?;
         let mut current = start;
         while current >= limit {
             values.push(current);
@@ -592,6 +606,7 @@ fn generate_range_values(
             }
         }
     }
+    Ok(())
 }
 
 fn parse_tz(tz: &Option<&str>) -> Result<Tz> {
diff --git a/datafusion/sqllogictest/test_files/array/array_range.slt b/datafusion/sqllogictest/test_files/array/array_range.slt
index a55ec4a657790..b2a39634ef2a8 100644
--- a/datafusion/sqllogictest/test_files/array/array_range.slt
+++ b/datafusion/sqllogictest/test_files/array/array_range.slt
@@ -38,6 +38,13 @@ select range(5),
 query error DataFusion error: Execution error: step can't be 0 for function range\(start \[, stop, step\]\)
 select range(1, 1, 0);
 
+# Range too large to materialize should error instead of panicking
+query error DataFusion error: Execution error: Range too large to materialize
+select range(0, 9223372036854775807);
+
+query error DataFusion error: Execution error: Range too large to materialize
+select range(9223372036854775807);
+
 # Test range with big steps
 query ????
 select
@@ -352,6 +359,13 @@ select generate_series(1, 1, 0);
 query error DataFusion error: Execution error: Interval argument to generate_series must not be 0
 select generate_series(TIMESTAMP '2000-01-02', TIMESTAMP '2000-01-01', INTERVAL '0' MINUTE);
 
+# Range too large to materialize should error instead of panicking
+query error DataFusion error: Execution error: Range too large to materialize
+select generate_series(0, 9223372036854775807);
+
+query error DataFusion error: Execution error: Range too large to materialize
+select generate_series(-9223372036854775808, 9223372036854775807);
+
 # Test generate_series with big steps
 query ????
 select