Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions datafusion-cli/tests/cli_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -414,14 +414,12 @@ fn test_backtrace_output(#[case] query: &str) {
let output = cmd.output().expect("Failed to execute command");
let stdout = String::from_utf8_lossy(&output.stdout);
let stderr = String::from_utf8_lossy(&output.stderr);
let combined_output = format!("{}{}", stdout, stderr);
let combined_output = format!("{stdout}{stderr}");

// Assert that the output includes literal 'backtrace'
assert!(
combined_output.to_lowercase().contains("backtrace"),
"Expected output to contain 'backtrace', but got stdout: '{}' stderr: '{}'",
stdout,
stderr
"Expected output to contain 'backtrace', but got stdout: '{stdout}' stderr: '{stderr}'"
);
}

Expand Down
1 change: 0 additions & 1 deletion datafusion/common/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1274,7 +1274,6 @@ mod test {
// To pass the test the environment variable RUST_BACKTRACE should be set to 1 to enforce backtrace
#[cfg(feature = "backtrace")]
#[test]
#[expect(clippy::unnecessary_literal_unwrap)]
fn test_enabled_backtrace() {
match std::env::var("RUST_BACKTRACE") {
Ok(val) if val == "1" => {}
Expand Down
6 changes: 6 additions & 0 deletions datafusion/common/src/hash_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@

use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano};
use arrow::array::*;
#[cfg(not(feature = "force_hash_collisions"))]
use arrow::compute::take;
use arrow::datatypes::*;
#[cfg(not(feature = "force_hash_collisions"))]
use arrow::{downcast_dictionary_array, downcast_primitive_array};
use foldhash::fast::FixedState;
#[cfg(not(feature = "force_hash_collisions"))]
use itertools::Itertools;
#[cfg(not(feature = "force_hash_collisions"))]
use std::collections::HashMap;
use std::hash::{BuildHasher, Hash, Hasher};

Expand Down Expand Up @@ -198,6 +201,7 @@ hash_float_value!((half::f16, u16), (f32, u32), (f64, u64));
/// Create a `SeedableRandomState` whose per-hasher seed incorporates `seed`.
/// This folds the previous hash into the hasher's initial state so only the
/// new value needs to pass through the hash function — same cost as `hash_one`.
#[cfg(not(feature = "force_hash_collisions"))]
#[inline]
fn seeded_state(seed: u64) -> foldhash::fast::SeedableRandomState {
foldhash::fast::SeedableRandomState::with_seed(
Expand Down Expand Up @@ -303,6 +307,7 @@ fn hash_array<T>(
/// HAS_NULLS: do we have to check null in the inner loop
/// HAS_BUFFERS: if true, array has external buffers; if false, all strings are inlined / less than 12 bytes
/// REHASH: if true, combining with existing hash, otherwise initializing
#[cfg(not(feature = "force_hash_collisions"))]
#[inline(never)]
fn hash_string_view_array_inner<
T: ByteViewType,
Expand Down Expand Up @@ -429,6 +434,7 @@ fn hash_generic_byte_view_array<T: ByteViewType>(
/// - `HAS_NULL_KEYS`: Whether to check for null dictionary keys
/// - `HAS_NULL_VALUES`: Whether to check for null dictionary values
/// - `MULTI_COL`: Whether to combine with existing hash (true) or initialize (false)
#[cfg(not(feature = "force_hash_collisions"))]
#[inline(never)]
fn hash_dictionary_inner<
K: ArrowDictionaryKeyType,
Expand Down
2 changes: 2 additions & 0 deletions datafusion/common/src/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ pub trait PruningStatistics {
/// container, return `None` (the default).
///
/// Note: the returned array must contain [`Self::num_containers`] rows
#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // ScalarValue has interior mutability but is intentionally used as hash key
fn contained(
&self,
column: &Column,
Expand Down Expand Up @@ -526,6 +527,7 @@ impl PruningStatistics for CompositePruningStatistics {

#[cfg(test)]
#[expect(deprecated)]
#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // ScalarValue has interior mutability but is intentionally used as hash key
mod tests {
use crate::{
ColumnStatistics,
Expand Down
3 changes: 3 additions & 0 deletions datafusion/common/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4601,6 +4601,7 @@ impl ScalarValue {
/// Estimates [size](Self::size) of [`HashSet`] in bytes.
///
/// Includes the size of the [`HashSet`] container itself.
#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // ScalarValue has interior mutability but is intentionally used as hash key
pub fn size_of_hashset<S>(set: &HashSet<Self, S>) -> usize {
size_of_val(set)
+ (size_of::<ScalarValue>() * set.capacity())
Expand Down Expand Up @@ -7263,6 +7264,8 @@ mod tests {
size_of::<Vec<ScalarValue>>() + (9 * size_of::<ScalarValue>()) + sv_size,
);

#[allow(clippy::allow_attributes, clippy::mutable_key_type)]
// ScalarValue has interior mutability but is intentionally used as hash key
let mut s = HashSet::with_capacity(0);
// do NOT clone `sv` here because this may shrink the vector capacity
s.insert(v.pop().unwrap());
Expand Down
1 change: 1 addition & 0 deletions datafusion/core/src/physical_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2086,6 +2086,7 @@ fn get_physical_expr_pair(
/// A vector of unqualified filter expressions that can be passed to the TableProvider for execution.
/// Returns an empty vector if no applicable filters are found.
///
#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // Expr contains Arc with interior mutability but is intentionally used as hash key
fn extract_dml_filters(
input: &Arc<LogicalPlan>,
target: &TableReference,
Expand Down
1 change: 1 addition & 0 deletions datafusion/datasource/src/file_groups.rs
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,7 @@ impl FileGroup {
///
/// Note: May return fewer groups than `max_target_partitions` when the
/// number of unique partition values is less than the target.
#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // ScalarValue has interior mutability but is intentionally used as hash key
pub fn group_by_partition_values(
self,
max_target_partitions: usize,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/execution/src/memory_pool/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ impl arrow_buffer::MemoryReservation for MemoryReservation {
impl arrow_buffer::MemoryPool for ArrowMemoryPool {
fn reserve(&self, size: usize) -> Box<dyn arrow_buffer::MemoryReservation> {
let consumer = self.consumer.clone_with_new_id();
let mut reservation = consumer.register(&self.inner);
let reservation = consumer.register(&self.inner);
reservation.grow(size);

Box::new(reservation)
Expand Down
2 changes: 2 additions & 0 deletions datafusion/expr/src/logical_plan/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2123,6 +2123,8 @@ pub fn wrap_projection_for_join_if_necessary(
.into_iter()
.map(Expr::Column)
.collect::<Vec<_>>();
#[allow(clippy::allow_attributes, clippy::mutable_key_type)]
// Expr contains Arc with interior mutability but is intentionally used as hash key
let join_key_items = alias_join_keys
.iter()
.flat_map(|expr| expr.try_as_col().is_none().then_some(expr))
Expand Down
1 change: 1 addition & 0 deletions datafusion/functions-aggregate/src/median.rs
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ impl<T: ArrowNumericType> Accumulator for MedianAccumulator<T> {
size_of_val(self) + self.all_values.capacity() * size_of::<T::Native>()
}

#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // ScalarValue has interior mutability but is intentionally used as hash key
fn retract_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
let mut to_remove: HashMap<ScalarValue, usize> = HashMap::new();

Expand Down
1 change: 1 addition & 0 deletions datafusion/functions-aggregate/src/percentile_cont.rs
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ where
size_of_val(self) + self.all_values.capacity() * size_of::<T::Native>()
}

#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // ScalarValue has interior mutability but is intentionally used as hash key
fn retract_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
let mut to_remove: HashMap<ScalarValue, usize> = HashMap::new();
for i in 0..values[0].len() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ fn group_expr_to_bitmap_index(group_expr: &[Expr]) -> Result<HashMap<&Expr, usiz
.collect::<HashMap<_, _>>())
}

#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // Expr contains Arc with interior mutability but is intentionally used as hash key
fn replace_grouping_exprs(
input: Arc<LogicalPlan>,
schema: &DFSchema,
Expand Down Expand Up @@ -158,6 +159,7 @@ fn contains_grouping_function(exprs: &[Expr]) -> bool {
}

/// Validate that the arguments to the grouping function are in the group by clause.
#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // Expr contains Arc with interior mutability but is intentionally used as hash key
fn validate_args(
function: &AggregateFunction,
group_by_expr: &HashMap<&Expr, usize>,
Expand All @@ -178,6 +180,7 @@ fn validate_args(
}
}

#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // Expr contains Arc with interior mutability but is intentionally used as hash key
fn grouping_function_on_id(
function: &AggregateFunction,
group_by_expr: &HashMap<&Expr, usize>,
Expand Down
4 changes: 4 additions & 0 deletions datafusion/optimizer/src/scalar_subquery_to_join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,11 @@ impl OptimizerRule for ScalarSubqueryToJoin {
}

let mut all_subqueries = vec![];
#[allow(clippy::allow_attributes, clippy::mutable_key_type)]
// Expr contains Arc with interior mutability but is intentionally used as hash key
let mut expr_to_rewrite_expr_map = HashMap::new();
#[allow(clippy::allow_attributes, clippy::mutable_key_type)]
// Expr contains Arc with interior mutability but is intentionally used as hash key
let mut subquery_to_expr_map = HashMap::new();
for expr in projection.expr.iter() {
let (subqueries, rewrite_exprs) =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1784,6 +1784,8 @@ impl TreeNodeRewriter for Simplifier<'_> {
}) if are_inlist_and_eq(left.as_ref(), right.as_ref()) => {
let lhs = to_inlist(*left).unwrap();
let rhs = to_inlist(*right).unwrap();
#[allow(clippy::allow_attributes, clippy::mutable_key_type)]
// Expr contains Arc with interior mutability but is intentionally used as hash key
let mut seen: HashSet<Expr> = HashSet::new();
let list = lhs
.list
Expand Down Expand Up @@ -2174,6 +2176,7 @@ impl<'a> StringScalar<'a> {
}
}

#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // Expr contains Arc with interior mutability but is intentionally used as hash key
fn has_common_conjunction(lhs: &Expr, rhs: &Expr) -> bool {
let lhs_set: HashSet<&Expr> = iter_conjunction(lhs).collect();
iter_conjunction(rhs).any(|e| lhs_set.contains(&e) && !e.is_volatile())
Expand Down Expand Up @@ -2258,6 +2261,7 @@ fn to_inlist(expr: Expr) -> Option<InList> {

/// Return the union of two inlist expressions
/// maintaining the order of the elements in the two lists
#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // Expr contains Arc with interior mutability but is intentionally used as hash key
fn inlist_union(mut l1: InList, l2: InList, negated: bool) -> Result<Expr> {
// extend the list in l1 with the elements in l2 that are not already in l1
let l1_items: HashSet<_> = l1.list.iter().collect();
Expand All @@ -2276,6 +2280,7 @@ fn inlist_union(mut l1: InList, l2: InList, negated: bool) -> Result<Expr> {

/// Return the intersection of two inlist expressions
/// maintaining the order of the elements in the two lists
#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // Expr contains Arc with interior mutability but is intentionally used as hash key
fn inlist_intersection(mut l1: InList, l2: &InList, negated: bool) -> Result<Expr> {
let l2_items = l2.list.iter().collect::<HashSet<_>>();

Expand All @@ -2292,6 +2297,7 @@ fn inlist_intersection(mut l1: InList, l2: &InList, negated: bool) -> Result<Exp

/// Return the all items in l1 that are not in l2
/// maintaining the order of the elements in the two lists
#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // Expr contains Arc with interior mutability but is intentionally used as hash key
fn inlist_except(mut l1: InList, l2: &InList) -> Result<Expr> {
let l2_items = l2.list.iter().collect::<HashSet<_>>();

Expand Down
2 changes: 2 additions & 0 deletions datafusion/physical-expr/src/utils/guarantee.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ impl LiteralGuarantee {
/// Create a new instance of the guarantee if the provided operator is
/// supported. Returns None otherwise. See [`LiteralGuarantee::analyze`] to
/// create these structures from a predicate (boolean expression).
#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // ScalarValue has interior mutability but is intentionally used as hash key
fn new<'a>(
column_name: impl Into<String>,
guarantee: Guarantee,
Expand Down Expand Up @@ -309,6 +310,7 @@ impl<'a> GuaranteeBuilder<'a> {
/// * `AND (a IN (1,2,3))`: a is in (1, 2, or 3)
/// * `AND (a != 1 OR a != 2 OR a != 3)`: a is not in (1, 2, or 3)
/// * `AND (a NOT IN (1,2,3))`: a is not in (1, 2, or 3)
#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // ScalarValue has interior mutability but is intentionally used as hash key
fn aggregate_multi_conjunct(
mut self,
col: &'a crate::expressions::Column,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ use datafusion_execution::memory_pool::proxy::VecAllocExt;
use datafusion_expr::EmitTo;
use half::f16;
use hashbrown::hash_table::HashTable;
#[cfg(not(feature = "force_hash_collisions"))]
use std::hash::BuildHasher;
use std::mem::size_of;
use std::sync::Arc;
Expand Down
2 changes: 2 additions & 0 deletions datafusion/pruning/src/pruning_predicate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2158,6 +2158,7 @@ mod tests {
}

/// Add contained information.
#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // ScalarValue has interior mutability but is intentionally used as hash key
pub fn with_contained(
mut self,
values: impl IntoIterator<Item = ScalarValue>,
Expand All @@ -2172,6 +2173,7 @@ mod tests {
}

/// get any contained information for the specified values
#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // ScalarValue has interior mutability but is intentionally used as hash key
fn contained(&self, find_values: &HashSet<ScalarValue>) -> Option<BooleanArray> {
// find the one with the matching values
self.contained
Expand Down
1 change: 1 addition & 0 deletions datafusion/spark/src/function/map/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ pub fn map_from_keys_values_offsets_nulls(
)?))
}

#[allow(clippy::allow_attributes, clippy::mutable_key_type)] // ScalarValue has interior mutability but is intentionally used as hash key
fn map_deduplicate_keys(
flat_keys: &ArrayRef,
flat_values: &ArrayRef,
Expand Down
3 changes: 3 additions & 0 deletions datafusion/sql/src/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -592,9 +592,12 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
} else {
let mut unnest_options = UnnestOptions::new().with_preserve_nulls(false);

#[allow(clippy::allow_attributes, clippy::mutable_key_type)]
// Expr contains Arc with interior mutability but is intentionally used as hash key
let mut projection_exprs = match &aggr_expr_using_columns {
Some(exprs) => (*exprs).clone(),
None => {
#[allow(clippy::allow_attributes, clippy::mutable_key_type)]
let mut columns = HashSet::new();
for expr in &aggr_expr {
expr.apply(|expr| {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ pub async fn from_project_rel(
// For WindowFunctions, we need to wrap them in a Window relation. If there are duplicates,
// we can do the window'ing only once, then the project will duplicate the result.
// Order here doesn't matter since LPB::window_plan sorts the expressions.
#[allow(clippy::allow_attributes, clippy::mutable_key_type)]
// Expr contains Arc with interior mutability but is intentionally used as hash key
let mut window_exprs: HashSet<Expr> = HashSet::new();
for expr in &p.expressions {
let e = consumer
Expand Down
Loading