Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 172 additions & 20 deletions datafusion/common/src/nested_struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,15 @@ pub fn cast_column(
(DataType::LargeListView(_), DataType::LargeListView(target_inner)) => {
cast_list_view_column::<i64>(source_col, target_inner, cast_options)
}
(
DataType::FixedSizeList(_, _),
DataType::FixedSizeList(target_inner, target_size),
) => cast_fixed_size_list_column(
source_col,
target_inner,
*target_size,
cast_options,
),
(
DataType::Dictionary(source_key_type, _),
DataType::Dictionary(target_key_type, target_value_type),
Expand All @@ -208,15 +217,8 @@ fn cast_list_column<O: arrow::array::OffsetSizeTrait>(
target_inner_field: &FieldRef,
cast_options: &CastOptions,
) -> Result<ArrayRef> {
let source_list = source_col
.as_any()
.downcast_ref::<GenericListArray<O>>()
.ok_or_else(|| {
crate::error::DataFusionError::Plan(format!(
"Expected list array but got {}",
source_col.data_type()
))
})?;
let source_list =
downcast_list_array::<GenericListArray<O>>(source_col, "list array")?;

let cast_values = cast_column(
source_list.values(),
Expand All @@ -238,15 +240,8 @@ fn cast_list_view_column<O: arrow::array::OffsetSizeTrait>(
target_inner_field: &FieldRef,
cast_options: &CastOptions,
) -> Result<ArrayRef> {
let source_list = source_col
.as_any()
.downcast_ref::<GenericListViewArray<O>>()
.ok_or_else(|| {
crate::error::DataFusionError::Plan(format!(
"Expected list view array but got {}",
source_col.data_type()
))
})?;
let source_list =
downcast_list_array::<GenericListViewArray<O>>(source_col, "list view array")?;

let cast_values = cast_column(
source_list.values(),
Expand All @@ -264,6 +259,53 @@ fn cast_list_view_column<O: arrow::array::OffsetSizeTrait>(
Ok(Arc::new(result))
}

/// Casts a `FixedSizeList` column to a `FixedSizeList` target by recursively
/// casting its child values with [`cast_column`].
///
/// The per-row list size is never changed: the source size must equal
/// `target_size`. The source's list-level null buffer is reused for the result.
///
/// # Arguments
/// * `source_col` - column expected to be a `FixedSizeListArray`
/// * `target_inner_field` - child field of the target `FixedSizeList` type
/// * `target_size` - list size of the target `FixedSizeList` type
/// * `cast_options` - options forwarded to the child cast
///
/// # Errors
/// Returns an error when:
/// * `source_col` is not a `FixedSizeListArray`
/// * the source list size differs from `target_size`
/// * casting the child values fails
/// * the cast child values cannot form a valid `FixedSizeListArray` for
///   `target_inner_field` (reported by `FixedSizeListArray::try_new`)
fn cast_fixed_size_list_column(
    source_col: &ArrayRef,
    target_inner_field: &FieldRef,
    target_size: i32,
    cast_options: &CastOptions,
) -> Result<ArrayRef> {
    use arrow::array::FixedSizeListArray;

    let source_list =
        downcast_list_array::<FixedSizeListArray>(source_col, "fixed size list array")?;

    // Resizing lists is not a cast this function supports; reject it up front
    // before doing any work on the child values.
    let source_size = source_list.value_length();
    if source_size != target_size {
        return _plan_err!(
            "Cannot cast FixedSizeList column with size {} to size {}",
            source_size,
            target_size
        );
    }

    let cast_values = cast_column(
        source_list.values(),
        target_inner_field.data_type(),
        cast_options,
    )?;

    // Use the fallible constructor: `FixedSizeListArray::new` panics when
    // validation fails (e.g. child nulls under a non-nullable target field),
    // whereas every other failure in this function is reported as an error.
    let result = FixedSizeListArray::try_new(
        Arc::clone(target_inner_field),
        target_size,
        cast_values,
        source_list.nulls().cloned(),
    )?;
    Ok(Arc::new(result))
}

fn downcast_list_array<'a, A: Array + 'static>(
source_col: &'a ArrayRef,
expected: &str,
) -> Result<&'a A> {
source_col.as_any().downcast_ref::<A>().ok_or_else(|| {
crate::error::DataFusionError::Plan(format!(
"Expected {expected} but got {}",
source_col.data_type()
))
})
}

fn cast_dictionary_column(
source_col: &ArrayRef,
source_key_type: &DataType,
Expand Down Expand Up @@ -431,6 +473,21 @@ pub fn validate_data_type_compatibility(
| (DataType::LargeListView(s), DataType::LargeListView(t)) => {
validate_field_compatibility(s, t)?;
}
(
DataType::FixedSizeList(s, source_size),
DataType::FixedSizeList(t, target_size),
) => {
// FixedSizeList sizes must match before nested field checks.
if source_size != target_size {
return _plan_err!(
"Cannot cast FixedSizeList field '{}' with size {} to size {}",
field_name,
source_size,
target_size
);
}
validate_field_compatibility(s, t)?;
}
(DataType::Dictionary(s_key, s_val), DataType::Dictionary(t_key, t_val)) => {
if !can_cast_types(s_key, t_key) {
return _plan_err!(
Expand Down Expand Up @@ -460,7 +517,7 @@ pub fn validate_data_type_compatibility(
/// name-based nested struct casting logic, rather than Arrow's standard cast.
///
/// This is the case when both types are struct types, or both are the same
/// container type (List, LargeList, ListView, LargeListView, FixedSizeList,
/// Dictionary) wrapping types that recursively contain structs.
///
/// Use this predicate at both planning time (to decide whether to apply struct
Expand All @@ -475,7 +532,8 @@ pub fn requires_nested_struct_cast(
(DataType::List(s), DataType::List(t))
| (DataType::LargeList(s), DataType::LargeList(t))
| (DataType::ListView(s), DataType::ListView(t))
| (DataType::LargeListView(s), DataType::LargeListView(t)) => {
| (DataType::LargeListView(s), DataType::LargeListView(t))
| (DataType::FixedSizeList(s, _), DataType::FixedSizeList(t, _)) => {
requires_nested_struct_cast(s.data_type(), t.data_type())
}
(DataType::Dictionary(_, s_val), DataType::Dictionary(_, t_val)) => {
Expand Down Expand Up @@ -1336,4 +1394,98 @@ mod tests {
&DataType::List(arc_field("item", DataType::Int64)),
));
}

#[test]
fn test_cast_fixed_size_list_struct_incompatible_type_fails() {
    use arrow::array::FixedSizeListArray;

    // Source column: FixedSizeList<Struct{a: Utf8}> with list size 1.
    let utf8_values: ArrayRef = Arc::new(StringArray::from(vec!["x", "y"]));
    let source_struct =
        StructArray::from(vec![(arc_field("a", DataType::Utf8), utf8_values)]);
    let source_item =
        arc_field("item", struct_type(vec![field("a", DataType::Utf8)]));
    let source_col: ArrayRef = Arc::new(FixedSizeListArray::new(
        source_item,
        1,
        Arc::new(source_struct),
        None,
    ));

    // Target type: FixedSizeList<Struct{a: Int32}> with the same list size,
    // so only the struct member type differs.
    let target_type = DataType::FixedSizeList(
        arc_field("item", struct_type(vec![field("a", DataType::Int32)])),
        1,
    );

    // The cast is expected to be rejected with a "Cannot cast" error.
    let result = cast_column(&source_col, &target_type, &DEFAULT_CAST_OPTIONS);
    assert!(result.is_err());
    let message = result.unwrap_err().to_string();
    assert!(message.contains("Cannot cast"));
}

#[test]
fn test_cast_fixed_size_list_struct_non_nullable_field_fails() {
    use arrow::array::FixedSizeListArray;

    // Source column: FixedSizeList<Struct{a: Int32}> with list size 1.
    let int_values: ArrayRef = Arc::new(Int32Array::from(vec![1]));
    let source_struct =
        StructArray::from(vec![(arc_field("a", DataType::Int32), int_values)]);
    let source_item =
        arc_field("item", struct_type(vec![field("a", DataType::Int32)]));
    let source_col: ArrayRef = Arc::new(FixedSizeListArray::new(
        source_item,
        1,
        Arc::new(source_struct),
        None,
    ));

    // Target struct adds a non-nullable member `b` that the source lacks.
    let target_struct = struct_type(vec![
        field("a", DataType::Int32),
        non_null_field("b", DataType::Int32),
    ]);
    let target_type = DataType::FixedSizeList(arc_field("item", target_struct), 1);

    // The cast must fail with an error that mentions the NULL-fill problem.
    let result = cast_column(&source_col, &target_type, &DEFAULT_CAST_OPTIONS);
    assert!(result.is_err());
    let message = result.unwrap_err().to_string();
    let mentions_null_fill = message.contains("cannot fill with NULL");
    let mentions_non_nullable = message.contains("non-nullable");
    assert!(mentions_null_fill || mentions_non_nullable);
}

#[test]
fn test_cast_fixed_size_list_size_mismatch_fails() {
    use arrow::array::FixedSizeListArray;

    // Source column: FixedSizeList<Int32> with list size 1.
    let item_values: ArrayRef = Arc::new(Int32Array::from(vec![1, 2]));
    let source_col: ArrayRef = Arc::new(FixedSizeListArray::new(
        arc_field("item", DataType::Int32),
        1,
        item_values,
        None,
    ));

    // Target type keeps the child type but asks for list size 2.
    let target_item = arc_field("item", DataType::Int32);
    let target_type = DataType::FixedSizeList(target_item, 2);

    // Differing sizes must be reported with both sizes in the message.
    let result = cast_column(&source_col, &target_type, &DEFAULT_CAST_OPTIONS);
    assert!(result.is_err());
    let message = result.unwrap_err().to_string();
    assert_contains!(
        message,
        "Cannot cast FixedSizeList column with size 1 to size 2"
    );
}

#[test]
fn test_requires_nested_struct_cast_fixed_size_list() {
    // Wraps `inner` in a FixedSizeList of size 2 whose child field is "item".
    let fixed_size_list_of =
        |inner: DataType| DataType::FixedSizeList(arc_field("item", inner), 2);

    // Struct children on both sides: the nested struct cast path is required.
    let source_struct_list =
        fixed_size_list_of(struct_type(vec![field("a", DataType::Int32)]));
    let target_struct_list =
        fixed_size_list_of(struct_type(vec![field("a", DataType::Int64)]));
    assert!(requires_nested_struct_cast(
        &source_struct_list,
        &target_struct_list,
    ));

    // FixedSizeList with non-struct inner types should return false
    let source_int_list = fixed_size_list_of(DataType::Int32);
    let target_int_list = fixed_size_list_of(DataType::Int64);
    assert!(!requires_nested_struct_cast(
        &source_int_list,
        &target_int_list,
    ));
}
}
Loading
Loading