From 026e5f35c5c9d7e559ccb476ce6da48fdf70a2a3 Mon Sep 17 00:00:00 2001
From: Sergei Grebnov
Date: Wed, 1 Apr 2026 14:01:34 +0300
Subject: [PATCH] fix(unparser): make BigQueryDialect more robust

Override additional `Dialect` hooks in `BigQueryDialect`:

- `date_part(...)` is unparsed as `EXTRACT(<field> FROM <expr>)`.
- Intervals use the SQL-standard style, e.g. `INTERVAL '3' MONTH`.
- Casts use `FLOAT64`, `STRING` (for both Utf8 and LargeUtf8), `INT64`
  and a plain `TIMESTAMP` (the time unit and time zone are ignored).
- `supports_column_alias_in_table_alias` returns false.

A unit test pins the rendered SQL for each override.

---
NOTE(review): this copy was rebuilt from an export that collapsed all
line breaks and stripped `<...>` spans. The generic arguments in
`Option<Arc<str>>`, `Result<Option<ast::Expr>>`, `DateTime<Tz>` and
`Arc<dyn Dialect>` were reconstructed and should be confirmed against
the original commit. The author's e-mail address on the `From:` line
was also lost; restore it before using `git am`.

 datafusion/sql/src/unparser/dialect.rs | 49 +++++++++++++++++++++++
 datafusion/sql/src/unparser/expr.rs    | 54 ++++++++++++++++++++++++++
 2 files changed, 103 insertions(+)

diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs
index fa9d40c6c78a8..3ed6514b07f80 100644
--- a/datafusion/sql/src/unparser/dialect.rs
+++ b/datafusion/sql/src/unparser/dialect.rs
@@ -657,6 +657,55 @@ impl Dialect for BigQueryDialect {
         true
     }
 
+    fn supports_column_alias_in_table_alias(&self) -> bool {
+        false
+    }
+
+    fn float64_ast_dtype(&self) -> ast::DataType {
+        ast::DataType::Float64
+    }
+
+    fn utf8_cast_dtype(&self) -> ast::DataType {
+        ast::DataType::String(None)
+    }
+
+    fn large_utf8_cast_dtype(&self) -> ast::DataType {
+        ast::DataType::String(None)
+    }
+
+    fn int64_cast_dtype(&self) -> ast::DataType {
+        ast::DataType::Int64
+    }
+
+    fn timestamp_cast_dtype(
+        &self,
+        _time_unit: &TimeUnit,
+        _tz: &Option<Arc<str>>,
+    ) -> ast::DataType {
+        ast::DataType::Timestamp(None, TimezoneInfo::None)
+    }
+
+    fn date_field_extract_style(&self) -> DateFieldExtractStyle {
+        DateFieldExtractStyle::Extract
+    }
+
+    fn interval_style(&self) -> IntervalStyle {
+        IntervalStyle::SQLStandard
+    }
+
+    fn scalar_function_to_sql_overrides(
+        &self,
+        unparser: &Unparser,
+        func_name: &str,
+        args: &[Expr],
+    ) -> Result<Option<ast::Expr>> {
+        if func_name == "date_part" {
+            return date_part_to_sql(unparser, self.date_field_extract_style(), args);
+        }
+
+        Ok(None)
+    }
+
     fn timestamp_with_tz_to_string(&self, dt: DateTime<Tz>, unit: TimeUnit) -> String {
         // https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type
         let format = match unit {
diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs
index a4a231d9857c2..eef21983b4ac8 100644
--- a/datafusion/sql/src/unparser/expr.rs
+++ b/datafusion/sql/src/unparser/expr.rs
@@ -3469,4 +3469,58 @@ mod tests {
         }
         Ok(())
     }
+
+    #[test]
+    fn test_bigquery_dialect_overrides() -> Result<()> {
+        let bigquery_dialect: Arc<dyn Dialect> = Arc::new(BigQueryDialect::new());
+        let unparser = Unparser::new(bigquery_dialect.as_ref());
+
+        // date_field_extract_style: EXTRACT instead of date_part
+        let expr = Expr::ScalarFunction(ScalarFunction {
+            func: Arc::new(ScalarUDF::new_from_impl(
+                datafusion_functions::datetime::date_part::DatePartFunc::new(),
+            )),
+            args: vec![lit("YEAR"), col("date_col")],
+        });
+        let actual = format!("{}", unparser.expr_to_sql(&expr)?);
+        assert_eq!(actual, "EXTRACT(YEAR FROM `date_col`)");
+
+        // interval_style: SQL standard instead of PostgresVerbose
+        let expr = interval_year_month_lit("3 months");
+        let actual = format!("{}", unparser.expr_to_sql(&expr)?);
+        assert_eq!(actual, "INTERVAL '3' MONTH");
+
+        // float64_ast_dtype: FLOAT64 instead of DOUBLE
+        let expr = cast(col("a"), DataType::Float64);
+        let actual = format!("{}", unparser.expr_to_sql(&expr)?);
+        assert_eq!(actual, "CAST(`a` AS FLOAT64)");
+
+        // supports_column_alias_in_table_alias: false
+        assert!(!bigquery_dialect.supports_column_alias_in_table_alias());
+
+        // utf8_cast_dtype: STRING instead of VARCHAR
+        let expr = cast(col("a"), DataType::Utf8);
+        let actual = format!("{}", unparser.expr_to_sql(&expr)?);
+        assert_eq!(actual, "CAST(`a` AS STRING)");
+
+        // large_utf8_cast_dtype: STRING instead of TEXT
+        let expr = cast(col("a"), DataType::LargeUtf8);
+        let actual = format!("{}", unparser.expr_to_sql(&expr)?);
+        assert_eq!(actual, "CAST(`a` AS STRING)");
+
+        // int64_cast_dtype: INT64 instead of BIGINT
+        let expr = cast(col("a"), DataType::Int64);
+        let actual = format!("{}", unparser.expr_to_sql(&expr)?);
+        assert_eq!(actual, "CAST(`a` AS INT64)");
+
+        // timestamp_cast_dtype: TIMESTAMP (no WITH TIME ZONE)
+        let expr = cast(
+            col("a"),
+            DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())),
+        );
+        let actual = format!("{}", unparser.expr_to_sql(&expr)?);
+        assert_eq!(actual, "CAST(`a` AS TIMESTAMP)");
+
+        Ok(())
+    }
 }