From 10ef3974a086d2b8a6dcaea65927be78a3199b8c Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Fri, 27 Feb 2026 18:41:34 +0100 Subject: [PATCH 01/10] Add snapshot tests of existing formatting --- Cargo.lock | 1 + datafusion/common/src/types/logical.rs | 128 +++++++++++++++++++++++ datafusion/common/src/types/native.rs | 83 +++++++++++++++ datafusion/expr-common/Cargo.toml | 3 + datafusion/expr-common/src/signature.rs | 133 +++++++++++++++++++++++- datafusion/expr/src/utils.rs | 108 +++++++++++++++++++ 6 files changed, 455 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index daf9e166c9896..2655c923f49f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2176,6 +2176,7 @@ dependencies = [ "arrow", "datafusion-common", "indexmap 2.13.0", + "insta", "itertools 0.14.0", "paste", ] diff --git a/datafusion/common/src/types/logical.rs b/datafusion/common/src/types/logical.rs index 674b1a41204d1..9257238cd41a9 100644 --- a/datafusion/common/src/types/logical.rs +++ b/datafusion/common/src/types/logical.rs @@ -132,3 +132,131 @@ impl Hash for dyn LogicalType { self.signature().hash(state); } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::{ + LogicalField, LogicalFields, logical_boolean, logical_date, logical_float32, + logical_float64, logical_int32, logical_int64, logical_null, logical_string, + }; + use arrow::datatypes::{DataType, Field, Fields}; + use insta::assert_snapshot; + + #[test] + fn test_logical_type_display_simple() { + assert_snapshot!(logical_null(), @"LogicalType(Native(Null), Null)"); + assert_snapshot!(logical_boolean(), @"LogicalType(Native(Boolean), Boolean)"); + assert_snapshot!(logical_int32(), @"LogicalType(Native(Int32), Int32)"); + assert_snapshot!(logical_int64(), @"LogicalType(Native(Int64), Int64)"); + assert_snapshot!(logical_float32(), @"LogicalType(Native(Float32), Float32)"); + assert_snapshot!(logical_float64(), @"LogicalType(Native(Float64), Float64)"); + assert_snapshot!(logical_string(), @"LogicalType(Native(String), String)"); + assert_snapshot!(logical_date(), @"LogicalType(Native(Date), Date)"); + } + + #[test] + fn test_logical_type_display_list() { + let list_type: Arc = Arc::new(NativeType::List(Arc::new( + LogicalField::from(&Field::new("item", DataType::Int32, true)), + ))); + // NOTE: this is extremely verbose and hard to read. + // Ideally this would just say something like "List(Int32)". + assert_snapshot!(list_type, @r#"LogicalType(Native(List(LogicalField { name: "item", logical_type: LogicalType(Native(Int32), Int32), nullable: true })), List(LogicalField { name: "item", logical_type: LogicalType(Native(Int32), Int32), nullable: true }))"#); + } + + #[test] + fn test_logical_type_display_struct() { + let struct_type: Arc = + Arc::new(NativeType::Struct(LogicalFields::from(&Fields::from( + vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + ], + )))); + // NOTE: this is extremely verbose and hard to read. + // Ideally this would just say something like "Struct(x: Float64, y: Float64)". + assert_snapshot!(struct_type, @r#"LogicalType(Native(Struct(LogicalFields([LogicalField { name: "x", logical_type: LogicalType(Native(Float64), Float64), nullable: false }, LogicalField { name: "y", logical_type: LogicalType(Native(Float64), Float64), nullable: false }]))), Struct(LogicalFields([LogicalField { name: "x", logical_type: LogicalType(Native(Float64), Float64), nullable: false }, LogicalField { name: "y", logical_type: LogicalType(Native(Float64), Float64), nullable: false }])))"#); + } + + #[test] + fn test_logical_type_display_fixed_size_list() { + let fsl_type: Arc = Arc::new(NativeType::FixedSizeList( + Arc::new(LogicalField::from(&Field::new( + "item", + DataType::Float32, + false, + ))), + 3, + )); + assert_snapshot!(fsl_type, @r#"LogicalType(Native(FixedSizeList(LogicalField { name: "item", logical_type: LogicalType(Native(Float32), Float32), nullable: false }, 3)), FixedSizeList(LogicalField { name: "item", logical_type: LogicalType(Native(Float32), Float32), nullable: false }, 3))"#); + } + + #[test] + fn test_logical_type_display_map() { + let map_type: Arc = Arc::new(NativeType::Map(Arc::new( + LogicalField::from(&Field::new("entries", DataType::Utf8, false)), + ))); + assert_snapshot!(map_type, @r#"LogicalType(Native(Map(LogicalField { name: "entries", logical_type: LogicalType(Native(String), String), nullable: false })), Map(LogicalField { name: "entries", logical_type: LogicalType(Native(String), String), nullable: false }))"#); + } + + #[test] + fn test_logical_type_display_union() { + use arrow::datatypes::{UnionFields, UnionMode}; + + let union_fields = UnionFields::try_new( + vec![0, 1], + vec![ + Field::new("int_val", DataType::Int32, false), + Field::new("str_val", DataType::Utf8, true), + ], + ) + .unwrap(); + let union_type: Arc = Arc::new(NativeType::Union( + crate::types::LogicalUnionFields::from(&union_fields), + )); + assert_snapshot!(union_type, @r#"LogicalType(Native(Union(LogicalUnionFields([(0, LogicalField { name: "int_val", logical_type: LogicalType(Native(Int32), Int32), nullable: false }), (1, LogicalField { name: "str_val", logical_type: LogicalType(Native(String), String), nullable: true })]))), Union(LogicalUnionFields([(0, LogicalField { name: "int_val", logical_type: LogicalType(Native(Int32), Int32), nullable: false }), (1, LogicalField { name: "str_val", logical_type: LogicalType(Native(String), String), nullable: true })])))"#); + } + + #[test] + fn test_logical_type_display_nullable_vs_non_nullable() { + // Both nullable and non-nullable fields produce the same LogicalType Display, + // since LogicalType Display is based on Debug and includes the nullable field. + let nullable_list: Arc = Arc::new(NativeType::List(Arc::new( + LogicalField::from(&Field::new("item", DataType::Int32, true)), + ))); + let non_nullable_list: Arc = + Arc::new(NativeType::List(Arc::new(LogicalField::from(&Field::new( + "item", + DataType::Int32, + false, + ))))); + + // The Display output includes nullable info (because it delegates to Debug) + let nullable_str = format!("{nullable_list}"); + let non_nullable_str = format!("{non_nullable_list}"); + assert!(nullable_str.contains("nullable: true")); + assert!(non_nullable_str.contains("nullable: false")); + + assert_snapshot!(nullable_list, @r#"LogicalType(Native(List(LogicalField { name: "item", logical_type: LogicalType(Native(Int32), Int32), nullable: true })), List(LogicalField { name: "item", logical_type: LogicalType(Native(Int32), Int32), nullable: true }))"#); + assert_snapshot!(non_nullable_list, @r#"LogicalType(Native(List(LogicalField { name: "item", logical_type: LogicalType(Native(Int32), Int32), nullable: false })), List(LogicalField { name: "item", logical_type: LogicalType(Native(Int32), Int32), nullable: false }))"#); + } + + #[test] + fn test_logical_type_display_extension() { + struct JsonType; + impl LogicalType for JsonType { + fn native(&self) -> &NativeType { + &NativeType::String + } + fn signature(&self) -> TypeSignature<'_> { + TypeSignature::Extension { + name: "JSON", + parameters: &[], + } + } + } + let json: Arc = Arc::new(JsonType); + assert_snapshot!(json, @r#"LogicalType(Extension { name: "JSON", parameters: [] }, String)"#); + } +} diff --git a/datafusion/common/src/types/native.rs b/datafusion/common/src/types/native.rs index 65b6a5a15fc89..01cbc3bc87528 100644 --- a/datafusion/common/src/types/native.rs +++ b/datafusion/common/src/types/native.rs @@ -537,3 +537,86 @@ impl NativeType { matches!(self, Self::Float16 | Self::Float32 | Self::Float64) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::LogicalField; + use arrow::datatypes::Field; + use insta::assert_snapshot; + + #[test] + fn test_native_type_display() { + assert_snapshot!(NativeType::Null, @"Null"); + assert_snapshot!(NativeType::Boolean, @"Boolean"); + assert_snapshot!(NativeType::Int8, @"Int8"); + assert_snapshot!(NativeType::Int16, @"Int16"); + assert_snapshot!(NativeType::Int32, @"Int32"); + assert_snapshot!(NativeType::Int64, @"Int64"); + assert_snapshot!(NativeType::UInt8, @"UInt8"); + assert_snapshot!(NativeType::UInt16, @"UInt16"); + assert_snapshot!(NativeType::UInt32, @"UInt32"); + assert_snapshot!(NativeType::UInt64, @"UInt64"); + assert_snapshot!(NativeType::Float16, @"Float16"); + assert_snapshot!(NativeType::Float32, @"Float32"); + assert_snapshot!(NativeType::Float64, @"Float64"); + assert_snapshot!(NativeType::Date, @"Date"); + assert_snapshot!(NativeType::Binary, @"Binary"); + assert_snapshot!(NativeType::String, @"String"); + assert_snapshot!(NativeType::FixedSizeBinary(16), @"FixedSizeBinary(16)"); + assert_snapshot!(NativeType::Decimal(10, 2), @"Decimal(10, 2)"); + } + + #[test] + fn test_native_type_display_timestamp() { + assert_snapshot!( + NativeType::Timestamp(TimeUnit::Second, None), + @"Timestamp(s)" + ); + assert_snapshot!( + NativeType::Timestamp(TimeUnit::Millisecond, None), + @"Timestamp(ms)" + ); + assert_snapshot!( + NativeType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("UTC"))), + @r#"Timestamp(ns, "UTC")"# + ); + } + + #[test] + fn test_native_type_display_time_duration_interval() { + assert_snapshot!(NativeType::Time(TimeUnit::Microsecond), @"Time(µs)"); + assert_snapshot!(NativeType::Duration(TimeUnit::Nanosecond), @"Duration(ns)"); + assert_snapshot!(NativeType::Interval(IntervalUnit::YearMonth), @"Interval(YearMonth)"); + assert_snapshot!(NativeType::Interval(IntervalUnit::MonthDayNano), @"Interval(MonthDayNano)"); + } + + #[test] + fn test_native_type_display_nested() { + let list = NativeType::List(Arc::new(LogicalField::from( + &Field::new("item", DataType::Int32, true), + ))); + assert_snapshot!(list, @"List(LogicalType(Native(Int32), Int32))"); + + let fixed_list = NativeType::FixedSizeList( + Arc::new(LogicalField::from( + &Field::new("item", DataType::Float64, false), + )), + 3, + ); + assert_snapshot!(fixed_list, @"FixedSizeList(3 x LogicalType(Native(Float64), Float64))"); + + let struct_type = NativeType::Struct(LogicalFields::from( + &Fields::from(vec![ + Field::new("name", DataType::Utf8, false), + Field::new("age", DataType::Int32, true), + ]), + )); + assert_snapshot!(struct_type, @r#"Struct("name": LogicalType(Native(String), String), "age": LogicalType(Native(Int32), Int32))"#); + + let map = NativeType::Map(Arc::new(LogicalField::from( + &Field::new("entries", DataType::Utf8, false), + ))); + assert_snapshot!(map, @"Map(LogicalType(Native(String), String))"); + } +} diff --git a/datafusion/expr-common/Cargo.toml b/datafusion/expr-common/Cargo.toml index 5ee46b454e791..d66d8ee8583d0 100644 --- a/datafusion/expr-common/Cargo.toml +++ b/datafusion/expr-common/Cargo.toml @@ -46,3 +46,6 @@ datafusion-common = { workspace = true } indexmap = { workspace = true } itertools = { workspace = true } paste = { workspace = true } + +[dev-dependencies] +insta = { workspace = true } diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index 857e9dc5d42d1..af180342a1e54 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -1438,7 +1438,9 @@ impl Signature { #[cfg(test)] mod tests { - use datafusion_common::types::{logical_int32, logical_int64, logical_string}; + use datafusion_common::types::{ + logical_float64, logical_int32, logical_int64, logical_string, NativeType, + }; use super::*; use crate::signature::{ @@ -2034,4 +2036,133 @@ mod tests { let sig = TypeSignature::UserDefined; assert_eq!(sig.arity(), Arity::Variable); } + + #[test] + fn test_type_signature_class_display() { + use insta::assert_snapshot; + + assert_snapshot!( + TypeSignatureClass::Any, + @"TypeSignatureClass::Any" + ); + assert_snapshot!( + TypeSignatureClass::Numeric, + @"TypeSignatureClass::Numeric" + ); + assert_snapshot!( + TypeSignatureClass::Integer, + @"TypeSignatureClass::Integer" + ); + assert_snapshot!( + TypeSignatureClass::Float, + @"TypeSignatureClass::Float" + ); + assert_snapshot!( + TypeSignatureClass::Decimal, + @"TypeSignatureClass::Decimal" + ); + assert_snapshot!( + TypeSignatureClass::Timestamp, + @"TypeSignatureClass::Timestamp" + ); + assert_snapshot!( + TypeSignatureClass::Time, + @"TypeSignatureClass::Time" + ); + assert_snapshot!( + TypeSignatureClass::Interval, + @"TypeSignatureClass::Interval" + ); + assert_snapshot!( + TypeSignatureClass::Duration, + @"TypeSignatureClass::Duration" + ); + assert_snapshot!( + TypeSignatureClass::Binary, + @"TypeSignatureClass::Binary" + ); + assert_snapshot!( + TypeSignatureClass::Native(logical_int32()), + @"TypeSignatureClass::Native(LogicalType(Native(Int32), Int32))" + ); + assert_snapshot!( + TypeSignatureClass::Native(logical_string()), + @"TypeSignatureClass::Native(LogicalType(Native(String), String))" + ); + } + + #[test] + fn test_coercion_display() { + use insta::assert_snapshot; + + let exact_int = Coercion::new_exact(TypeSignatureClass::Native(logical_int32())); + assert_snapshot!( + exact_int, + @"Coercion(TypeSignatureClass::Native(LogicalType(Native(Int32), Int32)))" + ); + + let exact_numeric = Coercion::new_exact(TypeSignatureClass::Numeric); + assert_snapshot!(exact_numeric, @"Coercion(TypeSignatureClass::Numeric)"); + + let implicit = Coercion::new_implicit( + TypeSignatureClass::Native(logical_float64()), + vec![TypeSignatureClass::Numeric], + NativeType::Float64, + ); + assert_snapshot!( + implicit, + @"Coercion(TypeSignatureClass::Native(LogicalType(Native(Float64), Float64)), implicit_coercion=ImplicitCoercion([Numeric], default_type=Float64)" + ); + + let implicit_with_multiple_sources = Coercion::new_implicit( + TypeSignatureClass::Native(logical_int64()), + vec![TypeSignatureClass::Integer, TypeSignatureClass::Numeric], + NativeType::Int64, + ); + assert_snapshot!( + implicit_with_multiple_sources, + @"Coercion(TypeSignatureClass::Native(LogicalType(Native(Int64), Int64)), implicit_coercion=ImplicitCoercion([Integer, Numeric], default_type=Int64)" + ); + } + + #[test] + fn test_to_string_repr_coercible() { + use insta::assert_snapshot; + + // Simulates a function like round(Float64, Int64) with coercion + let sig = TypeSignature::Coercible(vec![ + Coercion::new_implicit( + TypeSignatureClass::Native(logical_float64()), + vec![TypeSignatureClass::Numeric], + NativeType::Float64, + ), + Coercion::new_implicit( + TypeSignatureClass::Native(logical_int64()), + vec![TypeSignatureClass::Integer], + NativeType::Int64, + ), + ]); + let repr = sig.to_string_repr(); + assert_eq!(repr.len(), 1); + assert_snapshot!( + repr[0], + @"Coercion(TypeSignatureClass::Native(LogicalType(Native(Float64), Float64)), implicit_coercion=ImplicitCoercion([Numeric], default_type=Float64), Coercion(TypeSignatureClass::Native(LogicalType(Native(Int64), Int64)), implicit_coercion=ImplicitCoercion([Integer], default_type=Int64)" + ); + } + + #[test] + fn test_to_string_repr_coercible_exact() { + use insta::assert_snapshot; + + let sig = TypeSignature::Coercible(vec![ + Coercion::new_exact(TypeSignatureClass::Native(logical_string())), + Coercion::new_exact(TypeSignatureClass::Native(logical_int64())), + ]); + let repr = sig.to_string_repr(); + assert_eq!(repr.len(), 1); + assert_snapshot!( + repr[0], + @"Coercion(TypeSignatureClass::Native(LogicalType(Native(String), String))), Coercion(TypeSignatureClass::Native(LogicalType(Native(Int64), Int64)))" + ); + } } diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index b19299981cef3..e1a4282b5a856 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -1784,4 +1784,112 @@ mod tests { "Expected 'Any, Any' without parameter names, got: {error_msg}" ); } + + #[test] + fn test_signature_error_msg_exact() { + use insta::assert_snapshot; + + let sig = Signature::one_of( + vec![ + TypeSignature::Exact(vec![DataType::Float64, DataType::Int64]), + TypeSignature::Exact(vec![DataType::Float32, DataType::Int64]), + TypeSignature::Exact(vec![DataType::Float64]), + TypeSignature::Exact(vec![DataType::Float32]), + ], + Volatility::Immutable, + ); + let msg = generate_signature_error_message( + "round", + &sig, + &[DataType::Float64, DataType::Float64], + ); + assert_snapshot!(msg, @r" + No function matches the given name and argument types 'round(Float64, Float64)'. You might need to add explicit type casts. + Candidate functions: + round(Float64, Int64) + round(Float32, Int64) + round(Float64) + round(Float32) + "); + } + + #[test] + fn test_signature_error_msg_coercible() { + use datafusion_common::types::NativeType; + use datafusion_expr_common::signature::{Coercion, TypeSignatureClass}; + use insta::assert_snapshot; + + let sig = Signature::coercible( + vec![ + Coercion::new_implicit( + TypeSignatureClass::Native( + datafusion_common::types::logical_float64(), + ), + vec![TypeSignatureClass::Numeric], + NativeType::Float64, + ), + Coercion::new_implicit( + TypeSignatureClass::Native( + datafusion_common::types::logical_int64(), + ), + vec![TypeSignatureClass::Integer], + NativeType::Int64, + ), + ], + Volatility::Immutable, + ); + let msg = generate_signature_error_message( + "round", + &sig, + &[DataType::Utf8, DataType::Utf8], + ); + assert_snapshot!(msg, @r" + No function matches the given name and argument types 'round(Utf8, Utf8)'. You might need to add explicit type casts. + Candidate functions: + round(Coercion(TypeSignatureClass::Native(LogicalType(Native(Float64), Float64)), implicit_coercion=ImplicitCoercion([Numeric], default_type=Float64), Coercion(TypeSignatureClass::Native(LogicalType(Native(Int64), Int64)), implicit_coercion=ImplicitCoercion([Integer], default_type=Int64)) + "); + } + + #[test] + fn test_signature_error_msg_with_names_coercible() { + use datafusion_common::types::NativeType; + use datafusion_expr_common::signature::{Coercion, TypeSignatureClass}; + use insta::assert_snapshot; + + let sig = Signature::coercible( + vec![ + Coercion::new_exact(TypeSignatureClass::Native( + datafusion_common::types::logical_string(), + )), + Coercion::new_exact(TypeSignatureClass::Native( + datafusion_common::types::logical_int64(), + )), + Coercion::new_implicit( + TypeSignatureClass::Native( + datafusion_common::types::logical_int64(), + ), + vec![TypeSignatureClass::Integer], + NativeType::Int64, + ), + ], + Volatility::Immutable, + ) + .with_parameter_names(vec![ + "string".to_string(), + "start_pos".to_string(), + "length".to_string(), + ]) + .expect("valid parameter names"); + + let msg = generate_signature_error_message( + "substr", + &sig, + &[DataType::Int32], + ); + assert_snapshot!(msg, @r" + No function matches the given name and argument types 'substr(Int32)'. You might need to add explicit type casts. + Candidate functions: + substr(string: Coercion(TypeSignatureClass::Native(LogicalType(Native(String), String))), start_pos: Coercion(TypeSignatureClass::Native(LogicalType(Native(Int64), Int64))), length: Coercion(TypeSignatureClass::Native(LogicalType(Native(Int64), Int64)), implicit_coercion=ImplicitCoercion([Integer], default_type=Int64)) + "); + } } From b2644263df5ee4e0ab2919706381af0d67046c58 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Fri, 27 Feb 2026 18:48:43 +0100 Subject: [PATCH 02/10] Improve Display formatting of LogicalType, TypeSignatureClass, and Coercion Use Arrow DataType Display style as the north-star: terse, readable type names instead of dumping internal Rust enum structure. - `dyn LogicalType`: delegates to NativeType Display instead of Debug, e.g. `Int32` instead of `LogicalType(Native(Int32), Int32)` - `TypeSignatureClass`: shows just the class name, e.g. `Numeric` instead of `TypeSignatureClass::Numeric` - `Coercion`: shows just the desired type, e.g. `Float64` instead of `Coercion(TypeSignatureClass::...)` - `ImplicitCoercion`: uses Display for its fields instead of Debug This dramatically improves error messages like: Before: `round(Coercion(TypeSignatureClass::Native(LogicalType(Native(Float64), Float64)), implicit_coercion=ImplicitCoercion([Numeric], default_type=Float64), ...)` After: `round(Float64, Int64)` Co-Authored-By: Claude Opus 4.6 --- datafusion/common/src/types/logical.rs | 52 +++++----- datafusion/common/src/types/native.rs | 8 +- datafusion/expr-common/src/signature.rs | 121 ++++++++---------------- datafusion/expr/src/utils.rs | 4 +- 4 files changed, 68 insertions(+), 117 deletions(-) diff --git a/datafusion/common/src/types/logical.rs b/datafusion/common/src/types/logical.rs index 9257238cd41a9..cdd220e9a6b05 100644 --- a/datafusion/common/src/types/logical.rs +++ b/datafusion/common/src/types/logical.rs @@ -100,7 +100,10 @@ impl fmt::Debug for dyn LogicalType { impl std::fmt::Display for dyn LogicalType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{self:?}") + match self.signature() { + TypeSignature::Native(_) => write!(f, "{}", self.native()), + TypeSignature::Extension { name, .. } => write!(f, "{name}"), + } } } @@ -145,14 +148,14 @@ mod tests { #[test] fn test_logical_type_display_simple() { - assert_snapshot!(logical_null(), @"LogicalType(Native(Null), Null)"); - assert_snapshot!(logical_boolean(), @"LogicalType(Native(Boolean), Boolean)"); - assert_snapshot!(logical_int32(), @"LogicalType(Native(Int32), Int32)"); - assert_snapshot!(logical_int64(), @"LogicalType(Native(Int64), Int64)"); - assert_snapshot!(logical_float32(), @"LogicalType(Native(Float32), Float32)"); - assert_snapshot!(logical_float64(), @"LogicalType(Native(Float64), Float64)"); - assert_snapshot!(logical_string(), @"LogicalType(Native(String), String)"); - assert_snapshot!(logical_date(), @"LogicalType(Native(Date), Date)"); + assert_snapshot!(logical_null(), @"Null"); + assert_snapshot!(logical_boolean(), @"Boolean"); + assert_snapshot!(logical_int32(), @"Int32"); + assert_snapshot!(logical_int64(), @"Int64"); + assert_snapshot!(logical_float32(), @"Float32"); + assert_snapshot!(logical_float64(), @"Float64"); + assert_snapshot!(logical_string(), @"String"); + assert_snapshot!(logical_date(), @"Date"); } #[test] @@ -160,9 +163,7 @@ mod tests { let list_type: Arc = Arc::new(NativeType::List(Arc::new( LogicalField::from(&Field::new("item", DataType::Int32, true)), ))); - // NOTE: this is extremely verbose and hard to read. - // Ideally this would just say something like "List(Int32)". - assert_snapshot!(list_type, @r#"LogicalType(Native(List(LogicalField { name: "item", logical_type: LogicalType(Native(Int32), Int32), nullable: true })), List(LogicalField { name: "item", logical_type: LogicalType(Native(Int32), Int32), nullable: true }))"#); + assert_snapshot!(list_type, @"List(Int32)"); } #[test] @@ -174,9 +175,7 @@ mod tests { Field::new("y", DataType::Float64, false), ], )))); - // NOTE: this is extremely verbose and hard to read. - // Ideally this would just say something like "Struct(x: Float64, y: Float64)". - assert_snapshot!(struct_type, @r#"LogicalType(Native(Struct(LogicalFields([LogicalField { name: "x", logical_type: LogicalType(Native(Float64), Float64), nullable: false }, LogicalField { name: "y", logical_type: LogicalType(Native(Float64), Float64), nullable: false }]))), Struct(LogicalFields([LogicalField { name: "x", logical_type: LogicalType(Native(Float64), Float64), nullable: false }, LogicalField { name: "y", logical_type: LogicalType(Native(Float64), Float64), nullable: false }])))"#); + assert_snapshot!(struct_type, @r#"Struct("x": Float64, "y": Float64)"#); } #[test] @@ -189,7 +188,7 @@ mod tests { ))), 3, )); - assert_snapshot!(fsl_type, @r#"LogicalType(Native(FixedSizeList(LogicalField { name: "item", logical_type: LogicalType(Native(Float32), Float32), nullable: false }, 3)), FixedSizeList(LogicalField { name: "item", logical_type: LogicalType(Native(Float32), Float32), nullable: false }, 3))"#); + assert_snapshot!(fsl_type, @"FixedSizeList(3 x Float32)"); } #[test] @@ -197,12 +196,12 @@ mod tests { let map_type: Arc = Arc::new(NativeType::Map(Arc::new( LogicalField::from(&Field::new("entries", DataType::Utf8, false)), ))); - assert_snapshot!(map_type, @r#"LogicalType(Native(Map(LogicalField { name: "entries", logical_type: LogicalType(Native(String), String), nullable: false })), Map(LogicalField { name: "entries", logical_type: LogicalType(Native(String), String), nullable: false }))"#); + assert_snapshot!(map_type, @"Map(String)"); } #[test] fn test_logical_type_display_union() { - use arrow::datatypes::{UnionFields, UnionMode}; + use arrow::datatypes::UnionFields; let union_fields = UnionFields::try_new( vec![0, 1], @@ -215,13 +214,11 @@ mod tests { let union_type: Arc = Arc::new(NativeType::Union( crate::types::LogicalUnionFields::from(&union_fields), )); - assert_snapshot!(union_type, @r#"LogicalType(Native(Union(LogicalUnionFields([(0, LogicalField { name: "int_val", logical_type: LogicalType(Native(Int32), Int32), nullable: false }), (1, LogicalField { name: "str_val", logical_type: LogicalType(Native(String), String), nullable: true })]))), Union(LogicalUnionFields([(0, LogicalField { name: "int_val", logical_type: LogicalType(Native(Int32), Int32), nullable: false }), (1, LogicalField { name: "str_val", logical_type: LogicalType(Native(String), String), nullable: true })])))"#); + assert_snapshot!(union_type, @r#"Union(0: ("int_val": Int32), 1: ("str_val": String))"#); } #[test] fn test_logical_type_display_nullable_vs_non_nullable() { - // Both nullable and non-nullable fields produce the same LogicalType Display, - // since LogicalType Display is based on Debug and includes the nullable field. let nullable_list: Arc = Arc::new(NativeType::List(Arc::new( LogicalField::from(&Field::new("item", DataType::Int32, true)), ))); @@ -232,14 +229,9 @@ mod tests { false, ))))); - // The Display output includes nullable info (because it delegates to Debug) - let nullable_str = format!("{nullable_list}"); - let non_nullable_str = format!("{non_nullable_list}"); - assert!(nullable_str.contains("nullable: true")); - assert!(non_nullable_str.contains("nullable: false")); - - assert_snapshot!(nullable_list, @r#"LogicalType(Native(List(LogicalField { name: "item", logical_type: LogicalType(Native(Int32), Int32), nullable: true })), List(LogicalField { name: "item", logical_type: LogicalType(Native(Int32), Int32), nullable: true }))"#); - assert_snapshot!(non_nullable_list, @r#"LogicalType(Native(List(LogicalField { name: "item", logical_type: LogicalType(Native(Int32), Int32), nullable: false })), List(LogicalField { name: "item", logical_type: LogicalType(Native(Int32), Int32), nullable: false }))"#); + // Display delegates to NativeType which doesn't distinguish nullability + assert_snapshot!(nullable_list, @"List(Int32)"); + assert_snapshot!(non_nullable_list, @"List(Int32)"); } #[test] @@ -257,6 +249,6 @@ mod tests { } } let json: Arc = Arc::new(JsonType); - assert_snapshot!(json, @r#"LogicalType(Extension { name: "JSON", parameters: [] }, String)"#); + assert_snapshot!(json, @"JSON"); } } diff --git a/datafusion/common/src/types/native.rs b/datafusion/common/src/types/native.rs index 01cbc3bc87528..7eafde208f75b 100644 --- a/datafusion/common/src/types/native.rs +++ b/datafusion/common/src/types/native.rs @@ -596,7 +596,7 @@ mod tests { let list = NativeType::List(Arc::new(LogicalField::from( &Field::new("item", DataType::Int32, true), ))); - assert_snapshot!(list, @"List(LogicalType(Native(Int32), Int32))"); + assert_snapshot!(list, @"List(Int32)"); let fixed_list = NativeType::FixedSizeList( Arc::new(LogicalField::from( @@ -604,7 +604,7 @@ mod tests { )), 3, ); - assert_snapshot!(fixed_list, @"FixedSizeList(3 x LogicalType(Native(Float64), Float64))"); + assert_snapshot!(fixed_list, @"FixedSizeList(3 x Float64)"); let struct_type = NativeType::Struct(LogicalFields::from( &Fields::from(vec![ @@ -612,11 +612,11 @@ mod tests { Field::new("age", DataType::Int32, true), ]), )); - assert_snapshot!(struct_type, @r#"Struct("name": LogicalType(Native(String), String), "age": LogicalType(Native(Int32), Int32))"#); + assert_snapshot!(struct_type, @r#"Struct("name": String, "age": Int32)"#); let map = NativeType::Map(Arc::new(LogicalField::from( &Field::new("entries", DataType::Utf8, false), ))); - assert_snapshot!(map, @"Map(LogicalType(Native(String), String))"); + assert_snapshot!(map, @"Map(String)"); } } diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index af180342a1e54..a855d2a824b0a 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -358,7 +358,19 @@ pub enum TypeSignatureClass { impl Display for TypeSignatureClass { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "TypeSignatureClass::{self:?}") + match self { + Self::Any => write!(f, "Any"), + Self::Timestamp => write!(f, "Timestamp"), + Self::Time => write!(f, "Time"), + Self::Interval => write!(f, "Interval"), + Self::Duration => write!(f, "Duration"), + Self::Native(logical_type) => write!(f, "{logical_type}"), + Self::Integer => write!(f, "Integer"), + Self::Float => write!(f, "Float"), + Self::Decimal => write!(f, "Decimal"), + Self::Numeric => write!(f, "Numeric"), + Self::Binary => write!(f, "Binary"), + } } } @@ -1062,12 +1074,7 @@ impl Coercion { impl Display for Coercion { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Coercion({}", self.desired_type())?; - if let Some(implicit_coercion) = self.implicit_coercion() { - write!(f, ", implicit_coercion={implicit_coercion}",) - } else { - write!(f, ")") - } + write!(f, "{}", self.desired_type()) } } @@ -1119,11 +1126,14 @@ pub struct ImplicitCoercion { impl Display for ImplicitCoercion { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "ImplicitCoercion({:?}, default_type={:?})", - self.allowed_source_types, self.default_casted_type - ) + write!(f, "ImplicitCoercion(")?; + for (i, source_type) in self.allowed_source_types.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{source_type}")?; + } + write!(f, "; default={}", self.default_casted_type) } } @@ -2041,54 +2051,18 @@ mod tests { fn test_type_signature_class_display() { use insta::assert_snapshot; - assert_snapshot!( - TypeSignatureClass::Any, - @"TypeSignatureClass::Any" - ); - assert_snapshot!( - TypeSignatureClass::Numeric, - @"TypeSignatureClass::Numeric" - ); - assert_snapshot!( - TypeSignatureClass::Integer, - @"TypeSignatureClass::Integer" - ); - assert_snapshot!( - TypeSignatureClass::Float, - @"TypeSignatureClass::Float" - ); - assert_snapshot!( - TypeSignatureClass::Decimal, - @"TypeSignatureClass::Decimal" - ); - assert_snapshot!( - TypeSignatureClass::Timestamp, - @"TypeSignatureClass::Timestamp" - ); - assert_snapshot!( - TypeSignatureClass::Time, - @"TypeSignatureClass::Time" - ); - assert_snapshot!( - TypeSignatureClass::Interval, - @"TypeSignatureClass::Interval" - ); - assert_snapshot!( - TypeSignatureClass::Duration, - @"TypeSignatureClass::Duration" - ); - assert_snapshot!( - TypeSignatureClass::Binary, - @"TypeSignatureClass::Binary" - ); - assert_snapshot!( - TypeSignatureClass::Native(logical_int32()), - @"TypeSignatureClass::Native(LogicalType(Native(Int32), Int32))" - ); - assert_snapshot!( - TypeSignatureClass::Native(logical_string()), - @"TypeSignatureClass::Native(LogicalType(Native(String), String))" - ); + assert_snapshot!(TypeSignatureClass::Any, @"Any"); + assert_snapshot!(TypeSignatureClass::Numeric, @"Numeric"); + assert_snapshot!(TypeSignatureClass::Integer, @"Integer"); + assert_snapshot!(TypeSignatureClass::Float, @"Float"); + assert_snapshot!(TypeSignatureClass::Decimal, @"Decimal"); + assert_snapshot!(TypeSignatureClass::Timestamp, @"Timestamp"); + assert_snapshot!(TypeSignatureClass::Time, @"Time"); + assert_snapshot!(TypeSignatureClass::Interval, @"Interval"); + assert_snapshot!(TypeSignatureClass::Duration, @"Duration"); + assert_snapshot!(TypeSignatureClass::Binary, @"Binary"); + assert_snapshot!(TypeSignatureClass::Native(logical_int32()), @"Int32"); + assert_snapshot!(TypeSignatureClass::Native(logical_string()), @"String"); } #[test] @@ -2096,33 +2070,24 @@ mod tests { use insta::assert_snapshot; let exact_int = Coercion::new_exact(TypeSignatureClass::Native(logical_int32())); - assert_snapshot!( - exact_int, - @"Coercion(TypeSignatureClass::Native(LogicalType(Native(Int32), Int32)))" - ); + assert_snapshot!(exact_int, @"Int32"); let exact_numeric = Coercion::new_exact(TypeSignatureClass::Numeric); - assert_snapshot!(exact_numeric, @"Coercion(TypeSignatureClass::Numeric)"); + assert_snapshot!(exact_numeric, @"Numeric"); let implicit = Coercion::new_implicit( TypeSignatureClass::Native(logical_float64()), vec![TypeSignatureClass::Numeric], NativeType::Float64, ); - assert_snapshot!( - implicit, - @"Coercion(TypeSignatureClass::Native(LogicalType(Native(Float64), Float64)), implicit_coercion=ImplicitCoercion([Numeric], default_type=Float64)" - ); + assert_snapshot!(implicit, @"Float64"); let implicit_with_multiple_sources = Coercion::new_implicit( TypeSignatureClass::Native(logical_int64()), vec![TypeSignatureClass::Integer, TypeSignatureClass::Numeric], NativeType::Int64, ); - assert_snapshot!( - implicit_with_multiple_sources, - @"Coercion(TypeSignatureClass::Native(LogicalType(Native(Int64), Int64)), implicit_coercion=ImplicitCoercion([Integer, Numeric], default_type=Int64)" - ); + assert_snapshot!(implicit_with_multiple_sources, @"Int64"); } #[test] @@ -2144,10 +2109,7 @@ mod tests { ]); let repr = sig.to_string_repr(); assert_eq!(repr.len(), 1); - assert_snapshot!( - repr[0], - @"Coercion(TypeSignatureClass::Native(LogicalType(Native(Float64), Float64)), implicit_coercion=ImplicitCoercion([Numeric], default_type=Float64), Coercion(TypeSignatureClass::Native(LogicalType(Native(Int64), Int64)), implicit_coercion=ImplicitCoercion([Integer], default_type=Int64)" - ); + assert_snapshot!(repr[0], @"Float64, Int64"); } #[test] @@ -2160,9 +2122,6 @@ mod tests { ]); let repr = sig.to_string_repr(); assert_eq!(repr.len(), 1); - assert_snapshot!( - repr[0], - @"Coercion(TypeSignatureClass::Native(LogicalType(Native(String), String))), Coercion(TypeSignatureClass::Native(LogicalType(Native(Int64), Int64)))" - ); + assert_snapshot!(repr[0], @"String, Int64"); } } diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index e1a4282b5a856..e44e1b58b16a5 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -1846,7 +1846,7 @@ mod tests { assert_snapshot!(msg, @r" No function matches the given name and argument types 'round(Utf8, Utf8)'. You might need to add explicit type casts. Candidate functions: - round(Coercion(TypeSignatureClass::Native(LogicalType(Native(Float64), Float64)), implicit_coercion=ImplicitCoercion([Numeric], default_type=Float64), Coercion(TypeSignatureClass::Native(LogicalType(Native(Int64), Int64)), implicit_coercion=ImplicitCoercion([Integer], default_type=Int64)) + round(Float64, Int64) "); } @@ -1889,7 +1889,7 @@ mod tests { assert_snapshot!(msg, @r" No function matches the given name and argument types 'substr(Int32)'. You might need to add explicit type casts. Candidate functions: - substr(string: Coercion(TypeSignatureClass::Native(LogicalType(Native(String), String))), start_pos: Coercion(TypeSignatureClass::Native(LogicalType(Native(Int64), Int64))), length: Coercion(TypeSignatureClass::Native(LogicalType(Native(Int64), Int64)), implicit_coercion=ImplicitCoercion([Integer], default_type=Int64)) + substr(string: String, start_pos: Int64, length: Int64) "); } } From ef9408d3bdd51a7e98040c442a328c6f5092d957 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Fri, 27 Feb 2026 18:55:15 +0100 Subject: [PATCH 03/10] Add `non-null` prefix --- datafusion/common/src/types/logical.rs | 11 ++++----- datafusion/common/src/types/native.rs | 32 +++++++++++++++++++------- 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/datafusion/common/src/types/logical.rs b/datafusion/common/src/types/logical.rs index cdd220e9a6b05..2e8a7eecd3fb6 100644 --- a/datafusion/common/src/types/logical.rs +++ b/datafusion/common/src/types/logical.rs @@ -175,7 +175,7 @@ mod tests { Field::new("y", DataType::Float64, false), ], )))); - assert_snapshot!(struct_type, @r#"Struct("x": Float64, "y": Float64)"#); + assert_snapshot!(struct_type, @r#"Struct("x": non-null Float64, "y": non-null Float64)"#); } #[test] @@ -188,7 +188,7 @@ mod tests { ))), 3, )); - assert_snapshot!(fsl_type, @"FixedSizeList(3 x Float32)"); + assert_snapshot!(fsl_type, @"FixedSizeList(3 x non-null Float32)"); } #[test] @@ -196,7 +196,7 @@ mod tests { let map_type: Arc = Arc::new(NativeType::Map(Arc::new( LogicalField::from(&Field::new("entries", DataType::Utf8, false)), ))); - assert_snapshot!(map_type, @"Map(String)"); + assert_snapshot!(map_type, @"Map(non-null String)"); } #[test] @@ -214,7 +214,7 @@ mod tests { let union_type: Arc = Arc::new(NativeType::Union( crate::types::LogicalUnionFields::from(&union_fields), )); - assert_snapshot!(union_type, @r#"Union(0: ("int_val": Int32), 1: ("str_val": String))"#); + assert_snapshot!(union_type, @r#"Union(0: ("int_val": non-null Int32), 1: ("str_val": String))"#); } #[test] @@ -229,9 +229,8 @@ mod tests { false, ))))); - // Display delegates to NativeType which doesn't distinguish nullability assert_snapshot!(nullable_list, @"List(Int32)"); - assert_snapshot!(non_nullable_list, @"List(Int32)"); + assert_snapshot!(non_nullable_list, @"List(non-null Int32)"); } #[test] diff --git a/datafusion/common/src/types/native.rs b/datafusion/common/src/types/native.rs index 7eafde208f75b..8e1cc5f197ce5 100644 --- a/datafusion/common/src/types/native.rs +++ b/datafusion/common/src/types/native.rs @@ -184,6 +184,13 @@ pub enum NativeType { Map(LogicalFieldRef), } +/// Format a [`LogicalField`] for display, matching [`arrow::datatypes::DataType`]'s +/// Display convention of showing a `"non-null "` prefix for non-nullable fields. +fn format_logical_field(f: &mut std::fmt::Formatter<'_>, field: &LogicalField) -> std::fmt::Result { + let non_null = if field.nullable { "" } else { "non-null " }; + write!(f, "{:?}: {non_null}{}", field.name, field.logical_type) +} + impl Display for NativeType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { // Match the format used by arrow::datatypes::DataType's Display impl @@ -210,9 +217,13 @@ impl Display for NativeType { Self::Binary => write!(f, "Binary"), Self::FixedSizeBinary(size) => write!(f, "FixedSizeBinary({size})"), Self::String => write!(f, "String"), - Self::List(field) => write!(f, "List({})", field.logical_type), + Self::List(field) => { + let non_null = if field.nullable { "" } else { "non-null " }; + write!(f, "List({non_null}{})", field.logical_type) + } Self::FixedSizeList(field, size) => { - write!(f, "FixedSizeList({size} x {})", field.logical_type) + let non_null = if field.nullable { "" } else { "non-null " }; + write!(f, "FixedSizeList({size} x {non_null}{})", field.logical_type) } Self::Struct(fields) => { write!(f, "Struct(")?; @@ -220,7 +231,7 @@ impl Display for NativeType { if i > 0 { write!(f, ", ")?; } - write!(f, "{:?}: {}", field.name, field.logical_type)?; + format_logical_field(f, field)?; } write!(f, ")") } @@ -230,12 +241,17 @@ impl Display for NativeType { if i > 0 { write!(f, ", ")?; } - write!(f, "{type_id}: ({:?}: {})", field.name, field.logical_type)?; + write!(f, "{type_id}: (")?; + format_logical_field(f, field)?; + write!(f, ")")?; } write!(f, ")") } Self::Decimal(precision, scale) => write!(f, "Decimal({precision}, {scale})"), - Self::Map(field) => write!(f, "Map({})", field.logical_type), + Self::Map(field) => { + let non_null = if field.nullable { "" } else { "non-null " }; + write!(f, "Map({non_null}{})", field.logical_type) + } } } } @@ -604,7 +620,7 @@ mod tests { )), 3, ); - assert_snapshot!(fixed_list, @"FixedSizeList(3 x Float64)"); + assert_snapshot!(fixed_list, @"FixedSizeList(3 x non-null Float64)"); let struct_type = NativeType::Struct(LogicalFields::from( &Fields::from(vec![ @@ -612,11 +628,11 @@ mod tests { Field::new("age", DataType::Int32, true), ]), )); - assert_snapshot!(struct_type, @r#"Struct("name": String, "age": Int32)"#); + assert_snapshot!(struct_type, @r#"Struct("name": non-null String, "age": Int32)"#); let map = NativeType::Map(Arc::new(LogicalField::from( &Field::new("entries", DataType::Utf8, false), ))); - assert_snapshot!(map, @"Map(String)"); + assert_snapshot!(map, @"Map(non-null String)"); } } From bbb7ed318156d4f4eb6a72e4ae95e61b2b984774 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Fri, 27 Feb 2026 19:05:27 +0100 Subject: [PATCH 04/10] Use Display formatting for errors --- datafusion/expr-common/src/signature.rs | 79 +++++++++++++++++++ .../src/type_coercion/aggregates.rs | 9 +-- .../expr-common/src/type_coercion/binary.rs | 6 +- .../expr/src/type_coercion/functions.rs | 18 ++--- datafusion/expr/src/udf.rs | 2 +- .../src/aggregate/groups_accumulator/nulls.rs | 2 +- .../optimizer/src/analyzer/type_coercion.rs | 2 +- .../physical-plan/src/aggregates/topk/heap.rs | 4 +- datafusion/physical-plan/src/unnest.rs | 4 +- datafusion/sqllogictest/test_files/math.slt | 6 +- 10 files changed, 104 insertions(+), 28 deletions(-) diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index a855d2a824b0a..feddfbef2ca9c 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -322,6 +322,43 @@ impl TypeSignature { } } +impl Display for TypeSignature { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + TypeSignature::Variadic(types) => { + write!(f, "Variadic({})", types.iter().join(", ")) + } + TypeSignature::UserDefined => write!(f, "UserDefined"), + TypeSignature::VariadicAny => write!(f, "VariadicAny"), + TypeSignature::Uniform(count, types) => { + write!(f, "Uniform({count}, [{}])", types.iter().join(", ")) + } + TypeSignature::Exact(types) => { + write!(f, "Exact({})", types.iter().join(", ")) + } + TypeSignature::Coercible(coercions) => { + write!(f, "Coercible({})", coercions.iter().join(", ")) + } + TypeSignature::Comparable(count) => write!(f, "Comparable({count})"), + TypeSignature::Any(count) => write!(f, "Any({count})"), + TypeSignature::OneOf(sigs) => { + write!(f, "OneOf(")?; + for (i, sig) in sigs.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{sig}")?; + } + write!(f, ")") + } + TypeSignature::ArraySignature(sig) => write!(f, "ArraySignature({sig})"), + TypeSignature::Numeric(count) => write!(f, "Numeric({count})"), + TypeSignature::String(count) => write!(f, "String({count})"), + TypeSignature::Nullary => write!(f, "Nullary"), + } + } +} + /// Represents the class of types that can be used in a function signature. /// /// This is used to specify what types are valid for function arguments in a more flexible way than @@ -2047,6 +2084,48 @@ mod tests { assert_eq!(sig.arity(), Arity::Variable); } + #[test] + fn test_type_signature_display() { + use insta::assert_snapshot; + + assert_snapshot!(TypeSignature::Nullary, @"Nullary"); + assert_snapshot!(TypeSignature::Any(2), @"Any(2)"); + assert_snapshot!(TypeSignature::Numeric(3), @"Numeric(3)"); + assert_snapshot!(TypeSignature::String(1), @"String(1)"); + assert_snapshot!(TypeSignature::Comparable(2), @"Comparable(2)"); + assert_snapshot!(TypeSignature::VariadicAny, @"VariadicAny"); + assert_snapshot!(TypeSignature::UserDefined, @"UserDefined"); + + assert_snapshot!( + TypeSignature::Exact(vec![DataType::Int32, DataType::Utf8]), + @"Exact(Int32, Utf8)" + ); + assert_snapshot!( + TypeSignature::Variadic(vec![DataType::Utf8, DataType::LargeUtf8]), + @"Variadic(Utf8, LargeUtf8)" + ); + assert_snapshot!( + TypeSignature::Uniform(2, vec![DataType::Float32, DataType::Float64]), + @"Uniform(2, [Float32, Float64])" + ); + + assert_snapshot!( + TypeSignature::Coercible(vec![ + Coercion::new_exact(TypeSignatureClass::Native(logical_float64())), + Coercion::new_exact(TypeSignatureClass::Native(logical_int32())), + ]), + @"Coercible(Float64, Int32)" + ); + + assert_snapshot!( + TypeSignature::OneOf(vec![ + TypeSignature::Nullary, + TypeSignature::VariadicAny, + ]), + @"OneOf(Nullary, VariadicAny)" + ); + } + #[test] fn test_type_signature_class_display() { use insta::assert_snapshot; diff --git a/datafusion/expr-common/src/type_coercion/aggregates.rs b/datafusion/expr-common/src/type_coercion/aggregates.rs index ab4d086e4ca5f..85f789c2f1454 100644 --- a/datafusion/expr-common/src/type_coercion/aggregates.rs +++ b/datafusion/expr-common/src/type_coercion/aggregates.rs @@ -61,8 +61,7 @@ pub fn check_arg_count( TypeSignature::Uniform(agg_count, _) | TypeSignature::Any(agg_count) => { if input_fields.len() != *agg_count { return plan_err!( - "The function {func_name} expects {:?} arguments, but {:?} were provided", - agg_count, + "The function {func_name} expects {agg_count} arguments, but {} were provided", input_fields.len() ); } @@ -70,7 +69,7 @@ pub fn check_arg_count( TypeSignature::Exact(types) => { if types.len() != input_fields.len() { return plan_err!( - "The function {func_name} expects {:?} arguments, but {:?} were provided", + "The function {func_name} expects {} arguments, but {} were provided", types.len(), input_fields.len() ); @@ -82,7 +81,7 @@ pub fn check_arg_count( .any(|v| check_arg_count(func_name, input_fields, v).is_ok()); if !ok { return plan_err!( - "The function {func_name} does not accept {:?} function arguments.", + "The function {func_name} does not accept {} function arguments.", input_fields.len() ); } @@ -102,7 +101,7 @@ pub fn check_arg_count( } _ => { return internal_err!( - "Aggregate functions do not support this {signature:?}" + "Aggregate functions do not support this {signature}" ); } } diff --git a/datafusion/expr-common/src/type_coercion/binary.rs b/datafusion/expr-common/src/type_coercion/binary.rs index c6ac86cd396c4..dd3db8e40baa7 100644 --- a/datafusion/expr-common/src/type_coercion/binary.rs +++ b/datafusion/expr-common/src/type_coercion/binary.rs @@ -753,15 +753,15 @@ fn type_union_resolution_coercion( /// Handle type union resolution including struct type and others. pub fn try_type_union_resolution(data_types: &[DataType]) -> Result> { - let err = match try_type_union_resolution_with_struct(data_types) { + let struct_err = match try_type_union_resolution_with_struct(data_types) { Ok(struct_types) => return Ok(struct_types), - Err(e) => Some(e), + Err(e) => e, }; if let Some(new_type) = type_union_resolution(data_types) { Ok(vec![new_type; data_types.len()]) } else { - exec_err!("Fail to find the coerced type, errors: {:?}", err) + exec_err!("Fail to find the coerced type, errors: {struct_err}") } } diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index fe259fb8c972a..45814ca419000 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -221,12 +221,12 @@ pub fn data_types( } else if type_signature.used_to_support_zero_arguments() { // Special error to help during upgrade: https://github.com/apache/datafusion/issues/13763 return plan_err!( - "function '{}' has signature {type_signature:?} which does not support zero arguments. Use TypeSignature::Nullary for zero arguments", + "function '{}' has signature {type_signature} which does not support zero arguments. Use TypeSignature::Nullary for zero arguments", function_name.as_ref() ); } else { return plan_err!( - "Function '{}' has signature {type_signature:?} which does not support zero arguments", + "Function '{}' has signature {type_signature} which does not support zero arguments", function_name.as_ref() ); } @@ -302,7 +302,7 @@ fn try_coerce_types( // none possible -> Error plan_err!( - "Failed to coerce arguments to satisfy a call to '{function_name}' function: coercion from {} to the signature {type_signature:?} failed", + "Failed to coerce arguments to satisfy a call to '{function_name}' function: coercion from {} to the signature {type_signature} failed", current_types.iter().join(", ") ) } @@ -317,7 +317,7 @@ fn get_valid_types_with_udf( Ok(coerced_types) => vec![coerced_types], Err(e) => { return exec_err!( - "Function '{}' user-defined coercion failed with {:?}", + "Function '{}' user-defined coercion failed with {}", func.name(), e.strip_backtrace() ); @@ -502,7 +502,7 @@ fn get_valid_types( new_types.push(DataType::Utf8); } else { return plan_err!( - "Function '{function_name}' expects NativeType::String but NativeType::received NativeType::{logical_data_type}" + "Function '{function_name}' expects String but received {logical_data_type}" ); } } @@ -562,7 +562,7 @@ fn get_valid_types( if !logical_data_type.is_numeric() { return plan_err!( - "Function '{function_name}' expects NativeType::Numeric but received NativeType::{logical_data_type}" + "Function '{function_name}' expects Numeric but received {logical_data_type}" ); } @@ -583,7 +583,7 @@ fn get_valid_types( valid_type = DataType::Float64; } else if !logical_data_type.is_numeric() { return plan_err!( - "Function '{function_name}' expects NativeType::Numeric but received NativeType::{logical_data_type}" + "Function '{function_name}' expects Numeric but received {logical_data_type}" ); } @@ -1056,7 +1056,7 @@ mod tests { .unwrap_err(); assert_contains!( got.to_string(), - "Function 'test' expects NativeType::Numeric but received NativeType::String" + "Function 'test' expects Numeric but received String" ); // Fallbacks to float64 if the arg is of type null. @@ -1076,7 +1076,7 @@ mod tests { .unwrap_err(); assert_contains!( got.to_string(), - "Function 'test' expects NativeType::Numeric but received NativeType::Timestamp(s)" + "Function 'test' expects Numeric but received Timestamp(s)" ); Ok(()) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 405fb256803b6..9705006e41afc 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -261,7 +261,7 @@ impl ScalarUDF { let expected_type = return_field.data_type(); assert_or_internal_err!( result_data_type == *expected_type, - "Function '{}' returned value of type '{:?}' while the following type was promised at planning time and expected: '{:?}'", + "Function '{}' returned value of type '{}' while the following type was promised at planning time and expected: '{}'", self.name(), result_data_type, expected_type diff --git a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/nulls.rs b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/nulls.rs index 435560721cd2b..5b56b77e11d3f 100644 --- a/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/nulls.rs +++ b/datafusion/functions-aggregate-common/src/aggregate/groups_accumulator/nulls.rs @@ -206,7 +206,7 @@ pub fn set_nulls_dyn(input: &dyn Array, nulls: Option) -> Result { - return not_impl_err!("Applying nulls {:?}", input.data_type()); + return not_impl_err!("Applying nulls {}", input.data_type()); } }; assert_eq!(input.len(), output.len()); diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index ed04aa4285d1a..efc9984acb9b0 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -849,7 +849,7 @@ fn coerce_scalar_range_aware( // If type coercion fails, check if the largest type in family works: if let Some(largest_type) = get_widest_type_in_family(target_type) { coerce_scalar(largest_type, value).map_or_else( - |_| exec_err!("Cannot cast {value:?} to {target_type}"), + |_| exec_err!("Cannot cast {value} to {target_type}"), |_| ScalarValue::try_from(target_type), ) } else { diff --git a/datafusion/physical-plan/src/aggregates/topk/heap.rs b/datafusion/physical-plan/src/aggregates/topk/heap.rs index 9f0b697ccabee..889fe04bf830a 100644 --- a/datafusion/physical-plan/src/aggregates/topk/heap.rs +++ b/datafusion/physical-plan/src/aggregates/topk/heap.rs @@ -216,7 +216,7 @@ fn extract_string_value<'a>( DataType::Utf8 => batch.as_string::().value(idx), DataType::LargeUtf8 => batch.as_string::().value(idx), DataType::Utf8View => batch.as_string_view().value(idx), - _ => unreachable!("Unsupported string type: {:?}", data_type), + _ => unreachable!("Unsupported string type: {data_type}"), } } @@ -316,7 +316,7 @@ impl ArrowHeap for StringHeap { DataType::Utf8 => build_string_array!(StringBuilder), DataType::LargeUtf8 => build_string_array!(LargeStringBuilder), DataType::Utf8View => build_string_array!(StringViewBuilder), - _ => unreachable!("Unsupported string type: {:?}", self.data_type), + _ => unreachable!("Unsupported string type: {}", self.data_type), }; (arr, map_idxs) } diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index 422a9dd0d32bc..a57da65753912 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -418,9 +418,7 @@ fn flatten_struct_cols( Ok(struct_arr.columns().to_vec()) } data_type => internal_err!( - "expecting column {} from input plan to be a struct, got {:?}", - idx, - data_type + "expecting column {idx} from input plan to be a struct, got {data_type}" ), }, None => Ok(vec![Arc::clone(column_data)]), diff --git a/datafusion/sqllogictest/test_files/math.slt b/datafusion/sqllogictest/test_files/math.slt index 2227466fdf254..d571fcd947134 100644 --- a/datafusion/sqllogictest/test_files/math.slt +++ b/datafusion/sqllogictest/test_files/math.slt @@ -158,15 +158,15 @@ statement error SELECT abs(1, 2); # abs: unsupported argument type -query error DataFusion error: Error during planning: Function 'abs' expects NativeType::Numeric but received NativeType::String +query error DataFusion error: Error during planning: Function 'abs' expects Numeric but received String SELECT abs('foo'); # abs: numeric string # TODO: In Postgres, '-1.2' is unknown type and interpreted to float8 so they don't fail on this query -query error DataFusion error: Error during planning: Function 'abs' expects NativeType::Numeric but received NativeType::String +query error DataFusion error: Error during planning: Function 'abs' expects Numeric but received String select abs('-1.2'); -query error DataFusion error: Error during planning: Function 'abs' expects NativeType::Numeric but received NativeType::String +query error DataFusion error: Error during planning: Function 'abs' expects Numeric but received String select abs(arrow_cast('-1.2', 'Utf8')); statement ok From 4b2f97d721150ef2c7d4819a08592770b4f2ee5d Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Fri, 27 Feb 2026 19:17:44 +0100 Subject: [PATCH 05/10] cargo fmt --- datafusion/common/src/types/logical.rs | 13 +++--- datafusion/common/src/types/native.rs | 45 ++++++++++++------- datafusion/expr-common/src/signature.rs | 2 +- .../src/type_coercion/aggregates.rs | 4 +- datafusion/expr/src/utils.rs | 14 ++---- 5 files changed, 39 insertions(+), 39 deletions(-) diff --git a/datafusion/common/src/types/logical.rs b/datafusion/common/src/types/logical.rs index 2e8a7eecd3fb6..c4824a138e90f 100644 --- a/datafusion/common/src/types/logical.rs +++ b/datafusion/common/src/types/logical.rs @@ -168,13 +168,12 @@ mod tests { #[test] fn test_logical_type_display_struct() { - let struct_type: Arc = - Arc::new(NativeType::Struct(LogicalFields::from(&Fields::from( - vec![ - Field::new("x", DataType::Float64, false), - Field::new("y", DataType::Float64, false), - ], - )))); + let struct_type: Arc = Arc::new(NativeType::Struct( + LogicalFields::from(&Fields::from(vec![ + Field::new("x", DataType::Float64, false), + Field::new("y", DataType::Float64, false), + ])), + )); assert_snapshot!(struct_type, @r#"Struct("x": non-null Float64, "y": non-null Float64)"#); } diff --git a/datafusion/common/src/types/native.rs b/datafusion/common/src/types/native.rs index 8e1cc5f197ce5..a4202db986bbf 100644 --- a/datafusion/common/src/types/native.rs +++ b/datafusion/common/src/types/native.rs @@ -186,7 +186,10 @@ pub enum NativeType { /// Format a [`LogicalField`] for display, matching [`arrow::datatypes::DataType`]'s /// Display convention of showing a `"non-null "` prefix for non-nullable fields. -fn format_logical_field(f: &mut std::fmt::Formatter<'_>, field: &LogicalField) -> std::fmt::Result { +fn format_logical_field( + f: &mut std::fmt::Formatter<'_>, + field: &LogicalField, +) -> std::fmt::Result { let non_null = if field.nullable { "" } else { "non-null " }; write!(f, "{:?}: {non_null}{}", field.name, field.logical_type) } @@ -223,7 +226,11 @@ impl Display for NativeType { } Self::FixedSizeList(field, size) => { let non_null = if field.nullable { "" } else { "non-null " }; - write!(f, "FixedSizeList({size} x {non_null}{})", field.logical_type) + write!( + f, + "FixedSizeList({size} x {non_null}{})", + field.logical_type + ) } Self::Struct(fields) => { write!(f, "Struct(")?; @@ -609,30 +616,34 @@ mod tests { #[test] fn test_native_type_display_nested() { - let list = NativeType::List(Arc::new(LogicalField::from( - &Field::new("item", DataType::Int32, true), - ))); + let list = NativeType::List(Arc::new(LogicalField::from(&Field::new( + "item", + DataType::Int32, + true, + )))); assert_snapshot!(list, @"List(Int32)"); let fixed_list = NativeType::FixedSizeList( - Arc::new(LogicalField::from( - &Field::new("item", DataType::Float64, false), - )), + Arc::new(LogicalField::from(&Field::new( + "item", + DataType::Float64, + false, + ))), 3, ); assert_snapshot!(fixed_list, @"FixedSizeList(3 x non-null Float64)"); - let struct_type = NativeType::Struct(LogicalFields::from( - &Fields::from(vec![ - Field::new("name", DataType::Utf8, false), - Field::new("age", DataType::Int32, true), - ]), - )); + let struct_type = NativeType::Struct(LogicalFields::from(&Fields::from(vec![ + Field::new("name", DataType::Utf8, false), + Field::new("age", DataType::Int32, true), + ]))); assert_snapshot!(struct_type, @r#"Struct("name": non-null String, "age": Int32)"#); - let map = NativeType::Map(Arc::new(LogicalField::from( - &Field::new("entries", DataType::Utf8, false), - ))); + let map = NativeType::Map(Arc::new(LogicalField::from(&Field::new( + "entries", + DataType::Utf8, + false, + )))); assert_snapshot!(map, @"Map(non-null String)"); } } diff --git a/datafusion/expr-common/src/signature.rs b/datafusion/expr-common/src/signature.rs index feddfbef2ca9c..82759be9f75e8 100644 --- a/datafusion/expr-common/src/signature.rs +++ b/datafusion/expr-common/src/signature.rs @@ -1486,7 +1486,7 @@ impl Signature { #[cfg(test)] mod tests { use datafusion_common::types::{ - logical_float64, logical_int32, logical_int64, logical_string, NativeType, + NativeType, logical_float64, logical_int32, logical_int64, logical_string, }; use super::*; diff --git a/datafusion/expr-common/src/type_coercion/aggregates.rs b/datafusion/expr-common/src/type_coercion/aggregates.rs index 85f789c2f1454..df86ff582d658 100644 --- a/datafusion/expr-common/src/type_coercion/aggregates.rs +++ b/datafusion/expr-common/src/type_coercion/aggregates.rs @@ -100,9 +100,7 @@ pub fn check_arg_count( // Numeric and Coercible signature is validated in `get_valid_types` } _ => { - return internal_err!( - "Aggregate functions do not support this {signature}" - ); + return internal_err!("Aggregate functions do not support this {signature}"); } } Ok(()) diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index e44e1b58b16a5..81a6fd393a989 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -1829,9 +1829,7 @@ mod tests { NativeType::Float64, ), Coercion::new_implicit( - TypeSignatureClass::Native( - datafusion_common::types::logical_int64(), - ), + TypeSignatureClass::Native(datafusion_common::types::logical_int64()), vec![TypeSignatureClass::Integer], NativeType::Int64, ), @@ -1865,9 +1863,7 @@ mod tests { datafusion_common::types::logical_int64(), )), Coercion::new_implicit( - TypeSignatureClass::Native( - datafusion_common::types::logical_int64(), - ), + TypeSignatureClass::Native(datafusion_common::types::logical_int64()), vec![TypeSignatureClass::Integer], NativeType::Int64, ), @@ -1881,11 +1877,7 @@ mod tests { ]) .expect("valid parameter names"); - let msg = generate_signature_error_message( - "substr", - &sig, - &[DataType::Int32], - ); + let msg = generate_signature_error_message("substr", &sig, &[DataType::Int32]); assert_snapshot!(msg, @r" No function matches the given name and argument types 'substr(Int32)'. You might need to add explicit type casts. Candidate functions: From 2cd0b7324c93bdb1e5bae673637e5157357123d5 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Fri, 27 Feb 2026 19:32:41 +0100 Subject: [PATCH 06/10] A few more improvements --- datafusion/expr/src/expr_schema.rs | 2 +- .../expr/src/type_coercion/functions.rs | 2 +- datafusion/sql/tests/sql_integration.rs | 65 +++++++++---------- 3 files changed, 34 insertions(+), 35 deletions(-) diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index f4e4f014f533c..cc3d1876b398a 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -634,7 +634,7 @@ fn verify_function_arguments( .cloned() .collect::>(); plan_datafusion_err!( - "{} {}", + "{}. {}", match err { DataFusionError::Plan(msg) => msg, err => err.to_string(), diff --git a/datafusion/expr/src/type_coercion/functions.rs b/datafusion/expr/src/type_coercion/functions.rs index 45814ca419000..a79f2c66830bf 100644 --- a/datafusion/expr/src/type_coercion/functions.rs +++ b/datafusion/expr/src/type_coercion/functions.rs @@ -317,7 +317,7 @@ fn get_valid_types_with_udf( Ok(coerced_types) => vec![coerced_types], Err(e) => { return exec_err!( - "Function '{}' user-defined coercion failed with {}", + "Function '{}' user-defined coercion failed with: {}", func.name(), e.strip_backtrace() ); diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 444bdae73ac26..b028587d46ef9 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -4730,56 +4730,55 @@ fn test_custom_type_plan() -> Result<()> { Ok(()) } -fn error_message_test(sql: &str, err_msg_starts_with: &str) { +fn error_message(sql: &str) -> String { let err = logical_plan(sql).expect_err("query should have failed"); - assert!( - err.strip_backtrace().starts_with(err_msg_starts_with), - "Expected error to start with '{}', but got: '{}'", - err_msg_starts_with, - err.strip_backtrace(), - ); + err.strip_backtrace() } #[test] fn test_error_message_invalid_scalar_function_signature() { - error_message_test( - "select sqrt()", - "Error during planning: 'sqrt' does not support zero arguments", - ); - error_message_test( - "select sqrt(1, 2)", - "Error during planning: Failed to coerce arguments", - ); + assert_snapshot!(error_message("select sqrt()"), @r" + Error during planning: 'sqrt' does not support zero arguments. No function matches the given name and argument types 'sqrt()'. You might need to add explicit type casts. + Candidate functions: + sqrt(Int64) + "); + assert_snapshot!(error_message("select sqrt(1, 2)"), @r" + Error during planning: Failed to coerce arguments to satisfy a call to 'sqrt' function: coercion from Int64, Int64 to the signature Exact(Int64) failed. No function matches the given name and argument types 'sqrt(Int64, Int64)'. You might need to add explicit type casts. + Candidate functions: + sqrt(Int64) + "); } #[test] fn test_error_message_invalid_aggregate_function_signature() { - error_message_test( - "select sum()", - "Error during planning: Execution error: Function 'sum' user-defined coercion failed with \"Execution error: sum function requires 1 argument, got 0\"", - ); - // We keep two different prefixes because they clarify each other. - // It might be incorrect, and we should consider keeping only one. - error_message_test( - "select max(9, 3)", - "Error during planning: Execution error: Function 'max' user-defined coercion failed", - ); + assert_snapshot!(error_message("select sum()"), @r" + Error during planning: Execution error: Function 'sum' user-defined coercion failed with: Execution error: sum function requires 1 argument, got 0. No function matches the given name and argument types 'sum()'. You might need to add explicit type casts. + Candidate functions: + sum(UserDefined) + "); + assert_snapshot!(error_message("select max(9, 3)"), @r" + Error during planning: Execution error: Function 'max' user-defined coercion failed with: Execution error: min/max was called with 2 arguments. It requires only 1.. No function matches the given name and argument types 'max(Int64, Int64)'. You might need to add explicit type casts. + Candidate functions: + max(UserDefined) + "); } #[test] fn test_error_message_invalid_window_function_signature() { - error_message_test( - "select rank(1) over()", - "Error during planning: The function 'rank' expected zero argument but received 1", - ); + assert_snapshot!(error_message("select rank(1) over()"), @r" + Error during planning: The function 'rank' expected zero argument but received 1. No function matches the given name and argument types 'rank(Int64)'. You might need to add explicit type casts. + Candidate functions: + rank(NullAry()) + "); } #[test] fn test_error_message_invalid_window_aggregate_function_signature() { - error_message_test( - "select sum() over()", - "Error during planning: Execution error: Function 'sum' user-defined coercion failed with \"Execution error: sum function requires 1 argument, got 0\"", - ); + assert_snapshot!(error_message("select sum() over()"), @r" + Error during planning: Execution error: Function 'sum' user-defined coercion failed with: Execution error: sum function requires 1 argument, got 0. No function matches the given name and argument types 'sum()'. You might need to add explicit type casts. + Candidate functions: + sum(UserDefined) + "); } // Test issue: https://github.com/apache/datafusion/issues/14058 From 1991966e9970e6e78c130818993f8fe287b00f94 Mon Sep 17 00:00:00 2001 From: Emil Ernerfeldt Date: Sat, 28 Feb 2026 13:57:21 +0100 Subject: [PATCH 07/10] Update another test --- datafusion/sqllogictest/test_files/binary.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/binary.slt b/datafusion/sqllogictest/test_files/binary.slt index c4a21deeff26b..54ac51d9e780d 100644 --- a/datafusion/sqllogictest/test_files/binary.slt +++ b/datafusion/sqllogictest/test_files/binary.slt @@ -313,7 +313,7 @@ Bar Bar Bar Bar FooBar fooBar FooBar fooBar # show helpful error msg when Binary type is used with string functions -query error DataFusion error: Error during planning: Function 'split_part' requires TypeSignatureClass::Native\(LogicalType\(Native\(String\), String\)\), but received Binary \(DataType: Binary\)\.\n\nHint: Binary types are not automatically coerced to String\. Use CAST\(column AS VARCHAR\) to convert Binary data to String\. +query error DataFusion error: Error during planning: Function 'split_part' requires String, but received Binary \(DataType: Binary\)\.\n\nHint: Binary types are not automatically coerced to String\. Use CAST\(column AS VARCHAR\) to convert Binary data to String\. SELECT split_part(binary, '~', 2) FROM t WHERE binary IS NOT NULL LIMIT 1; # ensure the suggested CAST workaround works From aea6186f29f55738889106cab0e0f315db3afe18 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 28 Feb 2026 10:28:06 -0500 Subject: [PATCH 08/10] Update sql tests for new error message format --- datafusion/sqllogictest/test_files/array.slt | 2 +- datafusion/sqllogictest/test_files/arrow_typeof.slt | 2 +- .../sqllogictest/test_files/datetime/timestamps.slt | 6 +++--- datafusion/sqllogictest/test_files/encoding.slt | 6 +++--- datafusion/sqllogictest/test_files/expr.slt | 2 +- datafusion/sqllogictest/test_files/functions.slt | 4 ++-- datafusion/sqllogictest/test_files/scalar.slt | 2 +- .../sqllogictest/test_files/spark/datetime/unix.slt | 8 ++++---- .../sqllogictest/test_files/spark/string/base64.slt | 4 ++-- datafusion/sqllogictest/test_files/window.slt | 4 ++-- 10 files changed, 20 insertions(+), 20 deletions(-) diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index 5113b9718c4e5..9de125a5d9ca0 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -3454,7 +3454,7 @@ select [1, 2, 3] List(Utf8View) # array_concat error -query error DataFusion error: Error during planning: Execution error: Function 'array_concat' user-defined coercion failed with "Error during planning: array_concat does not support type Int64" +query error DataFusion error: Error during planning: Execution error: Function 'array_concat' user-defined coercion failed with: Error during planning: array_concat does not support type Int64 select array_concat(1, 2); # array_concat scalar function #1 diff --git a/datafusion/sqllogictest/test_files/arrow_typeof.slt b/datafusion/sqllogictest/test_files/arrow_typeof.slt index 0c69e8591c3a4..e00909ad5fc59 100644 --- a/datafusion/sqllogictest/test_files/arrow_typeof.slt +++ b/datafusion/sqllogictest/test_files/arrow_typeof.slt @@ -95,7 +95,7 @@ SELECT arrow_cast('1', 'Int16') query error SELECT arrow_cast('1') -query error DataFusion error: Error during planning: Function 'arrow_cast' requires TypeSignatureClass::Native\(LogicalType\(Native\(String\), String\)\), but received Int64 \(DataType: Int64\) +query error DataFusion error: Error during planning: Function 'arrow_cast' requires String, but received Int64 \(DataType: Int64\) SELECT arrow_cast('1', 43) query error DataFusion error: Execution error: arrow_cast requires its second argument to be a non\-empty constant string diff --git a/datafusion/sqllogictest/test_files/datetime/timestamps.slt b/datafusion/sqllogictest/test_files/datetime/timestamps.slt index 9526ccebfd16e..78889230fde13 100644 --- a/datafusion/sqllogictest/test_files/datetime/timestamps.slt +++ b/datafusion/sqllogictest/test_files/datetime/timestamps.slt @@ -3076,7 +3076,7 @@ NULL query error DataFusion error: Error during planning: Function 'make_date' expects 3 arguments but received 1 select make_date(1); -query error DataFusion error: Error during planning: Function 'make_date' requires TypeSignatureClass::Native\(LogicalType\(Native\(Int32\), Int32\)\), but received Interval\(MonthDayNano\) \(DataType: Interval\(MonthDayNano\)\) +query error DataFusion error: Error during planning: Function 'make_date' requires Int32, but received Interval\(MonthDayNano\) \(DataType: Interval\(MonthDayNano\)\). select make_date(interval '1 day', '2001-05-21'::timestamp, '2001-05-21'::timestamp); ########## @@ -3349,7 +3349,7 @@ select make_time(22, '', 27); query error Cannot cast string '' to value of Int32 type select make_time(22, 1, ''); -query error DataFusion error: Error during planning: Function 'make_time' requires TypeSignatureClass::Native\(LogicalType\(Native\(Int32\), Int32\)\), but received Float64 \(DataType: Float64\) +query error DataFusion error: Error during planning: Function 'make_time' requires Int32, but received Float64 \(DataType: Float64\) select make_time(arrow_cast(22, 'Float64'), 1, ''); ########## @@ -3964,7 +3964,7 @@ statement error select to_local_time('2024-04-01T00:00:20Z'::timestamp, 'some string'); # invalid argument data type -statement error DataFusion error: Error during planning: Function 'to_local_time' requires TypeSignatureClass::Timestamp, but received String \(DataType: Utf8\) +statement error DataFusion error: Error during planning: Function 'to_local_time' requires Timestamp, but received String \(DataType: Utf8\) select to_local_time('2024-04-01T00:00:20Z'); # invalid timezone diff --git a/datafusion/sqllogictest/test_files/encoding.slt b/datafusion/sqllogictest/test_files/encoding.slt index b04d5061825b4..c68d59819ea63 100644 --- a/datafusion/sqllogictest/test_files/encoding.slt +++ b/datafusion/sqllogictest/test_files/encoding.slt @@ -75,10 +75,10 @@ CREATE TABLE test( ; # errors -query error DataFusion error: Error during planning: Function 'encode' requires TypeSignatureClass::Binary, but received Int64 \(DataType: Int64\) +query error DataFusion error: Error during planning: Function 'encode' requires Binary, but received Int64 \(DataType: Int64\) select encode(12, 'hex'); -query error DataFusion error: Error during planning: Function 'decode' requires TypeSignatureClass::Binary, but received Int64 \(DataType: Int64\) +query error DataFusion error: Error during planning: Function 'decode' requires Binary, but received Int64 \(DataType: Int64\) select decode(12, 'hex'); query error DataFusion error: Error during planning: There is no built\-in encoding named 'non_encoding', currently supported encodings are: base64, base64pad, hex @@ -93,7 +93,7 @@ select decode('', null) from test; query error DataFusion error: This feature is not implemented: Encoding must be a scalar; array specified encoding is not yet supported select decode('', hex_field) from test; -query error DataFusion error: Error during planning: Function 'to_hex' requires TypeSignatureClass::Integer, but received String \(DataType: Utf8View\) +query error DataFusion error: Error during planning: Function 'to_hex' requires Integer, but received String \(DataType: Utf8View\) select to_hex(hex_field) from test; query error DataFusion error: Execution error: Failed to decode value using base64 diff --git a/datafusion/sqllogictest/test_files/expr.slt b/datafusion/sqllogictest/test_files/expr.slt index 6d19d1436e1c8..a6341bc686f74 100644 --- a/datafusion/sqllogictest/test_files/expr.slt +++ b/datafusion/sqllogictest/test_files/expr.slt @@ -618,7 +618,7 @@ select repeat('-1.2', arrow_cast(3, 'Int32')); ---- -1.2-1.2-1.2 -query error DataFusion error: Error during planning: Function 'repeat' requires TypeSignatureClass::Native\(LogicalType\(Native\(Int64\), Int64\)\), but received Float64 \(DataType: Float64\) +query error DataFusion error: Error during planning: Function 'repeat' requires Int64, but received Float64 \(DataType: Float64\) select repeat('-1.2', 3.2); query T diff --git a/datafusion/sqllogictest/test_files/functions.slt b/datafusion/sqllogictest/test_files/functions.slt index 5a43d18e23879..fa852e98ac869 100644 --- a/datafusion/sqllogictest/test_files/functions.slt +++ b/datafusion/sqllogictest/test_files/functions.slt @@ -887,7 +887,7 @@ SELECT greatest(-1, 1, 2.3, 123456789, 3 + 5, -(-4), abs(-9.0)) 123456789 -query error Function 'greatest' user-defined coercion failed with "Error during planning: greatest was called without any arguments. It requires at least 1." +query error Function 'greatest' user-defined coercion failed with: Error during planning: greatest was called without any arguments. It requires at least 1. SELECT greatest() query I @@ -1085,7 +1085,7 @@ SELECT least(-1, 1, 2.3, 123456789, 3 + 5, -(-4), abs(-9.0)) -1 -query error Function 'least' user-defined coercion failed with "Error during planning: least was called without any arguments. It requires at least 1." +query error Function 'least' user-defined coercion failed with: Error during planning: least was called without any arguments. It requires at least 1. SELECT least() query I diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 681540a29d379..e91ec1cb848ba 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -2137,7 +2137,7 @@ select position('' in '') ---- 1 -query error DataFusion error: Error during planning: Function 'strpos' requires TypeSignatureClass::Native\(LogicalType\(Native\(String\), String\)\), but received Int64 \(DataType: Int64\) +query error DataFusion error: Error during planning: Function 'strpos' requires String, but received Int64 \(DataType: Int64\). select position(1 in 1) query I diff --git a/datafusion/sqllogictest/test_files/spark/datetime/unix.slt b/datafusion/sqllogictest/test_files/spark/datetime/unix.slt index d7441f487d037..9dd39acd7f1de 100644 --- a/datafusion/sqllogictest/test_files/spark/datetime/unix.slt +++ b/datafusion/sqllogictest/test_files/spark/datetime/unix.slt @@ -38,7 +38,7 @@ SELECT unix_date(NULL::date); ---- NULL -query error Function 'unix_date' requires TypeSignatureClass::Native\(LogicalType\(Native\(Date\), Date\)\), but received String \(DataType: Utf8View\) +query error Function 'unix_date' requires Date, but received String \(DataType: Utf8View\) SELECT unix_date('1970-01-02'::string); # Unix Micro Tests @@ -68,7 +68,7 @@ SELECT unix_micros(NULL::timestamp); ---- NULL -query error Function 'unix_micros' requires TypeSignatureClass::Timestamp, but received String \(DataType: Utf8View\) +query error Function 'unix_micros' requires Timestamp, but received String \(DataType: Utf8View\) SELECT unix_micros('1970-01-01 00:00:01Z'::string); @@ -99,7 +99,7 @@ SELECT unix_millis(NULL::timestamp); ---- NULL -query error Function 'unix_millis' requires TypeSignatureClass::Timestamp, but received String \(DataType: Utf8View\) +query error Function 'unix_millis' requires Timestamp, but received String \(DataType: Utf8View\) SELECT unix_millis('1970-01-01 00:00:01Z'::string); @@ -130,5 +130,5 @@ SELECT unix_seconds(NULL::timestamp); ---- NULL -query error Function 'unix_seconds' requires TypeSignatureClass::Timestamp, but received String \(DataType: Utf8View\) +query error Function 'unix_seconds' requires Timestamp, but received String \(DataType: Utf8View\) SELECT unix_seconds('1970-01-01 00:00:01Z'::string); diff --git a/datafusion/sqllogictest/test_files/spark/string/base64.slt b/datafusion/sqllogictest/test_files/spark/string/base64.slt index 03b488de0ee9a..dbd266f65a132 100644 --- a/datafusion/sqllogictest/test_files/spark/string/base64.slt +++ b/datafusion/sqllogictest/test_files/spark/string/base64.slt @@ -58,7 +58,7 @@ U3BhcmsgU1E= U3BhcmsgUw== NULL -query error Function 'base64' requires TypeSignatureClass::Binary, but received Int32 \(DataType: Int32\) +query error Error during planning: Function 'base64' requires Binary, but received Int32 \(DataType: Int32\) SELECT base64(12::integer); @@ -111,5 +111,5 @@ SELECT unbase64('123'::string); query error Failed to decode value using base64 SELECT unbase64('123'::bytea); -query error Function 'unbase64' requires TypeSignatureClass::Binary, but received Int32 \(DataType: Int32\) +query error Error during planning: Function 'unbase64' requires Binary, but received Int32 \(DataType: Int32\) SELECT unbase64(12::integer); diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index c3e6f39adbd68..62296c5d87f2b 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -2580,7 +2580,7 @@ statement ok set datafusion.optimizer.skip_failed_rules = true # Error is returned from the physical plan. -query error Cannot cast Utf8\("1 DAY"\) to Int8 +query error Cannot cast 1 DAY to Int8 SELECT COUNT(c1) OVER (ORDER BY c2 RANGE BETWEEN '1 DAY' PRECEDING AND '2 DAY' FOLLOWING) FROM aggregate_test_100; @@ -2589,7 +2589,7 @@ statement ok set datafusion.optimizer.skip_failed_rules = false # Error is returned from the logical plan. -query error Cannot cast Utf8\("1 DAY"\) to Int8 +query error Cannot cast 1 DAY to Int8 SELECT COUNT(c1) OVER (ORDER BY c2 RANGE BETWEEN '1 DAY' PRECEDING AND '2 DAY' FOLLOWING) FROM aggregate_test_100; From 013ababa1341699eeb81f7e4ee8effd26e0fa8c5 Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sat, 28 Feb 2026 14:25:54 -0500 Subject: [PATCH 09/10] Difference between ci and test profiles generates different results on the snapshots, so just check the first part of the error message --- datafusion/sql/tests/sql_integration.rs | 40 +++++++------------------ 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index b028587d46ef9..9570336e995f2 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -4737,48 +4737,28 @@ fn error_message(sql: &str) -> String { #[test] fn test_error_message_invalid_scalar_function_signature() { - assert_snapshot!(error_message("select sqrt()"), @r" - Error during planning: 'sqrt' does not support zero arguments. No function matches the given name and argument types 'sqrt()'. You might need to add explicit type casts. - Candidate functions: - sqrt(Int64) - "); - assert_snapshot!(error_message("select sqrt(1, 2)"), @r" - Error during planning: Failed to coerce arguments to satisfy a call to 'sqrt' function: coercion from Int64, Int64 to the signature Exact(Int64) failed. No function matches the given name and argument types 'sqrt(Int64, Int64)'. You might need to add explicit type casts. - Candidate functions: - sqrt(Int64) - "); + assert!( + error_message("select sqrt()").starts_with( + r"Error during planning: 'sqrt' does not support zero arguments" + ) + ); + assert!(error_message("select sqrt(1, 2)").starts_with(r"Error during planning: Failed to coerce arguments to satisfy a call to 'sqrt' function: coercion from Int64, Int64 to the signature Exact(Int64) failed")); } #[test] fn test_error_message_invalid_aggregate_function_signature() { - assert_snapshot!(error_message("select sum()"), @r" - Error during planning: Execution error: Function 'sum' user-defined coercion failed with: Execution error: sum function requires 1 argument, got 0. No function matches the given name and argument types 'sum()'. You might need to add explicit type casts. - Candidate functions: - sum(UserDefined) - "); - assert_snapshot!(error_message("select max(9, 3)"), @r" - Error during planning: Execution error: Function 'max' user-defined coercion failed with: Execution error: min/max was called with 2 arguments. It requires only 1.. No function matches the given name and argument types 'max(Int64, Int64)'. You might need to add explicit type casts. - Candidate functions: - max(UserDefined) - "); + assert!(error_message("select sum()").starts_with(r"Error during planning: Execution error: Function 'sum' user-defined coercion failed with: Execution error: sum function requires 1 argument, got 0")); + assert!(error_message("select max(9, 3)").starts_with(r"Error during planning: Execution error: Function 'max' user-defined coercion failed with: Execution error: min/max was called with 2 arguments. It requires only 1")); } #[test] fn test_error_message_invalid_window_function_signature() { - assert_snapshot!(error_message("select rank(1) over()"), @r" - Error during planning: The function 'rank' expected zero argument but received 1. No function matches the given name and argument types 'rank(Int64)'. You might need to add explicit type casts. - Candidate functions: - rank(NullAry()) - "); + assert!(error_message("select rank(1) over()").starts_with(r"Error during planning: The function 'rank' expected zero argument but received 1")); } #[test] fn test_error_message_invalid_window_aggregate_function_signature() { - assert_snapshot!(error_message("select sum() over()"), @r" - Error during planning: Execution error: Function 'sum' user-defined coercion failed with: Execution error: sum function requires 1 argument, got 0. No function matches the given name and argument types 'sum()'. You might need to add explicit type casts. - Candidate functions: - sum(UserDefined) - "); + assert!(error_message("select sum() over()").starts_with(r"Error during planning: Execution error: Function 'sum' user-defined coercion failed with: Execution error: sum function requires 1 argument, got 0")); } // Test issue: https://github.com/apache/datafusion/issues/14058 From dc6910e248b3a1228181a182bc4f72e4e076f65d Mon Sep 17 00:00:00 2001 From: Tim Saucer Date: Sun, 1 Mar 2026 08:18:08 -0500 Subject: [PATCH 10/10] minor test update --- datafusion/common/src/types/logical.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/common/src/types/logical.rs b/datafusion/common/src/types/logical.rs index c4824a138e90f..0f886252d6452 100644 --- a/datafusion/common/src/types/logical.rs +++ b/datafusion/common/src/types/logical.rs @@ -171,10 +171,10 @@ mod tests { let struct_type: Arc = Arc::new(NativeType::Struct( LogicalFields::from(&Fields::from(vec![ Field::new("x", DataType::Float64, false), - Field::new("y", DataType::Float64, false), + Field::new("y", DataType::Float64, true), ])), )); - assert_snapshot!(struct_type, @r#"Struct("x": non-null Float64, "y": non-null Float64)"#); + assert_snapshot!(struct_type, @r#"Struct("x": non-null Float64, "y": Float64)"#); } #[test]