Skip to content

Commit 58bdb9f

Browse files
authored
fix: restore no-op logic in constants_map for NULL identity-partitioned columns (#1922)
## Which issue does this PR close? See #1824 (comment) and #1914 (comment) ## What changes are included in this PR? This restores the behavior in `record_batch_transformer.rs`'s `constants_map` function to pre-#1824 behavior where `NULL`s are not inserted into the constants map, and instead are just skipped. This allows the column projection rules for missing partition values to default to `NULL`. ## Are these changes tested? New test, and running the entire Iceberg Java suite via DataFusion Comet in apache/datafusion-comet#2729.
1 parent 2ed0a6f commit 58bdb9f

File tree

1 file changed

+73
-8
lines changed

1 file changed

+73
-8
lines changed

crates/iceberg/src/arrow/record_batch_transformer.rs

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,10 @@ fn constants_map(
8383
// Handle both None (null) and Some(Literal::Primitive) cases
8484
match &partition_data[pos] {
8585
None => {
86-
// TODO (https://github.com/apache/iceberg-rust/issues/1914): Add support for null datum values.
87-
return Err(Error::new(
88-
ErrorKind::Unexpected,
89-
format!(
90-
"Partition field {} has null value for identity transform",
91-
field.source_id
92-
),
93-
));
86+
// Skip null partition values - they will be resolved as null per Iceberg spec rule #4.
87+
// When a partition value is null, we don't add it to the constants map,
88+
// allowing downstream column resolution to handle it correctly.
89+
continue;
9490
}
9591
Some(Literal::Primitive(value)) => {
9692
// Create a Datum from the primitive type and value
@@ -1610,4 +1606,73 @@ mod test {
16101606
assert_eq!(get_string_value(result.column(4).as_ref(), 0), "");
16111607
assert_eq!(get_string_value(result.column(4).as_ref(), 1), "");
16121608
}
1609+
1610+
/// Test handling of null values in identity-partitioned columns.
1611+
///
1612+
/// Reproduces TestPartitionValues.testNullPartitionValue() from iceberg-java, which
1613+
/// writes records where the partition column has null values. Before the fix in #1922,
1614+
/// this would error with "Partition field X has null value for identity transform".
1615+
#[test]
1616+
fn null_identity_partition_value() {
1617+
use crate::spec::{Struct, Transform};
1618+
1619+
let schema = Arc::new(
1620+
Schema::builder()
1621+
.with_schema_id(0)
1622+
.with_fields(vec![
1623+
NestedField::optional(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
1624+
NestedField::optional(2, "data", Type::Primitive(PrimitiveType::String)).into(),
1625+
])
1626+
.build()
1627+
.unwrap(),
1628+
);
1629+
1630+
let partition_spec = Arc::new(
1631+
crate::spec::PartitionSpec::builder(schema.clone())
1632+
.with_spec_id(0)
1633+
.add_partition_field("data", "data", Transform::Identity)
1634+
.unwrap()
1635+
.build()
1636+
.unwrap(),
1637+
);
1638+
1639+
// Partition has null value for the data column
1640+
let partition_data = Struct::from_iter(vec![None]);
1641+
1642+
let file_schema = Arc::new(ArrowSchema::new(vec![simple_field(
1643+
"id",
1644+
DataType::Int32,
1645+
true,
1646+
"1",
1647+
)]));
1648+
1649+
let projected_field_ids = [1, 2];
1650+
1651+
let mut transformer = RecordBatchTransformerBuilder::new(schema, &projected_field_ids)
1652+
.with_partition(partition_spec, partition_data)
1653+
.expect("Should handle null partition values")
1654+
.build();
1655+
1656+
let file_batch =
1657+
RecordBatch::try_new(file_schema, vec![Arc::new(Int32Array::from(vec![1, 2, 3]))])
1658+
.unwrap();
1659+
1660+
let result = transformer.process_record_batch(file_batch).unwrap();
1661+
1662+
assert_eq!(result.num_columns(), 2);
1663+
assert_eq!(result.num_rows(), 3);
1664+
1665+
let id_col = result
1666+
.column(0)
1667+
.as_any()
1668+
.downcast_ref::<Int32Array>()
1669+
.unwrap();
1670+
assert_eq!(id_col.values(), &[1, 2, 3]);
1671+
1672+
// Partition column with null value should produce nulls
1673+
let data_col = result.column(1);
1674+
assert!(data_col.is_null(0));
1675+
assert!(data_col.is_null(1));
1676+
assert!(data_col.is_null(2));
1677+
}
16131678
}

0 commit comments

Comments
 (0)