Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions datafusion-examples/examples/relation_planner/table_sample.rs
Original file line number Diff line number Diff line change
Expand Up @@ -727,8 +727,8 @@ impl ExecutionPlan for SampleExec {
Some(self.metrics.clone_inner())
}

fn partition_statistics(&self, partition: Option<usize>) -> Result<Statistics> {
let mut stats = self.input.partition_statistics(partition)?;
fn partition_statistics(&self, partition: Option<usize>) -> Result<Arc<Statistics>> {
let mut stats = Arc::unwrap_or_clone(self.input.partition_statistics(partition)?);
let ratio = self.upper_bound - self.lower_bound;

// Scale statistics by sampling ratio (inexact due to randomness)
Expand All @@ -741,7 +741,7 @@ impl ExecutionPlan for SampleExec {
.map(|n| (n as f64 * ratio) as usize)
.to_inexact();

Ok(stats)
Ok(Arc::new(stats))
}
}

Expand Down
8 changes: 4 additions & 4 deletions datafusion/core/tests/custom_sources_cases/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,12 +180,12 @@ impl ExecutionPlan for CustomExecutionPlan {
Ok(Box::pin(TestCustomRecordBatchStream { nb_batch: 1 }))
}

fn partition_statistics(&self, partition: Option<usize>) -> Result<Statistics> {
fn partition_statistics(&self, partition: Option<usize>) -> Result<Arc<Statistics>> {
if partition.is_some() {
return Ok(Statistics::new_unknown(&self.schema()));
return Ok(Arc::new(Statistics::new_unknown(&self.schema())));
}
let batch = TEST_CUSTOM_RECORD_BATCH!().unwrap();
Ok(Statistics {
Ok(Arc::new(Statistics {
num_rows: Precision::Exact(batch.num_rows()),
total_byte_size: Precision::Absent,
column_statistics: self
Expand All @@ -204,7 +204,7 @@ impl ExecutionPlan for CustomExecutionPlan {
..Default::default()
})
.collect(),
})
}))
}
}

Expand Down
14 changes: 7 additions & 7 deletions datafusion/core/tests/custom_sources_cases/statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,11 +181,11 @@ impl ExecutionPlan for StatisticsValidation {
unimplemented!("This plan only serves for testing statistics")
}

fn partition_statistics(&self, partition: Option<usize>) -> Result<Statistics> {
fn partition_statistics(&self, partition: Option<usize>) -> Result<Arc<Statistics>> {
if partition.is_some() {
Ok(Statistics::new_unknown(&self.schema))
Ok(Arc::new(Statistics::new_unknown(&self.schema)))
} else {
Ok(self.stats.clone())
Ok(Arc::new(self.stats.clone()))
}
}
}
Expand Down Expand Up @@ -238,7 +238,7 @@ async fn sql_basic() -> Result<()> {
let physical_plan = df.create_physical_plan().await.unwrap();

// the statistics should be those of the source
assert_eq!(stats, physical_plan.partition_statistics(None)?);
assert_eq!(stats, *physical_plan.partition_statistics(None)?);

Ok(())
}
Expand Down Expand Up @@ -278,7 +278,7 @@ async fn sql_limit() -> Result<()> {
.collect(),
total_byte_size: Precision::Absent
},
physical_plan.partition_statistics(None)?
*physical_plan.partition_statistics(None)?
);

let df = ctx
Expand All @@ -287,7 +287,7 @@ async fn sql_limit() -> Result<()> {
.unwrap();
let physical_plan = df.create_physical_plan().await.unwrap();
// when the limit is larger than the original number of lines, statistics remain unchanged
assert_eq!(stats, physical_plan.partition_statistics(None)?);
assert_eq!(stats, *physical_plan.partition_statistics(None)?);

Ok(())
}
Expand All @@ -307,7 +307,7 @@ async fn sql_window() -> Result<()> {
let result = physical_plan.partition_statistics(None)?;

assert_eq!(stats.num_rows, result.num_rows);
let col_stats = result.column_statistics;
let col_stats = &result.column_statistics;
assert_eq!(2, col_stats.len());
assert_eq!(stats.column_statistics[1], col_stats[0]);

Expand Down
6 changes: 3 additions & 3 deletions datafusion/core/tests/physical_optimizer/join_selection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1176,12 +1176,12 @@ impl ExecutionPlan for StatisticsExec {
unimplemented!("This plan only serves for testing statistics")
}

fn partition_statistics(&self, partition: Option<usize>) -> Result<Statistics> {
Ok(if partition.is_some() {
fn partition_statistics(&self, partition: Option<usize>) -> Result<Arc<Statistics>> {
Ok(Arc::new(if partition.is_some() {
Statistics::new_unknown(&self.schema)
} else {
self.stats.clone()
})
}))
}
}

Expand Down
88 changes: 44 additions & 44 deletions datafusion/core/tests/physical_optimizer/partition_statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,8 @@ mod test {
);
// Check the statistics of each partition
assert_eq!(statistics.len(), 2);
assert_eq!(statistics[0], expected_statistic_partition_1);
assert_eq!(statistics[1], expected_statistic_partition_2);
assert_eq!(*statistics[0], expected_statistic_partition_1);
assert_eq!(*statistics[1], expected_statistic_partition_2);

// Check the statistics_by_partition with real results
let expected_stats = vec![
Expand Down Expand Up @@ -288,8 +288,8 @@ mod test {
create_partition_statistics(2, 8, 1, 2, None);
// Check the statistics of each partition
assert_eq!(statistics.len(), 2);
assert_eq!(statistics[0], expected_statistic_partition_1);
assert_eq!(statistics[1], expected_statistic_partition_2);
assert_eq!(*statistics[0], expected_statistic_partition_1);
assert_eq!(*statistics[1], expected_statistic_partition_2);

// Check the statistics_by_partition with real results
let expected_stats = vec![
Expand Down Expand Up @@ -322,7 +322,7 @@ mod test {
Some((DATE_2025_03_01, DATE_2025_03_04)),
);
assert_eq!(statistics.len(), 1);
assert_eq!(statistics[0], expected_statistic_partition);
assert_eq!(*statistics[0], expected_statistic_partition);
// Check the statistics_by_partition with real results
let expected_stats = vec![ExpectedStatistics::NonEmpty(1, 4, 4)];
validate_statistics_with_data(sort_exec.clone(), expected_stats, 0).await?;
Expand Down Expand Up @@ -353,8 +353,8 @@ mod test {
.map(|idx| sort_exec.partition_statistics(Some(idx)))
.collect::<Result<Vec<_>>>()?;
assert_eq!(statistics.len(), 2);
assert_eq!(statistics[0], expected_statistic_partition_1);
assert_eq!(statistics[1], expected_statistic_partition_2);
assert_eq!(*statistics[0], expected_statistic_partition_1);
assert_eq!(*statistics[1], expected_statistic_partition_2);

// Check the statistics_by_partition with real results
let expected_stats = vec![
Expand Down Expand Up @@ -402,7 +402,7 @@ mod test {
},
],
};
assert_eq!(full_statistics, expected_full_statistic);
assert_eq!(*full_statistics, expected_full_statistic);

let statistics = (0..filter.output_partitioning().partition_count())
.map(|idx| filter.partition_statistics(Some(idx)))
Expand Down Expand Up @@ -431,8 +431,8 @@ mod test {
},
],
};
assert_eq!(statistics[0], expected_partition_statistic);
assert_eq!(statistics[1], expected_partition_statistic);
assert_eq!(*statistics[0], expected_partition_statistic);
assert_eq!(*statistics[1], expected_partition_statistic);
Ok(())
}

Expand Down Expand Up @@ -463,13 +463,13 @@ mod test {
Some((DATE_2025_03_03, DATE_2025_03_04)),
);
// Verify first partition (from first scan)
assert_eq!(statistics[0], expected_statistic_partition_1);
assert_eq!(*statistics[0], expected_statistic_partition_1);
// Verify second partition (from first scan)
assert_eq!(statistics[1], expected_statistic_partition_2);
assert_eq!(*statistics[1], expected_statistic_partition_2);
// Verify third partition (from second scan - same as first partition)
assert_eq!(statistics[2], expected_statistic_partition_1);
assert_eq!(*statistics[2], expected_statistic_partition_1);
// Verify fourth partition (from second scan - same as second partition)
assert_eq!(statistics[3], expected_statistic_partition_2);
assert_eq!(*statistics[3], expected_statistic_partition_2);

// Check the statistics_by_partition with real results
let expected_stats = vec![
Expand Down Expand Up @@ -518,8 +518,8 @@ mod test {
ColumnStatistics::new_unknown(),
],
};
assert_eq!(stats[0], expected_stats);
assert_eq!(stats[1], expected_stats);
assert_eq!(*stats[0], expected_stats);
assert_eq!(*stats[1], expected_stats);

// Verify the execution results
let partitions = execute_stream_partitioned(
Expand Down Expand Up @@ -625,8 +625,8 @@ mod test {
},
],
};
assert_eq!(statistics[0], expected_statistic_partition_1);
assert_eq!(statistics[1], expected_statistic_partition_2);
assert_eq!(*statistics[0], expected_statistic_partition_1);
assert_eq!(*statistics[1], expected_statistic_partition_2);

// Check the statistics_by_partition with real results
let expected_stats = vec![
Expand Down Expand Up @@ -670,7 +670,7 @@ mod test {
);
expected_full_statistics.num_rows = Precision::Inexact(4);
expected_full_statistics.total_byte_size = Precision::Absent;
assert_eq!(full_statistics, expected_full_statistics);
assert_eq!(*full_statistics, expected_full_statistics);

// Test partition_statistics(Some(idx)) - returns partition-specific statistics
// Partition 1: ids [3,4], dates [2025-03-01, 2025-03-02]
Expand Down Expand Up @@ -699,8 +699,8 @@ mod test {
.map(|idx| nested_loop_join.partition_statistics(Some(idx)))
.collect::<Result<Vec<_>>>()?;
assert_eq!(statistics.len(), 2);
assert_eq!(statistics[0], expected_statistic_partition_1);
assert_eq!(statistics[1], expected_statistic_partition_2);
assert_eq!(*statistics[0], expected_statistic_partition_1);
assert_eq!(*statistics[1], expected_statistic_partition_2);

// Check the statistics_by_partition with real results
let expected_stats = vec![
Expand Down Expand Up @@ -729,7 +729,7 @@ mod test {
.map(|idx| coalesce_partitions.partition_statistics(Some(idx)))
.collect::<Result<Vec<_>>>()?;
assert_eq!(statistics.len(), 1);
assert_eq!(statistics[0], expected_statistic_partition);
assert_eq!(*statistics[0], expected_statistic_partition);

// Check the statistics_by_partition with real results
let expected_stats = vec![ExpectedStatistics::NonEmpty(1, 4, 4)];
Expand All @@ -746,20 +746,20 @@ mod test {
.map(|idx| local_limit.partition_statistics(Some(idx)))
.collect::<Result<Vec<_>>>()?;
assert_eq!(statistics.len(), 2);
let mut expected_0 = statistics[0].clone();
let mut expected_0 = Statistics::clone(&statistics[0]);
expected_0.column_statistics = expected_0
.column_statistics
.into_iter()
.map(|c| c.to_inexact())
.collect();
let mut expected_1 = statistics[1].clone();
let mut expected_1 = Statistics::clone(&statistics[1]);
expected_1.column_statistics = expected_1
.column_statistics
.into_iter()
.map(|c| c.to_inexact())
.collect();
assert_eq!(statistics[0], expected_0);
assert_eq!(statistics[1], expected_1);
assert_eq!(*statistics[0], expected_0);
assert_eq!(*statistics[1], expected_1);
Ok(())
}

Expand All @@ -781,7 +781,7 @@ mod test {
4,
Some((DATE_2025_03_01, DATE_2025_03_02)),
);
assert_eq!(statistics[0], expected_statistic_partition);
assert_eq!(*statistics[0], expected_statistic_partition);
Ok(())
}

Expand Down Expand Up @@ -849,7 +849,7 @@ mod test {
],
};

assert_eq!(&p0_statistics, &expected_p0_statistics);
assert_eq!(*p0_statistics, expected_p0_statistics);

let expected_p1_statistics = Statistics {
num_rows: Precision::Inexact(2),
Expand All @@ -869,7 +869,7 @@ mod test {
};

let p1_statistics = aggregate_exec_partial.partition_statistics(Some(1))?;
assert_eq!(&p1_statistics, &expected_p1_statistics);
assert_eq!(*p1_statistics, expected_p1_statistics);

validate_statistics_with_data(
aggregate_exec_partial.clone(),
Expand All @@ -891,10 +891,10 @@ mod test {
)?);

let p0_statistics = agg_final.partition_statistics(Some(0))?;
assert_eq!(&p0_statistics, &expected_p0_statistics);
assert_eq!(*p0_statistics, expected_p0_statistics);

let p1_statistics = agg_final.partition_statistics(Some(1))?;
assert_eq!(&p1_statistics, &expected_p1_statistics);
assert_eq!(*p1_statistics, expected_p1_statistics);

validate_statistics_with_data(
agg_final.clone(),
Expand Down Expand Up @@ -935,8 +935,8 @@ mod test {
],
};

assert_eq!(&empty_stat, &agg_partial.partition_statistics(Some(0))?);
assert_eq!(&empty_stat, &agg_partial.partition_statistics(Some(1))?);
assert_eq!(empty_stat, *agg_partial.partition_statistics(Some(0))?);
assert_eq!(empty_stat, *agg_partial.partition_statistics(Some(1))?);
validate_statistics_with_data(
agg_partial.clone(),
vec![ExpectedStatistics::Empty, ExpectedStatistics::Empty],
Expand All @@ -962,8 +962,8 @@ mod test {
agg_partial.schema(),
)?);

assert_eq!(&empty_stat, &agg_final.partition_statistics(Some(0))?);
assert_eq!(&empty_stat, &agg_final.partition_statistics(Some(1))?);
assert_eq!(empty_stat, *agg_final.partition_statistics(Some(0))?);
assert_eq!(empty_stat, *agg_final.partition_statistics(Some(1))?);

validate_statistics_with_data(
agg_final,
Expand Down Expand Up @@ -999,7 +999,7 @@ mod test {
column_statistics: vec![ColumnStatistics::new_unknown()],
};

assert_eq!(&expect_stat, &agg_final.partition_statistics(Some(0))?);
assert_eq!(expect_stat, *agg_final.partition_statistics(Some(0))?);

// Verify that the aggregate final result has exactly one partition with one row
let mut partitions = execute_stream_partitioned(
Expand Down Expand Up @@ -1033,13 +1033,13 @@ mod test {
&schema,
None,
);
assert_eq!(actual, expected);
assert_eq!(*actual, expected);
all_batches.push(batches);
}

let actual = plan.partition_statistics(None)?;
let expected = compute_record_batch_statistics(&all_batches, &schema, None);
assert_eq!(actual, expected);
assert_eq!(*actual, expected);

Ok(())
}
Expand Down Expand Up @@ -1070,7 +1070,7 @@ mod test {

// All partitions should have the same statistics
for stat in statistics.iter() {
assert_eq!(stat, &expected_stats);
assert_eq!(**stat, expected_stats);
}

// Verify that the result has exactly 3 partitions
Expand Down Expand Up @@ -1135,7 +1135,7 @@ mod test {
)?);

let result = repartition.partition_statistics(Some(0))?;
assert_eq!(result, Statistics::new_unknown(&scan_schema));
assert_eq!(*result, Statistics::new_unknown(&scan_schema));

// Verify that the result has exactly 0 partitions
let partitions = execute_stream_partitioned(
Expand Down Expand Up @@ -1174,8 +1174,8 @@ mod test {
ColumnStatistics::new_unknown(),
],
};
assert_eq!(stats[0], expected_stats);
assert_eq!(stats[1], expected_stats);
assert_eq!(*stats[0], expected_stats);
assert_eq!(*stats[1], expected_stats);

// Verify the repartition execution results
let partitions =
Expand Down Expand Up @@ -1282,8 +1282,8 @@ mod test {
],
};

assert_eq!(statistics[0], expected_statistic_partition_1);
assert_eq!(statistics[1], expected_statistic_partition_2);
assert_eq!(*statistics[0], expected_statistic_partition_1);
assert_eq!(*statistics[1], expected_statistic_partition_2);

// Verify the statistics match actual execution results
let expected_stats = vec![
Expand Down
Loading
Loading