From e6a6a0c5994e37796111f14f16dc703c90166e07 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Sat, 10 Jan 2026 17:40:09 +0100 Subject: [PATCH 1/2] add failing case --- .../sqllogictest/test_files/simplify_expr.slt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/datafusion/sqllogictest/test_files/simplify_expr.slt b/datafusion/sqllogictest/test_files/simplify_expr.slt index d8c25ab25e8ea..7279df86edf70 100644 --- a/datafusion/sqllogictest/test_files/simplify_expr.slt +++ b/datafusion/sqllogictest/test_files/simplify_expr.slt @@ -113,3 +113,21 @@ logical_plan physical_plan 01)ProjectionExec: expr=[[{x:100}] as a] 02)--PlaceholderRowExec + +# Simplify cases where the end expressions are the same to that expression +query TT +EXPLAIN SELECT + CASE v when 100 then 1 else 1 end as opt1, + CASE v when 200 then 2 when 201 then 2 else 2 end as opt2, + CASE v when 300 then 3 when 301 then 3 else 4 end as noopt1, + CASE v when 400 then 4 when 401 then 4 end as noopt2 +FROM (VALUES (0), (1), (2)) t(v) +---- +logical_plan +01)Projection: CASE t.v WHEN Int64(100) THEN Int64(1) ELSE Int64(1) END AS opt1, CASE t.v WHEN Int64(200) THEN Int64(2) WHEN Int64(201) THEN Int64(2) ELSE Int64(2) END AS opt2, CASE t.v WHEN Int64(300) THEN Int64(3) WHEN Int64(301) THEN Int64(3) ELSE Int64(4) END AS noopt1, CASE t.v WHEN Int64(400) THEN Int64(4) WHEN Int64(401) THEN Int64(4) END AS noopt2 +02)--SubqueryAlias: t +03)----Projection: column1 AS v +04)------Values: (Int64(0)), (Int64(1)), (Int64(2)) +physical_plan +01)ProjectionExec: expr=[CASE column1@0 WHEN 100 THEN 1 ELSE 1 END as opt1, CASE column1@0 WHEN 200 THEN 2 WHEN 201 THEN 2 ELSE 2 END as opt2, CASE column1@0 WHEN 300 THEN 3 WHEN 301 THEN 3 ELSE 4 END as noopt1, CASE column1@0 WHEN 400 THEN 4 WHEN 401 THEN 4 END as noopt2] +02)--DataSourceExec: partitions=1, partition_sizes=[1] From 2d70d8e9221a989f0951dff825ac709f99c3a615 Mon Sep 17 00:00:00 2001 From: Simon Vandel Sillesen Date: Sat, 10 Jan 2026 17:52:11 +0100 Subject: [PATCH 2/2] add optimization --- .../src/simplify_expressions/expr_simplifier.rs | 15 +++++++++++++++ datafusion/sqllogictest/test_files/cse.slt | 12 ++++++------ .../sqllogictest/test_files/simplify_expr.slt | 4 ++-- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 55bff5849c5cb..033d2032b45dd 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -1466,6 +1466,21 @@ impl TreeNodeRewriter for Simplifier<'_> { })) } + // CASE [expr] WHEN ... THEN A WHEN ... THEN A ... ELSE A END --> A + // All branches (THEN expressions and ELSE) must be identical. + // ELSE clause must be present (otherwise unmatched cases return NULL). + Expr::Case(Case { + expr: _, + when_then_expr, + else_expr: Some(else_expr), + }) if !when_then_expr.is_empty() + && when_then_expr + .iter() + .all(|(_, then_expr)| then_expr.as_ref() == else_expr.as_ref()) => + { + Transformed::yes(*else_expr) + } + // CASE // WHEN X THEN A // WHEN Y THEN B diff --git a/datafusion/sqllogictest/test_files/cse.slt b/datafusion/sqllogictest/test_files/cse.slt index 1af4f14c937e7..f0c2ae31ac810 100644 --- a/datafusion/sqllogictest/test_files/cse.slt +++ b/datafusion/sqllogictest/test_files/cse.slt @@ -188,12 +188,12 @@ EXPLAIN SELECT FROM t1 ---- logical_plan -01)Projection: (__common_expr_1 OR random() = Float64(0)) AND __common_expr_2 AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_1 AS c2, CASE WHEN __common_expr_3 = Float64(0) THEN __common_expr_3 ELSE Float64(0) END AS c3, CASE WHEN __common_expr_4 = Float64(0) THEN Int64(0) WHEN CAST(__common_expr_4 AS Boolean) THEN Int64(0) ELSE Int64(0) END AS c4, CASE WHEN __common_expr_5 = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN __common_expr_5 ELSE Float64(0) END AS c5, CASE WHEN __common_expr_6 = Float64(0) THEN Float64(0) ELSE __common_expr_6 END AS c6 -02)--Projection: t1.a = Float64(1) AS __common_expr_1, t1.a = Float64(2) AS __common_expr_2, t1.a + Float64(3) AS __common_expr_3, t1.a + Float64(4) AS __common_expr_4, t1.a + Float64(5) AS __common_expr_5, t1.a + Float64(6) AS __common_expr_6 +01)Projection: (__common_expr_1 OR random() = Float64(0)) AND __common_expr_2 AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_1 AS c2, CASE WHEN __common_expr_3 = Float64(0) THEN __common_expr_3 ELSE Float64(0) END AS c3, Int64(0) AS c4, CASE WHEN __common_expr_4 = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN __common_expr_4 ELSE Float64(0) END AS c5, CASE WHEN __common_expr_5 = Float64(0) THEN Float64(0) ELSE __common_expr_5 END AS c6 +02)--Projection: t1.a = Float64(1) AS __common_expr_1, t1.a = Float64(2) AS __common_expr_2, t1.a + Float64(3) AS __common_expr_3, t1.a + Float64(5) AS __common_expr_4, t1.a + Float64(6) AS __common_expr_5 03)----TableScan: t1 projection=[a] physical_plan -01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND __common_expr_2@1 as c1, __common_expr_2@1 AND random() = 0 OR __common_expr_1@0 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2 ELSE 0 END as c3, CASE WHEN __common_expr_4@3 = 0 THEN 0 WHEN CAST(__common_expr_4@3 AS Boolean) THEN 0 ELSE 0 END as c4, CASE WHEN __common_expr_5@4 = 0 THEN 0 WHEN random() = 0 THEN __common_expr_5@4 ELSE 0 END as c5, CASE WHEN __common_expr_6@5 = 0 THEN 0 ELSE __common_expr_6@5 END as c6] -02)--ProjectionExec: expr=[a@0 = 1 as __common_expr_1, a@0 = 2 as __common_expr_2, a@0 + 3 as __common_expr_3, a@0 + 4 as __common_expr_4, a@0 + 5 as __common_expr_5, a@0 + 6 as __common_expr_6] +01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND __common_expr_2@1 as c1, __common_expr_2@1 AND random() = 0 OR __common_expr_1@0 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2 ELSE 0 END as c3, 0 as c4, CASE WHEN __common_expr_4@3 = 0 THEN 0 WHEN random() = 0 THEN __common_expr_4@3 ELSE 0 END as c5, CASE WHEN __common_expr_5@4 = 0 THEN 0 ELSE __common_expr_5@4 END as c6] +02)--ProjectionExec: expr=[a@0 = 1 as __common_expr_1, a@0 = 2 as __common_expr_2, a@0 + 3 as __common_expr_3, a@0 + 5 as __common_expr_4, a@0 + 6 as __common_expr_5] 03)----DataSourceExec: partitions=1, partition_sizes=[0] # Surely only once but also conditionally evaluated subexpressions @@ -226,8 +226,8 @@ EXPLAIN SELECT FROM t1 ---- logical_plan -01)Projection: (random() = Float64(0) OR t1.a = Float64(1)) AND t1.a = Float64(2) AS c1, random() = Float64(0) AND t1.a = Float64(2) OR t1.a = Float64(1) AS c2, CASE WHEN random() = Float64(0) THEN t1.a + Float64(3) ELSE t1.a + Float64(3) END AS c3, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(4) = Float64(0) THEN t1.a + Float64(4) ELSE Float64(0) END AS c4, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(5) = Float64(0) THEN Float64(0) ELSE t1.a + Float64(5) END AS c5, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN t1.a + Float64(6) ELSE t1.a + Float64(6) END AS c6 +01)Projection: (random() = Float64(0) OR t1.a = Float64(1)) AND t1.a = Float64(2) AS c1, random() = Float64(0) AND t1.a = Float64(2) OR t1.a = Float64(1) AS c2, t1.a + Float64(3) AS c3, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(4) = Float64(0) THEN t1.a + Float64(4) ELSE Float64(0) END AS c4, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(5) = Float64(0) THEN Float64(0) ELSE t1.a + Float64(5) END AS c5, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN t1.a + Float64(6) ELSE t1.a + Float64(6) END AS c6 02)--TableScan: t1 projection=[a] physical_plan -01)ProjectionExec: expr=[(random() = 0 OR a@0 = 1) AND a@0 = 2 as c1, random() = 0 AND a@0 = 2 OR a@0 = 1 as c2, CASE WHEN random() = 0 THEN a@0 + 3 ELSE a@0 + 3 END as c3, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 4 = 0 THEN a@0 + 4 ELSE 0 END as c4, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 5 = 0 THEN 0 ELSE a@0 + 5 END as c5, CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a@0 + 6 ELSE a@0 + 6 END as c6] +01)ProjectionExec: expr=[(random() = 0 OR a@0 = 1) AND a@0 = 2 as c1, random() = 0 AND a@0 = 2 OR a@0 = 1 as c2, a@0 + 3 as c3, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 4 = 0 THEN a@0 + 4 ELSE 0 END as c4, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 5 = 0 THEN 0 ELSE a@0 + 5 END as c5, CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a@0 + 6 ELSE a@0 + 6 END as c6] 02)--DataSourceExec: partitions=1, partition_sizes=[0] diff --git a/datafusion/sqllogictest/test_files/simplify_expr.slt b/datafusion/sqllogictest/test_files/simplify_expr.slt index 7279df86edf70..8d857a5332bfb 100644 --- a/datafusion/sqllogictest/test_files/simplify_expr.slt +++ b/datafusion/sqllogictest/test_files/simplify_expr.slt @@ -124,10 +124,10 @@ EXPLAIN SELECT FROM (VALUES (0), (1), (2)) t(v) ---- logical_plan -01)Projection: CASE t.v WHEN Int64(100) THEN Int64(1) ELSE Int64(1) END AS opt1, CASE t.v WHEN Int64(200) THEN Int64(2) WHEN Int64(201) THEN Int64(2) ELSE Int64(2) END AS opt2, CASE t.v WHEN Int64(300) THEN Int64(3) WHEN Int64(301) THEN Int64(3) ELSE Int64(4) END AS noopt1, CASE t.v WHEN Int64(400) THEN Int64(4) WHEN Int64(401) THEN Int64(4) END AS noopt2 +01)Projection: Int64(1) AS opt1, Int64(2) AS opt2, CASE t.v WHEN Int64(300) THEN Int64(3) WHEN Int64(301) THEN Int64(3) ELSE Int64(4) END AS noopt1, CASE t.v WHEN Int64(400) THEN Int64(4) WHEN Int64(401) THEN Int64(4) END AS noopt2 02)--SubqueryAlias: t 03)----Projection: column1 AS v 04)------Values: (Int64(0)), (Int64(1)), (Int64(2)) physical_plan -01)ProjectionExec: expr=[CASE column1@0 WHEN 100 THEN 1 ELSE 1 END as opt1, CASE column1@0 WHEN 200 THEN 2 WHEN 201 THEN 2 ELSE 2 END as opt2, CASE column1@0 WHEN 300 THEN 3 WHEN 301 THEN 3 ELSE 4 END as noopt1, CASE column1@0 WHEN 400 THEN 4 WHEN 401 THEN 4 END as noopt2] +01)ProjectionExec: expr=[1 as opt1, 2 as opt2, CASE column1@0 WHEN 300 THEN 3 WHEN 301 THEN 3 ELSE 4 END as noopt1, CASE column1@0 WHEN 400 THEN 4 WHEN 401 THEN 4 END as noopt2] 02)--DataSourceExec: partitions=1, partition_sizes=[1]