From e858bdc41586196b41df31829f4a4a9e7bb22843 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn <kosiew@gmail.com>
Date: Thu, 12 Feb 2026 18:51:37 +0800
Subject: [PATCH 1/7] feat: Implement preimage bounds for ceil function and add
 corresponding SQL logic tests

---
 datafusion/functions/src/math/ceil.rs         | 388 +++++++++++++++++-
 .../sqllogictest/test_files/ceil_preimage.slt | 242 +++++++++++
 2 files changed, 626 insertions(+), 4 deletions(-)
 create mode 100644 datafusion/sqllogictest/test_files/ceil_preimage.slt
diff --git a/datafusion/functions/src/math/ceil.rs b/datafusion/functions/src/math/ceil.rs
index 5961b3cb27fed..348d6dd2f9cc1 100644
--- a/datafusion/functions/src/math/ceil.rs
+++ b/datafusion/functions/src/math/ceil.rs
@@ -19,18 +19,23 @@ use std::any::Any;
 use std::sync::Arc;
 
 use arrow::array::{ArrayRef, AsArray};
+use arrow::compute::{DecimalCast, rescale_decimal};
 use arrow::datatypes::{
-    DataType, Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type, Float32Type,
-    Float64Type,
+    ArrowNativeTypeOp, DataType, Decimal32Type, Decimal64Type, Decimal128Type,
+    Decimal256Type, DecimalType, Float32Type, Float64Type,
 };
+use datafusion_common::rounding::{FloatBits, next_up};
 use datafusion_common::{Result, ScalarValue, exec_err};
 use datafusion_expr::interval_arithmetic::Interval;
+use datafusion_expr::preimage::PreimageResult;
+use datafusion_expr::simplify::SimplifyContext;
 use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
 use datafusion_expr::{
-    Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
-    TypeSignature, TypeSignatureClass, Volatility,
+    Coercion, ColumnarValue, Documentation, Expr, ScalarFunctionArgs, ScalarUDFImpl,
+    Signature, TypeSignature, TypeSignatureClass, Volatility,
 };
 use datafusion_macros::user_doc;
+use num_traits::{CheckedAdd, Float, One};
 
 use super::decimal::{apply_decimal_op, ceil_decimal_value};
 
@@ -74,6 +79,42 @@ impl CeilFunc {
     }
 }
 
+// ============ Macro for preimage bounds ============
+/// Expands to a call to the matching `*_preimage_bounds` helper, wrapping `Some((lo, hi))` in `ScalarValue`s.
+macro_rules! preimage_bounds {
+    // Float types: call float_preimage_bounds and wrap in ScalarValue
+    (float: $variant:ident, $value:expr) => {
+        float_preimage_bounds($value).map(|(lo, hi)| {
+            (
+                ScalarValue::$variant(Some(lo)),
+                ScalarValue::$variant(Some(hi)),
+            )
+        })
+    };
+
+    // Integer types: call int_preimage_bounds and wrap in ScalarValue
+    (int: $variant:ident, $value:expr) => {
+        int_preimage_bounds($value).map(|(lo, hi)| {
+            (
+                ScalarValue::$variant(Some(lo)),
+                ScalarValue::$variant(Some(hi)),
+            )
+        })
+    };
+
+    // Decimal types: call decimal_preimage_bounds with precision/scale and wrap in ScalarValue
+    (decimal: $variant:ident, $decimal_type:ty, $value:expr, $precision:expr, $scale:expr) => {
+        decimal_preimage_bounds::<$decimal_type>($value, $precision, $scale).map(
+            |(lo, hi)| {
+                (
+                    ScalarValue::$variant(Some(lo), $precision, $scale),
+                    ScalarValue::$variant(Some(hi), $precision, $scale),
+                )
+            },
+        )
+    };
+}
+
 impl ScalarUDFImpl for CeilFunc {
     fn as_any(&self) -> &dyn Any {
         self
@@ -200,7 +241,346 @@ impl ScalarUDFImpl for CeilFunc {
         Interval::make_unbounded(&data_type)
     }
 
+    /// Compute the preimage for ceil function.
+    ///
+    /// For `ceil(x) = N`, the preimage is `x > N - 1 AND x <= N`
+    /// i.e. x in (N-1, N]; the bounds returned are [first value above N-1, first value above N).
+    fn preimage(
+        &self,
+        args: &[Expr],
+        lit_expr: &Expr,
+        _info: &SimplifyContext,
+    ) -> Result<PreimageResult> {
+        // ceil takes exactly one argument and we do not expect to reach here with multiple arguments.
+        debug_assert!(args.len() == 1, "ceil() takes exactly one argument");
+
+        let arg = args[0].clone();
+
+        // Only a literal comparison value can be rewritten; anything else yields `PreimageResult::None`
+        let Expr::Literal(lit_value, _) = lit_expr else {
+            return Ok(PreimageResult::None);
+        };
+
+        // Lower bound = first representable value above N-1, upper bound = first one above N; `None` means no rewrite
+        let Some((lower, upper)) = (match lit_value {
+            // Floating-point types
+            ScalarValue::Float64(Some(n)) => preimage_bounds!(float: Float64, *n),
+            ScalarValue::Float32(Some(n)) => preimage_bounds!(float: Float32, *n),
+
+            // Integer types (not reachable from SQL/SLT: ceil() only accepts Float64/Float32/Decimal,
+            // so the RHS literal is always coerced to one of those before preimage runs; kept for
+            // programmatic use and unit tests)
+            ScalarValue::Int8(Some(n)) => preimage_bounds!(int: Int8, *n),
+            ScalarValue::Int16(Some(n)) => preimage_bounds!(int: Int16, *n),
+            ScalarValue::Int32(Some(n)) => preimage_bounds!(int: Int32, *n),
+            ScalarValue::Int64(Some(n)) => preimage_bounds!(int: Int64, *n),
+
+            // Decimal types
+            // DECIMAL(precision, scale) where precision <= 38 -> Decimal128(precision, scale)
+            // DECIMAL(precision, scale) where precision > 38 -> Decimal256(precision, scale)
+            // Decimal32 and Decimal64 are unreachable from SQL/SLT.
+            ScalarValue::Decimal32(Some(n), precision, scale) => {
+                preimage_bounds!(decimal: Decimal32, Decimal32Type, *n, *precision, *scale)
+            }
+            ScalarValue::Decimal64(Some(n), precision, scale) => {
+                preimage_bounds!(decimal: Decimal64, Decimal64Type, *n, *precision, *scale)
+            }
+            ScalarValue::Decimal128(Some(n), precision, scale) => {
+                preimage_bounds!(decimal: Decimal128, Decimal128Type, *n, *precision, *scale)
+            }
+            ScalarValue::Decimal256(Some(n), precision, scale) => {
+                preimage_bounds!(decimal: Decimal256, Decimal256Type, *n, *precision, *scale)
+            }
+
+            // Unsupported types, including NULL literals of the supported types (`None` payload)
+            _ => None,
+        }) else {
+            return Ok(PreimageResult::None);
+        };
+
+        Ok(PreimageResult::Range {
+            expr: arg,
+            interval: Box::new(Interval::try_new(lower, upper)?),
+        })
+    }
+
     fn documentation(&self) -> Option<&Documentation> {
         self.doc()
     }
 }
+
+// ============ Helper functions for preimage bounds ============
+
+/// Compute preimage bounds for ceil function on floating-point types.
+/// For ceil(x) = n, the preimage is (n-1, n] which maps to
+/// [next_up(n-1), next_up(n)).
+/// Returns None if:
+/// - The value is non-finite (infinity, NaN)
+/// - The value is not an integer (ceil always returns integers, so ceil(x) = 1.3 has no solution)
+/// - `n - 1` rounds back to `n` (large magnitudes), or the resulting range would be empty
+fn float_preimage_bounds<F: Float + FloatBits + Copy>(n: F) -> Option<(F, F)> {
+    let one = F::one();
+    if !n.is_finite() {
+        return None;
+    }
+    if n.fract() != F::zero() {
+        return None;
+    }
+
+    let lower_candidate = n - one;
+    if lower_candidate >= n {
+        return None;
+    }
+
+    let lower = next_up(lower_candidate);
+    let upper = next_up(n);
+    if lower >= upper {
+        return None;
+    }
+
+    Some((lower, upper))
+}
+
+/// Compute preimage bounds for ceil function on integer types.
+/// ceil is the identity on integers, so ceil(x) = n only for x = n, i.e. the range [n, n+1).
+/// Returns None if `n + 1` overflows the integer type (checked addition).
+fn int_preimage_bounds<I: CheckedAdd + One + Copy>(n: I) -> Option<(I, I)> {
+    let upper = n.checked_add(&I::one())?;
+    Some((n, upper))
+}
+
+/// Compute preimage bounds for ceil function on decimal types.
+/// For ceil(x) = n, the preimage is (n-1, n], returned as the half-open range
+/// [n-1 + step, n + step), where step is one raw unit of the decimal (10^-scale).
+/// Returns None if the value has a fractional part (ceil always returns integers),
+/// if `rescale_decimal` cannot express 1 at this precision/scale, or if adding or
+/// subtracting overflows. NOTE(review): `scale < 0` is not special-cased — confirm.
+fn decimal_preimage_bounds<D: DecimalType>(
+    value: D::Native,
+    precision: u8,
+    scale: i8,
+) -> Option<(D::Native, D::Native)>
+where
+    D::Native: DecimalCast + ArrowNativeTypeOp + std::ops::Rem<Output = D::Native>,
+{
+    let one_scaled: D::Native =
+        rescale_decimal::<D, D>(D::Native::ONE, 1, 0, precision, scale)?;
+
+    if scale > 0 && value % one_scaled != D::Native::ZERO {
+        return None;
+    }
+
+    let lower = if scale == 0 {
+        value
+    } else {
+        let lower_base = value.sub_checked(one_scaled).ok()?;
+        lower_base.add_checked(D::Native::ONE).ok()?
+    };
+
+    let upper = value.add_checked(D::Native::ONE).ok()?;
+
+    Some((lower, upper))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow_buffer::i256;
+    use datafusion_expr::col;
+
+    /// Helper to test valid preimage cases that should return a Range
+    fn assert_preimage_range(
+        input: ScalarValue,
+        expected_lower: ScalarValue,
+        expected_upper: ScalarValue,
+    ) {
+        let ceil_func = CeilFunc::new();
+        let args = vec![col("x")];
+        let lit_expr = Expr::Literal(input.clone(), None);
+        let info = SimplifyContext::default();
+
+        let result = ceil_func.preimage(&args, &lit_expr, &info).unwrap();
+
+        match result {
+            PreimageResult::Range { expr, interval } => {
+                assert_eq!(expr, col("x"));
+                assert_eq!(interval.lower().clone(), expected_lower);
+                assert_eq!(interval.upper().clone(), expected_upper);
+            }
+            PreimageResult::None => {
+                panic!("Expected Range, got None for input {input:?}")
+            }
+        }
+    }
+
+    /// Helper to test cases that should return None
+    fn assert_preimage_none(input: ScalarValue) {
+        let ceil_func = CeilFunc::new();
+        let args = vec![col("x")];
+        let lit_expr = Expr::Literal(input.clone(), None);
+        let info = SimplifyContext::default();
+
+        let result = ceil_func.preimage(&args, &lit_expr, &info).unwrap();
+        assert!(
+            matches!(result, PreimageResult::None),
+            "Expected None for input {input:?}"
+        );
+    }
+
+    #[test]
+    fn test_ceil_preimage_valid_cases() {
+        assert_preimage_range(
+            ScalarValue::Float64(Some(100.0)),
+            ScalarValue::Float64(Some(next_up(99.0))),
+            ScalarValue::Float64(Some(next_up(100.0))),
+        );
+        assert_preimage_range(
+            ScalarValue::Float32(Some(50.0)),
+            ScalarValue::Float32(Some(next_up(49.0))),
+            ScalarValue::Float32(Some(next_up(50.0))),
+        );
+        assert_preimage_range(
+            ScalarValue::Int64(Some(42)),
+            ScalarValue::Int64(Some(42)),
+            ScalarValue::Int64(Some(43)),
+        );
+        assert_preimage_range(
+            ScalarValue::Int32(Some(100)),
+            ScalarValue::Int32(Some(100)),
+            ScalarValue::Int32(Some(101)),
+        );
+        assert_preimage_range(
+            ScalarValue::Float64(Some(-5.0)),
+            ScalarValue::Float64(Some(next_up(-6.0))),
+            ScalarValue::Float64(Some(next_up(-5.0))),
+        );
+        assert_preimage_range(
+            ScalarValue::Float64(Some(0.0)),
+            ScalarValue::Float64(Some(next_up(-1.0))),
+            ScalarValue::Float64(Some(next_up(0.0))),
+        );
+    }
+
+    #[test]
+    fn test_ceil_preimage_non_integer_float() {
+        assert_preimage_none(ScalarValue::Float64(Some(1.3)));
+        assert_preimage_none(ScalarValue::Float64(Some(-2.5)));
+        assert_preimage_none(ScalarValue::Float32(Some(3.7)));
+    }
+
+    #[test]
+    fn test_ceil_preimage_integer_overflow() {
+        assert_preimage_none(ScalarValue::Int64(Some(i64::MAX)));
+        assert_preimage_none(ScalarValue::Int32(Some(i32::MAX)));
+        assert_preimage_none(ScalarValue::Int16(Some(i16::MAX)));
+        assert_preimage_none(ScalarValue::Int8(Some(i8::MAX)));
+    }
+
+    #[test]
+    fn test_ceil_preimage_float_edge_cases() {
+        assert_preimage_none(ScalarValue::Float64(Some(f64::INFINITY)));
+        assert_preimage_none(ScalarValue::Float64(Some(f64::NEG_INFINITY)));
+        assert_preimage_none(ScalarValue::Float64(Some(f64::NAN)));
+        assert_preimage_none(ScalarValue::Float64(Some(f64::MAX)));
+
+        assert_preimage_none(ScalarValue::Float32(Some(f32::INFINITY)));
+        assert_preimage_none(ScalarValue::Float32(Some(f32::NEG_INFINITY)));
+        assert_preimage_none(ScalarValue::Float32(Some(f32::NAN)));
+        assert_preimage_none(ScalarValue::Float32(Some(f32::MAX)));
+    }
+
+    #[test]
+    fn test_ceil_preimage_null_values() {
+        assert_preimage_none(ScalarValue::Float64(None));
+        assert_preimage_none(ScalarValue::Float32(None));
+        assert_preimage_none(ScalarValue::Int64(None));
+    }
+
+    #[test]
+    fn test_ceil_preimage_decimal_valid_cases() {
+        assert_preimage_range(
+            ScalarValue::Decimal32(Some(10000), 9, 2),
+            ScalarValue::Decimal32(Some(9901), 9, 2),
+            ScalarValue::Decimal32(Some(10001), 9, 2),
+        );
+        assert_preimage_range(
+            ScalarValue::Decimal32(Some(-500), 9, 2),
+            ScalarValue::Decimal32(Some(-599), 9, 2),
+            ScalarValue::Decimal32(Some(-499), 9, 2),
+        );
+        assert_preimage_range(
+            ScalarValue::Decimal32(Some(0), 9, 2),
+            ScalarValue::Decimal32(Some(-99), 9, 2),
+            ScalarValue::Decimal32(Some(1), 9, 2),
+        );
+        assert_preimage_range(
+            ScalarValue::Decimal32(Some(42), 9, 0),
+            ScalarValue::Decimal32(Some(42), 9, 0),
+            ScalarValue::Decimal32(Some(43), 9, 0),
+        );
+
+        assert_preimage_range(
+            ScalarValue::Decimal64(Some(10000), 18, 2),
+            ScalarValue::Decimal64(Some(9901), 18, 2),
+            ScalarValue::Decimal64(Some(10001), 18, 2),
+        );
+        assert_preimage_range(
+            ScalarValue::Decimal64(Some(-500), 18, 2),
+            ScalarValue::Decimal64(Some(-599), 18, 2),
+            ScalarValue::Decimal64(Some(-499), 18, 2),
+        );
+
+        assert_preimage_range(
+            ScalarValue::Decimal128(Some(10000), 38, 2),
+            ScalarValue::Decimal128(Some(9901), 38, 2),
+            ScalarValue::Decimal128(Some(10001), 38, 2),
+        );
+        assert_preimage_range(
+            ScalarValue::Decimal128(Some(-500), 38, 2),
+            ScalarValue::Decimal128(Some(-599), 38, 2),
+            ScalarValue::Decimal128(Some(-499), 38, 2),
+        );
+
+        assert_preimage_range(
+            ScalarValue::Decimal256(Some(i256::from(10000)), 76, 2),
+            ScalarValue::Decimal256(Some(i256::from(9901)), 76, 2),
+            ScalarValue::Decimal256(Some(i256::from(10001)), 76, 2),
+        );
+        assert_preimage_range(
+            ScalarValue::Decimal256(Some(i256::from(-500)), 76, 2),
+            ScalarValue::Decimal256(Some(i256::from(-599)), 76, 2),
+            ScalarValue::Decimal256(Some(i256::from(-499)), 76, 2),
+        );
+    }
+
+    #[test]
+    fn test_ceil_preimage_decimal_non_integer() {
+        assert_preimage_none(ScalarValue::Decimal32(Some(130), 9, 2));
+        assert_preimage_none(ScalarValue::Decimal32(Some(-250), 9, 2));
+        assert_preimage_none(ScalarValue::Decimal32(Some(370), 9, 2));
+        assert_preimage_none(ScalarValue::Decimal32(Some(1), 9, 2));
+
+        assert_preimage_none(ScalarValue::Decimal64(Some(130), 18, 2));
+        assert_preimage_none(ScalarValue::Decimal64(Some(-250), 18, 2));
+
+        assert_preimage_none(ScalarValue::Decimal128(Some(130), 38, 2));
+        assert_preimage_none(ScalarValue::Decimal128(Some(-250), 38, 2));
+
+        assert_preimage_none(ScalarValue::Decimal256(Some(i256::from(130)), 76, 2));
+        assert_preimage_none(ScalarValue::Decimal256(Some(i256::from(-250)), 76, 2));
+    }
+
+    #[test]
+    fn test_ceil_preimage_decimal_overflow() {
+        assert_preimage_none(ScalarValue::Decimal32(Some(i32::MAX), 10, 0));
+        assert_preimage_none(ScalarValue::Decimal64(Some(i64::MAX), 19, 0));
+    }
+
+    #[test]
+    fn test_ceil_preimage_decimal_null() {
+        assert_preimage_none(ScalarValue::Decimal32(None, 9, 2));
+        assert_preimage_none(ScalarValue::Decimal64(None, 18, 2));
+        assert_preimage_none(ScalarValue::Decimal128(None, 38, 2));
+        assert_preimage_none(ScalarValue::Decimal256(None, 76, 2));
+    }
+}
diff --git a/datafusion/sqllogictest/test_files/ceil_preimage.slt b/datafusion/sqllogictest/test_files/ceil_preimage.slt
new file mode 100644
index 0000000000000..ab748b6f13089
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/ceil_preimage.slt
@@ -0,0 +1,242 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+
+#   http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##########
+## Ceil Preimage Tests
+##
+## Tests for ceil function preimage optimization:
+## ceil(col) = N transforms to col > N - 1 AND col <= N
+##
+## Uses representative types only (Float64, Int32, Decimal128).
+## Unit tests cover all type variants.
+##########
+
+# Setup: Single table with representative types
+statement ok
+CREATE TABLE test_data (
+    id INT,
+    float_val DOUBLE,
+    int_val INT,
+    decimal_val DECIMAL(10,2)
+) AS VALUES
+    (1, 5.3,   100, 100.00),
+    (2, 5.7,   101, 100.50),
+    (3, 6.0,   102, 101.00),
+    (4, 6.5,   -5,  101.99),
+    (5, 7.0,   0,   102.00),
+    (6, NULL,  NULL, NULL);
+
+##########
+## Data Correctness Tests
+##########
+
+# Float64: ceil(x) = 6 matches values in (5.0, 6.0]
+query I rowsort
+SELECT id FROM test_data WHERE ceil(float_val) = arrow_cast(6, 'Float64');
+----
+1
+2
+3
+
+# Int32: ceil(x) = 100 matches values in [100, 101)
+query I rowsort
+SELECT id FROM test_data WHERE ceil(int_val) = 100;
+----
+1
+
+# Decimal128: ceil(x) = 100 matches values in (99.00, 100.00]
+query I rowsort
+SELECT id FROM test_data WHERE ceil(decimal_val) = arrow_cast(100, 'Decimal128(10,2)');
+----
+1
+
+# Negative value: ceil(x) = -5 matches values in (-6, -5]
+query I rowsort
+SELECT id FROM test_data WHERE ceil(int_val) = -5;
+----
+4
+
+# Zero value: ceil(x) = 0 matches values in (-1, 0]
+query I rowsort
+SELECT id FROM test_data WHERE ceil(int_val) = 0;
+----
+5
+
+# Column on RHS (same result as LHS)
+query I rowsort
+SELECT id FROM test_data WHERE arrow_cast(6, 'Float64') = ceil(float_val);
+----
+1
+2
+3
+
+# IS NOT DISTINCT FROM (excludes NULLs)
+query I rowsort
+SELECT id FROM test_data WHERE ceil(float_val) IS NOT DISTINCT FROM arrow_cast(6, 'Float64');
+----
+1
+2
+3
+
+# IS DISTINCT FROM (includes NULLs)
+query I rowsort
+SELECT id FROM test_data WHERE ceil(float_val) IS DISTINCT FROM arrow_cast(6, 'Float64');
+----
+4
+5
+6
+
+# Non-integer literal (empty result - ceil returns integers)
+query I rowsort
+SELECT id FROM test_data WHERE ceil(float_val) = arrow_cast(5.5, 'Float64');
+----
+
+# IN list: ceil(x) IN (6, 7) matches values in (5.0, 7.0]
+query I rowsort
+SELECT id FROM test_data WHERE ceil(float_val) IN (arrow_cast(6, 'Float64'), arrow_cast(7, 'Float64'));
+----
+1
+2
+3
+4
+5
+
+# NOT IN list: ceil(x) NOT IN (6, 7) excludes matching ranges and NULLs
+query I rowsort
+SELECT id FROM test_data WHERE ceil(float_val) NOT IN (arrow_cast(6, 'Float64'), arrow_cast(7, 'Float64'));
+----
+
+##########
+## EXPLAIN Tests - Plan Optimization
+##########
+
+statement ok
+set datafusion.explain.logical_plan_only = true;
+
+# 1. Basic: Float64 - ceil(col) = N transforms to col >= next_up(N-1) AND col < next_up(N)
+query TT
+EXPLAIN SELECT * FROM test_data WHERE ceil(float_val) = arrow_cast(6, 'Float64');
+----
+logical_plan
+01)Filter: test_data.float_val >= Float64(5.000000000000001) AND test_data.float_val < Float64(6.000000000000001)
+02)--TableScan: test_data projection=[id, float_val, int_val, decimal_val]
+
+# 2. Basic: Int32 - transformed (coerced to Float64)
+query TT
+EXPLAIN SELECT * FROM test_data WHERE ceil(int_val) = 100;
+----
+logical_plan
+01)Projection: test_data.id, test_data.float_val, test_data.int_val, test_data.decimal_val
+02)--Filter: __common_expr_3 >= Float64(99.00000000000001) AND __common_expr_3 < Float64(100.00000000000001)
+03)----Projection: CAST(test_data.int_val AS Float64) AS __common_expr_3, test_data.id, test_data.float_val, test_data.int_val, test_data.decimal_val
+04)------TableScan: test_data projection=[id, float_val, int_val, decimal_val]
+
+# 3. Basic: Decimal128 - same transformation
+query TT
+EXPLAIN SELECT * FROM test_data WHERE ceil(decimal_val) = arrow_cast(100, 'Decimal128(10,2)');
+----
+logical_plan
+01)Filter: test_data.decimal_val >= Decimal128(Some(9901),10,2) AND test_data.decimal_val < Decimal128(Some(10001),10,2)
+02)--TableScan: test_data projection=[id, float_val, int_val, decimal_val]
+
+# 4. Column on RHS - same transformation
+query TT
+EXPLAIN SELECT * FROM test_data WHERE arrow_cast(6, 'Float64') = ceil(float_val);
+----
+logical_plan
+01)Filter: test_data.float_val >= Float64(5.000000000000001) AND test_data.float_val < Float64(6.000000000000001)
+02)--TableScan: test_data projection=[id, float_val, int_val, decimal_val]
+
+# 5. IS NOT DISTINCT FROM - adds IS NOT NULL
+query TT
+EXPLAIN SELECT * FROM test_data WHERE ceil(float_val) IS NOT DISTINCT FROM arrow_cast(6, 'Float64');
+----
+logical_plan
+01)Filter: test_data.float_val IS NOT NULL AND test_data.float_val >= Float64(5.000000000000001) AND test_data.float_val < Float64(6.000000000000001)
+02)--TableScan: test_data projection=[id, float_val, int_val, decimal_val]
+
+# 6. IS DISTINCT FROM - includes NULL check
+query TT
+EXPLAIN SELECT * FROM test_data WHERE ceil(float_val) IS DISTINCT FROM arrow_cast(6, 'Float64');
+----
+logical_plan
+01)Filter: test_data.float_val < Float64(5.000000000000001) OR test_data.float_val >= Float64(6.000000000000001) OR test_data.float_val IS NULL
+02)--TableScan: test_data projection=[id, float_val, int_val, decimal_val]
+
+# 7. Non-optimizable: non-integer literal (original predicate preserved)
+query TT
+EXPLAIN SELECT * FROM test_data WHERE ceil(float_val) = arrow_cast(5.5, 'Float64');
+----
+logical_plan
+01)Filter: ceil(test_data.float_val) = Float64(5.5)
+02)--TableScan: test_data projection=[id, float_val, int_val, decimal_val]
+
+# 8. Non-optimizable: extreme float literal (2^53) where N-1 loses precision, so preimage returns None
+query TT
+EXPLAIN SELECT * FROM test_data WHERE ceil(float_val) = 9007199254740992;
+----
+logical_plan
+01)Filter: ceil(test_data.float_val) = Float64(9007199254740992)
+02)--TableScan: test_data projection=[id, float_val, int_val, decimal_val]
+
+# 9. IN list: each list item is rewritten with preimage and OR-ed together
+query TT
+EXPLAIN SELECT * FROM test_data WHERE ceil(float_val) IN (arrow_cast(6, 'Float64'), arrow_cast(7, 'Float64'));
+----
+logical_plan
+01)Filter: test_data.float_val >= Float64(5.000000000000001) AND test_data.float_val < Float64(6.000000000000001) OR test_data.float_val >= Float64(6.000000000000001) AND test_data.float_val < Float64(7.000000000000001)
+02)--TableScan: test_data projection=[id, float_val, int_val, decimal_val]
+
+# Data correctness: ceil(col) = 2^53 returns no rows (no value in test_data has ceil exactly 2^53)
+query I rowsort
+SELECT id FROM test_data WHERE ceil(float_val) = 9007199254740992;
+----
+
+##########
+## Other Comparison Operators
+##
+## The preimage framework automatically handles all comparison operators:
+##   ceil(x) <> N  ->  x < next_up(N-1) OR x >= next_up(N)
+##   ceil(x) > N   ->  x >= next_up(N)
+##   ceil(x) < N   ->  x < next_up(N-1)
+##   ceil(x) >= N  ->  x >= next_up(N-1)
+##   ceil(x) <= N  ->  x < next_up(N)
+##########
+
+# Data correctness tests for other operators
+
+# Not equals: ceil(x) <> 6 matches values outside (5.0, 6.0]
+query I rowsort
+SELECT id FROM test_data WHERE ceil(float_val) <> arrow_cast(6, 'Float64');
+----
+4
+5
+
+# Greater than: ceil(x) > 6 matches values in (6.0, inf)
+query I rowsort
+SELECT id FROM test_data WHERE ceil(float_val) > arrow_cast(6, 'Float64');
+----
+4
+5
+
+# Less than: ceil(x) < 7 matches values in (-inf, 6.0]
+query I rowsort
+SELECT id FROM test_data WHERE ceil(float_val) < arrow_cast(7, 'Float64');
+----
+1
+2
+3

From 21e1506831b6260629079ad10ed56fa5a5512328 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn <kosiew@gmail.com>
Date: Thu, 12 Feb 2026 19:21:25 +0800
Subject: [PATCH 2/7] test: Add precision boundary tests for ceil function
 preimage optimization

---
 datafusion/functions/src/math/ceil.rs              | 14 ++++++++++++++
 .../sqllogictest/test_files/ceil_preimage.slt      | 14 +++++++++++---
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/datafusion/functions/src/math/ceil.rs b/datafusion/functions/src/math/ceil.rs
index 348d6dd2f9cc1..84b0a4ca8eea9 100644
--- a/datafusion/functions/src/math/ceil.rs
+++ b/datafusion/functions/src/math/ceil.rs
@@ -489,6 +489,20 @@ mod tests {
         assert_preimage_none(ScalarValue::Float32(Some(f32::MAX)));
     }
 
+    #[test]
+    fn test_ceil_preimage_float_precision_boundaries() {
+        // 2^53 is exactly representable, and so is 2^53 - 1, so preimage rewrite is valid.
+        assert_preimage_range(
+            ScalarValue::Float64(Some(9_007_199_254_740_992.0)),
+            ScalarValue::Float64(Some(9_007_199_254_740_992.0)),
+            ScalarValue::Float64(Some(9_007_199_254_740_994.0)),
+        );
+
+        // Above 2^53, adjacent integer spacing changes and `n - 1` can collapse to `n`.
+        // In that case we conservatively skip preimage rewrite.
+        assert_preimage_none(ScalarValue::Float64(Some(9_007_199_254_740_996.0)));
+    }
+
     #[test]
     fn test_ceil_preimage_null_values() {
         assert_preimage_none(ScalarValue::Float64(None));
diff --git a/datafusion/sqllogictest/test_files/ceil_preimage.slt b/datafusion/sqllogictest/test_files/ceil_preimage.slt
index ab748b6f13089..f7124526b0be5 100644
--- a/datafusion/sqllogictest/test_files/ceil_preimage.slt
+++ b/datafusion/sqllogictest/test_files/ceil_preimage.slt
@@ -185,15 +185,23 @@ logical_plan
 01)Filter: ceil(test_data.float_val) = Float64(5.5)
 02)--TableScan: test_data projection=[id, float_val, int_val, decimal_val]
 
-# 8. Non-optimizable: extreme float literal (2^53) where N-1 loses precision, so preimage returns None
+# 8. Boundary: 2^53 is still optimizable for Float64 (2^53 and 2^53-1 are both representable)
 query TT
 EXPLAIN SELECT * FROM test_data WHERE ceil(float_val) = 9007199254740992;
 ----
 logical_plan
-01)Filter: ceil(test_data.float_val) = Float64(9007199254740992)
+01)Filter: test_data.float_val >= Float64(9007199254740992) AND test_data.float_val < Float64(9007199254740994)
 02)--TableScan: test_data projection=[id, float_val, int_val, decimal_val]
 
-# 9. IN list: each list item is rewritten with preimage and OR-ed together
+# 9. Non-optimizable: precision-loss case above 2^53 where N-1 rounds back to N in Float64
+query TT
+EXPLAIN SELECT * FROM test_data WHERE ceil(float_val) = 9007199254740996;
+----
+logical_plan
+01)Filter: ceil(test_data.float_val) = Float64(9007199254740996)
+02)--TableScan: test_data projection=[id, float_val, int_val, decimal_val]
+
+# 10. IN list: each list item is rewritten with preimage and OR-ed together
 query TT
 EXPLAIN SELECT * FROM test_data WHERE ceil(float_val) IN (arrow_cast(6, 'Float64'), arrow_cast(7, 'Float64'));
 ----

From 1420ba23c02e2430488c4dbced8e863b277e9776 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn <kosiew@gmail.com>
Date: Thu, 12 Feb 2026 19:54:13 +0800
Subject: [PATCH 3/7] test: Enhance precision boundary tests for ceil function
 with Float32 cases

---
 datafusion/functions/src/math/ceil.rs | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/datafusion/functions/src/math/ceil.rs b/datafusion/functions/src/math/ceil.rs
index 84b0a4ca8eea9..c7701ab8bf90f 100644
--- a/datafusion/functions/src/math/ceil.rs
+++ b/datafusion/functions/src/math/ceil.rs
@@ -491,7 +491,7 @@ mod tests {
 
     #[test]
     fn test_ceil_preimage_float_precision_boundaries() {
-        // 2^53 is exactly representable, and so is 2^53 - 1, so preimage rewrite is valid.
+        // Float64: 2^53 is exactly representable, and so is 2^53 - 1, so preimage rewrite is valid.
         assert_preimage_range(
             ScalarValue::Float64(Some(9_007_199_254_740_992.0)),
             ScalarValue::Float64(Some(9_007_199_254_740_992.0)),
@@ -501,6 +501,17 @@ mod tests {
         // Above 2^53, adjacent integer spacing changes and `n - 1` can collapse to `n`.
         // In that case we conservatively skip preimage rewrite.
         assert_preimage_none(ScalarValue::Float64(Some(9_007_199_254_740_996.0)));
+
+        // Float32: 2^24 is exactly representable, and so is 2^24 - 1, so preimage rewrite is valid.
+        assert_preimage_range(
+            ScalarValue::Float32(Some(16_777_216.0)),
+            ScalarValue::Float32(Some(next_up(16_777_215.0_f32))),
+            ScalarValue::Float32(Some(next_up(16_777_216.0_f32))),
+        );
+
+        // Above 2^24, adjacent integer spacing changes and `n - 1` can collapse to `n`.
+        // In that case we conservatively skip preimage rewrite.
+        assert_preimage_none(ScalarValue::Float32(Some(16_777_220.0)));
     }
 
     #[test]

From b6244107ce93b149fcf23c600a5ab26a916ae1f1 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn <kosiew@gmail.com>
Date: Thu, 12 Feb 2026 19:55:49 +0800
Subject: [PATCH 4/7] test: Clarify boundary condition for Float64 in ceil
 function preimage optimization

---
 datafusion/sqllogictest/test_files/ceil_preimage.slt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/datafusion/sqllogictest/test_files/ceil_preimage.slt b/datafusion/sqllogictest/test_files/ceil_preimage.slt
index f7124526b0be5..6b1a15f7daefc 100644
--- a/datafusion/sqllogictest/test_files/ceil_preimage.slt
+++ b/datafusion/sqllogictest/test_files/ceil_preimage.slt
@@ -185,7 +185,8 @@ logical_plan
 01)Filter: ceil(test_data.float_val) = Float64(5.5)
 02)--TableScan: test_data projection=[id, float_val, int_val, decimal_val]
 
-# 8. Boundary: 2^53 is still optimizable for Float64 (2^53 and 2^53-1 are both representable)
+# 8. Boundary: 2^53 is the end of the range in which Float64 represents every integer exactly.
+#    At 2^53 (9007199254740992), both 2^53 and 2^53-1 are exactly representable, so preimage optimization is valid.
 query TT
 EXPLAIN SELECT * FROM test_data WHERE ceil(float_val) = 9007199254740992;
 ----

From aebb3e2b79455f98b5041b0338321488d5a31509 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn <kosiew@gmail.com>
Date: Wed, 25 Feb 2026 16:41:23 +0800
Subject: [PATCH 5/7] refactor: Remove redundant comment about argument count
 in ceil function

---
 datafusion/functions/src/math/ceil.rs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/datafusion/functions/src/math/ceil.rs b/datafusion/functions/src/math/ceil.rs
index c7701ab8bf90f..d028b4ba6faf2 100644
--- a/datafusion/functions/src/math/ceil.rs
+++ b/datafusion/functions/src/math/ceil.rs
@@ -251,7 +251,6 @@ impl ScalarUDFImpl for CeilFunc {
         lit_expr: &Expr,
         _info: &SimplifyContext,
     ) -> Result<PreimageResult> {
-        // ceil takes exactly one argument and we do not expect to reach here with multiple arguments.
         debug_assert!(args.len() == 1, "ceil() takes exactly one argument");
 
         let arg = args[0].clone();

From 416a159f42f31499bc74e8660696cf8dba66d0e3 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn <kosiew@gmail.com>
Date: Wed, 25 Feb 2026 17:02:34 +0800
Subject: [PATCH 6/7] Add comment for test_ceil_preimage_decimal_valid_cases

---
 datafusion/functions/src/math/ceil.rs | 70 +++++++++++++++------------
 1 file changed, 40 insertions(+), 30 deletions(-)

diff --git a/datafusion/functions/src/math/ceil.rs b/datafusion/functions/src/math/ceil.rs
index d028b4ba6faf2..b0696076bafcd 100644
--- a/datafusion/functions/src/math/ceil.rs
+++ b/datafusion/functions/src/math/ceil.rs
@@ -522,58 +522,68 @@ mod tests {
 
     #[test]
     fn test_ceil_preimage_decimal_valid_cases() {
+        // Decimal format: raw_value / 10^scale
+        // For ceil(x) = N, preimage is (N-1, N] → [N-1+step, N+step) where step = 10^(-scale)
+
+        // ceil(x) = 100.00: preimage is (99, 100] → [99.01, 100.01)
         assert_preimage_range(
-            ScalarValue::Decimal32(Some(10000), 9, 2),
-            ScalarValue::Decimal32(Some(9901), 9, 2),
-            ScalarValue::Decimal32(Some(10001), 9, 2),
+            ScalarValue::Decimal32(Some(10000), 9, 2), // 100.00
+            ScalarValue::Decimal32(Some(9901), 9, 2),  // 99.01
+            ScalarValue::Decimal32(Some(10001), 9, 2), // 100.01
         );
+        // ceil(x) = -5.00: preimage is (-6, -5] → [-5.99, -4.99)
         assert_preimage_range(
-            ScalarValue::Decimal32(Some(-500), 9, 2),
-            ScalarValue::Decimal32(Some(-599), 9, 2),
-            ScalarValue::Decimal32(Some(-499), 9, 2),
+            ScalarValue::Decimal32(Some(-500), 9, 2), // -5.00
+            ScalarValue::Decimal32(Some(-599), 9, 2), // -5.99
+            ScalarValue::Decimal32(Some(-499), 9, 2), // -4.99
         );
+        // ceil(x) = 0.00: preimage is (-1, 0] → [-0.99, 0.01)
         assert_preimage_range(
-            ScalarValue::Decimal32(Some(0), 9, 2),
-            ScalarValue::Decimal32(Some(-99), 9, 2),
-            ScalarValue::Decimal32(Some(1), 9, 2),
+            ScalarValue::Decimal32(Some(0), 9, 2),   // 0.00
+            ScalarValue::Decimal32(Some(-99), 9, 2), // -0.99
+            ScalarValue::Decimal32(Some(1), 9, 2),   // 0.01
         );
+        // ceil(x) = 42 (scale 0 means integer): preimage is (41, 42] → [42, 43)
         assert_preimage_range(
-            ScalarValue::Decimal32(Some(42), 9, 0),
-            ScalarValue::Decimal32(Some(42), 9, 0),
-            ScalarValue::Decimal32(Some(43), 9, 0),
+            ScalarValue::Decimal32(Some(42), 9, 0), // 42
+            ScalarValue::Decimal32(Some(42), 9, 0), // 42
+            ScalarValue::Decimal32(Some(43), 9, 0), // 43
         );
 
+        // Decimal64 tests: same logic with wider precision
         assert_preimage_range(
-            ScalarValue::Decimal64(Some(10000), 18, 2),
-            ScalarValue::Decimal64(Some(9901), 18, 2),
-            ScalarValue::Decimal64(Some(10001), 18, 2),
+            ScalarValue::Decimal64(Some(10000), 18, 2), // 100.00
+            ScalarValue::Decimal64(Some(9901), 18, 2),  // 99.01
+            ScalarValue::Decimal64(Some(10001), 18, 2), // 100.01
         );
         assert_preimage_range(
-            ScalarValue::Decimal64(Some(-500), 18, 2),
-            ScalarValue::Decimal64(Some(-599), 18, 2),
-            ScalarValue::Decimal64(Some(-499), 18, 2),
+            ScalarValue::Decimal64(Some(-500), 18, 2), // -5.00
+            ScalarValue::Decimal64(Some(-599), 18, 2), // -5.99
+            ScalarValue::Decimal64(Some(-499), 18, 2), // -4.99
         );
 
+        // Decimal128 tests: same logic with even wider precision
         assert_preimage_range(
-            ScalarValue::Decimal128(Some(10000), 38, 2),
-            ScalarValue::Decimal128(Some(9901), 38, 2),
-            ScalarValue::Decimal128(Some(10001), 38, 2),
+            ScalarValue::Decimal128(Some(10000), 38, 2), // 100.00
+            ScalarValue::Decimal128(Some(9901), 38, 2),  // 99.01
+            ScalarValue::Decimal128(Some(10001), 38, 2), // 100.01
         );
         assert_preimage_range(
-            ScalarValue::Decimal128(Some(-500), 38, 2),
-            ScalarValue::Decimal128(Some(-599), 38, 2),
-            ScalarValue::Decimal128(Some(-499), 38, 2),
+            ScalarValue::Decimal128(Some(-500), 38, 2), // -5.00
+            ScalarValue::Decimal128(Some(-599), 38, 2), // -5.99
+            ScalarValue::Decimal128(Some(-499), 38, 2), // -4.99
         );
 
+        // Decimal256 tests: same logic with widest precision
         assert_preimage_range(
-            ScalarValue::Decimal256(Some(i256::from(10000)), 76, 2),
-            ScalarValue::Decimal256(Some(i256::from(9901)), 76, 2),
-            ScalarValue::Decimal256(Some(i256::from(10001)), 76, 2),
+            ScalarValue::Decimal256(Some(i256::from(10000)), 76, 2), // 100.00
+            ScalarValue::Decimal256(Some(i256::from(9901)), 76, 2),  // 99.01
+            ScalarValue::Decimal256(Some(i256::from(10001)), 76, 2), // 100.01
         );
         assert_preimage_range(
-            ScalarValue::Decimal256(Some(i256::from(-500)), 76, 2),
-            ScalarValue::Decimal256(Some(i256::from(-599)), 76, 2),
-            ScalarValue::Decimal256(Some(i256::from(-499)), 76, 2),
+            ScalarValue::Decimal256(Some(i256::from(-500)), 76, 2), // -5.00
+            ScalarValue::Decimal256(Some(i256::from(-599)), 76, 2), // -5.99
+            ScalarValue::Decimal256(Some(i256::from(-499)), 76, 2), // -4.99
         );
     }
 

From 4ce61d0ed1709ff891249a9b98ff61f6fe1d4911 Mon Sep 17 00:00:00 2001
From: Siew Kam Onn <kosiew@gmail.com>
Date: Wed, 25 Feb 2026 17:04:12 +0800
Subject: [PATCH 7/7] Add comment for test_ceil_preimage_valid_cases

---
 datafusion/functions/src/math/ceil.rs | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/datafusion/functions/src/math/ceil.rs b/datafusion/functions/src/math/ceil.rs
index b0696076bafcd..7361d2e5d6b3d 100644
--- a/datafusion/functions/src/math/ceil.rs
+++ b/datafusion/functions/src/math/ceil.rs
@@ -428,31 +428,41 @@ mod tests {
 
     #[test]
     fn test_ceil_preimage_valid_cases() {
+        // For ceil(x) = N, preimage is (N-1, N] mathematically
+        // For floats: use next_up() to get [next_up(N-1), next_up(N)) as a half-open interval
+        // For integers: the interval is simply [N, N+1)
+
+        // ceil(x) = 100.0: preimage is (99, 100] → [next_up(99), next_up(100))
         assert_preimage_range(
             ScalarValue::Float64(Some(100.0)),
             ScalarValue::Float64(Some(next_up(99.0))),
             ScalarValue::Float64(Some(next_up(100.0))),
         );
+        // ceil(x) = 50.0: preimage is (49, 50] → [next_up(49), next_up(50))
         assert_preimage_range(
             ScalarValue::Float32(Some(50.0)),
             ScalarValue::Float32(Some(next_up(49.0))),
             ScalarValue::Float32(Some(next_up(50.0))),
         );
+        // ceil(x) = 42: preimage is (41, 42] → [42, 43) for integers
         assert_preimage_range(
             ScalarValue::Int64(Some(42)),
             ScalarValue::Int64(Some(42)),
             ScalarValue::Int64(Some(43)),
         );
+        // ceil(x) = 100: preimage is (99, 100] → [100, 101) for integers
         assert_preimage_range(
             ScalarValue::Int32(Some(100)),
             ScalarValue::Int32(Some(100)),
             ScalarValue::Int32(Some(101)),
         );
+        // ceil(x) = -5.0: preimage is (-6, -5] → [next_up(-6), next_up(-5))
         assert_preimage_range(
             ScalarValue::Float64(Some(-5.0)),
             ScalarValue::Float64(Some(next_up(-6.0))),
             ScalarValue::Float64(Some(next_up(-5.0))),
         );
+        // ceil(x) = 0.0: preimage is (-1, 0] → [next_up(-1), next_up(0))
         assert_preimage_range(
             ScalarValue::Float64(Some(0.0)),
             ScalarValue::Float64(Some(next_up(-1.0))),