apache · shivbhatia10 · Feb 27, 2026 · Feb 27, 2026 · Feb 27, 2026 · Feb 27, 2026
diff --git a/datafusion/spark/src/function/math/ceil.rs b/datafusion/spark/src/function/math/ceil.rs
@@ -0,0 +1,269 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+use std::sync::Arc;
+
+use arrow::array::{AsArray, Decimal128Array};
+use arrow::compute::cast;
+use arrow::datatypes::{DataType, Decimal128Type, Float32Type, Float64Type, Int64Type};
+use datafusion_common::utils::take_function_args;
+use datafusion_common::{Result, exec_err};
+use datafusion_expr::{
+    ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility,
+};
+
+/// Spark-compatible `ceil` expression.
+/// <https://spark.apache.org/docs/latest/api/sql/index.html#ceil>
+///
+/// How this differs from DataFusion's built-in `ceil`:
+///  - Spark's ceil yields Int64 for float and integer inputs, whereas DataFusion
+///    keeps the input type (Float32→Float32, Float64→Float64, integers coerced
+///    to Float64)
+///  - Spark's ceil on Decimal128(p, s) yields Decimal128(p−s+1, 0), dropping the
+///    scale to 0, whereas DataFusion keeps the original precision and scale
+///  - Spark only supports Decimal128; DataFusion also supports Decimal32/64/256
+///  - Spark does not check for decimal overflow; DataFusion errors on overflow
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub struct SparkCeil {
+    /// Accepted argument signature, built by `Signature::numeric(1, ..)`.
+    signature: Signature,
+    /// Additional names this function answers to (currently just `ceiling`).
+    aliases: Vec<String>,
+}
+
+impl SparkCeil {
+    /// Builds the UDF with a one-argument numeric, immutable signature and the
+    /// `ceiling` alias.
+    pub fn new() -> Self {
+        let signature = Signature::numeric(1, Volatility::Immutable);
+        let aliases = vec![String::from("ceiling")];
+        Self { signature, aliases }
+    }
+}
+
+impl Default for SparkCeil {
+    /// Same as [`SparkCeil::new`].
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl ScalarUDFImpl for SparkCeil {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    /// Primary name of the function; `ceiling` is exposed through `aliases`.
+    fn name(&self) -> &str {
+        "ceil"
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    /// Derives the output type from the single argument type:
+    ///  - `Decimal128(p, s)` with `s > 0` becomes `Decimal128(p - s + 1, 0)`,
+    ///    with the precision clamped to `1..=38`
+    ///  - any other `Decimal128(p, s)` is returned unchanged
+    ///  - every other type maps to `Int64`
+    ///
+    /// # Errors
+    /// Returns an error (instead of panicking on an out-of-bounds index) when
+    /// `arg_types` does not contain exactly one element.
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        let [arg_type] = take_function_args(self.name(), arg_types)?;
+        match arg_type {
+            DataType::Decimal128(p, s) if *s > 0 => {
+                // One extra integer digit leaves room for the carry produced by
+                // rounding up (e.g. 9.99 -> 10); widen to i64 so the arithmetic
+                // on u8/i8 cannot wrap before clamping.
+                let new_p = ((*p as i64) - (*s as i64) + 1).clamp(1, 38) as u8;
+                Ok(DataType::Decimal128(new_p, 0))
+            }
+            DataType::Decimal128(p, s) => Ok(DataType::Decimal128(*p, *s)),
+            _ => Ok(DataType::Int64),
+        }
+    }
+
+    /// Evaluates `ceil` over the supplied arguments, forwarding the planned
+    /// return type so decimal results carry the expected precision and scale.
+    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
+        // Both borrows of `args` are shared, so the return type needs no clone.
+        spark_ceil(&args.args, args.return_type())
+    }
+
+    fn aliases(&self) -> &[String] {
+        &self.aliases
+    }
+}
+
+/// Applies Spark's `ceil` to the single argument in `args`.
+///
+/// Float32/Float64 values are rounded up and converted to Int64, integer
+/// inputs are cast to Int64, and Decimal128 inputs with a positive scale are
+/// rounded up to a whole number and tagged with `return_type`. Decimal128
+/// inputs whose scale is not positive pass through untouched. The result is
+/// always returned as `ColumnarValue::Array`, including for scalar inputs.
+///
+/// # Errors
+/// Fails when `args` does not hold exactly one value, when the scalar-to-array
+/// conversion or the integer cast fails, or when the input type is unsupported.
+fn spark_ceil(args: &[ColumnarValue], return_type: &DataType) -> Result<ColumnarValue> {
+    let [arg] = take_function_args("ceil", args)?;
+    // Scalars are expanded into an array so one code path serves both shapes.
+    let values = match arg {
+        ColumnarValue::Scalar(scalar) => scalar.to_array()?,
+        ColumnarValue::Array(array) => Arc::clone(array),
+    };
+
+    let ceiled = match values.data_type() {
+        DataType::Float32 => Arc::new(
+            values
+                .as_primitive::<Float32Type>()
+                .unary::<_, Int64Type>(|v| v.ceil() as i64),
+        ) as _,
+        DataType::Float64 => Arc::new(
+            values
+                .as_primitive::<Float64Type>()
+                .unary::<_, Int64Type>(|v| v.ceil() as i64),
+        ) as _,
+        int_type if int_type.is_integer() => cast(&values, &DataType::Int64)?,
+        DataType::Decimal128(_, scale) if *scale > 0 => {
+            // `unit` is the unscaled representation of 1 at this scale.
+            let unit = 10_i128.pow(*scale as u32);
+            let unscaled = values.as_primitive::<Decimal128Type>();
+            let rounded: Decimal128Array = unscaled.unary(|raw| {
+                // Division truncates toward zero, which is already the ceiling
+                // for negative values; only a positive remainder needs a bump.
+                let (whole, fraction) = (raw / unit, raw % unit);
+                whole + i128::from(fraction > 0)
+            });
+            Arc::new(rounded.with_data_type(return_type.clone()))
+        }
+        DataType::Decimal128(_, _) => values,
+        other => return exec_err!("Unsupported data type {other:?} for function ceil"),
+    };
+
+    Ok(ColumnarValue::Array(ceiled))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use arrow::array::{
+        ArrayRef, Decimal128Array, Float32Array, Float64Array, Int64Array,
+    };
+    use datafusion_common::ScalarValue;
+
+    /// Runs `spark_ceil` on one argument and unwraps the array it returns.
+    fn ceil_to_array(arg: ColumnarValue, return_type: &DataType) -> ArrayRef {
+        match spark_ceil(&[arg], return_type).unwrap() {
+            ColumnarValue::Array(array) => array,
+            _ => panic!("Expected array"),
+        }
+    }
+
+    /// Asserts that `ceil(arg)` evaluates to exactly `expected` as Int64.
+    fn assert_ceil_int64(arg: ColumnarValue, expected: Vec<Option<i64>>) {
+        let actual = ceil_to_array(arg, &DataType::Int64);
+        assert_eq!(
+            actual.as_primitive::<Int64Type>(),
+            &Int64Array::from(expected)
+        );
+    }
+
+    #[test]
+    fn test_ceil_float64() {
+        let input = Float64Array::from(vec![
+            Some(125.2345),
+            Some(15.0001),
+            Some(0.1),
+            Some(-0.9),
+            Some(-1.1),
+            Some(123.0),
+            None,
+        ]);
+        assert_ceil_int64(
+            ColumnarValue::Array(Arc::new(input)),
+            vec![
+                Some(126),
+                Some(16),
+                Some(1),
+                Some(0),
+                Some(-1),
+                Some(123),
+                None,
+            ],
+        );
+    }
+
+    #[test]
+    fn test_ceil_float32() {
+        let input = Float32Array::from(vec![
+            Some(125.2345f32),
+            Some(15.0001f32),
+            Some(0.1f32),
+            Some(-0.9f32),
+            Some(-1.1f32),
+            Some(123.0f32),
+            None,
+        ]);
+        assert_ceil_int64(
+            ColumnarValue::Array(Arc::new(input)),
+            vec![
+                Some(126),
+                Some(16),
+                Some(1),
+                Some(0),
+                Some(-1),
+                Some(123),
+                None,
+            ],
+        );
+    }
+
+    #[test]
+    fn test_ceil_int64() {
+        let input = Int64Array::from(vec![Some(1), Some(-1), None]);
+        assert_ceil_int64(
+            ColumnarValue::Array(Arc::new(input)),
+            vec![Some(1), Some(-1), None],
+        );
+    }
+
+    #[test]
+    fn test_ceil_decimal128() {
+        // Input is Decimal128(10, 2): 150 = 1.50, -150 = -1.50, 100 = 1.00
+        let return_type = DataType::Decimal128(9, 0);
+        let input = Decimal128Array::from(vec![Some(150), Some(-150), Some(100), None])
+            .with_data_type(DataType::Decimal128(10, 2));
+        let actual = ceil_to_array(ColumnarValue::Array(Arc::new(input)), &return_type);
+        let expected = Decimal128Array::from(vec![Some(2), Some(-1), Some(1), None])
+            .with_data_type(return_type);
+        assert_eq!(actual.as_primitive::<Decimal128Type>(), &expected);
+    }
+
+    #[test]
+    fn test_ceil_float64_scalar() {
+        let input = ScalarValue::Float64(Some(-1.1));
+        assert_ceil_int64(ColumnarValue::Scalar(input), vec![Some(-1)]);
+    }
+
+    #[test]
+    fn test_ceil_float32_scalar() {
+        let input = ScalarValue::Float32(Some(125.2345f32));
+        assert_ceil_int64(ColumnarValue::Scalar(input), vec![Some(126)]);
+    }
+
+    #[test]
+    fn test_ceil_int64_scalar() {
+        let input = ScalarValue::Int64(Some(48));
+        assert_ceil_int64(ColumnarValue::Scalar(input), vec![Some(48)]);
+    }
+}
diff --git a/datafusion/spark/src/function/math/mod.rs b/datafusion/spark/src/function/math/mod.rs
@@ -17,6 +17,7 @@
 
 pub mod abs;
 pub mod bin;
+pub mod ceil;
 pub mod expm1;
 pub mod factorial;
 pub mod hex;
@@ -32,6 +33,7 @@ use datafusion_functions::make_udf_function;
 use std::sync::Arc;
 
 make_udf_function!(abs::SparkAbs, abs);
+make_udf_function!(ceil::SparkCeil, ceil);
 make_udf_function!(expm1::SparkExpm1, expm1);
 make_udf_function!(factorial::SparkFactorial, factorial);
 make_udf_function!(hex::SparkHex, hex);
@@ -49,6 +51,11 @@ pub mod expr_fn {
     use datafusion_functions::export_functions;
 
     export_functions!((abs, "Returns abs(expr)", arg1));
+    export_functions!((
+        ceil,
+        "Returns the smallest integer not less than expr.",
+        arg1
+    ));
     export_functions!((expm1, "Returns exp(expr) - 1 as a Float64.", arg1));
     export_functions!((
         factorial,
@@ -82,6 +89,7 @@ pub mod expr_fn {
 pub fn functions() -> Vec<Arc<ScalarUDF>> {
     vec![
         abs(),
+        ceil(),
         expm1(),
         factorial(),
         hex(),