diff --git a/datafusion/physical-expr/src/expressions/binary.rs b/datafusion/physical-expr/src/expressions/binary.rs index 6f0b60556a751..7945cbbe00495 100644 --- a/datafusion/physical-expr/src/expressions/binary.rs +++ b/datafusion/physical-expr/src/expressions/binary.rs @@ -24,9 +24,7 @@ use std::sync::Arc; use arrow::array::*; use arrow::compute::kernels::boolean::{and_kleene, or_kleene}; -use arrow::compute::kernels::concat_elements::{ - concat_element_binary, concat_elements_utf8, -}; +use arrow::compute::kernels::concat_elements::concat_elements_dyn; use arrow::compute::{SlicesIterator, cast, filter_record_batch}; use arrow::datatypes::*; use arrow::error::ArrowError; @@ -50,8 +48,7 @@ use kernels::{ bitwise_and_dyn, bitwise_and_dyn_scalar, bitwise_or_dyn, bitwise_or_dyn_scalar, bitwise_shift_left_dyn, bitwise_shift_left_dyn_scalar, bitwise_shift_right_dyn, bitwise_shift_right_dyn_scalar, bitwise_xor_dyn, bitwise_xor_dyn_scalar, - concat_elements_binary_view_array, concat_elements_utf8view, regex_match_dyn, - regex_match_dyn_scalar, + regex_match_dyn, regex_match_dyn_scalar, }; /// Binary expression @@ -833,7 +830,7 @@ impl BinaryExpr { BitwiseXor => bitwise_xor_dyn(left, right), BitwiseShiftRight => bitwise_shift_right_dyn(left, right), BitwiseShiftLeft => bitwise_shift_left_dyn(left, right), - StringConcat => concat_elements(&left, &right), + StringConcat => concat_elements_dyn(&left, &right).map_err(|e| e.into()), AtArrow | ArrowAt | Arrow | LongArrow | HashArrow | HashLongArrow | AtAt | HashMinus | AtQuestion | Question | QuestionAnd | QuestionPipe | IntegerDivide | Colon => { @@ -1053,40 +1050,6 @@ fn pre_selection_scatter( Ok(ColumnarValue::Array(Arc::new(boolean_result))) } -fn concat_elements(left: &ArrayRef, right: &ArrayRef) -> Result { - Ok(match left.data_type() { - DataType::Utf8 => Arc::new(concat_elements_utf8( - left.as_string::(), - right.as_string::(), - )?), - DataType::LargeUtf8 => Arc::new(concat_elements_utf8( - left.as_string::(), - right.as_string::(), - )?), - DataType::Utf8View => Arc::new(concat_elements_utf8view( - left.as_string_view(), - right.as_string_view(), - )?), - DataType::Binary => Arc::new(concat_element_binary::( - left.as_binary(), - right.as_binary(), - )?), - DataType::LargeBinary => Arc::new(concat_element_binary::( - left.as_binary(), - right.as_binary(), - )?), - DataType::BinaryView => Arc::new(concat_elements_binary_view_array( - left.as_binary_view(), - right.as_binary_view(), - )?), - other => { - return internal_err!( - "Data type {other:?} not supported for binary operation 'concat_elements' on string arrays" - ); - } - }) -} - /// Create a binary expression whose arguments are correctly coerced. /// This function errors if it is not possible to coerce the arguments /// to computational types supported by the operator. diff --git a/datafusion/physical-expr/src/expressions/binary/kernels.rs b/datafusion/physical-expr/src/expressions/binary/kernels.rs index e573d7ece2afa..39e9c40dbdf24 100644 --- a/datafusion/physical-expr/src/expressions/binary/kernels.rs +++ b/datafusion/physical-expr/src/expressions/binary/kernels.rs @@ -18,7 +18,6 @@ //! This module contains computation kernels that are specific to //! datafusion and not (yet) targeted to port upstream to arrow use arrow::array::*; -use arrow::buffer::{MutableBuffer, NullBuffer}; use arrow::compute::kernels::bitwise::{ bitwise_and, bitwise_and_scalar, bitwise_or, bitwise_or_scalar, bitwise_shift_left, bitwise_shift_left_scalar, bitwise_shift_right, bitwise_shift_right_scalar, @@ -27,7 +26,6 @@ use arrow::compute::kernels::bitwise::{ use arrow::compute::kernels::boolean::not; use arrow::compute::kernels::comparison::{regexp_is_match, regexp_is_match_scalar}; use arrow::datatypes::DataType; -use arrow::error::ArrowError; use datafusion_common::{Result, ScalarValue}; use datafusion_common::{internal_err, plan_err}; @@ -161,93 +159,6 @@ create_left_integral_dyn_scalar_kernel!( bitwise_shift_left_scalar ); -/// Concatenates two `StringViewArray`s element-wise. -/// If either element is `Null`, the result element is also `Null`. -/// -/// # Errors -/// - Returns an error if the input arrays have different lengths. -/// - Returns an error if any concatenated string exceeds `u32::MAX` (≈4 GB) in length. -pub fn concat_elements_utf8view( - left: &StringViewArray, - right: &StringViewArray, -) -> std::result::Result { - if left.len() != right.len() { - return Err(ArrowError::ComputeError(format!( - "Arrays must have the same length: {} != {}", - left.len(), - right.len() - ))); - } - let mut result = StringViewBuilder::with_capacity(left.len()); - - // Avoid reallocations by writing to a reused buffer (note we could be even - // more efficient by creating the view directly here and avoid the buffer - // but that would be more complex) - let mut buffer = String::new(); - - // Pre-compute combined null bitmap, so the per-row NULL check is more - // efficient - let nulls = NullBuffer::union(left.nulls(), right.nulls()); - - for i in 0..left.len() { - if nulls.as_ref().is_some_and(|n| n.is_null(i)) { - result.append_null(); - } else { - let l = left.value(i); - let r = right.value(i); - buffer.clear(); - buffer.push_str(l); - buffer.push_str(r); - result.try_append_value(&buffer)?; - } - } - Ok(result.finish()) -} - -/// Concatenates two `BinaryViewArray`s element-wise. -/// If either element is `Null`, the result element is also `Null`. -/// -/// # Errors -/// - Returns an error if the input arrays have different lengths. -/// - Returns an error if any concatenated string exceeds `u32::MAX` in length. -pub fn concat_elements_binary_view_array( - left: &BinaryViewArray, - right: &BinaryViewArray, -) -> std::result::Result { - if left.len() != right.len() { - return Err(ArrowError::ComputeError(format!( - "Arrays must have the same length: {} != {}", - left.len(), - right.len() - ))); - } - let mut result = BinaryViewBuilder::with_capacity(left.len()); - - // Avoid reallocations by writing to a reused buffer (note we could be even - // more efficient by creating the view directly here and avoid the buffer - // but that would be more complex) - let mut buffer = MutableBuffer::new(0); - - // Pre-compute combined null bitmap, so the per-row NULL check is more - // efficient - let nulls = NullBuffer::union(left.nulls(), right.nulls()); - - for i in 0..left.len() { - if nulls.as_ref().is_some_and(|n| n.is_null(i)) { - result.append_null(); - } else { - let l = left.value(i); - let r = right.value(i); - buffer.clear(); - buffer.extend_from_slice(l); - buffer.extend_from_slice(r); - // No try-version of append_value - result.try_append_value(&buffer)?; - } - } - Ok(result.finish()) -} - /// Invoke a compute kernel on a pair of binary data arrays with flags macro_rules! regexp_is_match_flag { ($LEFT:expr, $RIGHT:expr, $ARRAYTYPE:ident, $NOT:expr, $FLAG:expr) => {{