Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 3 additions & 40 deletions datafusion/physical-expr/src/expressions/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ use std::sync::Arc;

use arrow::array::*;
use arrow::compute::kernels::boolean::{and_kleene, or_kleene};
use arrow::compute::kernels::concat_elements::{
concat_element_binary, concat_elements_utf8,
};
use arrow::compute::kernels::concat_elements::concat_elements_dyn;
use arrow::compute::{SlicesIterator, cast, filter_record_batch};
use arrow::datatypes::*;
use arrow::error::ArrowError;
Expand All @@ -50,8 +48,7 @@ use kernels::{
bitwise_and_dyn, bitwise_and_dyn_scalar, bitwise_or_dyn, bitwise_or_dyn_scalar,
bitwise_shift_left_dyn, bitwise_shift_left_dyn_scalar, bitwise_shift_right_dyn,
bitwise_shift_right_dyn_scalar, bitwise_xor_dyn, bitwise_xor_dyn_scalar,
concat_elements_binary_view_array, concat_elements_utf8view, regex_match_dyn,
regex_match_dyn_scalar,
regex_match_dyn, regex_match_dyn_scalar,
};

/// Binary expression
Expand Down Expand Up @@ -833,7 +830,7 @@ impl BinaryExpr {
BitwiseXor => bitwise_xor_dyn(left, right),
BitwiseShiftRight => bitwise_shift_right_dyn(left, right),
BitwiseShiftLeft => bitwise_shift_left_dyn(left, right),
StringConcat => concat_elements(&left, &right),
StringConcat => concat_elements_dyn(&left, &right).map_err(|e| e.into()),
AtArrow | ArrowAt | Arrow | LongArrow | HashArrow | HashLongArrow | AtAt
| HashMinus | AtQuestion | Question | QuestionAnd | QuestionPipe
| IntegerDivide | Colon => {
Expand Down Expand Up @@ -1053,40 +1050,6 @@ fn pre_selection_scatter(
Ok(ColumnarValue::Array(Arc::new(boolean_result)))
}

/// Concatenates two string or binary arrays element-wise, dispatching on the
/// data type of `left`.
///
/// Supported data types are `Utf8`, `LargeUtf8`, `Utf8View`, `Binary`,
/// `LargeBinary` and `BinaryView`; both inputs must share the same type.
///
/// # Errors
/// - Returns an internal error if `left` and `right` have different data types.
/// - Returns an internal error if the data type is not one of the supported
///   string / binary types.
/// - Propagates any error returned by the underlying concatenation kernel.
fn concat_elements(left: &ArrayRef, right: &ArrayRef) -> Result<ArrayRef> {
    // Only `left` drives the dispatch below, while `right` is downcast with the
    // same `as_*` accessor. Those accessors panic on a type mismatch, so reject
    // mismatched inputs up front with a regular error instead.
    let (left_type, right_type) = (left.data_type(), right.data_type());
    if left_type != right_type {
        return internal_err!(
            "Data types {left_type:?} and {right_type:?} do not match for binary operation 'concat_elements' on string arrays"
        );
    }

    Ok(match left_type {
        DataType::Utf8 => Arc::new(concat_elements_utf8(
            left.as_string::<i32>(),
            right.as_string::<i32>(),
        )?),
        DataType::LargeUtf8 => Arc::new(concat_elements_utf8(
            left.as_string::<i64>(),
            right.as_string::<i64>(),
        )?),
        DataType::Utf8View => Arc::new(concat_elements_utf8view(
            left.as_string_view(),
            right.as_string_view(),
        )?),
        DataType::Binary => Arc::new(concat_element_binary::<i32>(
            left.as_binary(),
            right.as_binary(),
        )?),
        DataType::LargeBinary => Arc::new(concat_element_binary::<i64>(
            left.as_binary(),
            right.as_binary(),
        )?),
        DataType::BinaryView => Arc::new(concat_elements_binary_view_array(
            left.as_binary_view(),
            right.as_binary_view(),
        )?),
        other => {
            return internal_err!(
                "Data type {other:?} not supported for binary operation 'concat_elements' on string arrays"
            );
        }
    })
}

/// Create a binary expression whose arguments are correctly coerced.
/// This function errors if it is not possible to coerce the arguments
/// to computational types supported by the operator.
Expand Down
89 changes: 0 additions & 89 deletions datafusion/physical-expr/src/expressions/binary/kernels.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
//! This module contains computation kernels that are specific to
//! datafusion and not (yet) targeted to port upstream to arrow
use arrow::array::*;
use arrow::buffer::{MutableBuffer, NullBuffer};
use arrow::compute::kernels::bitwise::{
bitwise_and, bitwise_and_scalar, bitwise_or, bitwise_or_scalar, bitwise_shift_left,
bitwise_shift_left_scalar, bitwise_shift_right, bitwise_shift_right_scalar,
Expand All @@ -27,7 +26,6 @@ use arrow::compute::kernels::bitwise::{
use arrow::compute::kernels::boolean::not;
use arrow::compute::kernels::comparison::{regexp_is_match, regexp_is_match_scalar};
use arrow::datatypes::DataType;
use arrow::error::ArrowError;
use datafusion_common::{Result, ScalarValue};
use datafusion_common::{internal_err, plan_err};

Expand Down Expand Up @@ -161,93 +159,6 @@ create_left_integral_dyn_scalar_kernel!(
bitwise_shift_left_scalar
);

/// Element-wise concatenation of two `StringViewArray`s.
///
/// Row `i` of the output is `left[i]` immediately followed by `right[i]`.
/// A row is `Null` whenever the corresponding row of either input is `Null`.
///
/// # Errors
/// - Returns an error if the input arrays have different lengths.
/// - Returns an error if any concatenated string exceeds `u32::MAX` (≈4 GB) in length.
pub fn concat_elements_utf8view(
    left: &StringViewArray,
    right: &StringViewArray,
) -> std::result::Result<StringViewArray, ArrowError> {
    let (left_len, right_len) = (left.len(), right.len());
    if left_len != right_len {
        return Err(ArrowError::ComputeError(format!(
            "Arrays must have the same length: {} != {}",
            left_len, right_len
        )));
    }

    // Merge both validity bitmaps once so that each row needs a single lookup.
    let combined_nulls = NullBuffer::union(left.nulls(), right.nulls());
    let row_is_null =
        |row: usize| combined_nulls.as_ref().is_some_and(|mask| mask.is_null(row));

    let mut builder = StringViewBuilder::with_capacity(left_len);

    // One scratch string is reused for every row to avoid a fresh allocation
    // per element (building the view directly would avoid the copy entirely,
    // at the cost of more complex code).
    let mut scratch = String::new();

    for row in 0..left_len {
        if row_is_null(row) {
            builder.append_null();
            continue;
        }
        scratch.clear();
        scratch.push_str(left.value(row));
        scratch.push_str(right.value(row));
        builder.try_append_value(&scratch)?;
    }

    Ok(builder.finish())
}

/// Element-wise concatenation of two `BinaryViewArray`s.
///
/// Row `i` of the output holds the bytes of `left[i]` immediately followed by
/// the bytes of `right[i]`. A row is `Null` whenever the corresponding row of
/// either input is `Null`.
///
/// # Errors
/// - Returns an error if the input arrays have different lengths.
/// - Returns an error if any concatenated value exceeds `u32::MAX` in length.
pub fn concat_elements_binary_view_array(
    left: &BinaryViewArray,
    right: &BinaryViewArray,
) -> std::result::Result<BinaryViewArray, ArrowError> {
    let (left_len, right_len) = (left.len(), right.len());
    if left_len != right_len {
        return Err(ArrowError::ComputeError(format!(
            "Arrays must have the same length: {} != {}",
            left_len, right_len
        )));
    }

    // Merge both validity bitmaps once so that each row needs a single lookup.
    let combined_nulls = NullBuffer::union(left.nulls(), right.nulls());
    let row_is_null =
        |row: usize| combined_nulls.as_ref().is_some_and(|mask| mask.is_null(row));

    let mut builder = BinaryViewBuilder::with_capacity(left_len);

    // One scratch buffer is reused for every row to avoid a fresh allocation
    // per element (building the view directly would avoid the copy entirely,
    // at the cost of more complex code).
    let mut scratch = MutableBuffer::new(0);

    for row in 0..left_len {
        if row_is_null(row) {
            builder.append_null();
            continue;
        }
        scratch.clear();
        scratch.extend_from_slice(left.value(row));
        scratch.extend_from_slice(right.value(row));
        // Fallible append: an error from the builder is propagated to the caller.
        builder.try_append_value(&scratch)?;
    }

    Ok(builder.finish())
}

/// Invoke a compute kernel on a pair of binary data arrays with flags
macro_rules! regexp_is_match_flag {
($LEFT:expr, $RIGHT:expr, $ARRAYTYPE:ident, $NOT:expr, $FLAG:expr) => {{
Expand Down
Loading