Skip to content
30 changes: 26 additions & 4 deletions cpp/src/arrow/compute/kernels/scalar_if_else.cc
Original file line number Diff line number Diff line change
Expand Up @@ -759,11 +759,22 @@ struct IfElseFunctor<Type, enable_if_base_binary<Type>> {
auto* out_data = out->array_data().get();
auto offset_length = (cond.length + 1) * sizeof(OffsetType);
ARROW_ASSIGN_OR_RAISE(out_data->buffers[1], ctx->Allocate(offset_length));
std::memcpy(out_data->buffers[1]->mutable_data(), right_offsets, offset_length);

if (right_offsets[0] == 0) {
std::memcpy(out_data->buffers[1]->mutable_data(), right_offsets, offset_length);
} else {
OffsetType base = right_offsets[0];
auto* out_offsets =
reinterpret_cast<OffsetType*>(out_data->buffers[1]->mutable_data());
for (int64_t i = 0; i <= cond.length; ++i) {
out_offsets[i] = right_offsets[i] - base;
}
}

auto right_data_length = right_offsets[right.length] - right_offsets[0];
ARROW_ASSIGN_OR_RAISE(out_data->buffers[2], ctx->Allocate(right_data_length));
std::memcpy(out_data->buffers[2]->mutable_data(), right_data, right_data_length);
std::memcpy(out_data->buffers[2]->mutable_data(), right_data + right_offsets[0],
right_data_length);
return Status::OK();
}

Expand Down Expand Up @@ -801,11 +812,22 @@ struct IfElseFunctor<Type, enable_if_base_binary<Type>> {
auto* out_data = out->array_data().get();
auto offset_length = (cond.length + 1) * sizeof(OffsetType);
ARROW_ASSIGN_OR_RAISE(out_data->buffers[1], ctx->Allocate(offset_length));
std::memcpy(out_data->buffers[1]->mutable_data(), left_offsets, offset_length);

if (left_offsets[0] == 0) {
std::memcpy(out_data->buffers[1]->mutable_data(), left_offsets, offset_length);
} else {
OffsetType base = left_offsets[0];
auto* out_offsets =
reinterpret_cast<OffsetType*>(out_data->buffers[1]->mutable_data());
for (int64_t i = 0; i <= cond.length; ++i) {
out_offsets[i] = left_offsets[i] - base;
}
}

auto left_data_length = left_offsets[left.length] - left_offsets[0];
ARROW_ASSIGN_OR_RAISE(out_data->buffers[2], ctx->Allocate(left_data_length));
std::memcpy(out_data->buffers[2]->mutable_data(), left_data, left_data_length);
std::memcpy(out_data->buffers[2]->mutable_data(), left_data + left_offsets[0],
left_data_length);
return Status::OK();
}

Expand Down
43 changes: 43 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_if_else_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "arrow/compute/kernels/test_util_internal.h"
#include "arrow/compute/registry.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/util/bitmap_builders.h"
#include "arrow/util/checked_cast.h"

namespace arrow {
Expand Down Expand Up @@ -609,6 +610,48 @@ TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinaryRand) {
CheckIfElseOutput(cond, left, right, expected_data);
}

// Regression test for if_else on base-binary inputs whose first offset is not
// zero, with a null scalar on the other side. Every result is fully validated
// and compared against [null, "x", "x"].
//
// Two kinds of input are covered, each as the right operand (cond_asa) and as
// the left operand (cond_aas):
//   1. an array sliced so that its leading element is dropped;
//   2. a hand-built array with array offset 0 whose offsets buffer starts at 8.
TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinarySliced) {
  auto type = TypeTraits<TypeParam>::type_singleton();
  auto expected = ArrayFromJSON(type, R"([null, "x", "x"])");

  // The slice drops the leading "not used" element.
  auto full_arr = ArrayFromJSON(type, R"(["not used", null, "x", "x"])");
  auto sliced = full_arr->Slice(1);

  // Null scalar on the left, sliced array on the right.
  auto cond_asa = ArrayFromJSON(boolean(), "[true, false, false]");
  ASSERT_OK_AND_ASSIGN(auto result_asa,
                       CallFunction("if_else", {cond_asa, MakeNullScalar(type), sliced}));
  auto out_asa = result_asa.make_array();
  ASSERT_OK(out_asa->ValidateFull());
  AssertArraysEqual(*expected, *out_asa, /*verbose=*/true);

  // Sliced array on the left, null scalar on the right.
  auto cond_aas = ArrayFromJSON(boolean(), "[false, true, true]");
  ASSERT_OK_AND_ASSIGN(auto result_aas,
                       CallFunction("if_else", {cond_aas, sliced, MakeNullScalar(type)}));
  auto out_aas = result_aas.make_array();
  ASSERT_OK(out_aas->ValidateFull());
  AssertArraysEqual(*expected, *out_aas, /*verbose=*/true);

  // edge case: offset=0 but offsets[0] != 0 (spec-valid, manually constructed)
  using OffsetType = typename TypeTraits<TypeParam>::OffsetType::c_type;
  std::vector<OffsetType> raw_offsets = {8, 8, 9, 10};
  // 8 bytes that no element references, followed by the two one-byte values.
  std::string raw_data(8, 'x');
  raw_data += "xx";
  // Both buffers are non-owning views; raw_offsets and raw_data must outlive
  // every use of `arr` below.
  auto offsets_buf = Buffer::Wrap(raw_offsets.data(), raw_offsets.size());
  auto data_buf =
      std::make_shared<Buffer>(reinterpret_cast<const uint8_t*>(raw_data.data()),
                               static_cast<int64_t>(raw_data.size()));
  auto array_data = ArrayData::Make(type, /*length=*/3, {nullptr, offsets_buf, data_buf},
                                    /*null_count=*/1, /*offset=*/0);
  // Validity: first element null, the other two valid.
  std::vector<uint8_t> validity_bytes = {0, 1, 1};
  ASSERT_OK_AND_ASSIGN(
      array_data->buffers[0],
      arrow::internal::BytesToBits(validity_bytes, arrow::default_memory_pool()));
  auto arr = MakeArray(array_data);
  ASSERT_OK(arr->ValidateFull());

  // Null scalar on the left, hand-built array on the right.
  ASSERT_OK_AND_ASSIGN(auto result_nonzero,
                       CallFunction("if_else", {cond_asa, MakeNullScalar(type), arr}));
  auto out_nonzero = result_nonzero.make_array();
  ASSERT_OK(out_nonzero->ValidateFull());
  AssertArraysEqual(*expected, *out_nonzero, /*verbose=*/true);

  // Hand-built array on the left, null scalar on the right: the mirror of the
  // case above, so both operand positions see a non-zero first offset with a
  // zero array offset.
  ASSERT_OK_AND_ASSIGN(auto result_nonzero_aas,
                       CallFunction("if_else", {cond_aas, arr, MakeNullScalar(type)}));
  auto out_nonzero_aas = result_nonzero_aas.make_array();
  ASSERT_OK(out_nonzero_aas->ValidateFull());
  AssertArraysEqual(*expected, *out_nonzero_aas, /*verbose=*/true);
}

Result<std::shared_ptr<Array>> MakeBinaryArrayWithData(
const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data_buffer) {
// Make a (large-)binary array with a single item backed by the given data
Expand Down