diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index ed32ec203c1..0c5cfe1c903 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -759,11 +759,22 @@ struct IfElseFunctor> { auto* out_data = out->array_data().get(); auto offset_length = (cond.length + 1) * sizeof(OffsetType); ARROW_ASSIGN_OR_RAISE(out_data->buffers[1], ctx->Allocate(offset_length)); - std::memcpy(out_data->buffers[1]->mutable_data(), right_offsets, offset_length); + + if (right_offsets[0] == 0) { + std::memcpy(out_data->buffers[1]->mutable_data(), right_offsets, offset_length); + } else { + OffsetType base = right_offsets[0]; + auto* out_offsets = + reinterpret_cast(out_data->buffers[1]->mutable_data()); + for (int64_t i = 0; i <= cond.length; ++i) { + out_offsets[i] = right_offsets[i] - base; + } + } auto right_data_length = right_offsets[right.length] - right_offsets[0]; ARROW_ASSIGN_OR_RAISE(out_data->buffers[2], ctx->Allocate(right_data_length)); - std::memcpy(out_data->buffers[2]->mutable_data(), right_data, right_data_length); + std::memcpy(out_data->buffers[2]->mutable_data(), right_data + right_offsets[0], + right_data_length); return Status::OK(); } @@ -801,11 +812,22 @@ struct IfElseFunctor> { auto* out_data = out->array_data().get(); auto offset_length = (cond.length + 1) * sizeof(OffsetType); ARROW_ASSIGN_OR_RAISE(out_data->buffers[1], ctx->Allocate(offset_length)); - std::memcpy(out_data->buffers[1]->mutable_data(), left_offsets, offset_length); + + if (left_offsets[0] == 0) { + std::memcpy(out_data->buffers[1]->mutable_data(), left_offsets, offset_length); + } else { + OffsetType base = left_offsets[0]; + auto* out_offsets = + reinterpret_cast(out_data->buffers[1]->mutable_data()); + for (int64_t i = 0; i <= cond.length; ++i) { + out_offsets[i] = left_offsets[i] - base; + } + } auto left_data_length = left_offsets[left.length] - left_offsets[0]; ARROW_ASSIGN_OR_RAISE(out_data->buffers[2], ctx->Allocate(left_data_length)); - std::memcpy(out_data->buffers[2]->mutable_data(), left_data, left_data_length); + std::memcpy(out_data->buffers[2]->mutable_data(), left_data + left_offsets[0], + left_data_length); return Status::OK(); } diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index e5cf73742b6..a4a7df8fd5d 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -27,6 +27,7 @@ #include "arrow/compute/kernels/test_util_internal.h" #include "arrow/compute/registry.h" #include "arrow/testing/gtest_util.h" +#include "arrow/util/bitmap_builders.h" #include "arrow/util/checked_cast.h" namespace arrow { @@ -609,6 +610,48 @@ TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinaryRand) { CheckIfElseOutput(cond, left, right, expected_data); } +TYPED_TEST(TestIfElseBaseBinary, IfElseBaseBinarySliced) { + auto type = TypeTraits::type_singleton(); + + auto full_arr = ArrayFromJSON(type, R"(["not used", null, "x", "x"])"); + auto sliced = full_arr->Slice(1); + + auto cond_asa = ArrayFromJSON(boolean(), "[true, false, false]"); + ASSERT_OK_AND_ASSIGN(auto result_asa, + CallFunction("if_else", {cond_asa, MakeNullScalar(type), sliced})); + ASSERT_OK(result_asa.make_array()->ValidateFull()); + auto expected = ArrayFromJSON(type, R"([null, "x", "x"])"); + AssertArraysEqual(*expected, *result_asa.make_array(), true); + + auto cond_aas = ArrayFromJSON(boolean(), "[false, true, true]"); + ASSERT_OK_AND_ASSIGN(auto result_aas, + CallFunction("if_else", {cond_aas, sliced, MakeNullScalar(type)})); + ASSERT_OK(result_aas.make_array()->ValidateFull()); + AssertArraysEqual(*expected, *result_aas.make_array(), true); + + // edge case: offset=0 but offsets[0] != 0 (spec-valid, manually constructed) + using OffsetType = typename TypeTraits::OffsetType::c_type; + std::vector raw_offsets = {8, 8, 9, 10}; + std::string raw_data(8, 'x'); + raw_data += "xx"; + auto offsets_buf = Buffer::Wrap(raw_offsets.data(), raw_offsets.size()); + auto data_buf = + std::make_shared(reinterpret_cast(raw_data.data()), + static_cast(raw_data.size())); + auto array_data = ArrayData::Make(type, /*length=*/3, {nullptr, offsets_buf, data_buf}, + /*null_count=*/1, /*offset=*/0); + std::vector validity_bytes = {0, 1, 1}; + ASSERT_OK_AND_ASSIGN( + array_data->buffers[0], + arrow::internal::BytesToBits(validity_bytes, arrow::default_memory_pool())); + auto arr = MakeArray(array_data); + ASSERT_OK(arr->ValidateFull()); + ASSERT_OK_AND_ASSIGN(auto result_nonzero, + CallFunction("if_else", {cond_asa, MakeNullScalar(type), arr})); + ASSERT_OK(result_nonzero.make_array()->ValidateFull()); + AssertArraysEqual(*expected, *result_nonzero.make_array(), true); +} + Result> MakeBinaryArrayWithData( const std::shared_ptr& type, const std::shared_ptr& data_buffer) { // Make a (large-)binary array with a single item backed by the given data