diff --git a/be/src/exprs/vcondition_expr.cpp b/be/src/exprs/vcondition_expr.cpp index ef462f4cde9976..a6be429307db95 100644 --- a/be/src/exprs/vcondition_expr.cpp +++ b/be/src/exprs/vcondition_expr.cpp @@ -583,10 +583,15 @@ void insert_result_data(MutableColumnPtr& result_column, ColumnPtr& argument_col binary_cast(result_raw_data[row]) + binary_cast(column_raw_data[row]) * int64_t(!(null_map_data[row] | filled_flag[row]))); + } else if constexpr (std::is_same_v || + std::is_same_v) { + auto flag = (!(null_map_data[row] | filled_flag[row])); + result_raw_data[row] += std::isfinite(column_raw_data[row]) ? + column_raw_data[row] * flag : (flag ? column_raw_data[row] : 0); } else { result_raw_data[row] += - column_raw_data[row] * - typename ColumnType::value_type(!(null_map_data[row] | filled_flag[row])); + column_raw_data[row] * + typename ColumnType::value_type(!(null_map_data[row] | filled_flag[row])); } filled_flag[row] += (!(null_map_data[row] | filled_flag[row])); diff --git a/be/test/exprs/vcondition_expr_test.cpp b/be/test/exprs/vcondition_expr_test.cpp new file mode 100644 index 00000000000000..5163eca9b1422e --- /dev/null +++ b/be/test/exprs/vcondition_expr_test.cpp @@ -0,0 +1,363 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exprs/vcondition_expr.h" + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "core/column/column_nullable.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_nullable.h" +#include "core/data_type/data_type_number.h" +#include "exprs/vexpr.h" +#include "exprs/vexpr_context.h" + +namespace doris { + +// Build a minimal TExprNode as the input of VectorizedCoalesceExpr. +// Only fields required by the VExpr base ctor (so that create_data_type works) are set. +static TExprNode make_coalesce_node(TPrimitiveType::type ptype, bool is_nullable) { + TExprNode node; + node.node_type = TExprNodeType::FUNCTION_CALL; + node.num_children = 0; + node.__set_is_nullable(is_nullable); + + TTypeDesc type_desc; + TTypeNode type_node; + type_node.type = TTypeNodeType::SCALAR; + TScalarType scalar_type; + scalar_type.__set_type(ptype); + type_node.__set_scalar_type(scalar_type); + type_desc.types.push_back(type_node); + node.__set_type(type_desc); + + TFunction fn; + TFunctionName fn_name; + fn_name.function_name = "coalesce"; + fn.name = fn_name; + node.__set_fn(fn); + + return node; +} + +// Mock child expression: returns the pre-injected ColumnPtr / DataTypePtr to the parent expr. +// Behavior is modeled after MockVExprForTryCast in try_cast_expr_test.cpp. +class MockChildVExpr : public VExpr { +public: + MockChildVExpr(ColumnPtr column, DataTypePtr type) + : _column(std::move(column)), _type(std::move(type)) {} + + MOCK_CONST_METHOD0(clone, VExprSPtr()); + + const std::string& expr_name() const override { return _name; } + + Status execute(VExprContext* context, Block* block, int* result_column_id) const override { + block->insert({_column, _type, "mock_child"}); + *result_column_id = static_cast(block->columns()) - 1; + return Status::OK(); + } + + Status execute_column_impl(VExprContext* /*context*/, const Block* /*block*/, + const Selector* /*selector*/, size_t /*count*/, + ColumnPtr& result_column) const override { + result_column = _column; + return Status::OK(); + } + + DataTypePtr execute_type(const Block* /*block*/) const override { return _type; } + +private: + ColumnPtr _column; + DataTypePtr _type; + std::string _name = "mock_child"; +}; + +// Helper: build a nullable Float64 column from a list of (value, is_null) pairs. +static ColumnPtr make_nullable_float64_column( + const std::vector>& values) { + auto nested = ColumnFloat64::create(); + auto null_map = ColumnUInt8::create(); + for (auto& [v, is_null] : values) { + nested->insert_value(v); + null_map->insert_value(is_null ? 1 : 0); + } + return ColumnNullable::create(std::move(nested), std::move(null_map)); +} + +// Helper: build a non-nullable Float64 column from a list of doubles. +static ColumnPtr make_float64_column(const std::vector& values) { + auto col = ColumnFloat64::create(); + for (auto v : values) { + col->insert_value(v); + } + return col; +} + +// Helper: extract the Float64 value at `row` from the result column, +// handling both nullable and non-nullable cases. +static double get_float64_value(const ColumnPtr& column, size_t row, bool* is_null = nullptr) { + if (const auto* nullable = check_and_get_column(column.get())) { + if (is_null) { + *is_null = nullable->is_null_at(row); + } + const auto& nested = + assert_cast(nullable->get_nested_column()).get_data(); + return nested[row]; + } + if (is_null) { + *is_null = false; + } + return assert_cast(column.get())->get_data()[row]; +} + +class VConditionExprCoalesceTest : public ::testing::Test {}; + +// Targets the fix: after some rows of col0 are filled, NaN/Inf values in the +// same rows of later columns must not pollute the already-filled result via +// "value * 0" arithmetic. +// Input: +// col0 (nullable, double): [1.0, NULL] +// col1 (non-nullable, double): [NaN, 100.0] +// Expected coalesce result: [1.0, 100.0] +TEST_F(VConditionExprCoalesceTest, Float64_NaN_NotPolluteResult) { + auto coalesce_node = make_coalesce_node(TPrimitiveType::DOUBLE, /*is_nullable=*/true); + auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node); + + // _data_type is already set to Nullable(Float64) by the base ctor; reassert it explicitly. + coalesce_expr->_data_type = std::make_shared(std::make_shared()); + coalesce_expr->_open_finished = true; + + const double kNaN = std::numeric_limits::quiet_NaN(); + auto col0 = make_nullable_float64_column({{1.0, false}, {0.0, true}}); + auto col1 = make_float64_column({kNaN, 100.0}); + + auto child0 = std::make_shared( + col0, std::make_shared(std::make_shared())); + auto child1 = + std::make_shared(col1, std::make_shared()); + coalesce_expr->add_child(child0); + coalesce_expr->add_child(child1); + + VExprContext context(coalesce_expr); + ColumnPtr result; + auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr, + /*selector=*/nullptr, /*count=*/2, result); + ASSERT_TRUE(st.ok()) << st.to_string(); + ASSERT_TRUE(result.get() != nullptr); + ASSERT_EQ(result->size(), 2); + + bool is_null = false; + auto v0 = get_float64_value(result, 0, &is_null); + EXPECT_FALSE(is_null); + EXPECT_FALSE(std::isnan(v0)) << "row0 must not be polluted by NaN, got " << v0; + EXPECT_DOUBLE_EQ(v0, 1.0); + + auto v1 = get_float64_value(result, 1, &is_null); + EXPECT_FALSE(is_null); + EXPECT_DOUBLE_EQ(v1, 100.0); +} + +// Same shape as above, but the polluting value in the later column is +Inf: +// col0 (nullable, double): [2.0, NULL] +// col1 (non-nullable, double): [+Inf, 200.0] +// Expected coalesce result: [2.0, 200.0] +TEST_F(VConditionExprCoalesceTest, Float64_Inf_NotPolluteResult) { + auto coalesce_node = make_coalesce_node(TPrimitiveType::DOUBLE, /*is_nullable=*/true); + auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node); + coalesce_expr->_data_type = std::make_shared(std::make_shared()); + coalesce_expr->_open_finished = true; + + const double kInf = std::numeric_limits::infinity(); + auto col0 = make_nullable_float64_column({{2.0, false}, {0.0, true}}); + auto col1 = make_float64_column({kInf, 200.0}); + + coalesce_expr->add_child(std::make_shared( + col0, std::make_shared(std::make_shared()))); + coalesce_expr->add_child( + std::make_shared(col1, std::make_shared())); + + VExprContext context(coalesce_expr); + ColumnPtr result; + auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr, + /*selector=*/nullptr, /*count=*/2, result); + ASSERT_TRUE(st.ok()) << st.to_string(); + ASSERT_EQ(result->size(), 2); + + bool is_null = false; + auto v0 = get_float64_value(result, 0, &is_null); + EXPECT_FALSE(is_null); + EXPECT_FALSE(std::isnan(v0)) << "row0 must not be polluted by Inf*0, got " << v0; + EXPECT_TRUE(std::isfinite(v0)); + EXPECT_DOUBLE_EQ(v0, 2.0); + + auto v1 = get_float64_value(result, 1, &is_null); + EXPECT_FALSE(is_null); + EXPECT_DOUBLE_EQ(v1, 200.0); +} + +// Verify that when NaN itself is the valid non-null value picked by coalesce, +// the result must preserve the NaN (the fix should not over-correct). +// col0 (nullable, double): [NaN, NULL] +// col1 (non-nullable, double): [10.0, 20.0] +// Expected: row0 takes col0's NaN (non-null, value is NaN); row1 takes col1's 20.0. +TEST_F(VConditionExprCoalesceTest, Float64_NaN_PreservedWhenSelected) { + auto coalesce_node = make_coalesce_node(TPrimitiveType::DOUBLE, /*is_nullable=*/true); + auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node); + coalesce_expr->_data_type = std::make_shared(std::make_shared()); + coalesce_expr->_open_finished = true; + + const double kNaN = std::numeric_limits::quiet_NaN(); + auto col0 = make_nullable_float64_column({{kNaN, false}, {0.0, true}}); + auto col1 = make_float64_column({10.0, 20.0}); + + coalesce_expr->add_child(std::make_shared( + col0, std::make_shared(std::make_shared()))); + coalesce_expr->add_child( + std::make_shared(col1, std::make_shared())); + + VExprContext context(coalesce_expr); + ColumnPtr result; + auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr, + /*selector=*/nullptr, /*count=*/2, result); + ASSERT_TRUE(st.ok()) << st.to_string(); + ASSERT_EQ(result->size(), 2); + + bool is_null = false; + auto v0 = get_float64_value(result, 0, &is_null); + EXPECT_FALSE(is_null); + EXPECT_TRUE(std::isnan(v0)) << "row0 should preserve NaN from col0"; + + auto v1 = get_float64_value(result, 1, &is_null); + EXPECT_FALSE(is_null); + EXPECT_DOUBLE_EQ(v1, 20.0); +} + +// Float32 variant: same shape as the Float64 NaN case. +TEST_F(VConditionExprCoalesceTest, Float32_NaN_NotPolluteResult) { + auto coalesce_node = make_coalesce_node(TPrimitiveType::FLOAT, /*is_nullable=*/true); + auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node); + coalesce_expr->_data_type = + std::make_shared(std::make_shared()); + coalesce_expr->_open_finished = true; + + const float kNaN = std::numeric_limits::quiet_NaN(); + + auto nested0 = ColumnFloat32::create(); + nested0->insert_value(1.5f); + nested0->insert_value(0.0f); + auto null_map0 = ColumnUInt8::create(); + null_map0->insert_value(0); + null_map0->insert_value(1); + ColumnPtr col0 = ColumnNullable::create(std::move(nested0), std::move(null_map0)); + + auto nested1 = ColumnFloat32::create(); + nested1->insert_value(kNaN); + nested1->insert_value(7.5f); + ColumnPtr col1 = std::move(nested1); + + coalesce_expr->add_child(std::make_shared( + col0, std::make_shared(std::make_shared()))); + coalesce_expr->add_child( + std::make_shared(col1, std::make_shared())); + + VExprContext context(coalesce_expr); + ColumnPtr result; + auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr, + /*selector=*/nullptr, /*count=*/2, result); + ASSERT_TRUE(st.ok()) << st.to_string(); + ASSERT_EQ(result->size(), 2); + + auto get_f32 = [&](size_t row, bool* is_null) -> float { + if (const auto* nullable = check_and_get_column(result.get())) { + *is_null = nullable->is_null_at(row); + return assert_cast(nullable->get_nested_column()) + .get_data()[row]; + } + *is_null = false; + return assert_cast(result.get())->get_data()[row]; + }; + + bool is_null = false; + float v0 = get_f32(0, &is_null); + EXPECT_FALSE(is_null); + EXPECT_FALSE(std::isnan(v0)) << "row0 must not be polluted by NaN, got " << v0; + EXPECT_FLOAT_EQ(v0, 1.5f); + + float v1 = get_f32(1, &is_null); + EXPECT_FALSE(is_null); + EXPECT_FLOAT_EQ(v1, 7.5f); +} + +// Regression: verify that the non-floating (integer) path is unaffected by the Float branch change. +TEST_F(VConditionExprCoalesceTest, Int32_NormalPathStillWorks) { + auto coalesce_node = make_coalesce_node(TPrimitiveType::INT, /*is_nullable=*/true); + auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node); + coalesce_expr->_data_type = + std::make_shared(std::make_shared()); + coalesce_expr->_open_finished = true; + + auto nested0 = ColumnInt32::create(); + nested0->insert_value(11); + nested0->insert_value(0); + auto null_map0 = ColumnUInt8::create(); + null_map0->insert_value(0); + null_map0->insert_value(1); + ColumnPtr col0 = ColumnNullable::create(std::move(nested0), std::move(null_map0)); + + auto nested1 = ColumnInt32::create(); + nested1->insert_value(99); + nested1->insert_value(22); + ColumnPtr col1 = std::move(nested1); + + coalesce_expr->add_child(std::make_shared( + col0, std::make_shared(std::make_shared()))); + coalesce_expr->add_child( + std::make_shared(col1, std::make_shared())); + + VExprContext context(coalesce_expr); + ColumnPtr result; + auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr, + /*selector=*/nullptr, /*count=*/2, result); + ASSERT_TRUE(st.ok()) << st.to_string(); + ASSERT_EQ(result->size(), 2); + + auto get_int = [&](size_t row, bool* is_null) -> int32_t { + if (const auto* nullable = check_and_get_column(result.get())) { + *is_null = nullable->is_null_at(row); + return assert_cast(nullable->get_nested_column()) + .get_data()[row]; + } + *is_null = false; + return assert_cast(result.get())->get_data()[row]; + }; + + bool is_null = false; + EXPECT_EQ(get_int(0, &is_null), 11); + EXPECT_FALSE(is_null); + EXPECT_EQ(get_int(1, &is_null), 22); + EXPECT_FALSE(is_null); +} + +} // namespace doris