Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions be/src/exprs/vcondition_expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -583,10 +583,15 @@ void insert_result_data(MutableColumnPtr& result_column, ColumnPtr& argument_col
binary_cast<VecDateTimeValue, int64_t>(result_raw_data[row]) +
binary_cast<VecDateTimeValue, int64_t>(column_raw_data[row]) *
int64_t(!(null_map_data[row] | filled_flag[row])));
} else if constexpr (std::is_same_v<ColumnType, ColumnFloat32> ||
std::is_same_v<ColumnType, ColumnFloat64>) {
auto flag = (!(null_map_data[row] | filled_flag[row]));
result_raw_data[row] += std::isfinite(column_raw_data[row]) ?
column_raw_data[row] * flag : (flag ? column_raw_data[row] : 0);
} else {
result_raw_data[row] +=
column_raw_data[row] *
typename ColumnType::value_type(!(null_map_data[row] | filled_flag[row]));
column_raw_data[row] *
typename ColumnType::value_type(!(null_map_data[row] | filled_flag[row]));
}

filled_flag[row] += (!(null_map_data[row] | filled_flag[row]));
Expand Down
363 changes: 363 additions & 0 deletions be/test/exprs/vcondition_expr_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,363 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "exprs/vcondition_expr.h"

#include <gen_cpp/Exprs_types.h>
#include <gen_cpp/Types_types.h>
#include <gmock/gmock.h>
#include <gtest/gtest.h>

#include <cmath>
#include <limits>
#include <memory>
#include <vector>

#include "core/column/column_nullable.h"
#include "core/column/column_vector.h"
#include "core/data_type/data_type_nullable.h"
#include "core/data_type/data_type_number.h"
#include "exprs/vexpr.h"
#include "exprs/vexpr_context.h"

namespace doris {

// Build a minimal TExprNode as the input of VectorizedCoalesceExpr.
// Only fields required by the VExpr base ctor (so that create_data_type works) are set.
static TExprNode make_coalesce_node(TPrimitiveType::type ptype, bool is_nullable) {
TExprNode node;
node.node_type = TExprNodeType::FUNCTION_CALL;
node.num_children = 0;
node.__set_is_nullable(is_nullable);

TTypeDesc type_desc;
TTypeNode type_node;
type_node.type = TTypeNodeType::SCALAR;
TScalarType scalar_type;
scalar_type.__set_type(ptype);
type_node.__set_scalar_type(scalar_type);
type_desc.types.push_back(type_node);
node.__set_type(type_desc);

TFunction fn;
TFunctionName fn_name;
fn_name.function_name = "coalesce";
fn.name = fn_name;
node.__set_fn(fn);

return node;
}

// Mock child expression: returns the pre-injected ColumnPtr / DataTypePtr to the parent expr.
// Behavior is modeled after MockVExprForTryCast in try_cast_expr_test.cpp.
class MockChildVExpr : public VExpr {
public:
MockChildVExpr(ColumnPtr column, DataTypePtr type)
: _column(std::move(column)), _type(std::move(type)) {}

MOCK_CONST_METHOD0(clone, VExprSPtr());

const std::string& expr_name() const override { return _name; }

Status execute(VExprContext* context, Block* block, int* result_column_id) const override {
block->insert({_column, _type, "mock_child"});
*result_column_id = static_cast<int>(block->columns()) - 1;
return Status::OK();
}

Status execute_column_impl(VExprContext* /*context*/, const Block* /*block*/,
const Selector* /*selector*/, size_t /*count*/,
ColumnPtr& result_column) const override {
result_column = _column;
return Status::OK();
}

DataTypePtr execute_type(const Block* /*block*/) const override { return _type; }

private:
ColumnPtr _column;
DataTypePtr _type;
std::string _name = "mock_child";
};

// Helper: build a nullable Float64 column from a list of (value, is_null) pairs.
static ColumnPtr make_nullable_float64_column(
const std::vector<std::pair<double, bool>>& values) {
auto nested = ColumnFloat64::create();
auto null_map = ColumnUInt8::create();
for (auto& [v, is_null] : values) {
nested->insert_value(v);
null_map->insert_value(is_null ? 1 : 0);
}
return ColumnNullable::create(std::move(nested), std::move(null_map));
}

// Helper: build a non-nullable Float64 column from a list of doubles.
static ColumnPtr make_float64_column(const std::vector<double>& values) {
auto col = ColumnFloat64::create();
for (auto v : values) {
col->insert_value(v);
}
return col;
}

// Helper: extract the Float64 value at `row` from the result column,
// handling both nullable and non-nullable cases.
static double get_float64_value(const ColumnPtr& column, size_t row, bool* is_null = nullptr) {
if (const auto* nullable = check_and_get_column<ColumnNullable>(column.get())) {
if (is_null) {
*is_null = nullable->is_null_at(row);
}
const auto& nested =
assert_cast<const ColumnFloat64&>(nullable->get_nested_column()).get_data();
return nested[row];
}
if (is_null) {
*is_null = false;
}
return assert_cast<const ColumnFloat64*>(column.get())->get_data()[row];
}

class VConditionExprCoalesceTest : public ::testing::Test {};

// Targets the fix: after some rows of col0 are filled, NaN/Inf values in the
// same rows of later columns must not pollute the already-filled result via
// "value * 0" arithmetic.
// Input:
// col0 (nullable, double): [1.0, NULL]
// col1 (non-nullable, double): [NaN, 100.0]
// Expected coalesce result: [1.0, 100.0]
TEST_F(VConditionExprCoalesceTest, Float64_NaN_NotPolluteResult) {
auto coalesce_node = make_coalesce_node(TPrimitiveType::DOUBLE, /*is_nullable=*/true);
auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node);

// _data_type is already set to Nullable(Float64) by the base ctor; reassert it explicitly.
coalesce_expr->_data_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>());
coalesce_expr->_open_finished = true;

const double kNaN = std::numeric_limits<double>::quiet_NaN();
auto col0 = make_nullable_float64_column({{1.0, false}, {0.0, true}});
auto col1 = make_float64_column({kNaN, 100.0});

auto child0 = std::make_shared<MockChildVExpr>(
col0, std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>()));
auto child1 =
std::make_shared<MockChildVExpr>(col1, std::make_shared<DataTypeFloat64>());
coalesce_expr->add_child(child0);
coalesce_expr->add_child(child1);

VExprContext context(coalesce_expr);
ColumnPtr result;
auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr,
/*selector=*/nullptr, /*count=*/2, result);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_TRUE(result.get() != nullptr);
ASSERT_EQ(result->size(), 2);

bool is_null = false;
auto v0 = get_float64_value(result, 0, &is_null);
EXPECT_FALSE(is_null);
EXPECT_FALSE(std::isnan(v0)) << "row0 must not be polluted by NaN, got " << v0;
EXPECT_DOUBLE_EQ(v0, 1.0);

auto v1 = get_float64_value(result, 1, &is_null);
EXPECT_FALSE(is_null);
EXPECT_DOUBLE_EQ(v1, 100.0);
}

// Same shape as above, but the polluting value in the later column is +Inf:
// col0 (nullable, double): [2.0, NULL]
// col1 (non-nullable, double): [+Inf, 200.0]
// Expected coalesce result: [2.0, 200.0]
TEST_F(VConditionExprCoalesceTest, Float64_Inf_NotPolluteResult) {
auto coalesce_node = make_coalesce_node(TPrimitiveType::DOUBLE, /*is_nullable=*/true);
auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node);
coalesce_expr->_data_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>());
coalesce_expr->_open_finished = true;

const double kInf = std::numeric_limits<double>::infinity();
auto col0 = make_nullable_float64_column({{2.0, false}, {0.0, true}});
auto col1 = make_float64_column({kInf, 200.0});

coalesce_expr->add_child(std::make_shared<MockChildVExpr>(
col0, std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>())));
coalesce_expr->add_child(
std::make_shared<MockChildVExpr>(col1, std::make_shared<DataTypeFloat64>()));

VExprContext context(coalesce_expr);
ColumnPtr result;
auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr,
/*selector=*/nullptr, /*count=*/2, result);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(result->size(), 2);

bool is_null = false;
auto v0 = get_float64_value(result, 0, &is_null);
EXPECT_FALSE(is_null);
EXPECT_FALSE(std::isnan(v0)) << "row0 must not be polluted by Inf*0, got " << v0;
EXPECT_TRUE(std::isfinite(v0));
EXPECT_DOUBLE_EQ(v0, 2.0);

auto v1 = get_float64_value(result, 1, &is_null);
EXPECT_FALSE(is_null);
EXPECT_DOUBLE_EQ(v1, 200.0);
}

// Verify that when NaN itself is the valid non-null value picked by coalesce,
// the result must preserve the NaN (the fix should not over-correct).
// col0 (nullable, double): [NaN, NULL]
// col1 (non-nullable, double): [10.0, 20.0]
// Expected: row0 takes col0's NaN (non-null, value is NaN); row1 takes col1's 20.0.
TEST_F(VConditionExprCoalesceTest, Float64_NaN_PreservedWhenSelected) {
auto coalesce_node = make_coalesce_node(TPrimitiveType::DOUBLE, /*is_nullable=*/true);
auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node);
coalesce_expr->_data_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>());
coalesce_expr->_open_finished = true;

const double kNaN = std::numeric_limits<double>::quiet_NaN();
auto col0 = make_nullable_float64_column({{kNaN, false}, {0.0, true}});
auto col1 = make_float64_column({10.0, 20.0});

coalesce_expr->add_child(std::make_shared<MockChildVExpr>(
col0, std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>())));
coalesce_expr->add_child(
std::make_shared<MockChildVExpr>(col1, std::make_shared<DataTypeFloat64>()));

VExprContext context(coalesce_expr);
ColumnPtr result;
auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr,
/*selector=*/nullptr, /*count=*/2, result);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(result->size(), 2);

bool is_null = false;
auto v0 = get_float64_value(result, 0, &is_null);
EXPECT_FALSE(is_null);
EXPECT_TRUE(std::isnan(v0)) << "row0 should preserve NaN from col0";

auto v1 = get_float64_value(result, 1, &is_null);
EXPECT_FALSE(is_null);
EXPECT_DOUBLE_EQ(v1, 20.0);
}

// Float32 variant: same shape as the Float64 NaN case.
TEST_F(VConditionExprCoalesceTest, Float32_NaN_NotPolluteResult) {
auto coalesce_node = make_coalesce_node(TPrimitiveType::FLOAT, /*is_nullable=*/true);
auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node);
coalesce_expr->_data_type =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat32>());
coalesce_expr->_open_finished = true;

const float kNaN = std::numeric_limits<float>::quiet_NaN();

auto nested0 = ColumnFloat32::create();
nested0->insert_value(1.5f);
nested0->insert_value(0.0f);
auto null_map0 = ColumnUInt8::create();
null_map0->insert_value(0);
null_map0->insert_value(1);
ColumnPtr col0 = ColumnNullable::create(std::move(nested0), std::move(null_map0));

auto nested1 = ColumnFloat32::create();
nested1->insert_value(kNaN);
nested1->insert_value(7.5f);
ColumnPtr col1 = std::move(nested1);

coalesce_expr->add_child(std::make_shared<MockChildVExpr>(
col0, std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat32>())));
coalesce_expr->add_child(
std::make_shared<MockChildVExpr>(col1, std::make_shared<DataTypeFloat32>()));

VExprContext context(coalesce_expr);
ColumnPtr result;
auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr,
/*selector=*/nullptr, /*count=*/2, result);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(result->size(), 2);

auto get_f32 = [&](size_t row, bool* is_null) -> float {
if (const auto* nullable = check_and_get_column<ColumnNullable>(result.get())) {
*is_null = nullable->is_null_at(row);
return assert_cast<const ColumnFloat32&>(nullable->get_nested_column())
.get_data()[row];
}
*is_null = false;
return assert_cast<const ColumnFloat32*>(result.get())->get_data()[row];
};

bool is_null = false;
float v0 = get_f32(0, &is_null);
EXPECT_FALSE(is_null);
EXPECT_FALSE(std::isnan(v0)) << "row0 must not be polluted by NaN, got " << v0;
EXPECT_FLOAT_EQ(v0, 1.5f);

float v1 = get_f32(1, &is_null);
EXPECT_FALSE(is_null);
EXPECT_FLOAT_EQ(v1, 7.5f);
}

// Regression: verify that the non-floating (integer) path is unaffected by the Float branch change.
TEST_F(VConditionExprCoalesceTest, Int32_NormalPathStillWorks) {
auto coalesce_node = make_coalesce_node(TPrimitiveType::INT, /*is_nullable=*/true);
auto coalesce_expr = VectorizedCoalesceExpr::create_shared(coalesce_node);
coalesce_expr->_data_type =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
coalesce_expr->_open_finished = true;

auto nested0 = ColumnInt32::create();
nested0->insert_value(11);
nested0->insert_value(0);
auto null_map0 = ColumnUInt8::create();
null_map0->insert_value(0);
null_map0->insert_value(1);
ColumnPtr col0 = ColumnNullable::create(std::move(nested0), std::move(null_map0));

auto nested1 = ColumnInt32::create();
nested1->insert_value(99);
nested1->insert_value(22);
ColumnPtr col1 = std::move(nested1);

coalesce_expr->add_child(std::make_shared<MockChildVExpr>(
col0, std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>())));
coalesce_expr->add_child(
std::make_shared<MockChildVExpr>(col1, std::make_shared<DataTypeInt32>()));

VExprContext context(coalesce_expr);
ColumnPtr result;
auto st = coalesce_expr->execute_column_impl(&context, /*block=*/nullptr,
/*selector=*/nullptr, /*count=*/2, result);
ASSERT_TRUE(st.ok()) << st.to_string();
ASSERT_EQ(result->size(), 2);

auto get_int = [&](size_t row, bool* is_null) -> int32_t {
if (const auto* nullable = check_and_get_column<ColumnNullable>(result.get())) {
*is_null = nullable->is_null_at(row);
return assert_cast<const ColumnInt32&>(nullable->get_nested_column())
.get_data()[row];
}
*is_null = false;
return assert_cast<const ColumnInt32*>(result.get())->get_data()[row];
};

bool is_null = false;
EXPECT_EQ(get_int(0, &is_null), 11);
EXPECT_FALSE(is_null);
EXPECT_EQ(get_int(1, &is_null), 22);
EXPECT_FALSE(is_null);
}

} // namespace doris