From 25274d3b2eedb050c3031ad4d3a10de4a4eb8029 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Mon, 27 Apr 2026 18:44:31 +0800 Subject: [PATCH 1/9] [fix](runtime-filter) Restore _applied_rf_num update in late arrival path ### What problem does this PR solve? Issue Number: close #xxx Related PR: #59786 Problem Summary: PR #59786 (commit e78d089ec81) refactored Scanner::try_append_late_arrival_runtime_filter() and accidentally removed the trailing `_applied_rf_num = arrived_rf_num;` assignment. As a result, _applied_rf_num is permanently 0 after construction: * the fast-path `_applied_rf_num == _total_rf_num` early return at the top of the function never fires, so every batch goes through the full late-arrival check; * `arrived_rf_num == _applied_rf_num` only short-circuits when no runtime filter has ever arrived, so once any RF arrives every subsequent call needlessly clears _conjuncts, re-clones them, and appends the old ctxs into _stale_expr_ctxs (CPU waste + slow memory growth); * the `ApplyAllRuntimeFilters=True` info string in profile (file_scanner.cpp) is never emitted; * `DCHECK(_applied_rf_num < _total_rf_num)` is effectively dead because the left-hand side is always 0. Restore the single missing assignment after cloning the new conjunct ctxs to bring back the original behavior. ### Release note None ### Check List (For Author) - Test: No need to test (one-line restoration of removed assignment; behavior covered by existing runtime-filter regression tests) - Behavior changed: No - Does this need documentation: No Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> (cherry picked from commit 884c9789984e73d6306734ea1b764b12310006f5) --- be/src/vec/exec/scan/scanner.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/be/src/vec/exec/scan/scanner.cpp b/be/src/vec/exec/scan/scanner.cpp index 8dc65bf720056c..e5382af9cd10e9 100644 --- a/be/src/vec/exec/scan/scanner.cpp +++ b/be/src/vec/exec/scan/scanner.cpp @@ -206,6 +206,7 @@ Status Scanner::try_append_late_arrival_runtime_filter() { // avoid conjunct destroy in used by storage layer _conjuncts.clear(); RETURN_IF_ERROR(_local_state->clone_conjunct_ctxs(_conjuncts)); + _applied_rf_num = arrived_rf_num; return Status::OK(); } From b669cd57b5d2c7ac63f48729822c472143cf234d Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Mon, 27 Apr 2026 19:43:35 +0800 Subject: [PATCH 2/9] [test](runtime-filter) Add unit test for late-arrival RF apply count Related PR: #62872 Problem Summary: Lock in the fix for `Scanner::try_append_late_arrival_runtime_filter` so the regression introduced in PR #59786 cannot reappear. The new BE unit test instantiates a Scanner over MockScanLocalState/MockScanOperatorX, signals two runtime filters, and asserts that `_applied_rf_num` advances to `_total_rf_num` and that the second invocation hits the fast-path early return without re-cloning conjuncts. None - Test: Unit Test - Behavior changed: No - Does this need documentation: No Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> (cherry picked from commit 675dee19c09ef694ef659574f7aaab5795a13b9c) --- .../vec/exec/scanner_late_arrival_rf_test.cpp | 122 ++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 be/test/vec/exec/scanner_late_arrival_rf_test.cpp diff --git a/be/test/vec/exec/scanner_late_arrival_rf_test.cpp b/be/test/vec/exec/scanner_late_arrival_rf_test.cpp new file mode 100644 index 00000000000000..5a9e2381f2710c --- /dev/null +++ b/be/test/vec/exec/scanner_late_arrival_rf_test.cpp @@ -0,0 +1,122 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include "pipeline/exec/mock_scan_operator.h" +#include "runtime_filter/runtime_filter_consumer.h" +#include "runtime_filter/runtime_filter_consumer_helper.h" +#include "runtime_filter/runtime_filter_producer.h" +#include "runtime_filter/runtime_filter_test_utils.h" +#include "runtime/descriptors.h" +#include "runtime/exec_env.h" +#include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_number.h" +#include "vec/exec/scan/scanner.h" + +namespace doris { + +// Minimal concrete Scanner so the abstract base can be instantiated. The +// behavior under test (`Scanner::try_append_late_arrival_runtime_filter`) lives +// entirely in the base class and never calls `_get_block_impl`. +class TestScanner final : public vectorized::Scanner { +public: + TestScanner(RuntimeState* state, pipeline::ScanLocalStateBase* local_state, int64_t limit, + RuntimeProfile* profile) + : vectorized::Scanner(state, local_state, limit, profile) {} + +protected: + Status _get_block_impl(RuntimeState* /*state*/, vectorized::Block* /*block*/, + bool* eof) override { + *eof = true; + return Status::OK(); + } +}; + +class ScannerLateArrivalRfTest : public RuntimeFilterTest { +public: + void SetUp() override { + RuntimeFilterTest::SetUp(); + // The runtime filter helper schedules a timer on the global queue when + // it initialises, so the queue must exist before init() is called. + ExecEnv::GetInstance()->_init_runtime_filter_timer_queue(); + } +}; + +// Regression: PR #59786 accidentally removed `_applied_rf_num = arrived_rf_num` +// from `Scanner::try_append_late_arrival_runtime_filter`, leaving the counter +// at 0 forever. Subsequent calls then re-clone conjunct ctxs every time, the +// `ApplyAllRuntimeFilters=True` info-string is never emitted, and the +// `_applied_rf_num <= _total_rf_num` DCHECK becomes dead. This test asserts +// the counter advances after RFs arrive and that the second call short-circuits +// via the fast path at the top of the function. +TEST_F(ScannerLateArrivalRfTest, applied_rf_num_advances_after_late_arrival) { + std::vector rf_descs = { + TRuntimeFilterDescBuilder().add_planId_to_target_expr(0).build(), + TRuntimeFilterDescBuilder().add_planId_to_target_expr(0).build()}; + + SlotDescriptor slot_desc; + slot_desc._type = DataTypeFactory::instance().create_data_type(PrimitiveType::TYPE_INT, false); + TupleDescriptor tuple_desc; + tuple_desc.add_slot(&slot_desc); + RowDescriptor row_desc; + _tbl._slot_desc_map[0] = &slot_desc; + const_cast&>(row_desc._tuple_desc_map).push_back(&tuple_desc); + + // MockScanOperatorX feeds `_runtime_filter_descs` and `_row_descriptor` + // into the local state's helper. Access to the protected members is + // permitted by the build-wide `-fno-access-control` flag. + auto op = std::make_shared(); + op->_runtime_filter_descs = rf_descs; + op->_row_descriptor = row_desc; + + auto local_state = + std::make_shared(_runtime_states[0].get(), op.get()); + + std::vector> rf_dependencies; + ASSERT_TRUE(local_state->_helper.init(_runtime_states[0].get(), true, 0, 0, rf_dependencies, "") + .ok()); + + auto scanner = std::make_unique(_runtime_states[0].get(), local_state.get(), + -1 /*limit*/, &_profile); + ASSERT_TRUE(scanner->init(_runtime_states[0].get(), {}).ok()); + ASSERT_EQ(scanner->_total_rf_num, 2); + ASSERT_EQ(scanner->_applied_rf_num, 0); + + std::shared_ptr producer; + ASSERT_TRUE(RuntimeFilterProducer::create(_query_ctx.get(), rf_descs.data(), &producer).ok()); + producer->set_wrapper_state_and_ready_to_publish(RuntimeFilterWrapper::State::READY); + local_state->_helper._consumers[0]->signal(producer.get()); + local_state->_helper._consumers[1]->signal(producer.get()); + + // First call after both RFs arrived: counter must advance to total. Before + // the fix this stayed at 0 because the assignment was missing. + ASSERT_TRUE(scanner->try_append_late_arrival_runtime_filter().ok()); + ASSERT_EQ(scanner->_applied_rf_num, 2); + + // Second call: must hit the fast-path early return without re-cloning. + // We clear `_conjuncts` and verify the function does NOT repopulate them; + // if `_applied_rf_num` were still 0 the function would call + // `clone_conjunct_ctxs` and overwrite the cleared vector. + scanner->_conjuncts.clear(); + ASSERT_TRUE(scanner->try_append_late_arrival_runtime_filter().ok()); + ASSERT_EQ(scanner->_applied_rf_num, 2); + ASSERT_TRUE(scanner->_conjuncts.empty()); +} + +} // namespace doris From a2364d8eb4307feba7cc77f957121a0d106ab84b Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Fri, 10 Apr 2026 19:34:34 +0800 Subject: [PATCH 3/9] [fix](be) Fix RuntimeFilter selectivity sampling_frequency lost during VExprContext recreation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Issue Number: close #xxx Problem Summary: RuntimeFilter selectivity tracking was completely non-functional because `sampling_frequency` was lost during VExprContext recreation. In `RuntimeFilterConsumer::_get_push_exprs()`, `sampling_frequency=32` was set on a temporary `probe_ctx` VExprContext. However, only VRuntimeFilterWrapper expressions (VExpr) were returned, not the VExprContext. When `_append_rf_into_conjuncts()` later created a new VExprContext via `VExprContext::create_shared(expr)`, the new context had default `_sampling_frequency=-1` (DISABLE_SAMPLING). With `_sampling_frequency=-1`, the condition `(_judge_counter++) >= -1` evaluated to `0 >= -1 → true` on every call, causing `reset_judge_selectivity()` to fire every time. This meant selectivity counters were perpetually reset and never accumulated, making the runtime filter selectivity optimization completely ineffective. The fix stores `sampling_frequency` in VRuntimeFilterWrapper (which survives VExprContext recreation) and propagates it to VExprContext in `VRuntimeFilterWrapper::open()`, which is called on both original and cloned contexts. Fixed a bug where RuntimeFilter selectivity tracking was non-functional due to sampling_frequency being lost during VExprContext recreation, causing runtime filters that should be skipped (due to low selectivity) to never be identified. - Test: Unit Test - Added 2 regression tests to runtime_filter_selectivity_test.cpp - Added 3 new tests in vruntimefilter_wrapper_sampling_test.cpp - All 22 tests pass - Behavior changed: No (selectivity tracking was broken before, this makes it work as designed) - Does this need documentation: No Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> (cherry picked from commit f7419221907e1ff81994dac99587dffaa9e14bc4) --- .../runtime_filter_consumer.cpp | 19 +- be/src/vec/exprs/vruntimefilter_wrapper.cpp | 6 +- be/src/vec/exprs/vruntimefilter_wrapper.h | 7 +- .../runtime_filter_selectivity_test.cpp | 46 +++++ .../vruntimefilter_wrapper_sampling_test.cpp | 180 ++++++++++++++++++ 5 files changed, 245 insertions(+), 13 deletions(-) create mode 100644 be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp diff --git a/be/src/runtime_filter/runtime_filter_consumer.cpp b/be/src/runtime_filter/runtime_filter_consumer.cpp index d72c6af7c8dd74..271aff08aac501 100644 --- a/be/src/runtime_filter/runtime_filter_consumer.cpp +++ b/be/src/runtime_filter/runtime_filter_consumer.cpp @@ -18,6 +18,7 @@ #include "runtime_filter/runtime_filter_consumer.h" #include "exprs/minmax_predicate.h" +#include "runtime_filter/runtime_filter_selectivity.h" #include "util/runtime_profile.h" #include "vec/exprs/vbitmap_predicate.h" #include "vec/exprs/vbloom_predicate.h" @@ -84,11 +85,11 @@ Status RuntimeFilterConsumer::_get_push_exprs(std::vectorget_real_type(); bool null_aware = _wrapper->contain_null(); - // Set sampling frequency based on disable_always_true_logic status + // Determine sampling frequency for the always_true optimization. + // This will be propagated to VExprContext in VRuntimeFilterWrapper::open(). int sampling_frequency = _wrapper->disable_always_true_logic() ? RuntimeFilterSelectivity::DISABLE_SAMPLING : config::runtime_filter_sampling_frequency; - probe_ctx->get_runtime_filter_selectivity().set_sampling_frequency(sampling_frequency); switch (real_filter_type) { case RuntimeFilterType::IN_FILTER: { @@ -106,7 +107,7 @@ Status RuntimeFilterConsumer::_get_push_exprs(std::vectoradd_child(probe_ctx->root()); auto wrapper = vectorized::VRuntimeFilterWrapper::create_shared( node, in_pred, get_in_list_ignore_thredhold(_wrapper->hybrid_set()->size()), - null_aware, _wrapper->filter_id()); + null_aware, _wrapper->filter_id(), sampling_frequency); container.push_back(wrapper); break; } @@ -124,7 +125,7 @@ Status RuntimeFilterConsumer::_get_push_exprs(std::vectorfilter_id())); + _wrapper->filter_id(), sampling_frequency)); break; } case RuntimeFilterType::MAX_FILTER: { @@ -141,7 +142,7 @@ Status RuntimeFilterConsumer::_get_push_exprs(std::vectorfilter_id())); + _wrapper->filter_id(), sampling_frequency)); break; } case RuntimeFilterType::MINMAX_FILTER: { @@ -157,7 +158,7 @@ Status RuntimeFilterConsumer::_get_push_exprs(std::vectoradd_child(max_literal); container.push_back(vectorized::VRuntimeFilterWrapper::create_shared( max_pred_node, max_pred, get_comparison_ignore_thredhold(), null_aware, - _wrapper->filter_id())); + _wrapper->filter_id(), sampling_frequency)); vectorized::VExprContextSPtr new_probe_ctx; RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(probe_expr, new_probe_ctx)); @@ -174,7 +175,7 @@ Status RuntimeFilterConsumer::_get_push_exprs(std::vectoradd_child(min_literal); container.push_back(vectorized::VRuntimeFilterWrapper::create_shared( min_pred_node, min_pred, get_comparison_ignore_thredhold(), null_aware, - _wrapper->filter_id())); + _wrapper->filter_id(), sampling_frequency)); break; } case RuntimeFilterType::BLOOM_FILTER: { @@ -191,7 +192,7 @@ Status RuntimeFilterConsumer::_get_push_exprs(std::vectoradd_child(probe_ctx->root()); auto wrapper = vectorized::VRuntimeFilterWrapper::create_shared( node, bloom_pred, get_bloom_filter_ignore_thredhold(), null_aware, - _wrapper->filter_id()); + _wrapper->filter_id(), sampling_frequency); container.push_back(wrapper); break; } @@ -209,7 +210,7 @@ Status RuntimeFilterConsumer::_get_push_exprs(std::vectoradd_child(probe_ctx->root()); DCHECK(null_aware == false) << "bitmap predicate do not support null aware"; auto wrapper = vectorized::VRuntimeFilterWrapper::create_shared( - node, bitmap_pred, 0, null_aware, _wrapper->filter_id()); + node, bitmap_pred, 0, null_aware, _wrapper->filter_id(), sampling_frequency); container.push_back(wrapper); break; } diff --git a/be/src/vec/exprs/vruntimefilter_wrapper.cpp b/be/src/vec/exprs/vruntimefilter_wrapper.cpp index d54ad6c0273013..79ff443a995c0f 100644 --- a/be/src/vec/exprs/vruntimefilter_wrapper.cpp +++ b/be/src/vec/exprs/vruntimefilter_wrapper.cpp @@ -57,12 +57,13 @@ class VExprContext; VRuntimeFilterWrapper::VRuntimeFilterWrapper(const TExprNode& node, VExprSPtr impl, double ignore_thredhold, bool null_aware, - int filter_id) + int filter_id, int sampling_frequency) : VExpr(node), _impl(std::move(impl)), _ignore_thredhold(ignore_thredhold), _null_aware(null_aware), - _filter_id(filter_id) {} + _filter_id(filter_id), + _sampling_frequency(sampling_frequency) {} Status VRuntimeFilterWrapper::prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) { @@ -76,6 +77,7 @@ Status VRuntimeFilterWrapper::open(RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope) { DCHECK(_prepare_finished); RETURN_IF_ERROR(_impl->open(state, context, scope)); + context->get_runtime_filter_selectivity().set_sampling_frequency(_sampling_frequency); _open_finished = true; return Status::OK(); } diff --git a/be/src/vec/exprs/vruntimefilter_wrapper.h b/be/src/vec/exprs/vruntimefilter_wrapper.h index 91c60660678b57..d6e5873c2b8a97 100644 --- a/be/src/vec/exprs/vruntimefilter_wrapper.h +++ b/be/src/vec/exprs/vruntimefilter_wrapper.h @@ -26,6 +26,7 @@ #include "common/config.h" #include "common/status.h" +#include "runtime_filter/runtime_filter_selectivity.h" #include "util/runtime_profile.h" #include "vec/exprs/function_context.h" #include "vec/exprs/vexpr.h" @@ -51,7 +52,8 @@ class VRuntimeFilterWrapper final : public VExpr { public: VRuntimeFilterWrapper(const TExprNode& node, VExprSPtr impl, double ignore_thredhold, - bool null_aware, int filter_id); + bool null_aware, int filter_id, + int sampling_frequency = RuntimeFilterSelectivity::DISABLE_SAMPLING); ~VRuntimeFilterWrapper() override = default; Status execute_column(VExprContext* context, const Block* block, Selector* selector, size_t count, ColumnPtr& result_column) const override; @@ -115,9 +117,10 @@ class VRuntimeFilterWrapper final : public VExpr { double _ignore_thredhold; bool _null_aware; int _filter_id; + int _sampling_frequency; }; using VRuntimeFilterPtr = std::shared_ptr; #include "common/compile_check_end.h" -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/test/runtime_filter/runtime_filter_selectivity_test.cpp b/be/test/runtime_filter/runtime_filter_selectivity_test.cpp index bba98f3ecf2cf9..45e56ad65ded8d 100644 --- a/be/test/runtime_filter/runtime_filter_selectivity_test.cpp +++ b/be/test/runtime_filter/runtime_filter_selectivity_test.cpp @@ -228,4 +228,50 @@ TEST_F(RuntimeFilterSelectivityTest, different_thresholds) { } } +// Regression test: with default sampling_frequency (-1), update_judge_counter() +// always resets because (_judge_counter++) >= -1 is always true. +// This was the root cause of the selectivity accumulation bug. +TEST_F(RuntimeFilterSelectivityTest, default_sampling_frequency_always_resets) { + RuntimeFilterSelectivity selectivity; + // Don't set sampling_frequency — defaults to DISABLE_SAMPLING (-1) + + // Accumulate selectivity data: low filter rate -> should be always_true + selectivity.update_judge_selectivity(-1, 2000, 50000, 0.1); + // With default -1, maybe_always_true_can_ignore returns false (disabled) + EXPECT_FALSE(selectivity.maybe_always_true_can_ignore()); + + // Now call update_judge_counter — with -1, it immediately resets + selectivity.update_judge_counter(); + // Verify: accumulated data has been wiped out by the reset + // Even after setting a valid sampling_frequency, the previously accumulated + // selectivity data is gone + selectivity.set_sampling_frequency(100); + // always_true was reset to false by the premature reset + EXPECT_FALSE(selectivity.maybe_always_true_can_ignore()); +} + +// Verify that setting sampling_frequency correctly prevents premature reset +TEST_F(RuntimeFilterSelectivityTest, proper_sampling_frequency_preserves_accumulation) { + RuntimeFilterSelectivity selectivity; + selectivity.set_sampling_frequency(32); + + // Accumulate selectivity: low filter rate + selectivity.update_judge_selectivity(-1, 2000, 50000, 0.1); + EXPECT_TRUE(selectivity.maybe_always_true_can_ignore()); + + // Counter increments don't reset before reaching sampling_frequency. + // Post-increment semantics: check uses old value, so need 33 calls total + // to trigger reset (counter must reach 32 before comparison fires). + for (int i = 0; i < 32; i++) { + selectivity.update_judge_counter(); + } + // Still always_true because counter value 31 was compared last (31 >= 32 → false) + EXPECT_TRUE(selectivity.maybe_always_true_can_ignore()); + + // 33rd call: counter=32, 32 >= 32 → true → triggers reset + selectivity.update_judge_counter(); + // After reset, needs re-evaluation + EXPECT_FALSE(selectivity.maybe_always_true_can_ignore()); +} + } // namespace doris diff --git a/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp b/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp new file mode 100644 index 00000000000000..7a30262d32f787 --- /dev/null +++ b/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp @@ -0,0 +1,180 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include "runtime_filter/runtime_filter_selectivity.h" +#include "runtime_filter/runtime_filter_test_utils.h" +#include "vec/exprs/vexpr_context.h" +#include "vec/exprs/vruntimefilter_wrapper.h" + +namespace doris { + +// Minimal VExpr implementation for testing VRuntimeFilterWrapper in isolation. +class StubVExpr : public vectorized::VExpr { +public: + StubVExpr() : vectorized::VExpr(make_texpr_node()) {} + + const std::string& expr_name() const override { + static const std::string name = "StubVExpr"; + return name; + } + + Status execute(vectorized::VExprContext*, vectorized::Block*, int*) const override { + return Status::OK(); + } + + Status execute_column(vectorized::VExprContext*, const vectorized::Block*, + vectorized::Selector*, size_t, vectorized::ColumnPtr&) const override { + return Status::OK(); + } + +private: + static TExprNode make_texpr_node() { + return TExprNodeBuilder( + TExprNodeType::SLOT_REF, + TTypeDescBuilder() + .set_types(TTypeNodeBuilder() + .set_type(TTypeNodeType::SCALAR) + .set_scalar_type(TPrimitiveType::INT) + .build()) + .build(), + 0) + .build(); + } +}; + +class VRuntimeFilterWrapperSamplingTest : public RuntimeFilterTest {}; + +// Test that VRuntimeFilterWrapper stores and propagates sampling_frequency +// through open() to VExprContext. This is the core fix for the bug where +// sampling_frequency was lost when _append_rf_into_conjuncts creates a new +// VExprContext via VExprContext::create_shared(expr). +TEST_F(VRuntimeFilterWrapperSamplingTest, open_propagates_sampling_frequency) { + auto stub = std::make_shared(); + auto node = TExprNodeBuilder( + TExprNodeType::SLOT_REF, + TTypeDescBuilder() + .set_types(TTypeNodeBuilder() + .set_type(TTypeNodeType::SCALAR) + .set_scalar_type(TPrimitiveType::INT) + .build()) + .build(), + 0) + .build(); + + const int expected_frequency = 32; + auto wrapper = vectorized::VRuntimeFilterWrapper::create_shared( + node, stub, 0.4, false, /*filter_id=*/1, expected_frequency); + + // Simulate the VExprContext recreation that happens in _append_rf_into_conjuncts. + // A fresh VExprContext has default sampling_frequency = DISABLE_SAMPLING (-1). + auto context = std::make_shared(wrapper); + ASSERT_EQ(context->get_runtime_filter_selectivity().maybe_always_true_can_ignore(), false); + + RowDescriptor row_desc; + ASSERT_TRUE(wrapper->prepare(_runtime_states[0].get(), row_desc, context.get()).ok()); + ASSERT_TRUE(wrapper->open(_runtime_states[0].get(), context.get(), + FunctionContext::FRAGMENT_LOCAL) + .ok()); + + // After open(), sampling_frequency should be propagated from VRuntimeFilterWrapper + // to VExprContext. Verify by accumulating low-selectivity data and checking + // that always_true can now be detected. + auto& selectivity = context->get_runtime_filter_selectivity(); + selectivity.update_judge_selectivity(1, 2000, 50000, 0.1); + EXPECT_TRUE(selectivity.maybe_always_true_can_ignore()); +} + +// Test that default sampling_frequency (DISABLE_SAMPLING) disables the always_true +// optimization, matching the behavior when disable_always_true_logic is set. +TEST_F(VRuntimeFilterWrapperSamplingTest, default_sampling_frequency_disables_optimization) { + auto stub = std::make_shared(); + auto node = TExprNodeBuilder( + TExprNodeType::SLOT_REF, + TTypeDescBuilder() + .set_types(TTypeNodeBuilder() + .set_type(TTypeNodeType::SCALAR) + .set_scalar_type(TPrimitiveType::INT) + .build()) + .build(), + 0) + .build(); + + // No sampling_frequency argument - uses default DISABLE_SAMPLING + auto wrapper = vectorized::VRuntimeFilterWrapper::create_shared(node, stub, 0.4, false, + /*filter_id=*/1); + + auto context = std::make_shared(wrapper); + RowDescriptor row_desc; + ASSERT_TRUE(wrapper->prepare(_runtime_states[0].get(), row_desc, context.get()).ok()); + ASSERT_TRUE(wrapper->open(_runtime_states[0].get(), context.get(), + FunctionContext::FRAGMENT_LOCAL) + .ok()); + + // Even with low-selectivity data, always_true should NOT be detected + // because sampling is disabled + auto& selectivity = context->get_runtime_filter_selectivity(); + selectivity.update_judge_selectivity(1, 2000, 50000, 0.1); + EXPECT_FALSE(selectivity.maybe_always_true_can_ignore()); +} + +// Test that sampling_frequency survives VExprContext recreation, which is the +// exact scenario that caused the original bug. +TEST_F(VRuntimeFilterWrapperSamplingTest, sampling_frequency_survives_context_recreation) { + auto stub = std::make_shared(); + auto node = TExprNodeBuilder( + TExprNodeType::SLOT_REF, + TTypeDescBuilder() + .set_types(TTypeNodeBuilder() + .set_type(TTypeNodeType::SCALAR) + .set_scalar_type(TPrimitiveType::INT) + .build()) + .build(), + 0) + .build(); + + const int expected_frequency = 32; + auto wrapper = vectorized::VRuntimeFilterWrapper::create_shared( + node, stub, 0.4, false, /*filter_id=*/1, expected_frequency); + + // First context - prepare and open work + auto context1 = std::make_shared(wrapper); + RowDescriptor row_desc; + ASSERT_TRUE(wrapper->prepare(_runtime_states[0].get(), row_desc, context1.get()).ok()); + ASSERT_TRUE(wrapper->open(_runtime_states[0].get(), context1.get(), + FunctionContext::FRAGMENT_LOCAL) + .ok()); + + // Simulate context recreation (what _append_rf_into_conjuncts does): + // Create a brand new VExprContext with the same VRuntimeFilterWrapper. + // The new context starts with default sampling_frequency = -1. + auto context2 = std::make_shared(wrapper); + EXPECT_FALSE(context2->get_runtime_filter_selectivity().maybe_always_true_can_ignore()); + + // After open() on the new context, sampling_frequency should be propagated + ASSERT_TRUE(wrapper->open(_runtime_states[0].get(), context2.get(), + FunctionContext::THREAD_LOCAL) + .ok()); + + auto& selectivity2 = context2->get_runtime_filter_selectivity(); + selectivity2.update_judge_selectivity(1, 2000, 50000, 0.1); + EXPECT_TRUE(selectivity2.maybe_always_true_can_ignore()); +} + +} // namespace doris From 61d76d018d600c6f375197827838de3fe2d134ec Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Fri, 10 Apr 2026 20:30:17 +0800 Subject: [PATCH 4/9] format (cherry picked from commit 3c4d25e06123fdda11b4a22e712a5f6e177690e9) --- .../vruntimefilter_wrapper_sampling_test.cpp | 92 +++++++++---------- 1 file changed, 44 insertions(+), 48 deletions(-) diff --git a/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp b/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp index 7a30262d32f787..c7219f90553d99 100644 --- a/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp +++ b/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp @@ -46,15 +46,14 @@ class StubVExpr : public vectorized::VExpr { private: static TExprNode make_texpr_node() { - return TExprNodeBuilder( - TExprNodeType::SLOT_REF, - TTypeDescBuilder() - .set_types(TTypeNodeBuilder() - .set_type(TTypeNodeType::SCALAR) - .set_scalar_type(TPrimitiveType::INT) - .build()) - .build(), - 0) + return TExprNodeBuilder(TExprNodeType::SLOT_REF, + TTypeDescBuilder() + .set_types(TTypeNodeBuilder() + .set_type(TTypeNodeType::SCALAR) + .set_scalar_type(TPrimitiveType::INT) + .build()) + .build(), + 0) .build(); } }; @@ -67,15 +66,14 @@ class VRuntimeFilterWrapperSamplingTest : public RuntimeFilterTest {}; // VExprContext via VExprContext::create_shared(expr). TEST_F(VRuntimeFilterWrapperSamplingTest, open_propagates_sampling_frequency) { auto stub = std::make_shared(); - auto node = TExprNodeBuilder( - TExprNodeType::SLOT_REF, - TTypeDescBuilder() - .set_types(TTypeNodeBuilder() - .set_type(TTypeNodeType::SCALAR) - .set_scalar_type(TPrimitiveType::INT) - .build()) - .build(), - 0) + auto node = TExprNodeBuilder(TExprNodeType::SLOT_REF, + TTypeDescBuilder() + .set_types(TTypeNodeBuilder() + .set_type(TTypeNodeType::SCALAR) + .set_scalar_type(TPrimitiveType::INT) + .build()) + .build(), + 0) .build(); const int expected_frequency = 32; @@ -89,9 +87,9 @@ TEST_F(VRuntimeFilterWrapperSamplingTest, open_propagates_sampling_frequency) { RowDescriptor row_desc; ASSERT_TRUE(wrapper->prepare(_runtime_states[0].get(), row_desc, context.get()).ok()); - ASSERT_TRUE(wrapper->open(_runtime_states[0].get(), context.get(), - FunctionContext::FRAGMENT_LOCAL) - .ok()); + ASSERT_TRUE( + wrapper->open(_runtime_states[0].get(), context.get(), FunctionContext::FRAGMENT_LOCAL) + .ok()); // After open(), sampling_frequency should be propagated from VRuntimeFilterWrapper // to VExprContext. Verify by accumulating low-selectivity data and checking @@ -105,15 +103,14 @@ TEST_F(VRuntimeFilterWrapperSamplingTest, open_propagates_sampling_frequency) { // optimization, matching the behavior when disable_always_true_logic is set. TEST_F(VRuntimeFilterWrapperSamplingTest, default_sampling_frequency_disables_optimization) { auto stub = std::make_shared(); - auto node = TExprNodeBuilder( - TExprNodeType::SLOT_REF, - TTypeDescBuilder() - .set_types(TTypeNodeBuilder() - .set_type(TTypeNodeType::SCALAR) - .set_scalar_type(TPrimitiveType::INT) - .build()) - .build(), - 0) + auto node = TExprNodeBuilder(TExprNodeType::SLOT_REF, + TTypeDescBuilder() + .set_types(TTypeNodeBuilder() + .set_type(TTypeNodeType::SCALAR) + .set_scalar_type(TPrimitiveType::INT) + .build()) + .build(), + 0) .build(); // No sampling_frequency argument - uses default DISABLE_SAMPLING @@ -123,9 +120,9 @@ TEST_F(VRuntimeFilterWrapperSamplingTest, default_sampling_frequency_disables_op auto context = std::make_shared(wrapper); RowDescriptor row_desc; ASSERT_TRUE(wrapper->prepare(_runtime_states[0].get(), row_desc, context.get()).ok()); - ASSERT_TRUE(wrapper->open(_runtime_states[0].get(), context.get(), - FunctionContext::FRAGMENT_LOCAL) - .ok()); + ASSERT_TRUE( + wrapper->open(_runtime_states[0].get(), context.get(), FunctionContext::FRAGMENT_LOCAL) + .ok()); // Even with low-selectivity data, always_true should NOT be detected // because sampling is disabled @@ -138,15 +135,14 @@ TEST_F(VRuntimeFilterWrapperSamplingTest, default_sampling_frequency_disables_op // exact scenario that caused the original bug. TEST_F(VRuntimeFilterWrapperSamplingTest, sampling_frequency_survives_context_recreation) { auto stub = std::make_shared(); - auto node = TExprNodeBuilder( - TExprNodeType::SLOT_REF, - TTypeDescBuilder() - .set_types(TTypeNodeBuilder() - .set_type(TTypeNodeType::SCALAR) - .set_scalar_type(TPrimitiveType::INT) - .build()) - .build(), - 0) + auto node = TExprNodeBuilder(TExprNodeType::SLOT_REF, + TTypeDescBuilder() + .set_types(TTypeNodeBuilder() + .set_type(TTypeNodeType::SCALAR) + .set_scalar_type(TPrimitiveType::INT) + .build()) + .build(), + 0) .build(); const int expected_frequency = 32; @@ -157,9 +153,9 @@ TEST_F(VRuntimeFilterWrapperSamplingTest, sampling_frequency_survives_context_re auto context1 = std::make_shared(wrapper); RowDescriptor row_desc; ASSERT_TRUE(wrapper->prepare(_runtime_states[0].get(), row_desc, context1.get()).ok()); - ASSERT_TRUE(wrapper->open(_runtime_states[0].get(), context1.get(), - FunctionContext::FRAGMENT_LOCAL) - .ok()); + ASSERT_TRUE( + wrapper->open(_runtime_states[0].get(), context1.get(), FunctionContext::FRAGMENT_LOCAL) + .ok()); // Simulate context recreation (what _append_rf_into_conjuncts does): // Create a brand new VExprContext with the same VRuntimeFilterWrapper. @@ -168,9 +164,9 @@ TEST_F(VRuntimeFilterWrapperSamplingTest, sampling_frequency_survives_context_re EXPECT_FALSE(context2->get_runtime_filter_selectivity().maybe_always_true_can_ignore()); // After open() on the new context, sampling_frequency should be propagated - ASSERT_TRUE(wrapper->open(_runtime_states[0].get(), context2.get(), - FunctionContext::THREAD_LOCAL) - .ok()); + ASSERT_TRUE( + wrapper->open(_runtime_states[0].get(), context2.get(), FunctionContext::THREAD_LOCAL) + .ok()); auto& selectivity2 = context2->get_runtime_filter_selectivity(); selectivity2.update_judge_selectivity(1, 2000, 50000, 0.1); From d0e801ab3e74dd9ad57370f0b2cc7de5da23ad0d Mon Sep 17 00:00:00 2001 From: Pxl Date: Fri, 10 Apr 2026 20:30:36 +0800 Subject: [PATCH 5/9] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> (cherry picked from commit 69e6004d62860573a87e3a4fa184c58eba2c33da) --- .../vruntimefilter_wrapper_sampling_test.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp b/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp index c7219f90553d99..34576338775bdc 100644 --- a/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp +++ b/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp @@ -167,10 +167,10 @@ TEST_F(VRuntimeFilterWrapperSamplingTest, sampling_frequency_survives_context_re ASSERT_TRUE( wrapper->open(_runtime_states[0].get(), context2.get(), FunctionContext::THREAD_LOCAL) .ok()); - - auto& selectivity2 = context2->get_runtime_filter_selectivity(); - selectivity2.update_judge_selectivity(1, 2000, 50000, 0.1); - EXPECT_TRUE(selectivity2.maybe_always_true_can_ignore()); + // Prepare/open the recreated context through VExprContext so the test matches + // the production lifecycle and context-managed initialization. + ASSERT_TRUE(context2->prepare(_runtime_states[0].get(), row_desc).ok()); + ASSERT_TRUE(context2->open(_runtime_states[0].get()).ok()); } } // namespace doris From 37bdfbd43c9174377c45609974ebe78726b7df55 Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Fri, 10 Apr 2026 20:33:11 +0800 Subject: [PATCH 6/9] update (cherry picked from commit f9a109b58b0a075df6d91af9851d9a71db105bad) --- .../vruntimefilter_wrapper_sampling_test.cpp | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp b/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp index 34576338775bdc..b1ff138f7403fc 100644 --- a/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp +++ b/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp @@ -157,20 +157,23 @@ TEST_F(VRuntimeFilterWrapperSamplingTest, sampling_frequency_survives_context_re wrapper->open(_runtime_states[0].get(), context1.get(), FunctionContext::FRAGMENT_LOCAL) .ok()); - // Simulate context recreation (what _append_rf_into_conjuncts does): - // Create a brand new VExprContext with the same VRuntimeFilterWrapper. - // The new context starts with default sampling_frequency = -1. + // Create a brand new non-clone VExprContext with the same VRuntimeFilterWrapper, + // matching the production path in _append_rf_into_conjuncts which calls + // VExprContext::create_shared(expr) then conjunct->prepare() and conjunct->open(). auto context2 = std::make_shared(wrapper); EXPECT_FALSE(context2->get_runtime_filter_selectivity().maybe_always_true_can_ignore()); - // After open() on the new context, sampling_frequency should be propagated - ASSERT_TRUE( - wrapper->open(_runtime_states[0].get(), context2.get(), FunctionContext::THREAD_LOCAL) - .ok()); - // Prepare/open the recreated context through VExprContext so the test matches - // the production lifecycle and context-managed initialization. + // Drive the recreated context through prepare/open via VExprContext (not the + // wrapper directly), matching the production _append_rf_into_conjuncts lifecycle. ASSERT_TRUE(context2->prepare(_runtime_states[0].get(), row_desc).ok()); ASSERT_TRUE(context2->open(_runtime_states[0].get()).ok()); + + // After open(), sampling_frequency should be propagated from VRuntimeFilterWrapper + // to context2. Verify by accumulating low-selectivity data and checking that + // always_true can be detected — this is the actual behavior the fix protects. + auto& selectivity = context2->get_runtime_filter_selectivity(); + selectivity.update_judge_selectivity(1, 2000, 50000, 0.1); + EXPECT_TRUE(selectivity.maybe_always_true_can_ignore()); } } // namespace doris From f9e04384f0e346dd9918c1944d9ed50554e2240c Mon Sep 17 00:00:00 2001 From: BiteTheDDDDt Date: Mon, 13 Apr 2026 19:18:49 +0800 Subject: [PATCH 7/9] [fix](test) Fix StubVExpr is_constant() causing DCHECK crash in sampling test StubVExpr (leaf node with no children) defaulted to is_constant()=true via VExpr::is_constant()'s all_of on empty children. On second open() call (context2), _constant_col was already populated from context1, causing DCHECK(column_wrapper != nullptr) to fail in get_const_col() when called with nullptr from VExpr::open(). Fix: override is_constant() to return false, matching SLOT_REF semantics. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> (cherry picked from commit 641c01007e73747fcab1f2dd6f41330248ac6818) --- .../runtime_filter/vruntimefilter_wrapper_sampling_test.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp b/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp index b1ff138f7403fc..5d4fee76c74874 100644 --- a/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp +++ b/be/test/runtime_filter/vruntimefilter_wrapper_sampling_test.cpp @@ -44,6 +44,11 @@ class StubVExpr : public vectorized::VExpr { return Status::OK(); } + // SLOT_REF is not a constant — without this override, VExpr::is_constant() + // returns true for a leaf node (no children), causing get_const_col() to + // DCHECK-fail on the second open() call. + bool is_constant() const override { return false; } + private: static TExprNode make_texpr_node() { return TExprNodeBuilder(TExprNodeType::SLOT_REF, From 41f9c51fdf5efb55f309f8fd695849eecdfebca3 Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Fri, 8 May 2026 17:05:43 +0800 Subject: [PATCH 8/9] [chore](test) Format branch-4.0 scanner runtime filter test Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- be/test/vec/exec/scanner_late_arrival_rf_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/be/test/vec/exec/scanner_late_arrival_rf_test.cpp b/be/test/vec/exec/scanner_late_arrival_rf_test.cpp index 5a9e2381f2710c..fdad14f03919de 100644 --- a/be/test/vec/exec/scanner_late_arrival_rf_test.cpp +++ b/be/test/vec/exec/scanner_late_arrival_rf_test.cpp @@ -19,12 +19,12 @@ #include #include "pipeline/exec/mock_scan_operator.h" +#include "runtime/descriptors.h" +#include "runtime/exec_env.h" #include "runtime_filter/runtime_filter_consumer.h" #include "runtime_filter/runtime_filter_consumer_helper.h" #include "runtime_filter/runtime_filter_producer.h" #include "runtime_filter/runtime_filter_test_utils.h" -#include "runtime/descriptors.h" -#include "runtime/exec_env.h" #include "vec/data_types/data_type_factory.hpp" #include "vec/data_types/data_type_number.h" #include "vec/exec/scan/scanner.h" From d12b4c5940ab7b6a91f12947a0fad7a38f77a16a Mon Sep 17 00:00:00 2001 From: Copilot <223556219+Copilot@users.noreply.github.com> Date: Fri, 8 May 2026 19:02:43 +0800 Subject: [PATCH 9/9] [fix](test) Qualify DataTypeFactory in branch-4.0 scanner RF test ### What problem does this PR solve? Issue Number: close #62872 Related PR: #62872 Problem Summary: Adapt the late-arrival runtime filter scanner unit test to branch-4.0 namespaces by qualifying DataTypeFactory. ### Release note None ### Check List (For Author) - Test: Unit Test - ./run-be-ut.sh --run --filter=ScannerLateArrivalRfTest.*:VRuntimeFilterWrapperSamplingTest.*:RuntimeFilterSelectivityTest.* -j 16 - Behavior changed: No - Does this need documentation: No Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- be/test/vec/exec/scanner_late_arrival_rf_test.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/be/test/vec/exec/scanner_late_arrival_rf_test.cpp b/be/test/vec/exec/scanner_late_arrival_rf_test.cpp index fdad14f03919de..3e55ba4acbfa4f 100644 --- a/be/test/vec/exec/scanner_late_arrival_rf_test.cpp +++ b/be/test/vec/exec/scanner_late_arrival_rf_test.cpp @@ -71,7 +71,8 @@ TEST_F(ScannerLateArrivalRfTest, applied_rf_num_advances_after_late_arrival) { TRuntimeFilterDescBuilder().add_planId_to_target_expr(0).build()}; SlotDescriptor slot_desc; - slot_desc._type = DataTypeFactory::instance().create_data_type(PrimitiveType::TYPE_INT, false); + slot_desc._type = vectorized::DataTypeFactory::instance().create_data_type( + PrimitiveType::TYPE_INT, false); TupleDescriptor tuple_desc; tuple_desc.add_slot(&slot_desc); RowDescriptor row_desc;