Skip to content

Commit 0c3262a

Browse files
committed
add _outer expand
1 parent b0c447a commit 0c3262a

9 files changed

Lines changed: 408 additions & 16 deletions

File tree

be/src/vec/exprs/table_function/vjson_each.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ static void insert_value_as_json(const JsonbValue* value, MutableColumnPtr& col,
9393
template <bool TEXT_MODE>
9494
void VJsonEachTableFunction<TEXT_MODE>::process_row(size_t row_idx) {
9595
TableFunction::process_row(row_idx);
96+
if (_is_const && _cur_size > 0) {
97+
return;
98+
}
9699

97100
StringRef text;
98101
const size_t idx = _is_const ? 0 : row_idx;

be/src/vec/functions/function_fake.cpp

Lines changed: 8 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,7 @@ struct FunctionFakeBaseImpl {
6060

6161
struct FunctionExplode {
6262
static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
63-
DCHECK(arguments[0]->get_primitive_type() == TYPE_ARRAY)
64-
<< arguments[0]->get_name() << " not supported";
63+
DORIS_CHECK(arguments[0]->get_primitive_type() == TYPE_ARRAY);
6564
return make_nullable(
6665
check_and_get_data_type<DataTypeArray>(arguments[0].get())->get_nested_type());
6766
}
@@ -103,8 +102,7 @@ struct FunctionExplodeV2 {
103102
// explode map: make map k,v as struct field
104103
struct FunctionExplodeMap {
105104
static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
106-
DCHECK(arguments[0]->get_primitive_type() == TYPE_MAP)
107-
<< arguments[0]->get_name() << " not supported";
105+
DORIS_CHECK(arguments[0]->get_primitive_type() == TYPE_MAP);
108106
DataTypes fieldTypes(2);
109107
fieldTypes[0] = check_and_get_data_type<DataTypeMap>(arguments[0].get())->get_key_type();
110108
fieldTypes[1] = check_and_get_data_type<DataTypeMap>(arguments[0].get())->get_value_type();
@@ -120,8 +118,7 @@ struct FunctionPoseExplode {
120118
DataTypes fieldTypes(arguments.size() + 1);
121119
fieldTypes[0] = std::make_shared<DataTypeInt32>();
122120
for (int i = 0; i < arguments.size(); i++) {
123-
DCHECK_EQ(arguments[i]->get_primitive_type(), TYPE_ARRAY)
124-
<< arguments[i]->get_name() << " not supported";
121+
DORIS_CHECK(arguments[i]->get_primitive_type() == TYPE_ARRAY);
125122
auto nestedType =
126123
check_and_get_data_type<DataTypeArray>(arguments[i].get())->get_nested_type();
127124
fieldTypes[i + 1] = make_nullable(nestedType);
@@ -140,8 +137,7 @@ struct FunctionPoseExplode {
140137
// explode json-object: expands a json-object into a struct holding one (key: string, value: jsonb) pair per row
141138
struct FunctionExplodeJsonObject {
142139
static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
143-
DCHECK_EQ(arguments[0]->get_primitive_type(), PrimitiveType::TYPE_JSONB)
144-
<< " explode json object " << arguments[0]->get_name() << " not supported";
140+
DORIS_CHECK(arguments[0]->get_primitive_type() == PrimitiveType::TYPE_JSONB);
145141
DataTypes fieldTypes(2);
146142
fieldTypes[0] = make_nullable(std::make_shared<DataTypeString>());
147143
fieldTypes[1] = make_nullable(std::make_shared<DataTypeJsonb>());
@@ -154,8 +150,7 @@ struct FunctionExplodeJsonObject {
154150
// json_each(json) -> Nullable(Struct(key Nullable(String), value Nullable(JSONB)))
155151
struct FunctionJsonEach {
156152
static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
157-
DCHECK_EQ(arguments[0]->get_primitive_type(), PrimitiveType::TYPE_JSONB)
158-
<< " json_each " << arguments[0]->get_name() << " not supported";
153+
DORIS_CHECK(arguments[0]->get_primitive_type() == PrimitiveType::TYPE_JSONB);
159154
DataTypes fieldTypes(2);
160155
fieldTypes[0] = make_nullable(std::make_shared<DataTypeString>());
161156
fieldTypes[1] = make_nullable(std::make_shared<DataTypeJsonb>());
@@ -168,8 +163,7 @@ struct FunctionJsonEach {
168163
// json_each_text(json) -> Nullable(Struct(key Nullable(String), value Nullable(String)))
169164
struct FunctionJsonEachText {
170165
static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
171-
DCHECK_EQ(arguments[0]->get_primitive_type(), PrimitiveType::TYPE_JSONB)
172-
<< " json_each_text " << arguments[0]->get_name() << " not supported";
166+
DORIS_CHECK(arguments[0]->get_primitive_type() == PrimitiveType::TYPE_JSONB);
173167
DataTypes fieldTypes(2);
174168
fieldTypes[0] = make_nullable(std::make_shared<DataTypeString>());
175169
fieldTypes[1] = make_nullable(std::make_shared<DataTypeString>());
@@ -267,8 +261,8 @@ void register_function_fake(SimpleFunctionFactory& factory) {
267261
register_table_function_expand_outer<FunctionExplodeMap>(factory, "explode_map");
268262

269263
register_table_function_expand_outer<FunctionExplodeJsonObject>(factory, "explode_json_object");
270-
register_function<FunctionJsonEach>(factory, "json_each");
271-
register_function<FunctionJsonEachText>(factory, "json_each_text");
264+
register_table_function_expand_outer<FunctionJsonEach>(factory, "json_each");
265+
register_table_function_expand_outer<FunctionJsonEachText>(factory, "json_each_text");
272266
register_table_function_expand_outer_default<DataTypeString, false>(factory, "explode_split");
273267
register_table_function_expand_outer_default<DataTypeInt32, false>(factory, "explode_numbers");
274268
register_table_function_expand_outer_default<DataTypeInt64, false>(factory,

be/test/vec/function/table_function_test.cpp

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,4 +612,99 @@ TEST_F(TableFunctionTest, vjson_each_get_same_many_values) {
612612
fn.process_close();
613613
}
614614
}
615+
616+
TEST_F(TableFunctionTest, vjson_each_outer) {
617+
init_expr_context(1);
618+
VJsonEachTableFn fn;
619+
fn.set_expr_context(_ctx);
620+
621+
// set_outer() correctly sets the is_outer flag
622+
EXPECT_FALSE(fn.is_outer());
623+
fn.set_outer();
624+
EXPECT_TRUE(fn.is_outer());
625+
626+
// Normal object: outer flag does not affect KV expansion
627+
{
628+
auto block = build_jsonb_input_block({{R"({"a":"foo","b":123})"}});
629+
auto rows = run_json_each_fn(&fn, block.get(), true);
630+
ASSERT_EQ(2u, rows.size());
631+
EXPECT_EQ("a", rows[0].first);
632+
EXPECT_EQ("\"foo\"", rows[0].second);
633+
EXPECT_EQ("b", rows[1].first);
634+
EXPECT_EQ("123", rows[1].second);
635+
}
636+
637+
// For NULL / empty-object / non-object inputs: current_empty() is true.
638+
// The operator calls get_value() unconditionally when is_outer() — verify that
639+
// get_value() inserts exactly one default (NULL) struct row in each case.
640+
DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type(
641+
doris::PrimitiveType::TYPE_VARCHAR, false));
642+
DataTypePtr val_dt = make_nullable(
643+
DataTypeFactory::instance().create_data_type(doris::PrimitiveType::TYPE_JSONB, false));
644+
DataTypePtr struct_dt =
645+
make_nullable(std::make_shared<DataTypeStruct>(DataTypes {key_dt, val_dt}));
646+
647+
TQueryOptions q_opts;
648+
TQueryGlobals q_globals;
649+
RuntimeState rs(q_opts, q_globals);
650+
651+
for (const char* input : {"", "{}", "[1,2,3]"}) {
652+
auto block = build_jsonb_input_block({{input}});
653+
ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()) << "input: " << input;
654+
fn.process_row(0);
655+
EXPECT_TRUE(fn.current_empty()) << "input: " << input;
656+
657+
auto out_col = struct_dt->create_column();
658+
fn.get_value(out_col, 1);
659+
ASSERT_EQ(1u, out_col->size()) << "input: " << input;
660+
EXPECT_TRUE(out_col->is_null_at(0)) << "input: " << input;
661+
fn.process_close();
662+
}
663+
}
664+
665+
TEST_F(TableFunctionTest, vjson_each_text_outer) {
666+
init_expr_context(1);
667+
VJsonEachTextTableFn fn;
668+
fn.set_expr_context(_ctx);
669+
670+
EXPECT_FALSE(fn.is_outer());
671+
fn.set_outer();
672+
EXPECT_TRUE(fn.is_outer());
673+
674+
// Normal object: text mode (strings unquoted), outer flag does not affect expansion
675+
{
676+
auto block = build_jsonb_input_block({{R"({"a":"foo","b":123})"}});
677+
auto rows = run_json_each_fn(&fn, block.get(), false);
678+
ASSERT_EQ(2u, rows.size());
679+
EXPECT_EQ("a", rows[0].first);
680+
EXPECT_EQ("foo", rows[0].second);
681+
EXPECT_EQ("b", rows[1].first);
682+
EXPECT_EQ("123", rows[1].second);
683+
}
684+
685+
// NULL / empty-object / non-object → current_empty(), get_value() inserts one default row
686+
DataTypePtr key_dt = make_nullable(DataTypeFactory::instance().create_data_type(
687+
doris::PrimitiveType::TYPE_VARCHAR, false));
688+
DataTypePtr val_dt = make_nullable(DataTypeFactory::instance().create_data_type(
689+
doris::PrimitiveType::TYPE_VARCHAR, false));
690+
DataTypePtr struct_dt =
691+
make_nullable(std::make_shared<DataTypeStruct>(DataTypes {key_dt, val_dt}));
692+
693+
TQueryOptions q_opts;
694+
TQueryGlobals q_globals;
695+
RuntimeState rs(q_opts, q_globals);
696+
697+
for (const char* input : {"", "{}", "[1,2,3]"}) {
698+
auto block = build_jsonb_input_block({{input}});
699+
ASSERT_TRUE(fn.process_init(block.get(), &rs).ok()) << "input: " << input;
700+
fn.process_row(0);
701+
EXPECT_TRUE(fn.current_empty()) << "input: " << input;
702+
703+
auto out_col = struct_dt->create_column();
704+
fn.get_value(out_col, 1);
705+
ASSERT_EQ(1u, out_col->size()) << "input: " << input;
706+
EXPECT_TRUE(out_col->is_null_at(0)) << "input: " << input;
707+
fn.process_close();
708+
}
709+
}
615710
} // namespace doris::vectorized

fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinTableGeneratingFunctions.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@
3939
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeSplitOuter;
4040
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeVariantArray;
4141
import org.apache.doris.nereids.trees.expressions.functions.generator.JsonEach;
42+
import org.apache.doris.nereids.trees.expressions.functions.generator.JsonEachOuter;
4243
import org.apache.doris.nereids.trees.expressions.functions.generator.JsonEachText;
44+
import org.apache.doris.nereids.trees.expressions.functions.generator.JsonEachTextOuter;
4345
import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplode;
4446
import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplodeOuter;
4547
import org.apache.doris.nereids.trees.expressions.functions.generator.Unnest;
@@ -66,7 +68,9 @@ public class BuiltinTableGeneratingFunctions implements FunctionHelper {
6668
tableGenerating(ExplodeJsonObject.class, "explode_json_object"),
6769
tableGenerating(ExplodeJsonObjectOuter.class, "explode_json_object_outer"),
6870
tableGenerating(JsonEach.class, "json_each"),
69-
tableGenerating(JsonEachText.class, "json_each_text"),
71+
tableGenerating(JsonEachOuter.class, "json_each_outer"),
72+
tableGenerating(JsonEachText.class, "json_each_text"),
73+
tableGenerating(JsonEachTextOuter.class, "json_each_text_outer"),
7074
tableGenerating(ExplodeNumbers.class, "explode_numbers"),
7175
tableGenerating(ExplodeNumbersOuter.class, "explode_numbers_outer"),
7276
tableGenerating(ExplodeBitmap.class, "explode_bitmap"),
@@ -94,7 +98,8 @@ public class BuiltinTableGeneratingFunctions implements FunctionHelper {
9498
.add("explode_json_array_double_outer").add("explode_json_array_string_outer")
9599
.add("explode_json_array_json_outer").add("explode_split").add("explode_split_outer")
96100
.add("posexplode").add("posexplode_outer")
97-
.add("json_each").add("json_each_text").build();
101+
.add("json_each").add("json_each_outer")
102+
.add("json_each_text").add("json_each_text_outer").build();
98103

99104
public Set<String> getReturnManyColumnFunctions() {
100105
return RETURN_MULTI_COLUMNS_FUNCTIONS;
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package org.apache.doris.nereids.trees.expressions.functions.generator;
19+
20+
import org.apache.doris.catalog.FunctionSignature;
21+
import org.apache.doris.nereids.trees.expressions.Expression;
22+
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
23+
import org.apache.doris.nereids.trees.expressions.literal.StructLiteral;
24+
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
25+
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
26+
import org.apache.doris.nereids.types.JsonType;
27+
import org.apache.doris.nereids.types.StringType;
28+
29+
import com.google.common.base.Preconditions;
30+
import com.google.common.collect.ImmutableList;
31+
32+
import java.util.List;
33+
34+
/**
35+
* json_each_outer(json) is json_each with outer semantics: emits one NULL row
36+
* when the input is NULL, an empty object, or not a JSON object, instead of producing no rows.
37+
*/
38+
public class JsonEachOuter extends TableGeneratingFunction implements UnaryExpression, AlwaysNullable {
39+
40+
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
41+
FunctionSignature.ret(StructLiteral.constructStructType(
42+
ImmutableList.of(StringType.INSTANCE, JsonType.INSTANCE)))
43+
.args(JsonType.INSTANCE));
44+
45+
/**
46+
* Constructor with 1 argument.
47+
*/
48+
public JsonEachOuter(Expression arg) {
49+
super("json_each_outer", arg);
50+
}
51+
52+
/** Constructor for withChildren and reuse signature. */
53+
private JsonEachOuter(GeneratorFunctionParams functionParams) {
54+
super(functionParams);
55+
}
56+
57+
@Override
58+
public JsonEachOuter withChildren(List<Expression> children) {
59+
Preconditions.checkArgument(children.size() == 1);
60+
return new JsonEachOuter(getFunctionParams(children));
61+
}
62+
63+
@Override
64+
public List<FunctionSignature> getSignatures() {
65+
return SIGNATURES;
66+
}
67+
68+
@Override
69+
public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
70+
return visitor.visitJsonEachOuter(this, context);
71+
}
72+
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package org.apache.doris.nereids.trees.expressions.functions.generator;
19+
20+
import org.apache.doris.catalog.FunctionSignature;
21+
import org.apache.doris.nereids.trees.expressions.Expression;
22+
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
23+
import org.apache.doris.nereids.trees.expressions.literal.StructLiteral;
24+
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
25+
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
26+
import org.apache.doris.nereids.types.JsonType;
27+
import org.apache.doris.nereids.types.StringType;
28+
29+
import com.google.common.base.Preconditions;
30+
import com.google.common.collect.ImmutableList;
31+
32+
import java.util.List;
33+
34+
/**
35+
* json_each_text_outer(json) is json_each_text with outer semantics: emits one
36+
* NULL row when the input is NULL, an empty object, or not a JSON object, instead of producing no
37+
* rows.
38+
*/
39+
public class JsonEachTextOuter extends TableGeneratingFunction implements UnaryExpression, AlwaysNullable {
40+
41+
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
42+
FunctionSignature.ret(StructLiteral.constructStructType(
43+
ImmutableList.of(StringType.INSTANCE, StringType.INSTANCE)))
44+
.args(JsonType.INSTANCE));
45+
46+
/**
47+
* Constructor with 1 argument.
48+
*/
49+
public JsonEachTextOuter(Expression arg) {
50+
super("json_each_text_outer", arg);
51+
}
52+
53+
/** Constructor for withChildren and reuse signature. */
54+
private JsonEachTextOuter(GeneratorFunctionParams functionParams) {
55+
super(functionParams);
56+
}
57+
58+
@Override
59+
public JsonEachTextOuter withChildren(List<Expression> children) {
60+
Preconditions.checkArgument(children.size() == 1);
61+
return new JsonEachTextOuter(getFunctionParams(children));
62+
}
63+
64+
@Override
65+
public List<FunctionSignature> getSignatures() {
66+
return SIGNATURES;
67+
}
68+
69+
@Override
70+
public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
71+
return visitor.visitJsonEachTextOuter(this, context);
72+
}
73+
}

fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/TableGeneratingFunctionVisitor.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,9 @@
3939
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeSplitOuter;
4040
import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeVariantArray;
4141
import org.apache.doris.nereids.trees.expressions.functions.generator.JsonEach;
42+
import org.apache.doris.nereids.trees.expressions.functions.generator.JsonEachOuter;
4243
import org.apache.doris.nereids.trees.expressions.functions.generator.JsonEachText;
44+
import org.apache.doris.nereids.trees.expressions.functions.generator.JsonEachTextOuter;
4345
import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplode;
4446
import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplodeOuter;
4547
import org.apache.doris.nereids.trees.expressions.functions.generator.TableGeneratingFunction;
@@ -85,10 +87,18 @@ default R visitJsonEach(JsonEach jsonEach, C context) {
8587
return visitTableGeneratingFunction(jsonEach, context);
8688
}
8789

90+
default R visitJsonEachOuter(JsonEachOuter jsonEachOuter, C context) {
91+
return visitTableGeneratingFunction(jsonEachOuter, context);
92+
}
93+
8894
default R visitJsonEachText(JsonEachText jsonEachText, C context) {
8995
return visitTableGeneratingFunction(jsonEachText, context);
9096
}
9197

98+
default R visitJsonEachTextOuter(JsonEachTextOuter jsonEachTextOuter, C context) {
99+
return visitTableGeneratingFunction(jsonEachTextOuter, context);
100+
}
101+
92102
default R visitExplodeNumbers(ExplodeNumbers explodeNumbers, C context) {
93103
return visitTableGeneratingFunction(explodeNumbers, context);
94104
}

0 commit comments

Comments
 (0)