From 90033cfd1b0cb38aa85a937ef973fe6511949036 Mon Sep 17 00:00:00 2001 From: Raghav Aggarwal Date: Tue, 16 Dec 2025 23:42:39 +0530 Subject: [PATCH 1/3] HIVE-29375: FULL OUTER JOIN is failing with Unexpected hash table key type DATE --- ...torMapJoinOuterGenerateResultOperator.java | 1 + .../VectorMapJoinOptimizedLongHashMap.java | 6 + .../vector_full_outer_join_date.q | 29 ++++ .../llap/vector_full_outer_join_date.q.out | 149 ++++++++++++++++++ 4 files changed, 185 insertions(+) create mode 100644 ql/src/test/queries/clientpositive/vector_full_outer_join_date.q create mode 100644 ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java index fff2f28a097f..e83b178e4dc7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java @@ -824,6 +824,7 @@ protected void generateFullOuterSmallTableNoMatches(byte smallTablePos, case SHORT: case INT: case LONG: + case DATE: generateFullOuterLongKeySmallTableNoMatches(); break; case STRING: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java index 65c51270b8e6..cafd8326e1b0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java @@ -89,6 +89,9 @@ public void init() { case LONG: integerTypeInfo = TypeInfoFactory.longTypeInfo; break; + case DATE: + integerTypeInfo = TypeInfoFactory.dateTypeInfo; + break; default: throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType); } @@ -123,6 +126,9 @@ private boolean readNonMatchedLongKey(ByteSegmentRef keyRef) throws HiveExceptio case LONG: longValue = keyBinarySortableDeserializeRead.currentLong; break; + case DATE: + longValue = keyBinarySortableDeserializeRead.currentDateWritable.getDays(); + break; default: throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType); } diff --git a/ql/src/test/queries/clientpositive/vector_full_outer_join_date.q b/ql/src/test/queries/clientpositive/vector_full_outer_join_date.q new file mode 100644 index 000000000000..ba9645e50b55 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_full_outer_join_date.q @@ -0,0 +1,29 @@ +set hive.optimize.dynamic.partition.hashjoin=true; +set hive.auto.convert.join=true; + +-- Test Date column +create table tbl1 (id int, event_date date); +create table tbl2 (id int, event_date date); + +insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'), (3, '2023-01-03'); +insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'), (4, '2023-01-05'); + +select tbl1.id, tbl1.event_date from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id; + +-- Test timestamp column +create table tbl3 (id int, event_date timestamp); +create table tbl4 (id int, event_date timestamp); + +insert into tbl3 values (1, '2025-12-17 10:20:30'), (2, '2025-12-17 11:20:30'); +insert into tbl4 values (2, '2025-12-17 11:20:30'), (3, '2025-12-17 09:20:30'); + +select tbl3.id, tbl3.event_date from tbl3 full outer join tbl4 on tbl3.event_date = tbl4.event_date order by tbl3.id; + +-- Test Double column +create table tbl5 (id int, val double); +create table tbl6 (id int, val double); + +insert into tbl5 values (1, 5.6D), (2, 3.2D); +insert into tbl6 values (2, 3.2D), (3, 7.2D); + +select tbl5.id, tbl5.val from tbl5 full outer join tbl6 on tbl5.val = tbl6.val order by tbl5.id; diff --git a/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out new file mode 100644 index 000000000000..1c61ff45eede --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out @@ -0,0 +1,149 @@ +PREHOOK: query: create table tbl1 (id int, event_date date) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl1 +POSTHOOK: query: create table tbl1 (id int, event_date date) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl1 +PREHOOK: query: create table tbl2 (id int, event_date date) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl2 +POSTHOOK: query: create table tbl2 (id int, event_date date) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl2 +PREHOOK: query: insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'), (3, '2023-01-03') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl1 +POSTHOOK: query: insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'), (3, '2023-01-03') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl1 +POSTHOOK: Lineage: tbl1.event_date SCRIPT [] +POSTHOOK: Lineage: tbl1.id SCRIPT [] +PREHOOK: query: insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'), (4, '2023-01-05') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl2 +POSTHOOK: query: insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'), (4, '2023-01-05') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl2 +POSTHOOK: Lineage: tbl2.event_date SCRIPT [] +POSTHOOK: Lineage: tbl2.id SCRIPT [] +PREHOOK: query: select tbl1.id, tbl1.event_date from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select tbl1.id, tbl1.event_date from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +1 2023-01-01 +2 2023-01-02 +3 2023-01-03 +NULL NULL +NULL NULL +PREHOOK: query: create table tbl3 (id int, event_date timestamp) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl3 +POSTHOOK: query: create table tbl3 (id int, event_date timestamp) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl3 +PREHOOK: query: create table tbl4 (id int, event_date timestamp) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl4 +POSTHOOK: query: create table tbl4 (id int, event_date timestamp) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl4 +PREHOOK: query: insert into tbl3 values (1, '2025-12-17 10:20:30'), (2, '2025-12-17 11:20:30') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl3 +POSTHOOK: query: insert into tbl3 values (1, '2025-12-17 10:20:30'), (2, '2025-12-17 11:20:30') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl3 +POSTHOOK: Lineage: tbl3.event_date SCRIPT [] +POSTHOOK: Lineage: tbl3.id SCRIPT [] +PREHOOK: query: insert into tbl4 values (2, '2025-12-17 11:20:30'), (3, '2025-12-17 09:20:30') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl4 +POSTHOOK: query: insert into tbl4 values (2, '2025-12-17 11:20:30'), (3, '2025-12-17 09:20:30') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl4 +POSTHOOK: Lineage: tbl4.event_date SCRIPT [] +POSTHOOK: Lineage: tbl4.id SCRIPT [] +PREHOOK: query: select tbl3.id, tbl3.event_date from tbl3 full outer join tbl4 on tbl3.event_date = tbl4.event_date order by tbl3.id +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl3 +PREHOOK: Input: default@tbl4 +#### A masked pattern was here #### +POSTHOOK: query: select tbl3.id, tbl3.event_date from tbl3 full outer join tbl4 on tbl3.event_date = tbl4.event_date order by tbl3.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl3 +POSTHOOK: Input: default@tbl4 +#### A masked pattern was here #### +1 2025-12-17 10:20:30 +2 2025-12-17 11:20:30 +NULL NULL +PREHOOK: query: create table tbl5 (id int, val double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl5 +POSTHOOK: query: create table tbl5 (id int, val double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl5 +PREHOOK: query: create table tbl6 (id int, val double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl6 +POSTHOOK: query: create table tbl6 (id int, val double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl6 +PREHOOK: query: insert into tbl5 values (1, 5.6D), (2, 3.2D) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl5 +POSTHOOK: query: insert into tbl5 values (1, 5.6D), (2, 3.2D) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl5 +POSTHOOK: Lineage: tbl5.id SCRIPT [] +POSTHOOK: Lineage: tbl5.val SCRIPT [] +PREHOOK: query: insert into tbl6 values (2, 3.2D), (3, 7.2D) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@tbl6 +POSTHOOK: query: insert into tbl6 values (2, 3.2D), (3, 7.2D) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@tbl6 +POSTHOOK: Lineage: tbl6.id SCRIPT [] +POSTHOOK: Lineage: tbl6.val SCRIPT [] +PREHOOK: query: select tbl5.id, tbl5.val from tbl5 full outer join tbl6 on tbl5.val = tbl6.val order by tbl5.id +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl5 +PREHOOK: Input: default@tbl6 +#### A masked pattern was here #### +POSTHOOK: query: select tbl5.id, tbl5.val from tbl5 full outer join tbl6 on tbl5.val = tbl6.val order by tbl5.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl5 +POSTHOOK: Input: default@tbl6 +#### A masked pattern was here #### +1 5.6 +2 3.2 +NULL NULL From b2674e5b39386a2bc4b287cd6e9892d801a6274a Mon Sep 17 00:00:00 2001 From: Raghav Aggarwal Date: Mon, 22 Dec 2025 14:22:28 +0530 Subject: [PATCH 2/3] Add DATE support in test code as well --- .../VectorMapJoinOptimizedLongHashMap.java | 4 +- .../vector/mapjoin/MapJoinTestConfig.java | 4 ++ .../mapjoin/fast/CheckFastRowHashMap.java | 5 +++ .../fast/TestVectorMapJoinFastRowHashMap.java | 41 +++++++++++++++++++ 4 files changed, 52 insertions(+), 2 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java index cafd8326e1b0..aeaab826898c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java @@ -90,8 +90,8 @@ public void init() { integerTypeInfo = TypeInfoFactory.longTypeInfo; break; case DATE: - integerTypeInfo = TypeInfoFactory.dateTypeInfo; - break; + integerTypeInfo = TypeInfoFactory.dateTypeInfo; + break; default: throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java index e4674d81efc5..8597229c3e3a 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java @@ -394,6 +394,9 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t case LONG: hashTableKeyType = HashTableKeyType.LONG; break; + case DATE: + hashTableKeyType = HashTableKeyType.DATE; + break; case STRING: hashTableKeyType = HashTableKeyType.STRING; break; @@ -547,6 +550,7 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator( case BYTE: case SHORT: case INT: + case DATE: case LONG: switch (VectorMapJoinVariation) { case INNER: diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java index 5a9f180b3f39..e0d387718b53 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.lazy.VerifyLazy; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; @@ -335,6 +336,7 @@ public void verify(VectorMapJoinFastHashTableContainerBase map, case SHORT: case INT: case LONG: + case DATE: { Object[] keyRow = element.getKeyRow(); Object keyObject = keyRow[0]; @@ -357,6 +359,9 @@ public void verify(VectorMapJoinFastHashTableContainerBase map, case LONG: longKey = ((LongWritable) keyObject).get(); break; + case DATE: + longKey = ((DateWritableV2) keyObject).getDays(); + break; default: throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name()); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java index f5eb68c6ba7b..0a751728cc79 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java @@ -495,6 +495,47 @@ public void testBigIntRowsExact() throws Exception { /* doClipping */ false, /* useExactBytes */ true); } + @Test + public void testDateRowsExact() throws Exception { + random = new Random(44332); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastLongHashMapContainer map = + new VectorMapJoinFastLongHashMapContainer( + false, + false, + HashTableKeyType.DATE, + LARGE_CAPACITY, + LOAD_FACTOR, + LARGE_WB_SIZE, + -1, + tableDesc, + 4); + + VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap(); + VectorRandomRowSource valueSource = new VectorRandomRowSource(); + + valueSource.init( + random, + VectorRandomRowSource.SupportedTypes.ALL, + 4, + /* allowNulls */ false, /* isUnicodeOk */ + false); + + int rowCount = 1000; + Object[][] rows = valueSource.randomRows(rowCount); + + addAndVerifyRows( + valueSource, + rows, + map, + HashTableKeyType.DATE, + verifyTable, + new String[] {"date"}, + /* doClipping */ false, /* useExactBytes */ + true); + } + @Test public void testIntRowsExact() throws Exception { random = new Random(8238383); From ce2b2859e72705779af34bd2ae818ee4869ebb9e Mon Sep 17 00:00:00 2001 From: Raghav Aggarwal Date: Sun, 4 Jan 2026 22:12:44 +0530 Subject: [PATCH 3/3] address review comments --- .../mapjoin/VectorMapJoinLongHashUtil.java | 41 +++ ...VectorMapJoinFastLongHashMapContainer.java | 3 +- ...rMapJoinFastLongHashMultiSetContainer.java | 3 +- ...VectorMapJoinFastLongHashSetContainer.java | 3 +- .../fast/VectorMapJoinFastLongHashTable.java | 3 +- .../fast/VectorMapJoinFastLongHashUtil.java | 55 ---- .../VectorMapJoinOptimizedLongHashMap.java | 30 +- .../vector/mapjoin/TestMapJoinOperator.java | 74 +++++ .../fast/TestVectorMapJoinFastRowHashMap.java | 4 +- .../vector_full_outer_join_date.q | 21 +- .../llap/vector_full_outer_join_date.q.out | 311 ++++++++++++------ 11 files changed, 346 insertions(+), 202 deletions(-) create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLongHashUtil.java delete mode 100644 ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashUtil.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLongHashUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLongHashUtil.java new file mode 100644 index 000000000000..8008210c5bb5 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLongHashUtil.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; + +public class VectorMapJoinLongHashUtil { + + public static long deserializeLongKey( + BinarySortableDeserializeRead keyBinarySortableDeserializeRead, + HashTableKeyType hashTableKeyType) + throws RuntimeException { + return switch (hashTableKeyType) { + case BOOLEAN -> (keyBinarySortableDeserializeRead.currentBoolean ? 1 : 0); + case BYTE -> keyBinarySortableDeserializeRead.currentByte; + case SHORT -> keyBinarySortableDeserializeRead.currentShort; + case INT -> keyBinarySortableDeserializeRead.currentInt; + case DATE -> keyBinarySortableDeserializeRead.currentDateWritable.getDays(); + case LONG -> keyBinarySortableDeserializeRead.currentLong; + default -> + throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name()); + }; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMapContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMapContainer.java index 6ef9b64cba9d..f8179c70f3fc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMapContainer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMapContainer.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hive.common.MemoryEstimate; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLongHashUtil; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; @@ -169,7 +170,7 @@ public long getHashCode(BytesWritable currentKey) throws HiveException, IOExcept throw new HiveException("DeserializeRead details: " + keyBinarySortableDeserializeRead.getDetailedReadPositionString(), e); } - long key = VectorMapJoinFastLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType); + long key = VectorMapJoinLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType); return HashCodeUtil.calculateLongHashCode(key); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSetContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSetContainer.java index c7184d7e81d0..4842c0027859 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSetContainer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSetContainer.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLongHashUtil; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMultiSet; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; @@ -87,7 +88,7 @@ public long getHashCode(BytesWritable currentKey) throws HiveException, IOExcept throw new HiveException("DeserializeRead details: " + keyBinarySortableDeserializeRead.getDetailedReadPositionString(), e); } - long key = VectorMapJoinFastLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType); + long key = VectorMapJoinLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType); return HashCodeUtil.calculateLongHashCode(key); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSetContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSetContainer.java index 1690739cc62c..57030de901f2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSetContainer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSetContainer.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLongHashUtil; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; @@ -86,7 +87,7 @@ public long getHashCode(BytesWritable currentKey) throws HiveException, IOExcept throw new HiveException("DeserializeRead details: " + keyBinarySortableDeserializeRead.getDetailedReadPositionString(), e); } - long key = VectorMapJoinFastLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType); + long key = VectorMapJoinLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType); return HashCodeUtil.calculateLongHashCode(key); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index ba46bfc26dbb..d26e3d271b4a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -20,6 +20,7 @@ import java.io.IOException; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLongHashUtil; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.slf4j.Logger; @@ -77,7 +78,7 @@ public boolean adaptPutRow(long hashCode, BytesWritable currentKey, BytesWritabl throw new HiveException("DeserializeRead details: " + keyBinarySortableDeserializeRead.getDetailedReadPositionString(), e); } - long key = VectorMapJoinFastLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType); + long key = VectorMapJoinLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead, hashTableKeyType); add(hashCode, key, currentValue); return true; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashUtil.java deleted file mode 100644 index d3bda217a16d..000000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashUtil.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; - -import java.io.IOException; - -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; -import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; - -public class VectorMapJoinFastLongHashUtil { - - public static long deserializeLongKey(BinarySortableDeserializeRead keyBinarySortableDeserializeRead, - HashTableKeyType hashTableKeyType) throws RuntimeException { - long key = 0; - switch (hashTableKeyType) { - case BOOLEAN: - key = (keyBinarySortableDeserializeRead.currentBoolean ? 1 : 0); - break; - case BYTE: - key = (long) keyBinarySortableDeserializeRead.currentByte; - break; - case SHORT: - key = (long) keyBinarySortableDeserializeRead.currentShort; - break; - case INT: - key = (long) keyBinarySortableDeserializeRead.currentInt; - break; - case DATE: - key = (long) keyBinarySortableDeserializeRead.currentDateWritable.getDays(); - break; - case LONG: - key = keyBinarySortableDeserializeRead.currentLong; - break; - default: - throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name()); - } - return key; - } -} \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java index aeaab826898c..934bd82d70ea 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized; -import java.io.IOException; - import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; @@ -34,6 +32,9 @@ import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLongHashUtil; + +import java.io.IOException; /* * An single long value hash map based on the BytesBytesMultiHashMap. @@ -110,28 +111,9 @@ private boolean readNonMatchedLongKey(ByteSegmentRef keyRef) throws HiveExceptio if (!keyBinarySortableDeserializeRead.readNextField()) { return false; } - switch (hashMap.hashTableKeyType) { - case BOOLEAN: - longValue = keyBinarySortableDeserializeRead.currentBoolean ? 1 : 0; - break; - case BYTE: - longValue = keyBinarySortableDeserializeRead.currentByte; - break; - case SHORT: - longValue = keyBinarySortableDeserializeRead.currentShort; - break; - case INT: - longValue = keyBinarySortableDeserializeRead.currentInt; - break; - case LONG: - longValue = keyBinarySortableDeserializeRead.currentLong; - break; - case DATE: - longValue = keyBinarySortableDeserializeRead.currentDateWritable.getDays(); - break; - default: - throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType); - } + longValue = + VectorMapJoinLongHashUtil.deserializeLongKey( + keyBinarySortableDeserializeRead, hashMap.hashTableKeyType); } catch (IOException e) { throw new HiveException(e); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java index a38a6c98f47a..5571be43574e 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java @@ -815,6 +815,80 @@ public boolean doTestLong6(long seed, int rowCount, int hiveConfVariation, return false; } + @Test + public void testDate0() throws Exception { + long seed = 8322; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestDate0( + seed, + rowCount, + hiveConfVariation, + vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestDate0( + long seed, + int rowCount, + int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) + throws Exception { + + HiveConf hiveConf = getHiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos; + int[] bigTableKeyColumnNums; + TypeInfo[] smallTableValueTypeInfos; + int[] smallTableRetainKeyColumnNums; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc; + MapJoinTestData testData; + + // Big Table: date key; Small Table: key retained, string value + bigTableTypeInfos = new TypeInfo[] {TypeInfoFactory.dateTypeInfo}; + bigTableKeyColumnNums = new int[] {0}; + smallTableRetainKeyColumnNums = new int[] {0}; + smallTableValueTypeInfos = new TypeInfo[] {TypeInfoFactory.stringTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, + vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = new MapJoinTestData(rowCount, testDesc, seed); + executeTest(testDesc, testData, "testDate0"); + + return false; + } + private boolean addNonLongHiveConfVariation(int hiveConfVariation, HiveConf hiveConf) { // Set defaults. diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java index 0a751728cc79..291b6a40b25c 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java @@ -519,7 +519,7 @@ public void testDateRowsExact() throws Exception { random, VectorRandomRowSource.SupportedTypes.ALL, 4, - /* allowNulls */ false, /* isUnicodeOk */ + false, false); int rowCount = 1000; @@ -532,7 +532,7 @@ public void testDateRowsExact() throws Exception { HashTableKeyType.DATE, verifyTable, new String[] {"date"}, - /* doClipping */ false, /* useExactBytes */ + false, true); } diff --git a/ql/src/test/queries/clientpositive/vector_full_outer_join_date.q b/ql/src/test/queries/clientpositive/vector_full_outer_join_date.q index ba9645e50b55..d1e2533578f2 100644 --- a/ql/src/test/queries/clientpositive/vector_full_outer_join_date.q +++ b/ql/src/test/queries/clientpositive/vector_full_outer_join_date.q @@ -1,29 +1,14 @@ +set hive.vectorized.execution.enabled=true; set hive.optimize.dynamic.partition.hashjoin=true; set hive.auto.convert.join=true; --- Test Date column create table tbl1 (id int, event_date date); create table tbl2 (id int, event_date date); insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'), (3, '2023-01-03'); insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'), (4, '2023-01-05'); -select tbl1.id, tbl1.event_date from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id; +explain vectorization detail select * from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id, tbl2.id; --- Test timestamp column -create table tbl3 (id int, event_date timestamp); -create table tbl4 (id int, event_date timestamp); +select * from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id, tbl2.id; -insert into tbl3 values (1, '2025-12-17 10:20:30'), (2, '2025-12-17 11:20:30'); -insert into tbl4 values (2, '2025-12-17 11:20:30'), (3, '2025-12-17 09:20:30'); - -select tbl3.id, tbl3.event_date from tbl3 full outer join tbl4 on tbl3.event_date = tbl4.event_date order by tbl3.id; - --- Test Double column -create table tbl5 (id int, val double); -create table tbl6 (id int, val double); - -insert into tbl5 values (1, 5.6D), (2, 3.2D); -insert into tbl6 values (2, 3.2D), (3, 7.2D); - -select tbl5.id, tbl5.val from tbl5 full outer join tbl6 on tbl5.val = tbl6.val order by tbl5.id; diff --git a/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out index 1c61ff45eede..d1d49f77854b 100644 --- a/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out @@ -34,116 +34,229 @@ POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@tbl2 POSTHOOK: Lineage: tbl2.event_date SCRIPT [] POSTHOOK: Lineage: tbl2.id SCRIPT [] -PREHOOK: query: select tbl1.id, tbl1.event_date from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id +PREHOOK: query: explain vectorization detail select * from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id, tbl2.id PREHOOK: type: QUERY PREHOOK: Input: default@tbl1 PREHOOK: Input: default@tbl2 #### A masked pattern was here #### -POSTHOOK: query: select tbl1.id, tbl1.event_date from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id +POSTHOOK: query: explain vectorization detail select * from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id, tbl2.id POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl1 POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### -1 2023-01-01 -2 2023-01-02 -3 2023-01-03 -NULL NULL -NULL NULL -PREHOOK: query: create table tbl3 (id int, event_date timestamp) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tbl3 -POSTHOOK: query: create table tbl3 (id int, event_date timestamp) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tbl3 -PREHOOK: query: create table tbl4 (id int, event_date timestamp) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tbl4 -POSTHOOK: query: create table tbl4 (id int, event_date timestamp) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tbl4 -PREHOOK: query: insert into tbl3 values (1, '2025-12-17 10:20:30'), (2, '2025-12-17 11:20:30') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@tbl3 -POSTHOOK: query: insert into tbl3 values (1, '2025-12-17 10:20:30'), (2, '2025-12-17 11:20:30') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@tbl3 -POSTHOOK: Lineage: tbl3.event_date SCRIPT [] -POSTHOOK: Lineage: tbl3.id SCRIPT [] -PREHOOK: query: insert into tbl4 values (2, '2025-12-17 11:20:30'), (3, '2025-12-17 09:20:30') -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@tbl4 -POSTHOOK: query: insert into tbl4 values (2, '2025-12-17 11:20:30'), (3, '2025-12-17 09:20:30') -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@tbl4 -POSTHOOK: Lineage: tbl4.event_date SCRIPT [] -POSTHOOK: Lineage: tbl4.id SCRIPT [] -PREHOOK: query: select tbl3.id, tbl3.event_date from tbl3 full outer join tbl4 on tbl3.event_date = tbl4.event_date order by tbl3.id -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl3 -PREHOOK: Input: default@tbl4 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -POSTHOOK: query: select tbl3.id, tbl3.event_date from tbl3 full outer join tbl4 on tbl3.event_date = tbl4.event_date order by tbl3.id -POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl3 -POSTHOOK: Input: default@tbl4 + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### -1 2025-12-17 10:20:30 -2 2025-12-17 11:20:30 -NULL NULL -PREHOOK: query: create table tbl5 (id int, val double) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tbl5 -POSTHOOK: query: create table tbl5 (id int, val double) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tbl5 -PREHOOK: query: create table tbl6 (id int, val double) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@tbl6 -POSTHOOK: query: create table tbl6 (id int, val double) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@tbl6 -PREHOOK: query: insert into tbl5 values (1, 5.6D), (2, 3.2D) + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tbl1 + Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:id:int, 1:event_date:date, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] + Select Operator + expressions: id (type: int), event_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: date) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:date + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int + Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: id:int, event_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: tbl2 + Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:id:int, 1:event_date:date, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] + Select Operator + expressions: id (type: int), event_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: date) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:date + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 4:smallint + valueExpressions: ConstantVectorExpression(val 0) -> 4:smallint + Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), 0S (type: smallint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: id:int, event_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:date, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: date) + 1 KEY.reducesinkkey0 (type: date) + Map Join Vectorization: + bigTableKeyColumns: 0:date + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 1:int, 0:date + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 1:int, 0:date, 2:int, 3:date + smallTableValueMapping: 2:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 9 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: int) + null sort order: zz + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:int, 2:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:date, 3:date + Statistics: Num rows: 9 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date), _col3 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true + reduceColumnNullOrder: zz + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:int, VALUE._col0:date, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: date), KEY.reducesinkkey1 (type: int), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 2, 1, 3] + Statistics: Num rows: 9 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 9 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id, tbl2.id PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@tbl5 -POSTHOOK: query: insert into tbl5 values (1, 5.6D), (2, 3.2D) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@tbl5 -POSTHOOK: Lineage: tbl5.id SCRIPT [] -POSTHOOK: Lineage: tbl5.val SCRIPT [] -PREHOOK: query: insert into tbl6 values (2, 3.2D), (3, 7.2D) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@tbl6 -POSTHOOK: query: insert into tbl6 values (2, 3.2D), (3, 7.2D) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@tbl6 -POSTHOOK: Lineage: tbl6.id SCRIPT [] -POSTHOOK: Lineage: tbl6.val SCRIPT [] -PREHOOK: query: select tbl5.id, tbl5.val from tbl5 full outer join tbl6 on tbl5.val = tbl6.val order by tbl5.id -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl5 -PREHOOK: Input: default@tbl6 +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 #### A masked pattern was here #### -POSTHOOK: query: select tbl5.id, tbl5.val from tbl5 full outer join tbl6 on tbl5.val = tbl6.val order by tbl5.id +POSTHOOK: query: select * from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id, tbl2.id POSTHOOK: type: QUERY -POSTHOOK: Input: default@tbl5 -POSTHOOK: Input: default@tbl6 +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 #### A masked pattern was here #### -1 5.6 -2 3.2 -NULL NULL +1 2023-01-01 NULL NULL +2 2023-01-02 2 2023-01-02 +3 2023-01-03 NULL NULL +NULL NULL 3 2023-01-04 +NULL NULL 4 2023-01-05