Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,9 @@ public static ColumnIndex build(

builder.fill(nullPages, nullCounts, minValues, maxValues, repLevelHistogram, defLevelHistogram);
ColumnIndexBase<?> columnIndex = builder.build(type);
if (columnIndex == null) {
return null;
}
columnIndex.boundaryOrder = requireNonNull(boundaryOrder);
return columnIndex;
}
Expand Down Expand Up @@ -750,6 +753,19 @@ private ColumnIndexBase<?> build(PrimitiveType type) {
if (!nullCounts.isEmpty()) {
columnIndex.nullCounts = nullCounts.toLongArray();
}
// A null_pages entry of true indicates the page consists entirely of null values.
// When null_counts is present, a null_counts entry of zero means the page has no
// null values at all. These two fields directly contradict each other: a page
// cannot both consist entirely of nulls and contain zero nulls. Since null_pages
// is the field that causes the reader to skip pages during column index filtering,
// the safe response is to discard the column index for this column entirely.
if (columnIndex.nullCounts != null) {
for (int i = 0; i < columnIndex.nullPages.length; i++) {
if (columnIndex.nullPages[i] && columnIndex.nullCounts[i] == 0L) {
return null;
}
}
}
columnIndex.pageIndexes = pageIndexes.toIntArray();
// Repetition and definition level histograms are optional so keep them null if the builder has no values
if (repLevelHistogram != null && !repLevelHistogram.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1865,4 +1865,152 @@ long getMinMaxSize() {
private static void assertCorrectFiltering(ColumnIndex ci, FilterPredicate predicate, int... expectedIndexes) {
TestIndexIterator.assertEquals(predicate.accept(ci), expectedIndexes);
}

@Test
public void testBuildReturnsNullForNullPageCountContradiction() {
// null_pages[i]=true indicates the page is entirely null, but null_counts[i]=0
// says there are zero null values. This contradiction indicates invalid metadata.
// build() should return null to prevent incorrect page skipping.
PrimitiveType type = Types.required(INT32).named("test_col");

// Pages 1-3 have null_pages=true with null_counts=0 — contradictory
assertNull(
"Column index with null_pages=true and null_counts=0 should be rejected",
ColumnIndexBuilder.build(
type,
BoundaryOrder.ASCENDING,
List.of(false, true, true, true),
List.of(0L, 0L, 0L, 0L),
toBBList(Integer.valueOf(-99), null, null, null),
toBBList(Integer.valueOf(5), null, null, null)));

// Contradiction on a single page (last page) should also be rejected
assertNull(
"Single contradictory page should cause rejection",
ColumnIndexBuilder.build(
type,
BoundaryOrder.UNORDERED,
List.of(false, false, true),
List.of(0L, 5L, 0L),
toBBList(Integer.valueOf(1), Integer.valueOf(50), null),
toBBList(Integer.valueOf(49), Integer.valueOf(99), null)));

// Contradiction on the first page
assertNull(
"Contradictory first page should cause rejection",
ColumnIndexBuilder.build(
type,
BoundaryOrder.UNORDERED,
List.of(true, false, false),
List.of(0L, 0L, 5L),
toBBList(null, Integer.valueOf(1), Integer.valueOf(50)),
toBBList(null, Integer.valueOf(49), Integer.valueOf(99))));

// Single page with contradiction
assertNull(
"Single-page column index with contradiction should be rejected",
ColumnIndexBuilder.build(
type,
BoundaryOrder.UNORDERED,
List.of(true),
List.of(0L),
toBBList((Integer) null),
toBBList((Integer) null)));

// All pages are contradictory null pages
assertNull(
"All-contradictory column index should be rejected",
ColumnIndexBuilder.build(
type,
BoundaryOrder.UNORDERED,
List.of(true, true, true),
List.of(0L, 0L, 0L),
toBBList((Integer) null, null, null),
toBBList((Integer) null, null, null)));
}

@Test
public void testBuildPreservesValidColumnIndex() {
PrimitiveType type = Types.required(INT32).named("test_col");

// Legitimate null page: null_pages=true with null_counts > 0 — valid
ColumnIndex ci = ColumnIndexBuilder.build(
type,
BoundaryOrder.ASCENDING,
List.of(false, true, false),
List.of(0L, 100L, 0L),
toBBList(Integer.valueOf(1), null, Integer.valueOf(50)),
toBBList(Integer.valueOf(49), null, Integer.valueOf(99)));
assertCorrectNullPages(ci, false, true, false);
assertCorrectNullCounts(ci, 0, 100, 0);
assertCorrectValues(ci.getMinValues(), 1, null, 50);
assertCorrectValues(ci.getMaxValues(), 49, null, 99);

// All non-null pages — valid
ColumnIndex ci2 = ColumnIndexBuilder.build(
type,
BoundaryOrder.ASCENDING,
List.of(false, false, false),
List.of(0L, 5L, 10L),
toBBList(Integer.valueOf(1), Integer.valueOf(50), Integer.valueOf(100)),
toBBList(Integer.valueOf(49), Integer.valueOf(99), Integer.valueOf(150)));
assertCorrectNullPages(ci2, false, false, false);
assertCorrectNullCounts(ci2, 0, 5, 10);

// Single non-null page
ColumnIndex ci3 = ColumnIndexBuilder.build(
type,
BoundaryOrder.UNORDERED,
List.of(false),
List.of(0L),
toBBList(Integer.valueOf(42)),
toBBList(Integer.valueOf(42)));
assertCorrectNullPages(ci3, false);
assertCorrectNullCounts(ci3, 0);

// Single legitimate all-null page (null_pages=true, null_counts > 0)
ColumnIndex ci4 = ColumnIndexBuilder.build(
type, BoundaryOrder.UNORDERED, List.of(true), List.of(50L), toBBList((Integer) null), toBBList((Integer)
null));
assertCorrectNullPages(ci4, true);
assertCorrectNullCounts(ci4, 50);

// All pages legitimately null
ColumnIndex ci5 = ColumnIndexBuilder.build(
type,
BoundaryOrder.UNORDERED,
List.of(true, true, true),
List.of(10L, 20L, 30L),
toBBList((Integer) null, null, null),
toBBList((Integer) null, null, null));
assertCorrectNullPages(ci5, true, true, true);
assertCorrectNullCounts(ci5, 10, 20, 30);

// Boundary: null_counts=1 on a null page (minimum valid value) — should NOT be rejected
ColumnIndex ci6 = ColumnIndexBuilder.build(
type,
BoundaryOrder.UNORDERED,
List.of(false, true),
List.of(0L, 1L),
toBBList(Integer.valueOf(1), null),
toBBList(Integer.valueOf(99), null));
assertCorrectNullPages(ci6, false, true);
assertCorrectNullCounts(ci6, 0, 1);
}

@Test
public void testBuildWithoutNullCountsIsNotRejected() {
PrimitiveType type = Types.required(INT32).named("test_col");

// null_counts absent (optional field) — cannot detect contradiction, should build normally
ColumnIndex ci = ColumnIndexBuilder.build(
type,
BoundaryOrder.UNORDERED,
List.of(false, true, true),
null,
toBBList(Integer.valueOf(1), null, null),
toBBList(Integer.valueOf(99), null, null));
assertCorrectNullPages(ci, false, true, true);
assertNull("null_counts should be null when not provided", ci.getNullCounts());
}
}