Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions rust/perspective-js/test/js/constructors/arrow.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,48 @@ test.describe("Arrow", function () {
});

test.describe("regressions", () => {
// https://github.com/perspective-dev/perspective/issues/3169
test("null values are preserved across multi-batch Arrow IPC streams", async function () {
    /**
     * Builds a one-row Arrow table with a Utf8 `Identifier`, a Float64
     * `Value` and a DateDay `Date` column. Either of the last two cells
     * may be null.
     */
    const singleRowTable = (
        identifier: string,
        value: number | null,
        date: Date | null,
    ) =>
        arrow.tableFromArrays({
            Identifier: arrow.vectorFromArray([identifier], new arrow.Utf8()),
            Value: arrow.vectorFromArray([value], new arrow.Float64()),
            Date: arrow.vectorFromArray([date], new arrow.DateDay()),
        });

    // Three one-row tables whose nulls land in different columns; their
    // record batches are stitched into a single multi-batch table.
    const juneFifteenth = new Date(Date.UTC(2025, 5, 15));
    const batches = [
        singleRowTable("A", null, null),
        singleRowTable("B", 5, null),
        singleRowTable("C", null, juneFifteenth),
    ].flatMap((single) => single.batches);

    const combined = new arrow.Table(batches);

    // Guard: the input must really span three batches, otherwise the
    // multi-batch path is not what is being exercised.
    expect(combined.batches.length).toEqual(3);

    // Serialize in the IPC "stream" format and load the bytes into a
    // perspective table.
    const streamBytes = arrow.tableToIPC(combined, "stream");
    const loaded = await perspective.table(streamBytes.buffer as ArrayBuffer);
    const view = await loaded.view();
    const rows = await view.to_json();

    // Release the view before the table it was created from.
    await view.delete();
    await loaded.delete();

    // 1749945600000 is Date.UTC(2025, 5, 15) in epoch milliseconds.
    expect(rows).toStrictEqual([
        { Identifier: "A", Value: null, Date: null },
        { Identifier: "B", Value: 5, Date: null },
        { Identifier: "C", Value: null, Date: 1749945600000 },
    ]);
});

test("null equality works correctly in updates", async function () {
async function write_to_json(
buffer: ArrayBuffer,
Expand Down
21 changes: 16 additions & 5 deletions rust/perspective-server/cpp/perspective/src/cpp/arrow_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -907,19 +907,30 @@ ArrowLoader::fill_column(
copy_array(col, array, offset, len);
}

// Fill validity bitmap
// Fill validity bitmap. Operate only on the current chunk's
// range [offset, offset+len); a whole-column fill here would
// clobber validity bits set by other chunks in a multi-batch
// ChunkedArray.
std::int64_t null_count = array->null_count();

if (null_count == 0) {
col->valid_raw_fill();
for (uint32_t i = 0; i < len; ++i) {
col->set_valid(offset + i, true);
}
} else {
const uint8_t* null_bitmap = array->null_bitmap_data();

// If the arrow column is of null type, the null
// bitmap is a nullptr - so just mark everything as
// invalid and move on.
// bitmap is a nullptr - so just mark this chunk's rows
// as invalid and move on.
if (null_bitmap == nullptr) {
col->invalid_raw_fill();
for (uint32_t i = 0; i < len; ++i) {
if (is_update) {
col->unset(offset + i);
} else {
col->clear(offset + i);
}
}
} else {
// Read the null bitmap and set the correct rows
// as valid
Expand Down
Loading