Skip to content

Commit 2e895a7

Browse files
sirtimidclaude
andcommitted
fix(ocap-kernel): fix DGC e2e tests using wrong remote ID and isolated store cache
The DGC e2e tests had two compounding bugs: they called scheduleReap('r0') but the first allocated remote is r1, and they used a separate KernelStore instance whose cache was isolated from the kernel's internal store. Add reapRemotes() to RemoteManager and Kernel (mirroring reapVats) and use it in the e2e tests instead of bypassing the kernel with an external store. Remove getGCActions() and nextRemoteId assertions that suffered from the same stale cache issue. Also reset #remoteGcRequested flag in handlePeerRestart() so BOYD is correctly sent to a new incarnation after peer restart. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent dba940f commit 2e895a7

6 files changed

Lines changed: 111 additions & 22 deletions

File tree

packages/nodejs/test/e2e/remote-comms.test.ts

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,19 +1036,16 @@ describe.sequential('Remote Communications E2E', () => {
10361036
);
10371037

10381038
// Send a message to create cross-kernel object references
1039-
await sendRemoteMessage(kernel1, aliceRef, bobURL, 'hello', ['Alice']);
1040-
1041-
// Verify remote endpoint was allocated on kernel1
1042-
const nextRemoteId1 = Number(
1043-
kernelStore1.kv.get('nextRemoteId') ?? '0',
1039+
const response = await sendRemoteMessage(
1040+
kernel1,
1041+
aliceRef,
1042+
bobURL,
1043+
'hello',
1044+
['Alice'],
10441045
);
1045-
expect(nextRemoteId1).toBeGreaterThan(0);
10461046

1047-
// Verify remote endpoint was allocated on kernel2
1048-
const nextRemoteId2 = Number(
1049-
kernelStore2.kv.get('nextRemoteId') ?? '0',
1050-
);
1051-
expect(nextRemoteId2).toBeGreaterThan(0);
1047+
// Verify cross-kernel communication works (implies remote endpoints were created)
1048+
expect(response).toContain('vat Bob got "hello" from Alice');
10521049
},
10531050
NETWORK_TIMEOUT,
10541051
);
@@ -1067,9 +1064,9 @@ describe.sequential('Remote Communications E2E', () => {
10671064
// Send a message to create cross-kernel refs
10681065
await sendRemoteMessage(kernel1, aliceRef, bobURL, 'hello', ['Alice']);
10691066

1070-
// Schedule reap on kernel1's remote endpoint (r0) - this will cause
1067+
// Schedule reap on kernel1's remote endpoints - this will cause
10711068
// the crank loop to deliver BOYD to the remote kernel
1072-
kernelStore1.scheduleReap('r0');
1069+
kernel1.reapRemotes();
10731070

10741071
// Trigger cranks to process the reap action (which sends BOYD to kernel2)
10751072
// and allow the remote to process it and respond
@@ -1106,8 +1103,8 @@ describe.sequential('Remote Communications E2E', () => {
11061103
await sendRemoteMessage(kernel1, aliceRef, bobURL, 'hello', ['Alice']);
11071104
await sendRemoteMessage(kernel2, bobRef, aliceURL, 'hello', ['Bob']);
11081105

1109-
// Schedule reap on kernel2's remote endpoint - this will send BOYD to kernel1
1110-
kernelStore2.scheduleReap('r0');
1106+
// Schedule reap on kernel2's remote endpoints - this will send BOYD to kernel1
1107+
kernel2.reapRemotes();
11111108

11121109
// Trigger cranks to process the reap and allow BOYD to flow
11131110
for (let i = 0; i < 3; i++) {
@@ -1154,8 +1151,8 @@ describe.sequential('Remote Communications E2E', () => {
11541151

11551152
// Schedule reap on BOTH sides simultaneously - this tests that the
11561153
// ping-pong prevention flag works correctly, preventing infinite BOYD loops
1157-
kernelStore1.scheduleReap('r0');
1158-
kernelStore2.scheduleReap('r0');
1154+
kernel1.reapRemotes();
1155+
kernel2.reapRemotes();
11591156

11601157
// Trigger cranks on both kernels to process the reaps and allow
11611158
// BOYD messages to flow in both directions
@@ -1167,10 +1164,6 @@ describe.sequential('Remote Communications E2E', () => {
11671164
await waitUntilQuiescent(500);
11681165
}
11691166

1170-
// No pending GC actions on either side after DGC completes
1171-
expect(kernelStore1.getGCActions().size).toBe(0);
1172-
expect(kernelStore2.getGCActions().size).toBe(0);
1173-
11741167
// Verify continued bidirectional communication works - this proves
11751168
// the BOYD exchange completed without breaking the connection
11761169
const aliceToBob = await sendRemoteMessage(

packages/ocap-kernel/src/Kernel.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import { makeKernelStore } from './store/index.ts';
1414
import type { KernelStore } from './store/index.ts';
1515
import type {
1616
VatId,
17+
RemoteId,
1718
EndpointId,
1819
KRef,
1920
PlatformServices,
@@ -481,6 +482,16 @@ export class Kernel {
481482
this.#vatManager.reapVats(filter);
482483
}
483484

485+
/**
486+
* Reap remotes that match the filter.
487+
* This is for debugging and testing purposes only.
488+
*
489+
* @param filter - A function that returns true if the remote should be reaped.
490+
*/
491+
reapRemotes(filter: (remoteId: RemoteId) => boolean = () => true): void {
492+
this.#remoteManager.reapRemotes(filter);
493+
}
494+
484495
/**
485496
* Pin a vat root.
486497
*

packages/ocap-kernel/src/remotes/kernel/RemoteHandle.test.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1460,5 +1460,28 @@ describe('RemoteHandle', () => {
14601460
// The pending redemption should be rejected
14611461
await expect(redeemPromise).rejects.toThrow('Remote peer restarted');
14621462
});
1463+
1464+
it('resets remoteGcRequested flag so BOYD is sent to new incarnation', async () => {
1465+
const remote = makeRemote();
1466+
1467+
// Receive BOYD from remote — sets the ping-pong prevention flag
1468+
await remote.handleRemoteMessage(
1469+
JSON.stringify({
1470+
seq: 1,
1471+
method: 'deliver',
1472+
params: ['bringOutYourDead'],
1473+
}),
1474+
);
1475+
1476+
// Peer restarts — flag should be cleared
1477+
remote.handlePeerRestart();
1478+
1479+
// Next deliverBringOutYourDead should send BOYD (not suppress it)
1480+
await remote.deliverBringOutYourDead();
1481+
expect(mockRemoteComms.sendRemoteMessage).toHaveBeenCalledWith(
1482+
mockRemotePeerId,
1483+
expect.any(String),
1484+
);
1485+
});
14631486
});
14641487
});

packages/ocap-kernel/src/remotes/kernel/RemoteHandle.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1089,11 +1089,12 @@ export class RemoteHandle implements EndpointHandle {
10891089
// Reject pending URL redemptions - the remote won't have context for them
10901090
this.rejectPendingRedemptions('Remote peer restarted');
10911091

1092-
// Reset sequence numbers for fresh start
1092+
// Reset sequence numbers and flags for fresh start
10931093
this.#nextSendSeq = 0;
10941094
this.#highestReceivedSeq = 0;
10951095
this.#startSeq = 0;
10961096
this.#retryCount = 0;
1097+
this.#remoteGcRequested = false;
10971098

10981099
// Clear persisted sequence state
10991100
this.#kernelStore.clearRemoteSeqState(this.remoteId);

packages/ocap-kernel/src/remotes/kernel/RemoteManager.test.ts

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,53 @@ describe('RemoteManager', () => {
520520
});
521521
});
522522

523+
describe('reapRemotes', () => {
524+
beforeEach(async () => {
525+
const messageHandler = vi.fn();
526+
vi.mocked(remoteComms.initRemoteComms).mockResolvedValue(mockRemoteComms);
527+
remoteManager.setMessageHandler(messageHandler);
528+
await remoteManager.initRemoteComms();
529+
});
530+
531+
it('schedules reap for all remotes when no filter provided', () => {
532+
remoteManager.establishRemote('peer1');
533+
remoteManager.establishRemote('peer2');
534+
remoteManager.establishRemote('peer3');
535+
536+
remoteManager.reapRemotes();
537+
538+
// Drain the reap queue and verify all remotes were scheduled
539+
const reaped: string[] = [];
540+
let action = kernelStore.nextReapAction();
541+
while (action) {
542+
reaped.push(action.endpointId);
543+
action = kernelStore.nextReapAction();
544+
}
545+
expect(reaped).toStrictEqual(['r1', 'r2', 'r3']);
546+
});
547+
548+
it('schedules reap only for remotes matching the filter', () => {
549+
remoteManager.establishRemote('peer1');
550+
remoteManager.establishRemote('peer2');
551+
remoteManager.establishRemote('peer3');
552+
553+
remoteManager.reapRemotes((remoteId) => remoteId === 'r2');
554+
555+
const action = kernelStore.nextReapAction();
556+
expect(action).toStrictEqual({
557+
type: 'bringOutYourDead',
558+
endpointId: 'r2',
559+
});
560+
expect(kernelStore.nextReapAction()).toBeUndefined();
561+
});
562+
563+
it('does nothing when no remotes exist', () => {
564+
remoteManager.reapRemotes();
565+
566+
expect(kernelStore.nextReapAction()).toBeUndefined();
567+
});
568+
});
569+
523570
describe('handleRemoteGiveUp', () => {
524571
beforeEach(async () => {
525572
const messageHandler = vi.fn();

packages/ocap-kernel/src/remotes/kernel/RemoteManager.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,20 @@ export class RemoteManager {
351351
await this.#platformServices.closeConnection(peerId);
352352
}
353353

354+
/**
355+
* Schedule reap for remotes that match the filter.
356+
* This is for debugging and testing purposes only.
357+
*
358+
* @param filter - A function that returns true if the remote should be reaped.
359+
*/
360+
reapRemotes(filter: (remoteId: RemoteId) => boolean = () => true): void {
361+
for (const remoteId of this.#remotes.keys()) {
362+
if (filter(remoteId)) {
363+
this.#kernelStore.scheduleReap(remoteId);
364+
}
365+
}
366+
}
367+
354368
/**
355369
* Take note of where a peer might be.
356370
*

0 commit comments

Comments
 (0)