From bf77631164f0c87124ef69e083d74fe7720b288d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Andr=C3=A9s=20Leiva=20Castillo?= Date: Sat, 20 Jun 2026 21:53:32 -0600 Subject: [PATCH 1/2] fix(fs): cap file list and timeout untracked scan to prevent session init OOM Large repos (100k+ files, unignored __pycache__ / venvs) caused git ls-files --others to hang for minutes and allocate hundreds of MB in the workspace-server process. The resulting GC pressure starved the concurrent session initializationResult() promise, reliably hitting the 30s timeout when adding a large project. Two-part fix in FsService.listRepoFiles: - Abort git ls-files --others after 8 s via AbortController; fall back to [] - Cap combined tracked + untracked array at 50,000 entries before building the directory tree (avoids ~200MB+ allocation for very large repos) Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01TpGjPYBD4pgZpsAHjozzsN --- .../src/services/fs/service.test.ts | 38 ++++++++++++++++-- .../src/services/fs/service.ts | 39 +++++++++++++++++-- 2 files changed, 70 insertions(+), 7 deletions(-) diff --git a/packages/workspace-server/src/services/fs/service.test.ts b/packages/workspace-server/src/services/fs/service.test.ts index ae7cd89a32..e7276e1011 100644 --- a/packages/workspace-server/src/services/fs/service.test.ts +++ b/packages/workspace-server/src/services/fs/service.test.ts @@ -5,20 +5,26 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; vi.mock("@posthog/git/queries", () => ({ getChangedFiles: vi.fn(async () => new Set()), - listAllFiles: vi.fn(async () => []), + listFiles: vi.fn(async () => []), + listUntrackedFiles: vi.fn(async () => []), })); -import { getChangedFiles, listAllFiles } from "@posthog/git/queries"; +import { + getChangedFiles, + listFiles, + listUntrackedFiles, +} from "@posthog/git/queries"; import { FsService } from "./service"; describe("FsService.listRepoFiles", () => { it("derives directory entries alongside files", async () => { vi.mocked(getChangedFiles).mockResolvedValue(new Set()); - vi.mocked(listAllFiles).mockResolvedValue([ + vi.mocked(listFiles).mockResolvedValue([ "a.ts", "src/b.ts", "src/sub/c.ts", ]); + vi.mocked(listUntrackedFiles).mockResolvedValue([]); const service = new FsService(); const entries = await service.listRepoFiles("/repo"); @@ -34,11 +40,12 @@ describe("FsService.listRepoFiles", () => { it("filters directories and files by query substring", async () => { vi.mocked(getChangedFiles).mockResolvedValue(new Set()); - vi.mocked(listAllFiles).mockResolvedValue([ + vi.mocked(listFiles).mockResolvedValue([ "a.ts", "src/b.ts", "src/sub/c.ts", ]); + vi.mocked(listUntrackedFiles).mockResolvedValue([]); const service = new FsService(); const entries = await service.listRepoFiles("/repo", "sub"); @@ -48,6 +55,29 @@ describe("FsService.listRepoFiles", () => { { path: "src/sub/c.ts", kind: "file" }, ]); }); + + it("caps file list at MAX_REPO_FILES when repo is very large", async () => { + vi.mocked(getChangedFiles).mockResolvedValue(new Set()); + const bigList = Array.from({ length: 60_000 }, (_, i) => `file${i}.ts`); + vi.mocked(listFiles).mockResolvedValue(bigList); + vi.mocked(listUntrackedFiles).mockResolvedValue([]); + + const service = new FsService(); + const entries = await service.listRepoFiles("/repo"); + + expect(entries.length).toBeLessThanOrEqual(50_000); + }); + + it("omits untracked files when git ls-files --others is aborted", async () => { + vi.mocked(getChangedFiles).mockResolvedValue(new Set()); + vi.mocked(listFiles).mockResolvedValue(["tracked.ts"]); + vi.mocked(listUntrackedFiles).mockRejectedValue(new Error("AbortError")); + + const service = new FsService(); + const entries = await service.listRepoFiles("/repo"); + + expect(entries.some((e) => e.path === "tracked.ts")).toBe(true); + }); }); describe("FsService repo file IO", () => { diff --git a/packages/workspace-server/src/services/fs/service.ts b/packages/workspace-server/src/services/fs/service.ts index 251109bc80..d9c61086ac 100644 --- a/packages/workspace-server/src/services/fs/service.ts +++ b/packages/workspace-server/src/services/fs/service.ts @@ -1,6 +1,10 @@ import fs from "node:fs/promises"; import path from "node:path"; -import { getChangedFiles, listAllFiles } from "@posthog/git/queries"; +import { + getChangedFiles, + listFiles, + listUntrackedFiles, +} from "@posthog/git/queries"; import { injectable } from "inversify"; import type { BoundedReadResult, DirectoryEntry, FileEntry } from "./schemas"; @@ -8,6 +12,12 @@ import type { BoundedReadResult, DirectoryEntry, FileEntry } from "./schemas"; export class FsService { private static readonly CACHE_TTL = 30000; private static readonly READ_REPO_FILES_CONCURRENCY = 24; + // Large repos (100k+ files) cause GC pressure that starves the session-init + // event loop. Cap the combined tracked + untracked list to bound allocation. + private static readonly MAX_REPO_FILES = 50_000; + // Abort git ls-files --others if it takes too long (unignored venvs, caches, + // or staticfiles directories can make it scan millions of entries). + private static readonly UNTRACKED_TIMEOUT_MS = 8_000; private cache = new Map(); async listDirectory(dirPath: string): Promise { @@ -43,7 +53,7 @@ export class FsService { const changedFiles = await getChangedFiles(repoPath); if (query?.trim()) { - const allFiles = await listAllFiles(repoPath); + const allFiles = await this.fetchAllFiles(repoPath); const directories = this.deriveDirectories(allFiles); const lowerQuery = query.toLowerCase(); const matchingDirs = directories.filter((d) => @@ -64,7 +74,7 @@ export class FsService { return limit ? cached.files.slice(0, limit) : cached.files; } - const files = await listAllFiles(repoPath); + const files = await this.fetchAllFiles(repoPath); const directories = this.deriveDirectories(files); const entries = [ ...this.toDirectoryEntries(directories), @@ -221,6 +231,29 @@ export class FsService { })); } + private async fetchAllFiles(repoPath: string): Promise { + const controller = new AbortController(); + const timer = setTimeout( + () => controller.abort(), + FsService.UNTRACKED_TIMEOUT_MS, + ); + try { + const [tracked, untracked] = await Promise.all([ + listFiles(repoPath), + listUntrackedFiles(repoPath, { abortSignal: controller.signal }).catch( + () => [], + ), + ]); + const combined = tracked.concat(untracked); + if (combined.length > FsService.MAX_REPO_FILES) { + combined.length = FsService.MAX_REPO_FILES; + } + return combined; + } finally { + clearTimeout(timer); + } + } + private deriveDirectories(files: string[]): string[] { const dirs = new Set(); for (const file of files) { From a37bbba1e96d1dce66ce615aa6e60d5fdede13f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Andr=C3=A9s=20Leiva=20Castillo?= Date: Sat, 20 Jun 2026 22:17:25 -0600 Subject: [PATCH 2/2] test(fs): tighten cap assertion and add nested-path case per Greptile review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use toBe(50_000) for flat input — zero derived directories means total === cap - Add test documenting that entries total can exceed 50k when nested paths produce extra directory entries via deriveDirectories (cap is on raw files, not on the final files+dirs result) Co-Authored-By: Claude Sonnet 4.6 Claude-Session: https://claude.ai/code/session_01TpGjPYBD4pgZpsAHjozzsN --- .../src/services/fs/service.test.ts | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/packages/workspace-server/src/services/fs/service.test.ts b/packages/workspace-server/src/services/fs/service.test.ts index e7276e1011..a1e45a262c 100644 --- a/packages/workspace-server/src/services/fs/service.test.ts +++ b/packages/workspace-server/src/services/fs/service.test.ts @@ -58,6 +58,7 @@ describe("FsService.listRepoFiles", () => { it("caps file list at MAX_REPO_FILES when repo is very large", async () => { vi.mocked(getChangedFiles).mockResolvedValue(new Set()); + // Flat paths produce zero derived directory entries, so total === cap. const bigList = Array.from({ length: 60_000 }, (_, i) => `file${i}.ts`); vi.mocked(listFiles).mockResolvedValue(bigList); vi.mocked(listUntrackedFiles).mockResolvedValue([]); @@ -65,7 +66,26 @@ describe("FsService.listRepoFiles", () => { const service = new FsService(); const entries = await service.listRepoFiles("/repo"); - expect(entries.length).toBeLessThanOrEqual(50_000); + expect(entries.length).toBe(50_000); + }); + + it("total entries can exceed MAX_REPO_FILES when derived directories are included", async () => { + vi.mocked(getChangedFiles).mockResolvedValue(new Set()); + // Nested paths cause deriveDirectories to add parent directory entries on + // top of the capped 50k file entries, so the returned total is > 50k. + const bigList = Array.from( + { length: 60_000 }, + (_, i) => `src/sub${i}/file.ts`, + ); + vi.mocked(listFiles).mockResolvedValue(bigList); + vi.mocked(listUntrackedFiles).mockResolvedValue([]); + + const service = new FsService(); + const entries = await service.listRepoFiles("/repo"); + + const fileEntries = entries.filter((e) => e.kind === "file"); + expect(fileEntries.length).toBe(50_000); + expect(entries.length).toBeGreaterThan(50_000); }); it("omits untracked files when git ls-files --others is aborted", async () => {