Skip to content

Commit 201786e

Browse files
committed
WIP d152a
1 parent 3067909 commit 201786e

File tree

5 files changed

+158
-88
lines changed

5 files changed

+158
-88
lines changed

docs/site/app/api/crawl-sitemap/route.ts

Lines changed: 1 addition & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,15 @@
11
import { NextResponse } from "next/server";
2-
import { repoDocsPages, blog, extraPages } from "#app/source.ts";
3-
import { openapiPages } from "#app/(openapi)/docs/openapi/source.ts";
42
import {
53
loadState,
64
saveState,
75
createEmptyState,
86
updatePageState,
97
pruneRemovedPages,
108
crawlPages,
9+
getAllPageUrls,
1110
SITEMAP_CONFIG,
1211
} from "#lib/sitemap/index.ts";
1312

14-
/**
15-
* Collect all page URLs from fumadocs loaders
16-
*/
17-
function getAllPageUrls(): Array<string> {
18-
const urls: Array<string> = [];
19-
20-
// Add homepage
21-
urls.push("/");
22-
23-
// Docs pages
24-
for (const page of repoDocsPages.getPages()) {
25-
urls.push(page.url);
26-
}
27-
28-
// Blog pages (exclude external blog posts)
29-
for (const page of blog.getPages()) {
30-
urls.push(page.url);
31-
}
32-
33-
// Extra pages (governance, terms, etc.)
34-
for (const page of extraPages.getPages()) {
35-
urls.push(page.url);
36-
}
37-
38-
// OpenAPI pages
39-
for (const page of openapiPages.getPages()) {
40-
urls.push(page.url);
41-
}
42-
43-
// Add showcase page
44-
urls.push("/showcase");
45-
46-
return urls;
47-
}
48-
4913
export const dynamic = "force-dynamic";
5014
export const maxDuration = 300; // 5 minutes max for crawling
5115

docs/site/app/sitemap.ts

Lines changed: 5 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,9 @@
11
import type { MetadataRoute } from "next";
2-
import { repoDocsPages, blog, extraPages } from "#app/source.ts";
3-
import { openapiPages } from "#app/(openapi)/docs/openapi/source.ts";
4-
import { loadState, SITEMAP_CONFIG } from "#lib/sitemap/index.ts";
5-
6-
/**
7-
* Collect all page URLs from fumadocs loaders
8-
*/
9-
function getAllPageUrls(): Array<string> {
10-
const urls: Array<string> = [];
11-
12-
// Add homepage
13-
urls.push("/");
14-
15-
// Docs pages
16-
for (const page of repoDocsPages.getPages()) {
17-
urls.push(page.url);
18-
}
19-
20-
// Blog pages (exclude external blog posts)
21-
for (const page of blog.getPages()) {
22-
urls.push(page.url);
23-
}
24-
25-
// Extra pages (governance, terms, etc.)
26-
for (const page of extraPages.getPages()) {
27-
urls.push(page.url);
28-
}
29-
30-
// OpenAPI pages
31-
for (const page of openapiPages.getPages()) {
32-
urls.push(page.url);
33-
}
34-
35-
// Add showcase page
36-
urls.push("/showcase");
37-
38-
return urls;
39-
}
2+
import {
3+
loadState,
4+
getAllPageUrls,
5+
SITEMAP_CONFIG,
6+
} from "#lib/sitemap/index.ts";
407

418
export const dynamic = "force-dynamic";
429
export const revalidate = 3600; // Revalidate every hour

docs/site/lib/sitemap/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,5 @@ export * from "./types";
22
export * from "./content-extractor";
33
export * from "./crawler";
44
export * from "./redis";
5+
export * from "./route-discovery";
6+
export * from "./pages";

docs/site/lib/sitemap/pages.ts

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,48 @@
11
import { repoDocsPages, blog, extraPages } from "#app/source.ts";
22
import { openapiPages } from "#app/(openapi)/docs/openapi/source.ts";
3+
import { discoverStaticRoutes, getAppDirectory } from "./route-discovery.ts";
34

45
/**
 * Build the complete, de-duplicated list of page URLs for the sitemap.
 *
 * Two sources are merged:
 * 1. Static routes found by scanning the app directory for page files
 *    (standalone pages such as /blog or /showcase).
 * 2. Content-driven pages reported by the fumadocs loaders (docs, blog,
 *    extra pages, OpenAPI pages).
 *
 * @returns Every unique URL, sorted with the default string ordering.
 */
export function getAllPageUrls(): Array<string> {
  // Seed the set with the statically discovered routes; the Set removes
  // any URL that a loader reports as well.
  const seen = new Set<string>(discoverStaticRoutes(getAppDirectory()));

  const collect = (pages: Iterable<{ url: string }>): void => {
    for (const { url } of pages) {
      seen.add(url);
    }
  };

  // Docs pages
  collect(repoDocsPages.getPages());

  // Blog pages.
  // NOTE(review): nothing is filtered here; if external (off-site) blog
  // posts must be excluded, confirm the loader already omits them.
  collect(blog.getPages());

  // Extra pages (governance, terms, etc.)
  collect(extraPages.getPages());

  // OpenAPI pages
  collect(openapiPages.getPages());

  return Array.from(seen).sort();
}
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import { readdirSync, statSync } from "node:fs";
2+
import { join } from "node:path";
3+
4+
/**
5+
* Discovers all page routes from the Next.js app directory.
6+
*
7+
* This scans the app directory for all page.tsx files and converts
8+
* them to URL paths, ensuring the sitemap is always exhaustive.
9+
*
10+
* Route conventions handled:
11+
* - (group) folders - stripped from URL path
12+
* - [[...slug]] - catch-all routes (handled by fumadocs)
13+
* - [...slug] - catch-all routes (handled by fumadocs)
14+
* - [param] - dynamic routes (skipped, handled by fumadocs)
15+
* - page.tsx at root - becomes "/"
16+
*/
17+
18+
/** URL paths that must never be emitted into the sitemap. */
const EXCLUDED_ROUTES: Set<string> = new Set<string>([
  // Thank you page, not meant for SEO indexing
  "/confirm",
]);
22+
23+
/**
 * Tell whether a folder name is written as a route group, i.e. it both
 * starts with "(" and ends with ")" (for example `(marketing)`).
 */
function isRouteGroup(segment: string): boolean {
  const opensWithParen = segment.startsWith("(");
  const closesWithParen = segment.endsWith(")");
  return opensWithParen && closesWithParen;
}
29+
30+
/**
 * Tell whether a folder name is a dynamic segment, i.e. it both starts
 * with "[" and ends with "]" (for example `[id]`, `[...slug]` or
 * `[[...slug]]`).
 */
function isDynamicSegment(segment: string): boolean {
  if (!segment.startsWith("[")) {
    return false;
  }
  return segment.endsWith("]");
}
36+
37+
/**
 * Matches intercepting-route folder names such as `(.)photo`, `(..)photo`
 * and `(...)photo`. These folders re-render an existing route and do not
 * define a URL of their own.
 */
const INTERCEPTING_ROUTE_PREFIX = /^\(\.{1,3}\)/;

/**
 * Decide how a directory inside the app tree contributes to URL paths.
 *
 * @param entry - Directory name (a single path component).
 * @param basePath - URL path accumulated so far, without a leading slash.
 * @returns `null` when the whole subtree must be skipped, `""` when the
 *   folder is transparent (adds nothing to the URL), otherwise the URL
 *   segment to append.
 */
function resolveUrlSegment(entry: string, basePath: string): string | null {
  // Hidden directories and node_modules never hold routes.
  if (entry.startsWith(".") || entry === "node_modules") {
    return null;
  }

  // Private folders (`_name`) are opted out of routing, subfolders included.
  if (entry.startsWith("_")) {
    return null;
  }

  // Intercepting routes only re-render a route that exists elsewhere.
  if (INTERCEPTING_ROUTE_PREFIX.test(entry)) {
    return null;
  }

  // Skip the API tree only where the URL would be exactly /api; a nested
  // folder that merely happens to be called "api" (e.g. /docs/api) may
  // still contain real pages.
  if (entry === "api" && basePath === "") {
    return null;
  }

  // Every page below a dynamic segment is rejected by the "[" check in
  // findPageFiles, so descending into it would be wasted work.
  if (isDynamicSegment(entry)) {
    return null;
  }

  // Route groups and parallel-route slots (`@slot`) do not affect the URL.
  if (isRouteGroup(entry) || entry.startsWith("@")) {
    return "";
  }

  return entry;
}

/**
 * Recursively collect the URL path of every static page file under `dir`.
 *
 * A page is a file named `page.tsx` or `page.ts`. Its URL is the chain of
 * enclosing folder names, after dropping folders that do not affect the
 * URL (route groups, parallel-route slots) and skipping subtrees that
 * cannot yield static URLs (hidden/private folders, node_modules,
 * intercepting routes, dynamic segments, the top-level api tree).
 *
 * Directories or entries that cannot be read are silently ignored, so a
 * missing `dir` yields an empty list. The result may contain duplicates
 * (e.g. a slot and its parent both providing a page for the same URL).
 *
 * @param dir - Directory to scan.
 * @param basePath - URL path accumulated so far, without a leading slash;
 *   empty for the root.
 * @returns URL paths, each starting with "/".
 */
function findPageFiles(dir: string, basePath: string = ""): Array<string> {
  const pages: Array<string> = [];

  let entries: Array<string>;
  try {
    entries = readdirSync(dir);
  } catch {
    // Missing or unreadable directory: nothing to report.
    return pages;
  }

  for (const entry of entries) {
    const fullPath = join(dir, entry);

    let isDirectory: boolean;
    try {
      isDirectory = statSync(fullPath).isDirectory();
    } catch {
      // Entry vanished or is inaccessible; move on to the next one.
      continue;
    }

    if (isDirectory) {
      const urlSegment = resolveUrlSegment(entry, basePath);
      if (urlSegment === null) {
        continue;
      }

      // Join only the non-empty parts so that transparent folders and the
      // root level never introduce stray slashes.
      const childBasePath = [basePath, urlSegment].filter(Boolean).join("/");
      pages.push(...findPageFiles(fullPath, childBasePath));
      continue;
    }

    const isPageFile = entry === "page.tsx" || entry === "page.ts";
    // Paths containing "[" belong to dynamic routes, which are enumerated
    // by the content loaders rather than by this scan.
    if (isPageFile && !basePath.includes("[")) {
      pages.push(basePath ? `/${basePath}` : "/");
    }
  }

  return pages;
}
103+
104+
/**
 * List every static route found beneath the given app directory.
 *
 * Scans `appDir` for page files, removes duplicate URLs and anything
 * listed in EXCLUDED_ROUTES, and returns what is left.
 *
 * @param appDir - Path of the app directory to scan.
 * @returns Unique URL paths, sorted with the default string ordering.
 */
export function discoverStaticRoutes(appDir: string): Array<string> {
  const kept = new Set<string>();

  for (const route of findPageFiles(appDir)) {
    if (EXCLUDED_ROUTES.has(route)) {
      continue;
    }
    kept.add(route);
  }

  return Array.from(kept).sort();
}
117+
118+
/**
 * Return the path of the `app` directory, built by joining the current
 * working directory with "app".
 *
 * NOTE(review): this only points at the real app directory when the
 * process is started from the project root; confirm that the source
 * `app` folder is also present at runtime in deployed environments.
 */
export function getAppDirectory(): string {
  const projectRoot = process.cwd();
  return join(projectRoot, "app");
}

0 commit comments

Comments
 (0)