From 27714baf99d701d4f89f6c36f7930d8a0480cb9a Mon Sep 17 00:00:00 2001 From: hongyi-chen Date: Fri, 8 May 2026 23:49:48 +0000 Subject: [PATCH 1/2] Add sync-openapi-spec skill Adds a new skill at .agents/skills/sync-openapi-spec/ that compares warp-server/public_api/openapi.yaml against developers/agent-api-openapi.yaml (the file the Scalar API reference at docs.warp.dev/api renders from) and regenerates the docs subset deterministically. The skill is documented in SKILL.md, with the exclusion policy (which tags and paths are kept vs dropped) recorded in references/sync-policy.md and encoded in scripts/sync_openapi.py as EXCLUDED_TAGS / EXCLUDED_PATHS. The script supports --mode {diff,apply,self-test}. This PR ships only the skill itself. Running the skill (and updating developers/agent-api-openapi.yaml) is intentionally a follow-up, so reviewers can vet the policy before any changes land in the public API reference. Co-Authored-By: Oz --- .agents/skills/sync-openapi-spec/SKILL.md | 150 ++++++ .../references/sync-policy.md | 56 ++ .../sync-openapi-spec/scripts/sync_openapi.py | 507 ++++++++++++++++++ 3 files changed, 713 insertions(+) create mode 100644 .agents/skills/sync-openapi-spec/SKILL.md create mode 100644 .agents/skills/sync-openapi-spec/references/sync-policy.md create mode 100644 .agents/skills/sync-openapi-spec/scripts/sync_openapi.py diff --git a/.agents/skills/sync-openapi-spec/SKILL.md b/.agents/skills/sync-openapi-spec/SKILL.md new file mode 100644 index 00000000..ac7d1fc1 --- /dev/null +++ b/.agents/skills/sync-openapi-spec/SKILL.md @@ -0,0 +1,150 @@ +--- +name: sync-openapi-spec +description: >- + Sync the public Oz Agent API OpenAPI spec from warp-server into the docs + repo, regenerating `developers/agent-api-openapi.yaml` (the file that + powers the Scalar API reference at `docs.warp.dev/api`). 
Use when the + warp-server public API has changed, when the Scalar reference looks + stale, or on a scheduled cadence to keep the public API docs aligned + with the canonical spec. +--- + +# Sync OpenAPI Spec + +Keep `developers/agent-api-openapi.yaml` in sync with the canonical spec at `warp-server/public_api/openapi.yaml`. + +**Direction:** warp-server → docs. The server spec is the source of truth. The docs file is a curated subset (drops `memory_stores`/`harness-support` and a handful of internal `agent` paths) that Scalar renders on `docs.warp.dev/api`. + +## Repos + +This skill requires two repos in the agent's environment: + +- `warpdotdev/warp-server` — source of truth (`public_api/openapi.yaml`) +- `warpdotdev/docs` — Scalar-facing copy (`developers/agent-api-openapi.yaml`) + +## Prerequisites + +- Both repos checked out, with `warp-server` reachable from the docs repo (default assumption: sibling directories — `../warp-server/public_api/openapi.yaml`) +- Python 3 with `pyyaml` installed (`pip install pyyaml` or `pip install --break-system-packages pyyaml` in managed environments) +- `gh` CLI authenticated against `warpdotdev/docs` + +## Workflow + +### Step 1: Self-test + +Run the script's self-test first to confirm `pyyaml` is available and the transform logic still passes: + +```bash +python3 .agents/skills/sync-openapi-spec/scripts/sync_openapi.py --mode self-test +``` + +Expected output: `self-test: OK`. If this fails, fix the script before going further. 
+ +### Step 2: Diff source against target + +```bash +python3 .agents/skills/sync-openapi-spec/scripts/sync_openapi.py \ + --mode diff \ + --source ../warp-server/public_api/openapi.yaml \ + --target developers/agent-api-openapi.yaml +``` + +The script prints structural drift grouped into: +- Paths added/removed/modified relative to the expected docs subset +- Component schemas added/removed/modified +- Top-level changes (`openapi`, `info`, `servers`) +- Unclassified tags or paths (anything not covered by `EXCLUDED_TAGS` or the `agent`/`schedules` allowlist) + +If the script reports `In sync. No changes needed.`, stop here. + +### Step 3: Triage unclassified items + +Any line prefixed with `!` flags a tag or path the policy doesn't recognize. Do NOT auto-include or auto-drop these. For each one: +1. Read the corresponding handler in `warp-server/router/handlers/public_api/` to confirm whether the endpoint is intended to be public. +2. If the endpoint is public-facing, add its tag to `KNOWN_TAGS` in `_unknown_classifications` (`scripts/sync_openapi.py`) and record it as public in `references/sync-policy.md`. The script already includes the path on the next `apply`, but the `!` warning persists until the tag is recognized. +3. If the endpoint should remain hidden, extend `EXCLUDED_TAGS` or `EXCLUDED_PATHS` in `scripts/sync_openapi.py` and update `references/sync-policy.md` to record the rationale. +4. Re-run `--mode diff` until no `!` lines remain. + +### Step 4: Apply the regenerated subset + +```bash +python3 .agents/skills/sync-openapi-spec/scripts/sync_openapi.py \ + --mode apply \ + --source ../warp-server/public_api/openapi.yaml \ + --target developers/agent-api-openapi.yaml +``` + +This rewrites `developers/agent-api-openapi.yaml` with the regenerated subset. 
+ +### Step 5: Validate the regenerated spec + +```bash +# YAML parses +python3 -c "import yaml; yaml.safe_load(open('developers/agent-api-openapi.yaml'))" + +# Astro + Scalar boot succeed (catches dangling $refs, malformed paths) +npm run build +``` + +Optional, recommended when many schemas changed: +```bash +npx @redocly/cli lint developers/agent-api-openapi.yaml +``` + +If `npm run build` fails, inspect the build error, fix the underlying spec issue (most often a `$ref` to a schema that the script pruned because nothing public references it), and re-run. + +### Step 6: Commit and open a PR + +```bash +git checkout -b sync-openapi-spec/YYYY-MM-DD +git add developers/agent-api-openapi.yaml +git commit -m "docs: sync agent-api-openapi.yaml from warp-server + +Co-Authored-By: Oz " +git push origin sync-openapi-spec/YYYY-MM-DD +``` + +Open a draft PR with: +- **Title:** `docs: sync agent-api-openapi.yaml from warp-server` +- **Body:** include the full output from Step 2 (paths/schemas added/removed/modified) so reviewers can see exactly what changed and why. +- **Labels:** `documentation` + +Use `report_pr` to surface the PR link. + +### Step 7: Report + +Summarize: +- Source commit SHA used (capture with `cd ../warp-server && git rev-parse HEAD`) +- Number of paths added / removed / modified in the regenerated subset +- Number of schemas added / removed / modified +- Any items flagged for triage and how they were resolved +- Or confirm `In sync. No changes needed.` + +## Sync policy + +The policy is encoded in `scripts/sync_openapi.py` as `EXCLUDED_TAGS` and `EXCLUDED_PATHS`. See `references/sync-policy.md` for the rationale behind each entry and the rules for adding new ones. + +## Schedule + +Run on demand whenever `warp-server/public_api/openapi.yaml` has changed materially since the last docs sync, or on a weekly cadence as a safety net. + +## Troubleshooting + +### `ModuleNotFoundError: No module named 'yaml'` +Install pyyaml: `pip install pyyaml`. 
On Debian-based images with externally managed Python, use `pip install --break-system-packages pyyaml`. + +### `error: source spec not found at ...` +The `warp-server` repo isn't where the script expected. Pass `--source /absolute/path/to/warp-server/public_api/openapi.yaml`. + +### `npm run build` fails after `--mode apply` with a missing-schema error +The script's `$ref` walker missed a transitive reference. Inspect the failing `$ref`, confirm the schema exists in the source spec, and check whether the path holding the reference was supposed to be kept. If the path is genuinely public, the schema should follow automatically — file a bug against the script (the walker should be transitive over `allOf`/`oneOf`/`anyOf`/`items`/`additionalProperties`). + +### Diff shows changes that aren't in the source spec +Make sure `../warp-server` is on the branch you intended to compare against (usually `develop`). Run `cd ../warp-server && git status -sb && git --no-pager log -1` to confirm. + +## References + +- `scripts/sync_openapi.py` — the diff/apply tool +- `references/sync-policy.md` — exclusion policy and how to extend it +- `../warp-server/.agents/skills/update-open-api-spec/SKILL.md` — server-side workflow for editing the canonical spec +- `../../../src/pages/api.astro` — how the docs site loads the YAML into Scalar diff --git a/.agents/skills/sync-openapi-spec/references/sync-policy.md b/.agents/skills/sync-openapi-spec/references/sync-policy.md new file mode 100644 index 00000000..51466651 --- /dev/null +++ b/.agents/skills/sync-openapi-spec/references/sync-policy.md @@ -0,0 +1,56 @@ +# Sync Policy + +This document records what `developers/agent-api-openapi.yaml` keeps from `warp-server/public_api/openapi.yaml`, and why. The exclusion lists live in `scripts/sync_openapi.py` as `EXCLUDED_TAGS` and `EXCLUDED_PATHS`. Update both this document and the script when the policy changes. 
+ +## How filtering works + +`scripts/sync_openapi.py` applies these rules, top-down: + +1. Drop every tag listed in `EXCLUDED_TAGS`. +2. Drop every path whose tags are a subset of `EXCLUDED_TAGS`, plus every path listed explicitly in `EXCLUDED_PATHS`. +3. Keep every surviving path verbatim, including any `x-internal: true` markers on its operations. +4. Keep top-level `openapi`, `info`, `servers`, and `components.securitySchemes` verbatim. +5. Keep only the `components.schemas` entries that are reachable from the surviving paths via `$ref` walking (recursive over `allOf`/`oneOf`/`anyOf`/`items`/`additionalProperties`/etc.). + +## Excluded tags + +### `memory_stores` +Memory stores are gated as `x-internal: true` server-side. They are not part of the public Oz Agent API surface today and are excluded from the docs reference until they ship publicly. If/when this tag goes public, remove it from `EXCLUDED_TAGS` and update this section. + +### `harness-support` +The `/harness-support/*` endpoints form the worker-to-server contract used by Oz workers (transcripts, snapshots, finish-task signaling, etc.). They are not part of the public API contract — customers should not call them directly. Excluded permanently. + +## Excluded paths (within otherwise-public tags) + +These five `agent`-tag paths are excluded individually because the `agent` tag itself remains public: + +- `/agent/runs/{runId}/followups` — internal followup-prompt mechanism used by the harness; not for direct customer use. +- `/agent/runs/{runId}/handoff/attachments` — handoff plumbing tied to local-to-cloud session handoff. +- `/agent/handoff/upload-snapshot` — handoff plumbing (snapshot upload from a local worker). +- `/agent/conversations/{conversation_id}/fork` — conversation-forking primitive used by the harness, not stable public API. +- `/agent/conversations/{conversationId}/redirect` — internal redirect endpoint. 
+ +If any of these become stable public surfaces, remove them from `EXCLUDED_PATHS` and update this list. + +## What we deliberately KEEP that you might expect to be hidden + +The script keeps `x-internal: true` operations under public paths. Today this means the `/agent/messages/*` and `/agent/events/*` operations are present in the docs file even though they're flagged `x-internal` in the source. This matches the pre-existing state of `developers/agent-api-openapi.yaml` and the way Scalar already renders the reference. If we want to start stripping `x-internal` operations from the docs spec, change the policy here and update `_should_keep_path`/the operation-level filter in `scripts/sync_openapi.py`. + +## Adding a new exclusion + +Use the script's `_unknown_classifications` warnings as the trigger. When the diff flags a new tag or path with `!`: +1. Read the corresponding handler in `warp-server/router/handlers/public_api/` to determine intent. +2. If the endpoint should be hidden: + - For an entire new tag, add the tag name to `EXCLUDED_TAGS` in `scripts/sync_openapi.py`. + - For a single path, add it to `EXCLUDED_PATHS`. +3. Add a short rationale to this document under "Excluded tags" or "Excluded paths." +4. Re-run `--mode diff` to confirm there are no remaining `!` warnings. +5. Then run `--mode apply` and proceed with the normal PR flow. + +## Removing an exclusion + +When an internal endpoint becomes a stable public surface: +1. Remove it from `EXCLUDED_TAGS` or `EXCLUDED_PATHS`. +2. Remove its bullet from this document. +3. Run `--mode apply`. The path and its referenced schemas will be added to the docs file automatically. +4. Open the PR with the standard sync flow. 
diff --git a/.agents/skills/sync-openapi-spec/scripts/sync_openapi.py b/.agents/skills/sync-openapi-spec/scripts/sync_openapi.py new file mode 100644 index 00000000..b27b0a38 --- /dev/null +++ b/.agents/skills/sync-openapi-spec/scripts/sync_openapi.py @@ -0,0 +1,507 @@ +#!/usr/bin/env python3 +"""Sync the public OpenAPI spec from warp-server into the docs repo. + +The canonical OpenAPI spec lives in `warp-server/public_api/openapi.yaml`. +The Scalar API reference at `docs.warp.dev/api` renders from +`docs/developers/agent-api-openapi.yaml`, which is a curated subset. + +This script generates the docs subset deterministically: + * tags listed in EXCLUDED_TAGS are removed (and their paths/schemas) + * paths listed in EXCLUDED_PATHS are removed + * surviving paths and operations are kept verbatim, including any + ``x-internal: true`` markers + * components/schemas is pruned to only schemas reachable from the + surviving paths via $ref walking + +Modes: + diff Print structural drift between source and target. Exits 1 + if drift is found. + apply Rewrite target with the regenerated docs subset. + self-test Runs a small in-memory test to validate $ref walking. + +Usage: + python3 sync_openapi.py --mode diff + python3 sync_openapi.py --mode apply + python3 sync_openapi.py --mode self-test +""" + +from __future__ import annotations + +import argparse +import sys +from pathlib import Path +from typing import Any + +import yaml + +# Tags whose paths and tag entry should be removed entirely. +# `memory_stores` is gated as `x-internal` server-side. +# `harness-support` is the worker-to-server contract — not a public API. +EXCLUDED_TAGS: frozenset[str] = frozenset({"memory_stores", "harness-support"}) + +# Specific paths under otherwise-public tags that should be hidden from +# the public API reference. Keep in sync with references/sync-policy.md. 
EXCLUDED_PATHS: frozenset[str] = frozenset(
    {
        "/agent/runs/{runId}/followups",
        "/agent/runs/{runId}/handoff/attachments",
        "/agent/handoff/upload-snapshot",
        "/agent/conversations/{conversation_id}/fork",
        "/agent/conversations/{conversationId}/redirect",
    }
)

# Default checkout layout: docs/ and warp-server/ as siblings.
DEFAULT_SOURCE = Path("../warp-server/public_api/openapi.yaml")
DEFAULT_TARGET = Path("developers/agent-api-openapi.yaml")


# ---------------------------------------------------------------------------
# YAML helpers
# ---------------------------------------------------------------------------


class _PreserveStringDumper(yaml.SafeDumper):
    """SafeDumper that emits multiline strings as block literals (|).

    Without this, descriptions that contain newlines round-trip through
    PyYAML as ugly quoted strings with explicit ``\\n`` escapes, which is
    both unreadable and a noisy diff against the hand-edited source.
    """


def _str_representer(dumper: yaml.SafeDumper, data: str) -> Any:
    """Represent ``data`` as a YAML string, requesting ``|`` style if multiline."""
    if "\n" in data:
        return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
    return dumper.represent_scalar("tag:yaml.org,2002:str", data)


# Registered on the subclass only, so plain ``yaml.SafeDumper`` is unaffected.
_PreserveStringDumper.add_representer(str, _str_representer)


def _load_yaml(path: Path) -> dict[str, Any]:
    """Parse the YAML document at ``path`` and return its root mapping.

    Raises:
        SystemExit: if the document root is not a mapping (the message names
            the offending file).
    """
    with path.open("r", encoding="utf-8") as fh:
        data = yaml.safe_load(fh)
    if not isinstance(data, dict):
        raise SystemExit(f"{path}: expected a YAML mapping at the document root")
    return data


def _dump_yaml(data: dict[str, Any], path: Path) -> None:
    """Serialize ``data`` as YAML and write it to ``path`` (UTF-8).

    The document is rendered to a string *before* the target is opened for
    writing. Opening the file first would truncate it immediately, so any
    error raised while dumping would leave an empty or partial spec on disk.
    """
    text = yaml.dump(
        data,
        Dumper=_PreserveStringDumper,
        sort_keys=False,  # keep the source spec's key order
        default_flow_style=False,
        allow_unicode=True,
        width=10**6,  # avoid line wrapping
    )
    path.write_text(text, encoding="utf-8")


# ---------------------------------------------------------------------------
# Filtering & schema-pruning
# ---------------------------------------------------------------------------
+def _operation_tags(operation: dict[str, Any]) -> list[str]: + tags = operation.get("tags") or [] + return [t for t in tags if isinstance(t, str)] + + +def _path_tags(path_item: dict[str, Any]) -> set[str]: + """Union of tags across all HTTP operations on a path item.""" + tags: set[str] = set() + for key, op in path_item.items(): + # OpenAPI methods + a few path-level fields. We only care about + # method entries (objects with `operationId`/`tags`). + if isinstance(op, dict) and ("tags" in op or "operationId" in op): + tags.update(_operation_tags(op)) + return tags + + +def _should_keep_path(path: str, path_item: dict[str, Any]) -> bool: + if path in EXCLUDED_PATHS: + return False + tags = _path_tags(path_item) + if tags and tags.issubset(EXCLUDED_TAGS): + return False + return True + + +def _collect_refs(node: Any, refs: set[str]) -> None: + """Recursively collect every component schema name referenced from ``node``. + + Walks dicts and lists, picking up any string under a ``$ref`` key that + points into ``#/components/schemas/``. Captures refs nested anywhere + (allOf/oneOf/anyOf, items, additionalProperties, etc.). + """ + if isinstance(node, dict): + for k, v in node.items(): + if ( + k == "$ref" + and isinstance(v, str) + and v.startswith("#/components/schemas/") + ): + refs.add(v[len("#/components/schemas/") :]) + else: + _collect_refs(v, refs) + elif isinstance(node, list): + for item in node: + _collect_refs(item, refs) + + +def _transitive_schemas( + seed_refs: set[str], schemas: dict[str, Any] +) -> set[str]: + """Closure of ``seed_refs`` under transitive $ref edges in ``schemas``.""" + reachable: set[str] = set() + pending = list(seed_refs) + while pending: + name = pending.pop() + if name in reachable: + continue + if name not in schemas: + # Dangling ref — skip silently. The diff will surface it via + # the resulting schema set comparison. 
+ reachable.add(name) + continue + reachable.add(name) + new_refs: set[str] = set() + _collect_refs(schemas[name], new_refs) + for ref in new_refs: + if ref not in reachable: + pending.append(ref) + return reachable + + +def transform(source: dict[str, Any]) -> dict[str, Any]: + """Produce the docs subset of the given source spec.""" + out: dict[str, Any] = {} + + for top_key in ("openapi", "info", "servers"): + if top_key in source: + out[top_key] = source[top_key] + + src_tags = source.get("tags") or [] + out_tags = [ + t + for t in src_tags + if isinstance(t, dict) and t.get("name") not in EXCLUDED_TAGS + ] + if out_tags: + out["tags"] = out_tags + + src_paths = source.get("paths") or {} + kept_paths = { + path: item + for path, item in src_paths.items() + if isinstance(item, dict) and _should_keep_path(path, item) + } + out["paths"] = kept_paths + + seed_refs: set[str] = set() + _collect_refs(kept_paths, seed_refs) + + src_components = source.get("components") or {} + src_schemas = src_components.get("schemas") or {} + reachable = _transitive_schemas(seed_refs, src_schemas) + + out_components: dict[str, Any] = {} + for ck, cv in src_components.items(): + if ck == "schemas": + out_components["schemas"] = { + name: src_schemas[name] + for name in src_schemas + if name in reachable + } + else: + out_components[ck] = cv + if out_components: + out["components"] = out_components + + return out + + +# --------------------------------------------------------------------------- +# Diff reporting +# --------------------------------------------------------------------------- + + +def _summarize_drift( + expected: dict[str, Any], actual: dict[str, Any] +) -> list[str]: + """Return human-readable lines describing how ``actual`` drifts from ``expected``. + + ``expected`` is what the target *should* look like (i.e., the result of + transforming the source). ``actual`` is the file currently on disk. 
+ """ + notes: list[str] = [] + + exp_paths = set((expected.get("paths") or {}).keys()) + act_paths = set((actual.get("paths") or {}).keys()) + + missing_paths = sorted(exp_paths - act_paths) + extra_paths = sorted(act_paths - exp_paths) + + if missing_paths: + notes.append("Paths present in source but missing from target:") + notes.extend(f" + {p}" for p in missing_paths) + if extra_paths: + notes.append("Paths present in target but absent from source subset:") + notes.extend(f" - {p}" for p in extra_paths) + + common_paths = exp_paths & act_paths + changed_paths = sorted(p for p in common_paths if expected["paths"][p] != actual["paths"][p]) + if changed_paths: + notes.append("Paths whose operations differ between source and target:") + notes.extend(f" ~ {p}" for p in changed_paths) + + exp_schemas = set(((expected.get("components") or {}).get("schemas") or {}).keys()) + act_schemas = set(((actual.get("components") or {}).get("schemas") or {}).keys()) + + missing_schemas = sorted(exp_schemas - act_schemas) + extra_schemas = sorted(act_schemas - exp_schemas) + + if missing_schemas: + notes.append("Schemas present in source subset but missing from target:") + notes.extend(f" + {s}" for s in missing_schemas) + if extra_schemas: + notes.append("Schemas present in target but absent from source subset:") + notes.extend(f" - {s}" for s in extra_schemas) + + common_schemas = exp_schemas & act_schemas + schema_changes = sorted( + s + for s in common_schemas + if expected["components"]["schemas"][s] != actual["components"]["schemas"][s] + ) + if schema_changes: + notes.append("Schemas whose definitions differ between source subset and target:") + notes.extend(f" ~ {s}" for s in schema_changes) + + for top_key in ("openapi", "info", "servers"): + if expected.get(top_key) != actual.get(top_key): + notes.append(f"Top-level `{top_key}` differs between source and target.") + + return notes + + +def _unknown_classifications(source: dict[str, Any]) -> list[str]: + """Flag tags or 
paths the policy doesn't already cover. + + The skill's policy currently knows about the `agent` and `schedules` + tags (kept) and `memory_stores`/`harness-support` (dropped). Anything + else needs human triage. + """ + KNOWN_TAGS = {"agent", "schedules"} | set(EXCLUDED_TAGS) + + notes: list[str] = [] + for tag in source.get("tags") or []: + name = tag.get("name") if isinstance(tag, dict) else None + if name and name not in KNOWN_TAGS: + notes.append( + f"Unknown tag `{name}` — extend EXCLUDED_TAGS or document it as public." + ) + + for path, item in (source.get("paths") or {}).items(): + if not isinstance(item, dict): + continue + tags = _path_tags(item) + unknown = tags - KNOWN_TAGS + if unknown: + notes.append( + f"Path `{path}` has unknown tag(s) {sorted(unknown)} — triage before next sync." + ) + return notes + + +# --------------------------------------------------------------------------- +# Self-test +# --------------------------------------------------------------------------- + + +def _self_test() -> int: + """Sanity-check the transform on a small synthetic spec.""" + sample = { + "openapi": "3.0.0", + "info": {"title": "t", "version": "1"}, + "tags": [ + {"name": "agent"}, + {"name": "memory_stores", "x-internal": True}, + {"name": "harness-support"}, + ], + "paths": { + "/agent/run": { + "post": { + "tags": ["agent"], + "operationId": "runAgent", + "requestBody": { + "content": { + "application/json": { + "schema": {"$ref": "#/components/schemas/RunReq"} + } + } + }, + "responses": { + "200": { + "description": "ok", + "content": { + "application/json": { + "schema": {"$ref": "#/components/schemas/RunResp"} + } + }, + } + }, + } + }, + "/memory_stores": { + "post": { + "tags": ["memory_stores"], + "operationId": "createMS", + "x-internal": True, + "responses": {"201": {"description": "ok"}}, + } + }, + "/harness-support/transcript": { + "get": { + "tags": ["harness-support"], + "operationId": "transcript", + "responses": {"200": {"description": "ok"}}, 
+ } + }, + "/agent/runs/{runId}/followups": { + "post": { + "tags": ["agent"], + "operationId": "followups", + "responses": {"200": {"description": "ok"}}, + } + }, + }, + "components": { + "securitySchemes": {"bearerAuth": {"type": "http", "scheme": "bearer"}}, + "schemas": { + "RunReq": { + "type": "object", + "properties": { + "config": {"$ref": "#/components/schemas/Config"} + }, + }, + "Config": { + "type": "object", + "properties": { + "modes": { + "type": "array", + "items": {"$ref": "#/components/schemas/Mode"}, + }, + "merged": { + "allOf": [ + {"$ref": "#/components/schemas/Mode"}, + {"type": "object"}, + ] + }, + }, + }, + "Mode": {"type": "string"}, + "RunResp": {"type": "object"}, + "MSItem": {"type": "object"}, # only referenced by dropped path + "Followup": {"type": "object"}, + }, + }, + } + + out = transform(sample) + paths = set(out["paths"].keys()) + assert paths == {"/agent/run"}, f"unexpected paths: {paths}" + + schemas = set(out["components"]["schemas"].keys()) + # Config and Mode are reachable transitively (allOf, items) + assert schemas == {"RunReq", "Config", "Mode", "RunResp"}, f"unexpected schemas: {schemas}" + + tag_names = [t["name"] for t in out.get("tags") or []] + assert tag_names == ["agent"], f"unexpected tags: {tag_names}" + + assert out["components"].get("securitySchemes"), "securitySchemes should be preserved" + + print("self-test: OK") + return 0 + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + + +def _resolve(path: Path) -> Path: + return path if path.is_absolute() else (Path.cwd() / path).resolve() + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser(description=__doc__.split("\n", 1)[0]) + parser.add_argument( + "--mode", + choices=("diff", "apply", "self-test"), + required=True, + help="diff: print drift; apply: write target; self-test: in-memory test.", + ) + 
parser.add_argument( + "--source", + type=Path, + default=DEFAULT_SOURCE, + help="Path to warp-server's public_api/openapi.yaml.", + ) + parser.add_argument( + "--target", + type=Path, + default=DEFAULT_TARGET, + help="Path to docs' developers/agent-api-openapi.yaml.", + ) + args = parser.parse_args(argv) + + if args.mode == "self-test": + return _self_test() + + source_path = _resolve(args.source) + target_path = _resolve(args.target) + + if not source_path.exists(): + print(f"error: source spec not found at {source_path}", file=sys.stderr) + return 2 + + source = _load_yaml(source_path) + expected = transform(source) + + unknown = _unknown_classifications(source) + + if args.mode == "diff": + if not target_path.exists(): + print(f"error: target spec not found at {target_path}", file=sys.stderr) + return 2 + actual = _load_yaml(target_path) + notes = _summarize_drift(expected, actual) + if unknown: + notes.append("") + notes.append("Unclassified tags/paths (require human triage):") + notes.extend(f" ! {n}" for n in unknown) + if not notes: + print("In sync. No changes needed.") + return 0 + print(f"Drift detected between\n source: {source_path}\n target: {target_path}\n") + print("\n".join(notes)) + return 1 + + # apply + target_path.parent.mkdir(parents=True, exist_ok=True) + _dump_yaml(expected, target_path) + print(f"Wrote {target_path}") + if unknown: + print("\nWarning: unclassified items the script auto-included or auto-dropped:") + for n in unknown: + print(f" ! {n}") + print("Triage these before merging the PR.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 38dfa175b7912b69e70689d28add768f32410d88 Mon Sep 17 00:00:00 2001 From: hongyi-chen Date: Sat, 9 May 2026 00:03:16 +0000 Subject: [PATCH 2/2] Validate $refs in apply mode and clarify Step 5 Audit-driven follow-up: - Add _validate_output() that walks the regenerated spec and fails if any $ref points at a missing component. 
Apply mode now exits with code 3 and refuses to overwrite the target on validation failure. - Self-test asserts no unresolved refs after transform. - Fix SKILL.md Step 5 wording: npm run build does not catch dangling $refs (Astro just YAML-parses the file). The new validator covers that case; npm run build remains a belt-and-braces integration check. - Update troubleshooting entry to match the new behavior. Co-Authored-By: Oz --- .agents/skills/sync-openapi-spec/SKILL.md | 21 +++--- .../sync-openapi-spec/scripts/sync_openapi.py | 64 ++++++++++++++++++- 2 files changed, 74 insertions(+), 11 deletions(-) diff --git a/.agents/skills/sync-openapi-spec/SKILL.md b/.agents/skills/sync-openapi-spec/SKILL.md index ac7d1fc1..91f2de43 100644 --- a/.agents/skills/sync-openapi-spec/SKILL.md +++ b/.agents/skills/sync-openapi-spec/SKILL.md @@ -74,24 +74,23 @@ python3 .agents/skills/sync-openapi-spec/scripts/sync_openapi.py \ --target developers/agent-api-openapi.yaml ``` -This rewrites `developers/agent-api-openapi.yaml` with the regenerated subset. +This rewrites `developers/agent-api-openapi.yaml` with the regenerated subset. Apply mode validates every `$ref` in the output before writing the file: if any reference is unresolved, the script exits with code 3 and refuses to write. On success it prints `All $refs resolve in the regenerated spec.` ### Step 5: Validate the regenerated spec -```bash -# YAML parses -python3 -c "import yaml; yaml.safe_load(open('developers/agent-api-openapi.yaml'))" +Apply mode already catches unresolved `$ref`s (see Step 4). Run these as belt-and-braces integration checks: -# Astro + Scalar boot succeed (catches dangling $refs, malformed paths) +```bash +# Astro picks up the new YAML and parses it through Scalar's runtime. 
npm run build ``` -Optional, recommended when many schemas changed: +Optional, recommended when many schemas changed (full OpenAPI lint): ```bash npx @redocly/cli lint developers/agent-api-openapi.yaml ``` -If `npm run build` fails, inspect the build error, fix the underlying spec issue (most often a `$ref` to a schema that the script pruned because nothing public references it), and re-run. +If `npm run build` fails, the most common cause is a malformed path or missing `description` field. Schema-ref breakage is already prevented by Step 4's validator. ### Step 6: Commit and open a PR @@ -136,8 +135,12 @@ Install pyyaml: `pip install pyyaml`. On Debian-based images with externally man ### `error: source spec not found at ...` The `warp-server` repo isn't where the script expected. Pass `--source /absolute/path/to/warp-server/public_api/openapi.yaml`. -### `npm run build` fails after `--mode apply` with a missing-schema error -The script's `$ref` walker missed a transitive reference. Inspect the failing `$ref`, confirm the schema exists in the source spec, and check whether the path holding the reference was supposed to be kept. If the path is genuinely public, the schema should follow automatically — file a bug against the script (the walker should be transitive over `allOf`/`oneOf`/`anyOf`/`items`/`additionalProperties`). +### `--mode apply` exits with code 3 and "unresolved $refs" +Apply mode refuses to write the target if any `$ref` in the regenerated spec doesn't resolve to a defined component. The script's recursive `$ref` walker is supposed to keep transitive references (`allOf`/`oneOf`/`anyOf`/`items`/`additionalProperties`/etc.) reachable, so this means either: +- The source spec itself has a dangling reference (fix it in `warp-server`), or +- The walker is missing a reference shape (file a bug against the script). + +The error output lists the offending JSON pointer paths so you can locate the reference quickly. 
Apply will not overwrite `developers/agent-api-openapi.yaml` while this fails. ### Diff shows changes that aren't in the source spec Make sure `../warp-server` is on the branch you intended to compare against (usually `develop`). Run `cd ../warp-server && git status -sb && git --no-pager log -1` to confirm. diff --git a/.agents/skills/sync-openapi-spec/scripts/sync_openapi.py b/.agents/skills/sync-openapi-spec/scripts/sync_openapi.py index b27b0a38..f6981e97 100644 --- a/.agents/skills/sync-openapi-spec/scripts/sync_openapi.py +++ b/.agents/skills/sync-openapi-spec/scripts/sync_openapi.py @@ -12,12 +12,16 @@ ``x-internal: true`` markers * components/schemas is pruned to only schemas reachable from the surviving paths via $ref walking + * the regenerated spec is validated for unresolved $refs before + being written; apply will refuse to write a broken spec Modes: diff Print structural drift between source and target. Exits 1 if drift is found. - apply Rewrite target with the regenerated docs subset. - self-test Runs a small in-memory test to validate $ref walking. + apply Rewrite target with the regenerated docs subset. Exits 3 + if any $ref in the output is unresolved. + self-test Runs a small in-memory test to validate $ref walking and + output ref resolution. Usage: python3 sync_openapi.py --mode diff @@ -176,6 +180,48 @@ def _transitive_schemas( return reachable +def _validate_output(out: dict[str, Any]) -> list[str]: + """Return human-readable errors for any unresolved refs in ``out``. + + Walks the entire output tree and verifies that every ``$ref`` string + points at something that actually exists in the output's components + section. This catches cases where pruning or filtering leaves a + dangling reference behind — a class of bug that would otherwise slip + past `npm run build` (Astro just YAML-parses the file) and only + surface as an empty schema box at runtime in Scalar. 
+ """ + components = out.get("components") or {} + available: dict[str, set[str]] = {} + for ck, cv in components.items(): + if isinstance(cv, dict): + available[ck] = set(cv.keys()) + + errors: list[str] = [] + + def visit(node: Any, path: str) -> None: + if isinstance(node, dict): + for k, v in node.items(): + if k == "$ref" and isinstance(v, str): + if v.startswith("#/components/"): + parts = v[len("#/components/") :].split("/", 1) + if len(parts) != 2: + errors.append(f"{path}: malformed $ref `{v}`") + continue + section, name = parts + if name not in available.get(section, set()): + errors.append( + f"{path}: $ref `{v}` is not defined in components.{section}" + ) + else: + visit(v, f"{path}.{k}") + elif isinstance(node, list): + for i, item in enumerate(node): + visit(item, f"{path}[{i}]") + + visit(out, "") + return errors + + def transform(source: dict[str, Any]) -> dict[str, Any]: """Produce the docs subset of the given source spec.""" out: dict[str, Any] = {} @@ -424,6 +470,9 @@ def _self_test() -> int: assert out["components"].get("securitySchemes"), "securitySchemes should be preserved" + ref_errors = _validate_output(out) + assert not ref_errors, f"unexpected unresolved refs: {ref_errors}" + print("self-test: OK") return 0 @@ -492,9 +541,20 @@ def main(argv: list[str] | None = None) -> int: return 1 # apply + ref_errors = _validate_output(expected) + if ref_errors: + print( + "error: regenerated spec has unresolved $refs. Refusing to write target.", + file=sys.stderr, + ) + for err in ref_errors: + print(f" {err}", file=sys.stderr) + return 3 + target_path.parent.mkdir(parents=True, exist_ok=True) _dump_yaml(expected, target_path) print(f"Wrote {target_path}") + print("All $refs resolve in the regenerated spec.") if unknown: print("\nWarning: unclassified items the script auto-included or auto-dropped:") for n in unknown: