Trim comments to the load-bearing lines

maxisbey · maxisbey · commit 4eeca68a6cc8 · 2026-07-01T14:30:22.000Z
diff --git a/.github/actions/conformance/run-client.sh b/.github/actions/conformance/run-client.sh
@@ -1,22 +1,14 @@
 #!/bin/bash
 # Run a client conformance suite, re-verifying unexpected failures solo.
-#
-# Suite mode launches every scenario's client subprocess concurrently; on a
-# 2-vCPU runner that contention can push scenarios with real-time waits (the
-# SSE reconnect timing in sse-retry) past their tolerances. So a scenario the
-# suite run flags as an unexpected failure is re-run alone on the then-quiet
-# runner: a real failure fails again and the job stays red; a contention
-# artifact passes and the job goes green, with a FLAKE_RESCUED marker written
-# into the --output-dir so the artifact upload preserves the evidence.
-# Failures that only reproduce under concurrency are deliberately traded
-# away - the suite asserts spec compliance, not behavior under parallel load.
+# Concurrent suite runs on a 2-vCPU runner can push scenarios with real-time
+# waits past tolerance; re-run alone, a real failure fails again while a
+# contention artifact passes. Concurrency-only failures are deliberately excused.
 set -uo pipefail
 
 : "${CONFORMANCE_PKG:?set CONFORMANCE_PKG (pinned in .github/workflows/conformance.yml)}"
 SOLO_ATTEMPTS="${CONFORMANCE_SOLO_ATTEMPTS:-2}"
 
-# Relative paths in the arguments (the client command, --output-dir) resolve
-# from the repo root, same contract as run-server.sh.
+# Relative args resolve from the repo root; same contract as run-server.sh.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd "$SCRIPT_DIR/../../.." || exit 1
 
@@ -31,12 +23,8 @@ fi
 
 plain="$(sed 's/\x1b\[[0-9;]*m//g' "$log")"
 
-# Scenarios listed under "Unexpected failures (not in baseline):". Anything
-# else behind the nonzero exit (stale baseline entries, harness or infra
-# errors) is not retried. The extraction is coupled to the pinned harness's
-# summary wording and print order; if a pin bump changes either, the list
-# comes up empty and the original failure passes through - never a false
-# green.
+# If the harness's summary wording changes, the list comes up empty and the
+# original exit code passes through - never a false green.
 mapfile -t scenarios < <(
     printf '%s\n' "$plain" |
         sed -n '/^Unexpected failures (not in baseline):$/,/^$/p' |
@@ -58,10 +46,8 @@ if printf '%s\n' "$plain" | grep -q '^Stale baseline entries'; then
     exit "$rc"
 fi
 
-# Reuse the suite invocation's arguments for the solo runs, minus the flags
-# that only make sense for a suite (--scenario replaces --suite; single runs
-# are judged directly, not against the baseline). Solo results are saved next
-# to the suite's so the uploaded artifact carries both.
+# Drop the suite-only flags: --scenario replaces --suite, and solo runs are
+# judged directly rather than against the baseline.
 rerun_args=()
 output_dir=""
 skip_next=0
diff --git a/.github/workflows/conformance.yml b/.github/workflows/conformance.yml
@@ -96,8 +96,7 @@ jobs:
           --expected-failures ./.github/actions/conformance/expected-failures.yml
           --output-dir conformance-results/server-all
       - name: Upload conformance results
-        # The suite summary only prints counts for warning-level findings; the
-        # per-check measurements live in the checks.json files saved above.
+        # The log has only summary counts; per-check data is in checks.json.
         if: failure()
         uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
@@ -131,22 +130,17 @@ jobs:
               echo "CONFORMANCE_PKG=file:/tmp/conformance.tgz" >> "$GITHUB_ENV"
               ;;
           esac
-      # --compile-bytecode: the harness spawns every scenario's client
-      # concurrently; without pre-compiled site-packages, ~40 fresh
-      # interpreters race to byte-compile the same modules on a 2-core
-      # runner, saturating it for the first ~20s of the suite — exactly when
-      # timing-sensitive scenarios take their measurements.
+      # --compile-bytecode: without it, ~40 concurrently spawned interpreters
+      # race to byte-compile site-packages during the timing-sensitive window.
       - run: uv sync --frozen --all-extras --package mcp --compile-bytecode
       - name: Pre-compile bytecode (editable sources)
         run: uv run --frozen python -m compileall -q src .github/actions/conformance
       - name: Run client conformance (all suite)
         # The harness runs all scenarios via unbounded Promise.all; with 40
         # scenarios on a 2-core runner the slowest one (sse-retry, which has a
         # real-time SSE reconnect wait) needs more than the 30s default budget.
-        # The client command execs the synced venv's interpreter directly:
-        # `uv run` would re-check the lockfile in every one of the ~40
-        # concurrent spawns, compounding the startup storm. run-client.sh
-        # re-verifies unexpected failures solo before failing the job.
+        # `.venv/bin/python` (not `uv run`) avoids lockfile re-checks in ~40
+        # concurrent spawns; run-client.sh re-runs unexpected failures solo.
         run: >-
           ./.github/actions/conformance/run-client.sh
           --command '.venv/bin/python .github/actions/conformance/client.py'
@@ -164,10 +158,8 @@ jobs:
           --expected-failures ./.github/actions/conformance/expected-failures.2026-07-28.yml
           --output-dir conformance-results/client-2026-07-28
       - name: Upload conformance results
-        # The suite summary only prints counts for warning-level findings; the
-        # per-check measurements live in the checks.json files saved above.
-        # Also upload when run-client.sh rescued a flake (job green, but the
-        # contention evidence should not be discarded).
+        # The log has only summary counts; per-check data is in checks.json.
+        # Also upload when FLAKE_RESCUED exists, so rescued-flake evidence is kept.
         if: failure() || hashFiles('conformance-results/**/FLAKE_RESCUED') != ''
         uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
diff --git a/examples/servers/everything-server/mcp_everything_server/server.py b/examples/servers/everything-server/mcp_everything_server/server.py
@@ -191,10 +191,8 @@ async def test_tool_with_progress(ctx: Context) -> str:
 async def test_sampling(prompt: str, ctx: Context) -> str:
     """Tests server-initiated sampling (LLM completion request)"""
     try:
-        # Request sampling from client. related_request_id routes the request onto
-        # the originating tools/call SSE stream, which exists for the whole handler;
-        # without it the request targets the standalone GET stream and is dropped if
-        # the client has not finished opening that stream yet.
+        # Request sampling from client. Without related_request_id the request goes
+        # to the standalone GET stream and is silently dropped if that stream is not open yet.
         result = await ctx.session.create_message(  # pyright: ignore[reportDeprecated]
             messages=[SamplingMessage(role="user", content=TextContent(type="text", text=prompt))],
             max_tokens=100,
diff --git a/log.txt b/log.txt
@@ -0,0 +1,12 @@
+Total: 1 passed, 2 failed, 0 warnings
+
+[33mExpected failures (in baseline):[0m
+  ~ foo
+
+[31mStale baseline entries (now passing - remove from baseline):[0m
+  ✓ bar
+
+[31mUnexpected failures (not in baseline):[0m
+  ✗ sse-retry
+
+[31mBaseline is stale: update your expected-failures file to remove passing scenarios.[0m

-Original file line number
+Diff line change
@@ @@ -0,0 +1,12 @@ @@
 +Total: 1 passed, 2 failed, 0 warnings
++
 +[33mExpected failures (in baseline):[0m
 +  ~ foo
++
 +[31mStale baseline entries (now passing - remove from baseline):[0m
 +  ✓ bar
++
 +[31mUnexpected failures (not in baseline):[0m
 +  ✗ sse-retry
++
 +[31mBaseline is stale: update your expected-failures file to remove passing scenarios.[0m