Skip to content

Commit 35322a0

Browse files
Merge pull request #10 from coder/cat/scanner-llm-mode
feat(scanner): enable SkillSpector LLM semantic pass (Anthropic Claude)
2 parents 52576ca + 2848e3c commit 35322a0

7 files changed

Lines changed: 57 additions & 166 deletions

File tree

.github/workflows/scan.yaml

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -88,22 +88,43 @@ jobs:
8888
echo "drift=true" >> "$GITHUB_OUTPUT"
8989
echo "Skill path source/${{ matrix.skill_path }} not present upstream; will report catalogue drift." >&2
9090
fi
91+
- name: Determine LLM mode
92+
id: llm
93+
env:
94+
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
95+
run: |
96+
set -euo pipefail
97+
if [[ -n "${ANTHROPIC_API_KEY:-}" ]]; then
98+
echo "extra_flags=" >> "$GITHUB_OUTPUT"
99+
echo "SkillSpector LLM mode: enabled (anthropic provider, api.anthropic.com)." >&2
100+
else
101+
echo "extra_flags=--no-llm" >> "$GITHUB_OUTPUT"
102+
echo "::warning::ANTHROPIC_API_KEY secret not set; SkillSpector will run with --no-llm. Set the secret on this repo to enable the LLM semantic pass."
103+
fi
91104
- name: SkillSpector (JSON)
92105
if: steps.path_check.outputs.drift == 'false'
93106
continue-on-error: true
107+
env:
108+
SKILLSPECTOR_PROVIDER: anthropic
109+
SKILLSPECTOR_MODEL: claude-sonnet-4-6
110+
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
94111
run: |
95112
mkdir -p out
96113
skillspector scan "source/${{ matrix.skill_path }}" \
97-
--no-llm \
114+
${{ steps.llm.outputs.extra_flags }} \
98115
--format json \
99116
--output "out/skillspector.json" || true
100117
- name: SkillSpector (SARIF)
101118
if: steps.path_check.outputs.drift == 'false'
102119
continue-on-error: true
120+
env:
121+
SKILLSPECTOR_PROVIDER: anthropic
122+
SKILLSPECTOR_MODEL: claude-sonnet-4-6
123+
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
103124
run: |
104125
mkdir -p out
105126
skillspector scan "source/${{ matrix.skill_path }}" \
106-
--no-llm \
127+
${{ steps.llm.outputs.extra_flags }} \
107128
--format sarif \
108129
--output "out/skillspector.sarif" || true
109130
- name: Combine

README.md

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,10 @@ Every 6 hours, the scheduled workflow in this repo:
88
1. Enumerates every skill in `coder/registry` (both the in-tree
99
`.agents/skills/` format and the future external-sources format).
1010
2. Shallow-clones each source repo.
11-
3. Runs [NVIDIA SkillSpector](https://github.com/NVIDIA/SkillSpector) in
12-
`--no-llm` static mode over the upstream content.
11+
3. Runs [NVIDIA SkillSpector](https://github.com/NVIDIA/SkillSpector) over
12+
the upstream content. The scheduled scan runs SkillSpector's LLM
13+
semantic pass when the workflow's LLM credential secret is
14+
configured, and falls back to `--no-llm` static-only mode otherwise.
1315
4. Builds a per-skill verdict (`clean`, `suspicious`, `malicious`,
1416
`unknown`) from `risk_score` plus the thresholds in `config.yaml`.
1517
5. Builds the React SPA in `site/` and ships it together with
@@ -97,7 +99,13 @@ This scanner is data-driven. To run it against a different registry:
9799
"GitHub Actions").
98100
4. Set Actions workflow permissions to "Read and write" so the
99101
publish-release job can create releases.
100-
5. Enable Actions.
102+
5. To enable the LLM semantic pass, set the credential secret matching
103+
`config.yaml`'s `scanners.skillspector.llm.provider` on your fork
104+
(for the default `anthropic` provider, `ANTHROPIC_API_KEY`), AND
105+
confirm `.github/workflows/scan.yaml` exports that secret into the
106+
SkillSpector step. Static-only mode (without the secret) is the
107+
default and works out of the box.
108+
6. Enable Actions.
101109

102110
No source changes required for catalogue changes.
103111

@@ -112,10 +120,7 @@ verdict:
112120
```
113121
114122
SkillSpector's `risk_score` (0-100) is the only input. The thresholds
115-
are aligned to SkillSpector's own `HIGH` and `CRITICAL` bands;
116-
[`docs/CALIBRATION.md`](./docs/CALIBRATION.md) walks through the
117-
evidence (SkillSpector source, the ClawHub paper, our in-tree
118-
catalogue) behind the chosen numbers.
123+
are aligned to SkillSpector's own `HIGH` and `CRITICAL` bands.
119124

120125
The architecture keeps room for additional scanners (gitleaks, Semgrep,
121126
VirusTotal Premium, etc.); adding one is a new module under `scanner/`,

config.yaml

Lines changed: 19 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -6,11 +6,13 @@
66
config_version: 1
77

88
catalogue:
9-
# Where to enumerate skills from. Both the current production format
10-
# (in-tree under .agents/skills/) and the future external-sources
11-
# format (registry/<ns>/skills/README.md with sources[].repo) are
12-
# supported. When both name the same slug, the external-sources entry
13-
# wins.
9+
# Skills are declared by per-namespace README.md files under
10+
# registry/<ns>/skills/ in the catalogue repo. Each README's
11+
# frontmatter lists sources[].repo plus per-skill overrides. This is
12+
# the canonical declaration; the in-tree .agents/skills/ format is
13+
# supported in scanner/enumerate.py for forks that need it but is
14+
# not enabled here because coder/registry duplicates the same
15+
# upstream skills across both layouts under different slugs.
1416
registry_repo:
1517
owner: coder
1618
repo: registry
@@ -22,13 +24,6 @@ catalogue:
2224
# has its frontmatter parsed for sources[].repo plus per-skill
2325
# overrides keyed by slug.
2426
readme_glob: registry/*/skills/README.md
25-
in_tree:
26-
enabled: true
27-
# The namespace is fixed for in-tree skills today (coder).
28-
namespace: coder
29-
# Path glob inside the catalogue repo. Each <slug>/SKILL.md is one
30-
# skill row in the matrix.
31-
base_path: .agents/skills
3227

3328
scanners:
3429
skillspector:
@@ -39,8 +34,12 @@ scanners:
3934
# so a bumper bot lives outside the loop until the upstream
4035
# publishes to PyPI and the pin can move into pyproject.toml.
4136
pin: "skillspector @ git+https://github.com/NVIDIA/SkillSpector.git@2eb844780ab163f01468ecf142c40a2ec0fcaec0"
42-
flags:
43-
- "--no-llm"
37+
# Empty so .github/workflows/scan.yaml can append --no-llm
38+
# dynamically based on whether the LLM credential secret is set.
39+
flags: []
40+
llm:
41+
provider: anthropic
42+
model: "claude-sonnet-4-6"
4443

4544
# Per-skill verdict policy. v1 has one input (SkillSpector risk_score).
4645
# When more scanners join the pipeline we add new threshold fields here
@@ -54,13 +53,12 @@ scanners:
5453
# 51-80 HIGH DO_NOT_INSTALL -> verdict: suspicious
5554
# 81-100 CRITICAL DO_NOT_INSTALL -> verdict: malicious
5655
#
57-
# Rationale and source links live in docs/CALIBRATION.md. Short version:
58-
# SkillSpector's static-analysis layer is loud on real catalogues (the
59-
# ClawHub paper measured a ~49% positive rate on 67k skills) and is
60-
# advisory rather than authoritative, so we only escalate above its
61-
# HIGH cutoff. CAUTION-band findings still appear in the per-skill page
62-
# so reviewers can see them; we just do not flag the skill as suspicious
63-
# at the catalogue level.
56+
# Rationale: SkillSpector's static-analysis layer is loud on real
57+
# catalogues (the ClawHub paper measured a ~49% positive rate on 67k
58+
# skills) and is advisory rather than authoritative, so we only
59+
# escalate above its HIGH cutoff. CAUTION-band findings still appear
60+
# on the per-skill page so reviewers can see them; we just do not
61+
# flag the skill as suspicious at the catalogue level.
6462
verdict:
6563
malicious_risk_score: 81
6664
suspicious_risk_score: 51

docs/CALIBRATION.md

Lines changed: 0 additions & 132 deletions
This file was deleted.

scanner/verdict.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ def evaluate(
4848

4949
thresholds = config.get("verdict") or {}
5050
# Defaults match config.yaml. Keep these in sync with
51-
# docs/CALIBRATION.md and VerdictExplanation.tsx's defaults.
51+
# VerdictExplanation.tsx's defaults.
5252
malicious_at = int(thresholds.get("malicious_risk_score", 81))
5353
suspicious_at = int(thresholds.get("suspicious_risk_score", 51))
5454

site/src/components/RiskBar/RiskBar.tsx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ interface RiskBarProps {
1919
* Optional cutoffs (0..100) for the suspicious and malicious bands.
2020
* When supplied, the bar renders thin tick marks at those positions so
2121
* the user can see how close a score is to escalating. Defaults match
22-
* the policy in config.yaml and docs/CALIBRATION.md.
22+
* the policy in config.yaml.
2323
*/
2424
suspicious_at?: number;
2525
malicious_at?: number;

site/src/components/VerdictExplanation/VerdictExplanation.tsx

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -374,8 +374,7 @@ const CategoryCard: FC<CategoryCardProps> = ({ group }) => {
374374
export const VerdictExplanation: FC<VerdictExplanationProps> = ({
375375
skill,
376376
// Defaults match config.yaml and scanner/verdict.py. They are also
377-
// SkillSpector's own HIGH and CRITICAL band edges; see
378-
// docs/CALIBRATION.md for the calibration write-up.
377+
// SkillSpector's own HIGH and CRITICAL band edges.
379378
malicious_at = 81,
380379
suspicious_at = 51,
381380
className,

0 commit comments

Comments
 (0)