diff --git a/docs/superpowers/contracts/vendor-agent-skills-import.json b/docs/superpowers/contracts/vendor-agent-skills-import.json new file mode 100644 index 0000000..a3e299a --- /dev/null +++ b/docs/superpowers/contracts/vendor-agent-skills-import.json @@ -0,0 +1,14 @@ +{ + "title": "Vendor third_party agent-skills snapshot", + "objective": "Add a vendored copy of the external agent-skills toolkit under third_party/agent-skills for local reference and integration, without introducing nested repository metadata.", + "acceptance_criteria": [ + "third_party/agent-skills files are staged without embedded .git metadata", + "pre-commit hooks pass for this commit scope", + "changes are isolated to third_party plus required contract/evidence artifacts" + ], + "out_of_scope": [ + "Modifying upstream third_party content semantics", + "Refactoring local scripts that may consume these files" + ], + "verifier": "pre-commit run --files " +} diff --git a/docs/superpowers/evidence/vendor-agent-skills-import.json b/docs/superpowers/evidence/vendor-agent-skills-import.json new file mode 100644 index 0000000..72932a3 --- /dev/null +++ b/docs/superpowers/evidence/vendor-agent-skills-import.json @@ -0,0 +1,27 @@ +{ + "intent": "Vendor external agent-skills assets into this repository as a standalone third_party snapshot for local reuse.", + "scope": [ + "third_party/agent-skills/**", + "docs/superpowers/contracts/vendor-agent-skills-import.json", + "docs/superpowers/evidence/vendor-agent-skills-import.json" + ], + "changes": [ + "Added third_party/agent-skills repository snapshot files (without nested .git directory)", + "Added contract/evidence artifacts required by repository governance hooks" + ], + "verification": [ + { + "command": "pre-commit run --files ", + "result": "pass", + "evidence": "All commit-stage hooks passed after adding contract/evidence JSON files." 
+ } + ], + "risks": [ + "Vendored snapshot can drift from upstream over time", + "Large third_party updates may increase review overhead" + ], + "rollback": [ + "Revert this commit to remove vendored files", + "Re-run pre-commit to validate repository state after rollback" + ] +} diff --git a/third_party/agent-skills/.claude-plugin/marketplace.json b/third_party/agent-skills/.claude-plugin/marketplace.json new file mode 100644 index 0000000..9f15352 --- /dev/null +++ b/third_party/agent-skills/.claude-plugin/marketplace.json @@ -0,0 +1,19 @@ +{ + "name": "addy-agent-skills", + "owner": { + "name": "Addy Osmani" + }, + "metadata": { + "description": "Production-grade engineering skills for AI coding agents — covering the full software development lifecycle from spec to ship." + }, + "plugins": [ + { + "name": "agent-skills", + "source": { + "source": "github", + "repo": "addyosmani/agent-skills" + }, + "description": "Production-grade engineering skills covering every phase of software development: spec, plan, build, verify, review, and ship." 
+ } + ] +} diff --git a/third_party/agent-skills/.claude-plugin/plugin.json b/third_party/agent-skills/.claude-plugin/plugin.json new file mode 100644 index 0000000..a0eba16 --- /dev/null +++ b/third_party/agent-skills/.claude-plugin/plugin.json @@ -0,0 +1,18 @@ +{ + "name": "agent-skills", + "description": "Production-grade engineering skills for AI coding agents — covering the full software development lifecycle from spec to ship.", + "version": "1.0.0", + "author": { + "name": "Addy Osmani" + }, + "homepage": "https://github.com/addyosmani/agent-skills", + "repository": "https://github.com/addyosmani/agent-skills", + "license": "MIT", + "commands": "./.claude/commands", + "skills": "./skills", + "agents": [ + "./agents/code-reviewer.md", + "./agents/security-auditor.md", + "./agents/test-engineer.md" + ] +} diff --git a/third_party/agent-skills/.claude/commands/build.md b/third_party/agent-skills/.claude/commands/build.md new file mode 100644 index 0000000..9f8e5f5 --- /dev/null +++ b/third_party/agent-skills/.claude/commands/build.md @@ -0,0 +1,18 @@ +--- +description: Implement the next task incrementally — build, test, verify, commit +--- + +Invoke the agent-skills:incremental-implementation skill alongside agent-skills:test-driven-development. + +Pick the next pending task from the plan. For each task: + +1. Read the task's acceptance criteria +2. Load relevant context (existing code, patterns, types) +3. Write a failing test for the expected behavior (RED) +4. Implement the minimum code to pass the test (GREEN) +5. Run the full test suite to check for regressions +6. Run the build to verify compilation +7. Commit with a descriptive message +8. Mark the task complete and move to the next one + +If any step fails, follow the agent-skills:debugging-and-error-recovery skill. 
diff --git a/third_party/agent-skills/.claude/commands/code-simplify.md b/third_party/agent-skills/.claude/commands/code-simplify.md new file mode 100644 index 0000000..f914bc8 --- /dev/null +++ b/third_party/agent-skills/.claude/commands/code-simplify.md @@ -0,0 +1,22 @@ +--- +description: Simplify code for clarity and maintainability — reduce complexity without changing behavior +--- + +Invoke the agent-skills:code-simplification skill. + +Simplify recently changed code (or the specified scope) while preserving exact behavior: + +1. Read CLAUDE.md and study project conventions +2. Identify the target code — recent changes unless a broader scope is specified +3. Understand the code's purpose, callers, edge cases, and test coverage before touching it +4. Scan for simplification opportunities: + - Deep nesting → guard clauses or extracted helpers + - Long functions → split by responsibility + - Nested ternaries → if/else or switch + - Generic names → descriptive names + - Duplicated logic → shared functions + - Dead code → remove after confirming +5. Apply each simplification incrementally — run tests after each change +6. Verify all tests pass, the build succeeds, and the diff is clean + +If tests fail after a simplification, revert that change and reconsider. Use `code-review-and-quality` to review the result. diff --git a/third_party/agent-skills/.claude/commands/plan.md b/third_party/agent-skills/.claude/commands/plan.md new file mode 100644 index 0000000..99b2ea1 --- /dev/null +++ b/third_party/agent-skills/.claude/commands/plan.md @@ -0,0 +1,16 @@ +--- +description: Break work into small verifiable tasks with acceptance criteria and dependency ordering +--- + +Invoke the agent-skills:planning-and-task-breakdown skill. + +Read the existing spec (SPEC.md or equivalent) and the relevant codebase sections. Then: + +1. Enter plan mode — read only, no code changes +2. Identify the dependency graph between components +3. 
Slice work vertically (one complete path per task, not horizontal layers) +4. Write tasks with acceptance criteria and verification steps +5. Add checkpoints between phases +6. Present the plan for human review + +Save the plan to tasks/plan.md and task list to tasks/todo.md. diff --git a/third_party/agent-skills/.claude/commands/review.md b/third_party/agent-skills/.claude/commands/review.md new file mode 100644 index 0000000..c1d750c --- /dev/null +++ b/third_party/agent-skills/.claude/commands/review.md @@ -0,0 +1,16 @@ +--- +description: Conduct a five-axis code review — correctness, readability, architecture, security, performance +--- + +Invoke the agent-skills:code-review-and-quality skill. + +Review the current changes (staged or recent commits) across all five axes: + +1. **Correctness** — Does it match the spec? Edge cases handled? Tests adequate? +2. **Readability** — Clear names? Straightforward logic? Well-organized? +3. **Architecture** — Follows existing patterns? Clean boundaries? Right abstraction level? +4. **Security** — Input validated? Secrets safe? Auth checked? (Use security-and-hardening skill) +5. **Performance** — No N+1 queries? No unbounded ops? (Use performance-optimization skill) + +Categorize findings as Critical, Important, or Suggestion. +Output a structured review with specific file:line references and fix recommendations. diff --git a/third_party/agent-skills/.claude/commands/ship.md b/third_party/agent-skills/.claude/commands/ship.md new file mode 100644 index 0000000..1dfaf01 --- /dev/null +++ b/third_party/agent-skills/.claude/commands/ship.md @@ -0,0 +1,72 @@ +--- +description: Run the pre-launch checklist via parallel fan-out to specialist personas, then synthesize a go/no-go decision +--- + +Invoke the agent-skills:shipping-and-launch skill. + +`/ship` is a **fan-out orchestrator**. 
It runs three specialist personas in parallel against the current change, then merges their reports into a single go/no-go decision with a rollback plan. The personas operate independently — no shared state, no ordering — which is what makes parallel execution safe and useful here. + +## Phase A — Parallel fan-out + +Spawn three subagents concurrently using the Agent tool. **Issue all three Agent tool calls in a single assistant turn so they execute in parallel** — sequential calls defeat the purpose of this command. + +In Claude Code, each call passes `subagent_type` matching the persona's `name` field: + +1. **`code-reviewer`** — Run a five-axis review (correctness, readability, architecture, security, performance) on the staged changes or recent commits. Output the standard review template. +2. **`security-auditor`** — Run a vulnerability and threat-model pass. Check OWASP Top 10, secrets handling, auth/authz, dependency CVEs. Output the standard audit report. +3. **`test-engineer`** — Analyze test coverage for the change. Identify gaps in happy path, edge cases, error paths, and concurrency scenarios. Output the standard coverage analysis. + +In other harnesses without an Agent tool, invoke each persona's system prompt sequentially and treat their outputs as if returned in parallel — the merge phase still works. + +Constraints (from Claude Code's subagent model): +- Subagents cannot spawn other subagents — do not let one persona delegate to another. +- Each subagent gets its own context window and returns only its report to this main session. +- If you need teammates that talk to each other instead of just reporting back, use Claude Code Agent Teams and reference these personas as teammate types (see `references/orchestration-patterns.md`). 
+ +**Persona resolution.** If you've defined your own `code-reviewer`, `security-auditor`, or `test-engineer` in `.claude/agents/` or `~/.claude/agents/`, those take precedence over this plugin's versions — `/ship` picks up your customizations automatically. This is intentional: plugin subagents sit at the bottom of Claude Code's scope priority table, so user-level definitions win by design. + +## Phase B — Merge in main context + +Once all three reports are back, the main agent (not a sub-persona) synthesizes them: + +1. **Code Quality** — Aggregate Critical/Important findings from `code-reviewer` and any failing tests, lint, or build output. Resolve duplicates between reviewers. +2. **Security** — Promote any Critical/High `security-auditor` findings to launch blockers. Cross-reference with `code-reviewer`'s security axis. +3. **Performance** — Pull from `code-reviewer`'s performance axis; cross-check Core Web Vitals if applicable. +4. **Accessibility** — Verify keyboard nav, screen reader support, contrast (not covered by the three personas — handle directly here, or invoke the accessibility checklist). +5. **Infrastructure** — Env vars, migrations, monitoring, feature flags. Verify directly. +6. **Documentation** — README, ADRs, changelog. Verify directly. + +## Phase C — Decision and rollback + +Produce a single output: + +```markdown +## Ship Decision: GO | NO-GO + +### Blockers (must fix before ship) +- [Source persona: Critical finding + file:line] + +### Recommended fixes (should fix before ship) +- [Source persona: Important finding + file:line] + +### Acknowledged risks (shipping anyway) +- [Risk + mitigation] + +### Rollback plan +- Trigger conditions: [what signals would prompt rollback] +- Rollback procedure: [exact steps] +- Recovery time objective: [target] + +### Specialist reports (full) +- [code-reviewer report] +- [security-auditor report] +- [test-engineer report] +``` + +## Rules + +1. 
The three Phase A personas run in parallel — never sequentially. +2. Personas do not call each other. The main agent merges in Phase B. +3. The rollback plan is mandatory before any GO decision. +4. If any persona returns a Critical finding, the default verdict is NO-GO unless the user explicitly accepts the risk. +5. **Skip the fan-out only if all of the following are true:** the change touches 2 files or fewer, the diff is under 50 lines, and it does not touch auth, payments, data access, or config/env. Otherwise, default to fan-out. `/ship` is designed for production-bound changes — when the blast radius is non-trivial, run the parallel review even if the diff looks small. diff --git a/third_party/agent-skills/.claude/commands/spec.md b/third_party/agent-skills/.claude/commands/spec.md new file mode 100644 index 0000000..2207935 --- /dev/null +++ b/third_party/agent-skills/.claude/commands/spec.md @@ -0,0 +1,15 @@ +--- +description: Start spec-driven development — write a structured specification before writing code +--- + +Invoke the agent-skills:spec-driven-development skill. + +Begin by understanding what the user wants to build. Ask clarifying questions about: +1. The objective and target users +2. Core features and acceptance criteria +3. Tech stack preferences and constraints +4. Known boundaries (what to always do, ask first about, and never do) + +Then generate a structured spec covering all six core areas: objective, commands, project structure, code style, testing strategy, and boundaries. + +Save the spec as SPEC.md in the project root and confirm with the user before proceeding. diff --git a/third_party/agent-skills/.claude/commands/test.md b/third_party/agent-skills/.claude/commands/test.md new file mode 100644 index 0000000..a2b9cfd --- /dev/null +++ b/third_party/agent-skills/.claude/commands/test.md @@ -0,0 +1,19 @@ +--- +description: Run TDD workflow — write failing tests, implement, verify. For bugs, use the Prove-It pattern. 
+--- + +Invoke the agent-skills:test-driven-development skill. + +For new features: +1. Write tests that describe the expected behavior (they should FAIL) +2. Implement the code to make them pass +3. Refactor while keeping tests green + +For bug fixes (Prove-It pattern): +1. Write a test that reproduces the bug (must FAIL) +2. Confirm the test fails +3. Implement the fix +4. Confirm the test passes +5. Run the full test suite for regressions + +For browser-related issues, also invoke agent-skills:browser-testing-with-devtools to verify with Chrome DevTools MCP. diff --git a/third_party/agent-skills/.gemini/commands/build.toml b/third_party/agent-skills/.gemini/commands/build.toml new file mode 100644 index 0000000..935e2b7 --- /dev/null +++ b/third_party/agent-skills/.gemini/commands/build.toml @@ -0,0 +1,18 @@ +description = "Implement the next task incrementally — build, test, verify, commit" + +prompt = """ +Invoke the incremental-implementation skill alongside test-driven-development. + +Pick the next pending task from the plan. For each task: + +1. Read the task's acceptance criteria +2. Load relevant context (existing code, patterns, types) +3. Write a failing test for the expected behavior (RED) +4. Implement the minimum code to pass the test (GREEN) +5. Run the full test suite to check for regressions +6. Run the build to verify compilation +7. Commit with a descriptive message +8. Mark the task complete and move to the next one + +If any step fails, follow the debugging-and-error-recovery skill. +""" diff --git a/third_party/agent-skills/.gemini/commands/code-simplify.toml b/third_party/agent-skills/.gemini/commands/code-simplify.toml new file mode 100644 index 0000000..b4b6ab3 --- /dev/null +++ b/third_party/agent-skills/.gemini/commands/code-simplify.toml @@ -0,0 +1,22 @@ +description = "Simplify code for clarity and maintainability — reduce complexity without changing behavior" + +prompt = """ +Invoke the code-simplification skill. 
+ +Simplify recently changed code (or the specified scope) while preserving exact behavior: + +1. Read GEMINI.md and study project conventions +2. Identify the target code — recent changes unless a broader scope is specified +3. Understand the code's purpose, callers, edge cases, and test coverage before touching it +4. Scan for simplification opportunities: + - Deep nesting → guard clauses or extracted helpers + - Long functions → split by responsibility + - Nested ternaries → if/else or switch + - Generic names → descriptive names + - Duplicated logic → shared functions + - Dead code → remove after confirming +5. Apply each simplification incrementally — run tests after each change +6. Verify all tests pass, the build succeeds, and the diff is clean + +If tests fail after a simplification, revert that change and reconsider. Use `code-review-and-quality` to review the result. +""" diff --git a/third_party/agent-skills/.gemini/commands/planning.toml b/third_party/agent-skills/.gemini/commands/planning.toml new file mode 100644 index 0000000..60d6601 --- /dev/null +++ b/third_party/agent-skills/.gemini/commands/planning.toml @@ -0,0 +1,16 @@ +description = "Break work into small verifiable tasks with acceptance criteria and dependency ordering" + +prompt = """ +Invoke the planning-and-task-breakdown skill. + +Read the existing spec (SPEC.md or equivalent) and the relevant codebase sections. Then: + +1. Enter plan mode — read only, no code changes +2. Identify the dependency graph between components +3. Slice work vertically (one complete path per task, not horizontal layers) +4. Write tasks with acceptance criteria and verification steps +5. Add checkpoints between phases +6. Present the plan for human review + +Save the plan to tasks/plan.md and task list to tasks/todo.md. 
+""" diff --git a/third_party/agent-skills/.gemini/commands/review.toml b/third_party/agent-skills/.gemini/commands/review.toml new file mode 100644 index 0000000..3de1054 --- /dev/null +++ b/third_party/agent-skills/.gemini/commands/review.toml @@ -0,0 +1,16 @@ +description = "Conduct a five-axis code review — correctness, readability, architecture, security, performance" + +prompt = """ +Invoke the code-review-and-quality skill. + +Review the current changes (staged or recent commits) across all five axes: + +1. **Correctness** — Does it match the spec? Edge cases handled? Tests adequate? +2. **Readability** — Clear names? Straightforward logic? Well-organized? +3. **Architecture** — Follows existing patterns? Clean boundaries? Right abstraction level? +4. **Security** — Input validated? Secrets safe? Auth checked? (Use security-and-hardening skill) +5. **Performance** — No N+1 queries? No unbounded ops? (Use performance-optimization skill) + +Categorize findings as Critical, Important, or Suggestion. +Output a structured review with specific file:line references and fix recommendations. +""" diff --git a/third_party/agent-skills/.gemini/commands/ship.toml b/third_party/agent-skills/.gemini/commands/ship.toml new file mode 100644 index 0000000..8941cc4 --- /dev/null +++ b/third_party/agent-skills/.gemini/commands/ship.toml @@ -0,0 +1,72 @@ +description = "Run the pre-launch checklist via parallel fan-out to specialist personas, then synthesize a go/no-go decision" + +prompt = """ +Invoke the shipping-and-launch skill. + +`/ship` is a **fan-out orchestrator**. It runs three specialist personas in parallel against the current change, then merges their reports into a single go/no-go decision with a rollback plan. The personas operate independently — no shared state, no ordering — which is what makes parallel execution safe and useful here. + +## Phase A — Parallel fan-out + +Spawn three subagents concurrently. 
Gemini CLI exposes each custom subagent in `.gemini/agents/` as a tool with the same name — so `code-reviewer.md` becomes a `code-reviewer` tool the main agent can call, and `@code-reviewer` works as an explicit invocation in the prompt. **Issue all three subagent tool calls in a single assistant turn so they execute in parallel** — sequential calls defeat the purpose of this command. + +Dispatch each persona by tool name: + +1. **`code-reviewer`** — Run a five-axis review (correctness, readability, architecture, security, performance) on the staged changes or recent commits. Output the standard review template. +2. **`security-auditor`** — Run a vulnerability and threat-model pass. Check OWASP Top 10, secrets handling, auth/authz, dependency CVEs. Output the standard audit report. +3. **`test-engineer`** — Analyze test coverage for the change. Identify gaps in happy path, edge cases, error paths, and concurrency scenarios. Output the standard coverage analysis. + +If subagents are unavailable in the current Gemini CLI version, invoke each persona's system prompt sequentially in the main context and treat their outputs as if returned in parallel — the merge phase still works. + +Constraints (from Gemini CLI's subagent model): +- Subagents run in isolated context loops and return only their report to this main session. +- Do not let one persona delegate to another — keep the fan-out flat. +- For richer multi-agent collaboration where teammates talk to each other instead of just reporting back, see `references/orchestration-patterns.md`. + +**Persona resolution.** If you've defined your own `code-reviewer`, `security-auditor`, or `test-engineer` in `.gemini/agents/` or `~/.gemini/agents/`, those take precedence over this plugin's versions — `/ship` picks up your customizations automatically. This is intentional: plugin subagents sit at the bottom of Gemini CLI's scope priority table, so user-level definitions win by design. 
+ +## Phase B — Merge in main context + +Once all three reports are back, the main agent (not a sub-persona) synthesizes them: + +1. **Code Quality** — Aggregate Critical/Important findings from `code-reviewer` and any failing tests, lint, or build output. Resolve duplicates between reviewers. +2. **Security** — Promote any Critical/High `security-auditor` findings to launch blockers. Cross-reference with `code-reviewer`'s security axis. +3. **Performance** — Pull from `code-reviewer`'s performance axis; cross-check Core Web Vitals if applicable. +4. **Accessibility** — Verify keyboard nav, screen reader support, contrast (not covered by the three personas — handle directly here, or invoke the accessibility checklist). +5. **Infrastructure** — Env vars, migrations, monitoring, feature flags. Verify directly. +6. **Documentation** — README, ADRs, changelog. Verify directly. + +## Phase C — Decision and rollback + +Produce a single output: + +```markdown +## Ship Decision: GO | NO-GO + +### Blockers (must fix before ship) +- [Source persona: Critical finding + file:line] + +### Recommended fixes (should fix before ship) +- [Source persona: Important finding + file:line] + +### Acknowledged risks (shipping anyway) +- [Risk + mitigation] + +### Rollback plan +- Trigger conditions: [what signals would prompt rollback] +- Rollback procedure: [exact steps] +- Recovery time objective: [target] + +### Specialist reports (full) +- [code-reviewer report] +- [security-auditor report] +- [test-engineer report] +``` + +## Rules + +1. The three Phase A personas run in parallel — never sequentially. +2. Personas do not call each other. The main agent merges in Phase B. +3. The rollback plan is mandatory before any GO decision. +4. If any persona returns a Critical finding, the default verdict is NO-GO unless the user explicitly accepts the risk. +5. 
**Skip the fan-out only if all of the following are true:** the change touches 2 files or fewer, the diff is under 50 lines, and it does not touch auth, payments, data access, or config/env. Otherwise, default to fan-out. `/ship` is designed for production-bound changes — when the blast radius is non-trivial, run the parallel review even if the diff looks small. +""" diff --git a/third_party/agent-skills/.gemini/commands/spec.toml b/third_party/agent-skills/.gemini/commands/spec.toml new file mode 100644 index 0000000..f330d10 --- /dev/null +++ b/third_party/agent-skills/.gemini/commands/spec.toml @@ -0,0 +1,15 @@ +description = "Start spec-driven development — write a structured specification before writing code" + +prompt = """ +Invoke the spec-driven-development skill. + +Begin by understanding what the user wants to build. Ask clarifying questions about: +1. The objective and target users +2. Core features and acceptance criteria +3. Tech stack preferences and constraints +4. Known boundaries (what to always do, ask first about, and never do) + +Then generate a structured spec covering all six core areas: objective, commands, project structure, code style, testing strategy, and boundaries. + +Save the spec as SPEC.md in the project root and confirm with the user before proceeding. +""" diff --git a/third_party/agent-skills/.gemini/commands/test.toml b/third_party/agent-skills/.gemini/commands/test.toml new file mode 100644 index 0000000..a1ca635 --- /dev/null +++ b/third_party/agent-skills/.gemini/commands/test.toml @@ -0,0 +1,19 @@ +description = "Run TDD workflow — write failing tests, implement, verify. For bugs, use the Prove-It pattern." + +prompt = """ +Invoke the test-driven-development skill. + +For new features: +1. Write tests that describe the expected behavior (they should FAIL) +2. Implement the code to make them pass +3. Refactor while keeping tests green + +For bug fixes (Prove-It pattern): +1. Write a test that reproduces the bug (must FAIL) +2. 
Confirm the test fails +3. Implement the fix +4. Confirm the test passes +5. Run the full test suite for regressions + +For browser-related issues, also invoke browser-testing-with-devtools to verify with Chrome DevTools MCP. +""" diff --git a/third_party/agent-skills/.github/workflows/test-plugin-install.yml b/third_party/agent-skills/.github/workflows/test-plugin-install.yml new file mode 100644 index 0000000..b1784fb --- /dev/null +++ b/third_party/agent-skills/.github/workflows/test-plugin-install.yml @@ -0,0 +1,41 @@ +name: Test Plugin Installation + +on: + push: + pull_request: + workflow_dispatch: + +jobs: + validate: + name: Validate plugin structure + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - name: Install Claude Code + run: npm install -g @anthropic-ai/claude-code + + - name: Validate marketplace and plugin manifests + run: claude plugin validate . + + test-install: + name: Test plugin installation + runs-on: ubuntu-latest + needs: validate + steps: + - uses: actions/checkout@v6 + + - name: Install Claude Code + run: npm install -g @anthropic-ai/claude-code + + - name: Configure git to use HTTPS + run: git config --global url."https://github.com/".insteadOf "git@github.com:" + + - name: Add marketplace + run: claude plugin marketplace add ./ + + - name: List marketplaces + run: claude plugin marketplace list + + - name: Install plugin + run: claude plugin install agent-skills@addy-agent-skills --scope user diff --git a/third_party/agent-skills/.gitignore b/third_party/agent-skills/.gitignore new file mode 100644 index 0000000..c5401e9 --- /dev/null +++ b/third_party/agent-skills/.gitignore @@ -0,0 +1,7 @@ +.DS_Store +node_modules/ +.env +.env.* +*.log +.claude/.simplify-ignore-cache/ +.claude/sdd-cache/ diff --git a/third_party/agent-skills/.opencode/skills b/third_party/agent-skills/.opencode/skills new file mode 120000 index 0000000..7fc7d9f --- /dev/null +++ b/third_party/agent-skills/.opencode/skills @@ -0,0 +1 @@ +../skills/ 
\ No newline at end of file diff --git a/third_party/agent-skills/AGENTS.md b/third_party/agent-skills/AGENTS.md new file mode 100644 index 0000000..7b09470 --- /dev/null +++ b/third_party/agent-skills/AGENTS.md @@ -0,0 +1,185 @@ +# AGENTS.md + +This file provides guidance to AI coding agents (Claude Code, Cursor, Copilot, Antigravity, etc.) when working with code in this repository. + +## Repository Overview + +A collection of skills for Claude.ai and Claude Code for senior software engineers. Skills are packaged instructions and scripts that extend Claude and your coding agents' capabilities. + +## OpenCode Integration + +OpenCode uses a **skill-driven execution model** powered by the `skill` tool and this repository's `/skills` directory. + +### Core Rules + +- If a task matches a skill, you MUST invoke it +- Skills are located in `skills/{skill-name}/SKILL.md` +- Never implement directly if a skill applies +- Always follow the skill instructions exactly (do not partially apply them) + +### Intent → Skill Mapping + +The agent should automatically map user intent to skills: + +- Feature / new functionality → `spec-driven-development`, then `incremental-implementation`, `test-driven-development` +- Planning / breakdown → `planning-and-task-breakdown` +- Bug / failure / unexpected behavior → `debugging-and-error-recovery` +- Code review → `code-review-and-quality` +- Refactoring / simplification → `code-simplification` +- API or interface design → `api-and-interface-design` +- UI work → `frontend-ui-engineering` + +### Lifecycle Mapping (Implicit Commands) + +OpenCode does not support slash commands like `/spec` or `/plan`. 
+ +Instead, the agent must internally follow this lifecycle: + +- DEFINE → `spec-driven-development` +- PLAN → `planning-and-task-breakdown` +- BUILD → `incremental-implementation` + `test-driven-development` +- VERIFY → `debugging-and-error-recovery` +- REVIEW → `code-review-and-quality` +- SHIP → `shipping-and-launch` + +### Execution Model + +For every request: + +1. Determine if any skill applies (even 1% chance) +2. Invoke the appropriate skill using the `skill` tool +3. Follow the skill workflow strictly +4. Only proceed to implementation after required steps (spec, plan, etc.) are complete + +### Anti-Rationalization + +The following thoughts are incorrect and must be ignored: + +- "This is too small for a skill" +- "I can just quickly implement this" +- "I’ll gather context first" + +Correct behavior: + +- Always check for and use skills first + +This ensures OpenCode behaves similarly to Claude Code with full workflow enforcement. + +## Orchestration: Personas, Skills, and Commands + +This repo has three composable layers. They have different jobs and should not be confused: + +- **Skills** (`skills/{skill-name}/SKILL.md`) — workflows with steps and exit criteria. The *how*. Mandatory hops when an intent matches. +- **Personas** (`agents/{persona-name}.md`) — roles with a perspective and an output format. The *who*. +- **Slash commands** (`.claude/commands/*.md`) — user-facing entry points. The *when*. The orchestration layer. + +Composition rule: **the user (or a slash command) is the orchestrator. Personas do not invoke other personas.** A persona may invoke skills. + +The only multi-persona orchestration pattern this repo endorses is **parallel fan-out with a merge step** — used by `/ship` to run `code-reviewer`, `security-auditor`, and `test-engineer` concurrently and synthesize their reports. Do not build a "router" persona that decides which other persona to call; that's the job of slash commands and intent mapping. 
+ +See [agents/README.md](agents/README.md) for the decision matrix and [references/orchestration-patterns.md](references/orchestration-patterns.md) for the full pattern catalog. + +**Claude Code interop:** the personas in `agents/` work as Claude Code subagents (auto-discovered from this plugin's `agents/` directory) and as Agent Teams teammates (referenced by name when spawning). Two platform constraints align with our rules: subagents cannot spawn other subagents, and teams cannot nest. Plugin agents silently ignore the `hooks`, `mcpServers`, and `permissionMode` frontmatter fields. + +## Creating a New Skill + +### Directory Structure + +``` +skills/ + {skill-name}/ # kebab-case directory name + SKILL.md # Required: skill definition + scripts/ # Required: executable scripts + {script-name}.sh # Bash scripts (preferred) + {skill-name}.zip # Required: packaged for distribution +``` + +### Naming Conventions + +- **Skill directory**: `kebab-case` (e.g. `web-quality`) +- **SKILL.md**: Always uppercase, always this exact filename +- **Scripts**: `kebab-case.sh` (e.g., `deploy.sh`, `fetch-logs.sh`) +- **Zip file**: Must match directory name exactly: `{skill-name}.zip` + +### SKILL.md Format + +```markdown +--- +name: {skill-name} +description: {One sentence describing when to use this skill. 
Include trigger phrases like "Deploy my app", "Check logs", etc.} +--- + +# {Skill Title} + +{Brief description of what the skill does.} + +## How It Works + +{Numbered list explaining the skill's workflow} + +## Usage + +```bash +bash /mnt/skills/user/{skill-name}/scripts/{script}.sh [args] +``` + +**Arguments:** +- `arg1` - Description (defaults to X) + +**Examples:** +{Show 2-3 common usage patterns} + +## Output + +{Show example output users will see} + +## Present Results to User + +{Template for how Claude should format results when presenting to users} + +## Troubleshooting + +{Common issues and solutions, especially network/permissions errors} +``` + +### Best Practices for Context Efficiency + +Skills are loaded on-demand — only the skill name and description are loaded at startup. The full `SKILL.md` loads into context only when the agent decides the skill is relevant. To minimize context usage: + +- **Keep SKILL.md under 500 lines** — put detailed reference material in separate files +- **Write specific descriptions** — helps the agent know exactly when to activate the skill +- **Use progressive disclosure** — reference supporting files that get read only when needed +- **Prefer scripts over inline code** — script execution doesn't consume context (only output does) +- **File references work one level deep** — link directly from SKILL.md to supporting files + +### Script Requirements + +- Use `#!/bin/bash` shebang +- Use `set -e` for fail-fast behavior +- Write status messages to stderr: `echo "Message" >&2` +- Write machine-readable output (JSON) to stdout +- Include a cleanup trap for temp files +- Reference the script path as `/mnt/skills/user/{skill-name}/scripts/{script}.sh` + +### Creating the Zip Package + +After creating or updating a skill: + +```bash +cd skills +zip -r {skill-name}.zip {skill-name}/ +``` + +### End-User Installation + +Document these two installation methods for users: + +**Claude Code:** +```bash +cp -r skills/{skill-name} 
~/.claude/skills/ +``` + +**claude.ai:** +Add the skill to project knowledge or paste SKILL.md contents into the conversation. + +If the skill requires network access, instruct users to add required domains at `claude.ai/settings/capabilities`. diff --git a/third_party/agent-skills/CLAUDE.md b/third_party/agent-skills/CLAUDE.md new file mode 100644 index 0000000..734fc26 --- /dev/null +++ b/third_party/agent-skills/CLAUDE.md @@ -0,0 +1,43 @@ +# agent-skills + +This is the agent-skills project — a collection of production-grade engineering skills for AI coding agents. + +## Project Structure + +``` +skills/ → Core skills (SKILL.md per directory) +agents/ → Reusable agent personas (code-reviewer, test-engineer, security-auditor) +hooks/ → Session lifecycle hooks +.claude/commands/ → Slash commands (/spec, /plan, /build, /test, /review, /code-simplify, /ship) +references/ → Supplementary checklists (testing, performance, security, accessibility) +docs/ → Setup guides for different tools +``` + +## Skills by Phase + +**Define:** idea-refine, spec-driven-development +**Plan:** planning-and-task-breakdown +**Build:** incremental-implementation, test-driven-development, context-engineering, source-driven-development, frontend-ui-engineering, api-and-interface-design +**Verify:** browser-testing-with-devtools, debugging-and-error-recovery +**Review:** code-review-and-quality, code-simplification, security-and-hardening, performance-optimization +**Ship:** git-workflow-and-versioning, ci-cd-and-automation, deprecation-and-migration, documentation-and-adrs, shipping-and-launch + +## Conventions + +- Every skill lives in `skills/<name>/SKILL.md` +- YAML frontmatter with `name` and `description` fields +- Description starts with what the skill does (third person), followed by trigger conditions ("Use when...") +- Every skill has: Overview, When to Use, Process, Common Rationalizations, Red Flags, Verification +- References are in `references/`, not inside skill directories +- Supporting files 
only created when content exceeds 100 lines + +## Commands + +- `npm test` — Not applicable (this is a documentation project) +- Validate: Check that all SKILL.md files have valid YAML frontmatter with name and description + +## Boundaries + +- Always: Follow the skill-anatomy.md format for new skills +- Never: Add skills that are vague advice instead of actionable processes +- Never: Duplicate content between skills — reference other skills instead diff --git a/third_party/agent-skills/CONTRIBUTING.md b/third_party/agent-skills/CONTRIBUTING.md new file mode 100644 index 0000000..737792a --- /dev/null +++ b/third_party/agent-skills/CONTRIBUTING.md @@ -0,0 +1,60 @@ +# Contributing to Agent Skills + +Thanks for your interest in contributing! This project is a collection of production-grade engineering skills for AI coding agents. + +## Adding a New Skill + +1. Create a directory under `skills/` with a kebab-case name +2. Add a `SKILL.md` following the format in [docs/skill-anatomy.md](docs/skill-anatomy.md) +3. Include YAML frontmatter with `name` and `description` fields +4. 
Ensure the `description` briefly says what the skill does (third person), then includes `Use when` trigger conditions + +### Skill Quality Bar + +Skills should be: + +- **Specific** — Actionable steps, not vague advice +- **Verifiable** — Clear exit criteria with evidence requirements +- **Battle-tested** — Based on real engineering workflows, not theoretical ideals +- **Minimal** — Only the content needed to guide the agent correctly + +### Structure + +Every new skill must have: + +- `SKILL.md` in the skill directory +- YAML frontmatter with valid `name` and `description` + +New skills should generally follow the standard anatomy: + +- **Overview** — What this skill does and why it matters +- **When to Use** — Triggering conditions +- **Process** — Step-by-step workflow +- **Common Rationalizations** — Excuses agents use to skip steps, with rebuttals +- **Red Flags** — Warning signs that the skill is being applied incorrectly +- **Verification** — How to confirm the skill was applied correctly + +### What Not to Do + +- Don't duplicate content between skills — reference other skills instead +- Don't add skills that are vague advice instead of actionable processes +- Don't create supporting files unless content exceeds 100 lines +- Don't put reference material inside skill directories — use `references/` instead + +## Modifying Existing Skills + +- Keep changes focused and minimal +- Preserve the existing structure and tone +- Test that YAML frontmatter remains valid after edits + +## Reporting Issues + +Open an issue if you find: + +- A skill that gives incorrect or outdated guidance +- Missing coverage for a common engineering workflow +- Inconsistencies between skills + +## License + +By contributing, you agree that your contributions will be licensed under the MIT License. 
diff --git a/third_party/agent-skills/LICENSE b/third_party/agent-skills/LICENSE new file mode 100644 index 0000000..d67778a --- /dev/null +++ b/third_party/agent-skills/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Addy Osmani + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/third_party/agent-skills/README.md b/third_party/agent-skills/README.md new file mode 100644 index 0000000..68e6300 --- /dev/null +++ b/third_party/agent-skills/README.md @@ -0,0 +1,296 @@ +# Agent Skills + +**Production-grade engineering skills for AI coding agents.** + +Skills encode the workflows, quality gates, and best practices that senior engineers use when building software. They are packaged so that AI agents follow them consistently across every phase of development. 
+ +``` + DEFINE PLAN BUILD VERIFY REVIEW SHIP + ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐ ┌──────┐ + │ Idea │ ───▶ │ Spec │ ───▶ │ Code │ ───▶ │ Test │ ───▶ │ QA │ ───▶ │ Go │ + │Refine│ │ PRD │ │ Impl │ │Debug │ │ Gate │ │ Live │ + └──────┘ └──────┘ └──────┘ └──────┘ └──────┘ └──────┘ + /spec /plan /build /test /review /ship +``` + +--- + +## Commands + +7 slash commands that map to the development lifecycle. Each one activates the right skills automatically. + +| What you're doing | Command | Key principle | +|-------------------|---------|---------------| +| Define what to build | `/spec` | Spec before code | +| Plan how to build it | `/plan` | Small, atomic tasks | +| Build incrementally | `/build` | One slice at a time | +| Prove it works | `/test` | Tests are proof | +| Review before merge | `/review` | Improve code health | +| Simplify the code | `/code-simplify` | Clarity over cleverness | +| Ship to production | `/ship` | Faster is safer | + +Skills also activate automatically based on what you're doing — designing an API triggers `api-and-interface-design`, building UI triggers `frontend-ui-engineering`, and so on. + +--- + +## Quick Start + +
+Claude Code (recommended) + +**Marketplace install:** + +``` +/plugin marketplace add addyosmani/agent-skills +/plugin install agent-skills@addy-agent-skills +``` + +> **SSH errors?** The marketplace clones repos via SSH. If you don't have SSH keys set up on GitHub, either [add your SSH key](https://docs.github.com/en/authentication/connecting-to-github-with-ssh/adding-a-new-ssh-key-to-your-github-account) or use the full HTTPS URL to force cloning over HTTPS: +> ```bash +> /plugin marketplace add https://github.com/addyosmani/agent-skills.git +> /plugin install agent-skills@addy-agent-skills +> ``` + +**Local / development:** + +```bash +git clone https://github.com/addyosmani/agent-skills.git +claude --plugin-dir /path/to/agent-skills +``` + +
+ +
+Cursor + +Copy any `SKILL.md` into `.cursor/rules/`, or reference the full `skills/` directory. See [docs/cursor-setup.md](docs/cursor-setup.md). + +
+ +
+Gemini CLI + +Install as native skills for auto-discovery, or add to `GEMINI.md` for persistent context. See [docs/gemini-cli-setup.md](docs/gemini-cli-setup.md). + +**Install from the repo:** + +```bash +gemini skills install https://github.com/addyosmani/agent-skills.git --path skills +``` + +**Install from a local clone:** + +```bash +gemini skills install ./agent-skills/skills/ +``` + +
+ +
+Windsurf + +Add skill contents to your Windsurf rules configuration. See [docs/windsurf-setup.md](docs/windsurf-setup.md). + +
+ +
+OpenCode + +Uses agent-driven skill execution via AGENTS.md and the `skill` tool. + +See [docs/opencode-setup.md](docs/opencode-setup.md). + +
+ +
+GitHub Copilot + +Use agent definitions from `agents/` as Copilot personas and skill content in `.github/copilot-instructions.md`. See [docs/copilot-setup.md](docs/copilot-setup.md). + +
+ +
+ Kiro IDE & CLI + Skills for Kiro live under `.kiro/skills/` and can be stored at the project or global level. Kiro also supports `AGENTS.md`. See the [Kiro skills documentation](https://kiro.dev/docs/skills/). +
+ +
+Codex / Other Agents + +Skills are plain Markdown - they work with any agent that accepts system prompts or instruction files. See [docs/getting-started.md](docs/getting-started.md). + +
+ + + +--- + +## All 20 Skills + +The commands above are the entry points. Under the hood, they activate these 20 skills — each one a structured workflow with steps, verification gates, and anti-rationalization tables. You can also reference any skill directly. + +### Define - Clarify what to build + +| Skill | What It Does | Use When | +|-------|-------------|----------| +| [idea-refine](skills/idea-refine/SKILL.md) | Structured divergent/convergent thinking to turn vague ideas into concrete proposals | You have a rough concept that needs exploration | +| [spec-driven-development](skills/spec-driven-development/SKILL.md) | Write a PRD covering objectives, commands, structure, code style, testing, and boundaries before any code | Starting a new project, feature, or significant change | + +### Plan - Break it down + +| Skill | What It Does | Use When | +|-------|-------------|----------| +| [planning-and-task-breakdown](skills/planning-and-task-breakdown/SKILL.md) | Decompose specs into small, verifiable tasks with acceptance criteria and dependency ordering | You have a spec and need implementable units | + +### Build - Write the code + +| Skill | What It Does | Use When | +|-------|-------------|----------| +| [incremental-implementation](skills/incremental-implementation/SKILL.md) | Thin vertical slices - implement, test, verify, commit. 
Feature flags, safe defaults, rollback-friendly changes | Any change touching more than one file | +| [test-driven-development](skills/test-driven-development/SKILL.md) | Red-Green-Refactor, test pyramid (80/15/5), test sizes, DAMP over DRY, Beyonce Rule, browser testing | Implementing logic, fixing bugs, or changing behavior | +| [context-engineering](skills/context-engineering/SKILL.md) | Feed agents the right information at the right time - rules files, context packing, MCP integrations | Starting a session, switching tasks, or when output quality drops | +| [source-driven-development](skills/source-driven-development/SKILL.md) | Ground every framework decision in official documentation - verify, cite sources, flag what's unverified | You want authoritative, source-cited code for any framework or library | +| [frontend-ui-engineering](skills/frontend-ui-engineering/SKILL.md) | Component architecture, design systems, state management, responsive design, WCAG 2.1 AA accessibility | Building or modifying user-facing interfaces | +| [api-and-interface-design](skills/api-and-interface-design/SKILL.md) | Contract-first design, Hyrum's Law, One-Version Rule, error semantics, boundary validation | Designing APIs, module boundaries, or public interfaces | + +### Verify - Prove it works + +| Skill | What It Does | Use When | +|-------|-------------|----------| +| [browser-testing-with-devtools](skills/browser-testing-with-devtools/SKILL.md) | Chrome DevTools MCP for live runtime data - DOM inspection, console logs, network traces, performance profiling | Building or debugging anything that runs in a browser | +| [debugging-and-error-recovery](skills/debugging-and-error-recovery/SKILL.md) | Five-step triage: reproduce, localize, reduce, fix, guard. 
Stop-the-line rule, safe fallbacks | Tests fail, builds break, or behavior is unexpected | + +### Review - Quality gates before merge + +| Skill | What It Does | Use When | +|-------|-------------|----------| +| [code-review-and-quality](skills/code-review-and-quality/SKILL.md) | Five-axis review, change sizing (~100 lines), severity labels (Nit/Optional/FYI), review speed norms, splitting strategies | Before merging any change | +| [code-simplification](skills/code-simplification/SKILL.md) | Chesterton's Fence, Rule of 500, reduce complexity while preserving exact behavior | Code works but is harder to read or maintain than it should be | +| [security-and-hardening](skills/security-and-hardening/SKILL.md) | OWASP Top 10 prevention, auth patterns, secrets management, dependency auditing, three-tier boundary system | Handling user input, auth, data storage, or external integrations | +| [performance-optimization](skills/performance-optimization/SKILL.md) | Measure-first approach - Core Web Vitals targets, profiling workflows, bundle analysis, anti-pattern detection | Performance requirements exist or you suspect regressions | + +### Ship - Deploy with confidence + +| Skill | What It Does | Use When | +|-------|-------------|----------| +| [git-workflow-and-versioning](skills/git-workflow-and-versioning/SKILL.md) | Trunk-based development, atomic commits, change sizing (~100 lines), the commit-as-save-point pattern | Making any code change (always) | +| [ci-cd-and-automation](skills/ci-cd-and-automation/SKILL.md) | Shift Left, Faster is Safer, feature flags, quality gate pipelines, failure feedback loops | Setting up or modifying build and deploy pipelines | +| [deprecation-and-migration](skills/deprecation-and-migration/SKILL.md) | Code-as-liability mindset, compulsory vs advisory deprecation, migration patterns, zombie code removal | Removing old systems, migrating users, or sunsetting features | +| [documentation-and-adrs](skills/documentation-and-adrs/SKILL.md) | 
Architecture Decision Records, API docs, inline documentation standards - document the *why* | Making architectural decisions, changing APIs, or shipping features | +| [shipping-and-launch](skills/shipping-and-launch/SKILL.md) | Pre-launch checklists, feature flag lifecycle, staged rollouts, rollback procedures, monitoring setup | Preparing to deploy to production | + +--- + +## Agent Personas + +Pre-configured specialist personas for targeted reviews: + +| Agent | Role | Perspective | +|-------|------|-------------| +| [code-reviewer](agents/code-reviewer.md) | Senior Staff Engineer | Five-axis code review with "would a staff engineer approve this?" standard | +| [test-engineer](agents/test-engineer.md) | QA Specialist | Test strategy, coverage analysis, and the Prove-It pattern | +| [security-auditor](agents/security-auditor.md) | Security Engineer | Vulnerability detection, threat modeling, OWASP assessment | + +--- + +## Reference Checklists + +Quick-reference material that skills pull in when needed: + +| Reference | Covers | +|-----------|--------| +| [testing-patterns.md](references/testing-patterns.md) | Test structure, naming, mocking, React/API/E2E examples, anti-patterns | +| [security-checklist.md](references/security-checklist.md) | Pre-commit checks, auth, input validation, headers, CORS, OWASP Top 10 | +| [performance-checklist.md](references/performance-checklist.md) | Core Web Vitals targets, frontend/backend checklists, measurement commands | +| [accessibility-checklist.md](references/accessibility-checklist.md) | Keyboard nav, screen readers, visual design, ARIA, testing tools | + +--- + +## How Skills Work + +Every skill follows a consistent anatomy: + +``` +┌─────────────────────────────────────────────────┐ +│ SKILL.md │ +│ │ +│ ┌─ Frontmatter ─────────────────────────────┐ │ +│ │ name: lowercase-hyphen-name │ │ +│ │ description: Guides agents through [task].│ │ +│ │ Use when… │ │ +│ └───────────────────────────────────────────┘ │ +│ Overview 
→ What this skill does │ +│ When to Use → Triggering conditions │ +│ Process → Step-by-step workflow │ +│ Rationalizations → Excuses + rebuttals │ +│ Red Flags → Signs something's wrong │ +│ Verification → Evidence requirements │ +└─────────────────────────────────────────────────┘ +``` + +**Key design choices:** + +- **Process, not prose.** Skills are workflows agents follow, not reference docs they read. Each has steps, checkpoints, and exit criteria. +- **Anti-rationalization.** Every skill includes a table of common excuses agents use to skip steps (e.g., "I'll add tests later") with documented counter-arguments. +- **Verification is non-negotiable.** Every skill ends with evidence requirements - tests passing, build output, runtime data. "Seems right" is never sufficient. +- **Progressive disclosure.** The `SKILL.md` is the entry point. Supporting references load only when needed, keeping token usage minimal. + +--- + +## Project Structure + +``` +agent-skills/ +├── skills/ # 20 core skills (SKILL.md per directory) +│ ├── idea-refine/ # Define +│ ├── spec-driven-development/ # Define +│ ├── planning-and-task-breakdown/ # Plan +│ ├── incremental-implementation/ # Build +│ ├── context-engineering/ # Build +│ ├── source-driven-development/ # Build +│ ├── frontend-ui-engineering/ # Build +│ ├── test-driven-development/ # Build +│ ├── api-and-interface-design/ # Build +│ ├── browser-testing-with-devtools/ # Verify +│ ├── debugging-and-error-recovery/ # Verify +│ ├── code-review-and-quality/ # Review +│ ├── code-simplification/ # Review +│ ├── security-and-hardening/ # Review +│ ├── performance-optimization/ # Review +│ ├── git-workflow-and-versioning/ # Ship +│ ├── ci-cd-and-automation/ # Ship +│ ├── deprecation-and-migration/ # Ship +│ ├── documentation-and-adrs/ # Ship +│ ├── shipping-and-launch/ # Ship +│ └── using-agent-skills/ # Meta: how to use this pack +├── agents/ # 3 specialist personas +├── references/ # 4 supplementary checklists +├── hooks/ # Session 
lifecycle hooks +├── .claude/commands/ # 7 slash commands (Claude Code) +├── .gemini/commands/ # 7 slash commands (Gemini CLI) +└── docs/ # Setup guides per tool +``` + +--- + +## Why Agent Skills? + +AI coding agents default to the shortest path - which often means skipping specs, tests, security reviews, and the practices that make software reliable. Agent Skills gives agents structured workflows that enforce the same discipline senior engineers bring to production code. + +Each skill encodes hard-won engineering judgment: *when* to write a spec, *what* to test, *how* to review, and *when* to ship. These aren't generic prompts - they're the kind of opinionated, process-driven workflows that separate production-quality work from prototype-quality work. + +Skills bake in best practices from Google's engineering culture — including concepts from [Software Engineering at Google](https://abseil.io/resources/swe-book) and Google's [engineering practices guide](https://google.github.io/eng-practices/). You'll find Hyrum's Law in API design, the Beyonce Rule and test pyramid in testing, change sizing and review speed norms in code review, Chesterton's Fence in simplification, trunk-based development in git workflow, Shift Left and feature flags in CI/CD, and a dedicated deprecation skill treating code as a liability. These aren't abstract principles — they're embedded directly into the step-by-step workflows agents follow. + +--- + +## Contributing + +Skills should be **specific** (actionable steps, not vague advice), **verifiable** (clear exit criteria with evidence requirements), **battle-tested** (based on real workflows), and **minimal** (only what's needed to guide the agent). + +See [docs/skill-anatomy.md](docs/skill-anatomy.md) for the format specification and [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines. + +--- + +## License + +MIT - use these skills in your projects, teams, and tools. 
diff --git a/third_party/agent-skills/agents/README.md b/third_party/agent-skills/agents/README.md new file mode 100644 index 0000000..508bb36 --- /dev/null +++ b/third_party/agent-skills/agents/README.md @@ -0,0 +1,120 @@ +# Agent Personas + +Specialist personas that play a single role with a single perspective. Each persona is a Markdown file consumed as a system prompt by your harness (Claude Code, Cursor, Copilot, etc.). + +| Persona | Role | Best for | +|---------|------|----------| +| [code-reviewer](code-reviewer.md) | Senior Staff Engineer | Five-axis review before merge | +| [security-auditor](security-auditor.md) | Security Engineer | Vulnerability detection, OWASP-style audit | +| [test-engineer](test-engineer.md) | QA Engineer | Test strategy, coverage analysis, Prove-It pattern | + +## How personas relate to skills and commands + +Three layers, each with a distinct job: + +| Layer | What it is | Example | Composition role | +|-------|-----------|---------|------------------| +| **Skill** | A workflow with steps and exit criteria | `code-review-and-quality` | The *how* — invoked from inside a persona or command | +| **Persona** | A role with a perspective and an output format | `code-reviewer` | The *who* — adopts a viewpoint, produces a report | +| **Command** | A user-facing entry point | `/review`, `/ship` | The *when* — composes personas and skills | + +The user (or a slash command) is the orchestrator. **Personas do not call other personas.** Skills are mandatory hops inside a persona's workflow. + +## When to use each + +### Direct persona invocation +Pick this when you want one perspective on the current change and the user is in the loop. + +- "Review this PR" → invoke `code-reviewer` directly +- "Are there security issues in `auth.ts`?" → invoke `security-auditor` directly +- "What tests are missing for the checkout flow?" 
→ invoke `test-engineer` directly + +### Slash command (single persona behind it) +Pick this when there's a repeatable workflow you'd otherwise re-explain every time. + +- `/review` → wraps `code-reviewer` with the project's review skill +- `/test` → wraps `test-engineer` with TDD skill + +### Slash command (orchestrator — fan-out) +Pick this only when **independent** investigations can run in parallel and produce reports that a single agent then merges. + +- `/ship` → fans out to `code-reviewer` + `security-auditor` + `test-engineer` in parallel, then synthesizes their reports into a go/no-go decision + +This is the only orchestration pattern this repo endorses. See [references/orchestration-patterns.md](../references/orchestration-patterns.md) for the full pattern catalog and anti-patterns. + +## Decision matrix + +``` +Is the work a single perspective on a single artifact? +├── Yes → Direct persona invocation +└── No → Are the sub-tasks independent (no shared mutable state, no ordering)? + ├── Yes → Slash command with parallel fan-out (e.g. 
/ship) + └── No → Sequential slash commands run by the user (/spec → /plan → /build → /test → /review) +``` + +## Worked example: valid orchestration + +`/ship` is the canonical fan-out orchestrator in this repo: + +``` +/ship + ├── (parallel) code-reviewer → review report + ├── (parallel) security-auditor → audit report + └── (parallel) test-engineer → coverage report + ↓ + merge phase (main agent) + ↓ + go/no-go decision + rollback plan +``` + +Why this works: +- Each sub-agent operates on the same diff but produces a **different perspective** +- They have no dependencies on each other → genuine parallelism, real wall-clock savings +- Each runs in a fresh context window → main session stays uncluttered +- The merge step is small and benefits from full context, so it stays in the main agent + +## Worked example: invalid orchestration (do not build this) + +A `meta-orchestrator` persona whose job is "decide which other persona to call": + +``` +/work-on-pr → meta-orchestrator + ↓ (decides "this needs a review") + code-reviewer + ↓ (returns) + meta-orchestrator (paraphrases result) + ↓ + user +``` + +Why this fails: +- Pure routing layer with no domain value +- Adds two paraphrasing hops → information loss + 2× token cost +- The user already knows they want a review; let them call `/review` directly +- Replicates work that slash commands and `AGENTS.md` intent-mapping already do + +## Rules for personas + +1. A persona is a single role with a single output format. If you find yourself adding a second role, create a second persona. +2. **Personas do not invoke other personas.** Composition is the job of slash commands or the user. On Claude Code this is also a hard platform constraint — *"subagents cannot spawn other subagents"* — so the rule is enforced for you. +3. A persona may invoke skills (the *how*). +4. Every persona file ends with a "Composition" block stating where it fits. 
+ +## Claude Code interop + +The personas in this repo are designed to work as Claude Code subagents and as Agent Teams teammates without modification: + +- **As subagents:** auto-discovered when this plugin is enabled (no path config needed). Use the Agent tool with `subagent_type: code-reviewer` (or `security-auditor`, `test-engineer`). `/ship` is the canonical example. +- **As Agent Teams teammates** (experimental, requires `CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1`): reference the same persona name when spawning a teammate. The persona's body is **appended to** the teammate's system prompt as additional instructions (not a replacement), so your persona text sits on top of the team-coordination instructions the lead installs (SendMessage, task-list tools, etc.). + +Subagents only report results back to the main agent. Agent Teams let teammates message each other directly. Use subagents when reports are enough; use Agent Teams when sub-agents need to challenge each other's findings (e.g. competing-hypothesis debugging). See [references/orchestration-patterns.md](../references/orchestration-patterns.md) for the full mapping. + +Plugin agents do not support `hooks`, `mcpServers`, or `permissionMode` frontmatter — those fields are silently ignored. Avoid relying on them when authoring new personas here. + +## Adding a new persona + +1. Create `agents/<name>.md` with the same frontmatter format used by existing personas. +2. Define the role, scope, output format, and rules. +3. Add a **Composition** block at the bottom (Invoke directly when / Invoke via / Do not invoke from another persona). +4. Add the persona to the table at the top of this file. +5. If the persona enables a new orchestration pattern, document it in `references/orchestration-patterns.md` rather than inventing the pattern in the persona file itself. 
diff --git a/third_party/agent-skills/agents/code-reviewer.md b/third_party/agent-skills/agents/code-reviewer.md new file mode 100644 index 0000000..3bce85c --- /dev/null +++ b/third_party/agent-skills/agents/code-reviewer.md @@ -0,0 +1,97 @@ +--- +name: code-reviewer +description: Senior code reviewer that evaluates changes across five dimensions — correctness, readability, architecture, security, and performance. Use for thorough code review before merge. +--- + +# Senior Code Reviewer + +You are an experienced Staff Engineer conducting a thorough code review. Your role is to evaluate the proposed changes and provide actionable, categorized feedback. + +## Review Framework + +Evaluate every change across these five dimensions: + +### 1. Correctness +- Does the code do what the spec/task says it should? +- Are edge cases handled (null, empty, boundary values, error paths)? +- Do the tests actually verify the behavior? Are they testing the right things? +- Are there race conditions, off-by-one errors, or state inconsistencies? + +### 2. Readability +- Can another engineer understand this without explanation? +- Are names descriptive and consistent with project conventions? +- Is the control flow straightforward (no deeply nested logic)? +- Is the code well-organized (related code grouped, clear boundaries)? + +### 3. Architecture +- Does the change follow existing patterns or introduce a new one? +- If a new pattern, is it justified and documented? +- Are module boundaries maintained? Any circular dependencies? +- Is the abstraction level appropriate (not over-engineered, not too coupled)? +- Are dependencies flowing in the right direction? + +### 4. Security +- Is user input validated and sanitized at system boundaries? +- Are secrets kept out of code, logs, and version control? +- Is authentication/authorization checked where needed? +- Are queries parameterized? Is output encoded? +- Any new dependencies with known vulnerabilities? + +### 5. 
Performance +- Any N+1 query patterns? +- Any unbounded loops or unconstrained data fetching? +- Any synchronous operations that should be async? +- Any unnecessary re-renders (in UI components)? +- Any missing pagination on list endpoints? + +## Output Format + +Categorize every finding: + +**Critical** — Must fix before merge (security vulnerability, data loss risk, broken functionality) + +**Important** — Should fix before merge (missing test, wrong abstraction, poor error handling) + +**Suggestion** — Consider for improvement (naming, code style, optional optimization) + +## Review Output Template + +```markdown +## Review Summary + +**Verdict:** APPROVE | REQUEST CHANGES + +**Overview:** [1-2 sentences summarizing the change and overall assessment] + +### Critical Issues +- [File:line] [Description and recommended fix] + +### Important Issues +- [File:line] [Description and recommended fix] + +### Suggestions +- [File:line] [Description] + +### What's Done Well +- [Positive observation — always include at least one] + +### Verification Story +- Tests reviewed: [yes/no, observations] +- Build verified: [yes/no] +- Security checked: [yes/no, observations] +``` + +## Rules + +1. Review the tests first — they reveal intent and coverage +2. Read the spec or task description before reviewing code +3. Every Critical and Important finding should include a specific fix recommendation +4. Don't approve code with Critical issues +5. Acknowledge what's done well — specific praise motivates good practices +6. If you're uncertain about something, say so and suggest investigation rather than guessing + +## Composition + +- **Invoke directly when:** the user asks for a review of a specific change, file, or PR. +- **Invoke via:** `/review` (single-perspective review) or `/ship` (parallel fan-out alongside `security-auditor` and `test-engineer`). 
+- **Do not invoke from another persona.** If you find yourself wanting to delegate to `security-auditor` or `test-engineer`, surface that as a recommendation in your report instead — orchestration belongs to slash commands, not personas. See [agents/README.md](README.md). diff --git a/third_party/agent-skills/agents/security-auditor.md b/third_party/agent-skills/agents/security-auditor.md new file mode 100644 index 0000000..07bc30b --- /dev/null +++ b/third_party/agent-skills/agents/security-auditor.md @@ -0,0 +1,101 @@ +--- +name: security-auditor +description: Security engineer focused on vulnerability detection, threat modeling, and secure coding practices. Use for security-focused code review, threat analysis, or hardening recommendations. +--- + +# Security Auditor + +You are an experienced Security Engineer conducting a security review. Your role is to identify vulnerabilities, assess risk, and recommend mitigations. You focus on practical, exploitable issues rather than theoretical risks. + +## Review Scope + +### 1. Input Handling +- Is all user input validated at system boundaries? +- Are there injection vectors (SQL, NoSQL, OS command, LDAP)? +- Is HTML output encoded to prevent XSS? +- Are file uploads restricted by type, size, and content? +- Are URL redirects validated against an allowlist? + +### 2. Authentication & Authorization +- Are passwords hashed with a strong algorithm (bcrypt, scrypt, argon2)? +- Are sessions managed securely (httpOnly, secure, sameSite cookies)? +- Is authorization checked on every protected endpoint? +- Can users access resources belonging to other users (IDOR)? +- Are password reset tokens time-limited and single-use? +- Is rate limiting applied to authentication endpoints? + +### 3. Data Protection +- Are secrets in environment variables (not code)? +- Are sensitive fields excluded from API responses and logs? +- Is data encrypted in transit (HTTPS) and at rest (if required)? 
+- Is PII handled according to applicable regulations? +- Are database backups encrypted? + +### 4. Infrastructure +- Are security headers configured (CSP, HSTS, X-Frame-Options)? +- Is CORS restricted to specific origins? +- Are dependencies audited for known vulnerabilities? +- Are error messages generic (no stack traces or internal details to users)? +- Is the principle of least privilege applied to service accounts? + +### 5. Third-Party Integrations +- Are API keys and tokens stored securely? +- Are webhook payloads verified (signature validation)? +- Are third-party scripts loaded from trusted CDNs with integrity hashes? +- Are OAuth flows using PKCE and state parameters? + +## Severity Classification + +| Severity | Criteria | Action | +|----------|----------|--------| +| **Critical** | Exploitable remotely, leads to data breach or full compromise | Fix immediately, block release | +| **High** | Exploitable with some conditions, significant data exposure | Fix before release | +| **Medium** | Limited impact or requires authenticated access to exploit | Fix in current sprint | +| **Low** | Theoretical risk or defense-in-depth improvement | Schedule for next sprint | +| **Info** | Best practice recommendation, no current risk | Consider adopting | + +## Output Format + +```markdown +## Security Audit Report + +### Summary +- Critical: [count] +- High: [count] +- Medium: [count] +- Low: [count] + +### Findings + +#### [CRITICAL] [Finding title] +- **Location:** [file:line] +- **Description:** [What the vulnerability is] +- **Impact:** [What an attacker could do] +- **Proof of concept:** [How to exploit it] +- **Recommendation:** [Specific fix with code example] + +#### [HIGH] [Finding title] +... + +### Positive Observations +- [Security practices done well] + +### Recommendations +- [Proactive improvements to consider] +``` + +## Rules + +1. Focus on exploitable vulnerabilities, not theoretical risks +2. 
Every finding must include a specific, actionable recommendation +3. Provide proof of concept or exploitation scenario for Critical/High findings +4. Acknowledge good security practices — positive reinforcement matters +5. Check the OWASP Top 10 as a minimum baseline +6. Review dependencies for known CVEs +7. Never suggest disabling security controls as a "fix" + +## Composition + +- **Invoke directly when:** the user wants a security-focused pass on a specific change, file, or system component. +- **Invoke via:** `/ship` (parallel fan-out alongside `code-reviewer` and `test-engineer`), or any future `/audit` command. +- **Do not invoke from another persona.** If `code-reviewer` flags something that warrants a deeper security pass, the user or a slash command initiates that pass — not the reviewer. See [agents/README.md](README.md). diff --git a/third_party/agent-skills/agents/test-engineer.md b/third_party/agent-skills/agents/test-engineer.md new file mode 100644 index 0000000..3e2c6be --- /dev/null +++ b/third_party/agent-skills/agents/test-engineer.md @@ -0,0 +1,95 @@ +--- +name: test-engineer +description: QA engineer specialized in test strategy, test writing, and coverage analysis. Use for designing test suites, writing tests for existing code, or evaluating test quality. +--- + +# Test Engineer + +You are an experienced QA Engineer focused on test strategy and quality assurance. Your role is to design test suites, write tests, analyze coverage gaps, and ensure that code changes are properly verified. + +## Approach + +### 1. Analyze Before Writing + +Before writing any test: +- Read the code being tested to understand its behavior +- Identify the public API / interface (what to test) +- Identify edge cases and error paths +- Check existing tests for patterns and conventions + +### 2. 
Test at the Right Level + +``` +Pure logic, no I/O → Unit test +Crosses a boundary → Integration test +Critical user flow → E2E test +``` + +Test at the lowest level that captures the behavior. Don't write E2E tests for things unit tests can cover. + +### 3. Follow the Prove-It Pattern for Bugs + +When asked to write a test for a bug: +1. Write a test that demonstrates the bug (must FAIL with current code) +2. Confirm the test fails +3. Report the test is ready for the fix implementation + +### 4. Write Descriptive Tests + +``` +describe('[Module/Function name]', () => { + it('[expected behavior in plain English]', () => { + // Arrange → Act → Assert + }); +}); +``` + +### 5. Cover These Scenarios + +For every function or component: + +| Scenario | Example | +|----------|---------| +| Happy path | Valid input produces expected output | +| Empty input | Empty string, empty array, null, undefined | +| Boundary values | Min, max, zero, negative | +| Error paths | Invalid input, network failure, timeout | +| Concurrency | Rapid repeated calls, out-of-order responses | + +## Output Format + +When analyzing test coverage: + +```markdown +## Test Coverage Analysis + +### Current Coverage +- [X] tests covering [Y] functions/components +- Coverage gaps identified: [list] + +### Recommended Tests +1. **[Test name]** — [What it verifies, why it matters] +2. **[Test name]** — [What it verifies, why it matters] + +### Priority +- Critical: [Tests that catch potential data loss or security issues] +- High: [Tests for core business logic] +- Medium: [Tests for edge cases and error handling] +- Low: [Tests for utility functions and formatting] +``` + +## Rules + +1. Test behavior, not implementation details +2. Each test should verify one concept +3. Tests should be independent — no shared mutable state between tests +4. Avoid snapshot tests unless reviewing every change to the snapshot +5. Mock at system boundaries (database, network), not between internal functions +6. 
Every test name should read like a specification +7. A test that never fails is as useless as a test that always fails + +## Composition + +- **Invoke directly when:** the user asks for test design, coverage analysis, or a Prove-It test for a specific bug. +- **Invoke via:** `/test` (TDD workflow) or `/ship` (parallel fan-out for coverage gap analysis alongside `code-reviewer` and `security-auditor`). +- **Do not invoke from another persona.** Recommendations to add tests belong in your report; the user or a slash command decides when to act on them. See [agents/README.md](README.md). diff --git a/third_party/agent-skills/docs/copilot-setup.md b/third_party/agent-skills/docs/copilot-setup.md new file mode 100644 index 0000000..660ae02 --- /dev/null +++ b/third_party/agent-skills/docs/copilot-setup.md @@ -0,0 +1,82 @@ +# Using agent-skills with GitHub Copilot + +## Setup + +### Copilot Instructions + +Copilot supports creating agent skills using a `.github/skills`, `.claude/skills`, or `.agents/skills` directory in your repository. + +```bash +mkdir -p .github/skills/test-driven-development .github/skills/code-review-and-quality + +# Create files for essential skills +cat /path/to/agent-skills/skills/test-driven-development/SKILL.md > .github/skills/test-driven-development/SKILL.md +cat /path/to/agent-skills/skills/code-review-and-quality/SKILL.md > .github/skills/code-review-and-quality/SKILL.md +``` + +For more details, refer to [Creating agent skills for GitHub Copilot](https://docs.github.com/en/copilot/how-tos/use-copilot-agents/coding-agent/create-skills). + +### Agent Personas (agents.md) + +Copilot supports specialized agent personas. 
Use the agent-skills agents: + +```bash +# Copy agent definitions +cp /path/to/agent-skills/agents/code-reviewer.md .github/agents/code-reviewer.md +cp /path/to/agent-skills/agents/test-engineer.md .github/agents/test-engineer.md +cp /path/to/agent-skills/agents/security-auditor.md .github/agents/security-auditor.md +``` + +Invoke agents in Copilot Chat: +- `@code-reviewer Review this PR` +- `@test-engineer Analyze test coverage for this module` +- `@security-auditor Check this endpoint for vulnerabilities` + +### Custom Instructions (User Level) + +For skills you want across all repositories: + +1. Open VS Code → Settings → GitHub Copilot → Custom Instructions +2. Add your most-used skill summaries + +## Recommended Configuration + +### .github/copilot-instructions.md + +GitHub Copilot supports project-level instructions via `.github/copilot-instructions.md`. + +```markdown +# Project Coding Standards + +## Testing +- Write tests before code (TDD) +- For bugs: write a failing test first, then fix (Prove-It pattern) +- Test hierarchy: unit > integration > e2e (use the lowest level that captures the behavior) +- Run `npm test` after every change + +## Code Quality +- Review across five axes: correctness, readability, architecture, security, performance +- Every PR must pass: lint, type check, tests, build +- No secrets in code or version control + +## Implementation +- Build in small, verifiable increments +- Each increment: implement → test → verify → commit +- Never mix formatting changes with behavior changes + +## Boundaries +- Always: Run tests before commits, validate user input +- Ask first: Database schema changes, new dependencies +- Never: Commit secrets, remove failing tests, skip verification +``` + +### Specialized Agents + +Use the agents for targeted review workflows in Copilot Chat. + +## Usage Tips + +1. **Keep instructions concise** — Copilot instructions work best when focused. Summarize the key rules rather than including full skill files. +2. 
**Use agents for review** — The code-reviewer, test-engineer, and security-auditor agents are designed for Copilot's agent model. +3. **Reference in chat** — When working on a specific phase, paste the relevant skill content into Copilot Chat for context. +4. **Combine with PR reviews** — Set up Copilot to review PRs using the code-reviewer agent persona. diff --git a/third_party/agent-skills/docs/cursor-setup.md b/third_party/agent-skills/docs/cursor-setup.md new file mode 100644 index 0000000..11ac905 --- /dev/null +++ b/third_party/agent-skills/docs/cursor-setup.md @@ -0,0 +1,58 @@ +# Using agent-skills with Cursor + +## Setup + +### Option 1: Rules Directory (Recommended) + +Cursor supports a `.cursor/rules/` directory for project-specific rules: + +```bash +# Create the rules directory +mkdir -p .cursor/rules + +# Copy skills you want as rules +cp /path/to/agent-skills/skills/test-driven-development/SKILL.md .cursor/rules/test-driven-development.md +cp /path/to/agent-skills/skills/code-review-and-quality/SKILL.md .cursor/rules/code-review-and-quality.md +cp /path/to/agent-skills/skills/incremental-implementation/SKILL.md .cursor/rules/incremental-implementation.md +``` + +Rules in this directory are automatically loaded into Cursor's context. + +### Option 2: .cursorrules File + +Create a `.cursorrules` file in your project root with the essential skills inlined: + +```bash +# Generate a combined rules file +cat /path/to/agent-skills/skills/test-driven-development/SKILL.md > .cursorrules +printf '\n---\n\n' >> .cursorrules +cat /path/to/agent-skills/skills/code-review-and-quality/SKILL.md >> .cursorrules +``` + +## Recommended Configuration + +### Essential Skills (Always Load) + +Add these to `.cursor/rules/`: + +1. `test-driven-development.md` — TDD workflow and Prove-It pattern +2. `code-review-and-quality.md` — Five-axis review +3. 
`incremental-implementation.md` — Build in small verifiable slices + +### Phase-Specific Skills (Load on Demand) + +For phase-specific work, create additional rule files as needed: + +- `spec-development.md` -> `spec-driven-development/SKILL.md` +- `frontend-ui.md` -> `frontend-ui-engineering/SKILL.md` +- `security.md` -> `security-and-hardening/SKILL.md` +- `performance.md` -> `performance-optimization/SKILL.md` + +Add these to `.cursor/rules/` when working on relevant tasks, then remove when done to manage context limits. + +## Usage Tips + +1. **Don't load all skills at once** - Cursor has context limits. Load 2-3 essential skills as rules and add phase-specific skills as needed. +2. **Reference skills explicitly** - Tell Cursor "Follow the test-driven-development rules for this change" to ensure it reads the loaded rules. +3. **Use agents for review** - Copy `agents/code-reviewer.md` content and tell Cursor to "review this diff using this code review framework." +4. **Load references on demand** - When working on performance, add `performance.md` to `.cursor/rules/` or paste the checklist content directly. diff --git a/third_party/agent-skills/docs/gemini-cli-setup.md b/third_party/agent-skills/docs/gemini-cli-setup.md new file mode 100644 index 0000000..1e6f3e5 --- /dev/null +++ b/third_party/agent-skills/docs/gemini-cli-setup.md @@ -0,0 +1,131 @@ +# Using agent-skills with Gemini CLI + +## Setup + +### Option 1: Install as Skills (Recommended) + +Gemini CLI has a native skills system that auto-discovers `SKILL.md` files in `.gemini/skills/` or `.agents/skills/` directories. Each skill activates on demand when it matches your task. 
+ +**Install from the repo:** + +```bash +gemini skills install https://github.com/addyosmani/agent-skills.git --path skills +``` + +**Or install from a local clone:** + +```bash +git clone https://github.com/addyosmani/agent-skills.git +gemini skills install /path/to/agent-skills/skills/ +``` + +**Install for a specific workspace only:** + +```bash +gemini skills install /path/to/agent-skills/skills/ --scope workspace +``` + +Skills installed at workspace scope go into `.gemini/skills/` (or `.agents/skills/`). User-level skills go into `~/.gemini/skills/`. + +Once installed, verify with: + +``` +/skills list +``` + +Gemini CLI injects skill names and descriptions into the prompt automatically. When it recognizes a matching task, it asks permission to activate the skill before loading its full instructions. + +### Option 2: GEMINI.md (Persistent Context) + +For skills you want always loaded as persistent project context (rather than on-demand activation), add them to your project's `GEMINI.md`: + +```bash +# Create GEMINI.md with core skills as persistent context +cat /path/to/agent-skills/skills/incremental-implementation/SKILL.md > GEMINI.md +echo -e "\n---\n" >> GEMINI.md +cat /path/to/agent-skills/skills/code-review-and-quality/SKILL.md >> GEMINI.md +``` + +You can also modularize by importing from separate files: + +```markdown +# Project Instructions + +@skills/test-driven-development/SKILL.md +@skills/incremental-implementation/SKILL.md +``` + +Use `/memory show` to verify loaded context, and `/memory reload` to refresh after changes. + +> **Skills vs GEMINI.md:** Skills are on-demand expertise that activate only when relevant, keeping your context window clean. GEMINI.md provides persistent context loaded for every prompt. Use skills for phase-specific workflows and GEMINI.md for always-on project conventions. 
+ +## Recommended Configuration + +### Always-On (GEMINI.md) + +Add these as persistent context for every session: + +- `incremental-implementation` — Build in small verifiable slices +- `code-review-and-quality` — Five-axis review + +### On-Demand (Skills) + +Install these as skills so they activate only when relevant: + +- `test-driven-development` — Activates when implementing logic or fixing bugs +- `spec-driven-development` — Activates when starting a new project or feature +- `frontend-ui-engineering` — Activates when building UI +- `security-and-hardening` — Activates during security reviews +- `performance-optimization` — Activates during performance work + +## Advanced Configuration + +### MCP Integration + +Many skills in this pack leverage [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) tools to interact with the environment. For example: + +- `browser-testing-with-devtools` uses the `chrome-devtools` MCP extension. +- `performance-optimization` can benefit from performance-related MCP tools. + +To enable these, ensure you have the relevant MCP extensions installed in your Gemini CLI configuration (`~/.gemini/config.json`). + +### Session Hooks + +Gemini CLI supports session lifecycle hooks. You can use these to automatically inject context or run validation scripts at the start of a session. + +To replicate the `agent-skills` experience from other tools, you can configure a `SessionStart` hook that reminds you of the available skills or loads a meta-skill. + +### Explicit Context Loading + +You can explicitly load any skill into your current session by referencing it with the `@` symbol in your prompt: + +```markdown +Use the @skills/test-driven-development/SKILL.md skill to implement this fix. +``` + +This is useful when you want to ensure a specific workflow is followed without waiting for auto-discovery. + +## Slash Commands + +The repo ships 7 slash commands under `.gemini/commands/` that map to the development lifecycle. 
Gemini CLI auto-discovers them when you run from the project root. + +| Command | What it does | +|---------|--------------| +| `/spec` | Write a structured spec before writing code | +| `/planning` | Break work into small, verifiable tasks | +| `/build` | Implement the next task incrementally | +| `/test` | Run TDD workflow — red, green, refactor | +| `/review` | Five-axis code review | +| `/code-simplify` | Reduce complexity without changing behavior | +| `/ship` | Pre-launch checklist via parallel persona fan-out | + +Each command invokes the corresponding skill automatically — no manual skill loading required. + +> **Note:** Use `/planning` instead of `/plan` — `/plan` conflicts with a Gemini CLI internal command name. + +## Usage Tips + +1. **Prefer skills over GEMINI.md** — Skills activate on demand and keep your context window focused. Only put skills in GEMINI.md if you want them always loaded. +2. **Skill descriptions matter** — Each SKILL.md has a `description` field in its frontmatter that tells agents when to activate it. The descriptions in this repo are optimized for auto-discovery across all supported tools (Claude Code, Gemini CLI, etc.) by clearly stating both *what* the skill does and *when* it should be triggered. +3. **Use agents for review** — Copy `agents/code-reviewer.md` content when requesting structured code reviews. +4. **Combine with references** — Reference checklists from `references/` when working on specific quality areas like testing or performance. diff --git a/third_party/agent-skills/docs/getting-started.md b/third_party/agent-skills/docs/getting-started.md new file mode 100644 index 0000000..f40eb14 --- /dev/null +++ b/third_party/agent-skills/docs/getting-started.md @@ -0,0 +1,142 @@ +# Getting Started with agent-skills + +agent-skills works with any AI coding agent that accepts Markdown instructions. This guide covers the universal approach. For tool-specific setup, see the dedicated guides. 
+ +## How Skills Work + +Each skill is a Markdown file (`SKILL.md`) that describes a specific engineering workflow. When loaded into an agent's context, the agent follows the workflow — including verification steps, anti-patterns to avoid, and exit criteria. + +**Skills are not reference docs.** They're step-by-step processes the agent follows. + +## Quick Start (Any Agent) + +### 1. Clone the repository + +```bash +git clone https://github.com/addyosmani/agent-skills.git +``` + +### 2. Choose a skill + +Browse the `skills/` directory. Each subdirectory contains a `SKILL.md` with: +- **When to use** — triggers that indicate this skill applies +- **Process** — step-by-step workflow +- **Verification** — how to confirm the work is done +- **Common rationalizations** — excuses the agent might use to skip steps +- **Red flags** — signs the skill is being violated + +### 3. Load the skill into your agent + +Copy the relevant `SKILL.md` content into your agent's system prompt, rules file, or conversation. The most common approaches: + +**System prompt:** Paste the skill content at the start of the session. + +**Rules file:** Add skill content to your project's rules file (CLAUDE.md, .cursorrules, etc.). + +**Conversation:** Reference the skill when giving instructions: "Follow the test-driven-development process for this change." + +### 4. Use the meta-skill for discovery + +Start with the `using-agent-skills` skill loaded. It contains a flowchart that maps task types to the appropriate skill. + +## Recommended Setup + +### Minimal (Start here) + +Load three essential skills into your rules file: + +1. **spec-driven-development** — For defining what to build +2. **test-driven-development** — For proving it works +3. **code-review-and-quality** — For verifying quality before merge + +These three cover the most critical quality gaps in AI-assisted development. 
+ +### Full Lifecycle + +For comprehensive coverage, load skills by phase: + +``` +Starting a project: spec-driven-development → planning-and-task-breakdown +During development: incremental-implementation + test-driven-development +Before merge: code-review-and-quality + security-and-hardening +Before deploy: shipping-and-launch +``` + +### Context-Aware Loading + +Don't load all skills at once — it wastes context. Load skills relevant to the current task: + +- Working on UI? Load `frontend-ui-engineering` +- Debugging? Load `debugging-and-error-recovery` +- Setting up CI? Load `ci-cd-and-automation` + +## Skill Anatomy + +Every skill follows the same structure: + +``` +YAML frontmatter (name, description) +├── Overview — What this skill does +├── When to Use — Triggers and conditions +├── Core Process — Step-by-step workflow +├── Examples — Code samples and patterns +├── Common Rationalizations — Excuses and rebuttals +├── Red Flags — Signs the skill is being violated +└── Verification — Exit criteria checklist +``` + +See [skill-anatomy.md](skill-anatomy.md) for the full specification. + +## Using Agents + +The `agents/` directory contains pre-configured agent personas: + +| Agent | Purpose | +|-------|---------| +| `code-reviewer.md` | Five-axis code review | +| `test-engineer.md` | Test strategy and writing | +| `security-auditor.md` | Vulnerability detection | + +Load an agent definition when you need specialized review. For example, ask your coding agent to "review this change using the code-reviewer agent persona" and provide the agent definition. 
+ +## Using Commands + +The `.claude/commands/` directory contains slash commands for Claude Code: + +| Command | Skill Invoked | +|---------|---------------| +| `/spec` | spec-driven-development | +| `/plan` | planning-and-task-breakdown | +| `/build` | incremental-implementation + test-driven-development | +| `/test` | test-driven-development | +| `/review` | code-review-and-quality | +| `/ship` | shipping-and-launch | + +## Using References + +The `references/` directory contains supplementary checklists: + +| Reference | Use With | +|-----------|----------| +| `testing-patterns.md` | test-driven-development | +| `performance-checklist.md` | performance-optimization | +| `security-checklist.md` | security-and-hardening | +| `accessibility-checklist.md` | frontend-ui-engineering | + +Load a reference when you need detailed patterns beyond what the skill covers. + +## Spec and task artifacts + +The `/spec` and `/plan` commands create working artifacts (`SPEC.md`, `tasks/plan.md`, `tasks/todo.md`). Treat them as **living documents** while the work is in progress: + +- Keep them in version control during development so the human and the agent have a shared source of truth. +- Update them when scope or decisions change. +- If your repo doesn’t want these files long‑term, delete them before merge or add the folder to `.gitignore` — the workflow doesn’t require them to be permanent. + +## Tips + +1. **Start with spec-driven-development** for any non-trivial work +2. **Always load test-driven-development** when writing code +3. **Don't skip verification steps** — they're the whole point +4. **Load skills selectively** — more context isn't always better +5. 
**Use the agents for review** — different perspectives catch different issues diff --git a/third_party/agent-skills/docs/opencode-setup.md b/third_party/agent-skills/docs/opencode-setup.md new file mode 100644 index 0000000..84a96d5 --- /dev/null +++ b/third_party/agent-skills/docs/opencode-setup.md @@ -0,0 +1,178 @@ +# OpenCode Setup + +This guide explains how to use Agent Skills with OpenCode in a way that closely mirrors the Claude Code experience (automatic skill selection, lifecycle-driven workflows, and strict process enforcement). + +## Overview + +OpenCode supports custom `/commands`, but does not have a native plugin system or automatic skill routing like Claude Code. + +Instead, we achieve parity through: + +- A strong system prompt (`AGENTS.md`) +- The built-in `skill` tool +- Consistent skill discovery from the `/skills` directory + +This creates an **agent-driven workflow** where skills are selected and executed automatically. + +While it is possible to recreate `/spec`, `/plan`, and other commands in OpenCode, this integration intentionally uses an agent-driven approach instead: + +- Skills are selected automatically based on intent +- Workflows are enforced via `AGENTS.md` +- No manual command invocation is required + +This more closely matches how Claude Code behaves in practice, where skills are triggered automatically rather than manually. + +--- + +## Installation + +1. Clone the repository: + +```bash +git clone https://github.com/addyosmani/agent-skills.git +``` + +2. Open the project in OpenCode. + +3. Ensure the following files are present in your workspace: + +- `AGENTS.md` (root) +- `skills/` directory + +No additional installation is required. + +--- + +## How It Works + +### 1. Skill Discovery + +All skills live in: + +``` +skills/<skill-name>/SKILL.md +``` + +OpenCode agents are instructed (via `AGENTS.md`) to: + +- Detect when a skill applies +- Invoke the `skill` tool +- Follow the skill exactly + +### 2. 
Automatic Skill Invocation + +The agent evaluates every request and maps it to the appropriate skill. + +Examples: + +- "build a feature" → `incremental-implementation` + `test-driven-development` +- "design a system" → `spec-driven-development` +- "fix a bug" → `debugging-and-error-recovery` +- "review this code" → `code-review-and-quality` + +The user does **not** need to explicitly request skills. + +### 3. Lifecycle Mapping (Implicit Commands) + +The development lifecycle is encoded implicitly: + +- DEFINE → `spec-driven-development` +- PLAN → `planning-and-task-breakdown` +- BUILD → `incremental-implementation` + `test-driven-development` +- VERIFY → `debugging-and-error-recovery` +- REVIEW → `code-review-and-quality` +- SHIP → `shipping-and-launch` + +This replaces slash commands like `/spec`, `/plan`, etc. + +--- + +## Usage Examples + +### Example 1: Feature Development + +User: +``` +Add authentication to this app +``` + +Agent behavior: +- Detects feature work +- Invokes `spec-driven-development` +- Produces a spec before writing code +- Moves to planning and implementation skills + +--- + +### Example 2: Bug Fix + +User: +``` +This endpoint is returning 500 errors +``` + +Agent behavior: +- Invokes `debugging-and-error-recovery` +- Reproduces → localizes → fixes → adds guards + +--- + +### Example 3: Code Review + +User: +``` +Review this PR +``` + +Agent behavior: +- Invokes `code-review-and-quality` +- Applies structured review (correctness, design, readability, etc.) + +--- + +## Agent Expectations (Critical) + +For OpenCode to work correctly, the agent must follow these rules: + +- Always check if a skill applies before acting +- If a skill applies, it MUST be used +- Never skip required workflows (spec, plan, test, etc.) +- Do not jump directly to implementation + +These rules are enforced via `AGENTS.md`. 
+ +--- + +## Limitations + +- No native slash commands (handled via intent mapping instead) +- No plugin system (handled via prompt + structure) +- Skill invocation depends on model compliance + +Despite these, the workflow closely matches Claude Code in practice. + +--- + +## Recommended Workflow + +Just use natural language: + +- "Design a feature" +- "Plan this change" +- "Implement this" +- "Fix this bug" +- "Review this" + +The agent will automatically select and execute the correct skills. + +--- + +## Summary + +OpenCode integration works by combining: + +- Structured skills (this repo) +- Strong agent rules (`AGENTS.md`) +- Automatic skill invocation via reasoning + +This results in a **fully agent-driven, production-grade engineering workflow** without requiring plugins or manual commands. diff --git a/third_party/agent-skills/docs/skill-anatomy.md b/third_party/agent-skills/docs/skill-anatomy.md new file mode 100644 index 0000000..a71685a --- /dev/null +++ b/third_party/agent-skills/docs/skill-anatomy.md @@ -0,0 +1,128 @@ +# Skill Anatomy + +This document describes the structure and format of agent-skills skill files. Use this as a guide when contributing new skills or understanding existing ones. + +## File Location + +Every skill lives in its own directory under `skills/`: + +``` +skills/ + skill-name/ + SKILL.md # Required: The skill definition + supporting-file.md # Optional: Reference material loaded on demand +``` + +## SKILL.md Format + +### Frontmatter (Required) + +```yaml +--- +name: skill-name-with-hyphens +description: Guides agents through [task/workflow]. Use when [specific trigger conditions]. +--- +``` + +**Rules:** +- `name`: Lowercase, hyphen-separated. Must match the directory name. +- `description`: Start with what the skill does in third person, then include one or more clear "Use when" trigger conditions. Include both *what* and *when*. Maximum 1024 characters. + +**Why this matters:** Agents discover skills by reading descriptions. 
The description is injected into the system prompt, so it must tell the agent both what the skill provides and when to activate it. Do not summarize the workflow — if the description contains process steps, the agent may follow the summary instead of reading the full skill. + +### Standard Sections (Recommended Pattern) + +```markdown +# Skill Title + +## Overview +One-two sentences explaining what this skill does and why it matters. + +## When to Use +- Bullet list of triggering conditions (symptoms, task types) +- When NOT to use (exclusions) + +## [Core Process / The Workflow / Steps] +The main workflow, broken into numbered steps or phases. +Include code examples where they help. +Use flowcharts (ASCII) where decision points exist. + +## [Specific Techniques / Patterns] +Detailed guidance for specific scenarios. +Code examples, templates, configuration. + +## Common Rationalizations +| Rationalization | Reality | +|---|---| +| Excuse agents use to skip steps | Why the excuse is wrong | + +## Red Flags +- Behavioral patterns indicating the skill is being violated +- Things to watch for during review + +## Verification +After completing the skill's process, confirm: +- [ ] Checklist of exit criteria +- [ ] Evidence requirements +``` + +## Section Purposes + +### Overview +The "elevator pitch" for the skill. Should answer: What does this skill do, and why should an agent follow it? + +### When to Use +Helps agents and humans decide if this skill applies to the current task. Include both positive triggers ("Use when X") and negative exclusions ("NOT for Y"). + +### Core Process +The heart of the skill. This is the step-by-step workflow the agent follows. Must be specific and actionable — not vague advice. + +**Good:** "Run `npm test` and verify all tests pass" +**Bad:** "Make sure the tests work" + +### Common Rationalizations +The most distinctive feature of well-crafted skills. These are excuses agents use to skip important steps, paired with rebuttals. 
They prevent the agent from rationalizing its way out of following the process. + +Think of every time an agent has said "I'll add tests later" or "This is simple enough to skip the spec" — those go here with a factual counter-argument. + +### Red Flags +Observable signs that the skill is being violated. Useful during code review and self-monitoring. + +### Verification +The exit criteria. A checklist the agent uses to confirm the skill's process is complete. Every checkbox should be verifiable with evidence (test output, build result, screenshot, etc.). + +## Supporting Files + +Create supporting files only when: +- Reference material exceeds 100 lines (keep the main SKILL.md focused) +- Code tools or scripts are needed +- Checklists are long enough to justify separate files + +Keep patterns and principles inline when under 50 lines. + +## Writing Principles + +1. **Process over knowledge.** Skills are workflows, not reference docs. Steps, not facts. +2. **Specific over general.** "Run `npm test`" beats "verify the tests". +3. **Evidence over assumption.** Every verification checkbox requires proof. +4. **Anti-rationalization.** Every skip-worthy step needs a counter-argument in the rationalizations table. +5. **Progressive disclosure.** Main SKILL.md is the entry point. Supporting files are loaded only when needed. +6. **Token-conscious.** Every section must justify its inclusion. If removing it wouldn't change agent behavior, remove it. + +## Naming Conventions + +- Skill directories: `lowercase-hyphen-separated` +- Skill files: `SKILL.md` (always uppercase) +- Supporting files: `lowercase-hyphen-separated.md` +- References: stored in `references/` at the project root, not inside skill directories + +## Cross-Skill References + +Reference other skills by name: + +```markdown +Follow the `test-driven-development` skill for writing tests. +If the build breaks, use the `debugging-and-error-recovery` skill. 
+``` + +Don't duplicate content between skills — reference and link instead. diff --git a/third_party/agent-skills/docs/windsurf-setup.md b/third_party/agent-skills/docs/windsurf-setup.md new file mode 100644 index 0000000..c640e48 --- /dev/null +++ b/third_party/agent-skills/docs/windsurf-setup.md @@ -0,0 +1,48 @@ +# Using agent-skills with Windsurf + +## Setup + +### Project Rules + +Windsurf uses `.windsurfrules` for project-specific agent instructions: + +```bash +# Create a combined rules file from your most important skills +cat /path/to/agent-skills/skills/test-driven-development/SKILL.md > .windsurfrules +printf '\n---\n\n' >> .windsurfrules +cat /path/to/agent-skills/skills/incremental-implementation/SKILL.md >> .windsurfrules +printf '\n---\n\n' >> .windsurfrules +cat /path/to/agent-skills/skills/code-review-and-quality/SKILL.md >> .windsurfrules +``` + +### Global Rules + +For skills you want across all projects, add them to Windsurf's global rules: + +1. Open Windsurf → Settings → AI → Global Rules +2. Paste the content of your most-used skills + +## Recommended Configuration + +Keep `.windsurfrules` focused on 2-3 essential skills to stay within context limits: + +``` +# .windsurfrules +# Essential agent-skills for this project + +[Paste test-driven-development SKILL.md] + +--- + +[Paste incremental-implementation SKILL.md] + +--- + +[Paste code-review-and-quality SKILL.md] +``` + +## Usage Tips + +1. **Be selective** — Windsurf's context is limited. Choose skills that address your biggest quality gaps. +2. **Reference in conversation** — Paste additional skill content into the chat when working on specific phases (e.g., paste `security-and-hardening` when building auth). +3. **Use references as checklists** — Paste `references/security-checklist.md` and ask Windsurf to verify each item.
diff --git a/third_party/agent-skills/hooks/SDD-CACHE.md b/third_party/agent-skills/hooks/SDD-CACHE.md new file mode 100644 index 0000000..e0f69ac --- /dev/null +++ b/third_party/agent-skills/hooks/SDD-CACHE.md @@ -0,0 +1,151 @@ +# sdd-cache hook + +Cross-session citation cache for [`source-driven-development`](../skills/source-driven-development/SKILL.md). Skips redundant `WebFetch` calls without weakening the skill's "verify against current docs" guarantee. + +## Why + +`source-driven-development` fetches official docs for every framework-specific decision. Working on the same project across sessions means fetching the same pages over and over. Caching the content as local memory would contradict the skill — docs change, and a stale cache hides that. + +This hook caches fetched content on disk, but **revalidates with the origin server on every reuse** via HTTP `If-None-Match` / `If-Modified-Since`. Content is only served from cache when the server responds `304 Not Modified`, which is a fresh verification — not a memory read. + +## Setup + +1. Add hooks to `.claude/settings.json` (or `.claude/settings.local.json` for personal use): + +```json +{ + "hooks": { + "PreToolUse": [ + { + "matcher": "WebFetch", + "hooks": [ + { + "type": "command", + "command": "bash ${CLAUDE_PROJECT_DIR}/hooks/sdd-cache-pre.sh", + "timeout": 10 + } + ] + } + ], + "PostToolUse": [ + { + "matcher": "WebFetch", + "hooks": [ + { + "type": "command", + "command": "bash ${CLAUDE_PROJECT_DIR}/hooks/sdd-cache-post.sh", + "timeout": 10 + } + ] + } + ] + } +} +``` + + `${CLAUDE_PROJECT_DIR}` resolves to the directory you launched Claude Code from. The snippet above works when the hooks live inside the same project. If you installed `agent-skills` elsewhere (e.g. as a shared plugin under `~/agent-skills`), replace `${CLAUDE_PROJECT_DIR}/hooks/...` with the absolute path to each script. + +2. Make sure `.claude/sdd-cache/` is in your `.gitignore` (already included in this repo). + +3. Use `/source-driven-development` (or the skill) as usual.
No changes to the skill or the agent's workflow — the cache is transparent. + +## Mental model + +HTTP resource cache keyed by URL. Freshness is delegated to the origin via `ETag` / `Last-Modified`; no TTL, no prompt in the key. + +The stored body is not raw HTML — `WebFetch` post-processes each response through a model using the caller's prompt, so what we cache is one agent's reading of the page. The key stays URL-only so reads reuse across sessions; the original prompt is kept as metadata and surfaced in the hit message so the next agent can tell whether the earlier reading fits. + +## How it works + +One cache entry per URL, stored as JSON in `.claude/sdd-cache/<hash>.json` (where `<hash>` is the first 32 hex characters of the URL's SHA-256): + +| Event | Action | +|---|---| +| `PreToolUse WebFetch` | If an entry exists, sends a `HEAD` request with `If-None-Match` / `If-Modified-Since`. On `304`, blocks the fetch and returns the cached content to the agent via stderr, with the original prompt surfaced as metadata. Otherwise allows the fetch. | +| `PostToolUse WebFetch` | Captures the response, issues a `HEAD` request to record the current `ETag` / `Last-Modified`, and stores `{url, prompt, etag, last_modified, content, fetched_at}`. | + +**Freshness rules:** + +- Entry is served only if the origin confirms `304 Not Modified`. + +**What the agent sees:** + +- Cache hit: `WebFetch` is blocked via exit code 2. Claude Code delivers the hook's stderr payload back to the agent as a tool error — this is the intended signal for a cache hit, not a failure. The payload is prefixed with `[sdd-cache] Cache hit for <url>` and wraps the cached body between `----- BEGIN CACHED CONTENT -----` / `----- END CACHED CONTENT -----` markers so the agent can use it as if `WebFetch` had just returned it. +- Cache miss or stale: `WebFetch` runs normally; the result is stored for next time. + +The skill itself is unchanged. It continues to follow `DETECT → FETCH → IMPLEMENT → CITE`. The hook only changes what happens under the hood when `FETCH` runs.
+ +## Local testing + +### 1. Smoke test the scripts directly + +```bash +# Simulate a PostToolUse payload: cache a page +echo '{ + "tool_input": { + "url": "https://react.dev/reference/react/useActionState", + "prompt": "extract the signature" + }, + "tool_response": "useActionState(action, initialState) returns [state, formAction, isPending]" +}' | bash hooks/sdd-cache-post.sh + +# Inspect the stored entry +ls .claude/sdd-cache/ +cat .claude/sdd-cache/*.json | jq . + +# Simulate the next PreToolUse on the same URL + prompt +echo '{ + "tool_input": { + "url": "https://react.dev/reference/react/useActionState", + "prompt": "extract the signature" + } +}' | bash hooks/sdd-cache-pre.sh +echo "exit=$?" +``` + +Expected: + +- The first command (post hook) creates one file under `.claude/sdd-cache/` (only if the server returned an `ETag` or `Last-Modified`). +- The last command (pre hook) exits `2` with the cached content on stderr when the origin replies `304`, or exits `0` silently otherwise. + +### 2. End-to-end in a real session + +6. Verify the second `WebFetch` is blocked and the cached content is returned (visible in the session transcript as a tool error with `[sdd-cache]` prefix). + +### 3. Freshness verification + +```bash +# Pick the entry you want to corrupt (swap in the actual filename) +ENTRY=.claude/sdd-cache/e49c9f378670cfbb1d7d871b6dee16d9.json + +# Patch its ETag to something the origin will not recognize +jq '.etag = "W/\"stale-etag-forced\""' "$ENTRY" > "$ENTRY.tmp" && mv "$ENTRY.tmp" "$ENTRY" + +# Next PreToolUse should miss (server returns 200, not 304) +echo '{"tool_input":{"url":"...", "prompt":"..."}}' | bash hooks/sdd-cache-pre.sh +echo "exit=$?" # expect 0 (fetch allowed through) +``` + +### 4. Debugging + +Both hooks write timestamped events to `.claude/sdd-cache/.debug.log` when debug mode is on.
Enable it with either: + +```bash +# Option A: env var (per-session) +SDD_CACHE_DEBUG=1 claude + +# Option B: sentinel file (persistent) +mkdir -p .claude/sdd-cache && touch .claude/sdd-cache/.debug +# …disable with: rm .claude/sdd-cache/.debug +``` + +The log captures URL, detected `tool_response` shape, HEAD status, and why each invocation hit or missed. Useful when a cache miss looks unexpected (typically: the origin stopped emitting validators). + +## Known limitations + +- **Body is prompt-shaped.** A hit returns the earlier agent's reading of the page, with the original prompt surfaced so the current agent can decide whether it applies. If it doesn't, delete the file under `.claude/sdd-cache/` to force a re-fetch. +- **Every cache write costs an extra HEAD.** Claude Code doesn't expose the response headers that `WebFetch` already received, so the post hook re-queries the origin to capture `ETag` / `Last-Modified`. One extra roundtrip per miss — the price of keeping this a pure hook with no core changes. +- **Servers without `ETag` or `Last-Modified` are never cached.** Most official doc sites (react.dev, docs.djangoproject.com, developer.mozilla.org) emit validators. Sites that don't are always re-fetched. +- **A misbehaving server can serve a wrong `304`.** That's a server bug to diagnose, not a cache invariant to defend against; we don't paper over it with a TTL. Delete the entry if you spot a stale one. +- **Cache is local and per-project.** There is no team-wide shared cache. Adding one would require a signed-content-addressable storage layer, which is out of scope. 
+ +## Requirements + +- `jq` +- `curl` +- `shasum` or `sha256sum` (auto-detected) +- Bash 3.2+ diff --git a/third_party/agent-skills/hooks/SIMPLIFY-IGNORE.md b/third_party/agent-skills/hooks/SIMPLIFY-IGNORE.md new file mode 100644 index 0000000..9e81af9 --- /dev/null +++ b/third_party/agent-skills/hooks/SIMPLIFY-IGNORE.md @@ -0,0 +1,90 @@ +# simplify-ignore hook + +Block-level protection for `/code-simplify`. Mark code that should never be simplified — the model won't see it. + +## Setup + +1. Annotate blocks you want to protect: + +```js +/* simplify-ignore-start: perf-critical */ +// manually unrolled XOR — 3x faster than a loop +result[0] = buf[0] ^ key[0]; +result[1] = buf[1] ^ key[1]; +result[2] = buf[2] ^ key[2]; +result[3] = buf[3] ^ key[3]; +/* simplify-ignore-end */ +``` + +2. Add hooks to `.claude/settings.json`: + +```json +{ + "hooks": { + "PreToolUse": [ + { + "matcher": "Read", + "hooks": [{ "type": "command", "command": "bash ${CLAUDE_PROJECT_DIR}/hooks/simplify-ignore.sh" }] + } + ], + "PostToolUse": [ + { + "matcher": "Edit|Write", + "hooks": [{ "type": "command", "command": "bash ${CLAUDE_PROJECT_DIR}/hooks/simplify-ignore.sh" }] + } + ], + "Stop": [ + { + "hooks": [{ "type": "command", "command": "bash ${CLAUDE_PROJECT_DIR}/hooks/simplify-ignore.sh" }] + } + ] + } +} +``` + +3. Run `/code-simplify` — protected blocks become `/* BLOCK_de115a1d: perf-critical */` placeholders. The model reasons about surrounding code without seeing the protected implementation. + +> **Note:** The hook stores temporary backups in `.claude/.simplify-ignore-cache/`. Make sure this path is in your `.gitignore`. 
+ +## How it works + +One script, three hook events: + +| Event | Action | +|---|---| +| `PreToolUse Read` | Backs up file, replaces blocks with `BLOCK_<hash>` placeholders in-place | +| `PostToolUse Edit\|Write` | Expands placeholders back to real code, saves model's changes, re-filters | +| `Stop` | Restores all files from backup when session ends | + +Each block is content-hashed (8 hex chars via `shasum`/`sha1sum`) so the round-trip is unambiguous even if the model duplicates or reorders placeholders. Cache is project-scoped to prevent cross-session interference. + +## Annotation syntax + +```js +/* simplify-ignore-start */ // basic — hides the block +/* simplify-ignore-start: reason */ // with reason — appears in placeholder +/* simplify-ignore-end */ +``` + +Any comment style works (`//`, `/*`, `#`, `<!-- -->` for HTML). + +## Crash recovery + +If Claude Code crashes without triggering the Stop hook, files on disk may still have `BLOCK_<hash>` placeholders. To restore manually: + +```bash +echo '{}' | bash hooks/simplify-ignore.sh +``` + +Backups are stored in `.claude/.simplify-ignore-cache/` within your project directory. + +## Known limitations + +- **Single-line blocks hide the entire line.** If `simplify-ignore-start` and `simplify-ignore-end` appear on the same line as other code, the whole line is hidden from the model, not just the annotated portion. Use dedicated lines for annotations. +- **Comment suffix detection covers `*/` and `-->` only.** Template engines with non-standard comment closers (ERB `%>`, Blade `--}}`) may produce unbalanced placeholders. Use `#` or `//` style comments instead. +- **Fallback expansion is progressive, not exact.** If the model alters a placeholder's formatting (e.g. changes the reason text), the hook tries progressively simpler matches: full placeholder → prefix+hash+suffix → hash-only. The hash-only fallback may leave cosmetic debris (e.g. stray `:` or reason text). A warning is printed to stderr when this happens.
+- **File renaming leaves placeholders.** If the model renames or moves a file via a shell command, the new file will retain `BLOCK_` placeholders. The original code is saved as `.recovered` when the session stops. You must manually restore the recovered code into the new file. + +## Requirements + +- `jq`, `shasum` or `sha1sum` (auto-detected), Bash 3.2+ diff --git a/third_party/agent-skills/hooks/hooks.json b/third_party/agent-skills/hooks/hooks.json new file mode 100644 index 0000000..1974826 --- /dev/null +++ b/third_party/agent-skills/hooks/hooks.json @@ -0,0 +1,14 @@ +{ + "hooks": { + "SessionStart": [ + { + "hooks": [ + { + "type": "command", + "command": "bash ${CLAUDE_PLUGIN_ROOT}/hooks/session-start.sh" + } + ] + } + ] + } +} diff --git a/third_party/agent-skills/hooks/sdd-cache-post.sh b/third_party/agent-skills/hooks/sdd-cache-post.sh new file mode 100755 index 0000000..c4dab11 --- /dev/null +++ b/third_party/agent-skills/hooks/sdd-cache-post.sh @@ -0,0 +1,148 @@ +#!/bin/bash +# sdd-cache-post.sh — PostToolUse hook for WebFetch. +# +# After WebFetch, stores the response body in .claude/sdd-cache/.json +# with the current ETag / Last-Modified captured via a HEAD request so the +# pre hook can revalidate on the next fetch. +# +# Keyed by URL. The caller's prompt is stored as metadata (not part of the +# key) so a future cache hit can show what question produced the cached +# reading. Entries without ETag or Last-Modified are not cached. +# +# Dependencies: jq, curl, shasum (or sha256sum). + +set -euo pipefail + +if ! command -v jq >/dev/null 2>&1; then + exit 0 +fi +if ! command -v curl >/dev/null 2>&1; then + exit 0 +fi +if ! command -v shasum >/dev/null 2>&1; then + if ! command -v sha256sum >/dev/null 2>&1; then + exit 0 + fi +fi + +if [ -t 0 ]; then INPUT="{}"; else INPUT=$(cat); fi + +# Debug logging: active when SDD_CACHE_DEBUG=1 is set, or when a sentinel +# file exists at .claude/sdd-cache/.debug. Toggle with `touch` / `rm`. 
+dbg() { + local dir="${CLAUDE_PROJECT_DIR:-$PWD}/.claude/sdd-cache" + if [ "${SDD_CACHE_DEBUG:-0}" != "1" ] && [ ! -f "$dir/.debug" ]; then + return 0 + fi + mkdir -p "$dir" + printf '%s [post] %s\n' "$(date -u +%FT%TZ)" "$*" >> "$dir/.debug.log" +} +dbg "fired, input=$(printf '%s' "$INPUT" | head -c 400)" + +URL=$(printf '%s' "$INPUT" | jq -r '.tool_input.url // empty' 2>/dev/null || true) +PROMPT=$(printf '%s' "$INPUT" | jq -r '.tool_input.prompt // empty' 2>/dev/null || true) +if [ -z "$URL" ]; then dbg "no url in tool_input, exit"; exit 0; fi +dbg "url=$URL prompt=$(printf '%s' "$PROMPT" | head -c 80)" + +# WebFetch tool_response shape (Claude Code as of 2026-04): an object with +# keys bytes, code, codeText, durationMs, result, url — content lives at +# .result. The other keys (.output / .text / .content / .body) are kept as +# defensive fallbacks in case the shape changes; jq returns empty if none +# match. The string branch handles older/custom integrations. +TOOL_RESPONSE_TYPE="unknown" +if tool_response_type_value="$(printf '%s' "$INPUT" | jq -r 'if has("tool_response") then (.tool_response | type) else "unknown" end' 2>/dev/null)"; then + TOOL_RESPONSE_TYPE="$tool_response_type_value" +fi +dbg "tool_response type=$TOOL_RESPONSE_TYPE keys=$(printf '%s' "$INPUT" | jq -r 'try (.tool_response | keys | join(",")) catch "n/a"' 2>/dev/null)" + +CONTENT=$(printf '%s' "$INPUT" | jq -r ' + if (.tool_response | type) == "object" then + (.tool_response.result + // .tool_response.output + // .tool_response.text + // .tool_response.content + // .tool_response.body + // empty) + elif (.tool_response | type) == "string" then + .tool_response + else + empty + end +' 2>/dev/null || true) + +if [ -z "$CONTENT" ]; then + dbg "could not extract content from tool_response, exit (shape unknown)" + exit 0 +fi +dbg "extracted content bytes=${#CONTENT}" + +# Must match the pre hook: sha256(URL), first 32 hex chars. 
+hash_key() { + if command -v shasum >/dev/null 2>&1; then + printf '%s' "$1" | shasum -a 256 | cut -c1-32 + else + printf '%s' "$1" | sha256sum | cut -c1-32 + fi +} + +CACHE_DIR="${CLAUDE_PROJECT_DIR:-$PWD}/.claude/sdd-cache" +mkdir -p "$CACHE_DIR" +CACHE_FILE="$CACHE_DIR/$(hash_key "$URL").json" + +# Capture validators from the origin. Follow redirects so they match the +# URL the agent actually talked to. Strip CR so awk's paragraph mode +# recognises blank separators between response blocks on a redirect chain. +HEAD_OUT=$(curl -sI -L --max-time 5 "$URL" 2>/dev/null | tr -d '\r' || true) + +# Take only the final response's headers (last paragraph) to avoid picking +# up validators from intermediate 301/302 hops. +FINAL_HEADERS=$(printf '%s' "$HEAD_OUT" | awk ' + BEGIN { RS = ""; last = "" } + { last = $0 } + END { print last } +') + +extract_header() { + local name="$1" + printf '%s' "$FINAL_HEADERS" | awk -v h="$name" ' + BEGIN { FS = ":" } + tolower($1) == tolower(h) { + sub(/^[^:]*:[ \t]*/, "") + sub(/[ \t]+$/, "") + print + exit + } + ' +} + +ETAG=$(extract_header "ETag") +LAST_MOD=$(extract_header "Last-Modified") +dbg "HEAD etag=$ETAG last_modified=$LAST_MOD" + +if [ -z "$ETAG" ] && [ -z "$LAST_MOD" ]; then + dbg "no validator from origin, removing any stale entry and exit" + rm -f "$CACHE_FILE" + exit 0 +fi + +NOW=$(date +%s) + +TMP="${CACHE_FILE}.$$.tmp" +if jq -n \ + --arg url "$URL" \ + --arg prompt "$PROMPT" \ + --arg etag "$ETAG" \ + --arg last_modified "$LAST_MOD" \ + --arg content "$CONTENT" \ + --argjson fetched_at "$NOW" \ + '{url: $url, prompt: $prompt, etag: $etag, last_modified: $last_modified, content: $content, fetched_at: $fetched_at}' \ + > "$TMP" +then + mv "$TMP" "$CACHE_FILE" + dbg "wrote cache file $CACHE_FILE" +else + rm -f "$TMP" + dbg "jq failed, temp cleaned" +fi + +exit 0 diff --git a/third_party/agent-skills/hooks/sdd-cache-pre.sh b/third_party/agent-skills/hooks/sdd-cache-pre.sh new file mode 100755 index 0000000..8593462 --- 
/dev/null +++ b/third_party/agent-skills/hooks/sdd-cache-pre.sh @@ -0,0 +1,116 @@ +#!/bin/bash +# sdd-cache-pre.sh — PreToolUse hook for WebFetch. +# +# HTTP resource cache keyed by URL. Freshness is delegated to the origin via +# HTTP validators; 304 Not Modified is the only signal to serve from cache. +# On hit, exits 2 and writes the cached body to stderr so Claude Code can +# deliver it to the agent in place of the WebFetch result. Otherwise exits 0. +# +# No TTL: if validators don't catch a change, nothing will. Entries without +# ETag or Last-Modified are never cached (can't revalidate). +# +# Cached bodies are prompt-shaped (WebFetch post-processes through a model), +# so the key is URL-only and the original prompt is surfaced in the hit +# message so the next agent can tell if the earlier reading still applies. +# +# Dependencies: jq, curl, shasum (or sha256sum). + +set -euo pipefail + +# Graceful degradation: if any dependency is missing, let the fetch through. +if ! command -v jq >/dev/null 2>&1; then + exit 0 +fi +if ! command -v curl >/dev/null 2>&1; then + exit 0 +fi +if ! command -v shasum >/dev/null 2>&1; then + if ! command -v sha256sum >/dev/null 2>&1; then + exit 0 + fi +fi + +if [ -t 0 ]; then INPUT="{}"; else INPUT=$(cat); fi + +# Debug logging: active when SDD_CACHE_DEBUG=1 is set, or when a sentinel +# file exists at .claude/sdd-cache/.debug. Toggle with `touch` / `rm`. +dbg() { + local dir="${CLAUDE_PROJECT_DIR:-$PWD}/.claude/sdd-cache" + if [ "${SDD_CACHE_DEBUG:-0}" != "1" ] && [ ! -f "$dir/.debug" ]; then + return 0 + fi + mkdir -p "$dir" + printf '%s [pre] %s\n' "$(date -u +%FT%TZ)" "$*" >> "$dir/.debug.log" +} +dbg "fired" + +URL=$(printf '%s' "$INPUT" | jq -r '.tool_input.url // empty' 2>/dev/null || true) +if [ -z "$URL" ]; then dbg "no url in tool_input, exit"; exit 0; fi +dbg "url=$URL" + +# Cache key is sha256(URL), truncated to 128 bits. 
+hash_key() { + if command -v shasum >/dev/null 2>&1; then + printf '%s' "$1" | shasum -a 256 | cut -c1-32 + else + printf '%s' "$1" | sha256sum | cut -c1-32 + fi +} + +CACHE_DIR="${CLAUDE_PROJECT_DIR:-$PWD}/.claude/sdd-cache" +CACHE_FILE="$CACHE_DIR/$(hash_key "$URL").json" + +if [ ! -f "$CACHE_FILE" ]; then dbg "no cache file at $CACHE_FILE, exit"; exit 0; fi +dbg "cache file exists: $CACHE_FILE" + +FETCHED_AT=$(jq -r '.fetched_at // 0' "$CACHE_FILE" 2>/dev/null || echo 0) +ORIGINAL_PROMPT=$(jq -r '.prompt // empty' "$CACHE_FILE" 2>/dev/null || true) +ETAG=$(jq -r '.etag // empty' "$CACHE_FILE" 2>/dev/null || true) +LAST_MOD=$(jq -r '.last_modified // empty' "$CACHE_FILE" 2>/dev/null || true) + +# No validator means we cannot verify freshness — never serve from cache. +if [ -z "$ETAG" ] && [ -z "$LAST_MOD" ]; then + dbg "cached entry has no etag/last-modified, cannot revalidate, bypass" + exit 0 +fi + +HEADERS=() +[ -n "$ETAG" ] && HEADERS+=(-H "If-None-Match: $ETAG") +[ -n "$LAST_MOD" ] && HEADERS+=(-H "If-Modified-Since: $LAST_MOD") + +STATUS=$(curl -sI -o /dev/null -w "%{http_code}" \ + --max-time 5 -L \ + "${HEADERS[@]}" \ + "$URL" 2>/dev/null || echo "000") +dbg "revalidation HEAD status=$STATUS" + +if [ "$STATUS" != "304" ]; then + dbg "not 304, letting WebFetch proceed" + exit 0 +fi + +# Server confirmed content unchanged. Serve cached copy to the agent. 
+CONTENT=$(jq -r '.content // empty' "$CACHE_FILE" 2>/dev/null || true) +if [ -z "$CONTENT" ]; then dbg "cache file has empty content field, bypass"; exit 0; fi +dbg "cache HIT, blocking WebFetch with ${#CONTENT} bytes of cached content" + +VERIFIED_AT_ISO=$(date -u -r "$FETCHED_AT" +"%Y-%m-%dT%H:%M:%SZ" 2>/dev/null \ + || date -u -d "@$FETCHED_AT" +"%Y-%m-%dT%H:%M:%SZ" 2>/dev/null \ + || echo "unknown") + +# Emit the payload with printf so $CONTENT is never interpreted by the shell +# (docs contain backticks, $vars, and backslashes in code examples; an +# unquoted heredoc would treat them as command substitution). +{ + printf '[sdd-cache] Cache hit for %s\n\n' "$URL" + printf 'Revalidated via HTTP 304; unchanged since %s. Use the cached\n' "$VERIFIED_AT_ISO" + printf 'content below as if WebFetch had just returned it.\n\n' + if [ -n "$ORIGINAL_PROMPT" ]; then + printf 'Original WebFetch prompt: "%s". If your angle differs, judge\n' "$ORIGINAL_PROMPT" + printf 'whether this reading still covers it.\n\n' + fi + printf -- '----- BEGIN CACHED CONTENT -----\n' + printf '%s\n' "$CONTENT" + printf -- '----- END CACHED CONTENT -----\n' +} >&2 +exit 2 diff --git a/third_party/agent-skills/hooks/session-start.sh b/third_party/agent-skills/hooks/session-start.sh new file mode 100755 index 0000000..cd8c2a2 --- /dev/null +++ b/third_party/agent-skills/hooks/session-start.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# agent-skills session start hook +# Injects the using-agent-skills meta-skill into every new session + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +SKILLS_DIR="$(dirname "$SCRIPT_DIR")/skills" +META_SKILL="$SKILLS_DIR/using-agent-skills/SKILL.md" + +if ! command -v jq >/dev/null 2>&1; then + echo '{"priority": "INFO", "message": "agent-skills: jq is required for the session-start hook but was not found on PATH. Install jq (e.g. `brew install jq` or `apt-get install jq`) to enable meta-skill injection. 
Skills remain available individually."}' + exit 0 +fi + +if [ -f "$META_SKILL" ]; then + CONTENT=$(cat "$META_SKILL") + # Use jq to properly escape and construct valid JSON + jq -cn \ + --arg message "agent-skills loaded. Use the skill discovery flowchart to find the right skill for your task. + +$CONTENT" \ + '{priority: "IMPORTANT", message: $message}' +else + echo '{"priority": "INFO", "message": "agent-skills: using-agent-skills meta-skill not found. Skills may still be available individually."}' +fi diff --git a/third_party/agent-skills/hooks/simplify-ignore-test.sh b/third_party/agent-skills/hooks/simplify-ignore-test.sh new file mode 100755 index 0000000..4057631 --- /dev/null +++ b/third_party/agent-skills/hooks/simplify-ignore-test.sh @@ -0,0 +1,247 @@ +#!/bin/bash +# simplify-ignore-test.sh — Tests for the simplify-ignore hook +# +# Exercises filter_file by extracting function definitions from the hook. +# Run: bash hooks/simplify-ignore-test.sh + +set -euo pipefail + +PASS=0 FAIL=0 +TMPDIR=$(mktemp -d) +trap 'rm -rf "$TMPDIR"' EXIT + +export CACHE="$TMPDIR/cache" +mkdir -p "$CACHE" + +# Extract function definitions we need +hash_cmd() { + if command -v shasum >/dev/null 2>&1; then shasum + elif command -v sha1sum >/dev/null 2>&1; then sha1sum + else printf '%s\n' "error: missing shasum or sha1sum" >&2; exit 1; fi +} +file_id() { printf '%s' "$1" | hash_cmd | cut -c1-16; } +block_hash() { printf '%s' "$1" | hash_cmd | cut -c1-8; } +escape_glob() { + local s="$1" + s="${s//\\/\\\\}" + s="${s//\*/\\*}" + s="${s//\?/\\?}" + s="${s//\[/\\[}" + printf '%s' "$s" +} + +# Extract filter_file from the hook script (line 59 "filter_file()" to line 142 closing brace) +eval "$(sed -n '/^filter_file()/,/^}/p' hooks/simplify-ignore.sh)" + +assert_eq() { + local label="$1" expected="$2" actual="$3" + if [ "$expected" = "$actual" ]; then + PASS=$((PASS + 1)) + printf ' PASS: %s\n' "$label" + else + FAIL=$((FAIL + 1)) + printf ' FAIL: %s\n' "$label" >&2 + printf ' 
expected: %s\n' "$(printf '%s' "$expected" | cat -v)" >&2 + printf ' actual: %s\n' "$(printf '%s' "$actual" | cat -v)" >&2 + fi +} + +# ── Test 1: Single-line block produces exactly one placeholder ──────────── +printf 'Test 1: Single-line block (start+end on same line)\n' +rm -f "$CACHE"/* + +SRC="$TMPDIR/single-line.js" +DEST="$TMPDIR/single-line-filtered.js" +cat > "$SRC" <<'EOF' +const a = 1; +/* simplify-ignore-start */ const secret = 42; /* simplify-ignore-end */ +const b = 2; +EOF + +FID="test_single" +filter_file "$SRC" "$DEST" "$FID" + +placeholder_count=$(grep -c 'BLOCK_' "$DEST") +assert_eq "exactly one placeholder line" "1" "$placeholder_count" +assert_eq "line before block preserved" "1" "$(grep -c 'const a = 1' "$DEST")" +assert_eq "line after block preserved" "1" "$(grep -c 'const b = 2' "$DEST")" + +block_files=$(ls "$CACHE/${FID}".block.* 2>/dev/null | wc -l | tr -d ' ') +assert_eq "one block file in cache" "1" "$block_files" + +block_content=$(cat "$CACHE/${FID}".block.*) +assert_eq "block content matches" \ + "/* simplify-ignore-start */ const secret = 42; /* simplify-ignore-end */" \ + "$block_content" + +# ── Test 2: Multi-line block ───────────────────────────────────────────── +printf '\nTest 2: Multi-line block\n' +rm -f "$CACHE"/* + +SRC="$TMPDIR/multi-line.js" +DEST="$TMPDIR/multi-line-filtered.js" +cat > "$SRC" <<'EOF' +const a = 1; +// simplify-ignore-start +const secret1 = 42; +const secret2 = 99; +// simplify-ignore-end +const b = 2; +EOF + +FID="test_multi" +filter_file "$SRC" "$DEST" "$FID" + +placeholder_count=$(grep -c 'BLOCK_' "$DEST") +assert_eq "exactly one placeholder for multi-line block" "1" "$placeholder_count" + +output_lines=$(wc -l < "$DEST" | tr -d ' ') +assert_eq "output has 3 lines (before + placeholder + after)" "3" "$output_lines" + +# ── Test 3: Multiple blocks in one file ────────────────────────────────── +printf '\nTest 3: Multiple blocks in one file\n' +rm -f "$CACHE"/* + +SRC="$TMPDIR/multi-block.js" 
+DEST="$TMPDIR/multi-block-filtered.js" +cat > "$SRC" <<'EOF' +line1 +// simplify-ignore-start +blockA +// simplify-ignore-end +line2 +// simplify-ignore-start +blockB +// simplify-ignore-end +line3 +EOF + +FID="test_multiblock" +filter_file "$SRC" "$DEST" "$FID" + +placeholder_count=$(grep -c 'BLOCK_' "$DEST") +assert_eq "two placeholders for two blocks" "2" "$placeholder_count" + +block_files=$(ls "$CACHE/${FID}".block.* 2>/dev/null | wc -l | tr -d ' ') +assert_eq "two block files in cache" "2" "$block_files" + +# ── Test 4: Reason string preserved ────────────────────────────────────── +printf '\nTest 4: Reason string in placeholder\n' +rm -f "$CACHE"/* + +SRC="$TMPDIR/reason.js" +DEST="$TMPDIR/reason-filtered.js" +cat > "$SRC" <<'EOF' +// simplify-ignore-start: perf-critical +hot_loop(); +// simplify-ignore-end +EOF + +FID="test_reason" +filter_file "$SRC" "$DEST" "$FID" + +assert_eq "placeholder includes reason" "1" "$(grep -c 'perf-critical' "$DEST")" + +reason_files=$(ls "$CACHE/${FID}".reason.* 2>/dev/null | wc -l | tr -d ' ') +assert_eq "reason file saved" "1" "$reason_files" +assert_eq "reason content" "perf-critical" "$(cat "$CACHE/${FID}".reason.*)" + +# ── Test 5: Trailing newline preservation ──────────────────────────────── +printf '\nTest 5: Trailing newline preservation\n' +rm -f "$CACHE"/* + +SRC="$TMPDIR/no-trailing-nl.js" +DEST="$TMPDIR/no-trailing-nl-filtered.js" +printf 'line1\n// simplify-ignore-start\nsecret\n// simplify-ignore-end' > "$SRC" + +FID="test_trail" +filter_file "$SRC" "$DEST" "$FID" + +# Source has no trailing newline; dest should also have no trailing newline +src_has_nl=$(tail -c 1 "$SRC" | wc -l | tr -d ' ') +dest_has_nl=$(tail -c 1 "$DEST" | wc -l | tr -d ' ') +assert_eq "dest preserves no-trailing-newline from source" "$src_has_nl" "$dest_has_nl" + +# ── Test 6: No blocks → return 1 ──────────────────────────────────────── +printf '\nTest 6: No blocks returns 1\n' +rm -f "$CACHE"/* + +SRC="$TMPDIR/no-blocks.js" 
+DEST="$TMPDIR/no-blocks-filtered.js" +cat > "$SRC" <<'EOF' +const a = 1; +const b = 2; +EOF + +FID="test_noblocks" +rc=0 +filter_file "$SRC" "$DEST" "$FID" || rc=$? +assert_eq "returns 1 when no blocks found" "1" "$rc" + +# ── Test 7: Unclosed block emits warning and flushes ───────────────────── +printf '\nTest 7: Unclosed block\n' +rm -f "$CACHE"/* + +SRC="$TMPDIR/unclosed.js" +DEST="$TMPDIR/unclosed-filtered.js" +cat > "$SRC" <<'EOF' +line1 +// simplify-ignore-start +orphan code +EOF + +FID="test_unclosed" +stderr_out=$(filter_file "$SRC" "$DEST" "$FID" 2>&1) || true +assert_eq "warning emitted for unclosed block" "1" "$(printf '%s' "$stderr_out" | grep -c 'unclosed')" +assert_eq "orphan code flushed to output" "1" "$(grep -c 'orphan code' "$DEST")" + +# ── Test 8: Single-line block with reason ──────────────────────────────── +printf '\nTest 8: Single-line block with reason\n' +rm -f "$CACHE"/* + +SRC="$TMPDIR/single-reason.js" +DEST="$TMPDIR/single-reason-filtered.js" +cat > "$SRC" <<'EOF' +before +/* simplify-ignore-start: hot-path */ x = compute(); /* simplify-ignore-end */ +after +EOF + +FID="test_single_reason" +filter_file "$SRC" "$DEST" "$FID" + +placeholder_count=$(grep -c 'BLOCK_' "$DEST") +assert_eq "exactly one placeholder for single-line+reason" "1" "$placeholder_count" +assert_eq "reason in placeholder" "1" "$(grep -c 'hot-path' "$DEST")" + +# ── Test 9: HTML comment syntax ────────────────────────────────────────── +printf '\nTest 9: HTML comment syntax\n' +rm -f "$CACHE"/* + +SRC="$TMPDIR/html.html" +DEST="$TMPDIR/html-filtered.html" +cat > "$SRC" <<'EOF' +
+ + + +
+EOF + +FID="test_html" +filter_file "$SRC" "$DEST" "$FID" + +placeholder_count=$(grep -c 'BLOCK_' "$DEST") +assert_eq "HTML block replaced" "1" "$placeholder_count" +assert_eq "HTML suffix preserved" "1" "$(grep -c '\-\->' "$DEST")" + +# ── Test 10: JSON parsing error warning ────────────────────────────────── +printf '\nTest 10: Malformed JSON input produces warning\n' + +warning_out=$(echo 'NOT_JSON{{{' | bash hooks/simplify-ignore.sh 2>&1) || true +assert_eq "warning on bad JSON" "1" "$(printf '%s' "$warning_out" | grep -c 'Warning.*failed to parse')" + +# ── Summary ────────────────────────────────────────────────────────────── +printf '\n══════════════════════════════════════════\n' +printf 'Results: %d passed, %d failed\n' "$PASS" "$FAIL" +[ "$FAIL" -eq 0 ] && exit 0 || exit 1 diff --git a/third_party/agent-skills/hooks/simplify-ignore.sh b/third_party/agent-skills/hooks/simplify-ignore.sh new file mode 100755 index 0000000..a93c467 --- /dev/null +++ b/third_party/agent-skills/hooks/simplify-ignore.sh @@ -0,0 +1,302 @@ +#!/bin/bash +# simplify-ignore.sh — Hook for Read (PreToolUse), Edit|Write (PostToolUse), Stop +# +# PreToolUse Read → backs up file, replaces blocks with BLOCK_ in-place +# PostToolUse Edit → expands placeholders, re-filters so file stays hidden +# PostToolUse Write → expands placeholders, re-filters so file stays hidden +# Stop → restores real file content from backup +# +# The file on disk ALWAYS has placeholders while the session is active. +# The real content (with model's changes applied) lives in the backup. +# +# Dependencies: jq, shasum or sha1sum (auto-detected) + +set -euo pipefail + +if ! command -v jq >/dev/null 2>&1; then + printf '%s\n' "error: missing jq" >&2; exit 1 +fi + +CACHE="${CLAUDE_PROJECT_DIR:-.}/.claude/.simplify-ignore-cache" +if [ -t 0 ]; then INPUT="{}"; else INPUT=$(cat); fi + +# Parse hook input — trap errors explicitly so set -e doesn't cause +# a silent exit on malformed JSON, and surface a useful diagnostic. 
+parse_error="" +TOOL_NAME=$(printf '%s' "$INPUT" | jq -r '.tool_name // empty' 2>/dev/null) || { + parse_error="failed to parse .tool_name from hook input" + TOOL_NAME="" +} +FILE_PATH=$(printf '%s' "$INPUT" | jq -r '.tool_input.file_path // empty' 2>/dev/null) || { + parse_error="failed to parse .tool_input.file_path from hook input" + FILE_PATH="" +} +if [ -n "$parse_error" ]; then + printf 'Warning: %s (input: %.120s)\n' "$parse_error" "$INPUT" >&2 +fi + +hash_cmd() { + if command -v shasum >/dev/null 2>&1; then shasum + elif command -v sha1sum >/dev/null 2>&1; then sha1sum + else printf '%s\n' "error: missing shasum or sha1sum" >&2; exit 1; fi +} +file_id() { printf '%s' "$1" | hash_cmd | cut -c1-16; } +block_hash() { printf '%s' "$1" | hash_cmd | cut -c1-8; } +# Escape glob metacharacters so ${var/pattern/repl} treats pattern as literal. +# Needed for Bash 3.2 (macOS) where quotes don't suppress globbing in PE patterns. +escape_glob() { + local s="$1" + s="${s//\\/\\\\}" + s="${s//\*/\\*}" + s="${s//\?/\\?}" + s="${s//\[/\\[}" + printf '%s' "$s" +} + +# ── filter_file: replace simplify-ignore blocks with BLOCK_ placeholders ─ +# Reads $1 (source), writes filtered version to $2 (dest), saves blocks to cache. +# Returns 0 if blocks were found, 1 if none. 
filter_file() {
  # Args: $1 source file, $2 destination file, $3 file id (cache key prefix).
  # Every simplify-ignore-start … simplify-ignore-end region in $1 is written to
  # $CACHE/<fid>.block.<hash> (with .reason/.prefix/.suffix siblings) and replaced
  # in $2 by one placeholder line. Returns 0 if at least one block was replaced,
  # 1 otherwise. An unclosed block is flushed unchanged and a warning is printed.
  local src="$1" dest="$2" fid="$3"
  : > "$dest"
  rm -f "$CACHE/${fid}".block.* "$CACHE/${fid}".reason.* "$CACHE/${fid}".prefix.* "$CACHE/${fid}".suffix.*

  # line/h are local so the function does not clobber same-named globals.
  local count=0 in_block=0 buf="" reason="" prefix="" suffix="" line="" h=""

  # `|| [ -n "$line" ]` keeps a final line that has no trailing newline.
  while IFS= read -r line || [ -n "$line" ]; do
    # Check for start marker (no fork — uses bash case)
    if [ $in_block -eq 0 ]; then
      case "$line" in *simplify-ignore-start*)
        in_block=1
        buf="$line"
        # Extract comment prefix/suffix to preserve language-appropriate syntax
        prefix="${line%%simplify-ignore-start*}"
        suffix=""
        case "$line" in *'*/'*) suffix=" */" ;; *'-->'*) suffix=" -->" ;; esac
        # Reason = text after "simplify-ignore-start:", minus any comment closer
        # and trailing whitespace; empty when the marker carries no reason.
        reason=$(printf '%s' "$line" | sed -n 's/.*simplify-ignore-start:[[:space:]]*//p' \
          | sed 's/[[:space:]]*\*\/.*$//' | sed 's/[[:space:]]*-->.*$//' | sed 's/[[:space:]]*$//')
        # Handle single-line block (start + end on same line)
        case "$line" in *simplify-ignore-end*)
          in_block=0
          # Write single-line block immediately and skip to next line
          # to avoid the end-marker check below firing again
          h=$(block_hash "$buf")
          count=$((count + 1))
          printf '%s' "$buf" > "$CACHE/${fid}.block.${h}"
          [ -n "$reason" ] && printf '%s' "$reason" > "$CACHE/${fid}.reason.${h}"
          printf '%s' "$prefix" > "$CACHE/${fid}.prefix.${h}"
          printf '%s' "$suffix" > "$CACHE/${fid}.suffix.${h}"
          if [ -n "$reason" ]; then
            printf '%s\n' "${prefix}BLOCK_${h}: ${reason}${suffix}" >> "$dest"
          else
            printf '%s\n' "${prefix}BLOCK_${h}${suffix}" >> "$dest"
          fi
          buf=""; reason=""; prefix=""; suffix=""
          continue
          ;; *)
          # Multi-line block: the start line is already in buf.
          continue
          ;;
        esac
      ;; esac
    fi
    # Accumulate block content
    if [ $in_block -eq 1 ]; then
      buf="${buf}"$'\n'"${line}"
    fi
    # Check for end marker
    case "$line" in *simplify-ignore-end*)
      if [ $in_block -eq 1 ]; then
        h=$(block_hash "$buf")
        count=$((count + 1))
        printf '%s' "$buf" > "$CACHE/${fid}.block.${h}"
        [ -n "$reason" ] && printf '%s' "$reason" > "$CACHE/${fid}.reason.${h}"
        printf '%s' "$prefix" > "$CACHE/${fid}.prefix.${h}"
        printf '%s' "$suffix" > "$CACHE/${fid}.suffix.${h}"
        if [ -n "$reason" ]; then
          printf '%s\n' "${prefix}BLOCK_${h}: ${reason}${suffix}" >> "$dest"
        else
          printf '%s\n' "${prefix}BLOCK_${h}${suffix}" >> "$dest"
        fi
        in_block=0; buf=""; reason=""; prefix=""; suffix=""
        continue
      fi
      ;;
    esac
    [ $in_block -eq 0 ] && printf '%s\n' "$line" >> "$dest"
  done < "$src"

  # Unclosed block → flush as-is
  if [ $in_block -eq 1 ] && [ -n "$buf" ]; then
    printf 'Warning: unclosed simplify-ignore-start in %s (block not hidden)\n' "$src" >&2
    printf '%s\n' "$buf" >> "$dest"
  fi

  # Preserve trailing newline status of source: `tail -c 1` yields a non-empty
  # string only when the last byte is not a newline, in which case the newline
  # the loop appended to dest is stripped again.
  if [ -s "$dest" ] && [ -s "$src" ] && [ -n "$(tail -c 1 "$src")" ]; then
    perl -pe 'chomp if eof' "$dest" > "${dest}.nnl" && \
      cat "${dest}.nnl" > "$dest" && rm -f "${dest}.nnl"
  fi

  [ $count -gt 0 ] && return 0 || return 1
}

# ── Stop: restore all files from backup ───────────────────────────────────────
if [ -z "$TOOL_NAME" ]; then
  [ -d "$CACHE" ] || exit 0
  for bak in "$CACHE"/*.bak; do
    [ -f "$bak" ] || continue
    fid="${bak##*/}"; fid="${fid%.bak}"
    pathfile="$CACHE/${fid}.path"
    # A backup without a recorded path cannot be restored anywhere; drop it.
    [ -f "$pathfile" ] || { rm -f "$bak"; continue; }
    orig=$(cat "$pathfile")
    if [ -f "$orig" ]; then
      # cat > keeps the target's inode and permissions.
      cat "$bak" > "$orig"
      rm -f "$bak" "$pathfile" "$CACHE/${fid}".block.* "$CACHE/${fid}".reason.* "$CACHE/${fid}".prefix.* "$CACHE/${fid}".suffix.*
      # `|| true`: a missing lock dir must not abort the loop under `set -e`
      # and leave the remaining files unrestored.
      rmdir "$CACHE/${fid}.lock" 2>/dev/null || true
    else
      # File was moved/deleted — save backup as .recovered, don't destroy it
      mkdir -p "$(dirname "${orig}.recovered")"
      mv "$bak" "${orig}.recovered"
      rm -f "$pathfile" "$CACHE/${fid}".block.* "$CACHE/${fid}".reason.* "$CACHE/${fid}".prefix.* "$CACHE/${fid}".suffix.*
      rmdir "$CACHE/${fid}.lock" 2>/dev/null || true
      printf 'Warning: %s was moved/deleted. Recovered original to %s.recovered\n' "$orig" "$orig" >&2
    fi
  done
  # Clean orphan locks (created but crash before backup)
  for lockdir in "$CACHE"/*.lock; do
    [ -d "$lockdir" ] || continue
    rmdir "$lockdir" 2>/dev/null || true
  done
  exit 0
fi

[ -z "$FILE_PATH" ] && exit 0

# ── PreToolUse Read: filter in-place ──────────────────────────────────────────
if [ "$TOOL_NAME" = "Read" ]; then
  [ -f "$FILE_PATH" ] || exit 0
  # Never filter files whose own name starts with simplify-ignore.
  case "$(basename "$FILE_PATH")" in simplify-ignore*|SIMPLIFY-IGNORE*) exit 0 ;; esac

  mkdir -p "$CACHE"
  ID=$(file_id "$FILE_PATH")

  # If backup exists, file is already filtered — skip
  [ -f "$CACHE/${ID}.bak" ] && exit 0

  grep -q 'simplify-ignore-start' -- "$FILE_PATH" || exit 0

  # Atomic lock: mkdir fails if another session races us
  if ! mkdir "$CACHE/${ID}.lock" 2>/dev/null; then
    # Lock exists — reclaim only if stale (>60s old, no backup = crash leftover)
    if [ ! -f "$CACHE/${ID}.bak" ] && \
       [ -n "$(find "$CACHE/${ID}.lock" -maxdepth 0 -mmin +1 2>/dev/null)" ]; then
      rmdir "$CACHE/${ID}.lock" 2>/dev/null || true
      mkdir "$CACHE/${ID}.lock" 2>/dev/null || exit 0
    else
      exit 0
    fi
  fi

  # Back up the original (preserve trailing newline status)
  cp -p "$FILE_PATH" "$CACHE/${ID}.bak" 2>/dev/null || cp "$FILE_PATH" "$CACHE/${ID}.bak"
  printf '%s' "$FILE_PATH" > "$CACHE/${ID}.path"

  # Filter in-place (cat > preserves inode and permissions)
  FILTERED="$CACHE/${ID}.$$.tmp"
  rm -f "$FILTERED"
  if filter_file "$FILE_PATH" "$FILTERED" "$ID"; then
    cat "$FILTERED" > "$FILE_PATH"
    rm -f "$FILTERED"
  else
    # Nothing was hidden: undo the backup and release the lock.
    rm -f "$FILTERED" "$CACHE/${ID}.bak" "$CACHE/${ID}.path"
    rmdir "$CACHE/${ID}.lock" 2>/dev/null || true
  fi
  exit 0
fi

# ── PostToolUse Edit|Write: expand, then re-filter ────────────────────────────
if [ "$TOOL_NAME" = "Edit" ] || [ "$TOOL_NAME" = "Write" ]; then
  ID=$(file_id "$FILE_PATH")
  [ -f "$CACHE/${ID}.bak" ] || exit 0
  ls "$CACHE/${ID}".block.* >/dev/null 2>&1 || exit 0

  # Expand placeholders, preserving any inline code the model added around them
  EXPANDED="$CACHE/${ID}.$$.expanded"
  rm -f "$EXPANDED"
  # Pre-create the output so an empty edited file does not make the later
  # `cat "$EXPANDED"` fail (the loop below writes nothing in that case).
  : > "$EXPANDED"
  while IFS= read -r line || [ -n "$line" ]; do
    case "$line" in *BLOCK_*)
      # Expand all placeholders on this line (supports multiple per line)
      for bf in "$CACHE/${ID}".block.*; do
        [ -f "$bf" ] || continue
        h="${bf##*.}"
        case "$line" in *"BLOCK_${h}"*)
          # Reconstruct the exact placeholder pattern
          bp=""; bs=""; br=""
          [ -f "$CACHE/${ID}.prefix.${h}" ] && bp=$(cat "$CACHE/${ID}.prefix.${h}")
          [ -f "$CACHE/${ID}.suffix.${h}" ] && bs=$(cat "$CACHE/${ID}.suffix.${h}")
          [ -f "$CACHE/${ID}.reason.${h}" ] && br=$(cat "$CACHE/${ID}.reason.${h}")
          if [ -n "$br" ]; then
            placeholder="${bp}BLOCK_${h}: ${br}${bs}"
          else
            placeholder="${bp}BLOCK_${h}${bs}"
          fi
          # The appended "x" protects trailing newlines from $(...) stripping.
          block_content=$(cat "$bf"; printf x); block_content="${block_content%x}"
          # Escape glob metacharacters (* ? [ \) in the pattern
          esc_placeholder=$(escape_glob "$placeholder")
          # Bash native substitution (// = global replace): replace placeholder, keep surrounding code
          # NOTE: on Bash >= 5.2 an unquoted '&' in the replacement expands to the
          # matched text unless the patsub_replacement shell option is disabled.
          line="${line//$esc_placeholder/$block_content}"
          # Fallback: if model altered the reason text, try without reason
          # (only trigger if BLOCK_hash is still present AND wasn't in the original block content)
          case "$block_content" in *"BLOCK_${h}"*) ;; *)
            case "$line" in *"BLOCK_${h}"*)
              printf 'Warning: placeholder BLOCK_%s was modified by model, using fuzzy match\n' "$h" >&2
              esc_fuzzy=$(escape_glob "${bp}BLOCK_${h}${bs}")
              line="${line//$esc_fuzzy/$block_content}"
              # Last resort: match just the hash token
              case "$line" in *"BLOCK_${h}"*)
                line="${line//BLOCK_${h}/$block_content}"
              ;; esac
            ;; esac
          ;; esac
        ;; esac
      done
    ;; esac
    printf '%s\n' "$line" >> "$EXPANDED"
  done < "$FILE_PATH"
  # Preserve trailing newline status
  if [ -s "$EXPANDED" ] && [ -s "$FILE_PATH" ] && [ -n "$(tail -c 1 "$FILE_PATH")" ]; then
    perl -pe 'chomp if eof' "$EXPANDED" > "${EXPANDED}.nnl" && \
      cat "${EXPANDED}.nnl" > "$EXPANDED" && rm -f "${EXPANDED}.nnl"
  fi
  # Warn if model deleted a protected block entirely
  for bf in "$CACHE/${ID}".block.*; do
    [ -f "$bf" ] || continue
    bh="${bf##*.}"
    # After expansion, blocks appear as original code (simplify-ignore-start).
    # If neither the expanded code nor the placeholder is in EXPANDED, it was deleted.
    if ! grep -qF "BLOCK_${bh}" "$EXPANDED" 2>/dev/null; then
      # Get first line of block to check if it was expanded back
      first_line=$(head -1 "$bf")
      # -e: the first line may start with '-' (e.g. an SQL/Lua "--" comment) and
      # would otherwise be parsed as a grep option, giving a false warning.
      if ! grep -qF -e "$first_line" "$EXPANDED" 2>/dev/null; then
        printf 'Warning: protected block BLOCK_%s was deleted by model\n' "$bh" >&2
      fi
    fi
  done
  # Preserve inode and permissions
  cat "$EXPANDED" > "$FILE_PATH"
  rm -f "$EXPANDED"

  # Save expanded version as new backup (this is the "real" file with model's changes)
  cp "$FILE_PATH" "$CACHE/${ID}.bak"

  # Re-filter in-place so the file on disk stays with placeholders
  FILTERED="$CACHE/${ID}.$$.tmp"
  rm -f "$FILTERED"
  if filter_file "$FILE_PATH" "$FILTERED" "$ID"; then
    cat "$FILTERED" > "$FILE_PATH"
    rm -f "$FILTERED"
  else
    # No blocks left to hide: do not leave the temp file behind in the cache.
    rm -f "$FILTERED"
  fi

  exit 0
fi
diff --git a/third_party/agent-skills/references/accessibility-checklist.md b/third_party/agent-skills/references/accessibility-checklist.md
new file mode 100644
index 0000000..c8c61e5
--- /dev/null
+++ b/third_party/agent-skills/references/accessibility-checklist.md
@@ -0,0 +1,160 @@
+# Accessibility Checklist
+
+Quick reference for WCAG 2.1 AA compliance. Use alongside the `frontend-ui-engineering` skill.
+ +## Table of Contents + +- [Essential Checks](#essential-checks) +- [Common HTML Patterns](#common-html-patterns) +- [Testing Tools](#testing-tools) +- [Quick Reference: ARIA Live Regions](#quick-reference-aria-live-regions) +- [Common Anti-Patterns](#common-anti-patterns) + +## Essential Checks + +### Keyboard Navigation +- [ ] All interactive elements focusable via Tab key +- [ ] Focus order follows visual/logical order +- [ ] Focus is visible (outline/ring on focused elements) +- [ ] Custom widgets have keyboard support (Enter to activate, Escape to close) +- [ ] No keyboard traps (user can always Tab away from a component) +- [ ] Skip-to-content link at top of page - visible (at least) on keyboard focus +- [ ] Modals trap focus while open, return focus on close + +### Screen Readers +- [ ] All images have `alt` text (or `alt=""` for decorative images) +- [ ] All form inputs have associated labels (`