From f1ee3d924ee94991440d680773a53c2c2ab41473 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Sat, 14 Mar 2026 07:28:02 -0500 Subject: [PATCH] feat: template-ify all skills + E2E tests for plan-ceo-review, plan-eng-review, retro - Convert gstack-upgrade to SKILL.md.tmpl template system - All 10 skills now use templates (consistent auto-generated headers) - Add comprehensive template validation tests (22 tests): every skill has .tmpl, generated SKILL.md has header, valid frontmatter, --dry-run reports FRESH, no unresolved placeholders - Add E2E tests for /plan-ceo-review, /plan-eng-review, /retro - Mark /ship, /setup-browser-cookies, /gstack-upgrade as test.todo (destructive/interactive) Co-Authored-By: Claude Opus 4.6 --- gstack-upgrade/SKILL.md | 2 + gstack-upgrade/SKILL.md.tmpl | 112 ++++++++++++++++ scripts/gen-skill-docs.ts | 1 + test/gen-skill-docs.test.ts | 65 ++++++++- test/skill-e2e.test.ts | 247 ++++++++++++++++++++++++++++++++++- 5 files changed, 423 insertions(+), 4 deletions(-) create mode 100644 gstack-upgrade/SKILL.md.tmpl diff --git a/gstack-upgrade/SKILL.md b/gstack-upgrade/SKILL.md index a945de17511078d501dc08309c3e01136056c500..6c5edaed34d43bdeb0f15fc39508014098186986 100644 --- a/gstack-upgrade/SKILL.md +++ b/gstack-upgrade/SKILL.md @@ -9,6 +9,8 @@ allowed-tools: - Read - AskUserQuestion --- + + # /gstack-upgrade diff --git a/gstack-upgrade/SKILL.md.tmpl b/gstack-upgrade/SKILL.md.tmpl new file mode 100644 index 0000000000000000000000000000000000000000..a945de17511078d501dc08309c3e01136056c500 --- /dev/null +++ b/gstack-upgrade/SKILL.md.tmpl @@ -0,0 +1,112 @@ +--- +name: gstack-upgrade +version: 1.0.0 +description: | + Upgrade gstack to the latest version. Detects global vs vendored install, + runs the upgrade, and shows what's new. +allowed-tools: + - Bash + - Read + - AskUserQuestion +--- + +# /gstack-upgrade + +Upgrade gstack to the latest version and show what's new. 
+
+## Inline upgrade flow
+
+This section is referenced by all skill preambles when they detect `UPGRADE_AVAILABLE`.
+
+### Step 1: Ask the user
+
+Use AskUserQuestion:
+- Question: "gstack **v{new}** is available (you're on v{old}). Upgrade now? Takes ~10 seconds."
+- Options: ["Yes, upgrade now", "Later (ask again tomorrow)"]
+
+**If "Later":** Run `touch ~/.gstack/last-update-check` to reset the 24h timer and continue with the current skill. Do not mention the upgrade again.
+
+### Step 2: Detect install type
+
+```bash
+if [ -d "$HOME/.claude/skills/gstack/.git" ]; then
+  INSTALL_TYPE="global-git"
+  INSTALL_DIR="$HOME/.claude/skills/gstack"
+elif [ -d ".claude/skills/gstack/.git" ]; then
+  INSTALL_TYPE="local-git"
+  INSTALL_DIR="$PWD/.claude/skills/gstack"  # absolute: later steps cd into the install
+elif [ -d ".claude/skills/gstack" ]; then
+  INSTALL_TYPE="vendored"
+  INSTALL_DIR="$PWD/.claude/skills/gstack"  # absolute: later steps cd into the install
+elif [ -d "$HOME/.claude/skills/gstack" ]; then
+  INSTALL_TYPE="vendored-global"
+  INSTALL_DIR="$HOME/.claude/skills/gstack"
+else
+  echo "ERROR: gstack not found"
+  exit 1
+fi
+echo "Install type: $INSTALL_TYPE at $INSTALL_DIR"
+```
+
+### Step 3: Save old version
+
+```bash
+OLD_VERSION=$(cat "$INSTALL_DIR/VERSION" 2>/dev/null || echo "unknown")
+```
+
+### Step 4: Upgrade
+
+**For git installs** (global-git, local-git):
+```bash
+cd "${INSTALL_DIR:?run Step 2 first}" || exit 1  # never stash/reset a repo other than the install
+STASH_OUTPUT=$(git stash 2>&1)
+git fetch origin || exit 1  # do not hard-reset onto a stale origin/main
+git reset --hard origin/main
+./setup
+```
+If `$STASH_OUTPUT` contains "Saved working directory", warn the user: "Note: local changes were stashed. Run `git stash pop` in the skill directory to restore them."
+
+**For vendored installs** (vendored, vendored-global):
+```bash
+INSTALL_DIR=$(cd "${INSTALL_DIR:?run Step 2 first}" && pwd) || exit 1  # absolute, so the paths below survive the cd
+TMP_DIR=$(mktemp -d)
+git clone --depth 1 https://github.com/garrytan/gstack.git "$TMP_DIR/gstack" || { rm -rf "$TMP_DIR"; exit 1; }  # leave the install untouched
+mv "$INSTALL_DIR" "$INSTALL_DIR.bak"
+mv "$TMP_DIR/gstack" "$INSTALL_DIR" || { mv "$INSTALL_DIR.bak" "$INSTALL_DIR"; exit 1; }  # roll back
+cd "$INSTALL_DIR" && ./setup && rm -rf "$INSTALL_DIR.bak"  # keep the backup if setup fails
+rm -rf "$TMP_DIR"
+```
+
+### Step 5: Write marker + clear cache
+
+```bash
+mkdir -p ~/.gstack
+echo "$OLD_VERSION" > ~/.gstack/just-upgraded-from
+rm -f ~/.gstack/last-update-check
+```
+
+### Step 6: Show What's New
+
+Read `$INSTALL_DIR/CHANGELOG.md`. Find all version entries between the old version and the new version. Summarize as 5-7 bullets grouped by theme. Don't overwhelm — focus on user-facing changes. Skip internal refactors unless they're significant.
+
+Format:
+```
+gstack v{new} — upgraded from v{old}!
+
+What's new:
+- [bullet 1]
+- [bullet 2]
+- ...
+
+Happy shipping!
+```
+
+### Step 7: Continue
+
+After showing What's New, continue with whatever skill the user originally invoked. The upgrade is done — no further action needed.
+
+---
+
+## Standalone usage
+
+When invoked directly as `/gstack-upgrade` (not from a preamble), follow Steps 2-6 above. If already on the latest version, tell the user: "You're already on the latest version (v{version})."
diff --git a/scripts/gen-skill-docs.ts b/scripts/gen-skill-docs.ts
index 43686079e9c5068e9e5ea76d949c1ede2cea154e..bf142141c20b07b130f06ce71ea76d73bc4bb6ab 100644
--- a/scripts/gen-skill-docs.ts
+++ b/scripts/gen-skill-docs.ts
@@ -182,6 +182,7 @@ function findTemplates(): string[] {
     path.join(ROOT, 'plan-ceo-review', 'SKILL.md.tmpl'),
     path.join(ROOT, 'plan-eng-review', 'SKILL.md.tmpl'),
     path.join(ROOT, 'retro', 'SKILL.md.tmpl'),
+    path.join(ROOT, 'gstack-upgrade', 'SKILL.md.tmpl'),
   ];
   for (const p of candidates) {
     if (fs.existsSync(p)) templates.push(p);
diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts
index 9d3f3b9b4be518579f1f9be8f8770ca75e4101c1..264cb904e3ea8572304d925b69a192f583806d17 100644
--- a/test/gen-skill-docs.test.ts
+++ b/test/gen-skill-docs.test.ts
@@ -56,9 +56,68 @@ describe('gen-skill-docs', () => {
     }
   });
 
-  test('template files exist for generated SKILL.md files', () => {
-    expect(fs.existsSync(path.join(ROOT, 'SKILL.md.tmpl'))).toBe(true);
-    expect(fs.existsSync(path.join(ROOT, 'browse', 'SKILL.md.tmpl'))).toBe(true);
+  // All skills that must have templates — single source of truth
+  const ALL_SKILLS = [
+    { dir: '.', name: 'root gstack' },
+    { dir: 'browse', name: 'browse' },
+    { dir: 'qa', name: 'qa' },
+    { dir: 'review', name: 'review' },
+    { dir: 'ship', name: 'ship' },
+    { dir: 'plan-ceo-review', name: 'plan-ceo-review' },
+    { dir: 'plan-eng-review', name: 'plan-eng-review' },
+    { dir: 'retro', name: 'retro' },
+    { dir: 'setup-browser-cookies', name: 'setup-browser-cookies' },
+    { dir: 'gstack-upgrade', name: 'gstack-upgrade' },
+  ];
+
+  test('every skill has a SKILL.md.tmpl template', () => {
+    // Report offenders by name so a failure says which skill lacks a template
+    const missing = ALL_SKILLS.filter(
+      ({ dir }) => !fs.existsSync(path.join(ROOT, dir, 'SKILL.md.tmpl')));
+    expect(missing.map(({ name }) => name)).toEqual([]);
+  });
+
+  test('every skill has a generated SKILL.md with auto-generated header', () => {
+    for (const skill of ALL_SKILLS) {
+      const mdPath = path.join(ROOT, skill.dir, 'SKILL.md');
+      expect(fs.existsSync(mdPath)).toBe(true);
+      const content = fs.readFileSync(mdPath, 'utf-8');
+      expect(content).toContain('AUTO-GENERATED from SKILL.md.tmpl');
+      expect(content).toContain('Regenerate: bun run gen:skill-docs');
+    }
+  });
+
+  test('every generated SKILL.md has valid YAML frontmatter', () => {
+    for (const skill of ALL_SKILLS) {
+      const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
+      expect(content.startsWith('---\n')).toBe(true);
+      expect(content).toContain('name:');
+      expect(content).toContain('description:');
+    }
+  });
+
+  test('generated files are fresh (match --dry-run)', () => {
+    const result = Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--dry-run'], {
+      cwd: ROOT,
+      stdout: 'pipe',
+      stderr: 'pipe',
+    });
+    expect(result.exitCode).toBe(0);
+    const output = result.stdout.toString();
+    // Every skill should be FRESH
+    for (const skill of ALL_SKILLS) {
+      const file = skill.dir === '.' ? 'SKILL.md' : `${skill.dir}/SKILL.md`;
+      expect(output).toContain(`FRESH: ${file}`);
+    }
+    expect(output).not.toContain('STALE');
+  });
+
+  test('no generated SKILL.md contains unresolved placeholders', () => {
+    for (const skill of ALL_SKILLS) {
+      const content = fs.readFileSync(path.join(ROOT, skill.dir, 'SKILL.md'), 'utf-8');
+      const unresolved = content.match(/\{\{[A-Z_]+\}\}/g);
+      expect(unresolved).toBeNull();
+    }
   });
 
   test('templates contain placeholders', () => {
diff --git a/test/skill-e2e.test.ts b/test/skill-e2e.test.ts
index 0e5d234ce81044be1d3afe8ed66a568c1ad939c3..7f0b0d34e9b57ff10f6ab02cbab9cd527b429f16 100644
--- a/test/skill-e2e.test.ts
+++ b/test/skill-e2e.test.ts
@@ -510,10 +510,255 @@ CRITICAL RULES:
     await runPlantedBugEval('qa-eval-checkout.html', 'qa-eval-checkout-ground-truth.json', 'b8-checkout');
   }, 360_000);
 
-  // Ship E2E deferred — too complex (requires full git + test suite + VERSION + CHANGELOG)
+  // Ship E2E deferred — destructive (pushes to remote, creates PRs, modifies VERSION/CHANGELOG)
test.todo('/ship completes without browse errors'); }); +// --- Plan CEO Review E2E --- + +describeE2E('Plan CEO Review E2E', () => { + let planDir: string; + + beforeAll(() => { + planDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-plan-ceo-')); + + // Create a simple plan document for the agent to review + fs.writeFileSync(path.join(planDir, 'plan.md'), `# Plan: Add User Dashboard + +## Context +We're building a new user dashboard that shows recent activity, notifications, and quick actions. + +## Changes +1. New React component \`UserDashboard\` in \`src/components/\` +2. REST API endpoint \`GET /api/dashboard\` returning user stats +3. PostgreSQL query for activity aggregation +4. Redis cache layer for dashboard data (5min TTL) + +## Architecture +- Frontend: React + TailwindCSS +- Backend: Express.js REST API +- Database: PostgreSQL with existing user/activity tables +- Cache: Redis for dashboard aggregates + +## Open questions +- Should we use WebSocket for real-time updates? +- How do we handle users with 100k+ activity records? +`); + + // Copy plan-ceo-review skill + fs.mkdirSync(path.join(planDir, 'plan-ceo-review'), { recursive: true }); + fs.copyFileSync( + path.join(ROOT, 'plan-ceo-review', 'SKILL.md'), + path.join(planDir, 'plan-ceo-review', 'SKILL.md'), + ); + }); + + afterAll(() => { + try { fs.rmSync(planDir, { recursive: true, force: true }); } catch {} + }); + + test('/plan-ceo-review produces structured review output', async () => { + const result = await runSkillTest({ + prompt: `Read plan-ceo-review/SKILL.md for instructions on how to do a CEO-mode plan review. + +Read plan.md — that's the plan to review. + +Choose HOLD SCOPE mode. Skip any AskUserQuestion calls — this is non-interactive. +Write your complete review to ${planDir}/review-output.md + +Include all sections the SKILL.md specifies. 
Focus on architecture, error handling, security, and performance.`, + workingDirectory: planDir, + maxTurns: 15, + timeout: 120_000, + }); + + logCost('/plan-ceo-review', result); + recordE2E('/plan-ceo-review', 'Plan CEO Review E2E', result); + expect(result.exitReason).toBe('success'); + + // Verify the review was written + const reviewPath = path.join(planDir, 'review-output.md'); + if (fs.existsSync(reviewPath)) { + const review = fs.readFileSync(reviewPath, 'utf-8'); + expect(review.length).toBeGreaterThan(200); + } + }, 180_000); +}); + +// --- Plan Eng Review E2E --- + +describeE2E('Plan Eng Review E2E', () => { + let planDir: string; + + beforeAll(() => { + planDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-plan-eng-')); + + // Create a plan with more engineering detail + fs.writeFileSync(path.join(planDir, 'plan.md'), `# Plan: Migrate Auth to JWT + +## Context +Replace session-cookie auth with JWT tokens. Currently using express-session + Redis store. + +## Changes +1. Add \`jsonwebtoken\` package +2. New middleware \`auth/jwt-verify.ts\` replacing \`auth/session-check.ts\` +3. Login endpoint returns { accessToken, refreshToken } +4. Refresh endpoint rotates tokens +5. 
Migration script to invalidate existing sessions + +## Files Modified +| File | Change | +|------|--------| +| auth/jwt-verify.ts | NEW: JWT verification middleware | +| auth/session-check.ts | DELETED | +| routes/login.ts | Return JWT instead of setting cookie | +| routes/refresh.ts | NEW: Token refresh endpoint | +| middleware/index.ts | Swap session-check for jwt-verify | + +## Error handling +- Expired token: 401 with \`token_expired\` code +- Invalid token: 401 with \`invalid_token\` code +- Refresh with revoked token: 403 + +## Not in scope +- OAuth/OIDC integration +- Rate limiting on refresh endpoint +`); + + // Copy plan-eng-review skill + fs.mkdirSync(path.join(planDir, 'plan-eng-review'), { recursive: true }); + fs.copyFileSync( + path.join(ROOT, 'plan-eng-review', 'SKILL.md'), + path.join(planDir, 'plan-eng-review', 'SKILL.md'), + ); + }); + + afterAll(() => { + try { fs.rmSync(planDir, { recursive: true, force: true }); } catch {} + }); + + test('/plan-eng-review produces structured review output', async () => { + const result = await runSkillTest({ + prompt: `Read plan-eng-review/SKILL.md for instructions on how to do an engineering plan review. + +Read plan.md — that's the plan to review. + +Choose SMALL CHANGE mode. Skip any AskUserQuestion calls — this is non-interactive. 
+Write your complete review to ${planDir}/review-output.md + +Include architecture, code quality, tests, and performance sections.`, + workingDirectory: planDir, + maxTurns: 15, + timeout: 120_000, + }); + + logCost('/plan-eng-review', result); + recordE2E('/plan-eng-review', 'Plan Eng Review E2E', result); + expect(result.exitReason).toBe('success'); + + // Verify the review was written + const reviewPath = path.join(planDir, 'review-output.md'); + if (fs.existsSync(reviewPath)) { + const review = fs.readFileSync(reviewPath, 'utf-8'); + expect(review.length).toBeGreaterThan(200); + } + }, 180_000); +}); + +// --- Retro E2E --- + +describeE2E('Retro E2E', () => { + let retroDir: string; + + beforeAll(() => { + retroDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-retro-')); + const { spawnSync } = require('child_process'); + const run = (cmd: string, args: string[]) => + spawnSync(cmd, args, { cwd: retroDir, stdio: 'pipe', timeout: 5000 }); + + // Create a git repo with varied commit history + run('git', ['init']); + run('git', ['config', 'user.email', 'dev@example.com']); + run('git', ['config', 'user.name', 'Dev']); + + // Day 1 commits + fs.writeFileSync(path.join(retroDir, 'app.ts'), 'console.log("hello");\n'); + run('git', ['add', 'app.ts']); + run('git', ['commit', '-m', 'feat: initial app setup', '--date', '2026-03-10T09:00:00']); + + fs.writeFileSync(path.join(retroDir, 'auth.ts'), 'export function login() {}\n'); + run('git', ['add', 'auth.ts']); + run('git', ['commit', '-m', 'feat: add auth module', '--date', '2026-03-10T11:00:00']); + + // Day 2 commits + fs.writeFileSync(path.join(retroDir, 'app.ts'), 'import { login } from "./auth";\nconsole.log("hello");\nlogin();\n'); + run('git', ['add', 'app.ts']); + run('git', ['commit', '-m', 'fix: wire up auth to app', '--date', '2026-03-11T10:00:00']); + + fs.writeFileSync(path.join(retroDir, 'test.ts'), 'import { test } from "bun:test";\ntest("login", () => {});\n'); + run('git', ['add', 'test.ts']); + 
run('git', ['commit', '-m', 'test: add login test', '--date', '2026-03-11T14:00:00']); + + // Day 3 commits + fs.writeFileSync(path.join(retroDir, 'api.ts'), 'export function getUsers() { return []; }\n'); + run('git', ['add', 'api.ts']); + run('git', ['commit', '-m', 'feat: add users API endpoint', '--date', '2026-03-12T09:30:00']); + + fs.writeFileSync(path.join(retroDir, 'README.md'), '# My App\nA test application.\n'); + run('git', ['add', 'README.md']); + run('git', ['commit', '-m', 'docs: add README', '--date', '2026-03-12T16:00:00']); + + // Copy retro skill + fs.mkdirSync(path.join(retroDir, 'retro'), { recursive: true }); + fs.copyFileSync( + path.join(ROOT, 'retro', 'SKILL.md'), + path.join(retroDir, 'retro', 'SKILL.md'), + ); + }); + + afterAll(() => { + try { fs.rmSync(retroDir, { recursive: true, force: true }); } catch {} + }); + + test('/retro produces analysis from git history', async () => { + const result = await runSkillTest({ + prompt: `Read retro/SKILL.md for instructions on how to run a retrospective. + +Run /retro for the last 7 days of this git repo. Skip any AskUserQuestion calls — this is non-interactive. 
+Write your retrospective report to ${retroDir}/retro-output.md + +Analyze the git history and produce the narrative report as described in the SKILL.md.`, + workingDirectory: retroDir, + maxTurns: 15, + timeout: 120_000, + }); + + logCost('/retro', result); + recordE2E('/retro', 'Retro E2E', result); + expect(result.exitReason).toBe('success'); + + // Verify the retro was written + const retroPath = path.join(retroDir, 'retro-output.md'); + if (fs.existsSync(retroPath)) { + const retro = fs.readFileSync(retroPath, 'utf-8'); + expect(retro.length).toBeGreaterThan(100); + } + }, 180_000); +}); + +// --- Deferred skill E2E tests (destructive or require interactive UI) --- + +describeE2E('Deferred skill E2E', () => { + // Ship is destructive: pushes to remote, creates PRs, modifies VERSION/CHANGELOG + test.todo('/ship completes full workflow'); + + // Setup-browser-cookies requires interactive browser picker UI + test.todo('/setup-browser-cookies imports cookies'); + + // Gstack-upgrade is destructive: modifies skill installation directory + test.todo('/gstack-upgrade completes upgrade flow'); +}); + // Module-level afterAll — finalize eval collector after all tests complete afterAll(async () => { if (evalCollector) {