From ffd9ab29b932f6372d7d7746d7a2cddc993b4e75 Mon Sep 17 00:00:00 2001 From: Garry Tan Date: Mon, 23 Mar 2026 08:44:08 -0700 Subject: [PATCH] fix: enforce Codex 1024-char description limit + auto-heal stale installs (v0.11.9.0) (#391) * fix: enforce 1024-char Codex description limit + auto-heal stale installs Build-time guard in gen-skill-docs.ts throws if any Codex description exceeds 1024 chars. Setup always regenerates .agents/ to prevent stale files. One-time migration in gstack-update-check deletes oversized SKILL.md files so they get regenerated on next setup/upgrade. * chore: bump version and changelog (v0.11.9.0) Co-Authored-By: Claude Opus 4.6 --------- Co-authored-by: Claude Opus 4.6 --- CHANGELOG.md | 18 ++++++++++++++++++ README.md | 2 ++ TODOS.md | 14 ++++++++++++++ VERSION | 2 +- bin/gstack-update-check | 18 ++++++++++++++++++ package.json | 2 +- scripts/gen-skill-docs.ts | 9 +++++++++ setup | 14 +++++--------- test/codex-e2e.test.ts | 3 +++ test/gen-skill-docs.test.ts | 19 +++++++++++++++++++ test/helpers/codex-session-runner.ts | 3 +++ 11 files changed, 93 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7105cdf3753713f1e5f76811b1020d0c707b7cb..0ed769e545340b43282f9854ad9f786b75b6ac0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,23 @@ # Changelog +## [0.11.9.0] - 2026-03-23 — Codex Skill Loading Fix + +### Fixed + +- **Codex no longer rejects gstack skills with "invalid SKILL.md".** Existing installs had oversized description fields (>1024 chars) that Codex silently rejected. The build now errors if any Codex description exceeds 1024 chars, setup always regenerates `.agents/` to prevent stale files, and a one-time migration deletes oversized Codex `SKILL.md` files on existing installs so the next `./setup` or `/gstack-upgrade` regenerates them. +- **`package.json` version now stays in sync with `VERSION`.** Was 6 minor versions behind. A new CI test catches future drift. 
+ +### Added + +- **Codex E2E tests now assert no skill loading errors.** The exact "Skipped loading skill(s)" error that prompted this fix is now a regression test — `stderr` is captured and checked. +- **Codex troubleshooting entry in README.** Manual fix instructions for users who hit the loading error before the auto-migration runs. + +### For contributors + +- `test/gen-skill-docs.test.ts` validates all `.agents/` descriptions stay within 1024 chars +- `gstack-update-check` includes a one-time migration that deletes oversized Codex SKILL.md files +- P1 TODO added: Codex→Claude reverse buddy check skill + ## [0.11.8.0] - 2026-03-23 — zsh Compatibility Fix ### Fixed diff --git a/README.md b/README.md index 424f3679902099872a8a7a2a54cabc68cda21cce..253d542529b4fd0dcebcb954491d2b797f709a3c 100644 --- a/README.md +++ b/README.md @@ -224,6 +224,8 @@ Data is stored in [Supabase](https://supabase.com) (open source Firebase alterna **Stale install?** Run `/gstack-upgrade` — or set `auto_upgrade: true` in `~/.gstack/config.yaml` +**Codex says "Skipped loading skill(s) due to invalid SKILL.md"?** Your Codex skill descriptions are stale. Fix: `cd ~/.codex/skills/gstack && git pull && ./setup --host codex` — or for repo-local installs: `cd "$(readlink -f .agents/skills/gstack)" && git pull && ./setup --host codex` + **Windows users:** gstack works on Windows 11 via Git Bash or WSL. Node.js is required in addition to Bun — Bun has a known bug with Playwright's pipe transport on Windows ([bun#4253](https://github.com/oven-sh/bun/issues/4253)). The browse server automatically falls back to Node.js. Make sure both `bun` and `node` are on your PATH. **Claude says it can't see the skills?** Make sure your project's `CLAUDE.md` has a gstack section. Add this: diff --git a/TODOS.md b/TODOS.md index f30f5550e1355d755253a281ad215b46b0ef73a0..428bb788fab9fbb36503ab43d020a50a6bf7b6a6 100644 --- a/TODOS.md +++ b/TODOS.md @@ -489,6 +489,20 @@ Shipped in v0.8.3. 
Step 8.5 added to `/ship` — after creating the PR, `/ship` **Depends on:** gstack-diff-scope (shipped) +## Codex + +### Codex→Claude reverse buddy check skill + +**What:** A Codex-native skill (`.agents/skills/gstack-claude/SKILL.md`) that runs `claude -p` to get an independent second opinion from Claude — the reverse of what `/codex` does today from Claude Code. + +**Why:** Codex users deserve the same cross-model challenge that Claude users get via `/codex`. Currently the flow is one-way (Claude→Codex). Codex users have no way to get a Claude second opinion. + +**Context:** The `/codex` skill template (`codex/SKILL.md.tmpl`) shows the pattern — it wraps `codex exec` with JSONL parsing, timeout handling, and structured output. The reverse skill would wrap `claude -p` with similar infrastructure. Would be generated into `.agents/skills/gstack-claude/` by `gen-skill-docs --host codex`. + +**Effort:** M (human: ~2 weeks / CC: ~30 min) +**Priority:** P1 +**Depends on:** None + ## Completeness ### Completeness metrics dashboard diff --git a/VERSION b/VERSION index f3b6bd46073155452fd399d08402551b34937836..b1d9a7913fd991c90b4b5f24d52216e748ebde81 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.11.8.0 +0.11.9.0 diff --git a/bin/gstack-update-check b/bin/gstack-update-check index d0d0f1f158f27c2450c512058fb3b04f8939715c..8f5193bee3215067478a3bc9bf8b989f496bc7a9 100755 --- a/bin/gstack-update-check +++ b/bin/gstack-update-check @@ -31,6 +31,24 @@ if [ "$_UC" = "false" ]; then exit 0 fi +# ─── Migration: fix stale Codex descriptions (one-time) ─────── +# Existing installs may have .agents/skills/gstack/SKILL.md with oversized +# descriptions (>1024 chars) that Codex rejects. We can't regenerate from +# the runtime root (no bun/scripts), so delete oversized files — the next +# ./setup or /gstack-upgrade will regenerate them properly. +# Marker file ensures this runs at most once per install. +if [ ! 
-f "$STATE_DIR/.codex-desc-healed" ]; then + for _AGENTS_SKILL in "$GSTACK_DIR"/.agents/skills/*/SKILL.md; do + [ -f "$_AGENTS_SKILL" ] || continue + _DESC=$(awk '/^---$/{n++;next}n==1&&/^description:/{d=1;sub(/^description:\s*/,"");if(length>0)print;next}d&&/^ /{sub(/^ /,"");print;next}d{d=0}' "$_AGENTS_SKILL" | wc -c | tr -d ' ') + if [ "${_DESC:-0}" -gt 1024 ]; then + rm -f "$_AGENTS_SKILL" + fi + done + mkdir -p "$STATE_DIR" + touch "$STATE_DIR/.codex-desc-healed" +fi + # ─── Snooze helper ────────────────────────────────────────── # check_snooze # Returns 0 if snoozed (should stay quiet), 1 if not snoozed (should output). diff --git a/package.json b/package.json index 2712d5e944e8d341de5f6d50780242ec286d0f71..b24b52535d85195b69f27283dac2ee42621f5f0f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "gstack", - "version": "0.9.8.0", + "version": "0.11.9.0", "description": "Garry's Stack — Claude Code skills + fast headless browser. One repo, one install, entire AI engineering workflow.", "license": "MIT", "type": "module", diff --git a/scripts/gen-skill-docs.ts b/scripts/gen-skill-docs.ts index 06e780996c7116b0a2528c4627fde6ecab6ac907..340dbb3ca3919c3a2c6c5bf81207079f1384769b 100644 --- a/scripts/gen-skill-docs.ts +++ b/scripts/gen-skill-docs.ts @@ -2910,6 +2910,15 @@ function transformFrontmatter(content: string, host: Host): string { const body = content.slice(fmEnd + 4); // includes the leading \n after --- const { name, description } = extractNameAndDescription(content); + // Codex 1024-char description limit — fail build, don't ship broken skills + const MAX_DESC = 1024; + if (description.length > MAX_DESC) { + throw new Error( + `Codex description for "${name}" is ${description.length} chars (max ${MAX_DESC}). 
` + + `Compress the description in the .tmpl file.` + ); + } + // Re-emit Codex frontmatter (name + description only) const indentedDesc = description.split('\n').map(l => ` ${l}`).join('\n'); const codexFm = `---\nname: ${name}\ndescription: |\n${indentedDesc}\n---`; diff --git a/setup b/setup index 75dbf7313775508e7541494ce0d09b605168b7c5..4d7d29c013a3807761e2e531f73057c9bd314e67 100755 --- a/setup +++ b/setup @@ -128,17 +128,13 @@ if [ ! -x "$BROWSE_BIN" ]; then exit 1 fi -# 1b. Generate .agents/ Codex skill docs if missing or stale +# 1b. Generate .agents/ Codex skill docs — always regenerate to prevent stale descriptions. # .agents/ is no longer committed — generated at setup time from .tmpl templates. -# bun run build already does this, but we need it when NEEDS_BUILD=0 (binary is fresh -# but .agents/ hasn't been generated yet, e.g., fresh clone). +# bun run build already does this, but we need it when NEEDS_BUILD=0 (binary is fresh). +# Always regenerate: generation is fast (<2s) and mtime-based staleness checks are fragile +# (miss stale files when timestamps match after clone/checkout/upgrade). AGENTS_DIR="$SOURCE_GSTACK_DIR/.agents/skills" -NEEDS_AGENTS_GEN=0 -if [ ! -d "$AGENTS_DIR" ]; then - NEEDS_AGENTS_GEN=1 -elif [ -n "$(find "$SOURCE_GSTACK_DIR" -maxdepth 2 -name 'SKILL.md.tmpl' -newer "$AGENTS_DIR" -print -quit 2>/dev/null)" ]; then - NEEDS_AGENTS_GEN=1 -fi +NEEDS_AGENTS_GEN=1 if [ "$NEEDS_AGENTS_GEN" -eq 1 ] && [ "$NEEDS_BUILD" -eq 0 ]; then echo "Generating .agents/ skill docs..." 
diff --git a/test/codex-e2e.test.ts b/test/codex-e2e.test.ts index 02c7e7832ea74f76b2bd5803351732cb64dd68aa..685d315c561576778147d27ff95ef5a18eda9670 100644 --- a/test/codex-e2e.test.ts +++ b/test/codex-e2e.test.ts @@ -139,6 +139,9 @@ describeCodex('Codex E2E', () => { expect(result.exitCode).toBe(0); expect(result.output.length).toBeGreaterThan(0); + // Skill loading errors mean our generated SKILL.md files are broken + expect(result.stderr).not.toContain('invalid'); + expect(result.stderr).not.toContain('Skipped loading'); // The output should reference the skill name in some form const outputLower = result.output.toLowerCase(); expect( diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index 0e179c1e2315b1e0a7e8503b91d971cefb94c928..32e77a3686f7d956b5975916360e29c56da5ad86 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -139,6 +139,25 @@ describe('gen-skill-docs', () => { } }); + test(`every Codex SKILL.md description stays within ${MAX_SKILL_DESCRIPTION_LENGTH} chars`, () => { + const agentsDir = path.join(ROOT, '.agents', 'skills'); + if (!fs.existsSync(agentsDir)) return; // skip if not generated + for (const entry of fs.readdirSync(agentsDir, { withFileTypes: true })) { + if (!entry.isDirectory()) continue; + const skillMd = path.join(agentsDir, entry.name, 'SKILL.md'); + if (!fs.existsSync(skillMd)) continue; + const content = fs.readFileSync(skillMd, 'utf-8'); + const description = extractDescription(content); + expect(description.length).toBeLessThanOrEqual(MAX_SKILL_DESCRIPTION_LENGTH); + } + }); + + test('package.json version matches VERSION file', () => { + const pkg = JSON.parse(fs.readFileSync(path.join(ROOT, 'package.json'), 'utf-8')); + const version = fs.readFileSync(path.join(ROOT, 'VERSION'), 'utf-8').trim(); + expect(pkg.version).toBe(version); + }); + test('generated files are fresh (match --dry-run)', () => { const result = Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', 
'--dry-run'], { cwd: ROOT, diff --git a/test/helpers/codex-session-runner.ts b/test/helpers/codex-session-runner.ts index ac2b9e298fa58f381bdac25e4d909679dfbc641d..0be9dd7d60f16b5b62b306bdbdb2e7c9ce07c3f5 100644 --- a/test/helpers/codex-session-runner.ts +++ b/test/helpers/codex-session-runner.ts @@ -27,6 +27,7 @@ export interface CodexResult { durationMs: number; // Wall clock time sessionId: string | null; // Thread ID for session continuity rawLines: string[]; // Raw JSONL lines for debugging + stderr: string; // Stderr output (skill loading errors, auth failures) } // --- JSONL parser (ported from Python in codex/SKILL.md.tmpl) --- @@ -167,6 +168,7 @@ export async function runCodexSkill(opts: { durationMs: Date.now() - startTime, sessionId: null, rawLines: [], + stderr: '', }; } @@ -282,6 +284,7 @@ export async function runCodexSkill(opts: { durationMs, sessionId: parsed.sessionId, rawLines: collectedLines, + stderr, }; } finally { // Clean up temp HOME