~cytrogen/gstack: fix: enforce Codex 1024-char description limit + auto-heal stale installs (v0.11.9.0) (#391)

11 files changed, 93 insertions(+), 11 deletions(-)

M CHANGELOG.md
M README.md
M TODOS.md
M VERSION
M bin/gstack-update-check
M package.json
M scripts/gen-skill-docs.ts
M setup
M test/codex-e2e.test.ts
M test/gen-skill-docs.test.ts
M test/helpers/codex-session-runner.ts

M CHANGELOG.md => CHANGELOG.md +18 -0

@@ 1,5 1,23 @@
 # Changelog
 
+## [0.11.9.0] - 2026-03-23 — Codex Skill Loading Fix
+
+### Fixed
+
+- **Codex no longer rejects gstack skills with "invalid SKILL.md".** Existing installs had oversized description fields (>1024 chars) that Codex silently rejected. The build now errors if any Codex description exceeds 1024 chars, setup always regenerates `.agents/` to prevent stale files, and a one-time migration deletes Codex SKILL.md files with oversized descriptions on existing installs so that the next `./setup` or `/gstack-upgrade` regenerates them.
+- **`package.json` version now stays in sync with `VERSION`.** Was 6 minor versions behind. A new CI test catches future drift.
+
+### Added
+
+- **Codex E2E tests now assert no skill loading errors.** The exact "Skipped loading skill(s)" error that prompted this fix is now a regression test — `stderr` is captured and checked.
+- **Codex troubleshooting entry in README.** Manual fix instructions for users who hit the loading error before the auto-migration runs.
+
+### For contributors
+
+- `test/gen-skill-docs.test.ts` validates that all `.agents/` descriptions stay within 1024 chars
+- `gstack-update-check` includes a one-time migration that deletes oversized Codex SKILL.md files
+- P1 TODO added: Codex→Claude reverse buddy check skill
+
 ## [0.11.8.0] - 2026-03-23 — zsh Compatibility Fix
 
 ### Fixed

M README.md => README.md +2 -0

@@ 224,6 224,8 @@ Data is stored in [Supabase](https://supabase.com) (open source Firebase alterna
 
 **Stale install?** Run `/gstack-upgrade` — or set `auto_upgrade: true` in `~/.gstack/config.yaml`
 
+**Codex says "Skipped loading skill(s) due to invalid SKILL.md"?** Your Codex skill descriptions are stale. Fix: `cd ~/.codex/skills/gstack && git pull && ./setup --host codex` — or for repo-local installs: `cd "$(readlink -f .agents/skills/gstack)" && git pull && ./setup --host codex`
+
 **Windows users:** gstack works on Windows 11 via Git Bash or WSL. Node.js is required in addition to Bun — Bun has a known bug with Playwright's pipe transport on Windows ([bun#4253](https://github.com/oven-sh/bun/issues/4253)). The browse server automatically falls back to Node.js. Make sure both `bun` and `node` are on your PATH.
 
 **Claude says it can't see the skills?** Make sure your project's `CLAUDE.md` has a gstack section. Add this:

M TODOS.md => TODOS.md +14 -0

@@ 489,6 489,20 @@ Shipped in v0.8.3. Step 8.5 added to `/ship` — after creating the PR, `/ship` 
 **Depends on:** gstack-diff-scope (shipped)
 
 
+## Codex
+
+### Codex→Claude reverse buddy check skill
+
+**What:** A Codex-native skill (`.agents/skills/gstack-claude/SKILL.md`) that runs `claude -p` to get an independent second opinion from Claude — the reverse of what `/codex` does today from Claude Code.
+
+**Why:** Codex users deserve the same cross-model challenge that Claude users get via `/codex`. Currently the flow is one-way (Claude→Codex). Codex users have no way to get a Claude second opinion.
+
+**Context:** The `/codex` skill template (`codex/SKILL.md.tmpl`) shows the pattern — it wraps `codex exec` with JSONL parsing, timeout handling, and structured output. The reverse skill would wrap `claude -p` with similar infrastructure. Would be generated into `.agents/skills/gstack-claude/` by `gen-skill-docs --host codex`.
+
+**Effort:** M (human: ~2 weeks / CC: ~30 min)
+**Priority:** P1
+**Depends on:** None
+
 ## Completeness
 
 ### Completeness metrics dashboard

M VERSION => VERSION +1 -1

@@ 1,1 1,1 @@
-0.11.8.0
+0.11.9.0

M bin/gstack-update-check => bin/gstack-update-check +18 -0

@@ 31,6 31,24 @@ if [ "$_UC" = "false" ]; then
   exit 0
 fi
 
+# ─── Migration: fix stale Codex descriptions (one-time) ───────
+# Existing installs may have .agents/skills/*/SKILL.md files with oversized
+# descriptions (>1024 chars) that Codex rejects. We can't regenerate from
+# the runtime root (no bun/scripts), so delete oversized files — the next
+# ./setup or /gstack-upgrade will regenerate them properly.
+# Marker file ensures this runs at most once per install.
+if [ ! -f "$STATE_DIR/.codex-desc-healed" ]; then
+  for _AGENTS_SKILL in "$GSTACK_DIR"/.agents/skills/*/SKILL.md; do
+    [ -f "$_AGENTS_SKILL" ] || continue
+    _DESC=$(awk '/^---$/{n++;next}n==1&&/^description:/{d=1;sub(/^description:\s*/,"");if(length>0)print;next}d&&/^  /{sub(/^  /,"");print;next}d{d=0}' "$_AGENTS_SKILL" | wc -c | tr -d ' ')
+    if [ "${_DESC:-0}" -gt 1024 ]; then
+      rm -f "$_AGENTS_SKILL"
+    fi
+  done
+  mkdir -p "$STATE_DIR"
+  touch "$STATE_DIR/.codex-desc-healed"
+fi
+
 # ─── Snooze helper ──────────────────────────────────────────
 # check_snooze <remote_version>
 #   Returns 0 if snoozed (should stay quiet), 1 if not snoozed (should output).

M package.json => package.json +1 -1

@@ 1,6 1,6 @@
 {
   "name": "gstack",
-  "version": "0.9.8.0",
+  "version": "0.11.9.0",
   "description": "Garry's Stack — Claude Code skills + fast headless browser. One repo, one install, entire AI engineering workflow.",
   "license": "MIT",
   "type": "module",

M scripts/gen-skill-docs.ts => scripts/gen-skill-docs.ts +9 -0

@@ 2910,6 2910,15 @@ function transformFrontmatter(content: string, host: Host): string {
   const body = content.slice(fmEnd + 4); // includes the leading \n after ---
   const { name, description } = extractNameAndDescription(content);
 
+  // Codex 1024-char description limit — fail build, don't ship broken skills
+  const MAX_DESC = 1024;
+  if (description.length > MAX_DESC) {
+    throw new Error(
+      `Codex description for "${name}" is ${description.length} chars (max ${MAX_DESC}). ` +
+      `Compress the description in the .tmpl file.`
+    );
+  }
+
   // Re-emit Codex frontmatter (name + description only)
   const indentedDesc = description.split('\n').map(l => `  ${l}`).join('\n');
   const codexFm = `---\nname: ${name}\ndescription: |\n${indentedDesc}\n---`;

M setup => setup +5 -9

@@ 128,17 128,13 @@ if [ ! -x "$BROWSE_BIN" ]; then
   exit 1
 fi
 
-# 1b. Generate .agents/ Codex skill docs if missing or stale
+# 1b. Generate .agents/ Codex skill docs — always regenerate to prevent stale descriptions.
 # .agents/ is no longer committed — generated at setup time from .tmpl templates.
-# bun run build already does this, but we need it when NEEDS_BUILD=0 (binary is fresh
-# but .agents/ hasn't been generated yet, e.g., fresh clone).
+# bun run build already does this, but we need it when NEEDS_BUILD=0 (binary is fresh).
+# Always regenerate: generation is fast (<2s) and mtime-based staleness checks are fragile
+# (miss stale files when timestamps match after clone/checkout/upgrade).
 AGENTS_DIR="$SOURCE_GSTACK_DIR/.agents/skills"
-NEEDS_AGENTS_GEN=0
-if [ ! -d "$AGENTS_DIR" ]; then
-  NEEDS_AGENTS_GEN=1
-elif [ -n "$(find "$SOURCE_GSTACK_DIR" -maxdepth 2 -name 'SKILL.md.tmpl' -newer "$AGENTS_DIR" -print -quit 2>/dev/null)" ]; then
-  NEEDS_AGENTS_GEN=1
-fi
+NEEDS_AGENTS_GEN=1
 
 if [ "$NEEDS_AGENTS_GEN" -eq 1 ] && [ "$NEEDS_BUILD" -eq 0 ]; then
   echo "Generating .agents/ skill docs..."

M test/codex-e2e.test.ts => test/codex-e2e.test.ts +3 -0

@@ 139,6 139,9 @@ describeCodex('Codex E2E', () => {
 
     expect(result.exitCode).toBe(0);
     expect(result.output.length).toBeGreaterThan(0);
+    // Skill loading errors mean our generated SKILL.md files are broken
+    expect(result.stderr).not.toContain('invalid');
+    expect(result.stderr).not.toContain('Skipped loading');
     // The output should reference the skill name in some form
     const outputLower = result.output.toLowerCase();
     expect(

M test/gen-skill-docs.test.ts => test/gen-skill-docs.test.ts +19 -0

@@ 139,6 139,25 @@ describe('gen-skill-docs', () => {
     }
   });
 
+  test(`every Codex SKILL.md description stays within ${MAX_SKILL_DESCRIPTION_LENGTH} chars`, () => {
+    const agentsDir = path.join(ROOT, '.agents', 'skills');
+    if (!fs.existsSync(agentsDir)) return; // skip if not generated
+    for (const entry of fs.readdirSync(agentsDir, { withFileTypes: true })) {
+      if (!entry.isDirectory()) continue;
+      const skillMd = path.join(agentsDir, entry.name, 'SKILL.md');
+      if (!fs.existsSync(skillMd)) continue;
+      const content = fs.readFileSync(skillMd, 'utf-8');
+      const description = extractDescription(content);
+      expect(description.length).toBeLessThanOrEqual(MAX_SKILL_DESCRIPTION_LENGTH);
+    }
+  });
+
+  test('package.json version matches VERSION file', () => {
+    const pkg = JSON.parse(fs.readFileSync(path.join(ROOT, 'package.json'), 'utf-8'));
+    const version = fs.readFileSync(path.join(ROOT, 'VERSION'), 'utf-8').trim();
+    expect(pkg.version).toBe(version);
+  });
+
   test('generated files are fresh (match --dry-run)', () => {
     const result = Bun.spawnSync(['bun', 'run', 'scripts/gen-skill-docs.ts', '--dry-run'], {
       cwd: ROOT,

M test/helpers/codex-session-runner.ts => test/helpers/codex-session-runner.ts +3 -0

@@ 27,6 27,7 @@ export interface CodexResult {
   durationMs: number;       // Wall clock time
   sessionId: string | null; // Thread ID for session continuity
   rawLines: string[];       // Raw JSONL lines for debugging
+  stderr: string;           // Stderr output (skill loading errors, auth failures)
 }
 
 // --- JSONL parser (ported from Python in codex/SKILL.md.tmpl) ---


@@ 167,6 168,7 @@ export async function runCodexSkill(opts: {
       durationMs: Date.now() - startTime,
       sessionId: null,
       rawLines: [],
+      stderr: '',
     };
   }
 


@@ 282,6 284,7 @@ export async function runCodexSkill(opts: {
       durationMs,
       sessionId: parsed.sessionId,
       rawLines: collectedLines,
+      stderr,
     };
   } finally {
     // Clean up temp HOME