~cytrogen/gstack: fix: Codex description limit + wrong-repo bug (v0.11.19.0) (#471)

12 files changed, 93 insertions(+), 20 deletions(-)

M CHANGELOG.md
M SKILL.md
M SKILL.md.tmpl
M VERSION
M autoplan/SKILL.md
M autoplan/SKILL.md.tmpl
M codex/SKILL.md
M codex/SKILL.md.tmpl
M package.json
M scripts/resolvers/design.ts
M scripts/resolvers/review.ts
M test/gen-skill-docs.test.ts

M CHANGELOG.md => CHANGELOG.md +11 -0

@@ 1,5 1,16 @@
 # Changelog
 
+## [0.11.19.0] - 2026-03-24
+
+### Fixed
+
+- **Auto-upgrade no longer breaks.** The root gstack skill description was 7 characters from the Codex 1024-char limit. Every new skill addition pushed it closer. Moved the skill routing table from the description (bounded) to the body (unlimited), shrinking the description from 1017 to 409 chars and leaving 615 chars of headroom.
+- **Codex reviews now run in the correct repo.** In multi-workspace setups (like Conductor), Codex could pick up the wrong project directory. All `codex exec` calls now explicitly set `-C` to the git root.
+
+### Added
+
+- **900-char early warning test.** A new test fails if any Codex skill description exceeds 900 chars, catching description bloat before it breaks builds.
+
 ## [0.11.18.2] - 2026-03-24
 
 ### Fixed

M SKILL.md => SKILL.md +22 -0

@@ 297,6 297,28 @@ If `PROACTIVE` is `false`: do NOT proactively suggest other gstack skills during
 Only run skills the user explicitly invokes. This preference persists across sessions via
 `gstack-config`.
 
+If `PROACTIVE` is `true` (default): suggest adjacent gstack skills when relevant to the
+user's workflow stage:
+- Brainstorming → /office-hours
+- Strategy → /plan-ceo-review
+- Architecture → /plan-eng-review
+- Design → /plan-design-review or /design-consultation
+- Auto-review → /autoplan
+- Debugging → /investigate
+- QA → /qa
+- Code review → /review
+- Visual audit → /design-review
+- Shipping → /ship
+- Docs → /document-release
+- Retro → /retro
+- Second opinion → /codex
+- Prod safety → /careful or /guard
+- Scoped edits → /freeze or /unfreeze
+- Upgrades → /gstack-upgrade
+
+If the user opts out of suggestions, run `gstack-config set proactive false`.
+If they opt back in, run `gstack-config set proactive true`.
+
 # gstack browse: QA Testing & Dogfooding
 
 Persistent headless Chromium. First call auto-starts (~3s), then ~100-200ms per command.

M SKILL.md.tmpl => SKILL.md.tmpl +22 -0

@@ 20,6 20,28 @@ If `PROACTIVE` is `false`: do NOT proactively suggest other gstack skills during
 Only run skills the user explicitly invokes. This preference persists across sessions via
 `gstack-config`.
 
+If `PROACTIVE` is `true` (default): suggest adjacent gstack skills when relevant to the
+user's workflow stage:
+- Brainstorming → /office-hours
+- Strategy → /plan-ceo-review
+- Architecture → /plan-eng-review
+- Design → /plan-design-review or /design-consultation
+- Auto-review → /autoplan
+- Debugging → /investigate
+- QA → /qa
+- Code review → /review
+- Visual audit → /design-review
+- Shipping → /ship
+- Docs → /document-release
+- Retro → /retro
+- Second opinion → /codex
+- Prod safety → /careful or /guard
+- Scoped edits → /freeze or /unfreeze
+- Upgrades → /gstack-upgrade
+
+If the user opts out of suggestions, run `gstack-config set proactive false`.
+If they opt back in, run `gstack-config set proactive true`.
+
 # gstack browse: QA Testing & Dogfooding
 
 Persistent headless Chromium. First call auto-starts (~3s), then ~100-200ms per command.

M VERSION => VERSION +1 -1

@@ 1,1 1,1 @@
-0.11.18.2
+0.11.19.0

M autoplan/SKILL.md => autoplan/SKILL.md +3 -3

@@ 547,7 547,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
   What alternatives were dismissed too quickly? What competitive or market risks are
   unaddressed? What scope decisions will look foolish in 6 months? Be adversarial.
   No compliments. Just the strategic blind spots.
-  File: <plan_path>" -s read-only --enable web_search_cached`
+  File: <plan_path>" -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached`
   Timeout: 10 minutes
 
   **Claude CEO subagent** (via Agent tool):


@@ 658,7 658,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
   accessibility requirements (keyboard nav, contrast, touch targets) specified or
   aspirational? Does the plan describe specific UI decisions or generic patterns?
   What design decisions will haunt the implementer if left ambiguous?
-  Be opinionated. No hedging." -s read-only --enable web_search_cached`
+  Be opinionated. No hedging." -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached`
   Timeout: 10 minutes
 
   **Claude design subagent** (via Agent tool):


@@ 723,7 723,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
   CEO: <insert CEO consensus table summary — key concerns, DISAGREEs>
   Design: <insert Design consensus table summary, or 'skipped, no UI scope'>
 
-  File: <plan_path>" -s read-only --enable web_search_cached`
+  File: <plan_path>" -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached`
   Timeout: 10 minutes
 
   **Claude eng subagent** (via Agent tool):

M autoplan/SKILL.md.tmpl => autoplan/SKILL.md.tmpl +3 -3

@@ 204,7 204,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
   What alternatives were dismissed too quickly? What competitive or market risks are
   unaddressed? What scope decisions will look foolish in 6 months? Be adversarial.
   No compliments. Just the strategic blind spots.
-  File: <plan_path>" -s read-only --enable web_search_cached`
+  File: <plan_path>" -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached`
   Timeout: 10 minutes
 
   **Claude CEO subagent** (via Agent tool):


@@ 315,7 315,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
   accessibility requirements (keyboard nav, contrast, touch targets) specified or
   aspirational? Does the plan describe specific UI decisions or generic patterns?
   What design decisions will haunt the implementer if left ambiguous?
-  Be opinionated. No hedging." -s read-only --enable web_search_cached`
+  Be opinionated. No hedging." -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached`
   Timeout: 10 minutes
 
   **Claude design subagent** (via Agent tool):


@@ 380,7 380,7 @@ Override: every AskUserQuestion → auto-decide using the 6 principles.
   CEO: <insert CEO consensus table summary — key concerns, DISAGREEs>
   Design: <insert Design consensus table summary, or 'skipped, no UI scope'>
 
-  File: <plan_path>" -s read-only --enable web_search_cached`
+  File: <plan_path>" -C "$(git rev-parse --show-toplevel)" -s read-only --enable web_search_cached`
   Timeout: 10 minutes
 
   **Claude eng subagent** (via Agent tool):

M codex/SKILL.md => codex/SKILL.md +3 -3

@@ 518,7 518,7 @@ With focus (e.g., "security"):
 
 2. Run codex exec with **JSONL output** to capture reasoning traces and tool calls (5-minute timeout):
 ```bash
-codex exec "<prompt>" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>/dev/null | python3 -c "
+codex exec "<prompt>" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>/dev/null | python3 -c "
 import sys, json
 for line in sys.stdin:
     line = line.strip()


@@ 603,7 603,7 @@ THE PLAN:
 
 For a **new session:**
 ```bash
-codex exec "<prompt>" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
+codex exec "<prompt>" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
 import sys, json
 for line in sys.stdin:
     line = line.strip()


@@ 636,7 636,7 @@ for line in sys.stdin:
 
 For a **resumed session** (user chose "Continue"):
 ```bash
-codex exec resume <session-id> "<prompt>" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
+codex exec resume <session-id> "<prompt>" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
 <same python streaming parser as above>
 "
 ```

M codex/SKILL.md.tmpl => codex/SKILL.md.tmpl +3 -3

@@ 159,7 159,7 @@ With focus (e.g., "security"):
 
 2. Run codex exec with **JSONL output** to capture reasoning traces and tool calls (5-minute timeout):
 ```bash
-codex exec "<prompt>" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>/dev/null | python3 -c "
+codex exec "<prompt>" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>/dev/null | python3 -c "
 import sys, json
 for line in sys.stdin:
     line = line.strip()


@@ 244,7 244,7 @@ THE PLAN:
 
 For a **new session:**
 ```bash
-codex exec "<prompt>" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
+codex exec "<prompt>" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
 import sys, json
 for line in sys.stdin:
     line = line.strip()


@@ 277,7 277,7 @@ for line in sys.stdin:
 
 For a **resumed session** (user chose "Continue"):
 ```bash
-codex exec resume <session-id> "<prompt>" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
+codex exec resume <session-id> "<prompt>" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached --json 2>"$TMPERR" | python3 -c "
 <same python streaming parser as above>
 "
 ```

M package.json => package.json +1 -1

@@ 1,6 1,6 @@
 {
   "name": "gstack",
-  "version": "0.11.17.0",
+  "version": "0.11.19.0",
   "description": "Garry's Stack — Claude Code skills + fast headless browser. One repo, one install, entire AI engineering workflow.",
   "license": "MIT",
   "type": "module",

M scripts/resolvers/design.ts => scripts/resolvers/design.ts +3 -3

@@ 17,7 17,7 @@ If Codex is available, run a lightweight design check on the diff:
 
 \`\`\`bash
 TMPERR_DRL=$(mktemp /tmp/codex-drl-XXXXXXXX)
-codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): ${litmusList} Flag any hard rejections: ${rejectionList} 5 most important design findings only. Reference file:line." -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL"
+codex exec "Review the git diff on this branch. Run 7 litmus checks (YES/NO each): ${litmusList} Flag any hard rejections: ${rejectionList} 5 most important design findings only. Reference file:line." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL"
 \`\`\`
 
 Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr:


@@ 467,7 467,7 @@ If user chooses A, launch both voices simultaneously:
 1. **Codex** (via Bash, \`model_reasoning_effort="medium"\`):
 \`\`\`bash
 TMPERR_SKETCH=$(mktemp /tmp/codex-sketch-XXXXXXXX)
-codex exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_SKETCH"
+codex exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_SKETCH"
 \`\`\`
 Use a 5-minute timeout (\`timeout: 300000\`). After completion: \`cat "$TMPERR_SKETCH" && rm -f "$TMPERR_SKETCH"\`
 


@@ 636,7 636,7 @@ which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
 1. **Codex design voice** (via Bash):
 \`\`\`bash
 TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX)
-codex exec "${escapedCodexPrompt}" -s read-only -c 'model_reasoning_effort="${reasoningEffort}"' --enable web_search_cached 2>"$TMPERR_DESIGN"
+codex exec "${escapedCodexPrompt}" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="${reasoningEffort}"' --enable web_search_cached 2>"$TMPERR_DESIGN"
 \`\`\`
 Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr:
 \`\`\`bash

M scripts/resolvers/review.ts => scripts/resolvers/review.ts +3 -3

@@ 286,7 286,7 @@ Write the full prompt (context block + instructions) to this file. Use the mode-
 
 \`\`\`bash
 TMPERR_OH=$(mktemp /tmp/codex-oh-err-XXXXXXXX)
-codex exec "$(cat "$CODEX_PROMPT_FILE")" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_OH"
+codex exec "$(cat "$CODEX_PROMPT_FILE")" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_OH"
 \`\`\`
 
 Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr:


@@ 370,7 370,7 @@ Claude's structured review already ran. Now add a **cross-model adversarial chal
 
 \`\`\`bash
 TMPERR_ADV=$(mktemp /tmp/codex-adv-XXXXXXXX)
-codex exec "Review the changes on this branch against the base branch. Run git diff origin/<base> to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_ADV"
+codex exec "Review the changes on this branch against the base branch. Run git diff origin/<base> to see the diff. Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Find edge cases, race conditions, security holes, resource leaks, failure modes, and silent data corruption paths. Be adversarial. Be thorough. No compliments — just the problems." -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_ADV"
 \`\`\`
 
 Set the Bash tool's \`timeout\` parameter to \`300000\` (5 minutes). Do NOT use the \`timeout\` shell command — it doesn't exist on macOS. After the command completes, read stderr:


@@ 525,7 525,7 @@ THE PLAN:
 
 \`\`\`bash
 TMPERR_PV=$(mktemp /tmp/codex-planreview-XXXXXXXX)
-codex exec "<prompt>" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_PV"
+codex exec "<prompt>" -C "$(git rev-parse --show-toplevel)" -s read-only -c 'model_reasoning_effort="xhigh"' --enable web_search_cached 2>"$TMPERR_PV"
 \`\`\`
 
 Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr:

M test/gen-skill-docs.test.ts => test/gen-skill-docs.test.ts +18 -0

@@ 152,6 152,24 @@ describe('gen-skill-docs', () => {
     }
   });
 
+  test('every Codex SKILL.md description stays under 900-char warning threshold', () => {
+    const WARN_THRESHOLD = 900; // early-warning margin; the limit reported in the message is MAX_SKILL_DESCRIPTION_LENGTH
+    const agentsDir = path.join(ROOT, '.agents', 'skills');
+    if (!fs.existsSync(agentsDir)) return; // no .agents/skills directory on disk: nothing to check
+    const violations: string[] = [];
+    for (const entry of fs.readdirSync(agentsDir, { withFileTypes: true })) {
+      if (!entry.isDirectory()) continue;
+      const skillMd = path.join(agentsDir, entry.name, 'SKILL.md');
+      if (!fs.existsSync(skillMd)) continue; // a skill directory without SKILL.md is skipped, not failed
+      const content = fs.readFileSync(skillMd, 'utf-8');
+      const description = extractDescription(content);
+      if (description.length > WARN_THRESHOLD) {
+        violations.push(`${entry.name}: ${description.length} chars (warning threshold ${WARN_THRESHOLD}, limit ${MAX_SKILL_DESCRIPTION_LENGTH}, ${MAX_SKILL_DESCRIPTION_LENGTH - description.length} remaining)`);
+      }
+    }
+    expect(violations).toEqual([]); // comparing the whole list puts every offending skill in the failure output
+  });
+
   test('package.json version matches VERSION file', () => {
     const pkg = JSON.parse(fs.readFileSync(path.join(ROOT, 'package.json'), 'utf-8'));
     const version = fs.readFileSync(path.join(ROOT, 'VERSION'), 'utf-8').trim();