@@ 535,6 535,199 @@ Write your summary to ${benefitsDir}/benefits-summary.md`,
}, 180_000);
});
+// --- Plan Review Report E2E ---
+// Verifies that plan-eng-review writes a "## GSTACK REVIEW REPORT" section
+// to the bottom of the plan file (the living review status footer).
+
+describeIfSelected('Plan Review Report E2E', ['plan-review-report'], () => {
+ let planDir: string;
+
+ beforeAll(() => {
+ planDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-review-report-'));
+ const run = (cmd: string, args: string[]) =>
+ spawnSync(cmd, args, { cwd: planDir, stdio: 'pipe', timeout: 5000 });
+
+ run('git', ['init', '-b', 'main']);
+ run('git', ['config', 'user.email', 'test@test.com']);
+ run('git', ['config', 'user.name', 'Test']);
+
+ fs.writeFileSync(path.join(planDir, 'plan.md'), `# Plan: Add Notifications System
+
+## Context
+We're building a real-time notification system for our SaaS app.
+
+## Changes
+1. WebSocket server for push notifications
+2. Notification preferences API
+3. Email digest fallback for offline users
+4. PostgreSQL table for notification storage
+
+## Architecture
+- WebSocket: Socket.io on Express
+- Queue: Bull + Redis for email digests
+- Storage: PostgreSQL notifications table
+- Frontend: React toast component
+
+## Open questions
+- Retry policy for failed WebSocket delivery?
+- Max notifications stored per user?
+`);
+
+ run('git', ['add', '.']);
+ run('git', ['commit', '-m', 'add plan']);
+
+ // Copy plan-eng-review skill
+ fs.mkdirSync(path.join(planDir, 'plan-eng-review'), { recursive: true });
+ fs.copyFileSync(
+ path.join(ROOT, 'plan-eng-review', 'SKILL.md'),
+ path.join(planDir, 'plan-eng-review', 'SKILL.md'),
+ );
+ });
+
+ afterAll(() => {
+ try { fs.rmSync(planDir, { recursive: true, force: true }); } catch {}
+ });
+
+ test('/plan-eng-review writes GSTACK REVIEW REPORT to plan file', async () => {
+ const result = await runSkillTest({
+ prompt: `Read plan-eng-review/SKILL.md for the review workflow.
+
+Read plan.md — that's the plan to review. This is a standalone plan document, not a codebase — skip any codebase exploration steps.
+
+Proceed directly to the full review. Skip any AskUserQuestion calls — this is non-interactive.
+Skip the preamble bash block, lake intro, telemetry, and contributor mode sections.
+
+CRITICAL REQUIREMENT: plan.md IS the plan file for this review session. After completing your review, you MUST write a "## GSTACK REVIEW REPORT" section to the END of plan.md, exactly as described in the "Plan File Review Report" section of SKILL.md. If gstack-review-read is not available or returns NO_REVIEWS, write the placeholder table with all four review rows (CEO, Codex, Eng, Design). Use the Edit tool to append to plan.md — do NOT overwrite the existing plan content.
+
+This review report at the bottom of the plan is the MOST IMPORTANT deliverable of this test.`,
+ workingDirectory: planDir,
+ maxTurns: 20,
+ timeout: 360_000,
+ testName: 'plan-review-report',
+ runId,
+ model: 'claude-opus-4-6',
+ });
+
+ logCost('/plan-eng-review report', result);
+ recordE2E(evalCollector, '/plan-review-report', 'Plan Review Report E2E', result, {
+ passed: ['success', 'error_max_turns'].includes(result.exitReason),
+ });
+ expect(['success', 'error_max_turns']).toContain(result.exitReason);
+
+ // Verify the review report was written to the plan file
+ const planContent = fs.readFileSync(path.join(planDir, 'plan.md'), 'utf-8');
+
+ // Original plan content should still be present
+ expect(planContent).toContain('# Plan: Add Notifications System');
+ expect(planContent).toContain('WebSocket');
+
+ // Review report section must exist
+ expect(planContent).toContain('## GSTACK REVIEW REPORT');
+
+ // Report should be at the bottom of the file
+ const reportIndex = planContent.lastIndexOf('## GSTACK REVIEW REPORT');
+ const afterReport = planContent.slice(reportIndex);
+
+ // Should contain the review table with standard rows
+ expect(afterReport).toMatch(/\|\s*Review\s*\|/);
+ expect(afterReport).toContain('CEO Review');
+ expect(afterReport).toContain('Eng Review');
+ expect(afterReport).toContain('Design Review');
+
+ console.log('Plan review report found at bottom of plan.md');
+ }, 420_000);
+});
+
+// --- Codex Offering E2E ---
+// Verifies that Codex is properly offered (with availability check, user prompt,
+// and fallback) in office-hours, plan-ceo-review, plan-design-review, plan-eng-review.
+
+describeIfSelected('Codex Offering E2E', [
+ 'codex-offered-office-hours', 'codex-offered-ceo-review',
+ 'codex-offered-design-review', 'codex-offered-eng-review',
+], () => {
+ let testDir: string;
+
+ beforeAll(() => {
+ testDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-e2e-codex-offer-'));
+ const run = (cmd: string, args: string[]) =>
+ spawnSync(cmd, args, { cwd: testDir, stdio: 'pipe', timeout: 5000 });
+
+ run('git', ['init', '-b', 'main']);
+ run('git', ['config', 'user.email', 'test@test.com']);
+ run('git', ['config', 'user.name', 'Test']);
+ fs.writeFileSync(path.join(testDir, 'README.md'), '# Test Project\n');
+ run('git', ['add', '.']);
+ run('git', ['commit', '-m', 'init']);
+
+ // Copy all 4 SKILL.md files
+ for (const skill of ['office-hours', 'plan-ceo-review', 'plan-design-review', 'plan-eng-review']) {
+ fs.mkdirSync(path.join(testDir, skill), { recursive: true });
+ fs.copyFileSync(
+ path.join(ROOT, skill, 'SKILL.md'),
+ path.join(testDir, skill, 'SKILL.md'),
+ );
+ }
+ });
+
+ afterAll(() => {
+ try { fs.rmSync(testDir, { recursive: true, force: true }); } catch {}
+ });
+
+ async function checkCodexOffering(skill: string, testName: string, featureName: string) {
+ const result = await runSkillTest({
+ prompt: `Read ${skill}/SKILL.md. Search for ALL sections related to "codex", "outside voice", or "second opinion".
+
+Summarize the Codex/${featureName} integration — answer these specific questions:
+1. How is Codex availability checked? (what exact bash command?)
+2. How is the user prompted? (via AskUserQuestion? what are the options?)
+3. What happens when Codex is NOT available? (fallback to subagent? skip entirely?)
+4. Is this step blocking (gates the workflow) or optional (can be skipped)?
+5. What prompt/context is sent to Codex?
+
+Write your summary to ${testDir}/${testName}-summary.md`,
+ workingDirectory: testDir,
+ maxTurns: 8,
+ timeout: 120_000,
+ testName,
+ runId,
+ });
+
+ logCost(`/${skill} codex offering`, result);
+ recordE2E(evalCollector, `/${testName}`, 'Codex Offering E2E', result);
+ expect(result.exitReason).toBe('success');
+
+ const summaryPath = path.join(testDir, `${testName}-summary.md`);
+ expect(fs.existsSync(summaryPath)).toBe(true);
+
+ const summary = fs.readFileSync(summaryPath, 'utf-8').toLowerCase();
+ // All skills should have codex availability check
+ expect(summary).toMatch(/which codex/);
+ // All skills should have fallback behavior
+ expect(summary).toMatch(/fallback|subagent|unavailable|not available|skip/);
+ // All skills should show it's optional/non-blocking
+ expect(summary).toMatch(/optional|non.?blocking|skip|not.*required/);
+
+ console.log(`${skill}: Codex offering verified`);
+ }
+
+ testConcurrentIfSelected('codex-offered-office-hours', async () => {
+ await checkCodexOffering('office-hours', 'codex-offered-office-hours', 'second opinion');
+ }, 180_000);
+
+ testConcurrentIfSelected('codex-offered-ceo-review', async () => {
+ await checkCodexOffering('plan-ceo-review', 'codex-offered-ceo-review', 'outside voice');
+ }, 180_000);
+
+ testConcurrentIfSelected('codex-offered-design-review', async () => {
+ await checkCodexOffering('plan-design-review', 'codex-offered-design-review', 'design outside voices');
+ }, 180_000);
+
+ testConcurrentIfSelected('codex-offered-eng-review', async () => {
+ await checkCodexOffering('plan-eng-review', 'codex-offered-eng-review', 'outside voice');
+ }, 180_000);
+});
+
// Module-level afterAll — finalize eval collector after all tests complete
afterAll(async () => {
await finalizeEvalCollector(evalCollector);