~cytrogen/gstack: Merge remote-tracking branch 'origin/main' into v0.3.5-qa-upgrades

10 files changed, 232 insertions(+), 101 deletions(-)

M CHANGELOG.md
M SKILL.md
M browse/SKILL.md
M browse/src/commands.ts
M browse/src/server.ts
M browse/src/snapshot.ts
M scripts/gen-skill-docs.ts
M test/gen-skill-docs.test.ts
M test/skill-llm-eval.test.ts
M test/skill-validation.test.ts

M CHANGELOG.md => CHANGELOG.md +16 -0

@@ 1,5 1,21 @@
 # Changelog
 
+## Unreleased — 2026-03-14
+
+### Changed
+- Enriched 14 command descriptions with specific arg formats, valid values, error behavior, and return types
+- Fixed `header` usage from `<name> <value>` to `<name>:<value>` (matching actual implementation)
+- Added `cookie` usage syntax: `cookie <name>=<value>`
+- Enriched 4 snapshot flag descriptions with defaults, output paths, and behavior details
+- Snapshot flags section now shows long flag names (`-i / --interactive`) alongside short
+- Added ref numbering explanation and output format example to snapshot docs
+- Replaced hand-maintained server.ts help text with auto-generated `generateHelpText()` from COMMAND_DESCRIPTIONS
+- Upgraded LLM eval judge from Haiku to Sonnet 4.6 for more stable scoring
+
+### Added
+- Usage string consistency test: cross-checks `Usage:` patterns in implementation against COMMAND_DESCRIPTIONS
+- Pipe guard test: ensures no command description contains `|` (would break markdown tables)
+
 ## 0.3.3 — 2026-03-13
 
 ### Added

M SKILL.md => SKILL.md +37 -26

@@ 232,25 232,36 @@ $B css ".button" "background-color"
 The snapshot is your primary tool for understanding and interacting with pages.
 
 ```
--i        Interactive elements only (buttons, links, inputs) with @e refs
--c        Compact (no empty structural nodes)
--d <N>    Limit depth
--s <sel>  Scope to CSS selector
--D        Diff against previous snapshot (what changed?)
--a        Annotated screenshot with ref labels
--o <path> Output path for screenshot
--C        Cursor-interactive elements (@c refs — divs with pointer, onclick)
+-i        --interactive           Interactive elements only (buttons, links, inputs) with @e refs
+-c        --compact               Compact (no empty structural nodes)
+-d <N>    --depth                 Limit tree depth (0 = root only, default: unlimited)
+-s <sel>  --selector              Scope to CSS selector
+-D        --diff                  Unified diff against previous snapshot (first call stores baseline)
+-a        --annotate              Annotated screenshot with red overlay boxes and ref labels
+-o <path> --output                Output path for annotated screenshot (default: /tmp/browse-annotated.png)
+-C        --cursor-interactive    Cursor-interactive elements (@c refs — divs with pointer, onclick)
 ```
 
-Combine flags: `$B snapshot -i -a -C -o /tmp/annotated.png`
+All flags can be combined freely. `-o` only applies when `-a` is also used.
+Example: `$B snapshot -i -a -C -o /tmp/annotated.png`
 
-After snapshot, use @refs everywhere:
+**Ref numbering:** @e refs are assigned sequentially (@e1, @e2, ...) in tree order.
+@c refs from `-C` are numbered separately (@c1, @c2, ...).
+
+After snapshot, use @refs as selectors in any command:
 ```bash
 $B click @e3       $B fill @e4 "value"     $B hover @e1
 $B html @e2        $B css @e5 "color"      $B attrs @e6
 $B click @c1       # cursor-interactive ref (from -C)
 ```
 
+**Output format:** indented accessibility tree with @ref IDs, one element per line.
+```
+  @e1 [heading] "Welcome" [level=1]
+  @e2 [textbox] "Email"
+  @e3 [button] "Submit"
+```
+
 Refs are invalidated on navigation — run `snapshot` again after `goto`.
 
 ## Command Reference


@@ 269,7 280,7 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
 |---------|-------------|
 | `accessibility` | Full ARIA tree |
 | `forms` | Form fields as JSON |
-| `html [selector]` | innerHTML |
+| `html [selector]` | innerHTML of selector (throws if not found), or full page HTML if no selector given |
 | `links` | All links as "text → href" |
 | `text` | Cleaned page text |
 


@@ 277,22 288,22 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
 | Command | Description |
 |---------|-------------|
 | `click <sel>` | Click element |
-| `cookie` | Set cookie |
+| `cookie <name>=<value>` | Set cookie on current page domain |
 | `cookie-import <json>` | Import cookies from JSON file |
-| `cookie-import-browser [browser] [--domain d]` | Import cookies from real browser (opens picker UI, or direct with --domain) |
-| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt |
+| `cookie-import-browser [browser] [--domain d]` | Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import) |
+| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt. Optional text is sent as the prompt response |
 | `dialog-dismiss` | Auto-dismiss next dialog |
 | `fill <sel> <val>` | Fill input |
-| `header <name> <value>` | Set custom request header |
+| `header <name>:<value>` | Set custom request header (colon-separated, sensitive values auto-redacted) |
 | `hover <sel>` | Hover element |
-| `press <key>` | Press key (Enter, Tab, Escape, etc.) |
-| `scroll [sel]` | Scroll element into view |
-| `select <sel> <val>` | Select dropdown option |
+| `press <key>` | Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter |
+| `scroll [sel]` | Scroll element into view, or scroll to page bottom if no selector |
+| `select <sel> <val>` | Select dropdown option by value, label, or visible text |
 | `type <text>` | Type into focused element |
-| `upload <sel> <file...>` | Upload file(s) |
+| `upload <sel> <file> [file2...]` | Upload file(s) |
 | `useragent <string>` | Set user agent |
 | `viewport <WxH>` | Set viewport size |
-| `wait <sel|--networkidle|--load>` | Wait for element/condition |
+| `wait <sel|--networkidle|--load>` | Wait for element, network idle, or page load (timeout: 15s) |
 
 ### Inspection
 | Command | Description |


@@ 302,30 313,30 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
 | `cookies` | All cookies as JSON |
 | `css <sel> <prop>` | Computed CSS value |
 | `dialog [--clear]` | Dialog messages |
-| `eval <file>` | Run JS file |
+| `eval <file>` | Run JavaScript from file and return result as string (path must be under /tmp or cwd) |
 | `is <prop> <sel>` | State check (visible/hidden/enabled/disabled/checked/editable/focused) |
-| `js <expr>` | Run JavaScript |
+| `js <expr>` | Run JavaScript expression and return result as string |
 | `network [--clear]` | Network requests |
 | `perf` | Page load timings |
-| `storage [set k v]` | localStorage + sessionStorage |
+| `storage [set k v]` | Read all localStorage + sessionStorage as JSON, or set <key> <value> to write localStorage |
 
 ### Visual
 | Command | Description |
 |---------|-------------|
 | `diff <url1> <url2>` | Text diff between pages |
 | `pdf [path]` | Save as PDF |
-| `responsive [prefix]` | Mobile/tablet/desktop screenshots |
+| `responsive [prefix]` | Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc. |
 | `screenshot [path]` | Save screenshot |
 
 ### Snapshot
 | Command | Description |
 |---------|-------------|
-| `snapshot [flags]` | Accessibility tree with @refs |
+| `snapshot [flags]` | Accessibility tree with @e refs for element selection. Flags: -i interactive only, -c compact, -d N depth limit, -s sel scope, -D diff vs previous, -a annotated screenshot, -o path output, -C cursor-interactive @c refs |
 
 ### Meta
 | Command | Description |
 |---------|-------------|
-| `chain` | Multi-command from JSON stdin |
+| `chain` | Run commands from JSON stdin. Format: [["cmd","arg1",...],...] |
 
 ### Tabs
 | Command | Description |

M browse/SKILL.md => browse/SKILL.md +37 -26

@@ 104,25 104,36 @@ $B diff https://staging.app.com https://prod.app.com
 The snapshot is your primary tool for understanding and interacting with pages.
 
 ```
--i        Interactive elements only (buttons, links, inputs) with @e refs
--c        Compact (no empty structural nodes)
--d <N>    Limit depth
--s <sel>  Scope to CSS selector
--D        Diff against previous snapshot (what changed?)
--a        Annotated screenshot with ref labels
--o <path> Output path for screenshot
--C        Cursor-interactive elements (@c refs — divs with pointer, onclick)
+-i        --interactive           Interactive elements only (buttons, links, inputs) with @e refs
+-c        --compact               Compact (no empty structural nodes)
+-d <N>    --depth                 Limit tree depth (0 = root only, default: unlimited)
+-s <sel>  --selector              Scope to CSS selector
+-D        --diff                  Unified diff against previous snapshot (first call stores baseline)
+-a        --annotate              Annotated screenshot with red overlay boxes and ref labels
+-o <path> --output                Output path for annotated screenshot (default: /tmp/browse-annotated.png)
+-C        --cursor-interactive    Cursor-interactive elements (@c refs — divs with pointer, onclick)
 ```
 
-Combine flags: `$B snapshot -i -a -C -o /tmp/annotated.png`
+All flags can be combined freely. `-o` only applies when `-a` is also used.
+Example: `$B snapshot -i -a -C -o /tmp/annotated.png`
 
-After snapshot, use @refs everywhere:
+**Ref numbering:** @e refs are assigned sequentially (@e1, @e2, ...) in tree order.
+@c refs from `-C` are numbered separately (@c1, @c2, ...).
+
+After snapshot, use @refs as selectors in any command:
 ```bash
 $B click @e3       $B fill @e4 "value"     $B hover @e1
 $B html @e2        $B css @e5 "color"      $B attrs @e6
 $B click @c1       # cursor-interactive ref (from -C)
 ```
 
+**Output format:** indented accessibility tree with @ref IDs, one element per line.
+```
+  @e1 [heading] "Welcome" [level=1]
+  @e2 [textbox] "Email"
+  @e3 [button] "Submit"
+```
+
 Refs are invalidated on navigation — run `snapshot` again after `goto`.
 
 ## Full Command List


@@ 141,7 152,7 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
 |---------|-------------|
 | `accessibility` | Full ARIA tree |
 | `forms` | Form fields as JSON |
-| `html [selector]` | innerHTML |
+| `html [selector]` | innerHTML of selector (throws if not found), or full page HTML if no selector given |
 | `links` | All links as "text → href" |
 | `text` | Cleaned page text |
 


@@ 149,22 160,22 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
 | Command | Description |
 |---------|-------------|
 | `click <sel>` | Click element |
-| `cookie` | Set cookie |
+| `cookie <name>=<value>` | Set cookie on current page domain |
 | `cookie-import <json>` | Import cookies from JSON file |
-| `cookie-import-browser [browser] [--domain d]` | Import cookies from real browser (opens picker UI, or direct with --domain) |
-| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt |
+| `cookie-import-browser [browser] [--domain d]` | Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import) |
+| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt. Optional text is sent as the prompt response |
 | `dialog-dismiss` | Auto-dismiss next dialog |
 | `fill <sel> <val>` | Fill input |
-| `header <name> <value>` | Set custom request header |
+| `header <name>:<value>` | Set custom request header (colon-separated, sensitive values auto-redacted) |
 | `hover <sel>` | Hover element |
-| `press <key>` | Press key (Enter, Tab, Escape, etc.) |
-| `scroll [sel]` | Scroll element into view |
-| `select <sel> <val>` | Select dropdown option |
+| `press <key>` | Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter |
+| `scroll [sel]` | Scroll element into view, or scroll to page bottom if no selector |
+| `select <sel> <val>` | Select dropdown option by value, label, or visible text |
 | `type <text>` | Type into focused element |
-| `upload <sel> <file...>` | Upload file(s) |
+| `upload <sel> <file> [file2...]` | Upload file(s) |
 | `useragent <string>` | Set user agent |
 | `viewport <WxH>` | Set viewport size |
-| `wait <sel|--networkidle|--load>` | Wait for element/condition |
+| `wait <sel|--networkidle|--load>` | Wait for element, network idle, or page load (timeout: 15s) |
 
 ### Inspection
 | Command | Description |


@@ 174,30 185,30 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
 | `cookies` | All cookies as JSON |
 | `css <sel> <prop>` | Computed CSS value |
 | `dialog [--clear]` | Dialog messages |
-| `eval <file>` | Run JS file |
+| `eval <file>` | Run JavaScript from file and return result as string (path must be under /tmp or cwd) |
 | `is <prop> <sel>` | State check (visible/hidden/enabled/disabled/checked/editable/focused) |
-| `js <expr>` | Run JavaScript |
+| `js <expr>` | Run JavaScript expression and return result as string |
 | `network [--clear]` | Network requests |
 | `perf` | Page load timings |
-| `storage [set k v]` | localStorage + sessionStorage |
+| `storage [set k v]` | Read all localStorage + sessionStorage as JSON, or set <key> <value> to write localStorage |
 
 ### Visual
 | Command | Description |
 |---------|-------------|
 | `diff <url1> <url2>` | Text diff between pages |
 | `pdf [path]` | Save as PDF |
-| `responsive [prefix]` | Mobile/tablet/desktop screenshots |
+| `responsive [prefix]` | Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc. |
 | `screenshot [path]` | Save screenshot |
 
 ### Snapshot
 | Command | Description |
 |---------|-------------|
-| `snapshot [flags]` | Accessibility tree with @refs |
+| `snapshot [flags]` | Accessibility tree with @e refs for element selection. Flags: -i interactive only, -c compact, -d N depth limit, -s sel scope, -D diff vs previous, -a annotated screenshot, -o path output, -C cursor-interactive @c refs |
 
 ### Meta
 | Command | Description |
 |---------|-------------|
-| `chain` | Multi-command from JSON stdin |
+| `chain` | Run commands from JSON stdin. Format: [["cmd","arg1",...],...] |
 
 ### Tabs
 | Command | Description |

M browse/src/commands.ts => browse/src/commands.ts +16 -16

@@ 43,13 43,13 @@ export const COMMAND_DESCRIPTIONS: Record<string, { category: string; descriptio
   'url':     { category: 'Navigation', description: 'Print current URL' },
   // Reading
   'text':    { category: 'Reading', description: 'Cleaned page text' },
-  'html':    { category: 'Reading', description: 'innerHTML', usage: 'html [selector]' },
+  'html':    { category: 'Reading', description: 'innerHTML of selector (throws if not found), or full page HTML if no selector given', usage: 'html [selector]' },
   'links':   { category: 'Reading', description: 'All links as "text → href"' },
   'forms':   { category: 'Reading', description: 'Form fields as JSON' },
   'accessibility': { category: 'Reading', description: 'Full ARIA tree' },
   // Inspection
-  'js':      { category: 'Inspection', description: 'Run JavaScript', usage: 'js <expr>' },
-  'eval':    { category: 'Inspection', description: 'Run JS file', usage: 'eval <file>' },
+  'js':      { category: 'Inspection', description: 'Run JavaScript expression and return result as string', usage: 'js <expr>' },
+  'eval':    { category: 'Inspection', description: 'Run JavaScript from file and return result as string (path must be under /tmp or cwd)', usage: 'eval <file>' },
   'css':     { category: 'Inspection', description: 'Computed CSS value', usage: 'css <sel> <prop>' },
   'attrs':   { category: 'Inspection', description: 'Element attributes as JSON', usage: 'attrs <sel|@ref>' },
   'is':      { category: 'Inspection', description: 'State check (visible/hidden/enabled/disabled/checked/editable/focused)', usage: 'is <prop> <sel>' },


@@ 57,30 57,30 @@ export const COMMAND_DESCRIPTIONS: Record<string, { category: string; descriptio
   'network': { category: 'Inspection', description: 'Network requests', usage: 'network [--clear]' },
   'dialog':  { category: 'Inspection', description: 'Dialog messages', usage: 'dialog [--clear]' },
   'cookies': { category: 'Inspection', description: 'All cookies as JSON' },
-  'storage': { category: 'Inspection', description: 'localStorage + sessionStorage', usage: 'storage [set k v]' },
+  'storage': { category: 'Inspection', description: 'Read all localStorage + sessionStorage as JSON, or set <key> <value> to write localStorage', usage: 'storage [set k v]' },
   'perf':    { category: 'Inspection', description: 'Page load timings' },
   // Interaction
   'click':   { category: 'Interaction', description: 'Click element', usage: 'click <sel>' },
   'fill':    { category: 'Interaction', description: 'Fill input', usage: 'fill <sel> <val>' },
-  'select':  { category: 'Interaction', description: 'Select dropdown option', usage: 'select <sel> <val>' },
+  'select':  { category: 'Interaction', description: 'Select dropdown option by value, label, or visible text', usage: 'select <sel> <val>' },
   'hover':   { category: 'Interaction', description: 'Hover element', usage: 'hover <sel>' },
   'type':    { category: 'Interaction', description: 'Type into focused element', usage: 'type <text>' },
-  'press':   { category: 'Interaction', description: 'Press key (Enter, Tab, Escape, etc.)', usage: 'press <key>' },
-  'scroll':  { category: 'Interaction', description: 'Scroll element into view', usage: 'scroll [sel]' },
-  'wait':    { category: 'Interaction', description: 'Wait for element/condition', usage: 'wait <sel|--networkidle|--load>' },
-  'upload':  { category: 'Interaction', description: 'Upload file(s)', usage: 'upload <sel> <file...>' },
+  'press':   { category: 'Interaction', description: 'Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter', usage: 'press <key>' },
+  'scroll':  { category: 'Interaction', description: 'Scroll element into view, or scroll to page bottom if no selector', usage: 'scroll [sel]' },
+  'wait':    { category: 'Interaction', description: 'Wait for element, network idle, or page load (timeout: 15s)', usage: 'wait <sel|--networkidle|--load>' },
+  'upload':  { category: 'Interaction', description: 'Upload file(s)', usage: 'upload <sel> <file> [file2...]' },
   'viewport':{ category: 'Interaction', description: 'Set viewport size', usage: 'viewport <WxH>' },
-  'cookie':  { category: 'Interaction', description: 'Set cookie' },
+  'cookie':  { category: 'Interaction', description: 'Set cookie on current page domain', usage: 'cookie <name>=<value>' },
   'cookie-import': { category: 'Interaction', description: 'Import cookies from JSON file', usage: 'cookie-import <json>' },
-  'cookie-import-browser': { category: 'Interaction', description: 'Import cookies from real browser (opens picker UI, or direct with --domain)', usage: 'cookie-import-browser [browser] [--domain d]' },
-  'header':  { category: 'Interaction', description: 'Set custom request header', usage: 'header <name> <value>' },
+  'cookie-import-browser': { category: 'Interaction', description: 'Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import)', usage: 'cookie-import-browser [browser] [--domain d]' },
+  'header':  { category: 'Interaction', description: 'Set custom request header (colon-separated, sensitive values auto-redacted)', usage: 'header <name>:<value>' },
   'useragent': { category: 'Interaction', description: 'Set user agent', usage: 'useragent <string>' },
-  'dialog-accept': { category: 'Interaction', description: 'Auto-accept next alert/confirm/prompt', usage: 'dialog-accept [text]' },
+  'dialog-accept': { category: 'Interaction', description: 'Auto-accept next alert/confirm/prompt. Optional text is sent as the prompt response', usage: 'dialog-accept [text]' },
   'dialog-dismiss': { category: 'Interaction', description: 'Auto-dismiss next dialog' },
   // Visual
   'screenshot': { category: 'Visual', description: 'Save screenshot', usage: 'screenshot [path]' },
   'pdf':     { category: 'Visual', description: 'Save as PDF', usage: 'pdf [path]' },
-  'responsive': { category: 'Visual', description: 'Mobile/tablet/desktop screenshots', usage: 'responsive [prefix]' },
+  'responsive': { category: 'Visual', description: 'Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc.', usage: 'responsive [prefix]' },
   'diff':    { category: 'Visual', description: 'Text diff between pages', usage: 'diff <url1> <url2>' },
   // Tabs
   'tabs':    { category: 'Tabs', description: 'List open tabs' },


@@ 92,8 92,8 @@ export const COMMAND_DESCRIPTIONS: Record<string, { category: string; descriptio
   'stop':    { category: 'Server', description: 'Shutdown server' },
   'restart': { category: 'Server', description: 'Restart server' },
   // Meta
-  'snapshot':{ category: 'Snapshot', description: 'Accessibility tree with @refs', usage: 'snapshot [flags]' },
-  'chain':   { category: 'Meta', description: 'Multi-command from JSON stdin' },
+  'snapshot':{ category: 'Snapshot', description: 'Accessibility tree with @e refs for element selection. Flags: -i interactive only, -c compact, -d N depth limit, -s sel scope, -D diff vs previous, -a annotated screenshot, -o path output, -C cursor-interactive @c refs', usage: 'snapshot [flags]' },
+  'chain':   { category: 'Meta', description: 'Run commands from JSON stdin. Format: [["cmd","arg1",...],...]' },
 };
 
 // Load-time validation: descriptions must cover exactly the command sets

M browse/src/server.ts => browse/src/server.ts +44 -23

@@ 18,6 18,8 @@ import { handleReadCommand } from './read-commands';
 import { handleWriteCommand } from './write-commands';
 import { handleMetaCommand } from './meta-commands';
 import { handleCookiePickerRoute } from './cookie-picker-routes';
+import { COMMAND_DESCRIPTIONS } from './commands';
+import { SNAPSHOT_FLAGS } from './snapshot';
 import { resolveConfig, ensureStateDir, readVersionHash } from './config';
 import * as fs from 'fs';
 import * as path from 'path';


@@ 37,6 39,47 @@ function validateAuth(req: Request): boolean {
   return header === `Bearer ${AUTH_TOKEN}`;
 }
 
+// ─── Help text (auto-generated from COMMAND_DESCRIPTIONS) ────────
+/**
+ * Builds the plain-text body returned by the `help` command.
+ *
+ * Commands are grouped by the `category` recorded in COMMAND_DESCRIPTIONS and
+ * displayed by their `usage` string (falling back to the bare command name).
+ * Snapshot flags are taken from SNAPSHOT_FLAGS and printed two per line.
+ *
+ * @returns The help text as a single newline-joined string.
+ */
+function generateHelpText(): string {
+  // Group commands by category
+  const groups = new Map<string, string[]>();
+  for (const [cmd, meta] of Object.entries(COMMAND_DESCRIPTIONS)) {
+    const display = meta.usage || cmd;
+    const list = groups.get(meta.category) ?? [];
+    list.push(display);
+    groups.set(meta.category, list);
+  }
+
+  // Preferred display order. Categories missing from this list are appended
+  // afterwards (in first-seen order) instead of being dropped, so a category
+  // added to COMMAND_DESCRIPTIONS can never silently vanish from the help.
+  const preferredOrder = [
+    'Navigation', 'Reading', 'Interaction', 'Inspection',
+    'Visual', 'Snapshot', 'Meta', 'Tabs', 'Server',
+  ];
+  const categoryOrder = [
+    ...preferredOrder,
+    ...[...groups.keys()].filter((cat) => !preferredOrder.includes(cat)),
+  ];
+
+  const lines = ['gstack browse — headless browser for AI agents', '', 'Commands:'];
+  for (const cat of categoryOrder) {
+    const cmds = groups.get(cat);
+    if (!cmds) continue;
+    // Pad to 14 and add an explicit space: same 15-column layout as before,
+    // but an over-long category label still gets a separator.
+    const label = `${cat}:`.padEnd(14);
+    lines.push(`  ${label} ${cmds.join(', ')}`);
+  }
+
+  // Snapshot flags from source of truth
+  lines.push('');
+  lines.push('Snapshot flags:');
+  const flagPairs: string[] = [];
+  for (const flag of SNAPSHOT_FLAGS) {
+    const label = flag.valueHint ? `${flag.short} ${flag.valueHint}` : flag.short;
+    flagPairs.push(`${label}  ${flag.long}`);
+  }
+  // Print two flags per line for compact display
+  for (let i = 0; i < flagPairs.length; i += 2) {
+    const left = flagPairs[i].padEnd(28);
+    const right = flagPairs[i + 1] ?? ''; // odd flag count leaves the last row half-empty
+    lines.push(`  ${left}${right}`);
+  }
+
+  return lines.join('\n');
+}
+
 // ─── Buffer (from buffers.ts) ────────────────────────────────────
 import { consoleBuffer, networkBuffer, dialogBuffer, addConsoleEntry, addNetworkEntry, addDialogEntry, type LogEntry, type NetworkEntry, type DialogEntry } from './buffers';
 export { consoleBuffer, networkBuffer, dialogBuffer, addConsoleEntry, addNetworkEntry, addDialogEntry, type LogEntry, type NetworkEntry, type DialogEntry };


@@ 191,29 234,7 @@ async function handleCommand(body: any): Promise<Response> {
     } else if (META_COMMANDS.has(command)) {
       result = await handleMetaCommand(command, args, browserManager, shutdown);
     } else if (command === 'help') {
-      const helpText = [
-        'gstack browse — headless browser for AI agents',
-        '',
-        'Commands:',
-        '  Navigation:    goto <url>, back, forward, reload',
-        '  Interaction:   click <sel>, fill <sel> <text>, select <sel> <val>, hover, type, press, scroll, wait',
-        '  Read:          text [sel], html [sel], links, forms, accessibility, cookies, storage, console, network, perf',
-        '  Evaluate:      js <expr>, eval <expr>, css <sel> <prop>, attrs <sel>, is <sel> <state>',
-        '  Snapshot:      snapshot [-i] [-c] [-d N] [-s sel] [-D] [-a] [-o path] [-C]',
-        '  Screenshot:    screenshot [path], pdf [path], responsive <widths>',
-        '  Tabs:          tabs, tab <id>, newtab [url], closetab [id]',
-        '  State:         cookie <set|get|clear>, cookie-import <json>, cookie-import-browser [browser]',
-        '  Headers:       header <set|clear> [name] [value], useragent [string]',
-        '  Upload:        upload <sel> <file1> [file2...]',
-        '  Dialogs:       dialog, dialog-accept [text], dialog-dismiss',
-        '  Meta:          status, stop, restart, diff, chain, help',
-        '',
-        'Snapshot flags:',
-        '  -i  interactive only    -c  compact (remove empty nodes)',
-        '  -d N  limit depth       -s sel  scope to CSS selector',
-        '  -D  diff vs previous    -a  annotated screenshot with ref labels',
-        '  -o path  output file    -C  cursor-interactive elements',
-      ].join('\n');
+      const helpText = generateHelpText();
       return new Response(helpText, {
         status: 200,
         headers: { 'Content-Type': 'text/plain' },

M browse/src/snapshot.ts => browse/src/snapshot.ts +4 -4

@@ 57,11 57,11 @@ export const SNAPSHOT_FLAGS: Array<{
 }> = [
   { short: '-i', long: '--interactive', description: 'Interactive elements only (buttons, links, inputs) with @e refs', optionKey: 'interactive' },
   { short: '-c', long: '--compact', description: 'Compact (no empty structural nodes)', optionKey: 'compact' },
-  { short: '-d', long: '--depth', description: 'Limit depth', takesValue: true, valueHint: '<N>', optionKey: 'depth' },
+  { short: '-d', long: '--depth', description: 'Limit tree depth (0 = root only, default: unlimited)', takesValue: true, valueHint: '<N>', optionKey: 'depth' },
   { short: '-s', long: '--selector', description: 'Scope to CSS selector', takesValue: true, valueHint: '<sel>', optionKey: 'selector' },
-  { short: '-D', long: '--diff', description: 'Diff against previous snapshot (what changed?)', optionKey: 'diff' },
-  { short: '-a', long: '--annotate', description: 'Annotated screenshot with ref labels', optionKey: 'annotate' },
-  { short: '-o', long: '--output', description: 'Output path for screenshot', takesValue: true, valueHint: '<path>', optionKey: 'outputPath' },
+  { short: '-D', long: '--diff', description: 'Unified diff against previous snapshot (first call stores baseline)', optionKey: 'diff' },
+  { short: '-a', long: '--annotate', description: 'Annotated screenshot with red overlay boxes and ref labels', optionKey: 'annotate' },
+  { short: '-o', long: '--output', description: 'Output path for annotated screenshot (default: /tmp/browse-annotated.png)', takesValue: true, valueHint: '<path>', optionKey: 'outputPath' },
   { short: '-C', long: '--cursor-interactive', description: 'Cursor-interactive elements (@c refs — divs with pointer, onclick)', optionKey: 'cursorInteractive' },
 ];

M scripts/gen-skill-docs.ts => scripts/gen-skill-docs.ts +14 -3

@@ 64,20 64,31 @@ function generateSnapshotFlags(): string {
 
   for (const flag of SNAPSHOT_FLAGS) {
     const label = flag.valueHint ? `${flag.short} ${flag.valueHint}` : flag.short;
-    lines.push(`${label.padEnd(10)}${flag.description}`);
+    lines.push(`${label.padEnd(10)}${flag.long.padEnd(24)}${flag.description}`);
   }
 
   lines.push('```');
   lines.push('');
-  lines.push('Combine flags: `$B snapshot -i -a -C -o /tmp/annotated.png`');
+  lines.push('All flags can be combined freely. `-o` only applies when `-a` is also used.');
+  lines.push('Example: `$B snapshot -i -a -C -o /tmp/annotated.png`');
   lines.push('');
-  lines.push('After snapshot, use @refs everywhere:');
+  lines.push('**Ref numbering:** @e refs are assigned sequentially (@e1, @e2, ...) in tree order.');
+  lines.push('@c refs from `-C` are numbered separately (@c1, @c2, ...).');
+  lines.push('');
+  lines.push('After snapshot, use @refs as selectors in any command:');
   lines.push('```bash');
   lines.push('$B click @e3       $B fill @e4 "value"     $B hover @e1');
   lines.push('$B html @e2        $B css @e5 "color"      $B attrs @e6');
   lines.push('$B click @c1       # cursor-interactive ref (from -C)');
   lines.push('```');
   lines.push('');
+  lines.push('**Output format:** indented accessibility tree with @ref IDs, one element per line.');
+  lines.push('```');
+  lines.push('  @e1 [heading] "Welcome" [level=1]');
+  lines.push('  @e2 [textbox] "Email"');
+  lines.push('  @e3 [button] "Submit"');
+  lines.push('```');
+  lines.push('');
   lines.push('Refs are invalidated on navigation — run `snapshot` again after `goto`.');
 
   return lines.join('\n');

M test/gen-skill-docs.test.ts => test/gen-skill-docs.test.ts +8 -0

@@ 139,6 139,14 @@ describe('description quality evals', () => {
     }
   });
 
+  // Guard: descriptions must not contain pipe (breaks markdown table cells).
+  // Only `description` is checked; usage strings such as
+  // `wait <sel|--networkidle|--load>` are deliberately exempt.
+  // NOTE(review): GFM treats an unescaped `|` as a cell separator even inside
+  // a code span — confirm the backtick-wrapped usage column renders as intended.
+  test('no command description contains pipe character', () => {
+    // Collect offenders so a failure names every command that needs fixing.
+    const offenders = Object.entries(COMMAND_DESCRIPTIONS)
+      .filter(([, meta]) => meta.description.includes('|'))
+      .map(([cmd]) => cmd);
+    expect(offenders).toEqual([]);
+  });
+
   // Guard: generated output uses → not ->
   test('generated SKILL.md uses unicode arrows', () => {
     const content = fs.readFileSync(path.join(ROOT, 'SKILL.md'), 'utf-8');

M test/skill-llm-eval.test.ts => test/skill-llm-eval.test.ts +3 -3

@@ 7,7 7,7 @@
  * Requires: ANTHROPIC_API_KEY env var
  * Run: ANTHROPIC_API_KEY=sk-... bun test test/skill-llm-eval.test.ts
  *
- * Cost: ~$0.01-0.03 per run (haiku)
+ * Cost: ~$0.05-0.15 per run (sonnet)
  */
 
 import { describe, test, expect } from 'bun:test';


@@ 30,7 30,7 @@ async function judge(section: string, prompt: string): Promise<JudgeScore> {
   const client = new Anthropic();
 
   const response = await client.messages.create({
-    model: 'claude-haiku-4-5-20251001',
+    model: 'claude-sonnet-4-6',
     max_tokens: 1024,
     messages: [{
       role: 'user',


@@ 158,7 158,7 @@ describeEval('LLM-as-judge quality evals', () => {
 
     const client = new Anthropic();
     const response = await client.messages.create({
-      model: 'claude-haiku-4-5-20251001',
+      model: 'claude-sonnet-4-6',
       max_tokens: 1024,
       messages: [{
         role: 'user',

M test/skill-validation.test.ts => test/skill-validation.test.ts +53 -0

@@ 80,6 80,59 @@ describe('Command registry consistency', () => {
   });
 });
 
+describe('Usage string consistency', () => {
+  /**
+   * Reduce a usage string to its structural shape so two usages can be
+   * compared without caring about parameter naming.
+   *
+   * Parenthetical hints are removed, every `<...>` becomes `<>`, every
+   * `[...]` becomes `[]`, and whitespace runs collapse to one space. A format
+   * difference such as `<name>:<value>` vs `<name> <value>` survives, while
+   * an abbreviation difference such as `<sel>` vs `<selector>` does not.
+   */
+  function skeleton(usage: string): string {
+    const withoutHints = usage.replace(/\(.*?\)/g, '');
+    const requiredNormalized = withoutHints.replace(/<[^>]*>/g, '<>');
+    const optionalNormalized = requiredNormalized.replace(/\[[^\]]*\]/g, '[]');
+    return optionalNormalized.replace(/\s+/g, ' ').trim();
+  }
+
+  // Every `Usage: browse ...` error thrown by the command handlers must have
+  // the same structural shape as the usage documented in COMMAND_DESCRIPTIONS.
+  test('implementation Usage: structural format matches COMMAND_DESCRIPTIONS', () => {
+    const sourceDir = path.join(ROOT, 'browse', 'src');
+    const implFiles = ['write-commands.ts', 'read-commands.ts', 'meta-commands.ts'].map(
+      (fileName) => path.join(sourceDir, fileName),
+    );
+
+    // Captures the text after "Usage: browse" inside throw new Error(...)
+    const usagePattern = /throw new Error\(['"`]Usage:\s*browse\s+(.+?)['"`]\)/g;
+    const implUsages = new Map<string, string>();
+
+    for (const file of implFiles) {
+      const source = fs.readFileSync(file, 'utf-8');
+      for (const found of source.matchAll(usagePattern)) {
+        // Keep only the first line of a multi-line message (literal "\n" in source).
+        const usage = found[1].split('\\n')[0].trim();
+        // The command name is the first whitespace-delimited token; a later
+        // occurrence for the same command replaces an earlier one.
+        implUsages.set(usage.split(/\s/)[0], usage);
+      }
+    }
+
+    // One message per command whose documented and implemented shapes differ.
+    const mismatches = [...implUsages].flatMap(([cmd, implUsage]) => {
+      const documented = COMMAND_DESCRIPTIONS[cmd]?.usage;
+      if (!documented) return []; // unknown command or no documented usage
+      const descSkel = skeleton(documented);
+      const implSkel = skeleton(implUsage);
+      return descSkel === implSkel
+        ? []
+        : [`${cmd}: docs "${documented}" (${descSkel}) vs impl "${implUsage}" (${implSkel})`];
+    });
+
+    expect(mismatches).toEqual([]);
+  });
+});
+
 describe('Generated SKILL.md freshness', () => {
   test('no unresolved {{placeholders}} in generated SKILL.md', () => {
     const content = fs.readFileSync(path.join(ROOT, 'SKILL.md'), 'utf-8');