M CHANGELOG.md => CHANGELOG.md +16 -0
@@ 1,5 1,21 @@
# Changelog
+## Unreleased — 2026-03-14
+
+### Changed
+- Enriched 15 command descriptions with specific arg formats, valid values, error behavior, and return types
+- Fixed `header` usage from `<name> <value>` to `<name>:<value>` (matching actual implementation)
+- Added `cookie` usage syntax: `cookie <name>=<value>`
+- Enriched 4 snapshot flag descriptions with defaults, output paths, and behavior details
+- Snapshot flags section now shows long flag names (`-i / --interactive`) alongside short
+- Added ref numbering explanation and output format example to snapshot docs
+- Replaced hand-maintained server.ts help text with auto-generated `generateHelpText()`, built from `COMMAND_DESCRIPTIONS` and `SNAPSHOT_FLAGS`
+- Upgraded LLM eval judge from Haiku to Sonnet 4.6 for more stable scoring
+
+### Added
+- Usage string consistency test: cross-checks `Usage:` patterns in implementation against COMMAND_DESCRIPTIONS
+- Pipe guard test: ensures no command description contains `|` (would break markdown tables)
+
## 0.3.3 — 2026-03-13
### Added
M SKILL.md => SKILL.md +37 -26
@@ 232,25 232,36 @@ $B css ".button" "background-color"
The snapshot is your primary tool for understanding and interacting with pages.
```
--i Interactive elements only (buttons, links, inputs) with @e refs
--c Compact (no empty structural nodes)
--d <N> Limit depth
--s <sel> Scope to CSS selector
--D Diff against previous snapshot (what changed?)
--a Annotated screenshot with ref labels
--o <path> Output path for screenshot
--C Cursor-interactive elements (@c refs — divs with pointer, onclick)
+-i --interactive Interactive elements only (buttons, links, inputs) with @e refs
+-c --compact Compact (no empty structural nodes)
+-d <N> --depth Limit tree depth (0 = root only, default: unlimited)
+-s <sel> --selector Scope to CSS selector
+-D --diff Unified diff against previous snapshot (first call stores baseline)
+-a --annotate Annotated screenshot with red overlay boxes and ref labels
+-o <path> --output Output path for annotated screenshot (default: /tmp/browse-annotated.png)
+-C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick)
```
-Combine flags: `$B snapshot -i -a -C -o /tmp/annotated.png`
+All flags can be combined freely. `-o` only applies when `-a` is also used.
+Example: `$B snapshot -i -a -C -o /tmp/annotated.png`
-After snapshot, use @refs everywhere:
+**Ref numbering:** @e refs are assigned sequentially (@e1, @e2, ...) in tree order.
+@c refs from `-C` are numbered separately (@c1, @c2, ...).
+
+After snapshot, use @refs as selectors in any command:
```bash
$B click @e3 $B fill @e4 "value" $B hover @e1
$B html @e2 $B css @e5 "color" $B attrs @e6
$B click @c1 # cursor-interactive ref (from -C)
```
+**Output format:** indented accessibility tree with @ref IDs, one element per line.
+```
+ @e1 [heading] "Welcome" [level=1]
+ @e2 [textbox] "Email"
+ @e3 [button] "Submit"
+```
+
Refs are invalidated on navigation — run `snapshot` again after `goto`.
## Command Reference
@@ 269,7 280,7 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
|---------|-------------|
| `accessibility` | Full ARIA tree |
| `forms` | Form fields as JSON |
-| `html [selector]` | innerHTML |
+| `html [selector]` | innerHTML of selector (throws if not found), or full page HTML if no selector given |
| `links` | All links as "text → href" |
| `text` | Cleaned page text |
@@ 277,22 288,22 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
| Command | Description |
|---------|-------------|
| `click <sel>` | Click element |
-| `cookie` | Set cookie |
+| `cookie <name>=<value>` | Set cookie on current page domain |
| `cookie-import <json>` | Import cookies from JSON file |
-| `cookie-import-browser [browser] [--domain d]` | Import cookies from real browser (opens picker UI, or direct with --domain) |
-| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt |
+| `cookie-import-browser [browser] [--domain d]` | Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import) |
+| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt. Optional text is sent as the prompt response |
| `dialog-dismiss` | Auto-dismiss next dialog |
| `fill <sel> <val>` | Fill input |
-| `header <name> <value>` | Set custom request header |
+| `header <name>:<value>` | Set custom request header (colon-separated, sensitive values auto-redacted) |
| `hover <sel>` | Hover element |
-| `press <key>` | Press key (Enter, Tab, Escape, etc.) |
-| `scroll [sel]` | Scroll element into view |
-| `select <sel> <val>` | Select dropdown option |
+| `press <key>` | Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter |
+| `scroll [sel]` | Scroll element into view, or scroll to page bottom if no selector |
+| `select <sel> <val>` | Select dropdown option by value, label, or visible text |
| `type <text>` | Type into focused element |
-| `upload <sel> <file...>` | Upload file(s) |
+| `upload <sel> <file> [file2...]` | Upload file(s) |
| `useragent <string>` | Set user agent |
| `viewport <WxH>` | Set viewport size |
-| `wait <sel|--networkidle|--load>` | Wait for element/condition |
+| `wait <sel|--networkidle|--load>` | Wait for element, network idle, or page load (timeout: 15s) |
### Inspection
| Command | Description |
@@ 302,30 313,30 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
| `cookies` | All cookies as JSON |
| `css <sel> <prop>` | Computed CSS value |
| `dialog [--clear]` | Dialog messages |
-| `eval <file>` | Run JS file |
+| `eval <file>` | Run JavaScript from file and return result as string (path must be under /tmp or cwd) |
| `is <prop> <sel>` | State check (visible/hidden/enabled/disabled/checked/editable/focused) |
-| `js <expr>` | Run JavaScript |
+| `js <expr>` | Run JavaScript expression and return result as string |
| `network [--clear]` | Network requests |
| `perf` | Page load timings |
-| `storage [set k v]` | localStorage + sessionStorage |
+| `storage [set k v]` | Read all localStorage + sessionStorage as JSON, or set <key> <value> to write localStorage |
### Visual
| Command | Description |
|---------|-------------|
| `diff <url1> <url2>` | Text diff between pages |
| `pdf [path]` | Save as PDF |
-| `responsive [prefix]` | Mobile/tablet/desktop screenshots |
+| `responsive [prefix]` | Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc. |
| `screenshot [path]` | Save screenshot |
### Snapshot
| Command | Description |
|---------|-------------|
-| `snapshot [flags]` | Accessibility tree with @refs |
+| `snapshot [flags]` | Accessibility tree with @e refs for element selection. Flags: -i interactive only, -c compact, -d N depth limit, -s sel scope, -D diff vs previous, -a annotated screenshot, -o path output, -C cursor-interactive @c refs |
### Meta
| Command | Description |
|---------|-------------|
-| `chain` | Multi-command from JSON stdin |
+| `chain` | Run commands from JSON stdin. Format: [["cmd","arg1",...],...] |
### Tabs
| Command | Description |
M browse/SKILL.md => browse/SKILL.md +37 -26
@@ 104,25 104,36 @@ $B diff https://staging.app.com https://prod.app.com
The snapshot is your primary tool for understanding and interacting with pages.
```
--i Interactive elements only (buttons, links, inputs) with @e refs
--c Compact (no empty structural nodes)
--d <N> Limit depth
--s <sel> Scope to CSS selector
--D Diff against previous snapshot (what changed?)
--a Annotated screenshot with ref labels
--o <path> Output path for screenshot
--C Cursor-interactive elements (@c refs — divs with pointer, onclick)
+-i --interactive Interactive elements only (buttons, links, inputs) with @e refs
+-c --compact Compact (no empty structural nodes)
+-d <N> --depth Limit tree depth (0 = root only, default: unlimited)
+-s <sel> --selector Scope to CSS selector
+-D --diff Unified diff against previous snapshot (first call stores baseline)
+-a --annotate Annotated screenshot with red overlay boxes and ref labels
+-o <path> --output Output path for annotated screenshot (default: /tmp/browse-annotated.png)
+-C --cursor-interactive Cursor-interactive elements (@c refs — divs with pointer, onclick)
```
-Combine flags: `$B snapshot -i -a -C -o /tmp/annotated.png`
+All flags can be combined freely. `-o` only applies when `-a` is also used.
+Example: `$B snapshot -i -a -C -o /tmp/annotated.png`
-After snapshot, use @refs everywhere:
+**Ref numbering:** @e refs are assigned sequentially (@e1, @e2, ...) in tree order.
+@c refs from `-C` are numbered separately (@c1, @c2, ...).
+
+After snapshot, use @refs as selectors in any command:
```bash
$B click @e3 $B fill @e4 "value" $B hover @e1
$B html @e2 $B css @e5 "color" $B attrs @e6
$B click @c1 # cursor-interactive ref (from -C)
```
+**Output format:** indented accessibility tree with @ref IDs, one element per line.
+```
+ @e1 [heading] "Welcome" [level=1]
+ @e2 [textbox] "Email"
+ @e3 [button] "Submit"
+```
+
Refs are invalidated on navigation — run `snapshot` again after `goto`.
## Full Command List
@@ 141,7 152,7 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
|---------|-------------|
| `accessibility` | Full ARIA tree |
| `forms` | Form fields as JSON |
-| `html [selector]` | innerHTML |
+| `html [selector]` | innerHTML of selector (throws if not found), or full page HTML if no selector given |
| `links` | All links as "text → href" |
| `text` | Cleaned page text |
@@ 149,22 160,22 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
| Command | Description |
|---------|-------------|
| `click <sel>` | Click element |
-| `cookie` | Set cookie |
+| `cookie <name>=<value>` | Set cookie on current page domain |
| `cookie-import <json>` | Import cookies from JSON file |
-| `cookie-import-browser [browser] [--domain d]` | Import cookies from real browser (opens picker UI, or direct with --domain) |
-| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt |
+| `cookie-import-browser [browser] [--domain d]` | Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import) |
+| `dialog-accept [text]` | Auto-accept next alert/confirm/prompt. Optional text is sent as the prompt response |
| `dialog-dismiss` | Auto-dismiss next dialog |
| `fill <sel> <val>` | Fill input |
-| `header <name> <value>` | Set custom request header |
+| `header <name>:<value>` | Set custom request header (colon-separated, sensitive values auto-redacted) |
| `hover <sel>` | Hover element |
-| `press <key>` | Press key (Enter, Tab, Escape, etc.) |
-| `scroll [sel]` | Scroll element into view |
-| `select <sel> <val>` | Select dropdown option |
+| `press <key>` | Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter |
+| `scroll [sel]` | Scroll element into view, or scroll to page bottom if no selector |
+| `select <sel> <val>` | Select dropdown option by value, label, or visible text |
| `type <text>` | Type into focused element |
-| `upload <sel> <file...>` | Upload file(s) |
+| `upload <sel> <file> [file2...]` | Upload file(s) |
| `useragent <string>` | Set user agent |
| `viewport <WxH>` | Set viewport size |
-| `wait <sel|--networkidle|--load>` | Wait for element/condition |
+| `wait <sel|--networkidle|--load>` | Wait for element, network idle, or page load (timeout: 15s) |
### Inspection
| Command | Description |
@@ 174,30 185,30 @@ Refs are invalidated on navigation — run `snapshot` again after `goto`.
| `cookies` | All cookies as JSON |
| `css <sel> <prop>` | Computed CSS value |
| `dialog [--clear]` | Dialog messages |
-| `eval <file>` | Run JS file |
+| `eval <file>` | Run JavaScript from file and return result as string (path must be under /tmp or cwd) |
| `is <prop> <sel>` | State check (visible/hidden/enabled/disabled/checked/editable/focused) |
-| `js <expr>` | Run JavaScript |
+| `js <expr>` | Run JavaScript expression and return result as string |
| `network [--clear]` | Network requests |
| `perf` | Page load timings |
-| `storage [set k v]` | localStorage + sessionStorage |
+| `storage [set k v]` | Read all localStorage + sessionStorage as JSON, or set <key> <value> to write localStorage |
### Visual
| Command | Description |
|---------|-------------|
| `diff <url1> <url2>` | Text diff between pages |
| `pdf [path]` | Save as PDF |
-| `responsive [prefix]` | Mobile/tablet/desktop screenshots |
+| `responsive [prefix]` | Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc. |
| `screenshot [path]` | Save screenshot |
### Snapshot
| Command | Description |
|---------|-------------|
-| `snapshot [flags]` | Accessibility tree with @refs |
+| `snapshot [flags]` | Accessibility tree with @e refs for element selection. Flags: -i interactive only, -c compact, -d N depth limit, -s sel scope, -D diff vs previous, -a annotated screenshot, -o path output, -C cursor-interactive @c refs |
### Meta
| Command | Description |
|---------|-------------|
-| `chain` | Multi-command from JSON stdin |
+| `chain` | Run commands from JSON stdin. Format: [["cmd","arg1",...],...] |
### Tabs
| Command | Description |
M browse/src/commands.ts => browse/src/commands.ts +16 -16
@@ 43,13 43,13 @@ export const COMMAND_DESCRIPTIONS: Record<string, { category: string; descriptio
'url': { category: 'Navigation', description: 'Print current URL' },
// Reading
'text': { category: 'Reading', description: 'Cleaned page text' },
- 'html': { category: 'Reading', description: 'innerHTML', usage: 'html [selector]' },
+ 'html': { category: 'Reading', description: 'innerHTML of selector (throws if not found), or full page HTML if no selector given', usage: 'html [selector]' },
'links': { category: 'Reading', description: 'All links as "text → href"' },
'forms': { category: 'Reading', description: 'Form fields as JSON' },
'accessibility': { category: 'Reading', description: 'Full ARIA tree' },
// Inspection
- 'js': { category: 'Inspection', description: 'Run JavaScript', usage: 'js <expr>' },
- 'eval': { category: 'Inspection', description: 'Run JS file', usage: 'eval <file>' },
+ 'js': { category: 'Inspection', description: 'Run JavaScript expression and return result as string', usage: 'js <expr>' },
+ 'eval': { category: 'Inspection', description: 'Run JavaScript from file and return result as string (path must be under /tmp or cwd)', usage: 'eval <file>' },
'css': { category: 'Inspection', description: 'Computed CSS value', usage: 'css <sel> <prop>' },
'attrs': { category: 'Inspection', description: 'Element attributes as JSON', usage: 'attrs <sel|@ref>' },
'is': { category: 'Inspection', description: 'State check (visible/hidden/enabled/disabled/checked/editable/focused)', usage: 'is <prop> <sel>' },
@@ 57,30 57,30 @@ export const COMMAND_DESCRIPTIONS: Record<string, { category: string; descriptio
'network': { category: 'Inspection', description: 'Network requests', usage: 'network [--clear]' },
'dialog': { category: 'Inspection', description: 'Dialog messages', usage: 'dialog [--clear]' },
'cookies': { category: 'Inspection', description: 'All cookies as JSON' },
- 'storage': { category: 'Inspection', description: 'localStorage + sessionStorage', usage: 'storage [set k v]' },
+ 'storage': { category: 'Inspection', description: 'Read all localStorage + sessionStorage as JSON, or set <key> <value> to write localStorage', usage: 'storage [set k v]' },
'perf': { category: 'Inspection', description: 'Page load timings' },
// Interaction
'click': { category: 'Interaction', description: 'Click element', usage: 'click <sel>' },
'fill': { category: 'Interaction', description: 'Fill input', usage: 'fill <sel> <val>' },
- 'select': { category: 'Interaction', description: 'Select dropdown option', usage: 'select <sel> <val>' },
+ 'select': { category: 'Interaction', description: 'Select dropdown option by value, label, or visible text', usage: 'select <sel> <val>' },
'hover': { category: 'Interaction', description: 'Hover element', usage: 'hover <sel>' },
'type': { category: 'Interaction', description: 'Type into focused element', usage: 'type <text>' },
- 'press': { category: 'Interaction', description: 'Press key (Enter, Tab, Escape, etc.)', usage: 'press <key>' },
- 'scroll': { category: 'Interaction', description: 'Scroll element into view', usage: 'scroll [sel]' },
- 'wait': { category: 'Interaction', description: 'Wait for element/condition', usage: 'wait <sel|--networkidle|--load>' },
- 'upload': { category: 'Interaction', description: 'Upload file(s)', usage: 'upload <sel> <file...>' },
+ 'press': { category: 'Interaction', description: 'Press key — Enter, Tab, Escape, ArrowUp/Down/Left/Right, Backspace, Delete, Home, End, PageUp, PageDown, or modifiers like Shift+Enter', usage: 'press <key>' },
+ 'scroll': { category: 'Interaction', description: 'Scroll element into view, or scroll to page bottom if no selector', usage: 'scroll [sel]' },
+ 'wait': { category: 'Interaction', description: 'Wait for element, network idle, or page load (timeout: 15s)', usage: 'wait <sel|--networkidle|--load>' },
+ 'upload': { category: 'Interaction', description: 'Upload file(s)', usage: 'upload <sel> <file> [file2...]' },
'viewport':{ category: 'Interaction', description: 'Set viewport size', usage: 'viewport <WxH>' },
- 'cookie': { category: 'Interaction', description: 'Set cookie' },
+ 'cookie': { category: 'Interaction', description: 'Set cookie on current page domain', usage: 'cookie <name>=<value>' },
'cookie-import': { category: 'Interaction', description: 'Import cookies from JSON file', usage: 'cookie-import <json>' },
- 'cookie-import-browser': { category: 'Interaction', description: 'Import cookies from real browser (opens picker UI, or direct with --domain)', usage: 'cookie-import-browser [browser] [--domain d]' },
- 'header': { category: 'Interaction', description: 'Set custom request header', usage: 'header <name> <value>' },
+ 'cookie-import-browser': { category: 'Interaction', description: 'Import cookies from Comet, Chrome, Arc, Brave, or Edge (opens picker, or use --domain for direct import)', usage: 'cookie-import-browser [browser] [--domain d]' },
+ 'header': { category: 'Interaction', description: 'Set custom request header (colon-separated, sensitive values auto-redacted)', usage: 'header <name>:<value>' },
'useragent': { category: 'Interaction', description: 'Set user agent', usage: 'useragent <string>' },
- 'dialog-accept': { category: 'Interaction', description: 'Auto-accept next alert/confirm/prompt', usage: 'dialog-accept [text]' },
+ 'dialog-accept': { category: 'Interaction', description: 'Auto-accept next alert/confirm/prompt. Optional text is sent as the prompt response', usage: 'dialog-accept [text]' },
'dialog-dismiss': { category: 'Interaction', description: 'Auto-dismiss next dialog' },
// Visual
'screenshot': { category: 'Visual', description: 'Save screenshot', usage: 'screenshot [path]' },
'pdf': { category: 'Visual', description: 'Save as PDF', usage: 'pdf [path]' },
- 'responsive': { category: 'Visual', description: 'Mobile/tablet/desktop screenshots', usage: 'responsive [prefix]' },
+ 'responsive': { category: 'Visual', description: 'Screenshots at mobile (375x812), tablet (768x1024), desktop (1280x720). Saves as {prefix}-mobile.png etc.', usage: 'responsive [prefix]' },
'diff': { category: 'Visual', description: 'Text diff between pages', usage: 'diff <url1> <url2>' },
// Tabs
'tabs': { category: 'Tabs', description: 'List open tabs' },
@@ 92,8 92,8 @@ export const COMMAND_DESCRIPTIONS: Record<string, { category: string; descriptio
'stop': { category: 'Server', description: 'Shutdown server' },
'restart': { category: 'Server', description: 'Restart server' },
// Meta
- 'snapshot':{ category: 'Snapshot', description: 'Accessibility tree with @refs', usage: 'snapshot [flags]' },
- 'chain': { category: 'Meta', description: 'Multi-command from JSON stdin' },
+ 'snapshot':{ category: 'Snapshot', description: 'Accessibility tree with @e refs for element selection. Flags: -i interactive only, -c compact, -d N depth limit, -s sel scope, -D diff vs previous, -a annotated screenshot, -o path output, -C cursor-interactive @c refs', usage: 'snapshot [flags]' },
+ 'chain': { category: 'Meta', description: 'Run commands from JSON stdin. Format: [["cmd","arg1",...],...]' },
};
// Load-time validation: descriptions must cover exactly the command sets
M browse/src/server.ts => browse/src/server.ts +44 -23
@@ 18,6 18,8 @@ import { handleReadCommand } from './read-commands';
import { handleWriteCommand } from './write-commands';
import { handleMetaCommand } from './meta-commands';
import { handleCookiePickerRoute } from './cookie-picker-routes';
+import { COMMAND_DESCRIPTIONS } from './commands';
+import { SNAPSHOT_FLAGS } from './snapshot';
import { resolveConfig, ensureStateDir, readVersionHash } from './config';
import * as fs from 'fs';
import * as path from 'path';
@@ 37,6 39,47 @@ function validateAuth(req: Request): boolean {
return header === `Bearer ${AUTH_TOKEN}`;
}
+// ─── Help text (auto-generated from COMMAND_DESCRIPTIONS) ────────
+/**
+ * Build the plain-text body returned for the `help` command.
+ *
+ * Command listings are derived from COMMAND_DESCRIPTIONS (grouped by category)
+ * and the flag listing from SNAPSHOT_FLAGS, so the output tracks both registries.
+ *
+ * @returns Newline-joined help text: banner, one line per category, then the snapshot flags.
+ */
+function generateHelpText(): string {
+  // Group commands by category; show the usage string when one exists, else the bare name
+  const groups = new Map<string, string[]>();
+  for (const [cmd, meta] of Object.entries(COMMAND_DESCRIPTIONS)) {
+    const display = meta.usage || cmd;
+    const list = groups.get(meta.category) ?? [];
+    list.push(display);
+    groups.set(meta.category, list);
+  }
+
+  // Preferred display order for the known categories
+  const categoryOrder = [
+    'Navigation', 'Reading', 'Interaction', 'Inspection',
+    'Visual', 'Snapshot', 'Meta', 'Tabs', 'Server',
+  ];
+  // Categories missing from categoryOrder are appended (in first-seen order) rather than
+  // silently dropped, so a newly introduced category still shows up in the help output
+  const unlisted = [...groups.keys()].filter((cat) => !categoryOrder.includes(cat));
+
+  const lines = ['gstack browse — headless browser for AI agents', '', 'Commands:'];
+  for (const cat of [...categoryOrder, ...unlisted]) {
+    const cmds = groups.get(cat);
+    if (!cmds) continue;
+    lines.push(` ${(cat + ':').padEnd(15)}${cmds.join(', ')}`);
+  }
+
+  // Snapshot flags from source of truth
+  lines.push('');
+  lines.push('Snapshot flags:');
+  const flagPairs: string[] = [];
+  for (const flag of SNAPSHOT_FLAGS) {
+    const label = flag.valueHint ? `${flag.short} ${flag.valueHint}` : flag.short;
+    flagPairs.push(`${label} ${flag.long}`);
+  }
+  // Print two flags per line for compact display
+  for (let i = 0; i < flagPairs.length; i += 2) {
+    const left = flagPairs[i].padEnd(28);
+    const right = flagPairs[i + 1] ?? '';
+    // trimEnd drops the column padding when an odd flag count leaves the right cell empty
+    lines.push(` ${left}${right}`.trimEnd());
+  }
+
+  return lines.join('\n');
+}
+
// ─── Buffer (from buffers.ts) ────────────────────────────────────
import { consoleBuffer, networkBuffer, dialogBuffer, addConsoleEntry, addNetworkEntry, addDialogEntry, type LogEntry, type NetworkEntry, type DialogEntry } from './buffers';
export { consoleBuffer, networkBuffer, dialogBuffer, addConsoleEntry, addNetworkEntry, addDialogEntry, type LogEntry, type NetworkEntry, type DialogEntry };
@@ 191,29 234,7 @@ async function handleCommand(body: any): Promise<Response> {
} else if (META_COMMANDS.has(command)) {
result = await handleMetaCommand(command, args, browserManager, shutdown);
} else if (command === 'help') {
- const helpText = [
- 'gstack browse — headless browser for AI agents',
- '',
- 'Commands:',
- ' Navigation: goto <url>, back, forward, reload',
- ' Interaction: click <sel>, fill <sel> <text>, select <sel> <val>, hover, type, press, scroll, wait',
- ' Read: text [sel], html [sel], links, forms, accessibility, cookies, storage, console, network, perf',
- ' Evaluate: js <expr>, eval <expr>, css <sel> <prop>, attrs <sel>, is <sel> <state>',
- ' Snapshot: snapshot [-i] [-c] [-d N] [-s sel] [-D] [-a] [-o path] [-C]',
- ' Screenshot: screenshot [path], pdf [path], responsive <widths>',
- ' Tabs: tabs, tab <id>, newtab [url], closetab [id]',
- ' State: cookie <set|get|clear>, cookie-import <json>, cookie-import-browser [browser]',
- ' Headers: header <set|clear> [name] [value], useragent [string]',
- ' Upload: upload <sel> <file1> [file2...]',
- ' Dialogs: dialog, dialog-accept [text], dialog-dismiss',
- ' Meta: status, stop, restart, diff, chain, help',
- '',
- 'Snapshot flags:',
- ' -i interactive only -c compact (remove empty nodes)',
- ' -d N limit depth -s sel scope to CSS selector',
- ' -D diff vs previous -a annotated screenshot with ref labels',
- ' -o path output file -C cursor-interactive elements',
- ].join('\n');
+ const helpText = generateHelpText();
return new Response(helpText, {
status: 200,
headers: { 'Content-Type': 'text/plain' },
M browse/src/snapshot.ts => browse/src/snapshot.ts +4 -4
@@ 57,11 57,11 @@ export const SNAPSHOT_FLAGS: Array<{
}> = [
{ short: '-i', long: '--interactive', description: 'Interactive elements only (buttons, links, inputs) with @e refs', optionKey: 'interactive' },
{ short: '-c', long: '--compact', description: 'Compact (no empty structural nodes)', optionKey: 'compact' },
- { short: '-d', long: '--depth', description: 'Limit depth', takesValue: true, valueHint: '<N>', optionKey: 'depth' },
+ { short: '-d', long: '--depth', description: 'Limit tree depth (0 = root only, default: unlimited)', takesValue: true, valueHint: '<N>', optionKey: 'depth' },
{ short: '-s', long: '--selector', description: 'Scope to CSS selector', takesValue: true, valueHint: '<sel>', optionKey: 'selector' },
- { short: '-D', long: '--diff', description: 'Diff against previous snapshot (what changed?)', optionKey: 'diff' },
- { short: '-a', long: '--annotate', description: 'Annotated screenshot with ref labels', optionKey: 'annotate' },
- { short: '-o', long: '--output', description: 'Output path for screenshot', takesValue: true, valueHint: '<path>', optionKey: 'outputPath' },
+ { short: '-D', long: '--diff', description: 'Unified diff against previous snapshot (first call stores baseline)', optionKey: 'diff' },
+ { short: '-a', long: '--annotate', description: 'Annotated screenshot with red overlay boxes and ref labels', optionKey: 'annotate' },
+ { short: '-o', long: '--output', description: 'Output path for annotated screenshot (default: /tmp/browse-annotated.png)', takesValue: true, valueHint: '<path>', optionKey: 'outputPath' },
{ short: '-C', long: '--cursor-interactive', description: 'Cursor-interactive elements (@c refs — divs with pointer, onclick)', optionKey: 'cursorInteractive' },
];
M scripts/gen-skill-docs.ts => scripts/gen-skill-docs.ts +14 -3
@@ 64,20 64,31 @@ function generateSnapshotFlags(): string {
for (const flag of SNAPSHOT_FLAGS) {
const label = flag.valueHint ? `${flag.short} ${flag.valueHint}` : flag.short;
- lines.push(`${label.padEnd(10)}${flag.description}`);
+ lines.push(`${label.padEnd(10)}${flag.long.padEnd(24)}${flag.description}`);
}
lines.push('```');
lines.push('');
- lines.push('Combine flags: `$B snapshot -i -a -C -o /tmp/annotated.png`');
+ lines.push('All flags can be combined freely. `-o` only applies when `-a` is also used.');
+ lines.push('Example: `$B snapshot -i -a -C -o /tmp/annotated.png`');
lines.push('');
- lines.push('After snapshot, use @refs everywhere:');
+ lines.push('**Ref numbering:** @e refs are assigned sequentially (@e1, @e2, ...) in tree order.');
+ lines.push('@c refs from `-C` are numbered separately (@c1, @c2, ...).');
+ lines.push('');
+ lines.push('After snapshot, use @refs as selectors in any command:');
lines.push('```bash');
lines.push('$B click @e3 $B fill @e4 "value" $B hover @e1');
lines.push('$B html @e2 $B css @e5 "color" $B attrs @e6');
lines.push('$B click @c1 # cursor-interactive ref (from -C)');
lines.push('```');
lines.push('');
+ lines.push('**Output format:** indented accessibility tree with @ref IDs, one element per line.');
+ lines.push('```');
+ lines.push(' @e1 [heading] "Welcome" [level=1]');
+ lines.push(' @e2 [textbox] "Email"');
+ lines.push(' @e3 [button] "Submit"');
+ lines.push('```');
+ lines.push('');
lines.push('Refs are invalidated on navigation — run `snapshot` again after `goto`.');
return lines.join('\n');
M test/gen-skill-docs.test.ts => test/gen-skill-docs.test.ts +8 -0
@@ 139,6 139,14 @@ describe('description quality evals', () => {
}
});
+ // Guard: descriptions must not contain pipe (breaks markdown table cells).
+ // Usage strings are rendered inside backticks in the table and are not checked here.
+ // NOTE(review): GFM still splits a cell on an unescaped `|` inside a code span — confirm
+ // that usages such as `wait <sel|--networkidle|--load>` render as intended.
+ test('no command description contains pipe character', () => {
+   const offenders = Object.entries(COMMAND_DESCRIPTIONS)
+     .filter(([, meta]) => meta.description.includes('|'))
+     .map(([cmd]) => cmd);
+   // Comparing against [] makes a failure list every offending command by name
+   expect(offenders).toEqual([]);
+ });
+
// Guard: generated output uses → not ->
test('generated SKILL.md uses unicode arrows', () => {
const content = fs.readFileSync(path.join(ROOT, 'SKILL.md'), 'utf-8');
M test/skill-llm-eval.test.ts => test/skill-llm-eval.test.ts +3 -3
@@ 7,7 7,7 @@
* Requires: ANTHROPIC_API_KEY env var
* Run: ANTHROPIC_API_KEY=sk-... bun test test/skill-llm-eval.test.ts
*
- * Cost: ~$0.01-0.03 per run (haiku)
+ * Cost: ~$0.05-0.15 per run (sonnet)
*/
import { describe, test, expect } from 'bun:test';
@@ 30,7 30,7 @@ async function judge(section: string, prompt: string): Promise<JudgeScore> {
const client = new Anthropic();
const response = await client.messages.create({
- model: 'claude-haiku-4-5-20251001',
+ model: 'claude-sonnet-4-6',
max_tokens: 1024,
messages: [{
role: 'user',
@@ 158,7 158,7 @@ describeEval('LLM-as-judge quality evals', () => {
const client = new Anthropic();
const response = await client.messages.create({
- model: 'claude-haiku-4-5-20251001',
+ model: 'claude-sonnet-4-6',
max_tokens: 1024,
messages: [{
role: 'user',
M test/skill-validation.test.ts => test/skill-validation.test.ts +53 -0
@@ 80,6 80,59 @@ describe('Command registry consistency', () => {
});
});
+describe('Usage string consistency', () => {
+ // Reduce a usage string to its structural skeleton so two usages can be compared.
+ // Parenthetical hints are removed, every <param> becomes <>, every [optional] becomes [],
+ // and whitespace runs collapse to a single space.
+ // Separator differences (e.g., <name>:<value> vs <name> <value>) survive the reduction,
+ // while naming differences (e.g., <sel> vs <selector>) do not.
+ function skeleton(usage: string): string {
+   // Applied in order: hints first, then placeholders, then whitespace
+   const rewrites: Array<[RegExp, string]> = [
+     [/\(.*?\)/g, ''],        // strip parenthetical hints like (e.g., Enter, Tab)
+     [/<[^>]*>/g, '<>'],      // normalize <param-name> → <>
+     [/\[[^\]]*\]/g, '[]'],   // normalize [optional] → []
+     [/\s+/g, ' '],           // collapse whitespace
+   ];
+   let normalized = usage;
+   for (const [pattern, replacement] of rewrites) {
+     normalized = normalized.replace(pattern, replacement);
+   }
+   return normalized.trim();
+ }
+
+ // Cross-check Usage: patterns in implementation against COMMAND_DESCRIPTIONS
+ test('implementation Usage: structural format matches COMMAND_DESCRIPTIONS', () => {
+   // Source files scanned for "Usage: browse ..." errors
+   const implFiles = ['write-commands.ts', 'read-commands.ts', 'meta-commands.ts']
+     .map((name) => path.join(ROOT, 'browse', 'src', name));
+
+   // Extract "Usage: browse <pattern>" from throw new Error(...) calls
+   const usagePattern = /throw new Error\(['"`]Usage:\s*browse\s+(.+?)['"`]\)/g;
+   const implUsages = new Map<string, string>();
+
+   for (const file of implFiles) {
+     const source = fs.readFileSync(file, 'utf-8');
+     for (const hit of source.matchAll(usagePattern)) {
+       // Keep the text before the first literal \n escape; its first word is the command name
+       const usage = hit[1].split('\\n')[0].trim();
+       const [cmd] = usage.split(/\s/);
+       // A later match for the same command replaces the earlier one
+       implUsages.set(cmd, usage);
+     }
+   }
+
+   // Compare structural skeletons; commands with no documented usage are skipped
+   const mismatches: string[] = [];
+   implUsages.forEach((implUsage, cmd) => {
+     const desc = COMMAND_DESCRIPTIONS[cmd];
+     if (!desc?.usage) return;
+     const descSkel = skeleton(desc.usage);
+     const implSkel = skeleton(implUsage);
+     if (descSkel !== implSkel) {
+       mismatches.push(`${cmd}: docs "${desc.usage}" (${descSkel}) vs impl "${implUsage}" (${implSkel})`);
+     }
+   });
+
+   expect(mismatches).toEqual([]);
+ });
+});
+
describe('Generated SKILL.md freshness', () => {
test('no unresolved {{placeholders}} in generated SKILL.md', () => {
const content = fs.readFileSync(path.join(ROOT, 'SKILL.md'), 'utf-8');