/** * Write commands — navigate and interact with pages (side effects) * * goto, back, forward, reload, click, fill, select, hover, type, * press, scroll, wait, viewport, cookie, header, useragent */ import type { BrowserManager } from './browser-manager'; import { findInstalledBrowsers, importCookies, listSupportedBrowserNames } from './cookie-import-browser'; import { validateNavigationUrl } from './url-validation'; import * as fs from 'fs'; import * as path from 'path'; import { TEMP_DIR, isPathWithin } from './platform'; import { modifyStyle, undoModification, resetModifications, getModificationHistory } from './cdp-inspector'; // Security: Path validation for screenshot output const SAFE_DIRECTORIES = [TEMP_DIR, process.cwd()]; function validateOutputPath(filePath: string): void { const resolved = path.resolve(filePath); const isSafe = SAFE_DIRECTORIES.some(dir => isPathWithin(resolved, dir)); if (!isSafe) { throw new Error(`Path must be within: ${SAFE_DIRECTORIES.join(', ')}`); } } /** * Aggressive page cleanup selectors and heuristics. * Goal: make the page readable and clean while keeping it recognizable. * Inspired by uBlock Origin filter lists, Readability.js, and reader mode heuristics. */ const CLEANUP_SELECTORS = { ads: [ // Google Ads 'ins.adsbygoogle', '[id^="google_ads"]', '[id^="div-gpt-ad"]', 'iframe[src*="doubleclick"]', 'iframe[src*="googlesyndication"]', '[data-google-query-id]', '.google-auto-placed', // Generic ad patterns (uBlock Origin common filters) '[class*="ad-banner"]', '[class*="ad-wrapper"]', '[class*="ad-container"]', '[class*="ad-slot"]', '[class*="ad-unit"]', '[class*="ad-zone"]', '[class*="ad-placement"]', '[class*="ad-holder"]', '[class*="ad-block"]', '[class*="adbox"]', '[class*="adunit"]', '[class*="adwrap"]', '[id*="ad-banner"]', '[id*="ad-wrapper"]', '[id*="ad-container"]', '[id*="ad-slot"]', '[id*="ad_banner"]', '[id*="ad_container"]', '[data-ad]', '[data-ad-slot]', '[data-ad-unit]', '[data-adunit]', '[class*="sponsored"]', '[class*="Sponsored"]', '.ad', '.ads', '.advert', '.advertisement', '#ad', '#ads', '#advert', '#advertisement', // Common ad network iframes 'iframe[src*="amazon-adsystem"]', 'iframe[src*="outbrain"]', 'iframe[src*="taboola"]', 'iframe[src*="criteo"]', 'iframe[src*="adsafeprotected"]', 'iframe[src*="moatads"]', // Promoted/sponsored content '[class*="promoted"]', '[class*="Promoted"]', '[data-testid*="promo"]', '[class*="native-ad"]', // Empty ad placeholders (divs with only ad classes, no real content) 'aside[class*="ad"]', 'section[class*="ad-"]', ], cookies: [ // Cookie consent frameworks '[class*="cookie-consent"]', '[class*="cookie-banner"]', '[class*="cookie-notice"]', '[id*="cookie-consent"]', '[id*="cookie-banner"]', '[id*="cookie-notice"]', '[class*="consent-banner"]', '[class*="consent-modal"]', '[class*="consent-wall"]', '[class*="gdpr"]', '[id*="gdpr"]', '[class*="GDPR"]', '[class*="CookieConsent"]', '[id*="CookieConsent"]', // OneTrust (very common) '#onetrust-consent-sdk', '.onetrust-pc-dark-filter', '#onetrust-banner-sdk', // Cookiebot '#CybotCookiebotDialog', '#CybotCookiebotDialogBodyUnderlay', // TrustArc / TRUSTe '#truste-consent-track', '.truste_overlay', '.truste_box_overlay', // Quantcast '.qc-cmp2-container', '#qc-cmp2-main', // Generic patterns '[class*="cc-banner"]', '[class*="cc-window"]', '[class*="cc-overlay"]', '[class*="privacy-banner"]', '[class*="privacy-notice"]', '[id*="privacy-banner"]', '[id*="privacy-notice"]', '[class*="accept-cookies"]', '[id*="accept-cookies"]', ], overlays: [ // Paywall / subscription overlays '[class*="paywall"]', '[class*="Paywall"]', '[id*="paywall"]', '[class*="subscribe-wall"]', '[class*="subscription-wall"]', '[class*="meter-wall"]', '[class*="regwall"]', '[class*="reg-wall"]', // Newsletter / signup popups '[class*="newsletter-popup"]', '[class*="newsletter-modal"]', '[class*="signup-modal"]', '[class*="signup-popup"]', '[class*="email-capture"]', '[class*="lead-capture"]', '[class*="popup-modal"]', '[class*="modal-overlay"]', // Interstitials '[class*="interstitial"]', '[id*="interstitial"]', // Push notification prompts '[class*="push-notification"]', '[class*="notification-prompt"]', '[class*="web-push"]', // Survey / feedback popups '[class*="survey-"]', '[class*="feedback-modal"]', '[id*="survey-"]', '[class*="nps-"]', // App download banners '[class*="app-banner"]', '[class*="smart-banner"]', '[class*="app-download"]', '[id*="branch-banner"]', '.smartbanner', // Cross-promotion / "follow us" / "preferred source" widgets '[class*="promo-banner"]', '[class*="cross-promo"]', '[class*="partner-promo"]', '[class*="preferred-source"]', '[class*="google-promo"]', ], clutter: [ // Audio/podcast player widgets (not part of the article text) '[class*="audio-player"]', '[class*="podcast-player"]', '[class*="listen-widget"]', '[class*="everlit"]', '[class*="Everlit"]', 'audio', // bare audio elements // Sidebar games/puzzles widgets '[class*="puzzle"]', '[class*="daily-game"]', '[class*="games-widget"]', '[class*="crossword-promo"]', '[class*="mini-game"]', // "Most Popular" / "Trending" sidebar recirculation (not the top nav trending bar) 'aside [class*="most-popular"]', 'aside [class*="trending"]', 'aside [class*="most-read"]', 'aside [class*="recommended"]', // Related articles / recirculation at bottom '[class*="related-articles"]', '[class*="more-stories"]', '[class*="recirculation"]', '[class*="taboola"]', '[class*="outbrain"]', // Hearst-specific (SF Chronicle, etc.) '[class*="nativo"]', '[data-tb-region]', ], sticky: [ // Handled via JavaScript evaluation, not pure selectors ], social: [ '[class*="social-share"]', '[class*="share-buttons"]', '[class*="share-bar"]', '[class*="social-widget"]', '[class*="social-icons"]', '[class*="share-tools"]', 'iframe[src*="facebook.com/plugins"]', 'iframe[src*="platform.twitter"]', '[class*="fb-like"]', '[class*="tweet-button"]', '[class*="addthis"]', '[class*="sharethis"]', // Follow prompts '[class*="follow-us"]', '[class*="social-follow"]', ], }; export async function handleWriteCommand( command: string, args: string[], bm: BrowserManager ): Promise { const page = bm.getPage(); // Frame-aware target for locator-based operations (click, fill, etc.) const target = bm.getActiveFrameOrPage(); const inFrame = bm.getFrame() !== null; switch (command) { case 'goto': { if (inFrame) throw new Error('Cannot use goto inside a frame. Run \'frame main\' first.'); const url = args[0]; if (!url) throw new Error('Usage: browse goto '); await validateNavigationUrl(url); const response = await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 15000 }); const status = response?.status() || 'unknown'; return `Navigated to ${url} (${status})`; } case 'back': { if (inFrame) throw new Error('Cannot use back inside a frame. Run \'frame main\' first.'); await page.goBack({ waitUntil: 'domcontentloaded', timeout: 15000 }); return `Back → ${page.url()}`; } case 'forward': { if (inFrame) throw new Error('Cannot use forward inside a frame. Run \'frame main\' first.'); await page.goForward({ waitUntil: 'domcontentloaded', timeout: 15000 }); return `Forward → ${page.url()}`; } case 'reload': { if (inFrame) throw new Error('Cannot use reload inside a frame. Run \'frame main\' first.'); await page.reload({ waitUntil: 'domcontentloaded', timeout: 15000 }); return `Reloaded ${page.url()}`; } case 'click': { const selector = args[0]; if (!selector) throw new Error('Usage: browse click '); // Auto-route: if ref points to a real