diff --git a/AGENTS.md b/AGENTS.md index 079fb1a..9d1e61b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -30,6 +30,7 @@ src/ ├── screenshot.rs ├── snapshot.rs ├── read_page.rs # read-page (Readability + HTML→Markdown) + ├── memory.rs # take-heapsnapshot (CDP streaming) + inspect-heapsnapshot-node (offline) ├── evaluate.rs ├── input.rs # click/fill/type/press/hover ├── emulation.rs # emulate (viewport/geolocation/blocklist) @@ -70,6 +71,18 @@ continuously collects `Network.*` and `Runtime.*` events. `console` and All commands default to human-readable text. `--json` and `--toon` (compact, LLM-friendly) produce structured output. Mutually exclusive. +### Offline Commands + +`inspect-heapsnapshot-node` and `kill-daemon` are intercepted early in `run()` +before any Chrome connection or daemon spawn. `inspect-heapsnapshot-node` parses +a local `.heapsnapshot` file purely offline. + +### Path Resolution + +The daemon retains its startup CWD, so the CLI resolves all relative file-path +arguments (`--output`, `--file-path`) to absolute paths in `build_request` +before sending them to the daemon. + ## Build & Test ```bash diff --git a/README.md b/README.md index ec00a7c..72ab50a 100644 --- a/README.md +++ b/README.md @@ -130,10 +130,13 @@ You can also use `--page ` for quick one-offs, or pass the raw hex target |---------|-------------| | `screenshot --output ` | Save screenshot to file | | `screenshot --full-page` | Capture full scrollable page | -| `read-page` | Read page content as clean markdown (extracts main article) | -| `read-page --output ` | Save markdown to file | +| `screenshot --max-width --max-height ` | Downscale screenshot to fit within dimensions | +| `read-page` | Read page content as clean Markdown (extracts main article) | +| `read-page --output ` | Save Markdown to file | | `evaluate [--dialog-action ]` | Run JavaScript (optionally handle dialogs: accept, dismiss, or prompt text) | | `snapshot` | Accessibility tree dump | +| `take-heapsnapshot --output ` | Capture V8 heap snapshot (streamed via CDP) | +| `inspect-heapsnapshot-node --file-path --node-id ` | Inspect a node in a local `.heapsnapshot` file (offline, no Chrome needed) | ### Interaction @@ -226,12 +229,10 @@ The daemon keeps a persistent CDP session on the current page to: - Re-attach to a new target when `--target` changes (the previous target's event buffers are discarded on the switch). ## Source layout - --``` -+```text - src/ - ├── main.rs # Entry point + daemon dispatch +```text +src/ +├── main.rs # Entry point + daemon dispatch ├── lib.rs # CLI (clap) + command routing ├── cdp.rs # Raw CDP over WebSocket (JSON-RPC) + persistent session ├── browser.rs # Auto-connect (DevToolsActivePort) @@ -251,6 +252,7 @@ The daemon keeps a persistent CDP session on the current page to: ├── screenshot.rs ├── snapshot.rs ├── read_page.rs # read-page (Readability extraction + HTML→Markdown) + ├── memory.rs # take-heapsnapshot (CDP streaming) + inspect-heapsnapshot-node (offline) ├── evaluate.rs ├── executor.rs # Command dispatch + persistent-session reuse ├── input.rs # click/fill/type/press/hover diff --git a/src/cdp.rs b/src/cdp.rs index 7774dd5..0efaf2f 100644 --- a/src/cdp.rs +++ b/src/cdp.rs @@ -406,7 +406,7 @@ impl CdpClient { std::mem::take(&mut self.console_events).into() } - fn push_event(&mut self, event: Value) { + pub(crate) fn push_event(&mut self, event: Value) { // Only route events into the persistent buffers when the event's // sessionId matches the persistent page session (flatten-mode events // are tagged with sessionId). Events from ad-hoc sessions (sw-logs diff --git a/src/commands/executor.rs b/src/commands/executor.rs index d3d368c..f12e1c0 100644 --- a/src/commands/executor.rs +++ b/src/commands/executor.rs @@ -36,7 +36,14 @@ pub fn known_args(cmd: &str) -> &'static [&'static str] { "clear_all", "output", ], - "screenshot" => &["output", "format", "full_page"], + "screenshot" => &[ + "output", + "format", + "full_page", + "quality", + "max_width", + "max_height", + ], "evaluate" => &["expression", "dialog_action", "output", "track_navigation"], "click" => &["selector"], "click-at" => &["x", "y"], @@ -46,6 +53,8 @@ pub fn known_args(cmd: &str) -> &'static [&'static str] { "hover" => &["selector"], "snapshot" => &["output"], "read-page" => &["output"], + "take-heapsnapshot" => &["output"], + "inspect-heapsnapshot-node" => &["file_path", "node_id"], "emulate" => &[ "viewport", "device_scale_factor", @@ -97,6 +106,13 @@ fn validate_args(cmd: &str, args: &serde_json::Value) -> Result<()> { } /// Whether a command operates at the browser level (no page session needed). +/// +/// `inspect-heapsnapshot-node` is intentionally excluded: it is intercepted +/// offline in the CLI before any daemon connection is established, so the +/// daemon should never receive it. If it ever does, omitting it here lets it +/// fall through to `inner_execute`'s catch-all `bail!("Unknown command")` +/// rather than hitting the `_ => unreachable!()` arm in the browser-level +/// dispatch and panicking. fn is_browser_level(cmd: &str) -> bool { matches!(cmd, "list-pages" | "new-page" | "sw-logs" | "kill-daemon") } @@ -361,11 +377,21 @@ async fn inner_execute( commands::screenshot::take_screenshot( client, session_id, - args.get("output").and_then(|v| v.as_str()), - args.get("format").and_then(|v| v.as_str()).unwrap_or("png"), - args.get("full_page") - .and_then(|v| v.as_bool()) - .unwrap_or(false), + commands::screenshot::ScreenshotOptions { + output: args.get("output").and_then(|v| v.as_str()).map(String::from), + format: args + .get("format") + .and_then(|v| v.as_str()) + .unwrap_or("png") + .to_string(), + full_page: args + .get("full_page") + .and_then(|v| v.as_bool()) + .unwrap_or(false), + quality: args.get("quality").and_then(|v| v.as_u64()), + max_width: args.get("max_width").and_then(|v| v.as_f64()), + max_height: args.get("max_height").and_then(|v| v.as_f64()), + }, ) .await } @@ -441,6 +467,12 @@ async fn inner_execute( ) .await } + "take-heapsnapshot" => match args.get("output").and_then(|v| v.as_str()) { + Some(output) => { + commands::memory::take_heapsnapshot(client, session_id, output, req.format()).await + } + None => bail!("output required"), + }, "emulate" => { // block/unblock come from the global request fields (the single flag // definition); the emulate handler applies them itself — in the right diff --git a/src/commands/memory.rs b/src/commands/memory.rs new file mode 100644 index 0000000..e56185d --- /dev/null +++ b/src/commands/memory.rs @@ -0,0 +1,384 @@ +use anyhow::{anyhow, bail, Result}; +use serde_json::json; +use std::fs::File; +use std::io::BufReader; + +use crate::cdp::CdpClient; +use crate::result::CommandResult; + +/// Take a heap snapshot of the page and save it to a file. +pub async fn take_heapsnapshot( + client: &mut CdpClient, + session_id: &str, + output: &str, + format: crate::format::OutputFormat, +) -> Result { + use anyhow::Context; + // Write to a temp file in the same directory so a failed/partial stream + // never leaves a corrupt file at the final output path. The temp file is + // renamed to `output` only after the snapshot completes successfully. + let output_path = std::path::Path::new(output); + // Unique temp file (PID-suffixed) in the same directory so concurrent runs + // can't collide, and rename is atomic (same filesystem). + let temp_path = output_path.with_file_name(format!( + ".{}.{}.tmp", + output_path.file_name().unwrap_or_default().to_string_lossy(), + std::process::id(), + )); + // Drop guard ensures the temp file is removed under all termination paths + // — including future cancellation (timeout, client disconnect, Ctrl+C) and + // panics — where the async cleanup below would never run. On the success + // path the file has been renamed away, so `remove_file` is a harmless no-op. + struct TempFileGuard { + path: std::path::PathBuf, + } + impl Drop for TempFileGuard { + fn drop(&mut self) { + let _ = std::fs::remove_file(&self.path); + } + } + let _guard = TempFileGuard { + path: temp_path.clone(), + }; + // Heap snapshots can be tens or hundreds of MB; buffer the writes to avoid a + // syscall per streamed chunk. + let mut file = tokio::io::BufWriter::new( + tokio::fs::File::create(&temp_path) + .await + .with_context(|| format!("Failed to create heap snapshot temp file: {}", temp_path.display()))?, + ); + + // First, let's enable the HeapProfiler. + client.send_to_target(session_id, "HeapProfiler.enable", json!({})) + .await + .context("Failed to enable HeapProfiler via CDP")?; + + let snapshot_result = async { + // Send the takeHeapSnapshot command without blocking so we can process chunks as they stream in + let msg_id = client.send_raw_no_wait( + Some(session_id), + "HeapProfiler.takeHeapSnapshot", + json!({ "reportProgress": false, "treatGlobalObjectsAsRoots": true, "captureNumericValue": true }), + ) + .await + .context("Failed to trigger non-blocking HeapProfiler.takeHeapSnapshot command")?; + + use tokio::io::AsyncWriteExt; + loop { + let text = client.read_text() + .await + .context("Failed to read WebSocket stream message during heap snapshot chunk collection")?; + let event: serde_json::Value = serde_json::from_str(&text) + .context("Failed to parse WebSocket text frame into JSON event")?; + + // Check if this is the completion response for our takeHeapSnapshot command + if event.get("id").and_then(|v| v.as_u64()) == Some(msg_id) { + if let Some(error) = event.get("error") { + bail!( + "CDP error in HeapProfiler.takeHeapSnapshot response: {}", + serde_json::to_string_pretty(error)? + ); + } + break; + } + + let method = event["method"].as_str().unwrap_or(""); + if method == "HeapProfiler.addHeapSnapshotChunk" { + if let Some(chunk) = event["params"]["chunk"].as_str() { + file.write_all(chunk.as_bytes()) + .await + .context("Failed to write snapshot chunk bytes to output file")?; + } + } else if event.get("method").is_some() { + // Route through push_event so Network/Runtime events land in + // network_events/console_events (capped) instead of the generic + // unbounded buffer, and other events get capped too. + client.push_event(event); + } + } + // Flush any buffered snapshot bytes before the writer is dropped; + // BufWriter::drop performs a blocking flush, which we avoid in async code. + file.flush() + .await + .context("Failed to flush buffered heap snapshot bytes to output file")?; + Ok::<(), anyhow::Error>(()) + } + .await; + + let _ = client.send_to_target(session_id, "HeapProfiler.disable", json!({})).await; + + if let Err(e) = snapshot_result { + return Err(e); + } + + // Drop the writer (and its underlying file handle) before the rename: on + // Windows an open handle blocks the move, and even on Unix releasing it + // before the atomic rename is the safe, portable ordering. + drop(file); + + // Atomically move the completed temp file to the final output path. + tokio::fs::rename(&temp_path, output_path) + .await + .with_context(|| format!("Failed to rename temp file to final output: {}", output))?; + + if format.is_text() { + Ok(CommandResult::output(format!( + "Heap snapshot successfully saved to {}", + output + ))) + } else { + let details = json!({ + "success": true, + "output": output, + "message": format!("Heap snapshot successfully saved to {}", output) + }); + Ok(CommandResult::output(crate::format::format_structured(&details, format)?)) + } +} + +#[derive(serde::Deserialize)] +struct MetaDetails { + node_fields: Vec, +} + +#[derive(serde::Deserialize)] +struct SnapshotMeta { + meta: MetaDetails, +} + +#[derive(serde::Deserialize)] +struct HeapSnapshot { + snapshot: SnapshotMeta, + nodes: Vec, + strings: Vec, +} + +/// Parse the JSON heap snapshot and locate details for the given node ID. +/// Returns a tuple of (node_name, self_size). +pub fn parse_node_from_snapshot( + file_path: &str, + node_id: u64, +) -> Result<(String, u64)> { + use anyhow::Context; + let file = File::open(file_path) + .with_context(|| format!("Failed to open heap snapshot file at: {}", file_path))?; + let reader = BufReader::new(file); + let val: HeapSnapshot = serde_json::from_reader(reader) + .context("Failed to deserialize heap snapshot file. Ensure it is valid JSON.")?; + + find_node_in_snapshot(&val, node_id) +} + +/// Pure schema-validation + node-lookup logic, separated from I/O so it can be +/// unit-tested without writing a temp file. +fn find_node_in_snapshot(val: &HeapSnapshot, node_id: u64) -> Result<(String, u64)> { + use anyhow::Context; + let nodes = &val.nodes; + let node_fields = &val.snapshot.meta.node_fields; + + // Find fields offsets within the flat nodes array + let id_offset = node_fields.iter().position(|f| f == "id") + .context("Invalid snapshot schema: 'id' node field meta is missing")?; + let name_offset = node_fields.iter().position(|f| f == "name") + .context("Invalid snapshot schema: 'name' node field meta is missing")?; + let self_size_offset = node_fields.iter().position(|f| f == "self_size") + .context("Invalid snapshot schema: 'self_size' node field meta is missing")?; + let node_size = node_fields.len(); + if node_size == 0 { + bail!("Invalid snapshot: node_fields schema is empty"); + } + + // Iterate over nodes using chunk sizes defined by the schema meta + let mut target_index = None; + let mut current_idx = 0; + while current_idx + id_offset < nodes.len() { + let id = nodes[current_idx + id_offset]; + if id == node_id { + target_index = Some(current_idx); + break; + } + current_idx += node_size; + } + + let target_node_index = match target_index { + Some(idx) => idx, + None => bail!("Node with ID {} not found in snapshot file", node_id), + }; + + if target_node_index + node_size > nodes.len() { + bail!("Corrupted snapshot structure: target node index out of flat bounds"); + } + + let name_str_idx = usize::try_from(nodes[target_node_index + name_offset]) + .ok() + .context("Corrupt snapshot: string index overflow on 32-bit architecture")?; + let name = val.strings.get(name_str_idx).cloned() + .ok_or_else(|| anyhow!("Corrupt snapshot: string index {} out of bounds (strings len {})", name_str_idx, val.strings.len()))?; + let self_size = nodes[target_node_index + self_size_offset]; + + Ok((name, self_size)) +} + +/// Format single node inspection details for display. +pub fn format_node_details( + node_id: u64, + name: &str, + self_size: u64, + format: crate::format::OutputFormat, +) -> Result { + if format.is_text() { + let mut out = String::new(); + out.push_str("nodeId,nodeName,selfSize\n"); + let escaped_name = if name.contains(',') || name.contains('"') || name.contains('\n') || name.contains('\r') { + format!("\"{}\"", name.replace('"', "\"\"")) + } else { + name.to_string() + }; + out.push_str(&format!( + "{},{},{}\n", + node_id, escaped_name, self_size + )); + Ok(out) + } else { + let details = json!({ + "nodeId": node_id, + "nodeName": name, + "selfSize": self_size, + }); + Ok(crate::format::format_structured(&details, format)?) + } +} + +/// Offline variant that doesn't require a Chrome connection. Used by the CLI's +/// early-intercept path so `inspect-heapsnapshot-node` works without a running +/// browser or daemon. +pub async fn inspect_heapsnapshot_node_offline( + file_path: &str, + node_id: u64, + format: crate::format::OutputFormat, +) -> Result { + let file_path_owned = file_path.to_string(); + let (name, self_size) = tokio::task::spawn_blocking(move || { + parse_node_from_snapshot(&file_path_owned, node_id) + }) + .await + .map_err(|e| anyhow!("Failed to execute blocking snapshot parser: {e}"))??; + + let out = format_node_details(node_id, &name, self_size, format)?; + Ok(CommandResult::output(out)) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + use tempfile::NamedTempFile; + + #[test] + fn test_parse_node_from_snapshot() { + let mut file = NamedTempFile::new().unwrap(); + let test_snapshot = json!({ + "snapshot": { + "meta": { + "node_fields": ["id", "name", "self_size", "edge_count"], + "node_types": ["number", "string", "number", "number"] + } + }, + "nodes": [123, 0, 1024, 0, 456, 1, 2048, 0], + "strings": ["TestObject", "AnotherObject"] + }); + write!(file, "{}", test_snapshot.to_string()).unwrap(); + + let (name, size) = parse_node_from_snapshot(file.path().to_str().unwrap(), 456).unwrap(); + assert_eq!(name, "AnotherObject"); + assert_eq!(size, 2048); + } + + #[test] + fn test_find_node_in_snapshot_directly() { + // Exercise the pure helper without going through file I/O. + let snapshot = HeapSnapshot { + snapshot: SnapshotMeta { + meta: MetaDetails { + node_fields: vec!["id".into(), "name".into(), "self_size".into()], + }, + }, + nodes: vec![10, 0, 100, 20, 1, 200], + strings: vec!["Alpha".into(), "Beta".into()], + }; + + let (name, size) = find_node_in_snapshot(&snapshot, 20).unwrap(); + assert_eq!(name, "Beta"); + assert_eq!(size, 200); + } + + #[test] + fn test_find_node_not_found() { + let snapshot = HeapSnapshot { + snapshot: SnapshotMeta { + meta: MetaDetails { + node_fields: vec!["id".into(), "name".into(), "self_size".into()], + }, + }, + nodes: vec![10, 0, 100], + strings: vec!["Alpha".into()], + }; + + assert!(find_node_in_snapshot(&snapshot, 999).is_err()); + } + + #[test] + fn test_find_node_corrupt_string_index() { + // string index 5 is out of bounds (only 1 string exists) + let snapshot = HeapSnapshot { + snapshot: SnapshotMeta { + meta: MetaDetails { + node_fields: vec!["id".into(), "name".into(), "self_size".into()], + }, + }, + nodes: vec![10, 5, 100], + strings: vec!["Alpha".into()], + }; + + let err = find_node_in_snapshot(&snapshot, 10).unwrap_err(); + assert!(err.to_string().contains("out of bounds")); + } + + #[test] + fn test_format_node_details_csv_escaping() { + use crate::format::OutputFormat; + + // Regular name + let out_normal = format_node_details(123, "MyClass", 100, OutputFormat::Text).unwrap(); + assert_eq!(out_normal, "nodeId,nodeName,selfSize\n123,MyClass,100\n"); + + // Name with comma + let out_comma = format_node_details(123, "My,Class", 100, OutputFormat::Text).unwrap(); + assert_eq!(out_comma, "nodeId,nodeName,selfSize\n123,\"My,Class\",100\n"); + + // Name with quotes + let out_quotes = format_node_details(123, "My\"Class", 100, OutputFormat::Text).unwrap(); + assert_eq!(out_quotes, "nodeId,nodeName,selfSize\n123,\"My\"\"Class\",100\n"); + + // Name with newline + let out_nl = format_node_details(123, "My\nClass", 100, OutputFormat::Text).unwrap(); + assert_eq!(out_nl, "nodeId,nodeName,selfSize\n123,\"My\nClass\",100\n"); + } + + #[test] + fn test_format_node_details_structured() { + use crate::format::OutputFormat; + + // JSON format + let out_json = format_node_details(456, "ClassA", 200, OutputFormat::Json).unwrap(); + let parsed: serde_json::Value = serde_json::from_str(&out_json).unwrap(); + assert_eq!(parsed["nodeId"], 456); + assert_eq!(parsed["nodeName"], "ClassA"); + assert_eq!(parsed["selfSize"], 200); + + // TOON format + let out_toon = format_node_details(456, "ClassA", 200, OutputFormat::Toon).unwrap(); + assert!(out_toon.contains("nodeId")); + assert!(out_toon.contains("ClassA")); + } +} diff --git a/src/commands/mod.rs b/src/commands/mod.rs index b183737..f2b4062 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -3,6 +3,7 @@ pub mod emulation; pub mod evaluate; pub mod executor; pub mod input; +pub mod memory; pub mod navigate; pub mod network; pub mod pages; diff --git a/src/commands/screenshot.rs b/src/commands/screenshot.rs index 2e8a1fc..a95f19f 100644 --- a/src/commands/screenshot.rs +++ b/src/commands/screenshot.rs @@ -1,4 +1,4 @@ -use anyhow::Result; +use anyhow::{Context, Result}; use base64::Engine; use serde_json::json; @@ -6,45 +6,112 @@ use crate::cdp::CdpClient; use crate::result::CommandResult; /// Capture a screenshot of the current page. +pub struct ScreenshotOptions { + pub output: Option, + pub format: String, + pub full_page: bool, + pub quality: Option, + pub max_width: Option, + pub max_height: Option, +} + pub async fn take_screenshot( client: &mut CdpClient, session_id: &str, - output: Option<&str>, - format: &str, - full_page: bool, + opts: ScreenshotOptions, ) -> Result { + let ScreenshotOptions { + output, + format, + full_page, + quality, + max_width, + max_height, + } = opts; + // Normalize so case-insensitive input (e.g. "PNG") is handled correctly: + // CDP expects lowercase format values, and the quality check below relies on it. + let format = format.to_ascii_lowercase(); let mut params = json!({ "format": format, - "optimizeForSpeed": true, }); - if full_page { - params["captureBeyondViewport"] = json!(true); + + // optimizeForSpeed trades compression/quality for speed, which would override + // an explicit --quality setting; only enable it when quality isn't requested. + if quality.is_none() { + params["optimizeForSpeed"] = json!(true); + } + + if let Some(q) = quality { + if format != "png" { + params["quality"] = json!(q.min(100)); + } + } + + // src_w/src_h are only needed when a clip will be emitted + // (full-page capture, or downscaling via max_width/max_height). + let mut src_w = 1920.0; + let mut src_h = 1080.0; + // Scroll offsets of the layout viewport. Clip x/y are relative to the + // document origin, so a non-full-page capture of a scrolled viewport must + // use these to frame the visible region. + let mut scroll_x = 0.0; + let mut scroll_y = 0.0; + let needs_metrics = full_page || max_width.is_some() || max_height.is_some(); + + if needs_metrics { + if full_page { + params["captureBeyondViewport"] = json!(true); + } + + // Use Page.getLayoutMetrics instead of Runtime.evaluate: it queries the + // renderer's layout system directly, works on non-HTML pages (PDF viewers, + // chrome://), and avoids a JS execution round-trip. let metrics = client - .send_to_target( - session_id, - "Runtime.evaluate", - json!({ - "expression": "JSON.stringify({width: document.documentElement.scrollWidth, height: document.documentElement.scrollHeight})", - "returnByValue": true, - }), - ) - .await?; - if let Some(val) = metrics["result"]["value"].as_str() { - if let Ok(dims) = serde_json::from_str::(val) { - let w = dims["width"].as_f64().unwrap_or(1920.0); - let h = dims["height"].as_f64().unwrap_or(1080.0); - params["clip"] = json!({ - "x": 0, "y": 0, - "width": w, "height": h, - "scale": 1, - }); + .send_to_target(session_id, "Page.getLayoutMetrics", json!({})) + .await + .context("Failed to query page layout metrics")?; + + if full_page { + // cssContentSize is the full scrollable content area in CSS pixels. + // (The legacy `contentSize` returns DIPs, which are wrong on HiDPI + // or emulated devices.) + if let Some(size) = metrics.get("cssContentSize") { + // Filter non-positive values (empty/unrendered pages, certain + // document types) — they'd produce an invalid CDP clip. + src_w = size["width"].as_f64().filter(|&v| v > 0.0).unwrap_or(1920.0); + src_h = size["height"].as_f64().filter(|&v| v > 0.0).unwrap_or(1080.0); + } + } else { + // cssLayoutViewport.clientWidth/Height is the visible viewport in + // CSS pixels; pageX/pageY are its document-origin scroll offsets. + if let Some(viewport) = metrics.get("cssLayoutViewport") { + src_w = viewport["clientWidth"].as_f64().filter(|&v| v > 0.0).unwrap_or(1920.0); + src_h = viewport["clientHeight"].as_f64().filter(|&v| v > 0.0).unwrap_or(1080.0); + scroll_x = viewport["pageX"].as_f64().unwrap_or(0.0); + scroll_y = viewport["pageY"].as_f64().unwrap_or(0.0); } } } + let clip_scale = clip_scale_factor(src_w, src_h, max_width, max_height); + + // Clip coordinates are relative to the document origin. For full-page + // captures the region starts at the document origin (scroll is irrelevant + // since the whole content is captured). For viewport captures with + // downscaling, the layout viewport's scroll offsets (pageX/pageY) must be + // used so the visible region — not the document's top-left — is framed. + if full_page || clip_scale < 1.0 { + params["clip"] = json!({ + "x": scroll_x, "y": scroll_y, + "width": src_w, "height": src_h, + "scale": clip_scale, + }); + } + let result = client .send_to_target(session_id, "Page.captureScreenshot", params) - .await?; + .await + .context("Failed to capture screenshot via CDP")?; let data_b64 = result["data"] .as_str() @@ -54,7 +121,9 @@ pub async fn take_screenshot( match output { Some(path) => { - tokio::fs::write(path, &bytes).await?; + tokio::fs::write(&path, &bytes) + .await + .with_context(|| format!("Failed to write screenshot to {}", path))?; Ok(CommandResult::output(format!( "Screenshot saved to {path} ({} bytes)", bytes.len() @@ -63,3 +132,69 @@ pub async fn take_screenshot( None => Ok(CommandResult::output(data_b64.to_string())), } } + +/// Compute the downscale factor for a screenshot clip. +/// +/// Returns the smaller of the width and height scale ratios, clamped to <= 1.0 +/// (never upscales). A `None` dimension, or a non-positive max/src value, yields +/// 1.0 for that axis (no scaling). Returns 1.0 when neither dimension is set. +fn clip_scale_factor(src_w: f64, src_h: f64, max_width: Option, max_height: Option) -> f64 { + let width_scale = match max_width { + Some(max_w) if max_w > 0.0 && src_w > 0.0 => (max_w / src_w).min(1.0), + _ => 1.0, + }; + let height_scale = match max_height { + Some(max_h) if max_h > 0.0 && src_h > 0.0 => (max_h / src_h).min(1.0), + _ => 1.0, + }; + width_scale.min(height_scale) +} + +#[cfg(test)] +mod tests { + use super::clip_scale_factor; + + #[test] + fn no_max_dimensions_returns_one() { + assert_eq!(clip_scale_factor(1920.0, 1080.0, None, None), 1.0); + } + + #[test] + fn zero_max_is_treated_as_no_scaling() { + assert_eq!(clip_scale_factor(1920.0, 1080.0, Some(0.0), Some(0.0)), 1.0); + } + + #[test] + fn negative_max_is_treated_as_no_scaling() { + assert_eq!(clip_scale_factor(1920.0, 1080.0, Some(-100.0), Some(-50.0)), 1.0); + } + + #[test] + fn zero_source_is_treated_as_no_scaling() { + assert_eq!(clip_scale_factor(0.0, 0.0, Some(100.0), Some(100.0)), 1.0); + } + + #[test] + fn one_sided_width_downscales_only_width() { + // src 1920x1080, max_width 960 → width_scale 0.5, height_scale 1.0 → 0.5 + assert_eq!(clip_scale_factor(1920.0, 1080.0, Some(960.0), None), 0.5); + } + + #[test] + fn one_sided_height_downscales_only_height() { + // src 1920x1080, max_height 540 → height_scale 0.5, width_scale 1.0 → 0.5 + assert_eq!(clip_scale_factor(1920.0, 1080.0, None, Some(540.0)), 0.5); + } + + #[test] + fn both_dimensions_uses_the_smaller_ratio() { + // src 2000x1000, max 1000x250 → width_scale 0.5, height_scale 0.25 → 0.25 + assert_eq!(clip_scale_factor(2000.0, 1000.0, Some(1000.0), Some(250.0)), 0.25); + } + + #[test] + fn never_upscales_when_max_exceeds_source() { + // src 800x600, max 1600x1200 → both ratios > 1.0, clamped to 1.0 + assert_eq!(clip_scale_factor(800.0, 600.0, Some(1600.0), Some(1200.0)), 1.0); + } +} diff --git a/src/lib.rs b/src/lib.rs index 1b0d51a..a41c692 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -157,6 +157,15 @@ pub enum Commands { /// Capture full scrollable page #[arg(long)] full_page: bool, + /// Compression quality (0-100) for jpeg and webp formats + #[arg(long)] + quality: Option, + /// Max width in pixels to downscale the captured screenshot + #[arg(long)] + max_width: Option, + /// Max height in pixels to downscale the captured screenshot + #[arg(long)] + max_height: Option, }, /// Evaluate a JavaScript expression @@ -212,6 +221,25 @@ pub enum Commands { output: Option, }, + /// Take a heap snapshot of the page and save it to a file + #[command(name = "take-heapsnapshot")] + TakeHeapSnapshot { + /// Output file path to save the heap snapshot (e.g. heap.heapsnapshot) + #[arg(long, short)] + output: String, + }, + + /// Inspect a specific node ID details from a local heap snapshot + #[command(name = "inspect-heapsnapshot-node")] + InspectHeapSnapshotNode { + /// Path to the .heapsnapshot file to analyze + #[arg(long, short)] + file_path: String, + /// Node ID to inspect + #[arg(long, short)] + node_id: u64, + }, + /// Manage page emulation (viewport, geolocation, etc.) Emulate { /// Set viewport size as WxH (e.g. 1280x720) @@ -336,6 +364,8 @@ impl Cli { Commands::Hover { .. } => "hover", Commands::Snapshot { .. } => "snapshot", Commands::ReadPage { .. } => "read-page", + Commands::TakeHeapSnapshot { .. } => "take-heapsnapshot", + Commands::InspectHeapSnapshotNode { .. } => "inspect-heapsnapshot-node", Commands::Emulate { .. } => "emulate", Commands::WaitFor { .. } => "wait-for", Commands::List3pTools => "list-3p-tools", @@ -349,7 +379,33 @@ impl Cli { } /// Build a DaemonRequest from parsed CLI args. -fn build_request(cli: &Cli) -> DaemonRequest { +/// Resolve a relative path to an absolute one using the CLI process's CWD. +/// The daemon retains its own startup CWD, so relative paths sent to it would +/// resolve against the wrong directory. +fn absolutize_path(path: &str) -> Result { + let p = std::path::Path::new(path); + if p.is_absolute() { + Ok(path.to_string()) + } else { + // Fail loudly if the CWD can't be resolved: silently falling back to an + // empty path would send a bogus relative path to the daemon, which would + // resolve it against the daemon's (different) startup CWD. + let cwd = std::env::current_dir() + .map_err(|e| anyhow::anyhow!("Failed to resolve CLI working directory to absolutize path '{path}': {e}"))?; + Ok(cwd.join(p).to_string_lossy().to_string()) + } +} + +fn build_request(cli: &Cli) -> Result { + // Resolve relative file paths to absolute so the daemon (which retains its + // own startup CWD) resolves them correctly. + let absolutize = |p: &Option| -> Result> { + match p { + None => Ok(None), + Some(s) => Ok(Some(absolutize_path(s)?)), + } + }; + let (command, args) = match &cli.command { Commands::ListPages => ("list-pages", json!({})), Commands::Navigate { @@ -379,7 +435,7 @@ fn build_request(cli: &Cli) -> DaemonRequest { "geolocation": geolocation, "accuracy": accuracy, "clear_all": clear_all, - "output": output + "output": absolutize(output)? }), ), Commands::NewPage { @@ -408,13 +464,27 @@ fn build_request(cli: &Cli) -> DaemonRequest { Commands::SelectPage { id_or_index } => { ("select-page", json!({ "id_or_index": id_or_index })) } + Commands::TakeHeapSnapshot { output } => ( + "take-heapsnapshot", + json!({ "output": absolutize_path(output)? }), + ), Commands::Screenshot { output, format, full_page, + quality, + max_width, + max_height, } => ( "screenshot", - json!({"output": output, "format": format, "full_page": full_page}), + json!({ + "output": absolutize(output)?, + "format": format, + "full_page": full_page, + "quality": quality, + "max_width": max_width, + "max_height": max_height + }), ), Commands::Evaluate { expression, @@ -423,7 +493,7 @@ fn build_request(cli: &Cli) -> DaemonRequest { track_navigation, } => ( "evaluate", - json!({"expression": expression, "dialog_action": dialog_action, "output": output, "track_navigation": track_navigation}), + json!({"expression": expression, "dialog_action": dialog_action, "output": absolutize(output)?, "track_navigation": track_navigation}), ), Commands::Click { selector } => ("click", json!({"selector": selector})), Commands::ClickAt { x, y } => ("click-at", json!({"x": x, "y": y})), @@ -435,8 +505,8 @@ fn build_request(cli: &Cli) -> DaemonRequest { } Commands::PressKey { key } => ("press-key", json!({"key": key})), Commands::Hover { selector } => ("hover", json!({"selector": selector})), - Commands::Snapshot { output } => ("snapshot", json!({"output": output})), - Commands::ReadPage { output } => ("read-page", json!({"output": output})), + Commands::Snapshot { output } => ("snapshot", json!({"output": absolutize(output)?})), + Commands::ReadPage { output } => ("read-page", json!({"output": absolutize(output)?})), Commands::Emulate { viewport, device_scale_factor, @@ -472,9 +542,12 @@ fn build_request(cli: &Cli) -> DaemonRequest { json!({"duration": duration, "extension_id": extension_id}), ), Commands::KillDaemon => unreachable!("KillDaemon is handled before build_request"), + Commands::InspectHeapSnapshotNode { .. } => { + unreachable!("InspectHeapSnapshotNode is handled before build_request") + } }; - DaemonRequest { + Ok(DaemonRequest { command: command.to_string(), args, page: cli.page, @@ -483,7 +556,7 @@ fn build_request(cli: &Cli) -> DaemonRequest { output_format: Some(cli.output_format()), block_url: cli.block_url.clone(), unblock_url: cli.unblock_url.clone(), - } + }) } fn print_output(output: &str, navigated_to: Option<&str>, target_id: Option<&str>) { @@ -638,13 +711,26 @@ pub async fn run() -> Result<()> { return Ok(()); } + // Handle inspect-heapsnapshot-node without connecting to Chrome — it's a + // purely offline operation that parses a local .heapsnapshot file. + if let Commands::InspectHeapSnapshotNode { file_path, node_id } = &cli.command { + let result = commands::memory::inspect_heapsnapshot_node_offline( + file_path, + *node_id, + cli.output_format(), + ) + .await?; + print_result(&result); + return Ok(()); + } + let ws_url = browser::resolve_ws_url( cli.ws_endpoint.as_deref(), cli.user_data_dir.as_deref(), &cli.channel, )?; - let request = build_request(&cli); + let request = build_request(&cli)?; // Try daemon first if let Ok(resp) = client::send_to_daemon(&request).await { @@ -867,13 +953,30 @@ async fn run_direct(cli: &Cli, ws_url: &str) -> Result { output, format, full_page, + quality, + max_width, + max_height, } => { commands::screenshot::take_screenshot( &mut client, &session_id, - output.as_deref(), - format, - *full_page, + commands::screenshot::ScreenshotOptions { + output: output.clone(), + format: format.clone(), + full_page: *full_page, + quality: *quality, + max_width: *max_width, + max_height: *max_height, + }, + ) + .await + } + Commands::TakeHeapSnapshot { output } => { + commands::memory::take_heapsnapshot( + &mut client, + &session_id, + output, + cli.output_format(), ) .await } diff --git a/tests/telemetry_tests.rs b/tests/telemetry_tests.rs index 5c39f3f..af53385 100644 --- a/tests/telemetry_tests.rs +++ b/tests/telemetry_tests.rs @@ -2,7 +2,9 @@ #[test] fn test_telemetry_module_accessible() { use chrome_devtools_cli::telemetry; - assert!(true); + // Reference the module so the import isn't flagged unused; this test exists + // to assert the module path compiles (i.e. is publicly exported). + let _ = telemetry::init_logger_once; } /// Test that TelemetryLogger writes a valid JSON entry and cleans up.