diff --git a/hive-c0re/assets/app.js b/hive-c0re/assets/app.js
index 3364cee..2c16c82 100644
--- a/hive-c0re/assets/app.js
+++ b/hive-c0re/assets/app.js
@@ -48,18 +48,92 @@
   // perspective (we'd need to know which agent the message is about
   // to translate it). Prefer `/agents//state/...` in agent
   // outputs and the link will resolve.
-  // Each branch insists the final segment looks like a filename:
-  // at least one non-dot char, a literal dot, then an extension
-  // (`[\w-]+\.[\w.-]+`). That catches the common case (`notes.md`,
-  // `2026-01.log`, `foo.bar.baz`) while skipping bare directory
-  // names like `/agents/foo/state/notes` whether or not they carry
-  // a trailing slash. Misses extensionless files (`README`,
-  // `Makefile`) — accepted trade-off; the /api/state-file endpoint
-  // still serves them if the operator types the path manually.
-  // The endpoint also refuses non-files at the server level; this
-  // is the front-end peer so the operator doesn't see a dead link
-  // they'll just get an error from on click.
-  const PATH_RE = /(\/var\/lib\/hyperhive\/agents\/[\w.-]+\/state\/(?:[\w.-]+\/)*[\w-]+\.[\w.-]+|\/var\/lib\/hyperhive\/shared\/(?:[\w.-]+\/)*[\w-]+\.[\w.-]+|\/agents\/[\w.-]+\/state\/(?:[\w.-]+\/)*[\w-]+\.[\w.-]+|\/shared\/(?:[\w.-]+\/)*[\w-]+\.[\w.-]+)/g;
+  // Match anything that *looks* like a path under the allow-listed
+  // roots; the server endpoint `/api/state-file/check` is the
+  // authority on whether each match is actually a file. Optimistic
+  // anchors render first; a batched validation request downgrades
+  // non-files (dirs, missing, forbidden subtrees) back to plain
+  // text. No client-side filename heuristics — the regex's job is
+  // just "spot a path-shaped token".
+  const PATH_RE = /(\/var\/lib\/hyperhive\/agents\/[\w.-]+\/state\/[\w./-]+|\/var\/lib\/hyperhive\/shared\/[\w./-]+|\/agents\/[\w.-]+\/state\/[\w./-]+|\/shared\/[\w./-]+)/g;
+
+  // Session-scoped truthiness cache for paths the server has
+  // already verified. `true` = render as a clickable anchor;
+  // `false` = strip the anchor on next reflow. Cleared only on
+  // page reload — agents creating new files mid-session show up
+  // next time the path is referenced.
+  const pathValidity = new Map();
+  // Anchors awaiting validation. Keyed by path so we can rewrite
+  // every anchor for the same path in one shot when the result
+  // lands. Each entry: { anchor, details } so we can also drop
+  // the sibling preview when the path turns out to be invalid.
+  const pendingAnchors = new Map();
+  let validateTimer = null;
+  function queuePathForValidation(path, anchor, details) {
+    if (!pendingAnchors.has(path)) pendingAnchors.set(path, []);
+    pendingAnchors.get(path).push({ anchor, details });
+    if (validateTimer) clearTimeout(validateTimer);
+    // Coalesce bursts (a backfill replay can emit dozens of rows
+    // in one tick) into a single batched request.
+    validateTimer = setTimeout(flushPathValidation, 50);
+  }
+  // Upper bound on paths per validation request. Must not exceed
+  // `MAX_PATHS` in the server's `post_state_file_check`, which
+  // silently ignores anything past its cap.
+  const MAX_PATHS_PER_CHECK = 64;
+  async function flushPathValidation() {
+    validateTimer = null;
+    const paths = Array.from(pendingAnchors.keys());
+    if (!paths.length) return;
+    // Snapshot the queue + clear it before we await — additional
+    // anchors that land while the request is in flight queue into
+    // a fresh batch.
+    const snapshot = new Map(pendingAnchors);
+    pendingAnchors.clear();
+    const results = {};
+    try {
+      for (let i = 0; i < paths.length; i += MAX_PATHS_PER_CHECK) {
+        const resp = await fetch('/api/state-file/check', {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({ paths: paths.slice(i, i + MAX_PATHS_PER_CHECK) }),
+        });
+        if (!resp.ok) {
+          // An HTTP error is not a verdict: skip this chunk so its
+          // anchors stay optimistic instead of being stripped.
+          console.warn('path validation batch rejected', resp.status);
+          continue;
+        }
+        Object.assign(results, (await resp.json()).results || {});
+      }
+    } catch (err) {
+      // On transport failure leave unanswered anchors as-is —
+      // clicking them will surface the real error from
+      // /api/state-file inline. Verdicts already received from
+      // earlier chunks are still applied below.
+      console.warn('path validation batch failed', err);
+    }
+    for (const [path, entries] of snapshot) {
+      const verdict = results[path];
+      // No explicit boolean means the server never judged this path
+      // (failed or truncated request): keep the anchor and leave the
+      // cache untouched so a later reference re-queues it.
+      if (typeof verdict !== 'boolean') continue;
+      pathValidity.set(path, verdict);
+      if (verdict) continue;
+      // Downgrade every pending anchor for this path back to
+      // plain text + drop its sibling `<details>` preview.
+      for (const { anchor, details } of entries) {
+        if (anchor.parentNode) {
+          anchor.parentNode.replaceChild(document.createTextNode(path), anchor);
+        }
+        if (details && details.parentNode) {
+          details.parentNode.removeChild(details);
+        }
+      }
+    }
+  }
+
   async function fetchStateFile(path) {
     const resp = await fetch('/api/state-file?path=' + encodeURIComponent(path));
     const text = await resp.text();
@@ -99,6 +173,9 @@
   // Append `text` to `parent` as a mix of text nodes + path anchors.
   // Returns the array of generated `<details>` previews so the
   // caller can append them as block siblings under the row.
+  // Anchors render optimistically; paths unseen this session are
+  // queued for batch validation, and the server's verdict either
+  // confirms or strips them via `flushPathValidation`.
   function appendLinkified(parent, text) {
     const previews = [];
     if (text == null) return previews;
@@ -110,9 +187,21 @@
       if (m.index > lastIdx) {
         parent.appendChild(document.createTextNode(str.slice(lastIdx, m.index)));
       }
-      const { anchor, details } = makePathPreview(m[0]);
-      parent.appendChild(anchor);
-      previews.push(details);
+      const path = m[0];
+      const cached = pathValidity.get(path);
+      if (cached === false) {
+        // Already known to be a non-file — render plain text, no
+        // anchor, no preview. The text still shows up so the
+        // operator sees the path; it's just not clickable.
+        parent.appendChild(document.createTextNode(path));
+      } else {
+        const { anchor, details } = makePathPreview(path);
+        parent.appendChild(anchor);
+        previews.push(details);
+        // Unknown paths queue for validation; known-good ones
+        // skip the roundtrip entirely.
+        if (cached !== true) queuePathForValidation(path, anchor, details);
+      }
       lastIdx = m.index + m[0].length;
     }
     if (lastIdx < str.length) {
diff --git a/hive-c0re/src/dashboard.rs b/hive-c0re/src/dashboard.rs
index d854691..73618cf 100644
--- a/hive-c0re/src/dashboard.rs
+++ b/hive-c0re/src/dashboard.rs
@@ -55,6 +55,7 @@ pub async fn serve(port: u16, coord: Arc) -> Result<()> {
         .route("/purge-tombstone/{name}", post(post_purge_tombstone))
         .route("/api/journal/{name}", get(get_journal))
         .route("/api/state-file", get(get_state_file))
+        .route("/api/state-file/check", post(post_state_file_check))
         .route("/api/reminders", get(api_reminders))
         .route("/cancel-reminder/{id}", post(post_cancel_reminder))
         .route("/api/agent-config/{name}", get(get_agent_config))
@@ -911,15 +912,17 @@ struct StateFileQuery {
 /// traversal and symlink games can't escape the roots. Files larger
 /// than `MAX_BYTES` are truncated with a banner so a runaway log
 /// can't OOM the browser.
-async fn get_state_file(
-    axum::extract::Query(q): axum::extract::Query<StateFileQuery>,
-) -> Response {
-    const MAX_BYTES: usize = 1 << 20; // 1 MiB
+/// Resolve a caller-supplied path string to a canonical host path
+/// that has been verified against the allow-list. Returns `Err`
+/// with a human-readable reason for every failure mode (path
+/// outside roots, canonicalize failure, escape via symlink,
+/// per-agent subdir not `state`). Shared by `get_state_file` (read)
+/// and `post_state_file_check` (existence probe) so both endpoints
+/// apply identical security rules.
+fn resolve_state_path(raw: &str) -> std::result::Result<std::path::PathBuf, String> {
     const AGENTS_ROOT: &str = "/var/lib/hyperhive/agents";
     const SHARED_ROOT: &str = "/var/lib/hyperhive/shared";
-    let raw = q.path.trim();
-    // Translate the container-view forms to host paths so the
-    // allow-list check has a single canonical shape to match.
+    let raw = raw.trim();
     let mapped: std::path::PathBuf = if let Some(rest) = raw.strip_prefix("/agents/") {
         std::path::PathBuf::from(format!("{AGENTS_ROOT}/{rest}"))
     } else if let Some(rest) = raw.strip_prefix("/shared/") {
@@ -927,38 +930,37 @@ async fn get_state_file(
     } else if raw.starts_with(AGENTS_ROOT) || raw.starts_with(SHARED_ROOT) {
         std::path::PathBuf::from(raw)
     } else {
-        return error_response(&format!("state-file: path not in allow-list: {raw}"));
+        return Err(format!("path not in allow-list: {raw}"));
     };
-    // Canonicalise so `..` / symlinks resolve before the prefix
-    // check. A failure here means the path doesn't exist on disk
-    // (or we can't reach it) — surface the underlying error.
-    let canonical = match std::fs::canonicalize(&mapped) {
-        Ok(p) => p,
-        Err(e) => return error_response(&format!("state-file: {}: {e}", mapped.display())),
-    };
-    let allowed = canonical.starts_with(AGENTS_ROOT) || canonical.starts_with(SHARED_ROOT);
-    if !allowed {
-        return error_response(&format!(
-            "state-file: resolved path escapes allow-list: {}",
+    let canonical = std::fs::canonicalize(&mapped).map_err(|e| format!("{}: {e}", mapped.display()))?;
+    if !(canonical.starts_with(AGENTS_ROOT) || canonical.starts_with(SHARED_ROOT)) {
+        return Err(format!(
+            "resolved path escapes allow-list: {}",
             canonical.display()
         ));
     }
-    // For per-agent paths, also require the second-from-root
-    // component to be `state` (not `claude` or `config`). Claude
-    // creds shouldn't leak through this endpoint; config is the
-    // applied repo (already exposed via /api/agent-config). Reading
-    // `/var/lib/hyperhive/agents//state/...` is the intended use.
     if let Ok(rel) = canonical.strip_prefix(AGENTS_ROOT) {
         let mut components = rel.components();
         let _agent = components.next();
         let dir = components.next().and_then(|c| c.as_os_str().to_str());
         if dir != Some("state") {
-            return error_response(&format!(
-                "state-file: only per-agent state/ is readable here ({} dir not allowed)",
+            return Err(format!(
+                "only per-agent state/ is readable here ({} dir not allowed)",
                 dir.unwrap_or("(root)")
             ));
         }
     }
+    Ok(canonical)
+}
+
+async fn get_state_file(
+    axum::extract::Query(q): axum::extract::Query<StateFileQuery>,
+) -> Response {
+    const MAX_BYTES: usize = 1 << 20; // 1 MiB
+    let canonical = match resolve_state_path(&q.path) {
+        Ok(p) => p,
+        Err(e) => return error_response(&format!("state-file: {e}")),
+    };
     let meta = match std::fs::metadata(&canonical) {
         Ok(m) => m,
         Err(e) => return error_response(&format!("state-file: stat {}: {e}", canonical.display())),
@@ -985,6 +987,36 @@ async fn get_state_file(
     ([("content-type", "text/plain; charset=utf-8")], body).into_response()
 }
 
+#[derive(Deserialize)]
+struct StateFileCheckReq {
+    paths: Vec<String>,
+}
+
+/// Batch existence/file-ness probe behind the path-link autodetect.
+/// The client collects regex-candidate paths from a message body,
+/// fires one POST with the whole batch, and downgrades anchors
+/// whose result is `false` back to plain text. Same security rules
+/// as `get_state_file` (via `resolve_state_path`); a path is `true`
+/// iff it resolves, lives in the allow-list, and is a regular file
+/// (not a dir, symlink-to-dir, missing file, or forbidden subtree).
+/// Capped per-request to keep a runaway message body from
+/// triggering thousands of canonicalize calls in one request.
+async fn post_state_file_check(
+    axum::Json(req): axum::Json<StateFileCheckReq>,
+) -> Response {
+    const MAX_PATHS: usize = 64;
+    let mut out: std::collections::HashMap<String, bool> =
+        std::collections::HashMap::with_capacity(req.paths.len().min(MAX_PATHS));
+    for raw in req.paths.into_iter().take(MAX_PATHS) {
+        let is_file = match resolve_state_path(&raw) {
+            Ok(p) => std::fs::metadata(&p).is_ok_and(|m| m.is_file()),
+            Err(_) => false,
+        };
+        out.insert(raw, is_file);
+    }
+    axum::Json(serde_json::json!({ "results": out })).into_response()
+}
+
 async fn api_reminders(State(state): State) -> Response {
     match state.coord.broker.list_pending_reminders() {
         Ok(rows) => axum::Json(rows).into_response(),