diff --git a/docs/web-ui.md b/docs/web-ui.md index 1304e66..dd933a9 100644 --- a/docs/web-ui.md +++ b/docs/web-ui.md @@ -208,12 +208,19 @@ not ours. and the host form. Canonicalises + verifies the path stays inside the allow-list, refuses anything but a regular file, refuses `/agents//claude` / `config` subtrees, truncates - bodies at 1 MiB. Backs the dashboard's inline path-link - preview (PATH_RE detects pointer strings in message bodies, - question/answer text, and the operator inbox; clicking - expands a `
` that lazy-fetches via this endpoint). - Trailing-slash matches (i.e. directory paths) are skipped on - the client side — only files linkify. + bodies at 1 MiB. Click-time backing for the inline path-link + preview. + + Detection of which tokens *are* path links is done + **server-side at broker-message ingest**, not client-side: + the broker forwarder calls `scan_validated_paths(body)` — + same allow-list helper the read endpoint uses — and attaches + the verified file tokens to the event as `file_refs: Vec`. + The client trusts that list and linkifies only those tokens, + so directories, missing files, and forbidden subtrees never + become anchors. No probe endpoint, no client-side regex + heuristics. Historical messages get the same treatment on + `/dashboard/history` backfill. - `GET /api/reminders` — list pending reminders for the dashboard's queued-reminders panel. - `POST /cancel-reminder/{id}` — hard-delete a pending reminder. diff --git a/hive-c0re/assets/app.js b/hive-c0re/assets/app.js index 2c16c82..bd4a3e3 100644 --- a/hive-c0re/assets/app.js +++ b/hive-c0re/assets/app.js @@ -48,75 +48,6 @@ // perspective (we'd need to know which agent the message is about // to translate it). Prefer `/agents//state/...` in agent // outputs and the link will resolve. - // Match anything that *looks* like a path under the allow-listed - // roots; the server endpoint `/api/state-file/check` is the - // authority on whether each match is actually a file. Optimistic - // anchors render first; a batched validation request downgrades - // non-files (dirs, missing, forbidden subtrees) back to plain - // text. No client-side filename heuristics — the regex's job is - // just "spot a path-shaped token". - const PATH_RE = /(\/var\/lib\/hyperhive\/agents\/[\w.-]+\/state\/[\w./-]+|\/var\/lib\/hyperhive\/shared\/[\w./-]+|\/agents\/[\w.-]+\/state\/[\w./-]+|\/shared\/[\w./-]+)/g; - - // Session-scoped truthiness cache for paths the server has - // already verified. `true` = render as a clickable anchor; - // `false` = strip the anchor on next reflow. Cleared only on - // page reload — agents creating new files mid-session show up - // next time the path is referenced. - const pathValidity = new Map(); - // Anchors awaiting validation. Keyed by path so we can rewrite - // every anchor for the same path in one shot when the result - // lands. Each entry: { anchor, details } so we can also drop - // the sibling preview when the path turns out to be invalid. - const pendingAnchors = new Map(); - let validateTimer = null; - function queuePathForValidation(path, anchor, details) { - if (!pendingAnchors.has(path)) pendingAnchors.set(path, []); - pendingAnchors.get(path).push({ anchor, details }); - if (validateTimer) clearTimeout(validateTimer); - // Coalesce bursts (a backfill replay can emit dozens of rows - // in one tick) into a single batched request. - validateTimer = setTimeout(flushPathValidation, 50); - } - async function flushPathValidation() { - validateTimer = null; - const paths = Array.from(pendingAnchors.keys()); - if (!paths.length) return; - // Snapshot the queue + clear it before we await — additional - // anchors that land while the request is in flight queue into - // a fresh batch. - const snapshot = new Map(pendingAnchors); - pendingAnchors.clear(); - let results = {}; - try { - const resp = await fetch('/api/state-file/check', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ paths }), - }); - if (resp.ok) results = (await resp.json()).results || {}; - } catch (err) { - console.warn('path validation batch failed', err); - // On transport failure leave anchors as-is — clicking them - // will surface the real error from /api/state-file inline. - return; - } - for (const [path, entries] of snapshot) { - const ok = !!results[path]; - pathValidity.set(path, ok); - if (ok) continue; - // Downgrade every pending anchor for this path back to - // plain text + drop its sibling
preview. - for (const { anchor, details } of entries) { - if (anchor.parentNode) { - anchor.parentNode.replaceChild(document.createTextNode(path), anchor); - } - if (details && details.parentNode) { - details.parentNode.removeChild(details); - } - } - } - } - async function fetchStateFile(path) { const resp = await fetch('/api/state-file?path=' + encodeURIComponent(path)); const text = await resp.text(); @@ -153,42 +84,54 @@ }); return { anchor, details }; } - // Append `text` to `parent` as a mix of text nodes + path anchors. - // Returns the array of generated `
` previews so the - // caller can append them as block siblings under the row. - // Anchors render optimistically; paths unseen this session are - // queued for batch validation, and the server's verdict either - // confirms or strips them via `flushPathValidation`. - function appendLinkified(parent, text) { + // Append `text` to `parent` as a mix of text nodes + path + // anchors. `refs` is the server-attached `file_refs` array + // (verified-file tokens that appear in `text`); each occurrence + // of a ref in `text` becomes a clickable anchor + a sibling + //
preview that lazy-fetches from /api/state-file. + // Anything not in `refs` stays plain text. No client-side + // regex, no probe endpoint — the server saw the body first + // and made the call. When `refs` is empty/missing we just + // emit plain text. + function appendLinkified(parent, text, refs) { const previews = []; if (text == null) return previews; const str = String(text); - let lastIdx = 0; - PATH_RE.lastIndex = 0; - let m; - while ((m = PATH_RE.exec(str)) !== null) { - if (m.index > lastIdx) { - parent.appendChild(document.createTextNode(str.slice(lastIdx, m.index))); - } - const path = m[0]; - const cached = pathValidity.get(path); - if (cached === false) { - // Already known to be a non-file — render plain text, no - // anchor, no preview. The text still shows up so the - // operator sees the path; it's just not clickable. - parent.appendChild(document.createTextNode(path)); - } else { - const { anchor, details } = makePathPreview(path); - parent.appendChild(anchor); - previews.push(details); - // Unknown paths queue for validation; known-good ones - // skip the roundtrip entirely. - if (cached !== true) queuePathForValidation(path, anchor, details); - } - lastIdx = m.index + m[0].length; + const tokens = (refs || []).slice(); + if (!tokens.length) { + if (str) parent.appendChild(document.createTextNode(str)); + return previews; } - if (lastIdx < str.length) { - parent.appendChild(document.createTextNode(str.slice(lastIdx))); + // Walk the string left-to-right, at each step looking for the + // next occurrence of any token. Longest-first tie-break so a + // ref like `/agents/foo/state/x.md` wins over a (hypothetical) + // shorter token that prefixes it. O(text * refs) worst case; + // refs is bounded server-side to whatever fits in a body, so + // this stays cheap. + tokens.sort((a, b) => b.length - a.length); + let i = 0; + while (i < str.length) { + let bestStart = -1; + let bestToken = null; + for (const t of tokens) { + const idx = str.indexOf(t, i); + if (idx === -1) continue; + if (bestStart === -1 || idx < bestStart || (idx === bestStart && t.length > bestToken.length)) { + bestStart = idx; + bestToken = t; + } + } + if (bestStart === -1) { + parent.appendChild(document.createTextNode(str.slice(i))); + break; + } + if (bestStart > i) { + parent.appendChild(document.createTextNode(str.slice(i, bestStart))); + } + const { anchor, details } = makePathPreview(bestToken); + parent.appendChild(anchor); + previews.push(details); + i = bestStart + bestToken.length; } return previews; } @@ -1000,7 +943,12 @@ const operatorInbox = []; function inboxAppendFromEvent(ev) { if (ev.kind !== 'sent' || ev.to !== 'operator') return false; - operatorInbox.unshift({ from: ev.from, body: ev.body, at: ev.at }); + operatorInbox.unshift({ + from: ev.from, + body: ev.body, + at: ev.at, + file_refs: ev.file_refs || [], + }); if (operatorInbox.length > INBOX_LIMIT) operatorInbox.length = INBOX_LIMIT; return true; } @@ -1017,7 +965,7 @@ for (const m of operatorInbox) { const li = el('li'); const body = el('span', { class: 'msg-body' }); - const previews = appendLinkified(body, m.body); + const previews = appendLinkified(body, m.body, m.file_refs); li.append( el('span', { class: 'msg-ts' }, fmt(m.at)), ' ', el('span', { class: 'msg-from' }, m.from), ' ', @@ -1543,7 +1491,7 @@ to.className = 'msg-to'; to.textContent = ev.to; const body = document.createElement('span'); body.className = 'msg-body'; - const previews = appendLinkified(body, ev.body); + const previews = appendLinkified(body, ev.body, ev.file_refs); row.append(ts, ' ', arrow, ' ', from, ' ', sep, ' ', to, ' ', body); for (const d of previews) row.appendChild(d); } diff --git a/hive-c0re/src/dashboard.rs b/hive-c0re/src/dashboard.rs index 73618cf..c23acb7 100644 --- a/hive-c0re/src/dashboard.rs +++ b/hive-c0re/src/dashboard.rs @@ -55,7 +55,6 @@ pub async fn serve(port: u16, coord: Arc) -> Result<()> { .route("/purge-tombstone/{name}", post(post_purge_tombstone)) .route("/api/journal/{name}", get(get_journal)) .route("/api/state-file", get(get_state_file)) - .route("/api/state-file/check", post(post_state_file_check)) .route("/api/reminders", get(api_reminders)) .route("/cancel-reminder/{id}", post(post_cancel_reminder)) .route("/api/agent-config/{name}", get(get_agent_config)) @@ -635,21 +634,25 @@ async fn dashboard_history(State(state): State) -> Response { .into_iter() .map(|m| match m { crate::broker::MessageEvent::Sent { from, to, body, at } => { + let file_refs = scan_validated_paths(&body); crate::dashboard_events::DashboardEvent::Sent { seq: 0, from, to, body, at, + file_refs, } } crate::broker::MessageEvent::Delivered { from, to, body, at } => { + let file_refs = scan_validated_paths(&body); crate::dashboard_events::DashboardEvent::Delivered { seq: 0, from, to, body, at, + file_refs, } } }) @@ -953,6 +956,53 @@ fn resolve_state_path(raw: &str) -> std::result::Result Vec { + const PREFIXES: [&str; 4] = [ + "/agents/", + "/shared/", + "/var/lib/hyperhive/agents/", + "/var/lib/hyperhive/shared/", + ]; + let mut out = Vec::::new(); + for raw in body.split(|c: char| c.is_whitespace()) { + // Trim trailing natural-language punctuation that wouldn't + // be part of any real path. Inline rather than via a regex + // dep — the set is small and the call is hot. + let token = raw.trim_end_matches(|c: char| matches!(c, ',' | ';' | ':' | ')' | ']' | '}' | '.' | '\'' | '"')); + if token.is_empty() { + continue; + } + if !PREFIXES.iter().any(|p| token.starts_with(p)) { + continue; + } + // Cheap dedupe — typical message has 0-3 refs. + if out.iter().any(|s| s == token) { + continue; + } + if let Ok(canonical) = resolve_state_path(token) { + if std::fs::metadata(&canonical).is_ok_and(|m| m.is_file()) { + out.push(token.to_owned()); + } + } + } + out +} + async fn get_state_file( axum::extract::Query(q): axum::extract::Query, ) -> Response { @@ -987,36 +1037,6 @@ async fn get_state_file( ([("content-type", "text/plain; charset=utf-8")], body).into_response() } -#[derive(Deserialize)] -struct StateFileCheckReq { - paths: Vec, -} - -/// Batch existence/file-ness probe behind the path-link autodetect. -/// The client collects regex-candidate paths from a message body, -/// fires one POST with the whole batch, and downgrades anchors -/// whose result is `false` back to plain text. Same security rules -/// as `get_state_file` (via `resolve_state_path`); a path is `true` -/// iff it resolves, lives in the allow-list, and is a regular file -/// (not a dir, symlink-to-dir, missing file, or forbidden subtree). -/// Capped per-request to keep a runaway message body from -/// triggering thousands of canonicalize calls in one request. -async fn post_state_file_check( - axum::Json(req): axum::Json, -) -> Response { - const MAX_PATHS: usize = 64; - let mut out: std::collections::HashMap = - std::collections::HashMap::with_capacity(req.paths.len().min(MAX_PATHS)); - for raw in req.paths.into_iter().take(MAX_PATHS) { - let is_file = match resolve_state_path(&raw) { - Ok(p) => std::fs::metadata(&p).is_ok_and(|m| m.is_file()), - Err(_) => false, - }; - out.insert(raw, is_file); - } - axum::Json(serde_json::json!({ "results": out })).into_response() -} - async fn api_reminders(State(state): State) -> Response { match state.coord.broker.list_pending_reminders() { Ok(rows) => axum::Json(rows).into_response(), diff --git a/hive-c0re/src/dashboard_events.rs b/hive-c0re/src/dashboard_events.rs index e17a23e..4e531c2 100644 --- a/hive-c0re/src/dashboard_events.rs +++ b/hive-c0re/src/dashboard_events.rs @@ -31,20 +31,31 @@ use crate::container_view::ContainerView; #[serde(rename_all = "snake_case", tag = "kind")] pub enum DashboardEvent { /// Broker `Sent` event mirrored onto the dashboard channel. + /// `file_refs` carries every path-shaped token in `body` that + /// hive-c0re verified is a regular file under the allow-listed + /// roots (per-agent `state/` + `shared/`). The forwarder + /// pre-validates so the dashboard doesn't need a probe + /// endpoint — the client renders anchors only for tokens that + /// appear in this list, everything else stays plain text. Sent { seq: u64, from: String, to: String, body: String, at: i64, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + file_refs: Vec, }, /// Broker `Delivered` event mirrored onto the dashboard channel. + /// `file_refs` is the same shape as `Sent`. Delivered { seq: u64, from: String, to: String, body: String, at: i64, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + file_refs: Vec, }, /// A new approval landed in the pending queue. Payload carries /// enough to render the dashboard row without a `/api/state` diff --git a/hive-c0re/src/main.rs b/hive-c0re/src/main.rs index c543269..052d647 100644 --- a/hive-c0re/src/main.rs +++ b/hive-c0re/src/main.rs @@ -226,21 +226,25 @@ fn spawn_broker_to_dashboard_forwarder(coord: Arc) { loop { match rx.recv().await { Ok(MessageEvent::Sent { from, to, body, at }) => { + let file_refs = dashboard::scan_validated_paths(&body); coord.emit_dashboard_event(DashboardEvent::Sent { seq: coord.next_seq(), from, to, body, at, + file_refs, }); } Ok(MessageEvent::Delivered { from, to, body, at }) => { + let file_refs = dashboard::scan_validated_paths(&body); coord.emit_dashboard_event(DashboardEvent::Delivered { seq: coord.next_seq(), from, to, body, at, + file_refs, }); } Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {