path linkify: server attaches file_refs at message ingest

drop the /api/state-file/check probe endpoint (which let any
dashboard visitor enumerate filesystem layout by feeding paths)
and the client's optimistic-then-downgrade dance. instead, the
broker forwarder calls scan_validated_paths(body) — same
allow-list helper as the read endpoint — and attaches the
verified file tokens to DashboardEvent::Sent/Delivered as
file_refs: Vec<String>. /dashboard/history backfill does the
same per-row.

client appendLinkified takes a (text, refs) pair, walks
left-to-right linkifying every occurrence of any ref token,
longest-first tie-break. no regex, no probe, no cache, no
queue. when refs is empty/absent the body emits as plain text
(question/answer/reminder rendering — refs for those are a
follow-up).

operator inbox stores file_refs from the sent event so its
renderer gets the same anchors as the message-flow terminal.
This commit is contained in:
müde 2026-05-17 23:44:50 +02:00
parent 6e098fad29
commit 76e4034e01
5 changed files with 131 additions and 141 deletions

View file

@ -208,12 +208,19 @@ not ours.
and the host form. Canonicalises + verifies the path stays and the host form. Canonicalises + verifies the path stays
inside the allow-list, refuses anything but a regular file, inside the allow-list, refuses anything but a regular file,
refuses `/agents/<n>/claude` / `config` subtrees, truncates refuses `/agents/<n>/claude` / `config` subtrees, truncates
bodies at 1 MiB. Backs the dashboard's inline path-link bodies at 1 MiB. Click-time backing for the inline path-link
preview (PATH_RE detects pointer strings in message bodies, preview.
question/answer text, and the operator inbox; clicking
expands a `<details>` that lazy-fetches via this endpoint). Detection of which tokens *are* path links is done
Trailing-slash matches (i.e. directory paths) are skipped on **server-side at broker-message ingest**, not client-side:
the client side — only files linkify. the broker forwarder calls `scan_validated_paths(body)` —
same allow-list helper the read endpoint uses — and attaches
the verified file tokens to the event as `file_refs: Vec<String>`.
The client trusts that list and linkifies only those tokens,
so directories, missing files, and forbidden subtrees never
become anchors. No probe endpoint, no client-side regex
heuristics. Historical messages get the same treatment on
`/dashboard/history` backfill.
- `GET /api/reminders` — list pending reminders for the - `GET /api/reminders` — list pending reminders for the
dashboard's queued-reminders panel. dashboard's queued-reminders panel.
- `POST /cancel-reminder/{id}` — hard-delete a pending reminder. - `POST /cancel-reminder/{id}` — hard-delete a pending reminder.

View file

@ -48,75 +48,6 @@
// perspective (we'd need to know which agent the message is about // perspective (we'd need to know which agent the message is about
// to translate it). Prefer `/agents/<name>/state/...` in agent // to translate it). Prefer `/agents/<name>/state/...` in agent
// outputs and the link will resolve. // outputs and the link will resolve.
// Match anything that *looks* like a path under the allow-listed
// roots; the server endpoint `/api/state-file/check` is the
// authority on whether each match is actually a file. Optimistic
// anchors render first; a batched validation request downgrades
// non-files (dirs, missing, forbidden subtrees) back to plain
// text. No client-side filename heuristics — the regex's job is
// just "spot a path-shaped token".
//
// Four alternatives, in order: the host form of a per-agent state
// path, the host form of the shared root, the short
// `/agents/<name>/state/...` form, and the short `/shared/...`
// form. The `g` flag makes `exec` stateful, so callers must reset
// `PATH_RE.lastIndex` before each scan.
const PATH_RE = /(\/var\/lib\/hyperhive\/agents\/[\w.-]+\/state\/[\w./-]+|\/var\/lib\/hyperhive\/shared\/[\w./-]+|\/agents\/[\w.-]+\/state\/[\w./-]+|\/shared\/[\w./-]+)/g;
// Session-scoped verdict cache, keyed by path string, for paths
// the server has already checked. `true` = render as a clickable
// anchor; `false` = render as plain text. A path with no entry
// has not been validated yet. Nothing in this module evicts
// entries, so the cache lives until page reload — a file created
// mid-session after a `false` verdict stays unlinked until then.
const pathValidity = new Map();
// Anchors awaiting validation. Keyed by path so every anchor for
// the same path can be rewritten in one shot when the result
// lands. Each value is an ARRAY of `{ anchor, details }` entries
// (one per rendered occurrence); `details` is kept alongside the
// anchor so the sibling preview can be dropped too when the path
// turns out to be invalid.
const pendingAnchors = new Map();
// Handle of the pending debounce timer that will flush
// `pendingAnchors`, or null when no flush is scheduled.
let validateTimer = null;
// Register one rendered occurrence of `path` (its anchor plus the
// sibling <details> preview) for server-side validation and
// (re)arm the debounce timer that flushes the queue.
function queuePathForValidation(path, anchor, details) {
  let waiting = pendingAnchors.get(path);
  if (waiting === undefined) {
    waiting = [];
    pendingAnchors.set(path, waiting);
  }
  waiting.push({ anchor, details });
  // Restart the 50 ms window on every call so a burst of rows (a
  // backfill replay can emit dozens in one tick) collapses into a
  // single batched request.
  if (validateTimer) {
    clearTimeout(validateTimer);
  }
  validateTimer = setTimeout(flushPathValidation, 50);
}
// Send every queued path to `/api/state-file/check` in one POST
// and apply the server's verdicts: confirmed files keep their
// anchors, confirmed non-files are downgraded to plain text and
// lose their <details> preview. Verdicts are cached in
// `pathValidity` for the rest of the session.
//
// Only an explicit verdict from the server changes anything. On a
// transport failure, a non-2xx response, an unparsable body, or a
// path the server did not answer for (the endpoint caps how many
// paths it evaluates per request), the anchors are left as-is and
// nothing is cached — clicking them surfaces the real error from
// /api/state-file inline, and the next render of that path
// re-queues it.
async function flushPathValidation() {
  validateTimer = null;
  if (!pendingAnchors.size) return;
  // Snapshot the queue + clear it before we await — additional
  // anchors that land while the request is in flight queue into
  // a fresh batch.
  const snapshot = new Map(pendingAnchors);
  pendingAnchors.clear();
  const paths = Array.from(snapshot.keys());
  let results;
  try {
    const resp = await fetch('/api/state-file/check', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ paths }),
    });
    if (!resp.ok) {
      // A server-side error is not a verdict. Treating it as "all
      // paths invalid" would strip every anchor in the batch and
      // poison the session cache.
      console.warn('path validation batch rejected', resp.status);
      return;
    }
    results = (await resp.json()).results || {};
  } catch (err) {
    console.warn('path validation batch failed', err);
    return;
  }
  for (const [path, entries] of snapshot) {
    // No answer for this path (e.g. it fell past the server's
    // per-request cap): leave it optimistic rather than guessing.
    if (!Object.prototype.hasOwnProperty.call(results, path)) continue;
    const ok = !!results[path];
    pathValidity.set(path, ok);
    if (ok) continue;
    // Downgrade every pending anchor for this path back to
    // plain text + drop its sibling <details> preview.
    for (const { anchor, details } of entries) {
      if (anchor.parentNode) {
        anchor.parentNode.replaceChild(document.createTextNode(path), anchor);
      }
      if (details && details.parentNode) {
        details.parentNode.removeChild(details);
      }
    }
  }
}
async function fetchStateFile(path) { async function fetchStateFile(path) {
const resp = await fetch('/api/state-file?path=' + encodeURIComponent(path)); const resp = await fetch('/api/state-file?path=' + encodeURIComponent(path));
const text = await resp.text(); const text = await resp.text();
@ -153,42 +84,54 @@
}); });
return { anchor, details }; return { anchor, details };
} }
// Append `text` to `parent` as a mix of text nodes + path
// anchors, and return the array of generated <details> previews
// so the caller can append them as block siblings under the row.
//
// `refs` is the server-attached `file_refs` array (verified-file
// tokens that appear in `text`). Every occurrence of a ref in
// `text` becomes the anchor produced by `makePathPreview`, and
// its <details> preview is collected for the caller. Anything not
// in `refs` stays plain text: no client-side regex, no probe
// endpoint — the server saw the body first and made the call.
//
// - `text == null` appends nothing and returns [].
// - Empty, missing, or non-array `refs` emits `text` as a single
//   plain text node.
// - Non-string and empty-string entries in `refs` are ignored. An
//   empty token would match at every position without advancing
//   the cursor, i.e. loop forever.
function appendLinkified(parent, text, refs) {
  const previews = [];
  if (text == null) return previews;
  const str = String(text);
  // Sanitise into a private, de-duplicated copy so the caller's
  // array is never reordered by the sort below.
  const tokens = Array.isArray(refs)
    ? Array.from(new Set(refs.filter((t) => typeof t === 'string' && t.length > 0)))
    : [];
  if (!tokens.length) {
    if (str) parent.appendChild(document.createTextNode(str));
    return previews;
  }
  // Walk the string left-to-right, at each step looking for the
  // next occurrence of any token. Longest-first tie-break so a
  // ref like `/agents/foo/state/x.md` wins over a (hypothetical)
  // shorter token that prefixes it. O(text * refs) worst case.
  tokens.sort((a, b) => b.length - a.length);
  let i = 0;
  while (i < str.length) {
    let bestStart = -1;
    let bestToken = null;
    for (const t of tokens) {
      const idx = str.indexOf(t, i);
      if (idx === -1) continue;
      if (
        bestStart === -1 ||
        idx < bestStart ||
        (idx === bestStart && t.length > bestToken.length)
      ) {
        bestStart = idx;
        bestToken = t;
      }
    }
    if (bestStart === -1) {
      // No further refs: flush the remainder as plain text.
      parent.appendChild(document.createTextNode(str.slice(i)));
      break;
    }
    if (bestStart > i) {
      parent.appendChild(document.createTextNode(str.slice(i, bestStart)));
    }
    const { anchor, details } = makePathPreview(bestToken);
    parent.appendChild(anchor);
    previews.push(details);
    // Tokens are non-empty, so the cursor always moves forward.
    i = bestStart + bestToken.length;
  }
  return previews;
}
@ -1000,7 +943,12 @@
const operatorInbox = []; const operatorInbox = [];
function inboxAppendFromEvent(ev) { function inboxAppendFromEvent(ev) {
if (ev.kind !== 'sent' || ev.to !== 'operator') return false; if (ev.kind !== 'sent' || ev.to !== 'operator') return false;
operatorInbox.unshift({ from: ev.from, body: ev.body, at: ev.at }); operatorInbox.unshift({
from: ev.from,
body: ev.body,
at: ev.at,
file_refs: ev.file_refs || [],
});
if (operatorInbox.length > INBOX_LIMIT) operatorInbox.length = INBOX_LIMIT; if (operatorInbox.length > INBOX_LIMIT) operatorInbox.length = INBOX_LIMIT;
return true; return true;
} }
@ -1017,7 +965,7 @@
for (const m of operatorInbox) { for (const m of operatorInbox) {
const li = el('li'); const li = el('li');
const body = el('span', { class: 'msg-body' }); const body = el('span', { class: 'msg-body' });
const previews = appendLinkified(body, m.body); const previews = appendLinkified(body, m.body, m.file_refs);
li.append( li.append(
el('span', { class: 'msg-ts' }, fmt(m.at)), ' ', el('span', { class: 'msg-ts' }, fmt(m.at)), ' ',
el('span', { class: 'msg-from' }, m.from), ' ', el('span', { class: 'msg-from' }, m.from), ' ',
@ -1543,7 +1491,7 @@
to.className = 'msg-to'; to.textContent = ev.to; to.className = 'msg-to'; to.textContent = ev.to;
const body = document.createElement('span'); const body = document.createElement('span');
body.className = 'msg-body'; body.className = 'msg-body';
const previews = appendLinkified(body, ev.body); const previews = appendLinkified(body, ev.body, ev.file_refs);
row.append(ts, ' ', arrow, ' ', from, ' ', sep, ' ', to, ' ', body); row.append(ts, ' ', arrow, ' ', from, ' ', sep, ' ', to, ' ', body);
for (const d of previews) row.appendChild(d); for (const d of previews) row.appendChild(d);
} }

View file

@ -55,7 +55,6 @@ pub async fn serve(port: u16, coord: Arc<Coordinator>) -> Result<()> {
.route("/purge-tombstone/{name}", post(post_purge_tombstone)) .route("/purge-tombstone/{name}", post(post_purge_tombstone))
.route("/api/journal/{name}", get(get_journal)) .route("/api/journal/{name}", get(get_journal))
.route("/api/state-file", get(get_state_file)) .route("/api/state-file", get(get_state_file))
.route("/api/state-file/check", post(post_state_file_check))
.route("/api/reminders", get(api_reminders)) .route("/api/reminders", get(api_reminders))
.route("/cancel-reminder/{id}", post(post_cancel_reminder)) .route("/cancel-reminder/{id}", post(post_cancel_reminder))
.route("/api/agent-config/{name}", get(get_agent_config)) .route("/api/agent-config/{name}", get(get_agent_config))
@ -635,21 +634,25 @@ async fn dashboard_history(State(state): State<AppState>) -> Response {
.into_iter() .into_iter()
.map(|m| match m { .map(|m| match m {
crate::broker::MessageEvent::Sent { from, to, body, at } => { crate::broker::MessageEvent::Sent { from, to, body, at } => {
let file_refs = scan_validated_paths(&body);
crate::dashboard_events::DashboardEvent::Sent { crate::dashboard_events::DashboardEvent::Sent {
seq: 0, seq: 0,
from, from,
to, to,
body, body,
at, at,
file_refs,
} }
} }
crate::broker::MessageEvent::Delivered { from, to, body, at } => { crate::broker::MessageEvent::Delivered { from, to, body, at } => {
let file_refs = scan_validated_paths(&body);
crate::dashboard_events::DashboardEvent::Delivered { crate::dashboard_events::DashboardEvent::Delivered {
seq: 0, seq: 0,
from, from,
to, to,
body, body,
at, at,
file_refs,
} }
} }
}) })
@ -953,6 +956,53 @@ fn resolve_state_path(raw: &str) -> std::result::Result<std::path::PathBuf, Stri
Ok(canonical) Ok(canonical)
} }
/// Scan `body` for path-shaped tokens, validate each against the
/// allow-list, and return the unique tokens that resolve to a
/// regular file, in order of first appearance. Called at
/// broker-message ingest time so the dashboard event already
/// carries the verified set — no client-side probe endpoint
/// required, and historical messages get the same treatment on
/// `/dashboard/history` backfill.
///
/// Tokenisation: split on whitespace, then strip wrapper and
/// sentence punctuation that commonly surrounds a path in
/// natural-language or markdown text but is not part of it:
/// leading ``( [ { < ' " ` `` and trailing
/// ``, ; : ) ] } > . ! ? ' " ` ``. Any remaining token starting
/// with `/agents/`, `/shared/`, or
/// `/var/lib/hyperhive/{agents,shared}/` is a candidate. The
/// returned strings are the stripped tokens exactly as they occur
/// in `body`, so the client can locate them by substring search.
///
/// The allow-list + is_file check goes through the same
/// `resolve_state_path` helper the read endpoint uses, so the
/// security rules can't drift.
///
/// Cost bound: each distinct candidate is resolved at most once
/// (repeats are skipped whether or not the first attempt
/// succeeded), and at most `MAX_CANDIDATES` distinct candidates
/// are resolved per body, so a runaway message cannot trigger
/// thousands of filesystem probes. Candidates past the cap are
/// simply not linkified.
pub(crate) fn scan_validated_paths(body: &str) -> Vec<String> {
    const PREFIXES: [&str; 4] = [
        "/agents/",
        "/shared/",
        "/var/lib/hyperhive/agents/",
        "/var/lib/hyperhive/shared/",
    ];
    // Upper bound on filesystem probes per message body.
    const MAX_CANDIDATES: usize = 64;

    // Every candidate already examined, valid or not.
    let mut seen = std::collections::HashSet::<&str>::new();
    let mut out = Vec::<String>::new();
    for raw in body.split(char::is_whitespace) {
        // Inline rather than via a regex dep — the sets are small
        // and the call is hot.
        let token = raw
            .trim_start_matches(|c: char| {
                matches!(c, '(' | '[' | '{' | '<' | '\'' | '"' | '`')
            })
            .trim_end_matches(|c: char| {
                matches!(
                    c,
                    ',' | ';' | ':' | ')' | ']' | '}' | '>' | '.' | '!' | '?' | '\'' | '"' | '`'
                )
            });
        if token.is_empty() || !PREFIXES.iter().any(|p| token.starts_with(p)) {
            continue;
        }
        // Skip repeats before touching the filesystem.
        if !seen.insert(token) {
            continue;
        }
        if seen.len() > MAX_CANDIDATES {
            break;
        }
        let is_file = resolve_state_path(token).is_ok_and(|canonical| {
            std::fs::metadata(&canonical).is_ok_and(|m| m.is_file())
        });
        if is_file {
            out.push(token.to_owned());
        }
    }
    out
}
async fn get_state_file( async fn get_state_file(
axum::extract::Query(q): axum::extract::Query<StateFileQuery>, axum::extract::Query(q): axum::extract::Query<StateFileQuery>,
) -> Response { ) -> Response {
@ -987,36 +1037,6 @@ async fn get_state_file(
([("content-type", "text/plain; charset=utf-8")], body).into_response() ([("content-type", "text/plain; charset=utf-8")], body).into_response()
} }
/// JSON request body accepted by `post_state_file_check`
/// (`POST /api/state-file/check`).
#[derive(Deserialize)]
struct StateFileCheckReq {
    /// Candidate path strings to probe. The handler evaluates only
    /// the first `MAX_PATHS` entries; later ones get no result.
    paths: Vec<String>,
}
/// Batched "is this a readable regular file?" probe backing the
/// dashboard's path-link autodetect. The client posts the
/// regex-candidate paths it found in message bodies and turns any
/// anchor whose verdict is `false` back into plain text.
///
/// A path maps to `true` only if `resolve_state_path` accepts it
/// (the same allow-list rules as `get_state_file`) and the
/// resolved path is a regular file; directories, missing files,
/// and forbidden subtrees all map to `false`.
///
/// At most `MAX_PATHS` entries are examined per request so a
/// runaway message body can't trigger thousands of canonicalize
/// calls; entries past the cap are absent from the response.
///
/// Responds with JSON `{ "results": { "<path>": bool, ... } }`.
async fn post_state_file_check(
    axum::Json(req): axum::Json<StateFileCheckReq>,
) -> Response {
    const MAX_PATHS: usize = 64;
    let verdicts: std::collections::HashMap<String, bool> = req
        .paths
        .into_iter()
        .take(MAX_PATHS)
        .map(|candidate| {
            let is_file = resolve_state_path(&candidate).is_ok_and(|resolved| {
                std::fs::metadata(&resolved).is_ok_and(|meta| meta.is_file())
            });
            (candidate, is_file)
        })
        .collect();
    axum::Json(serde_json::json!({ "results": verdicts })).into_response()
}
async fn api_reminders(State(state): State<AppState>) -> Response { async fn api_reminders(State(state): State<AppState>) -> Response {
match state.coord.broker.list_pending_reminders() { match state.coord.broker.list_pending_reminders() {
Ok(rows) => axum::Json(rows).into_response(), Ok(rows) => axum::Json(rows).into_response(),

View file

@ -31,20 +31,31 @@ use crate::container_view::ContainerView;
#[serde(rename_all = "snake_case", tag = "kind")] #[serde(rename_all = "snake_case", tag = "kind")]
pub enum DashboardEvent { pub enum DashboardEvent {
/// Broker `Sent` event mirrored onto the dashboard channel. /// Broker `Sent` event mirrored onto the dashboard channel.
/// `file_refs` carries every path-shaped token in `body` that
/// hive-c0re verified is a regular file under the allow-listed
/// roots (per-agent `state/` + `shared/`). The forwarder
/// pre-validates so the dashboard doesn't need a probe
/// endpoint — the client renders anchors only for tokens that
/// appear in this list, everything else stays plain text.
Sent { Sent {
seq: u64, seq: u64,
from: String, from: String,
to: String, to: String,
body: String, body: String,
at: i64, at: i64,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
file_refs: Vec<String>,
}, },
/// Broker `Delivered` event mirrored onto the dashboard channel. /// Broker `Delivered` event mirrored onto the dashboard channel.
/// `file_refs` is the same shape as `Sent`.
Delivered { Delivered {
seq: u64, seq: u64,
from: String, from: String,
to: String, to: String,
body: String, body: String,
at: i64, at: i64,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
file_refs: Vec<String>,
}, },
/// A new approval landed in the pending queue. Payload carries /// A new approval landed in the pending queue. Payload carries
/// enough to render the dashboard row without a `/api/state` /// enough to render the dashboard row without a `/api/state`

View file

@ -226,21 +226,25 @@ fn spawn_broker_to_dashboard_forwarder(coord: Arc<Coordinator>) {
loop { loop {
match rx.recv().await { match rx.recv().await {
Ok(MessageEvent::Sent { from, to, body, at }) => { Ok(MessageEvent::Sent { from, to, body, at }) => {
let file_refs = dashboard::scan_validated_paths(&body);
coord.emit_dashboard_event(DashboardEvent::Sent { coord.emit_dashboard_event(DashboardEvent::Sent {
seq: coord.next_seq(), seq: coord.next_seq(),
from, from,
to, to,
body, body,
at, at,
file_refs,
}); });
} }
Ok(MessageEvent::Delivered { from, to, body, at }) => { Ok(MessageEvent::Delivered { from, to, body, at }) => {
let file_refs = dashboard::scan_validated_paths(&body);
coord.emit_dashboard_event(DashboardEvent::Delivered { coord.emit_dashboard_event(DashboardEvent::Delivered {
seq: coord.next_seq(), seq: coord.next_seq(),
from, from,
to, to,
body, body,
at, at,
file_refs,
}); });
} }
Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => { Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {