path linkify: server attaches file_refs at message ingest

drop the /api/state-file/check probe endpoint (which let any
dashboard visitor enumerate filesystem layout by feeding paths)
and the client's optimistic-then-downgrade dance. instead, the
broker forwarder calls scan_validated_paths(body) — built on
the same resolve_state_path allow-list helper the read
endpoint uses — and attaches the
verified file tokens to DashboardEvent::Sent/Delivered as
file_refs: Vec<String>. /dashboard/history backfill does the
same per-row.

client appendLinkified takes a (text, refs) pair, walks
left-to-right linkifying every occurrence of any ref token,
longest-first tie-break. no regex, no probe, no cache, no
queue. when refs is empty/absent the body emits as plain text
(question/answer/reminder rendering — refs for those are a
follow-up).

operator inbox stores file_refs from the sent event so its
renderer gets the same anchors as the message-flow terminal.
This commit is contained in:
müde 2026-05-17 23:44:50 +02:00
parent 6e098fad29
commit 76e4034e01
5 changed files with 131 additions and 141 deletions

View file

@ -55,7 +55,6 @@ pub async fn serve(port: u16, coord: Arc<Coordinator>) -> Result<()> {
.route("/purge-tombstone/{name}", post(post_purge_tombstone))
.route("/api/journal/{name}", get(get_journal))
.route("/api/state-file", get(get_state_file))
.route("/api/state-file/check", post(post_state_file_check))
.route("/api/reminders", get(api_reminders))
.route("/cancel-reminder/{id}", post(post_cancel_reminder))
.route("/api/agent-config/{name}", get(get_agent_config))
@ -635,21 +634,25 @@ async fn dashboard_history(State(state): State<AppState>) -> Response {
.into_iter()
.map(|m| match m {
crate::broker::MessageEvent::Sent { from, to, body, at } => {
let file_refs = scan_validated_paths(&body);
crate::dashboard_events::DashboardEvent::Sent {
seq: 0,
from,
to,
body,
at,
file_refs,
}
}
crate::broker::MessageEvent::Delivered { from, to, body, at } => {
let file_refs = scan_validated_paths(&body);
crate::dashboard_events::DashboardEvent::Delivered {
seq: 0,
from,
to,
body,
at,
file_refs,
}
}
})
@ -953,6 +956,53 @@ fn resolve_state_path(raw: &str) -> std::result::Result<std::path::PathBuf, Stri
Ok(canonical)
}
/// Scan `body` for path-shaped tokens, validate each against the
/// allow-list, and return the unique tokens that resolve to a
/// regular file, in order of first appearance.
///
/// Called at broker-message ingest time so the dashboard event
/// already carries the verified set — no client-side probe endpoint
/// required — and again per row on `/dashboard/history` backfill.
///
/// Tokenisation: split on whitespace, then strip trailing
/// punctuation (`,` `;` `:` `)` `]` `}` `.` `'` `"`) that commonly
/// follows a path in natural-language text but isn't part of the
/// path itself. Any token starting with `/agents/`, `/shared/`, or
/// `/var/lib/hyperhive/{agents,shared}/` is a candidate.
///
/// Validation goes through `resolve_state_path` — the same helper
/// the read endpoint uses — followed by an `is_file` metadata check,
/// so the security rules can't drift between the two.
///
/// Each distinct candidate is validated at most once, and at most
/// `MAX_CANDIDATES` distinct candidates are validated per body;
/// anything past the cap is left unlinked. This bounds the number
/// of filesystem round-trips a single runaway message can trigger.
pub(crate) fn scan_validated_paths(body: &str) -> Vec<String> {
    const PREFIXES: [&str; 4] = [
        "/agents/",
        "/shared/",
        "/var/lib/hyperhive/agents/",
        "/var/lib/hyperhive/shared/",
    ];
    // Upper bound on distinct candidates validated per body.
    const MAX_CANDIDATES: usize = 64;

    // Every candidate already validated (accepted or rejected), so a
    // repeated bad path doesn't hit the filesystem again.
    let mut seen = std::collections::HashSet::<&str>::new();
    let mut out = Vec::<String>::new();

    for raw in body.split(char::is_whitespace) {
        // Trim trailing natural-language punctuation that wouldn't
        // be part of any real path. Inline rather than via a regex
        // dep — the set is small and this runs for every message.
        let token = raw.trim_end_matches(|c: char| {
            matches!(c, ',' | ';' | ':' | ')' | ']' | '}' | '.' | '\'' | '"')
        });
        // An empty token can't match a prefix, so this also skips
        // blanks produced by consecutive whitespace.
        if !PREFIXES.iter().any(|p| token.starts_with(p)) {
            continue;
        }
        if seen.contains(token) {
            continue;
        }
        if seen.len() >= MAX_CANDIDATES {
            break;
        }
        seen.insert(token);

        let is_regular_file = resolve_state_path(token)
            .is_ok_and(|canonical| std::fs::metadata(&canonical).is_ok_and(|m| m.is_file()));
        if is_regular_file {
            // Return the token as written in the body (not the
            // canonical path) so the client can match it in the text.
            out.push(token.to_owned());
        }
    }
    out
}
async fn get_state_file(
axum::extract::Query(q): axum::extract::Query<StateFileQuery>,
) -> Response {
@ -987,36 +1037,6 @@ async fn get_state_file(
([("content-type", "text/plain; charset=utf-8")], body).into_response()
}
/// Request body for `post_state_file_check`, extracted from JSON by
/// the handler: the batch of candidate paths to probe.
#[derive(Deserialize)]
struct StateFileCheckReq {
// Raw path strings as sent by the client; the handler validates
// each one itself and only looks at the first `MAX_PATHS` entries.
paths: Vec<String>,
}
/// Batch "is this a readable regular file?" probe used by the
/// path-link autodetect.
///
/// The client gathers candidate paths from a message body and sends
/// them in one POST; the response is `{ "results": { path: bool } }`,
/// and anchors whose entry is `false` get downgraded to plain text.
/// A path maps to `true` only when `resolve_state_path` accepts it
/// (the same rules `get_state_file` applies) and its metadata
/// reports a regular file.
///
/// Only the first `MAX_PATHS` entries of the request are examined;
/// later entries are absent from the response. The cap keeps a
/// runaway message body from triggering thousands of canonicalize
/// calls in one request.
async fn post_state_file_check(
    axum::Json(req): axum::Json<StateFileCheckReq>,
) -> Response {
    const MAX_PATHS: usize = 64;

    let results: std::collections::HashMap<String, bool> = req
        .paths
        .into_iter()
        .take(MAX_PATHS)
        .map(|candidate| {
            // Anything the allow-list rejects is reported as "not a file".
            let is_file = resolve_state_path(&candidate)
                .is_ok_and(|resolved| std::fs::metadata(&resolved).is_ok_and(|m| m.is_file()));
            (candidate, is_file)
        })
        .collect();

    axum::Json(serde_json::json!({ "results": results })).into_response()
}
async fn api_reminders(State(state): State<AppState>) -> Response {
match state.coord.broker.list_pending_reminders() {
Ok(rows) => axum::Json(rows).into_response(),

View file

@ -31,20 +31,31 @@ use crate::container_view::ContainerView;
#[serde(rename_all = "snake_case", tag = "kind")]
pub enum DashboardEvent {
/// Broker `Sent` event mirrored onto the dashboard channel.
/// `file_refs` carries every path-shaped token in `body` that
/// hive-c0re verified is a regular file under the allow-listed
/// roots (per-agent `state/` + `shared/`). The forwarder
/// pre-validates so the dashboard doesn't need a probe
/// endpoint — the client renders anchors only for tokens that
/// appear in this list, everything else stays plain text.
Sent {
seq: u64,
from: String,
to: String,
body: String,
at: i64,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
file_refs: Vec<String>,
},
/// Broker `Delivered` event mirrored onto the dashboard channel.
/// `file_refs` is the same shape as `Sent`.
Delivered {
seq: u64,
from: String,
to: String,
body: String,
at: i64,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
file_refs: Vec<String>,
},
/// A new approval landed in the pending queue. Payload carries
/// enough to render the dashboard row without a `/api/state`

View file

@ -226,21 +226,25 @@ fn spawn_broker_to_dashboard_forwarder(coord: Arc<Coordinator>) {
loop {
match rx.recv().await {
Ok(MessageEvent::Sent { from, to, body, at }) => {
let file_refs = dashboard::scan_validated_paths(&body);
coord.emit_dashboard_event(DashboardEvent::Sent {
seq: coord.next_seq(),
from,
to,
body,
at,
file_refs,
});
}
Ok(MessageEvent::Delivered { from, to, body, at }) => {
let file_refs = dashboard::scan_validated_paths(&body);
coord.emit_dashboard_event(DashboardEvent::Delivered {
seq: coord.next_seq(),
from,
to,
body,
at,
file_refs,
});
}
Err(tokio::sync::broadcast::error::RecvError::Lagged(n)) => {