path linkify: server-side validation via /api/state-file/check

regex back to permissive ("looks like a path") — the server is
authoritative on whether each match is a file. anchors render
optimistically, paths queue for batch validation (50ms coalesce),
non-files downgrade to plain text + the sibling <details>
preview is dropped. session-scoped cache (pathValidity Map) so
repeated paths skip the roundtrip.

new endpoint POST /api/state-file/check accepts { paths } and
returns { results: {<path>: bool} }. shares resolve_state_path
helper with the read endpoint so security rules can't drift —
both refuse anything outside the allow-list, anything resolved
outside via symlink, or anything in a per-agent subdir other
than state/. capped at 64 paths/request.

drops the brittle client-side filename heuristic (the .ext-
required rule that missed README/Makefile and still matched bare
dirs without trailing slash). single source of truth.
This commit is contained in:
müde 2026-05-17 23:36:44 +02:00
parent 0e2d26304e
commit 6e098fad29
2 changed files with 145 additions and 41 deletions

View file

@ -48,18 +48,75 @@
// perspective (we'd need to know which agent the message is about // perspective (we'd need to know which agent the message is about
// to translate it). Prefer `/agents/<name>/state/...` in agent // to translate it). Prefer `/agents/<name>/state/...` in agent
// outputs and the link will resolve. // outputs and the link will resolve.
// Each branch insists the final segment looks like a filename: // Match anything that *looks* like a path under the allow-listed
// at least one non-dot char, a literal dot, then an extension // roots; the server endpoint `/api/state-file/check` is the
// (`[\w-]+\.[\w.-]+`). That catches the common case (`notes.md`, // authority on whether each match is actually a file. Optimistic
// `2026-01.log`, `foo.bar.baz`) while skipping bare directory // anchors render first; a batched validation request downgrades
// names like `/agents/foo/state/notes` whether or not they carry // non-files (dirs, missing, forbidden subtrees) back to plain
// a trailing slash. Misses extensionless files (`README`, // text. No client-side filename heuristics — the regex's job is
// `Makefile`) — accepted trade-off; the /api/state-file endpoint // just "spot a path-shaped token".
// still serves them if the operator types the path manually. const PATH_RE = /(\/var\/lib\/hyperhive\/agents\/[\w.-]+\/state\/[\w./-]+|\/var\/lib\/hyperhive\/shared\/[\w./-]+|\/agents\/[\w.-]+\/state\/[\w./-]+|\/shared\/[\w./-]+)/g;
// The endpoint also refuses non-files at the server level; this
// is the front-end peer so the operator doesn't see a dead link // Session-scoped truthiness cache for paths the server has
// they'll just get an error from on click. // already verified. `true` = render as a clickable anchor;
const PATH_RE = /(\/var\/lib\/hyperhive\/agents\/[\w.-]+\/state\/(?:[\w.-]+\/)*[\w-]+\.[\w.-]+|\/var\/lib\/hyperhive\/shared\/(?:[\w.-]+\/)*[\w-]+\.[\w.-]+|\/agents\/[\w.-]+\/state\/(?:[\w.-]+\/)*[\w-]+\.[\w.-]+|\/shared\/(?:[\w.-]+\/)*[\w-]+\.[\w.-]+)/g; // `false` = strip the anchor on next reflow. Cleared only on
// page reload — agents creating new files mid-session show up
// next time the path is referenced.
const pathValidity = new Map();
// Anchors awaiting validation. Keyed by path so we can rewrite
// every anchor for the same path in one shot when the result
// lands. Each value is an array of { anchor, details } records
// (one per rendered occurrence of the path) so we can also drop
// the sibling preview when the path turns out to be invalid.
const pendingAnchors = new Map();
// Handle of the scheduled coalescing timeout, or null when no
// flush is currently scheduled.
let validateTimer = null;
// Register one rendered anchor (plus its sibling <details> preview)
// as waiting for the server's verdict on `path`, then (re)arm the
// coalescing timer that will flush the whole queue.
function queuePathForValidation(path, anchor, details) {
  let waiting = pendingAnchors.get(path);
  if (!pendingAnchors.has(path)) {
    waiting = [];
    pendingAnchors.set(path, waiting);
  }
  waiting.push({ anchor, details });
  // Debounce: every new arrival pushes the flush back by 50ms, so a
  // burst of rows (e.g. a backfill replay emitting dozens in one
  // tick) ends up in a single batched request.
  if (validateTimer) {
    clearTimeout(validateTimer);
  }
  validateTimer = setTimeout(flushPathValidation, 50);
}
// Send every queued path to POST /api/state-file/check and apply the
// server's verdicts: confirmed files keep their anchor, everything
// the server explicitly rejects is downgraded to plain text and its
// sibling <details> preview is removed. Verdicts are remembered in
// `pathValidity` for the rest of the session.
//
// Paths are only ever downgraded/cached on an explicit answer from
// the server. If a request fails (transport error or non-2xx status)
// or a path is missing from the reply, its anchors are left as-is —
// clicking them will surface the real error from /api/state-file.
async function flushPathValidation() {
  // Must not exceed the per-request cap enforced by the endpoint;
  // paths beyond the cap are silently ignored by the server and
  // would otherwise be misread as "not a file".
  const MAX_PATHS_PER_REQUEST = 64;
  validateTimer = null;
  if (!pendingAnchors.size) return;
  // Snapshot the queue + clear it before we await — additional
  // anchors that land while a request is in flight queue into a
  // fresh batch.
  const snapshot = new Map(pendingAnchors);
  pendingAnchors.clear();
  const paths = Array.from(snapshot.keys());
  for (let start = 0; start < paths.length; start += MAX_PATHS_PER_REQUEST) {
    const batch = paths.slice(start, start + MAX_PATHS_PER_REQUEST);
    let results;
    try {
      const resp = await fetch('/api/state-file/check', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ paths: batch }),
      });
      if (!resp.ok) {
        // A server-side error is not a verdict: do not strip or
        // negatively cache anything.
        console.warn('path validation batch rejected', resp.status);
        return;
      }
      results = (await resp.json()).results || {};
    } catch (err) {
      console.warn('path validation batch failed', err);
      // On transport failure leave anchors as-is — clicking them
      // will surface the real error from /api/state-file inline.
      return;
    }
    for (const path of batch) {
      // No entry for this path means the server gave no verdict;
      // leave it undecided so a later reference re-queues it.
      if (!Object.prototype.hasOwnProperty.call(results, path)) continue;
      const ok = !!results[path];
      pathValidity.set(path, ok);
      if (ok) continue;
      // Downgrade every pending anchor for this path back to
      // plain text + drop its sibling <details> preview.
      for (const { anchor, details } of snapshot.get(path)) {
        if (anchor.parentNode) {
          anchor.parentNode.replaceChild(document.createTextNode(path), anchor);
        }
        if (details && details.parentNode) {
          details.parentNode.removeChild(details);
        }
      }
    }
  }
}
async function fetchStateFile(path) { async function fetchStateFile(path) {
const resp = await fetch('/api/state-file?path=' + encodeURIComponent(path)); const resp = await fetch('/api/state-file?path=' + encodeURIComponent(path));
const text = await resp.text(); const text = await resp.text();
@ -99,6 +156,9 @@
// Append `text` to `parent` as a mix of text nodes + path anchors. // Append `text` to `parent` as a mix of text nodes + path anchors.
// Returns the array of generated `<details>` previews so the // Returns the array of generated `<details>` previews so the
// caller can append them as block siblings under the row. // caller can append them as block siblings under the row.
// Anchors render optimistically; paths unseen this session are
// queued for batch validation, and the server's verdict either
// confirms or strips them via `flushPathValidation`.
function appendLinkified(parent, text) { function appendLinkified(parent, text) {
const previews = []; const previews = [];
if (text == null) return previews; if (text == null) return previews;
@ -110,9 +170,21 @@
if (m.index > lastIdx) { if (m.index > lastIdx) {
parent.appendChild(document.createTextNode(str.slice(lastIdx, m.index))); parent.appendChild(document.createTextNode(str.slice(lastIdx, m.index)));
} }
const { anchor, details } = makePathPreview(m[0]); const path = m[0];
const cached = pathValidity.get(path);
if (cached === false) {
// Already known to be a non-file — render plain text, no
// anchor, no preview. The text still shows up so the
// operator sees the path; it's just not clickable.
parent.appendChild(document.createTextNode(path));
} else {
const { anchor, details } = makePathPreview(path);
parent.appendChild(anchor); parent.appendChild(anchor);
previews.push(details); previews.push(details);
// Unknown paths queue for validation; known-good ones
// skip the roundtrip entirely.
if (cached !== true) queuePathForValidation(path, anchor, details);
}
lastIdx = m.index + m[0].length; lastIdx = m.index + m[0].length;
} }
if (lastIdx < str.length) { if (lastIdx < str.length) {

View file

@ -55,6 +55,7 @@ pub async fn serve(port: u16, coord: Arc<Coordinator>) -> Result<()> {
.route("/purge-tombstone/{name}", post(post_purge_tombstone)) .route("/purge-tombstone/{name}", post(post_purge_tombstone))
.route("/api/journal/{name}", get(get_journal)) .route("/api/journal/{name}", get(get_journal))
.route("/api/state-file", get(get_state_file)) .route("/api/state-file", get(get_state_file))
.route("/api/state-file/check", post(post_state_file_check))
.route("/api/reminders", get(api_reminders)) .route("/api/reminders", get(api_reminders))
.route("/cancel-reminder/{id}", post(post_cancel_reminder)) .route("/cancel-reminder/{id}", post(post_cancel_reminder))
.route("/api/agent-config/{name}", get(get_agent_config)) .route("/api/agent-config/{name}", get(get_agent_config))
@ -911,15 +912,17 @@ struct StateFileQuery {
/// traversal and symlink games can't escape the roots. Files larger /// traversal and symlink games can't escape the roots. Files larger
/// than `MAX_BYTES` are truncated with a banner so a runaway log /// than `MAX_BYTES` are truncated with a banner so a runaway log
/// can't OOM the browser. /// can't OOM the browser.
async fn get_state_file( /// Resolve a caller-supplied path string to a canonical host path
axum::extract::Query(q): axum::extract::Query<StateFileQuery>, /// that has been verified against the allow-list. Returns `Err`
) -> Response { /// with a human-readable reason for every failure mode (path
const MAX_BYTES: usize = 1 << 20; // 1 MiB /// outside roots, canonicalize failure, escape via symlink,
/// per-agent subdir not `state`). Shared by `get_state_file` (read)
/// and `post_state_file_check` (existence probe) so both endpoints
/// apply identical security rules.
fn resolve_state_path(raw: &str) -> std::result::Result<std::path::PathBuf, String> {
const AGENTS_ROOT: &str = "/var/lib/hyperhive/agents"; const AGENTS_ROOT: &str = "/var/lib/hyperhive/agents";
const SHARED_ROOT: &str = "/var/lib/hyperhive/shared"; const SHARED_ROOT: &str = "/var/lib/hyperhive/shared";
let raw = q.path.trim(); let raw = raw.trim();
// Translate the container-view forms to host paths so the
// allow-list check has a single canonical shape to match.
let mapped: std::path::PathBuf = if let Some(rest) = raw.strip_prefix("/agents/") { let mapped: std::path::PathBuf = if let Some(rest) = raw.strip_prefix("/agents/") {
std::path::PathBuf::from(format!("{AGENTS_ROOT}/{rest}")) std::path::PathBuf::from(format!("{AGENTS_ROOT}/{rest}"))
} else if let Some(rest) = raw.strip_prefix("/shared/") { } else if let Some(rest) = raw.strip_prefix("/shared/") {
@ -927,38 +930,37 @@ async fn get_state_file(
} else if raw.starts_with(AGENTS_ROOT) || raw.starts_with(SHARED_ROOT) { } else if raw.starts_with(AGENTS_ROOT) || raw.starts_with(SHARED_ROOT) {
std::path::PathBuf::from(raw) std::path::PathBuf::from(raw)
} else { } else {
return error_response(&format!("state-file: path not in allow-list: {raw}")); return Err(format!("path not in allow-list: {raw}"));
}; };
// Canonicalise so `..` / symlinks resolve before the prefix let canonical = std::fs::canonicalize(&mapped).map_err(|e| format!("{}: {e}", mapped.display()))?;
// check. A failure here means the path doesn't exist on disk if !(canonical.starts_with(AGENTS_ROOT) || canonical.starts_with(SHARED_ROOT)) {
// (or we can't reach it) — surface the underlying error. return Err(format!(
let canonical = match std::fs::canonicalize(&mapped) { "resolved path escapes allow-list: {}",
Ok(p) => p,
Err(e) => return error_response(&format!("state-file: {}: {e}", mapped.display())),
};
let allowed = canonical.starts_with(AGENTS_ROOT) || canonical.starts_with(SHARED_ROOT);
if !allowed {
return error_response(&format!(
"state-file: resolved path escapes allow-list: {}",
canonical.display() canonical.display()
)); ));
} }
// For per-agent paths, also require the second-from-root
// component to be `state` (not `claude` or `config`). Claude
// creds shouldn't leak through this endpoint; config is the
// applied repo (already exposed via /api/agent-config). Reading
// `/var/lib/hyperhive/agents/<n>/state/...` is the intended use.
if let Ok(rel) = canonical.strip_prefix(AGENTS_ROOT) { if let Ok(rel) = canonical.strip_prefix(AGENTS_ROOT) {
let mut components = rel.components(); let mut components = rel.components();
let _agent = components.next(); let _agent = components.next();
let dir = components.next().and_then(|c| c.as_os_str().to_str()); let dir = components.next().and_then(|c| c.as_os_str().to_str());
if dir != Some("state") { if dir != Some("state") {
return error_response(&format!( return Err(format!(
"state-file: only per-agent state/ is readable here ({} dir not allowed)", "only per-agent state/ is readable here ({} dir not allowed)",
dir.unwrap_or("(root)") dir.unwrap_or("(root)")
)); ));
} }
} }
Ok(canonical)
}
async fn get_state_file(
axum::extract::Query(q): axum::extract::Query<StateFileQuery>,
) -> Response {
const MAX_BYTES: usize = 1 << 20; // 1 MiB
let canonical = match resolve_state_path(&q.path) {
Ok(p) => p,
Err(e) => return error_response(&format!("state-file: {e}")),
};
let meta = match std::fs::metadata(&canonical) { let meta = match std::fs::metadata(&canonical) {
Ok(m) => m, Ok(m) => m,
Err(e) => return error_response(&format!("state-file: stat {}: {e}", canonical.display())), Err(e) => return error_response(&format!("state-file: stat {}: {e}", canonical.display())),
@ -985,6 +987,36 @@ async fn get_state_file(
([("content-type", "text/plain; charset=utf-8")], body).into_response() ([("content-type", "text/plain; charset=utf-8")], body).into_response()
} }
/// JSON request body accepted by `post_state_file_check`.
#[derive(Deserialize)]
struct StateFileCheckReq {
/// Caller-supplied path strings to probe; each one is passed to
/// `resolve_state_path` exactly as received.
paths: Vec<String>,
}
/// Batch "is this a readable regular file?" probe used by the
/// front-end path-link autodetect.
///
/// Takes a JSON body `{ "paths": [...] }` and answers with
/// `{ "results": { "<path>": bool } }`, keyed by the path strings
/// exactly as they were submitted. A path maps to `true` only when
/// `resolve_state_path` accepts it (the same rules `get_state_file`
/// applies) and the resolved target's metadata reports a regular
/// file; every failure mode collapses to `false`.
///
/// Only the first `MAX_PATHS` entries are examined; anything past
/// that is left out of the result map entirely, which bounds the
/// number of filesystem lookups a single request can trigger.
async fn post_state_file_check(
    axum::Json(req): axum::Json<StateFileCheckReq>,
) -> Response {
    const MAX_PATHS: usize = 64;
    let verdicts: std::collections::HashMap<String, bool> = req
        .paths
        .into_iter()
        .take(MAX_PATHS)
        .map(|candidate| {
            let is_regular_file = resolve_state_path(&candidate)
                .ok()
                .and_then(|resolved| std::fs::metadata(&resolved).ok())
                .is_some_and(|meta| meta.is_file());
            (candidate, is_regular_file)
        })
        .collect();
    axum::Json(serde_json::json!({ "results": verdicts })).into_response()
}
async fn api_reminders(State(state): State<AppState>) -> Response { async fn api_reminders(State(state): State<AppState>) -> Response {
match state.coord.broker.list_pending_reminders() { match state.coord.broker.list_pending_reminders() {
Ok(rows) => axum::Json(rows).into_response(), Ok(rows) => axum::Json(rows).into_response(),