diff --git a/Cargo.lock b/Cargo.lock index f09335f..ca9b6f9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -471,6 +471,7 @@ dependencies = [ "axum", "clap", "hive-sh4re", + "libc", "rusqlite", "serde", "serde_json", diff --git a/hive-c0re/Cargo.toml b/hive-c0re/Cargo.toml index 99e1874..47ef2a8 100644 --- a/hive-c0re/Cargo.toml +++ b/hive-c0re/Cargo.toml @@ -11,6 +11,7 @@ anyhow.workspace = true axum.workspace = true clap.workspace = true hive-sh4re.workspace = true +libc = "0.2" rusqlite.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/hive-c0re/src/agent_server.rs b/hive-c0re/src/agent_server.rs index e90da40..978eab4 100644 --- a/hive-c0re/src/agent_server.rs +++ b/hive-c0re/src/agent_server.rs @@ -177,7 +177,7 @@ async fn dispatch(req: &AgentRequest, agent: &str, coord: &Arc) -> message, timing, file_path, - } => handle_remind(broker, agent, message, timing, file_path.as_deref()), + } => handle_remind(coord, agent, message, timing, file_path.as_deref()), } } @@ -214,7 +214,7 @@ fn handle_ask_operator( } } -/// Cap on the inline `message` byte length the Remind request accepts. +/// Cap on the inline `message` byte length when no `file_path` is set. /// Reminders land in the agent's inbox and feed the next wake prompt — a /// multi-kilobyte body bloats every subsequent turn's context. Anything /// bigger should be persisted to disk by the caller and pointed at via @@ -222,19 +222,31 @@ fn handle_ask_operator( /// than the full body). const REMIND_MESSAGE_MAX: usize = 4096; +/// Upper cap when `file_path` IS set. The body still lands in the +/// reminders sqlite row until delivery, so without an upper bound a +/// caller could DOS the broker DB with a single multi-megabyte +/// reminder. 64 KiB is generous for any reasonable payload + keeps a +/// single row small enough that sqlite won't choke. +const REMIND_MESSAGE_MAX_WITH_FILE: usize = 64 * 1024; + fn handle_remind( - broker: &crate::broker::Broker, + coord: &Arc, agent: &str, message: &str, timing: &hive_sh4re::ReminderTiming, file_path: Option<&str>, ) -> AgentResponse { - if file_path.is_none() && message.len() > REMIND_MESSAGE_MAX { + let (cap, hint) = match file_path { + None => ( + REMIND_MESSAGE_MAX, + "; set `file_path` to persist a larger payload to a file instead", + ), + Some(_) => (REMIND_MESSAGE_MAX_WITH_FILE, ""), + }; + if message.len() > cap { return AgentResponse::Err { message: format!( - "reminder body too long ({} bytes, max {REMIND_MESSAGE_MAX}); write the \ - payload to a file under your /state/ dir and pass its path as \ - `file_path` so the reminder delivers a pointer instead of the full body", + "reminder body too long ({} bytes, max {cap}){hint}", message.len() ), }; @@ -247,7 +259,7 @@ fn handle_remind( }; } }; - match broker.store_reminder(agent, message, file_path, due_at) { + match coord.broker.store_reminder(agent, message, file_path, due_at) { Ok(id) => { tracing::info!(%id, %agent, %due_at, "reminder scheduled"); AgentResponse::Ok diff --git a/hive-c0re/src/reminder_scheduler.rs b/hive-c0re/src/reminder_scheduler.rs index c21b752..e8d0a78 100644 --- a/hive-c0re/src/reminder_scheduler.rs +++ b/hive-c0re/src/reminder_scheduler.rs @@ -10,10 +10,18 @@ //! the host path (`/var/lib/hyperhive/agents//state/foo.md`) //! so hive-c0re can write to it from outside the container. //! - Reject anything that isn't under the agent's own state subtree, -//! or that contains `..` (path traversal). Falling outside the -//! allowed prefix means the file write is skipped and the original -//! message is delivered inline (with a noted warning) — the -//! reminder still fires, just without the payload split. +//! contains `..` (path traversal), or has an empty relative tail. +//! Falling outside the allowed prefix means the file write is +//! skipped and the original message is delivered inline (with a +//! noted warning) — the reminder still fires, just without the +//! payload split. +//! - Defend against symlink escape: after `create_dir_all`, the +//! parent dir is canonicalized and re-verified to live under the +//! agent's host state root. Then we open the final file with +//! `O_NOFOLLOW | O_CREAT | O_TRUNC` so an existing-symlink basename +//! can't redirect the write either. Without this an agent could +//! `ln -s /etc /agents/foo/state/escape` and bounce a write to an +//! arbitrary host path. //! - Write the reminder body to disk and deliver a short pointer //! message in its place, so the agent's inbox/wake-prompt stays //! small and the bulky payload can be read out of band. @@ -22,6 +30,8 @@ //! inside `Broker::deliver_reminder`; this module only computes the //! body string before calling it. +use std::io::Write; +use std::os::unix::fs::OpenOptionsExt; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::Duration; @@ -31,7 +41,9 @@ use crate::coordinator::Coordinator; /// Per-tick cap on reminders delivered. Anything over this stays due /// in the table and gets picked up on the next tick — keeps a /// 10k-deep backlog from flooding the broker (or hogging the broker -/// mutex) in one shot. +/// mutex) in one shot. 100/tick × 5s tick = sustained throughput cap +/// of ~20 reminders/sec; bump together if the loose-ends tracker +/// starts firing higher rates. const REMINDER_BATCH_LIMIT: u64 = 100; /// Poll interval. Trade-off between latency on a freshly due reminder @@ -72,9 +84,9 @@ fn tick(coord: &Arc) { /// Build the inbox body for a due reminder. When `file_path` is None /// the body is the original message verbatim. When set, we attempt to /// persist the message body to the requested file and return a short -/// pointer string instead. Failures (bad prefix, write error, missing -/// parent) fall back to inline delivery with a noted warning so the -/// reminder still fires. +/// pointer string instead. Failures (bad prefix, symlink escape, +/// write error, missing parent) fall back to inline delivery with a +/// noted warning so the reminder still fires. fn prepare_body(agent: &str, message: &str, file_path: Option<&str>) -> String { let Some(req_path) = file_path else { return message.to_owned(); @@ -83,34 +95,80 @@ fn prepare_body(agent: &str, message: &str, file_path: Option<&str>) -> String { Ok(p) => p, Err(reason) => { tracing::warn!(%agent, %req_path, %reason, "reminder file_path rejected; delivering inline"); - return format!( - "[reminder file_path '{req_path}' rejected: {reason}; delivering body inline]\n\n{message}" - ); + return inline_fallback(req_path, &format!("rejected: {reason}"), message); } }; - if let Some(parent) = host_path.parent() - && let Err(e) = std::fs::create_dir_all(parent) - { - tracing::warn!(%agent, path = %host_path.display(), error = ?e, "reminder file_path parent mkdir failed; delivering inline"); - return format!( - "[reminder file_path '{req_path}' parent dir create failed: {e}; delivering body inline]\n\n{message}" - ); + match write_payload(agent, &host_path, message) { + Ok(()) => { + let bytes = message.len(); + // debug! not info! — under load this would dominate the log. + tracing::debug!(%agent, path = %host_path.display(), bytes, "reminder body written to file"); + format!( + "reminder body persisted to `{req_path}` ({bytes} bytes); read with your filesystem tools" + ) + } + Err(reason) => { + tracing::warn!(%agent, path = %host_path.display(), %reason, "reminder file_path write failed; delivering inline"); + inline_fallback(req_path, &reason, message) + } } - if let Err(e) = std::fs::write(&host_path, message) { - tracing::warn!(%agent, path = %host_path.display(), error = ?e, "reminder file_path write failed; delivering inline"); - return format!( - "[reminder file_path '{req_path}' write failed: {e}; delivering body inline]\n\n{message}" - ); +} + +fn inline_fallback(req_path: &str, reason: &str, message: &str) -> String { + format!("[reminder file_path '{req_path}' {reason}; delivering body inline]\n\n{message}") +} + +/// Persist `message` to `host_path` with the symlink-escape defenses +/// described in the module docs. Returns `Ok(())` on success, or a +/// human-readable reason string on any failure (caller logs + +/// inline-falls-back). +fn write_payload(agent: &str, host_path: &Path, message: &str) -> Result<(), String> { + let Some(parent) = host_path.parent() else { + return Err("internal: host path has no parent".to_owned()); + }; + std::fs::create_dir_all(parent) + .map_err(|e| format!("parent dir create failed: {e}"))?; + // Resolve symlinks in the parent chain, then re-verify the + // canonical form still lives under the agent's host state root — + // catches `ln -s /etc state/escape` style attacks. + let parent_canonical = parent + .canonicalize() + .map_err(|e| format!("parent canonicalize failed: {e}"))?; + let agent_root = Coordinator::agent_notes_dir(agent) + .canonicalize() + .map_err(|e| format!("agent state root canonicalize failed: {e}"))?; + if !parent_canonical.starts_with(&agent_root) { + return Err(format!( + "symlink escape: canonical parent `{}` outside agent root `{}`", + parent_canonical.display(), + agent_root.display() + )); } - let bytes = message.len(); - tracing::info!(%agent, path = %host_path.display(), bytes, "reminder body written to file"); - format!("reminder body persisted to `{req_path}` ({bytes} bytes); read with your filesystem tools") + let basename = host_path + .file_name() + .ok_or_else(|| "missing basename".to_owned())?; + let target = parent_canonical.join(basename); + // O_NOFOLLOW on the final component refuses to open if the + // basename is itself an existing symlink. Combined with the + // canonicalize-parent check above, no symlink anywhere in the + // path can redirect the write. + let mut file = std::fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .custom_flags(libc::O_NOFOLLOW) + .open(&target) + .map_err(|e| format!("open failed: {e}"))?; + file.write_all(message.as_bytes()) + .map_err(|e| format!("write failed: {e}"))?; + Ok(()) } /// Map an agent-visible container path to the matching host path, -/// validating that it lives under the agent's own state subtree and -/// doesn't try to traverse out via `..`. Returns the host `PathBuf` on -/// success, or a human-readable reason string on rejection. +/// validating that it lives under the agent's own state subtree, has +/// a non-empty relative tail, and doesn't try to traverse out via +/// `..`. Returns the host `PathBuf` on success, or a human-readable +/// reason string on rejection. fn resolve_host_path(agent: &str, req_path: &str) -> Result { let prefix = format!("/agents/{agent}/state/"); let Some(rel) = req_path.strip_prefix(&prefix) else { @@ -118,6 +176,9 @@ fn resolve_host_path(agent: &str, req_path: &str) -> Result { "must be absolute and under `{prefix}` (got `{req_path}`)" )); }; + if rel.is_empty() { + return Err("file_path must include a filename, not just the state dir".to_owned()); + } let rel_path = Path::new(rel); for comp in rel_path.components() { match comp { @@ -149,6 +210,15 @@ mod tests { assert!(resolve_host_path("foo", "/agents/foo/state/./x.md").is_err()); } + #[test] + fn rejects_empty_relative_tail() { + // Trailing slash → empty tail. Used to fall through to + // create_dir_all + write-to-dir → confusing inline fallback; + // explicit reject gives a cleaner log. + let err = resolve_host_path("foo", "/agents/foo/state/").unwrap_err(); + assert!(err.contains("must include a filename"), "got: {err}"); + } + #[test] fn accepts_well_formed_path() { let p = resolve_host_path("foo", "/agents/foo/state/reminders/123.md").unwrap();