reminder: fix symlink escape + db bloat cap + handler consistency

This commit is contained in:
damocles 2026-05-17 11:26:59 +02:00
parent 3da6912e73
commit 753409a5ef
4 changed files with 121 additions and 37 deletions

1
Cargo.lock generated
View file

@ -471,6 +471,7 @@ dependencies = [
"axum", "axum",
"clap", "clap",
"hive-sh4re", "hive-sh4re",
"libc",
"rusqlite", "rusqlite",
"serde", "serde",
"serde_json", "serde_json",

View file

@ -11,6 +11,7 @@ anyhow.workspace = true
axum.workspace = true axum.workspace = true
clap.workspace = true clap.workspace = true
hive-sh4re.workspace = true hive-sh4re.workspace = true
libc = "0.2"
rusqlite.workspace = true rusqlite.workspace = true
serde.workspace = true serde.workspace = true
serde_json.workspace = true serde_json.workspace = true

View file

@ -177,7 +177,7 @@ async fn dispatch(req: &AgentRequest, agent: &str, coord: &Arc<Coordinator>) ->
message, message,
timing, timing,
file_path, file_path,
} => handle_remind(broker, agent, message, timing, file_path.as_deref()), } => handle_remind(coord, agent, message, timing, file_path.as_deref()),
} }
} }
@ -214,7 +214,7 @@ fn handle_ask_operator(
} }
} }
/// Cap on the inline `message` byte length the Remind request accepts. /// Cap on the inline `message` byte length when no `file_path` is set.
/// Reminders land in the agent's inbox and feed the next wake prompt — a /// Reminders land in the agent's inbox and feed the next wake prompt — a
/// multi-kilobyte body bloats every subsequent turn's context. Anything /// multi-kilobyte body bloats every subsequent turn's context. Anything
/// bigger should be persisted to disk by the caller and pointed at via /// bigger should be persisted to disk by the caller and pointed at via
@ -222,19 +222,31 @@ fn handle_ask_operator(
/// than the full body). /// than the full body).
const REMIND_MESSAGE_MAX: usize = 4096; const REMIND_MESSAGE_MAX: usize = 4096;
/// Upper cap when `file_path` IS set. The body still lands in the
/// reminders sqlite row until delivery, so without an upper bound a
/// caller could DOS the broker DB with a single multi-megabyte
/// reminder. 64 KiB is generous for any reasonable payload + keeps a
/// single row small enough that sqlite won't choke.
const REMIND_MESSAGE_MAX_WITH_FILE: usize = 64 * 1024;
fn handle_remind( fn handle_remind(
broker: &crate::broker::Broker, coord: &Arc<Coordinator>,
agent: &str, agent: &str,
message: &str, message: &str,
timing: &hive_sh4re::ReminderTiming, timing: &hive_sh4re::ReminderTiming,
file_path: Option<&str>, file_path: Option<&str>,
) -> AgentResponse { ) -> AgentResponse {
if file_path.is_none() && message.len() > REMIND_MESSAGE_MAX { let (cap, hint) = match file_path {
None => (
REMIND_MESSAGE_MAX,
"; set `file_path` to persist a larger payload to a file instead",
),
Some(_) => (REMIND_MESSAGE_MAX_WITH_FILE, ""),
};
if message.len() > cap {
return AgentResponse::Err { return AgentResponse::Err {
message: format!( message: format!(
"reminder body too long ({} bytes, max {REMIND_MESSAGE_MAX}); write the \ "reminder body too long ({} bytes, max {cap}){hint}",
payload to a file under your /state/ dir and pass its path as \
`file_path` so the reminder delivers a pointer instead of the full body",
message.len() message.len()
), ),
}; };
@ -247,7 +259,7 @@ fn handle_remind(
}; };
} }
}; };
match broker.store_reminder(agent, message, file_path, due_at) { match coord.broker.store_reminder(agent, message, file_path, due_at) {
Ok(id) => { Ok(id) => {
tracing::info!(%id, %agent, %due_at, "reminder scheduled"); tracing::info!(%id, %agent, %due_at, "reminder scheduled");
AgentResponse::Ok AgentResponse::Ok

View file

@ -10,10 +10,18 @@
//! the host path (`/var/lib/hyperhive/agents/<agent>/state/foo.md`) //! the host path (`/var/lib/hyperhive/agents/<agent>/state/foo.md`)
//! so hive-c0re can write to it from outside the container. //! so hive-c0re can write to it from outside the container.
//! - Reject anything that isn't under the agent's own state subtree, //! - Reject anything that isn't under the agent's own state subtree,
//! or that contains `..` (path traversal). Falling outside the //! contains `..` (path traversal), or has an empty relative tail.
//! allowed prefix means the file write is skipped and the original //! Falling outside the allowed prefix means the file write is
//! message is delivered inline (with a noted warning) — the //! skipped and the original message is delivered inline (with a
//! reminder still fires, just without the payload split. //! noted warning) — the reminder still fires, just without the
//! payload split.
//! - Defend against symlink escape: after `create_dir_all`, the
//! parent dir is canonicalized and re-verified to live under the
//! agent's host state root. Then we open the final file with
//! `O_NOFOLLOW | O_CREAT | O_TRUNC` so an existing-symlink basename
//! can't redirect the write either. Without this an agent could
//! `ln -s /etc /agents/foo/state/escape` and bounce a write to an
//! arbitrary host path.
//! - Write the reminder body to disk and deliver a short pointer //! - Write the reminder body to disk and deliver a short pointer
//! message in its place, so the agent's inbox/wake-prompt stays //! message in its place, so the agent's inbox/wake-prompt stays
//! small and the bulky payload can be read out of band. //! small and the bulky payload can be read out of band.
@ -22,6 +30,8 @@
//! inside `Broker::deliver_reminder`; this module only computes the //! inside `Broker::deliver_reminder`; this module only computes the
//! body string before calling it. //! body string before calling it.
use std::io::Write;
use std::os::unix::fs::OpenOptionsExt;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::Arc; use std::sync::Arc;
use std::time::Duration; use std::time::Duration;
@ -31,7 +41,9 @@ use crate::coordinator::Coordinator;
/// Per-tick cap on reminders delivered. Anything over this stays due /// Per-tick cap on reminders delivered. Anything over this stays due
/// in the table and gets picked up on the next tick — keeps a /// in the table and gets picked up on the next tick — keeps a
/// 10k-deep backlog from flooding the broker (or hogging the broker /// 10k-deep backlog from flooding the broker (or hogging the broker
/// mutex) in one shot. /// mutex) in one shot. 100/tick × 5s tick = sustained throughput cap
/// of ~20 reminders/sec; bump together if the loose-ends tracker
/// starts firing higher rates.
const REMINDER_BATCH_LIMIT: u64 = 100; const REMINDER_BATCH_LIMIT: u64 = 100;
/// Poll interval. Trade-off between latency on a freshly due reminder /// Poll interval. Trade-off between latency on a freshly due reminder
@ -72,9 +84,9 @@ fn tick(coord: &Arc<Coordinator>) {
/// Build the inbox body for a due reminder. When `file_path` is None /// Build the inbox body for a due reminder. When `file_path` is None
/// the body is the original message verbatim. When set, we attempt to /// the body is the original message verbatim. When set, we attempt to
/// persist the message body to the requested file and return a short /// persist the message body to the requested file and return a short
/// pointer string instead. Failures (bad prefix, write error, missing /// pointer string instead. Failures (bad prefix, symlink escape,
/// parent) fall back to inline delivery with a noted warning so the /// write error, missing parent) fall back to inline delivery with a
/// reminder still fires. /// noted warning so the reminder still fires.
fn prepare_body(agent: &str, message: &str, file_path: Option<&str>) -> String { fn prepare_body(agent: &str, message: &str, file_path: Option<&str>) -> String {
let Some(req_path) = file_path else { let Some(req_path) = file_path else {
return message.to_owned(); return message.to_owned();
@ -83,34 +95,80 @@ fn prepare_body(agent: &str, message: &str, file_path: Option<&str>) -> String {
Ok(p) => p, Ok(p) => p,
Err(reason) => { Err(reason) => {
tracing::warn!(%agent, %req_path, %reason, "reminder file_path rejected; delivering inline"); tracing::warn!(%agent, %req_path, %reason, "reminder file_path rejected; delivering inline");
return format!( return inline_fallback(req_path, &format!("rejected: {reason}"), message);
"[reminder file_path '{req_path}' rejected: {reason}; delivering body inline]\n\n{message}"
);
} }
}; };
if let Some(parent) = host_path.parent() match write_payload(agent, &host_path, message) {
&& let Err(e) = std::fs::create_dir_all(parent) Ok(()) => {
{ let bytes = message.len();
tracing::warn!(%agent, path = %host_path.display(), error = ?e, "reminder file_path parent mkdir failed; delivering inline"); // debug! not info! — under load this would dominate the log.
return format!( tracing::debug!(%agent, path = %host_path.display(), bytes, "reminder body written to file");
"[reminder file_path '{req_path}' parent dir create failed: {e}; delivering body inline]\n\n{message}" format!(
); "reminder body persisted to `{req_path}` ({bytes} bytes); read with your filesystem tools"
)
}
Err(reason) => {
tracing::warn!(%agent, path = %host_path.display(), %reason, "reminder file_path write failed; delivering inline");
inline_fallback(req_path, &reason, message)
}
} }
if let Err(e) = std::fs::write(&host_path, message) { }
tracing::warn!(%agent, path = %host_path.display(), error = ?e, "reminder file_path write failed; delivering inline");
return format!( fn inline_fallback(req_path: &str, reason: &str, message: &str) -> String {
"[reminder file_path '{req_path}' write failed: {e}; delivering body inline]\n\n{message}" format!("[reminder file_path '{req_path}' {reason}; delivering body inline]\n\n{message}")
); }
/// Persist `message` to `host_path` with the symlink-escape defenses
/// described in the module docs. Returns `Ok(())` on success, or a
/// human-readable reason string on any failure (caller logs +
/// inline-falls-back).
fn write_payload(agent: &str, host_path: &Path, message: &str) -> Result<(), String> {
let Some(parent) = host_path.parent() else {
return Err("internal: host path has no parent".to_owned());
};
std::fs::create_dir_all(parent)
.map_err(|e| format!("parent dir create failed: {e}"))?;
// Resolve symlinks in the parent chain, then re-verify the
// canonical form still lives under the agent's host state root —
// catches `ln -s /etc state/escape` style attacks.
let parent_canonical = parent
.canonicalize()
.map_err(|e| format!("parent canonicalize failed: {e}"))?;
let agent_root = Coordinator::agent_notes_dir(agent)
.canonicalize()
.map_err(|e| format!("agent state root canonicalize failed: {e}"))?;
if !parent_canonical.starts_with(&agent_root) {
return Err(format!(
"symlink escape: canonical parent `{}` outside agent root `{}`",
parent_canonical.display(),
agent_root.display()
));
} }
let bytes = message.len(); let basename = host_path
tracing::info!(%agent, path = %host_path.display(), bytes, "reminder body written to file"); .file_name()
format!("reminder body persisted to `{req_path}` ({bytes} bytes); read with your filesystem tools") .ok_or_else(|| "missing basename".to_owned())?;
let target = parent_canonical.join(basename);
// O_NOFOLLOW on the final component refuses to open if the
// basename is itself an existing symlink. Combined with the
// canonicalize-parent check above, no symlink anywhere in the
// path can redirect the write.
let mut file = std::fs::OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.custom_flags(libc::O_NOFOLLOW)
.open(&target)
.map_err(|e| format!("open failed: {e}"))?;
file.write_all(message.as_bytes())
.map_err(|e| format!("write failed: {e}"))?;
Ok(())
} }
/// Map an agent-visible container path to the matching host path, /// Map an agent-visible container path to the matching host path,
/// validating that it lives under the agent's own state subtree and /// validating that it lives under the agent's own state subtree, has
/// doesn't try to traverse out via `..`. Returns the host `PathBuf` on /// a non-empty relative tail, and doesn't try to traverse out via
/// success, or a human-readable reason string on rejection. /// `..`. Returns the host `PathBuf` on success, or a human-readable
/// reason string on rejection.
fn resolve_host_path(agent: &str, req_path: &str) -> Result<PathBuf, String> { fn resolve_host_path(agent: &str, req_path: &str) -> Result<PathBuf, String> {
let prefix = format!("/agents/{agent}/state/"); let prefix = format!("/agents/{agent}/state/");
let Some(rel) = req_path.strip_prefix(&prefix) else { let Some(rel) = req_path.strip_prefix(&prefix) else {
@ -118,6 +176,9 @@ fn resolve_host_path(agent: &str, req_path: &str) -> Result<PathBuf, String> {
"must be absolute and under `{prefix}` (got `{req_path}`)" "must be absolute and under `{prefix}` (got `{req_path}`)"
)); ));
}; };
if rel.is_empty() {
return Err("file_path must include a filename, not just the state dir".to_owned());
}
let rel_path = Path::new(rel); let rel_path = Path::new(rel);
for comp in rel_path.components() { for comp in rel_path.components() {
match comp { match comp {
@ -149,6 +210,15 @@ mod tests {
assert!(resolve_host_path("foo", "/agents/foo/state/./x.md").is_err()); assert!(resolve_host_path("foo", "/agents/foo/state/./x.md").is_err());
} }
#[test]
fn rejects_empty_relative_tail() {
// Trailing slash → empty tail. Used to fall through to
// create_dir_all + write-to-dir → confusing inline fallback;
// explicit reject gives a cleaner log.
let err = resolve_host_path("foo", "/agents/foo/state/").unwrap_err();
assert!(err.contains("must include a filename"), "got: {err}");
}
#[test] #[test]
fn accepts_well_formed_path() { fn accepts_well_formed_path() {
let p = resolve_host_path("foo", "/agents/foo/state/reminders/123.md").unwrap(); let p = resolve_host_path("foo", "/agents/foo/state/reminders/123.md").unwrap();