reminder: fix symlink escape + db bloat cap + handler consistency
This commit is contained in:
parent
3da6912e73
commit
753409a5ef
4 changed files with 121 additions and 37 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -471,6 +471,7 @@ dependencies = [
|
||||||
"axum",
|
"axum",
|
||||||
"clap",
|
"clap",
|
||||||
"hive-sh4re",
|
"hive-sh4re",
|
||||||
|
"libc",
|
||||||
"rusqlite",
|
"rusqlite",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ anyhow.workspace = true
|
||||||
axum.workspace = true
|
axum.workspace = true
|
||||||
clap.workspace = true
|
clap.workspace = true
|
||||||
hive-sh4re.workspace = true
|
hive-sh4re.workspace = true
|
||||||
|
libc = "0.2"
|
||||||
rusqlite.workspace = true
|
rusqlite.workspace = true
|
||||||
serde.workspace = true
|
serde.workspace = true
|
||||||
serde_json.workspace = true
|
serde_json.workspace = true
|
||||||
|
|
|
||||||
|
|
@ -177,7 +177,7 @@ async fn dispatch(req: &AgentRequest, agent: &str, coord: &Arc<Coordinator>) ->
|
||||||
message,
|
message,
|
||||||
timing,
|
timing,
|
||||||
file_path,
|
file_path,
|
||||||
} => handle_remind(broker, agent, message, timing, file_path.as_deref()),
|
} => handle_remind(coord, agent, message, timing, file_path.as_deref()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -214,7 +214,7 @@ fn handle_ask_operator(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Cap on the inline `message` byte length the Remind request accepts.
|
/// Cap on the inline `message` byte length when no `file_path` is set.
|
||||||
/// Reminders land in the agent's inbox and feed the next wake prompt — a
|
/// Reminders land in the agent's inbox and feed the next wake prompt — a
|
||||||
/// multi-kilobyte body bloats every subsequent turn's context. Anything
|
/// multi-kilobyte body bloats every subsequent turn's context. Anything
|
||||||
/// bigger should be persisted to disk by the caller and pointed at via
|
/// bigger should be persisted to disk by the caller and pointed at via
|
||||||
|
|
@ -222,19 +222,31 @@ fn handle_ask_operator(
|
||||||
/// than the full body).
|
/// than the full body).
|
||||||
const REMIND_MESSAGE_MAX: usize = 4096;
|
const REMIND_MESSAGE_MAX: usize = 4096;
|
||||||
|
|
||||||
|
/// Upper cap when `file_path` IS set. The body still lands in the
|
||||||
|
/// reminders sqlite row until delivery, so without an upper bound a
|
||||||
|
/// caller could DOS the broker DB with a single multi-megabyte
|
||||||
|
/// reminder. 64 KiB is generous for any reasonable payload + keeps a
|
||||||
|
/// single row small enough that sqlite won't choke.
|
||||||
|
const REMIND_MESSAGE_MAX_WITH_FILE: usize = 64 * 1024;
|
||||||
|
|
||||||
fn handle_remind(
|
fn handle_remind(
|
||||||
broker: &crate::broker::Broker,
|
coord: &Arc<Coordinator>,
|
||||||
agent: &str,
|
agent: &str,
|
||||||
message: &str,
|
message: &str,
|
||||||
timing: &hive_sh4re::ReminderTiming,
|
timing: &hive_sh4re::ReminderTiming,
|
||||||
file_path: Option<&str>,
|
file_path: Option<&str>,
|
||||||
) -> AgentResponse {
|
) -> AgentResponse {
|
||||||
if file_path.is_none() && message.len() > REMIND_MESSAGE_MAX {
|
let (cap, hint) = match file_path {
|
||||||
|
None => (
|
||||||
|
REMIND_MESSAGE_MAX,
|
||||||
|
"; set `file_path` to persist a larger payload to a file instead",
|
||||||
|
),
|
||||||
|
Some(_) => (REMIND_MESSAGE_MAX_WITH_FILE, ""),
|
||||||
|
};
|
||||||
|
if message.len() > cap {
|
||||||
return AgentResponse::Err {
|
return AgentResponse::Err {
|
||||||
message: format!(
|
message: format!(
|
||||||
"reminder body too long ({} bytes, max {REMIND_MESSAGE_MAX}); write the \
|
"reminder body too long ({} bytes, max {cap}){hint}",
|
||||||
payload to a file under your /state/ dir and pass its path as \
|
|
||||||
`file_path` so the reminder delivers a pointer instead of the full body",
|
|
||||||
message.len()
|
message.len()
|
||||||
),
|
),
|
||||||
};
|
};
|
||||||
|
|
@ -247,7 +259,7 @@ fn handle_remind(
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
match broker.store_reminder(agent, message, file_path, due_at) {
|
match coord.broker.store_reminder(agent, message, file_path, due_at) {
|
||||||
Ok(id) => {
|
Ok(id) => {
|
||||||
tracing::info!(%id, %agent, %due_at, "reminder scheduled");
|
tracing::info!(%id, %agent, %due_at, "reminder scheduled");
|
||||||
AgentResponse::Ok
|
AgentResponse::Ok
|
||||||
|
|
|
||||||
|
|
@ -10,10 +10,18 @@
|
||||||
//! the host path (`/var/lib/hyperhive/agents/<agent>/state/foo.md`)
|
//! the host path (`/var/lib/hyperhive/agents/<agent>/state/foo.md`)
|
||||||
//! so hive-c0re can write to it from outside the container.
|
//! so hive-c0re can write to it from outside the container.
|
||||||
//! - Reject anything that isn't under the agent's own state subtree,
|
//! - Reject anything that isn't under the agent's own state subtree,
|
||||||
//! or that contains `..` (path traversal). Falling outside the
|
//! contains `..` (path traversal), or has an empty relative tail.
|
||||||
//! allowed prefix means the file write is skipped and the original
|
//! Falling outside the allowed prefix means the file write is
|
||||||
//! message is delivered inline (with a noted warning) — the
|
//! skipped and the original message is delivered inline (with a
|
||||||
//! reminder still fires, just without the payload split.
|
//! noted warning) — the reminder still fires, just without the
|
||||||
|
//! payload split.
|
||||||
|
//! - Defend against symlink escape: after `create_dir_all`, the
|
||||||
|
//! parent dir is canonicalized and re-verified to live under the
|
||||||
|
//! agent's host state root. Then we open the final file with
|
||||||
|
//! `O_NOFOLLOW | O_CREAT | O_TRUNC` so an existing-symlink basename
|
||||||
|
//! can't redirect the write either. Without this an agent could
|
||||||
|
//! `ln -s /etc /agents/foo/state/escape` and bounce a write to an
|
||||||
|
//! arbitrary host path.
|
||||||
//! - Write the reminder body to disk and deliver a short pointer
|
//! - Write the reminder body to disk and deliver a short pointer
|
||||||
//! message in its place, so the agent's inbox/wake-prompt stays
|
//! message in its place, so the agent's inbox/wake-prompt stays
|
||||||
//! small and the bulky payload can be read out of band.
|
//! small and the bulky payload can be read out of band.
|
||||||
|
|
@ -22,6 +30,8 @@
|
||||||
//! inside `Broker::deliver_reminder`; this module only computes the
|
//! inside `Broker::deliver_reminder`; this module only computes the
|
||||||
//! body string before calling it.
|
//! body string before calling it.
|
||||||
|
|
||||||
|
use std::io::Write;
|
||||||
|
use std::os::unix::fs::OpenOptionsExt;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
@ -31,7 +41,9 @@ use crate::coordinator::Coordinator;
|
||||||
/// Per-tick cap on reminders delivered. Anything over this stays due
|
/// Per-tick cap on reminders delivered. Anything over this stays due
|
||||||
/// in the table and gets picked up on the next tick — keeps a
|
/// in the table and gets picked up on the next tick — keeps a
|
||||||
/// 10k-deep backlog from flooding the broker (or hogging the broker
|
/// 10k-deep backlog from flooding the broker (or hogging the broker
|
||||||
/// mutex) in one shot.
|
/// mutex) in one shot. 100/tick × 5s tick = sustained throughput cap
|
||||||
|
/// of ~20 reminders/sec; bump together if the loose-ends tracker
|
||||||
|
/// starts firing higher rates.
|
||||||
const REMINDER_BATCH_LIMIT: u64 = 100;
|
const REMINDER_BATCH_LIMIT: u64 = 100;
|
||||||
|
|
||||||
/// Poll interval. Trade-off between latency on a freshly due reminder
|
/// Poll interval. Trade-off between latency on a freshly due reminder
|
||||||
|
|
@ -72,9 +84,9 @@ fn tick(coord: &Arc<Coordinator>) {
|
||||||
/// Build the inbox body for a due reminder. When `file_path` is None
|
/// Build the inbox body for a due reminder. When `file_path` is None
|
||||||
/// the body is the original message verbatim. When set, we attempt to
|
/// the body is the original message verbatim. When set, we attempt to
|
||||||
/// persist the message body to the requested file and return a short
|
/// persist the message body to the requested file and return a short
|
||||||
/// pointer string instead. Failures (bad prefix, write error, missing
|
/// pointer string instead. Failures (bad prefix, symlink escape,
|
||||||
/// parent) fall back to inline delivery with a noted warning so the
|
/// write error, missing parent) fall back to inline delivery with a
|
||||||
/// reminder still fires.
|
/// noted warning so the reminder still fires.
|
||||||
fn prepare_body(agent: &str, message: &str, file_path: Option<&str>) -> String {
|
fn prepare_body(agent: &str, message: &str, file_path: Option<&str>) -> String {
|
||||||
let Some(req_path) = file_path else {
|
let Some(req_path) = file_path else {
|
||||||
return message.to_owned();
|
return message.to_owned();
|
||||||
|
|
@ -83,34 +95,80 @@ fn prepare_body(agent: &str, message: &str, file_path: Option<&str>) -> String {
|
||||||
Ok(p) => p,
|
Ok(p) => p,
|
||||||
Err(reason) => {
|
Err(reason) => {
|
||||||
tracing::warn!(%agent, %req_path, %reason, "reminder file_path rejected; delivering inline");
|
tracing::warn!(%agent, %req_path, %reason, "reminder file_path rejected; delivering inline");
|
||||||
return format!(
|
return inline_fallback(req_path, &format!("rejected: {reason}"), message);
|
||||||
"[reminder file_path '{req_path}' rejected: {reason}; delivering body inline]\n\n{message}"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
if let Some(parent) = host_path.parent()
|
match write_payload(agent, &host_path, message) {
|
||||||
&& let Err(e) = std::fs::create_dir_all(parent)
|
Ok(()) => {
|
||||||
{
|
|
||||||
tracing::warn!(%agent, path = %host_path.display(), error = ?e, "reminder file_path parent mkdir failed; delivering inline");
|
|
||||||
return format!(
|
|
||||||
"[reminder file_path '{req_path}' parent dir create failed: {e}; delivering body inline]\n\n{message}"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
if let Err(e) = std::fs::write(&host_path, message) {
|
|
||||||
tracing::warn!(%agent, path = %host_path.display(), error = ?e, "reminder file_path write failed; delivering inline");
|
|
||||||
return format!(
|
|
||||||
"[reminder file_path '{req_path}' write failed: {e}; delivering body inline]\n\n{message}"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
let bytes = message.len();
|
let bytes = message.len();
|
||||||
tracing::info!(%agent, path = %host_path.display(), bytes, "reminder body written to file");
|
// debug! not info! — under load this would dominate the log.
|
||||||
format!("reminder body persisted to `{req_path}` ({bytes} bytes); read with your filesystem tools")
|
tracing::debug!(%agent, path = %host_path.display(), bytes, "reminder body written to file");
|
||||||
|
format!(
|
||||||
|
"reminder body persisted to `{req_path}` ({bytes} bytes); read with your filesystem tools"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
Err(reason) => {
|
||||||
|
tracing::warn!(%agent, path = %host_path.display(), %reason, "reminder file_path write failed; delivering inline");
|
||||||
|
inline_fallback(req_path, &reason, message)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn inline_fallback(req_path: &str, reason: &str, message: &str) -> String {
|
||||||
|
format!("[reminder file_path '{req_path}' {reason}; delivering body inline]\n\n{message}")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Persist `message` to `host_path` with the symlink-escape defenses
|
||||||
|
/// described in the module docs. Returns `Ok(())` on success, or a
|
||||||
|
/// human-readable reason string on any failure (caller logs +
|
||||||
|
/// inline-falls-back).
|
||||||
|
fn write_payload(agent: &str, host_path: &Path, message: &str) -> Result<(), String> {
|
||||||
|
let Some(parent) = host_path.parent() else {
|
||||||
|
return Err("internal: host path has no parent".to_owned());
|
||||||
|
};
|
||||||
|
std::fs::create_dir_all(parent)
|
||||||
|
.map_err(|e| format!("parent dir create failed: {e}"))?;
|
||||||
|
// Resolve symlinks in the parent chain, then re-verify the
|
||||||
|
// canonical form still lives under the agent's host state root —
|
||||||
|
// catches `ln -s /etc state/escape` style attacks.
|
||||||
|
let parent_canonical = parent
|
||||||
|
.canonicalize()
|
||||||
|
.map_err(|e| format!("parent canonicalize failed: {e}"))?;
|
||||||
|
let agent_root = Coordinator::agent_notes_dir(agent)
|
||||||
|
.canonicalize()
|
||||||
|
.map_err(|e| format!("agent state root canonicalize failed: {e}"))?;
|
||||||
|
if !parent_canonical.starts_with(&agent_root) {
|
||||||
|
return Err(format!(
|
||||||
|
"symlink escape: canonical parent `{}` outside agent root `{}`",
|
||||||
|
parent_canonical.display(),
|
||||||
|
agent_root.display()
|
||||||
|
));
|
||||||
|
}
|
||||||
|
let basename = host_path
|
||||||
|
.file_name()
|
||||||
|
.ok_or_else(|| "missing basename".to_owned())?;
|
||||||
|
let target = parent_canonical.join(basename);
|
||||||
|
// O_NOFOLLOW on the final component refuses to open if the
|
||||||
|
// basename is itself an existing symlink. Combined with the
|
||||||
|
// canonicalize-parent check above, no symlink anywhere in the
|
||||||
|
// path can redirect the write.
|
||||||
|
let mut file = std::fs::OpenOptions::new()
|
||||||
|
.write(true)
|
||||||
|
.create(true)
|
||||||
|
.truncate(true)
|
||||||
|
.custom_flags(libc::O_NOFOLLOW)
|
||||||
|
.open(&target)
|
||||||
|
.map_err(|e| format!("open failed: {e}"))?;
|
||||||
|
file.write_all(message.as_bytes())
|
||||||
|
.map_err(|e| format!("write failed: {e}"))?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Map an agent-visible container path to the matching host path,
|
/// Map an agent-visible container path to the matching host path,
|
||||||
/// validating that it lives under the agent's own state subtree and
|
/// validating that it lives under the agent's own state subtree, has
|
||||||
/// doesn't try to traverse out via `..`. Returns the host `PathBuf` on
|
/// a non-empty relative tail, and doesn't try to traverse out via
|
||||||
/// success, or a human-readable reason string on rejection.
|
/// `..`. Returns the host `PathBuf` on success, or a human-readable
|
||||||
|
/// reason string on rejection.
|
||||||
fn resolve_host_path(agent: &str, req_path: &str) -> Result<PathBuf, String> {
|
fn resolve_host_path(agent: &str, req_path: &str) -> Result<PathBuf, String> {
|
||||||
let prefix = format!("/agents/{agent}/state/");
|
let prefix = format!("/agents/{agent}/state/");
|
||||||
let Some(rel) = req_path.strip_prefix(&prefix) else {
|
let Some(rel) = req_path.strip_prefix(&prefix) else {
|
||||||
|
|
@ -118,6 +176,9 @@ fn resolve_host_path(agent: &str, req_path: &str) -> Result<PathBuf, String> {
|
||||||
"must be absolute and under `{prefix}` (got `{req_path}`)"
|
"must be absolute and under `{prefix}` (got `{req_path}`)"
|
||||||
));
|
));
|
||||||
};
|
};
|
||||||
|
if rel.is_empty() {
|
||||||
|
return Err("file_path must include a filename, not just the state dir".to_owned());
|
||||||
|
}
|
||||||
let rel_path = Path::new(rel);
|
let rel_path = Path::new(rel);
|
||||||
for comp in rel_path.components() {
|
for comp in rel_path.components() {
|
||||||
match comp {
|
match comp {
|
||||||
|
|
@ -149,6 +210,15 @@ mod tests {
|
||||||
assert!(resolve_host_path("foo", "/agents/foo/state/./x.md").is_err());
|
assert!(resolve_host_path("foo", "/agents/foo/state/./x.md").is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn rejects_empty_relative_tail() {
|
||||||
|
// Trailing slash → empty tail. Used to fall through to
|
||||||
|
// create_dir_all + write-to-dir → confusing inline fallback;
|
||||||
|
// explicit reject gives a cleaner log.
|
||||||
|
let err = resolve_host_path("foo", "/agents/foo/state/").unwrap_err();
|
||||||
|
assert!(err.contains("must include a filename"), "got: {err}");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn accepts_well_formed_path() {
|
fn accepts_well_formed_path() {
|
||||||
let p = resolve_host_path("foo", "/agents/foo/state/reminders/123.md").unwrap();
|
let p = resolve_host_path("foo", "/agents/foo/state/reminders/123.md").unwrap();
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue