350 lines
13 KiB
Rust
350 lines
13 KiB
Rust
//! Per-agent socket listener. The socket a caller connects to is its
//! identity: each agent gets its own socket file on disk, so connecting to
//! `<.../agents/foo/mcp.sock>` means you are `foo`.
|
|
|
|
use std::path::{Path, PathBuf};
|
|
use std::sync::Arc;
|
|
|
|
use anyhow::{Context, Result};
|
|
use hive_sh4re::{AgentRequest, AgentResponse, Message};
|
|
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
|
|
use tokio::net::{UnixListener, UnixStream};
|
|
use tokio::task::JoinHandle;
|
|
|
|
use crate::coordinator::Coordinator;
|
|
|
|
pub struct AgentSocket {
|
|
pub path: PathBuf,
|
|
pub handle: JoinHandle<()>,
|
|
}
|
|
|
|
pub fn start(agent: &str, socket_path: &Path, coord: Arc<Coordinator>) -> Result<AgentSocket> {
|
|
let agent = agent.to_owned();
|
|
if let Some(parent) = socket_path.parent() {
|
|
std::fs::create_dir_all(parent)
|
|
.with_context(|| format!("create agent socket dir {}", parent.display()))?;
|
|
}
|
|
if socket_path.exists() {
|
|
std::fs::remove_file(socket_path).context("remove stale agent socket")?;
|
|
}
|
|
let listener = UnixListener::bind(socket_path)
|
|
.with_context(|| format!("bind agent socket {}", socket_path.display()))?;
|
|
tracing::info!(%agent, socket = %socket_path.display(), "agent socket listening");
|
|
|
|
let path = socket_path.to_path_buf();
|
|
let handle = tokio::spawn(async move {
|
|
loop {
|
|
match listener.accept().await {
|
|
Ok((stream, _)) => {
|
|
let agent = agent.clone();
|
|
let coord = coord.clone();
|
|
tokio::spawn(async move {
|
|
if let Err(e) = serve(stream, agent, coord).await {
|
|
tracing::warn!(error = ?e, "agent connection failed");
|
|
}
|
|
});
|
|
}
|
|
Err(e) => {
|
|
tracing::warn!(error = ?e, "agent listener accept failed; exiting");
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
});
|
|
Ok(AgentSocket { path, handle })
|
|
}
|
|
|
|
async fn serve(stream: UnixStream, agent: String, coord: Arc<Coordinator>) -> Result<()> {
|
|
let (read, mut write) = stream.into_split();
|
|
let mut reader = BufReader::new(read);
|
|
let mut line = String::new();
|
|
loop {
|
|
line.clear();
|
|
let n = reader.read_line(&mut line).await?;
|
|
if n == 0 {
|
|
return Ok(());
|
|
}
|
|
let resp = match serde_json::from_str::<AgentRequest>(line.trim()) {
|
|
Ok(req) => dispatch(&req, &agent, &coord).await,
|
|
Err(e) => AgentResponse::Err {
|
|
message: format!("parse error: {e}"),
|
|
},
|
|
};
|
|
let mut payload = serde_json::to_string(&resp)?;
|
|
payload.push('\n');
|
|
write.write_all(payload.as_bytes()).await?;
|
|
write.flush().await?;
|
|
}
|
|
}
|
|
|
|
/// Upper bound on the long-poll window a caller may request; larger
/// values are clamped to it. 180s is intended to stay under typical
/// TCP/proxy idle limits while still letting an agent park its turn
/// until a message arrives.
const RECV_LONG_POLL_MAX: std::time::Duration = std::time::Duration::from_secs(180);

/// Translate the caller's optional `wait_seconds` into the timeout used
/// for a receive.
///
/// `None` and `Some(0)` both yield a zero timeout ("peek, don't wait"),
/// so a caller can cheaply check for pending messages without blocking.
/// A positive value parks for that many seconds, capped at
/// [`RECV_LONG_POLL_MAX`].
fn recv_timeout(wait_seconds: Option<u64>) -> std::time::Duration {
    wait_seconds.map_or(std::time::Duration::ZERO, |secs| {
        std::time::Duration::from_secs(secs).min(RECV_LONG_POLL_MAX)
    })
}
|
|
|
|
async fn dispatch(req: &AgentRequest, agent: &str, coord: &Arc<Coordinator>) -> AgentResponse {
|
|
let broker = &coord.broker;
|
|
match req {
|
|
AgentRequest::Send { to, body } => handle_send(coord, agent, to, body),
|
|
AgentRequest::Recv { wait_seconds } => match broker
|
|
.recv_blocking(agent, recv_timeout(*wait_seconds))
|
|
.await
|
|
{
|
|
Ok(Some(msg)) => AgentResponse::Message {
|
|
from: msg.from,
|
|
body: msg.body,
|
|
},
|
|
Ok(None) => AgentResponse::Empty,
|
|
Err(e) => AgentResponse::Err {
|
|
message: format!("{e:#}"),
|
|
},
|
|
},
|
|
AgentRequest::Status => match broker.count_pending(agent) {
|
|
Ok(unread) => AgentResponse::Status { unread },
|
|
Err(e) => AgentResponse::Err {
|
|
message: format!("{e:#}"),
|
|
},
|
|
},
|
|
AgentRequest::OperatorMsg { body } => match broker.send(&Message {
|
|
from: hive_sh4re::OPERATOR_RECIPIENT.to_owned(),
|
|
to: agent.to_owned(),
|
|
body: body.clone(),
|
|
}) {
|
|
Ok(()) => AgentResponse::Ok,
|
|
Err(e) => AgentResponse::Err {
|
|
message: format!("{e:#}"),
|
|
},
|
|
},
|
|
AgentRequest::Wake { from, body } => match broker.send(&Message {
|
|
from: from.clone(),
|
|
to: agent.to_owned(),
|
|
body: body.clone(),
|
|
}) {
|
|
Ok(()) => AgentResponse::Ok,
|
|
Err(e) => AgentResponse::Err {
|
|
message: format!("{e:#}"),
|
|
},
|
|
},
|
|
AgentRequest::Recent { limit } => match broker.recent_for(agent, *limit) {
|
|
Ok(rows) => AgentResponse::Recent { rows },
|
|
Err(e) => AgentResponse::Err {
|
|
message: format!("{e:#}"),
|
|
},
|
|
},
|
|
AgentRequest::Ask {
|
|
question,
|
|
options,
|
|
multi,
|
|
ttl_seconds,
|
|
to,
|
|
} => crate::questions::handle_ask(
|
|
coord,
|
|
agent,
|
|
question,
|
|
options,
|
|
*multi,
|
|
*ttl_seconds,
|
|
to.as_deref(),
|
|
)
|
|
.map_or_else(
|
|
|message| AgentResponse::Err { message },
|
|
|id| AgentResponse::QuestionQueued { id },
|
|
),
|
|
AgentRequest::Answer { id, answer } => crate::questions::handle_answer(
|
|
coord, agent, *id, answer,
|
|
)
|
|
.map_or_else(
|
|
|message| AgentResponse::Err { message },
|
|
|()| AgentResponse::Ok,
|
|
),
|
|
AgentRequest::Remind {
|
|
message,
|
|
timing,
|
|
file_path,
|
|
} => handle_remind(coord, agent, message, timing, file_path.as_deref()),
|
|
}
|
|
}
|
|
|
|
/// Common Send handler shared between dispatch arms. Applies the
|
|
/// 1 KiB body cap, then routes broadcast (`to == "*"`) vs unicast
|
|
/// through their respective broker calls. Pulled out of `dispatch`
|
|
/// to keep that function under the clippy too-many-lines limit; the
|
|
/// behaviour is identical to inlining.
|
|
fn handle_send(coord: &Arc<Coordinator>, agent: &str, to: &str, body: &str) -> AgentResponse {
|
|
if let Err(message) = crate::limits::check_size("send", body) {
|
|
return AgentResponse::Err { message };
|
|
}
|
|
if to == "*" {
|
|
let errors = coord.broadcast_send(agent, body);
|
|
return if errors.is_empty() {
|
|
AgentResponse::Ok
|
|
} else {
|
|
AgentResponse::Err {
|
|
message: format!("broadcast failed for agents: {}", errors.join(", ")),
|
|
}
|
|
};
|
|
}
|
|
match coord.broker.send(&Message {
|
|
from: agent.to_owned(),
|
|
to: to.to_owned(),
|
|
body: body.to_owned(),
|
|
}) {
|
|
Ok(()) => AgentResponse::Ok,
|
|
Err(e) => AgentResponse::Err {
|
|
message: format!("{e:#}"),
|
|
},
|
|
}
|
|
}
|
|
|
|
fn handle_remind(
|
|
coord: &Arc<Coordinator>,
|
|
agent: &str,
|
|
message: &str,
|
|
timing: &hive_sh4re::ReminderTiming,
|
|
file_path: Option<&str>,
|
|
) -> AgentResponse {
|
|
match store_remind(coord, agent, message, timing, file_path) {
|
|
Ok(()) => AgentResponse::Ok,
|
|
Err(message) => AgentResponse::Err { message },
|
|
}
|
|
}
|
|
|
|
/// Shared remind-storage path used by both the agent and the manager
|
|
/// dispatchers. Validates timing, applies the auto-file overflow
|
|
/// dance (see [`prepare_remind_storage`]), and writes the reminder
|
|
/// row. Returns `Ok(())` on success, or a caller-ready error string
|
|
/// the dispatcher wraps in `*Response::Err`.
|
|
pub(crate) fn store_remind(
|
|
coord: &Arc<Coordinator>,
|
|
agent: &str,
|
|
message: &str,
|
|
timing: &hive_sh4re::ReminderTiming,
|
|
file_path: Option<&str>,
|
|
) -> Result<(), String> {
|
|
let due_at = resolve_due_at(timing).map_err(|e| format!("invalid reminder timing: {e:#}"))?;
|
|
let (stored_message, stored_path) = prepare_remind_storage(agent, message, file_path)?;
|
|
let id = coord
|
|
.broker
|
|
.store_reminder(agent, &stored_message, stored_path.as_deref(), due_at)
|
|
.map_err(|e| format!("failed to store reminder: {e:#}"))?;
|
|
tracing::info!(%id, %agent, %due_at, "reminder scheduled");
|
|
Ok(())
|
|
}
|
|
|
|
/// Decide what we actually store in the reminders row, applying the
|
|
/// same byte cap as the rest of the wire protocol
|
|
/// ([`crate::limits::MESSAGE_MAX_BYTES`]). Three outcomes:
|
|
///
|
|
/// 1. Body within the cap → stored verbatim, with whatever `file_path`
|
|
/// the caller passed (None or Some). The scheduler honours
|
|
/// `file_path` at delivery time as before.
|
|
/// 2. Body over the cap, no caller `file_path` → auto-generate a path
|
|
/// under `/agents/<agent>/state/reminders/auto-<ts>.md`, write the
|
|
/// body to disk now, store a short pointer hint as the message and
|
|
/// clear `file_path` (so the scheduler doesn't re-write at
|
|
/// delivery and overwrite the body with the hint).
|
|
/// 3. Body over the cap, caller provided `file_path` → honour the
|
|
/// caller's path: write the body to it now, store the same hint
|
|
/// and clear `file_path` for the same reason as (2).
|
|
///
|
|
/// Returns `(stored_message, stored_file_path)` on success, or a
|
|
/// caller-ready error string on auto-save failure (which is the only
|
|
/// way a Remind request can be refused for size — the agent never has
|
|
/// to think about the cap).
|
|
fn prepare_remind_storage(
|
|
agent: &str,
|
|
message: &str,
|
|
file_path: Option<&str>,
|
|
) -> Result<(String, Option<String>), String> {
|
|
if message.len() <= crate::limits::MESSAGE_MAX_BYTES {
|
|
return Ok((message.to_owned(), file_path.map(str::to_owned)));
|
|
}
|
|
let req_path = match file_path {
|
|
Some(p) => p.to_owned(),
|
|
None => auto_reminder_path(agent),
|
|
};
|
|
let host_path = crate::reminder_scheduler::resolve_host_path(agent, &req_path)
|
|
.map_err(|reason| format!("auto-save path `{req_path}` rejected: {reason}"))?;
|
|
crate::reminder_scheduler::write_payload(agent, &host_path, message)
|
|
.map_err(|reason| format!("auto-save of large reminder body to `{req_path}` failed: {reason}"))?;
|
|
let hint = format!(
|
|
"[reminder body of {} bytes auto-saved to `{req_path}`; read with your filesystem tools]",
|
|
message.len()
|
|
);
|
|
Ok((hint, None))
|
|
}
|
|
|
|
/// Build the path used for an auto-saved reminder body:
/// `/agents/<agent>/state/reminders/auto-<nanos>.md`.
///
/// `<nanos>` is the current wall-clock time in nanoseconds since the
/// Unix epoch, falling back to `0` if the clock reads earlier than the
/// epoch. Nanosecond resolution inside the agent's own subtree is
/// relied on to make collisions vanishingly unlikely; no hostname is
/// stamped because hive-c0re is single-host.
fn auto_reminder_path(agent: &str) -> String {
    use std::time::{SystemTime, UNIX_EPOCH};

    let ts_ns = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_nanos());
    format!("/agents/{agent}/state/reminders/auto-{ts_ns}.md")
}
|
|
|
|
/// Resolve the `due_at` unix timestamp for a Remind request. Returns
|
|
/// distinct error messages for each failure mode (overflow on
|
|
/// `InSeconds`, pre-epoch clock, `i64` cast wrap) so the caller can tell
|
|
/// what went wrong without inspecting the chain.
|
|
fn resolve_due_at(timing: &hive_sh4re::ReminderTiming) -> anyhow::Result<i64> {
|
|
use hive_sh4re::ReminderTiming;
|
|
match timing {
|
|
ReminderTiming::InSeconds { seconds } => {
|
|
let now = std::time::SystemTime::now();
|
|
let future = now
|
|
.checked_add(std::time::Duration::from_secs(*seconds))
|
|
.ok_or_else(|| {
|
|
anyhow::anyhow!("InSeconds overflow: {seconds}s exceeds system time range")
|
|
})?;
|
|
let duration = future
|
|
.duration_since(std::time::UNIX_EPOCH)
|
|
.map_err(|e| anyhow::anyhow!("system time before UNIX_EPOCH: {e}"))?;
|
|
i64::try_from(duration.as_secs())
|
|
.map_err(|e| anyhow::anyhow!("unix timestamp exceeds i64 range: {e}"))
|
|
}
|
|
ReminderTiming::At { unix_timestamp } => Ok(*unix_timestamp),
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// The generated path sits under the agent's reminders directory and
    /// carries the `.md` extension.
    #[test]
    fn auto_reminder_path_format() {
        let generated = auto_reminder_path("damocles");
        let has_prefix = generated.starts_with("/agents/damocles/state/reminders/auto-");
        assert!(has_prefix);
        assert!(generated.ends_with(".md"));
    }

    /// A body under the cap with no file path is stored verbatim.
    #[test]
    fn prepare_remind_storage_passthrough_under_cap() {
        let stored = prepare_remind_storage("foo", "small body", None).unwrap();
        assert_eq!(stored, ("small body".to_owned(), None));
    }

    /// A body under the cap keeps the caller-supplied file path untouched.
    #[test]
    fn prepare_remind_storage_passthrough_with_caller_file_path() {
        let caller_path = "/agents/foo/state/x.md";
        let stored = prepare_remind_storage("foo", "small", Some(caller_path)).unwrap();
        assert_eq!(stored, ("small".to_owned(), Some(caller_path.to_owned())));
    }
}
|