//! Runtime state + config shared between the host admin socket, the manager //! socket, and the per-agent sockets: the broker, configured `agent_flake`, //! and the map of registered agent sockets. use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; use anyhow::{Context, Result}; use crate::agent_server::{self, AgentSocket}; use crate::approvals::Approvals; use crate::broker::Broker; use crate::operator_questions::OperatorQuestions; const AGENT_RUNTIME_ROOT: &str = "/run/hyperhive/agents"; const MANAGER_RUNTIME_ROOT: &str = "/run/hyperhive/manager"; /// Manager-editable per-agent config repos. Bind-mounted RW into the manager /// container as `/agents//`. Hive-c0re only writes to these on first /// spawn (initial commit); after that it's manager-only. const AGENT_STATE_ROOT: &str = "/var/lib/hyperhive/agents"; /// Hive-c0re-only authoritative per-agent config repos. Containers build from /// these. Manager has no filesystem access; the only way to update is via /// `request_apply_commit` + user approval. const APPLIED_STATE_ROOT: &str = "/var/lib/hyperhive/applied"; pub struct Coordinator { pub broker: Arc, pub approvals: Arc, pub questions: Arc, /// URL of the hyperhive flake (no fragment). Inlined into per-agent /// `flake.nix` files as `inputs.hyperhive.url`. pub hyperhive_flake: String, /// TCP port the host's hive-c0re dashboard listens on. Inlined into /// each per-agent flake so the agent's web UI can build the right /// rebuild-button URL pointing back at the dashboard. pub dashboard_port: u16, agents: Mutex>, /// Agents whose lifecycle action (currently just spawn) is in flight. /// Read by the dashboard to render a spinner; cleared when the action /// resolves (success or failure). transient: Mutex>, } /// Per-agent in-progress state that the dashboard surfaces between approve /// click and container ready. #[derive(Debug, Clone)] pub struct TransientState { pub kind: TransientKind, pub since: std::time::Instant, } #[derive(Debug, Clone, Copy)] pub enum TransientKind { /// `lifecycle::spawn` is running (nixos-container create + update + start). Spawning, } impl Coordinator { pub fn open(db_path: &Path, hyperhive_flake: String, dashboard_port: u16) -> Result { let broker = Broker::open(db_path).context("open broker")?; let approvals = Approvals::open(db_path).context("open approvals")?; let questions = OperatorQuestions::open(db_path).context("open operator_questions")?; Ok(Self { broker: Arc::new(broker), approvals: Arc::new(approvals), questions: Arc::new(questions), hyperhive_flake, dashboard_port, agents: Mutex::new(HashMap::new()), transient: Mutex::new(HashMap::new()), }) } pub fn register_agent(&self, name: &str) -> Result { // Idempotent: drop any existing listener so re-registration (e.g. on rebuild, // or after a hive-c0re restart cleared /run/hyperhive) gets a fresh socket. self.unregister_agent(name); let agent_dir = Self::agent_dir(name); std::fs::create_dir_all(&agent_dir) .with_context(|| format!("create agent dir {}", agent_dir.display()))?; let socket_path = Self::socket_path(name); let socket = agent_server::start(name, &socket_path, self.broker.clone())?; self.agents.lock().unwrap().insert(name.to_owned(), socket); Ok(agent_dir) } pub fn unregister_agent(&self, name: &str) { if let Some(socket) = self.agents.lock().unwrap().remove(name) { socket.handle.abort(); let _ = std::fs::remove_file(&socket.path); } } /// Mark an agent as in-progress (only one state per agent for now). pub fn set_transient(&self, name: &str, kind: TransientKind) { self.transient.lock().unwrap().insert( name.to_owned(), TransientState { kind, since: std::time::Instant::now(), }, ); } pub fn clear_transient(&self, name: &str) { self.transient.lock().unwrap().remove(name); } pub fn transient_snapshot(&self) -> HashMap { self.transient.lock().unwrap().clone() } /// Push a `HelperEvent` into the manager's inbox. Encoded as JSON in /// `Message::body`; sender = `SYSTEM_SENDER`. The manager harness /// recognises the sender and parses the body. Best-effort: a serde or /// broker error is logged but does not propagate. pub fn notify_manager(&self, event: &hive_sh4re::HelperEvent) { let body = match serde_json::to_string(event) { Ok(s) => s, Err(e) => { tracing::warn!(error = ?e, "failed to encode helper event"); return; } }; if let Err(e) = self.broker.send(&hive_sh4re::Message { from: hive_sh4re::SYSTEM_SENDER.to_owned(), to: hive_sh4re::MANAGER_AGENT.to_owned(), body, }) { tracing::warn!(error = ?e, "failed to push helper event to manager"); } } pub fn agent_dir(name: &str) -> PathBuf { PathBuf::from(format!("{AGENT_RUNTIME_ROOT}/{name}")) } pub fn socket_path(name: &str) -> PathBuf { Self::agent_dir(name).join("mcp.sock") } pub fn manager_dir() -> PathBuf { PathBuf::from(MANAGER_RUNTIME_ROOT) } pub fn manager_socket_path() -> PathBuf { Self::manager_dir().join("mcp.sock") } /// Ensure a runtime dir + (for sub-agents) per-agent socket exists. For /// the manager, `manager_server::start` owns the socket — just return /// the dir. For sub-agents this is `register_agent` (creates a fresh /// listener bound to `socket_path(name)`). Source directory of the /// `/run/hive/mcp.sock` bind that ends up in `set_nspawn_flags`. pub fn ensure_runtime(&self, name: &str) -> Result { if name == crate::lifecycle::MANAGER_NAME { let dir = Self::manager_dir(); std::fs::create_dir_all(&dir) .with_context(|| format!("create manager dir {}", dir.display()))?; return Ok(dir); } self.register_agent(name) } /// Per-agent state root (parent of `config/`, future `prompts/`, etc.). pub fn agent_state_root(name: &str) -> PathBuf { PathBuf::from(format!("{AGENT_STATE_ROOT}/{name}")) } /// Manager-editable proposed config repo. Bind-mounted into the manager /// container as `/agents//config/`. pub fn agent_proposed_dir(name: &str) -> PathBuf { Self::agent_state_root(name).join("config") } /// Per-agent Claude credentials dir. Bind-mounted RW into the agent /// container at `/root/.claude` so OAuth state survives container /// destroy/recreate. Each agent owns its own token lineage — sharing /// would break on the first refresh-token rotation. pub fn agent_claude_dir(name: &str) -> PathBuf { Self::agent_state_root(name).join("claude") } /// Per-agent durable knowledge dir. Bind-mounted RW into the agent /// container at `/state`. Survives destroy/recreate alongside the /// claude dir. Agents are told (via the system prompt) to write /// long-lived notes / scratch state here. pub fn agent_notes_dir(name: &str) -> PathBuf { Self::agent_state_root(name).join("state") } /// Authoritative applied config repo. Hive-c0re-only. pub fn agent_applied_dir(name: &str) -> PathBuf { PathBuf::from(format!("{APPLIED_STATE_ROOT}/{name}")) } }