DashboardEvent::QuestionAdded gains question_refs and QuestionResolved gains answer_refs — both are populated via scan_validated_paths at emit time, the same helper the broker forwarder uses for Sent/Delivered. The cold-load snapshot wraps each OpQuestion in a QuestionView with the same fields, computed once per /api/state. The client threads the refs through questionsState rows (pending + history) and passes them to appendLinkified at every render site (live pane, history details). Path tokens in question and answer bodies now linkify with the same server-vouched guarantee that broker messages already enjoyed.
618 lines
25 KiB
Rust
618 lines
25 KiB
Rust
//! Runtime state + config shared between the host admin socket, the manager
|
|
//! socket, and the per-agent sockets: the broker, configured `agent_flake`,
|
|
//! and the map of registered agent sockets.
|
|
|
|
use std::collections::HashMap;
|
|
use std::path::{Path, PathBuf};
|
|
use std::sync::atomic::{AtomicU64, Ordering};
|
|
use std::sync::{Arc, Mutex};
|
|
|
|
use anyhow::{Context, Result};
|
|
use tokio::sync::broadcast;
|
|
|
|
use crate::agent_server::{self, AgentSocket};
|
|
use crate::approvals::Approvals;
|
|
use crate::broker::Broker;
|
|
use crate::container_view::{self, ContainerView};
|
|
use crate::dashboard_events::DashboardEvent;
|
|
use crate::operator_questions::OperatorQuestions;
|
|
|
|
/// Capacity handed to `broadcast::channel` for the dashboard event
/// stream. Slow browser subscribers (idle tab, throttled connection)
/// drop frames past this — that's fine, the seq dedupe makes a
/// reconnect resync safe.
const DASHBOARD_CHANNEL: usize = 256;

/// Parent of the per-agent runtime dirs: `Coordinator::agent_dir`
/// resolves to `<root>/<name>` and `Coordinator::socket_path` to
/// `<root>/<name>/mcp.sock`.
const AGENT_RUNTIME_ROOT: &str = "/run/hyperhive/agents";

/// Runtime dir of the manager (`Coordinator::manager_dir`); its socket
/// lives at `<root>/mcp.sock` (`Coordinator::manager_socket_path`).
const MANAGER_RUNTIME_ROOT: &str = "/run/hyperhive/manager";

/// Manager-editable per-agent config repos. Bind-mounted RW into the manager
/// container as `/agents/<name>/`. Hive-c0re only writes to these on first
/// spawn (initial commit); after that it's manager-only.
const AGENT_STATE_ROOT: &str = "/var/lib/hyperhive/agents";

/// Hive-c0re-only authoritative per-agent config repos. Containers build from
/// these. Manager has no filesystem access; the only way to update is via
/// `request_apply_commit` + user approval.
const APPLIED_STATE_ROOT: &str = "/var/lib/hyperhive/applied";
|
|
|
|
pub struct Coordinator {
|
|
pub broker: Arc<Broker>,
|
|
pub approvals: Arc<Approvals>,
|
|
pub questions: Arc<OperatorQuestions>,
|
|
/// URL of the hyperhive flake (no fragment). Inlined into per-agent
|
|
/// `flake.nix` files as `inputs.hyperhive.url`.
|
|
pub hyperhive_flake: String,
|
|
/// TCP port the host's hive-c0re dashboard listens on. Inlined into
|
|
/// each per-agent flake so the agent's web UI can build the right
|
|
/// rebuild-button URL pointing back at the dashboard.
|
|
pub dashboard_port: u16,
|
|
/// Operator pronouns (free text) — `she/her` by default, set via
|
|
/// the NixOS module option `services.hive-c0re.operatorPronouns`.
|
|
/// Reaches each container as the `HIVE_OPERATOR_PRONOUNS` env var
|
|
/// (injected into systemd.services.<harness>.environment by the
|
|
/// meta flake); the harness substitutes it into the agent /
|
|
/// manager system prompt at boot.
|
|
pub operator_pronouns: String,
|
|
agents: Mutex<HashMap<String, AgentSocket>>,
|
|
/// Agents whose lifecycle action (currently just spawn) is in flight.
|
|
/// Read by the dashboard to render a spinner; cleared when the action
|
|
/// resolves (success or failure).
|
|
transient: Mutex<HashMap<String, TransientState>>,
|
|
/// Unified wire-facing event channel feeding the dashboard SSE
|
|
/// stream. Carries broker messages (mirrored from `broker.subscribe`
|
|
/// by the forwarder task in `main.rs`) and dashboard-only mutation
|
|
/// events (approval added/resolved, question added/answered, etc.).
|
|
/// Snapshot endpoints capture `event_seq` before reading state so
|
|
/// the client can dedupe its buffered live traffic against the
|
|
/// snapshot.
|
|
dashboard_events: broadcast::Sender<DashboardEvent>,
|
|
event_seq: AtomicU64,
|
|
/// Last container snapshot seen by `rescan_containers_and_emit`,
|
|
/// keyed by `ContainerView.name`. The rescan diffs a fresh
|
|
/// `container_view::build_all` against this map and emits one
|
|
/// `ContainerStateChanged` per added/changed row and one
|
|
/// `ContainerRemoved` per disappeared row. Async — guarded by a
|
|
/// tokio mutex so the rescan can `await` `lifecycle::list` /
|
|
/// `is_running` without blocking other coordinator paths.
|
|
last_containers: tokio::sync::Mutex<HashMap<String, ContainerView>>,
|
|
}
|
|
|
|
/// Per-agent in-progress state that the dashboard surfaces between approve
|
|
/// click and container ready.
|
|
#[derive(Debug, Clone)]
|
|
pub struct TransientState {
|
|
pub kind: TransientKind,
|
|
pub since: std::time::Instant,
|
|
}
|
|
|
|
/// RAII handle returned by `Coordinator::transient_guard`. Cleared on
|
|
/// drop — including drop-via-cancellation, the path that bare
|
|
/// `set_transient` / `clear_transient` pairs leaked through. Holds an
|
|
/// `Arc<Coordinator>` so the guard is freely returnable / movable.
|
|
pub struct TransientGuard {
|
|
coord: Arc<Coordinator>,
|
|
name: String,
|
|
}
|
|
|
|
impl Drop for TransientGuard {
|
|
fn drop(&mut self) {
|
|
self.coord.clear_transient(&self.name);
|
|
}
|
|
}
|
|
|
|
/// Lifecycle action currently in flight for an agent. A plain tag
/// enum, so it is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TransientKind {
    /// `lifecycle::spawn` is running (nixos-container create + update + start).
    Spawning,
    /// `lifecycle::start` is running.
    Starting,
    /// `lifecycle::kill` is running.
    Stopping,
    /// `lifecycle::restart` is running.
    Restarting,
    /// `lifecycle::rebuild` is running (nixos-container update).
    Rebuilding,
    /// `actions::destroy` is running.
    Destroying,
}

impl TransientKind {
    /// Wire/UI label. Matches the strings the dashboard already
    /// renders in the transient spinner.
    #[must_use]
    pub fn as_str(self) -> &'static str {
        match self {
            Self::Spawning => "spawning",
            Self::Starting => "starting",
            Self::Stopping => "stopping",
            Self::Restarting => "restarting",
            Self::Rebuilding => "rebuilding",
            Self::Destroying => "destroying",
        }
    }
}
|
|
|
|
impl Coordinator {
|
|
pub fn open(
|
|
db_path: &Path,
|
|
hyperhive_flake: String,
|
|
dashboard_port: u16,
|
|
operator_pronouns: String,
|
|
) -> Result<Self> {
|
|
let broker = Broker::open(db_path).context("open broker")?;
|
|
let approvals = Approvals::open(db_path).context("open approvals")?;
|
|
let questions = OperatorQuestions::open(db_path).context("open operator_questions")?;
|
|
let (dashboard_events, _) = broadcast::channel(DASHBOARD_CHANNEL);
|
|
Ok(Self {
|
|
broker: Arc::new(broker),
|
|
approvals: Arc::new(approvals),
|
|
questions: Arc::new(questions),
|
|
hyperhive_flake,
|
|
dashboard_port,
|
|
operator_pronouns,
|
|
agents: Mutex::new(HashMap::new()),
|
|
transient: Mutex::new(HashMap::new()),
|
|
dashboard_events,
|
|
event_seq: AtomicU64::new(0),
|
|
last_containers: tokio::sync::Mutex::new(HashMap::new()),
|
|
})
|
|
}
|
|
|
|
/// Subscribe to the unified dashboard event channel. Used by the
|
|
/// `/dashboard/stream` SSE handler and by the broker-to-dashboard
|
|
/// forwarder task.
|
|
pub fn dashboard_subscribe(&self) -> broadcast::Receiver<DashboardEvent> {
|
|
self.dashboard_events.subscribe()
|
|
}
|
|
|
|
/// Stamp the next sequence number. Each emission of a
|
|
/// `DashboardEvent` should fill its `seq` with `next_seq()` so the
|
|
/// frame the wire carries is the one the client uses to dedupe.
|
|
pub fn next_seq(&self) -> u64 {
|
|
self.event_seq.fetch_add(1, Ordering::SeqCst) + 1
|
|
}
|
|
|
|
/// Current high-water seq. Snapshot endpoints read this *before*
|
|
/// gathering state so the (snapshot.seq, snapshot) pair satisfies:
|
|
/// any frame with `seq > snapshot.seq` is post-snapshot. The seq
|
|
/// captured here may grow during snapshot construction — clients
|
|
/// may double-apply such events, which renderers must tolerate.
|
|
pub fn current_seq(&self) -> u64 {
|
|
self.event_seq.load(Ordering::SeqCst)
|
|
}
|
|
|
|
/// Broadcast a freshly-built `DashboardEvent` (caller fills `seq`
|
|
/// via `next_seq()`). Returns silently when there are no
|
|
/// subscribers — the dashboard channel is best-effort presentation
|
|
/// plumbing, not a delivery guarantee.
|
|
pub fn emit_dashboard_event(&self, event: DashboardEvent) {
|
|
let _ = self.dashboard_events.send(event);
|
|
}
|
|
|
|
/// Emit `ApprovalAdded` immediately after the row is inserted in
|
|
/// sqlite. Caller passes the diff text it already computed (or
|
|
/// `None` for spawn approvals which carry no diff).
|
|
pub fn emit_approval_added(
|
|
&self,
|
|
id: i64,
|
|
agent: &str,
|
|
approval_kind: &'static str,
|
|
sha_short: Option<String>,
|
|
diff: Option<String>,
|
|
description: Option<String>,
|
|
) {
|
|
self.emit_dashboard_event(DashboardEvent::ApprovalAdded {
|
|
seq: self.next_seq(),
|
|
id,
|
|
agent: agent.to_owned(),
|
|
approval_kind,
|
|
sha_short,
|
|
diff,
|
|
description,
|
|
});
|
|
}
|
|
|
|
/// Emit `ApprovalResolved` after `mark_approved` / `mark_denied` /
|
|
/// `mark_failed` lands. `resolved_at` is stamped from the system
|
|
/// clock here so call sites don't repeat the conversion; if you
|
|
/// already have an authoritative timestamp from the db update,
|
|
/// the tiny skew between "row updated" and "event emitted" is
|
|
/// presentation-only and doesn't matter to clients.
|
|
pub fn emit_approval_resolved(
|
|
&self,
|
|
id: i64,
|
|
agent: &str,
|
|
approval_kind: &'static str,
|
|
sha_short: Option<String>,
|
|
status: &'static str,
|
|
note: Option<String>,
|
|
description: Option<String>,
|
|
) {
|
|
let resolved_at = std::time::SystemTime::now()
|
|
.duration_since(std::time::UNIX_EPOCH)
|
|
.ok()
|
|
.and_then(|d| i64::try_from(d.as_secs()).ok())
|
|
.unwrap_or(0);
|
|
self.emit_dashboard_event(DashboardEvent::ApprovalResolved {
|
|
seq: self.next_seq(),
|
|
id,
|
|
agent: agent.to_owned(),
|
|
approval_kind,
|
|
sha_short,
|
|
status,
|
|
resolved_at,
|
|
note,
|
|
description,
|
|
});
|
|
}
|
|
|
|
/// Emit `QuestionAdded` after a question is inserted. Fires for
|
|
/// both operator-targeted (`target = None`) and peer-to-peer
|
|
/// (`target = Some(agent)`) threads — the dashboard surfaces
|
|
/// both, distinguishing visually + offering operator override.
|
|
pub fn emit_question_added(
|
|
&self,
|
|
id: i64,
|
|
asker: &str,
|
|
question: &str,
|
|
options: &[String],
|
|
multi: bool,
|
|
deadline_at: Option<i64>,
|
|
target: Option<&str>,
|
|
) {
|
|
let asked_at = std::time::SystemTime::now()
|
|
.duration_since(std::time::UNIX_EPOCH)
|
|
.ok()
|
|
.and_then(|d| i64::try_from(d.as_secs()).ok())
|
|
.unwrap_or(0);
|
|
let question_refs = crate::dashboard::scan_validated_paths(question);
|
|
self.emit_dashboard_event(DashboardEvent::QuestionAdded {
|
|
seq: self.next_seq(),
|
|
id,
|
|
asker: asker.to_owned(),
|
|
question: question.to_owned(),
|
|
options: options.to_vec(),
|
|
multi,
|
|
asked_at,
|
|
deadline_at,
|
|
target: target.map(str::to_owned),
|
|
question_refs,
|
|
});
|
|
}
|
|
|
|
/// Emit `QuestionResolved` when a question transitions to
|
|
/// answered (operator answer, peer answer, operator override on
|
|
/// a peer thread, operator cancel, or ttl watchdog). Both
|
|
/// operator-targeted and peer threads fire so the dashboard's
|
|
/// derived store can move the row from pending to history.
|
|
pub fn emit_question_resolved(
|
|
&self,
|
|
id: i64,
|
|
answer: &str,
|
|
answerer: &str,
|
|
cancelled: bool,
|
|
target: Option<&str>,
|
|
) {
|
|
let answered_at = std::time::SystemTime::now()
|
|
.duration_since(std::time::UNIX_EPOCH)
|
|
.ok()
|
|
.and_then(|d| i64::try_from(d.as_secs()).ok())
|
|
.unwrap_or(0);
|
|
let answer_refs = crate::dashboard::scan_validated_paths(answer);
|
|
self.emit_dashboard_event(DashboardEvent::QuestionResolved {
|
|
seq: self.next_seq(),
|
|
id,
|
|
answer: answer.to_owned(),
|
|
answerer: answerer.to_owned(),
|
|
answered_at,
|
|
cancelled,
|
|
target: target.map(str::to_owned),
|
|
answer_refs,
|
|
});
|
|
}
|
|
|
|
/// Rebuild the per-container snapshot, diff it against the last
|
|
/// one cached on `self`, and emit one
|
|
/// `DashboardEvent::ContainerStateChanged` per added/changed row
|
|
/// and one `DashboardEvent::ContainerRemoved` per disappeared row.
|
|
/// Call after any mutation that could affect what
|
|
/// `nixos-container list` returns or what a row's
|
|
/// `running` / `needs_update` / `needs_login` / `deployed_sha`
|
|
/// resolves to — lifecycle ops, destroy, approve (post-spawn),
|
|
/// rebuild, meta-update, and the crash-watcher's periodic poll.
|
|
/// Cheap when nothing changed (one `nixos-container list` + a
|
|
/// HashMap diff + zero emits).
|
|
pub async fn rescan_containers_and_emit(self: &Arc<Self>) {
|
|
let fresh = container_view::build_all(self).await;
|
|
let mut last = self.last_containers.lock().await;
|
|
let mut changed_or_new = Vec::new();
|
|
let mut removed = Vec::new();
|
|
// Diff into change vs. add.
|
|
for view in &fresh {
|
|
match last.get(&view.name) {
|
|
Some(prev) if prev == view => {} // unchanged
|
|
_ => changed_or_new.push(view.clone()),
|
|
}
|
|
}
|
|
// Anything in `last` but not in `fresh` is gone.
|
|
let fresh_names: std::collections::HashSet<&str> =
|
|
fresh.iter().map(|c| c.name.as_str()).collect();
|
|
for name in last.keys() {
|
|
if !fresh_names.contains(name.as_str()) {
|
|
removed.push(name.clone());
|
|
}
|
|
}
|
|
// Rebuild the cache from the fresh snapshot.
|
|
last.clear();
|
|
for c in fresh {
|
|
last.insert(c.name.clone(), c);
|
|
}
|
|
drop(last);
|
|
for c in changed_or_new {
|
|
self.emit_dashboard_event(DashboardEvent::ContainerStateChanged {
|
|
seq: self.next_seq(),
|
|
container: c,
|
|
});
|
|
}
|
|
for name in removed {
|
|
self.emit_dashboard_event(DashboardEvent::ContainerRemoved {
|
|
seq: self.next_seq(),
|
|
name,
|
|
});
|
|
}
|
|
}
|
|
|
|
/// Read-only snapshot of the last cached container view. Used by
|
|
/// `/api/state` to cold-load page-open clients without re-running
|
|
/// `nixos-container list` themselves; the
|
|
/// `rescan_containers_and_emit` calls keep this fresh.
|
|
pub async fn containers_snapshot(&self) -> Vec<ContainerView> {
|
|
let last = self.last_containers.lock().await;
|
|
let mut out: Vec<ContainerView> = last.values().cloned().collect();
|
|
out.sort_by(|a, b| a.name.cmp(&b.name));
|
|
out
|
|
}
|
|
|
|
pub fn register_agent(self: &Arc<Self>, name: &str) -> Result<PathBuf> {
|
|
// Idempotent: drop any existing listener so re-registration (e.g. on rebuild,
|
|
// or after a hive-c0re restart cleared /run/hyperhive) gets a fresh socket.
|
|
self.unregister_agent(name);
|
|
let agent_dir = Self::agent_dir(name);
|
|
std::fs::create_dir_all(&agent_dir)
|
|
.with_context(|| format!("create agent dir {}", agent_dir.display()))?;
|
|
let socket_path = Self::socket_path(name);
|
|
// Hand the full Coordinator to the per-agent socket — it
|
|
// needs broker + operator_questions to handle the agent-side
|
|
// `ask` / `answer` tools, not just the broker.
|
|
let socket = agent_server::start(name, &socket_path, self.clone())?;
|
|
self.agents.lock().unwrap().insert(name.to_owned(), socket);
|
|
Ok(agent_dir)
|
|
}
|
|
|
|
pub fn unregister_agent(&self, name: &str) {
|
|
if let Some(socket) = self.agents.lock().unwrap().remove(name) {
|
|
socket.handle.abort();
|
|
let _ = std::fs::remove_file(&socket.path);
|
|
}
|
|
}
|
|
pub fn list_agents(&self) -> Vec<String> {
|
|
self.agents.lock().unwrap().keys().cloned().collect()
|
|
}
|
|
|
|
/// Mark an agent as in-progress (only one state per agent for now).
|
|
///
|
|
/// Prefer `transient_guard` when possible — it auto-clears on drop
|
|
/// even if the surrounding future is cancelled (HTTP request
|
|
/// aborted, runtime shutdown mid-rebuild, panic between set and
|
|
/// clear). The bare `set_transient` / `clear_transient` pair leaks
|
|
/// the transient on any of those paths and the dashboard then
|
|
/// shows the agent stuck in "rebuilding…" forever.
|
|
pub fn set_transient(&self, name: &str, kind: TransientKind) {
|
|
self.transient.lock().unwrap().insert(
|
|
name.to_owned(),
|
|
TransientState {
|
|
kind,
|
|
since: std::time::Instant::now(),
|
|
},
|
|
);
|
|
// Live-update dashboards. `since_unix` is wall-clock so the
|
|
// browser can tick "Ns spawning…" without polling. The
|
|
// intra-process map keeps using `Instant` for monotonicity.
|
|
let since_unix = std::time::SystemTime::now()
|
|
.duration_since(std::time::UNIX_EPOCH)
|
|
.ok()
|
|
.and_then(|d| i64::try_from(d.as_secs()).ok())
|
|
.unwrap_or(0);
|
|
self.emit_dashboard_event(DashboardEvent::TransientSet {
|
|
seq: self.next_seq(),
|
|
name: name.to_owned(),
|
|
transient_kind: kind.as_str(),
|
|
since_unix,
|
|
});
|
|
}
|
|
|
|
pub fn clear_transient(&self, name: &str) {
|
|
let removed = self.transient.lock().unwrap().remove(name).is_some();
|
|
if removed {
|
|
self.emit_dashboard_event(DashboardEvent::TransientCleared {
|
|
seq: self.next_seq(),
|
|
name: name.to_owned(),
|
|
});
|
|
}
|
|
}
|
|
|
|
/// Set a transient state and return a guard that clears it on drop.
|
|
/// Use this from any path where the surrounding future could be
|
|
/// cancelled or panic between set and clear (HTTP handlers, spawned
|
|
/// tasks). The guard's `Drop` runs even on task cancellation, so
|
|
/// the dashboard's spinner can't get pinned forever.
|
|
pub fn transient_guard(self: &Arc<Self>, name: &str, kind: TransientKind) -> TransientGuard {
|
|
self.set_transient(name, kind);
|
|
TransientGuard {
|
|
coord: self.clone(),
|
|
name: name.to_owned(),
|
|
}
|
|
}
|
|
|
|
/// Clone of the current transient map (agent name → in-flight state).
pub fn transient_snapshot(&self) -> HashMap<String, TransientState> {
    let in_flight = self.transient.lock().unwrap();
    in_flight.clone()
}
|
|
|
|
/// Drop a system message into the given agent's inbox. Wakes the
|
|
/// turn loop with a "you were just (re)started" hint — operator
|
|
/// caused the transition, agent picks up where it left off
|
|
/// (notes are in the bind-mounted state dir, last turn is in
|
|
/// --continue's session). Best-effort; broker errors are logged
|
|
/// but don't propagate.
|
|
pub fn kick_agent(&self, name: &str, reason: &str) {
|
|
// Sub-agents bind their state at /agents/<name>/state. The
|
|
// manager has both /state (legacy mount) and /agents
|
|
// bind-mounted, so /agents/<name>/state resolves there too —
|
|
// use that uniformly so the wake message has one canonical
|
|
// path that works everywhere.
|
|
let body = format!(
|
|
"{reason}\n\nYou were just (re)started by the operator. \
|
|
If you were mid-task, check `/agents/{name}/state/` for \
|
|
your notes and pick up where you left off. claude's \
|
|
`--continue` session is intact, so prior context is \
|
|
still in your window."
|
|
);
|
|
if let Err(e) = self.broker.send(&hive_sh4re::Message {
|
|
from: hive_sh4re::SYSTEM_SENDER.to_owned(),
|
|
to: name.to_owned(),
|
|
body,
|
|
}) {
|
|
tracing::warn!(error = ?e, %name, "kick_agent: broker.send failed");
|
|
}
|
|
}
|
|
|
|
/// Push a `HelperEvent` into the manager's inbox. Encoded as JSON in
|
|
/// `Message::body`; sender = `SYSTEM_SENDER`. The manager harness
|
|
/// recognises the sender and parses the body. Best-effort: a serde or
|
|
/// broker error is logged but does not propagate.
|
|
pub fn notify_manager(&self, event: &hive_sh4re::HelperEvent) {
|
|
self.notify_agent(hive_sh4re::MANAGER_AGENT, event);
|
|
}
|
|
|
|
/// Push a `HelperEvent` into an arbitrary agent's inbox. Encoded
|
|
/// the same way as `notify_manager` (sender = `SYSTEM_SENDER`,
|
|
/// body = JSON-encoded event). Used to route `QuestionAnswered`
|
|
/// events back to the agent that called `ask`, `QuestionAsked`
|
|
/// events to the target of a peer question, etc.
|
|
pub fn notify_agent(&self, agent: &str, event: &hive_sh4re::HelperEvent) {
|
|
let body = match serde_json::to_string(event) {
|
|
Ok(s) => s,
|
|
Err(e) => {
|
|
tracing::warn!(error = ?e, "failed to encode helper event");
|
|
return;
|
|
}
|
|
};
|
|
if let Err(e) = self.broker.send(&hive_sh4re::Message {
|
|
from: hive_sh4re::SYSTEM_SENDER.to_owned(),
|
|
to: agent.to_owned(),
|
|
body,
|
|
}) {
|
|
tracing::warn!(error = ?e, target = %agent, "failed to push helper event");
|
|
}
|
|
}
|
|
|
|
/// Deliver `body` to every currently-registered agent, appending the
|
|
/// standard broadcast hint. Returns a list of per-agent error strings
|
|
/// for any that failed (empty = all ok). The sender's own inbox is
|
|
/// included — the hint text tells agents to ignore if no action needed.
|
|
pub fn broadcast_send(&self, from: &str, body: &str) -> Vec<String> {
|
|
const HINT: &str =
|
|
"\n\n⚠️ _hint: this was a broadcast and may not need any action from you_";
|
|
let broadcast_body = format!("{body}{HINT}");
|
|
let mut errors = Vec::new();
|
|
for agent_name in self.list_agents() {
|
|
if let Err(e) = self.broker.send(&hive_sh4re::Message {
|
|
from: from.to_owned(),
|
|
to: agent_name.clone(),
|
|
body: broadcast_body.clone(),
|
|
}) {
|
|
errors.push(format!("{agent_name}: {e}"));
|
|
}
|
|
}
|
|
errors
|
|
}
|
|
|
|
pub fn agent_dir(name: &str) -> PathBuf {
|
|
PathBuf::from(format!("{AGENT_RUNTIME_ROOT}/{name}"))
|
|
}
|
|
|
|
pub fn socket_path(name: &str) -> PathBuf {
|
|
Self::agent_dir(name).join("mcp.sock")
|
|
}
|
|
|
|
pub fn manager_dir() -> PathBuf {
|
|
PathBuf::from(MANAGER_RUNTIME_ROOT)
|
|
}
|
|
|
|
pub fn manager_socket_path() -> PathBuf {
|
|
Self::manager_dir().join("mcp.sock")
|
|
}
|
|
|
|
/// Ensure a runtime dir + (for sub-agents) per-agent socket exists. For
|
|
/// the manager, `manager_server::start` owns the socket — just return
|
|
/// the dir. For sub-agents this is `register_agent` (creates a fresh
|
|
/// listener bound to `socket_path(name)`). Source directory of the
|
|
/// `/run/hive/mcp.sock` bind that ends up in `set_nspawn_flags`.
|
|
pub fn ensure_runtime(self: &Arc<Self>, name: &str) -> Result<PathBuf> {
|
|
if name == crate::lifecycle::MANAGER_NAME {
|
|
let dir = Self::manager_dir();
|
|
std::fs::create_dir_all(&dir)
|
|
.with_context(|| format!("create manager dir {}", dir.display()))?;
|
|
return Ok(dir);
|
|
}
|
|
self.register_agent(name)
|
|
}
|
|
|
|
/// Per-agent state root (parent of `config/`, future `prompts/`, etc.).
|
|
pub fn agent_state_root(name: &str) -> PathBuf {
|
|
PathBuf::from(format!("{AGENT_STATE_ROOT}/{name}"))
|
|
}
|
|
|
|
/// Manager-editable proposed config repo. Bind-mounted into the manager
|
|
/// container as `/agents/<name>/config/`.
|
|
pub fn agent_proposed_dir(name: &str) -> PathBuf {
|
|
Self::agent_state_root(name).join("config")
|
|
}
|
|
|
|
/// Per-agent Claude credentials dir. Bind-mounted RW into the agent
|
|
/// container at `/root/.claude` so OAuth state survives container
|
|
/// destroy/recreate. Each agent owns its own token lineage — sharing
|
|
/// would break on the first refresh-token rotation.
|
|
pub fn agent_claude_dir(name: &str) -> PathBuf {
|
|
Self::agent_state_root(name).join("claude")
|
|
}
|
|
|
|
/// Per-agent durable knowledge dir. Bind-mounted RW into the agent
|
|
/// container at `/state`. Survives destroy/recreate alongside the
|
|
/// claude dir. Agents are told (via the system prompt) to write
|
|
/// long-lived notes / scratch state here.
|
|
pub fn agent_notes_dir(name: &str) -> PathBuf {
|
|
Self::agent_state_root(name).join("state")
|
|
}
|
|
|
|
/// Authoritative applied config repo. Hive-c0re-only.
|
|
pub fn agent_applied_dir(name: &str) -> PathBuf {
|
|
PathBuf::from(format!("{APPLIED_STATE_ROOT}/{name}"))
|
|
}
|
|
|
|
/// Enumerate names that have a persistent state dir under
|
|
/// `/var/lib/hyperhive/agents/` (i.e. config / claude creds /
|
|
/// notes survive). Includes both currently-existing containers and
|
|
/// destroyed-but-kept tombstones; callers filter the latter by
|
|
/// subtracting `lifecycle::list()`.
|
|
#[must_use]
|
|
pub fn kept_state_names() -> Vec<String> {
|
|
let Ok(rd) = std::fs::read_dir(AGENT_STATE_ROOT) else {
|
|
return Vec::new();
|
|
};
|
|
let mut out: Vec<String> = rd
|
|
.flatten()
|
|
.filter(|e| e.file_type().is_ok_and(|t| t.is_dir()))
|
|
.filter_map(|e| e.file_name().into_string().ok())
|
|
.collect();
|
|
out.sort();
|
|
out
|
|
}
|
|
}
|