hyperhive/hive-c0re/src/main.rs

239 lines
9.7 KiB
Rust

use std::path::PathBuf;
use std::sync::Arc;
use anyhow::{Result, bail};
use clap::{Parser, Subcommand};
use hive_sh4re::{HostRequest, HostResponse};
mod actions;
mod agent_server;
mod approvals;
mod auto_update;
mod broker;
mod client;
mod coordinator;
mod crash_watch;
mod dashboard;
mod events_vacuum;
mod lifecycle;
mod manager_server;
mod meta;
mod migrate;
mod operator_questions;
mod server;
use coordinator::Coordinator;
// Top-level command line for the `hive-c0re` binary: one global option
// (`--socket`) plus a required subcommand (`Cmd`).
//
// NOTE: maintainer notes in this item use plain `//` comments on purpose.
// With clap's derive, `///` doc comments become user-visible `--help` text.
#[derive(Parser)]
#[command(name = "hive-c0re", about = "hyperhive coordinator daemon and CLI")]
struct Cli {
/// Path to the host admin socket.
// `global = true` lets `--socket` be given after the subcommand as well as
// before it. `main` hands this path to `server::serve` for `serve` and to
// `client::request` for every other subcommand.
#[arg(long, global = true, default_value = "/run/hyperhive/host.sock")]
socket: PathBuf,
// The operation to run; see `Cmd`.
#[command(subcommand)]
cmd: Cmd,
}
// Subcommands of `hive-c0re`.
//
// `Serve` is handled in-process by `main` (it starts the daemon). Every other
// variant is turned into the `HostRequest` variant of the same name, sent with
// `client::request` over the admin socket, and the reply is printed by
// `render`.
//
// NOTE: the `///` comments below double as clap `--help` text, so maintainer
// notes are written as plain `//` comments.
#[derive(Subcommand)]
enum Cmd {
/// Run the coordinator daemon.
Serve {
/// URL of the hyperhive flake. Inlined into each per-agent
/// `flake.nix` as the `hyperhive` input.
// Passed to `Coordinator::open` as an owned `String`.
#[arg(long, default_value = "/etc/hyperhive")]
hyperhive_flake: String,
/// Path to the sqlite message store.
// Passed by reference to `Coordinator::open`.
#[arg(long, default_value = "/var/lib/hyperhive/broker.sqlite")]
db: PathBuf,
/// Dashboard HTTP port.
// Given both to `Coordinator::open` and to `dashboard::serve`.
#[arg(long, default_value_t = 7000)]
dashboard_port: u16,
/// Operator pronouns (free text). Threaded into each
/// container's harness via `HIVE_OPERATOR_PRONOUNS` so the
/// system prompt can mention them. Default: `she/her`.
#[arg(long, default_value = "she/her")]
operator_pronouns: String,
},
/// Spawn a new agent container directly (`hive-agent-<name>`). Bypasses
/// the approval queue — use only as an operator on the host. For
/// approval-gated spawns, use `request-spawn` instead.
// `name` carries no `#[arg(long)]`, so clap treats it as a positional
// argument; the same holds for every bare `name` / `id` field below.
Spawn { name: String },
/// Queue a spawn request as an approval. The container is created on
/// `approve <id>` (CLI) or the dashboard's APPR0VE button.
RequestSpawn { name: String },
/// Stop a managed container (graceful).
Kill { name: String },
/// Tear down a sub-agent container. Container is removed; persistent
/// state (config repos + Claude credentials) is kept by default. Pass
/// `--purge` to also wipe the agent's state dirs (config + creds +
/// notes). No undo.
Destroy {
name: String,
// Boolean `--purge` flag (absent = false); forwarded unchanged in
// `HostRequest::Destroy`.
#[arg(long)]
purge: bool,
},
/// Apply pending config to a managed container.
Rebuild { name: String },
/// List managed containers.
List,
/// List pending approval requests submitted by the manager.
Pending,
/// Approve a pending request by id; the action runs immediately.
// NOTE(review): `id` is presumably the id reported by `pending` — confirm.
Approve { id: i64 },
/// Deny a pending request by id.
Deny { id: i64 },
}
#[tokio::main]
async fn main() -> Result<()> {
tracing_subscriber::fmt()
.with_env_filter(
tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info")),
)
.init();
let cli = Cli::parse();
match cli.cmd {
Cmd::Serve {
hyperhive_flake,
db,
dashboard_port,
operator_pronouns,
} => {
let coord = Arc::new(Coordinator::open(
&db,
hyperhive_flake,
dashboard_port,
operator_pronouns,
)?);
manager_server::start(coord.clone())?;
// Idempotent pre-flight: rewrite pre-meta-layout applied
// repos, ensure proposed repos carry the `applied`
// remote, bootstrap the meta repo, repoint containers at
// `meta#<name>` (one-shot, guarded by a marker file).
// Runs before manager auto-spawn so the new manager is
// built against meta from the first attempt.
if let Err(e) = migrate::run(&coord).await {
tracing::warn!(error = ?e, "startup migration failed");
}
// Auto-create the manager container if it isn't there yet. Block
// on this — without hm1nd the system has no manager harness.
// Failures are logged but allowed: a broken auto-spawn shouldn't
// make the dashboard unreachable for debugging.
if let Err(e) = auto_update::ensure_manager(&coord).await {
tracing::warn!(error = ?e, "auto-spawn manager failed");
}
// Auto-update in the background — don't block service start.
// Sub-agent rebuilds can take tens of seconds; we want the admin
// socket up immediately.
let update_coord = coord.clone();
tokio::spawn(async move {
if let Err(e) = auto_update::run(update_coord).await {
tracing::warn!(error = ?e, "auto-update task failed");
}
});
// Periodic broker vacuum: drop delivered messages older than
// 30 days. Undelivered messages are always kept (still in
// flight). Runs hourly; first sweep happens immediately.
let vacuum_coord = coord.clone();
tokio::spawn(async move {
let interval_secs = 3600u64;
let keep_secs: i64 = 30 * 24 * 3600;
loop {
match vacuum_coord.broker.vacuum_delivered(keep_secs) {
Ok(0) => {}
Ok(n) => tracing::info!(removed = n, "broker vacuum"),
Err(e) => tracing::warn!(error = ?e, "broker vacuum failed"),
}
tokio::time::sleep(std::time::Duration::from_secs(interval_secs)).await;
}
});
// Per-agent events.sqlite vacuum: host-side so the harness
// doesn't need any retention wiring of its own.
events_vacuum::spawn(coord.clone());
// Container crash watcher: emits HelperEvent::ContainerCrash
// when a previously-running container goes away without an
// operator-initiated transient state.
crash_watch::spawn(coord.clone());
// Reminder scheduler: checks for due reminders every 5 seconds,
// delivers them as inbox messages from "reminder".
let reminder_coord = coord.clone();
tokio::spawn(async move {
use hive_sh4re::Message;
loop {
// Query all due reminders in a single DB call
match reminder_coord.broker.get_all_due_reminders() {
Ok(reminders) => {
for (agent, id, message, _file_path) in reminders {
// Deliver as inbox message from "reminder"
if let Err(e) = reminder_coord.broker.send(&Message {
from: "reminder".to_owned(),
to: agent.clone(),
body: message.clone(),
}) {
tracing::warn!(
reminder_id = id,
%agent,
error = ?e,
"failed to deliver reminder"
);
continue;
}
// Mark as sent
if let Err(e) = reminder_coord.broker.mark_reminder_sent(id) {
tracing::warn!(
reminder_id = id,
error = ?e,
"failed to mark reminder sent"
);
}
}
}
Err(e) => {
tracing::warn!(error = ?e, "failed to query due reminders");
}
}
tokio::time::sleep(std::time::Duration::from_secs(5)).await;
}
});
let dash_coord = coord.clone();
tokio::spawn(async move {
if let Err(e) = dashboard::serve(dashboard_port, dash_coord).await {
tracing::error!(error = ?e, "dashboard failed");
}
});
server::serve(&cli.socket, coord).await
}
Cmd::Spawn { name } => {
render(client::request(&cli.socket, HostRequest::Spawn { name }).await?)
}
Cmd::RequestSpawn { name } => {
render(client::request(&cli.socket, HostRequest::RequestSpawn { name }).await?)
}
Cmd::Kill { name } => {
render(client::request(&cli.socket, HostRequest::Kill { name }).await?)
}
Cmd::Destroy { name, purge } => {
render(client::request(&cli.socket, HostRequest::Destroy { name, purge }).await?)
}
Cmd::Rebuild { name } => {
render(client::request(&cli.socket, HostRequest::Rebuild { name }).await?)
}
Cmd::List => render(client::request(&cli.socket, HostRequest::List).await?),
Cmd::Pending => render(client::request(&cli.socket, HostRequest::Pending).await?),
Cmd::Approve { id } => {
render(client::request(&cli.socket, HostRequest::Approve { id }).await?)
}
Cmd::Deny { id } => render(client::request(&cli.socket, HostRequest::Deny { id }).await?),
}
}
/// Prints a daemon reply and converts it into the process result.
///
/// The whole response is written to stdout as pretty-printed JSON first, so
/// the payload is visible even for failed requests.
///
/// # Errors
///
/// Returns an error when the response cannot be serialized, or when
/// `resp.ok` is false. In the latter case the message is `resp.error`, or
/// `"request failed"` when the daemon supplied none.
fn render(resp: HostResponse) -> Result<()> {
    let pretty = serde_json::to_string_pretty(&resp)?;
    println!("{}", pretty);

    match (resp.ok, resp.error) {
        (true, _) => Ok(()),
        (false, reason) => bail!(reason.unwrap_or_else(|| "request failed".to_owned())),
    }
}