diff --git a/CLAUDE.md b/CLAUDE.md index 1a42edb..e958146 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -159,16 +159,28 @@ docs/damocles-migration.md options for moving damocles onto hyperhive ## Auto-update on startup `hive-c0re serve` runs `auto_update::run` in a background task right after -opening the coordinator. It enumerates sub-agent containers (manager -excluded — its config comes from the host's NixOS module) and rebuilds any -whose recorded hyperhive rev differs from the current one. Rev = canonical -filesystem path of `cfg.hyperhiveFlake` (so `/etc/hyperhive` resolving to a -new `/nix/store/...-source` triggers a rebuild). Marker file: +opening the coordinator. It enumerates managed containers and rebuilds any +whose recorded hyperhive rev differs from the current one: + +- **Sub-agents** rebuild via `lifecycle::rebuild` (regenerates + `applied/<name>/flake.nix`, sets nspawn flags, `nixos-container update --flake`). +- **Manager** runs `nixos-container update hm1nd` (no `--flake`). The + manager's config lives in the host's NixOS module; this is belt-and-braces + on top of NixOS's own container activation. Idempotent when nothing has + actually changed. + +"Rev" = canonical filesystem path of `cfg.hyperhiveFlake` (so `/etc/hyperhive` +resolving to a new `/nix/store/...-source` triggers a rebuild). Marker file: `/var/lib/hyperhive/applied/.<name>.hyperhive-rev`. If the flake input has no canonical path (e.g. a `github:` URL), auto-update is a no-op — rebuild manually. The task is async and never blocks the admin socket; failures are logged and don't take the daemon down. +The dashboard surfaces pending updates per agent: a clickable "needs update +↻" badge appears whenever the marker differs from current rev. The badge +POSTs `/rebuild/<name>`, calling the same `auto_update::rebuild_agent` / +`rebuild_manager` path so manual triggers and the startup scan can't drift. 
+ ## Build / deploy / test ```sh diff --git a/hive-c0re/src/auto_update.rs b/hive-c0re/src/auto_update.rs index 609e064..235004d 100644 --- a/hive-c0re/src/auto_update.rs +++ b/hive-c0re/src/auto_update.rs @@ -11,22 +11,25 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; -use anyhow::Result; +use anyhow::{Context, Result, bail}; +use tokio::process::Command; use crate::coordinator::Coordinator; -use crate::lifecycle::{self, AGENT_PREFIX}; +use crate::lifecycle::{self, AGENT_PREFIX, MANAGER_NAME}; /// Marker file recording the hyperhive rev a sub-agent's container was last /// built against. Sibling of `applied/<name>/` (rather than inside it) to -/// keep it out of the applied repo's git history. -fn rev_marker_path(name: &str) -> PathBuf { +/// keep it out of the applied repo's git history. Uses a leading dot so a +/// glob over `applied/*` doesn't include it. +pub fn rev_marker_path(name: &str) -> PathBuf { PathBuf::from(format!("/var/lib/hyperhive/applied/.{name}.hyperhive-rev")) } /// Resolve the current rev of `hyperhive_flake`. For a path on disk we /// canonicalize (following symlinks) so a /etc/hyperhive → /nix/store/... /// update yields a different string. For anything else we return None. -fn current_flake_rev(hyperhive_flake: &str) -> Option<String> { +#[must_use] +pub fn current_flake_rev(hyperhive_flake: &str) -> Option<String> { let path = Path::new(hyperhive_flake); if !path.exists() { return None; @@ -36,6 +39,62 @@ fn current_flake_rev(hyperhive_flake: &str) -> Option<String> { .map(|p| p.display().to_string()) } +/// Read the marker for `name` and return whether the recorded rev differs +/// from `current_rev`. Missing/unreadable marker counts as out-of-date. +#[must_use] +pub fn agent_needs_update(name: &str, current_rev: &str) -> bool { + let prev = std::fs::read_to_string(rev_marker_path(name)) + .ok() + .map(|s| s.trim().to_owned()); + prev.as_deref() != Some(current_rev) +} + +/// Rebuild one sub-agent and refresh its marker. 
Used by both the startup +/// scanner and the dashboard's manual "update" button so the two paths +/// can't diverge. +pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &str) -> Result<()> { + tracing::info!(%name, rev = %current_rev, "rebuild agent"); + let agent_dir = coord + .register_agent(name) + .with_context(|| format!("register_agent {name}"))?; + let applied_dir = Coordinator::agent_applied_dir(name); + let claude_dir = Coordinator::agent_claude_dir(name); + lifecycle::rebuild( + name, + &coord.hyperhive_flake, + &agent_dir, + &applied_dir, + &claude_dir, + ) + .await?; + std::fs::write(rev_marker_path(name), current_rev) + .with_context(|| format!("write rev marker for {name}"))?; + Ok(()) +} + +/// Apply the manager's host-declared config: `nixos-container update hm1nd` +/// (no `--flake`) re-reads `/etc/nixos-containers/hm1nd.conf`, which the +/// host's `nixos-rebuild switch` rewrites to point at the new `SYSTEM_PATH`. +/// Idempotent when nothing has changed. +pub async fn rebuild_manager(current_rev: &str) -> Result<()> { + tracing::info!(rev = %current_rev, "rebuild manager (nixos-container update hm1nd)"); + let out = Command::new("nixos-container") + .args(["update", MANAGER_NAME]) + .output() + .await + .context("invoke nixos-container update hm1nd")?; + if !out.status.success() { + bail!( + "nixos-container update {MANAGER_NAME} failed ({}): {}", + out.status, + String::from_utf8_lossy(&out.stderr).trim() + ); + } + std::fs::write(rev_marker_path(MANAGER_NAME), current_rev) + .with_context(|| format!("write rev marker for {MANAGER_NAME}"))?; + Ok(()) +} + /// Rebuild every sub-agent whose marker differs from the current rev. Logs /// per-agent outcomes and continues past failures. Returns Ok even if some /// rebuilds failed — startup shouldn't be blocked by a broken agent. 
@@ -58,55 +117,38 @@ pub async fn run(coord: Arc<Coordinator>) -> Result<()> { }; let mut tasks = Vec::new(); + let mut manager_present = false; for container in containers { + if container == MANAGER_NAME { + manager_present = true; + continue; + } let Some(name) = container.strip_prefix(AGENT_PREFIX) else { continue; }; let name = name.to_owned(); - let marker = rev_marker_path(&name); - let prev = std::fs::read_to_string(&marker).ok(); - if prev.as_deref().map(str::trim) == Some(current_rev.as_str()) { + if !agent_needs_update(&name, &current_rev) { tracing::debug!(%name, "auto-update: up-to-date"); continue; } - let coord = coord.clone(); let current_rev = current_rev.clone(); tasks.push(tokio::spawn(async move { - tracing::info!( - %name, - prev = ?prev, - rev = %current_rev, - "auto-update: rebuilding agent", - ); - let agent_dir = match coord.register_agent(&name) { - Ok(d) => d, - Err(e) => { - tracing::warn!(%name, error = ?e, "auto-update: register_agent failed"); - return; - } - }; - let applied_dir = Coordinator::agent_applied_dir(&name); - let claude_dir = Coordinator::agent_claude_dir(&name); - match lifecycle::rebuild( - &name, - &coord.hyperhive_flake, - &agent_dir, - &applied_dir, - &claude_dir, - ) - .await - { - Ok(()) => { - if let Err(e) = std::fs::write(&marker, &current_rev) { - tracing::warn!(%name, error = ?e, "auto-update: write rev marker failed"); - } else { - tracing::info!(%name, "auto-update: agent rebuilt"); - } - } - Err(e) => { - tracing::warn!(%name, error = ?e, "auto-update: rebuild failed"); - } + if let Err(e) = rebuild_agent(&coord, &name, &current_rev).await { + tracing::warn!(%name, error = ?e, "auto-update: rebuild failed"); + } + })); + } + + // Manager runs unconditionally when its marker differs: even if the host + // hasn't been rebuilt yet, `nixos-container update hm1nd` is a no-op, so + // there's no harm. The host's own activation already updates declarative + // containers — this is belt-and-braces for hive-c0re restarts. 
+ if manager_present && agent_needs_update(MANAGER_NAME, &current_rev) { + let current_rev = current_rev.clone(); + tasks.push(tokio::spawn(async move { + if let Err(e) = rebuild_manager(&current_rev).await { + tracing::warn!(error = ?e, "auto-update: manager rebuild failed"); } })); } diff --git a/hive-c0re/src/dashboard.rs b/hive-c0re/src/dashboard.rs index 06e47f9..1450d34 100644 --- a/hive-c0re/src/dashboard.rs +++ b/hive-c0re/src/dashboard.rs @@ -42,6 +42,7 @@ pub async fn serve(port: u16, coord: Arc<Coordinator>) -> Result<()> { .route("/approve/{id}", post(post_approve)) .route("/deny/{id}", post(post_deny)) .route("/destroy/{name}", post(post_destroy)) + .route("/rebuild/{name}", post(post_rebuild)) .route("/request-spawn", post(post_request_spawn)) .route("/send", post(post_send)) .route("/messages/stream", get(messages_stream)) @@ -64,6 +65,7 @@ async fn index(headers: HeaderMap, State(state): State) -> Html) -> Html\n\n\n\nhyperhive // h1ve-c0re\n{refresh}\n{STYLE}\n\n\n{BANNER}\n{containers}\n{talk}\n{approvals_html}\n{MSG_FLOW}\n{FOOTER}\n{MSG_FLOW_JS}\n\n\n", - containers = render_containers(&containers, &transient, &hostname), + containers = render_containers(&containers, &transient, current_rev.as_deref(), &hostname), talk = render_talk(&containers), )) } @@ -163,6 +165,24 @@ async fn post_request_spawn( } } +async fn post_rebuild(State(state): State, AxumPath(name): AxumPath) -> Response { + let Some(current_rev) = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake) + else { + return error_response( + "rebuild: hyperhive_flake has no canonical path; manual rebuild only via `hive-c0re rebuild`", + ); + }; + let result = if name == lifecycle::MANAGER_NAME { + crate::auto_update::rebuild_manager(&current_rev).await + } else { + crate::auto_update::rebuild_agent(&state.coord, &name, &current_rev).await + }; + match result { + Ok(()) => Redirect::to("/").into_response(), + Err(e) => error_response(&format!("rebuild {name} failed: {e:#}")), + } +} + async fn 
post_destroy(State(state): State, AxumPath(name): AxumPath) -> Response { match actions::destroy(&state.coord, &name).await { Ok(()) => Redirect::to("/").into_response(), @@ -184,6 +204,7 @@ fn error_response(message: &str) -> Response { fn render_containers( containers: &[String], transient: &std::collections::HashMap, + current_rev: Option<&str>, hostname: &str, ) -> String { let mut out = String::from( @@ -217,9 +238,10 @@ fn render_containers( out.push_str("
    \n"); for container in containers { if container == MANAGER_NAME { + let update_badge = update_badge_for(MANAGER_NAME, current_rev); let _ = writeln!( out, - "
  • ▓█▓▒░ {container} m1nd :{MANAGER_PORT}
  • ", + "
  • ▓█▓▒░ {container} m1nd{update_badge} :{MANAGER_PORT}
  • ", ); } else if let Some(name) = container.strip_prefix(AGENT_PREFIX) { let port = lifecycle::agent_web_port(name); @@ -231,9 +253,10 @@ fn render_containers( " needs login →", ) }; + let update_badge = update_badge_for(name, current_rev); let _ = writeln!( out, - "
  • ▒░▒░░ {name} ag3nt{login_badge} {container} :{port}\n
    \n
  • ", + "
  • ▒░▒░░ {name} ag3nt{login_badge}{update_badge} {container} :{port}\n
    \n
  • ", ); } } @@ -319,6 +342,20 @@ fn gc_orphans(coord: &Coordinator, approvals: Vec) -> Vec { .collect() } +/// Returns either an empty string (agent is up-to-date / no rev known) or +/// a clickable "needs update" badge whose form POSTs to /rebuild/. +fn update_badge_for(name: &str, current_rev: Option<&str>) -> String { + let Some(rev) = current_rev else { + return String::new(); + }; + if !crate::auto_update::agent_needs_update(name, rev) { + return String::new(); + } + format!( + "
    ", + ) +} + /// Host-side mirror of `hive_ag3nt::login::has_session`. Returns true if the /// agent's bound `~/.claude/` dir on disk contains any regular file. The /// dashboard reads this each render so logins driven from the agent web UI @@ -550,6 +587,13 @@ const STYLE: &str = r#" .spawnform input::placeholder { color: var(--muted); } .spawnform input:focus { outline: 1px solid var(--purple); } .role-pending { color: var(--amber); border-color: var(--amber); } + .btn-inline { + font-family: inherit; + background: transparent; + cursor: pointer; + margin-left: 0.4em; + } + .btn-inline:hover { background: rgba(255, 184, 77, 0.1); } .kind { display: inline-block; margin-left: 0.4em;