auto-update: surface pending updates in dashboard + include manager
This commit is contained in:
parent
a4e1556f90
commit
e777576528
3 changed files with 149 additions and 51 deletions
|
|
@ -11,22 +11,25 @@
|
|||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use anyhow::{Context, Result, bail};
|
||||
use tokio::process::Command;
|
||||
|
||||
use crate::coordinator::Coordinator;
|
||||
use crate::lifecycle::{self, AGENT_PREFIX};
|
||||
use crate::lifecycle::{self, AGENT_PREFIX, MANAGER_NAME};
|
||||
|
||||
/// Marker file recording the hyperhive rev a sub-agent's container was last
|
||||
/// built against. Sibling of `applied/<name>/` (rather than inside it) to
|
||||
/// keep it out of the applied repo's git history.
|
||||
fn rev_marker_path(name: &str) -> PathBuf {
|
||||
/// keep it out of the applied repo's git history. Uses a leading dot so a
|
||||
/// glob over `applied/*` doesn't include it.
|
||||
pub fn rev_marker_path(name: &str) -> PathBuf {
|
||||
PathBuf::from(format!("/var/lib/hyperhive/applied/.{name}.hyperhive-rev"))
|
||||
}
|
||||
|
||||
/// Resolve the current rev of `hyperhive_flake`. For a path on disk we
|
||||
/// canonicalize (following symlinks) so a /etc/hyperhive → /nix/store/...
|
||||
/// update yields a different string. For anything else we return None.
|
||||
fn current_flake_rev(hyperhive_flake: &str) -> Option<String> {
|
||||
#[must_use]
|
||||
pub fn current_flake_rev(hyperhive_flake: &str) -> Option<String> {
|
||||
let path = Path::new(hyperhive_flake);
|
||||
if !path.exists() {
|
||||
return None;
|
||||
|
|
@ -36,6 +39,62 @@ fn current_flake_rev(hyperhive_flake: &str) -> Option<String> {
|
|||
.map(|p| p.display().to_string())
|
||||
}
|
||||
|
||||
/// Read the marker for `name` and return whether the recorded rev matches
|
||||
/// `current_rev`. Missing/unreadable marker counts as out-of-date.
|
||||
#[must_use]
|
||||
pub fn agent_needs_update(name: &str, current_rev: &str) -> bool {
|
||||
let prev = std::fs::read_to_string(rev_marker_path(name))
|
||||
.ok()
|
||||
.map(|s| s.trim().to_owned());
|
||||
prev.as_deref() != Some(current_rev)
|
||||
}
|
||||
|
||||
/// Rebuild one sub-agent and refresh its marker. Used by both the startup
|
||||
/// scanner and the dashboard's manual "update" button so the two paths
|
||||
/// can't diverge.
|
||||
pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &str) -> Result<()> {
|
||||
tracing::info!(%name, rev = %current_rev, "rebuild agent");
|
||||
let agent_dir = coord
|
||||
.register_agent(name)
|
||||
.with_context(|| format!("register_agent {name}"))?;
|
||||
let applied_dir = Coordinator::agent_applied_dir(name);
|
||||
let claude_dir = Coordinator::agent_claude_dir(name);
|
||||
lifecycle::rebuild(
|
||||
name,
|
||||
&coord.hyperhive_flake,
|
||||
&agent_dir,
|
||||
&applied_dir,
|
||||
&claude_dir,
|
||||
)
|
||||
.await?;
|
||||
std::fs::write(rev_marker_path(name), current_rev)
|
||||
.with_context(|| format!("write rev marker for {name}"))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Apply the manager's host-declared config: `nixos-container update hm1nd`
|
||||
/// (no `--flake`) re-reads `/etc/nixos-containers/hm1nd.conf`, which the
|
||||
/// host's `nixos-rebuild switch` rewrites to point at the new `SYSTEM_PATH`.
|
||||
/// Idempotent when nothing has changed.
|
||||
pub async fn rebuild_manager(current_rev: &str) -> Result<()> {
|
||||
tracing::info!(rev = %current_rev, "rebuild manager (nixos-container update hm1nd)");
|
||||
let out = Command::new("nixos-container")
|
||||
.args(["update", MANAGER_NAME])
|
||||
.output()
|
||||
.await
|
||||
.context("invoke nixos-container update hm1nd")?;
|
||||
if !out.status.success() {
|
||||
bail!(
|
||||
"nixos-container update {MANAGER_NAME} failed ({}): {}",
|
||||
out.status,
|
||||
String::from_utf8_lossy(&out.stderr).trim()
|
||||
);
|
||||
}
|
||||
std::fs::write(rev_marker_path(MANAGER_NAME), current_rev)
|
||||
.with_context(|| format!("write rev marker for {MANAGER_NAME}"))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Rebuild every sub-agent whose marker differs from the current rev. Logs
|
||||
/// per-agent outcomes and continues past failures. Returns Ok even if some
|
||||
/// rebuilds failed — startup shouldn't be blocked by a broken agent.
|
||||
|
|
@ -58,55 +117,38 @@ pub async fn run(coord: Arc<Coordinator>) -> Result<()> {
|
|||
};
|
||||
|
||||
let mut tasks = Vec::new();
|
||||
let mut manager_present = false;
|
||||
for container in containers {
|
||||
if container == MANAGER_NAME {
|
||||
manager_present = true;
|
||||
continue;
|
||||
}
|
||||
let Some(name) = container.strip_prefix(AGENT_PREFIX) else {
|
||||
continue;
|
||||
};
|
||||
let name = name.to_owned();
|
||||
let marker = rev_marker_path(&name);
|
||||
let prev = std::fs::read_to_string(&marker).ok();
|
||||
if prev.as_deref().map(str::trim) == Some(current_rev.as_str()) {
|
||||
if !agent_needs_update(&name, &current_rev) {
|
||||
tracing::debug!(%name, "auto-update: up-to-date");
|
||||
continue;
|
||||
}
|
||||
|
||||
let coord = coord.clone();
|
||||
let current_rev = current_rev.clone();
|
||||
tasks.push(tokio::spawn(async move {
|
||||
tracing::info!(
|
||||
%name,
|
||||
prev = ?prev,
|
||||
rev = %current_rev,
|
||||
"auto-update: rebuilding agent",
|
||||
);
|
||||
let agent_dir = match coord.register_agent(&name) {
|
||||
Ok(d) => d,
|
||||
Err(e) => {
|
||||
tracing::warn!(%name, error = ?e, "auto-update: register_agent failed");
|
||||
return;
|
||||
}
|
||||
};
|
||||
let applied_dir = Coordinator::agent_applied_dir(&name);
|
||||
let claude_dir = Coordinator::agent_claude_dir(&name);
|
||||
match lifecycle::rebuild(
|
||||
&name,
|
||||
&coord.hyperhive_flake,
|
||||
&agent_dir,
|
||||
&applied_dir,
|
||||
&claude_dir,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(()) => {
|
||||
if let Err(e) = std::fs::write(&marker, &current_rev) {
|
||||
tracing::warn!(%name, error = ?e, "auto-update: write rev marker failed");
|
||||
} else {
|
||||
tracing::info!(%name, "auto-update: agent rebuilt");
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(%name, error = ?e, "auto-update: rebuild failed");
|
||||
}
|
||||
if let Err(e) = rebuild_agent(&coord, &name, &current_rev).await {
|
||||
tracing::warn!(%name, error = ?e, "auto-update: rebuild failed");
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
// Manager runs unconditionally when its marker differs: even if the host
|
||||
// hasn't been rebuilt yet, `nixos-container update hm1nd` is a no-op, so
|
||||
// there's no harm. The host's own activation already updates declarative
|
||||
// containers — this is belt-and-braces for hive-c0re restarts.
|
||||
if manager_present && agent_needs_update(MANAGER_NAME, &current_rev) {
|
||||
let current_rev = current_rev.clone();
|
||||
tasks.push(tokio::spawn(async move {
|
||||
if let Err(e) = rebuild_manager(&current_rev).await {
|
||||
tracing::warn!(error = ?e, "auto-update: manager rebuild failed");
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue