fix: rebuild containers when meta flake changes, not only on hyperhive rev

Closes #78
This commit is contained in:
damocles 2026-05-20 17:06:16 +02:00 committed by Mara
parent 47279f110d
commit 1e325c84f2

View file

@ -38,6 +38,42 @@ pub fn current_flake_rev(hyperhive_flake: &str) -> Option<String> {
.map(|p| p.display().to_string()) .map(|p| p.display().to_string())
} }
/// Resolve the commit currently checked out in the meta flake repository.
///
/// Runs `git -C /var/lib/hyperhive/meta rev-parse HEAD` and returns its
/// standard output with surrounding whitespace removed.
///
/// Yields `None` whenever a usable revision cannot be obtained: `git` could
/// not be spawned, it exited unsuccessfully (for instance because the
/// repository is not there yet), its output was not valid UTF-8, or the
/// output was blank after trimming. Callers treat `None` as "unknown" and
/// leave the meta revision out of the combined marker.
#[must_use]
pub fn current_meta_rev() -> Option<String> {
    let output = std::process::Command::new("git")
        .arg("-C")
        .arg("/var/lib/hyperhive/meta")
        .arg("rev-parse")
        .arg("HEAD")
        .output()
        .ok()
        // A non-zero exit status means git had nothing trustworthy to report.
        .filter(|o| o.status.success())?;
    let stdout = String::from_utf8(output.stdout).ok()?;
    // `rev-parse` terminates the hash with a newline; strip it before storing.
    let head = stdout.trim();
    (!head.is_empty()).then(|| head.to_owned())
}
/// Build the opaque marker string that is persisted on disk from the
/// hyperhive package rev and, when known, the meta flake rev.
///
/// * With a meta rev the result is `"{hyperhive_rev}:{meta_rev}"`.
/// * Without one the result is `hyperhive_rev` unchanged.
///
/// Folding the meta rev into the marker means that a change to the meta
/// flake alone (e.g. a `sync_agents` run that rewrites it) yields a
/// different marker, so the stored one no longer matches and the containers
/// get rebuilt on the next hive-c0re boot.
#[must_use]
pub fn combined_rev(hyperhive_rev: &str, meta_rev: Option<&str>) -> String {
    meta_rev.map_or_else(
        || hyperhive_rev.to_owned(),
        |meta| format!("{hyperhive_rev}:{meta}"),
    )
}
/// Read the marker for `name` and return whether the recorded rev matches /// Read the marker for `name` and return whether the recorded rev matches
/// `current_rev`. Missing/unreadable marker counts as out-of-date. /// `current_rev`. Missing/unreadable marker counts as out-of-date.
#[must_use] #[must_use]
@ -123,7 +159,10 @@ pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &s
/// the approval queue — manager is required infrastructure. Idempotent. /// the approval queue — manager is required infrastructure. Idempotent.
pub async fn ensure_manager(coord: &Arc<Coordinator>) -> Result<()> { pub async fn ensure_manager(coord: &Arc<Coordinator>) -> Result<()> {
let existing = lifecycle::list().await.unwrap_or_default(); let existing = lifecycle::list().await.unwrap_or_default();
let current_rev = current_flake_rev(&coord.hyperhive_flake); let flake_rev = current_flake_rev(&coord.hyperhive_flake);
let meta_rev = current_meta_rev();
let current_rev =
flake_rev.as_deref().map(|f| combined_rev(f, meta_rev.as_deref()));
if existing.iter().any(|c| c == MANAGER_NAME) { if existing.iter().any(|c| c == MANAGER_NAME) {
// Container exists already. If it predates the unified lifecycle // Container exists already. If it predates the unified lifecycle
// (no applied flake on disk) we must rebuild — otherwise it's // (no applied flake on disk) we must rebuild — otherwise it's
@ -174,13 +213,15 @@ pub async fn ensure_manager(coord: &Arc<Coordinator>) -> Result<()> {
/// per-agent outcomes and continues past failures. Returns Ok even if some /// per-agent outcomes and continues past failures. Returns Ok even if some
/// rebuilds failed — startup shouldn't be blocked by a broken agent. /// rebuilds failed — startup shouldn't be blocked by a broken agent.
pub async fn run(coord: Arc<Coordinator>) -> Result<()> { pub async fn run(coord: Arc<Coordinator>) -> Result<()> {
let Some(current_rev) = current_flake_rev(&coord.hyperhive_flake) else { let Some(flake_rev) = current_flake_rev(&coord.hyperhive_flake) else {
tracing::info!( tracing::info!(
flake = %coord.hyperhive_flake, flake = %coord.hyperhive_flake,
"auto-update: hyperhive_flake has no canonical path; skipping", "auto-update: hyperhive_flake has no canonical path; skipping",
); );
return Ok(()); return Ok(());
}; };
let meta_rev = current_meta_rev();
let current_rev = combined_rev(&flake_rev, meta_rev.as_deref());
tracing::info!(rev = %current_rev, "auto-update: scanning agents"); tracing::info!(rev = %current_rev, "auto-update: scanning agents");
// Bump meta's hyperhive input up-front so the per-agent rebuilds // Bump meta's hyperhive input up-front so the per-agent rebuilds