ensure_manager: rebuild hm1nd if applied flake missing (migration safety)

This commit is contained in:
müde 2026-05-15 15:53:39 +02:00
parent edc1de3197
commit 824914807a

View file

@ -78,8 +78,26 @@ pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &s
/// the approval queue — manager is required infrastructure. Idempotent.
pub async fn ensure_manager(coord: &Arc<Coordinator>) -> Result<()> {
let existing = lifecycle::list().await.unwrap_or_default();
let current_rev = current_flake_rev(&coord.hyperhive_flake);
if existing.iter().any(|c| c == MANAGER_NAME) {
// Container exists already. If it predates the unified lifecycle
// (no applied flake on disk) we must rebuild — otherwise it's
// running whatever the host-declarative config was at create
// time, with a wrong systemd unit and port.
let applied_flake = Coordinator::agent_applied_dir(MANAGER_NAME).join("flake.nix");
if !applied_flake.exists()
&& let Some(rev) = current_rev.as_ref()
{
tracing::warn!(
"manager container exists but no applied flake — forcing rebuild to migrate"
);
let coord_clone = coord.clone();
if let Err(e) = rebuild_agent(&coord_clone, MANAGER_NAME, rev).await {
tracing::warn!(error = ?e, "manager migration rebuild failed");
}
} else {
tracing::debug!("manager container already present");
}
return Ok(());
}
tracing::info!("manager container missing — spawning");
@ -96,7 +114,7 @@ pub async fn ensure_manager(coord: &Arc<Coordinator>) -> Result<()> {
&claude_dir,
)
.await?;
if let Some(rev) = current_flake_rev(&coord.hyperhive_flake) {
if let Some(rev) = current_rev {
let _ = std::fs::write(rev_marker_path(MANAGER_NAME), rev);
}
Ok(())