//! Startup auto-migration from the pre-meta layout. Runs before //! `auto_update::run` and consists of four phases, each idempotent: //! //! 1. Per-agent applied repo: rewrite `flake.nix` to the module-only //! boilerplate if it isn't already, commit, relocate `deployed/0` //! to HEAD so `setup_applied`'s existence check passes. //! 2. Per-agent proposed repo: ensure the `applied` git remote //! points at `/applied//.git` (re-runs `setup_proposed`'s //! `ensure_applied_remote` indirectly via a host-side git call). //! 3. Meta repo: `meta::sync_agents` over the current agent list — //! init the repo on first call, rerender + relock if anything //! drifted. //! 4. Container repoint: for every existing container, run //! `nixos-container update --flake meta#` so it //! activates against the meta flake. Guarded by a marker file //! so the (expensive) phase 4 only runs once across hive-c0re //! restarts. //! //! Env kill-switch: `HIVE_SKIP_META_MIGRATION=1` skips the whole //! migration. Use when smoke-testing one agent at a time by hand. use std::path::{Path, PathBuf}; use std::sync::Arc; use anyhow::{Context, Result}; use tokio::process::Command; use crate::coordinator::Coordinator; use crate::lifecycle::{self, AGENT_PREFIX, MANAGER_NAME}; use crate::meta; const KILL_SWITCH: &str = "HIVE_SKIP_META_MIGRATION"; /// Marker for phase 4. Once present, container repoint is skipped on /// future restarts. fn repoint_marker() -> PathBuf { PathBuf::from("/var/lib/hyperhive/.meta-migration-done") } const MODULE_FLAKE_MARKER: &str = "nixosModules.default = import ./agent.nix"; pub async fn run(coord: &Arc) -> Result<()> { if std::env::var(KILL_SWITCH).is_ok() { tracing::info!("migration: {KILL_SWITCH} set — skipping"); return Ok(()); } let names = enumerate_agents().await; tracing::info!(count = names.len(), "migration: scanning"); // Phase 1 + 2: per-agent applied + proposed. for name in &names { if let Err(e) = migrate_applied_repo(name).await { tracing::warn!(%name, error = ?e, "migration: applied repo rewrite failed"); } if let Err(e) = lifecycle::setup_proposed(&Coordinator::agent_proposed_dir(name), name) .await { tracing::warn!(%name, error = ?e, "migration: setup_proposed failed"); } } // Phase 3: meta repo. let agents = lifecycle::agents_for_meta_listing().await.unwrap_or_default(); if let Err(e) = meta::sync_agents(&coord.hyperhive_flake, coord.dashboard_port, &agents).await { tracing::warn!(error = ?e, "migration: meta sync_agents failed"); } // Phase 4: container repoint, guarded by marker. if repoint_marker().exists() { tracing::debug!("migration: phase 4 marker present, skipping repoint"); return Ok(()); } let mut all_ok = true; for name in &names { if let Err(e) = repoint_container(name).await { tracing::warn!(%name, error = ?e, "migration: container repoint failed"); all_ok = false; } } if all_ok && !names.is_empty() && let Err(e) = std::fs::write(repoint_marker(), b"done\n") { tracing::warn!(error = ?e, "migration: write repoint marker failed"); } Ok(()) } async fn enumerate_agents() -> Vec { let containers = lifecycle::list().await.unwrap_or_default(); containers .into_iter() .filter_map(|c| { if c == MANAGER_NAME { Some(MANAGER_NAME.to_owned()) } else { c.strip_prefix(AGENT_PREFIX).map(str::to_owned) } }) .collect() } async fn migrate_applied_repo(name: &str) -> Result<()> { let dir = Coordinator::agent_applied_dir(name); if !dir.join(".git").exists() { return Ok(()); } let flake_path = dir.join("flake.nix"); let cur = std::fs::read_to_string(&flake_path).unwrap_or_default(); if cur.contains(MODULE_FLAKE_MARKER) { return Ok(()); } let want = lifecycle::initial_flake_nix(); std::fs::write(&flake_path, want) .with_context(|| format!("write {}", flake_path.display()))?; raw_git( &dir, &[ "-c", "user.name=hive-c0re", "-c", "user.email=hive-c0re@hyperhive", "add", "flake.nix", ], ) .await?; raw_git( &dir, &[ "-c", "user.name=hive-c0re", "-c", "user.email=hive-c0re@hyperhive", "commit", "-m", "migration: module-only flake", ], ) .await?; // Relocate deployed/0 to the migration commit so // setup_applied's existence check passes. raw_git(&dir, &["tag", "-f", "deployed/0", "HEAD"]).await?; tracing::info!(%name, "migration: applied repo migrated to module-only flake"); Ok(()) } async fn repoint_container(name: &str) -> Result<()> { let container = lifecycle::container_name(name); let flake_ref = format!("{}#{name}", meta::meta_dir().display()); let out = Command::new("nixos-container") .args(["update", &container, "--flake", &flake_ref]) .output() .await .with_context(|| format!("nixos-container update {container}"))?; if !out.status.success() { anyhow::bail!( "nixos-container update {container} exited {}: {}", out.status, String::from_utf8_lossy(&out.stderr).trim() ); } tracing::info!(%name, %container, "migration: container repointed at meta"); Ok(()) } async fn raw_git(dir: &Path, args: &[&str]) -> Result<()> { let out = lifecycle::git_command() .current_dir(dir) .args(args) .output() .await .with_context(|| format!("git {} in {}", args.join(" "), dir.display()))?; if !out.status.success() { anyhow::bail!( "git {} failed: {}", args.join(" "), String::from_utf8_lossy(&out.stderr).trim() ); } Ok(()) }