//! Single hive-c0re-owned flake at `/var/lib/hyperhive/meta/` that //! consumes every agent's applied repo as a flake input and exports one //! `nixosConfiguration` per agent. Containers run against //! `--flake /var/lib/hyperhive/meta#`; lifecycle ops here drive the //! lock file so meta's git log is the system-wide deploy audit trail. //! //! Flow: //! - `sync_agents` (idempotent) — render `flake.nix` for the current //! agent set, init the repo on first call, relock if the rendered //! contents changed, commit. Used by spawn / destroy / startup //! migration. //! - `prepare_deploy` + `finalize_deploy` / `abort_deploy` — two-phase //! for the `request_apply_commit` path so a failed //! `nixos-container update` leaves no orphan commit in meta. Prepare //! writes the new lock without committing; finalize commits with the //! deploy message; abort `git restore`s the lock back. //! - `lock_update_hyperhive` — one-shot for the auto-update path. use std::path::{Path, PathBuf}; use anyhow::{Context, Result, bail}; use tokio::process::Command; use crate::lifecycle; const META_ROOT: &str = "/var/lib/hyperhive/meta"; const APPLIED_ROOT: &str = "/var/lib/hyperhive/applied"; const GIT_NAME: &str = "hive-c0re"; const GIT_EMAIL: &str = "hive-c0re@hyperhive"; /// Where the manager sees this directory inside its container (RO bind). #[allow(dead_code)] // wired up by set_nspawn_flags in a follow-up commit pub const CONTAINER_MANAGER_META_MOUNT: &str = "/meta"; #[derive(Debug, Clone)] pub struct AgentSpec { pub name: String, pub is_manager: bool, pub port: u16, } #[must_use] pub fn meta_dir() -> PathBuf { PathBuf::from(META_ROOT) } /// Idempotently reconcile the meta repo with the current agent set. /// First call inits the git repo, runs `nix flake lock`, and lands a /// seed commit. Subsequent calls only touch `flake.nix` when the /// rendered contents differ from disk; an unchanged `flake.nix` is a /// no-op. #[allow(dead_code)] // first caller lands in a later commit pub async fn sync_agents( hyperhive_flake: &str, dashboard_port: u16, agents: &[AgentSpec], ) -> Result<()> { let dir = meta_dir(); std::fs::create_dir_all(&dir).with_context(|| format!("create {}", dir.display()))?; let new_flake = render_flake(hyperhive_flake, dashboard_port, agents); let flake_path = dir.join("flake.nix"); let on_disk = std::fs::read_to_string(&flake_path).unwrap_or_default(); let initial = !dir.join(".git").exists(); if !initial && on_disk == new_flake { return Ok(()); } std::fs::write(&flake_path, &new_flake) .with_context(|| format!("write {}", flake_path.display()))?; if initial { git(&dir, &["init", "--initial-branch=main"]).await?; } nix(&dir, &["flake", "lock"]).await?; git(&dir, &["add", "-A"]).await?; let msg = if initial { format!("seed meta from {} agent(s)", agents.len()) } else { "regenerate meta flake".to_owned() }; git_commit(&dir, &msg).await?; Ok(()) } /// Phase 1 of an apply-commit deploy. Updates the locked rev of /// `agent-` to whatever `applied//main` currently points /// at. **Doesn't commit** — caller must follow with /// `finalize_deploy` on build success or `abort_deploy` on failure. #[allow(dead_code)] // wired up by actions::run_apply_commit in a later commit pub async fn prepare_deploy(name: &str) -> Result<()> { let dir = meta_dir(); let input = format!("agent-{name}"); nix(&dir, &["flake", "lock", "--update-input", &input]).await } /// Phase 2-success. Commits the staged `flake.lock` change with a /// deploy-shaped message. No-op (clean working tree) is tolerated — /// some lock-updates resolve to the same rev that's already locked. #[allow(dead_code)] pub async fn finalize_deploy(name: &str, sha: &str, tag: &str) -> Result<()> { let dir = meta_dir(); if git_is_clean(&dir).await? { return Ok(()); } git(&dir, &["add", "flake.lock"]).await?; let short = &sha[..sha.len().min(12)]; git_commit(&dir, &format!("deploy {name} {tag} {short}")).await } /// Phase 2-failure. Drops the uncommitted `flake.lock` change so meta /// stays pinned at the previously-deployed shas. The failed proposal /// is still captured in `applied/`'s annotated `failed/` tag — /// meta's history only carries successful deploys. #[allow(dead_code)] pub async fn abort_deploy() -> Result<()> { let dir = meta_dir(); git(&dir, &["restore", "flake.lock"]).await } /// One-shot used by the manual-rebuild path: relock just one /// agent's input and commit the lock change if any. Single-phase /// (no separate finalize) because rebuild has no failure-revert /// semantics — it always wants the latest main. #[allow(dead_code)] // wired up by lifecycle::rebuild in this commit pub async fn lock_update_for_rebuild(name: &str) -> Result<()> { let dir = meta_dir(); let input = format!("agent-{name}"); nix(&dir, &["flake", "lock", "--update-input", &input]).await?; if !git_is_clean(&dir).await? { git(&dir, &["add", "flake.lock"]).await?; git_commit(&dir, &format!("rebuild {name}: lock update")).await?; } Ok(()) } /// One-shot used by the auto-update path: pin the latest hyperhive /// rev, commit if the lock changed. Cheaper than `sync_agents` /// because the per-agent inputs aren't touched. #[allow(dead_code)] pub async fn lock_update_hyperhive() -> Result<()> { let dir = meta_dir(); nix(&dir, &["flake", "lock", "--update-input", "hyperhive"]).await?; if !git_is_clean(&dir).await? { git(&dir, &["add", "flake.lock"]).await?; git_commit(&dir, "bump hyperhive").await?; } Ok(()) } fn render_flake(hyperhive_flake: &str, dashboard_port: u16, agents: &[AgentSpec]) -> String { use std::fmt::Write as _; let mut out = String::new(); out.push_str("{\n description = \"hyperhive deployed agents\";\n inputs = {\n"); let _ = writeln!(out, " hyperhive.url = \"{hyperhive_flake}\";"); for spec in agents { let _ = writeln!( out, " agent-{}.url = \"git+file://{APPLIED_ROOT}/{}\";", spec.name, spec.name, ); } out.push_str(" };\n outputs =\n { self, hyperhive, ... }@inputs:\n let\n"); let _ = writeln!( out, " dashboardPort = {dashboard_port};\n mkAgent = {{ name, isManager, port }}:" ); out.push_str( r#" let base = if isManager then hyperhive.nixosConfigurations.manager else hyperhive.nixosConfigurations.agent-base; input = inputs."agent-${name}"; service = if isManager then "hive-m1nd" else "hive-ag3nt"; in base.extendModules { modules = [ input.nixosModules.default { programs.git.config.user = { name = name; email = "${name}@hyperhive"; }; systemd.services.${service}.environment = { HIVE_PORT = toString port; HIVE_LABEL = name; HIVE_DASHBOARD_PORT = toString dashboardPort; }; } ]; }; in { nixosConfigurations = { "#, ); for spec in agents { let _ = writeln!( out, " {} = mkAgent {{ name = \"{}\"; isManager = {}; port = {}; }};", spec.name, spec.name, if spec.is_manager { "true" } else { "false" }, spec.port, ); } out.push_str(" };\n };\n}\n"); out } async fn git_is_clean(dir: &Path) -> Result { let out = lifecycle::git_command() .current_dir(dir) .args(["status", "--porcelain"]) .output() .await .with_context(|| format!("git status in {}", dir.display()))?; Ok(out.stdout.iter().all(u8::is_ascii_whitespace)) } async fn git(dir: &Path, args: &[&str]) -> Result<()> { let out = lifecycle::git_command() .current_dir(dir) .args(args) .output() .await .with_context(|| format!("git {} in {}", args.join(" "), dir.display()))?; if !out.status.success() { bail!( "git {} failed ({}): {}", args.join(" "), out.status, String::from_utf8_lossy(&out.stderr).trim() ); } Ok(()) } async fn git_commit(dir: &Path, message: &str) -> Result<()> { git( dir, &[ "-c", &format!("user.name={GIT_NAME}"), "-c", &format!("user.email={GIT_EMAIL}"), "commit", "-m", message, ], ) .await } async fn nix(dir: &Path, args: &[&str]) -> Result<()> { // `--extra-experimental-features` belt-and-suspenders for hosts // that haven't set this in nix.conf. The hyperhive module's // deploy guide assumes flakes are already enabled, but the cost // of being defensive is one extra argv each call. let mut all = vec!["--extra-experimental-features", "nix-command flakes"]; all.extend(args); let out = Command::new("nix") .current_dir(dir) .args(&all) .output() .await .with_context(|| format!("nix {} in {}", args.join(" "), dir.display()))?; if !out.status.success() { bail!( "nix {} failed ({}): {}", args.join(" "), out.status, String::from_utf8_lossy(&out.stderr).trim() ); } Ok(()) }