From 92822efe1634b30510c7351dcc16721e50629818 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?m=C3=BCde?= Date: Sat, 16 May 2026 00:22:37 +0200 Subject: [PATCH] meta: new hive-c0re module owns /var/lib/hyperhive/meta/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit leaf module with no runtime callers yet (every public item is #[allow(dead_code)] until lifecycle / actions / auto_update rewire to use it). API surface: - sync_agents — idempotent: render flake.nix for the given agent set, git-init on first call, nix flake lock, commit if anything changed. - prepare_deploy / finalize_deploy / abort_deploy — two-phase for the request_apply_commit path. prepare runs nix flake lock --update-input agent-<name> without committing; finalize commits with a 'deploy <name> deployed/<id> <sha>' message; abort git-restores the lock so a failed build leaves no orphan commit. - lock_update_hyperhive — one-shot for the auto-update path. flake.nix template defines mkAgent that pulls each agent's nixosModules.default from its input and wraps with the identity / HIVE_PORT / HIVE_LABEL / HIVE_DASHBOARD_PORT module — what setup_applied used to generate inline. nix invocations carry --extra-experimental-features as a belt in case flakes aren't enabled in nix.conf. --- hive-c0re/src/lifecycle.rs | 4 +- hive-c0re/src/main.rs | 1 + hive-c0re/src/meta.rs | 262 +++++++++++++++++++++++++++++++++++++ 3 files changed, 265 insertions(+), 2 deletions(-) create mode 100644 hive-c0re/src/meta.rs diff --git a/hive-c0re/src/lifecycle.rs b/hive-c0re/src/lifecycle.rs index d71ea23..31a3304 100644 --- a/hive-c0re/src/lifecycle.rs +++ b/hive-c0re/src/lifecycle.rs @@ -396,8 +396,8 @@ fn initial_agent_nix(name: &str) -> String { /// Module-only flake exposed by every agent's repo. Consumed by the /// hive-c0re-owned meta flake at `/var/lib/hyperhive/meta/` as a flake -/// input. 
Identity injection (HIVE_PORT / HIVE_LABEL / dashboard port / -/// git committer) lives in the meta flake's wrapper, not here. +/// input. Identity injection (`HIVE_PORT` / `HIVE_LABEL` / dashboard +/// port / git committer) lives in the meta flake's wrapper, not here. fn initial_flake_nix() -> &'static str { "{\n description = \"hyperhive agent\";\n inputs = { };\n outputs = { self }: {\n nixosModules.default = import ./agent.nix;\n };\n}\n" } diff --git a/hive-c0re/src/main.rs b/hive-c0re/src/main.rs index c16410d..eea5a96 100644 --- a/hive-c0re/src/main.rs +++ b/hive-c0re/src/main.rs @@ -17,6 +17,7 @@ mod dashboard; mod events_vacuum; mod lifecycle; mod manager_server; +mod meta; mod operator_questions; mod server; diff --git a/hive-c0re/src/meta.rs b/hive-c0re/src/meta.rs new file mode 100644 index 0000000..1b7ed9b --- /dev/null +++ b/hive-c0re/src/meta.rs @@ -0,0 +1,262 @@ +//! Single hive-c0re-owned flake at `/var/lib/hyperhive/meta/` that +//! consumes every agent's applied repo as a flake input and exports one +//! `nixosConfiguration` per agent. Containers run against +//! `--flake /var/lib/hyperhive/meta#<name>`; lifecycle ops here drive the +//! lock file so meta's git log is the system-wide deploy audit trail. +//! +//! Flow: +//! - `sync_agents` (idempotent) — render `flake.nix` for the current +//! agent set, init the repo on first call, relock if the rendered +//! contents changed, commit. Used by spawn / destroy / startup +//! migration. +//! - `prepare_deploy` + `finalize_deploy` / `abort_deploy` — two-phase +//! for the `request_apply_commit` path so a failed +//! `nixos-container update` leaves no orphan commit in meta. Prepare +//! writes the new lock without committing; finalize commits with the +//! deploy message; abort `git restore`s the lock back. +//! - `lock_update_hyperhive` — one-shot for the auto-update path. 
+
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result, bail};
+use tokio::process::Command;
+
+use crate::lifecycle;
+
+const META_ROOT: &str = "/var/lib/hyperhive/meta";
+const APPLIED_ROOT: &str = "/var/lib/hyperhive/applied";
+const GIT_NAME: &str = "hive-c0re";
+const GIT_EMAIL: &str = "hive-c0re@hyperhive";
+
+/// Where the manager sees this directory inside its container (RO bind).
+#[allow(dead_code)] // wired up by set_nspawn_flags in a follow-up commit
+pub const CONTAINER_MANAGER_META_MOUNT: &str = "/meta";
+
+#[derive(Debug, Clone)]
+pub struct AgentSpec {
+    pub name: String, // `nixosConfigurations` attribute; its flake input is `agent-<name>`
+    pub is_manager: bool, // picks the manager base config and the `hive-m1nd` service
+    pub port: u16, // exported to the agent's service as `HIVE_PORT`
+}
+
+#[must_use]
+pub fn meta_dir() -> PathBuf {
+    PathBuf::from(META_ROOT)
+}
+
+/// Idempotently reconcile the meta repo with the current agent set.
+/// First call inits the git repo, runs `nix flake lock`, and lands a
+/// seed commit. Subsequent calls only touch `flake.nix` when the
+/// rendered contents differ from disk; an unchanged `flake.nix` is a
+/// no-op.
+#[allow(dead_code)] // first caller lands in a later commit
+pub async fn sync_agents(
+    hyperhive_flake: &str,
+    dashboard_port: u16,
+    agents: &[AgentSpec],
+) -> Result<()> {
+    let dir = meta_dir();
+    std::fs::create_dir_all(&dir).with_context(|| format!("create {}", dir.display()))?;
+
+    let new_flake = render_flake(hyperhive_flake, dashboard_port, agents);
+    let flake_path = dir.join("flake.nix");
+    let on_disk = std::fs::read_to_string(&flake_path).unwrap_or_default();
+    let initial = !dir.join(".git").exists();
+
+    if !initial && on_disk == new_flake {
+        return Ok(());
+    }
+
+    std::fs::write(&flake_path, &new_flake)
+        .with_context(|| format!("write {}", flake_path.display()))?;
+
+    if initial {
+        git(&dir, &["init", "--initial-branch=main"]).await?;
+    }
+    nix(&dir, &["flake", "lock"]).await?;
+    git(&dir, &["add", "-A"]).await?;
+    let msg = if initial {
+        format!("seed meta from {} agent(s)", agents.len())
+    } else {
+        "regenerate meta flake".to_owned()
+    };
+    git_commit(&dir, &msg).await?;
+    Ok(())
+}
+
+/// Phase 1 of an apply-commit deploy. Updates the locked rev of
+/// `agent-<name>` to whatever `applied/<name>/main` currently points
+/// at. **Doesn't commit** — caller must follow with
+/// `finalize_deploy` on build success or `abort_deploy` on failure.
+#[allow(dead_code)] // wired up by actions::run_apply_commit in a later commit
+pub async fn prepare_deploy(name: &str) -> Result<()> {
+    let dir = meta_dir();
+    let input = format!("agent-{name}");
+    nix(&dir, &["flake", "lock", "--update-input", &input]).await
+}
+
+/// Phase 2-success. Commits the staged `flake.lock` change with a
+/// deploy-shaped message. No-op (clean working tree) is tolerated —
+/// some lock-updates resolve to the same rev that's already locked.
+#[allow(dead_code)]
+pub async fn finalize_deploy(name: &str, sha: &str, tag: &str) -> Result<()> {
+    let dir = meta_dir();
+    if git_is_clean(&dir).await? {
+        return Ok(());
+    }
+    git(&dir, &["add", "flake.lock"]).await?;
+    let short = sha.get(..12).unwrap_or(sha); // 12-char prefix; full sha instead of a panic
+    git_commit(&dir, &format!("deploy {name} {tag} {short}")).await
+}
+
+/// Phase 2-failure. Drops the uncommitted `flake.lock` change so meta
+/// stays pinned at the previously-deployed shas. The failed proposal
+/// is still captured in `applied/<name>`'s annotated `failed/<id>` tag —
+/// meta's history only carries successful deploys.
+#[allow(dead_code)]
+pub async fn abort_deploy() -> Result<()> {
+    let dir = meta_dir();
+    git(&dir, &["restore", "flake.lock"]).await
+}
+
+/// One-shot used by the auto-update path: pin the latest hyperhive
+/// rev, commit if the lock changed. Cheaper than `sync_agents`
+/// because the per-agent inputs aren't touched.
+#[allow(dead_code)]
+pub async fn lock_update_hyperhive() -> Result<()> {
+    let dir = meta_dir();
+    nix(&dir, &["flake", "lock", "--update-input", "hyperhive"]).await?;
+    if !git_is_clean(&dir).await? {
+        git(&dir, &["add", "flake.lock"]).await?;
+        git_commit(&dir, "bump hyperhive").await?;
+    }
+    Ok(())
+}
+
+fn render_flake(hyperhive_flake: &str, dashboard_port: u16, agents: &[AgentSpec]) -> String {
+    use std::fmt::Write as _;
+    let mut out = String::new();
+    out.push_str("{\n description = \"hyperhive deployed agents\";\n inputs = {\n");
+    let _ = writeln!(out, " hyperhive.url = \"{hyperhive_flake}\";");
+    for spec in agents {
+        let _ = writeln!(
+            out,
+            " agent-{}.url = \"git+file://{APPLIED_ROOT}/{}\";",
+            spec.name, spec.name,
+        );
+    }
+    out.push_str(" };\n outputs =\n { self, hyperhive, ... }@inputs:\n let\n");
+    let _ = writeln!(
+        out,
+        " dashboardPort = {dashboard_port};\n mkAgent = {{ name, isManager, port }}:"
+    );
+    out.push_str(
+        r#"        let
+          base = if isManager
+            then hyperhive.nixosConfigurations.manager
+            else hyperhive.nixosConfigurations.agent-base;
+          input = inputs."agent-${name}";
+          service = if isManager then "hive-m1nd" else "hive-ag3nt";
+        in
+        base.extendModules {
+          modules = [
+            input.nixosModules.default
+            {
+              programs.git.config.user = {
+                name = name;
+                email = "${name}@hyperhive";
+              };
+              systemd.services.${service}.environment = {
+                HIVE_PORT = toString port;
+                HIVE_LABEL = name;
+                HIVE_DASHBOARD_PORT = toString dashboardPort;
+              };
+            }
+          ];
+        };
+    in
+    {
+      nixosConfigurations = {
+"#,
+    );
+    for spec in agents {
+        let _ = writeln!(
+            out,
+            " {} = mkAgent {{ name = \"{}\"; isManager = {}; port = {}; }};",
+            spec.name,
+            spec.name,
+            if spec.is_manager { "true" } else { "false" },
+            spec.port,
+        );
+    }
+    out.push_str(" };\n };\n}\n");
+    out
+}
+
+async fn git_is_clean(dir: &Path) -> Result<bool> {
+    let out = lifecycle::git_command()
+        .current_dir(dir)
+        .args(["status", "--porcelain"])
+        .output() // exit status is folded into the result below: failure must not read as clean
+        .await
+        .with_context(|| format!("git status in {}", dir.display()))?;
+    Ok(out.status.success() && out.stdout.iter().all(u8::is_ascii_whitespace))
+}
+
+async fn git(dir: &Path, args: &[&str]) -> Result<()> {
+    let out = lifecycle::git_command()
+        .current_dir(dir)
+        .args(args)
+        .output()
+        .await
+        .with_context(|| format!("git {} in {}", args.join(" "), dir.display()))?;
+    if !out.status.success() {
+        bail!(
+            "git {} failed ({}): {}",
+            args.join(" "),
+            out.status,
+            String::from_utf8_lossy(&out.stderr).trim()
+        );
+    }
+    Ok(())
+}
+
+async fn git_commit(dir: &Path, message: &str) -> Result<()> {
+    git(
+        dir,
+        &[
+            "-c",
+            &format!("user.name={GIT_NAME}"),
+            "-c",
+            &format!("user.email={GIT_EMAIL}"),
+            "commit",
+            "-m",
+            message,
+        ],
+    )
+    .await
+}
+
+async fn nix(dir: &Path, args: &[&str]) -> Result<()> {
+    // `--extra-experimental-features` belt-and-suspenders for hosts
+    // that haven't set this in nix.conf. The hyperhive module's
+    // deploy guide assumes flakes are already enabled, but the cost
+    // of being defensive is one extra argv each call.
+    let mut all = vec!["--extra-experimental-features", "nix-command flakes"];
+    all.extend(args);
+    let out = Command::new("nix")
+        .current_dir(dir)
+        .args(&all)
+        .output()
+        .await
+        .with_context(|| format!("nix {} in {}", args.join(" "), dir.display()))?;
+    if !out.status.success() {
+        bail!(
+            "nix {} failed ({}): {}",
+            args.join(" "),
+            out.status,
+            String::from_utf8_lossy(&out.stderr).trim()
+        );
+    }
+    Ok(())
+}