Rebuild now runs `sync_agents` (idempotent — a no-op when the rendered flake matches disk; as a side effect it recovers from a divergent meta repo), followed by `lock_update_for_rebuild`, which relocks just this agent's input and commits the lock change, if any. The flake ref for `nixos-container update` flips from `applied/<name>#default` to `meta#<name>`. The new helper `meta::lock_update_for_rebuild` is single-phase (no separate finalize): rebuild has no failure-revert semantics — it always wants the latest `applied/<name>/main`. Spawn already syncs meta before container create; rebuild now picks up the meta side on every manual ↻ R3BU1LD.
278 lines
9.5 KiB
Rust
278 lines
9.5 KiB
Rust
//! Single hive-c0re-owned flake at `/var/lib/hyperhive/meta/` that
|
|
//! consumes every agent's applied repo as a flake input and exports one
|
|
//! `nixosConfiguration` per agent. Containers run against
|
|
//! `--flake /var/lib/hyperhive/meta#<name>`; lifecycle ops here drive the
|
|
//! lock file so meta's git log is the system-wide deploy audit trail.
|
|
//!
|
|
//! Flow:
|
|
//! - `sync_agents` (idempotent) — render `flake.nix` for the current
|
|
//! agent set, init the repo on first call, relock if the rendered
|
|
//! contents changed, commit. Used by spawn / destroy / startup
|
|
//! migration.
|
|
//! - `prepare_deploy` + `finalize_deploy` / `abort_deploy` — two-phase
|
|
//! for the `request_apply_commit` path so a failed
|
|
//! `nixos-container update` leaves no orphan commit in meta. Prepare
|
|
//! writes the new lock without committing; finalize commits with the
|
|
//! deploy message; abort `git restore`s the lock back.
|
|
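//! - `lock_update_hyperhive` — one-shot for the auto-update path.
//! - `lock_update_for_rebuild` — one-shot for the manual-rebuild path:
//!   relock a single agent's input and commit the lock change if any.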
|
|
|
|
use std::path::{Path, PathBuf};
|
|
|
|
use anyhow::{Context, Result, bail};
|
|
use tokio::process::Command;
|
|
|
|
use crate::lifecycle;
|
|
|
|
/// Host-side directory of the meta flake repo; returned by `meta_dir`.
const META_ROOT: &str = "/var/lib/hyperhive/meta";
|
|
/// Parent directory of the per-agent applied repos. `render_flake`
/// points each `agent-<name>` input at `git+file://{APPLIED_ROOT}/<name>`.
const APPLIED_ROOT: &str = "/var/lib/hyperhive/applied";
|
|
/// Author name passed as `-c user.name=…` on every commit made by `git_commit`.
const GIT_NAME: &str = "hive-c0re";
|
|
/// Author email passed as `-c user.email=…` on every commit made by `git_commit`.
const GIT_EMAIL: &str = "hive-c0re@hyperhive";
|
|
|
|
/// Where the manager sees the meta directory inside its container (RO bind).
|
|
#[allow(dead_code)] // wired up by set_nspawn_flags in a follow-up commit
|
|
pub const CONTAINER_MANAGER_META_MOUNT: &str = "/meta";
|
|
|
|
/// Description of one agent as it is rendered into the meta flake.
#[derive(Clone, Debug)]
pub struct AgentSpec {
    /// Agent name. Used for the `agent-<name>` flake input, the
    /// `nixosConfigurations.<name>` output, and the `HIVE_LABEL` value.
    pub name: String,
    /// `true` selects the `manager` base configuration and the
    /// `hive-m1nd` service; `false` selects `agent-base` and `hive-ag3nt`.
    pub is_manager: bool,
    /// Port exported to the agent's service as `HIVE_PORT`.
    pub port: u16,
}
|
|
|
|
#[must_use]
|
|
pub fn meta_dir() -> PathBuf {
|
|
PathBuf::from(META_ROOT)
|
|
}
|
|
|
|
/// Idempotently reconcile the meta repo with the current agent set.
|
|
/// First call inits the git repo, runs `nix flake lock`, and lands a
|
|
/// seed commit. Subsequent calls only touch `flake.nix` when the
|
|
/// rendered contents differ from disk; an unchanged `flake.nix` is a
|
|
/// no-op.
|
|
#[allow(dead_code)] // first caller lands in a later commit
|
|
pub async fn sync_agents(
|
|
hyperhive_flake: &str,
|
|
dashboard_port: u16,
|
|
agents: &[AgentSpec],
|
|
) -> Result<()> {
|
|
let dir = meta_dir();
|
|
std::fs::create_dir_all(&dir).with_context(|| format!("create {}", dir.display()))?;
|
|
|
|
let new_flake = render_flake(hyperhive_flake, dashboard_port, agents);
|
|
let flake_path = dir.join("flake.nix");
|
|
let on_disk = std::fs::read_to_string(&flake_path).unwrap_or_default();
|
|
let initial = !dir.join(".git").exists();
|
|
|
|
if !initial && on_disk == new_flake {
|
|
return Ok(());
|
|
}
|
|
|
|
std::fs::write(&flake_path, &new_flake)
|
|
.with_context(|| format!("write {}", flake_path.display()))?;
|
|
|
|
if initial {
|
|
git(&dir, &["init", "--initial-branch=main"]).await?;
|
|
}
|
|
nix(&dir, &["flake", "lock"]).await?;
|
|
git(&dir, &["add", "-A"]).await?;
|
|
let msg = if initial {
|
|
format!("seed meta from {} agent(s)", agents.len())
|
|
} else {
|
|
"regenerate meta flake".to_owned()
|
|
};
|
|
git_commit(&dir, &msg).await?;
|
|
Ok(())
|
|
}
|
|
|
|
/// Phase 1 of an apply-commit deploy. Updates the locked rev of
|
|
/// `agent-<name>` to whatever `applied/<name>/main` currently points
|
|
/// at. **Doesn't commit** — caller must follow with
|
|
/// `finalize_deploy` on build success or `abort_deploy` on failure.
|
|
#[allow(dead_code)] // wired up by actions::run_apply_commit in a later commit
|
|
pub async fn prepare_deploy(name: &str) -> Result<()> {
|
|
let dir = meta_dir();
|
|
let input = format!("agent-{name}");
|
|
nix(&dir, &["flake", "lock", "--update-input", &input]).await
|
|
}
|
|
|
|
/// Phase 2-success. Commits the staged `flake.lock` change with a
|
|
/// deploy-shaped message. No-op (clean working tree) is tolerated —
|
|
/// some lock-updates resolve to the same rev that's already locked.
|
|
#[allow(dead_code)]
|
|
pub async fn finalize_deploy(name: &str, sha: &str, tag: &str) -> Result<()> {
|
|
let dir = meta_dir();
|
|
if git_is_clean(&dir).await? {
|
|
return Ok(());
|
|
}
|
|
git(&dir, &["add", "flake.lock"]).await?;
|
|
let short = &sha[..sha.len().min(12)];
|
|
git_commit(&dir, &format!("deploy {name} {tag} {short}")).await
|
|
}
|
|
|
|
/// Phase 2-failure. Drops the uncommitted `flake.lock` change so meta
|
|
/// stays pinned at the previously-deployed shas. The failed proposal
|
|
/// is still captured in `applied/<n>`'s annotated `failed/<id>` tag —
|
|
/// meta's history only carries successful deploys.
|
|
#[allow(dead_code)]
|
|
pub async fn abort_deploy() -> Result<()> {
|
|
let dir = meta_dir();
|
|
git(&dir, &["restore", "flake.lock"]).await
|
|
}
|
|
|
|
/// One-shot used by the manual-rebuild path: relock just one
|
|
/// agent's input and commit the lock change if any. Single-phase
|
|
/// (no separate finalize) because rebuild has no failure-revert
|
|
/// semantics — it always wants the latest main.
|
|
#[allow(dead_code)] // wired up by lifecycle::rebuild in this commit
|
|
pub async fn lock_update_for_rebuild(name: &str) -> Result<()> {
|
|
let dir = meta_dir();
|
|
let input = format!("agent-{name}");
|
|
nix(&dir, &["flake", "lock", "--update-input", &input]).await?;
|
|
if !git_is_clean(&dir).await? {
|
|
git(&dir, &["add", "flake.lock"]).await?;
|
|
git_commit(&dir, &format!("rebuild {name}: lock update")).await?;
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// One-shot used by the auto-update path: pin the latest hyperhive
|
|
/// rev, commit if the lock changed. Cheaper than `sync_agents`
|
|
/// because the per-agent inputs aren't touched.
|
|
#[allow(dead_code)]
|
|
pub async fn lock_update_hyperhive() -> Result<()> {
|
|
let dir = meta_dir();
|
|
nix(&dir, &["flake", "lock", "--update-input", "hyperhive"]).await?;
|
|
if !git_is_clean(&dir).await? {
|
|
git(&dir, &["add", "flake.lock"]).await?;
|
|
git_commit(&dir, "bump hyperhive").await?;
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// Renders the full text of the meta `flake.nix` for the given agent set.
///
/// The generated flake has:
/// - a `hyperhive` input whose URL is `hyperhive_flake`;
/// - one `agent-<name>` input per agent, pointing at
///   `git+file://{APPLIED_ROOT}/<name>`;
/// - a `mkAgent` helper that extends hyperhive's `manager` or
///   `agent-base` configuration with the agent input's
///   `nixosModules.default` plus an inline module setting the git
///   identity and the `HIVE_PORT` / `HIVE_LABEL` / `HIVE_DASHBOARD_PORT`
///   environment of the `hive-m1nd` or `hive-ag3nt` service;
/// - one `nixosConfigurations.<name>` entry per agent.
///
/// Output depends only on the arguments, which is what lets
/// `sync_agents` compare the result with the file on disk.
///
/// NOTE(review): `hyperhive_flake` and agent names are interpolated
/// verbatim, with no Nix quoting or escaping — presumably callers only
/// pass names that are valid Nix identifiers; confirm against callers.
fn render_flake(hyperhive_flake: &str, dashboard_port: u16, agents: &[AgentSpec]) -> String {
|
|
use std::fmt::Write as _;
|
|
let mut out = String::new();
|
|
// Flake header and start of the `inputs` attribute set.
out.push_str("{\n description = \"hyperhive deployed agents\";\n inputs = {\n");
|
|
// The `writeln!` results are discarded throughout: the target is a `String`.
let _ = writeln!(out, " hyperhive.url = \"{hyperhive_flake}\";");
|
|
// One flake input per agent, sourced from that agent's applied repo.
for spec in agents {
|
|
let _ = writeln!(
|
|
out,
|
|
" agent-{}.url = \"git+file://{APPLIED_ROOT}/{}\";",
|
|
spec.name, spec.name,
|
|
);
|
|
}
|
|
// Close `inputs`, open `outputs` and its `let` bindings.
out.push_str(" };\n outputs =\n { self, hyperhive, ... }@inputs:\n let\n");
|
|
let _ = writeln!(
|
|
out,
|
|
" dashboardPort = {dashboard_port};\n mkAgent = {{ name, isManager, port }}:"
|
|
);
|
|
// Static body of `mkAgent` and the opening of `nixosConfigurations`;
// a raw string so the Nix `${...}` and quotes need no Rust escaping.
out.push_str(
|
|
r#" let
|
|
base = if isManager
|
|
then hyperhive.nixosConfigurations.manager
|
|
else hyperhive.nixosConfigurations.agent-base;
|
|
input = inputs."agent-${name}";
|
|
service = if isManager then "hive-m1nd" else "hive-ag3nt";
|
|
in
|
|
base.extendModules {
|
|
modules = [
|
|
input.nixosModules.default
|
|
{
|
|
programs.git.config.user = {
|
|
name = name;
|
|
email = "${name}@hyperhive";
|
|
};
|
|
systemd.services.${service}.environment = {
|
|
HIVE_PORT = toString port;
|
|
HIVE_LABEL = name;
|
|
HIVE_DASHBOARD_PORT = toString dashboardPort;
|
|
};
|
|
}
|
|
];
|
|
};
|
|
in
|
|
{
|
|
nixosConfigurations = {
|
|
"#,
|
|
);
|
|
// One `nixosConfigurations.<name>` entry per agent.
for spec in agents {
|
|
let _ = writeln!(
|
|
out,
|
|
" {} = mkAgent {{ name = \"{}\"; isManager = {}; port = {}; }};",
|
|
spec.name,
|
|
spec.name,
|
|
if spec.is_manager { "true" } else { "false" },
|
|
spec.port,
|
|
);
|
|
}
|
|
// Close `nixosConfigurations`, the outputs set, and the flake.
out.push_str(" };\n };\n}\n");
|
|
out
|
|
}
|
|
|
|
async fn git_is_clean(dir: &Path) -> Result<bool> {
|
|
let out = lifecycle::git_command()
|
|
.current_dir(dir)
|
|
.args(["status", "--porcelain"])
|
|
.output()
|
|
.await
|
|
.with_context(|| format!("git status in {}", dir.display()))?;
|
|
Ok(out.stdout.iter().all(u8::is_ascii_whitespace))
|
|
}
|
|
|
|
async fn git(dir: &Path, args: &[&str]) -> Result<()> {
|
|
let out = lifecycle::git_command()
|
|
.current_dir(dir)
|
|
.args(args)
|
|
.output()
|
|
.await
|
|
.with_context(|| format!("git {} in {}", args.join(" "), dir.display()))?;
|
|
if !out.status.success() {
|
|
bail!(
|
|
"git {} failed ({}): {}",
|
|
args.join(" "),
|
|
out.status,
|
|
String::from_utf8_lossy(&out.stderr).trim()
|
|
);
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
async fn git_commit(dir: &Path, message: &str) -> Result<()> {
|
|
git(
|
|
dir,
|
|
&[
|
|
"-c",
|
|
&format!("user.name={GIT_NAME}"),
|
|
"-c",
|
|
&format!("user.email={GIT_EMAIL}"),
|
|
"commit",
|
|
"-m",
|
|
message,
|
|
],
|
|
)
|
|
.await
|
|
}
|
|
|
|
async fn nix(dir: &Path, args: &[&str]) -> Result<()> {
|
|
// `--extra-experimental-features` belt-and-suspenders for hosts
|
|
// that haven't set this in nix.conf. The hyperhive module's
|
|
// deploy guide assumes flakes are already enabled, but the cost
|
|
// of being defensive is one extra argv each call.
|
|
let mut all = vec!["--extra-experimental-features", "nix-command flakes"];
|
|
all.extend(args);
|
|
let out = Command::new("nix")
|
|
.current_dir(dir)
|
|
.args(&all)
|
|
.output()
|
|
.await
|
|
.with_context(|| format!("nix {} in {}", args.join(" "), dir.display()))?;
|
|
if !out.status.success() {
|
|
bail!(
|
|
"nix {} failed ({}): {}",
|
|
args.join(" "),
|
|
out.status,
|
|
String::from_utf8_lossy(&out.stderr).trim()
|
|
);
|
|
}
|
|
Ok(())
|
|
}
|