meta: new hive-c0re module owns /var/lib/hyperhive/meta/

leaf module with no runtime callers yet (every public item is
#[allow(dead_code)] until lifecycle / actions / auto_update
rewire to use it). API surface:

- sync_agents — idempotent: render flake.nix for the given
  agent set, git-init on first call, nix flake lock, commit if
  anything changed.
- prepare_deploy / finalize_deploy / abort_deploy — two-phase
  for the request_apply_commit path. prepare runs nix flake
  lock --update-input agent-<n> without committing; finalize
  commits with a 'deploy <n> deployed/<id> <sha12>' message;
  abort git-restores the lock so a failed build leaves no
  orphan commit.
- lock_update_hyperhive — one-shot for the auto-update path.

flake.nix template defines mkAgent that pulls each agent's
nixosModules.default from its input and wraps with the
identity / HIVE_PORT / HIVE_LABEL / HIVE_DASHBOARD_PORT
module — what setup_applied used to generate inline. nix
invocations carry --extra-experimental-features as a
belt-and-suspenders measure in case flakes aren't enabled in nix.conf.
This commit is contained in:
müde 2026-05-16 00:22:37 +02:00
parent 5b5a93e0c6
commit 92822efe16
3 changed files with 265 additions and 2 deletions

View file

@ -396,8 +396,8 @@ fn initial_agent_nix(name: &str) -> String {
/// Module-only flake exposed by every agent's repo. Consumed by the
/// hive-c0re-owned meta flake at `/var/lib/hyperhive/meta/` as a flake
/// input. Identity injection (`HIVE_PORT` / `HIVE_LABEL` / dashboard
/// port / git committer) lives in the meta flake's wrapper, not here.
///
/// The returned text is a complete `flake.nix` with no inputs whose only
/// output is `nixosModules.default`, imported from `./agent.nix`.
fn initial_flake_nix() -> &'static str {
    // One literal per rendered line; `concat!` joins them at compile time.
    concat!(
        "{\n",
        " description = \"hyperhive agent\";\n",
        " inputs = { };\n",
        " outputs = { self }: {\n",
        " nixosModules.default = import ./agent.nix;\n",
        " };\n",
        "}\n",
    )
}

View file

@ -17,6 +17,7 @@ mod dashboard;
mod events_vacuum;
mod lifecycle;
mod manager_server;
mod meta;
mod operator_questions;
mod server;

262
hive-c0re/src/meta.rs Normal file
View file

@ -0,0 +1,262 @@
//! Single hive-c0re-owned flake at `/var/lib/hyperhive/meta/` that
//! consumes every agent's applied repo as a flake input and exports one
//! `nixosConfiguration` per agent. Containers run against
//! `--flake /var/lib/hyperhive/meta#<name>`; lifecycle ops here drive the
//! lock file so meta's git log is the system-wide deploy audit trail.
//!
//! Flow:
//! - `sync_agents` (idempotent) — render `flake.nix` for the current
//! agent set, init the repo on first call, relock if the rendered
//! contents changed, commit. Used by spawn / destroy / startup
//! migration.
//! - `prepare_deploy` + `finalize_deploy` / `abort_deploy` — two-phase
//! for the `request_apply_commit` path so a failed
//! `nixos-container update` leaves no orphan commit in meta. Prepare
//! writes the new lock without committing; finalize commits with the
//! deploy message; abort `git restore`s the lock back.
//! - `lock_update_hyperhive` — one-shot for the auto-update path.
use std::path::{Path, PathBuf};
use anyhow::{Context, Result, bail};
use tokio::process::Command;
use crate::lifecycle;
/// Directory that holds the meta flake (`flake.nix`, `flake.lock`) and
/// its git repository; returned as a `PathBuf` by `meta_dir`.
const META_ROOT: &str = "/var/lib/hyperhive/meta";
/// Parent directory of the per-agent applied repos. Each agent's flake
/// input is rendered as `git+file://{APPLIED_ROOT}/<name>`.
const APPLIED_ROOT: &str = "/var/lib/hyperhive/applied";
/// Git identity name passed as `-c user.name=…` on every meta commit.
const GIT_NAME: &str = "hive-c0re";
/// Git identity email passed as `-c user.email=…` on every meta commit.
const GIT_EMAIL: &str = "hive-c0re@hyperhive";
/// Where the manager sees this directory inside its container (RO bind).
#[allow(dead_code)] // wired up by set_nspawn_flags in a follow-up commit
pub const CONTAINER_MANAGER_META_MOUNT: &str = "/meta";
/// One agent as it appears in the rendered meta flake: an
/// `agent-<name>` flake input plus a `nixosConfigurations.<name>` entry.
#[derive(Debug, Clone)]
pub struct AgentSpec {
    /// Agent name. Interpolated verbatim into the flake input name, the
    /// applied-repo URL, the `nixosConfigurations` attribute and the
    /// `name` argument of `mkAgent`.
    pub name: String,
    /// Rendered as `isManager`; selects the `manager` base configuration
    /// and the `hive-m1nd` service instead of `agent-base` / `hive-ag3nt`.
    pub is_manager: bool,
    /// Rendered as the `port` argument of `mkAgent`, which exports it as
    /// `HIVE_PORT` in the service environment.
    pub port: u16,
}
/// Filesystem location of the meta flake repository (`META_ROOT`) as an
/// owned path.
#[must_use]
pub fn meta_dir() -> PathBuf {
    META_ROOT.into()
}
/// Idempotently reconcile the meta repo with the current agent set.
///
/// First call (no `.git` directory yet) writes `flake.nix`, inits the
/// git repo, runs `nix flake lock`, and lands a seed commit. Subsequent
/// calls only touch `flake.nix` when the rendered contents differ from
/// disk; an unchanged `flake.nix` is a no-op.
///
/// * `hyperhive_flake` — flake URL rendered as the `hyperhive` input.
/// * `dashboard_port` — rendered as `dashboardPort` in the flake.
/// * `agents` — one flake input and one `nixosConfigurations` entry is
///   rendered per element.
///
/// # Errors
///
/// Fails if the meta directory cannot be created, `flake.nix` cannot be
/// written, or any of the `git` / `nix` invocations exits non-zero.
#[allow(dead_code)] // first caller lands in a later commit
pub async fn sync_agents(
    hyperhive_flake: &str,
    dashboard_port: u16,
    agents: &[AgentSpec],
) -> Result<()> {
    let dir = meta_dir();
    std::fs::create_dir_all(&dir).with_context(|| format!("create {}", dir.display()))?;
    let new_flake = render_flake(hyperhive_flake, dashboard_port, agents);
    let flake_path = dir.join("flake.nix");
    // A missing or unreadable flake.nix reads as "" and therefore always
    // differs from the rendered text, forcing a regenerate.
    let on_disk = std::fs::read_to_string(&flake_path).unwrap_or_default();
    let initial = !dir.join(".git").exists();
    if !initial && on_disk == new_flake {
        return Ok(());
    }
    std::fs::write(&flake_path, &new_flake)
        .with_context(|| format!("write {}", flake_path.display()))?;
    if initial {
        git(&dir, &["init", "--initial-branch=main"]).await?;
    }
    // Inside a git work tree Nix evaluates the flake from the set of
    // *tracked* files only. On the first call flake.nix has just been
    // written and is still untracked, so it must be staged before
    // `nix flake lock` can see it. Staging is harmless on later calls.
    git(&dir, &["add", "flake.nix"]).await?;
    nix(&dir, &["flake", "lock"]).await?;
    // Pick up the freshly written flake.lock as well.
    git(&dir, &["add", "-A"]).await?;
    let msg = if initial {
        format!("seed meta from {} agent(s)", agents.len())
    } else {
        "regenerate meta flake".to_owned()
    };
    git_commit(&dir, &msg).await?;
    Ok(())
}
/// Phase 1 of an apply-commit deploy: re-lock the `agent-<name>` input
/// of the meta flake so it points at whatever `applied/<name>/main`
/// currently resolves to.
///
/// **Nothing is committed here.** The caller must follow up with
/// `finalize_deploy` when the build succeeds or `abort_deploy` when it
/// fails.
///
/// # Errors
///
/// Fails if the `nix flake lock --update-input` invocation cannot be
/// spawned or exits non-zero.
#[allow(dead_code)] // wired up by actions::run_apply_commit in a later commit
pub async fn prepare_deploy(name: &str) -> Result<()> {
    let meta = meta_dir();
    let agent_input = ["agent-", name].concat();
    let lock_args = ["flake", "lock", "--update-input", agent_input.as_str()];
    nix(&meta, &lock_args).await
}
/// Phase 2-success. Stages and commits the `flake.lock` change left
/// behind by `prepare_deploy`, using the message
/// `deploy <name> <tag> <sha, at most 12 bytes>`.
///
/// A clean working tree is tolerated and treated as a no-op — some
/// lock-updates resolve to the same rev that's already locked.
///
/// # Errors
///
/// Fails if the cleanliness check, `git add` or `git commit` fails.
#[allow(dead_code)]
pub async fn finalize_deploy(name: &str, sha: &str, tag: &str) -> Result<()> {
    let dir = meta_dir();
    if git_is_clean(&dir).await? {
        return Ok(());
    }
    git(&dir, &["add", "flake.lock"]).await?;
    // Checked slicing: `get` yields `None` both when `sha` is shorter
    // than 12 bytes and when byte 12 is not a char boundary, so this
    // falls back to the whole string instead of panicking.
    let short = sha.get(..12).unwrap_or(sha);
    git_commit(&dir, &format!("deploy {name} {tag} {short}")).await
}
/// Phase 2-failure. Runs `git restore flake.lock` in the meta repo to
/// drop the uncommitted lock change, so meta stays pinned at the
/// previously-deployed shas. The failed proposal is still captured in
/// `applied/<n>`'s annotated `failed/<id>` tag — meta's history only
/// carries successful deploys.
///
/// # Errors
///
/// Fails if `git restore` cannot be spawned or exits non-zero.
#[allow(dead_code)]
pub async fn abort_deploy() -> Result<()> {
    let meta = meta_dir();
    let restore_lock = ["restore", "flake.lock"];
    git(&meta, &restore_lock).await
}
/// One-shot used by the auto-update path: re-lock only the `hyperhive`
/// input and commit `flake.lock` as "bump hyperhive" if the working
/// tree is no longer clean afterwards. Cheaper than `sync_agents`
/// because the per-agent inputs aren't touched.
///
/// # Errors
///
/// Fails if the `nix` lock update or any of the follow-up `git` calls
/// fails.
#[allow(dead_code)]
pub async fn lock_update_hyperhive() -> Result<()> {
    let meta = meta_dir();
    nix(&meta, &["flake", "lock", "--update-input", "hyperhive"]).await?;
    // Nothing moved: the lock already pointed at the latest rev.
    if git_is_clean(&meta).await? {
        return Ok(());
    }
    git(&meta, &["add", "flake.lock"]).await?;
    git_commit(&meta, "bump hyperhive").await
}
/// Render the full text of the meta `flake.nix`.
///
/// The output consists of, in order: the `inputs` attrset (the
/// `hyperhive` input followed by one `agent-<name>` input per agent,
/// each a `git+file://` URL under `APPLIED_ROOT`), the `mkAgent` helper
/// with `dashboardPort` bound to `dashboard_port`, and one
/// `nixosConfigurations.<name> = mkAgent { … }` line per agent. Agent
/// names and `hyperhive_flake` are interpolated verbatim, without any
/// escaping.
fn render_flake(hyperhive_flake: &str, dashboard_port: u16, agents: &[AgentSpec]) -> String {
    // Static body of `mkAgent` up to the opening of `nixosConfigurations`.
    // Kept as a raw string because it is full of Nix `${…}` and quotes.
    const MK_AGENT_BODY: &str = r#" let
base = if isManager
then hyperhive.nixosConfigurations.manager
else hyperhive.nixosConfigurations.agent-base;
input = inputs."agent-${name}";
service = if isManager then "hive-m1nd" else "hive-ag3nt";
in
base.extendModules {
modules = [
input.nixosModules.default
{
programs.git.config.user = {
name = name;
email = "${name}@hyperhive";
};
systemd.services.${service}.environment = {
HIVE_PORT = toString port;
HIVE_LABEL = name;
HIVE_DASHBOARD_PORT = toString dashboardPort;
};
}
];
};
in
{
nixosConfigurations = {
"#;

    let hyperhive_input = format!(" hyperhive.url = \"{hyperhive_flake}\";\n");

    // One flake input per agent, pointing at its applied repo.
    let agent_inputs: String = agents
        .iter()
        .map(|agent| {
            format!(
                " agent-{name}.url = \"git+file://{APPLIED_ROOT}/{name}\";\n",
                name = agent.name,
            )
        })
        .collect();

    let mk_agent_head = format!(
        " dashboardPort = {dashboard_port};\n mkAgent = {{ name, isManager, port }}:\n"
    );

    // One `nixosConfigurations` attribute per agent; `bool` displays as
    // `true` / `false`, which is exactly the Nix spelling.
    let configurations: String = agents
        .iter()
        .map(|agent| {
            format!(
                " {name} = mkAgent {{ name = \"{name}\"; isManager = {manager}; port = {port}; }};\n",
                name = agent.name,
                manager = agent.is_manager,
                port = agent.port,
            )
        })
        .collect();

    let sections = [
        "{\n description = \"hyperhive deployed agents\";\n inputs = {\n",
        hyperhive_input.as_str(),
        agent_inputs.as_str(),
        " };\n outputs =\n { self, hyperhive, ... }@inputs:\n let\n",
        mk_agent_head.as_str(),
        MK_AGENT_BODY,
        configurations.as_str(),
        " };\n };\n}\n",
    ];
    let rendered = sections.concat();
    rendered
}
async fn git_is_clean(dir: &Path) -> Result<bool> {
let out = lifecycle::git_command()
.current_dir(dir)
.args(["status", "--porcelain"])
.output()
.await
.with_context(|| format!("git status in {}", dir.display()))?;
Ok(out.stdout.iter().all(u8::is_ascii_whitespace))
}
/// Run `git <args…>` with `dir` as the working directory and wait for
/// it to finish. Output is captured rather than inherited.
///
/// The command is built from `lifecycle::git_command()`.
///
/// # Errors
///
/// Fails if the process cannot be spawned, or if it exits non-zero; in
/// the latter case the error carries the exit status and trimmed stderr.
async fn git(dir: &Path, args: &[&str]) -> Result<()> {
    let rendered = args.join(" ");
    let output = lifecycle::git_command()
        .current_dir(dir)
        .args(args)
        .output()
        .await
        .with_context(|| format!("git {rendered} in {}", dir.display()))?;
    if output.status.success() {
        return Ok(());
    }
    let stderr = String::from_utf8_lossy(&output.stderr);
    bail!("git {rendered} failed ({}): {}", output.status, stderr.trim());
}
/// Commit whatever is staged in `dir` with `message`, forcing the git
/// identity to `GIT_NAME` / `GIT_EMAIL` through per-invocation `-c`
/// overrides.
///
/// # Errors
///
/// Propagates any failure from the underlying `git commit` call.
async fn git_commit(dir: &Path, message: &str) -> Result<()> {
    let name_override = format!("user.name={GIT_NAME}");
    let email_override = format!("user.email={GIT_EMAIL}");
    let argv = [
        "-c",
        name_override.as_str(),
        "-c",
        email_override.as_str(),
        "commit",
        "-m",
        message,
    ];
    git(dir, &argv).await
}
/// Run `nix <args…>` with `dir` as the working directory and wait for
/// it to finish. Output is captured rather than inherited.
///
/// Every invocation is prefixed with
/// `--extra-experimental-features "nix-command flakes"`.
///
/// # Errors
///
/// Fails if the process cannot be spawned, or if it exits non-zero; in
/// the latter case the error carries the exit status and trimmed stderr.
/// The prefix flags are not echoed in error messages.
async fn nix(dir: &Path, args: &[&str]) -> Result<()> {
    // Belt-and-suspenders for hosts that haven't enabled flakes in
    // nix.conf. The hyperhive module's deploy guide assumes they are
    // enabled already; being defensive costs one extra flag per call.
    let mut argv = Vec::with_capacity(args.len() + 2);
    argv.push("--extra-experimental-features");
    argv.push("nix-command flakes");
    argv.extend_from_slice(args);

    let rendered = args.join(" ");
    let output = Command::new("nix")
        .current_dir(dir)
        .args(&argv)
        .output()
        .await
        .with_context(|| format!("nix {rendered} in {}", dir.display()))?;
    if output.status.success() {
        return Ok(());
    }
    let stderr = String::from_utf8_lossy(&output.stderr);
    bail!("nix {rendered} failed ({}): {}", output.status, stderr.trim());
}