hyperhive/hive-c0re/src/lifecycle.rs

251 lines
8 KiB
Rust

//! `nixos-container` lifecycle + per-agent config flake generation.
use std::path::Path;
use anyhow::{Context, Result, bail};
use tokio::process::Command;
/// Sub-agent container prefix. `nixos-container` caps the total container name
/// at 11 chars (it gets encoded into network interface names), so the agent
/// name itself can be at most `MAX_AGENT_NAME` chars.
pub const AGENT_PREFIX: &str = "h-";
/// Longest agent name accepted by `validate`: the 11-char container-name cap
/// minus the two-char `AGENT_PREFIX`.
pub const MAX_AGENT_NAME: usize = 9;
/// Container name of the manager (a separate slot from sub-agents).
pub const MANAGER_NAME: &str = "hm1nd";
/// Mount point of the per-agent runtime directory inside the container.
pub const CONTAINER_RUNTIME_MOUNT: &str = "/run/hive";
/// `user.name` passed to `git` for the commits `setup_config` creates.
const GIT_NAME: &str = "hive-c0re";
/// `user.email` passed to `git` for the commits `setup_config` creates.
const GIT_EMAIL: &str = "hive-c0re@hyperhive";
pub fn container_name(name: &str) -> String {
format!("{AGENT_PREFIX}{name}")
}
/// Check that `name` is usable as a sub-agent name.
///
/// The name ends up in the container name, in the path of the container's
/// `.conf` file and in generated Nix source, so it is restricted to ASCII
/// letters, digits, `-` and `_`. Because only ASCII passes, the byte length
/// checked below equals the character count reported in the error.
///
/// # Errors
/// Fails if `name` is empty, contains any other character, or is longer than
/// `MAX_AGENT_NAME`.
fn validate(name: &str) -> Result<()> {
    if name.is_empty() {
        bail!("agent name must not be empty");
    }
    // Reject path separators, quotes, whitespace, non-ASCII, etc. before the
    // name is used to build file paths or Nix code.
    if let Some(bad) = name
        .chars()
        .find(|&c| !(c.is_ascii_alphanumeric() || c == '-' || c == '_'))
    {
        bail!(
            "agent name '{name}' contains invalid character {bad:?}; \
             allowed are ASCII letters, digits, '-' and '_'"
        );
    }
    if name.len() > MAX_AGENT_NAME {
        bail!(
            "agent name '{name}' is too long ({} chars); max {MAX_AGENT_NAME}",
            name.len()
        );
    }
    Ok(())
}
/// Create and start the container for sub-agent `name`.
///
/// In order: validates `name`, (re)generates the config flake in
/// `config_dir`, runs `nixos-container create` against `<config_dir>#default`,
/// writes the bind-mount flag for `agent_dir` into the container's `.conf`,
/// and finally runs `nixos-container start`.
///
/// # Errors
/// Returns the first error from any of those steps; later steps are skipped.
pub async fn spawn(
    name: &str,
    hyperhive_flake: &str,
    agent_dir: &Path,
    config_dir: &Path,
) -> Result<()> {
    validate(name)?;
    setup_config(config_dir, name, hyperhive_flake).await?;

    let machine = container_name(name);
    let flake = format!("{}#default", config_dir.display());

    run(&["create", machine.as_str(), "--flake", flake.as_str()]).await?;
    // The `.conf` file is read back here, so this must follow `create`.
    set_nspawn_flags(machine.as_str(), agent_dir)?;
    run(&["start", machine.as_str()]).await?;
    Ok(())
}
/// Stop the container of sub-agent `name` via `nixos-container stop`.
///
/// # Errors
/// Fails if `name` does not pass `validate` or the command fails.
pub async fn kill(name: &str) -> Result<()> {
    validate(name)?;
    let target = container_name(name);
    let argv = ["stop", target.as_str()];
    run(&argv).await
}
/// Re-apply the configuration of an existing sub-agent container.
///
/// In order: validates `name`, regenerates the config flake in `config_dir`,
/// rewrites the bind-mount flag for `agent_dir`, runs `nixos-container update`
/// against `<config_dir>#default`, then stops and starts the container.
///
/// # Errors
/// Returns the first error from any of those steps; later steps are skipped.
pub async fn rebuild(
    name: &str,
    hyperhive_flake: &str,
    agent_dir: &Path,
    config_dir: &Path,
) -> Result<()> {
    validate(name)?;
    setup_config(config_dir, name, hyperhive_flake).await?;

    let machine = container_name(name);
    set_nspawn_flags(machine.as_str(), agent_dir)?;

    let flake = format!("{}#default", config_dir.display());
    run(&["update", machine.as_str(), "--flake", flake.as_str()]).await?;

    // Restart so any nspawn-level changes (bind mounts, networking, etc.) apply.
    for action in ["stop", "start"] {
        run(&[action, machine.as_str()]).await?;
    }
    Ok(())
}
/// List the hive's containers as reported by `nixos-container list`.
///
/// Each output line is trimmed; a line is kept when it equals `MANAGER_NAME`
/// or starts with `AGENT_PREFIX`. Order follows the command's output.
///
/// # Errors
/// Fails if the command cannot be invoked or exits unsuccessfully.
pub async fn list() -> Result<Vec<String>> {
    let output = Command::new("nixos-container")
        .arg("list")
        .output()
        .await
        .context("invoke nixos-container list")?;

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        bail!(
            "nixos-container list exited with status {}: {}",
            output.status,
            stderr.trim()
        );
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    let mut ours = Vec::new();
    for raw in stdout.lines() {
        let entry = raw.trim();
        if entry == MANAGER_NAME || entry.starts_with(AGENT_PREFIX) {
            ours.push(entry.to_owned());
        }
    }
    Ok(ours)
}
/// Ensure `config_dir` exists as a git repo containing a per-agent flake. The
/// `flake.nix` is rewritten every call (so a new hyperhive store path
/// propagates on rebuild); `agent.nix` is written only the first time
/// (manager-editable thereafter).
///
/// `name` and `hyperhive_flake` are escaped before being placed inside the
/// double-quoted Nix strings of `flake.nix`, so quotes, backslashes or `${`
/// in either value cannot alter the generated expression. After writing, all
/// changes are staged and, if anything is staged, committed as `GIT_NAME` /
/// `GIT_EMAIL` with the message `hive-c0re sync`.
///
/// # Errors
/// Fails if the directory or files cannot be written, or if any `git`
/// invocation fails.
pub async fn setup_config(config_dir: &Path, name: &str, hyperhive_flake: &str) -> Result<()> {
    std::fs::create_dir_all(config_dir)
        .with_context(|| format!("create {}", config_dir.display()))?;

    let flake_path = config_dir.join("flake.nix");
    let flake_body = format!(
        r#"{{
description = "hyperhive sub-agent {name}";
inputs.hyperhive.url = "{hyperhive_flake}";
outputs =
{{ hyperhive, ... }}:
{{
nixosConfigurations.default = hyperhive.nixosConfigurations.agent-base.extendModules {{
modules = [ ./agent.nix ];
}};
}};
}}
"#,
        name = nix_string_escape(name),
        hyperhive_flake = nix_string_escape(hyperhive_flake),
    );
    std::fs::write(&flake_path, flake_body)
        .with_context(|| format!("write {}", flake_path.display()))?;

    let agent_path = config_dir.join("agent.nix");
    if !agent_path.exists() {
        // `name` lands inside a `#` line comment here; a control character such
        // as a newline would end the comment, so replace those with spaces.
        let comment_name = name.replace(|c: char| c.is_control(), " ");
        let initial = format!(
            "{{ ... }}:\n{{\n # Per-agent overrides for {name}. The manager edits this\n # file (and commits) to customise the agent's NixOS config.\n}}\n",
            name = comment_name,
        );
        std::fs::write(&agent_path, initial)
            .with_context(|| format!("write {}", agent_path.display()))?;
    }

    if !config_dir.join(".git").exists() {
        git(config_dir, &["init", "--initial-branch=main"]).await?;
    }
    git(config_dir, &["add", "-A"]).await?;
    // `git diff --cached --quiet` exits 0 only when nothing is staged.
    let clean = git_status(config_dir, &["diff", "--cached", "--quiet"]).await?;
    if !clean {
        git(
            config_dir,
            &[
                "-c",
                &format!("user.name={GIT_NAME}"),
                "-c",
                &format!("user.email={GIT_EMAIL}"),
                "commit",
                "-m",
                "hive-c0re sync",
            ],
        )
        .await?;
    }
    Ok(())
}

/// Escape `raw` for use inside a double-quoted Nix string literal: backslash,
/// double quote and the `${` interpolation opener each get a leading `\`.
fn nix_string_escape(raw: &str) -> String {
    // Backslashes first, so the ones inserted by the later steps stay single.
    raw.replace('\\', "\\\\")
        .replace('"', "\\\"")
        .replace("${", "\\${")
}
/// Verify `commit_ref` exists in the config repo, advance `main` to it, and
/// reset the working tree. Caller is responsible for the subsequent rebuild.
pub async fn apply_commit(config_dir: &Path, commit_ref: &str) -> Result<()> {
let st = Command::new("git")
.current_dir(config_dir)
.args(["cat-file", "-e", commit_ref])
.status()
.await
.with_context(|| format!("git cat-file in {}", config_dir.display()))?;
if !st.success() {
bail!(
"commit {commit_ref} not found in {}",
config_dir.display()
);
}
git(config_dir, &["update-ref", "refs/heads/main", commit_ref]).await?;
git(config_dir, &["reset", "--hard", commit_ref]).await?;
Ok(())
}
/// Run `git <args>` with `dir` as working directory, capturing its output.
///
/// # Errors
/// Fails if the process cannot be started, or if it exits unsuccessfully; in
/// the latter case the error carries the exit status and trimmed stderr.
async fn git(dir: &Path, args: &[&str]) -> Result<()> {
    let rendered = args.join(" ");
    let output = Command::new("git")
        .current_dir(dir)
        .args(args)
        .output()
        .await
        .with_context(|| format!("git {rendered} in {}", dir.display()))?;

    if output.status.success() {
        return Ok(());
    }
    let stderr = String::from_utf8_lossy(&output.stderr);
    bail!(
        "git {rendered} failed ({}): {}",
        output.status,
        stderr.trim()
    )
}
/// Run `git <args>` in `dir` and report whether it exited with status 0.
///
/// A non-zero exit is a normal `Ok(false)`; only failure to run the process
/// at all is an error.
async fn git_status(dir: &Path, args: &[&str]) -> Result<bool> {
    let mut cmd = Command::new("git");
    cmd.current_dir(dir).args(args);
    let exit = cmd
        .status()
        .await
        .with_context(|| format!("git {} in {}", args.join(" "), dir.display()))?;
    Ok(exit.success())
}
/// Idempotently rewrite the `EXTRA_NSPAWN_FLAGS` line in
/// `/etc/nixos-containers/<container>.conf`. The start script expands this
/// variable unquoted into the `systemd-nspawn` command.
fn set_nspawn_flags(container: &str, agent_dir: &Path) -> Result<()> {
let path = format!("/etc/nixos-containers/{container}.conf");
let original = std::fs::read_to_string(&path).with_context(|| format!("read {path}"))?;
let flag = format!(
"EXTRA_NSPAWN_FLAGS=\"--bind={}:{CONTAINER_RUNTIME_MOUNT}\"",
agent_dir.display()
);
let mut lines: Vec<String> = original
.lines()
.filter(|line| !line.trim_start().starts_with("EXTRA_NSPAWN_FLAGS="))
.map(str::to_owned)
.collect();
lines.push(flag);
let mut content = lines.join("\n");
content.push('\n');
std::fs::write(&path, content).with_context(|| format!("write {path}"))?;
tracing::info!(%path, "set EXTRA_NSPAWN_FLAGS");
Ok(())
}
/// Run `nixos-container <args>`, forwarding its captured output to `tracing`.
///
/// Non-empty stdout is logged at info level and non-empty stderr at warn
/// level (both trimmed, target `nixos-container`), regardless of exit status.
///
/// # Errors
/// Fails if the process cannot be started, or if it exits unsuccessfully; in
/// the latter case the error carries the exit status and trimmed stderr.
async fn run(args: &[&str]) -> Result<()> {
    let cmdline = args.join(" ");
    let output = Command::new("nixos-container")
        .args(args)
        .output()
        .await
        .with_context(|| format!("invoke nixos-container {cmdline}"))?;

    let stdout_raw = String::from_utf8_lossy(&output.stdout);
    let stderr_raw = String::from_utf8_lossy(&output.stderr);
    let (out_text, err_text) = (stdout_raw.trim(), stderr_raw.trim());

    if !out_text.is_empty() {
        tracing::info!(target: "nixos-container", "{}", out_text);
    }
    if !err_text.is_empty() {
        tracing::warn!(target: "nixos-container", "{}", err_text);
    }

    if output.status.success() {
        return Ok(());
    }
    bail!(
        "nixos-container {cmdline} failed ({}): {}",
        output.status,
        err_text
    )
}