//! `nixos-container` lifecycle + per-agent config flake generation. use std::path::Path; use anyhow::{Context, Result, bail}; use tokio::process::Command; /// Sub-agent container prefix. `nixos-container` caps the total container name /// at 11 chars (it gets encoded into network interface names), so the agent /// name itself can be at most `MAX_AGENT_NAME` chars. pub const AGENT_PREFIX: &str = "h-"; pub const MAX_AGENT_NAME: usize = 9; /// Container name of the manager. Lives in the same path scheme as sub-agents /// (`/var/lib/hyperhive/agents/hm1nd/`, `/var/lib/hyperhive/applied/hm1nd/`), /// but its container has no `h-` prefix and extends a different /// nixosConfiguration (`manager`, not `agent-base`). pub const MANAGER_NAME: &str = "hm1nd"; /// Web UI port reserved for the manager (sub-agents hash into 8100..8999). pub const MANAGER_PORT: u16 = 8000; /// Mount point of the per-agent runtime directory inside the container. pub const CONTAINER_RUNTIME_MOUNT: &str = "/run/hive"; /// Mount point of the per-agent Claude credentials dir inside the container. /// Persistent across destroy/recreate so OAuth login survives. pub const CONTAINER_CLAUDE_MOUNT: &str = "/root/.claude"; /// Mount point of the per-agent durable knowledge dir inside the container. /// Agents are told (system prompt) to keep `notes.md` and any other scratch /// state here; persists across destroy/recreate. pub const CONTAINER_NOTES_MOUNT: &str = "/state"; const GIT_NAME: &str = "hive-c0re"; const GIT_EMAIL: &str = "hive-c0re@hyperhive"; /// Sub-agent web UI port range. Deterministic from the agent's name (FNV-1a /// hash mod range size), so the dashboard can compute the same port without /// asking hive-c0re. const WEB_PORT_BASE: u16 = 8100; const WEB_PORT_RANGE: u16 = 900; /// Default resource caps applied to every managed container via a systemd /// drop-in under `/run/systemd/system/container@.service.d/`. 
const DEFAULT_MEMORY_MAX: &str = "2G";
const DEFAULT_CPU_QUOTA: &str = "50%";

/// Per-agent web UI port. Manager is fixed at `MANAGER_PORT`; every
/// sub-agent is `WEB_PORT_BASE + FNV-1a(name) % WEB_PORT_RANGE`,
/// pure and reproducible from just the name. Collisions are
/// possible (birthday paradox at ~30 agents); the operator resolves
/// them by renaming an agent (different hash → different port).
/// Stable across hosts, restarts, and dashboard renders — no
/// state-file dance.
#[must_use]
pub fn agent_web_port(name: &str) -> u16 {
    if name == MANAGER_NAME {
        return MANAGER_PORT;
    }
    // 32-bit FNV-1a: start from the offset basis, then XOR-and-multiply
    // by the FNV prime for every byte of the name.
    let mut hash: u32 = 2_166_136_261;
    for b in name.bytes() {
        hash ^= u32::from(b);
        hash = hash.wrapping_mul(16_777_619);
    }
    // The remainder is strictly below WEB_PORT_RANGE (a u16), so the
    // narrowing conversion cannot fail; `unwrap_or(0)` only avoids a panic path.
    WEB_PORT_BASE + u16::try_from(hash % u32::from(WEB_PORT_RANGE)).unwrap_or(0)
}

/// Map an agent name to its `nixos-container` name: the manager keeps its
/// bare name, every sub-agent gets the `AGENT_PREFIX`.
#[must_use]
pub fn container_name(name: &str) -> String {
    if name == MANAGER_NAME {
        MANAGER_NAME.to_owned()
    } else {
        format!("{AGENT_PREFIX}{name}")
    }
}

/// True when `name` is the manager's agent name.
#[must_use]
pub fn is_manager(name: &str) -> bool {
    name == MANAGER_NAME
}

/// Reject agent names that cannot be turned into a usable container.
///
/// The manager name is always accepted. Any other name must be non-empty,
/// at most `MAX_AGENT_NAME` bytes long, and consist solely of ASCII letters,
/// digits, `-` and `_`. The character check matters because the name is
/// interpolated verbatim into filesystem paths, systemd unit names and flake
/// references elsewhere in this module, so separators or whitespace must
/// never get that far.
///
/// # Errors
/// Returns an error describing the first rule the name violates.
fn validate(name: &str) -> Result<()> {
    if name.is_empty() {
        bail!("agent name must not be empty");
    }
    if is_manager(name) {
        return Ok(());
    }
    if name.len() > MAX_AGENT_NAME {
        bail!(
            "agent name '{name}' is too long ({} chars); max {MAX_AGENT_NAME}",
            name.len()
        );
    }
    // NOTE(review): this charset mirrors what `nixos-container` is understood
    // to accept for container names — confirm against the deployed version.
    let offending = name
        .chars()
        .find(|c| !(c.is_ascii_alphanumeric() || matches!(c, '-' | '_')));
    if let Some(bad) = offending {
        bail!(
            "agent name '{name}' contains invalid character {bad:?}; \
             only ASCII letters, digits, '-' and '_' are allowed"
        );
    }
    Ok(())
}

/// First name (≠ `self_name`) currently running whose hashed port
/// matches this agent's. The harness inside the colliding container
/// would otherwise loop on `AddrInUse` forever; we surface the
/// conflict here so spawn / rebuild fails loudly with an actionable
/// message instead.
async fn port_collision(self_name: &str) -> Option { let port = agent_web_port(self_name); let raw = list().await.unwrap_or_default(); for c in raw { let other = if c == MANAGER_NAME { MANAGER_NAME.to_owned() } else if let Some(n) = c.strip_prefix(AGENT_PREFIX) { n.to_owned() } else { continue; }; if other == self_name { continue; } if agent_web_port(&other) == port && is_running(&other).await { return Some(other); } } None } #[allow(clippy::too_many_arguments)] pub async fn spawn( name: &str, hyperhive_flake: &str, agent_dir: &Path, proposed_dir: &Path, applied_dir: &Path, claude_dir: &Path, notes_dir: &Path, dashboard_port: u16, ) -> Result<()> { validate(name)?; if let Some(other) = port_collision(name).await { bail!( "port {} is already taken by '{other}' — rename one of them and retry", agent_web_port(name) ); } setup_proposed(proposed_dir, name).await?; setup_applied(applied_dir, Some(proposed_dir), name).await?; ensure_claude_dir(claude_dir)?; ensure_state_dir(notes_dir)?; // Meta flake gets the new agent's input + nixosConfiguration // before `nixos-container create` so the `--flake meta#` // ref resolves. let agents = agents_after_spawn(name).await?; crate::meta::sync_agents(hyperhive_flake, dashboard_port, &agents).await?; let container = container_name(name); let flake_ref = format!("{}#{name}", crate::meta::meta_dir().display()); run(&["create", &container, "--flake", &flake_ref]).await?; set_nspawn_flags(&container, agent_dir, claude_dir, notes_dir)?; set_resource_limits(&container)?; systemd_daemon_reload().await?; run(&["start", &container]).await } /// Build the `AgentSpec` list for the meta flake from `nixos-container /// list` + a hypothetical extra name not yet in the list (for spawn /// where the new agent's container doesn't exist yet). Pass empty /// `name_to_add` from rebuild paths where the agent is already in the /// container list. 
async fn agents_for_meta(name_to_add: Option<&str>) -> Result> { let containers = list().await.unwrap_or_default(); let mut out: Vec = containers .into_iter() .filter_map(|c| { let (name, is_manager) = if c == MANAGER_NAME { (MANAGER_NAME.to_owned(), true) } else if let Some(n) = c.strip_prefix(AGENT_PREFIX) { (n.to_owned(), false) } else { return None; }; Some(crate::meta::AgentSpec { port: agent_web_port(&name), name, is_manager, }) }) .collect(); if let Some(extra) = name_to_add && !out.iter().any(|a| a.name == extra) { out.push(crate::meta::AgentSpec { name: extra.to_owned(), is_manager: is_manager(extra), port: agent_web_port(extra), }); } out.sort_by(|a, b| a.name.cmp(&b.name)); Ok(out) } async fn agents_after_spawn(name: &str) -> Result> { agents_for_meta(Some(name)).await } /// Public enumeration of currently-existing agents (whatever /// `nixos-container list` says), sorted, no extras. For callers /// outside this module that need to reseed meta after lifecycle /// changes — destroy, startup reconciliation, etc. pub async fn agents_for_meta_listing() -> Result> { agents_for_meta(None).await } pub async fn kill(name: &str) -> Result<()> { validate(name)?; let container = container_name(name); run(&["stop", &container]).await } pub async fn start(name: &str) -> Result<()> { validate(name)?; let container = container_name(name); run(&["start", &container]).await } /// Stop + start without regenerating any config. For "kick the container" /// without touching the flake or nspawn flags. pub async fn restart(name: &str) -> Result<()> { kill(name).await?; start(name).await } /// True when the container's systemd unit is active. Used by the dashboard /// to gate stop/restart buttons. 
pub async fn is_running(name: &str) -> bool {
    let container = container_name(name);
    let unit = format!("container@{container}.service");
    // `systemctl is-active --quiet` reports the state through its exit
    // status; a failure to even launch systemctl counts as "not running".
    Command::new("systemctl")
        .args(["is-active", "--quiet", &unit])
        .status()
        .await
        .map(|s| s.success())
        .unwrap_or(false)
}

/// Fully tear down a sub-agent's container: stop + remove via `nixos-container
/// destroy`, then clean our own systemd drop-in. Leaves it to the caller to
/// wipe `/var/lib/hyperhive/...` state and the per-agent runtime dir.
///
/// # Errors
/// Fails on an invalid name or when the drop-in directory exists but cannot
/// be removed. A failing `nixos-container destroy` is only logged.
pub async fn destroy(name: &str) -> Result<()> {
    validate(name)?;
    let container = container_name(name);
    // nixos-container destroy handles stop + removal of
    // /var/lib/nixos-containers/<container> and
    // /etc/nixos-containers/<container>.conf. Tolerate "no such container".
    if let Err(e) = run(&["destroy", &container]).await {
        tracing::warn!(error = ?e, "nixos-container destroy returned an error; continuing cleanup");
    }
    let dropin_dir = format!("/run/systemd/system/container@{container}.service.d");
    // Remove unconditionally and treat "already gone" as success; this avoids
    // the check-then-act race of testing for existence first.
    match std::fs::remove_dir_all(&dropin_dir) {
        Ok(()) => {}
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
        Err(e) => return Err(e).with_context(|| format!("remove {dropin_dir}")),
    }
    Ok(())
}

/// Rebuild agent `name` from its applied repo, bringing the meta flake up
/// to date first.
///
/// # Errors
/// Fails on an invalid name, or when the meta sync, the lock update or the
/// container-level rebuild fails.
pub async fn rebuild(
    name: &str,
    hyperhive_flake: &str,
    agent_dir: &Path,
    applied_dir: &Path,
    claude_dir: &Path,
    notes_dir: &Path,
    dashboard_port: u16,
) -> Result<()> {
    // Reject bad names before the meta repo is modified; `rebuild_no_meta`
    // validates again, but only after the sync + lock bump have already run.
    validate(name)?;
    // Sync the meta flake (idempotent — no-op when the rendered
    // flake matches disk) so a manual rebuild from the dashboard
    // can also recover from a divergent meta repo (e.g. an agent
    // got added directly via `nixos-container create` outside
    // hive-c0re).
    let agents = agents_for_meta(None).await?;
    crate::meta::sync_agents(hyperhive_flake, dashboard_port, &agents).await?;
    // Then bump just this agent's input — picks up whatever
    // `applied/<name>/main` currently points at (a `deployed/*` commit).
    // Commits the lock if it changed.
    crate::meta::lock_update_for_rebuild(name).await?;
    rebuild_no_meta(name, agent_dir, applied_dir, claude_dir, notes_dir).await
}

/// Container-level rebuild without touching the meta repo.
Callers /// that own the meta side themselves (`actions::run_apply_commit` /// drives meta through the two-phase prepare/finalize/abort flow) /// use this directly. Public `rebuild` wraps it with idempotent meta /// sync + lock-bump-and-commit. pub async fn rebuild_no_meta( name: &str, agent_dir: &Path, applied_dir: &Path, claude_dir: &Path, notes_dir: &Path, ) -> Result<()> { validate(name)?; if let Some(other) = port_collision(name).await { bail!( "port {} is already taken by '{other}' — rename one of them and retry", agent_web_port(name) ); } setup_applied(applied_dir, None, name).await?; ensure_claude_dir(claude_dir)?; ensure_state_dir(notes_dir)?; let container = container_name(name); let flake_ref = format!("{}#{name}", crate::meta::meta_dir().display()); set_nspawn_flags(&container, agent_dir, claude_dir, notes_dir)?; set_resource_limits(&container)?; systemd_daemon_reload().await?; run(&["update", &container, "--flake", &flake_ref]).await?; // Restart so any nspawn-level changes (bind mounts, networking, etc.) apply. run(&["stop", &container]).await?; run(&["start", &container]).await } pub async fn list() -> Result> { let out = Command::new("nixos-container") .arg("list") .output() .await .context("invoke nixos-container list")?; if !out.status.success() { bail!( "nixos-container list exited with status {}: {}", out.status, String::from_utf8_lossy(&out.stderr).trim() ); } Ok(String::from_utf8_lossy(&out.stdout) .lines() .map(str::trim) .filter(|line| line.starts_with(AGENT_PREFIX) || *line == MANAGER_NAME) .map(str::to_owned) .collect()) } /// Initialize the manager-editable proposed repo. Seeds two tracked /// files: `agent.nix` (the module the manager edits) and `flake.nix` /// (the boilerplate that lets the meta flake import this repo as an /// input — meta locks at a specific sha and reads /// `nixosModules.default`, so `flake.nix` must be in the commit). 
The /// manager shouldn't edit `flake.nix` (the prompt says so) but it's /// visible so they can introspect. /// /// Touched by hive-c0re only on first spawn — never again — so the /// manager can't be surprised by hive-c0re commits or working-tree /// resets. pub async fn setup_proposed(proposed_dir: &Path, name: &str) -> Result<()> { let fresh = !proposed_dir.join(".git").exists(); if fresh { std::fs::create_dir_all(proposed_dir) .with_context(|| format!("create {}", proposed_dir.display()))?; let agent_path = proposed_dir.join("agent.nix"); if !agent_path.exists() { std::fs::write(&agent_path, initial_agent_nix(name)) .with_context(|| format!("write {}", agent_path.display()))?; } let flake_path = proposed_dir.join("flake.nix"); if !flake_path.exists() { std::fs::write(&flake_path, initial_flake_nix()) .with_context(|| format!("write {}", flake_path.display()))?; } git(proposed_dir, &["init", "--initial-branch=main"]).await?; git(proposed_dir, &["add", "agent.nix", "flake.nix"]).await?; git_commit(proposed_dir, "hive-c0re init").await?; } // Idempotently wire the `applied` remote — purely for the // manager's ergonomics. The URL is the path inside the manager // container (`/applied//.git`), where the RO bind in // `set_nspawn_flags` makes it real. hive-c0re itself never // dereferences this remote; the host-side fetch in // `request_apply_commit` uses absolute host paths. ensure_applied_remote(proposed_dir, name).await } async fn ensure_applied_remote(proposed_dir: &Path, name: &str) -> Result<()> { let want = format!("/applied/{name}/.git"); let existing = git_command() .current_dir(proposed_dir) .args(["remote", "get-url", "applied"]) .output() .await .with_context(|| format!("git remote get-url applied in {}", proposed_dir.display()))?; if existing.status.success() { let current = String::from_utf8_lossy(&existing.stdout).trim().to_owned(); if current == want { return Ok(()); } // URL drifted (path scheme changed, etc.) — re-point it. 
return git(proposed_dir, &["remote", "set-url", "applied", &want]).await; } git(proposed_dir, &["remote", "add", "applied", &want]).await } /// Set up the applied repo. First-spawn only: init the repo, pull /// proposed's initial commit in via `git fetch`, tag it `deployed/0`. /// This is the *only* time hive-c0re reads from `proposed` for an /// agent — subsequent proposals are fetched at `request_apply_commit` /// time and tagged `proposal/` (see `actions::approve` for the /// tag state machine). /// /// `proposed_dir` is `None` on rebuild paths where the repo already /// exists — we just verify it's the right shape and bail otherwise. /// Unlike the pre-overhaul code path, `flake.nix` is no longer /// regenerated at the host level: it's tracked in proposed (seeded by /// `setup_proposed`) and rides along on every fetch. pub async fn setup_applied( applied_dir: &Path, proposed_dir: Option<&Path>, name: &str, ) -> Result<()> { std::fs::create_dir_all(applied_dir) .with_context(|| format!("create {}", applied_dir.display()))?; if !applied_dir.join(".git").exists() { let Some(proposed) = proposed_dir else { bail!( "applied repo at {} is missing its .git directory; \ cannot rebuild without a proposed source to seed from. \ destroy --purge and re-spawn this agent.", applied_dir.display() ); }; git(applied_dir, &["init", "--initial-branch=main"]).await?; let proposed_str = proposed.display().to_string(); git( applied_dir, &["fetch", "--no-tags", &proposed_str, "main:refs/heads/main"], ) .await?; git_read_tree_reset(applied_dir, "refs/heads/main").await?; git_tag(applied_dir, "deployed/0", "refs/heads/main").await?; } else if git_rev_parse(applied_dir, "refs/tags/deployed/0") .await .is_err() { // Pre-overhaul applied repo — no deployed/* tag scheme, // flake.nix may be untracked, agent.nix possibly authored by // hive-c0re directly. The startup auto-migration fixes this // in place; if it didn't run (or got skipped), surface a // clear error. 
bail!( "applied repo at {} predates the meta-flake layout. \ Restart hive-c0re to let the auto-migration run, or \ destroy --purge {name} and re-spawn.", applied_dir.display() ); } Ok(()) } /// Create the per-agent Claude credentials dir if missing. Mode 0700 — only /// root inside the container reads/writes it. Idempotent: existing dirs are /// left untouched (an agent's OAuth tokens survive `destroy`/recreate). fn ensure_claude_dir(claude_dir: &Path) -> Result<()> { if !claude_dir.exists() { std::fs::create_dir_all(claude_dir) .with_context(|| format!("create {}", claude_dir.display()))?; #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; std::fs::set_permissions(claude_dir, std::fs::Permissions::from_mode(0o700)) .with_context(|| format!("chmod {}", claude_dir.display()))?; } } Ok(()) } fn ensure_state_dir(notes_dir: &Path) -> Result<()> { if !notes_dir.exists() { std::fs::create_dir_all(notes_dir) .with_context(|| format!("create {}", notes_dir.display()))?; } Ok(()) } fn initial_agent_nix(name: &str) -> String { format!( "{{ config, pkgs, lib, ... }}:\n{{\n # Per-agent overrides for {name}. This is a regular NixOS module\n # — add packages, services, modules, imports as needed.\n #\n # imports = [ ./extra-module.nix ];\n # environment.systemPackages = with pkgs; [ ];\n}}\n", ) } /// Module-only flake exposed by every agent's repo. Consumed by the /// hive-c0re-owned meta flake at `/var/lib/hyperhive/meta/` as a flake /// input. Identity injection (`HIVE_PORT` / `HIVE_LABEL` / dashboard /// port / git committer) lives in the meta flake's wrapper, not here. 
pub fn initial_flake_nix() -> &'static str {
    "{\n description = \"hyperhive agent\";\n inputs = { };\n outputs = { self }: {\n nixosModules.default = import ./agent.nix;\n };\n}\n"
}

/// Commit whatever is staged in `dir` with `message`, authored as
/// `GIT_NAME` / `GIT_EMAIL`. The identity is passed through `-c` overrides,
/// so the commit does not depend on any git user configuration being set.
async fn git_commit(dir: &Path, message: &str) -> Result<()> {
    git(
        dir,
        &[
            "-c",
            &format!("user.name={GIT_NAME}"),
            "-c",
            &format!("user.email={GIT_EMAIL}"),
            "commit",
            "-m",
            message,
        ],
    )
    .await
}

/// Spawn `git` honoring the `HYPERHIVE_GIT` env var (absolute path baked in
/// by the NixOS module), falling back to bare `git` (PATH lookup) otherwise.
/// The variable is read with `var_os`, so a path that is not valid UTF-8 is
/// still honored instead of silently falling back.
#[must_use]
pub fn git_command() -> Command {
    let exe = std::env::var_os("HYPERHIVE_GIT").unwrap_or_else(|| "git".into());
    Command::new(exe)
}

/// Run `git <args>` in `dir` and wait for it to finish.
///
/// # Errors
/// Fails when git cannot be launched, or when it exits non-zero; in that
/// case the error carries the exit status and the trimmed stderr.
async fn git(dir: &Path, args: &[&str]) -> Result<()> {
    let out = git_command()
        .current_dir(dir)
        .args(args)
        .output()
        .await
        .with_context(|| format!("git {} in {}", args.join(" "), dir.display()))?;
    if !out.status.success() {
        bail!(
            "git {} failed ({}): {}",
            args.join(" "),
            out.status,
            String::from_utf8_lossy(&out.stderr).trim()
        );
    }
    Ok(())
}

/// Fetch `sha` from the `src` git repo into `dst` and pin it as
/// `refs/tags/<tag>`. Used at `request_apply_commit` time so hive-c0re
/// captures an immutable handle on the manager's commit; subsequent
/// amendments / force-pushes in `src` no longer affect what gets
/// built. Returns the resolved sha (which equals `sha` on success
/// but normalised — short shas get expanded).
pub async fn git_fetch_to_tag(dst: &Path, src: &Path, sha: &str, tag: &str) -> Result<String> {
    let src_str = src.display().to_string();
    let refspec = format!("{sha}:refs/tags/{tag}");
    // `--no-tags`: bring over only the requested commit, none of src's tags.
    git(dst, &["fetch", "--no-tags", &src_str, &refspec]).await?;
    git_rev_parse(dst, &format!("refs/tags/{tag}")).await
}

/// Resolve `refname` (a tag, branch, or sha) in `dir` to its full sha.
pub async fn git_rev_parse(dir: &Path, refname: &str) -> Result<String> {
    let out = git_command()
        .current_dir(dir)
        .args(["rev-parse", refname])
        .output()
        .await
        .with_context(|| format!("git rev-parse {refname} in {}", dir.display()))?;
    if !out.status.success() {
        bail!(
            "git rev-parse {refname} failed ({}): {}",
            out.status,
            String::from_utf8_lossy(&out.stderr).trim()
        );
    }
    // stdout is the sha followed by a newline; hand back just the sha.
    Ok(String::from_utf8_lossy(&out.stdout).trim().to_owned())
}

/// Plant a lightweight tag at `target`. Errors if the tag already
/// exists — we want loud failures on id reuse, not silent
/// overwrites.
pub async fn git_tag(dir: &Path, name: &str, target: &str) -> Result<()> {
    git(dir, &["tag", name, target]).await
}

/// Plant an annotated tag with `body` as the message. Used for
/// `failed/<id>` (body = build error) and `denied/<id>` (body =
/// operator note). Multi-line bodies handled via stdin so we don't
/// have to escape anything.
///
/// # Errors
/// Fails when git cannot be spawned, when the body cannot be written to its
/// stdin, or when `git tag -a` exits non-zero.
pub async fn git_tag_annotated(dir: &Path, name: &str, target: &str, body: &str) -> Result<()> {
    use tokio::io::AsyncWriteExt;
    // `-F -` makes git read the tag message from stdin.
    let mut child = git_command()
        .current_dir(dir)
        .args(["tag", "-a", name, target, "-F", "-"])
        .stdin(std::process::Stdio::piped())
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .spawn()
        .with_context(|| format!("spawn git tag -a {name} in {}", dir.display()))?;
    if let Some(mut stdin) = child.stdin.take() {
        stdin
            .write_all(body.as_bytes())
            .await
            .context("write tag body to git stdin")?;
        // Drop closes stdin so git can finish reading.
        drop(stdin);
    }
    let out = child.wait_with_output().await.context("wait git tag -a")?;
    if !out.status.success() {
        bail!(
            "git tag -a {name} failed ({}): {}",
            out.status,
            String::from_utf8_lossy(&out.stderr).trim()
        );
    }
    Ok(())
}

/// Replace working tree + index with the tree at `target` without
/// moving HEAD. `applied/main` stays pointing at the last known-good
/// `deployed/*` while we let `nixos-container update` evaluate the
/// candidate.
On build failure callers reset back to HEAD; on /// success they fast-forward main to `target`. pub async fn git_read_tree_reset(dir: &Path, target: &str) -> Result<()> { git(dir, &["read-tree", "--reset", "-u", target]).await } /// Hard-set a ref to `target`. Used to fast-forward `refs/heads/main` /// to the just-deployed proposal commit. Uses `update-ref`, not /// `branch -f`, so it works regardless of where HEAD currently sits. pub async fn git_update_ref(dir: &Path, refname: &str, target: &str) -> Result<()> { git(dir, &["update-ref", refname, target]).await } /// Write a systemd drop-in for `container@.service` that applies /// our default resource caps. Goes under `/run/systemd/system/...` so it's /// ephemeral (regenerated on every spawn / rebuild). fn set_resource_limits(container: &str) -> Result<()> { let dir = format!("/run/systemd/system/container@{container}.service.d"); std::fs::create_dir_all(&dir).with_context(|| format!("create {dir}"))?; let path = format!("{dir}/hyperhive-limits.conf"); let content = format!("[Service]\nMemoryMax={DEFAULT_MEMORY_MAX}\nCPUQuota={DEFAULT_CPU_QUOTA}\n",); std::fs::write(&path, content).with_context(|| format!("write {path}"))?; tracing::info!( %path, memory_max = DEFAULT_MEMORY_MAX, cpu_quota = DEFAULT_CPU_QUOTA, "wrote resource limits drop-in" ); Ok(()) } async fn systemd_daemon_reload() -> Result<()> { let out = Command::new("systemctl") .arg("daemon-reload") .output() .await .context("invoke systemctl daemon-reload")?; if !out.status.success() { bail!( "systemctl daemon-reload failed ({}): {}", out.status, String::from_utf8_lossy(&out.stderr).trim() ); } Ok(()) } /// Idempotently rewrite the lines in `/etc/nixos-containers/.conf` /// that hive-c0re owns: `PRIVATE_NETWORK` (forced 0 so the agent's web UI port /// is reachable on the host) and `EXTRA_NSPAWN_FLAGS` (the runtime-dir bind). /// The start script expands `$EXTRA_NSPAWN_FLAGS` unquoted into the /// `systemd-nspawn` command. 
/// Where in the container's filesystem the manager sees its agents tree. /// Matches the `/agents` path that pre-Phase-8 hosts declared via /// `containers.hm1nd.bindMounts."/agents"`. pub const CONTAINER_MANAGER_AGENTS_MOUNT: &str = "/agents"; /// Where the manager sees the applied trees of every agent, read-only. /// Manager runs `git fetch /applied//.git refs/tags/*:refs/tags/applied/*` /// to learn what hive-c0re deployed (or rejected, or failed to /// build); the RO bind makes accidental writes impossible from /// inside the container. pub const CONTAINER_MANAGER_APPLIED_MOUNT: &str = "/applied"; /// The on-host root that gets bind-mounted to `/agents` inside the manager. /// Hard-coded to match `AGENT_STATE_ROOT` in coordinator.rs (kept duplicated /// here so lifecycle stays usable as a leaf module). const HOST_AGENTS_ROOT: &str = "/var/lib/hyperhive/agents"; /// On-host applied repo root, mirrored RO into the manager. Matches /// `APPLIED_STATE_ROOT` in coordinator.rs. const HOST_APPLIED_ROOT: &str = "/var/lib/hyperhive/applied"; /// On-host meta repo root, mirrored RO into the manager. Matches /// `meta::meta_dir()` but duplicated here so lifecycle stays a leaf. const HOST_META_ROOT: &str = "/var/lib/hyperhive/meta"; fn set_nspawn_flags( container: &str, runtime_dir: &Path, claude_dir: &Path, notes_dir: &Path, ) -> Result<()> { let path = format!("/etc/nixos-containers/{container}.conf"); let original = std::fs::read_to_string(&path).with_context(|| format!("read {path}"))?; let mut binds = format!( "--bind={runtime}:{CONTAINER_RUNTIME_MOUNT} --bind={claude}:{CONTAINER_CLAUDE_MOUNT} --bind={notes}:{CONTAINER_NOTES_MOUNT}", runtime = runtime_dir.display(), claude = claude_dir.display(), notes = notes_dir.display(), ); if container == MANAGER_NAME { use std::fmt::Write as _; // systemd-nspawn refuses to start a container whose bind // source doesn't exist. 
The meta repo is created by the // startup migration, but make sure the directory is there // before the manager comes up in case set_nspawn_flags fires // first (e.g. cold start with no agents). std::fs::create_dir_all(HOST_META_ROOT) .with_context(|| format!("create {HOST_META_ROOT}"))?; // Manager edits sub-agent proposed/ repos and its own. RW so it can // git-commit. Sub-agents see only their own /run/hive socket and // /root/.claude (no /agents or /applied). // // /applied is a separate RO mount of the hive-c0re-only applied // repos so the manager can `git fetch /applied//.git // refs/tags/*:refs/tags/applied/*` to mirror deployed/failed/ // denied tags into its proposed clones and diff against // what's actually deployed. RO bind makes destructive git // plumbing inside the container unable to corrupt applied. // // /meta is a third RO mount exposing the system-wide deploy // flake (`git log /meta --oneline` shows every deploy across // every agent; `cat /meta/flake.lock` resolves which sha each // agent is pinned at right now). let _ = write!( binds, " --bind={HOST_AGENTS_ROOT}:{CONTAINER_MANAGER_AGENTS_MOUNT}", ); let _ = write!( binds, " --bind-ro={HOST_APPLIED_ROOT}:{CONTAINER_MANAGER_APPLIED_MOUNT}", ); let _ = write!( binds, " --bind-ro={HOST_META_ROOT}:{mount}", mount = crate::meta::CONTAINER_MANAGER_META_MOUNT, ); } let bind_flag = format!("EXTRA_NSPAWN_FLAGS=\"{binds}\""); let mut lines: Vec = original .lines() .filter(|line| { let trimmed = line.trim_start(); // Strip any network-namespace knobs nixos-container's create // might have populated. The start script adds `--network-veth` // whenever HOST_ADDRESS / LOCAL_ADDRESS (or their IPv6 cousins) // are non-empty — and veth implies a private netns, hiding our // web-UI port from the host. Force host netns. 
!trimmed.starts_with("EXTRA_NSPAWN_FLAGS=") && !trimmed.starts_with("PRIVATE_NETWORK=") && !trimmed.starts_with("HOST_ADDRESS=") && !trimmed.starts_with("LOCAL_ADDRESS=") && !trimmed.starts_with("HOST_ADDRESS6=") && !trimmed.starts_with("LOCAL_ADDRESS6=") && !trimmed.starts_with("HOST_BRIDGE=") }) .map(str::to_owned) .collect(); lines.push("PRIVATE_NETWORK=0".to_owned()); lines.push("HOST_ADDRESS=".to_owned()); lines.push("LOCAL_ADDRESS=".to_owned()); lines.push("HOST_ADDRESS6=".to_owned()); lines.push("LOCAL_ADDRESS6=".to_owned()); lines.push("HOST_BRIDGE=".to_owned()); lines.push(bind_flag); let mut content = lines.join("\n"); content.push('\n'); std::fs::write(&path, content).with_context(|| format!("write {path}"))?; tracing::info!(%path, "set PRIVATE_NETWORK=0 + EXTRA_NSPAWN_FLAGS"); Ok(()) } async fn run(args: &[&str]) -> Result<()> { let out = Command::new("nixos-container") .args(args) .output() .await .with_context(|| format!("invoke nixos-container {}", args.join(" ")))?; let stdout = String::from_utf8_lossy(&out.stdout); let stderr = String::from_utf8_lossy(&out.stderr); if !stdout.trim().is_empty() { tracing::info!(target: "nixos-container", "{}", stdout.trim()); } if !stderr.trim().is_empty() { tracing::warn!(target: "nixos-container", "{}", stderr.trim()); } if !out.status.success() { bail!( "nixos-container {} failed ({}): {}", args.join(" "), out.status, stderr.trim() ); } Ok(()) }