Phase 7d: per-container MemoryMax + CPUQuota via systemd drop-in

This commit is contained in:
müde 2026-05-15 00:30:48 +02:00
parent 2267800c51
commit ee99774d17

View file

@ -25,6 +25,11 @@ const GIT_EMAIL: &str = "hive-c0re@hyperhive";
const WEB_PORT_BASE: u16 = 8100;
const WEB_PORT_RANGE: u16 = 900;
/// Default resource caps applied to every managed container via a systemd
/// drop-in under `/run/systemd/system/container@<NAME>.service.d/`.
///
/// This one is emitted as the `MemoryMax=` value in the drop-in's `[Service]`
/// section.
const DEFAULT_MEMORY_MAX: &str = "2G";
/// Default value emitted as `CPUQuota=` in the `[Service]` section of the
/// per-container systemd drop-in.
const DEFAULT_CPU_QUOTA: &str = "50%";
/// Returns the per-agent web UI port. Same hash on both sides — manager,
/// dashboard, and agent harness all agree.
#[must_use]
@ -69,6 +74,8 @@ pub async fn spawn(
let flake_ref = format!("{}#default", applied_dir.display());
run(&["create", &container, "--flake", &flake_ref]).await?;
set_nspawn_flags(&container, agent_dir)?;
set_resource_limits(&container)?;
systemd_daemon_reload().await?;
run(&["start", &container]).await
}
@ -89,6 +96,8 @@ pub async fn rebuild(
let container = container_name(name);
let flake_ref = format!("{}#default", applied_dir.display());
set_nspawn_flags(&container, agent_dir)?;
set_resource_limits(&container)?;
systemd_daemon_reload().await?;
run(&["update", &container, "--flake", &flake_ref]).await?;
// Restart so any nspawn-level changes (bind mounts, networking, etc.) apply.
run(&["stop", &container]).await?;
@ -280,6 +289,42 @@ async fn git_status(dir: &Path, args: &[&str]) -> Result<bool> {
Ok(st.success())
}
/// Install the default resource caps for `container@<container>.service`.
///
/// Ensures the unit's drop-in directory exists under `/run/systemd/system/`
/// and (over)writes `hyperhive-limits.conf` inside it with a `[Service]`
/// section carrying `MemoryMax=` and `CPUQuota=`, taken from
/// `DEFAULT_MEMORY_MAX` and `DEFAULT_CPU_QUOTA`. The file lives under `/run`
/// on purpose: it is ephemeral and gets regenerated on every spawn / rebuild.
///
/// This function only writes the file; it does not reload systemd itself.
///
/// NOTE(review): `/run` is presumably cleared on host reboot, so a container
/// started without a preceding spawn / rebuild would have no caps — confirm.
///
/// # Errors
///
/// Fails if the drop-in directory cannot be created or the file cannot be
/// written; the error context names the offending path.
fn set_resource_limits(container: &str) -> Result<()> {
    // Both locations are plain strings so they can be logged and embedded in
    // error context verbatim.
    let dropin_dir = format!("/run/systemd/system/container@{container}.service.d");
    let conf_path = format!("{dropin_dir}/hyperhive-limits.conf");
    let unit_snippet = format!(
        "[Service]\nMemoryMax={mem}\nCPUQuota={cpu}\n",
        mem = DEFAULT_MEMORY_MAX,
        cpu = DEFAULT_CPU_QUOTA,
    );

    std::fs::create_dir_all(&dropin_dir).with_context(|| format!("create {dropin_dir}"))?;
    std::fs::write(&conf_path, unit_snippet).with_context(|| format!("write {conf_path}"))?;

    tracing::info!(
        path = %conf_path,
        memory_max = DEFAULT_MEMORY_MAX,
        cpu_quota = DEFAULT_CPU_QUOTA,
        "wrote resource limits drop-in"
    );
    Ok(())
}
/// Run `systemctl daemon-reload` and wait for it to complete.
///
/// # Errors
///
/// Fails if `systemctl` cannot be invoked, or if it exits unsuccessfully — in
/// that case the error message includes the exit status and the trimmed
/// contents of its stderr.
async fn systemd_daemon_reload() -> Result<()> {
    let mut reload = Command::new("systemctl");
    reload.arg("daemon-reload");

    let finished = reload
        .output()
        .await
        .context("invoke systemctl daemon-reload")?;

    if finished.status.success() {
        return Ok(());
    }

    // Surface whatever systemctl printed so the failure is diagnosable.
    let stderr_text = String::from_utf8_lossy(&finished.stderr);
    bail!(
        "systemctl daemon-reload failed ({}): {}",
        finished.status,
        stderr_text.trim()
    )
}
/// Idempotently rewrite the `EXTRA_NSPAWN_FLAGS` line in
/// `/etc/nixos-containers/<container>.conf`. The start script expands this
/// variable unquoted into the `systemd-nspawn` command.