From ee99774d177129edb65dc67a0710dc7d967a7cd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?m=C3=BCde?= Date: Fri, 15 May 2026 00:30:48 +0200 Subject: [PATCH] Phase 7d: per-container MemoryMax + CPUQuota via systemd drop-in --- hive-c0re/src/lifecycle.rs | 45 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/hive-c0re/src/lifecycle.rs b/hive-c0re/src/lifecycle.rs index be88be6..ddb1e6a 100644 --- a/hive-c0re/src/lifecycle.rs +++ b/hive-c0re/src/lifecycle.rs @@ -25,6 +25,11 @@ const GIT_EMAIL: &str = "hive-c0re@hyperhive"; const WEB_PORT_BASE: u16 = 8100; const WEB_PORT_RANGE: u16 = 900; +/// Default resource caps applied to every managed container via a systemd +/// drop-in under `/run/systemd/system/container@.service.d/`. +const DEFAULT_MEMORY_MAX: &str = "2G"; +const DEFAULT_CPU_QUOTA: &str = "50%"; + /// Returns the per-agent web UI port. Same hash on both sides — manager, /// dashboard, and agent harness all agree. #[must_use] @@ -69,6 +74,8 @@ pub async fn spawn( let flake_ref = format!("{}#default", applied_dir.display()); run(&["create", &container, "--flake", &flake_ref]).await?; set_nspawn_flags(&container, agent_dir)?; + set_resource_limits(&container)?; + systemd_daemon_reload().await?; run(&["start", &container]).await } @@ -89,6 +96,8 @@ pub async fn rebuild( let container = container_name(name); let flake_ref = format!("{}#default", applied_dir.display()); set_nspawn_flags(&container, agent_dir)?; + set_resource_limits(&container)?; + systemd_daemon_reload().await?; run(&["update", &container, "--flake", &flake_ref]).await?; // Restart so any nspawn-level changes (bind mounts, networking, etc.) apply. run(&["stop", &container]).await?; @@ -280,6 +289,42 @@ async fn git_status(dir: &Path, args: &[&str]) -> Result { Ok(st.success()) } +/// Write a systemd drop-in for `container@.service` that applies +/// our default resource caps. Goes under `/run/systemd/system/...` so it's +/// ephemeral (regenerated on every spawn / rebuild). +fn set_resource_limits(container: &str) -> Result<()> { + let dir = format!("/run/systemd/system/container@{container}.service.d"); + std::fs::create_dir_all(&dir).with_context(|| format!("create {dir}"))?; + let path = format!("{dir}/hyperhive-limits.conf"); + let content = format!( + "[Service]\nMemoryMax={DEFAULT_MEMORY_MAX}\nCPUQuota={DEFAULT_CPU_QUOTA}\n", + ); + std::fs::write(&path, content).with_context(|| format!("write {path}"))?; + tracing::info!( + %path, + memory_max = DEFAULT_MEMORY_MAX, + cpu_quota = DEFAULT_CPU_QUOTA, + "wrote resource limits drop-in" + ); + Ok(()) +} + +async fn systemd_daemon_reload() -> Result<()> { + let out = Command::new("systemctl") + .arg("daemon-reload") + .output() + .await + .context("invoke systemctl daemon-reload")?; + if !out.status.success() { + bail!( + "systemctl daemon-reload failed ({}): {}", + out.status, + String::from_utf8_lossy(&out.stderr).trim() + ); + } + Ok(()) +} + /// Idempotently rewrite the `EXTRA_NSPAWN_FLAGS` line in /// `/etc/nixos-containers/.conf`. The start script expands this /// variable unquoted into the `systemd-nspawn` command.