lifecycle: stream nixos-container stdout/stderr line-by-line

run() previously buffered the child's output via .output() and
only logged at exit — a multi-minute 'nixos-container update'
(typical on a fresh hyperhive bump) showed nothing in journald
until the very end. operator watching 'journalctl -u hive-c0re
-f' couldn't tell 'slow nix build' from 'wedged daemon'.

new shape: spawn with piped stdio, pump each line into tracing
as it arrives (stdout → INFO, stderr → WARN), keep a tail of
the last 32 stderr lines for the bail message so the eventual
'failed (status 2)' still carries the actual nix eval error.
target field 'nixos-container', argv-equivalent attached via
the 'cmdline' field so filtering by subcommand works.
This commit is contained in:
müde 2026-05-16 02:57:16 +02:00
parent 1278f880da
commit 6f1b664c85

View file

@ -781,27 +781,74 @@ fn set_nspawn_flags(
Ok(())
}
/// Spawn `nixos-container <args>` and pipe its stdout + stderr into
/// `tracing` one line at a time so a long-running command (most
/// notably `update`, which kicks off a full nix build that can run
/// for minutes on a stale flake) shows progress in journald as it
/// happens. The buffered `.output()` we used before only flushed the
/// summary at exit, which made "slow" and "stuck" look identical to
/// the operator watching `journalctl -u hive-c0re -f`.
///
/// stdout lines log at INFO, stderr at WARN. Stderr lines are also
/// collected into a single string so the bailout message at the end
/// can include the actual failure reason (nix dumps eval errors to
/// stderr).
async fn run(args: &[&str]) -> Result<()> {
let out = Command::new("nixos-container")
use tokio::io::{AsyncBufReadExt, BufReader};
let cmdline = args.join(" ");
let mut child = Command::new("nixos-container")
.args(args)
.output()
.stdout(std::process::Stdio::piped())
.stderr(std::process::Stdio::piped())
.spawn()
.with_context(|| format!("invoke nixos-container {cmdline}"))?;
let stdout = child.stdout.take().expect("piped stdout");
let stderr = child.stderr.take().expect("piped stderr");
let stdout_cmdline = cmdline.clone();
let pump_stdout = tokio::spawn(async move {
let mut lines = BufReader::new(stdout).lines();
while let Ok(Some(line)) = lines.next_line().await {
tracing::info!(target: "nixos-container", cmdline = %stdout_cmdline, "{line}");
}
});
// Tail of stderr lines (last 32) for the bailout message. Newer
// lines push older ones out; nix's actual error usually lands
// in the last few lines.
let stderr_cmdline = cmdline.clone();
let stderr_tail: std::sync::Arc<std::sync::Mutex<std::collections::VecDeque<String>>> =
std::sync::Arc::new(std::sync::Mutex::new(std::collections::VecDeque::with_capacity(32)));
let stderr_tail_pump = stderr_tail.clone();
let pump_stderr = tokio::spawn(async move {
let mut lines = BufReader::new(stderr).lines();
while let Ok(Some(line)) = lines.next_line().await {
tracing::warn!(target: "nixos-container", cmdline = %stderr_cmdline, "{line}");
let mut tail = stderr_tail_pump.lock().unwrap();
if tail.len() == 32 {
tail.pop_front();
}
tail.push_back(line);
}
});
let status = child
.wait()
.await
.with_context(|| format!("invoke nixos-container {}", args.join(" ")))?;
let stdout = String::from_utf8_lossy(&out.stdout);
let stderr = String::from_utf8_lossy(&out.stderr);
if !stdout.trim().is_empty() {
tracing::info!(target: "nixos-container", "{}", stdout.trim());
}
if !stderr.trim().is_empty() {
tracing::warn!(target: "nixos-container", "{}", stderr.trim());
}
if !out.status.success() {
bail!(
"nixos-container {} failed ({}): {}",
args.join(" "),
out.status,
stderr.trim()
);
.with_context(|| format!("wait nixos-container {cmdline}"))?;
let _ = pump_stdout.await;
let _ = pump_stderr.await;
if !status.success() {
let tail = stderr_tail
.lock()
.unwrap()
.iter()
.cloned()
.collect::<Vec<_>>()
.join("\n");
bail!("nixos-container {cmdline} failed ({status}): {tail}");
}
Ok(())
}