diff --git a/hive-c0re/src/lifecycle.rs b/hive-c0re/src/lifecycle.rs index fe3432d..b3395db 100644 --- a/hive-c0re/src/lifecycle.rs +++ b/hive-c0re/src/lifecycle.rs @@ -781,27 +781,74 @@ fn set_nspawn_flags( Ok(()) } +/// Spawn `nixos-container ` and pipe its stdout + stderr into +/// `tracing` one line at a time so a long-running command (most +/// notably `update`, which kicks off a full nix build that can run +/// for minutes on a stale flake) shows progress in journald as it +/// happens. The buffered `.output()` we used before only flushed the +/// summary at exit, which made "slow" and "stuck" look identical to +/// the operator watching `journalctl -u hive-c0re -f`. +/// +/// stdout lines log at INFO, stderr at WARN. Stderr lines are also +/// collected into a single string so the bailout message at the end +/// can include the actual failure reason (nix dumps eval errors to +/// stderr). async fn run(args: &[&str]) -> Result<()> { - let out = Command::new("nixos-container") + use tokio::io::{AsyncBufReadExt, BufReader}; + let cmdline = args.join(" "); + let mut child = Command::new("nixos-container") .args(args) - .output() + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .spawn() + .with_context(|| format!("invoke nixos-container {cmdline}"))?; + + let stdout = child.stdout.take().expect("piped stdout"); + let stderr = child.stderr.take().expect("piped stderr"); + + let stdout_cmdline = cmdline.clone(); + let pump_stdout = tokio::spawn(async move { + let mut lines = BufReader::new(stdout).lines(); + while let Ok(Some(line)) = lines.next_line().await { + tracing::info!(target: "nixos-container", cmdline = %stdout_cmdline, "{line}"); + } + }); + + // Tail of stderr lines (last 32) for the bailout message. Newer + // lines push older ones out; nix's actual error usually lands + // in the last few lines. + let stderr_cmdline = cmdline.clone(); + let stderr_tail: std::sync::Arc>> = + std::sync::Arc::new(std::sync::Mutex::new(std::collections::VecDeque::with_capacity(32))); + let stderr_tail_pump = stderr_tail.clone(); + let pump_stderr = tokio::spawn(async move { + let mut lines = BufReader::new(stderr).lines(); + while let Ok(Some(line)) = lines.next_line().await { + tracing::warn!(target: "nixos-container", cmdline = %stderr_cmdline, "{line}"); + let mut tail = stderr_tail_pump.lock().unwrap(); + if tail.len() == 32 { + tail.pop_front(); + } + tail.push_back(line); + } + }); + + let status = child + .wait() .await - .with_context(|| format!("invoke nixos-container {}", args.join(" ")))?; - let stdout = String::from_utf8_lossy(&out.stdout); - let stderr = String::from_utf8_lossy(&out.stderr); - if !stdout.trim().is_empty() { - tracing::info!(target: "nixos-container", "{}", stdout.trim()); - } - if !stderr.trim().is_empty() { - tracing::warn!(target: "nixos-container", "{}", stderr.trim()); - } - if !out.status.success() { - bail!( - "nixos-container {} failed ({}): {}", - args.join(" "), - out.status, - stderr.trim() - ); + .with_context(|| format!("wait nixos-container {cmdline}"))?; + let _ = pump_stdout.await; + let _ = pump_stderr.await; + + if !status.success() { + let tail = stderr_tail + .lock() + .unwrap() + .iter() + .cloned() + .collect::>() + .join("\n"); + bail!("nixos-container {cmdline} failed ({status}): {tail}"); } Ok(()) }