lifecycle: append container journal tail to failed nixos-container update

2026-05-20 11:25:08 +02:00 · 2026-05-20 11:25:08 +02:00 · 94781ccd08
commit 94781ccd08
parent ba04a5a360
2 changed files with 45 additions and 1 deletions
--- a/CLAUDE.md
+++ b/CLAUDE.md
@ -185,6 +185,17 @@ read them à la carte.
 In-flight or recent context that hasn't earned a section yet.
 Prune freely.
 - **Just landed:** failed `nixos-container update` self-documents.
  `lifecycle::run` now appends the tail (40 lines) of the target
  container's own journal to the bail message when an `update`
  fails. `nixos-container`'s own stderr on a reload-phase failure
  is terse ("failed to reload container"); the real cause —
  which unit failed `switch-to-configuration` — lives in the
  *container* journal. Scoped to `update` (container's still up
  on the old generation, so `journalctl -M` works); best-effort,
  appends nothing if the journal can't be read. The manager's
  `update` tool / rebuild errors now carry the failing-unit
  detail without a second `get_logs` call.
 - **Just landed:** `hyperhive.westonRdp.enable` option. New
  `nix/templates/weston-rdp.nix` declares a per-agent bool;
  enabling it runs weston with the RDP backend as a systemd
--- a/hive-c0re/src/lifecycle.rs
+++ b/hive-c0re/src/lifecycle.rs
@ -959,7 +959,40 @@ async fn run(args: &[&str]) -> Result<()> {
            .cloned()
            .collect::<Vec<_>>()
            .join("\n");
-        bail!("nixos-container {cmdline} failed ({status}): {tail}");
+        let journal = container_journal_tail(args).await;
        bail!("nixos-container {cmdline} failed ({status}): {tail}{journal}");
    }
    Ok(())
 }
 /// Best-effort tail of a container's own journal, formatted for
 /// appending to the error raised when `nixos-container update` fails.
 ///
 /// The stderr nixos-container itself prints on a failed `update` is
 /// often terse ("failed to reload container") — the real reason
 /// (which unit failed `switch-to-configuration` during the reload
 /// phase) lands in the *container's* own journal, not on the host.
 /// Fetching the tail lets a failed rebuild self-document the failing
 /// unit in the error string, no second round-trip.
 ///
 /// `args` is the nixos-container argument vector: `args[0]` is the
 /// verb and `args[1]` the container name.
 ///
 /// Scoped to `update`: that's the reload-phase case, and the
 /// container is still up (running the old generation) so
 /// `journalctl -M` works.
 ///
 /// Returns a newline-prefixed header followed by the journal text
 /// (trailing whitespace trimmed), ready to be concatenated onto an
 /// existing message. Returns "" — and never an error of its own —
 /// when the verb is not `update`, no container name was passed,
 /// `journalctl` could not be run, or it printed nothing but
 /// whitespace.
 async fn container_journal_tail(args: &[&str]) -> String {
    // Single source of truth for the tail length: used both for the
    // `journalctl -n` argument and for the header text, so the two
    // cannot drift apart.
    const JOURNAL_TAIL_LINES: u32 = 40;

    if args.first().copied() != Some("update") {
        return String::new();
    }
    let Some(&container) = args.get(1) else {
        return String::new();
    };

    let line_count = JOURNAL_TAIL_LINES.to_string();
    let Ok(output) = Command::new("journalctl")
        .args([
            "-M",
            container,
            "-n",
            line_count.as_str(),
            "--no-pager",
            "--output=short",
        ])
        .output()
        .await
    else {
        // journalctl missing or not spawnable: stay silent, the caller
        // still reports the original failure.
        return String::new();
    };

    // Decide emptiness *after* trimming so whitespace-only output does
    // not yield a dangling header with nothing underneath it.
    let stdout = String::from_utf8_lossy(&output.stdout);
    let text = stdout.trim_end();
    if text.is_empty() {
        return String::new();
    }

    format!(
        "\n--- last {JOURNAL_TAIL_LINES} journal lines from container '{container}' ---\n{text}"
    )
 }