lifecycle: append container journal tail to failed nixos-container update

This commit is contained in:
damocles 2026-05-20 11:25:08 +02:00
parent ba04a5a360
commit 94781ccd08
2 changed files with 45 additions and 1 deletions

View file

@ -185,6 +185,17 @@ read them à la carte.
In-flight or recent context that hasn't earned a section yet.
Prune freely.
- **Just landed:** failed `nixos-container update` self-documents.
`lifecycle::run` now appends the tail (40 lines) of the target
container's own journal to the bail message when an `update`
fails. `nixos-container`'s own stderr on a reload-phase failure
is terse ("failed to reload container"); the real cause —
which unit failed `switch-to-configuration` — lives in the
*container* journal. Scoped to `update` (container's still up
on the old generation, so `journalctl -M` works); best-effort,
appends nothing if the journal can't be read. The manager's
`update` tool / rebuild errors now carry the failing-unit
detail without a second `get_logs` call.
- **Just landed:** `hyperhive.westonRdp.enable` option. New
`nix/templates/weston-rdp.nix` declares a per-agent bool;
enabling it runs weston with the RDP backend as a systemd

View file

@ -959,7 +959,40 @@ async fn run(args: &[&str]) -> Result<()> {
.cloned() .cloned()
.collect::<Vec<_>>() .collect::<Vec<_>>()
.join("\n"); .join("\n");
bail!("nixos-container {cmdline} failed ({status}): {tail}"); let journal = container_journal_tail(args).await;
bail!("nixos-container {cmdline} failed ({status}): {tail}{journal}");
} }
Ok(()) Ok(())
} }
/// Best-effort tail of the target container's own journal, formatted
/// for appending to a failed `nixos-container update` error message.
///
/// `nixos-container`'s own stderr on a reload-phase failure is often
/// terse ("failed to reload container"); the real reason (which unit
/// failed `switch-to-configuration`) lands in the *container's*
/// journal, not on the host. Appending the tail lets a failed rebuild
/// self-document the failing unit without a second round-trip.
///
/// `args` is the `nixos-container` argument list: the verb first, the
/// container name second. Only `update` is handled — that is the
/// reload-phase case, and the container is still up (running the old
/// generation) so `journalctl -M` can reach it.
///
/// Returns a string starting with a newline and a header line,
/// followed by the journal text with trailing whitespace removed.
/// Returns "" for any other verb, when no container name is given,
/// when `journalctl` cannot be spawned, when it exits unsuccessfully,
/// or when it prints nothing but whitespace. It never produces an
/// error of its own.
async fn container_journal_tail(args: &[&str]) -> String {
    // Single source of truth for the tail length: passed to `-n` and
    // echoed in the header, so the two cannot drift apart.
    const JOURNAL_TAIL_LINES: &str = "40";

    // Verb must be `update` and a container name must follow it.
    let &["update", container, ..] = args else {
        return String::new();
    };

    let Ok(output) = Command::new("journalctl")
        .args(["-M", container, "-n", JOURNAL_TAIL_LINES, "--no-pager", "--output=short"])
        .output()
        .await
    else {
        // journalctl missing or not spawnable: stay silent.
        return String::new();
    };

    // A non-zero exit means the journal could not be read (e.g. the
    // machine is gone); don't present partial stdout as a valid tail.
    if !output.status.success() {
        return String::new();
    }

    let text = String::from_utf8_lossy(&output.stdout);
    let body = text.trim_end();
    // Trim before the emptiness check so whitespace-only output does
    // not produce a header with nothing under it.
    if body.is_empty() {
        return String::new();
    }

    format!(
        "\n--- last {JOURNAL_TAIL_LINES} journal lines from container '{container}' ---\n{body}"
    )
}