systemd: fetch unit state for local + remote nspawn containers (step 5)

This commit is contained in:
Damocles 2026-05-07 20:50:39 +02:00
commit 5676b1ac62
3 changed files with 192 additions and 70 deletions

View file

@ -6,11 +6,13 @@
// to require QuickControls2.prl files that nixpkgs strips. Switch to
// QList<QVariantMap> when a release ships with both fixes.
//
// Local data uses native zbus over the system + session buses. Remote data is
// fetched by spawning `busctl --host=<target>`, which tunnels sd-bus through
// SSH using whatever the user already has set up in `~/.ssh/config`. We parse
// busctl's JSON output with serde_json. Remote fetches only happen while the
// applet is open (toggled via `setAppletOpen`).
// Local host data uses native zbus over the system + session buses. Everything
// else (local nspawn containers, configured remote hosts, and remote nspawn
// containers) is fetched by spawning `busctl` with the right combination of
// `--host=<ssh-target>` and `--machine=<container>` flags. busctl uses
// systemd-machined's OpenMachine() to tunnel sd-bus into a container, and ssh
// to reach a remote host. Remote fetches only happen while the applet is open
// (toggled via `setAppletOpen`).
use crate::modules_service;
use core::pin::Pin;
@ -37,14 +39,15 @@ pub mod qobject {
#[qproperty(QString, hostname)]
// Local failed unit count (drives the bar module label).
#[qproperty(i32, failed_count, cxx_name = "failedCount")]
// JSON array, local first then nspawn containers then configured remotes.
// Recursive JSON tree. Top level is [local, remote1, remote2, ...].
// Each entry:
// { name, isLocal, marker, systemState, runningCount, totalCount,
// failedUnits: [{name, description, subState, scope, machine}],
// runningUnits: [...],
// errorKind: ""|"transient"|"permanent",
// errorReason: string,
// lastSeen: 0 | unix_seconds }
// lastSeen: 0 | unix_seconds,
// containers: [<MachineJson>, ...] }
#[qproperty(QString, machines_json, cxx_name = "machinesJson")]
type SystemdService = super::SystemdServiceRust;
@ -136,6 +139,8 @@ struct MachineJson {
error_reason: String,
#[serde(rename = "lastSeen")]
last_seen: u64,
#[serde(default)]
containers: Vec<MachineJson>,
}
impl MachineJson {
@ -152,6 +157,24 @@ impl MachineJson {
error_kind: String::new(),
error_reason: String::new(),
last_seen: 0,
containers: Vec::new(),
}
}
/// Builds the entry for a machine whose fetch failed.
///
/// The result is never local, reports `"unreachable"` as its system state,
/// and carries no units, counts, marker, or nested containers. `kind` and
/// `reason` are stored as the error classification and message, and
/// `prev_last_seen` is stored unchanged as `last_seen`.
fn errored(name: String, kind: &'static str, reason: String, prev_last_seen: u64) -> Self {
    Self {
        // Identity.
        name,
        is_local: false,
        marker: String::new(),
        // Why the fetch failed.
        error_kind: kind.to_owned(),
        error_reason: reason,
        last_seen: prev_last_seen,
        // Nothing could be read from the machine, so every payload field is empty.
        system_state: String::from("unreachable"),
        running_count: 0,
        total_count: 0,
        failed_units: Vec::new(),
        running_units: Vec::new(),
        containers: Vec::new(),
    }
}
}
@ -161,7 +184,7 @@ pub struct SystemdServiceRust {
failed_count: i32,
machines_json: QString,
applet_open: bool,
// Last successful or attempted remote fetch per target. Persists across
// Last successful or attempted remote fetch per ssh target. Persists across
// polls so the UI can show stale data with a "last seen" timestamp when
// the host goes unreachable.
remote_cache: HashMap<String, MachineJson>,
@ -202,7 +225,7 @@ fn rt() -> &'static Runtime {
})
}
async fn fetch_units(bus: &Connection) -> (String, Vec<UnitTuple>) {
async fn fetch_units_zbus(bus: &Connection) -> (String, Vec<UnitTuple>) {
let mut state = String::from("unknown");
let mut units = Vec::new();
if let Ok(mgr) = SystemdManagerProxy::new(bus).await {
@ -242,7 +265,7 @@ fn partition_units(
(failed, running, total)
}
async fn poll_local() -> (
async fn poll_local_zbus() -> (
String,
Vec<UnitTuple>,
Vec<UnitTuple>,
@ -254,7 +277,7 @@ async fn poll_local() -> (
let mut machines = Vec::new();
if let Ok(c) = Connection::system().await {
let (s, u) = fetch_units(&c).await;
let (s, u) = fetch_units_zbus(&c).await;
sys_state = s;
sys_units = u;
if let Ok(m) = MachinedProxy::new(&c).await {
@ -268,63 +291,74 @@ async fn poll_local() -> (
}
}
if let Ok(c) = Connection::session().await {
let (_, u) = fetch_units(&c).await;
let (_, u) = fetch_units_zbus(&c).await;
user_units = u;
}
(sys_state, sys_units, user_units, machines)
}
// Spawn `busctl --host=<target>` to fetch system_state + units. Returns the
// MachineJson on success, or a (kind, reason) pair on failure.
fn fetch_remote(target: &str) -> Result<MachineJson, (&'static str, String)> {
let state_out = std::process::Command::new("busctl")
.args([
"--host",
target,
/// Runs `busctl` with `prefix` followed by `call` and parses its stdout as JSON.
///
/// `prefix` holds the leading flags (for example `--host <target>` and/or
/// `--machine <container>`); `call` holds the remaining arguments. The whole
/// JSON document is returned; callers pick out the field they need.
///
/// # Errors
///
/// Returns a `(kind, reason)` pair:
/// - `("transient", ..)` when the process cannot be spawned or when stdout is
///   not valid JSON;
/// - `(classify_error(stderr), reason)` when busctl exits unsuccessfully.
///   `reason` is the stderr text with surrounding whitespace removed, or the
///   exit status when busctl wrote nothing to stderr.
fn busctl_call(
    prefix: &[&str],
    call: &[&str],
) -> Result<serde_json::Value, (&'static str, String)> {
    let out = std::process::Command::new("busctl")
        .args(prefix)
        .args(call)
        .output()
        .map_err(|e| ("transient", format!("busctl spawn failed: {e}")))?;
    if !out.status.success() {
        let stderr = String::from_utf8_lossy(&out.stderr);
        // Classify on the untouched stderr so classification is unaffected by
        // the cosmetic clean-up of the reason below.
        let kind = classify_error(&stderr);
        let trimmed = stderr.trim();
        let reason = if trimmed.is_empty() {
            // A process that dies without output (e.g. killed by a signal)
            // would otherwise produce an error with an empty reason.
            format!("busctl exited with {}", out.status)
        } else {
            trimmed.to_owned()
        };
        return Err((kind, reason));
    }
    serde_json::from_slice(&out.stdout)
        .map_err(|e| ("transient", format!("invalid busctl json: {e}")))
}
// Fetch SystemState + ListUnits via busctl with the given prefix flags.
// `prefix` examples:
// [] - local system bus (unused; we use zbus locally)
// ["--host", "muede-pc2"] - remote host
// ["--machine", "damocles-lab"] - local container
// ["--host", "pc2", "--machine", "x"] - container on a remote host
fn fetch_via_busctl(
name: &str,
prefix: &[&str],
is_local: bool,
marker: &str,
) -> Result<MachineJson, (&'static str, String)> {
let state_v = busctl_call(
prefix,
&[
"--json=short",
"get-property",
"org.freedesktop.systemd1",
"/org/freedesktop/systemd1",
"org.freedesktop.systemd1.Manager",
"SystemState",
])
.output()
.map_err(|e| ("transient", format!("busctl spawn failed: {e}")))?;
if !state_out.status.success() {
let stderr = String::from_utf8_lossy(&state_out.stderr).into_owned();
return Err((classify_error(&stderr), stderr));
}
let state_v: serde_json::Value = serde_json::from_slice(&state_out.stdout)
.map_err(|e| ("transient", format!("invalid SystemState json: {e}")))?;
],
)?;
let system_state = state_v
.get("data")
.and_then(|d| d.as_str())
.unwrap_or("unknown")
.to_string();
let units_out = std::process::Command::new("busctl")
.args([
"--host",
target,
let units_v = busctl_call(
prefix,
&[
"--json=short",
"call",
"org.freedesktop.systemd1",
"/org/freedesktop/systemd1",
"org.freedesktop.systemd1.Manager",
"ListUnits",
])
.output()
.map_err(|e| ("transient", format!("busctl spawn failed: {e}")))?;
if !units_out.status.success() {
let stderr = String::from_utf8_lossy(&units_out.stderr).into_owned();
return Err((classify_error(&stderr), stderr));
}
let v: serde_json::Value = serde_json::from_slice(&units_out.stdout)
.map_err(|e| ("transient", format!("invalid ListUnits json: {e}")))?;
let arr = v
],
)?;
let arr = units_v
.get("data")
.and_then(|d| d.get(0))
.and_then(|d| d.as_array())
@ -344,7 +378,7 @@ fn fetch_remote(target: &str) -> Result<MachineJson, (&'static str, String)> {
description: arr[1].as_str().unwrap_or("").to_string(),
sub_state: arr[4].as_str().unwrap_or("").to_string(),
scope: "system".to_string(),
machine: target.to_string(),
machine: name.to_string(),
};
match arr[3].as_str() {
Some("failed") => failed.push(entry),
@ -354,9 +388,9 @@ fn fetch_remote(target: &str) -> Result<MachineJson, (&'static str, String)> {
}
Ok(MachineJson {
name: target.to_string(),
is_local: false,
marker: String::new(),
name: name.to_string(),
is_local,
marker: marker.to_string(),
system_state,
running_count: running.len() as i32,
total_count: total,
@ -365,9 +399,42 @@ fn fetch_remote(target: &str) -> Result<MachineJson, (&'static str, String)> {
error_kind: String::new(),
error_reason: String::new(),
last_seen: now_unix(),
containers: Vec::new(),
})
}
/// Lists machine names reported by `org.freedesktop.machine1.Manager.ListMachines`
/// when called through busctl with the given `prefix` (empty = local).
///
/// The `.host` entry is dropped. This is best-effort: a failed busctl call or
/// a reply without an array at `data[0]` yields an empty list.
fn list_containers_busctl(prefix: &[&str]) -> Vec<String> {
    const LIST_MACHINES: [&str; 6] = [
        "--json=short",
        "call",
        "org.freedesktop.machine1",
        "/org/freedesktop/machine1",
        "org.freedesktop.machine1.Manager",
        "ListMachines",
    ];

    let reply = busctl_call(prefix, &LIST_MACHINES).ok();
    reply
        .as_ref()
        .and_then(|doc| doc.get("data"))
        .and_then(|data| data.get(0))
        .and_then(|first| first.as_array())
        .map(|machines| {
            machines
                .iter()
                // Each entry is itself an array; its first element is the name.
                .filter_map(|entry| entry.as_array()?.first()?.as_str())
                .filter(|&machine| machine != ".host")
                .map(str::to_owned)
                .collect()
        })
        .unwrap_or_default()
}
// Classify an ssh/busctl stderr blob into transient vs permanent so the UI
// can pick a color and decide whether to retry aggressively.
fn classify_error(stderr: &str) -> &'static str {
@ -392,7 +459,8 @@ impl cxx_qt::Initialize for qobject::SystemdService {
impl qobject::SystemdService {
fn poll(mut self: Pin<&mut Self>) {
let (sys_state, sys_units, user_units, containers) = rt().block_on(poll_local());
let (sys_state, sys_units, user_units, local_container_names) =
rt().block_on(poll_local_zbus());
let (sys_failed, sys_running, sys_total) = partition_units(sys_units, "system", "");
let (user_failed, user_running, user_total) = partition_units(user_units, "user", "");
@ -408,6 +476,21 @@ impl qobject::SystemdService {
let local_running_count = running.len() as i32;
let local_total_count = sys_total + user_total;
let local_containers: Vec<MachineJson> = if self.as_ref().rust().applet_open {
local_container_names
.iter()
.map(|(name, _, _)| {
fetch_via_busctl(name, &["--machine", name], false, "")
.unwrap_or_else(|(k, r)| MachineJson::errored(name.clone(), k, r, 0))
})
.collect()
} else {
local_container_names
.iter()
.map(|(n, _, _)| MachineJson::placeholder(n.clone(), false))
.collect()
};
let local = MachineJson {
name: read_hostname(),
is_local: true,
@ -420,16 +503,12 @@ impl qobject::SystemdService {
error_kind: String::new(),
error_reason: String::new(),
last_seen: now_unix(),
containers: local_containers,
};
let mut all_machines = Vec::new();
all_machines.push(local);
// Local nspawn containers (unit fetching for them lands in step 5).
for (name, _class, _service) in &containers {
all_machines.push(MachineJson::placeholder(name.clone(), false));
}
// Configured remote machines. Dedup local: drop entries matching the
// local hostname or `localhost`, with or without a `user@` prefix.
let host = read_hostname();
@ -455,21 +534,28 @@ impl qobject::SystemdService {
.get(target)
.map(|m| m.last_seen)
.unwrap_or(0);
let entry = match fetch_remote(target) {
Ok(m) => m,
Err((kind, reason)) => MachineJson {
name: target.clone(),
is_local: false,
marker: String::new(),
system_state: "unreachable".into(),
running_count: 0,
total_count: 0,
failed_units: Vec::new(),
running_units: Vec::new(),
error_kind: kind.to_string(),
error_reason: reason,
last_seen: prev_last_seen,
},
let entry = match fetch_via_busctl(target, &["--host", target], false, "") {
Ok(mut m) => {
// Enumerate + fetch remote nspawn containers via the
// remote host's machined.
let names = list_containers_busctl(&["--host", target]);
m.containers = names
.iter()
.map(|cn| {
fetch_via_busctl(
cn,
&["--host", target, "--machine", cn],
false,
"",
)
.unwrap_or_else(|(k, r)| MachineJson::errored(cn.clone(), k, r, 0))
})
.collect();
m
}
Err((kind, reason)) => {
MachineJson::errored(target.clone(), kind, reason, prev_last_seen)
}
};
self.as_mut()
.rust_mut()
@ -519,7 +605,7 @@ impl qobject::SystemdService {
let machine = machine.to_string();
let _ = self;
rt().block_on(async move {
// Local-only restart for now. Container/remote restart comes later.
// Local-only restart for now. Container/remote restart is TODO.
if !machine.is_empty() {
tracing::warn!(target: "nova_plugin", machine = %machine, "container/remote restart not yet implemented");
return;

View file

@ -43,6 +43,7 @@ Column {
errorKind: _row.modelData.errorKind ?? ""
errorReason: _row.modelData.errorReason ?? ""
lastSeen: _row.modelData.lastSeen ?? 0
containers: _row.modelData.containers ?? []
onContentResized: root.contentResized()
}
}

View file

@ -22,6 +22,8 @@ Column {
property string errorKind: ""
property string errorReason: ""
property int lastSeen: 0
property var containers: []
property int depth: 0
signal contentResized
onHeightChanged: root.contentResized()
@ -231,4 +233,37 @@ Column {
accentColor: root.accentColor
}
}
// Nested containers running on this machine. One child section is created per
// entry of `root.containers`; each child is shifted right to convey hierarchy.
Repeater {
// Falls back to an empty list so no delegates are created when unset.
model: root.containers ?? []
// Wrapper item: gives each child section its own row and lets it be offset
// horizontally via anchors.
delegate: Item {
id: _childWrap
// The container entry for this row, injected by the Repeater.
required property var modelData
width: root.width
// Child height plus 4 units of extra vertical space.
height: _child.height + 4
// Same component type as the enclosing section, so nesting can repeat for
// as many levels as the `containers` data provides.
SystemdMachineSection {
id: _child
anchors.left: parent.left
// Left offset of 16; the width below is reduced by the same amount.
anchors.leftMargin: 16
width: parent.width - 16
accentColor: root.accentColor
// `name` is used for both the machine name and the title; the remaining
// fields fall back to neutral defaults when absent from the entry.
machineName: _childWrap.modelData.name
title: _childWrap.modelData.name
marker: _childWrap.modelData.marker ?? ""
systemState: _childWrap.modelData.systemState ?? "unknown"
runningCount: _childWrap.modelData.runningCount ?? 0
totalCount: _childWrap.modelData.totalCount ?? 0
failedUnits: _childWrap.modelData.failedUnits ?? []
runningUnits: _childWrap.modelData.runningUnits ?? []
errorKind: _childWrap.modelData.errorKind ?? ""
errorReason: _childWrap.modelData.errorReason ?? ""
lastSeen: _childWrap.modelData.lastSeen ?? 0
// Pass the entry's own nested containers down to the child section.
containers: _childWrap.modelData.containers ?? []
// One level deeper than this section.
// NOTE(review): no consumer of `depth` is visible in this view — confirm it is used.
depth: root.depth + 1
// Re-emit the child's resize signal through this section's own signal.
onContentResized: root.contentResized()
}
}
}
}