phase 6: container events + drop the 5s /api/state poll

new DashboardEvent::ContainerStateChanged + ContainerRemoved
close the last refetch loop on the dashboard. Coordinator's
rescan_containers_and_emit diffs a fresh container_view::build_all
against a cached last_containers map and fires per-row events.
called from actions::approve (post-spawn), actions::destroy,
the lifecycle_action wrapper, auto_update::rebuild_agent, and
the existing 10s crash_watch poll.

ContainerView extracted to its own module so coordinator and
dashboard can both build it. dashboard endpoints flip to 200;
container-lifecycle forms carry data-no-refresh. client drops
the periodic poll entirely — initial cold load + SSE for
everything afterwards. pending overlay reads from the existing
transientsState since the new event payload doesn't carry it.

PURG3 + meta-update keep the post-submit refetch since
tombstones + meta_inputs aren't event-derived yet; tracked in
TODO.md.
This commit is contained in:
müde 2026-05-17 22:01:15 +02:00
parent f153639cb4
commit e7ce35c503
11 changed files with 396 additions and 195 deletions

View file

@ -13,6 +13,7 @@ use tokio::sync::broadcast;
use crate::agent_server::{self, AgentSocket};
use crate::approvals::Approvals;
use crate::broker::Broker;
use crate::container_view::{self, ContainerView};
use crate::dashboard_events::DashboardEvent;
use crate::operator_questions::OperatorQuestions;
@ -64,6 +65,14 @@ pub struct Coordinator {
/// snapshot.
dashboard_events: broadcast::Sender<DashboardEvent>,
event_seq: AtomicU64,
/// Last container snapshot seen by `rescan_containers_and_emit`,
/// keyed by `ContainerView.name`. The rescan diffs a fresh
/// `container_view::build_all` against this map and emits one
/// `ContainerStateChanged` per added/changed row and one
/// `ContainerRemoved` per disappeared row. Async — guarded by a
/// tokio mutex so the rescan can `await` `lifecycle::list` /
/// `is_running` without blocking other coordinator paths.
last_containers: tokio::sync::Mutex<HashMap<String, ContainerView>>,
}
/// Per-agent in-progress state that the dashboard surfaces between approve
@ -142,6 +151,7 @@ impl Coordinator {
transient: Mutex::new(HashMap::new()),
dashboard_events,
event_seq: AtomicU64::new(0),
last_containers: tokio::sync::Mutex::new(HashMap::new()),
})
}
@ -291,6 +301,68 @@ impl Coordinator {
});
}
/// Rebuild the per-container snapshot, diff it against the last
/// one cached on `self`, and emit one
/// `DashboardEvent::ContainerStateChanged` per added/changed row
/// and one `DashboardEvent::ContainerRemoved` per disappeared row.
/// Call after any mutation that could affect what
/// `nixos-container list` returns or what a row's
/// `running` / `needs_update` / `needs_login` / `deployed_sha`
/// resolves to — lifecycle ops, destroy, approve (post-spawn),
/// rebuild, meta-update, and the crash-watcher's periodic poll.
/// Cheap when nothing changed (one `nixos-container list` + a
/// HashMap diff + zero emits).
pub async fn rescan_containers_and_emit(self: &Arc<Self>) {
let fresh = container_view::build_all(self).await;
let mut last = self.last_containers.lock().await;
let mut changed_or_new = Vec::new();
let mut removed = Vec::new();
// Diff into change vs. add.
for view in &fresh {
match last.get(&view.name) {
Some(prev) if prev == view => {} // unchanged
_ => changed_or_new.push(view.clone()),
}
}
// Anything in `last` but not in `fresh` is gone.
let fresh_names: std::collections::HashSet<&str> =
fresh.iter().map(|c| c.name.as_str()).collect();
for name in last.keys() {
if !fresh_names.contains(name.as_str()) {
removed.push(name.clone());
}
}
// Rebuild the cache from the fresh snapshot.
last.clear();
for c in fresh {
last.insert(c.name.clone(), c);
}
drop(last);
for c in changed_or_new {
self.emit_dashboard_event(DashboardEvent::ContainerStateChanged {
seq: self.next_seq(),
container: c,
});
}
for name in removed {
self.emit_dashboard_event(DashboardEvent::ContainerRemoved {
seq: self.next_seq(),
name,
});
}
}
/// Return a copy of the cached container views, sorted by name.
///
/// This only reads `self.last_containers`; it does not rebuild the
/// snapshot. `/api/state` uses it to cold-load page-open clients
/// without running `nixos-container list` itself, relying on
/// `rescan_containers_and_emit` calls to keep the cache current.
pub async fn containers_snapshot(&self) -> Vec<ContainerView> {
    let mut views: Vec<ContainerView> = self
        .last_containers
        .lock()
        .await
        .values()
        .cloned()
        .collect();
    // HashMap iteration order is unspecified; sort for a stable listing.
    views.sort_by(|lhs, rhs| lhs.name.cmp(&rhs.name));
    views
}
pub fn register_agent(self: &Arc<Self>, name: &str) -> Result<PathBuf> {
// Idempotent: drop any existing listener so re-registration (e.g. on rebuild,
// or after a hive-c0re restart cleared /run/hyperhive) gets a fresh socket.