dashboard: surface silent unwrap_or_default in api_state

every snapshot source backing /api/state used .unwrap_or_default()
— sqlite errors, broker errors, nixos-container list failures,
operator_questions decode crashes all degraded to empty lists
without a log line. the 'pending question doesn't render'
bug we've been chasing was likely a row-decode error in
OperatorQuestions::pending() being swallowed this way.

new log_default(what, result) replaces each call site: same
default value on Err, but it first emits a warn (target=api_state)
with the source name and the Debug-formatted error. five sources covered:
nixos-container list, approvals.pending,
approvals.recent_resolved, broker.recent_for(operator),
questions.pending. next time the question goes missing the
journal will say which source failed and how.

todo updated — pending-question entry now points at the new
log instead of three suspect paths.
This commit is contained in:
müde 2026-05-16 03:49:49 +02:00
parent 74ba8a63e1
commit 40938d8b54
2 changed files with 45 additions and 29 deletions

View file

@ -239,6 +239,25 @@ struct ApprovalView {
diff_html: Option<String>,
}
/// Replace silent `.unwrap_or_default()` on the data sources behind
/// `/api/state` so that whichever query degrades surfaces in journald
/// instead of leaving the operator staring at an empty list. The
/// dashboard still degrades to a sensible default value; the warn
/// is just the diagnostic breadcrumb the old code swallowed.
fn log_default<T, E>(what: &str, result: std::result::Result<T, E>) -> T
where
T: Default,
E: std::fmt::Debug,
{
match result {
Ok(v) => v,
Err(e) => {
tracing::warn!(target: "api_state", source = %what, error = ?e, "snapshot source failed; using default");
T::default()
}
}
}
async fn api_state(headers: HeaderMap, State(state): State<AppState>) -> axum::Json<StateSnapshot> {
let host = headers
.get("host")
@ -246,35 +265,36 @@ async fn api_state(headers: HeaderMap, State(state): State<AppState>) -> axum::J
.unwrap_or("localhost");
let hostname = host.split(':').next().unwrap_or(host).to_owned();
let raw_containers = lifecycle::list().await.unwrap_or_default();
let raw_containers = log_default("nixos-container list", lifecycle::list().await);
let current_rev = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake);
let transient_snapshot = state.coord.transient_snapshot();
let pending_approvals = gc_orphans(
&state.coord,
state.coord.approvals.pending().unwrap_or_default(),
log_default("approvals.pending", state.coord.approvals.pending()),
);
let (containers, any_stale) =
build_container_views(&raw_containers, current_rev.as_deref(), &transient_snapshot).await;
let transients = build_transient_views(&raw_containers, &transient_snapshot);
let approvals = build_approval_views(pending_approvals).await;
let approval_history = state
.coord
.approvals
.recent_resolved(30)
.unwrap_or_default()
.into_iter()
.map(history_view)
.collect();
let approval_history = log_default(
"approvals.recent_resolved",
state.coord.approvals.recent_resolved(30),
)
.into_iter()
.map(history_view)
.collect();
let tombstones = build_tombstone_views(&state.coord, &containers, &transient_snapshot);
let port_conflicts = build_port_conflicts(&containers);
let operator_inbox = state
.coord
.broker
.recent_for(hive_sh4re::OPERATOR_RECIPIENT, 50)
.unwrap_or_default();
let questions = state.coord.questions.pending().unwrap_or_default();
let operator_inbox = log_default(
"broker.recent_for(operator)",
state
.coord
.broker
.recent_for(hive_sh4re::OPERATOR_RECIPIENT, 50),
);
let questions = log_default("questions.pending", state.coord.questions.pending());
axum::Json(StateSnapshot {
hostname,