rebuild_queue: switch dashboard / auto-update / manager call sites to enqueue

This commit is contained in:
damocles 2026-05-23 11:53:14 +02:00 committed by Mara
parent 37f6bc4b6b
commit 11db5c2a8f
3 changed files with 48 additions and 139 deletions

View file

@ -206,10 +206,9 @@ pub async fn run(coord: Arc<Coordinator>) -> Result<()> {
}
};
let current_rev =
current_flake_rev(&coord.hyperhive_flake).unwrap_or_default();
let _current_rev = current_flake_rev(&coord.hyperhive_flake).unwrap_or_default();
tracing::info!(agents = containers.len(), "auto-update: rebuilding all on startup");
tracing::info!(agents = containers.len(), "auto-update: queueing all on startup");
for container in containers {
let logical = if container == MANAGER_NAME {
Some(MANAGER_NAME.to_owned())
@ -217,9 +216,14 @@ pub async fn run(coord: Arc<Coordinator>) -> Result<()> {
container.strip_prefix(AGENT_PREFIX).map(str::to_owned)
};
let Some(name) = logical else { continue };
if let Err(e) = rebuild_agent(&coord, &name, &current_rev).await {
tracing::warn!(%name, error = ?e, "auto-update: rebuild failed");
}
coord.rebuild_queue.enqueue(
crate::rebuild_queue::QueueKind::Rebuild,
name,
crate::rebuild_queue::QueueSource::AutoUpdate,
"startup sweep".to_owned(),
None,
);
}
coord.emit_rebuild_queue_snapshot();
Ok(())
}

View file

@ -1571,82 +1571,18 @@ async fn post_meta_update(
if inputs.is_empty() {
return error_response("meta-update: no inputs selected");
}
let coord = state.coord.clone();
let inputs_clone = inputs.clone();
tokio::spawn(async move {
run_meta_update(&coord, &inputs_clone).await;
// Lock file changed — emit so dashboards refresh the
// meta-inputs panel without a snapshot poll.
emit_meta_inputs_snapshot(&coord);
});
state.coord.rebuild_queue.enqueue_with_inputs(
crate::rebuild_queue::QueueKind::MetaUpdate,
"hyperhive".to_owned(),
crate::rebuild_queue::QueueSource::Manual,
format!("meta-update via dashboard ({})", inputs.join(", ")),
None,
inputs,
);
state.coord.emit_rebuild_queue_snapshot();
(StatusCode::OK, "ok").into_response()
}
/// Background task: run `nix flake update <inputs>` in meta + commit,
/// then rebuild every agent whose input was touched (or all agents
/// when `hyperhive` was bumped, since that's the shared base). Each
/// rebuild fires `Rebuilt { ok, note, ... }` to the manager so the
/// operator and manager get the same feedback they'd see from an
/// auto-update / manual dashboard rebuild.
///
/// `inputs` are slash-paths from the meta root. An agent that is named
/// by several inputs (e.g. both `agent-coder` and
/// `agent-coder/mcp-matrix`) is rebuilt once, not once per input.
///
/// Nothing is returned: a `lock_update` failure is logged and ends the
/// run early; a per-agent rebuild failure is logged and the loop moves
/// on to the next agent.
async fn run_meta_update(coord: &Arc<crate::coordinator::Coordinator>, inputs: &[String]) {
    // Held for the whole run (incl. the early `return` on lock failure):
    // emits `MetaUpdateRunning { running: true }` now and `false` on
    // drop so the META INPUTS panel shows progress (issue #259).
    let _progress = coord.meta_update_guard();
    tracing::info!(?inputs, "meta-update: starting");
    if let Err(e) = crate::meta::lock_update(inputs).await {
        tracing::warn!(error = ?e, "meta-update: lock_update failed");
        return;
    }
    // Decide which agents to rebuild. Inputs are slash-paths from
    // the meta root — `hyperhive`, `hyperhive/nixpkgs`,
    // `agent-coder`, `agent-coder/mcp-matrix`, etc. Anything in the
    // hyperhive subtree affects every agent (shared base); anything
    // in `agent-<n>/...` only the named agent.
    let touched_hyperhive = inputs
        .iter()
        .any(|i| i == "hyperhive" || i.starts_with("hyperhive/"));
    // Collect the agent names in first-seen order, skipping names we
    // already have: two inputs under the same `agent-<n>` subtree must
    // not trigger two back-to-back rebuilds of that agent. The list is
    // tiny (one entry per selected input), so a linear `any` scan is
    // simpler than pulling in a set type.
    let mut touched_agents: Vec<String> = Vec::new();
    for input in inputs {
        let Some(rest) = input.strip_prefix("agent-") else {
            continue;
        };
        let agent = rest.split('/').next().unwrap_or(rest);
        if !touched_agents.iter().any(|seen| seen == agent) {
            touched_agents.push(agent.to_owned());
        }
    }
    let agents_to_rebuild: Vec<String> = if touched_hyperhive {
        // Shared base changed: rebuild the manager and every container
        // carrying the agent prefix; anything else in the listing is
        // ignored. A failed listing degrades to "no agents".
        crate::lifecycle::list()
            .await
            .unwrap_or_default()
            .into_iter()
            .filter_map(|c| {
                if c == crate::lifecycle::MANAGER_NAME {
                    Some(crate::lifecycle::MANAGER_NAME.to_owned())
                } else {
                    c.strip_prefix(crate::lifecycle::AGENT_PREFIX)
                        .map(str::to_owned)
                }
            })
            .collect()
    } else {
        touched_agents
    };
    // An unresolvable flake rev falls back to the default (empty) value
    // rather than aborting the batch.
    let current_rev =
        crate::auto_update::current_flake_rev(&coord.hyperhive_flake).unwrap_or_default();
    // Sequential rebuild loop — the META_LOCK guards meta-side
    // races but parallel nix builds also serialise via nix-daemon,
    // so sequential is just as fast in practice and keeps logs
    // readable.
    for name in agents_to_rebuild {
        tracing::info!(%name, "meta-update: rebuilding agent");
        if let Err(e) = crate::auto_update::rebuild_agent(coord, &name, &current_rev).await {
            tracing::warn!(%name, error = ?e, "meta-update: rebuild failed");
            // continue: surface each per-agent failure via its own
            // Rebuilt event; don't abort the whole batch.
        }
    }
    tracing::info!("meta-update: done");
}
async fn post_op_send(State(state): State<AppState>, Form(form): Form<OpSendForm>) -> Response {
let to = form.to.trim().to_owned();
let body = form.body.trim().to_owned();
@ -1708,28 +1644,16 @@ async fn post_request_spawn(
}
async fn post_rebuild(State(state): State<AppState>, AxumPath(name): AxumPath<String>) -> Response {
let Some(current_rev) = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake)
else {
return error_response(
"rebuild: hyperhive_flake has no canonical path; manual rebuild only via `hive-c0re rebuild`",
let logical = strip_container_prefix(&name);
state.coord.rebuild_queue.enqueue(
crate::rebuild_queue::QueueKind::Rebuild,
logical,
crate::rebuild_queue::QueueSource::Manual,
"manual via dashboard ↻ R3BU1LD button".to_owned(),
None,
);
};
let coord = state.coord.clone();
lifecycle_action(
&state,
&name,
crate::coordinator::TransientKind::Rebuilding,
"rebuild",
move |n| {
let coord = coord.clone();
let rev = current_rev.clone();
async move { crate::auto_update::rebuild_agent(&coord, &n, &rev).await }
},
// rebuild_agent fires kick_agent on success itself, so the
// extra-closure is a no-op here.
|_, _| {},
)
.await
state.coord.emit_rebuild_queue_snapshot();
(StatusCode::OK, "ok").into_response()
}
/// Common shape for the simple lifecycle action handlers (start /
@ -1816,12 +1740,7 @@ async fn post_start(State(state): State<AppState>, AxumPath(name): AxumPath<Stri
}
async fn post_update_all(State(state): State<AppState>) -> Response {
let Some(current_rev) = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake)
else {
return error_response("update-all: hyperhive_flake has no canonical path");
};
let containers = lifecycle::list().await.unwrap_or_default();
let mut errors = Vec::new();
for container in containers {
let logical = if container == lifecycle::MANAGER_NAME {
lifecycle::MANAGER_NAME.to_owned()
@ -1830,21 +1749,16 @@ async fn post_update_all(State(state): State<AppState>) -> Response {
} else {
continue;
};
if let Err(e) =
crate::auto_update::rebuild_agent(&state.coord, &logical, &current_rev).await
{
errors.push(format!("{logical}: {e:#}"));
state.coord.rebuild_queue.enqueue(
crate::rebuild_queue::QueueKind::Rebuild,
logical,
crate::rebuild_queue::QueueSource::Manual,
"manual via dashboard 🌀 UPDATE ALL".to_owned(),
None,
);
}
}
if errors.is_empty() {
// Each rebuild_agent rescanned; no extra refetch needed.
state.coord.emit_rebuild_queue_snapshot();
(StatusCode::OK, "ok").into_response()
} else {
error_response(&format!(
"update-all partial failure:\n{}",
errors.join("\n")
))
}
}
fn transient_label(k: crate::coordinator::TransientKind) -> &'static str {

View file

@ -291,26 +291,17 @@ async fn dispatch(req: &ManagerRequest, coord: &Arc<Coordinator>) -> ManagerResp
}
}
ManagerRequest::Update { name } => {
tracing::info!(%name, "manager: update");
let Some(current_rev) = crate::auto_update::current_flake_rev(&coord.hyperhive_flake)
else {
return ManagerResponse::Err {
message: "update: hyperhive_flake has no canonical path".into(),
};
};
let guard = coord.transient_guard(name, crate::coordinator::TransientKind::Rebuilding);
let result = crate::auto_update::rebuild_agent(coord, name, &current_rev).await;
drop(guard);
match result {
Ok(()) => {
coord.kick_agent(name, "container rebuilt");
tracing::info!(%name, "manager: enqueue update");
coord.rebuild_queue.enqueue(
crate::rebuild_queue::QueueKind::Rebuild,
name.to_owned(),
crate::rebuild_queue::QueueSource::Manual,
"manager `update` tool".to_owned(),
None,
);
coord.emit_rebuild_queue_snapshot();
ManagerResponse::Ok
}
Err(e) => ManagerResponse::Err {
message: format!("{e:#}"),
},
}
}
ManagerRequest::RequestUpdateMetaInputs {
inputs,
description,