rebuild_queue: switch dashboard / auto-update / manager call sites to enqueue
This commit is contained in:
parent
37f6bc4b6b
commit
11db5c2a8f
3 changed files with 48 additions and 139 deletions
|
|
@ -1571,82 +1571,18 @@ async fn post_meta_update(
|
|||
if inputs.is_empty() {
|
||||
return error_response("meta-update: no inputs selected");
|
||||
}
|
||||
let coord = state.coord.clone();
|
||||
let inputs_clone = inputs.clone();
|
||||
tokio::spawn(async move {
|
||||
run_meta_update(&coord, &inputs_clone).await;
|
||||
// Lock file changed — emit so dashboards refresh the
|
||||
// meta-inputs panel without a snapshot poll.
|
||||
emit_meta_inputs_snapshot(&coord);
|
||||
});
|
||||
state.coord.rebuild_queue.enqueue_with_inputs(
|
||||
crate::rebuild_queue::QueueKind::MetaUpdate,
|
||||
"hyperhive".to_owned(),
|
||||
crate::rebuild_queue::QueueSource::Manual,
|
||||
format!("meta-update via dashboard ({})", inputs.join(", ")),
|
||||
None,
|
||||
inputs,
|
||||
);
|
||||
state.coord.emit_rebuild_queue_snapshot();
|
||||
(StatusCode::OK, "ok").into_response()
|
||||
}
|
||||
|
||||
/// Background task: run `nix flake update <inputs>` in meta + commit,
|
||||
/// then rebuild every agent whose input was touched (or all agents
|
||||
/// when `hyperhive` was bumped, since that's the shared base). Each
|
||||
/// rebuild fires `Rebuilt { ok, note, ... }` to the manager so the
|
||||
/// operator and manager get the same feedback they'd see from an
|
||||
/// auto-update / manual dashboard rebuild.
|
||||
async fn run_meta_update(coord: &Arc<crate::coordinator::Coordinator>, inputs: &[String]) {
|
||||
// Held for the whole run (incl. the early `return` on lock failure):
|
||||
// emits `MetaUpdateRunning { running: true }` now and `false` on
|
||||
// drop so the META INPUTS panel shows progress (issue #259).
|
||||
let _progress = coord.meta_update_guard();
|
||||
tracing::info!(?inputs, "meta-update: starting");
|
||||
if let Err(e) = crate::meta::lock_update(inputs).await {
|
||||
tracing::warn!(error = ?e, "meta-update: lock_update failed");
|
||||
return;
|
||||
}
|
||||
|
||||
// Decide which agents to rebuild. Inputs are slash-paths from
|
||||
// the meta root — `hyperhive`, `hyperhive/nixpkgs`,
|
||||
// `agent-coder`, `agent-coder/mcp-matrix`, etc. Anything in the
|
||||
// hyperhive subtree affects every agent (shared base); anything
|
||||
// in `agent-<n>/...` only the named agent.
|
||||
let touched_hyperhive = inputs
|
||||
.iter()
|
||||
.any(|i| i == "hyperhive" || i.starts_with("hyperhive/"));
|
||||
let touched_agents: Vec<String> = inputs
|
||||
.iter()
|
||||
.filter_map(|i| i.strip_prefix("agent-"))
|
||||
.map(|rest| rest.split('/').next().unwrap_or(rest).to_owned())
|
||||
.collect();
|
||||
let agents_to_rebuild: Vec<String> = if touched_hyperhive {
|
||||
crate::lifecycle::list()
|
||||
.await
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.filter_map(|c| {
|
||||
if c == crate::lifecycle::MANAGER_NAME {
|
||||
Some(crate::lifecycle::MANAGER_NAME.to_owned())
|
||||
} else {
|
||||
c.strip_prefix(crate::lifecycle::AGENT_PREFIX)
|
||||
.map(str::to_owned)
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
touched_agents
|
||||
};
|
||||
|
||||
let current_rev =
|
||||
crate::auto_update::current_flake_rev(&coord.hyperhive_flake).unwrap_or_default();
|
||||
// Sequential rebuild loop — the META_LOCK guards meta-side
|
||||
// races but parallel nix builds also serialise via nix-daemon,
|
||||
// so sequential is just as fast in practice and keeps logs
|
||||
// readable.
|
||||
for name in agents_to_rebuild {
|
||||
tracing::info!(%name, "meta-update: rebuilding agent");
|
||||
if let Err(e) = crate::auto_update::rebuild_agent(coord, &name, ¤t_rev).await {
|
||||
tracing::warn!(%name, error = ?e, "meta-update: rebuild failed");
|
||||
// continue: surface each per-agent failure via its own
|
||||
// Rebuilt event; don't abort the whole batch.
|
||||
}
|
||||
}
|
||||
tracing::info!("meta-update: done");
|
||||
}
|
||||
|
||||
async fn post_op_send(State(state): State<AppState>, Form(form): Form<OpSendForm>) -> Response {
|
||||
let to = form.to.trim().to_owned();
|
||||
let body = form.body.trim().to_owned();
|
||||
|
|
@ -1708,28 +1644,16 @@ async fn post_request_spawn(
|
|||
}
|
||||
|
||||
async fn post_rebuild(State(state): State<AppState>, AxumPath(name): AxumPath<String>) -> Response {
|
||||
let Some(current_rev) = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake)
|
||||
else {
|
||||
return error_response(
|
||||
"rebuild: hyperhive_flake has no canonical path; manual rebuild only via `hive-c0re rebuild`",
|
||||
);
|
||||
};
|
||||
let coord = state.coord.clone();
|
||||
lifecycle_action(
|
||||
&state,
|
||||
&name,
|
||||
crate::coordinator::TransientKind::Rebuilding,
|
||||
"rebuild",
|
||||
move |n| {
|
||||
let coord = coord.clone();
|
||||
let rev = current_rev.clone();
|
||||
async move { crate::auto_update::rebuild_agent(&coord, &n, &rev).await }
|
||||
},
|
||||
// rebuild_agent fires kick_agent on success itself, so the
|
||||
// extra-closure is a no-op here.
|
||||
|_, _| {},
|
||||
)
|
||||
.await
|
||||
let logical = strip_container_prefix(&name);
|
||||
state.coord.rebuild_queue.enqueue(
|
||||
crate::rebuild_queue::QueueKind::Rebuild,
|
||||
logical,
|
||||
crate::rebuild_queue::QueueSource::Manual,
|
||||
"manual via dashboard ↻ R3BU1LD button".to_owned(),
|
||||
None,
|
||||
);
|
||||
state.coord.emit_rebuild_queue_snapshot();
|
||||
(StatusCode::OK, "ok").into_response()
|
||||
}
|
||||
|
||||
/// Common shape for the simple lifecycle action handlers (start /
|
||||
|
|
@ -1816,12 +1740,7 @@ async fn post_start(State(state): State<AppState>, AxumPath(name): AxumPath<Stri
|
|||
}
|
||||
|
||||
async fn post_update_all(State(state): State<AppState>) -> Response {
|
||||
let Some(current_rev) = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake)
|
||||
else {
|
||||
return error_response("update-all: hyperhive_flake has no canonical path");
|
||||
};
|
||||
let containers = lifecycle::list().await.unwrap_or_default();
|
||||
let mut errors = Vec::new();
|
||||
for container in containers {
|
||||
let logical = if container == lifecycle::MANAGER_NAME {
|
||||
lifecycle::MANAGER_NAME.to_owned()
|
||||
|
|
@ -1830,21 +1749,16 @@ async fn post_update_all(State(state): State<AppState>) -> Response {
|
|||
} else {
|
||||
continue;
|
||||
};
|
||||
if let Err(e) =
|
||||
crate::auto_update::rebuild_agent(&state.coord, &logical, &current_rev).await
|
||||
{
|
||||
errors.push(format!("{logical}: {e:#}"));
|
||||
}
|
||||
}
|
||||
if errors.is_empty() {
|
||||
// Each rebuild_agent rescanned; no extra refetch needed.
|
||||
(StatusCode::OK, "ok").into_response()
|
||||
} else {
|
||||
error_response(&format!(
|
||||
"update-all partial failure:\n{}",
|
||||
errors.join("\n")
|
||||
))
|
||||
state.coord.rebuild_queue.enqueue(
|
||||
crate::rebuild_queue::QueueKind::Rebuild,
|
||||
logical,
|
||||
crate::rebuild_queue::QueueSource::Manual,
|
||||
"manual via dashboard 🌀 UPDATE ALL".to_owned(),
|
||||
None,
|
||||
);
|
||||
}
|
||||
state.coord.emit_rebuild_queue_snapshot();
|
||||
(StatusCode::OK, "ok").into_response()
|
||||
}
|
||||
|
||||
fn transient_label(k: crate::coordinator::TransientKind) -> &'static str {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue