rebuild_queue: switch dashboard / auto-update / manager call sites to enqueue
This commit is contained in:
parent
37f6bc4b6b
commit
11db5c2a8f
3 changed files with 48 additions and 139 deletions
|
|
@ -206,10 +206,9 @@ pub async fn run(coord: Arc<Coordinator>) -> Result<()> {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let current_rev =
|
let _current_rev = current_flake_rev(&coord.hyperhive_flake).unwrap_or_default();
|
||||||
current_flake_rev(&coord.hyperhive_flake).unwrap_or_default();
|
|
||||||
|
|
||||||
tracing::info!(agents = containers.len(), "auto-update: rebuilding all on startup");
|
tracing::info!(agents = containers.len(), "auto-update: queueing all on startup");
|
||||||
for container in containers {
|
for container in containers {
|
||||||
let logical = if container == MANAGER_NAME {
|
let logical = if container == MANAGER_NAME {
|
||||||
Some(MANAGER_NAME.to_owned())
|
Some(MANAGER_NAME.to_owned())
|
||||||
|
|
@ -217,9 +216,14 @@ pub async fn run(coord: Arc<Coordinator>) -> Result<()> {
|
||||||
container.strip_prefix(AGENT_PREFIX).map(str::to_owned)
|
container.strip_prefix(AGENT_PREFIX).map(str::to_owned)
|
||||||
};
|
};
|
||||||
let Some(name) = logical else { continue };
|
let Some(name) = logical else { continue };
|
||||||
if let Err(e) = rebuild_agent(&coord, &name, &current_rev).await {
|
coord.rebuild_queue.enqueue(
|
||||||
tracing::warn!(%name, error = ?e, "auto-update: rebuild failed");
|
crate::rebuild_queue::QueueKind::Rebuild,
|
||||||
}
|
name,
|
||||||
|
crate::rebuild_queue::QueueSource::AutoUpdate,
|
||||||
|
"startup sweep".to_owned(),
|
||||||
|
None,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
coord.emit_rebuild_queue_snapshot();
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1571,82 +1571,18 @@ async fn post_meta_update(
|
||||||
if inputs.is_empty() {
|
if inputs.is_empty() {
|
||||||
return error_response("meta-update: no inputs selected");
|
return error_response("meta-update: no inputs selected");
|
||||||
}
|
}
|
||||||
let coord = state.coord.clone();
|
state.coord.rebuild_queue.enqueue_with_inputs(
|
||||||
let inputs_clone = inputs.clone();
|
crate::rebuild_queue::QueueKind::MetaUpdate,
|
||||||
tokio::spawn(async move {
|
"hyperhive".to_owned(),
|
||||||
run_meta_update(&coord, &inputs_clone).await;
|
crate::rebuild_queue::QueueSource::Manual,
|
||||||
// Lock file changed — emit so dashboards refresh the
|
format!("meta-update via dashboard ({})", inputs.join(", ")),
|
||||||
// meta-inputs panel without a snapshot poll.
|
None,
|
||||||
emit_meta_inputs_snapshot(&coord);
|
inputs,
|
||||||
});
|
);
|
||||||
|
state.coord.emit_rebuild_queue_snapshot();
|
||||||
(StatusCode::OK, "ok").into_response()
|
(StatusCode::OK, "ok").into_response()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Background task: run `nix flake update <inputs>` in meta + commit,
|
|
||||||
/// then rebuild every agent whose input was touched (or all agents
|
|
||||||
/// when `hyperhive` was bumped, since that's the shared base). Each
|
|
||||||
/// rebuild fires `Rebuilt { ok, note, ... }` to the manager so the
|
|
||||||
/// operator and manager get the same feedback they'd see from an
|
|
||||||
/// auto-update / manual dashboard rebuild.
|
|
||||||
async fn run_meta_update(coord: &Arc<crate::coordinator::Coordinator>, inputs: &[String]) {
|
|
||||||
// Held for the whole run (incl. the early `return` on lock failure):
|
|
||||||
// emits `MetaUpdateRunning { running: true }` now and `false` on
|
|
||||||
// drop so the META INPUTS panel shows progress (issue #259).
|
|
||||||
let _progress = coord.meta_update_guard();
|
|
||||||
tracing::info!(?inputs, "meta-update: starting");
|
|
||||||
if let Err(e) = crate::meta::lock_update(inputs).await {
|
|
||||||
tracing::warn!(error = ?e, "meta-update: lock_update failed");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Decide which agents to rebuild. Inputs are slash-paths from
|
|
||||||
// the meta root — `hyperhive`, `hyperhive/nixpkgs`,
|
|
||||||
// `agent-coder`, `agent-coder/mcp-matrix`, etc. Anything in the
|
|
||||||
// hyperhive subtree affects every agent (shared base); anything
|
|
||||||
// in `agent-<n>/...` only the named agent.
|
|
||||||
let touched_hyperhive = inputs
|
|
||||||
.iter()
|
|
||||||
.any(|i| i == "hyperhive" || i.starts_with("hyperhive/"));
|
|
||||||
let touched_agents: Vec<String> = inputs
|
|
||||||
.iter()
|
|
||||||
.filter_map(|i| i.strip_prefix("agent-"))
|
|
||||||
.map(|rest| rest.split('/').next().unwrap_or(rest).to_owned())
|
|
||||||
.collect();
|
|
||||||
let agents_to_rebuild: Vec<String> = if touched_hyperhive {
|
|
||||||
crate::lifecycle::list()
|
|
||||||
.await
|
|
||||||
.unwrap_or_default()
|
|
||||||
.into_iter()
|
|
||||||
.filter_map(|c| {
|
|
||||||
if c == crate::lifecycle::MANAGER_NAME {
|
|
||||||
Some(crate::lifecycle::MANAGER_NAME.to_owned())
|
|
||||||
} else {
|
|
||||||
c.strip_prefix(crate::lifecycle::AGENT_PREFIX)
|
|
||||||
.map(str::to_owned)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.collect()
|
|
||||||
} else {
|
|
||||||
touched_agents
|
|
||||||
};
|
|
||||||
|
|
||||||
let current_rev =
|
|
||||||
crate::auto_update::current_flake_rev(&coord.hyperhive_flake).unwrap_or_default();
|
|
||||||
// Sequential rebuild loop — the META_LOCK guards meta-side
|
|
||||||
// races but parallel nix builds also serialise via nix-daemon,
|
|
||||||
// so sequential is just as fast in practice and keeps logs
|
|
||||||
// readable.
|
|
||||||
for name in agents_to_rebuild {
|
|
||||||
tracing::info!(%name, "meta-update: rebuilding agent");
|
|
||||||
if let Err(e) = crate::auto_update::rebuild_agent(coord, &name, &current_rev).await {
|
|
||||||
tracing::warn!(%name, error = ?e, "meta-update: rebuild failed");
|
|
||||||
// continue: surface each per-agent failure via its own
|
|
||||||
// Rebuilt event; don't abort the whole batch.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tracing::info!("meta-update: done");
|
|
||||||
}
|
|
||||||
|
|
||||||
async fn post_op_send(State(state): State<AppState>, Form(form): Form<OpSendForm>) -> Response {
|
async fn post_op_send(State(state): State<AppState>, Form(form): Form<OpSendForm>) -> Response {
|
||||||
let to = form.to.trim().to_owned();
|
let to = form.to.trim().to_owned();
|
||||||
let body = form.body.trim().to_owned();
|
let body = form.body.trim().to_owned();
|
||||||
|
|
@ -1708,28 +1644,16 @@ async fn post_request_spawn(
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn post_rebuild(State(state): State<AppState>, AxumPath(name): AxumPath<String>) -> Response {
|
async fn post_rebuild(State(state): State<AppState>, AxumPath(name): AxumPath<String>) -> Response {
|
||||||
let Some(current_rev) = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake)
|
let logical = strip_container_prefix(&name);
|
||||||
else {
|
state.coord.rebuild_queue.enqueue(
|
||||||
return error_response(
|
crate::rebuild_queue::QueueKind::Rebuild,
|
||||||
"rebuild: hyperhive_flake has no canonical path; manual rebuild only via `hive-c0re rebuild`",
|
logical,
|
||||||
|
crate::rebuild_queue::QueueSource::Manual,
|
||||||
|
"manual via dashboard ↻ R3BU1LD button".to_owned(),
|
||||||
|
None,
|
||||||
);
|
);
|
||||||
};
|
state.coord.emit_rebuild_queue_snapshot();
|
||||||
let coord = state.coord.clone();
|
(StatusCode::OK, "ok").into_response()
|
||||||
lifecycle_action(
|
|
||||||
&state,
|
|
||||||
&name,
|
|
||||||
crate::coordinator::TransientKind::Rebuilding,
|
|
||||||
"rebuild",
|
|
||||||
move |n| {
|
|
||||||
let coord = coord.clone();
|
|
||||||
let rev = current_rev.clone();
|
|
||||||
async move { crate::auto_update::rebuild_agent(&coord, &n, &rev).await }
|
|
||||||
},
|
|
||||||
// rebuild_agent fires kick_agent on success itself, so the
|
|
||||||
// extra-closure is a no-op here.
|
|
||||||
|_, _| {},
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Common shape for the simple lifecycle action handlers (start /
|
/// Common shape for the simple lifecycle action handlers (start /
|
||||||
|
|
@ -1816,12 +1740,7 @@ async fn post_start(State(state): State<AppState>, AxumPath(name): AxumPath<Stri
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn post_update_all(State(state): State<AppState>) -> Response {
|
async fn post_update_all(State(state): State<AppState>) -> Response {
|
||||||
let Some(current_rev) = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake)
|
|
||||||
else {
|
|
||||||
return error_response("update-all: hyperhive_flake has no canonical path");
|
|
||||||
};
|
|
||||||
let containers = lifecycle::list().await.unwrap_or_default();
|
let containers = lifecycle::list().await.unwrap_or_default();
|
||||||
let mut errors = Vec::new();
|
|
||||||
for container in containers {
|
for container in containers {
|
||||||
let logical = if container == lifecycle::MANAGER_NAME {
|
let logical = if container == lifecycle::MANAGER_NAME {
|
||||||
lifecycle::MANAGER_NAME.to_owned()
|
lifecycle::MANAGER_NAME.to_owned()
|
||||||
|
|
@ -1830,21 +1749,16 @@ async fn post_update_all(State(state): State<AppState>) -> Response {
|
||||||
} else {
|
} else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
if let Err(e) =
|
state.coord.rebuild_queue.enqueue(
|
||||||
crate::auto_update::rebuild_agent(&state.coord, &logical, &current_rev).await
|
crate::rebuild_queue::QueueKind::Rebuild,
|
||||||
{
|
logical,
|
||||||
errors.push(format!("{logical}: {e:#}"));
|
crate::rebuild_queue::QueueSource::Manual,
|
||||||
|
"manual via dashboard 🌀 UPDATE ALL".to_owned(),
|
||||||
|
None,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
state.coord.emit_rebuild_queue_snapshot();
|
||||||
if errors.is_empty() {
|
|
||||||
// Each rebuild_agent rescanned; no extra refetch needed.
|
|
||||||
(StatusCode::OK, "ok").into_response()
|
(StatusCode::OK, "ok").into_response()
|
||||||
} else {
|
|
||||||
error_response(&format!(
|
|
||||||
"update-all partial failure:\n{}",
|
|
||||||
errors.join("\n")
|
|
||||||
))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn transient_label(k: crate::coordinator::TransientKind) -> &'static str {
|
fn transient_label(k: crate::coordinator::TransientKind) -> &'static str {
|
||||||
|
|
|
||||||
|
|
@ -291,26 +291,17 @@ async fn dispatch(req: &ManagerRequest, coord: &Arc<Coordinator>) -> ManagerResp
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ManagerRequest::Update { name } => {
|
ManagerRequest::Update { name } => {
|
||||||
tracing::info!(%name, "manager: update");
|
tracing::info!(%name, "manager: enqueue update");
|
||||||
let Some(current_rev) = crate::auto_update::current_flake_rev(&coord.hyperhive_flake)
|
coord.rebuild_queue.enqueue(
|
||||||
else {
|
crate::rebuild_queue::QueueKind::Rebuild,
|
||||||
return ManagerResponse::Err {
|
name.to_owned(),
|
||||||
message: "update: hyperhive_flake has no canonical path".into(),
|
crate::rebuild_queue::QueueSource::Manual,
|
||||||
};
|
"manager `update` tool".to_owned(),
|
||||||
};
|
None,
|
||||||
let guard = coord.transient_guard(name, crate::coordinator::TransientKind::Rebuilding);
|
);
|
||||||
let result = crate::auto_update::rebuild_agent(coord, name, &current_rev).await;
|
coord.emit_rebuild_queue_snapshot();
|
||||||
drop(guard);
|
|
||||||
match result {
|
|
||||||
Ok(()) => {
|
|
||||||
coord.kick_agent(name, "container rebuilt");
|
|
||||||
ManagerResponse::Ok
|
ManagerResponse::Ok
|
||||||
}
|
}
|
||||||
Err(e) => ManagerResponse::Err {
|
|
||||||
message: format!("{e:#}"),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ManagerRequest::RequestUpdateMetaInputs {
|
ManagerRequest::RequestUpdateMetaInputs {
|
||||||
inputs,
|
inputs,
|
||||||
description,
|
description,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue