dashboard: meta flake inputs UI + sequential rebuild loop
new section 'M3T4 1NPUTS' between approvals and message flow:
one row per input in meta/flake.lock (hyperhive first, then
agent-<n> alphabetically). each row shows the input name, the
first 12 chars of the locked sha, a relative timestamp from
locked.lastModified, and the original.url when available.
checkbox per row; submit button is disabled until at least one
box is checked; submitting confirms then POSTs the selected
names to /meta-update.
backend:
- meta::lock_update(inputs: &[String]) — runs 'nix flake update
<names>' in the meta dir, commits the lock change with a
combined message ('lock update: hyperhive, agent-coder').
preserves the existing META_LOCK serialization. existing
lock_update_for_rebuild / lock_update_hyperhive stay for
their single-input callers.
- POST /meta-update — comma-separated 'inputs' form field
(JS joins checkboxes since axum::Form doesn't natively
decode repeated keys); spawns a background task that runs
the lock update + per-agent rebuild loop. hyperhive
selection fans out to all agents; agent-<n> selection only
rebuilds <n>. each rebuild fires Rebuilt to the manager
exactly like dashboard / admin-CLI / auto-update.
the rebuild loop is sequential, and auto_update::run now is too (it
was parallel via tokio::spawn). parallel rebuilds collide on
nix-store's sqlite cache ('sqlite db busy, not using cache')
and contend for the meta repo's META_LOCK. nix-daemon serializes the
heavy build steps anyway, so this isn't a throughput loss.
This commit is contained in:
parent
891223219e
commit
266c2c7a77
6 changed files with 331 additions and 18 deletions
|
|
@ -57,6 +57,7 @@ pub async fn serve(port: u16, coord: Arc<Coordinator>) -> Result<()> {
|
|||
.route("/api/agent-config/{name}", get(get_agent_config))
|
||||
.route("/request-spawn", post(post_request_spawn))
|
||||
.route("/op-send", post(post_op_send))
|
||||
.route("/meta-update", post(post_meta_update))
|
||||
.route("/messages/stream", get(messages_stream))
|
||||
.with_state(AppState { coord });
|
||||
let addr = SocketAddr::from(([0, 0, 0, 0], port));
|
||||
|
|
@ -154,6 +155,9 @@ struct StateSnapshot {
|
|||
/// least one other agent. Operator resolves by renaming. The
|
||||
/// dashboard renders a banner at the top listing each cluster.
|
||||
port_conflicts: Vec<PortConflict>,
|
||||
/// Inputs in `meta/flake.lock` the operator can selectively
|
||||
/// `nix flake update`. Hyperhive first, then `agent-<n>` rows.
|
||||
meta_inputs: Vec<MetaInputView>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
|
@ -280,6 +284,7 @@ async fn api_state(headers: HeaderMap, State(state): State<AppState>) -> axum::J
|
|||
transients,
|
||||
approvals,
|
||||
approval_history,
|
||||
meta_inputs: read_meta_inputs(),
|
||||
operator_inbox,
|
||||
questions,
|
||||
tombstones,
|
||||
|
|
@ -360,7 +365,33 @@ async fn build_container_views(
|
|||
/// yields an empty map so the dashboard degrades gracefully when the
|
||||
/// meta repo hasn't been seeded yet.
|
||||
fn read_meta_locked_revs() -> std::collections::HashMap<String, String> {
|
||||
let mut out = std::collections::HashMap::new();
|
||||
read_meta_inputs()
|
||||
.into_iter()
|
||||
.map(|i| (i.name, i.rev))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
struct MetaInputView {
|
||||
/// Input key in meta's `flake.nix` — `hyperhive`, `agent-<n>`, etc.
|
||||
name: String,
|
||||
/// Full locked sha. Not displayed verbatim; the dashboard
|
||||
/// truncates to the first 12 chars for the chip.
|
||||
rev: String,
|
||||
/// Unix seconds — `locked.lastModified`. Drives the relative
|
||||
/// "2h ago" timestamp on each input row.
|
||||
last_modified: i64,
|
||||
/// `original.url` if available, for the tooltip / row meta text.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
url: Option<String>,
|
||||
}
|
||||
|
||||
/// Walk `flake.lock`'s `nodes` map → `Vec<MetaInputView>`. Only
|
||||
/// includes nodes the root depends on (i.e. real inputs), skipping
|
||||
/// the synthetic `root` entry. Sorted with `hyperhive` first then
|
||||
/// alphabetically so the UI's top entry is the swarm-wide base.
|
||||
fn read_meta_inputs() -> Vec<MetaInputView> {
|
||||
let mut out = Vec::new();
|
||||
let Ok(raw) = std::fs::read_to_string("/var/lib/hyperhive/meta/flake.lock") else {
|
||||
return out;
|
||||
};
|
||||
|
|
@ -370,15 +401,48 @@ fn read_meta_locked_revs() -> std::collections::HashMap<String, String> {
|
|||
let Some(nodes) = json.get("nodes").and_then(|v| v.as_object()) else {
|
||||
return out;
|
||||
};
|
||||
let Some(root_name) = json.get("root").and_then(|v| v.as_str()) else {
|
||||
return out;
|
||||
};
|
||||
let root_inputs: std::collections::BTreeSet<String> = nodes
|
||||
.get(root_name)
|
||||
.and_then(|n| n.get("inputs"))
|
||||
.and_then(|v| v.as_object())
|
||||
.map(|m| m.keys().cloned().collect())
|
||||
.unwrap_or_default();
|
||||
for (name, node) in nodes {
|
||||
if let Some(rev) = node
|
||||
.get("locked")
|
||||
if !root_inputs.contains(name) {
|
||||
continue;
|
||||
}
|
||||
let locked = node.get("locked");
|
||||
let Some(rev) = locked
|
||||
.and_then(|v| v.get("rev"))
|
||||
.and_then(|v| v.as_str())
|
||||
{
|
||||
out.insert(name.clone(), rev.to_owned());
|
||||
}
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let last_modified = locked
|
||||
.and_then(|v| v.get("lastModified"))
|
||||
.and_then(serde_json::Value::as_i64)
|
||||
.unwrap_or(0);
|
||||
let url = node
|
||||
.get("original")
|
||||
.and_then(|v| v.get("url"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(str::to_owned);
|
||||
out.push(MetaInputView {
|
||||
name: name.clone(),
|
||||
rev: rev.to_owned(),
|
||||
last_modified,
|
||||
url,
|
||||
});
|
||||
}
|
||||
// hyperhive first, then alphabetical.
|
||||
out.sort_by(|a, b| match (a.name.as_str(), b.name.as_str()) {
|
||||
("hyperhive", _) => std::cmp::Ordering::Less,
|
||||
(_, "hyperhive") => std::cmp::Ordering::Greater,
|
||||
_ => a.name.cmp(&b.name),
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
|
|
@ -784,6 +848,96 @@ struct OpSendForm {
|
|||
body: String,
|
||||
}
|
||||
|
||||
/// Form for `POST /meta-update`. Inputs ride in as a comma-separated
|
||||
/// list under the `inputs` field — the JS submitter joins the
|
||||
/// checked boxes since axum's `Form` extractor doesn't natively
|
||||
/// decode repeated keys without a helper.
|
||||
#[derive(Deserialize)]
|
||||
struct MetaUpdateForm {
|
||||
inputs: String,
|
||||
}
|
||||
|
||||
/// Bulk-update selected meta flake inputs, then rebuild the affected
|
||||
/// agents in the background. Idempotent w.r.t. selection — choosing
|
||||
/// an input that's already at the latest sha is a no-op (no commit,
|
||||
/// no rebuild ripple). Returns immediately after queueing the work;
|
||||
/// dashboard polls for progress via container `pending` spinners +
|
||||
/// the meta-inputs row sha update.
|
||||
async fn post_meta_update(
|
||||
State(state): State<AppState>,
|
||||
Form(form): Form<MetaUpdateForm>,
|
||||
) -> Response {
|
||||
let inputs: Vec<String> = form
|
||||
.inputs
|
||||
.split(',')
|
||||
.map(|s| s.trim().to_owned())
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect();
|
||||
if inputs.is_empty() {
|
||||
return error_response("meta-update: no inputs selected");
|
||||
}
|
||||
let coord = state.coord.clone();
|
||||
let inputs_clone = inputs.clone();
|
||||
tokio::spawn(async move {
|
||||
run_meta_update(&coord, &inputs_clone).await;
|
||||
});
|
||||
Redirect::to("/").into_response()
|
||||
}
|
||||
|
||||
/// Background task: run `nix flake update <inputs>` in meta + commit,
|
||||
/// then rebuild every agent whose input was touched (or all agents
|
||||
/// when `hyperhive` was bumped, since that's the shared base). Each
|
||||
/// rebuild fires `Rebuilt { ok, note, ... }` to the manager so the
|
||||
/// operator and manager get the same feedback they'd see from an
|
||||
/// auto-update / manual dashboard rebuild.
|
||||
async fn run_meta_update(coord: &Arc<crate::coordinator::Coordinator>, inputs: &[String]) {
|
||||
tracing::info!(?inputs, "meta-update: starting");
|
||||
if let Err(e) = crate::meta::lock_update(inputs).await {
|
||||
tracing::warn!(error = ?e, "meta-update: lock_update failed");
|
||||
return;
|
||||
}
|
||||
|
||||
// Decide which agents to rebuild.
|
||||
let touched_hyperhive = inputs.iter().any(|i| i == "hyperhive");
|
||||
let touched_agents: Vec<String> = inputs
|
||||
.iter()
|
||||
.filter_map(|i| i.strip_prefix("agent-").map(str::to_owned))
|
||||
.collect();
|
||||
let agents_to_rebuild: Vec<String> = if touched_hyperhive {
|
||||
crate::lifecycle::list()
|
||||
.await
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.filter_map(|c| {
|
||||
if c == crate::lifecycle::MANAGER_NAME {
|
||||
Some(crate::lifecycle::MANAGER_NAME.to_owned())
|
||||
} else {
|
||||
c.strip_prefix(crate::lifecycle::AGENT_PREFIX)
|
||||
.map(str::to_owned)
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
touched_agents
|
||||
};
|
||||
|
||||
let current_rev = crate::auto_update::current_flake_rev(&coord.hyperhive_flake)
|
||||
.unwrap_or_default();
|
||||
// Sequential rebuild loop — the META_LOCK guards meta-side
|
||||
// races but parallel nix builds also serialise via nix-daemon,
|
||||
// so sequential is just as fast in practice and keeps logs
|
||||
// readable.
|
||||
for name in agents_to_rebuild {
|
||||
tracing::info!(%name, "meta-update: rebuilding agent");
|
||||
if let Err(e) = crate::auto_update::rebuild_agent(coord, &name, ¤t_rev).await {
|
||||
tracing::warn!(%name, error = ?e, "meta-update: rebuild failed");
|
||||
// continue: surface each per-agent failure via its own
|
||||
// Rebuilt event; don't abort the whole batch.
|
||||
}
|
||||
}
|
||||
tracing::info!("meta-update: done");
|
||||
}
|
||||
|
||||
async fn post_op_send(State(state): State<AppState>, Form(form): Form<OpSendForm>) -> Response {
|
||||
let to = form.to.trim().to_owned();
|
||||
let body = form.body.trim().to_owned();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue