dashboard: meta flake inputs UI + sequential rebuild loop
new section 'M3T4 1NPUTS' between approvals and message flow:
one row per input in meta/flake.lock (hyperhive first, then
agent-<n> alphabetically). each row shows the input name, the
first 12 chars of the locked sha, a relative timestamp from
locked.lastModified, and the original.url when available.
checkbox per row; submit button is disabled until at least one
box is checked; submitting confirms then POSTs the selected
names to /meta-update.
backend:
- meta::lock_update(inputs: &[String]) — runs 'nix flake update
<names>' in the meta dir, commits the lock change with a
combined message ('lock update: hyperhive, agent-coder').
preserves the existing META_LOCK serialization. existing
lock_update_for_rebuild / lock_update_hyperhive stay for
their single-input callers.
- POST /meta-update — comma-separated 'inputs' form field
(JS joins checkboxes since axum::Form doesn't natively
decode repeated keys); spawns a background task that runs
the lock update + per-agent rebuild loop. hyperhive
selection fans out to all agents; agent-<n> selection only
rebuilds <n>. each rebuild fires Rebuilt to the manager
exactly like dashboard / admin-CLI / auto-update.
the rebuild loop is sequential, and auto_update::run now is too
(it was parallel via tokio::spawn). parallel rebuilds collide on
nix-store's sqlite cache ('sqlite db busy, not using cache')
and contend on the meta repo's META_LOCK. nix-daemon serializes the
heavy build steps anyway, so this isn't a throughput loss.
This commit is contained in:
parent
891223219e
commit
266c2c7a77
6 changed files with 331 additions and 18 deletions
|
|
@ -760,6 +760,77 @@
|
|||
return Math.floor(ageSec / 86400) + 'd ago';
|
||||
}
|
||||
|
||||
// Render the meta flake inputs section from a state snapshot: one
// checkbox row per entry in `s.meta_inputs`, followed by a submit button
// that POSTs the checked input names to /meta-update.
function renderMetaInputs(s) {
  const section = $('meta-inputs-section');
  if (!section) return;
  section.innerHTML = '';

  const entries = s.meta_inputs || [];
  if (!entries.length) {
    section.append(el('p', { class: 'empty' }, 'meta repo not seeded yet'));
    return;
  }

  // Selector shared by the enable/disable check and the submit hook.
  const CHECKED = 'input[data-meta-input]:checked';

  // Build one <li> holding a label that wraps the checkbox and the
  // name / short rev / relative timestamp / optional url chips.
  function buildRow(inp) {
    const rowId = `meta-input-${inp.name.replace(/[^a-z0-9-]/gi, '_')}`;
    const box = el('input', {
      type: 'checkbox',
      name: 'meta_input_' + inp.name,
      id: rowId,
      value: inp.name,
      'data-meta-input': inp.name,
    });
    const parts = [
      box,
      el('span', { class: 'meta-input-name' }, inp.name),
      ' ',
      el('code', { class: 'meta-input-rev' }, inp.rev.slice(0, 12)),
      ' ',
      el('span', { class: 'meta-input-ts' }, fmtAgo(inp.last_modified)),
    ];
    if (inp.url) {
      parts.push(
        ' ',
        el('span', { class: 'meta-input-url', title: inp.url }, '· ' + truncate(inp.url, 48)),
      );
    }
    const rowLabel = el('label', { for: rowId });
    rowLabel.append(...parts);
    const item = el('li');
    item.append(rowLabel);
    return item;
  }

  const form = el('form', {
    method: 'POST',
    action: '/meta-update',
    class: 'meta-inputs-form',
    'data-async': '',
    'data-confirm': 'update selected meta flake inputs + rebuild affected agents?',
  });

  const list = el('ul', { class: 'meta-inputs' });
  for (const entry of entries) {
    list.append(buildRow(entry));
  }

  // Hidden field the POST handler actually reads. It is filled in at
  // submit time from the checkbox states: axum's Form extractor doesn't
  // natively decode repeated keys, so the names travel as one CSV value.
  const csvField = el('input', { type: 'hidden', name: 'inputs', value: '' });

  // Starts disabled; enabled only while at least one box is checked.
  const submitBtn = el('button', {
    type: 'submit',
    class: 'btn btn-meta-update',
    disabled: '',
  }, '◆ UPD4TE & R3BU1LD');

  form.append(list, csvField, submitBtn);

  form.addEventListener('change', () => {
    if (form.querySelector(CHECKED) === null) {
      submitBtn.setAttribute('disabled', '');
    } else {
      submitBtn.removeAttribute('disabled');
    }
  });

  form.addEventListener('submit', () => {
    const names = [];
    for (const checked of form.querySelectorAll(CHECKED)) {
      names.push(checked.dataset.metaInput);
    }
    csvField.value = names.join(',');
  });

  section.append(form);
}
|
||||
|
||||
// Return `s` unchanged when it is at most `n` characters long; otherwise
// keep the first `n - 1` characters and append a single '…'.
function truncate(s, n) {
  if (s.length <= n) {
    return s;
  }
  return `${s.slice(0, n - 1)}…`;
}
|
||||
|
||||
// ─── state polling ──────────────────────────────────────────────────────
|
||||
let pollTimer = null;
|
||||
// Sections whose innerHTML gets blown away on each refresh. If the
|
||||
|
|
@ -771,6 +842,7 @@
|
|||
'questions-section',
|
||||
'inbox-section',
|
||||
'approvals-section',
|
||||
'meta-inputs-section',
|
||||
];
|
||||
// <details> sections that should survive a refresh need a stable
|
||||
// `data-restore-key` attribute. snapshotOpenDetails walks managed
|
||||
|
|
@ -833,6 +905,7 @@
|
|||
renderQuestions(s);
|
||||
renderInbox(s);
|
||||
renderApprovals(s);
|
||||
renderMetaInputs(s);
|
||||
restoreOpenDetails(openDetails);
|
||||
notifyDeltas(s);
|
||||
// Auto-refresh: fast (2s) while a spawn or a per-container
|
||||
|
|
|
|||
|
|
@ -288,6 +288,56 @@ code {
|
|||
.glyph-approved { color: var(--green); }
|
||||
.glyph-denied { color: var(--red); }
|
||||
.glyph-failed { color: var(--amber); }
|
||||
/* ─── meta flake inputs section ─────────────────────────────────────── */

/* Input list: unstyled <ul> laid out as a tight single-column grid. */
.meta-inputs {
  list-style: none;
  padding: 0;
  margin: 0 0 0.8em;
  display: grid;
  gap: 0.2em;
}

/* One bordered row per flake input. */
.meta-inputs li {
  padding: 0.25em 0.6em;
  border: 1px solid var(--border);
  background: rgba(24, 24, 37, 0.6);
}

/* The whole row is the checkbox label, so it is clickable end to end. */
.meta-inputs label {
  display: flex;
  align-items: baseline;
  gap: 0.5em;
  cursor: pointer;
  font-size: 0.9em;
}

/* Row chips: input name, short rev, relative timestamp. */
.meta-input-name {
  color: var(--amber);
  font-weight: bold;
}

.meta-input-rev {
  color: var(--muted);
}

.meta-input-ts {
  color: var(--muted);
  font-size: 0.85em;
}

/* Source url: pushed to the right edge and clipped with an ellipsis. */
.meta-input-url {
  color: var(--muted);
  font-size: 0.85em;
  margin-left: auto;
  overflow: hidden;
  text-overflow: ellipsis;
  white-space: nowrap;
}

/* Submit button. `font: inherit` must stay ahead of `font-size`, since
   the shorthand would otherwise reset the size. */
.btn-meta-update {
  background: rgba(203, 166, 247, 0.12);
  border: 1px solid var(--purple);
  color: var(--purple);
  text-shadow: 0 0 4px currentColor;
  padding: 0.3em 1em;
  font: inherit;
  font-size: 0.85em;
  letter-spacing: 0.08em;
  cursor: pointer;
  transition: box-shadow 0.15s ease, background 0.15s ease;
}

/* Hover glow only while the button is actually clickable. */
.btn-meta-update:hover:not([disabled]) {
  background: rgba(203, 166, 247, 0.22);
  box-shadow: 0 0 10px -2px currentColor;
}

/* Dimmed while no checkbox is selected. */
.btn-meta-update[disabled] {
  opacity: 0.35;
  cursor: not-allowed;
}
|
||||
.history-note {
|
||||
margin-left: 1.8em;
|
||||
margin-top: 0.2em;
|
||||
|
|
|
|||
|
|
@ -47,6 +47,13 @@
|
|||
<p class="meta">loading…</p>
|
||||
</div>
|
||||
|
||||
<h2>◆ M3T4 1NPUTS ◆</h2>
|
||||
<div class="divider">══════════════════════════════════════════════════════════════</div>
|
||||
<p class="meta">select inputs to <code>nix flake update</code> in <code>/meta/</code>. selected agents rebuild in sequence after the lock bump; manager learns each outcome via the usual <code>rebuilt</code> system event.</p>
|
||||
<div id="meta-inputs-section">
|
||||
<p class="meta">loading…</p>
|
||||
</div>
|
||||
|
||||
<h2>◆ MESS4GE FL0W ◆</h2>
|
||||
<div class="divider">══════════════════════════════════════════════════════════════</div>
|
||||
<p class="meta">live tail — newest at the top. tap on every <code>send</code> / <code>recv</code> through the broker. compose below: <code>@name</code> picks the recipient (sticky until you @ someone else); <code>tab</code> completes.</p>
|
||||
|
|
|
|||
|
|
@ -182,7 +182,12 @@ pub async fn run(coord: Arc<Coordinator>) -> Result<()> {
|
|||
}
|
||||
};
|
||||
|
||||
let mut tasks = Vec::new();
|
||||
// Sequential, one agent at a time. Parallel rebuilds collide on
|
||||
// nix-store's sqlite cache (the "sqlite db busy, not using
|
||||
// cache" warning) and also race the meta-lock mutex; the
|
||||
// resulting log interleave was bad enough on its own. Builds
|
||||
// serialize on nix-daemon internally anyway, so this isn't a
|
||||
// throughput loss in practice.
|
||||
for container in containers {
|
||||
// Manager and sub-agents share the same lifecycle now; both go
|
||||
// through rebuild_agent with name-derived paths.
|
||||
|
|
@ -198,17 +203,9 @@ pub async fn run(coord: Arc<Coordinator>) -> Result<()> {
|
|||
tracing::debug!(%name, "auto-update: up-to-date");
|
||||
continue;
|
||||
}
|
||||
let coord = coord.clone();
|
||||
let current_rev = current_rev.clone();
|
||||
tasks.push(tokio::spawn(async move {
|
||||
if let Err(e) = rebuild_agent(&coord, &name, ¤t_rev).await {
|
||||
tracing::warn!(%name, error = ?e, "auto-update: rebuild failed");
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
for t in tasks {
|
||||
let _ = t.await;
|
||||
if let Err(e) = rebuild_agent(&coord, &name, ¤t_rev).await {
|
||||
tracing::warn!(%name, error = ?e, "auto-update: rebuild failed");
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ pub async fn serve(port: u16, coord: Arc<Coordinator>) -> Result<()> {
|
|||
.route("/api/agent-config/{name}", get(get_agent_config))
|
||||
.route("/request-spawn", post(post_request_spawn))
|
||||
.route("/op-send", post(post_op_send))
|
||||
.route("/meta-update", post(post_meta_update))
|
||||
.route("/messages/stream", get(messages_stream))
|
||||
.with_state(AppState { coord });
|
||||
let addr = SocketAddr::from(([0, 0, 0, 0], port));
|
||||
|
|
@ -154,6 +155,9 @@ struct StateSnapshot {
|
|||
/// least one other agent. Operator resolves by renaming. The
|
||||
/// dashboard renders a banner at the top listing each cluster.
|
||||
port_conflicts: Vec<PortConflict>,
|
||||
/// Inputs in `meta/flake.lock` the operator can selectively
|
||||
/// `nix flake update`. Hyperhive first, then `agent-<n>` rows.
|
||||
meta_inputs: Vec<MetaInputView>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
|
@ -280,6 +284,7 @@ async fn api_state(headers: HeaderMap, State(state): State<AppState>) -> axum::J
|
|||
transients,
|
||||
approvals,
|
||||
approval_history,
|
||||
meta_inputs: read_meta_inputs(),
|
||||
operator_inbox,
|
||||
questions,
|
||||
tombstones,
|
||||
|
|
@ -360,7 +365,33 @@ async fn build_container_views(
|
|||
/// yields an empty map so the dashboard degrades gracefully when the
|
||||
/// meta repo hasn't been seeded yet.
|
||||
fn read_meta_locked_revs() -> std::collections::HashMap<String, String> {
|
||||
let mut out = std::collections::HashMap::new();
|
||||
read_meta_inputs()
|
||||
.into_iter()
|
||||
.map(|i| (i.name, i.rev))
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[derive(Serialize, Clone)]
|
||||
struct MetaInputView {
|
||||
/// Input key in meta's `flake.nix` — `hyperhive`, `agent-<n>`, etc.
|
||||
name: String,
|
||||
/// Full locked sha. Not displayed verbatim; the dashboard
|
||||
/// truncates to the first 12 chars for the chip.
|
||||
rev: String,
|
||||
/// Unix seconds — `locked.lastModified`. Drives the relative
|
||||
/// "2h ago" timestamp on each input row.
|
||||
last_modified: i64,
|
||||
/// `original.url` if available, for the tooltip / row meta text.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
url: Option<String>,
|
||||
}
|
||||
|
||||
/// Walk `flake.lock`'s `nodes` map → `Vec<MetaInputView>`. Only
|
||||
/// includes nodes the root depends on (i.e. real inputs), skipping
|
||||
/// the synthetic `root` entry. Sorted with `hyperhive` first then
|
||||
/// alphabetically so the UI's top entry is the swarm-wide base.
|
||||
fn read_meta_inputs() -> Vec<MetaInputView> {
|
||||
let mut out = Vec::new();
|
||||
let Ok(raw) = std::fs::read_to_string("/var/lib/hyperhive/meta/flake.lock") else {
|
||||
return out;
|
||||
};
|
||||
|
|
@ -370,15 +401,48 @@ fn read_meta_locked_revs() -> std::collections::HashMap<String, String> {
|
|||
let Some(nodes) = json.get("nodes").and_then(|v| v.as_object()) else {
|
||||
return out;
|
||||
};
|
||||
let Some(root_name) = json.get("root").and_then(|v| v.as_str()) else {
|
||||
return out;
|
||||
};
|
||||
let root_inputs: std::collections::BTreeSet<String> = nodes
|
||||
.get(root_name)
|
||||
.and_then(|n| n.get("inputs"))
|
||||
.and_then(|v| v.as_object())
|
||||
.map(|m| m.keys().cloned().collect())
|
||||
.unwrap_or_default();
|
||||
for (name, node) in nodes {
|
||||
if let Some(rev) = node
|
||||
.get("locked")
|
||||
if !root_inputs.contains(name) {
|
||||
continue;
|
||||
}
|
||||
let locked = node.get("locked");
|
||||
let Some(rev) = locked
|
||||
.and_then(|v| v.get("rev"))
|
||||
.and_then(|v| v.as_str())
|
||||
{
|
||||
out.insert(name.clone(), rev.to_owned());
|
||||
}
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
let last_modified = locked
|
||||
.and_then(|v| v.get("lastModified"))
|
||||
.and_then(serde_json::Value::as_i64)
|
||||
.unwrap_or(0);
|
||||
let url = node
|
||||
.get("original")
|
||||
.and_then(|v| v.get("url"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(str::to_owned);
|
||||
out.push(MetaInputView {
|
||||
name: name.clone(),
|
||||
rev: rev.to_owned(),
|
||||
last_modified,
|
||||
url,
|
||||
});
|
||||
}
|
||||
// hyperhive first, then alphabetical.
|
||||
out.sort_by(|a, b| match (a.name.as_str(), b.name.as_str()) {
|
||||
("hyperhive", _) => std::cmp::Ordering::Less,
|
||||
(_, "hyperhive") => std::cmp::Ordering::Greater,
|
||||
_ => a.name.cmp(&b.name),
|
||||
});
|
||||
out
|
||||
}
|
||||
|
||||
|
|
@ -784,6 +848,96 @@ struct OpSendForm {
|
|||
body: String,
|
||||
}
|
||||
|
||||
/// Form for `POST /meta-update`. Inputs ride in as a comma-separated
|
||||
/// list under the `inputs` field — the JS submitter joins the
|
||||
/// checked boxes since axum's `Form` extractor doesn't natively
|
||||
/// decode repeated keys without a helper.
|
||||
#[derive(Deserialize)]
|
||||
struct MetaUpdateForm {
|
||||
inputs: String,
|
||||
}
|
||||
|
||||
/// Bulk-update selected meta flake inputs, then rebuild the affected
|
||||
/// agents in the background. Idempotent w.r.t. selection — choosing
|
||||
/// an input that's already at the latest sha is a no-op (no commit,
|
||||
/// no rebuild ripple). Returns immediately after queueing the work;
|
||||
/// dashboard polls for progress via container `pending` spinners +
|
||||
/// the meta-inputs row sha update.
|
||||
async fn post_meta_update(
|
||||
State(state): State<AppState>,
|
||||
Form(form): Form<MetaUpdateForm>,
|
||||
) -> Response {
|
||||
let inputs: Vec<String> = form
|
||||
.inputs
|
||||
.split(',')
|
||||
.map(|s| s.trim().to_owned())
|
||||
.filter(|s| !s.is_empty())
|
||||
.collect();
|
||||
if inputs.is_empty() {
|
||||
return error_response("meta-update: no inputs selected");
|
||||
}
|
||||
let coord = state.coord.clone();
|
||||
let inputs_clone = inputs.clone();
|
||||
tokio::spawn(async move {
|
||||
run_meta_update(&coord, &inputs_clone).await;
|
||||
});
|
||||
Redirect::to("/").into_response()
|
||||
}
|
||||
|
||||
/// Background task: run `nix flake update <inputs>` in meta + commit,
|
||||
/// then rebuild every agent whose input was touched (or all agents
|
||||
/// when `hyperhive` was bumped, since that's the shared base). Each
|
||||
/// rebuild fires `Rebuilt { ok, note, ... }` to the manager so the
|
||||
/// operator and manager get the same feedback they'd see from an
|
||||
/// auto-update / manual dashboard rebuild.
|
||||
async fn run_meta_update(coord: &Arc<crate::coordinator::Coordinator>, inputs: &[String]) {
|
||||
tracing::info!(?inputs, "meta-update: starting");
|
||||
if let Err(e) = crate::meta::lock_update(inputs).await {
|
||||
tracing::warn!(error = ?e, "meta-update: lock_update failed");
|
||||
return;
|
||||
}
|
||||
|
||||
// Decide which agents to rebuild.
|
||||
let touched_hyperhive = inputs.iter().any(|i| i == "hyperhive");
|
||||
let touched_agents: Vec<String> = inputs
|
||||
.iter()
|
||||
.filter_map(|i| i.strip_prefix("agent-").map(str::to_owned))
|
||||
.collect();
|
||||
let agents_to_rebuild: Vec<String> = if touched_hyperhive {
|
||||
crate::lifecycle::list()
|
||||
.await
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.filter_map(|c| {
|
||||
if c == crate::lifecycle::MANAGER_NAME {
|
||||
Some(crate::lifecycle::MANAGER_NAME.to_owned())
|
||||
} else {
|
||||
c.strip_prefix(crate::lifecycle::AGENT_PREFIX)
|
||||
.map(str::to_owned)
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
touched_agents
|
||||
};
|
||||
|
||||
let current_rev = crate::auto_update::current_flake_rev(&coord.hyperhive_flake)
|
||||
.unwrap_or_default();
|
||||
// Sequential rebuild loop — the META_LOCK guards meta-side
|
||||
// races but parallel nix builds also serialise via nix-daemon,
|
||||
// so sequential is just as fast in practice and keeps logs
|
||||
// readable.
|
||||
for name in agents_to_rebuild {
|
||||
tracing::info!(%name, "meta-update: rebuilding agent");
|
||||
if let Err(e) = crate::auto_update::rebuild_agent(coord, &name, ¤t_rev).await {
|
||||
tracing::warn!(%name, error = ?e, "meta-update: rebuild failed");
|
||||
// continue: surface each per-agent failure via its own
|
||||
// Rebuilt event; don't abort the whole batch.
|
||||
}
|
||||
}
|
||||
tracing::info!("meta-update: done");
|
||||
}
|
||||
|
||||
async fn post_op_send(State(state): State<AppState>, Form(form): Form<OpSendForm>) -> Response {
|
||||
let to = form.to.trim().to_owned();
|
||||
let body = form.body.trim().to_owned();
|
||||
|
|
|
|||
|
|
@ -184,6 +184,38 @@ pub async fn lock_update_for_rebuild(name: &str) -> Result<()> {
|
|||
git_commit(&dir, &format!("rebuild {name}: lock update")).await
|
||||
}
|
||||
|
||||
/// Update one or more named inputs in the meta flake and commit
|
||||
/// the resulting lock change with a single combined message.
|
||||
/// Used by the dashboard's "update meta inputs" form so the
|
||||
/// operator can bulk-bump `hyperhive` + selected agents in one
|
||||
/// shot. Each input name is passed verbatim to
|
||||
/// `nix flake update`; the caller is responsible for picking
|
||||
/// real input keys (e.g. via `inputs_view()` snapshotted from
|
||||
/// the lock file).
|
||||
#[allow(dead_code)] // wired up by dashboard handler in the same commit
|
||||
pub async fn lock_update(inputs: &[String]) -> Result<()> {
|
||||
if inputs.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
let _guard = META_LOCK.lock().await;
|
||||
let dir = meta_dir();
|
||||
let mut args: Vec<&str> = vec!["flake", "update"];
|
||||
for i in inputs {
|
||||
args.push(i.as_str());
|
||||
}
|
||||
nix(&dir, &args).await?;
|
||||
if git_is_clean(&dir).await? {
|
||||
return Ok(());
|
||||
}
|
||||
git(&dir, &["add", "flake.lock"]).await?;
|
||||
let msg = if inputs.len() == 1 {
|
||||
format!("lock update: {}", inputs[0])
|
||||
} else {
|
||||
format!("lock update: {}", inputs.join(", "))
|
||||
};
|
||||
git_commit(&dir, &msg).await
|
||||
}
|
||||
|
||||
/// One-shot used by the auto-update path: pin the latest hyperhive
|
||||
/// rev, commit if the lock changed. Cheaper than `sync_agents`
|
||||
/// because the per-agent inputs aren't touched.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue