dashboard: stop/restart per-container + update-all when any stale
This commit is contained in:
parent
e2aa40409e
commit
8428c693e0
3 changed files with 131 additions and 3 deletions
|
|
@ -42,7 +42,10 @@ pub async fn serve(port: u16, coord: Arc<Coordinator>) -> Result<()> {
|
|||
.route("/approve/{id}", post(post_approve))
|
||||
.route("/deny/{id}", post(post_deny))
|
||||
.route("/destroy/{name}", post(post_destroy))
|
||||
.route("/kill/{name}", post(post_kill))
|
||||
.route("/restart/{name}", post(post_restart))
|
||||
.route("/rebuild/{name}", post(post_rebuild))
|
||||
.route("/update-all", post(post_update_all))
|
||||
.route("/request-spawn", post(post_request_spawn))
|
||||
.route("/messages/stream", get(messages_stream))
|
||||
.with_state(AppState { coord });
|
||||
|
|
@ -65,6 +68,15 @@ async fn index(headers: HeaderMap, State(state): State<AppState>) -> Html<String
|
|||
let containers = lifecycle::list().await.unwrap_or_default();
|
||||
let transient = state.coord.transient_snapshot();
|
||||
let current_rev = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake);
|
||||
let mut running: std::collections::HashMap<String, bool> =
|
||||
std::collections::HashMap::new();
|
||||
for c in &containers {
|
||||
let logical = c
|
||||
.strip_prefix(lifecycle::AGENT_PREFIX)
|
||||
.unwrap_or(c.as_str())
|
||||
.to_owned();
|
||||
running.insert(c.clone(), lifecycle::is_running(&logical).await);
|
||||
}
|
||||
let approvals = gc_orphans(
|
||||
&state.coord,
|
||||
state.coord.approvals.pending().unwrap_or_default(),
|
||||
|
|
@ -83,7 +95,8 @@ async fn index(headers: HeaderMap, State(state): State<AppState>) -> Html<String
|
|||
|
||||
Html(format!(
|
||||
"<!doctype html>\n<html lang=\"en\">\n<head>\n<meta charset=\"utf-8\">\n<title>hyperhive // h1ve-c0re</title>\n{refresh}\n{STYLE}\n</head>\n<body>\n{BANNER}\n{containers}\n{approvals_html}\n{MSG_FLOW}\n{FOOTER}\n{ASYNC_FORMS_JS}\n{MSG_FLOW_JS}\n</body>\n</html>\n",
|
||||
containers = render_containers(&containers, &transient, current_rev.as_deref(), &hostname),
|
||||
containers =
|
||||
render_containers(&containers, &running, &transient, current_rev.as_deref(), &hostname),
|
||||
))
|
||||
}
|
||||
|
||||
|
|
@ -154,6 +167,67 @@ async fn post_rebuild(State(state): State<AppState>, AxumPath(name): AxumPath<St
|
|||
}
|
||||
}
|
||||
|
||||
async fn post_kill(State(state): State<AppState>, AxumPath(name): AxumPath<String>) -> Response {
|
||||
let logical = strip_container_prefix(&name);
|
||||
if logical == lifecycle::MANAGER_NAME {
|
||||
return error_response("kill: refusing to stop the manager");
|
||||
}
|
||||
match lifecycle::kill(&logical).await {
|
||||
Ok(()) => {
|
||||
state.coord.unregister_agent(&logical);
|
||||
Redirect::to("/").into_response()
|
||||
}
|
||||
Err(e) => error_response(&format!("kill {logical} failed: {e:#}")),
|
||||
}
|
||||
}
|
||||
|
||||
async fn post_restart(State(_state): State<AppState>, AxumPath(name): AxumPath<String>) -> Response {
|
||||
let logical = strip_container_prefix(&name);
|
||||
match lifecycle::restart(&logical).await {
|
||||
Ok(()) => Redirect::to("/").into_response(),
|
||||
Err(e) => error_response(&format!("restart {logical} failed: {e:#}")),
|
||||
}
|
||||
}
|
||||
|
||||
async fn post_update_all(State(state): State<AppState>) -> Response {
|
||||
let Some(current_rev) = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake)
|
||||
else {
|
||||
return error_response("update-all: hyperhive_flake has no canonical path");
|
||||
};
|
||||
let containers = lifecycle::list().await.unwrap_or_default();
|
||||
let mut errors = Vec::new();
|
||||
for container in containers {
|
||||
let logical = if container == lifecycle::MANAGER_NAME {
|
||||
lifecycle::MANAGER_NAME.to_owned()
|
||||
} else if let Some(n) = container.strip_prefix(lifecycle::AGENT_PREFIX) {
|
||||
n.to_owned()
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
if !crate::auto_update::agent_needs_update(&logical, ¤t_rev) {
|
||||
continue;
|
||||
}
|
||||
if let Err(e) =
|
||||
crate::auto_update::rebuild_agent(&state.coord, &logical, ¤t_rev).await
|
||||
{
|
||||
errors.push(format!("{logical}: {e:#}"));
|
||||
}
|
||||
}
|
||||
if errors.is_empty() {
|
||||
Redirect::to("/").into_response()
|
||||
} else {
|
||||
error_response(&format!("update-all partial failure:\n{}", errors.join("\n")))
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert either a logical name or a container name back to the logical
|
||||
/// name. Sub-agents are `h-foo` → `foo`; manager stays `hm1nd`.
|
||||
fn strip_container_prefix(name: &str) -> String {
|
||||
name.strip_prefix(lifecycle::AGENT_PREFIX)
|
||||
.unwrap_or(name)
|
||||
.to_owned()
|
||||
}
|
||||
|
||||
async fn post_destroy(State(state): State<AppState>, AxumPath(name): AxumPath<String>) -> Response {
|
||||
match actions::destroy(&state.coord, &name).await {
|
||||
Ok(()) => Redirect::to("/").into_response(),
|
||||
|
|
@ -174,6 +248,7 @@ fn error_response(message: &str) -> Response {
|
|||
|
||||
fn render_containers(
|
||||
containers: &[String],
|
||||
running: &std::collections::HashMap<String, bool>,
|
||||
transient: &std::collections::HashMap<String, crate::coordinator::TransientState>,
|
||||
current_rev: Option<&str>,
|
||||
hostname: &str,
|
||||
|
|
@ -181,6 +256,16 @@ fn render_containers(
|
|||
let mut out = String::from(
|
||||
"<h2>◆ C0NTAINERS ◆</h2>\n<div class=\"divider\">══════════════════════════════════════════════════════════════</div>\n",
|
||||
);
|
||||
// "update all" header button only when at least one container is stale.
|
||||
if let Some(rev) = current_rev {
|
||||
let any_stale = containers.iter().any(|c| {
|
||||
let logical = c.strip_prefix(AGENT_PREFIX).unwrap_or(c);
|
||||
crate::auto_update::agent_needs_update(logical, rev)
|
||||
});
|
||||
if any_stale {
|
||||
out.push_str("<form method=\"POST\" action=\"/update-all\" class=\"inline\" data-async data-confirm=\"rebuild every stale container?\"><button class=\"btn btn-rebuild\" type=\"submit\">↻ UPD4TE 4LL</button></form>\n");
|
||||
}
|
||||
}
|
||||
out.push_str("<form method=\"POST\" action=\"/request-spawn\" class=\"spawnform\" data-async>\n <input name=\"name\" placeholder=\"new agent name (≤9 chars)\" maxlength=\"9\" required autocomplete=\"off\">\n <button type=\"submit\" class=\"btn btn-spawn\">◆ R3QU3ST SP4WN</button>\n</form>\n<p class=\"meta\">spawn requests queue as approvals. operator approves below to actually create the container.</p>\n");
|
||||
// Render in-flight spawns first so the operator sees feedback immediately.
|
||||
if !transient.is_empty() {
|
||||
|
|
@ -208,11 +293,19 @@ fn render_containers(
|
|||
}
|
||||
out.push_str("<ul>\n");
|
||||
for container in containers {
|
||||
let is_running = running.get(container).copied().unwrap_or(false);
|
||||
if container == MANAGER_NAME {
|
||||
let update_badge = update_badge_for(MANAGER_NAME, current_rev);
|
||||
let restart_btn = if is_running {
|
||||
format!(
|
||||
" <form method=\"POST\" action=\"/restart/{MANAGER_NAME}\" class=\"inline\" data-async data-confirm=\"restart manager?\"><button class=\"btn btn-restart\" type=\"submit\">↺ R3ST4RT</button></form>\n",
|
||||
)
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"<li><span class=\"glyph\">▓█▓▒░</span> <a href=\"http://{hostname}:{MANAGER_PORT}/\">{container}</a> <span class=\"role role-m1nd\">m1nd</span>{update_badge} <span class=\"meta\">:{MANAGER_PORT}</span>\n <form method=\"POST\" action=\"/rebuild/{MANAGER_NAME}\" class=\"inline\" data-async data-confirm=\"rebuild manager? hot-reloads the container.\"><button class=\"btn btn-rebuild\" type=\"submit\">↻ R3BU1LD</button></form>\n</li>",
|
||||
"<li><span class=\"glyph\">▓█▓▒░</span> <a href=\"http://{hostname}:{MANAGER_PORT}/\">{container}</a> <span class=\"role role-m1nd\">m1nd</span>{update_badge} <span class=\"meta\">:{MANAGER_PORT}</span>\n{restart_btn} <form method=\"POST\" action=\"/rebuild/{MANAGER_NAME}\" class=\"inline\" data-async data-confirm=\"rebuild manager? hot-reloads the container.\"><button class=\"btn btn-rebuild\" type=\"submit\">↻ R3BU1LD</button></form>\n</li>",
|
||||
);
|
||||
} else if let Some(name) = container.strip_prefix(AGENT_PREFIX) {
|
||||
let port = lifecycle::agent_web_port(name);
|
||||
|
|
@ -225,9 +318,16 @@ fn render_containers(
|
|||
)
|
||||
};
|
||||
let update_badge = update_badge_for(name, current_rev);
|
||||
let running_buttons = if is_running {
|
||||
format!(
|
||||
" <form method=\"POST\" action=\"/restart/{name}\" class=\"inline\" data-async data-confirm=\"restart {name}?\"><button class=\"btn btn-restart\" type=\"submit\">↺ R3ST4RT</button></form>\n <form method=\"POST\" action=\"/kill/{name}\" class=\"inline\" data-async data-confirm=\"stop {name}?\"><button class=\"btn btn-stop\" type=\"submit\">■ ST0P</button></form>\n",
|
||||
)
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"<li><span class=\"glyph\">▒░▒░░</span> <a href=\"http://{hostname}:{port}/\">{name}</a> <span class=\"role role-ag3nt\">ag3nt</span>{login_badge}{update_badge} <span class=\"meta\">{container} :{port}</span>\n <form method=\"POST\" action=\"/rebuild/{name}\" class=\"inline\" data-async data-confirm=\"rebuild {name}? hot-reloads the container.\"><button class=\"btn btn-rebuild\" type=\"submit\">↻ R3BU1LD</button></form>\n <form method=\"POST\" action=\"/destroy/{name}\" class=\"inline\" data-async data-confirm=\"destroy {name}? container is removed; state + creds kept.\"><button class=\"btn btn-destroy\" type=\"submit\">DESTR0Y</button></form>\n</li>",
|
||||
"<li><span class=\"glyph\">▒░▒░░</span> <a href=\"http://{hostname}:{port}/\">{name}</a> <span class=\"role role-ag3nt\">ag3nt</span>{login_badge}{update_badge} <span class=\"meta\">{container} :{port}</span>\n{running_buttons} <form method=\"POST\" action=\"/rebuild/{name}\" class=\"inline\" data-async data-confirm=\"rebuild {name}? hot-reloads the container.\"><button class=\"btn btn-rebuild\" type=\"submit\">↻ R3BU1LD</button></form>\n <form method=\"POST\" action=\"/destroy/{name}\" class=\"inline\" data-async data-confirm=\"destroy {name}? container is removed; state + creds kept.\"><button class=\"btn btn-destroy\" type=\"submit\">DESTR0Y</button></form>\n</li>",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -126,6 +126,32 @@ pub async fn kill(name: &str) -> Result<()> {
|
|||
run(&["stop", &container]).await
|
||||
}
|
||||
|
||||
pub async fn start(name: &str) -> Result<()> {
|
||||
validate(name)?;
|
||||
let container = container_name(name);
|
||||
run(&["start", &container]).await
|
||||
}
|
||||
|
||||
/// Stop + start without regenerating any config. For "kick the container"
|
||||
/// without touching the flake or nspawn flags.
|
||||
pub async fn restart(name: &str) -> Result<()> {
|
||||
kill(name).await?;
|
||||
start(name).await
|
||||
}
|
||||
|
||||
/// True when the container's systemd unit is active. Used by the dashboard
|
||||
/// to gate stop/restart buttons.
|
||||
pub async fn is_running(name: &str) -> bool {
|
||||
let container = container_name(name);
|
||||
let unit = format!("container@{container}.service");
|
||||
Command::new("systemctl")
|
||||
.args(["is-active", "--quiet", &unit])
|
||||
.status()
|
||||
.await
|
||||
.map(|s| s.success())
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Fully tear down a sub-agent's container: stop + remove via `nixos-container
|
||||
/// destroy`, then clean our own systemd drop-in. Leaves it to the caller to
|
||||
/// wipe `/var/lib/hyperhive/...` state and the per-agent runtime dir.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue