dashboard: K3PT ST4T3 section + agent links open in new tab
New section between containers and questions: lists every name with a state dir under /var/lib/hyperhive/agents/ that doesn't correspond to a live container. Each row shows state size, last-modified age, and whether claude creds are kept. Two actions per row:
- R3V1V3 — queues a spawn approval with the same name (operator approves to recreate; the spawn flow reuses prior config + claude creds, so no re-login is needed).
- PURG3 — wipes the agent's state + applied dirs (POST /purge-tombstone/{name} endpoint; refuses if a live container with that name still exists).
The dashboard also opens agent links in new tabs now (target=_blank + rel=noopener) so the operator's overview tab stays put when they dive into an agent.
This commit is contained in:
parent
8344dd9ab7
commit
5ee65d2f15
6 changed files with 212 additions and 3 deletions
|
|
@ -204,4 +204,23 @@ impl Coordinator {
|
|||
pub fn agent_applied_dir(name: &str) -> PathBuf {
|
||||
PathBuf::from(format!("{APPLIED_STATE_ROOT}/{name}"))
|
||||
}
|
||||
|
||||
/// Enumerate names that have a persistent state dir under
|
||||
/// `/var/lib/hyperhive/agents/` (i.e. config / claude creds /
|
||||
/// notes survive). Includes both currently-existing containers and
|
||||
/// destroyed-but-kept tombstones; callers filter the latter by
|
||||
/// subtracting `lifecycle::list()`.
|
||||
#[must_use]
|
||||
pub fn kept_state_names() -> Vec<String> {
|
||||
let Ok(rd) = std::fs::read_dir(AGENT_STATE_ROOT) else {
|
||||
return Vec::new();
|
||||
};
|
||||
let mut out: Vec<String> = rd
|
||||
.flatten()
|
||||
.filter(|e| e.file_type().is_ok_and(|t| t.is_dir()))
|
||||
.filter_map(|e| e.file_name().into_string().ok())
|
||||
.collect();
|
||||
out.sort();
|
||||
out
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ pub async fn serve(port: u16, coord: Arc<Coordinator>) -> Result<()> {
|
|||
.route("/rebuild/{name}", post(post_rebuild))
|
||||
.route("/update-all", post(post_update_all))
|
||||
.route("/answer-question/{id}", post(post_answer_question))
|
||||
.route("/purge-tombstone/{name}", post(post_purge_tombstone))
|
||||
.route("/request-spawn", post(post_request_spawn))
|
||||
.route("/messages/stream", get(messages_stream))
|
||||
.with_state(AppState { coord });
|
||||
|
|
@ -106,6 +107,21 @@ struct StateSnapshot {
|
|||
/// we mark the row answered and fire `HelperEvent::OperatorAnswered`
|
||||
/// into the manager's inbox.
|
||||
questions: Vec<crate::operator_questions::OpQuestion>,
|
||||
/// State dirs (config history + claude creds + /state/ notes) that
|
||||
/// survive after a destroy-without-purge. The operator can re-spawn
|
||||
/// with the same name to resume, or PURG3 to wipe them.
|
||||
tombstones: Vec<TombstoneView>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct TombstoneView {
|
||||
name: String,
|
||||
/// Bytes used by the state dir tree. Cheap-ish to compute; let the
|
||||
/// operator know how much they're holding onto.
|
||||
state_bytes: u64,
|
||||
/// Mtime (unix seconds) of the state dir; rough "last seen".
|
||||
last_seen: i64,
|
||||
has_creds: bool,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
|
@ -145,6 +161,7 @@ struct ApprovalView {
|
|||
diff_html: Option<String>,
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_lines)]
|
||||
async fn api_state(headers: HeaderMap, State(state): State<AppState>) -> axum::Json<StateSnapshot> {
|
||||
let host = headers
|
||||
.get("host")
|
||||
|
|
@ -242,6 +259,35 @@ async fn api_state(headers: HeaderMap, State(state): State<AppState>) -> axum::J
|
|||
.unwrap_or_default();
|
||||
let questions = state.coord.questions.pending().unwrap_or_default();
|
||||
|
||||
// Tombstones: state-dir names that don't appear in the live container
|
||||
// list (and aren't the manager). Operator can re-spawn or PURG3.
|
||||
let live: std::collections::HashSet<String> = containers
|
||||
.iter()
|
||||
.map(|c| c.name.clone())
|
||||
.chain(state.coord.transient_snapshot().into_keys())
|
||||
.collect();
|
||||
let tombstones: Vec<TombstoneView> = Coordinator::kept_state_names()
|
||||
.into_iter()
|
||||
.filter(|name| name != MANAGER_NAME && !live.contains(name))
|
||||
.map(|name| {
|
||||
let root = Coordinator::agent_state_root(&name);
|
||||
let state_bytes = dir_size_bytes(&root);
|
||||
let last_seen = std::fs::metadata(&root)
|
||||
.and_then(|m| m.modified())
|
||||
.ok()
|
||||
.and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
|
||||
.and_then(|d| i64::try_from(d.as_secs()).ok())
|
||||
.unwrap_or(0);
|
||||
let has_creds = claude_has_session(&Coordinator::agent_claude_dir(&name));
|
||||
TombstoneView {
|
||||
name,
|
||||
state_bytes,
|
||||
last_seen,
|
||||
has_creds,
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
axum::Json(StateSnapshot {
|
||||
hostname,
|
||||
manager_port: MANAGER_PORT,
|
||||
|
|
@ -251,9 +297,33 @@ async fn api_state(headers: HeaderMap, State(state): State<AppState>) -> axum::J
|
|||
approvals: approval_views,
|
||||
operator_inbox,
|
||||
questions,
|
||||
tombstones,
|
||||
})
|
||||
}
|
||||
|
||||
/// Total size in bytes of every regular file below `root`, recursively.
///
/// Typical agent state (config repo + claude creds + notes file) is only a
/// few MB, so this is computed inline on each /api/state request.
///
/// The walk is best-effort: directories that cannot be listed and entries
/// whose type or metadata cannot be read are skipped, so an unreadable or
/// missing `root` gives 0. Symlinks are neither followed nor counted.
fn dir_size_bytes(root: &Path) -> u64 {
    let mut total = 0u64;
    // Directories still waiting to be listed; replaces recursion.
    let mut pending = vec![root.to_path_buf()];
    while let Some(dir) = pending.pop() {
        let Ok(entries) = std::fs::read_dir(&dir) else {
            continue;
        };
        for entry in entries.flatten() {
            match entry.file_type() {
                Ok(kind) if kind.is_dir() => pending.push(entry.path()),
                Ok(kind) if kind.is_file() => {
                    if let Ok(meta) = entry.metadata() {
                        total += meta.len();
                    }
                }
                _ => {}
            }
        }
    }
    total
}
|
||||
|
||||
async fn messages_stream(
|
||||
State(state): State<AppState>,
|
||||
) -> Sse<impl Stream<Item = Result<Event, Infallible>>> {
|
||||
|
|
@ -316,6 +386,48 @@ async fn post_answer_question(
|
|||
}
|
||||
}
|
||||
|
||||
async fn post_purge_tombstone(
|
||||
State(state): State<AppState>,
|
||||
AxumPath(name): AxumPath<String>,
|
||||
) -> Response {
|
||||
if name == lifecycle::MANAGER_NAME {
|
||||
return error_response("refusing to purge the manager's state");
|
||||
}
|
||||
// Sanity: refuse to purge if a live container still exists with this
|
||||
// name. The dashboard already filters tombstones to non-live names,
|
||||
// but the operator could send a stale POST.
|
||||
let live = lifecycle::list().await.unwrap_or_default();
|
||||
if live
|
||||
.iter()
|
||||
.any(|c| c == &format!("{}{name}", lifecycle::AGENT_PREFIX) || c == &name)
|
||||
{
|
||||
return error_response(&format!(
|
||||
"refusing to purge {name}: container still exists — use DESTR0Y first"
|
||||
));
|
||||
}
|
||||
let mut errors = Vec::new();
|
||||
for dir in [
|
||||
Coordinator::agent_state_root(&name),
|
||||
Coordinator::agent_applied_dir(&name),
|
||||
] {
|
||||
if dir.exists()
|
||||
&& let Err(e) = std::fs::remove_dir_all(&dir)
|
||||
{
|
||||
errors.push(format!("{}: {e}", dir.display()));
|
||||
}
|
||||
}
|
||||
let _ = state
|
||||
.coord
|
||||
.approvals
|
||||
.fail_pending_for_agent(&name, "agent state purged");
|
||||
if errors.is_empty() {
|
||||
tracing::info!(%name, "tombstone purged");
|
||||
Redirect::to("/").into_response()
|
||||
} else {
|
||||
error_response(&format!("purge {name} partial: {}", errors.join(", ")))
|
||||
}
|
||||
}
|
||||
|
||||
async fn post_request_spawn(
|
||||
State(state): State<AppState>,
|
||||
Form(form): Form<RequestSpawnForm>,
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ CREATE INDEX IF NOT EXISTS idx_operator_questions_pending
|
|||
";
|
||||
|
||||
/// Add the `multi` column to pre-existing databases. `ALTER TABLE ADD COLUMN`
|
||||
/// has no `IF NOT EXISTS` form in sqlite, so we check pragma_table_info first.
|
||||
/// has no `IF NOT EXISTS` form in sqlite, so we check `pragma_table_info` first.
|
||||
fn ensure_multi_column(conn: &Connection) -> Result<()> {
|
||||
let has: bool = conn
|
||||
.prepare("SELECT 1 FROM pragma_table_info('operator_questions') WHERE name = 'multi'")?
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue