manager events: Spawned/Rebuilt/Killed/Destroyed + start button

This commit is contained in:
müde 2026-05-15 17:38:41 +02:00
parent 06ea0cf283
commit 37c6504462
9 changed files with 165 additions and 70 deletions

View file

@ -147,6 +147,11 @@
form('/kill/' + c.name, 'btn-stop', '■ ST0P', 'stop ' + c.name + '?'),
);
}
} else {
li.append(
' ',
form('/start/' + c.name, 'btn-start', '▶ ST4RT', 'start ' + c.name + '?'),
);
}
li.append(
' ',

View file

@ -100,6 +100,7 @@ ul form.inline { display: inline-block; }
.btn-rebuild { color: var(--amber); border-color: var(--amber); font-size: 0.75em; padding: 0.15em 0.5em; margin-left: 0.6em; }
.btn-restart { color: var(--cyan); border-color: var(--cyan); font-size: 0.75em; padding: 0.15em 0.5em; margin-left: 0.6em; }
.btn-stop { color: var(--pink); border-color: var(--pink); font-size: 0.75em; padding: 0.15em 0.5em; margin-left: 0.6em; }
.btn-start { color: var(--green); border-color: var(--green); font-size: 0.75em; padding: 0.15em 0.5em; margin-left: 0.6em; }
.btn-talk { color: var(--cyan); border-color: var(--cyan); }
.btn-spawn { color: var(--amber); border-color: var(--amber); }
.spawnform { display: flex; gap: 0.6em; align-items: stretch; margin: 0.5em 0; }

View file

@ -6,9 +6,7 @@
use std::sync::Arc;
use anyhow::{Result, bail};
use hive_sh4re::{
ApprovalKind, ApprovalStatus, HelperEvent, MANAGER_AGENT, Message, SYSTEM_SENDER,
};
use hive_sh4re::{ApprovalKind, ApprovalStatus, HelperEvent, MANAGER_AGENT};
use crate::coordinator::{Coordinator, TransientKind};
use crate::lifecycle::{self, MANAGER_NAME};
@ -90,36 +88,39 @@ fn finish_approval(
approval: &hive_sh4re::Approval,
result: Result<()>,
) -> Result<()> {
match result {
Ok(()) => {
notify_manager(
coord,
&HelperEvent::ApprovalResolved {
id: approval.id,
agent: approval.agent.clone(),
commit_ref: approval.commit_ref.clone(),
status: ApprovalStatus::Approved,
note: None,
},
);
Ok(())
}
let (status, note, ok) = match &result {
Ok(()) => (ApprovalStatus::Approved, None, true),
Err(e) => {
let note = format!("{e:#}");
let _ = coord.approvals.mark_failed(approval.id, &note);
notify_manager(
coord,
&HelperEvent::ApprovalResolved {
id: approval.id,
agent: approval.agent.clone(),
commit_ref: approval.commit_ref.clone(),
status: ApprovalStatus::Failed,
note: Some(note),
},
);
Err(e)
(ApprovalStatus::Failed, Some(note), false)
}
};
coord.notify_manager(&HelperEvent::ApprovalResolved {
id: approval.id,
agent: approval.agent.clone(),
commit_ref: approval.commit_ref.clone(),
status,
note: note.clone(),
});
// For spawn/rebuild approvals, also surface the underlying action so
// the manager knows whether the container actually came up. The
// ApprovalResolved event already carries the same `ok` signal but
// separating it lets the manager react to the lifecycle change
// without having to special-case approvals.
match approval.kind {
ApprovalKind::Spawn => coord.notify_manager(&HelperEvent::Spawned {
agent: approval.agent.clone(),
ok,
note,
}),
ApprovalKind::ApplyCommit => coord.notify_manager(&HelperEvent::Rebuilt {
agent: approval.agent.clone(),
ok,
note,
}),
}
result
}
/// Tear down a sub-agent container. By default this is non-destructive to
@ -144,6 +145,9 @@ pub async fn destroy(coord: &Coordinator, name: &str) -> Result<()> {
let _ = coord
.approvals
.fail_pending_for_agent(name, "agent destroyed");
coord.notify_manager(&HelperEvent::Destroyed {
agent: name.to_owned(),
});
Ok(())
}
@ -152,33 +156,13 @@ pub fn deny(coord: &Coordinator, id: i64) -> Result<()> {
coord.approvals.mark_denied(id)?;
tracing::info!(%id, "approval denied");
if let Some(a) = approval {
notify_manager(
coord,
&HelperEvent::ApprovalResolved {
id: a.id,
agent: a.agent,
commit_ref: a.commit_ref,
status: ApprovalStatus::Denied,
note: None,
},
);
coord.notify_manager(&HelperEvent::ApprovalResolved {
id: a.id,
agent: a.agent,
commit_ref: a.commit_ref,
status: ApprovalStatus::Denied,
note: None,
});
}
Ok(())
}
/// Best-effort delivery of a `HelperEvent` to the manager agent.
///
/// The event is serialised to JSON and sent through `coord.broker` as a
/// `Message` whose `from` is `SYSTEM_SENDER` and whose `to` is
/// `MANAGER_AGENT`. Nothing is returned: both an encoding failure and a
/// broker failure are logged at `warn` level and otherwise ignored.
fn notify_manager(coord: &Coordinator, event: &HelperEvent) {
// Encode first; without a body there is nothing to send, so bail out early.
let body = match serde_json::to_string(event) {
Ok(s) => s,
Err(e) => {
tracing::warn!(error = ?e, "failed to encode helper event");
return;
}
};
// A send error is only logged so that callers are never failed by a
// notification problem.
if let Err(e) = coord.broker.send(&Message {
from: SYSTEM_SENDER.to_owned(),
to: MANAGER_AGENT.to_owned(),
body,
}) {
tracing::warn!(error = ?e, "failed to push helper event to manager");
}
}

View file

@ -58,7 +58,7 @@ pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &s
.with_context(|| format!("ensure_runtime {name}"))?;
let applied_dir = Coordinator::agent_applied_dir(name);
let claude_dir = Coordinator::agent_claude_dir(name);
lifecycle::rebuild(
let result = lifecycle::rebuild(
name,
&coord.hyperhive_flake,
&agent_dir,
@ -66,10 +66,27 @@ pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &s
&claude_dir,
coord.dashboard_port,
)
.await?;
std::fs::write(rev_marker_path(name), current_rev)
.with_context(|| format!("write rev marker for {name}"))?;
Ok(())
.await;
match &result {
Ok(()) => {
if let Err(e) = std::fs::write(rev_marker_path(name), current_rev) {
tracing::warn!(%name, error = ?e, "write rev marker failed");
}
coord.notify_manager(&hive_sh4re::HelperEvent::Rebuilt {
agent: name.to_owned(),
ok: true,
note: None,
});
}
Err(e) => {
coord.notify_manager(&hive_sh4re::HelperEvent::Rebuilt {
agent: name.to_owned(),
ok: false,
note: Some(format!("{e:#}")),
});
}
}
result
}
/// Auto-create the manager container on startup if it isn't already there.

View file

@ -107,6 +107,27 @@ impl Coordinator {
self.transient.lock().unwrap().clone()
}
/// Push a `HelperEvent` into the manager's inbox. Encoded as JSON in
/// `Message::body`; sender = `SYSTEM_SENDER`. The manager harness
/// recognises the sender and parses the body. Best-effort: a serde or
/// broker error is logged but does not propagate.
pub fn notify_manager(&self, event: &hive_sh4re::HelperEvent) {
let body = match serde_json::to_string(event) {
Ok(s) => s,
Err(e) => {
tracing::warn!(error = ?e, "failed to encode helper event");
return;
}
};
if let Err(e) = self.broker.send(&hive_sh4re::Message {
from: hive_sh4re::SYSTEM_SENDER.to_owned(),
to: hive_sh4re::MANAGER_AGENT.to_owned(),
body,
}) {
tracing::warn!(error = ?e, "failed to push helper event to manager");
}
}
pub fn agent_dir(name: &str) -> PathBuf {
PathBuf::from(format!("{AGENT_RUNTIME_ROOT}/{name}"))
}

View file

@ -47,6 +47,7 @@ pub async fn serve(port: u16, coord: Arc<Coordinator>) -> Result<()> {
.route("/destroy/{name}", post(post_destroy))
.route("/kill/{name}", post(post_kill))
.route("/restart/{name}", post(post_restart))
.route("/start/{name}", post(post_start))
.route("/rebuild/{name}", post(post_rebuild))
.route("/update-all", post(post_update_all))
.route("/request-spawn", post(post_request_spawn))
@ -313,6 +314,11 @@ async fn post_kill(State(state): State<AppState>, AxumPath(name): AxumPath<Strin
match lifecycle::kill(&logical).await {
Ok(()) => {
state.coord.unregister_agent(&logical);
state
.coord
.notify_manager(&hive_sh4re::HelperEvent::Killed {
agent: logical.clone(),
});
Redirect::to("/").into_response()
}
Err(e) => error_response(&format!("kill {logical} failed: {e:#}")),
@ -327,6 +333,14 @@ async fn post_restart(State(_state): State<AppState>, AxumPath(name): AxumPath<S
}
}
/// `POST /start/{name}` handler: start the named container.
///
/// The container prefix is stripped from the path segment to obtain the
/// logical agent name, which is handed to `lifecycle::start`. On success the
/// client is redirected to `/`; on failure an error response carrying the
/// full error chain is returned. The shared application state is unused.
async fn post_start(State(_state): State<AppState>, AxumPath(name): AxumPath<String>) -> Response {
    let agent = strip_container_prefix(&name);

    // Guard clause: report the failure and stop; the happy path falls through.
    if let Err(err) = lifecycle::start(&agent).await {
        return error_response(&format!("start {agent} failed: {err:#}"));
    }

    Redirect::to("/").into_response()
}
async fn post_update_all(State(state): State<AppState>) -> Response {
let Some(current_rev) = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake)
else {

View file

@ -65,7 +65,7 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse {
let proposed_dir = Coordinator::agent_proposed_dir(name);
let applied_dir = Coordinator::agent_applied_dir(name);
let claude_dir = Coordinator::agent_claude_dir(name);
if let Err(e) = lifecycle::spawn(
match lifecycle::spawn(
name,
&coord.hyperhive_flake,
&agent_dir,
@ -76,9 +76,23 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse {
)
.await
{
// Roll back socket registration if container creation failed.
coord.unregister_agent(name);
return Err(e);
Ok(()) => {
coord.notify_manager(&hive_sh4re::HelperEvent::Spawned {
agent: name.clone(),
ok: true,
note: None,
});
}
Err(e) => {
// Roll back socket registration if container creation failed.
coord.unregister_agent(name);
coord.notify_manager(&hive_sh4re::HelperEvent::Spawned {
agent: name.clone(),
ok: false,
note: Some(format!("{e:#}")),
});
return Err(e);
}
}
HostResponse::success()
}
@ -94,6 +108,9 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse {
tracing::info!(%name, "kill");
lifecycle::kill(name).await?;
coord.unregister_agent(name);
coord.notify_manager(&hive_sh4re::HelperEvent::Killed {
agent: name.clone(),
});
HostResponse::success()
}
HostRequest::Destroy { name } => {