manager events: Spawned/Rebuilt/Killed/Destroyed + start button

This commit is contained in:
müde 2026-05-15 17:38:41 +02:00
parent 06ea0cf283
commit 37c6504462
9 changed files with 165 additions and 70 deletions

View file

@ -132,8 +132,11 @@ async fn serve(socket: &Path, interval: Duration, bus: Bus) -> Result<()> {
match recv { match recv {
Ok(ManagerResponse::Message { from, body }) => { Ok(ManagerResponse::Message { from, body }) => {
if from == SYSTEM_SENDER { if from == SYSTEM_SENDER {
// Helper events (ApprovalResolved, etc.) — log + surface // Helper events (ApprovalResolved / Spawned / Rebuilt /
// in live view but don't burn a claude turn on them. // Killed / Destroyed) — these are FYI for the manager;
// we surface them in the live view and forward them as
// a normal claude turn so the manager can react (e.g.
// greet a newly-spawned agent, retry a failed rebuild).
let parsed = serde_json::from_str::<HelperEvent>(&body).ok(); let parsed = serde_json::from_str::<HelperEvent>(&body).ok();
if let Some(event) = parsed { if let Some(event) = parsed {
tracing::info!(?event, "helper event"); tracing::info!(?event, "helper event");
@ -141,7 +144,9 @@ async fn serve(socket: &Path, interval: Duration, bus: Bus) -> Result<()> {
tracing::info!(%from, %body, "system message"); tracing::info!(%from, %body, "system message");
} }
bus.emit(LiveEvent::Note(format!("[system] {body}"))); bus.emit(LiveEvent::Note(format!("[system] {body}")));
continue; // Fall through: drive a turn with the event in the wake
// prompt body so claude sees it. Sender stays "system"
// so the wake prompt can label it as such.
} }
tracing::info!(%from, %body, "manager inbox"); tracing::info!(%from, %body, "manager inbox");
bus.emit(LiveEvent::TurnStart { bus.emit(LiveEvent::TurnStart {
@ -172,6 +177,15 @@ async fn serve(socket: &Path, interval: Duration, bus: Bus) -> Result<()> {
/// prompt doesn't have access to, and points the manager at its own /// prompt doesn't have access to, and points the manager at its own
/// editable config repo for self-modification. /// editable config repo for self-modification.
fn format_wake_prompt(label: &str, from: &str, body: &str) -> String { fn format_wake_prompt(label: &str, from: &str, body: &str) -> String {
let from_note = if from == SYSTEM_SENDER {
"\n The sender `system` means this is a hyperhive helper event \
(JSON body, `event` field discriminates): `approval_resolved`, \
`spawned`, `rebuilt`, `killed`, `destroyed`. Use these to react to \
lifecycle changes e.g. greet a freshly-spawned agent, retry a \
failed rebuild, or note the change to the operator.\n"
} else {
""
};
format!( format!(
"You are the hyperhive manager `{label}` in a multi-agent system. You \ "You are the hyperhive manager `{label}` in a multi-agent system. You \
coordinate sub-agents and relay between them and the operator.\n\ coordinate sub-agents and relay between them and the operator.\n\
@ -179,7 +193,7 @@ fn format_wake_prompt(label: &str, from: &str, body: &str) -> String {
Incoming message from `{from}`:\n\ Incoming message from `{from}`:\n\
---\n\ ---\n\
{body}\n\ {body}\n\
---\n\ ---\n{from_note}
\n\ \n\
Tools (hyperhive surface):\n\ Tools (hyperhive surface):\n\
- `mcp__hyperhive__recv()` drain one more message from your inbox.\n\ - `mcp__hyperhive__recv()` drain one more message from your inbox.\n\

View file

@ -147,6 +147,11 @@
form('/kill/' + c.name, 'btn-stop', '■ ST0P', 'stop ' + c.name + '?'), form('/kill/' + c.name, 'btn-stop', '■ ST0P', 'stop ' + c.name + '?'),
); );
} }
} else {
li.append(
' ',
form('/start/' + c.name, 'btn-start', '▶ ST4RT', 'start ' + c.name + '?'),
);
} }
li.append( li.append(
' ', ' ',

View file

@ -100,6 +100,7 @@ ul form.inline { display: inline-block; }
.btn-rebuild { color: var(--amber); border-color: var(--amber); font-size: 0.75em; padding: 0.15em 0.5em; margin-left: 0.6em; } .btn-rebuild { color: var(--amber); border-color: var(--amber); font-size: 0.75em; padding: 0.15em 0.5em; margin-left: 0.6em; }
.btn-restart { color: var(--cyan); border-color: var(--cyan); font-size: 0.75em; padding: 0.15em 0.5em; margin-left: 0.6em; } .btn-restart { color: var(--cyan); border-color: var(--cyan); font-size: 0.75em; padding: 0.15em 0.5em; margin-left: 0.6em; }
.btn-stop { color: var(--pink); border-color: var(--pink); font-size: 0.75em; padding: 0.15em 0.5em; margin-left: 0.6em; } .btn-stop { color: var(--pink); border-color: var(--pink); font-size: 0.75em; padding: 0.15em 0.5em; margin-left: 0.6em; }
.btn-start { color: var(--green); border-color: var(--green); font-size: 0.75em; padding: 0.15em 0.5em; margin-left: 0.6em; }
.btn-talk { color: var(--cyan); border-color: var(--cyan); } .btn-talk { color: var(--cyan); border-color: var(--cyan); }
.btn-spawn { color: var(--amber); border-color: var(--amber); } .btn-spawn { color: var(--amber); border-color: var(--amber); }
.spawnform { display: flex; gap: 0.6em; align-items: stretch; margin: 0.5em 0; } .spawnform { display: flex; gap: 0.6em; align-items: stretch; margin: 0.5em 0; }

View file

@ -6,9 +6,7 @@
use std::sync::Arc; use std::sync::Arc;
use anyhow::{Result, bail}; use anyhow::{Result, bail};
use hive_sh4re::{ use hive_sh4re::{ApprovalKind, ApprovalStatus, HelperEvent, MANAGER_AGENT};
ApprovalKind, ApprovalStatus, HelperEvent, MANAGER_AGENT, Message, SYSTEM_SENDER,
};
use crate::coordinator::{Coordinator, TransientKind}; use crate::coordinator::{Coordinator, TransientKind};
use crate::lifecycle::{self, MANAGER_NAME}; use crate::lifecycle::{self, MANAGER_NAME};
@ -90,36 +88,39 @@ fn finish_approval(
approval: &hive_sh4re::Approval, approval: &hive_sh4re::Approval,
result: Result<()>, result: Result<()>,
) -> Result<()> { ) -> Result<()> {
match result { let (status, note, ok) = match &result {
Ok(()) => { Ok(()) => (ApprovalStatus::Approved, None, true),
notify_manager(
coord,
&HelperEvent::ApprovalResolved {
id: approval.id,
agent: approval.agent.clone(),
commit_ref: approval.commit_ref.clone(),
status: ApprovalStatus::Approved,
note: None,
},
);
Ok(())
}
Err(e) => { Err(e) => {
let note = format!("{e:#}"); let note = format!("{e:#}");
let _ = coord.approvals.mark_failed(approval.id, &note); let _ = coord.approvals.mark_failed(approval.id, &note);
notify_manager( (ApprovalStatus::Failed, Some(note), false)
coord, }
&HelperEvent::ApprovalResolved { };
coord.notify_manager(&HelperEvent::ApprovalResolved {
id: approval.id, id: approval.id,
agent: approval.agent.clone(), agent: approval.agent.clone(),
commit_ref: approval.commit_ref.clone(), commit_ref: approval.commit_ref.clone(),
status: ApprovalStatus::Failed, status,
note: Some(note), note: note.clone(),
}, });
); // For spawn/rebuild approvals, also surface the underlying action so
Err(e) // the manager knows whether the container actually came up. The
} // ApprovalResolved event already carries the same `ok` signal but
// separating it lets the manager react to the lifecycle change
// without having to special-case approvals.
match approval.kind {
ApprovalKind::Spawn => coord.notify_manager(&HelperEvent::Spawned {
agent: approval.agent.clone(),
ok,
note,
}),
ApprovalKind::ApplyCommit => coord.notify_manager(&HelperEvent::Rebuilt {
agent: approval.agent.clone(),
ok,
note,
}),
} }
result
} }
/// Tear down a sub-agent container. By default this is non-destructive to /// Tear down a sub-agent container. By default this is non-destructive to
@ -144,6 +145,9 @@ pub async fn destroy(coord: &Coordinator, name: &str) -> Result<()> {
let _ = coord let _ = coord
.approvals .approvals
.fail_pending_for_agent(name, "agent destroyed"); .fail_pending_for_agent(name, "agent destroyed");
coord.notify_manager(&HelperEvent::Destroyed {
agent: name.to_owned(),
});
Ok(()) Ok(())
} }
@ -152,33 +156,13 @@ pub fn deny(coord: &Coordinator, id: i64) -> Result<()> {
coord.approvals.mark_denied(id)?; coord.approvals.mark_denied(id)?;
tracing::info!(%id, "approval denied"); tracing::info!(%id, "approval denied");
if let Some(a) = approval { if let Some(a) = approval {
notify_manager( coord.notify_manager(&HelperEvent::ApprovalResolved {
coord,
&HelperEvent::ApprovalResolved {
id: a.id, id: a.id,
agent: a.agent, agent: a.agent,
commit_ref: a.commit_ref, commit_ref: a.commit_ref,
status: ApprovalStatus::Denied, status: ApprovalStatus::Denied,
note: None, note: None,
}, });
);
} }
Ok(()) Ok(())
} }
fn notify_manager(coord: &Coordinator, event: &HelperEvent) {
let body = match serde_json::to_string(event) {
Ok(s) => s,
Err(e) => {
tracing::warn!(error = ?e, "failed to encode helper event");
return;
}
};
if let Err(e) = coord.broker.send(&Message {
from: SYSTEM_SENDER.to_owned(),
to: MANAGER_AGENT.to_owned(),
body,
}) {
tracing::warn!(error = ?e, "failed to push helper event to manager");
}
}

View file

@ -58,7 +58,7 @@ pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &s
.with_context(|| format!("ensure_runtime {name}"))?; .with_context(|| format!("ensure_runtime {name}"))?;
let applied_dir = Coordinator::agent_applied_dir(name); let applied_dir = Coordinator::agent_applied_dir(name);
let claude_dir = Coordinator::agent_claude_dir(name); let claude_dir = Coordinator::agent_claude_dir(name);
lifecycle::rebuild( let result = lifecycle::rebuild(
name, name,
&coord.hyperhive_flake, &coord.hyperhive_flake,
&agent_dir, &agent_dir,
@ -66,10 +66,27 @@ pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &s
&claude_dir, &claude_dir,
coord.dashboard_port, coord.dashboard_port,
) )
.await?; .await;
std::fs::write(rev_marker_path(name), current_rev) match &result {
.with_context(|| format!("write rev marker for {name}"))?; Ok(()) => {
Ok(()) if let Err(e) = std::fs::write(rev_marker_path(name), current_rev) {
tracing::warn!(%name, error = ?e, "write rev marker failed");
}
coord.notify_manager(&hive_sh4re::HelperEvent::Rebuilt {
agent: name.to_owned(),
ok: true,
note: None,
});
}
Err(e) => {
coord.notify_manager(&hive_sh4re::HelperEvent::Rebuilt {
agent: name.to_owned(),
ok: false,
note: Some(format!("{e:#}")),
});
}
}
result
} }
/// Auto-create the manager container on startup if it isn't already there. /// Auto-create the manager container on startup if it isn't already there.

View file

@ -107,6 +107,27 @@ impl Coordinator {
self.transient.lock().unwrap().clone() self.transient.lock().unwrap().clone()
} }
/// Push a `HelperEvent` into the manager's inbox. Encoded as JSON in
/// `Message::body`; sender = `SYSTEM_SENDER`. The manager harness
/// recognises the sender and parses the body. Best-effort: a serde or
/// broker error is logged but does not propagate.
pub fn notify_manager(&self, event: &hive_sh4re::HelperEvent) {
let body = match serde_json::to_string(event) {
Ok(s) => s,
Err(e) => {
tracing::warn!(error = ?e, "failed to encode helper event");
return;
}
};
if let Err(e) = self.broker.send(&hive_sh4re::Message {
from: hive_sh4re::SYSTEM_SENDER.to_owned(),
to: hive_sh4re::MANAGER_AGENT.to_owned(),
body,
}) {
tracing::warn!(error = ?e, "failed to push helper event to manager");
}
}
pub fn agent_dir(name: &str) -> PathBuf { pub fn agent_dir(name: &str) -> PathBuf {
PathBuf::from(format!("{AGENT_RUNTIME_ROOT}/{name}")) PathBuf::from(format!("{AGENT_RUNTIME_ROOT}/{name}"))
} }

View file

@ -47,6 +47,7 @@ pub async fn serve(port: u16, coord: Arc<Coordinator>) -> Result<()> {
.route("/destroy/{name}", post(post_destroy)) .route("/destroy/{name}", post(post_destroy))
.route("/kill/{name}", post(post_kill)) .route("/kill/{name}", post(post_kill))
.route("/restart/{name}", post(post_restart)) .route("/restart/{name}", post(post_restart))
.route("/start/{name}", post(post_start))
.route("/rebuild/{name}", post(post_rebuild)) .route("/rebuild/{name}", post(post_rebuild))
.route("/update-all", post(post_update_all)) .route("/update-all", post(post_update_all))
.route("/request-spawn", post(post_request_spawn)) .route("/request-spawn", post(post_request_spawn))
@ -313,6 +314,11 @@ async fn post_kill(State(state): State<AppState>, AxumPath(name): AxumPath<Strin
match lifecycle::kill(&logical).await { match lifecycle::kill(&logical).await {
Ok(()) => { Ok(()) => {
state.coord.unregister_agent(&logical); state.coord.unregister_agent(&logical);
state
.coord
.notify_manager(&hive_sh4re::HelperEvent::Killed {
agent: logical.clone(),
});
Redirect::to("/").into_response() Redirect::to("/").into_response()
} }
Err(e) => error_response(&format!("kill {logical} failed: {e:#}")), Err(e) => error_response(&format!("kill {logical} failed: {e:#}")),
@ -327,6 +333,14 @@ async fn post_restart(State(_state): State<AppState>, AxumPath(name): AxumPath<S
} }
} }
async fn post_start(State(_state): State<AppState>, AxumPath(name): AxumPath<String>) -> Response {
let logical = strip_container_prefix(&name);
match lifecycle::start(&logical).await {
Ok(()) => Redirect::to("/").into_response(),
Err(e) => error_response(&format!("start {logical} failed: {e:#}")),
}
}
async fn post_update_all(State(state): State<AppState>) -> Response { async fn post_update_all(State(state): State<AppState>) -> Response {
let Some(current_rev) = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake) let Some(current_rev) = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake)
else { else {

View file

@ -65,7 +65,7 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse {
let proposed_dir = Coordinator::agent_proposed_dir(name); let proposed_dir = Coordinator::agent_proposed_dir(name);
let applied_dir = Coordinator::agent_applied_dir(name); let applied_dir = Coordinator::agent_applied_dir(name);
let claude_dir = Coordinator::agent_claude_dir(name); let claude_dir = Coordinator::agent_claude_dir(name);
if let Err(e) = lifecycle::spawn( match lifecycle::spawn(
name, name,
&coord.hyperhive_flake, &coord.hyperhive_flake,
&agent_dir, &agent_dir,
@ -76,10 +76,24 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse {
) )
.await .await
{ {
Ok(()) => {
coord.notify_manager(&hive_sh4re::HelperEvent::Spawned {
agent: name.clone(),
ok: true,
note: None,
});
}
Err(e) => {
// Roll back socket registration if container creation failed. // Roll back socket registration if container creation failed.
coord.unregister_agent(name); coord.unregister_agent(name);
coord.notify_manager(&hive_sh4re::HelperEvent::Spawned {
agent: name.clone(),
ok: false,
note: Some(format!("{e:#}")),
});
return Err(e); return Err(e);
} }
}
HostResponse::success() HostResponse::success()
} }
HostRequest::RequestSpawn { name } => { HostRequest::RequestSpawn { name } => {
@ -94,6 +108,9 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse {
tracing::info!(%name, "kill"); tracing::info!(%name, "kill");
lifecycle::kill(name).await?; lifecycle::kill(name).await?;
coord.unregister_agent(name); coord.unregister_agent(name);
coord.notify_manager(&hive_sh4re::HelperEvent::Killed {
agent: name.clone(),
});
HostResponse::success() HostResponse::success()
} }
HostRequest::Destroy { name } => { HostRequest::Destroy { name } => {

View file

@ -196,8 +196,8 @@ pub const SYSTEM_SENDER: &str = "system";
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "event", rename_all = "snake_case")] #[serde(tag = "event", rename_all = "snake_case")]
pub enum HelperEvent { pub enum HelperEvent {
/// An approval was approved or denied; if approved, the rebuild has /// An approval was approved/denied/failed; if approved, the underlying
/// already run (status = Approved on success, Failed on error). /// action (rebuild or spawn) has already run by the time this lands.
ApprovalResolved { ApprovalResolved {
id: i64, id: i64,
agent: String, agent: String,
@ -206,6 +206,28 @@ pub enum HelperEvent {
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
note: Option<String>, note: Option<String>,
}, },
/// A new container was spawned (post-approval or via the admin CLI
/// bypass path). `ok=false` means the spawn failed.
Spawned {
agent: String,
ok: bool,
#[serde(default, skip_serializing_if = "Option::is_none")]
note: Option<String>,
},
/// A container was rebuilt (auto-update on flake rev change, or a
/// manual rebuild from CLI/dashboard).
Rebuilt {
agent: String,
ok: bool,
#[serde(default, skip_serializing_if = "Option::is_none")]
note: Option<String>,
},
/// A sub-agent's container was stopped (the systemd unit is down;
/// persistent state is unchanged).
Killed { agent: String },
/// A sub-agent's container was torn down (container removed; state
/// dirs preserved per `destroy` semantics).
Destroyed { agent: String },
} }
/// Requests on the manager socket. Manager has the agent surface (send/recv) /// Requests on the manager socket. Manager has the agent surface (send/recv)