manager events: Spawned/Rebuilt/Killed/Destroyed + start button
This commit is contained in:
parent
06ea0cf283
commit
37c6504462
9 changed files with 165 additions and 70 deletions
|
|
@ -6,9 +6,7 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{Result, bail};
|
||||
use hive_sh4re::{
|
||||
ApprovalKind, ApprovalStatus, HelperEvent, MANAGER_AGENT, Message, SYSTEM_SENDER,
|
||||
};
|
||||
use hive_sh4re::{ApprovalKind, ApprovalStatus, HelperEvent, MANAGER_AGENT};
|
||||
|
||||
use crate::coordinator::{Coordinator, TransientKind};
|
||||
use crate::lifecycle::{self, MANAGER_NAME};
|
||||
|
|
@ -90,36 +88,39 @@ fn finish_approval(
|
|||
approval: &hive_sh4re::Approval,
|
||||
result: Result<()>,
|
||||
) -> Result<()> {
|
||||
match result {
|
||||
Ok(()) => {
|
||||
notify_manager(
|
||||
coord,
|
||||
&HelperEvent::ApprovalResolved {
|
||||
id: approval.id,
|
||||
agent: approval.agent.clone(),
|
||||
commit_ref: approval.commit_ref.clone(),
|
||||
status: ApprovalStatus::Approved,
|
||||
note: None,
|
||||
},
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
let (status, note, ok) = match &result {
|
||||
Ok(()) => (ApprovalStatus::Approved, None, true),
|
||||
Err(e) => {
|
||||
let note = format!("{e:#}");
|
||||
let _ = coord.approvals.mark_failed(approval.id, ¬e);
|
||||
notify_manager(
|
||||
coord,
|
||||
&HelperEvent::ApprovalResolved {
|
||||
id: approval.id,
|
||||
agent: approval.agent.clone(),
|
||||
commit_ref: approval.commit_ref.clone(),
|
||||
status: ApprovalStatus::Failed,
|
||||
note: Some(note),
|
||||
},
|
||||
);
|
||||
Err(e)
|
||||
(ApprovalStatus::Failed, Some(note), false)
|
||||
}
|
||||
};
|
||||
coord.notify_manager(&HelperEvent::ApprovalResolved {
|
||||
id: approval.id,
|
||||
agent: approval.agent.clone(),
|
||||
commit_ref: approval.commit_ref.clone(),
|
||||
status,
|
||||
note: note.clone(),
|
||||
});
|
||||
// For spawn/rebuild approvals, also surface the underlying action so
|
||||
// the manager knows whether the container actually came up. The
|
||||
// ApprovalResolved event already carries the same `ok` signal but
|
||||
// separating it lets the manager react to the lifecycle change
|
||||
// without having to special-case approvals.
|
||||
match approval.kind {
|
||||
ApprovalKind::Spawn => coord.notify_manager(&HelperEvent::Spawned {
|
||||
agent: approval.agent.clone(),
|
||||
ok,
|
||||
note,
|
||||
}),
|
||||
ApprovalKind::ApplyCommit => coord.notify_manager(&HelperEvent::Rebuilt {
|
||||
agent: approval.agent.clone(),
|
||||
ok,
|
||||
note,
|
||||
}),
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Tear down a sub-agent container. By default this is non-destructive to
|
||||
|
|
@ -144,6 +145,9 @@ pub async fn destroy(coord: &Coordinator, name: &str) -> Result<()> {
|
|||
let _ = coord
|
||||
.approvals
|
||||
.fail_pending_for_agent(name, "agent destroyed");
|
||||
coord.notify_manager(&HelperEvent::Destroyed {
|
||||
agent: name.to_owned(),
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -152,33 +156,13 @@ pub fn deny(coord: &Coordinator, id: i64) -> Result<()> {
|
|||
coord.approvals.mark_denied(id)?;
|
||||
tracing::info!(%id, "approval denied");
|
||||
if let Some(a) = approval {
|
||||
notify_manager(
|
||||
coord,
|
||||
&HelperEvent::ApprovalResolved {
|
||||
id: a.id,
|
||||
agent: a.agent,
|
||||
commit_ref: a.commit_ref,
|
||||
status: ApprovalStatus::Denied,
|
||||
note: None,
|
||||
},
|
||||
);
|
||||
coord.notify_manager(&HelperEvent::ApprovalResolved {
|
||||
id: a.id,
|
||||
agent: a.agent,
|
||||
commit_ref: a.commit_ref,
|
||||
status: ApprovalStatus::Denied,
|
||||
note: None,
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn notify_manager(coord: &Coordinator, event: &HelperEvent) {
|
||||
let body = match serde_json::to_string(event) {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
tracing::warn!(error = ?e, "failed to encode helper event");
|
||||
return;
|
||||
}
|
||||
};
|
||||
if let Err(e) = coord.broker.send(&Message {
|
||||
from: SYSTEM_SENDER.to_owned(),
|
||||
to: MANAGER_AGENT.to_owned(),
|
||||
body,
|
||||
}) {
|
||||
tracing::warn!(error = ?e, "failed to push helper event to manager");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &s
|
|||
.with_context(|| format!("ensure_runtime {name}"))?;
|
||||
let applied_dir = Coordinator::agent_applied_dir(name);
|
||||
let claude_dir = Coordinator::agent_claude_dir(name);
|
||||
lifecycle::rebuild(
|
||||
let result = lifecycle::rebuild(
|
||||
name,
|
||||
&coord.hyperhive_flake,
|
||||
&agent_dir,
|
||||
|
|
@ -66,10 +66,27 @@ pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &s
|
|||
&claude_dir,
|
||||
coord.dashboard_port,
|
||||
)
|
||||
.await?;
|
||||
std::fs::write(rev_marker_path(name), current_rev)
|
||||
.with_context(|| format!("write rev marker for {name}"))?;
|
||||
Ok(())
|
||||
.await;
|
||||
match &result {
|
||||
Ok(()) => {
|
||||
if let Err(e) = std::fs::write(rev_marker_path(name), current_rev) {
|
||||
tracing::warn!(%name, error = ?e, "write rev marker failed");
|
||||
}
|
||||
coord.notify_manager(&hive_sh4re::HelperEvent::Rebuilt {
|
||||
agent: name.to_owned(),
|
||||
ok: true,
|
||||
note: None,
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
coord.notify_manager(&hive_sh4re::HelperEvent::Rebuilt {
|
||||
agent: name.to_owned(),
|
||||
ok: false,
|
||||
note: Some(format!("{e:#}")),
|
||||
});
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Auto-create the manager container on startup if it isn't already there.
|
||||
|
|
|
|||
|
|
@ -107,6 +107,27 @@ impl Coordinator {
|
|||
self.transient.lock().unwrap().clone()
|
||||
}
|
||||
|
||||
/// Push a `HelperEvent` into the manager's inbox. Encoded as JSON in
|
||||
/// `Message::body`; sender = `SYSTEM_SENDER`. The manager harness
|
||||
/// recognises the sender and parses the body. Best-effort: a serde or
|
||||
/// broker error is logged but does not propagate.
|
||||
pub fn notify_manager(&self, event: &hive_sh4re::HelperEvent) {
|
||||
let body = match serde_json::to_string(event) {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
tracing::warn!(error = ?e, "failed to encode helper event");
|
||||
return;
|
||||
}
|
||||
};
|
||||
if let Err(e) = self.broker.send(&hive_sh4re::Message {
|
||||
from: hive_sh4re::SYSTEM_SENDER.to_owned(),
|
||||
to: hive_sh4re::MANAGER_AGENT.to_owned(),
|
||||
body,
|
||||
}) {
|
||||
tracing::warn!(error = ?e, "failed to push helper event to manager");
|
||||
}
|
||||
}
|
||||
|
||||
pub fn agent_dir(name: &str) -> PathBuf {
|
||||
PathBuf::from(format!("{AGENT_RUNTIME_ROOT}/{name}"))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ pub async fn serve(port: u16, coord: Arc<Coordinator>) -> Result<()> {
|
|||
.route("/destroy/{name}", post(post_destroy))
|
||||
.route("/kill/{name}", post(post_kill))
|
||||
.route("/restart/{name}", post(post_restart))
|
||||
.route("/start/{name}", post(post_start))
|
||||
.route("/rebuild/{name}", post(post_rebuild))
|
||||
.route("/update-all", post(post_update_all))
|
||||
.route("/request-spawn", post(post_request_spawn))
|
||||
|
|
@ -313,6 +314,11 @@ async fn post_kill(State(state): State<AppState>, AxumPath(name): AxumPath<Strin
|
|||
match lifecycle::kill(&logical).await {
|
||||
Ok(()) => {
|
||||
state.coord.unregister_agent(&logical);
|
||||
state
|
||||
.coord
|
||||
.notify_manager(&hive_sh4re::HelperEvent::Killed {
|
||||
agent: logical.clone(),
|
||||
});
|
||||
Redirect::to("/").into_response()
|
||||
}
|
||||
Err(e) => error_response(&format!("kill {logical} failed: {e:#}")),
|
||||
|
|
@ -327,6 +333,14 @@ async fn post_restart(State(_state): State<AppState>, AxumPath(name): AxumPath<S
|
|||
}
|
||||
}
|
||||
|
||||
async fn post_start(State(_state): State<AppState>, AxumPath(name): AxumPath<String>) -> Response {
|
||||
let logical = strip_container_prefix(&name);
|
||||
match lifecycle::start(&logical).await {
|
||||
Ok(()) => Redirect::to("/").into_response(),
|
||||
Err(e) => error_response(&format!("start {logical} failed: {e:#}")),
|
||||
}
|
||||
}
|
||||
|
||||
async fn post_update_all(State(state): State<AppState>) -> Response {
|
||||
let Some(current_rev) = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake)
|
||||
else {
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse {
|
|||
let proposed_dir = Coordinator::agent_proposed_dir(name);
|
||||
let applied_dir = Coordinator::agent_applied_dir(name);
|
||||
let claude_dir = Coordinator::agent_claude_dir(name);
|
||||
if let Err(e) = lifecycle::spawn(
|
||||
match lifecycle::spawn(
|
||||
name,
|
||||
&coord.hyperhive_flake,
|
||||
&agent_dir,
|
||||
|
|
@ -76,9 +76,23 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse {
|
|||
)
|
||||
.await
|
||||
{
|
||||
// Roll back socket registration if container creation failed.
|
||||
coord.unregister_agent(name);
|
||||
return Err(e);
|
||||
Ok(()) => {
|
||||
coord.notify_manager(&hive_sh4re::HelperEvent::Spawned {
|
||||
agent: name.clone(),
|
||||
ok: true,
|
||||
note: None,
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
// Roll back socket registration if container creation failed.
|
||||
coord.unregister_agent(name);
|
||||
coord.notify_manager(&hive_sh4re::HelperEvent::Spawned {
|
||||
agent: name.clone(),
|
||||
ok: false,
|
||||
note: Some(format!("{e:#}")),
|
||||
});
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
HostResponse::success()
|
||||
}
|
||||
|
|
@ -94,6 +108,9 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse {
|
|||
tracing::info!(%name, "kill");
|
||||
lifecycle::kill(name).await?;
|
||||
coord.unregister_agent(name);
|
||||
coord.notify_manager(&hive_sh4re::HelperEvent::Killed {
|
||||
agent: name.clone(),
|
||||
});
|
||||
HostResponse::success()
|
||||
}
|
||||
HostRequest::Destroy { name } => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue