apply_commit now handles first-time spawns; request_spawn is deprecated

This commit is contained in:
damocles 2026-05-22 09:20:50 +02:00
parent 6974634326
commit 66f1568e8f
6 changed files with 166 additions and 34 deletions

View file

@ -41,7 +41,7 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
match approval.kind {
ApprovalKind::ApplyCommit => {
let (result, terminal_tag) = run_apply_commit(
let (result, terminal_tag, is_first_spawn) = run_apply_commit(
&coord,
&approval,
&agent_dir,
@ -55,7 +55,21 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
if let Err(e) = crate::forge::push_config(&approval.agent).await {
tracing::warn!(agent = %approval.agent, error = ?e, "forge: push_config after apply failed");
}
finish_approval(&coord, &approval, result, terminal_tag)
if is_first_spawn && result.is_ok() {
// First-spawn bookkeeping: create the per-agent forge user
// and mirror the applied repo into agent-configs/<n>.
if let Err(e) = crate::forge::ensure_user_for(&approval.agent).await {
tracing::warn!(agent = %approval.agent, error = ?e, "forge: ensure_user after first spawn failed");
}
if let Err(e) = crate::forge::ensure_config_repo(&approval.agent).await {
tracing::warn!(agent = %approval.agent, error = ?e, "forge: ensure_config_repo after first spawn failed");
}
// New container row appeared — rescan so the dashboard
// reflects the post-spawn state without a manual refetch.
coord.rescan_containers_and_emit().await;
crate::dashboard::emit_tombstones_snapshot(&coord).await;
}
finish_approval(&coord, &approval, result, terminal_tag, is_first_spawn)
}
ApprovalKind::InitConfig => {
// Seed the proposed config repo. Runs synchronously — it's just
@ -67,7 +81,7 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
Ok(())
}
.await;
finish_approval(&coord, &approval, result, None)
finish_approval(&coord, &approval, result, None, false)
}
ApprovalKind::UpdateMetaInputs => {
// Decode the inputs from the commit_ref field (stored as JSON
@ -117,7 +131,7 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
tracing::warn!(agent = %agent_bg, error = ?e, "forge: push_config after spawn failed");
}
}
if let Err(e) = finish_approval(&coord_bg, &approval_bg, result, None) {
if let Err(e) = finish_approval(&coord_bg, &approval_bg, result, None, false) {
tracing::warn!(agent = %agent_bg, error = ?e, "spawn approval failed");
}
// New container row appeared (or didn't, on failure
@ -139,6 +153,7 @@ fn finish_approval(
approval: &hive_sh4re::Approval,
result: Result<()>,
terminal_tag: Option<String>,
is_first_spawn: bool,
) -> Result<()> {
let (status, note, ok) = match &result {
Ok(()) => (ApprovalStatus::Approved, None, true),
@ -201,6 +216,14 @@ fn finish_approval(
note,
sha: approval.fetched_sha.clone(),
}),
ApprovalKind::ApplyCommit if is_first_spawn => {
coord.notify_manager(&HelperEvent::Spawned {
agent: approval.agent.clone(),
ok,
note,
sha: approval.fetched_sha.clone(),
});
}
ApprovalKind::ApplyCommit => coord.notify_manager(&HelperEvent::Rebuilt {
agent: approval.agent.clone(),
ok,
@ -232,10 +255,14 @@ async fn run_apply_commit(
applied_dir: &std::path::Path,
claude_dir: &std::path::Path,
notes_dir: &std::path::Path,
) -> (Result<()>, Option<String>) {
) -> (Result<()>, Option<String>, bool) {
let id = approval.id;
let proposal_ref = format!("refs/tags/proposal/{id}");
// Detect first spawn before we touch anything so we can branch on it
// throughout this function.
let is_first_spawn = !lifecycle::container_exists(&approval.agent).await;
// Defensive: submit-time should have planted proposal/<id>, but if
// the row was migrated from an older schema or the tag got pruned
// we fail early with a clear note rather than building a stale
@ -246,6 +273,7 @@ async fn run_apply_commit(
"missing proposal tag {proposal_ref}: {e:#}"
)),
None,
is_first_spawn,
);
}
@ -253,16 +281,30 @@ async fn run_apply_commit(
// (and the meta lock indirectly) back if the build fails.
let prev_main_sha = match lifecycle::git_rev_parse(applied_dir, "refs/heads/main").await {
Ok(s) => s,
Err(e) => return (Err(anyhow::anyhow!("read applied/main: {e:#}")), None),
Err(e) => {
return (
Err(anyhow::anyhow!("read applied/main: {e:#}")),
None,
is_first_spawn,
)
}
};
if let Err(e) = lifecycle::git_tag(applied_dir, &format!("approved/{id}"), &proposal_ref).await
{
return (Err(anyhow::anyhow!("plant approved/{id}: {e:#}")), None);
return (
Err(anyhow::anyhow!("plant approved/{id}: {e:#}")),
None,
is_first_spawn,
);
}
if let Err(e) = lifecycle::git_tag(applied_dir, &format!("building/{id}"), &proposal_ref).await
{
return (Err(anyhow::anyhow!("plant building/{id}: {e:#}")), None);
return (
Err(anyhow::anyhow!("plant building/{id}: {e:#}")),
None,
is_first_spawn,
);
}
// Fast-forward applied/main to proposal/<id> + sync the working
@ -274,23 +316,70 @@ async fn run_apply_commit(
return (
Err(anyhow::anyhow!("ff main to {proposal_ref}: {e:#}")),
None,
is_first_spawn,
);
}
if let Err(e) = lifecycle::git_read_tree_reset(applied_dir, "refs/heads/main").await {
// main is ahead; working tree didn't sync. Roll main back to
// keep the two consistent before bailing.
let _ = lifecycle::git_update_ref(applied_dir, "refs/heads/main", &prev_main_sha).await;
return (Err(anyhow::anyhow!("read-tree to main: {e:#}")), None);
return (
Err(anyhow::anyhow!("read-tree to main: {e:#}")),
None,
is_first_spawn,
);
}
// First spawn: sync_agents must add this agent to the meta flake
// before prepare_deploy can update its input lock (which won't
// exist yet if this is the agent's first deploy).
if is_first_spawn {
let agents = match lifecycle::agents_for_meta_listing_with(&approval.agent).await {
Ok(a) => a,
Err(e) => {
let _ =
lifecycle::git_update_ref(applied_dir, "refs/heads/main", &prev_main_sha)
.await;
let _ = lifecycle::git_read_tree_reset(applied_dir, "refs/heads/main").await;
return (
Err(anyhow::anyhow!("agents_for_meta_listing_with: {e:#}")),
None,
is_first_spawn,
);
}
};
if let Err(e) = crate::meta::sync_agents(
&coord.hyperhive_flake,
coord.dashboard_port,
&coord.operator_pronouns,
&coord.context_window_tokens,
&agents,
)
.await
{
let _ =
lifecycle::git_update_ref(applied_dir, "refs/heads/main", &prev_main_sha).await;
let _ = lifecycle::git_read_tree_reset(applied_dir, "refs/heads/main").await;
return (
Err(anyhow::anyhow!("meta sync_agents for first spawn: {e:#}")),
None,
is_first_spawn,
);
}
}
// Phase 1 of the meta two-phase deploy: relock without committing.
if let Err(e) = crate::meta::prepare_deploy(&approval.agent).await {
let _ = lifecycle::git_update_ref(applied_dir, "refs/heads/main", &prev_main_sha).await;
let _ = lifecycle::git_read_tree_reset(applied_dir, "refs/heads/main").await;
return (Err(anyhow::anyhow!("meta prepare_deploy: {e:#}")), None);
return (
Err(anyhow::anyhow!("meta prepare_deploy: {e:#}")),
None,
is_first_spawn,
);
}
// Container-level rebuild against meta#<name>.
// Container-level rebuild (or first-time create) against meta#<name>.
let build_result = lifecycle::rebuild_no_meta(
&approval.agent,
agent_dir,
@ -324,7 +413,7 @@ async fn run_apply_commit(
// proposal, agent picks up where it left off with the
// new env / packages.
coord.kick_agent(&approval.agent, "config update applied");
(Ok(()), Some(tag))
(Ok(()), Some(tag), is_first_spawn)
}
Err(e) => {
let tag = format!("failed/{id}");
@ -350,7 +439,7 @@ async fn run_apply_commit(
tracing::warn!(agent = %approval.agent, %id, error = ?ae, "meta abort_deploy failed");
}
let _ = coord;
(Err(e), Some(tag))
(Err(e), Some(tag), is_first_spawn)
}
}
}