From 315d4289c7b317febfe3cd995cafb2dbf0531e71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?m=C3=BCde?= Date: Fri, 15 May 2026 23:00:01 +0200 Subject: [PATCH] actions: tag-driven approve(ApplyCommit) flow run_apply_commit walks the approval through the tag state machine in applied: approved/<id> + building/<id> stamped before the build, then git read-tree --reset to proposal/<id> populates the working dir without moving HEAD. on rebuild success deployed/<id> is planted and refs/heads/main fast-forwards to the proposal. on failure failed/<id> is annotated with the build error and the working tree resets back to main so the agent stays evaluable. helper events Rebuilt + ApprovalResolved both carry the terminal tag so the manager can git-show the exact tree (and read the failure note from an annotated tag) against its read-only applied.git mount. finish_approval grows a terminal_tag param; spawn path passes None. lifecycle::apply_commit deleted. --- hive-c0re/src/actions.rs | 127 ++++++++++++++++++++++++++++++++----- hive-c0re/src/lifecycle.rs | 18 ------ 2 files changed, 110 insertions(+), 35 deletions(-) diff --git a/hive-c0re/src/actions.rs b/hive-c0re/src/actions.rs index bd90f0e..6a20c91 100644 --- a/hive-c0re/src/actions.rs +++ b/hive-c0re/src/actions.rs @@ -41,21 +41,16 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> { match approval.kind { ApprovalKind::ApplyCommit => { - let result = async { - lifecycle::apply_commit(&applied_dir, &proposed_dir, &approval.commit_ref).await?; - lifecycle::rebuild( - &approval.agent, - &coord.hyperhive_flake, - &agent_dir, - &applied_dir, - &claude_dir, - &notes_dir, - coord.dashboard_port, - ) - .await - } + let (result, terminal_tag) = run_apply_commit( + &coord, + &approval, + &agent_dir, + &applied_dir, + &claude_dir, + &notes_dir, + ) .await; - finish_approval(&coord, &approval, result) + finish_approval(&coord, &approval, result, terminal_tag) } ApprovalKind::Spawn => { // Run the spawn in the background so the approve POST returns @@ -77,7 
+72,7 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> { ) .await; coord_bg.clear_transient(&agent_bg); - if let Err(e) = finish_approval(&coord_bg, &approval_bg, result) { + if let Err(e) = finish_approval(&coord_bg, &approval_bg, result, None) { tracing::warn!(agent = %agent_bg, error = ?e, "spawn approval failed"); } }); @@ -90,6 +85,7 @@ fn finish_approval( coord: &Coordinator, approval: &hive_sh4re::Approval, result: Result<()>, + terminal_tag: Option<String>, ) -> Result<()> { let (status, note, ok) = match &result { Ok(()) => (ApprovalStatus::Approved, None, true), @@ -106,7 +102,7 @@ fn finish_approval( status, note: note.clone(), sha: approval.fetched_sha.clone(), - tag: None, + tag: terminal_tag.clone(), }); // For spawn/rebuild approvals, also surface the underlying action so // the manager knows whether the container actually came up. The @@ -125,12 +121,109 @@ fn finish_approval( ok, note, sha: approval.fetched_sha.clone(), - tag: None, + tag: terminal_tag, }), } result } +/// Tag-driven ApplyCommit handler. Walks the approval through the tag +/// state machine documented in `docs/approvals.md`: stamp `approved/<id>` +/// + `building/<id>` first so the audit trail captures intent, then +/// drop the candidate tree into the working dir without moving HEAD, +/// run the rebuild, and either fast-forward `applied/main` to the +/// proposal commit on success (`deployed/<id>`) or annotate +/// `failed/<id>` with the build error and reset the working tree back +/// to the last known-good main. main never advances on a failed +/// build, so a crash-and-recover doesn't leave the agent pointing at +/// a tree it can't evaluate. 
+async fn run_apply_commit( + coord: &Arc<Coordinator>, + approval: &hive_sh4re::Approval, + agent_dir: &std::path::Path, + applied_dir: &std::path::Path, + claude_dir: &std::path::Path, + notes_dir: &std::path::Path, +) -> (Result<()>, Option<String>) { + let id = approval.id; + let proposal_ref = format!("refs/tags/proposal/{id}"); + // Defensive: submit-time should have planted proposal/<id>, but if + // the row was migrated from an older schema or the tag got pruned + // we fail early with a clear note rather than building a stale + // tree. + if let Err(e) = lifecycle::git_rev_parse(applied_dir, &proposal_ref).await { + return ( + Err(anyhow::anyhow!( + "missing proposal tag {proposal_ref}: {e:#}" + )), + None, + ); + } + if let Err(e) = lifecycle::git_tag(applied_dir, &format!("approved/{id}"), &proposal_ref).await + { + return (Err(anyhow::anyhow!("plant approved/{id}: {e:#}")), None); + } + if let Err(e) = lifecycle::git_tag(applied_dir, &format!("building/{id}"), &proposal_ref).await + { + return (Err(anyhow::anyhow!("plant building/{id}: {e:#}")), None); + } + if let Err(e) = lifecycle::git_read_tree_reset(applied_dir, &proposal_ref).await { + return ( + Err(anyhow::anyhow!("read-tree to {proposal_ref}: {e:#}")), + None, + ); + } + + let rebuild_result = lifecycle::rebuild( + &approval.agent, + &coord.hyperhive_flake, + agent_dir, + applied_dir, + claude_dir, + notes_dir, + coord.dashboard_port, + ) + .await; + + match rebuild_result { + Ok(()) => { + let tag = format!("deployed/{id}"); + if let Err(e) = lifecycle::git_tag(applied_dir, &tag, &proposal_ref).await { + tracing::warn!(agent = %approval.agent, %id, error = ?e, "plant deployed tag failed"); + } + if let Err(e) = + lifecycle::git_update_ref(applied_dir, "refs/heads/main", &proposal_ref).await + { + // Working tree already matches proposal/<id>, but main + // didn't advance — surface as a build failure so the + // operator notices the desync. 
+ return ( + Err(anyhow::anyhow!("ff main to {proposal_ref}: {e:#}")), + Some(tag), + ); + } + (Ok(()), Some(tag)) + } + Err(e) => { + let tag = format!("failed/{id}"); + let body = format!("{e:#}"); + if let Err(te) = + lifecycle::git_tag_annotated(applied_dir, &tag, &proposal_ref, &body).await + { + tracing::warn!(agent = %approval.agent, %id, error = ?te, "annotate failed tag failed"); + } + // Roll working tree back to last known-good main so the + // on-disk state matches what nixos-container last + // successfully built. main hasn't moved, so this is just + // a content reset. + if let Err(re) = lifecycle::git_read_tree_reset(applied_dir, "refs/heads/main").await { + tracing::warn!(agent = %approval.agent, %id, error = ?re, "rollback read-tree failed"); + } + (Err(e), Some(tag)) + } + } +} + /// Tear down a sub-agent container. By default this is non-destructive to /// persistent state: the proposed/applied config repos and the Claude /// credentials dir under `/var/lib/hyperhive/{agents,applied}//` are diff --git a/hive-c0re/src/lifecycle.rs b/hive-c0re/src/lifecycle.rs index 15628bd..b035dd6 100644 --- a/hive-c0re/src/lifecycle.rs +++ b/hive-c0re/src/lifecycle.rs @@ -293,24 +293,6 @@ pub async fn setup_proposed(proposed_dir: &Path, name: &str) -> Result<()> { Ok(()) } -/// Placeholder for the old file-copy apply path; the real -/// tag-driven flow lives in `actions::approve` and gets wired up -/// in a follow-up commit. Leaving this function as a hard error -/// keeps `actions.rs` compiling while the rewrite lands; an -/// ApplyCommit approval that races the deploy will surface a -/// clear failure note instead of silently no-op'ing. -#[allow(unused_variables)] -pub async fn apply_commit( - _applied_dir: &Path, - _proposed_dir: &Path, - _commit_ref: &str, -) -> Result<()> { - bail!( - "apply_commit not yet wired up to the tag-driven flow; \ - approve again after the next deploy lands" - ) -} - /// Set up the applied repo. 
Two responsibilities: /// - First-spawn only: init the repo, pull proposed's initial commit /// in via `git fetch`, tag it `deployed/0`. This is the *only* time