actions: tag-driven approve(ApplyCommit) flow
run_apply_commit walks the approval through the tag state machine in the applied repo: approved/<id> + building/<id> are stamped before the build, then git read-tree --reset to proposal/<id> populates the working dir without moving HEAD. On rebuild success, deployed/<id> is planted and refs/heads/main fast-forwards to the proposal. On failure, failed/<id> is annotated with the build error and the working tree resets back to main so the agent stays evaluable. Helper events Rebuilt + ApprovalResolved both carry the terminal tag so the manager can git-show the exact tree (and read the failure note from an annotated tag) against its read-only applied.git mount. finish_approval grows a terminal_tag param; the spawn path passes None. lifecycle::apply_commit is deleted.
This commit is contained in:
parent
35b0edaf27
commit
315d4289c7
2 changed files with 110 additions and 35 deletions
|
|
@ -41,21 +41,16 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
|
|||
|
||||
match approval.kind {
|
||||
ApprovalKind::ApplyCommit => {
|
||||
let result = async {
|
||||
lifecycle::apply_commit(&applied_dir, &proposed_dir, &approval.commit_ref).await?;
|
||||
lifecycle::rebuild(
|
||||
&approval.agent,
|
||||
&coord.hyperhive_flake,
|
||||
&agent_dir,
|
||||
&applied_dir,
|
||||
&claude_dir,
|
||||
&notes_dir,
|
||||
coord.dashboard_port,
|
||||
)
|
||||
.await
|
||||
}
|
||||
let (result, terminal_tag) = run_apply_commit(
|
||||
&coord,
|
||||
&approval,
|
||||
&agent_dir,
|
||||
&applied_dir,
|
||||
&claude_dir,
|
||||
&notes_dir,
|
||||
)
|
||||
.await;
|
||||
finish_approval(&coord, &approval, result)
|
||||
finish_approval(&coord, &approval, result, terminal_tag)
|
||||
}
|
||||
ApprovalKind::Spawn => {
|
||||
// Run the spawn in the background so the approve POST returns
|
||||
|
|
@ -77,7 +72,7 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
|
|||
)
|
||||
.await;
|
||||
coord_bg.clear_transient(&agent_bg);
|
||||
if let Err(e) = finish_approval(&coord_bg, &approval_bg, result) {
|
||||
if let Err(e) = finish_approval(&coord_bg, &approval_bg, result, None) {
|
||||
tracing::warn!(agent = %agent_bg, error = ?e, "spawn approval failed");
|
||||
}
|
||||
});
|
||||
|
|
@ -90,6 +85,7 @@ fn finish_approval(
|
|||
coord: &Coordinator,
|
||||
approval: &hive_sh4re::Approval,
|
||||
result: Result<()>,
|
||||
terminal_tag: Option<String>,
|
||||
) -> Result<()> {
|
||||
let (status, note, ok) = match &result {
|
||||
Ok(()) => (ApprovalStatus::Approved, None, true),
|
||||
|
|
@ -106,7 +102,7 @@ fn finish_approval(
|
|||
status,
|
||||
note: note.clone(),
|
||||
sha: approval.fetched_sha.clone(),
|
||||
tag: None,
|
||||
tag: terminal_tag.clone(),
|
||||
});
|
||||
// For spawn/rebuild approvals, also surface the underlying action so
|
||||
// the manager knows whether the container actually came up. The
|
||||
|
|
@ -125,12 +121,109 @@ fn finish_approval(
|
|||
ok,
|
||||
note,
|
||||
sha: approval.fetched_sha.clone(),
|
||||
tag: None,
|
||||
tag: terminal_tag,
|
||||
}),
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Tag-driven ApplyCommit handler. Walks the approval through the tag
|
||||
/// state machine documented in `docs/approvals.md`: stamp `approved/<id>`
|
||||
/// + `building/<id>` first so the audit trail captures intent, then
|
||||
/// drop the candidate tree into the working dir without moving HEAD,
|
||||
/// run the rebuild, and either fast-forward `applied/main` to the
|
||||
/// proposal commit on success (`deployed/<id>`) or annotate
|
||||
/// `failed/<id>` with the build error and reset the working tree back
|
||||
/// to the last known-good main. main never advances on a failed
|
||||
/// build, so a crash-and-recover doesn't leave the agent pointing at
|
||||
/// a tree it can't evaluate.
|
||||
async fn run_apply_commit(
|
||||
coord: &Arc<Coordinator>,
|
||||
approval: &hive_sh4re::Approval,
|
||||
agent_dir: &std::path::Path,
|
||||
applied_dir: &std::path::Path,
|
||||
claude_dir: &std::path::Path,
|
||||
notes_dir: &std::path::Path,
|
||||
) -> (Result<()>, Option<String>) {
|
||||
let id = approval.id;
|
||||
let proposal_ref = format!("refs/tags/proposal/{id}");
|
||||
// Defensive: submit-time should have planted proposal/<id>, but if
|
||||
// the row was migrated from an older schema or the tag got pruned
|
||||
// we fail early with a clear note rather than building a stale
|
||||
// tree.
|
||||
if let Err(e) = lifecycle::git_rev_parse(applied_dir, &proposal_ref).await {
|
||||
return (
|
||||
Err(anyhow::anyhow!(
|
||||
"missing proposal tag {proposal_ref}: {e:#}"
|
||||
)),
|
||||
None,
|
||||
);
|
||||
}
|
||||
if let Err(e) = lifecycle::git_tag(applied_dir, &format!("approved/{id}"), &proposal_ref).await
|
||||
{
|
||||
return (Err(anyhow::anyhow!("plant approved/{id}: {e:#}")), None);
|
||||
}
|
||||
if let Err(e) = lifecycle::git_tag(applied_dir, &format!("building/{id}"), &proposal_ref).await
|
||||
{
|
||||
return (Err(anyhow::anyhow!("plant building/{id}: {e:#}")), None);
|
||||
}
|
||||
if let Err(e) = lifecycle::git_read_tree_reset(applied_dir, &proposal_ref).await {
|
||||
return (
|
||||
Err(anyhow::anyhow!("read-tree to {proposal_ref}: {e:#}")),
|
||||
None,
|
||||
);
|
||||
}
|
||||
|
||||
let rebuild_result = lifecycle::rebuild(
|
||||
&approval.agent,
|
||||
&coord.hyperhive_flake,
|
||||
agent_dir,
|
||||
applied_dir,
|
||||
claude_dir,
|
||||
notes_dir,
|
||||
coord.dashboard_port,
|
||||
)
|
||||
.await;
|
||||
|
||||
match rebuild_result {
|
||||
Ok(()) => {
|
||||
let tag = format!("deployed/{id}");
|
||||
if let Err(e) = lifecycle::git_tag(applied_dir, &tag, &proposal_ref).await {
|
||||
tracing::warn!(agent = %approval.agent, %id, error = ?e, "plant deployed tag failed");
|
||||
}
|
||||
if let Err(e) =
|
||||
lifecycle::git_update_ref(applied_dir, "refs/heads/main", &proposal_ref).await
|
||||
{
|
||||
// Working tree already matches proposal/<id>, but main
|
||||
// didn't advance — surface as a build failure so the
|
||||
// operator notices the desync.
|
||||
return (
|
||||
Err(anyhow::anyhow!("ff main to {proposal_ref}: {e:#}")),
|
||||
Some(tag),
|
||||
);
|
||||
}
|
||||
(Ok(()), Some(tag))
|
||||
}
|
||||
Err(e) => {
|
||||
let tag = format!("failed/{id}");
|
||||
let body = format!("{e:#}");
|
||||
if let Err(te) =
|
||||
lifecycle::git_tag_annotated(applied_dir, &tag, &proposal_ref, &body).await
|
||||
{
|
||||
tracing::warn!(agent = %approval.agent, %id, error = ?te, "annotate failed tag failed");
|
||||
}
|
||||
// Roll working tree back to last known-good main so the
|
||||
// on-disk state matches what nixos-container last
|
||||
// successfully built. main hasn't moved, so this is just
|
||||
// a content reset.
|
||||
if let Err(re) = lifecycle::git_read_tree_reset(applied_dir, "refs/heads/main").await {
|
||||
tracing::warn!(agent = %approval.agent, %id, error = ?re, "rollback read-tree failed");
|
||||
}
|
||||
(Err(e), Some(tag))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Tear down a sub-agent container. By default this is non-destructive to
|
||||
/// persistent state: the proposed/applied config repos and the Claude
|
||||
/// credentials dir under `/var/lib/hyperhive/{agents,applied}/<name>/` are
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue