actions: tag-driven approve(ApplyCommit) flow
run_apply_commit walks the approval through the tag state machine in applied: approved/<id> + building/<id> are stamped before the build, then git read-tree --reset to proposal/<id> populates the working dir without moving HEAD. On rebuild success deployed/<id> is planted and refs/heads/main fast-forwards to the proposal. On failure failed/<id> is annotated with the build error and the working tree resets back to main so the agent stays evaluable. The helper events Rebuilt + ApprovalResolved both carry the terminal tag so the manager can git-show the exact tree (and read the failure note from an annotated tag) against its read-only applied.git mount. finish_approval grows a terminal_tag param; the spawn path passes None. lifecycle::apply_commit is deleted.
This commit is contained in:
parent
35b0edaf27
commit
315d4289c7
2 changed files with 110 additions and 35 deletions
|
|
@ -41,21 +41,16 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
|
||||||
|
|
||||||
match approval.kind {
|
match approval.kind {
|
||||||
ApprovalKind::ApplyCommit => {
|
ApprovalKind::ApplyCommit => {
|
||||||
let result = async {
|
let (result, terminal_tag) = run_apply_commit(
|
||||||
lifecycle::apply_commit(&applied_dir, &proposed_dir, &approval.commit_ref).await?;
|
&coord,
|
||||||
lifecycle::rebuild(
|
&approval,
|
||||||
&approval.agent,
|
|
||||||
&coord.hyperhive_flake,
|
|
||||||
&agent_dir,
|
&agent_dir,
|
||||||
&applied_dir,
|
&applied_dir,
|
||||||
&claude_dir,
|
&claude_dir,
|
||||||
&notes_dir,
|
&notes_dir,
|
||||||
coord.dashboard_port,
|
|
||||||
)
|
)
|
||||||
.await
|
|
||||||
}
|
|
||||||
.await;
|
.await;
|
||||||
finish_approval(&coord, &approval, result)
|
finish_approval(&coord, &approval, result, terminal_tag)
|
||||||
}
|
}
|
||||||
ApprovalKind::Spawn => {
|
ApprovalKind::Spawn => {
|
||||||
// Run the spawn in the background so the approve POST returns
|
// Run the spawn in the background so the approve POST returns
|
||||||
|
|
@ -77,7 +72,7 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
coord_bg.clear_transient(&agent_bg);
|
coord_bg.clear_transient(&agent_bg);
|
||||||
if let Err(e) = finish_approval(&coord_bg, &approval_bg, result) {
|
if let Err(e) = finish_approval(&coord_bg, &approval_bg, result, None) {
|
||||||
tracing::warn!(agent = %agent_bg, error = ?e, "spawn approval failed");
|
tracing::warn!(agent = %agent_bg, error = ?e, "spawn approval failed");
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
@ -90,6 +85,7 @@ fn finish_approval(
|
||||||
coord: &Coordinator,
|
coord: &Coordinator,
|
||||||
approval: &hive_sh4re::Approval,
|
approval: &hive_sh4re::Approval,
|
||||||
result: Result<()>,
|
result: Result<()>,
|
||||||
|
terminal_tag: Option<String>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let (status, note, ok) = match &result {
|
let (status, note, ok) = match &result {
|
||||||
Ok(()) => (ApprovalStatus::Approved, None, true),
|
Ok(()) => (ApprovalStatus::Approved, None, true),
|
||||||
|
|
@ -106,7 +102,7 @@ fn finish_approval(
|
||||||
status,
|
status,
|
||||||
note: note.clone(),
|
note: note.clone(),
|
||||||
sha: approval.fetched_sha.clone(),
|
sha: approval.fetched_sha.clone(),
|
||||||
tag: None,
|
tag: terminal_tag.clone(),
|
||||||
});
|
});
|
||||||
// For spawn/rebuild approvals, also surface the underlying action so
|
// For spawn/rebuild approvals, also surface the underlying action so
|
||||||
// the manager knows whether the container actually came up. The
|
// the manager knows whether the container actually came up. The
|
||||||
|
|
@ -125,12 +121,109 @@ fn finish_approval(
|
||||||
ok,
|
ok,
|
||||||
note,
|
note,
|
||||||
sha: approval.fetched_sha.clone(),
|
sha: approval.fetched_sha.clone(),
|
||||||
tag: None,
|
tag: terminal_tag,
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tag-driven ApplyCommit handler. Walks the approval through the tag
|
||||||
|
/// state machine documented in `docs/approvals.md`: stamp `approved/<id>`
|
||||||
|
/// + `building/<id>` first so the audit trail captures intent, then
|
||||||
|
/// drop the candidate tree into the working dir without moving HEAD,
|
||||||
|
/// run the rebuild, and either fast-forward `applied/main` to the
|
||||||
|
/// proposal commit on success (`deployed/<id>`) or annotate
|
||||||
|
/// `failed/<id>` with the build error and reset the working tree back
|
||||||
|
/// to the last known-good main. main never advances on a failed
|
||||||
|
/// build, so a crash-and-recover doesn't leave the agent pointing at
|
||||||
|
/// a tree it can't evaluate.
|
||||||
|
async fn run_apply_commit(
|
||||||
|
coord: &Arc<Coordinator>,
|
||||||
|
approval: &hive_sh4re::Approval,
|
||||||
|
agent_dir: &std::path::Path,
|
||||||
|
applied_dir: &std::path::Path,
|
||||||
|
claude_dir: &std::path::Path,
|
||||||
|
notes_dir: &std::path::Path,
|
||||||
|
) -> (Result<()>, Option<String>) {
|
||||||
|
let id = approval.id;
|
||||||
|
let proposal_ref = format!("refs/tags/proposal/{id}");
|
||||||
|
// Defensive: submit-time should have planted proposal/<id>, but if
|
||||||
|
// the row was migrated from an older schema or the tag got pruned
|
||||||
|
// we fail early with a clear note rather than building a stale
|
||||||
|
// tree.
|
||||||
|
if let Err(e) = lifecycle::git_rev_parse(applied_dir, &proposal_ref).await {
|
||||||
|
return (
|
||||||
|
Err(anyhow::anyhow!(
|
||||||
|
"missing proposal tag {proposal_ref}: {e:#}"
|
||||||
|
)),
|
||||||
|
None,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if let Err(e) = lifecycle::git_tag(applied_dir, &format!("approved/{id}"), &proposal_ref).await
|
||||||
|
{
|
||||||
|
return (Err(anyhow::anyhow!("plant approved/{id}: {e:#}")), None);
|
||||||
|
}
|
||||||
|
if let Err(e) = lifecycle::git_tag(applied_dir, &format!("building/{id}"), &proposal_ref).await
|
||||||
|
{
|
||||||
|
return (Err(anyhow::anyhow!("plant building/{id}: {e:#}")), None);
|
||||||
|
}
|
||||||
|
if let Err(e) = lifecycle::git_read_tree_reset(applied_dir, &proposal_ref).await {
|
||||||
|
return (
|
||||||
|
Err(anyhow::anyhow!("read-tree to {proposal_ref}: {e:#}")),
|
||||||
|
None,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let rebuild_result = lifecycle::rebuild(
|
||||||
|
&approval.agent,
|
||||||
|
&coord.hyperhive_flake,
|
||||||
|
agent_dir,
|
||||||
|
applied_dir,
|
||||||
|
claude_dir,
|
||||||
|
notes_dir,
|
||||||
|
coord.dashboard_port,
|
||||||
|
)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
match rebuild_result {
|
||||||
|
Ok(()) => {
|
||||||
|
let tag = format!("deployed/{id}");
|
||||||
|
if let Err(e) = lifecycle::git_tag(applied_dir, &tag, &proposal_ref).await {
|
||||||
|
tracing::warn!(agent = %approval.agent, %id, error = ?e, "plant deployed tag failed");
|
||||||
|
}
|
||||||
|
if let Err(e) =
|
||||||
|
lifecycle::git_update_ref(applied_dir, "refs/heads/main", &proposal_ref).await
|
||||||
|
{
|
||||||
|
// Working tree already matches proposal/<id>, but main
|
||||||
|
// didn't advance — surface as a build failure so the
|
||||||
|
// operator notices the desync.
|
||||||
|
return (
|
||||||
|
Err(anyhow::anyhow!("ff main to {proposal_ref}: {e:#}")),
|
||||||
|
Some(tag),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
(Ok(()), Some(tag))
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
let tag = format!("failed/{id}");
|
||||||
|
let body = format!("{e:#}");
|
||||||
|
if let Err(te) =
|
||||||
|
lifecycle::git_tag_annotated(applied_dir, &tag, &proposal_ref, &body).await
|
||||||
|
{
|
||||||
|
tracing::warn!(agent = %approval.agent, %id, error = ?te, "annotate failed tag failed");
|
||||||
|
}
|
||||||
|
// Roll working tree back to last known-good main so the
|
||||||
|
// on-disk state matches what nixos-container last
|
||||||
|
// successfully built. main hasn't moved, so this is just
|
||||||
|
// a content reset.
|
||||||
|
if let Err(re) = lifecycle::git_read_tree_reset(applied_dir, "refs/heads/main").await {
|
||||||
|
tracing::warn!(agent = %approval.agent, %id, error = ?re, "rollback read-tree failed");
|
||||||
|
}
|
||||||
|
(Err(e), Some(tag))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Tear down a sub-agent container. By default this is non-destructive to
|
/// Tear down a sub-agent container. By default this is non-destructive to
|
||||||
/// persistent state: the proposed/applied config repos and the Claude
|
/// persistent state: the proposed/applied config repos and the Claude
|
||||||
/// credentials dir under `/var/lib/hyperhive/{agents,applied}/<name>/` are
|
/// credentials dir under `/var/lib/hyperhive/{agents,applied}/<name>/` are
|
||||||
|
|
|
||||||
|
|
@ -293,24 +293,6 @@ pub async fn setup_proposed(proposed_dir: &Path, name: &str) -> Result<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Placeholder for the old file-copy apply path; the real
|
|
||||||
/// tag-driven flow lives in `actions::approve` and gets wired up
|
|
||||||
/// in a follow-up commit. Leaving this function as a hard error
|
|
||||||
/// keeps `actions.rs` compiling while the rewrite lands; an
|
|
||||||
/// ApplyCommit approval that races the deploy will surface a
|
|
||||||
/// clear failure note instead of silently no-op'ing.
|
|
||||||
#[allow(unused_variables)]
|
|
||||||
pub async fn apply_commit(
|
|
||||||
_applied_dir: &Path,
|
|
||||||
_proposed_dir: &Path,
|
|
||||||
_commit_ref: &str,
|
|
||||||
) -> Result<()> {
|
|
||||||
bail!(
|
|
||||||
"apply_commit not yet wired up to the tag-driven flow; \
|
|
||||||
approve again after the next deploy lands"
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Set up the applied repo. Two responsibilities:
|
/// Set up the applied repo. Two responsibilities:
|
||||||
/// - First-spawn only: init the repo, pull proposed's initial commit
|
/// - First-spawn only: init the repo, pull proposed's initial commit
|
||||||
/// in via `git fetch`, tag it `deployed/0`. This is the *only* time
|
/// in via `git fetch`, tag it `deployed/0`. This is the *only* time
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue