apply_commit handles first-time spawns, request_spawn deprecated
This commit is contained in:
parent
6974634326
commit
66f1568e8f
6 changed files with 166 additions and 34 deletions
|
|
@ -41,7 +41,7 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
|
|||
|
||||
match approval.kind {
|
||||
ApprovalKind::ApplyCommit => {
|
||||
let (result, terminal_tag) = run_apply_commit(
|
||||
let (result, terminal_tag, is_first_spawn) = run_apply_commit(
|
||||
&coord,
|
||||
&approval,
|
||||
&agent_dir,
|
||||
|
|
@ -55,7 +55,21 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
|
|||
if let Err(e) = crate::forge::push_config(&approval.agent).await {
|
||||
tracing::warn!(agent = %approval.agent, error = ?e, "forge: push_config after apply failed");
|
||||
}
|
||||
finish_approval(&coord, &approval, result, terminal_tag)
|
||||
if is_first_spawn && result.is_ok() {
|
||||
// First-spawn bookkeeping: create the per-agent forge user
|
||||
// and mirror the applied repo into agent-configs/<n>.
|
||||
if let Err(e) = crate::forge::ensure_user_for(&approval.agent).await {
|
||||
tracing::warn!(agent = %approval.agent, error = ?e, "forge: ensure_user after first spawn failed");
|
||||
}
|
||||
if let Err(e) = crate::forge::ensure_config_repo(&approval.agent).await {
|
||||
tracing::warn!(agent = %approval.agent, error = ?e, "forge: ensure_config_repo after first spawn failed");
|
||||
}
|
||||
// New container row appeared — rescan so the dashboard
|
||||
// reflects the post-spawn state without a manual refetch.
|
||||
coord.rescan_containers_and_emit().await;
|
||||
crate::dashboard::emit_tombstones_snapshot(&coord).await;
|
||||
}
|
||||
finish_approval(&coord, &approval, result, terminal_tag, is_first_spawn)
|
||||
}
|
||||
ApprovalKind::InitConfig => {
|
||||
// Seed the proposed config repo. Runs synchronously — it's just
|
||||
|
|
@ -67,7 +81,7 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
.await;
|
||||
finish_approval(&coord, &approval, result, None)
|
||||
finish_approval(&coord, &approval, result, None, false)
|
||||
}
|
||||
ApprovalKind::UpdateMetaInputs => {
|
||||
// Decode the inputs from the commit_ref field (stored as JSON
|
||||
|
|
@ -117,7 +131,7 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
|
|||
tracing::warn!(agent = %agent_bg, error = ?e, "forge: push_config after spawn failed");
|
||||
}
|
||||
}
|
||||
if let Err(e) = finish_approval(&coord_bg, &approval_bg, result, None) {
|
||||
if let Err(e) = finish_approval(&coord_bg, &approval_bg, result, None, false) {
|
||||
tracing::warn!(agent = %agent_bg, error = ?e, "spawn approval failed");
|
||||
}
|
||||
// New container row appeared (or didn't, on failure
|
||||
|
|
@ -139,6 +153,7 @@ fn finish_approval(
|
|||
approval: &hive_sh4re::Approval,
|
||||
result: Result<()>,
|
||||
terminal_tag: Option<String>,
|
||||
is_first_spawn: bool,
|
||||
) -> Result<()> {
|
||||
let (status, note, ok) = match &result {
|
||||
Ok(()) => (ApprovalStatus::Approved, None, true),
|
||||
|
|
@ -201,6 +216,14 @@ fn finish_approval(
|
|||
note,
|
||||
sha: approval.fetched_sha.clone(),
|
||||
}),
|
||||
ApprovalKind::ApplyCommit if is_first_spawn => {
|
||||
coord.notify_manager(&HelperEvent::Spawned {
|
||||
agent: approval.agent.clone(),
|
||||
ok,
|
||||
note,
|
||||
sha: approval.fetched_sha.clone(),
|
||||
});
|
||||
}
|
||||
ApprovalKind::ApplyCommit => coord.notify_manager(&HelperEvent::Rebuilt {
|
||||
agent: approval.agent.clone(),
|
||||
ok,
|
||||
|
|
@ -232,10 +255,14 @@ async fn run_apply_commit(
|
|||
applied_dir: &std::path::Path,
|
||||
claude_dir: &std::path::Path,
|
||||
notes_dir: &std::path::Path,
|
||||
) -> (Result<()>, Option<String>) {
|
||||
) -> (Result<()>, Option<String>, bool) {
|
||||
let id = approval.id;
|
||||
let proposal_ref = format!("refs/tags/proposal/{id}");
|
||||
|
||||
// Detect first spawn before we touch anything so we can branch on it
|
||||
// throughout this function.
|
||||
let is_first_spawn = !lifecycle::container_exists(&approval.agent).await;
|
||||
|
||||
// Defensive: submit-time should have planted proposal/<id>, but if
|
||||
// the row was migrated from an older schema or the tag got pruned
|
||||
// we fail early with a clear note rather than building a stale
|
||||
|
|
@ -246,6 +273,7 @@ async fn run_apply_commit(
|
|||
"missing proposal tag {proposal_ref}: {e:#}"
|
||||
)),
|
||||
None,
|
||||
is_first_spawn,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -253,16 +281,30 @@ async fn run_apply_commit(
|
|||
// (and the meta lock indirectly) back if the build fails.
|
||||
let prev_main_sha = match lifecycle::git_rev_parse(applied_dir, "refs/heads/main").await {
|
||||
Ok(s) => s,
|
||||
Err(e) => return (Err(anyhow::anyhow!("read applied/main: {e:#}")), None),
|
||||
Err(e) => {
|
||||
return (
|
||||
Err(anyhow::anyhow!("read applied/main: {e:#}")),
|
||||
None,
|
||||
is_first_spawn,
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = lifecycle::git_tag(applied_dir, &format!("approved/{id}"), &proposal_ref).await
|
||||
{
|
||||
return (Err(anyhow::anyhow!("plant approved/{id}: {e:#}")), None);
|
||||
return (
|
||||
Err(anyhow::anyhow!("plant approved/{id}: {e:#}")),
|
||||
None,
|
||||
is_first_spawn,
|
||||
);
|
||||
}
|
||||
if let Err(e) = lifecycle::git_tag(applied_dir, &format!("building/{id}"), &proposal_ref).await
|
||||
{
|
||||
return (Err(anyhow::anyhow!("plant building/{id}: {e:#}")), None);
|
||||
return (
|
||||
Err(anyhow::anyhow!("plant building/{id}: {e:#}")),
|
||||
None,
|
||||
is_first_spawn,
|
||||
);
|
||||
}
|
||||
|
||||
// Fast-forward applied/main to proposal/<id> + sync the working
|
||||
|
|
@ -274,23 +316,70 @@ async fn run_apply_commit(
|
|||
return (
|
||||
Err(anyhow::anyhow!("ff main to {proposal_ref}: {e:#}")),
|
||||
None,
|
||||
is_first_spawn,
|
||||
);
|
||||
}
|
||||
if let Err(e) = lifecycle::git_read_tree_reset(applied_dir, "refs/heads/main").await {
|
||||
// main is ahead; working tree didn't sync. Roll main back to
|
||||
// keep the two consistent before bailing.
|
||||
let _ = lifecycle::git_update_ref(applied_dir, "refs/heads/main", &prev_main_sha).await;
|
||||
return (Err(anyhow::anyhow!("read-tree to main: {e:#}")), None);
|
||||
return (
|
||||
Err(anyhow::anyhow!("read-tree to main: {e:#}")),
|
||||
None,
|
||||
is_first_spawn,
|
||||
);
|
||||
}
|
||||
|
||||
// First spawn: sync_agents must add this agent to the meta flake
|
||||
// before prepare_deploy can update its input lock (which won't
|
||||
// exist yet if this is the agent's first deploy).
|
||||
if is_first_spawn {
|
||||
let agents = match lifecycle::agents_for_meta_listing_with(&approval.agent).await {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
let _ =
|
||||
lifecycle::git_update_ref(applied_dir, "refs/heads/main", &prev_main_sha)
|
||||
.await;
|
||||
let _ = lifecycle::git_read_tree_reset(applied_dir, "refs/heads/main").await;
|
||||
return (
|
||||
Err(anyhow::anyhow!("agents_for_meta_listing_with: {e:#}")),
|
||||
None,
|
||||
is_first_spawn,
|
||||
);
|
||||
}
|
||||
};
|
||||
if let Err(e) = crate::meta::sync_agents(
|
||||
&coord.hyperhive_flake,
|
||||
coord.dashboard_port,
|
||||
&coord.operator_pronouns,
|
||||
&coord.context_window_tokens,
|
||||
&agents,
|
||||
)
|
||||
.await
|
||||
{
|
||||
let _ =
|
||||
lifecycle::git_update_ref(applied_dir, "refs/heads/main", &prev_main_sha).await;
|
||||
let _ = lifecycle::git_read_tree_reset(applied_dir, "refs/heads/main").await;
|
||||
return (
|
||||
Err(anyhow::anyhow!("meta sync_agents for first spawn: {e:#}")),
|
||||
None,
|
||||
is_first_spawn,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 1 of the meta two-phase deploy: relock without committing.
|
||||
if let Err(e) = crate::meta::prepare_deploy(&approval.agent).await {
|
||||
let _ = lifecycle::git_update_ref(applied_dir, "refs/heads/main", &prev_main_sha).await;
|
||||
let _ = lifecycle::git_read_tree_reset(applied_dir, "refs/heads/main").await;
|
||||
return (Err(anyhow::anyhow!("meta prepare_deploy: {e:#}")), None);
|
||||
return (
|
||||
Err(anyhow::anyhow!("meta prepare_deploy: {e:#}")),
|
||||
None,
|
||||
is_first_spawn,
|
||||
);
|
||||
}
|
||||
|
||||
// Container-level rebuild against meta#<name>.
|
||||
// Container-level rebuild (or first-time create) against meta#<name>.
|
||||
let build_result = lifecycle::rebuild_no_meta(
|
||||
&approval.agent,
|
||||
agent_dir,
|
||||
|
|
@ -324,7 +413,7 @@ async fn run_apply_commit(
|
|||
// proposal, agent picks up where it left off with the
|
||||
// new env / packages.
|
||||
coord.kick_agent(&approval.agent, "config update applied");
|
||||
(Ok(()), Some(tag))
|
||||
(Ok(()), Some(tag), is_first_spawn)
|
||||
}
|
||||
Err(e) => {
|
||||
let tag = format!("failed/{id}");
|
||||
|
|
@ -350,7 +439,7 @@ async fn run_apply_commit(
|
|||
tracing::warn!(agent = %approval.agent, %id, error = ?ae, "meta abort_deploy failed");
|
||||
}
|
||||
let _ = coord;
|
||||
(Err(e), Some(tag))
|
||||
(Err(e), Some(tag), is_first_spawn)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -206,6 +206,14 @@ async fn agents_after_spawn(name: &str) -> Result<Vec<crate::meta::AgentSpec>> {
|
|||
agents_for_meta(Some(name)).await
|
||||
}
|
||||
|
||||
/// Like `agents_for_meta_listing` but with an extra agent added (for a
|
||||
/// container that doesn't exist yet). Used by the first-spawn path in
|
||||
/// `actions::run_apply_commit` to register the new agent in meta before
|
||||
/// `prepare_deploy` tries to update its input lock.
|
||||
pub async fn agents_for_meta_listing_with(extra: &str) -> Result<Vec<crate::meta::AgentSpec>> {
|
||||
agents_for_meta(Some(extra)).await
|
||||
}
|
||||
|
||||
/// Public enumeration of currently-existing agents (whatever
|
||||
/// `nixos-container list` says), sorted, no extras. For callers
|
||||
/// outside this module that need to reseed meta after lifecycle
|
||||
|
|
@ -214,6 +222,19 @@ pub async fn agents_for_meta_listing() -> Result<Vec<crate::meta::AgentSpec>> {
|
|||
agents_for_meta(None).await
|
||||
}
|
||||
|
||||
/// True when the named container already exists (appears in
|
||||
/// `nixos-container list`). Used by the apply-commit path to decide
|
||||
/// between first-spawn (`nixos-container create`) and normal rebuild
|
||||
/// (`nixos-container update`).
|
||||
pub async fn container_exists(name: &str) -> bool {
|
||||
let container = container_name(name);
|
||||
list()
|
||||
.await
|
||||
.unwrap_or_default()
|
||||
.iter()
|
||||
.any(|c| c == &container)
|
||||
}
|
||||
|
||||
pub async fn kill(name: &str) -> Result<()> {
|
||||
validate(name)?;
|
||||
let container = container_name(name);
|
||||
|
|
@ -314,13 +335,24 @@ pub async fn rebuild_no_meta(
|
|||
ensure_state_dir(notes_dir)?;
|
||||
let container = container_name(name);
|
||||
let flake_ref = format!("{}#{name}", crate::meta::meta_dir().display());
|
||||
set_nspawn_flags(&container, agent_dir, claude_dir, notes_dir)?;
|
||||
set_resource_limits(&container)?;
|
||||
systemd_daemon_reload().await?;
|
||||
run(&["update", &container, "--flake", &flake_ref]).await?;
|
||||
// Restart so any nspawn-level changes (bind mounts, networking, etc.) apply.
|
||||
run(&["stop", &container]).await?;
|
||||
run(&["start", &container]).await
|
||||
if container_exists(name).await {
|
||||
// Existing container: update nspawn flags, then rebuild + restart
|
||||
// so any bind-mount / networking changes take effect.
|
||||
set_nspawn_flags(&container, agent_dir, claude_dir, notes_dir)?;
|
||||
set_resource_limits(&container)?;
|
||||
systemd_daemon_reload().await?;
|
||||
run(&["update", &container, "--flake", &flake_ref]).await?;
|
||||
run(&["stop", &container]).await?;
|
||||
run(&["start", &container]).await
|
||||
} else {
|
||||
// First spawn: create the container first (which writes the nspawn
|
||||
// conf file), then overwrite with our flags and start.
|
||||
run(&["create", &container, "--flake", &flake_ref]).await?;
|
||||
set_nspawn_flags(&container, agent_dir, claude_dir, notes_dir)?;
|
||||
set_resource_limits(&container)?;
|
||||
systemd_daemon_reload().await?;
|
||||
run(&["start", &container]).await
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list() -> Result<Vec<String>> {
|
||||
|
|
|
|||
|
|
@ -561,10 +561,13 @@ async fn submit_apply_commit(
|
|||
);
|
||||
}
|
||||
if !applied_dir.join(".git").exists() {
|
||||
anyhow::bail!(
|
||||
"applied repo at {} is uninitialised — spawn the agent first",
|
||||
applied_dir.display()
|
||||
);
|
||||
// First deploy: seed the applied repo from proposed so we can plant
|
||||
// the proposal/<id> tag below. The applied repo starts at the
|
||||
// template commit (deployed/0); run_apply_commit will fast-forward
|
||||
// main to the manager's commit on approval and create the container.
|
||||
lifecycle::setup_applied(&applied_dir, Some(&proposed_dir), agent)
|
||||
.await
|
||||
.context("seed applied repo for first spawn")?;
|
||||
}
|
||||
let id = coord
|
||||
.approvals
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue