fix #289: full forge sync on rebuild (extract sync_agent, use in rebuild_agent)

extract per-agent forge logic from ensure_all() into sync_agent()
so both the startup sweep and rebuild_agent call identical code.
rebuild now runs: ensure_user_for + ensure_user_email + ensure_config_repo
+ push_config + meta_read_access + ensure_meta_remote — same as the boot sweep.
missing tokens and drift in any forge state are fixed by rebuild,
not just hive reboot.
This commit is contained in:
damocles 2026-05-22 22:10:56 +02:00 committed by Mara
parent 7ad9809207
commit 3e94914569
2 changed files with 49 additions and 34 deletions

View file

@ -101,13 +101,13 @@ pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &s
sha: None,
tag: None,
});
// Re-seed the forge token on every rebuild so a missing
// token (e.g. first-spawn seeding failed transiently) is
// fixed by the next rebuild rather than requiring a full
// hive-c0re restart.
if let Err(e) = crate::forge::ensure_user_for(name).await {
tracing::warn!(%name, error = ?e, "forge: ensure_user after rebuild failed");
}
// Run the full forge sync on every successful rebuild so
// the rebuild path is equivalent to the hive-c0re startup
// sweep: token, config-repo mirror, meta read access, and
// meta remote are all kept in sync. Recovers missing tokens
// (e.g. first-spawn seeding failed transiently) without
// requiring a full hive-c0re restart.
crate::forge::sync_agent(name, crate::forge::core_token().as_deref()).await;
// Wake the agent on its next turn so claude sees a
// "you were rebuilt — check /state/ for notes, --continue
// session intact" hint. Covers dashboard rebuild, admin

View file

@ -519,6 +519,47 @@ async fn ensure_org(name: &str, admin_token: &str) -> Result<()> {
}
}
/// Per-agent forge sync: ensure the agent has a forgejo user + token,
/// a mirrored config repo, read access to `core/meta`, and the `meta`
/// remote in its proposed repo. All operations are idempotent; failures
/// are logged as warnings but don't abort the caller.
///
/// `core_token` is `core_token()` — passed in so callers that already
/// fetched it don't re-read the file. Pass `None` to skip the
/// `meta_read_access` step (safe: the access grant is best-effort).
///
/// Called by both `ensure_all()` (startup sweep) and `rebuild_agent`
/// (per-rebuild) so the two paths stay equivalent.
pub async fn sync_agent(name: &str, core_token: Option<&str>) {
if let Err(e) = ensure_user_for(name).await {
tracing::warn!(%name, error = ?e, "forge: ensure_user failed");
}
// Align email to match the git user.email set by meta::render_flake
// so commits link to the agent's Forgejo profile. Best-effort;
// also patches up agents created before this fix (old @hive.local).
ensure_user_email(name).await;
// Mirror the agent's applied config repo into agent-configs.
// ensure_config_repo is idempotent; push_config catches any
// drift since the last run — e.g. the startup migration just
// relocated `deployed/0`, or a deploy landed while the forge
// was down.
if let Err(e) = ensure_config_repo(name).await {
tracing::warn!(%name, error = ?e, "forge: ensure_config_repo failed");
}
if let Err(e) = push_config(name).await {
tracing::warn!(%name, error = ?e, "forge: push_config failed");
}
// Grant read-only access to core/meta and wire the `meta` remote
// into the proposed repo so agents can fetch their deployment context.
if let Some(token) = core_token
&& let Err(e) = meta_read_access(name, token).await {
tracing::warn!(%name, error = ?e, "forge: ensure_meta_read_access failed");
}
if let Err(e) = ensure_meta_remote(name).await {
tracing::warn!(%name, error = ?e, "forge: ensure_meta_remote failed");
}
}
/// Sweep every existing container (manager + sub-agents) and ensure
/// each has a forgejo user + token, plus an `agent-configs/<name>`
/// repo mirroring its applied config. Also seeds the `core` admin
@ -563,32 +604,6 @@ pub async fn ensure_all() {
} else {
continue;
};
if let Err(e) = ensure_user_for(&name).await {
tracing::warn!(%name, error = ?e, "forge: ensure_user failed");
}
// Align email to match the git user.email set by meta::render_flake
// so commits link to the agent's Forgejo profile. Best-effort;
// also patches up agents created before this fix (old @hive.local).
ensure_user_email(&name).await;
// Mirror the agent's applied config repo into agent-configs.
// ensure_config_repo is idempotent; push_config catches any
// drift since the last run — e.g. the startup migration just
// relocated `deployed/0`, or a deploy landed while the forge
// was down.
if let Err(e) = ensure_config_repo(&name).await {
tracing::warn!(%name, error = ?e, "forge: ensure_config_repo failed");
}
if let Err(e) = push_config(&name).await {
tracing::warn!(%name, error = ?e, "forge: push_config failed");
}
// Grant read-only access to core/meta and wire the `meta` remote
// into the proposed repo so agents can fetch their deployment context.
if let Some(token) = core_token.as_deref()
&& let Err(e) = meta_read_access(&name, token).await {
tracing::warn!(%name, error = ?e, "forge: ensure_meta_read_access failed");
}
if let Err(e) = ensure_meta_remote(&name).await {
tracing::warn!(%name, error = ?e, "forge: ensure_meta_remote failed");
}
sync_agent(&name, core_token.as_deref()).await;
}
}