forge: auto-create a user + token per agent on spawn / startup

new forge module probes the hive-forge nixos-container (no-op when
absent), and ensures every agent + the manager has a forgejo user
named after them with an access token at `<state>/forge-token`
(visible inside the container as `/state/forge-token`).

idempotent: skips user creation when forgejo reports 'already
exists', skips token issuance when the file is present, scopes the
token to read:user,write:repository,write:issue. token-name suffixed
with a clock so re-issuing doesn't collide with a stale name. shells
out via `nixos-container run hive-forge -- runuser -u forgejo --
forgejo admin` (runuser instead of sudo since sudo isn't in the
container by default).

hooks: ensure_all sweeps existing containers at hive-c0re startup
(backgrounded), and the actions.rs spawn task calls ensure_user_for
the new agent right after lifecycle::spawn succeeds. failures log a
warning but don't abort spawn — a missing token is recoverable from
the next startup sweep.
This commit is contained in:
müde 2026-05-16 20:55:13 +02:00
parent 6e9c67dd94
commit 480d646f69
3 changed files with 211 additions and 0 deletions

View file

@ -77,6 +77,11 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
) )
.await; .await;
drop(guard); drop(guard);
if result.is_ok()
&& let Err(e) = crate::forge::ensure_user_for(&agent_bg).await
{
tracing::warn!(agent = %agent_bg, error = ?e, "forge: ensure_user after spawn failed");
}
if let Err(e) = finish_approval(&coord_bg, &approval_bg, result, None) { if let Err(e) = finish_approval(&coord_bg, &approval_bg, result, None) {
tracing::warn!(agent = %agent_bg, error = ?e, "spawn approval failed"); tracing::warn!(agent = %agent_bg, error = ?e, "spawn approval failed");
} }

198
hive-c0re/src/forge.rs Normal file
View file

@ -0,0 +1,198 @@
//! Optional Forgejo wiring. When the `hive-forge` nixos-container is
//! present and running, hive-c0re ensures every agent (and the
//! manager) has a corresponding forgejo user with an API token
//! written to `<agent-state>/forge-token` — visible inside the
//! container as `/state/forge-token`. Idempotent: skips creation
//! when the user already exists, skips token issuance when the file
//! is already there.
//!
//! No-op when `hive-forge` isn't enabled (detected via
//! `nixos-container list`), so operators who don't run the bundled
//! forge pay nothing.
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
use tokio::process::Command;
use crate::coordinator::Coordinator;
/// Name of the nixos-container this module looks for in
/// `nixos-container list` and targets with `nixos-container run`.
const FORGE_CONTAINER: &str = "hive-forge";
/// Prefix of every access-token name minted by this module; a
/// timestamp suffix is appended at issuance time.
const TOKEN_NAME_PREFIX: &str = "hyperhive";
/// Forgejo scopes the agent's token gets. `write:repository` covers
/// clone/push/repo-create on the user's own repos; `write:issue` is
/// what PRs and comments ride under; `read:user` is mandatory for
/// the token-owner endpoint clients use to introspect.
const TOKEN_SCOPES: &str = "read:user,write:repository,write:issue";
/// Location of the forge token file for agent `name`: the file
/// `forge-token` inside the directory returned by
/// `Coordinator::agent_notes_dir(name)`. Per the module docs this is
/// what the agent sees as `/state/forge-token` inside its container.
fn token_path(name: &str) -> PathBuf {
    let state_dir = Coordinator::agent_notes_dir(name);
    state_dir.join("forge-token")
}
/// Report whether a container named `hive-forge` shows up in the
/// output of `nixos-container list`.
///
/// Returns `false` when the command cannot be spawned, exits
/// unsuccessfully, or does not list the container. Each output line
/// is trimmed before being compared against the container name.
pub async fn is_present() -> bool {
    match Command::new("nixos-container").arg("list").output().await {
        Ok(listing) if listing.status.success() => {
            let stdout = String::from_utf8_lossy(&listing.stdout);
            stdout
                .lines()
                .map(str::trim)
                .any(|line| line == FORGE_CONTAINER)
        }
        // Spawn failure or non-zero exit: treat the forge as absent.
        _ => false,
    }
}
/// Execute `forgejo admin <args>` inside the hive-forge container,
/// switching to the `forgejo` user via `runuser`.
///
/// On a zero exit status the captured stdout is returned (lossily
/// decoded as UTF-8). Otherwise an error is produced that carries the
/// joined arguments, the exit status and the trimmed stderr.
///
/// # Errors
/// Fails when the `nixos-container` process cannot be run or when the
/// command exits unsuccessfully.
async fn forge_admin(args: &[&str]) -> Result<String> {
    // `runuser` rather than `sudo`: sudo is only installed when
    // `security.sudo` is enabled, and we don't want to depend on that.
    let admin_prefix = [
        "run",
        FORGE_CONTAINER,
        "--",
        "runuser",
        "-u",
        "forgejo",
        "--",
        "forgejo",
        "admin",
    ];
    let output = Command::new("nixos-container")
        .args(admin_prefix)
        .args(args)
        .output()
        .await
        .context("invoke nixos-container run hive-forge -- forgejo admin")?;

    if output.status.success() {
        return Ok(String::from_utf8_lossy(&output.stdout).into_owned());
    }

    let stderr = String::from_utf8_lossy(&output.stderr);
    Err(anyhow::anyhow!(
        "forgejo admin {} failed ({}): {}",
        args.join(" "),
        output.status,
        stderr.trim(),
    ))
}
/// Locate the access token in the text printed by forgejo.
///
/// The output is cut into words at whitespace, `,` and `:`; the first
/// word that is at least 32 characters long and consists solely of
/// ASCII hex digits is returned. `None` means no such word exists.
/// Scanning for a hex-looking word (instead of matching a fixed
/// message) keeps this working across output-format changes.
fn extract_token(output: &str) -> Option<String> {
    const MIN_TOKEN_LEN: usize = 32;

    let is_separator = |c: char| matches!(c, ',' | ':') || c.is_whitespace();
    for candidate in output.split(is_separator) {
        if candidate.len() < MIN_TOKEN_LEN {
            continue;
        }
        // Non-ASCII bytes are never hex digits, so a byte-wise check
        // accepts exactly the same words as a char-wise one.
        if candidate.bytes().all(|b| b.is_ascii_hexdigit()) {
            return Some(candidate.to_owned());
        }
    }
    None
}
/// Try to create the forgejo user `name` (email `<name>@hive.local`,
/// random password, no forced password change).
///
/// This function currently always returns `Ok(())`; the outcome of
/// the create call only decides what gets logged:
/// - success: info "created user";
/// - error mentioning "already exists" / "user already": debug;
/// - any other error: warning. The caller proceeds to token issuance
///   anyway, which will surface a real failure if the user truly is
///   missing.
async fn ensure_user_exists(name: &str) -> Result<()> {
    let email = format!("{name}@hive.local");
    let outcome = forge_admin(&[
        "user",
        "create",
        "--username",
        name,
        "--email",
        &email,
        "--random-password",
        "--must-change-password=false",
    ])
    .await;

    let Err(err) = outcome else {
        tracing::info!(%name, "forge: created user");
        return Ok(());
    };

    // Forgejo's "already exists" wording varies between versions, so
    // match on a couple of known fragments of the rendered error.
    let msg = format!("{err:#}");
    let already_there = ["already exists", "user already"]
        .iter()
        .any(|needle| msg.contains(*needle));
    if already_there {
        tracing::debug!(%name, "forge: user already exists");
    } else {
        tracing::warn!(%name, error = %msg, "forge: user create unclear; trying token anyway");
    }
    Ok(())
}
/// Mint a fresh access token for `name` and persist it to
/// `<state>/forge-token` (0600). Token name is suffixed with a
/// monotonic clock so re-issuing doesn't collide with an existing
/// token of the same name in the DB.
async fn mint_and_persist_token(name: &str, path: &Path) -> Result<()> {
let token_name = format!(
"{TOKEN_NAME_PREFIX}-{}",
std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0)
);
let stdout = forge_admin(&[
"user",
"generate-access-token",
"--username",
name,
"--token-name",
&token_name,
"--scopes",
TOKEN_SCOPES,
])
.await?;
let token = extract_token(&stdout)
.with_context(|| format!("parse token from forgejo output: {stdout:?}"))?;
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent).ok();
}
std::fs::write(path, format!("{token}\n"))
.with_context(|| format!("write token to {}", path.display()))?;
use std::os::unix::fs::PermissionsExt;
let _ = std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o600));
tracing::info!(%name, path = %path.display(), %token_name, "forge: persisted access token");
Ok(())
}
/// Provision the forgejo user and token file for `name` if needed.
///
/// Returns `Ok(())` without doing anything when the hive-forge
/// container is not listed or when the token file already exists.
/// Otherwise the user is created (best effort) and a new token is
/// minted and written to the token file.
///
/// # Errors
/// Propagates failures from user creation and token issuance.
pub async fn ensure_user_for(name: &str) -> Result<()> {
    if !is_present().await {
        return Ok(());
    }

    let token_file = token_path(name);
    if !token_file.exists() {
        ensure_user_exists(name).await?;
        mint_and_persist_token(name, &token_file).await?;
    }
    Ok(())
}
/// Walk the containers reported by `crate::lifecycle::list()` and run
/// `ensure_user_for` for the manager and for every container whose
/// name carries the agent prefix (the prefix is stripped to obtain
/// the agent name). Other containers are ignored.
///
/// Does nothing when the hive-forge container is absent or when the
/// container listing fails. A failure for one agent is logged as a
/// warning and the sweep moves on to the next.
pub async fn ensure_all() {
    if !is_present().await {
        tracing::debug!("forge: hive-forge container absent, skipping user sweep");
        return;
    }

    let Ok(containers) = crate::lifecycle::list().await else {
        tracing::warn!("forge: nixos-container list failed; skipping user sweep");
        return;
    };

    for container in containers {
        // The manager keeps its container name; sub-agents are known
        // by their container name minus the agent prefix.
        let name = if container == crate::lifecycle::MANAGER_NAME {
            container
        } else {
            match container.strip_prefix(crate::lifecycle::AGENT_PREFIX) {
                Some(agent) => agent.to_owned(),
                None => continue,
            }
        };

        if let Err(e) = ensure_user_for(&name).await {
            tracing::warn!(%name, error = ?e, "forge: ensure_user failed");
        }
    }
}

View file

@ -15,6 +15,7 @@ mod coordinator;
mod crash_watch; mod crash_watch;
mod dashboard; mod dashboard;
mod events_vacuum; mod events_vacuum;
mod forge;
mod lifecycle; mod lifecycle;
mod manager_server; mod manager_server;
mod meta; mod meta;
@ -134,6 +135,13 @@ async fn main() -> Result<()> {
tracing::warn!(error = ?e, "auto-update task failed"); tracing::warn!(error = ?e, "auto-update task failed");
} }
}); });
// Forge user sweep: ensure every existing container has a
// forgejo user + access token. No-op when the hive-forge
// container isn't running. Backgrounded — touches the
// forge state dir via `nixos-container run` which is slow.
tokio::spawn(async move {
forge::ensure_all().await;
});
// Periodic broker vacuum: drop delivered messages older than // Periodic broker vacuum: drop delivered messages older than
// 30 days. Undelivered messages are always kept (still in // 30 days. Undelivered messages are always kept (still in
// flight). Runs hourly; first sweep happens immediately. // flight). Runs hourly; first sweep happens immediately.