phase 8 step 1: per-agent claude creds bind + destroy keeps state
This commit is contained in:
parent
0fc287c768
commit
a42fdb3a5c
9 changed files with 158 additions and 24 deletions
|
|
@ -21,6 +21,7 @@ pub async fn approve(coord: &Coordinator, id: i64) -> Result<()> {
|
|||
let agent_dir = coord.register_agent(&approval.agent)?;
|
||||
let proposed_dir = Coordinator::agent_proposed_dir(&approval.agent);
|
||||
let applied_dir = Coordinator::agent_applied_dir(&approval.agent);
|
||||
let claude_dir = Coordinator::agent_claude_dir(&approval.agent);
|
||||
let result: Result<()> = async {
|
||||
lifecycle::apply_commit(&applied_dir, &proposed_dir, &approval.commit_ref).await?;
|
||||
lifecycle::rebuild(
|
||||
|
|
@ -28,6 +29,7 @@ pub async fn approve(coord: &Coordinator, id: i64) -> Result<()> {
|
|||
&coord.hyperhive_flake,
|
||||
&agent_dir,
|
||||
&applied_dir,
|
||||
&claude_dir,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
|
@ -64,8 +66,14 @@ pub async fn approve(coord: &Coordinator, id: i64) -> Result<()> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Fully tear down a sub-agent. Refuses the manager (declarative; would fight
|
||||
/// with the host's nixos config).
|
||||
/// Tear down a sub-agent container. By default this is non-destructive to
|
||||
/// persistent state: the proposed/applied config repos and the Claude
|
||||
/// credentials dir under `/var/lib/hyperhive/{agents,applied}/<name>/` are
|
||||
/// kept, so recreating an agent of the same name reuses prior config + creds
|
||||
/// (no re-login). The ephemeral runtime dir under `/run/hyperhive/agents/`
|
||||
/// is cleared because its contents (the mcp socket) don't survive restarts
|
||||
/// anyway. A future `--purge` path can wipe state when the operator opts in.
|
||||
/// Refuses the manager (declarative; would fight with the host's nixos config).
|
||||
pub async fn destroy(coord: &Coordinator, name: &str) -> Result<()> {
|
||||
if name == MANAGER_NAME || name == MANAGER_AGENT {
|
||||
bail!("refusing to destroy the manager ({name})");
|
||||
|
|
@ -77,14 +85,6 @@ pub async fn destroy(coord: &Coordinator, name: &str) -> Result<()> {
|
|||
if runtime.exists() {
|
||||
let _ = std::fs::remove_dir_all(&runtime);
|
||||
}
|
||||
let state = Coordinator::agent_state_root(name);
|
||||
if state.exists() {
|
||||
let _ = std::fs::remove_dir_all(&state);
|
||||
}
|
||||
let applied = Coordinator::agent_applied_dir(name);
|
||||
if applied.exists() {
|
||||
let _ = std::fs::remove_dir_all(&applied);
|
||||
}
|
||||
let _ = coord
|
||||
.approvals
|
||||
.fail_pending_for_agent(name, "agent destroyed");
|
||||
|
|
|
|||
|
|
@ -91,6 +91,14 @@ impl Coordinator {
|
|||
Self::agent_state_root(name).join("config")
|
||||
}
|
||||
|
||||
/// Per-agent Claude credentials dir. Bind-mounted RW into the agent
|
||||
/// container at `/root/.claude` so OAuth state survives container
|
||||
/// destroy/recreate. Each agent owns its own token lineage — sharing
|
||||
/// would break on the first refresh-token rotation.
|
||||
pub fn agent_claude_dir(name: &str) -> PathBuf {
|
||||
Self::agent_state_root(name).join("claude")
|
||||
}
|
||||
|
||||
/// Authoritative applied config repo. Hive-c0re-only.
|
||||
pub fn agent_applied_dir(name: &str) -> PathBuf {
|
||||
PathBuf::from(format!("{APPLIED_STATE_ROOT}/{name}"))
|
||||
|
|
|
|||
|
|
@ -162,7 +162,7 @@ fn render_containers(containers: &[String], hostname: &str) -> String {
|
|||
let port = lifecycle::agent_web_port(name);
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"<li><span class=\"glyph\">▒░▒░░</span> <a href=\"http://{hostname}:{port}/\">{name}</a> <span class=\"role role-ag3nt\">ag3nt</span> <span class=\"meta\">{container} :{port}</span>\n <form method=\"POST\" action=\"/destroy/{name}\" class=\"inline\" onsubmit=\"return confirm('destroy {name}? this wipes the agent\\'s state.');\"><button class=\"btn btn-destroy\" type=\"submit\">DESTR0Y</button></form>\n</li>",
|
||||
"<li><span class=\"glyph\">▒░▒░░</span> <a href=\"http://{hostname}:{port}/\">{name}</a> <span class=\"role role-ag3nt\">ag3nt</span> <span class=\"meta\">{container} :{port}</span>\n <form method=\"POST\" action=\"/destroy/{name}\" class=\"inline\" onsubmit=\"return confirm('destroy {name}? container is removed; state + creds kept.');\"><button class=\"btn btn-destroy\" type=\"submit\">DESTR0Y</button></form>\n</li>",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,10 @@ pub const MANAGER_NAME: &str = "hm1nd";
|
|||
/// Mount point of the per-agent runtime directory inside the container.
|
||||
pub const CONTAINER_RUNTIME_MOUNT: &str = "/run/hive";
|
||||
|
||||
/// Mount point of the per-agent Claude credentials dir inside the container.
|
||||
/// Persistent across destroy/recreate so OAuth login survives.
|
||||
pub const CONTAINER_CLAUDE_MOUNT: &str = "/root/.claude";
|
||||
|
||||
const GIT_NAME: &str = "hive-c0re";
|
||||
const GIT_EMAIL: &str = "hive-c0re@hyperhive";
|
||||
|
||||
|
|
@ -66,14 +70,16 @@ pub async fn spawn(
|
|||
agent_dir: &Path,
|
||||
proposed_dir: &Path,
|
||||
applied_dir: &Path,
|
||||
claude_dir: &Path,
|
||||
) -> Result<()> {
|
||||
validate(name)?;
|
||||
setup_proposed(proposed_dir, name).await?;
|
||||
setup_applied(applied_dir, name, hyperhive_flake).await?;
|
||||
ensure_claude_dir(claude_dir)?;
|
||||
let container = container_name(name);
|
||||
let flake_ref = format!("{}#default", applied_dir.display());
|
||||
run(&["create", &container, "--flake", &flake_ref]).await?;
|
||||
set_nspawn_flags(&container, agent_dir)?;
|
||||
set_nspawn_flags(&container, agent_dir, claude_dir)?;
|
||||
set_resource_limits(&container)?;
|
||||
systemd_daemon_reload().await?;
|
||||
run(&["start", &container]).await
|
||||
|
|
@ -108,12 +114,14 @@ pub async fn rebuild(
|
|||
hyperhive_flake: &str,
|
||||
agent_dir: &Path,
|
||||
applied_dir: &Path,
|
||||
claude_dir: &Path,
|
||||
) -> Result<()> {
|
||||
validate(name)?;
|
||||
setup_applied(applied_dir, name, hyperhive_flake).await?;
|
||||
ensure_claude_dir(claude_dir)?;
|
||||
let container = container_name(name);
|
||||
let flake_ref = format!("{}#default", applied_dir.display());
|
||||
set_nspawn_flags(&container, agent_dir)?;
|
||||
set_nspawn_flags(&container, agent_dir, claude_dir)?;
|
||||
set_resource_limits(&container)?;
|
||||
systemd_daemon_reload().await?;
|
||||
run(&["update", &container, "--flake", &flake_ref]).await?;
|
||||
|
|
@ -248,6 +256,23 @@ pub async fn apply_commit(applied_dir: &Path, proposed_dir: &Path, commit_ref: &
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Create the per-agent Claude credentials dir if missing. Mode 0700 — only
|
||||
/// root inside the container reads/writes it. Idempotent: existing dirs are
|
||||
/// left untouched (an agent's OAuth tokens survive `destroy`/recreate).
|
||||
fn ensure_claude_dir(claude_dir: &Path) -> Result<()> {
|
||||
if !claude_dir.exists() {
|
||||
std::fs::create_dir_all(claude_dir)
|
||||
.with_context(|| format!("create {}", claude_dir.display()))?;
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
std::fs::set_permissions(claude_dir, std::fs::Permissions::from_mode(0o700))
|
||||
.with_context(|| format!("chmod {}", claude_dir.display()))?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn initial_agent_nix(name: &str) -> String {
|
||||
format!(
|
||||
"{{ ... }}:\n{{\n # Per-agent overrides for {name}. The manager edits this\n # file (and commits) to customise the agent's NixOS config.\n}}\n",
|
||||
|
|
@ -347,12 +372,13 @@ async fn systemd_daemon_reload() -> Result<()> {
|
|||
/// is reachable on the host) and `EXTRA_NSPAWN_FLAGS` (the runtime-dir bind).
|
||||
/// The start script expands `$EXTRA_NSPAWN_FLAGS` unquoted into the
|
||||
/// `systemd-nspawn` command.
|
||||
fn set_nspawn_flags(container: &str, agent_dir: &Path) -> Result<()> {
|
||||
fn set_nspawn_flags(container: &str, agent_dir: &Path, claude_dir: &Path) -> Result<()> {
|
||||
let path = format!("/etc/nixos-containers/{container}.conf");
|
||||
let original = std::fs::read_to_string(&path).with_context(|| format!("read {path}"))?;
|
||||
let bind_flag = format!(
|
||||
"EXTRA_NSPAWN_FLAGS=\"--bind={}:{CONTAINER_RUNTIME_MOUNT}\"",
|
||||
agent_dir.display()
|
||||
"EXTRA_NSPAWN_FLAGS=\"--bind={runtime}:{CONTAINER_RUNTIME_MOUNT} --bind={claude}:{CONTAINER_CLAUDE_MOUNT}\"",
|
||||
runtime = agent_dir.display(),
|
||||
claude = claude_dir.display(),
|
||||
);
|
||||
let mut lines: Vec<String> = original
|
||||
.lines()
|
||||
|
|
|
|||
|
|
@ -97,12 +97,14 @@ async fn dispatch(req: &ManagerRequest, coord: &Coordinator) -> ManagerResponse
|
|||
let agent_dir = coord.register_agent(name)?;
|
||||
let proposed_dir = Coordinator::agent_proposed_dir(name);
|
||||
let applied_dir = Coordinator::agent_applied_dir(name);
|
||||
let claude_dir = Coordinator::agent_claude_dir(name);
|
||||
if let Err(e) = lifecycle::spawn(
|
||||
name,
|
||||
&coord.hyperhive_flake,
|
||||
&agent_dir,
|
||||
&proposed_dir,
|
||||
&applied_dir,
|
||||
&claude_dir,
|
||||
)
|
||||
.await
|
||||
{
|
||||
|
|
|
|||
|
|
@ -64,12 +64,14 @@ async fn dispatch(req: &HostRequest, coord: &Coordinator) -> HostResponse {
|
|||
let agent_dir = coord.register_agent(name)?;
|
||||
let proposed_dir = Coordinator::agent_proposed_dir(name);
|
||||
let applied_dir = Coordinator::agent_applied_dir(name);
|
||||
let claude_dir = Coordinator::agent_claude_dir(name);
|
||||
if let Err(e) = lifecycle::spawn(
|
||||
name,
|
||||
&coord.hyperhive_flake,
|
||||
&agent_dir,
|
||||
&proposed_dir,
|
||||
&applied_dir,
|
||||
&claude_dir,
|
||||
)
|
||||
.await
|
||||
{
|
||||
|
|
@ -93,7 +95,15 @@ async fn dispatch(req: &HostRequest, coord: &Coordinator) -> HostResponse {
|
|||
tracing::info!(%name, "rebuild");
|
||||
let agent_dir = coord.register_agent(name)?;
|
||||
let applied_dir = Coordinator::agent_applied_dir(name);
|
||||
lifecycle::rebuild(name, &coord.hyperhive_flake, &agent_dir, &applied_dir).await?;
|
||||
let claude_dir = Coordinator::agent_claude_dir(name);
|
||||
lifecycle::rebuild(
|
||||
name,
|
||||
&coord.hyperhive_flake,
|
||||
&agent_dir,
|
||||
&applied_dir,
|
||||
&claude_dir,
|
||||
)
|
||||
.await?;
|
||||
HostResponse::success()
|
||||
}
|
||||
HostRequest::List => HostResponse::list(lifecycle::list().await?),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue