manager: same lifecycle as agents; auto-spawn on hive-c0re start
This commit is contained in:
parent
d81a845dbe
commit
f99ed3fe7a
8 changed files with 168 additions and 65 deletions
28
CLAUDE.md
28
CLAUDE.md
|
|
@ -156,6 +156,34 @@ docs/damocles-migration.md options for moving damocles onto hyperhive
|
||||||
marks them `failed` with note `"agent state dir missing"` so they fall out
|
marks them `failed` with note `"agent state dir missing"` so they fall out
|
||||||
of `pending`. They stay in sqlite for audit.
|
of `pending`. They stay in sqlite for audit.
|
||||||
|
|
||||||
|
## Manager (hm1nd) is hive-c0re-managed
|
||||||
|
|
||||||
|
The manager container runs through the **same lifecycle as sub-agents** —
|
||||||
|
no separate code path. On `hive-c0re serve` startup, if `nixos-container
|
||||||
|
list` doesn't include `hm1nd`, hive-c0re creates it. The manager's flake
|
||||||
|
lives at `/var/lib/hyperhive/applied/hm1nd/`; its proposed (manager-editable)
|
||||||
|
config at `/var/lib/hyperhive/agents/hm1nd/config/`. Manager can edit its
|
||||||
|
own `agent.nix` (visible inside the container at `/agents/hm1nd/config/`),
|
||||||
|
commit, and submit `request-apply-commit hm1nd <sha>` for operator
|
||||||
|
approval — same flow as for sub-agents.
|
||||||
|
|
||||||
|
Differences from sub-agents:
|
||||||
|
- `flake.nix` extends `hyperhive.nixosConfigurations.manager` (vs
|
||||||
|
`agent-base`).
|
||||||
|
- Container name is `hm1nd` (no `h-` prefix).
|
||||||
|
- Fixed web UI port (`MANAGER_PORT = 8000`).
|
||||||
|
- `set_nspawn_flags` adds an extra bind: `/var/lib/hyperhive/agents` →
|
||||||
|
`/agents` (RW), so the manager can edit per-agent proposed repos.
|
||||||
|
- First-deploy spawn bypasses the approval queue (manager is required
|
||||||
|
infrastructure).
|
||||||
|
- Per-agent socket is the manager socket at `/run/hyperhive/manager/`, owned
|
||||||
|
by `manager_server::start`. `coordinator::ensure_runtime` returns that
|
||||||
|
path for manager and the usual `/run/hyperhive/agents/<name>/` for the
|
||||||
|
rest.
|
||||||
|
|
||||||
|
**Migration note:** drop any `containers.hm1nd = { ... }` block from your
|
||||||
|
host NixOS config. hyperhive creates and updates the manager itself now.
|
||||||
|
|
||||||
## Auto-update on startup
|
## Auto-update on startup
|
||||||
|
|
||||||
`hive-c0re serve` runs `auto_update::run` in a background task right after
|
`hive-c0re serve` runs `auto_update::run` in a background task right after
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
|
||||||
"approval: running action",
|
"approval: running action",
|
||||||
);
|
);
|
||||||
|
|
||||||
let agent_dir = coord.register_agent(&approval.agent)?;
|
let agent_dir = coord.ensure_runtime(&approval.agent)?;
|
||||||
let proposed_dir = Coordinator::agent_proposed_dir(&approval.agent);
|
let proposed_dir = Coordinator::agent_proposed_dir(&approval.agent);
|
||||||
let applied_dir = Coordinator::agent_applied_dir(&approval.agent);
|
let applied_dir = Coordinator::agent_applied_dir(&approval.agent);
|
||||||
let claude_dir = Coordinator::agent_claude_dir(&approval.agent);
|
let claude_dir = Coordinator::agent_claude_dir(&approval.agent);
|
||||||
|
|
|
||||||
|
|
@ -11,8 +11,7 @@
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use anyhow::{Context, Result, bail};
|
use anyhow::{Context, Result};
|
||||||
use tokio::process::Command;
|
|
||||||
|
|
||||||
use crate::coordinator::Coordinator;
|
use crate::coordinator::Coordinator;
|
||||||
use crate::lifecycle::{self, AGENT_PREFIX, MANAGER_NAME};
|
use crate::lifecycle::{self, AGENT_PREFIX, MANAGER_NAME};
|
||||||
|
|
@ -55,8 +54,8 @@ pub fn agent_needs_update(name: &str, current_rev: &str) -> bool {
|
||||||
pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &str) -> Result<()> {
|
pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &str) -> Result<()> {
|
||||||
tracing::info!(%name, rev = %current_rev, "rebuild agent");
|
tracing::info!(%name, rev = %current_rev, "rebuild agent");
|
||||||
let agent_dir = coord
|
let agent_dir = coord
|
||||||
.register_agent(name)
|
.ensure_runtime(name)
|
||||||
.with_context(|| format!("register_agent {name}"))?;
|
.with_context(|| format!("ensure_runtime {name}"))?;
|
||||||
let applied_dir = Coordinator::agent_applied_dir(name);
|
let applied_dir = Coordinator::agent_applied_dir(name);
|
||||||
let claude_dir = Coordinator::agent_claude_dir(name);
|
let claude_dir = Coordinator::agent_claude_dir(name);
|
||||||
lifecycle::rebuild(
|
lifecycle::rebuild(
|
||||||
|
|
@ -72,26 +71,34 @@ pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &s
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Apply the manager's host-declared config: `nixos-container update hm1nd`
|
|
||||||
/// (no `--flake`) re-reads `/etc/nixos-containers/hm1nd.conf`, which the
|
/// Auto-create the manager container on startup if it isn't already there.
|
||||||
/// host's `nixos-rebuild switch` rewrites to point at the new `SYSTEM_PATH`.
|
/// hive-c0re manages hm1nd end-to-end (Phase 8 follow-up): operators no
|
||||||
/// Idempotent when nothing has changed.
|
/// longer declare `containers.hm1nd` in their host NixOS config. Bypasses
|
||||||
pub async fn rebuild_manager(current_rev: &str) -> Result<()> {
|
/// the approval queue — manager is required infrastructure. Idempotent.
|
||||||
tracing::info!(rev = %current_rev, "rebuild manager (nixos-container update hm1nd)");
|
pub async fn ensure_manager(coord: &Arc<Coordinator>) -> Result<()> {
|
||||||
let out = Command::new("nixos-container")
|
let existing = lifecycle::list().await.unwrap_or_default();
|
||||||
.args(["update", MANAGER_NAME])
|
if existing.iter().any(|c| c == MANAGER_NAME) {
|
||||||
.output()
|
tracing::debug!("manager container already present");
|
||||||
.await
|
return Ok(());
|
||||||
.context("invoke nixos-container update hm1nd")?;
|
}
|
||||||
if !out.status.success() {
|
tracing::info!("manager container missing — spawning");
|
||||||
bail!(
|
let runtime = coord.ensure_runtime(MANAGER_NAME)?;
|
||||||
"nixos-container update {MANAGER_NAME} failed ({}): {}",
|
let proposed = Coordinator::agent_proposed_dir(MANAGER_NAME);
|
||||||
out.status,
|
let applied = Coordinator::agent_applied_dir(MANAGER_NAME);
|
||||||
String::from_utf8_lossy(&out.stderr).trim()
|
let claude_dir = Coordinator::agent_claude_dir(MANAGER_NAME);
|
||||||
);
|
lifecycle::spawn(
|
||||||
|
MANAGER_NAME,
|
||||||
|
&coord.hyperhive_flake,
|
||||||
|
&runtime,
|
||||||
|
&proposed,
|
||||||
|
&applied,
|
||||||
|
&claude_dir,
|
||||||
|
)
|
||||||
|
.await?;
|
||||||
|
if let Some(rev) = current_flake_rev(&coord.hyperhive_flake) {
|
||||||
|
let _ = std::fs::write(rev_marker_path(MANAGER_NAME), rev);
|
||||||
}
|
}
|
||||||
std::fs::write(rev_marker_path(MANAGER_NAME), current_rev)
|
|
||||||
.with_context(|| format!("write rev marker for {MANAGER_NAME}"))?;
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -117,16 +124,17 @@ pub async fn run(coord: Arc<Coordinator>) -> Result<()> {
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut tasks = Vec::new();
|
let mut tasks = Vec::new();
|
||||||
let mut manager_present = false;
|
|
||||||
for container in containers {
|
for container in containers {
|
||||||
if container == MANAGER_NAME {
|
// Manager and sub-agents share the same lifecycle now; both go
|
||||||
manager_present = true;
|
// through rebuild_agent with name-derived paths.
|
||||||
continue;
|
let logical = if container == MANAGER_NAME {
|
||||||
}
|
Some(MANAGER_NAME.to_owned())
|
||||||
let Some(name) = container.strip_prefix(AGENT_PREFIX) else {
|
} else {
|
||||||
|
container.strip_prefix(AGENT_PREFIX).map(str::to_owned)
|
||||||
|
};
|
||||||
|
let Some(name) = logical else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
let name = name.to_owned();
|
|
||||||
if !agent_needs_update(&name, &current_rev) {
|
if !agent_needs_update(&name, &current_rev) {
|
||||||
tracing::debug!(%name, "auto-update: up-to-date");
|
tracing::debug!(%name, "auto-update: up-to-date");
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -140,19 +148,6 @@ pub async fn run(coord: Arc<Coordinator>) -> Result<()> {
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Manager runs unconditionally when its marker differs: even if the host
|
|
||||||
// hasn't been rebuilt yet, `nixos-container update hm1nd` is a no-op, so
|
|
||||||
// there's no harm. The host's own activation already updates declarative
|
|
||||||
// containers — this is belt-and-braces for hive-c0re restarts.
|
|
||||||
if manager_present && agent_needs_update(MANAGER_NAME, &current_rev) {
|
|
||||||
let current_rev = current_rev.clone();
|
|
||||||
tasks.push(tokio::spawn(async move {
|
|
||||||
if let Err(e) = rebuild_manager(&current_rev).await {
|
|
||||||
tracing::warn!(error = ?e, "auto-update: manager rebuild failed");
|
|
||||||
}
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
|
|
||||||
for t in tasks {
|
for t in tasks {
|
||||||
let _ = t.await;
|
let _ = t.await;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -118,6 +118,21 @@ impl Coordinator {
|
||||||
Self::manager_dir().join("mcp.sock")
|
Self::manager_dir().join("mcp.sock")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Ensure a runtime dir + (for sub-agents) per-agent socket exists. For
|
||||||
|
/// the manager, `manager_server::start` owns the socket — just return
|
||||||
|
/// the dir. For sub-agents this is `register_agent` (creates a fresh
|
||||||
|
/// listener bound to `socket_path(name)`). Source directory of the
|
||||||
|
/// `/run/hive/mcp.sock` bind that ends up in `set_nspawn_flags`.
|
||||||
|
pub fn ensure_runtime(&self, name: &str) -> Result<PathBuf> {
|
||||||
|
if name == crate::lifecycle::MANAGER_NAME {
|
||||||
|
let dir = Self::manager_dir();
|
||||||
|
std::fs::create_dir_all(&dir)
|
||||||
|
.with_context(|| format!("create manager dir {}", dir.display()))?;
|
||||||
|
return Ok(dir);
|
||||||
|
}
|
||||||
|
self.register_agent(name)
|
||||||
|
}
|
||||||
|
|
||||||
/// Per-agent state root (parent of `config/`, future `prompts/`, etc.).
|
/// Per-agent state root (parent of `config/`, future `prompts/`, etc.).
|
||||||
pub fn agent_state_root(name: &str) -> PathBuf {
|
pub fn agent_state_root(name: &str) -> PathBuf {
|
||||||
PathBuf::from(format!("{AGENT_STATE_ROOT}/{name}"))
|
PathBuf::from(format!("{AGENT_STATE_ROOT}/{name}"))
|
||||||
|
|
|
||||||
|
|
@ -172,11 +172,7 @@ async fn post_rebuild(State(state): State<AppState>, AxumPath(name): AxumPath<St
|
||||||
"rebuild: hyperhive_flake has no canonical path; manual rebuild only via `hive-c0re rebuild`",
|
"rebuild: hyperhive_flake has no canonical path; manual rebuild only via `hive-c0re rebuild`",
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
let result = if name == lifecycle::MANAGER_NAME {
|
let result = crate::auto_update::rebuild_agent(&state.coord, &name, &current_rev).await;
|
||||||
crate::auto_update::rebuild_manager(&current_rev).await
|
|
||||||
} else {
|
|
||||||
crate::auto_update::rebuild_agent(&state.coord, &name, &current_rev).await
|
|
||||||
};
|
|
||||||
match result {
|
match result {
|
||||||
Ok(()) => Redirect::to("/").into_response(),
|
Ok(()) => Redirect::to("/").into_response(),
|
||||||
Err(e) => error_response(&format!("rebuild {name} failed: {e:#}")),
|
Err(e) => error_response(&format!("rebuild {name} failed: {e:#}")),
|
||||||
|
|
|
||||||
|
|
@ -10,9 +10,15 @@ use tokio::process::Command;
|
||||||
/// name itself can be at most `MAX_AGENT_NAME` chars.
|
/// name itself can be at most `MAX_AGENT_NAME` chars.
|
||||||
pub const AGENT_PREFIX: &str = "h-";
|
pub const AGENT_PREFIX: &str = "h-";
|
||||||
pub const MAX_AGENT_NAME: usize = 9;
|
pub const MAX_AGENT_NAME: usize = 9;
|
||||||
/// Container name of the manager (a separate slot from sub-agents).
|
/// Container name of the manager. Lives in the same path scheme as sub-agents
|
||||||
|
/// (`/var/lib/hyperhive/agents/hm1nd/`, `/var/lib/hyperhive/applied/hm1nd/`),
|
||||||
|
/// but its container has no `h-` prefix and extends a different
|
||||||
|
/// nixosConfiguration (`manager`, not `agent-base`).
|
||||||
pub const MANAGER_NAME: &str = "hm1nd";
|
pub const MANAGER_NAME: &str = "hm1nd";
|
||||||
|
|
||||||
|
/// Web UI port reserved for the manager (sub-agents hash into 8100..8999).
|
||||||
|
pub const MANAGER_PORT: u16 = 8000;
|
||||||
|
|
||||||
/// Mount point of the per-agent runtime directory inside the container.
|
/// Mount point of the per-agent runtime directory inside the container.
|
||||||
pub const CONTAINER_RUNTIME_MOUNT: &str = "/run/hive";
|
pub const CONTAINER_RUNTIME_MOUNT: &str = "/run/hive";
|
||||||
|
|
||||||
|
|
@ -35,9 +41,13 @@ const DEFAULT_MEMORY_MAX: &str = "2G";
|
||||||
const DEFAULT_CPU_QUOTA: &str = "50%";
|
const DEFAULT_CPU_QUOTA: &str = "50%";
|
||||||
|
|
||||||
/// Returns the per-agent web UI port. Same hash on both sides — manager,
|
/// Returns the per-agent web UI port. Same hash on both sides — manager,
|
||||||
/// dashboard, and agent harness all agree.
|
/// dashboard, and agent harness all agree. Manager is fixed at
|
||||||
|
/// `MANAGER_PORT`.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn agent_web_port(name: &str) -> u16 {
|
pub fn agent_web_port(name: &str) -> u16 {
|
||||||
|
if name == MANAGER_NAME {
|
||||||
|
return MANAGER_PORT;
|
||||||
|
}
|
||||||
let mut hash: u32 = 2_166_136_261;
|
let mut hash: u32 = 2_166_136_261;
|
||||||
for b in name.bytes() {
|
for b in name.bytes() {
|
||||||
hash ^= u32::from(b);
|
hash ^= u32::from(b);
|
||||||
|
|
@ -47,14 +57,34 @@ pub fn agent_web_port(name: &str) -> u16 {
|
||||||
WEB_PORT_BASE + u16::try_from(hash % u32::from(WEB_PORT_RANGE)).unwrap_or(0)
|
WEB_PORT_BASE + u16::try_from(hash % u32::from(WEB_PORT_RANGE)).unwrap_or(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
pub fn container_name(name: &str) -> String {
|
pub fn container_name(name: &str) -> String {
|
||||||
|
if name == MANAGER_NAME {
|
||||||
|
MANAGER_NAME.to_owned()
|
||||||
|
} else {
|
||||||
format!("{AGENT_PREFIX}{name}")
|
format!("{AGENT_PREFIX}{name}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub fn is_manager(name: &str) -> bool {
|
||||||
|
name == MANAGER_NAME
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The nixosConfiguration in the hyperhive flake the agent's `flake.nix`
|
||||||
|
/// extends. Manager → `manager`; everyone else → `agent-base`.
|
||||||
|
#[must_use]
|
||||||
|
pub fn flake_base(name: &str) -> &'static str {
|
||||||
|
if is_manager(name) { "manager" } else { "agent-base" }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn validate(name: &str) -> Result<()> {
|
fn validate(name: &str) -> Result<()> {
|
||||||
if name.is_empty() {
|
if name.is_empty() {
|
||||||
bail!("agent name must not be empty");
|
bail!("agent name must not be empty");
|
||||||
}
|
}
|
||||||
|
if is_manager(name) {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
if name.len() > MAX_AGENT_NAME {
|
if name.len() > MAX_AGENT_NAME {
|
||||||
bail!(
|
bail!(
|
||||||
"agent name '{name}' is too long ({} chars); max {MAX_AGENT_NAME}",
|
"agent name '{name}' is too long ({} chars); max {MAX_AGENT_NAME}",
|
||||||
|
|
@ -180,14 +210,25 @@ pub async fn setup_applied(applied_dir: &Path, name: &str, hyperhive_flake: &str
|
||||||
.with_context(|| format!("create {}", applied_dir.display()))?;
|
.with_context(|| format!("create {}", applied_dir.display()))?;
|
||||||
|
|
||||||
let port = agent_web_port(name);
|
let port = agent_web_port(name);
|
||||||
|
let base = flake_base(name);
|
||||||
|
let service = if is_manager(name) {
|
||||||
|
"hive-m1nd"
|
||||||
|
} else {
|
||||||
|
"hive-ag3nt"
|
||||||
|
};
|
||||||
|
let description = if is_manager(name) {
|
||||||
|
format!("hyperhive manager {name}")
|
||||||
|
} else {
|
||||||
|
format!("hyperhive sub-agent {name}")
|
||||||
|
};
|
||||||
let flake_body = format!(
|
let flake_body = format!(
|
||||||
r#"{{
|
r#"{{
|
||||||
description = "hyperhive sub-agent {name}";
|
description = "{description}";
|
||||||
inputs.hyperhive.url = "{hyperhive_flake}";
|
inputs.hyperhive.url = "{hyperhive_flake}";
|
||||||
outputs =
|
outputs =
|
||||||
{{ hyperhive, ... }}:
|
{{ hyperhive, ... }}:
|
||||||
{{
|
{{
|
||||||
nixosConfigurations.default = hyperhive.nixosConfigurations.agent-base.extendModules {{
|
nixosConfigurations.default = hyperhive.nixosConfigurations.{base}.extendModules {{
|
||||||
modules = [
|
modules = [
|
||||||
./agent.nix
|
./agent.nix
|
||||||
{{
|
{{
|
||||||
|
|
@ -198,7 +239,7 @@ pub async fn setup_applied(applied_dir: &Path, name: &str, hyperhive_flake: &str
|
||||||
[init]
|
[init]
|
||||||
defaultBranch = main
|
defaultBranch = main
|
||||||
'';
|
'';
|
||||||
systemd.services.hive-ag3nt.environment = {{
|
systemd.services.{service}.environment = {{
|
||||||
HIVE_PORT = "{port}";
|
HIVE_PORT = "{port}";
|
||||||
HIVE_LABEL = "{name}";
|
HIVE_LABEL = "{name}";
|
||||||
}};
|
}};
|
||||||
|
|
@ -372,14 +413,35 @@ async fn systemd_daemon_reload() -> Result<()> {
|
||||||
/// is reachable on the host) and `EXTRA_NSPAWN_FLAGS` (the runtime-dir bind).
|
/// is reachable on the host) and `EXTRA_NSPAWN_FLAGS` (the runtime-dir bind).
|
||||||
/// The start script expands `$EXTRA_NSPAWN_FLAGS` unquoted into the
|
/// The start script expands `$EXTRA_NSPAWN_FLAGS` unquoted into the
|
||||||
/// `systemd-nspawn` command.
|
/// `systemd-nspawn` command.
|
||||||
fn set_nspawn_flags(container: &str, agent_dir: &Path, claude_dir: &Path) -> Result<()> {
|
/// Where in the container's filesystem the manager sees its agents tree.
|
||||||
|
/// Matches the `/agents` path that pre-Phase-8 hosts declared via
|
||||||
|
/// `containers.hm1nd.bindMounts."/agents"`.
|
||||||
|
pub const CONTAINER_MANAGER_AGENTS_MOUNT: &str = "/agents";
|
||||||
|
|
||||||
|
/// The on-host root that gets bind-mounted to `/agents` inside the manager.
|
||||||
|
/// Hard-coded to match `AGENT_STATE_ROOT` in coordinator.rs (kept duplicated
|
||||||
|
/// here so lifecycle stays usable as a leaf module).
|
||||||
|
const HOST_AGENTS_ROOT: &str = "/var/lib/hyperhive/agents";
|
||||||
|
|
||||||
|
fn set_nspawn_flags(container: &str, runtime_dir: &Path, claude_dir: &Path) -> Result<()> {
|
||||||
let path = format!("/etc/nixos-containers/{container}.conf");
|
let path = format!("/etc/nixos-containers/{container}.conf");
|
||||||
let original = std::fs::read_to_string(&path).with_context(|| format!("read {path}"))?;
|
let original = std::fs::read_to_string(&path).with_context(|| format!("read {path}"))?;
|
||||||
let bind_flag = format!(
|
let mut binds = format!(
|
||||||
"EXTRA_NSPAWN_FLAGS=\"--bind={runtime}:{CONTAINER_RUNTIME_MOUNT} --bind={claude}:{CONTAINER_CLAUDE_MOUNT}\"",
|
"--bind={runtime}:{CONTAINER_RUNTIME_MOUNT} --bind={claude}:{CONTAINER_CLAUDE_MOUNT}",
|
||||||
runtime = agent_dir.display(),
|
runtime = runtime_dir.display(),
|
||||||
claude = claude_dir.display(),
|
claude = claude_dir.display(),
|
||||||
);
|
);
|
||||||
|
if container == MANAGER_NAME {
|
||||||
|
// Manager edits sub-agent proposed/ repos and its own. RW so it can
|
||||||
|
// git-commit. Sub-agents see only their own /run/hive socket and
|
||||||
|
// /root/.claude (no /agents).
|
||||||
|
use std::fmt::Write as _;
|
||||||
|
let _ = write!(
|
||||||
|
binds,
|
||||||
|
" --bind={HOST_AGENTS_ROOT}:{CONTAINER_MANAGER_AGENTS_MOUNT}"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let bind_flag = format!("EXTRA_NSPAWN_FLAGS=\"{binds}\"");
|
||||||
let mut lines: Vec<String> = original
|
let mut lines: Vec<String> = original
|
||||||
.lines()
|
.lines()
|
||||||
.filter(|line| {
|
.filter(|line| {
|
||||||
|
|
|
||||||
|
|
@ -86,16 +86,23 @@ async fn main() -> Result<()> {
|
||||||
dashboard_port,
|
dashboard_port,
|
||||||
} => {
|
} => {
|
||||||
let coord = Arc::new(Coordinator::open(&db, hyperhive_flake)?);
|
let coord = Arc::new(Coordinator::open(&db, hyperhive_flake)?);
|
||||||
// Run auto-update in the background — don't block service start.
|
manager_server::start(coord.clone())?;
|
||||||
// Operators sometimes need the admin socket up to debug a stuck
|
// Auto-create the manager container if it isn't there yet. Block
|
||||||
// agent, and the rebuild loop can take tens of seconds.
|
// on this — without hm1nd the system has no manager harness.
|
||||||
|
// Failures are logged but allowed: a broken auto-spawn shouldn't
|
||||||
|
// make the dashboard unreachable for debugging.
|
||||||
|
if let Err(e) = auto_update::ensure_manager(&coord).await {
|
||||||
|
tracing::warn!(error = ?e, "auto-spawn manager failed");
|
||||||
|
}
|
||||||
|
// Auto-update in the background — don't block service start.
|
||||||
|
// Sub-agent rebuilds can take tens of seconds; we want the admin
|
||||||
|
// socket up immediately.
|
||||||
let update_coord = coord.clone();
|
let update_coord = coord.clone();
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
if let Err(e) = auto_update::run(update_coord).await {
|
if let Err(e) = auto_update::run(update_coord).await {
|
||||||
tracing::warn!(error = ?e, "auto-update task failed");
|
tracing::warn!(error = ?e, "auto-update task failed");
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
manager_server::start(coord.clone())?;
|
|
||||||
let dash_coord = coord.clone();
|
let dash_coord = coord.clone();
|
||||||
tokio::spawn(async move {
|
tokio::spawn(async move {
|
||||||
if let Err(e) = dashboard::serve(dashboard_port, dash_coord).await {
|
if let Err(e) = dashboard::serve(dashboard_port, dash_coord).await {
|
||||||
|
|
|
||||||
|
|
@ -61,7 +61,7 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse {
|
||||||
Ok(match req {
|
Ok(match req {
|
||||||
HostRequest::Spawn { name } => {
|
HostRequest::Spawn { name } => {
|
||||||
tracing::info!(%name, "spawn");
|
tracing::info!(%name, "spawn");
|
||||||
let agent_dir = coord.register_agent(name)?;
|
let agent_dir = coord.ensure_runtime(name)?;
|
||||||
let proposed_dir = Coordinator::agent_proposed_dir(name);
|
let proposed_dir = Coordinator::agent_proposed_dir(name);
|
||||||
let applied_dir = Coordinator::agent_applied_dir(name);
|
let applied_dir = Coordinator::agent_applied_dir(name);
|
||||||
let claude_dir = Coordinator::agent_claude_dir(name);
|
let claude_dir = Coordinator::agent_claude_dir(name);
|
||||||
|
|
@ -101,7 +101,7 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse {
|
||||||
}
|
}
|
||||||
HostRequest::Rebuild { name } => {
|
HostRequest::Rebuild { name } => {
|
||||||
tracing::info!(%name, "rebuild");
|
tracing::info!(%name, "rebuild");
|
||||||
let agent_dir = coord.register_agent(name)?;
|
let agent_dir = coord.ensure_runtime(name)?;
|
||||||
let applied_dir = Coordinator::agent_applied_dir(name);
|
let applied_dir = Coordinator::agent_applied_dir(name);
|
||||||
let claude_dir = Coordinator::agent_claude_dir(name);
|
let claude_dir = Coordinator::agent_claude_dir(name);
|
||||||
lifecycle::rebuild(
|
lifecycle::rebuild(
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue