hyperhive/hive-sh4re/src/lib.rs
2026-05-15 23:11:10 +02:00

444 lines
17 KiB
Rust

//! Wire types shared between `hive-c0re` and the in-container harness.
use serde::{Deserialize, Serialize};
// -----------------------------------------------------------------------------
// Host admin socket — /run/hyperhive/host.sock
// -----------------------------------------------------------------------------
/// Commands accepted on the host admin socket.
///
/// Wire format: newline-delimited JSON, one object per line. The variant is
/// selected by the `cmd` field, whose value is the variant name in
/// `snake_case` (for example `{"cmd":"request_spawn","name":"…"}`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "cmd", rename_all = "snake_case")]
pub enum HostRequest {
    /// Create and start a sub-agent container right away, with no approval
    /// step. Meant for privileged callers (the operator on the host): the
    /// approval queue is bypassed on purpose so that test scripts and
    /// one-off recoveries need no separate approve call.
    Spawn { name: String },
    /// Queue a spawn for the user to approve; once approved, the host
    /// creates and starts the container. Counterpart of the manager's
    /// `RequestSpawn`, offered on the admin socket so the dashboard and
    /// the CLI can go through the approval flow as well.
    RequestSpawn { name: String },
    /// Gracefully stop a managed container.
    Kill { name: String },
    /// Tear down a sub-agent container: stop it, remove it, drop its
    /// systemd drop-in and purge its pending approvals.
    ///
    /// Persistent state (proposed/applied repos, Claude credentials) is
    /// KEPT by default, so re-creating an agent under the same name picks
    /// up the earlier config and login. With `purge=true` the agent's
    /// `/var/lib/hyperhive/{agents,applied}/<name>/` trees are wiped too —
    /// config history, credentials and notes are gone forever.
    ///
    /// The manager is not destroyable.
    Destroy {
        name: String,
        /// Also wipe persistent state. Treated as `false` when the field
        /// is absent from the request.
        #[serde(default)]
        purge: bool,
    },
    /// Apply pending config to a managed container.
    Rebuild { name: String },
    /// Enumerate managed containers.
    List,
    /// Enumerate pending approval requests.
    Pending,
    /// Approve the pending request with this id; its action runs
    /// immediately.
    Approve { id: i64 },
    /// Deny the pending request with this id.
    Deny { id: i64 },
}
/// Reply envelope on the host admin socket: a success flag plus optional
/// payloads.
///
/// Every `Option` field is left out of the serialized JSON when it is
/// `None`, and is read back as `None` when missing from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HostResponse {
    /// `true` for the `success` / `list` / `pending` constructors, `false`
    /// for `error`.
    pub ok: bool,
    /// Failure description; populated by the `error` constructor.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    /// Payload of the `list` constructor.
    // NOTE(review): presumably the managed container names answering
    // `HostRequest::List` — confirm against the server side.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub agents: Option<Vec<String>>,
    /// Payload of the `pending` constructor: the approval records.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub approvals: Option<Vec<Approval>>,
}
/// One approval record as it travels over the wire.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Approval {
    /// Identifier of the approval; the same id is used by
    /// `HostRequest::Approve` / `HostRequest::Deny`.
    pub id: i64,
    /// Name of the agent the approval concerns.
    pub agent: String,
    /// Which action runs once the approval is granted. Falls back to
    /// `ApprovalKind::default()` when the field is missing from the input.
    #[serde(default)]
    pub kind: ApprovalKind,
    /// For `ApplyCommit`: the git sha submitted by the manager. For
    /// `Spawn`: empty. This is only the manager's *claimed* ref — the
    /// canonical sha that hive-c0re vouches for after fetching the
    /// proposal is stored in `fetched_sha`.
    pub commit_ref: String,
    /// Sha that hive-c0re fetched from the proposed repo into applied at
    /// submission time and then tagged `proposal/<id>`. It stays stable
    /// for the whole lifetime of the approval: later amends by the manager
    /// in proposed do not change what gets built. Only present for
    /// `ApplyCommit`, and only after the fetch succeeded.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub fetched_sha: Option<String>,
    /// When the approval was requested.
    // NOTE(review): presumably a Unix timestamp; unit (s vs ms) is not
    // visible from this file — confirm.
    pub requested_at: i64,
    /// Current state of the approval.
    pub status: ApprovalStatus,
    /// When the approval was resolved; `None` while unresolved (same
    /// timestamp caveat as `requested_at`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub resolved_at: Option<i64>,
    /// Optional free-text note attached to the approval.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub note: Option<String>,
}
/// The action an approval triggers once it is granted.
///
/// Serialized as the `snake_case` variant name (`"apply_commit"`,
/// `"spawn"`).
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ApprovalKind {
    /// Apply a config commit proposed by the manager (the existing flow).
    /// This is the default kind.
    #[default]
    ApplyCommit,
    /// Create and start a new sub-agent container with the given name.
    Spawn,
}
/// Lifecycle state of an approval.
///
/// Serialized as the `snake_case` variant name (`"pending"`, `"approved"`,
/// `"denied"`, `"failed"`). The enum is fieldless, so it is `Copy` just
/// like its sibling `ApprovalKind`; callers can pass it by value without
/// cloning.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ApprovalStatus {
    /// Waiting for a decision.
    Pending,
    /// The request was approved.
    Approved,
    /// The request was denied.
    Denied,
    /// The request ended in failure.
    // NOTE(review): presumably "approved, but the underlying action
    // failed" (cf. the `failed/<id>` tag mentioned on `HelperEvent`) —
    // confirm against hive-c0re.
    Failed,
}
impl HostResponse {
    /// Plain success reply: `ok` is `true` and no payload is attached.
    pub fn success() -> Self {
        Self {
            ok: true,
            error: None,
            agents: None,
            approvals: None,
        }
    }

    /// Failure reply: `ok` is `false` and `error` holds `message`.
    pub fn error(message: impl Into<String>) -> Self {
        Self {
            ok: false,
            error: Some(message.into()),
            ..Self::success()
        }
    }

    /// Success reply that carries `agents` as its payload.
    pub fn list(agents: Vec<String>) -> Self {
        Self {
            agents: Some(agents),
            ..Self::success()
        }
    }

    /// Success reply that carries `approvals` as its payload.
    pub fn pending(approvals: Vec<Approval>) -> Self {
        Self {
            approvals: Some(approvals),
            ..Self::success()
        }
    }
}
// -----------------------------------------------------------------------------
// Per-agent socket — /run/hyperhive/agents/<name>/mcp.sock on the host,
// bind-mounted into the container at /run/hive/mcp.sock.
// -----------------------------------------------------------------------------
/// A logical message exchanged between agents.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Message {
    /// Logical name of the sender.
    pub from: String,
    /// Logical name of the recipient.
    pub to: String,
    /// Message payload.
    pub body: String,
}
/// A single row of a broker inbox query.
///
/// This is what the dashboard renders in its operator-inbox section, and
/// what a per-agent web UI gets back from a `Recent` request. It is defined
/// in `hive_sh4re` so the same type can travel over both the dashboard's
/// `/api/state` and the agent socket, with no internal-to-wire conversion.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InboxRow {
    /// Identifier of the row.
    pub id: i64,
    /// Sender of the message.
    pub from: String,
    /// Message payload.
    pub body: String,
    /// When the message was recorded.
    // NOTE(review): presumably a Unix timestamp; the unit is not visible
    // from this file — confirm.
    pub at: i64,
}
/// Commands accepted on a per-agent socket.
///
/// The caller's identity is the socket the request arrived on. That is why
/// `Send` has no `from` field: the sender is filled in by the server, never
/// by the client. The variant is selected by the `cmd` field, spelled in
/// `snake_case`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "cmd", rename_all = "snake_case")]
pub enum AgentRequest {
    /// Deliver `body` to the agent named `to`.
    Send { to: String, body: String },
    /// Pop one pending message from this agent's inbox. The call
    /// long-polls for up to `wait_seconds` before it gives up and returns
    /// `Empty`; the server caps the wait at 60s and uses 30s when the
    /// value is `None`.
    Recv {
        /// Long-poll duration; `None` when absent from the request.
        #[serde(default)]
        wait_seconds: Option<u64>,
    },
    /// Non-mutating: how many pending messages are addressed to this
    /// agent? The harness uses it to render a status line after each tool
    /// call.
    Status,
    /// Message injected by the operator TO this agent, from the agent's
    /// own web UI. The recipient is implicit and `from` is `"operator"`.
    /// In effect the per-agent equivalent of the old dashboard T4LK form,
    /// scoped to the agent whose page the operator is on.
    OperatorMsg { body: String },
    /// The last `limit` messages addressed to this agent, newest-first.
    /// Non-mutating: rows are read from the broker without being
    /// delivered. The per-agent web UI renders its inbox section from
    /// this.
    Recent { limit: u64 },
}
/// Replies sent on a per-agent socket.
///
/// The variant is selected by the `kind` field, spelled in `snake_case`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum AgentResponse {
    /// The `Send` went through.
    Ok,
    /// A `Send` failed or a `Recv` errored; `message` carries the error
    /// text.
    Err { message: String },
    /// A `Recv` yielded a message.
    Message { from: String, body: String },
    /// A `Recv` found nothing pending.
    Empty,
    /// Answer to `Status`: the number of pending messages in this agent's
    /// inbox.
    Status { unread: u64 },
    /// Answer to `Recent`: inbox rows, newest-first.
    Recent { rows: Vec<InboxRow> },
}
// -----------------------------------------------------------------------------
// Manager socket — /run/hyperhive/manager/mcp.sock on the host, bind-mounted
// into the manager container at /run/hive/mcp.sock.
// -----------------------------------------------------------------------------
/// Logical name the broker uses for the manager.
pub const MANAGER_AGENT: &str = "manager";
/// Logical name the broker uses for the human operator. Messages with
/// `to = OPERATOR_RECIPIENT` accumulate in sqlite and surface on the
/// dashboard's inbox view — they are never `recv`'d by an agent harness.
/// The same literal, `"operator"`, is what the `OperatorMsg` request docs
/// give as the `from` of operator-injected messages.
pub const OPERATOR_RECIPIENT: &str = "operator";
/// Sender hive-c0re uses for events it pushes into the manager's inbox.
/// The manager harness recognises this sender and parses the message body
/// as a JSON-encoded `HelperEvent`.
pub const SYSTEM_SENDER: &str = "system";
/// Out-of-band events that the host-side daemon pushes into the manager's
/// inbox.
///
/// Each event is serialized as JSON into `Message::body`, with the sender
/// set to `SYSTEM_SENDER`. The variant is selected by the `event` field,
/// spelled in `snake_case`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "event", rename_all = "snake_case")]
pub enum HelperEvent {
    /// An approval reached the approved / denied / failed state. When it
    /// was approved, the underlying action (rebuild or spawn) has already
    /// run by the time this event lands.
    ApprovalResolved {
        id: i64,
        agent: String,
        commit_ref: String,
        status: ApprovalStatus,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        note: Option<String>,
        /// Canonical sha that hive-c0re fetched into applied at
        /// submission time. Running `git show <sha>` against
        /// `/agents/<n>/applied.git` inside the manager container yields
        /// the exact tree being referenced.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        sha: Option<String>,
        /// Name of the terminal tag in the applied repo for this
        /// approval: `deployed/<id>`, `failed/<id>` or `denied/<id>`
        /// (plus `approved/<id>` for the rare bare-approval case in which
        /// no underlying action runs).
        #[serde(default, skip_serializing_if = "Option::is_none")]
        tag: Option<String>,
    },
    /// A new container was spawned, either after an approval or through
    /// the admin CLI bypass path. `ok=false` means the spawn failed.
    Spawned {
        agent: String,
        ok: bool,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        note: Option<String>,
        /// Sha of the `deployed/0` commit that hive-c0re seeds on first
        /// spawn: `Some` on success, `None` on failure.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        sha: Option<String>,
    },
    /// A container was rebuilt: an auto-update after a flake rev change,
    /// or a manual rebuild from the CLI or dashboard.
    Rebuilt {
        agent: String,
        ok: bool,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        note: Option<String>,
        /// On success, the sha that ended up at `deployed/<id>`; on
        /// failure, the proposal sha that was just tagged `failed/<id>`.
        /// `None` on the (rare) rebuild path that does not go through an
        /// approval, e.g. `auto_update::rebuild_agent` reapplying the
        /// existing main.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        sha: Option<String>,
        /// `deployed/<id>` or `failed/<id>` for approval-driven rebuilds;
        /// `None` for auto-update / dashboard rebuilds, which leave the
        /// deployed commit unchanged.
        #[serde(default, skip_serializing_if = "Option::is_none")]
        tag: Option<String>,
    },
    /// A sub-agent's container was stopped: the systemd unit is down and
    /// persistent state is untouched.
    Killed { agent: String },
    /// A sub-agent's container was torn down: the container is removed,
    /// and state dirs are preserved per `destroy` semantics.
    // NOTE(review): the event carries no purge flag, so a purging destroy
    // cannot be told apart from a plain one here — confirm that is
    // intended.
    Destroyed { agent: String },
    /// A sub-agent's container has no claude session yet (first spawn, or
    /// `--purge` wiped the creds). The manager cannot fix this directly —
    /// login is interactive OAuth — but the event tells it that the agent
    /// is in partial-run mode, so it can flag the operator.
    NeedsLogin { agent: String },
    /// An agent completed claude login and its session dir now contains
    /// creds. The transition fires once per login.
    LoggedIn { agent: String },
    /// The flake rev recorded for an agent is stale relative to the
    /// current hyperhive rev. The manager can use its `update` tool to
    /// trigger a rebuild without operator approval; that is a no-op when
    /// nothing actually changed.
    NeedsUpdate { agent: String },
    /// A container exited without an operator-initiated stop. The crash
    /// watcher fires this when an agent's container goes from running →
    /// stopped while no `Stopping` / `Restarting` / `Destroying`
    /// transient was set, so that the operator (or the manager) can tell
    /// a crash from a deliberate kill.
    ContainerCrash {
        agent: String,
        #[serde(default, skip_serializing_if = "Option::is_none")]
        note: Option<String>,
    },
    /// The operator answered a question queued via `AskOperator`. `id`
    /// equals the `QuestionQueued.id` handed back to the asker, and
    /// `question` echoes the original prompt so the manager can tie the
    /// answer back to its context across compactions.
    OperatorAnswered {
        id: i64,
        question: String,
        answer: String,
    },
}
/// Commands accepted on the manager socket: the regular agent surface
/// (send/recv) plus privileged lifecycle verbs.
///
/// The variant is selected by the `cmd` field, spelled in `snake_case`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "cmd", rename_all = "snake_case")]
pub enum ManagerRequest {
    /// Deliver `body` to the agent named `to`; same shape as
    /// `AgentRequest::Send`.
    Send { to: String, body: String },
    /// Same shape as `AgentRequest::Recv`: the caller picks the long-poll
    /// duration, which the server caps at 60s and defaults to 30s when
    /// `None`.
    Recv {
        /// Long-poll duration; `None` when absent from the request.
        #[serde(default)]
        wait_seconds: Option<u64>,
    },
    /// Non-mutating: pending message count, used to render a status line
    /// after each MCP tool call (mirrors `AgentRequest::Status`).
    Status,
    /// Message injected by the operator TO the manager, from the
    /// manager's own web UI. Same shape as `AgentRequest::OperatorMsg`.
    OperatorMsg { body: String },
    /// The last `limit` messages addressed to the manager, newest-first.
    /// Non-mutating; mirror of `AgentRequest::Recent`.
    Recent { limit: u64 },
    /// Queue a spawn for the user to approve; once approved, the host
    /// creates and starts the container. Only brand-new agent names work:
    /// if an agent with that name already exists, the approval will fail.
    RequestSpawn { name: String },
    /// Gracefully stop a sub-agent.
    Kill { name: String },
    /// Start a sub-agent container that was stopped earlier.
    Start { name: String },
    /// Restart a sub-agent container (stop + start).
    Restart { name: String },
    /// Rebuild a sub-agent: re-apply the current hyperhive flake and
    /// agent.nix, then restart the container. No approval is required —
    /// the operation is idempotent and the manager owns its own update
    /// cadence.
    Update { name: String },
    /// Submit a config commit for the user to approve. `commit_ref` is
    /// opaque to the host (typically a git sha pointing into the agent's
    /// config repo). On approval the host applies the change via
    /// `nixos-container update`.
    RequestApplyCommit {
        agent: String,
        commit_ref: String,
    },
    /// Ask the operator a question. The call returns immediately with the
    /// id of the queued question; the answer arrives later in the manager
    /// inbox as a `HelperEvent::OperatorAnswered`.
    ///
    /// - `options` is advisory. Empty means free-text only; non-empty
    ///   makes the dashboard render the choices next to a free-text
    ///   fallback ("Other…"), so the operator is never trapped.
    /// - `multi=true` allows picking several options (rendered as
    ///   checkboxes). The answer still comes back as a single string,
    ///   with the selections joined by ", ".
    /// - `ttl_seconds` optionally auto-cancels the question after that
    ///   many seconds. On expiry it is resolved with the answer
    ///   `[expired]` and the manager receives the usual
    ///   `OperatorAnswered` event. `None` waits forever for an operator
    ///   answer (or a manual cancel).
    AskOperator {
        question: String,
        #[serde(default)]
        options: Vec<String>,
        #[serde(default)]
        multi: bool,
        #[serde(default)]
        ttl_seconds: Option<u64>,
    },
}
/// Replies sent on the manager socket.
///
/// The variant is selected by the `kind` field, spelled in `snake_case`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum ManagerResponse {
    /// Same shape as `AgentResponse::Ok`.
    Ok,
    /// Same shape as `AgentResponse::Err`; `message` carries the error
    /// text.
    Err { message: String },
    /// Same shape as `AgentResponse::Message`.
    Message { from: String, body: String },
    /// Same shape as `AgentResponse::Empty`.
    Empty,
    /// Same shape as `AgentResponse::Status`.
    Status { unread: u64 },
    /// Answer to `AskOperator`: the id of the queued question. The actual
    /// answer arrives later in the manager inbox as a
    /// `HelperEvent::OperatorAnswered`, so this reply comes back
    /// immediately instead of blocking the turn.
    QuestionQueued { id: i64 },
    /// Answer to `Recent`: mirror of `AgentResponse::Recent`.
    Recent { rows: Vec<InboxRow> },
}