Compare commits
No commits in common. "d3d52349c3431cb03c0b976fa0e5ec6cfc8aa34d" and "67f948028cae137d4a722ab121fb23460f729736" have entirely different histories.
d3d52349c3
...
67f948028c
13 changed files with 52 additions and 194 deletions
|
|
@ -275,65 +275,13 @@ pub enum TurnState {
|
||||||
Compacting,
|
Compacting,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Default claude model when nothing's been set at runtime. Overridable
|
/// Default claude model when nothing's been set at runtime. The
|
||||||
/// via the `HIVE_DEFAULT_MODEL` env var (set from `hyperhive.model` in
|
/// operator can switch via `/model <name>` in the web terminal; the
|
||||||
/// the container's `agent.nix`). The operator can also switch at runtime
|
/// chosen model lives in `Bus::model` for the rest of the harness
|
||||||
/// via `/model <name>` in the web terminal; the chosen model is persisted
|
/// process's life (resets on restart, by design — operator overrides
|
||||||
/// to the state dir so it survives restarts.
|
/// shouldn't survive accidentally).
|
||||||
pub const DEFAULT_MODEL: &str = "haiku";
|
pub const DEFAULT_MODEL: &str = "haiku";
|
||||||
|
|
||||||
/// Return the initial default model name: `HIVE_DEFAULT_MODEL` env var if
|
|
||||||
/// set to a non-empty string, otherwise `DEFAULT_MODEL`.
|
|
||||||
#[must_use]
|
|
||||||
pub fn default_model() -> &'static str {
|
|
||||||
// Leak once at startup — acceptable for a single config value.
|
|
||||||
std::env::var("HIVE_DEFAULT_MODEL")
|
|
||||||
.ok()
|
|
||||||
.filter(|s| !s.trim().is_empty())
|
|
||||||
.map_or(DEFAULT_MODEL, |s| Box::leak(s.into_boxed_str()))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Context-window size in tokens for a given model name.
|
|
||||||
///
|
|
||||||
/// Canonical per-model sizes are declared in `harness-base.nix` as
|
|
||||||
/// `hyperhive.contextWindowTokens` and injected as
|
|
||||||
/// `HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER>` env vars — so this function
|
|
||||||
/// normally just reads them. The Rust code carries no model knowledge;
|
|
||||||
/// updating model families only requires a Nix change.
|
|
||||||
///
|
|
||||||
/// Resolution order (first match wins):
|
|
||||||
/// 1. `HIVE_CONTEXT_WINDOW_TOKENS_<KEY>` — key (lowercased) is a
|
|
||||||
/// substring of the active model name. Populated by the Nix default
|
|
||||||
/// map for all known families; add/override in `agent.nix`.
|
|
||||||
/// 2. `HIVE_CONTEXT_WINDOW_TOKENS` — single global override (any model).
|
|
||||||
/// 3. Hard fallback: `200_000` (conservative; only hit outside NixOS).
|
|
||||||
#[must_use]
|
|
||||||
pub fn context_window_tokens(model: &str) -> u64 {
|
|
||||||
let m = model.to_ascii_lowercase();
|
|
||||||
// Per-model env vars set by `hyperhive.contextWindowTokens` in Nix.
|
|
||||||
for (key, val) in std::env::vars() {
|
|
||||||
if let Some(suffix) = key.strip_prefix("HIVE_CONTEXT_WINDOW_TOKENS_") {
|
|
||||||
if !suffix.is_empty() && m.contains(&suffix.to_ascii_lowercase()) {
|
|
||||||
if let Ok(v) = val.trim().parse::<u64>() {
|
|
||||||
if v > 0 {
|
|
||||||
return v;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Global override (single value, any model).
|
|
||||||
if let Ok(s) = std::env::var("HIVE_CONTEXT_WINDOW_TOKENS") {
|
|
||||||
if let Ok(v) = s.trim().parse::<u64>() {
|
|
||||||
if v > 0 {
|
|
||||||
return v;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Hard fallback for dev/test outside NixOS where env vars aren't set.
|
|
||||||
200_000
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Bus {
|
pub struct Bus {
|
||||||
tx: Arc<broadcast::Sender<BusEvent>>,
|
tx: Arc<broadcast::Sender<BusEvent>>,
|
||||||
|
|
@ -403,7 +351,7 @@ impl Bus {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let (tx, _) = broadcast::channel(CHANNEL_CAPACITY);
|
let (tx, _) = broadcast::channel(CHANNEL_CAPACITY);
|
||||||
let initial_model = load_model().unwrap_or_else(|| default_model().to_owned());
|
let initial_model = load_model().unwrap_or_else(|| DEFAULT_MODEL.to_owned());
|
||||||
// Restore rate_limited from the sentinel file — if the harness
|
// Restore rate_limited from the sentinel file — if the harness
|
||||||
// crashed while parked, we should still show the right status on
|
// crashed while parked, we should still show the right status on
|
||||||
// cold load until the next turn clears it.
|
// cold load until the next turn clears it.
|
||||||
|
|
|
||||||
|
|
@ -54,6 +54,16 @@ const RATE_LIMIT_MARKERS: &[&str] = &[
|
||||||
/// capacity limits.
|
/// capacity limits.
|
||||||
const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300;
|
const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300;
|
||||||
|
|
||||||
|
/// Token watermark for *auto session-reset*. When context is at or above this
|
||||||
|
/// many tokens AND the prompt cache has gone cold (idle time >= `CACHE_TTL_SECS`),
|
||||||
|
/// the harness drops `--continue` so the next turn starts fresh. Running any
|
||||||
|
/// turn (even a checkpoint) before the reset would re-upload the full context
|
||||||
|
/// and warm the cache, defeating the cost purpose — so the reset happens
|
||||||
|
/// immediately with no preceding turn. Default is ~50% of a 200k-token
|
||||||
|
/// window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set to `0`
|
||||||
|
/// to disable.
|
||||||
|
const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;
|
||||||
|
|
||||||
/// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on
|
/// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on
|
||||||
/// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the
|
/// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the
|
||||||
/// idle gap exceeds this, the cache prefix has likely expired and the next
|
/// idle gap exceeds this, the cache prefix has likely expired and the next
|
||||||
|
|
@ -65,6 +75,19 @@ const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300;
|
||||||
/// `0` to disable (always resume).
|
/// `0` to disable (always resume).
|
||||||
const DEFAULT_CACHE_TTL_SECS: u64 = 3600;
|
const DEFAULT_CACHE_TTL_SECS: u64 = 3600;
|
||||||
|
|
||||||
|
/// Token watermark for *proactive* compaction. Once a turn finishes with
|
||||||
|
/// the last inference's context size at or above this many tokens,
|
||||||
|
/// `drive_turn` runs one dedicated notes-checkpoint turn (so the agent
|
||||||
|
/// can flush durable state into `/state`) and then `/compact` — while the
|
||||||
|
/// session is still healthy enough to run a turn at all. This is distinct
|
||||||
|
/// from the reactive `PROMPT_TOO_LONG_MARKER` path, which only fires once
|
||||||
|
/// the session is *already* past the window: at that point no turn can
|
||||||
|
/// run on it, so the reactive path just compacts + retries with no
|
||||||
|
/// checkpoint. Default is ~75% of a 200k-token window; override via
|
||||||
|
/// `HIVE_COMPACT_WATERMARK_TOKENS`, or set that to `0` to disable
|
||||||
|
/// proactive compaction entirely (the reactive path always applies).
|
||||||
|
const DEFAULT_COMPACT_WATERMARK_TOKENS: u64 = 150_000;
|
||||||
|
|
||||||
/// Synthetic wake prompt for the proactive notes-checkpoint turn. Not an
|
/// Synthetic wake prompt for the proactive notes-checkpoint turn. Not an
|
||||||
/// inbox message — the harness injects it directly so the agent gets one
|
/// inbox message — the harness injects it directly so the agent gets one
|
||||||
/// turn to persist durable state before `/compact` collapses the
|
/// turn to persist durable state before `/compact` collapses the
|
||||||
|
|
@ -189,19 +212,14 @@ pub fn rate_limit_sleep_secs() -> u64 {
|
||||||
.unwrap_or(DEFAULT_RATE_LIMIT_SLEEP_SECS)
|
.unwrap_or(DEFAULT_RATE_LIMIT_SLEEP_SECS)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Resolve the auto-reset watermark. Priority order:
|
/// Resolve the auto-reset watermark: `HIVE_AUTO_RESET_WATERMARK_TOKENS` if
|
||||||
/// 1. `HIVE_AUTO_RESET_WATERMARK_TOKENS` env var (explicit override).
|
/// set to a valid integer, else `DEFAULT_AUTO_RESET_WATERMARK_TOKENS`. `0`
|
||||||
/// 2. 50% of the model's context window (derived from `bus.model()` +
|
/// disables auto-reset entirely.
|
||||||
/// `events::context_window_tokens`).
|
fn auto_reset_watermark_tokens() -> u64 {
|
||||||
/// `0` disables auto-reset entirely.
|
std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS")
|
||||||
fn auto_reset_watermark_tokens(bus: &Bus) -> u64 {
|
|
||||||
if let Some(v) = std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS")
|
|
||||||
.ok()
|
.ok()
|
||||||
.and_then(|s| s.trim().parse::<u64>().ok())
|
.and_then(|s| s.trim().parse::<u64>().ok())
|
||||||
{
|
.unwrap_or(DEFAULT_AUTO_RESET_WATERMARK_TOKENS)
|
||||||
return v;
|
|
||||||
}
|
|
||||||
crate::events::context_window_tokens(&bus.model()) / 2
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Resolve the assumed cache TTL: `HIVE_CACHE_TTL_SECS` if set, else
|
/// Resolve the assumed cache TTL: `HIVE_CACHE_TTL_SECS` if set, else
|
||||||
|
|
@ -214,19 +232,14 @@ fn cache_ttl_secs() -> u64 {
|
||||||
.unwrap_or(DEFAULT_CACHE_TTL_SECS)
|
.unwrap_or(DEFAULT_CACHE_TTL_SECS)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Resolve the proactive-compaction watermark. Priority order:
|
/// Resolve the proactive-compaction watermark: `HIVE_COMPACT_WATERMARK_TOKENS`
|
||||||
/// 1. `HIVE_COMPACT_WATERMARK_TOKENS` env var (explicit override).
|
/// if set to a valid integer, else `DEFAULT_COMPACT_WATERMARK_TOKENS`. A
|
||||||
/// 2. 75% of the model's context window (derived from `bus.model()` +
|
/// value of `0` disables proactive compaction.
|
||||||
/// `events::context_window_tokens`).
|
fn compact_watermark_tokens() -> u64 {
|
||||||
/// `0` disables proactive compaction (reactive path still applies).
|
std::env::var("HIVE_COMPACT_WATERMARK_TOKENS")
|
||||||
fn compact_watermark_tokens(bus: &Bus) -> u64 {
|
|
||||||
if let Some(v) = std::env::var("HIVE_COMPACT_WATERMARK_TOKENS")
|
|
||||||
.ok()
|
.ok()
|
||||||
.and_then(|s| s.trim().parse::<u64>().ok())
|
.and_then(|s| s.trim().parse::<u64>().ok())
|
||||||
{
|
.unwrap_or(DEFAULT_COMPACT_WATERMARK_TOKENS)
|
||||||
return v;
|
|
||||||
}
|
|
||||||
crate::events::context_window_tokens(&bus.model()) * 3 / 4
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Drive one turn end-to-end. Three paths layer on top of the raw `run_turn`:
|
/// Drive one turn end-to-end. Three paths layer on top of the raw `run_turn`:
|
||||||
|
|
@ -278,7 +291,7 @@ pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutco
|
||||||
/// checkpoint or compaction is logged + surfaced as a Note but never
|
/// checkpoint or compaction is logged + surfaced as a Note but never
|
||||||
/// fails the turn that already succeeded.
|
/// fails the turn that already succeeded.
|
||||||
async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
|
async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
|
||||||
let watermark = compact_watermark_tokens(bus);
|
let watermark = compact_watermark_tokens();
|
||||||
if watermark == 0 {
|
if watermark == 0 {
|
||||||
return; // proactive compaction disabled
|
return; // proactive compaction disabled
|
||||||
}
|
}
|
||||||
|
|
@ -323,7 +336,7 @@ async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
|
||||||
/// any turn before the reset would re-upload and re-warm the cache, which
|
/// any turn before the reset would re-upload and re-warm the cache, which
|
||||||
/// defeats the cost-optimisation purpose entirely.
|
/// defeats the cost-optimisation purpose entirely.
|
||||||
fn maybe_auto_reset(bus: &Bus) {
|
fn maybe_auto_reset(bus: &Bus) {
|
||||||
let watermark = auto_reset_watermark_tokens(bus);
|
let watermark = auto_reset_watermark_tokens();
|
||||||
if watermark == 0 {
|
if watermark == 0 {
|
||||||
return; // auto-reset disabled
|
return; // auto-reset disabled
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -357,12 +357,6 @@ struct StateSnapshot {
|
||||||
/// the operator can see what they just switched to (and what's
|
/// the operator can see what they just switched to (and what's
|
||||||
/// in flight). Mutable at runtime via `POST /api/model`.
|
/// in flight). Mutable at runtime via `POST /api/model`.
|
||||||
model: String,
|
model: String,
|
||||||
/// Effective context-window token budget for the current model.
|
|
||||||
/// Derived from `events::context_window_tokens(&model)` — respects
|
|
||||||
/// per-model and global `HIVE_CONTEXT_WINDOW_TOKENS_*` overrides then
|
|
||||||
/// falls back to model-family heuristic. Consumers (e.g. dashboard
|
|
||||||
/// badge) use this to render the ctx-usage percentage.
|
|
||||||
context_window_tokens: u64,
|
|
||||||
/// Last-inference token usage from the most recent completed
|
/// Last-inference token usage from the most recent completed
|
||||||
/// turn — represents the current context-window size at turn-end.
|
/// turn — represents the current context-window size at turn-end.
|
||||||
/// `null` until the first turn finishes.
|
/// `null` until the first turn finishes.
|
||||||
|
|
@ -457,7 +451,6 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
|
||||||
let inbox = recent_inbox(&state.socket, state.flavor()).await;
|
let inbox = recent_inbox(&state.socket, state.flavor()).await;
|
||||||
let (turn_state, turn_state_since) = state.bus.state_snapshot();
|
let (turn_state, turn_state_since) = state.bus.state_snapshot();
|
||||||
let model = state.bus.model();
|
let model = state.bus.model();
|
||||||
let context_window_tokens = crate::events::context_window_tokens(&model);
|
|
||||||
let ctx_usage = state.bus.last_ctx_usage();
|
let ctx_usage = state.bus.last_ctx_usage();
|
||||||
let cost_usage = state.bus.last_cost_usage();
|
let cost_usage = state.bus.last_cost_usage();
|
||||||
axum::Json(StateSnapshot {
|
axum::Json(StateSnapshot {
|
||||||
|
|
@ -470,7 +463,6 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
|
||||||
turn_state,
|
turn_state,
|
||||||
turn_state_since,
|
turn_state_since,
|
||||||
model,
|
model,
|
||||||
context_window_tokens,
|
|
||||||
ctx_usage,
|
ctx_usage,
|
||||||
cost_usage,
|
cost_usage,
|
||||||
gui_enabled: state.gui_vnc_port.is_some(),
|
gui_enabled: state.gui_vnc_port.is_some(),
|
||||||
|
|
|
||||||
|
|
@ -91,7 +91,6 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
|
||||||
&notes_dir,
|
&notes_dir,
|
||||||
coord_bg.dashboard_port,
|
coord_bg.dashboard_port,
|
||||||
&coord_bg.operator_pronouns,
|
&coord_bg.operator_pronouns,
|
||||||
&coord_bg.context_window_tokens,
|
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
drop(guard);
|
drop(guard);
|
||||||
|
|
@ -416,7 +415,6 @@ async fn sync_meta_after_lifecycle(coord: &Coordinator) -> Result<()> {
|
||||||
&coord.hyperhive_flake,
|
&coord.hyperhive_flake,
|
||||||
coord.dashboard_port,
|
coord.dashboard_port,
|
||||||
&coord.operator_pronouns,
|
&coord.operator_pronouns,
|
||||||
&coord.context_window_tokens,
|
|
||||||
&agents,
|
&agents,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
|
|
|
||||||
|
|
@ -73,7 +73,6 @@ pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &s
|
||||||
&notes_dir,
|
&notes_dir,
|
||||||
coord.dashboard_port,
|
coord.dashboard_port,
|
||||||
&coord.operator_pronouns,
|
&coord.operator_pronouns,
|
||||||
&coord.context_window_tokens,
|
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
drop(guard);
|
drop(guard);
|
||||||
|
|
@ -161,7 +160,6 @@ pub async fn ensure_manager(coord: &Arc<Coordinator>) -> Result<()> {
|
||||||
&notes_dir,
|
&notes_dir,
|
||||||
coord.dashboard_port,
|
coord.dashboard_port,
|
||||||
&coord.operator_pronouns,
|
&coord.operator_pronouns,
|
||||||
&coord.context_window_tokens,
|
|
||||||
)
|
)
|
||||||
.await?;
|
.await?;
|
||||||
if let Some(rev) = current_rev {
|
if let Some(rev) = current_rev {
|
||||||
|
|
|
||||||
|
|
@ -51,13 +51,6 @@ pub struct Coordinator {
|
||||||
/// meta flake); the harness substitutes it into the agent /
|
/// meta flake); the harness substitutes it into the agent /
|
||||||
/// manager system prompt at boot.
|
/// manager system prompt at boot.
|
||||||
pub operator_pronouns: String,
|
pub operator_pronouns: String,
|
||||||
/// Per-model context-window sizes in tokens. Set via the host-level
|
|
||||||
/// `services.hive-c0re.contextWindowTokens` NixOS option; injected
|
|
||||||
/// into each container as `HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER>`
|
|
||||||
/// by the meta flake renderer. The harness uses these to derive
|
|
||||||
/// compaction / auto-reset watermarks and exposes the active value
|
|
||||||
/// on `/api/state` as `context_window_tokens`.
|
|
||||||
pub context_window_tokens: std::collections::HashMap<String, u64>,
|
|
||||||
agents: Mutex<HashMap<String, AgentSocket>>,
|
agents: Mutex<HashMap<String, AgentSocket>>,
|
||||||
/// Agents whose lifecycle action (currently just spawn) is in flight.
|
/// Agents whose lifecycle action (currently just spawn) is in flight.
|
||||||
/// Read by the dashboard to render a spinner; cleared when the action
|
/// Read by the dashboard to render a spinner; cleared when the action
|
||||||
|
|
@ -146,7 +139,6 @@ impl Coordinator {
|
||||||
hyperhive_flake: String,
|
hyperhive_flake: String,
|
||||||
dashboard_port: u16,
|
dashboard_port: u16,
|
||||||
operator_pronouns: String,
|
operator_pronouns: String,
|
||||||
context_window_tokens: std::collections::HashMap<String, u64>,
|
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let broker = Broker::open(db_path).context("open broker")?;
|
let broker = Broker::open(db_path).context("open broker")?;
|
||||||
let approvals = Approvals::open(db_path).context("open approvals")?;
|
let approvals = Approvals::open(db_path).context("open approvals")?;
|
||||||
|
|
@ -160,7 +152,6 @@ impl Coordinator {
|
||||||
hyperhive_flake,
|
hyperhive_flake,
|
||||||
dashboard_port,
|
dashboard_port,
|
||||||
operator_pronouns,
|
operator_pronouns,
|
||||||
context_window_tokens,
|
|
||||||
agents: Mutex::new(HashMap::new()),
|
agents: Mutex::new(HashMap::new()),
|
||||||
transient: Mutex::new(HashMap::new()),
|
transient: Mutex::new(HashMap::new()),
|
||||||
dashboard_events,
|
dashboard_events,
|
||||||
|
|
|
||||||
|
|
@ -138,7 +138,6 @@ pub async fn spawn(
|
||||||
notes_dir: &Path,
|
notes_dir: &Path,
|
||||||
dashboard_port: u16,
|
dashboard_port: u16,
|
||||||
operator_pronouns: &str,
|
operator_pronouns: &str,
|
||||||
context_window_tokens: &std::collections::HashMap<String, u64>,
|
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
validate(name)?;
|
validate(name)?;
|
||||||
if let Some(other) = port_collision(name).await {
|
if let Some(other) = port_collision(name).await {
|
||||||
|
|
@ -155,7 +154,7 @@ pub async fn spawn(
|
||||||
// before `nixos-container create` so the `--flake meta#<name>`
|
// before `nixos-container create` so the `--flake meta#<name>`
|
||||||
// ref resolves.
|
// ref resolves.
|
||||||
let agents = agents_after_spawn(name).await?;
|
let agents = agents_after_spawn(name).await?;
|
||||||
crate::meta::sync_agents(hyperhive_flake, dashboard_port, operator_pronouns, context_window_tokens, &agents).await?;
|
crate::meta::sync_agents(hyperhive_flake, dashboard_port, operator_pronouns, &agents).await?;
|
||||||
let container = container_name(name);
|
let container = container_name(name);
|
||||||
let flake_ref = format!("{}#{name}", crate::meta::meta_dir().display());
|
let flake_ref = format!("{}#{name}", crate::meta::meta_dir().display());
|
||||||
run(&["create", &container, "--flake", &flake_ref]).await?;
|
run(&["create", &container, "--flake", &flake_ref]).await?;
|
||||||
|
|
@ -274,7 +273,6 @@ pub async fn rebuild(
|
||||||
notes_dir: &Path,
|
notes_dir: &Path,
|
||||||
dashboard_port: u16,
|
dashboard_port: u16,
|
||||||
operator_pronouns: &str,
|
operator_pronouns: &str,
|
||||||
context_window_tokens: &std::collections::HashMap<String, u64>,
|
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// Sync the meta flake (idempotent — no-op when the rendered
|
// Sync the meta flake (idempotent — no-op when the rendered
|
||||||
// flake matches disk) so a manual rebuild from the dashboard
|
// flake matches disk) so a manual rebuild from the dashboard
|
||||||
|
|
@ -282,7 +280,7 @@ pub async fn rebuild(
|
||||||
// got added directly via `nixos-container create` outside
|
// got added directly via `nixos-container create` outside
|
||||||
// hive-c0re).
|
// hive-c0re).
|
||||||
let agents = agents_for_meta(None).await?;
|
let agents = agents_for_meta(None).await?;
|
||||||
crate::meta::sync_agents(hyperhive_flake, dashboard_port, operator_pronouns, context_window_tokens, &agents).await?;
|
crate::meta::sync_agents(hyperhive_flake, dashboard_port, operator_pronouns, &agents).await?;
|
||||||
// Then bump just this agent's input — picks up whatever
|
// Then bump just this agent's input — picks up whatever
|
||||||
// `applied/<n>/main` currently points at (deployed/<latest>).
|
// `applied/<n>/main` currently points at (deployed/<latest>).
|
||||||
// Commits the lock if it changed.
|
// Commits the lock if it changed.
|
||||||
|
|
|
||||||
|
|
@ -62,12 +62,6 @@ enum Cmd {
|
||||||
/// system prompt can mention them. Default: `she/her`.
|
/// system prompt can mention them. Default: `she/her`.
|
||||||
#[arg(long, default_value = "she/her")]
|
#[arg(long, default_value = "she/her")]
|
||||||
operator_pronouns: String,
|
operator_pronouns: String,
|
||||||
/// Per-model context-window sizes, as JSON object mapping model-family
|
|
||||||
/// short name to token count. Threaded into each container as
|
|
||||||
/// `HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER>` env vars. Set via the
|
|
||||||
/// `services.hive-c0re.contextWindowTokens` NixOS option.
|
|
||||||
#[arg(long, default_value = r#"{"haiku":200000,"sonnet":1000000,"opus":1000000}"#)]
|
|
||||||
context_window_tokens: String,
|
|
||||||
},
|
},
|
||||||
/// Spawn a new agent container directly (`hive-agent-<name>`). Bypasses
|
/// Spawn a new agent container directly (`hive-agent-<name>`). Bypasses
|
||||||
/// the approval queue — use only as an operator on the host. For
|
/// the approval queue — use only as an operator on the host. For
|
||||||
|
|
@ -115,17 +109,12 @@ async fn main() -> Result<()> {
|
||||||
db,
|
db,
|
||||||
dashboard_port,
|
dashboard_port,
|
||||||
operator_pronouns,
|
operator_pronouns,
|
||||||
context_window_tokens,
|
|
||||||
} => {
|
} => {
|
||||||
let cwt: std::collections::HashMap<String, u64> =
|
|
||||||
serde_json::from_str(&context_window_tokens)
|
|
||||||
.context("--context-window-tokens: invalid JSON")?;
|
|
||||||
let coord = Arc::new(Coordinator::open(
|
let coord = Arc::new(Coordinator::open(
|
||||||
&db,
|
&db,
|
||||||
hyperhive_flake,
|
hyperhive_flake,
|
||||||
dashboard_port,
|
dashboard_port,
|
||||||
operator_pronouns,
|
operator_pronouns,
|
||||||
cwt,
|
|
||||||
)?);
|
)?);
|
||||||
manager_server::start(coord.clone())?;
|
manager_server::start(coord.clone())?;
|
||||||
// Idempotent pre-flight: rewrite pre-meta-layout applied
|
// Idempotent pre-flight: rewrite pre-meta-layout applied
|
||||||
|
|
|
||||||
|
|
@ -66,14 +66,13 @@ pub async fn sync_agents(
|
||||||
hyperhive_flake: &str,
|
hyperhive_flake: &str,
|
||||||
dashboard_port: u16,
|
dashboard_port: u16,
|
||||||
operator_pronouns: &str,
|
operator_pronouns: &str,
|
||||||
context_window_tokens: &std::collections::HashMap<String, u64>,
|
|
||||||
agents: &[AgentSpec],
|
agents: &[AgentSpec],
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let _guard = META_LOCK.lock().await;
|
let _guard = META_LOCK.lock().await;
|
||||||
let dir = meta_dir();
|
let dir = meta_dir();
|
||||||
std::fs::create_dir_all(&dir).with_context(|| format!("create {}", dir.display()))?;
|
std::fs::create_dir_all(&dir).with_context(|| format!("create {}", dir.display()))?;
|
||||||
|
|
||||||
let new_flake = render_flake(hyperhive_flake, dashboard_port, operator_pronouns, context_window_tokens, agents);
|
let new_flake = render_flake(hyperhive_flake, dashboard_port, operator_pronouns, agents);
|
||||||
let flake_path = dir.join("flake.nix");
|
let flake_path = dir.join("flake.nix");
|
||||||
let on_disk = std::fs::read_to_string(&flake_path).unwrap_or_default();
|
let on_disk = std::fs::read_to_string(&flake_path).unwrap_or_default();
|
||||||
let initial = !dir.join(".git").exists();
|
let initial = !dir.join(".git").exists();
|
||||||
|
|
@ -236,7 +235,6 @@ fn render_flake(
|
||||||
hyperhive_flake: &str,
|
hyperhive_flake: &str,
|
||||||
dashboard_port: u16,
|
dashboard_port: u16,
|
||||||
operator_pronouns: &str,
|
operator_pronouns: &str,
|
||||||
context_window_tokens: &std::collections::HashMap<String, u64>,
|
|
||||||
agents: &[AgentSpec],
|
agents: &[AgentSpec],
|
||||||
) -> String {
|
) -> String {
|
||||||
use std::fmt::Write as _;
|
use std::fmt::Write as _;
|
||||||
|
|
@ -285,19 +283,8 @@ fn render_flake(
|
||||||
HIVE_PORT = toString port;
|
HIVE_PORT = toString port;
|
||||||
HIVE_LABEL = name;
|
HIVE_LABEL = name;
|
||||||
HIVE_DASHBOARD_PORT = toString dashboardPort;
|
HIVE_DASHBOARD_PORT = toString dashboardPort;
|
||||||
HIVE_OPERATOR_PRONOUNS = operatorPronouns;"#,
|
HIVE_OPERATOR_PRONOUNS = operatorPronouns;
|
||||||
);
|
HYPERHIVE_STATE_DIR = "/agents/${name}/state";
|
||||||
// Per-model context-window env vars declared in the host-level
|
|
||||||
// `services.hive-c0re.contextWindowTokens` option. Use a sorted
|
|
||||||
// iterator for deterministic flake output (no spurious git diffs).
|
|
||||||
let mut sorted_tokens: Vec<(&String, &u64)> = context_window_tokens.iter().collect();
|
|
||||||
sorted_tokens.sort_by_key(|(k, _)| k.as_str());
|
|
||||||
for (key, val) in &sorted_tokens {
|
|
||||||
let upper_key = key.to_ascii_uppercase();
|
|
||||||
let _ = writeln!(out, " HIVE_CONTEXT_WINDOW_TOKENS_{upper_key} = \"{val}\";");
|
|
||||||
}
|
|
||||||
out.push_str(
|
|
||||||
r#" HYPERHIVE_STATE_DIR = "/agents/${name}/state";
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
|
|
|
||||||
|
|
@ -83,7 +83,6 @@ pub async fn run(coord: &Arc<Coordinator>) -> Result<()> {
|
||||||
&coord.hyperhive_flake,
|
&coord.hyperhive_flake,
|
||||||
coord.dashboard_port,
|
coord.dashboard_port,
|
||||||
&coord.operator_pronouns,
|
&coord.operator_pronouns,
|
||||||
&coord.context_window_tokens,
|
|
||||||
&agents,
|
&agents,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
|
|
|
||||||
|
|
@ -77,7 +77,6 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse {
|
||||||
&notes_dir,
|
&notes_dir,
|
||||||
coord.dashboard_port,
|
coord.dashboard_port,
|
||||||
&coord.operator_pronouns,
|
&coord.operator_pronouns,
|
||||||
&coord.context_window_tokens,
|
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
|
|
@ -140,7 +139,6 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse {
|
||||||
&notes_dir,
|
&notes_dir,
|
||||||
coord.dashboard_port,
|
coord.dashboard_port,
|
||||||
&coord.operator_pronouns,
|
&coord.operator_pronouns,
|
||||||
&coord.context_window_tokens,
|
|
||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
// Mirror auto_update::rebuild_agent — the manager wants
|
// Mirror auto_update::rebuild_agent — the manager wants
|
||||||
|
|
|
||||||
|
|
@ -57,31 +57,6 @@ in
|
||||||
approval needed.
|
approval needed.
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
contextWindowTokens = lib.mkOption {
|
|
||||||
type = lib.types.attrsOf lib.types.int;
|
|
||||||
default = {
|
|
||||||
haiku = 200000;
|
|
||||||
sonnet = 1000000;
|
|
||||||
opus = 1000000;
|
|
||||||
};
|
|
||||||
example = {
|
|
||||||
haiku = 150000;
|
|
||||||
sonnet = 900000;
|
|
||||||
};
|
|
||||||
description = ''
|
|
||||||
Per-model context-window sizes in tokens. Each key is a
|
|
||||||
model-family short name matched case-insensitively as a
|
|
||||||
substring of the active model name at runtime (e.g. `"sonnet"`
|
|
||||||
matches `"claude-sonnet-4-5"`). The defaults cover the known
|
|
||||||
Anthropic families; add entries for new models or override
|
|
||||||
existing ones here to change the window for all agents at once.
|
|
||||||
|
|
||||||
Passed to `hive-c0re serve` as JSON and injected into every
|
|
||||||
container's harness service environment as
|
|
||||||
`HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER>`. Changes propagate
|
|
||||||
on the next `↻ R3BU1LD` — no per-agent approval needed.
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
config = lib.mkIf cfg.enable {
|
config = lib.mkIf cfg.enable {
|
||||||
|
|
@ -114,7 +89,7 @@ in
|
||||||
];
|
];
|
||||||
environment.HYPERHIVE_GIT = "${pkgs.git}/bin/git";
|
environment.HYPERHIVE_GIT = "${pkgs.git}/bin/git";
|
||||||
serviceConfig = {
|
serviceConfig = {
|
||||||
ExecStart = "${cfg.package}/bin/hive-c0re --socket /run/hyperhive/host.sock serve --hyperhive-flake ${cfg.hyperhiveFlake} --dashboard-port ${toString cfg.dashboardPort} --operator-pronouns ${lib.escapeShellArg cfg.operatorPronouns} --context-window-tokens ${lib.escapeShellArg (builtins.toJSON cfg.contextWindowTokens)}";
|
ExecStart = "${cfg.package}/bin/hive-c0re --socket /run/hyperhive/host.sock serve --hyperhive-flake ${cfg.hyperhiveFlake} --dashboard-port ${toString cfg.dashboardPort} --operator-pronouns ${lib.escapeShellArg cfg.operatorPronouns}";
|
||||||
Restart = "on-failure";
|
Restart = "on-failure";
|
||||||
RestartSec = 2;
|
RestartSec = 2;
|
||||||
RuntimeDirectory = "hyperhive";
|
RuntimeDirectory = "hyperhive";
|
||||||
|
|
|
||||||
|
|
@ -15,27 +15,6 @@
|
||||||
# only opts in from its own `agent.nix`.
|
# only opts in from its own `agent.nix`.
|
||||||
imports = [ ./weston-vnc.nix ];
|
imports = [ ./weston-vnc.nix ];
|
||||||
|
|
||||||
options.hyperhive.model = lib.mkOption {
|
|
||||||
type = lib.types.str;
|
|
||||||
default = "haiku";
|
|
||||||
example = "sonnet";
|
|
||||||
description = ''
|
|
||||||
Default claude model for this agent. Sets the `HIVE_DEFAULT_MODEL`
|
|
||||||
environment variable consumed by the harness at boot; if no
|
|
||||||
persisted model choice exists in the agent's state dir the harness
|
|
||||||
falls back to this value. The operator can still switch the model at
|
|
||||||
runtime via the per-agent web UI — that choice is persisted to the
|
|
||||||
state dir and takes precedence over this default until the agent is
|
|
||||||
purged.
|
|
||||||
|
|
||||||
Valid values are the short model names that `claude --model` accepts:
|
|
||||||
`"haiku"`, `"sonnet"`, `"opus"` (or any future identifier). The
|
|
||||||
harness derives sensible watermarks from the model family:
|
|
||||||
haiku → 200 000 token window; sonnet / opus → 1 000 000 token window.
|
|
||||||
Override the derived window via `hyperhive.contextWindowTokens`.
|
|
||||||
'';
|
|
||||||
};
|
|
||||||
|
|
||||||
options.hyperhive.allowedBashPatterns = lib.mkOption {
|
options.hyperhive.allowedBashPatterns = lib.mkOption {
|
||||||
type = lib.types.listOf lib.types.str;
|
type = lib.types.listOf lib.types.str;
|
||||||
default = [ ];
|
default = [ ];
|
||||||
|
|
@ -229,16 +208,6 @@
|
||||||
environment.etc."hyperhive/claude-plugins-auto-update.json".text =
|
environment.etc."hyperhive/claude-plugins-auto-update.json".text =
|
||||||
builtins.toJSON config.hyperhive.claudePluginsAutoUpdate;
|
builtins.toJSON config.hyperhive.claudePluginsAutoUpdate;
|
||||||
|
|
||||||
# HIVE_DEFAULT_MODEL seeds the initial model selection when no persisted
|
|
||||||
# model choice exists in the state dir. SHELL must be set so claude's
|
|
||||||
# Bash tool finds a POSIX shell.
|
|
||||||
# HIVE_CONTEXT_WINDOW_TOKENS_* are injected by the meta flake from the
|
|
||||||
# host-level `services.hive-c0re.contextWindowTokens` option — not set here.
|
|
||||||
environment.variables = {
|
|
||||||
HIVE_DEFAULT_MODEL = config.hyperhive.model;
|
|
||||||
SHELL = "${pkgs.bashInteractive}/bin/bash";
|
|
||||||
};
|
|
||||||
|
|
||||||
boot.isNspawnContainer = true;
|
boot.isNspawnContainer = true;
|
||||||
|
|
||||||
# Every agent gets flakes + the modern `nix` CLI out of the box.
|
# Every agent gets flakes + the modern `nix` CLI out of the box.
|
||||||
|
|
@ -338,6 +307,9 @@
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# claude's Bash tool refuses to run without a POSIX shell + $SHELL set.
|
||||||
|
environment.variables.SHELL = "${pkgs.bashInteractive}/bin/bash";
|
||||||
|
|
||||||
system.stateVersion = "25.11";
|
system.stateVersion = "25.11";
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue