Compare commits

..

4 commits

13 changed files with 194 additions and 52 deletions

View file

@ -275,13 +275,65 @@ pub enum TurnState {
Compacting, Compacting,
} }
/// Default claude model when nothing's been set at runtime. The /// Default claude model when nothing's been set at runtime. Overridable
/// operator can switch via `/model <name>` in the web terminal; the /// via the `HIVE_DEFAULT_MODEL` env var (set from `hyperhive.model` in
/// chosen model lives in `Bus::model` for the rest of the harness /// the container's `agent.nix`). The operator can also switch at runtime
/// process's life (resets on restart, by design — operator overrides /// via `/model <name>` in the web terminal; the chosen model is persisted
/// shouldn't survive accidentally). /// to the state dir so it survives restarts.
pub const DEFAULT_MODEL: &str = "haiku"; pub const DEFAULT_MODEL: &str = "haiku";
/// Return the initial default model name.
///
/// Reads the `HIVE_DEFAULT_MODEL` env var. When it holds anything other
/// than an empty / whitespace-only string, the whitespace-trimmed value is
/// returned; otherwise (unset, not valid Unicode, or blank) the compiled-in
/// [`DEFAULT_MODEL`] is returned.
///
/// Each call that takes the env-var branch leaks one small allocation to
/// obtain the `'static` lifetime, so call this once while seeding the
/// initial model rather than on a hot path.
#[must_use]
pub fn default_model() -> &'static str {
    match std::env::var("HIVE_DEFAULT_MODEL") {
        // Return the same trimmed text that was validated, so stray
        // whitespace around the configured value never ends up inside the
        // model name.
        Ok(raw) if !raw.trim().is_empty() => Box::leak(raw.trim().to_owned().into_boxed_str()),
        _ => DEFAULT_MODEL,
    }
}
/// Context-window size in tokens for a given model name.
///
/// Canonical per-model sizes come from the host-level
/// `services.hive-c0re.contextWindowTokens` NixOS option and reach the
/// harness as `HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER>` env vars — so this
/// function normally just reads them. The Rust code carries no model
/// knowledge; updating model families only requires a Nix change.
///
/// Resolution order (first match wins):
/// 1. `HIVE_CONTEXT_WINDOW_TOKENS_<KEY>` — key (lowercased) is a
///    substring of the lowercased model name. When several keys match,
///    the longest (most specific) key wins; ties are broken by key text
///    and then by the larger value, so the result never depends on the
///    order in which the environment is enumerated.
/// 2. `HIVE_CONTEXT_WINDOW_TOKENS` — single global override (any model).
/// 3. Hard fallback: `200_000` (conservative; used when no env var applies).
///
/// Values that do not parse as a positive integer are ignored at every
/// step. Environment entries that are not valid Unicode are skipped
/// instead of panicking.
#[must_use]
pub fn context_window_tokens(model: &str) -> u64 {
    const PER_MODEL_PREFIX: &str = "HIVE_CONTEXT_WINDOW_TOKENS_";
    const FALLBACK_TOKENS: u64 = 200_000;

    // Accept only strictly positive integers; surrounding whitespace is fine.
    fn parse_tokens(raw: &str) -> Option<u64> {
        raw.trim().parse::<u64>().ok().filter(|&v| v > 0)
    }

    let wanted = model.to_ascii_lowercase();

    // `vars_os` rather than `vars`: the latter panics as soon as *any*
    // environment entry (even an unrelated one) is not valid Unicode.
    let per_model = std::env::vars_os()
        .filter_map(|(key, val)| {
            let key = key.into_string().ok()?;
            let family = key.strip_prefix(PER_MODEL_PREFIX)?.to_ascii_lowercase();
            if family.is_empty() || !wanted.contains(&family) {
                return None;
            }
            let tokens = parse_tokens(val.to_str()?)?;
            Some((family, tokens))
        })
        .max_by(|a, b| {
            a.0.len()
                .cmp(&b.0.len())
                .then_with(|| b.0.cmp(&a.0))
                .then_with(|| a.1.cmp(&b.1))
        });
    if let Some((_, tokens)) = per_model {
        return tokens;
    }

    // Global override (single value, any model), else the hard fallback.
    std::env::var("HIVE_CONTEXT_WINDOW_TOKENS")
        .ok()
        .and_then(|raw| parse_tokens(&raw))
        .unwrap_or(FALLBACK_TOKENS)
}
#[derive(Clone)] #[derive(Clone)]
pub struct Bus { pub struct Bus {
tx: Arc<broadcast::Sender<BusEvent>>, tx: Arc<broadcast::Sender<BusEvent>>,
@ -351,7 +403,7 @@ impl Bus {
} }
}; };
let (tx, _) = broadcast::channel(CHANNEL_CAPACITY); let (tx, _) = broadcast::channel(CHANNEL_CAPACITY);
let initial_model = load_model().unwrap_or_else(|| DEFAULT_MODEL.to_owned()); let initial_model = load_model().unwrap_or_else(|| default_model().to_owned());
// Restore rate_limited from the sentinel file — if the harness // Restore rate_limited from the sentinel file — if the harness
// crashed while parked, we should still show the right status on // crashed while parked, we should still show the right status on
// cold load until the next turn clears it. // cold load until the next turn clears it.

View file

@ -54,16 +54,6 @@ const RATE_LIMIT_MARKERS: &[&str] = &[
/// capacity limits. /// capacity limits.
const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300; const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300;
/// Token watermark for *auto session-reset*. When context is at or above this
/// many tokens AND the prompt cache has gone cold (idle time >= `CACHE_TTL_SECS`),
/// the harness drops `--continue` so the next turn starts fresh. Running any
/// turn (even a checkpoint) before the reset would re-upload the full context
/// and warm the cache, defeating the cost purpose — so the reset happens
/// immediately with no preceding turn. Default is ~50% of a 200k-token
/// window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set to `0`
/// to disable.
const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;
/// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on /// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on
/// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the /// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the
/// idle gap exceeds this, the cache prefix has likely expired and the next /// idle gap exceeds this, the cache prefix has likely expired and the next
@ -75,19 +65,6 @@ const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;
/// `0` to disable (always resume). /// `0` to disable (always resume).
const DEFAULT_CACHE_TTL_SECS: u64 = 3600; const DEFAULT_CACHE_TTL_SECS: u64 = 3600;
/// Token watermark for *proactive* compaction. Once a turn finishes with
/// the last inference's context size at or above this many tokens,
/// `drive_turn` runs one dedicated notes-checkpoint turn (so the agent
/// can flush durable state into `/state`) and then `/compact` — while the
/// session is still healthy enough to run a turn at all. This is distinct
/// from the reactive `PROMPT_TOO_LONG_MARKER` path, which only fires once
/// the session is *already* past the window: at that point no turn can
/// run on it, so the reactive path just compacts + retries with no
/// checkpoint. Default is ~75% of a 200k-token window; override via
/// `HIVE_COMPACT_WATERMARK_TOKENS`, or set that to `0` to disable
/// proactive compaction entirely (the reactive path always applies).
const DEFAULT_COMPACT_WATERMARK_TOKENS: u64 = 150_000;
/// Synthetic wake prompt for the proactive notes-checkpoint turn. Not an /// Synthetic wake prompt for the proactive notes-checkpoint turn. Not an
/// inbox message — the harness injects it directly so the agent gets one /// inbox message — the harness injects it directly so the agent gets one
/// turn to persist durable state before `/compact` collapses the /// turn to persist durable state before `/compact` collapses the
@ -212,14 +189,19 @@ pub fn rate_limit_sleep_secs() -> u64 {
.unwrap_or(DEFAULT_RATE_LIMIT_SLEEP_SECS) .unwrap_or(DEFAULT_RATE_LIMIT_SLEEP_SECS)
} }
/// Resolve the auto-reset watermark: `HIVE_AUTO_RESET_WATERMARK_TOKENS` if /// Resolve the auto-reset watermark. Priority order:
/// set to a valid integer, else `DEFAULT_AUTO_RESET_WATERMARK_TOKENS`. `0` /// 1. `HIVE_AUTO_RESET_WATERMARK_TOKENS` env var (explicit override).
/// disables auto-reset entirely. /// 2. 50% of the model's context window (derived from `bus.model()` +
fn auto_reset_watermark_tokens() -> u64 { /// `events::context_window_tokens`).
std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS") /// `0` disables auto-reset entirely.
fn auto_reset_watermark_tokens(bus: &Bus) -> u64 {
if let Some(v) = std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS")
.ok() .ok()
.and_then(|s| s.trim().parse::<u64>().ok()) .and_then(|s| s.trim().parse::<u64>().ok())
.unwrap_or(DEFAULT_AUTO_RESET_WATERMARK_TOKENS) {
return v;
}
crate::events::context_window_tokens(&bus.model()) / 2
} }
/// Resolve the assumed cache TTL: `HIVE_CACHE_TTL_SECS` if set, else /// Resolve the assumed cache TTL: `HIVE_CACHE_TTL_SECS` if set, else
@ -232,14 +214,19 @@ fn cache_ttl_secs() -> u64 {
.unwrap_or(DEFAULT_CACHE_TTL_SECS) .unwrap_or(DEFAULT_CACHE_TTL_SECS)
} }
/// Resolve the proactive-compaction watermark: `HIVE_COMPACT_WATERMARK_TOKENS` /// Resolve the proactive-compaction watermark. Priority order:
/// if set to a valid integer, else `DEFAULT_COMPACT_WATERMARK_TOKENS`. A /// 1. `HIVE_COMPACT_WATERMARK_TOKENS` env var (explicit override).
/// value of `0` disables proactive compaction. /// 2. 75% of the model's context window (derived from `bus.model()` +
fn compact_watermark_tokens() -> u64 { /// `events::context_window_tokens`).
std::env::var("HIVE_COMPACT_WATERMARK_TOKENS") /// `0` disables proactive compaction (reactive path still applies).
fn compact_watermark_tokens(bus: &Bus) -> u64 {
if let Some(v) = std::env::var("HIVE_COMPACT_WATERMARK_TOKENS")
.ok() .ok()
.and_then(|s| s.trim().parse::<u64>().ok()) .and_then(|s| s.trim().parse::<u64>().ok())
.unwrap_or(DEFAULT_COMPACT_WATERMARK_TOKENS) {
return v;
}
crate::events::context_window_tokens(&bus.model()) * 3 / 4
} }
/// Drive one turn end-to-end. Three paths layer on top of the raw `run_turn`: /// Drive one turn end-to-end. Three paths layer on top of the raw `run_turn`:
@ -291,7 +278,7 @@ pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutco
/// checkpoint or compaction is logged + surfaced as a Note but never /// checkpoint or compaction is logged + surfaced as a Note but never
/// fails the turn that already succeeded. /// fails the turn that already succeeded.
async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) { async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
let watermark = compact_watermark_tokens(); let watermark = compact_watermark_tokens(bus);
if watermark == 0 { if watermark == 0 {
return; // proactive compaction disabled return; // proactive compaction disabled
} }
@ -336,7 +323,7 @@ async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
/// any turn before the reset would re-upload and re-warm the cache, which /// any turn before the reset would re-upload and re-warm the cache, which
/// defeats the cost-optimisation purpose entirely. /// defeats the cost-optimisation purpose entirely.
fn maybe_auto_reset(bus: &Bus) { fn maybe_auto_reset(bus: &Bus) {
let watermark = auto_reset_watermark_tokens(); let watermark = auto_reset_watermark_tokens(bus);
if watermark == 0 { if watermark == 0 {
return; // auto-reset disabled return; // auto-reset disabled
} }

View file

@ -357,6 +357,12 @@ struct StateSnapshot {
/// the operator can see what they just switched to (and what's /// the operator can see what they just switched to (and what's
/// in flight). Mutable at runtime via `POST /api/model`. /// in flight). Mutable at runtime via `POST /api/model`.
model: String, model: String,
/// Effective context-window token budget for the current model.
/// Derived from `events::context_window_tokens(&model)` — respects
/// per-model `HIVE_CONTEXT_WINDOW_TOKENS_<KEY>` and global
/// `HIVE_CONTEXT_WINDOW_TOKENS` overrides, then falls back to a fixed
/// 200 000-token default (no model-family heuristic). Consumers (e.g. dashboard
/// badge) use this to render the ctx-usage percentage.
context_window_tokens: u64,
/// Last-inference token usage from the most recent completed /// Last-inference token usage from the most recent completed
/// turn — represents the current context-window size at turn-end. /// turn — represents the current context-window size at turn-end.
/// `null` until the first turn finishes. /// `null` until the first turn finishes.
@ -451,6 +457,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
let inbox = recent_inbox(&state.socket, state.flavor()).await; let inbox = recent_inbox(&state.socket, state.flavor()).await;
let (turn_state, turn_state_since) = state.bus.state_snapshot(); let (turn_state, turn_state_since) = state.bus.state_snapshot();
let model = state.bus.model(); let model = state.bus.model();
let context_window_tokens = crate::events::context_window_tokens(&model);
let ctx_usage = state.bus.last_ctx_usage(); let ctx_usage = state.bus.last_ctx_usage();
let cost_usage = state.bus.last_cost_usage(); let cost_usage = state.bus.last_cost_usage();
axum::Json(StateSnapshot { axum::Json(StateSnapshot {
@ -463,6 +470,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
turn_state, turn_state,
turn_state_since, turn_state_since,
model, model,
context_window_tokens,
ctx_usage, ctx_usage,
cost_usage, cost_usage,
gui_enabled: state.gui_vnc_port.is_some(), gui_enabled: state.gui_vnc_port.is_some(),

View file

@ -91,6 +91,7 @@ pub async fn approve(coord: Arc<Coordinator>, id: i64) -> Result<()> {
&notes_dir, &notes_dir,
coord_bg.dashboard_port, coord_bg.dashboard_port,
&coord_bg.operator_pronouns, &coord_bg.operator_pronouns,
&coord_bg.context_window_tokens,
) )
.await; .await;
drop(guard); drop(guard);
@ -415,6 +416,7 @@ async fn sync_meta_after_lifecycle(coord: &Coordinator) -> Result<()> {
&coord.hyperhive_flake, &coord.hyperhive_flake,
coord.dashboard_port, coord.dashboard_port,
&coord.operator_pronouns, &coord.operator_pronouns,
&coord.context_window_tokens,
&agents, &agents,
) )
.await .await

View file

@ -73,6 +73,7 @@ pub async fn rebuild_agent(coord: &Arc<Coordinator>, name: &str, current_rev: &s
&notes_dir, &notes_dir,
coord.dashboard_port, coord.dashboard_port,
&coord.operator_pronouns, &coord.operator_pronouns,
&coord.context_window_tokens,
) )
.await; .await;
drop(guard); drop(guard);
@ -160,6 +161,7 @@ pub async fn ensure_manager(coord: &Arc<Coordinator>) -> Result<()> {
&notes_dir, &notes_dir,
coord.dashboard_port, coord.dashboard_port,
&coord.operator_pronouns, &coord.operator_pronouns,
&coord.context_window_tokens,
) )
.await?; .await?;
if let Some(rev) = current_rev { if let Some(rev) = current_rev {

View file

@ -51,6 +51,13 @@ pub struct Coordinator {
/// meta flake); the harness substitutes it into the agent / /// meta flake); the harness substitutes it into the agent /
/// manager system prompt at boot. /// manager system prompt at boot.
pub operator_pronouns: String, pub operator_pronouns: String,
/// Per-model context-window sizes in tokens. Set via the host-level
/// `services.hive-c0re.contextWindowTokens` NixOS option; injected
/// into each container as `HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER>`
/// by the meta flake renderer. The harness uses these to derive
/// compaction / auto-reset watermarks and exposes the active value
/// on `/api/state` as `context_window_tokens`.
pub context_window_tokens: std::collections::HashMap<String, u64>,
agents: Mutex<HashMap<String, AgentSocket>>, agents: Mutex<HashMap<String, AgentSocket>>,
/// Agents whose lifecycle action (currently just spawn) is in flight. /// Agents whose lifecycle action (currently just spawn) is in flight.
/// Read by the dashboard to render a spinner; cleared when the action /// Read by the dashboard to render a spinner; cleared when the action
@ -139,6 +146,7 @@ impl Coordinator {
hyperhive_flake: String, hyperhive_flake: String,
dashboard_port: u16, dashboard_port: u16,
operator_pronouns: String, operator_pronouns: String,
context_window_tokens: std::collections::HashMap<String, u64>,
) -> Result<Self> { ) -> Result<Self> {
let broker = Broker::open(db_path).context("open broker")?; let broker = Broker::open(db_path).context("open broker")?;
let approvals = Approvals::open(db_path).context("open approvals")?; let approvals = Approvals::open(db_path).context("open approvals")?;
@ -152,6 +160,7 @@ impl Coordinator {
hyperhive_flake, hyperhive_flake,
dashboard_port, dashboard_port,
operator_pronouns, operator_pronouns,
context_window_tokens,
agents: Mutex::new(HashMap::new()), agents: Mutex::new(HashMap::new()),
transient: Mutex::new(HashMap::new()), transient: Mutex::new(HashMap::new()),
dashboard_events, dashboard_events,

View file

@ -138,6 +138,7 @@ pub async fn spawn(
notes_dir: &Path, notes_dir: &Path,
dashboard_port: u16, dashboard_port: u16,
operator_pronouns: &str, operator_pronouns: &str,
context_window_tokens: &std::collections::HashMap<String, u64>,
) -> Result<()> { ) -> Result<()> {
validate(name)?; validate(name)?;
if let Some(other) = port_collision(name).await { if let Some(other) = port_collision(name).await {
@ -154,7 +155,7 @@ pub async fn spawn(
// before `nixos-container create` so the `--flake meta#<name>` // before `nixos-container create` so the `--flake meta#<name>`
// ref resolves. // ref resolves.
let agents = agents_after_spawn(name).await?; let agents = agents_after_spawn(name).await?;
crate::meta::sync_agents(hyperhive_flake, dashboard_port, operator_pronouns, &agents).await?; crate::meta::sync_agents(hyperhive_flake, dashboard_port, operator_pronouns, context_window_tokens, &agents).await?;
let container = container_name(name); let container = container_name(name);
let flake_ref = format!("{}#{name}", crate::meta::meta_dir().display()); let flake_ref = format!("{}#{name}", crate::meta::meta_dir().display());
run(&["create", &container, "--flake", &flake_ref]).await?; run(&["create", &container, "--flake", &flake_ref]).await?;
@ -273,6 +274,7 @@ pub async fn rebuild(
notes_dir: &Path, notes_dir: &Path,
dashboard_port: u16, dashboard_port: u16,
operator_pronouns: &str, operator_pronouns: &str,
context_window_tokens: &std::collections::HashMap<String, u64>,
) -> Result<()> { ) -> Result<()> {
// Sync the meta flake (idempotent — no-op when the rendered // Sync the meta flake (idempotent — no-op when the rendered
// flake matches disk) so a manual rebuild from the dashboard // flake matches disk) so a manual rebuild from the dashboard
@ -280,7 +282,7 @@ pub async fn rebuild(
// got added directly via `nixos-container create` outside // got added directly via `nixos-container create` outside
// hive-c0re). // hive-c0re).
let agents = agents_for_meta(None).await?; let agents = agents_for_meta(None).await?;
crate::meta::sync_agents(hyperhive_flake, dashboard_port, operator_pronouns, &agents).await?; crate::meta::sync_agents(hyperhive_flake, dashboard_port, operator_pronouns, context_window_tokens, &agents).await?;
// Then bump just this agent's input — picks up whatever // Then bump just this agent's input — picks up whatever
// `applied/<n>/main` currently points at (deployed/<latest>). // `applied/<n>/main` currently points at (deployed/<latest>).
// Commits the lock if it changed. // Commits the lock if it changed.

View file

@ -62,6 +62,12 @@ enum Cmd {
/// system prompt can mention them. Default: `she/her`. /// system prompt can mention them. Default: `she/her`.
#[arg(long, default_value = "she/her")] #[arg(long, default_value = "she/her")]
operator_pronouns: String, operator_pronouns: String,
/// Per-model context-window sizes, as JSON object mapping model-family
/// short name to token count. Threaded into each container as
/// `HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER>` env vars. Set via the
/// `services.hive-c0re.contextWindowTokens` NixOS option.
#[arg(long, default_value = r#"{"haiku":200000,"sonnet":1000000,"opus":1000000}"#)]
context_window_tokens: String,
}, },
/// Spawn a new agent container directly (`hive-agent-<name>`). Bypasses /// Spawn a new agent container directly (`hive-agent-<name>`). Bypasses
/// the approval queue — use only as an operator on the host. For /// the approval queue — use only as an operator on the host. For
@ -109,12 +115,17 @@ async fn main() -> Result<()> {
db, db,
dashboard_port, dashboard_port,
operator_pronouns, operator_pronouns,
context_window_tokens,
} => { } => {
let cwt: std::collections::HashMap<String, u64> =
serde_json::from_str(&context_window_tokens)
.context("--context-window-tokens: invalid JSON")?;
let coord = Arc::new(Coordinator::open( let coord = Arc::new(Coordinator::open(
&db, &db,
hyperhive_flake, hyperhive_flake,
dashboard_port, dashboard_port,
operator_pronouns, operator_pronouns,
cwt,
)?); )?);
manager_server::start(coord.clone())?; manager_server::start(coord.clone())?;
// Idempotent pre-flight: rewrite pre-meta-layout applied // Idempotent pre-flight: rewrite pre-meta-layout applied

View file

@ -66,13 +66,14 @@ pub async fn sync_agents(
hyperhive_flake: &str, hyperhive_flake: &str,
dashboard_port: u16, dashboard_port: u16,
operator_pronouns: &str, operator_pronouns: &str,
context_window_tokens: &std::collections::HashMap<String, u64>,
agents: &[AgentSpec], agents: &[AgentSpec],
) -> Result<()> { ) -> Result<()> {
let _guard = META_LOCK.lock().await; let _guard = META_LOCK.lock().await;
let dir = meta_dir(); let dir = meta_dir();
std::fs::create_dir_all(&dir).with_context(|| format!("create {}", dir.display()))?; std::fs::create_dir_all(&dir).with_context(|| format!("create {}", dir.display()))?;
let new_flake = render_flake(hyperhive_flake, dashboard_port, operator_pronouns, agents); let new_flake = render_flake(hyperhive_flake, dashboard_port, operator_pronouns, context_window_tokens, agents);
let flake_path = dir.join("flake.nix"); let flake_path = dir.join("flake.nix");
let on_disk = std::fs::read_to_string(&flake_path).unwrap_or_default(); let on_disk = std::fs::read_to_string(&flake_path).unwrap_or_default();
let initial = !dir.join(".git").exists(); let initial = !dir.join(".git").exists();
@ -235,6 +236,7 @@ fn render_flake(
hyperhive_flake: &str, hyperhive_flake: &str,
dashboard_port: u16, dashboard_port: u16,
operator_pronouns: &str, operator_pronouns: &str,
context_window_tokens: &std::collections::HashMap<String, u64>,
agents: &[AgentSpec], agents: &[AgentSpec],
) -> String { ) -> String {
use std::fmt::Write as _; use std::fmt::Write as _;
@ -283,8 +285,19 @@ fn render_flake(
HIVE_PORT = toString port; HIVE_PORT = toString port;
HIVE_LABEL = name; HIVE_LABEL = name;
HIVE_DASHBOARD_PORT = toString dashboardPort; HIVE_DASHBOARD_PORT = toString dashboardPort;
HIVE_OPERATOR_PRONOUNS = operatorPronouns; HIVE_OPERATOR_PRONOUNS = operatorPronouns;"#,
HYPERHIVE_STATE_DIR = "/agents/${name}/state"; );
// Per-model context-window env vars declared in the host-level
// `services.hive-c0re.contextWindowTokens` option. Use a sorted
// iterator for deterministic flake output (no spurious git diffs).
let mut sorted_tokens: Vec<(&String, &u64)> = context_window_tokens.iter().collect();
sorted_tokens.sort_by_key(|(k, _)| k.as_str());
for (key, val) in &sorted_tokens {
let upper_key = key.to_ascii_uppercase();
let _ = writeln!(out, " HIVE_CONTEXT_WINDOW_TOKENS_{upper_key} = \"{val}\";");
}
out.push_str(
r#" HYPERHIVE_STATE_DIR = "/agents/${name}/state";
}; };
} }
]; ];

View file

@ -83,6 +83,7 @@ pub async fn run(coord: &Arc<Coordinator>) -> Result<()> {
&coord.hyperhive_flake, &coord.hyperhive_flake,
coord.dashboard_port, coord.dashboard_port,
&coord.operator_pronouns, &coord.operator_pronouns,
&coord.context_window_tokens,
&agents, &agents,
) )
.await .await

View file

@ -77,6 +77,7 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse {
&notes_dir, &notes_dir,
coord.dashboard_port, coord.dashboard_port,
&coord.operator_pronouns, &coord.operator_pronouns,
&coord.context_window_tokens,
) )
.await .await
{ {
@ -139,6 +140,7 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse {
&notes_dir, &notes_dir,
coord.dashboard_port, coord.dashboard_port,
&coord.operator_pronouns, &coord.operator_pronouns,
&coord.context_window_tokens,
) )
.await; .await;
// Mirror auto_update::rebuild_agent — the manager wants // Mirror auto_update::rebuild_agent — the manager wants

View file

@ -57,6 +57,31 @@ in
approval needed. approval needed.
''; '';
}; };
# Per-model context-window sizes, serialised to JSON and handed to
# `hive-c0re serve --context-window-tokens`.
contextWindowTokens = lib.mkOption {
  # Strictly positive integers only: the daemon deserialises the JSON
  # values as unsigned integers (a negative entry would make `serve` fail
  # at startup) and the harness ignores a window of 0, so reject both at
  # evaluation time instead.
  type = lib.types.attrsOf lib.types.ints.positive;
  default = {
    haiku = 200000;
    sonnet = 1000000;
    opus = 1000000;
  };
  example = {
    haiku = 150000;
    sonnet = 900000;
  };
  description = ''
    Per-model context-window sizes in tokens (positive integers). Each
    key is a model-family short name matched case-insensitively as a
    substring of the active model name at runtime (e.g. `"sonnet"`
    matches `"claude-sonnet-4-5"`). The defaults cover the known
    Anthropic families; add entries for new models or override
    existing ones here to change the window for all agents at once.
    Passed to `hive-c0re serve` as JSON and injected into every
    container's harness service environment as
    `HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER>`. Changes propagate
    on the next `R3BU1LD` — no per-agent approval needed.
  '';
};
}; };
config = lib.mkIf cfg.enable { config = lib.mkIf cfg.enable {
@ -89,7 +114,7 @@ in
]; ];
environment.HYPERHIVE_GIT = "${pkgs.git}/bin/git"; environment.HYPERHIVE_GIT = "${pkgs.git}/bin/git";
serviceConfig = { serviceConfig = {
ExecStart = "${cfg.package}/bin/hive-c0re --socket /run/hyperhive/host.sock serve --hyperhive-flake ${cfg.hyperhiveFlake} --dashboard-port ${toString cfg.dashboardPort} --operator-pronouns ${lib.escapeShellArg cfg.operatorPronouns}"; ExecStart = "${cfg.package}/bin/hive-c0re --socket /run/hyperhive/host.sock serve --hyperhive-flake ${cfg.hyperhiveFlake} --dashboard-port ${toString cfg.dashboardPort} --operator-pronouns ${lib.escapeShellArg cfg.operatorPronouns} --context-window-tokens ${lib.escapeShellArg (builtins.toJSON cfg.contextWindowTokens)}";
Restart = "on-failure"; Restart = "on-failure";
RestartSec = 2; RestartSec = 2;
RuntimeDirectory = "hyperhive"; RuntimeDirectory = "hyperhive";

View file

@ -15,6 +15,27 @@
# only opts in from its own `agent.nix`. # only opts in from its own `agent.nix`.
imports = [ ./weston-vnc.nix ]; imports = [ ./weston-vnc.nix ];
# Per-agent default model; exported to the harness as `HIVE_DEFAULT_MODEL`.
options.hyperhive.model = lib.mkOption {
  type = lib.types.str;
  default = "haiku";
  example = "sonnet";
  description = ''
    Default claude model for this agent. Sets the `HIVE_DEFAULT_MODEL`
    environment variable consumed by the harness at boot; if no
    persisted model choice exists in the agent's state dir the harness
    falls back to this value. The operator can still switch the model at
    runtime via the per-agent web UI — that choice is persisted to the
    state dir and takes precedence over this default until the agent is
    purged.

    Valid values are the short model names that `claude --model` accepts:
    `"haiku"`, `"sonnet"`, `"opus"` (or any future identifier). The
    harness looks up the model's context window from the
    `HIVE_CONTEXT_WINDOW_TOKENS_<KEY>` variables injected from the
    host-level `services.hive-c0re.contextWindowTokens` option (defaults:
    haiku 200 000 tokens; sonnet / opus 1 000 000 tokens) and derives
    its compaction / auto-reset watermarks from that window. Change the
    window sizes through that host-level option.
  '';
};
options.hyperhive.allowedBashPatterns = lib.mkOption { options.hyperhive.allowedBashPatterns = lib.mkOption {
type = lib.types.listOf lib.types.str; type = lib.types.listOf lib.types.str;
default = [ ]; default = [ ];
@ -208,6 +229,16 @@
environment.etc."hyperhive/claude-plugins-auto-update.json".text = environment.etc."hyperhive/claude-plugins-auto-update.json".text =
builtins.toJSON config.hyperhive.claudePluginsAutoUpdate; builtins.toJSON config.hyperhive.claudePluginsAutoUpdate;
# HIVE_DEFAULT_MODEL seeds the initial model selection when no persisted
# model choice exists in the state dir. SHELL must be set so claude's
# Bash tool finds a POSIX shell.
# HIVE_CONTEXT_WINDOW_TOKENS_* are injected by the meta flake from the
# host-level `services.hive-c0re.contextWindowTokens` option — not set here.
environment.variables = {
HIVE_DEFAULT_MODEL = config.hyperhive.model;
SHELL = "${pkgs.bashInteractive}/bin/bash";
};
boot.isNspawnContainer = true; boot.isNspawnContainer = true;
# Every agent gets flakes + the modern `nix` CLI out of the box. # Every agent gets flakes + the modern `nix` CLI out of the box.
@ -307,9 +338,6 @@
}; };
}; };
# claude's Bash tool refuses to run without a POSIX shell + $SHELL set.
environment.variables.SHELL = "${pkgs.bashInteractive}/bin/bash";
system.stateVersion = "25.11"; system.stateVersion = "25.11";
}; };
} }