From 9064cd3c57f1c9c248e2e7abff07e41b7865a810 Mon Sep 17 00:00:00 2001 From: damocles Date: Wed, 20 May 2026 15:12:37 +0200 Subject: [PATCH 1/4] model/context: configurable default model + model-derived context window --- hive-ag3nt/src/events.rs | 50 ++++++++++++++++++++++++---- hive-ag3nt/src/turn.rs | 61 +++++++++++++--------------------- nix/templates/harness-base.nix | 49 +++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 43 deletions(-) diff --git a/hive-ag3nt/src/events.rs b/hive-ag3nt/src/events.rs index 36b9e26..b7519a6 100644 --- a/hive-ag3nt/src/events.rs +++ b/hive-ag3nt/src/events.rs @@ -275,13 +275,51 @@ pub enum TurnState { Compacting, } -/// Default claude model when nothing's been set at runtime. The -/// operator can switch via `/model ` in the web terminal; the -/// chosen model lives in `Bus::model` for the rest of the harness -/// process's life (resets on restart, by design — operator overrides -/// shouldn't survive accidentally). +/// Default claude model when nothing's been set at runtime. Overridable +/// via the `HIVE_DEFAULT_MODEL` env var (set from `hyperhive.model` in +/// the container's `agent.nix`). The operator can also switch at runtime +/// via `/model ` in the web terminal; the chosen model is persisted +/// to the state dir so it survives restarts. pub const DEFAULT_MODEL: &str = "haiku"; +/// Return the initial default model name: `HIVE_DEFAULT_MODEL` env var if +/// set to a non-empty string, otherwise `DEFAULT_MODEL`. +#[must_use] +pub fn default_model() -> &'static str { + // Leak once at startup — acceptable for a single config value. + std::env::var("HIVE_DEFAULT_MODEL") + .ok() + .filter(|s| !s.trim().is_empty()) + .map_or(DEFAULT_MODEL, |s| Box::leak(s.into_boxed_str())) +} + +/// Context-window size in tokens for a given model name. 
+/// +/// Recognises the model families that Claude Code supports: +/// - `haiku` family: 200 000 tokens +/// - `sonnet` / `opus` families: 1 000 000 tokens +/// - anything else: 200 000 tokens (conservative default) +/// +/// Overridable at runtime via `HIVE_CONTEXT_WINDOW_TOKENS` (useful for +/// future models or when the operator knows the exact limit). The env +/// var takes precedence over the model-name heuristic. +#[must_use] +pub fn context_window_tokens(model: &str) -> u64 { + if let Ok(s) = std::env::var("HIVE_CONTEXT_WINDOW_TOKENS") { + if let Ok(v) = s.trim().parse::() { + if v > 0 { + return v; + } + } + } + let m = model.to_ascii_lowercase(); + if m.contains("sonnet") || m.contains("opus") { + 1_000_000 + } else { + 200_000 + } +} + #[derive(Clone)] pub struct Bus { tx: Arc>, @@ -351,7 +389,7 @@ impl Bus { } }; let (tx, _) = broadcast::channel(CHANNEL_CAPACITY); - let initial_model = load_model().unwrap_or_else(|| DEFAULT_MODEL.to_owned()); + let initial_model = load_model().unwrap_or_else(|| default_model().to_owned()); // Restore rate_limited from the sentinel file — if the harness // crashed while parked, we should still show the right status on // cold load until the next turn clears it. diff --git a/hive-ag3nt/src/turn.rs b/hive-ag3nt/src/turn.rs index d42bc7c..a20c9a4 100644 --- a/hive-ag3nt/src/turn.rs +++ b/hive-ag3nt/src/turn.rs @@ -54,16 +54,6 @@ const RATE_LIMIT_MARKERS: &[&str] = &[ /// capacity limits. const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300; -/// Token watermark for *auto session-reset*. When context is at or above this -/// many tokens AND the prompt cache has gone cold (idle time >= `CACHE_TTL_SECS`), -/// the harness drops `--continue` so the next turn starts fresh. Running any -/// turn (even a checkpoint) before the reset would re-upload the full context -/// and warm the cache, defeating the cost purpose — so the reset happens -/// immediately with no preceding turn. 
Default is ~50% of a 200k-token -/// window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set to `0` -/// to disable. -const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000; - /// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on /// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the /// idle gap exceeds this, the cache prefix has likely expired and the next @@ -75,19 +65,6 @@ const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000; /// `0` to disable (always resume). const DEFAULT_CACHE_TTL_SECS: u64 = 3600; -/// Token watermark for *proactive* compaction. Once a turn finishes with -/// the last inference's context size at or above this many tokens, -/// `drive_turn` runs one dedicated notes-checkpoint turn (so the agent -/// can flush durable state into `/state`) and then `/compact` — while the -/// session is still healthy enough to run a turn at all. This is distinct -/// from the reactive `PROMPT_TOO_LONG_MARKER` path, which only fires once -/// the session is *already* past the window: at that point no turn can -/// run on it, so the reactive path just compacts + retries with no -/// checkpoint. Default is ~75% of a 200k-token window; override via -/// `HIVE_COMPACT_WATERMARK_TOKENS`, or set that to `0` to disable -/// proactive compaction entirely (the reactive path always applies). -const DEFAULT_COMPACT_WATERMARK_TOKENS: u64 = 150_000; - /// Synthetic wake prompt for the proactive notes-checkpoint turn. Not an /// inbox message — the harness injects it directly so the agent gets one /// turn to persist durable state before `/compact` collapses the @@ -212,14 +189,19 @@ pub fn rate_limit_sleep_secs() -> u64 { .unwrap_or(DEFAULT_RATE_LIMIT_SLEEP_SECS) } -/// Resolve the auto-reset watermark: `HIVE_AUTO_RESET_WATERMARK_TOKENS` if -/// set to a valid integer, else `DEFAULT_AUTO_RESET_WATERMARK_TOKENS`. `0` -/// disables auto-reset entirely. 
-fn auto_reset_watermark_tokens() -> u64 { - std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS") +/// Resolve the auto-reset watermark. Priority order: +/// 1. `HIVE_AUTO_RESET_WATERMARK_TOKENS` env var (explicit override). +/// 2. 50% of the model's context window (derived from `bus.model()` + +/// `events::context_window_tokens`). +/// `0` disables auto-reset entirely. +fn auto_reset_watermark_tokens(bus: &Bus) -> u64 { + if let Some(v) = std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS") .ok() .and_then(|s| s.trim().parse::().ok()) - .unwrap_or(DEFAULT_AUTO_RESET_WATERMARK_TOKENS) + { + return v; + } + crate::events::context_window_tokens(&bus.model()) / 2 } /// Resolve the assumed cache TTL: `HIVE_CACHE_TTL_SECS` if set, else @@ -232,14 +214,19 @@ fn cache_ttl_secs() -> u64 { .unwrap_or(DEFAULT_CACHE_TTL_SECS) } -/// Resolve the proactive-compaction watermark: `HIVE_COMPACT_WATERMARK_TOKENS` -/// if set to a valid integer, else `DEFAULT_COMPACT_WATERMARK_TOKENS`. A -/// value of `0` disables proactive compaction. -fn compact_watermark_tokens() -> u64 { - std::env::var("HIVE_COMPACT_WATERMARK_TOKENS") +/// Resolve the proactive-compaction watermark. Priority order: +/// 1. `HIVE_COMPACT_WATERMARK_TOKENS` env var (explicit override). +/// 2. 75% of the model's context window (derived from `bus.model()` + +/// `events::context_window_tokens`). +/// `0` disables proactive compaction (reactive path still applies). +fn compact_watermark_tokens(bus: &Bus) -> u64 { + if let Some(v) = std::env::var("HIVE_COMPACT_WATERMARK_TOKENS") .ok() .and_then(|s| s.trim().parse::().ok()) - .unwrap_or(DEFAULT_COMPACT_WATERMARK_TOKENS) + { + return v; + } + crate::events::context_window_tokens(&bus.model()) * 3 / 4 } /// Drive one turn end-to-end. 
Three paths layer on top of the raw `run_turn`: @@ -291,7 +278,7 @@ pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutco /// checkpoint or compaction is logged + surfaced as a Note but never /// fails the turn that already succeeded. async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) { - let watermark = compact_watermark_tokens(); + let watermark = compact_watermark_tokens(bus); if watermark == 0 { return; // proactive compaction disabled } @@ -336,7 +323,7 @@ async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) { /// any turn before the reset would re-upload and re-warm the cache, which /// defeats the cost-optimisation purpose entirely. fn maybe_auto_reset(bus: &Bus) { - let watermark = auto_reset_watermark_tokens(); + let watermark = auto_reset_watermark_tokens(bus); if watermark == 0 { return; // auto-reset disabled } diff --git a/nix/templates/harness-base.nix b/nix/templates/harness-base.nix index 63368e8..0ed52c6 100644 --- a/nix/templates/harness-base.nix +++ b/nix/templates/harness-base.nix @@ -15,6 +15,45 @@ # only opts in from its own `agent.nix`. imports = [ ./weston-vnc.nix ]; + options.hyperhive.model = lib.mkOption { + type = lib.types.str; + default = "haiku"; + example = "sonnet"; + description = '' + Default claude model for this agent. Sets the `HIVE_DEFAULT_MODEL` + environment variable consumed by the harness at boot; if no + persisted model choice exists in the agent's state dir the harness + falls back to this value. The operator can still switch the model at + runtime via the per-agent web UI — that choice is persisted to the + state dir and takes precedence over this default until the agent is + purged. + + Valid values are the short model names that `claude --model` accepts: + `"haiku"`, `"sonnet"`, `"opus"` (or any future identifier). The + harness derives sensible watermarks from the model family: + haiku → 200 000 token window; sonnet / opus → 1 000 000 token window. 
+ Override the derived window via `hyperhive.contextWindowTokens`. + ''; + }; + + options.hyperhive.contextWindowTokens = lib.mkOption { + type = lib.types.int; + default = 0; + example = 1000000; + description = '' + Context-window size in tokens for this agent's model. `0` (the + default) means "auto-derive from the model name": haiku → 200 000, + sonnet / opus → 1 000 000. Set an explicit value here when you are + using a model the harness does not recognise, or when Anthropic + changes the window for an existing model family. + + Sets the `HIVE_CONTEXT_WINDOW_TOKENS` environment variable; the + harness reads it at runtime and uses it to compute the default + compaction and auto-reset watermarks (75% and 50% of the window + respectively). + ''; + }; + options.hyperhive.allowedBashPatterns = lib.mkOption { type = lib.types.listOf lib.types.str; default = [ ]; @@ -208,6 +247,16 @@ environment.etc."hyperhive/claude-plugins-auto-update.json".text = builtins.toJSON config.hyperhive.claudePluginsAutoUpdate; + # Model + context-window env vars consumed by the harness at boot. + # HIVE_DEFAULT_MODEL seeds the initial model selection when no persisted + # model choice exists in the state dir. HIVE_CONTEXT_WINDOW_TOKENS + # overrides the auto-derived window size (only set when the NixOS option + # is non-zero so an unset env var lets the harness use its own heuristic). + environment.variables.HIVE_DEFAULT_MODEL = config.hyperhive.model; + environment.variables = lib.mkIf (config.hyperhive.contextWindowTokens != 0) { + HIVE_CONTEXT_WINDOW_TOKENS = toString config.hyperhive.contextWindowTokens; + }; + boot.isNspawnContainer = true; # Every agent gets flakes + the modern `nix` CLI out of the box. 
From 770cbaccf9cd589fc53b169060dffa308dafc81f Mon Sep 17 00:00:00 2001 From: damocles Date: Wed, 20 May 2026 15:20:07 +0200 Subject: [PATCH 2/4] model/context: per-model ctx window overrides + expose window size in /api/state --- hive-ag3nt/src/events.rs | 24 +++++++++++++- hive-ag3nt/src/web_ui.rs | 8 +++++ nix/templates/harness-base.nix | 59 +++++++++++++++++++++------------- 3 files changed, 68 insertions(+), 23 deletions(-) diff --git a/hive-ag3nt/src/events.rs b/hive-ag3nt/src/events.rs index b7519a6..4e36f2f 100644 --- a/hive-ag3nt/src/events.rs +++ b/hive-ag3nt/src/events.rs @@ -303,8 +303,30 @@ pub fn default_model() -> &'static str { /// Overridable at runtime via `HIVE_CONTEXT_WINDOW_TOKENS` (useful for /// future models or when the operator knows the exact limit). The env /// var takes precedence over the model-name heuristic. +/// +/// Resolution order (first match wins): +/// 1. `HIVE_CONTEXT_WINDOW_TOKENS_` — per-model override where KEY +/// (case-insensitive) is a substring of the active model name. +/// Set by `hyperhive.contextWindowTokens.` in `agent.nix`. +/// 2. `HIVE_CONTEXT_WINDOW_TOKENS` — global override (any model). +/// 3. Auto-derive: haiku → 200 000, sonnet / opus → 1 000 000. #[must_use] pub fn context_window_tokens(model: &str) -> u64 { + let m = model.to_ascii_lowercase(); + // Per-model overrides: HIVE_CONTEXT_WINDOW_TOKENS_ where + // KEY (lowercased) must be a non-empty substring of the model name. + for (key, val) in std::env::vars() { + if let Some(suffix) = key.strip_prefix("HIVE_CONTEXT_WINDOW_TOKENS_") { + if !suffix.is_empty() && m.contains(&suffix.to_ascii_lowercase()) { + if let Ok(v) = val.trim().parse::() { + if v > 0 { + return v; + } + } + } + } + } + // Global override. 
if let Ok(s) = std::env::var("HIVE_CONTEXT_WINDOW_TOKENS") { if let Ok(v) = s.trim().parse::() { if v > 0 { @@ -312,7 +334,7 @@ pub fn context_window_tokens(model: &str) -> u64 { } } } - let m = model.to_ascii_lowercase(); + // Auto-derive from model family. if m.contains("sonnet") || m.contains("opus") { 1_000_000 } else { diff --git a/hive-ag3nt/src/web_ui.rs b/hive-ag3nt/src/web_ui.rs index 3755eed..a172bac 100644 --- a/hive-ag3nt/src/web_ui.rs +++ b/hive-ag3nt/src/web_ui.rs @@ -357,6 +357,12 @@ struct StateSnapshot { /// the operator can see what they just switched to (and what's /// in flight). Mutable at runtime via `POST /api/model`. model: String, + /// Effective context-window token budget for the current model. + /// Derived from `events::context_window_tokens(&model)` — respects + /// per-model and global `HIVE_CONTEXT_WINDOW_TOKENS_*` overrides then + /// falls back to model-family heuristic. Consumers (e.g. dashboard + /// badge) use this to render the ctx-usage percentage. + context_window_tokens: u64, /// Last-inference token usage from the most recent completed /// turn — represents the current context-window size at turn-end. /// `null` until the first turn finishes. 
@@ -451,6 +457,7 @@ async fn api_state(State(state): State) -> axum::Json { let inbox = recent_inbox(&state.socket, state.flavor()).await; let (turn_state, turn_state_since) = state.bus.state_snapshot(); let model = state.bus.model(); + let context_window_tokens = crate::events::context_window_tokens(&model); let ctx_usage = state.bus.last_ctx_usage(); let cost_usage = state.bus.last_cost_usage(); axum::Json(StateSnapshot { @@ -463,6 +470,7 @@ async fn api_state(State(state): State) -> axum::Json { turn_state, turn_state_since, model, + context_window_tokens, ctx_usage, cost_usage, gui_enabled: state.gui_vnc_port.is_some(), diff --git a/nix/templates/harness-base.nix b/nix/templates/harness-base.nix index 0ed52c6..d7d30a7 100644 --- a/nix/templates/harness-base.nix +++ b/nix/templates/harness-base.nix @@ -37,20 +37,28 @@ }; options.hyperhive.contextWindowTokens = lib.mkOption { - type = lib.types.int; - default = 0; - example = 1000000; + type = lib.types.attrsOf lib.types.int; + default = { }; + example = { + haiku = 150000; + sonnet = 900000; + }; description = '' - Context-window size in tokens for this agent's model. `0` (the - default) means "auto-derive from the model name": haiku → 200 000, - sonnet / opus → 1 000 000. Set an explicit value here when you are - using a model the harness does not recognise, or when Anthropic - changes the window for an existing model family. + Per-model context-window overrides. Each attribute name is a + model-family short name (e.g. `"haiku"`, `"sonnet"`) matched as a + case-insensitive substring of the active model name at runtime, so + `"sonnet"` matches `"claude-sonnet-4-5"` and any future variant. + Empty map (the default) means auto-derive: haiku → 200 000, + sonnet / opus → 1 000 000. - Sets the `HIVE_CONTEXT_WINDOW_TOKENS` environment variable; the - harness reads it at runtime and uses it to compute the default - compaction and auto-reset watermarks (75% and 50% of the window - respectively). 
+ Each entry is rendered as + `HIVE_CONTEXT_WINDOW_TOKENS_` (e.g. + `HIVE_CONTEXT_WINDOW_TOKENS_SONNET = "900000"`). The harness checks + these per-model vars first, then the global + `HIVE_CONTEXT_WINDOW_TOKENS`, then the model-family heuristic. + At runtime, the effective window drives compaction (75%) and + auto-reset (50%) watermarks, and is exposed via `/api/state` as + `context_window_tokens`. ''; }; @@ -249,13 +257,23 @@ # Model + context-window env vars consumed by the harness at boot. # HIVE_DEFAULT_MODEL seeds the initial model selection when no persisted - # model choice exists in the state dir. HIVE_CONTEXT_WINDOW_TOKENS - # overrides the auto-derived window size (only set when the NixOS option - # is non-zero so an unset env var lets the harness use its own heuristic). - environment.variables.HIVE_DEFAULT_MODEL = config.hyperhive.model; - environment.variables = lib.mkIf (config.hyperhive.contextWindowTokens != 0) { - HIVE_CONTEXT_WINDOW_TOKENS = toString config.hyperhive.contextWindowTokens; - }; + # model choice exists in the state dir. + # HIVE_CONTEXT_WINDOW_TOKENS_ provides per-model overrides + # (e.g. HIVE_CONTEXT_WINDOW_TOKENS_SONNET) from contextWindowTokens attrset. + # SHELL must be set so claude's Bash tool finds a POSIX shell. + environment.variables = lib.mkMerge ( + [ + { + HIVE_DEFAULT_MODEL = config.hyperhive.model; + SHELL = "${pkgs.bashInteractive}/bin/bash"; + } + ] + ++ lib.mapAttrsToList + (model: tokens: { + "HIVE_CONTEXT_WINDOW_TOKENS_${lib.toUpper model}" = toString tokens; + }) + config.hyperhive.contextWindowTokens + ); boot.isNspawnContainer = true; @@ -356,9 +374,6 @@ }; }; - # claude's Bash tool refuses to run without a POSIX shell + $SHELL set. 
- environment.variables.SHELL = "${pkgs.bashInteractive}/bin/bash"; - system.stateVersion = "25.11"; }; } From 7e2f13cad8a2e4604bd0d0e96b5a0c31e174e014 Mon Sep 17 00:00:00 2001 From: damocles Date: Wed, 20 May 2026 15:31:06 +0200 Subject: [PATCH 3/4] model/context: defaults in nix module, no heuristic in rust --- hive-ag3nt/src/events.rs | 36 +++++++++++++--------------------- nix/templates/harness-base.nix | 27 ++++++++++++++++--------- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/hive-ag3nt/src/events.rs b/hive-ag3nt/src/events.rs index 4e36f2f..ec8ca7e 100644 --- a/hive-ag3nt/src/events.rs +++ b/hive-ag3nt/src/events.rs @@ -295,26 +295,22 @@ pub fn default_model() -> &'static str { /// Context-window size in tokens for a given model name. /// -/// Recognises the model families that Claude Code supports: -/// - `haiku` family: 200 000 tokens -/// - `sonnet` / `opus` families: 1 000 000 tokens -/// - anything else: 200 000 tokens (conservative default) -/// -/// Overridable at runtime via `HIVE_CONTEXT_WINDOW_TOKENS` (useful for -/// future models or when the operator knows the exact limit). The env -/// var takes precedence over the model-name heuristic. +/// Canonical per-model sizes are declared in `harness-base.nix` as +/// `hyperhive.contextWindowTokens` and injected as +/// `HIVE_CONTEXT_WINDOW_TOKENS_` env vars — so this function +/// normally just reads them. The Rust code carries no model knowledge; +/// updating model families only requires a Nix change. /// /// Resolution order (first match wins): -/// 1. `HIVE_CONTEXT_WINDOW_TOKENS_` — per-model override where KEY -/// (case-insensitive) is a substring of the active model name. -/// Set by `hyperhive.contextWindowTokens.` in `agent.nix`. -/// 2. `HIVE_CONTEXT_WINDOW_TOKENS` — global override (any model). -/// 3. Auto-derive: haiku → 200 000, sonnet / opus → 1 000 000. +/// 1. `HIVE_CONTEXT_WINDOW_TOKENS_` — key (lowercased) is a +/// substring of the active model name. 
Populated by the Nix default +/// map for all known families; add/override in `agent.nix`. +/// 2. `HIVE_CONTEXT_WINDOW_TOKENS` — single global override (any model). +/// 3. Hard fallback: `200_000` (conservative; only hit outside NixOS). #[must_use] pub fn context_window_tokens(model: &str) -> u64 { let m = model.to_ascii_lowercase(); - // Per-model overrides: HIVE_CONTEXT_WINDOW_TOKENS_ where - // KEY (lowercased) must be a non-empty substring of the model name. + // Per-model env vars set by `hyperhive.contextWindowTokens` in Nix. for (key, val) in std::env::vars() { if let Some(suffix) = key.strip_prefix("HIVE_CONTEXT_WINDOW_TOKENS_") { if !suffix.is_empty() && m.contains(&suffix.to_ascii_lowercase()) { @@ -326,7 +322,7 @@ pub fn context_window_tokens(model: &str) -> u64 { } } } - // Global override. + // Global override (single value, any model). if let Ok(s) = std::env::var("HIVE_CONTEXT_WINDOW_TOKENS") { if let Ok(v) = s.trim().parse::() { if v > 0 { @@ -334,12 +330,8 @@ pub fn context_window_tokens(model: &str) -> u64 { } } } - // Auto-derive from model family. - if m.contains("sonnet") || m.contains("opus") { - 1_000_000 - } else { - 200_000 - } + // Hard fallback for dev/test outside NixOS where env vars aren't set. + 200_000 } #[derive(Clone)] diff --git a/nix/templates/harness-base.nix b/nix/templates/harness-base.nix index d7d30a7..d3b4956 100644 --- a/nix/templates/harness-base.nix +++ b/nix/templates/harness-base.nix @@ -38,26 +38,35 @@ options.hyperhive.contextWindowTokens = lib.mkOption { type = lib.types.attrsOf lib.types.int; - default = { }; + # Canonical defaults for known Anthropic model families. + # Override any entry in your agent.nix, or add new keys for + # model families not listed here. + default = { + haiku = 200000; + sonnet = 1000000; + opus = 1000000; + }; example = { haiku = 150000; sonnet = 900000; }; description = '' - Per-model context-window overrides. Each attribute name is a + Per-model context-window sizes in tokens. 
Each key is a model-family short name (e.g. `"haiku"`, `"sonnet"`) matched as a case-insensitive substring of the active model name at runtime, so `"sonnet"` matches `"claude-sonnet-4-5"` and any future variant. - Empty map (the default) means auto-derive: haiku → 200 000, - sonnet / opus → 1 000 000. + + The defaults declared here cover the known Anthropic model families. + Add or override entries in your `agent.nix` when using a + non-standard model or when Anthropic changes a model's window. Each entry is rendered as `HIVE_CONTEXT_WINDOW_TOKENS_` (e.g. - `HIVE_CONTEXT_WINDOW_TOKENS_SONNET = "900000"`). The harness checks - these per-model vars first, then the global - `HIVE_CONTEXT_WINDOW_TOKENS`, then the model-family heuristic. - At runtime, the effective window drives compaction (75%) and - auto-reset (50%) watermarks, and is exposed via `/api/state` as + `HIVE_CONTEXT_WINDOW_TOKENS_SONNET = "1000000"`). The harness + checks these per-model vars in order (first substring match wins), + then falls back to `200000` when no key matches. At runtime the + effective window drives compaction (75%) and auto-reset (50%) + watermarks, and is exposed via `/api/state` as `context_window_tokens`. 
''; }; From d3d52349c3431cb03c0b976fa0e5ec6cfc8aa34d Mon Sep 17 00:00:00 2001 From: damocles Date: Wed, 20 May 2026 15:42:56 +0200 Subject: [PATCH 4/4] model/context: move context window config to host-level hive-c0re.nix --- hive-c0re/src/actions.rs | 2 ++ hive-c0re/src/auto_update.rs | 2 ++ hive-c0re/src/coordinator.rs | 9 +++++ hive-c0re/src/lifecycle.rs | 6 ++-- hive-c0re/src/main.rs | 11 ++++++ hive-c0re/src/meta.rs | 19 +++++++++-- hive-c0re/src/migrate.rs | 1 + hive-c0re/src/server.rs | 2 ++ nix/modules/hive-c0re.nix | 27 ++++++++++++++- nix/templates/harness-base.nix | 61 +++++----------------------------- 10 files changed, 81 insertions(+), 59 deletions(-) diff --git a/hive-c0re/src/actions.rs b/hive-c0re/src/actions.rs index 3d8e224..0fa0560 100644 --- a/hive-c0re/src/actions.rs +++ b/hive-c0re/src/actions.rs @@ -91,6 +91,7 @@ pub async fn approve(coord: Arc, id: i64) -> Result<()> { ¬es_dir, coord_bg.dashboard_port, &coord_bg.operator_pronouns, + &coord_bg.context_window_tokens, ) .await; drop(guard); @@ -415,6 +416,7 @@ async fn sync_meta_after_lifecycle(coord: &Coordinator) -> Result<()> { &coord.hyperhive_flake, coord.dashboard_port, &coord.operator_pronouns, + &coord.context_window_tokens, &agents, ) .await diff --git a/hive-c0re/src/auto_update.rs b/hive-c0re/src/auto_update.rs index 9a18bd7..3acc674 100644 --- a/hive-c0re/src/auto_update.rs +++ b/hive-c0re/src/auto_update.rs @@ -73,6 +73,7 @@ pub async fn rebuild_agent(coord: &Arc, name: &str, current_rev: &s ¬es_dir, coord.dashboard_port, &coord.operator_pronouns, + &coord.context_window_tokens, ) .await; drop(guard); @@ -160,6 +161,7 @@ pub async fn ensure_manager(coord: &Arc) -> Result<()> { ¬es_dir, coord.dashboard_port, &coord.operator_pronouns, + &coord.context_window_tokens, ) .await?; if let Some(rev) = current_rev { diff --git a/hive-c0re/src/coordinator.rs b/hive-c0re/src/coordinator.rs index a291443..3a85bf2 100644 --- a/hive-c0re/src/coordinator.rs +++ b/hive-c0re/src/coordinator.rs @@ 
-51,6 +51,13 @@ pub struct Coordinator { /// meta flake); the harness substitutes it into the agent / /// manager system prompt at boot. pub operator_pronouns: String, + /// Per-model context-window sizes in tokens. Set via the host-level + /// `services.hive-c0re.contextWindowTokens` NixOS option; injected + /// into each container as `HIVE_CONTEXT_WINDOW_TOKENS_` + /// by the meta flake renderer. The harness uses these to derive + /// compaction / auto-reset watermarks and exposes the active value + /// on `/api/state` as `context_window_tokens`. + pub context_window_tokens: std::collections::HashMap, agents: Mutex>, /// Agents whose lifecycle action (currently just spawn) is in flight. /// Read by the dashboard to render a spinner; cleared when the action @@ -139,6 +146,7 @@ impl Coordinator { hyperhive_flake: String, dashboard_port: u16, operator_pronouns: String, + context_window_tokens: std::collections::HashMap, ) -> Result { let broker = Broker::open(db_path).context("open broker")?; let approvals = Approvals::open(db_path).context("open approvals")?; @@ -152,6 +160,7 @@ impl Coordinator { hyperhive_flake, dashboard_port, operator_pronouns, + context_window_tokens, agents: Mutex::new(HashMap::new()), transient: Mutex::new(HashMap::new()), dashboard_events, diff --git a/hive-c0re/src/lifecycle.rs b/hive-c0re/src/lifecycle.rs index 1e07352..cd29476 100644 --- a/hive-c0re/src/lifecycle.rs +++ b/hive-c0re/src/lifecycle.rs @@ -138,6 +138,7 @@ pub async fn spawn( notes_dir: &Path, dashboard_port: u16, operator_pronouns: &str, + context_window_tokens: &std::collections::HashMap, ) -> Result<()> { validate(name)?; if let Some(other) = port_collision(name).await { @@ -154,7 +155,7 @@ pub async fn spawn( // before `nixos-container create` so the `--flake meta#` // ref resolves. 
let agents = agents_after_spawn(name).await?; - crate::meta::sync_agents(hyperhive_flake, dashboard_port, operator_pronouns, &agents).await?; + crate::meta::sync_agents(hyperhive_flake, dashboard_port, operator_pronouns, context_window_tokens, &agents).await?; let container = container_name(name); let flake_ref = format!("{}#{name}", crate::meta::meta_dir().display()); run(&["create", &container, "--flake", &flake_ref]).await?; @@ -273,6 +274,7 @@ pub async fn rebuild( notes_dir: &Path, dashboard_port: u16, operator_pronouns: &str, + context_window_tokens: &std::collections::HashMap<String, u64>, ) -> Result<()> { // Sync the meta flake (idempotent — no-op when the rendered // flake matches disk) so a manual rebuild from the dashboard @@ -280,7 +282,7 @@ pub async fn rebuild( // got added directly via `nixos-container create` outside // hive-c0re). let agents = agents_for_meta(None).await?; - crate::meta::sync_agents(hyperhive_flake, dashboard_port, operator_pronouns, &agents).await?; + crate::meta::sync_agents(hyperhive_flake, dashboard_port, operator_pronouns, context_window_tokens, &agents).await?; // Then bump just this agent's input — picks up whatever // `applied//main` currently points at (deployed/). // Commits the lock if it changed. diff --git a/hive-c0re/src/main.rs b/hive-c0re/src/main.rs index 1206567..4948a95 100644 --- a/hive-c0re/src/main.rs +++ b/hive-c0re/src/main.rs @@ -62,6 +62,12 @@ enum Cmd { /// system prompt can mention them. Default: `she/her`. #[arg(long, default_value = "she/her")] operator_pronouns: String, + /// Per-model context-window sizes, as JSON object mapping model-family + /// short name to token count. Threaded into each container as + /// `HIVE_CONTEXT_WINDOW_TOKENS_` env vars. Set via the + /// `services.hive-c0re.contextWindowTokens` NixOS option. + #[arg(long, default_value = r#"{"haiku":200000,"sonnet":1000000,"opus":1000000}"#)] + context_window_tokens: String, }, /// Spawn a new agent container directly (`hive-agent-`).
Bypasses /// the approval queue — use only as an operator on the host. For @@ -109,12 +115,17 @@ async fn main() -> Result<()> { db, dashboard_port, operator_pronouns, + context_window_tokens, } => { + let cwt: std::collections::HashMap<String, u64> = + serde_json::from_str(&context_window_tokens) + .context("--context-window-tokens: invalid JSON")?; let coord = Arc::new(Coordinator::open( &db, hyperhive_flake, dashboard_port, operator_pronouns, + cwt, )?); manager_server::start(coord.clone())?; // Idempotent pre-flight: rewrite pre-meta-layout applied diff --git a/hive-c0re/src/meta.rs b/hive-c0re/src/meta.rs index 48f4fde..55b01dd 100644 --- a/hive-c0re/src/meta.rs +++ b/hive-c0re/src/meta.rs @@ -66,13 +66,14 @@ pub async fn sync_agents( hyperhive_flake: &str, dashboard_port: u16, operator_pronouns: &str, + context_window_tokens: &std::collections::HashMap<String, u64>, agents: &[AgentSpec], ) -> Result<()> { let _guard = META_LOCK.lock().await; let dir = meta_dir(); std::fs::create_dir_all(&dir).with_context(|| format!("create {}", dir.display()))?; - let new_flake = render_flake(hyperhive_flake, dashboard_port, operator_pronouns, agents); + let new_flake = render_flake(hyperhive_flake, dashboard_port, operator_pronouns, context_window_tokens, agents); let flake_path = dir.join("flake.nix"); let on_disk = std::fs::read_to_string(&flake_path).unwrap_or_default(); let initial = !dir.join(".git").exists(); @@ -235,6 +236,7 @@ fn render_flake( hyperhive_flake: &str, dashboard_port: u16, operator_pronouns: &str, + context_window_tokens: &std::collections::HashMap<String, u64>, agents: &[AgentSpec], ) -> String { use std::fmt::Write as _; @@ -283,8 +285,19 @@ fn render_flake( HIVE_PORT = toString port; HIVE_LABEL = name; HIVE_DASHBOARD_PORT = toString dashboardPort; - HIVE_OPERATOR_PRONOUNS = operatorPronouns; - HYPERHIVE_STATE_DIR = "/agents/${name}/state"; + HIVE_OPERATOR_PRONOUNS = operatorPronouns;"#, + ); + // Per-model context-window env vars declared in the host-level + //
`services.hive-c0re.contextWindowTokens` option. Use a sorted + // iterator for deterministic flake output (no spurious git diffs). + let mut sorted_tokens: Vec<(&String, &u64)> = context_window_tokens.iter().collect(); + sorted_tokens.sort_by_key(|(k, _)| k.as_str()); + for (key, val) in &sorted_tokens { + let upper_key = key.to_ascii_uppercase(); + let _ = writeln!(out, " HIVE_CONTEXT_WINDOW_TOKENS_{upper_key} = \"{val}\";"); + } + out.push_str( + r#" HYPERHIVE_STATE_DIR = "/agents/${name}/state"; }; } ]; diff --git a/hive-c0re/src/migrate.rs b/hive-c0re/src/migrate.rs index 43fe95d..5e1ae42 100644 --- a/hive-c0re/src/migrate.rs +++ b/hive-c0re/src/migrate.rs @@ -83,6 +83,7 @@ pub async fn run(coord: &Arc<Coordinator>) -> Result<()> { &coord.hyperhive_flake, coord.dashboard_port, &coord.operator_pronouns, + &coord.context_window_tokens, &agents, ) .await diff --git a/hive-c0re/src/server.rs b/hive-c0re/src/server.rs index 639475e..42ccc5f 100644 --- a/hive-c0re/src/server.rs +++ b/hive-c0re/src/server.rs @@ -77,6 +77,7 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse { &notes_dir, coord.dashboard_port, &coord.operator_pronouns, + &coord.context_window_tokens, ) .await { @@ -139,6 +140,7 @@ async fn dispatch(req: &HostRequest, coord: Arc<Coordinator>) -> HostResponse { &notes_dir, coord.dashboard_port, &coord.operator_pronouns, + &coord.context_window_tokens, ) .await; // Mirror auto_update::rebuild_agent — the manager wants diff --git a/nix/modules/hive-c0re.nix b/nix/modules/hive-c0re.nix index 158eefe..9325741 100644 --- a/nix/modules/hive-c0re.nix +++ b/nix/modules/hive-c0re.nix @@ -57,6 +57,31 @@ in approval needed. ''; }; + contextWindowTokens = lib.mkOption { + type = lib.types.attrsOf lib.types.int; + default = { + haiku = 200000; + sonnet = 1000000; + opus = 1000000; + }; + example = { + haiku = 150000; + sonnet = 900000; + }; + description = '' + Per-model context-window sizes in tokens.
Each key is a + model-family short name matched case-insensitively as a + substring of the active model name at runtime (e.g. `"sonnet"` + matches `"claude-sonnet-4-5"`). The defaults cover the known + Anthropic families; add entries for new models or override + existing ones here to change the window for all agents at once. + + Passed to `hive-c0re serve` as JSON and injected into every + container's harness service environment as + `HIVE_CONTEXT_WINDOW_TOKENS_`. Changes propagate + on the next `↻ R3BU1LD` — no per-agent approval needed. + ''; + }; }; config = lib.mkIf cfg.enable { @@ -89,7 +114,7 @@ in ]; environment.HYPERHIVE_GIT = "${pkgs.git}/bin/git"; serviceConfig = { - ExecStart = "${cfg.package}/bin/hive-c0re --socket /run/hyperhive/host.sock serve --hyperhive-flake ${cfg.hyperhiveFlake} --dashboard-port ${toString cfg.dashboardPort} --operator-pronouns ${lib.escapeShellArg cfg.operatorPronouns}"; + ExecStart = "${cfg.package}/bin/hive-c0re --socket /run/hyperhive/host.sock serve --hyperhive-flake ${cfg.hyperhiveFlake} --dashboard-port ${toString cfg.dashboardPort} --operator-pronouns ${lib.escapeShellArg cfg.operatorPronouns} --context-window-tokens ${lib.escapeShellArg (builtins.toJSON cfg.contextWindowTokens)}"; Restart = "on-failure"; RestartSec = 2; RuntimeDirectory = "hyperhive"; diff --git a/nix/templates/harness-base.nix b/nix/templates/harness-base.nix index d3b4956..25bfa20 100644 --- a/nix/templates/harness-base.nix +++ b/nix/templates/harness-base.nix @@ -36,41 +36,6 @@ ''; }; - options.hyperhive.contextWindowTokens = lib.mkOption { - type = lib.types.attrsOf lib.types.int; - # Canonical defaults for known Anthropic model families. - # Override any entry in your agent.nix, or add new keys for - # model families not listed here. - default = { - haiku = 200000; - sonnet = 1000000; - opus = 1000000; - }; - example = { - haiku = 150000; - sonnet = 900000; - }; - description = '' - Per-model context-window sizes in tokens. 
Each key is a - model-family short name (e.g. `"haiku"`, `"sonnet"`) matched as a - case-insensitive substring of the active model name at runtime, so - `"sonnet"` matches `"claude-sonnet-4-5"` and any future variant. - - The defaults declared here cover the known Anthropic model families. - Add or override entries in your `agent.nix` when using a - non-standard model or when Anthropic changes a model's window. - - Each entry is rendered as - `HIVE_CONTEXT_WINDOW_TOKENS_` (e.g. - `HIVE_CONTEXT_WINDOW_TOKENS_SONNET = "1000000"`). The harness - checks these per-model vars in order (first substring match wins), - then falls back to `200000` when no key matches. At runtime the - effective window drives compaction (75%) and auto-reset (50%) - watermarks, and is exposed via `/api/state` as - `context_window_tokens`. - ''; - }; - options.hyperhive.allowedBashPatterns = lib.mkOption { type = lib.types.listOf lib.types.str; default = [ ]; @@ -264,25 +229,15 @@ environment.etc."hyperhive/claude-plugins-auto-update.json".text = builtins.toJSON config.hyperhive.claudePluginsAutoUpdate; - # Model + context-window env vars consumed by the harness at boot. # HIVE_DEFAULT_MODEL seeds the initial model selection when no persisted - # model choice exists in the state dir. - # HIVE_CONTEXT_WINDOW_TOKENS_ provides per-model overrides - # (e.g. HIVE_CONTEXT_WINDOW_TOKENS_SONNET) from contextWindowTokens attrset. - # SHELL must be set so claude's Bash tool finds a POSIX shell. - environment.variables = lib.mkMerge ( - [ - { - HIVE_DEFAULT_MODEL = config.hyperhive.model; - SHELL = "${pkgs.bashInteractive}/bin/bash"; - } - ] - ++ lib.mapAttrsToList - (model: tokens: { - "HIVE_CONTEXT_WINDOW_TOKENS_${lib.toUpper model}" = toString tokens; - }) - config.hyperhive.contextWindowTokens - ); + # model choice exists in the state dir. SHELL must be set so claude's + # Bash tool finds a POSIX shell. 
+ # HIVE_CONTEXT_WINDOW_TOKENS_* are injected by the meta flake from the + # host-level `services.hive-c0re.contextWindowTokens` option — not set here. + environment.variables = { + HIVE_DEFAULT_MODEL = config.hyperhive.model; + SHELL = "${pkgs.bashInteractive}/bin/bash"; + }; boot.isNspawnContainer = true;