From 9064cd3c57f1c9c248e2e7abff07e41b7865a810 Mon Sep 17 00:00:00 2001 From: damocles Date: Wed, 20 May 2026 15:12:37 +0200 Subject: [PATCH] model/context: configurable default model + model-derived context window --- hive-ag3nt/src/events.rs | 50 ++++++++++++++++++++++++---- hive-ag3nt/src/turn.rs | 61 +++++++++++++--------------------- nix/templates/harness-base.nix | 49 +++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 43 deletions(-) diff --git a/hive-ag3nt/src/events.rs b/hive-ag3nt/src/events.rs index 36b9e26..b7519a6 100644 --- a/hive-ag3nt/src/events.rs +++ b/hive-ag3nt/src/events.rs @@ -275,13 +275,51 @@ pub enum TurnState { Compacting, } -/// Default claude model when nothing's been set at runtime. The -/// operator can switch via `/model ` in the web terminal; the -/// chosen model lives in `Bus::model` for the rest of the harness -/// process's life (resets on restart, by design — operator overrides -/// shouldn't survive accidentally). +/// Default claude model when nothing's been set at runtime. Overridable +/// via the `HIVE_DEFAULT_MODEL` env var (set from `hyperhive.model` in +/// the container's `agent.nix`). The operator can also switch at runtime +/// via `/model ` in the web terminal; the chosen model is persisted +/// to the state dir so it survives restarts. pub const DEFAULT_MODEL: &str = "haiku"; +/// Return the initial default model name: the trimmed `HIVE_DEFAULT_MODEL` +/// env var if set to a non-blank string, otherwise `DEFAULT_MODEL`. +#[must_use] +pub fn default_model() -> &'static str { + // Leaks one small string per call — call once at startup, not per turn. + std::env::var("HIVE_DEFAULT_MODEL") + .ok() + .filter(|s| !s.trim().is_empty()) + .map_or(DEFAULT_MODEL, |s| Box::leak(Box::<str>::from(s.trim()))) +} + +/// Context-window size in tokens for a given model name.
+/// +/// Recognises the model families that Claude Code supports: +/// - `haiku` family: 200 000 tokens +/// - `sonnet` / `opus` families: 1 000 000 tokens +/// - anything else: 200 000 tokens (conservative default) +/// +/// Overridable at runtime via `HIVE_CONTEXT_WINDOW_TOKENS` (useful for +/// future models or when the operator knows the exact limit). The env +/// var takes precedence over the model-name heuristic. +#[must_use] +pub fn context_window_tokens(model: &str) -> u64 { + if let Some(v) = std::env::var("HIVE_CONTEXT_WINDOW_TOKENS") + .ok() + .and_then(|s| s.trim().parse::<u64>().ok()) + .filter(|&v| v > 0) + { + return v; + } + let m = model.to_ascii_lowercase(); + if m.contains("sonnet") || m.contains("opus") { + 1_000_000 + } else { + 200_000 + } +} + #[derive(Clone)] pub struct Bus { tx: Arc>, @@ -351,7 +389,7 @@ impl Bus { } }; let (tx, _) = broadcast::channel(CHANNEL_CAPACITY); - let initial_model = load_model().unwrap_or_else(|| DEFAULT_MODEL.to_owned()); + let initial_model = load_model().unwrap_or_else(|| default_model().to_owned()); // Restore rate_limited from the sentinel file — if the harness // crashed while parked, we should still show the right status on // cold load until the next turn clears it. diff --git a/hive-ag3nt/src/turn.rs b/hive-ag3nt/src/turn.rs index d42bc7c..a20c9a4 100644 --- a/hive-ag3nt/src/turn.rs +++ b/hive-ag3nt/src/turn.rs @@ -54,16 +54,6 @@ const RATE_LIMIT_MARKERS: &[&str] = &[ /// capacity limits. const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300; -/// Token watermark for *auto session-reset*. When context is at or above this -/// many tokens AND the prompt cache has gone cold (idle time >= `CACHE_TTL_SECS`), -/// the harness drops `--continue` so the next turn starts fresh. Running any -/// turn (even a checkpoint) before the reset would re-upload the full context -/// and warm the cache, defeating the cost purpose — so the reset happens -/// immediately with no preceding turn.
Default is ~50% of a 200k-token -/// window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set to `0` -/// to disable. -const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000; - /// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on /// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the /// idle gap exceeds this, the cache prefix has likely expired and the next @@ -75,19 +65,6 @@ const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000; /// `0` to disable (always resume). const DEFAULT_CACHE_TTL_SECS: u64 = 3600; -/// Token watermark for *proactive* compaction. Once a turn finishes with -/// the last inference's context size at or above this many tokens, -/// `drive_turn` runs one dedicated notes-checkpoint turn (so the agent -/// can flush durable state into `/state`) and then `/compact` — while the -/// session is still healthy enough to run a turn at all. This is distinct -/// from the reactive `PROMPT_TOO_LONG_MARKER` path, which only fires once -/// the session is *already* past the window: at that point no turn can -/// run on it, so the reactive path just compacts + retries with no -/// checkpoint. Default is ~75% of a 200k-token window; override via -/// `HIVE_COMPACT_WATERMARK_TOKENS`, or set that to `0` to disable -/// proactive compaction entirely (the reactive path always applies). -const DEFAULT_COMPACT_WATERMARK_TOKENS: u64 = 150_000; - /// Synthetic wake prompt for the proactive notes-checkpoint turn. Not an /// inbox message — the harness injects it directly so the agent gets one /// turn to persist durable state before `/compact` collapses the @@ -212,14 +189,19 @@ pub fn rate_limit_sleep_secs() -> u64 { .unwrap_or(DEFAULT_RATE_LIMIT_SLEEP_SECS) } -/// Resolve the auto-reset watermark: `HIVE_AUTO_RESET_WATERMARK_TOKENS` if -/// set to a valid integer, else `DEFAULT_AUTO_RESET_WATERMARK_TOKENS`. `0` -/// disables auto-reset entirely. 
-fn auto_reset_watermark_tokens() -> u64 { - std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS") +/// Resolve the auto-reset watermark; `0` disables auto-reset entirely. +/// Priority order: +/// 1. `HIVE_AUTO_RESET_WATERMARK_TOKENS` env var (explicit override). +/// 2. 50% of the model's context window (derived from `bus.model()` + +/// `events::context_window_tokens`). +fn auto_reset_watermark_tokens(bus: &Bus) -> u64 { + if let Some(v) = std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS") .ok() .and_then(|s| s.trim().parse::<u64>().ok()) - .unwrap_or(DEFAULT_AUTO_RESET_WATERMARK_TOKENS) + { + return v; + } + crate::events::context_window_tokens(&bus.model()) / 2 } /// Resolve the assumed cache TTL: `HIVE_CACHE_TTL_SECS` if set, else @@ -232,14 +214,19 @@ fn cache_ttl_secs() -> u64 { .unwrap_or(DEFAULT_CACHE_TTL_SECS) } -/// Resolve the proactive-compaction watermark: `HIVE_COMPACT_WATERMARK_TOKENS` -/// if set to a valid integer, else `DEFAULT_COMPACT_WATERMARK_TOKENS`. A -/// value of `0` disables proactive compaction. -fn compact_watermark_tokens() -> u64 { - std::env::var("HIVE_COMPACT_WATERMARK_TOKENS") +/// Resolve the proactive-compaction watermark; `0` disables proactive +/// compaction (the reactive path still applies). Priority order: +/// 1. `HIVE_COMPACT_WATERMARK_TOKENS` env var (explicit override). +/// 2. 75% of the window from `events::context_window_tokens(bus.model())`. +fn compact_watermark_tokens(bus: &Bus) -> u64 { + if let Some(v) = std::env::var("HIVE_COMPACT_WATERMARK_TOKENS") .ok() .and_then(|s| s.trim().parse::<u64>().ok()) - .unwrap_or(DEFAULT_COMPACT_WATERMARK_TOKENS) + { + return v; + } + // Divide before multiplying so an oversized window override cannot overflow. + crate::events::context_window_tokens(&bus.model()) / 4 * 3 } /// Drive one turn end-to-end.
Three paths layer on top of the raw `run_turn`: @@ -291,7 +278,7 @@ pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutco /// checkpoint or compaction is logged + surfaced as a Note but never /// fails the turn that already succeeded. async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) { - let watermark = compact_watermark_tokens(); + let watermark = compact_watermark_tokens(bus); if watermark == 0 { return; // proactive compaction disabled } @@ -336,7 +323,7 @@ async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) { /// any turn before the reset would re-upload and re-warm the cache, which /// defeats the cost-optimisation purpose entirely. fn maybe_auto_reset(bus: &Bus) { - let watermark = auto_reset_watermark_tokens(); + let watermark = auto_reset_watermark_tokens(bus); if watermark == 0 { return; // auto-reset disabled } diff --git a/nix/templates/harness-base.nix b/nix/templates/harness-base.nix index 63368e8..0ed52c6 100644 --- a/nix/templates/harness-base.nix +++ b/nix/templates/harness-base.nix @@ -15,6 +15,45 @@ # only opts in from its own `agent.nix`. imports = [ ./weston-vnc.nix ]; + options.hyperhive.model = lib.mkOption { + type = lib.types.str; + default = "haiku"; + example = "sonnet"; + description = '' + Default claude model for this agent. Sets the `HIVE_DEFAULT_MODEL` + environment variable consumed by the harness at boot; if no + persisted model choice exists in the agent's state dir the harness + falls back to this value. The operator can still switch the model at + runtime via the per-agent web UI — that choice is persisted to the + state dir and takes precedence over this default until the agent is + purged. + + Valid values are the short model names that `claude --model` accepts: + `"haiku"`, `"sonnet"`, `"opus"` (or any future identifier). The + harness derives sensible watermarks from the model family: + haiku → 200 000 token window; sonnet / opus → 1 000 000 token window. 
+ Override the derived window via `hyperhive.contextWindowTokens`. + ''; + }; + + options.hyperhive.contextWindowTokens = lib.mkOption { + type = lib.types.ints.unsigned; + default = 0; + example = 1000000; + description = '' + Context-window size in tokens for this agent's model. `0` (the + default) means "auto-derive from the model name": haiku → 200 000, + sonnet / opus → 1 000 000. Set an explicit value here when you are + using a model the harness does not recognise, or when Anthropic + changes the window for an existing model family. + + When non-zero, sets the `HIVE_CONTEXT_WINDOW_TOKENS` environment + variable; the harness reads it at runtime and uses it to compute the + default compaction and auto-reset watermarks (75% and 50% of the + window respectively). + ''; + }; + options.hyperhive.allowedBashPatterns = lib.mkOption { type = lib.types.listOf lib.types.str; default = [ ]; @@ -208,6 +247,16 @@ environment.etc."hyperhive/claude-plugins-auto-update.json".text = builtins.toJSON config.hyperhive.claudePluginsAutoUpdate; + # Model + context-window env vars consumed by the harness at boot. + # HIVE_DEFAULT_MODEL seeds the initial model selection when no persisted + # model choice exists in the state dir. HIVE_CONTEXT_WINDOW_TOKENS + # overrides the auto-derived window size (only set when the NixOS option + # is non-zero so an unset env var lets the harness use its own heuristic). + environment.variables.HIVE_DEFAULT_MODEL = config.hyperhive.model; + environment.variables.HIVE_CONTEXT_WINDOW_TOKENS = lib.mkIf ( + config.hyperhive.contextWindowTokens != 0 + ) (toString config.hyperhive.contextWindowTokens); + boot.isNspawnContainer = true; # Every agent gets flakes + the modern `nix` CLI out of the box.