model/context: configurable default model + model-derived context window

This commit is contained in:
damocles 2026-05-20 15:12:37 +02:00 committed by Mara
parent 67f948028c
commit 9064cd3c57
3 changed files with 117 additions and 43 deletions

View file

@ -275,13 +275,51 @@ pub enum TurnState {
Compacting,
}
/// Default claude model when nothing's been set at runtime. The
/// operator can switch via `/model <name>` in the web terminal; the
/// chosen model lives in `Bus::model` for the rest of the harness
/// process's life (resets on restart, by design — operator overrides
/// shouldn't survive accidentally).
/// Default claude model when nothing's been set at runtime. Overridable
/// via the `HIVE_DEFAULT_MODEL` env var (set from `hyperhive.model` in
/// the container's `agent.nix`). The operator can also switch at runtime
/// via `/model <name>` in the web terminal; the chosen model is persisted
/// to the state dir so it survives restarts.
pub const DEFAULT_MODEL: &str = "haiku";

/// Return the initial default model name: the whitespace-trimmed value of
/// the `HIVE_DEFAULT_MODEL` env var if it is set to a non-blank string,
/// otherwise [`DEFAULT_MODEL`].
///
/// The environment is consulted only on the first call. The resolved name
/// is cached for the rest of the process, so repeated calls neither re-read
/// the environment nor allocate (the previous implementation leaked a fresh
/// allocation on every call whenever the variable was set).
#[must_use]
pub fn default_model() -> &'static str {
    // Function-local cache: initialised at most once, lives for `'static`.
    static RESOLVED: std::sync::OnceLock<String> = std::sync::OnceLock::new();
    RESOLVED.get_or_init(|| {
        std::env::var("HIVE_DEFAULT_MODEL")
            .ok()
            // Trim before the emptiness check AND keep the trimmed value, so
            // stray whitespace/newlines never end up in the model name.
            .map(|raw| raw.trim().to_owned())
            .filter(|name| !name.is_empty())
            .unwrap_or_else(|| DEFAULT_MODEL.to_owned())
    })
}
/// Size of the context window, in tokens, for the model called `model`.
///
/// Resolution order:
///
/// 1. `HIVE_CONTEXT_WINDOW_TOKENS`, when it is set and parses (after
///    trimming) as a strictly positive `u64`. This explicit override wins
///    over the name-based heuristic and is the escape hatch for models the
///    heuristic does not know about.
/// 2. Otherwise a case-insensitive substring match on the model name:
///    names containing `sonnet` or `opus` get 1 000 000 tokens.
/// 3. Everything else (including the `haiku` family) gets the conservative
///    200 000-token default.
#[must_use]
pub fn context_window_tokens(model: &str) -> u64 {
    // Explicit operator override; zero and unparsable values are ignored.
    let forced = std::env::var("HIVE_CONTEXT_WINDOW_TOKENS")
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        .filter(|&tokens| tokens > 0);
    if let Some(tokens) = forced {
        return tokens;
    }

    // Name heuristic: large-window families are matched by substring.
    let lowered = model.to_ascii_lowercase();
    let is_large_family = ["sonnet", "opus"]
        .iter()
        .any(|family| lowered.contains(*family));
    match is_large_family {
        true => 1_000_000,
        false => 200_000,
    }
}
#[derive(Clone)]
pub struct Bus {
tx: Arc<broadcast::Sender<BusEvent>>,
@ -351,7 +389,7 @@ impl Bus {
}
};
let (tx, _) = broadcast::channel(CHANNEL_CAPACITY);
let initial_model = load_model().unwrap_or_else(|| DEFAULT_MODEL.to_owned());
let initial_model = load_model().unwrap_or_else(|| default_model().to_owned());
// Restore rate_limited from the sentinel file — if the harness
// crashed while parked, we should still show the right status on
// cold load until the next turn clears it.

View file

@ -54,16 +54,6 @@ const RATE_LIMIT_MARKERS: &[&str] = &[
/// capacity limits.
const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300;
/// Token watermark for *auto session-reset*. When context is at or above this
/// many tokens AND the prompt cache has gone cold (idle time >= `CACHE_TTL_SECS`),
/// the harness drops `--continue` so the next turn starts fresh. Running any
/// turn (even a checkpoint) before the reset would re-upload the full context
/// and warm the cache, defeating the cost purpose — so the reset happens
/// immediately with no preceding turn. Default is ~50% of a 200k-token
/// window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set to `0`
/// to disable.
const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;
/// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on
/// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the
/// idle gap exceeds this, the cache prefix has likely expired and the next
@ -75,19 +65,6 @@ const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;
/// `0` to disable (always resume).
const DEFAULT_CACHE_TTL_SECS: u64 = 3600;
/// Token watermark for *proactive* compaction. Once a turn finishes with
/// the last inference's context size at or above this many tokens,
/// `drive_turn` runs one dedicated notes-checkpoint turn (so the agent
/// can flush durable state into `/state`) and then `/compact` — while the
/// session is still healthy enough to run a turn at all. This is distinct
/// from the reactive `PROMPT_TOO_LONG_MARKER` path, which only fires once
/// the session is *already* past the window: at that point no turn can
/// run on it, so the reactive path just compacts + retries with no
/// checkpoint. Default is ~75% of a 200k-token window; override via
/// `HIVE_COMPACT_WATERMARK_TOKENS`, or set that to `0` to disable
/// proactive compaction entirely (the reactive path always applies).
const DEFAULT_COMPACT_WATERMARK_TOKENS: u64 = 150_000;
/// Synthetic wake prompt for the proactive notes-checkpoint turn. Not an
/// inbox message — the harness injects it directly so the agent gets one
/// turn to persist durable state before `/compact` collapses the
@ -212,14 +189,19 @@ pub fn rate_limit_sleep_secs() -> u64 {
.unwrap_or(DEFAULT_RATE_LIMIT_SLEEP_SECS)
}
/// Resolve the auto-reset watermark: `HIVE_AUTO_RESET_WATERMARK_TOKENS` if
/// set to a valid integer, else `DEFAULT_AUTO_RESET_WATERMARK_TOKENS`. `0`
/// disables auto-reset entirely.
fn auto_reset_watermark_tokens() -> u64 {
std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS")
/// Resolve the auto-reset watermark, in tokens. Priority order:
///
/// 1. `HIVE_AUTO_RESET_WATERMARK_TOKENS`, when it parses (after trimming)
///    as a `u64`. The value is returned as-is, including `0`. An unset or
///    unparsable variable falls through to step 2.
/// 2. 50% of the context window that `events::context_window_tokens`
///    reports for `bus.model()` (that function itself honours
///    `HIVE_CONTEXT_WINDOW_TOKENS`).
///
/// A result of `0` disables auto-reset entirely.
fn auto_reset_watermark_tokens(bus: &Bus) -> u64 {
if let Some(v) = std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS")
.ok()
.and_then(|s| s.trim().parse::<u64>().ok())
.unwrap_or(DEFAULT_AUTO_RESET_WATERMARK_TOKENS)
{
// Explicit operator override wins over the model-derived default.
return v;
}
// No usable override: derive the watermark as half of the model's window.
crate::events::context_window_tokens(&bus.model()) / 2
}
/// Resolve the assumed cache TTL: `HIVE_CACHE_TTL_SECS` if set, else
@ -232,14 +214,19 @@ fn cache_ttl_secs() -> u64 {
.unwrap_or(DEFAULT_CACHE_TTL_SECS)
}
/// Resolve the proactive-compaction watermark: `HIVE_COMPACT_WATERMARK_TOKENS`
/// if set to a valid integer, else `DEFAULT_COMPACT_WATERMARK_TOKENS`. A
/// value of `0` disables proactive compaction.
fn compact_watermark_tokens() -> u64 {
std::env::var("HIVE_COMPACT_WATERMARK_TOKENS")
/// Resolve the proactive-compaction watermark, in tokens. Priority order:
///
/// 1. `HIVE_COMPACT_WATERMARK_TOKENS`, when it parses (after trimming) as
///    a `u64`. The value is returned as-is, including `0`. An unset or
///    unparsable variable falls through to step 2.
/// 2. 75% of the context window that `events::context_window_tokens`
///    reports for `bus.model()` (that function itself honours
///    `HIVE_CONTEXT_WINDOW_TOKENS`).
///
/// A result of `0` disables proactive compaction (reactive path still
/// applies).
fn compact_watermark_tokens(bus: &Bus) -> u64 {
if let Some(v) = std::env::var("HIVE_COMPACT_WATERMARK_TOKENS")
.ok()
.and_then(|s| s.trim().parse::<u64>().ok())
.unwrap_or(DEFAULT_COMPACT_WATERMARK_TOKENS)
{
// Explicit operator override wins over the model-derived default.
return v;
}
// No usable override: three quarters of the model's window.
// NOTE(review): `* 3` is evaluated before `/ 4`, so this can overflow if
// `HIVE_CONTEXT_WINDOW_TOKENS` is set above `u64::MAX / 3` — consider
// `saturating_mul` or dividing first.
crate::events::context_window_tokens(&bus.model()) * 3 / 4
}
/// Drive one turn end-to-end. Three paths layer on top of the raw `run_turn`:
@ -291,7 +278,7 @@ pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutco
/// checkpoint or compaction is logged + surfaced as a Note but never
/// fails the turn that already succeeded.
async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
let watermark = compact_watermark_tokens();
let watermark = compact_watermark_tokens(bus);
if watermark == 0 {
return; // proactive compaction disabled
}
@ -336,7 +323,7 @@ async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
/// any turn before the reset would re-upload and re-warm the cache, which
/// defeats the cost-optimisation purpose entirely.
fn maybe_auto_reset(bus: &Bus) {
let watermark = auto_reset_watermark_tokens();
let watermark = auto_reset_watermark_tokens(bus);
if watermark == 0 {
return; // auto-reset disabled
}

View file

@ -15,6 +15,45 @@
# only opts in from its own `agent.nix`.
imports = [ ./weston-vnc.nix ];
# Default claude model for the agent; exported to the harness as
# `HIVE_DEFAULT_MODEL`.
options.hyperhive.model = lib.mkOption {
  type = lib.types.str;
  default = "haiku";
  example = "sonnet";
  description = ''
    Default claude model for this agent. Sets the `HIVE_DEFAULT_MODEL`
    environment variable consumed by the harness at boot; if no
    persisted model choice exists in the agent's state dir, the harness
    falls back to this value. The operator can still switch the model at
    runtime via the per-agent web UI; that choice is persisted to the
    state dir and takes precedence over this default until the agent is
    purged.

    Valid values are the short model names that `claude --model` accepts:
    `"haiku"`, `"sonnet"`, `"opus"` (or any future identifier). The
    harness derives sensible watermarks from the model family:
    haiku: 200 000-token window; sonnet / opus: 1 000 000-token window.
    Override the derived window via `hyperhive.contextWindowTokens`.
  '';
};
# Explicit context-window override; `0` keeps the harness's model-name
# heuristic. Unsigned so a negative value is rejected at evaluation time
# instead of being exported and then silently ignored by the harness.
options.hyperhive.contextWindowTokens = lib.mkOption {
  type = lib.types.ints.unsigned;
  default = 0;
  example = 1000000;
  description = ''
    Context-window size in tokens for this agent's model. `0` (the
    default) means "auto-derive from the model name": haiku: 200 000;
    sonnet / opus: 1 000 000. Set an explicit value here when you are
    using a model the harness does not recognise, or when Anthropic
    changes the window for an existing model family.

    Sets the `HIVE_CONTEXT_WINDOW_TOKENS` environment variable; the
    harness reads it at runtime and uses it to compute the default
    compaction and auto-reset watermarks (75% and 50% of the window
    respectively).
  '';
};
options.hyperhive.allowedBashPatterns = lib.mkOption {
type = lib.types.listOf lib.types.str;
default = [ ];
@ -208,6 +247,16 @@
environment.etc."hyperhive/claude-plugins-auto-update.json".text =
builtins.toJSON config.hyperhive.claudePluginsAutoUpdate;
# Model + context-window env vars consumed by the harness at boot.
# HIVE_DEFAULT_MODEL seeds the initial model selection when no persisted
# model choice exists in the state dir. HIVE_CONTEXT_WINDOW_TOKENS
# overrides the auto-derived window size (only set when the NixOS option
# is non-zero so an unset env var lets the harness use its own heuristic).
#
# Both are assigned by full attribute path: assigning
# `environment.variables = lib.mkIf ...` next to
# `environment.variables.HIVE_DEFAULT_MODEL = ...` in the same attribute
# set is a duplicate-attribute error, because only attrset literals merge.
environment.variables.HIVE_DEFAULT_MODEL = config.hyperhive.model;
environment.variables.HIVE_CONTEXT_WINDOW_TOKENS =
  lib.mkIf (config.hyperhive.contextWindowTokens != 0)
    (toString config.hyperhive.contextWindowTokens);
boot.isNspawnContainer = true;
# Every agent gets flakes + the modern `nix` CLI out of the box.