model/context: defaults in nix module, no heuristic in rust

This commit is contained in:
damocles 2026-05-20 15:31:06 +02:00 committed by Mara
parent 770cbaccf9
commit 7e2f13cad8
2 changed files with 32 additions and 31 deletions

View file

@ -295,26 +295,22 @@ pub fn default_model() -> &'static str {
/// Context-window size in tokens for a given model name.
///
/// Recognises the model families that Claude Code supports:
/// - `haiku` family: 200 000 tokens
/// - `sonnet` / `opus` families: 1 000 000 tokens
/// - anything else: 200 000 tokens (conservative default)
///
/// Overridable at runtime via `HIVE_CONTEXT_WINDOW_TOKENS` (useful for
/// future models or when the operator knows the exact limit). The env
/// var takes precedence over the model-name heuristic.
/// Canonical per-model sizes are declared in `harness-base.nix` as
/// `hyperhive.contextWindowTokens` and injected as
/// `HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER>` env vars — so this function
/// normally just reads them. The Rust code carries no model knowledge;
/// updating model families only requires a Nix change.
///
/// Resolution order (first match wins):
/// 1. `HIVE_CONTEXT_WINDOW_TOKENS_<KEY>` — per-model override where KEY
/// (case-insensitive) is a substring of the active model name.
/// Set by `hyperhive.contextWindowTokens.<key>` in `agent.nix`.
/// 2. `HIVE_CONTEXT_WINDOW_TOKENS` — global override (any model).
/// 3. Auto-derive: haiku → 200 000, sonnet / opus → 1 000 000.
/// 1. `HIVE_CONTEXT_WINDOW_TOKENS_<KEY>` — key (lowercased) is a
/// substring of the active model name. Populated by the Nix default
/// map for all known families; add/override in `agent.nix`.
/// 2. `HIVE_CONTEXT_WINDOW_TOKENS` — single global override (any model).
/// 3. Hard fallback: `200_000` (conservative; used when no per-model key
/// matches and no global override is set, e.g. outside NixOS).
#[must_use]
pub fn context_window_tokens(model: &str) -> u64 {
let m = model.to_ascii_lowercase();
// Per-model overrides: HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER> where
// KEY (lowercased) must be a non-empty substring of the model name.
// Per-model env vars set by `hyperhive.contextWindowTokens` in Nix.
for (key, val) in std::env::vars() {
if let Some(suffix) = key.strip_prefix("HIVE_CONTEXT_WINDOW_TOKENS_") {
if !suffix.is_empty() && m.contains(&suffix.to_ascii_lowercase()) {
@ -326,7 +322,7 @@ pub fn context_window_tokens(model: &str) -> u64 {
}
}
}
// Global override.
// Global override (single value, any model).
if let Ok(s) = std::env::var("HIVE_CONTEXT_WINDOW_TOKENS") {
if let Ok(v) = s.trim().parse::<u64>() {
if v > 0 {
@ -334,13 +330,9 @@ pub fn context_window_tokens(model: &str) -> u64 {
}
}
}
// Auto-derive from model family.
if m.contains("sonnet") || m.contains("opus") {
1_000_000
} else {
// Hard fallback for dev/test outside NixOS where env vars aren't set.
200_000
}
}
#[derive(Clone)]
pub struct Bus {

View file

@ -38,26 +38,35 @@
options.hyperhive.contextWindowTokens = lib.mkOption {
type = lib.types.attrsOf lib.types.int;
default = { };
# Canonical defaults for known Anthropic model families.
# Override any entry in your agent.nix, or add new keys for
# model families not listed here.
default = {
haiku = 200000;
sonnet = 1000000;
opus = 1000000;
};
example = {
haiku = 150000;
sonnet = 900000;
};
description = ''
Per-model context-window overrides. Each attribute name is a
Per-model context-window sizes in tokens. Each key is a
model-family short name (e.g. `"haiku"`, `"sonnet"`) matched as a
case-insensitive substring of the active model name at runtime, so
`"sonnet"` matches `"claude-sonnet-4-5"` and any future variant.
Empty map (the default) means auto-derive: haiku 200 000,
sonnet / opus 1 000 000.
The defaults declared here cover the known Anthropic model families.
Add or override entries in your `agent.nix` when using a
non-standard model or when Anthropic changes a model's window.
Each entry is rendered as
`HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER>` (e.g.
`HIVE_CONTEXT_WINDOW_TOKENS_SONNET = "900000"`). The harness checks
these per-model vars first, then the global
`HIVE_CONTEXT_WINDOW_TOKENS`, then the model-family heuristic.
At runtime, the effective window drives compaction (75%) and
auto-reset (50%) watermarks, and is exposed via `/api/state` as
`HIVE_CONTEXT_WINDOW_TOKENS_SONNET = "1000000"`). The harness
checks these per-model vars first (first substring match wins), then
the global `HIVE_CONTEXT_WINDOW_TOKENS` override, and finally falls
back to `200000` when nothing matches. At runtime the
effective window drives compaction (75%) and auto-reset (50%)
watermarks, and is exposed via `/api/state` as
`context_window_tokens`.
'';
};