model/context: configurable default model + model-derived context window

This commit is contained in:
damocles 2026-05-20 15:12:37 +02:00 committed by Mara
parent 67f948028c
commit 9064cd3c57
3 changed files with 117 additions and 43 deletions

View file

@ -275,13 +275,51 @@ pub enum TurnState {
Compacting,
}
/// Built-in fallback claude model, used when nothing else selects one.
///
/// This constant is only the last resort: [`default_model`] returns the
/// `HIVE_DEFAULT_MODEL` env var instead when that variable is set to a
/// non-empty string (set from `hyperhive.model` in the container's
/// `agent.nix`). The operator can also switch at runtime via
/// `/model <name>` in the web terminal; the chosen model is persisted to
/// the state dir so it survives restarts.
///
/// NOTE(review): older wording here said the runtime choice reset on
/// restart by design. That looks stale now that a stored model is loaded
/// via `load_model()` before falling back to the default — confirm.
pub const DEFAULT_MODEL: &str = "haiku";
/// Return the initial default model name.
///
/// Resolution order:
/// 1. the `HIVE_DEFAULT_MODEL` env var, with surrounding whitespace
///    trimmed, if it is set to a non-blank string;
/// 2. otherwise [`DEFAULT_MODEL`].
///
/// The environment is read only on the first call. The resolved name is
/// cached for the rest of the process's life, so the override string is
/// leaked at most once regardless of how often this function is called.
#[must_use]
pub fn default_model() -> &'static str {
    // Process-wide cache: guarantees the `Box::leak` below runs at most once.
    static RESOLVED: std::sync::OnceLock<&'static str> = std::sync::OnceLock::new();

    *RESOLVED.get_or_init(|| {
        let configured = std::env::var("HIVE_DEFAULT_MODEL")
            .ok()
            // Return the trimmed value, not just validate it: a padded env
            // var must not produce a model name with stray whitespace.
            .map(|raw| raw.trim().to_owned())
            .filter(|name| !name.is_empty());

        match configured {
            Some(name) => {
                // Single deliberate leak to obtain a `'static` borrow for
                // this one config value.
                let leaked: &'static str = Box::leak(name.into_boxed_str());
                leaked
            }
            None => DEFAULT_MODEL,
        }
    })
}
/// Context-window size, in tokens, to assume for `model`.
///
/// Lookup order:
/// 1. `HIVE_CONTEXT_WINDOW_TOKENS`, when it is set and its trimmed value
///    parses as a `u64` greater than zero. This takes precedence over the
///    model name, which is useful for future models or when the operator
///    knows the exact limit.
/// 2. Otherwise a case-insensitive substring match on the model name:
///    - contains `sonnet` or `opus`: 1 000 000 tokens
///    - anything else (including the `haiku` family): 200 000 tokens,
///      the conservative default.
#[must_use]
pub fn context_window_tokens(model: &str) -> u64 {
    const LARGE_WINDOW: u64 = 1_000_000;
    const STANDARD_WINDOW: u64 = 200_000;

    // An unset, non-numeric, or zero override falls through to the heuristic.
    let env_override = std::env::var("HIVE_CONTEXT_WINDOW_TOKENS")
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        .filter(|&tokens| tokens > 0);
    if let Some(tokens) = env_override {
        return tokens;
    }

    let lowered = model.to_ascii_lowercase();
    let is_large_family = ["sonnet", "opus"]
        .iter()
        .any(|&family| lowered.contains(family));

    if is_large_family {
        LARGE_WINDOW
    } else {
        STANDARD_WINDOW
    }
}
#[derive(Clone)]
pub struct Bus {
tx: Arc<broadcast::Sender<BusEvent>>,
@ -351,7 +389,7 @@ impl Bus {
}
};
let (tx, _) = broadcast::channel(CHANNEL_CAPACITY);
let initial_model = load_model().unwrap_or_else(|| DEFAULT_MODEL.to_owned());
let initial_model = load_model().unwrap_or_else(|| default_model().to_owned());
// Restore rate_limited from the sentinel file — if the harness
// crashed while parked, we should still show the right status on
// cold load until the next turn clears it.