model/context: per-model ctx window overrides + expose window size in /api/state

2026-05-20 15:20:07 +02:00 · 2026-05-20 15:20:07 +02:00 · 770cbaccf9
commit 770cbaccf9
parent 9064cd3c57
3 changed files with 68 additions and 23 deletions
--- a/hive-ag3nt/src/events.rs
+++ b/hive-ag3nt/src/events.rs
@ -303,8 +303,30 @@ pub fn default_model() -> &'static str {
 /// Overridable at runtime via `HIVE_CONTEXT_WINDOW_TOKENS` (useful for
 /// future models or when the operator knows the exact limit). The env
 /// var takes precedence over the model-name heuristic.
+///
+/// Resolution order (first match wins):
+/// 1. `HIVE_CONTEXT_WINDOW_TOKENS_<KEY>` — per-model override where KEY (case-insensitive)
+///    is a substring of the active model name; on multiple matches, env iteration order decides.
+///    Set by `hyperhive.contextWindowTokens.<key>` in `agent.nix`.
+/// 2. `HIVE_CONTEXT_WINDOW_TOKENS` — global override (any model).
+/// 3. Auto-derive: haiku → 200 000, sonnet / opus → 1 000 000.
 #[must_use]
 pub fn context_window_tokens(model: &str) -> u64 {
+    let m = model.to_ascii_lowercase();
+    // Per-model overrides: HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER> where
+    // KEY (lowercased) must be a non-empty substring of the model name.
+    for (key, val) in std::env::vars() {
+        if let Some(suffix) = key.strip_prefix("HIVE_CONTEXT_WINDOW_TOKENS_") {
+            if !suffix.is_empty() && m.contains(&suffix.to_ascii_lowercase()) {
+                if let Ok(v) = val.trim().parse::<u64>() {
+                    if v > 0 {
+                        return v;
+                    }
+                }
+            }
+        }
+    }
+    // Global override.
    if let Ok(s) = std::env::var("HIVE_CONTEXT_WINDOW_TOKENS") {
        if let Ok(v) = s.trim().parse::<u64>() {
            if v > 0 {
@ -312,7 +334,7 @@ pub fn context_window_tokens(model: &str) -> u64 {
            }
        }
    }
-    let m = model.to_ascii_lowercase();
+    // Auto-derive from model family.
    if m.contains("sonnet") || m.contains("opus") {
        1_000_000
    } else {
--- a/hive-ag3nt/src/web_ui.rs
+++ b/hive-ag3nt/src/web_ui.rs
@ -357,6 +357,12 @@ struct StateSnapshot {
    /// the operator can see what they just switched to (and what's
    /// in flight). Mutable at runtime via `POST /api/model`.
    model: String,
+    /// Effective context-window token budget for the current model.
+    /// Derived from `events::context_window_tokens(&model)`: per-model
+    /// `HIVE_CONTEXT_WINDOW_TOKENS_<KEY>` overrides win, then the global
+    /// `HIVE_CONTEXT_WINDOW_TOKENS`, then the model-family heuristic.
+    /// Consumers (e.g. dashboard badge) use this to render the ctx-usage percentage.
+    context_window_tokens: u64,
    /// Last-inference token usage from the most recent completed
    /// turn — represents the current context-window size at turn-end.
    /// `null` until the first turn finishes.
@ -451,6 +457,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
    let inbox = recent_inbox(&state.socket, state.flavor()).await;
    let (turn_state, turn_state_since) = state.bus.state_snapshot();
    let model = state.bus.model();
+    let context_window_tokens = crate::events::context_window_tokens(&model);
    let ctx_usage = state.bus.last_ctx_usage();
    let cost_usage = state.bus.last_cost_usage();
    axum::Json(StateSnapshot {
@ -463,6 +470,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
        turn_state,
        turn_state_since,
        model,
+        context_window_tokens,
        ctx_usage,
        cost_usage,
        gui_enabled: state.gui_vnc_port.is_some(),