model/context: per-model ctx window overrides + expose window size in /api/state
This commit is contained in:
parent
9064cd3c57
commit
770cbaccf9
3 changed files with 68 additions and 23 deletions
|
|
@ -303,8 +303,30 @@ pub fn default_model() -> &'static str {
|
||||||
/// Overridable at runtime via `HIVE_CONTEXT_WINDOW_TOKENS` (useful for
|
/// Overridable at runtime via `HIVE_CONTEXT_WINDOW_TOKENS` (useful for
|
||||||
/// future models or when the operator knows the exact limit). The env
|
/// future models or when the operator knows the exact limit). The env
|
||||||
/// var takes precedence over the model-name heuristic.
|
/// var takes precedence over the model-name heuristic.
|
||||||
|
///
|
||||||
|
/// Resolution order (first match wins):
|
||||||
|
/// 1. `HIVE_CONTEXT_WINDOW_TOKENS_<KEY>` — per-model override where KEY
|
||||||
|
/// (case-insensitive) is a substring of the active model name.
|
||||||
|
/// Set by `hyperhive.contextWindowTokens.<key>` in `agent.nix`.
|
||||||
|
/// 2. `HIVE_CONTEXT_WINDOW_TOKENS` — global override (any model).
|
||||||
|
/// 3. Auto-derive: haiku → 200 000, sonnet / opus → 1 000 000.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn context_window_tokens(model: &str) -> u64 {
|
pub fn context_window_tokens(model: &str) -> u64 {
|
||||||
|
let m = model.to_ascii_lowercase();
|
||||||
|
// Per-model overrides: HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER> where
|
||||||
|
// KEY (lowercased) must be a non-empty substring of the model name.
|
||||||
|
for (key, val) in std::env::vars() {
|
||||||
|
if let Some(suffix) = key.strip_prefix("HIVE_CONTEXT_WINDOW_TOKENS_") {
|
||||||
|
if !suffix.is_empty() && m.contains(&suffix.to_ascii_lowercase()) {
|
||||||
|
if let Ok(v) = val.trim().parse::<u64>() {
|
||||||
|
if v > 0 {
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Global override.
|
||||||
if let Ok(s) = std::env::var("HIVE_CONTEXT_WINDOW_TOKENS") {
|
if let Ok(s) = std::env::var("HIVE_CONTEXT_WINDOW_TOKENS") {
|
||||||
if let Ok(v) = s.trim().parse::<u64>() {
|
if let Ok(v) = s.trim().parse::<u64>() {
|
||||||
if v > 0 {
|
if v > 0 {
|
||||||
|
|
@ -312,7 +334,7 @@ pub fn context_window_tokens(model: &str) -> u64 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let m = model.to_ascii_lowercase();
|
// Auto-derive from model family.
|
||||||
if m.contains("sonnet") || m.contains("opus") {
|
if m.contains("sonnet") || m.contains("opus") {
|
||||||
1_000_000
|
1_000_000
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -357,6 +357,12 @@ struct StateSnapshot {
|
||||||
/// the operator can see what they just switched to (and what's
|
/// the operator can see what they just switched to (and what's
|
||||||
/// in flight). Mutable at runtime via `POST /api/model`.
|
/// in flight). Mutable at runtime via `POST /api/model`.
|
||||||
model: String,
|
model: String,
|
||||||
|
/// Effective context-window token budget for the current model.
|
||||||
|
/// Derived from `events::context_window_tokens(&model)` — respects
|
||||||
|
/// per-model `HIVE_CONTEXT_WINDOW_TOKENS_<KEY>` and global `HIVE_CONTEXT_WINDOW_TOKENS` overrides, then
|
||||||
|
/// falls back to model-family heuristic. Consumers (e.g. dashboard
|
||||||
|
/// badge) use this to render the ctx-usage percentage.
|
||||||
|
context_window_tokens: u64,
|
||||||
/// Last-inference token usage from the most recent completed
|
/// Last-inference token usage from the most recent completed
|
||||||
/// turn — represents the current context-window size at turn-end.
|
/// turn — represents the current context-window size at turn-end.
|
||||||
/// `null` until the first turn finishes.
|
/// `null` until the first turn finishes.
|
||||||
|
|
@ -451,6 +457,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
|
||||||
let inbox = recent_inbox(&state.socket, state.flavor()).await;
|
let inbox = recent_inbox(&state.socket, state.flavor()).await;
|
||||||
let (turn_state, turn_state_since) = state.bus.state_snapshot();
|
let (turn_state, turn_state_since) = state.bus.state_snapshot();
|
||||||
let model = state.bus.model();
|
let model = state.bus.model();
|
||||||
|
let context_window_tokens = crate::events::context_window_tokens(&model);
|
||||||
let ctx_usage = state.bus.last_ctx_usage();
|
let ctx_usage = state.bus.last_ctx_usage();
|
||||||
let cost_usage = state.bus.last_cost_usage();
|
let cost_usage = state.bus.last_cost_usage();
|
||||||
axum::Json(StateSnapshot {
|
axum::Json(StateSnapshot {
|
||||||
|
|
@ -463,6 +470,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
|
||||||
turn_state,
|
turn_state,
|
||||||
turn_state_since,
|
turn_state_since,
|
||||||
model,
|
model,
|
||||||
|
context_window_tokens,
|
||||||
ctx_usage,
|
ctx_usage,
|
||||||
cost_usage,
|
cost_usage,
|
||||||
gui_enabled: state.gui_vnc_port.is_some(),
|
gui_enabled: state.gui_vnc_port.is_some(),
|
||||||
|
|
|
||||||
|
|
@ -37,20 +37,28 @@
|
||||||
};
|
};
|
||||||
|
|
||||||
options.hyperhive.contextWindowTokens = lib.mkOption {
|
options.hyperhive.contextWindowTokens = lib.mkOption {
|
||||||
type = lib.types.int;
|
type = lib.types.attrsOf lib.types.int;
|
||||||
default = 0;
|
default = { };
|
||||||
example = 1000000;
|
example = {
|
||||||
|
haiku = 150000;
|
||||||
|
sonnet = 900000;
|
||||||
|
};
|
||||||
description = ''
|
description = ''
|
||||||
Context-window size in tokens for this agent's model. `0` (the
|
Per-model context-window overrides. Each attribute name is a
|
||||||
default) means "auto-derive from the model name": haiku → 200 000,
|
model-family short name (e.g. `"haiku"`, `"sonnet"`) matched as a
|
||||||
sonnet / opus → 1 000 000. Set an explicit value here when you are
|
case-insensitive substring of the active model name at runtime, so
|
||||||
using a model the harness does not recognise, or when Anthropic
|
`"sonnet"` matches `"claude-sonnet-4-5"` and any future variant.
|
||||||
changes the window for an existing model family.
|
Empty map (the default) means auto-derive: haiku → 200 000,
|
||||||
|
sonnet / opus → 1 000 000.
|
||||||
|
|
||||||
Sets the `HIVE_CONTEXT_WINDOW_TOKENS` environment variable; the
|
Each entry is rendered as
|
||||||
harness reads it at runtime and uses it to compute the default
|
`HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER>` (e.g.
|
||||||
compaction and auto-reset watermarks (75% and 50% of the window
|
`HIVE_CONTEXT_WINDOW_TOKENS_SONNET = "900000"`). The harness checks
|
||||||
respectively).
|
these per-model vars first, then the global
|
||||||
|
`HIVE_CONTEXT_WINDOW_TOKENS`, then the model-family heuristic.
|
||||||
|
At runtime, the effective window drives compaction (75%) and
|
||||||
|
auto-reset (50%) watermarks, and is exposed via `/api/state` as
|
||||||
|
`context_window_tokens`.
|
||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -249,13 +257,23 @@
|
||||||
|
|
||||||
# Model + context-window env vars consumed by the harness at boot.
|
# Model + context-window env vars consumed by the harness at boot.
|
||||||
# HIVE_DEFAULT_MODEL seeds the initial model selection when no persisted
|
# HIVE_DEFAULT_MODEL seeds the initial model selection when no persisted
|
||||||
# model choice exists in the state dir. HIVE_CONTEXT_WINDOW_TOKENS
|
# model choice exists in the state dir.
|
||||||
# overrides the auto-derived window size (only set when the NixOS option
|
# HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER> provides per-model overrides
|
||||||
# is non-zero so an unset env var lets the harness use its own heuristic).
|
# (e.g. HIVE_CONTEXT_WINDOW_TOKENS_SONNET) from the contextWindowTokens attrset.
|
||||||
environment.variables.HIVE_DEFAULT_MODEL = config.hyperhive.model;
|
# SHELL must be set so claude's Bash tool finds a POSIX shell.
|
||||||
environment.variables = lib.mkIf (config.hyperhive.contextWindowTokens != 0) {
|
environment.variables = lib.mkMerge (
|
||||||
HIVE_CONTEXT_WINDOW_TOKENS = toString config.hyperhive.contextWindowTokens;
|
[
|
||||||
};
|
{
|
||||||
|
HIVE_DEFAULT_MODEL = config.hyperhive.model;
|
||||||
|
SHELL = "${pkgs.bashInteractive}/bin/bash";
|
||||||
|
}
|
||||||
|
]
|
||||||
|
++ lib.mapAttrsToList
|
||||||
|
(model: tokens: {
|
||||||
|
"HIVE_CONTEXT_WINDOW_TOKENS_${lib.toUpper model}" = toString tokens;
|
||||||
|
})
|
||||||
|
config.hyperhive.contextWindowTokens
|
||||||
|
);
|
||||||
|
|
||||||
boot.isNspawnContainer = true;
|
boot.isNspawnContainer = true;
|
||||||
|
|
||||||
|
|
@ -356,9 +374,6 @@
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
# claude's Bash tool refuses to run without a POSIX shell + $SHELL set.
|
|
||||||
environment.variables.SHELL = "${pkgs.bashInteractive}/bin/bash";
|
|
||||||
|
|
||||||
system.stateVersion = "25.11";
|
system.stateVersion = "25.11";
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue