model/context: per-model ctx window overrides + expose window size in /api/state
This commit is contained in:
parent
9064cd3c57
commit
770cbaccf9
3 changed files with 68 additions and 23 deletions
|
|
@ -303,8 +303,30 @@ pub fn default_model() -> &'static str {
|
|||
/// Overridable at runtime via `HIVE_CONTEXT_WINDOW_TOKENS` (useful for
|
||||
/// future models or when the operator knows the exact limit). The env
|
||||
/// var takes precedence over the model-name heuristic.
|
||||
///
|
||||
/// Resolution order (first match wins):
|
||||
/// 1. `HIVE_CONTEXT_WINDOW_TOKENS_<KEY>` — per-model override where KEY
|
||||
/// (case-insensitive) is a substring of the active model name.
|
||||
/// Set by `hyperhive.contextWindowTokens.<key>` in `agent.nix`.
|
||||
/// 2. `HIVE_CONTEXT_WINDOW_TOKENS` — global override (any model).
|
||||
/// 3. Auto-derive: haiku → 200 000, sonnet / opus → 1 000 000.
|
||||
#[must_use]
|
||||
pub fn context_window_tokens(model: &str) -> u64 {
|
||||
let m = model.to_ascii_lowercase();
|
||||
// Per-model overrides: HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER> where
|
||||
// KEY (lowercased) must be a non-empty substring of the model name.
|
||||
for (key, val) in std::env::vars() {
|
||||
if let Some(suffix) = key.strip_prefix("HIVE_CONTEXT_WINDOW_TOKENS_") {
|
||||
if !suffix.is_empty() && m.contains(&suffix.to_ascii_lowercase()) {
|
||||
if let Ok(v) = val.trim().parse::<u64>() {
|
||||
if v > 0 {
|
||||
return v;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Global override.
|
||||
if let Ok(s) = std::env::var("HIVE_CONTEXT_WINDOW_TOKENS") {
|
||||
if let Ok(v) = s.trim().parse::<u64>() {
|
||||
if v > 0 {
|
||||
|
|
@ -312,7 +334,7 @@ pub fn context_window_tokens(model: &str) -> u64 {
|
|||
}
|
||||
}
|
||||
}
|
||||
let m = model.to_ascii_lowercase();
|
||||
// Auto-derive from model family.
|
||||
if m.contains("sonnet") || m.contains("opus") {
|
||||
1_000_000
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -357,6 +357,12 @@ struct StateSnapshot {
|
|||
/// the operator can see what they just switched to (and what's
|
||||
/// in flight). Mutable at runtime via `POST /api/model`.
|
||||
model: String,
|
||||
/// Effective context-window token budget for the current model.
|
||||
/// Derived from `events::context_window_tokens(&model)` — respects
|
||||
/// per-model / global `HIVE_CONTEXT_WINDOW_TOKENS[_<KEY>]` overrides, then
|
||||
/// falls back to the model-family heuristic. Consumers (e.g. the dashboard
|
||||
/// badge) use this to render the ctx-usage percentage.
|
||||
context_window_tokens: u64,
|
||||
/// Last-inference token usage from the most recent completed
|
||||
/// turn — represents the current context-window size at turn-end.
|
||||
/// `null` until the first turn finishes.
|
||||
|
|
@ -451,6 +457,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
|
|||
let inbox = recent_inbox(&state.socket, state.flavor()).await;
|
||||
let (turn_state, turn_state_since) = state.bus.state_snapshot();
|
||||
let model = state.bus.model();
|
||||
let context_window_tokens = crate::events::context_window_tokens(&model);
|
||||
let ctx_usage = state.bus.last_ctx_usage();
|
||||
let cost_usage = state.bus.last_cost_usage();
|
||||
axum::Json(StateSnapshot {
|
||||
|
|
@ -463,6 +470,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
|
|||
turn_state,
|
||||
turn_state_since,
|
||||
model,
|
||||
context_window_tokens,
|
||||
ctx_usage,
|
||||
cost_usage,
|
||||
gui_enabled: state.gui_vnc_port.is_some(),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue