model/context: per-model ctx window overrides + expose window size in /api/state

This commit is contained in:
damocles 2026-05-20 15:20:07 +02:00 committed by Mara
parent 9064cd3c57
commit 770cbaccf9
3 changed files with 68 additions and 23 deletions

View file

@ -357,6 +357,12 @@ struct StateSnapshot {
/// the operator can see what they just switched to (and what's
/// in flight). Mutable at runtime via `POST /api/model`.
model: String,
/// Effective context-window token budget for the current model.
/// Derived from `events::context_window_tokens(&model)`, which honours
/// the per-model and global `HIVE_CONTEXT_WINDOW_TOKENS_*` overrides and
/// then falls back to a model-family heuristic. Consumers (e.g. the
/// dashboard badge) use this to render the ctx-usage percentage.
context_window_tokens: u64,
/// Last-inference token usage from the most recent completed
/// turn — represents the current context-window size at turn-end.
/// `null` until the first turn finishes.
@ -451,6 +457,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
let inbox = recent_inbox(&state.socket, state.flavor()).await;
let (turn_state, turn_state_since) = state.bus.state_snapshot();
let model = state.bus.model();
let context_window_tokens = crate::events::context_window_tokens(&model);
let ctx_usage = state.bus.last_ctx_usage();
let cost_usage = state.bus.last_cost_usage();
axum::Json(StateSnapshot {
@ -463,6 +470,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
turn_state,
turn_state_since,
model,
context_window_tokens,
ctx_usage,
cost_usage,
gui_enabled: state.gui_vnc_port.is_some(),