model/context: per-model ctx window overrides + expose window size in /api/state
This commit is contained in:
parent
9064cd3c57
commit
770cbaccf9
3 changed files with 68 additions and 23 deletions
|
|
@ -357,6 +357,12 @@ struct StateSnapshot {
|
|||
/// the operator can see what they just switched to (and what's
|
||||
/// in flight). Mutable at runtime via `POST /api/model`.
|
||||
model: String,
|
||||
/// Effective context-window token budget for the current model.
|
||||
/// Derived from `events::context_window_tokens(&model)` — respects
|
||||
/// per-model and global `HIVE_CONTEXT_WINDOW_TOKENS_*` overrides, then
|
||||
/// falls back to a model-family heuristic. Consumers (e.g. the dashboard
|
||||
/// badge) use this to render the ctx-usage percentage.
|
||||
context_window_tokens: u64,
|
||||
/// Last-inference token usage from the most recent completed
|
||||
/// turn — represents the current context-window size at turn-end.
|
||||
/// `null` until the first turn finishes.
|
||||
|
|
@ -451,6 +457,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
|
|||
let inbox = recent_inbox(&state.socket, state.flavor()).await;
|
||||
let (turn_state, turn_state_since) = state.bus.state_snapshot();
|
||||
let model = state.bus.model();
|
||||
let context_window_tokens = crate::events::context_window_tokens(&model);
|
||||
let ctx_usage = state.bus.last_ctx_usage();
|
||||
let cost_usage = state.bus.last_cost_usage();
|
||||
axum::Json(StateSnapshot {
|
||||
|
|
@ -463,6 +470,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
|
|||
turn_state,
|
||||
turn_state_since,
|
||||
model,
|
||||
context_window_tokens,
|
||||
ctx_usage,
|
||||
cost_usage,
|
||||
gui_enabled: state.gui_vnc_port.is_some(),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue