dashboard: derive ctx badge thresholds from the model context window

This commit is contained in:
iris 2026-05-21 22:52:32 +02:00 committed by Mara
parent cbd4b71322
commit 4a27ef7304
4 changed files with 122 additions and 26 deletions

View file

@ -186,6 +186,13 @@ guess from container state.
`ctx · Nk` chip showing the agent's last-turn context size `ctx · Nk` chip showing the agent's last-turn context size
(from `ContainerView.ctx_tokens`, read from the turn-stats (from `ContainerView.ctx_tokens`, read from the turn-stats
sqlite on each `build_all` sweep; absent until the first turn). sqlite on each `build_all` sweep; absent until the first turn).
The chip colour (green / yellow / red) is keyed off the model's
real context window: `build_all` resolves the last turn's model
against the host's per-model `contextWindowTokens` config and
exposes it as `ContainerView.context_window_tokens`; the badge
goes yellow ≥ 50% and red ≥ 75% of that window (the harness
compaction watermarks). When the window can't be resolved the
badge falls back to fixed 100k / 150k thresholds. (issue #66)
- Line 2: action buttons — `↻ R3BU1LD` always, `DESTR0Y` + `PURG3` - Line 2: action buttons — `↻ R3BU1LD` always, `DESTR0Y` + `PURG3`
on sub-agents, `↺ R3ST4RT` + (sub-agents) `■ ST0P` when running, on sub-agents, `↺ R3ST4RT` + (sub-agents) `■ ST0P` when running,
`▶ ST4RT` when stopped. Buttons dim + disable while a transient `▶ ST4RT` when stopped. Buttons dim + disable while a transient

View file

@ -4,11 +4,16 @@
(() => { (() => {
// ─── constants ────────────────────────────────────────────────────────── // ─── constants ──────────────────────────────────────────────────────────
// Context-window token thresholds — mirror the harness compaction watermarks // Context-window badge thresholds. Preferred source is each container's
// in hive-ag3nt (HIVE_COMPACT_WATERMARK_TOKENS = 150k; auto-reset at 100k). // `context_window_tokens` from /api/state (the real window for the model
// TODO: source these from model metadata once damocles lands that feature. // it last ran on) — thresholds are then 75% / 50% of it, matching the
const CTX_WARN_TOKENS = 150_000; // ≥ this → compact territory (red) // harness compaction watermarks (compact at 75%, auto-reset at 50%). The
const CTX_CAUTION_TOKENS = 100_000; // ≥ this → approaching reset (yellow) // fixed token constants are the fallback for when that field is absent
// (agent has no turns yet, or no per-model config matched the model).
const CTX_WARN_FRACTION = 0.75; // ≥ this share of the window → red
const CTX_CAUTION_FRACTION = 0.50; // ≥ this share of the window → yellow
const CTX_WARN_TOKENS = 150_000; // fallback red threshold (≈ 75% of 200k)
const CTX_CAUTION_TOKENS = 100_000; // fallback yellow threshold (≈ 50% of 200k)
// ─── helpers ──────────────────────────────────────────────────────────── // ─── helpers ────────────────────────────────────────────────────────────
const $ = (id) => document.getElementById(id); const $ = (id) => document.getElementById(id);
@ -680,14 +685,20 @@
} }
if (c.ctx_tokens != null) { if (c.ctx_tokens != null) {
const k = Math.round(c.ctx_tokens / 1000); const k = Math.round(c.ctx_tokens / 1000);
const ctxClass = c.ctx_tokens >= CTX_WARN_TOKENS ? 'badge-ctx-warn' // Thresholds track the model's real context window when the
: c.ctx_tokens >= CTX_CAUTION_TOKENS ? 'badge-ctx-caution' // backend supplies it; otherwise fall back to fixed constants.
const win = c.context_window_tokens;
const warn = win != null ? win * CTX_WARN_FRACTION : CTX_WARN_TOKENS;
const caution = win != null ? win * CTX_CAUTION_FRACTION : CTX_CAUTION_TOKENS;
const ctxClass = c.ctx_tokens >= warn ? 'badge-ctx-warn'
: c.ctx_tokens >= caution ? 'badge-ctx-caution'
: 'badge-ctx-ok'; : 'badge-ctx-ok';
const title = win != null
? `last turn context: ${c.ctx_tokens.toLocaleString()} / ${win.toLocaleString()} `
+ `tokens (${Math.round((c.ctx_tokens / win) * 100)}% of the window)`
: `last turn context size: ${c.ctx_tokens.toLocaleString()} tokens`;
head.append(el('span', head.append(el('span',
{ { class: `badge ${ctxClass}`, title },
class: `badge ${ctxClass}`,
title: `last turn context size: ${c.ctx_tokens.toLocaleString()} tokens`,
},
`ctx·${k}k`)); `ctx·${k}k`));
} }
body.append(head); body.append(head);

View file

@ -165,8 +165,11 @@ a:hover {
color: var(--cyan); border-color: var(--cyan); color: var(--cyan); border-color: var(--cyan);
text-shadow: 0 0 6px rgba(137, 220, 235, 0.4); text-shadow: 0 0 6px rgba(137, 220, 235, 0.4);
} }
/* Context-window usage badges on dashboard container rows. /* Context-window usage badges on dashboard container rows. Thresholds
Green < 100k, yellow 100–150k, red ≥ 150k (mirrors harness watermarks). */ are derived per-container: yellow ≥ 50% and red ≥ 75% of the model's
context window (`ContainerView.context_window_tokens`), mirroring the
harness compaction watermarks. Falls back to fixed 100k / 150k when
the window is unknown. (issue #66) */
.badge-ctx-ok { .badge-ctx-ok {
color: var(--green); border-color: var(--green); color: var(--green); border-color: var(--green);
opacity: 0.85; opacity: 0.85;

View file

@ -55,6 +55,15 @@ pub struct ContainerView {
/// the "which agent is close to the window?" dashboard glance. /// the "which agent is close to the window?" dashboard glance.
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub ctx_tokens: Option<u64>, pub ctx_tokens: Option<u64>,
/// Context-window size (tokens) for the model this agent ran on its
/// most recent turn — the model name from the last turn-stats row
/// resolved against the host's per-model `contextWindowTokens`
/// config. Lets the dashboard derive the ctx badge thresholds
/// (75% / 50% of the window, matching the harness compaction
/// watermarks) instead of hardcoding them. `None` when the agent
/// has no turns yet or no config key matches the model. (issue #66)
#[serde(default, skip_serializing_if = "Option::is_none")]
pub context_window_tokens: Option<u64>,
/// True while the harness is parked after an API rate-limit response. /// True while the harness is parked after an API rate-limit response.
/// Detected via the sentinel file `{state_dir}/hyperhive-rate-limited` /// Detected via the sentinel file `{state_dir}/hyperhive-rate-limited`
/// that the harness writes in `Bus::emit_status("rate_limited")` and /// that the harness writes in `Bus::emit_status("rate_limited")` and
@ -103,7 +112,11 @@ pub async fn build_all(coord: &Coordinator) -> Vec<ContainerView> {
.broker .broker
.count_pending_reminders_for(reminder_recipient) .count_pending_reminders_for(reminder_recipient)
.unwrap_or(0); .unwrap_or(0);
let ctx_tokens = read_last_ctx_tokens(&logical); let last_turn = read_last_turn(&logical);
let ctx_tokens = last_turn.as_ref().map(|(toks, _)| *toks);
let context_window_tokens = last_turn
.as_ref()
.and_then(|(_, model)| resolve_ctx_window(model, &coord.context_window_tokens));
let rate_limited = is_rate_limited(&logical); let rate_limited = is_rate_limited(&logical);
let extra_links = read_dashboard_links(&logical); let extra_links = read_dashboard_links(&logical);
out.push(ContainerView { out.push(ContainerView {
@ -117,6 +130,7 @@ pub async fn build_all(coord: &Coordinator) -> Vec<ContainerView> {
deployed_sha, deployed_sha,
pending_reminders, pending_reminders,
ctx_tokens, ctx_tokens,
context_window_tokens,
rate_limited, rate_limited,
extra_links, extra_links,
}); });
@ -158,16 +172,16 @@ fn is_rate_limited(name: &str) -> bool {
.exists() .exists()
} }
/// Read the most recent completed turn's context-window size (prompt /// Read the agent's most recent completed turn from its turn-stats
/// tokens) from the agent's turn-stats `SQLite`. Returns `None` when /// `SQLite`: the context-window size (prompt tokens) and the model name.
/// the file is absent or has no rows. Best-effort — any DB error /// Returns `None` when the file is absent or has no rows. Best-effort
/// silently yields `None` so a missing/corrupt file never blocks /// — any database error silently yields `None` so a missing or
/// `build_all`. /// corrupt file never blocks `build_all`.
/// ///
/// Context tokens are the sum of `last_input_tokens`, `last_cache_read_input_tokens`, /// Context tokens sum the prompt-side fields (`last_input_tokens`,
/// and `last_cache_creation_input_tokens`, mirroring /// `last_cache_read_input_tokens`, `last_cache_creation_input_tokens`),
/// `hive_ag3nt::events::TokenUsage::context_tokens`. /// mirroring `hive_ag3nt::events::TokenUsage::context_tokens`.
fn read_last_ctx_tokens(name: &str) -> Option<u64> { fn read_last_turn(name: &str) -> Option<(u64, String)> {
let path = Coordinator::agent_notes_dir(name).join("hyperhive-turn-stats.sqlite"); let path = Coordinator::agent_notes_dir(name).join("hyperhive-turn-stats.sqlite");
let conn = Connection::open_with_flags( let conn = Connection::open_with_flags(
&path, &path,
@ -175,13 +189,29 @@ fn read_last_ctx_tokens(name: &str) -> Option<u64> {
) )
.ok()?; .ok()?;
conn.query_row( conn.query_row(
"SELECT last_input_tokens + last_cache_read_input_tokens + last_cache_creation_input_tokens \ "SELECT last_input_tokens + last_cache_read_input_tokens + last_cache_creation_input_tokens, model \
FROM turn_stats ORDER BY started_at DESC LIMIT 1", FROM turn_stats ORDER BY started_at DESC LIMIT 1",
[], [],
|row| row.get::<_, i64>(0), |row| Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?)),
) )
.ok() .ok()
.and_then(|v| u64::try_from(v).ok()) .and_then(|(toks, model)| Some((u64::try_from(toks).ok()?, model)))
}
/// Resolve a model name to its context-window size using the host's
/// per-model `contextWindowTokens` config.
///
/// Matching follows the harness's `events::context_window_tokens`
/// substring rule: a config key (ASCII-lowercased, non-empty) matches
/// when it is a substring of the ASCII-lowercased model name.
///
/// When several keys match, the result is chosen deterministically
/// rather than by `HashMap` iteration order (which is randomized and
/// would let the badge thresholds flip between `build_all` sweeps):
///
/// 1. the longest key wins, so a specific entry such as
///    `claude-sonnet-4` overrides a family-wide `sonnet` entry;
/// 2. equal-length keys fall back to the lexicographically smallest key;
/// 3. keys that differ only in ASCII case fall back to the smaller window.
///
/// Entries with a zero window are ignored: a 0-token window carries no
/// information and would make every derived threshold 0.
///
/// Returns `None` when no usable key matches, in which case the
/// dashboard uses its fixed fallback thresholds.
///
/// NOTE(review): if the harness resolves overlapping keys differently,
/// align the two so the badge matches the real compaction watermarks.
fn resolve_ctx_window(model: &str, per_model: &HashMap<String, u64>) -> Option<u64> {
    let model = model.to_ascii_lowercase();
    per_model
        .iter()
        .filter_map(|(key, &tokens)| {
            let key = key.to_ascii_lowercase();
            // Skip keys that cannot match and windows that are unusable.
            if tokens == 0 || key.is_empty() || !model.contains(key.as_str()) {
                return None;
            }
            Some((key, tokens))
        })
        // "Greatest" candidate = longest key, then smallest key text,
        // then smallest window; every tie is broken, so the outcome
        // never depends on map iteration order.
        .max_by(|(key_a, tokens_a), (key_b, tokens_b)| {
            key_a
                .len()
                .cmp(&key_b.len())
                .then_with(|| key_b.cmp(key_a))
                .then_with(|| tokens_b.cmp(tokens_a))
        })
        .map(|(_, tokens)| tokens)
}
/// Map of `agent-<n>` → locked sha from meta's flake.lock. Used to /// Map of `agent-<n>` → locked sha from meta's flake.lock. Used to
@ -223,3 +253,48 @@ fn read_meta_locked_revs() -> HashMap<String, String> {
} }
out out
} }
#[cfg(test)]
mod tests {
    use super::resolve_ctx_window;
    use std::collections::HashMap;

    /// Window table with one key per model family, used by the
    /// resolution tests below.
    fn family_windows() -> HashMap<String, u64> {
        HashMap::from([
            (String::from("haiku"), 200_000),
            (String::from("sonnet"), 1_000_000),
            (String::from("opus"), 1_000_000),
        ])
    }

    /// A family key embedded anywhere in a full model id resolves to
    /// that family's window.
    #[test]
    fn resolves_family_substring() {
        let windows = family_windows();
        let cases = [
            ("claude-3-5-haiku-20241022", 200_000),
            ("claude-sonnet-4-5", 1_000_000),
            ("claude-opus-4-1", 1_000_000),
        ];
        for &(model, expected) in &cases {
            assert_eq!(
                resolve_ctx_window(model, &windows),
                Some(expected),
                "model: {}",
                model
            );
        }
    }

    /// Upper-case letters in the model name do not prevent a match.
    #[test]
    fn resolution_is_case_insensitive() {
        let resolved = resolve_ctx_window("Claude-Sonnet-4", &family_windows());
        assert_eq!(resolved, Some(1_000_000));
    }

    /// A model that contains none of the configured keys has no window.
    #[test]
    fn unknown_model_yields_none() {
        let resolved = resolve_ctx_window("some-other-llm", &family_windows());
        assert_eq!(resolved, None);
    }

    /// With nothing configured, nothing can match.
    #[test]
    fn empty_config_yields_none() {
        let no_windows: HashMap<String, u64> = HashMap::new();
        assert_eq!(resolve_ctx_window("claude-3-5-haiku", &no_windows), None);
    }

    /// An empty key is a substring of every string, so it must be
    /// skipped rather than matching every model.
    #[test]
    fn empty_key_is_skipped() {
        let only_empty_key = HashMap::from([(String::new(), 999)]);
        assert_eq!(resolve_ctx_window("claude-3-5-haiku", &only_empty_key), None);
    }
}