dashboard: derive ctx badge thresholds from the model context window
This commit is contained in:
parent
cbd4b71322
commit
4a27ef7304
4 changed files with 122 additions and 26 deletions
|
|
@ -4,11 +4,16 @@
|
|||
|
||||
(() => {
|
||||
// ─── constants ──────────────────────────────────────────────────────────
|
||||
// Context-window token thresholds — mirror the harness compaction watermarks
|
||||
// in hive-ag3nt (HIVE_COMPACT_WATERMARK_TOKENS = 150k; auto-reset at 100k).
|
||||
// TODO: source these from model metadata once damocles lands that feature.
|
||||
const CTX_WARN_TOKENS = 150_000; // ≥ this → compact territory (red)
|
||||
const CTX_CAUTION_TOKENS = 100_000; // ≥ this → approaching reset (yellow)
|
||||
// Context-window badge thresholds. Preferred source is each container's
|
||||
// `context_window_tokens` from /api/state (the real window for the model
|
||||
// it last ran on) — thresholds are then 75% / 50% of it, matching the
|
||||
// harness compaction watermarks (compact at 75%, auto-reset at 50%). The
|
||||
// fixed token constants are the fallback for when that field is absent
|
||||
// (agent has no turns yet, or no per-model config matched the model).
|
||||
const CTX_WARN_FRACTION = 0.75; // ≥ this share of the window → red
|
||||
const CTX_CAUTION_FRACTION = 0.50; // ≥ this share of the window → yellow
|
||||
const CTX_WARN_TOKENS = 150_000; // fallback red threshold (≈ 75% of 200k)
|
||||
const CTX_CAUTION_TOKENS = 100_000; // fallback yellow threshold (≈ 50% of 200k)
|
||||
|
||||
// ─── helpers ────────────────────────────────────────────────────────────
|
||||
const $ = (id) => document.getElementById(id);
|
||||
|
|
@ -680,14 +685,20 @@
|
|||
}
|
||||
if (c.ctx_tokens != null) {
|
||||
const k = Math.round(c.ctx_tokens / 1000);
|
||||
const ctxClass = c.ctx_tokens >= CTX_WARN_TOKENS ? 'badge-ctx-warn'
|
||||
: c.ctx_tokens >= CTX_CAUTION_TOKENS ? 'badge-ctx-caution'
|
||||
// Thresholds track the model's real context window when the
|
||||
// backend supplies it; otherwise fall back to fixed constants.
|
||||
// Treat a missing, zero, negative, or non-numeric window as "unknown".
// A 0-token window would put both thresholds at 0 (every badge red, since
// any token count is ≥ 0) and make the tooltip percentage divide by zero.
// Normalising to null here keeps every later `win != null` check valid.
const win = Number.isFinite(c.context_window_tokens) && c.context_window_tokens > 0
  ? c.context_window_tokens
  : null;
// Known window → thresholds are fractions of it; unknown → fixed fallbacks.
const warn = win != null ? win * CTX_WARN_FRACTION : CTX_WARN_TOKENS;
const caution = win != null ? win * CTX_CAUTION_FRACTION : CTX_CAUTION_TOKENS;
|
||||
const ctxClass = c.ctx_tokens >= warn ? 'badge-ctx-warn'
|
||||
: c.ctx_tokens >= caution ? 'badge-ctx-caution'
|
||||
: 'badge-ctx-ok';
|
||||
const title = win != null
|
||||
? `last turn context: ${c.ctx_tokens.toLocaleString()} / ${win.toLocaleString()} `
|
||||
+ `tokens (${Math.round((c.ctx_tokens / win) * 100)}% of the window)`
|
||||
: `last turn context size: ${c.ctx_tokens.toLocaleString()} tokens`;
|
||||
head.append(el('span',
|
||||
{
|
||||
class: `badge ${ctxClass}`,
|
||||
title: `last turn context size: ${c.ctx_tokens.toLocaleString()} tokens`,
|
||||
},
|
||||
{ class: `badge ${ctxClass}`, title },
|
||||
`ctx·${k}k`));
|
||||
}
|
||||
body.append(head);
|
||||
|
|
|
|||
|
|
@ -165,8 +165,11 @@ a:hover {
|
|||
color: var(--cyan); border-color: var(--cyan);
|
||||
text-shadow: 0 0 6px rgba(137, 220, 235, 0.4);
|
||||
}
|
||||
/* Context-window usage badges on dashboard container rows.
|
||||
Green < 100k, yellow 100–150k, red ≥ 150k (mirrors harness watermarks). */
|
||||
/* Context-window usage badges on dashboard container rows. Thresholds
|
||||
are derived per-container: yellow ≥ 50% and red ≥ 75% of the model's
|
||||
context window (`ContainerView.context_window_tokens`), mirroring the
|
||||
harness compaction watermarks. Falls back to fixed 100k / 150k when
|
||||
the window is unknown. (issue #66) */
|
||||
.badge-ctx-ok {
|
||||
color: var(--green); border-color: var(--green);
|
||||
opacity: 0.85;
|
||||
|
|
|
|||
|
|
@ -55,6 +55,15 @@ pub struct ContainerView {
|
|||
/// the "which agent is close to the window?" dashboard glance.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub ctx_tokens: Option<u64>,
|
||||
/// Context-window size (tokens) for the model this agent ran on its
|
||||
/// most recent turn — the model name from the last turn-stats row
|
||||
/// resolved against the host's per-model `contextWindowTokens`
|
||||
/// config. Lets the dashboard derive the ctx badge thresholds
|
||||
/// (75% / 50% of the window, matching the harness compaction
|
||||
/// watermarks) instead of hardcoding them. `None` when the agent
|
||||
/// has no turns yet or no config key matches the model. (issue #66)
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub context_window_tokens: Option<u64>,
|
||||
/// True while the harness is parked after an API rate-limit response.
|
||||
/// Detected via the sentinel file `{state_dir}/hyperhive-rate-limited`
|
||||
/// that the harness writes in `Bus::emit_status("rate_limited")` and
|
||||
|
|
@ -103,7 +112,11 @@ pub async fn build_all(coord: &Coordinator) -> Vec<ContainerView> {
|
|||
.broker
|
||||
.count_pending_reminders_for(reminder_recipient)
|
||||
.unwrap_or(0);
|
||||
let ctx_tokens = read_last_ctx_tokens(&logical);
|
||||
let last_turn = read_last_turn(&logical);
|
||||
let ctx_tokens = last_turn.as_ref().map(|(toks, _)| *toks);
|
||||
let context_window_tokens = last_turn
|
||||
.as_ref()
|
||||
.and_then(|(_, model)| resolve_ctx_window(model, &coord.context_window_tokens));
|
||||
let rate_limited = is_rate_limited(&logical);
|
||||
let extra_links = read_dashboard_links(&logical);
|
||||
out.push(ContainerView {
|
||||
|
|
@ -117,6 +130,7 @@ pub async fn build_all(coord: &Coordinator) -> Vec<ContainerView> {
|
|||
deployed_sha,
|
||||
pending_reminders,
|
||||
ctx_tokens,
|
||||
context_window_tokens,
|
||||
rate_limited,
|
||||
extra_links,
|
||||
});
|
||||
|
|
@ -158,16 +172,16 @@ fn is_rate_limited(name: &str) -> bool {
|
|||
.exists()
|
||||
}
|
||||
|
||||
/// Read the most recent completed turn's context-window size (prompt
|
||||
/// tokens) from the agent's turn-stats `SQLite`. Returns `None` when
|
||||
/// the file is absent or has no rows. Best-effort — any DB error
|
||||
/// silently yields `None` so a missing/corrupt file never blocks
|
||||
/// `build_all`.
|
||||
/// Read the agent's most recent completed turn from its turn-stats
|
||||
/// `SQLite`: the context-window size (prompt tokens) and the model name.
|
||||
/// Returns `None` when the file is absent or has no rows. Best-effort
|
||||
/// — any database error silently yields `None` so a missing or
|
||||
/// corrupt file never blocks `build_all`.
|
||||
///
|
||||
/// Context tokens are the sum of `last_input_tokens`, `last_cache_read_input_tokens`,
|
||||
/// and `last_cache_creation_input_tokens`, mirroring
|
||||
/// `hive_ag3nt::events::TokenUsage::context_tokens`.
|
||||
fn read_last_ctx_tokens(name: &str) -> Option<u64> {
|
||||
/// Context tokens sum the prompt-side fields (`last_input_tokens`,
|
||||
/// `last_cache_read_input_tokens`, `last_cache_creation_input_tokens`),
|
||||
/// mirroring `hive_ag3nt::events::TokenUsage::context_tokens`.
|
||||
fn read_last_turn(name: &str) -> Option<(u64, String)> {
|
||||
let path = Coordinator::agent_notes_dir(name).join("hyperhive-turn-stats.sqlite");
|
||||
let conn = Connection::open_with_flags(
|
||||
&path,
|
||||
|
|
@ -175,13 +189,29 @@ fn read_last_ctx_tokens(name: &str) -> Option<u64> {
|
|||
)
|
||||
.ok()?;
|
||||
conn.query_row(
|
||||
"SELECT last_input_tokens + last_cache_read_input_tokens + last_cache_creation_input_tokens \
|
||||
"SELECT last_input_tokens + last_cache_read_input_tokens + last_cache_creation_input_tokens, model \
|
||||
FROM turn_stats ORDER BY started_at DESC LIMIT 1",
|
||||
[],
|
||||
|row| row.get::<_, i64>(0),
|
||||
|row| Ok((row.get::<_, i64>(0)?, row.get::<_, String>(1)?)),
|
||||
)
|
||||
.ok()
|
||||
.and_then(|v| u64::try_from(v).ok())
|
||||
.and_then(|(toks, model)| Some((u64::try_from(toks).ok()?, model)))
|
||||
}
|
||||
|
||||
/// Resolve a model name to its context-window size using the host's
/// per-model `contextWindowTokens` config.
///
/// A config key matches when it is non-empty and, compared ASCII
/// case-insensitively, is a substring of the model name (the same
/// substring rule as the harness's `events::context_window_tokens`).
///
/// `per_model` is a `HashMap`, whose iteration order is arbitrary, so
/// "first match" would be nondeterministic when several keys match the
/// same model (e.g. `sonnet` and `sonnet-4`). The winner is therefore
/// chosen deterministically: the longest (most specific) matching key,
/// with ties broken in favour of the lexicographically smallest key.
/// NOTE(review): confirm the harness breaks ties the same way if
/// overlapping keys are ever configured.
///
/// Returns `None` when no key matches.
fn resolve_ctx_window(model: &str, per_model: &HashMap<String, u64>) -> Option<u64> {
    let model = model.to_ascii_lowercase();
    per_model
        .iter()
        .filter(|(key, _)| {
            // An empty key is a substring of everything; never let it match.
            !key.is_empty() && model.contains(key.to_ascii_lowercase().as_str())
        })
        // Longest key wins; for equal lengths the reversed string comparison
        // makes the lexicographically smallest key the maximum.
        .max_by(|(a, _), (b, _)| a.len().cmp(&b.len()).then_with(|| b.cmp(a)))
        .map(|(_, &tokens)| tokens)
}
|
||||
|
||||
/// Map of `agent-<n>` → locked sha from meta's flake.lock. Used to
|
||||
|
|
@ -223,3 +253,48 @@ fn read_meta_locked_revs() -> HashMap<String, String> {
|
|||
}
|
||||
out
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::resolve_ctx_window;
    use std::collections::HashMap;

    /// Representative per-model config: one key per model family.
    fn cfg() -> HashMap<String, u64> {
        HashMap::from([
            ("haiku".to_owned(), 200_000),
            ("sonnet".to_owned(), 1_000_000),
            ("opus".to_owned(), 1_000_000),
        ])
    }

    /// A family key embedded anywhere in the model name resolves to
    /// that family's window.
    #[test]
    fn resolves_family_substring() {
        let table = cfg();
        let cases = [
            ("claude-3-5-haiku-20241022", 200_000),
            ("claude-sonnet-4-5", 1_000_000),
            ("claude-opus-4-1", 1_000_000),
        ];
        for (model, expected) in cases {
            assert_eq!(resolve_ctx_window(model, &table), Some(expected));
        }
    }

    /// Upper-case letters in the model name do not prevent a match.
    #[test]
    fn resolution_is_case_insensitive() {
        let table = cfg();
        let resolved = resolve_ctx_window("Claude-Sonnet-4", &table);
        assert_eq!(resolved, Some(1_000_000));
    }

    /// A model that contains none of the configured keys is unresolved.
    #[test]
    fn unknown_model_yields_none() {
        let table = cfg();
        let resolved = resolve_ctx_window("some-other-llm", &table);
        assert_eq!(resolved, None);
    }

    /// With no config at all, nothing can resolve.
    #[test]
    fn empty_config_yields_none() {
        let table: HashMap<String, u64> = HashMap::new();
        let resolved = resolve_ctx_window("claude-3-5-haiku", &table);
        assert_eq!(resolved, None);
    }

    /// An empty key would be a substring of every model name, so it
    /// must be ignored rather than matching everything.
    #[test]
    fn empty_key_is_skipped() {
        let table = HashMap::from([(String::new(), 999)]);
        let resolved = resolve_ctx_window("claude-3-5-haiku", &table);
        assert_eq!(resolved, None);
    }
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue