stats: per-bucket turns-by-model chart

Each turn_stats row already records the model; roll it up per bucket
so the /stats page can show which model ran when. Model choice
greatly affects token cost, so the new stacked-bar chart sits directly
under the cost chart, making the two easy to correlate by eye across the window.

Snapshot gains a sorted `models` series list; each Bucket carries a
`model_counts` map.
This commit is contained in:
müde 2026-05-20 10:58:14 +02:00
parent 24b10becc9
commit f13c3dff8f
3 changed files with 71 additions and 9 deletions

View file

@ -80,6 +80,7 @@
<div class="card wide"><h3>turn duration (ms) — p50 / p95 / avg</h3><div class="chart-wrap"><canvas id="chart-duration"></canvas></div></div> <div class="card wide"><h3>turn duration (ms) — p50 / p95 / avg</h3><div class="chart-wrap"><canvas id="chart-duration"></canvas></div></div>
<div class="card wide"><h3>context tokens (last inference per turn) — avg / max</h3><div class="chart-wrap"><canvas id="chart-ctx"></canvas></div></div> <div class="card wide"><h3>context tokens (last inference per turn) — avg / max</h3><div class="chart-wrap"><canvas id="chart-ctx"></canvas></div></div>
<div class="card wide"><h3>token cost per bucket (sum across inferences)</h3><div class="chart-wrap"><canvas id="chart-cost"></canvas></div></div> <div class="card wide"><h3>token cost per bucket (sum across inferences)</h3><div class="chart-wrap"><canvas id="chart-cost"></canvas></div></div>
<div class="card wide"><h3>turns by model per bucket — model drives token cost</h3><div class="chart-wrap"><canvas id="chart-model"></canvas></div></div>
<div class="card"><h3>top tools</h3><div class="chart-wrap"><canvas id="chart-tools"></canvas></div></div> <div class="card"><h3>top tools</h3><div class="chart-wrap"><canvas id="chart-tools"></canvas></div></div>
<div class="card"><h3>wake source mix</h3><div class="chart-wrap"><canvas id="chart-wake"></canvas></div></div> <div class="card"><h3>wake source mix</h3><div class="chart-wrap"><canvas id="chart-wake"></canvas></div></div>
<div class="card"><h3>result mix</h3><div class="chart-wrap"><canvas id="chart-result"></canvas></div></div> <div class="card"><h3>result mix</h3><div class="chart-wrap"><canvas id="chart-result"></canvas></div></div>

View file

@ -226,6 +226,34 @@
}); });
} }
/**
 * Draw the stacked "turns by model" bar chart on the `chart-model` canvas.
 *
 * Takes the series list from `s.models` and, for each entry of `s.buckets`,
 * reads that bucket's `model_counts` map for the bar heights. If the
 * snapshot lists no models, the canvas gets the empty-state message instead
 * of a chart.
 *
 * @param {object} s Stats snapshot; this function reads `models`, `buckets`
 *   and `bucket_seconds` from it.
 */
function renderModelChart(s) {
  const canvasId = 'chart-model';
  destroy(canvasId);

  const seriesNames = s.models || [];
  if (!seriesNames.length) {
    paintEmpty(canvasId, 'no turns in window');
    return;
  }

  const labels = s.buckets.map((bucket) => bucketLabel(bucket.ts, s.bucket_seconds));

  // Turn count for one model in one bucket; a missing map or key counts as 0.
  const turnsFor = (bucket, model) => {
    const counts = bucket.model_counts;
    return (counts ? counts[model] : 0) || 0;
  };

  // One stacked series per model. Colours cycle through `wheel` by series
  // index, so a model keeps its colour as long as its position in
  // `s.models` does not change.
  const datasets = seriesNames.map((model, idx) => {
    const data = s.buckets.map((bucket) => turnsFor(bucket, model));
    return {
      label: model,
      data,
      backgroundColor: wheel[idx % wheel.length],
    };
  });

  // Fresh grid config per axis so the two scales never share one object.
  const gridLines = () => ({ color: palette.border });
  const options = {
    responsive: true,
    maintainAspectRatio: false,
    plugins: {
      legend: { position: 'top', labels: { boxWidth: 12 } },
    },
    scales: {
      x: { stacked: true, grid: gridLines() },
      y: {
        stacked: true,
        beginAtZero: true,
        grid: gridLines(),
        // Turn counts are whole numbers; keep the axis ticks integral.
        ticks: { precision: 0 },
      },
    },
  };

  charts[canvasId] = new Chart(document.getElementById(canvasId), {
    type: 'bar',
    data: { labels, datasets },
    options,
  });
}
function renderKeyCount(canvasId, items, emptyMsg) { function renderKeyCount(canvasId, items, emptyMsg) {
destroy(canvasId); destroy(canvasId);
if (!items || items.length === 0) { if (!items || items.length === 0) {
@ -252,6 +280,7 @@
paintEmpty('chart-duration', 'no turns in window'); paintEmpty('chart-duration', 'no turns in window');
paintEmpty('chart-ctx', 'no turns in window'); paintEmpty('chart-ctx', 'no turns in window');
paintEmpty('chart-cost', 'no turns in window'); paintEmpty('chart-cost', 'no turns in window');
paintEmpty('chart-model', 'no turns in window');
paintEmpty('chart-tools', 'no tool calls'); paintEmpty('chart-tools', 'no tool calls');
paintEmpty('chart-wake', 'no wakes'); paintEmpty('chart-wake', 'no wakes');
paintEmpty('chart-result', 'no results'); paintEmpty('chart-result', 'no results');
@ -261,6 +290,7 @@
renderDurationChart(s); renderDurationChart(s);
renderCtxChart(s); renderCtxChart(s);
renderCostChart(s); renderCostChart(s);
renderModelChart(s);
renderKeyCount('chart-tools', s.tool_breakdown, 'no tool calls'); renderKeyCount('chart-tools', s.tool_breakdown, 'no tool calls');
renderKeyCount('chart-wake', s.wake_mix, 'no wakes'); renderKeyCount('chart-wake', s.wake_mix, 'no wakes');
renderKeyCount('chart-result', s.result_mix, 'no results'); renderKeyCount('chart-result', s.result_mix, 'no results');

View file

@ -7,7 +7,7 @@
//! propagating — the stats page is decorative, not authoritative, and //! propagating — the stats page is decorative, not authoritative, and
//! a missing db on a brand-new agent shouldn't 500 the route. //! a missing db on a brand-new agent shouldn't 500 the route.
use std::collections::HashMap; use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use anyhow::{Context, Result}; use anyhow::{Context, Result};
@ -73,6 +73,10 @@ pub struct Snapshot {
pub tool_breakdown: Vec<KeyCount>, pub tool_breakdown: Vec<KeyCount>,
pub wake_mix: Vec<KeyCount>, pub wake_mix: Vec<KeyCount>,
pub result_mix: Vec<KeyCount>, pub result_mix: Vec<KeyCount>,
/// Distinct models seen in the window, sorted. Each bucket's
/// `model_counts` keys into this set; the stats page uses it as
/// the stacked-bar series list (stable order + colours).
pub models: Vec<String>,
/// Across-window p50 / p95 / avg of `duration_ms`. Same numbers /// Across-window p50 / p95 / avg of `duration_ms`. Same numbers
/// as the per-bucket fields but aggregated over the whole window /// as the per-bucket fields but aggregated over the whole window
/// for the headline summary chips. /// for the headline summary chips.
@ -97,6 +101,10 @@ pub struct Bucket {
/// size at turn-end — useful for spotting drift toward compaction). /// size at turn-end — useful for spotting drift toward compaction).
pub avg_ctx_tokens: f64, pub avg_ctx_tokens: f64,
pub max_ctx_tokens: u64, pub max_ctx_tokens: u64,
/// Turn count per model in this bucket. Model choice greatly
/// affects token cost, so this lets the operator line model usage
/// up against the cost series over time.
pub model_counts: HashMap<String, u64>,
} }
#[derive(Debug, Serialize)] #[derive(Debug, Serialize)]
@ -142,6 +150,7 @@ fn empty_snapshot(window: Window) -> Snapshot {
tool_breakdown: Vec::new(), tool_breakdown: Vec::new(),
wake_mix: Vec::new(), wake_mix: Vec::new(),
result_mix: Vec::new(), result_mix: Vec::new(),
models: Vec::new(),
duration_summary: DurationSummary::default(), duration_summary: DurationSummary::default(),
} }
} }
@ -162,7 +171,7 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
cache_read_input_tokens, cache_creation_input_tokens, cache_read_input_tokens, cache_creation_input_tokens,
last_input_tokens, last_input_tokens,
tool_call_breakdown_json, tool_call_breakdown_json,
wake_from, result_kind wake_from, result_kind, model
FROM turn_stats FROM turn_stats
WHERE started_at >= ?1 WHERE started_at >= ?1
ORDER BY started_at ASC", ORDER BY started_at ASC",
@ -179,6 +188,7 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
tool_breakdown_json: row.get::<_, Option<String>>(7)?, tool_breakdown_json: row.get::<_, Option<String>>(7)?,
wake_from: row.get::<_, String>(8)?, wake_from: row.get::<_, String>(8)?,
result_kind: row.get::<_, String>(9)?, result_kind: row.get::<_, String>(9)?,
model: row.get::<_, String>(10)?,
}) })
})?; })?;
@ -186,6 +196,7 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
let mut tool_totals: HashMap<String, u64> = HashMap::new(); let mut tool_totals: HashMap<String, u64> = HashMap::new();
let mut wake_totals: HashMap<String, u64> = HashMap::new(); let mut wake_totals: HashMap<String, u64> = HashMap::new();
let mut result_totals: HashMap<String, u64> = HashMap::new(); let mut result_totals: HashMap<String, u64> = HashMap::new();
let mut model_set: HashSet<String> = HashSet::new();
let mut all_durations: Vec<i64> = Vec::new(); let mut all_durations: Vec<i64> = Vec::new();
let mut turn_count: u64 = 0; let mut turn_count: u64 = 0;
@ -206,10 +217,12 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
.saturating_add(r.cache_creation_input_tokens); .saturating_add(r.cache_creation_input_tokens);
acc.ctx_sum = acc.ctx_sum.saturating_add(r.last_input_tokens); acc.ctx_sum = acc.ctx_sum.saturating_add(r.last_input_tokens);
acc.ctx_max = acc.ctx_max.max(r.last_input_tokens); acc.ctx_max = acc.ctx_max.max(r.last_input_tokens);
*acc.model_counts.entry(r.model.clone()).or_insert(0) += 1;
all_durations.push(r.duration_ms.max(0)); all_durations.push(r.duration_ms.max(0));
*wake_totals.entry(r.wake_from).or_insert(0) += 1; *wake_totals.entry(r.wake_from).or_insert(0) += 1;
*result_totals.entry(r.result_kind).or_insert(0) += 1; *result_totals.entry(r.result_kind).or_insert(0) += 1;
model_set.insert(r.model);
if let Some(json) = r.tool_breakdown_json if let Some(json) = r.tool_breakdown_json
&& let Ok(map) = serde_json::from_str::<HashMap<String, u64>>(&json) && let Ok(map) = serde_json::from_str::<HashMap<String, u64>>(&json)
@ -222,6 +235,8 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
let buckets = fill_buckets(from, now, bucket_secs, &by_bucket); let buckets = fill_buckets(from, now, bucket_secs, &by_bucket);
let duration_summary = summarize_durations(&mut all_durations); let duration_summary = summarize_durations(&mut all_durations);
let mut models: Vec<String> = model_set.into_iter().collect();
models.sort_unstable();
Ok(Snapshot { Ok(Snapshot {
window: window.label(), window: window.label(),
@ -233,6 +248,7 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
tool_breakdown: top_n(tool_totals, 10), tool_breakdown: top_n(tool_totals, 10),
wake_mix: top_n(wake_totals, 20), wake_mix: top_n(wake_totals, 20),
result_mix: top_n(result_totals, 20), result_mix: top_n(result_totals, 20),
models,
duration_summary, duration_summary,
}) })
} }
@ -248,6 +264,7 @@ struct Row {
tool_breakdown_json: Option<String>, tool_breakdown_json: Option<String>,
wake_from: String, wake_from: String,
result_kind: String, result_kind: String,
model: String,
} }
#[derive(Default)] #[derive(Default)]
@ -260,6 +277,7 @@ struct BucketAcc {
cache_creation_input_tokens: u64, cache_creation_input_tokens: u64,
ctx_sum: u64, ctx_sum: u64,
ctx_max: u64, ctx_max: u64,
model_counts: HashMap<String, u64>,
} }
fn fill_buckets( fn fill_buckets(
@ -307,6 +325,7 @@ fn fill_buckets(
cache_creation_input_tokens: acc.cache_creation_input_tokens, cache_creation_input_tokens: acc.cache_creation_input_tokens,
avg_ctx_tokens: avg_ctx, avg_ctx_tokens: avg_ctx,
max_ctx_tokens: acc.ctx_max, max_ctx_tokens: acc.ctx_max,
model_counts: acc.model_counts.clone(),
} }
} else { } else {
Bucket { Bucket {
@ -321,6 +340,7 @@ fn fill_buckets(
cache_creation_input_tokens: 0, cache_creation_input_tokens: 0,
avg_ctx_tokens: 0.0, avg_ctx_tokens: 0.0,
max_ctx_tokens: 0, max_ctx_tokens: 0,
model_counts: HashMap::new(),
} }
}; };
out.push(bucket); out.push(bucket);
@ -392,7 +412,7 @@ mod tests {
std::env::temp_dir().join(format!("hyperhive-stats-test-{pid}-{n}.sqlite")) std::env::temp_dir().join(format!("hyperhive-stats-test-{pid}-{n}.sqlite"))
} }
fn seed_db(path: &Path, rows: &[(i64, i64, &str, &str, &str)]) { fn seed_db(path: &Path, rows: &[(i64, i64, &str, &str, &str, &str)]) {
let conn = Connection::open(path).unwrap(); let conn = Connection::open(path).unwrap();
conn.execute_batch( conn.execute_batch(
"CREATE TABLE turn_stats ( "CREATE TABLE turn_stats (
@ -419,13 +439,13 @@ mod tests {
);", );",
) )
.unwrap(); .unwrap();
for (started, dur, wake, result, tools_json) in rows { for (started, dur, model, wake, result, tools_json) in rows {
conn.execute( conn.execute(
"INSERT INTO turn_stats "INSERT INTO turn_stats
(started_at, ended_at, duration_ms, model, wake_from, (started_at, ended_at, duration_ms, model, wake_from,
last_input_tokens, tool_call_breakdown_json, result_kind) last_input_tokens, tool_call_breakdown_json, result_kind)
VALUES (?1, ?2, ?3, 'm', ?4, 1000, ?5, ?6)", VALUES (?1, ?2, ?3, ?4, ?5, 1000, ?6, ?7)",
params![started, started + dur / 1000, dur, wake, tools_json, result], params![started, started + dur / 1000, dur, model, wake, tools_json, result],
) )
.unwrap(); .unwrap();
} }
@ -439,9 +459,9 @@ mod tests {
seed_db( seed_db(
&db, &db,
&[ &[
(now - 600, 5_000, "recv", "ok", r#"{"Read":2,"Bash":1}"#), (now - 600, 5_000, "opus", "recv", "ok", r#"{"Read":2,"Bash":1}"#),
(now - 300, 10_000, "recv", "ok", r#"{"Read":3}"#), (now - 300, 10_000, "opus", "recv", "ok", r#"{"Read":3}"#),
(now - 100, 20_000, "operator", "failed", "{}"), (now - 100, 20_000, "sonnet", "operator", "failed", "{}"),
], ],
); );
let s = snapshot(&db, Window::Day).unwrap(); let s = snapshot(&db, Window::Day).unwrap();
@ -469,6 +489,17 @@ mod tests {
.collect(); .collect();
assert_eq!(result_map.get("ok").copied(), Some(2)); assert_eq!(result_map.get("ok").copied(), Some(2));
assert_eq!(result_map.get("failed").copied(), Some(1)); assert_eq!(result_map.get("failed").copied(), Some(1));
// Model breakdown: 2 opus + 1 sonnet, all in the same hour
// bucket given the 24h window.
assert_eq!(s.models, vec!["opus".to_string(), "sonnet".to_string()]);
let mut model_totals: HashMap<String, u64> = HashMap::new();
for b in &s.buckets {
for (k, v) in &b.model_counts {
*model_totals.entry(k.clone()).or_insert(0) += v;
}
}
assert_eq!(model_totals.get("opus").copied(), Some(2));
assert_eq!(model_totals.get("sonnet").copied(), Some(1));
// Durations: [5000, 10000, 20000] → avg ≈ 11666.67, p50 = 10000, p95 ~ 20000 // Durations: [5000, 10000, 20000] → avg ≈ 11666.67, p50 = 10000, p95 ~ 20000
assert!((s.duration_summary.avg_ms - 11_666.666_666_666_666).abs() < 1.0); assert!((s.duration_summary.avg_ms - 11_666.666_666_666_666).abs() < 1.0);
assert!((s.duration_summary.p50_ms - 10_000.0).abs() < 1.0); assert!((s.duration_summary.p50_ms - 10_000.0).abs() < 1.0);