stats: per-bucket turns-by-model chart

Each turn_stats row already records the model; roll it up per bucket
so the /stats page can show which model ran when. Model choice
greatly affects token cost, so the new stacked-bar chart sits right
under the cost chart, making it easy to eyeball the correlation
between the two across the window.

Snapshot gains a sorted `models` series list; each Bucket carries a
`model_counts` map.
This commit is contained in:
müde 2026-05-20 10:58:14 +02:00
parent 24b10becc9
commit f13c3dff8f
3 changed files with 71 additions and 9 deletions

View file

@ -7,7 +7,7 @@
//! propagating — the stats page is decorative, not authoritative, and
//! a missing db on a brand-new agent shouldn't 500 the route.
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use anyhow::{Context, Result};
@ -73,6 +73,10 @@ pub struct Snapshot {
pub tool_breakdown: Vec<KeyCount>,
pub wake_mix: Vec<KeyCount>,
pub result_mix: Vec<KeyCount>,
/// Distinct models seen in the window, sorted. Each bucket's
/// `model_counts` keys into this set; the stats page uses it as
/// the stacked-bar series list (stable order + colours).
pub models: Vec<String>,
/// Across-window p50 / p95 / avg of `duration_ms`. Same numbers
/// as the per-bucket fields but aggregated over the whole window
/// for the headline summary chips.
@ -97,6 +101,10 @@ pub struct Bucket {
/// size at turn-end — useful for spotting drift toward compaction).
pub avg_ctx_tokens: f64,
pub max_ctx_tokens: u64,
/// Turn count per model in this bucket. Model choice greatly
/// affects token cost, so this lets the operator line model usage
/// up against the cost series over time.
pub model_counts: HashMap<String, u64>,
}
#[derive(Debug, Serialize)]
@ -142,6 +150,7 @@ fn empty_snapshot(window: Window) -> Snapshot {
tool_breakdown: Vec::new(),
wake_mix: Vec::new(),
result_mix: Vec::new(),
models: Vec::new(),
duration_summary: DurationSummary::default(),
}
}
@ -162,7 +171,7 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
cache_read_input_tokens, cache_creation_input_tokens,
last_input_tokens,
tool_call_breakdown_json,
wake_from, result_kind
wake_from, result_kind, model
FROM turn_stats
WHERE started_at >= ?1
ORDER BY started_at ASC",
@ -179,6 +188,7 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
tool_breakdown_json: row.get::<_, Option<String>>(7)?,
wake_from: row.get::<_, String>(8)?,
result_kind: row.get::<_, String>(9)?,
model: row.get::<_, String>(10)?,
})
})?;
@ -186,6 +196,7 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
let mut tool_totals: HashMap<String, u64> = HashMap::new();
let mut wake_totals: HashMap<String, u64> = HashMap::new();
let mut result_totals: HashMap<String, u64> = HashMap::new();
let mut model_set: HashSet<String> = HashSet::new();
let mut all_durations: Vec<i64> = Vec::new();
let mut turn_count: u64 = 0;
@ -206,10 +217,12 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
.saturating_add(r.cache_creation_input_tokens);
acc.ctx_sum = acc.ctx_sum.saturating_add(r.last_input_tokens);
acc.ctx_max = acc.ctx_max.max(r.last_input_tokens);
*acc.model_counts.entry(r.model.clone()).or_insert(0) += 1;
all_durations.push(r.duration_ms.max(0));
*wake_totals.entry(r.wake_from).or_insert(0) += 1;
*result_totals.entry(r.result_kind).or_insert(0) += 1;
model_set.insert(r.model);
if let Some(json) = r.tool_breakdown_json
&& let Ok(map) = serde_json::from_str::<HashMap<String, u64>>(&json)
@ -222,6 +235,8 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
let buckets = fill_buckets(from, now, bucket_secs, &by_bucket);
let duration_summary = summarize_durations(&mut all_durations);
let mut models: Vec<String> = model_set.into_iter().collect();
models.sort_unstable();
Ok(Snapshot {
window: window.label(),
@ -233,6 +248,7 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
tool_breakdown: top_n(tool_totals, 10),
wake_mix: top_n(wake_totals, 20),
result_mix: top_n(result_totals, 20),
models,
duration_summary,
})
}
@ -248,6 +264,7 @@ struct Row {
tool_breakdown_json: Option<String>,
wake_from: String,
result_kind: String,
model: String,
}
#[derive(Default)]
@ -260,6 +277,7 @@ struct BucketAcc {
cache_creation_input_tokens: u64,
ctx_sum: u64,
ctx_max: u64,
model_counts: HashMap<String, u64>,
}
fn fill_buckets(
@ -307,6 +325,7 @@ fn fill_buckets(
cache_creation_input_tokens: acc.cache_creation_input_tokens,
avg_ctx_tokens: avg_ctx,
max_ctx_tokens: acc.ctx_max,
model_counts: acc.model_counts.clone(),
}
} else {
Bucket {
@ -321,6 +340,7 @@ fn fill_buckets(
cache_creation_input_tokens: 0,
avg_ctx_tokens: 0.0,
max_ctx_tokens: 0,
model_counts: HashMap::new(),
}
};
out.push(bucket);
@ -392,7 +412,7 @@ mod tests {
std::env::temp_dir().join(format!("hyperhive-stats-test-{pid}-{n}.sqlite"))
}
fn seed_db(path: &Path, rows: &[(i64, i64, &str, &str, &str)]) {
fn seed_db(path: &Path, rows: &[(i64, i64, &str, &str, &str, &str)]) {
let conn = Connection::open(path).unwrap();
conn.execute_batch(
"CREATE TABLE turn_stats (
@ -419,13 +439,13 @@ mod tests {
);",
)
.unwrap();
for (started, dur, wake, result, tools_json) in rows {
for (started, dur, model, wake, result, tools_json) in rows {
conn.execute(
"INSERT INTO turn_stats
(started_at, ended_at, duration_ms, model, wake_from,
last_input_tokens, tool_call_breakdown_json, result_kind)
VALUES (?1, ?2, ?3, 'm', ?4, 1000, ?5, ?6)",
params![started, started + dur / 1000, dur, wake, tools_json, result],
VALUES (?1, ?2, ?3, ?4, ?5, 1000, ?6, ?7)",
params![started, started + dur / 1000, dur, model, wake, tools_json, result],
)
.unwrap();
}
@ -439,9 +459,9 @@ mod tests {
seed_db(
&db,
&[
(now - 600, 5_000, "recv", "ok", r#"{"Read":2,"Bash":1}"#),
(now - 300, 10_000, "recv", "ok", r#"{"Read":3}"#),
(now - 100, 20_000, "operator", "failed", "{}"),
(now - 600, 5_000, "opus", "recv", "ok", r#"{"Read":2,"Bash":1}"#),
(now - 300, 10_000, "opus", "recv", "ok", r#"{"Read":3}"#),
(now - 100, 20_000, "sonnet", "operator", "failed", "{}"),
],
);
let s = snapshot(&db, Window::Day).unwrap();
@ -469,6 +489,17 @@ mod tests {
.collect();
assert_eq!(result_map.get("ok").copied(), Some(2));
assert_eq!(result_map.get("failed").copied(), Some(1));
// Model breakdown: 2 opus + 1 sonnet, all in the same hour
// bucket given the 24h window.
assert_eq!(s.models, vec!["opus".to_string(), "sonnet".to_string()]);
let mut model_totals: HashMap<String, u64> = HashMap::new();
for b in &s.buckets {
for (k, v) in &b.model_counts {
*model_totals.entry(k.clone()).or_insert(0) += v;
}
}
assert_eq!(model_totals.get("opus").copied(), Some(2));
assert_eq!(model_totals.get("sonnet").copied(), Some(1));
// Durations: [5000, 10000, 20000] → avg ≈ 11666.67, p50 = 10000, p95 ~ 20000
assert!((s.duration_summary.avg_ms - 11_666.666_666_666_666).abs() < 1.0);
assert!((s.duration_summary.p50_ms - 10_000.0).abs() < 1.0);