stats: per-bucket turns-by-model chart

Each turn_stats row already records the model; roll it up per bucket so the /stats page can show which model ran when. Model choice greatly affects token cost, so the new stacked-bar chart sits right under the cost chart, making it easy to eyeball the correlation across the window. Snapshot gains a sorted `models` series list; each Bucket carries a `model_counts` map.
2026-05-20 10:58:14 +02:00 · 2026-05-20 10:58:14 +02:00 · f13c3dff8f
commit f13c3dff8f
parent 24b10becc9
3 changed files with 71 additions and 9 deletions
--- a/hive-ag3nt/assets/stats.html
+++ b/hive-ag3nt/assets/stats.html
@ -80,6 +80,7 @@
    <div class="card wide"><h3>turn duration (ms) — p50 / p95 / avg</h3><div class="chart-wrap"><canvas id="chart-duration"></canvas></div></div>
    <div class="card wide"><h3>context tokens (last inference per turn) — avg / max</h3><div class="chart-wrap"><canvas id="chart-ctx"></canvas></div></div>
    <div class="card wide"><h3>token cost per bucket (sum across inferences)</h3><div class="chart-wrap"><canvas id="chart-cost"></canvas></div></div>
    <div class="card wide"><h3>turns by model per bucket — model drives token cost</h3><div class="chart-wrap"><canvas id="chart-model"></canvas></div></div>
    <div class="card"><h3>top tools</h3><div class="chart-wrap"><canvas id="chart-tools"></canvas></div></div>
    <div class="card"><h3>wake source mix</h3><div class="chart-wrap"><canvas id="chart-wake"></canvas></div></div>
    <div class="card"><h3>result mix</h3><div class="chart-wrap"><canvas id="chart-result"></canvas></div></div>
--- a/hive-ag3nt/assets/stats.js
+++ b/hive-ag3nt/assets/stats.js
@ -226,6 +226,34 @@
    });
  }
  // Render the stacked "turns by model" bar chart into the `chart-model`
  // canvas: one bar per entry of `s.buckets`, one stacked series per
  // entry of `s.models`. When `s.models` is missing or empty the canvas
  // is handed to `paintEmpty` with the 'no turns in window' message.
  function renderModelChart(s) {
    const canvasId = 'chart-model';
    destroy(canvasId);

    const seriesNames = s.models || [];
    if (!seriesNames.length) {
      paintEmpty(canvasId, 'no turns in window');
      return;
    }

    const labels = s.buckets.map((bucket) => bucketLabel(bucket.ts, s.bucket_seconds));

    // Turn count for `model` in `bucket`; a bucket without a
    // `model_counts` map, or without an entry for that model, counts
    // as zero.
    const turnsFor = (bucket, model) => {
      const counts = bucket.model_counts;
      if (!counts) return 0;
      return counts[model] || 0;
    };

    // Model choice drives token cost, so every model gets its own
    // stacked series that lines up against the cost chart above it.
    // Colours cycle through `wheel` by series index.
    const datasets = seriesNames.map((model, idx) => {
      const data = s.buckets.map((bucket) => turnsFor(bucket, model));
      return { label: model, data, backgroundColor: wheel[idx % wheel.length] };
    });

    const canvas = document.getElementById(canvasId);
    const gridStyle = () => ({ color: palette.border });
    const options = {
      responsive: true,
      maintainAspectRatio: false,
      plugins: { legend: { position: 'top', labels: { boxWidth: 12 } } },
      scales: {
        x: { stacked: true, grid: gridStyle() },
        // Counts are whole turns: start at zero and keep integer ticks.
        y: { stacked: true, beginAtZero: true, grid: gridStyle(), ticks: { precision: 0 } },
      },
    };

    charts[canvasId] = new Chart(canvas, {
      type: 'bar',
      data: { labels, datasets },
      options,
    });
  }
  function renderKeyCount(canvasId, items, emptyMsg) {
    destroy(canvasId);
    if (!items || items.length === 0) {
@ -252,6 +280,7 @@
      paintEmpty('chart-duration', 'no turns in window');
      paintEmpty('chart-ctx', 'no turns in window');
      paintEmpty('chart-cost', 'no turns in window');
      paintEmpty('chart-model', 'no turns in window');
      paintEmpty('chart-tools', 'no tool calls');
      paintEmpty('chart-wake', 'no wakes');
      paintEmpty('chart-result', 'no results');
@ -261,6 +290,7 @@
    renderDurationChart(s);
    renderCtxChart(s);
    renderCostChart(s);
    renderModelChart(s);
    renderKeyCount('chart-tools', s.tool_breakdown, 'no tool calls');
    renderKeyCount('chart-wake', s.wake_mix, 'no wakes');
    renderKeyCount('chart-result', s.result_mix, 'no results');
--- a/hive-ag3nt/src/stats.rs
+++ b/hive-ag3nt/src/stats.rs
@ -7,7 +7,7 @@
 //! propagating — the stats page is decorative, not authoritative, and
 //! a missing db on a brand-new agent shouldn't 500 the route.
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use std::path::{Path, PathBuf};
 use anyhow::{Context, Result};
@ -73,6 +73,10 @@ pub struct Snapshot {
    pub tool_breakdown: Vec<KeyCount>,
    pub wake_mix: Vec<KeyCount>,
    pub result_mix: Vec<KeyCount>,
    /// Distinct models seen in the window, sorted. Each bucket's
    /// `model_counts` keys into this set; the stats page uses it as
    /// the stacked-bar series list (stable order + colours).
    pub models: Vec<String>,
    /// Across-window p50 / p95 / avg of `duration_ms`. Same numbers
    /// as the per-bucket fields but aggregated over the whole window
    /// for the headline summary chips.
@ -97,6 +101,10 @@ pub struct Bucket {
    /// size at turn-end — useful for spotting drift toward compaction).
    pub avg_ctx_tokens: f64,
    pub max_ctx_tokens: u64,
    /// Turn count per model in this bucket. Model choice greatly
    /// affects token cost, so this lets the operator line model usage
    /// up against the cost series over time.
    pub model_counts: HashMap<String, u64>,
 }
 #[derive(Debug, Serialize)]
@ -142,6 +150,7 @@ fn empty_snapshot(window: Window) -> Snapshot {
        tool_breakdown: Vec::new(),
        wake_mix: Vec::new(),
        result_mix: Vec::new(),
        models: Vec::new(),
        duration_summary: DurationSummary::default(),
    }
 }
@ -162,7 +171,7 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
                cache_read_input_tokens, cache_creation_input_tokens,
                last_input_tokens,
                tool_call_breakdown_json,
-                wake_from, result_kind
+                wake_from, result_kind, model
         FROM turn_stats
         WHERE started_at >= ?1
         ORDER BY started_at ASC",
@ -179,6 +188,7 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
            tool_breakdown_json: row.get::<_, Option<String>>(7)?,
            wake_from: row.get::<_, String>(8)?,
            result_kind: row.get::<_, String>(9)?,
            model: row.get::<_, String>(10)?,
        })
    })?;
@ -186,6 +196,7 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
    let mut tool_totals: HashMap<String, u64> = HashMap::new();
    let mut wake_totals: HashMap<String, u64> = HashMap::new();
    let mut result_totals: HashMap<String, u64> = HashMap::new();
    let mut model_set: HashSet<String> = HashSet::new();
    let mut all_durations: Vec<i64> = Vec::new();
    let mut turn_count: u64 = 0;
@ -206,10 +217,12 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
            .saturating_add(r.cache_creation_input_tokens);
        acc.ctx_sum = acc.ctx_sum.saturating_add(r.last_input_tokens);
        acc.ctx_max = acc.ctx_max.max(r.last_input_tokens);
        *acc.model_counts.entry(r.model.clone()).or_insert(0) += 1;
        all_durations.push(r.duration_ms.max(0));
        *wake_totals.entry(r.wake_from).or_insert(0) += 1;
        *result_totals.entry(r.result_kind).or_insert(0) += 1;
        model_set.insert(r.model);
        if let Some(json) = r.tool_breakdown_json
            && let Ok(map) = serde_json::from_str::<HashMap<String, u64>>(&json)
@ -222,6 +235,8 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
    let buckets = fill_buckets(from, now, bucket_secs, &by_bucket);
    let duration_summary = summarize_durations(&mut all_durations);
    let mut models: Vec<String> = model_set.into_iter().collect();
    models.sort_unstable();
    Ok(Snapshot {
        window: window.label(),
@ -233,6 +248,7 @@ fn snapshot(path: &Path, window: Window) -> Result<Snapshot> {
        tool_breakdown: top_n(tool_totals, 10),
        wake_mix: top_n(wake_totals, 20),
        result_mix: top_n(result_totals, 20),
        models,
        duration_summary,
    })
 }
@ -248,6 +264,7 @@ struct Row {
    tool_breakdown_json: Option<String>,
    wake_from: String,
    result_kind: String,
    model: String,
 }
 #[derive(Default)]
@ -260,6 +277,7 @@ struct BucketAcc {
    cache_creation_input_tokens: u64,
    ctx_sum: u64,
    ctx_max: u64,
    model_counts: HashMap<String, u64>,
 }
 fn fill_buckets(
@ -307,6 +325,7 @@ fn fill_buckets(
                cache_creation_input_tokens: acc.cache_creation_input_tokens,
                avg_ctx_tokens: avg_ctx,
                max_ctx_tokens: acc.ctx_max,
                model_counts: acc.model_counts.clone(),
            }
        } else {
            Bucket {
@ -321,6 +340,7 @@ fn fill_buckets(
                cache_creation_input_tokens: 0,
                avg_ctx_tokens: 0.0,
                max_ctx_tokens: 0,
                model_counts: HashMap::new(),
            }
        };
        out.push(bucket);
@ -392,7 +412,7 @@ mod tests {
        std::env::temp_dir().join(format!("hyperhive-stats-test-{pid}-{n}.sqlite"))
    }
-    fn seed_db(path: &Path, rows: &[(i64, i64, &str, &str, &str)]) {
+    fn seed_db(path: &Path, rows: &[(i64, i64, &str, &str, &str, &str)]) {
        let conn = Connection::open(path).unwrap();
        conn.execute_batch(
            "CREATE TABLE turn_stats (
@ -419,13 +439,13 @@ mod tests {
            );",
        )
        .unwrap();
-        for (started, dur, wake, result, tools_json) in rows {
+        for (started, dur, model, wake, result, tools_json) in rows {
            conn.execute(
                "INSERT INTO turn_stats
                    (started_at, ended_at, duration_ms, model, wake_from,
                     last_input_tokens, tool_call_breakdown_json, result_kind)
-                 VALUES (?1, ?2, ?3, 'm', ?4, 1000, ?5, ?6)",
+                 VALUES (?1, ?2, ?3, ?4, ?5, 1000, ?6, ?7)",
-                params![started, started + dur / 1000, dur, wake, tools_json, result],
+                params![started, started + dur / 1000, dur, model, wake, tools_json, result],
            )
            .unwrap();
        }
@ -439,9 +459,9 @@ mod tests {
        seed_db(
            &db,
            &[
-                (now - 600, 5_000, "recv", "ok", r#"{"Read":2,"Bash":1}"#),
+                (now - 600, 5_000, "opus", "recv", "ok", r#"{"Read":2,"Bash":1}"#),
-                (now - 300, 10_000, "recv", "ok", r#"{"Read":3}"#),
+                (now - 300, 10_000, "opus", "recv", "ok", r#"{"Read":3}"#),
-                (now - 100, 20_000, "operator", "failed", "{}"),
+                (now - 100, 20_000, "sonnet", "operator", "failed", "{}"),
            ],
        );
        let s = snapshot(&db, Window::Day).unwrap();
@ -469,6 +489,17 @@ mod tests {
            .collect();
        assert_eq!(result_map.get("ok").copied(), Some(2));
        assert_eq!(result_map.get("failed").copied(), Some(1));
        // Model breakdown: 2 opus + 1 sonnet, all in the same hour
        // bucket given the 24h window.
        assert_eq!(s.models, vec!["opus".to_string(), "sonnet".to_string()]);
        let mut model_totals: HashMap<String, u64> = HashMap::new();
        for b in &s.buckets {
            for (k, v) in &b.model_counts {
                *model_totals.entry(k.clone()).or_insert(0) += v;
            }
        }
        assert_eq!(model_totals.get("opus").copied(), Some(2));
        assert_eq!(model_totals.get("sonnet").copied(), Some(1));
        // Durations: [5000, 10000, 20000] → avg ≈ 11666.67, p50 = 10000, p95 ~ 20000
        assert!((s.duration_summary.avg_ms - 11_666.666_666_666_666).abs() < 1.0);
        assert!((s.duration_summary.p50_ms - 10_000.0).abs() < 1.0);