agent badges: split into ctx (last-inference) + cost (cumulative)

the existing ctx badge was misnamed: it summed `result.usage`, which is the cumulative token count billed across every inference in the turn. for tool-heavy turns that easily exceeds the model's context window (a 600k cached prefix × 15 sub-calls = 9M cache_read), making it useless as a "should i compact?" signal.

now two separate badges:

- `ctx · N`: the last inference's prompt size, i.e. the actual context window in use right now. parsed from each `assistant` event's `.message.usage`; the harness tracks the most recent one across the stream and snapshots it when the `result` event lands.
- `cost · M`: cumulative tokens billed across the whole turn (the previous behaviour, now correctly labelled).

both update via a single `TokenUsageChanged { ctx, cost }` SSE event at turn-end.

`turn_stats` grows four columns (`last_input_tokens`, `last_output_tokens`, `last_cache_read_input_tokens`, `last_cache_creation_input_tokens`) so the cold-load seed can paint both badges on page load. migrations run try-and-ignore ALTERs so existing agent dbs catch up; pre-migration rows have last-inference zeros and yield no `ctx` seed (badge stays empty until next turn) rather than a misleading 0.
2026-05-18 18:48:35 +02:00 · 2026-05-18 18:48:35 +02:00 · 5c6c607e25
commit 5c6c607e25
parent 14549dd8a9
9 changed files with 267 additions and 101 deletions
--- a/hive-ag3nt/assets/app.js
+++ b/hive-ag3nt/assets/app.js
@ -525,30 +525,43 @@
    el_.textContent = 'model · ' + model;
    el_.title = `claude --model ${model}\nset via the operator's /model command; persists across turns until changed`;
  }
-  // Context badge — mirrors Claude Code's bottom-right "N tokens"
-  // indicator. Primary number is total prompt tokens used in the
-  // current context window (input + both cache axes); hover for the
-  // breakdown including output. Kept as chrome on the state row so
-  // the terminal stays the star.
-  function renderTokenUsage(u) {
-    const el_ = $('ctx-badge');
+  // Token badges — two separate chips:
+  //   ctx · N    last inference's prompt size = current context window
+  //              utilisation (what to watch for compaction decisions)
+  //   cost · M   cumulative billed tokens across the whole last turn
+  //              (sum across every inference; tool-heavy turns rebill
+  //              the cached prompt per call and blow past the model's
+  //              context window — this is a cost signal, not a size
+  //              signal)
+  // Both fed by the same `token_usage_changed` SSE event (`{ ctx, cost }`).
+  const fmtTokens = (n) => {
+    if (n >= 1_000_000) return (n / 1_000_000).toFixed(1) + 'M';
+    if (n >= 1_000) return Math.round(n / 1000) + 'k';
+    return String(n);
+  };
+  function renderOneUsage(elId, label, u, blurb) {
+    const el_ = $(elId);
    if (!el_) return;
    if (!u) { el_.hidden = true; return; }
-    const ctx = u.input_tokens + u.cache_read_input_tokens + u.cache_creation_input_tokens;
-    const fmt = (n) => {
-      if (n >= 1_000_000) return (n / 1_000_000).toFixed(1) + 'M';
-      if (n >= 1_000) return Math.round(n / 1000) + 'k';
-      return String(n);
-    };
+    const total = u.input_tokens + u.cache_read_input_tokens + u.cache_creation_input_tokens;
    el_.hidden = false;
    el_.title = [
-      'context window in use',
+      blurb,
      'input: ' + u.input_tokens,
      'cache_read: ' + u.cache_read_input_tokens,
      'cache_write: ' + u.cache_creation_input_tokens,
-      'output (last turn): ' + u.output_tokens,
+      'output: ' + u.output_tokens,
    ].join('\n');
-    el_.textContent = 'ctx · ' + fmt(ctx);
+    el_.textContent = label + ' · ' + fmtTokens(total);
+  }
+  function renderTokenUsage(ev) {
+    // `ev` is `{ ctx, cost }` either off /api/state cold-load (each may
+    // be null) or off a `token_usage_changed` SSE event (both present
+    // post-turn).
+    renderOneUsage('ctx-badge', 'ctx', ev && ev.ctx,
+      'last-inference prompt size — the actual context window in use right now');
+    renderOneUsage('cost-badge', 'cost', ev && ev.cost,
+      'cumulative tokens billed across the last turn (sum across every inference)');
  }
  function renderLastTurn(ms) {
    const el_ = $('last-turn');
@ -626,7 +639,7 @@
      }
      renderAliveBadge(s.status);
      renderModelChip(s.model);
-      renderTokenUsage(s.token_usage);
+      renderTokenUsage({ ctx: s.ctx_usage, cost: s.cost_usage });
      // Open-threads aren't part of /api/state (kept on the broker
      // db, fetched via the per-agent socket). Cold-load fetches
      // it here; turn_end refreshes it via the renderer below.
@ -1026,7 +1039,7 @@
        },
        model_changed(ev, api) { if (!api.fromHistory) renderModelChip(ev.model); },
        token_usage_changed(ev, api) {
-          if (!api.fromHistory) renderTokenUsage(ev.usage);
+          if (!api.fromHistory) renderTokenUsage({ ctx: ev.ctx, cost: ev.cost });
        },
        turn_state_changed(ev, api) {
          if (!api.fromHistory) setStateAbs(ev.state, ev.since_unix);
--- a/hive-ag3nt/assets/index.html
+++ b/hive-ag3nt/assets/index.html
@ -18,6 +18,7 @@
    <span id="state-badge" class="state-badge state-loading">… booting</span>
    <span id="model-chip" class="model-chip" hidden></span>
    <span id="ctx-badge" class="ctx-badge" hidden title="tokens used in the current context window"></span>
+    <span id="cost-badge" class="ctx-badge" hidden title="cumulative tokens billed across the last turn (sum across every inference; tool-heavy turns rebill the cached prompt per call)"></span>
    <span id="last-turn" class="last-turn" hidden></span>
    <button type="button" id="cancel-btn" class="btn-cancel-turn" hidden>■ cancel turn</button>
    <button type="button" id="new-session-btn" class="btn-new-session"