diff --git a/docs/web-ui.md b/docs/web-ui.md index dd933a9..0aeb840 100644 --- a/docs/web-ui.md +++ b/docs/web-ui.md @@ -310,13 +310,22 @@ Layout, top to bottom: `turn_state_since`. - Model chip: `model · ` (e.g. `model · haiku`). Driven by `LiveEvent::ModelChanged`; emitted from `Bus::set_model`. - - Ctx badge: `ctx · 142k` — total prompt tokens in the - current context window (input + cache_read + cache_write), - mirroring claude code's bottom-right indicator. Hover for - the breakdown including output. Driven by - `LiveEvent::TokenUsageChanged`; emitted from - `Bus::record_usage` whenever the terminal `result` event - delivers a fresh usage block. + - Ctx badge: `ctx · 142k` — last inference's prompt size + (input + cache_read + cache_write of the most recent + model call in the just-ended turn). This is the **actual + context window utilisation** — the number to watch when + deciding whether to compact. + - Cost badge: `cost · 1.3M` — cumulative tokens billed + across **every inference** in the last turn (sum of all + per-call prompts). Tool-heavy turns rebill the cached + prefix per call, so this routinely exceeds the model's + window — it's a cost signal, not a size signal. + - Both badges driven by `LiveEvent::TokenUsageChanged { + ctx, cost }`, emitted once at turn-end from + `Bus::record_turn_usage`. The harness tracks per-inference + usage by walking `assistant` events in the stream-json + and updating `last_inference` on each one; the `result` + event supplies `cost` and triggers the emit. - Last-turn chip: `last turn 12.3s` appears after the first turn ends, computed from the state-since deltas. - `■ cancel turn` button: visible only while state=thinking, @@ -437,8 +446,11 @@ Bus events (new vocabulary on `/events/stream`): `needs_login_idle` / `needs_login_in_progress`. Drives the alive-badge. - `model_changed { model }` — drives the model chip. -- `token_usage_changed { usage: TokenUsage }` — drives the - ctx-badge. Emitted from `Bus::record_usage` whenever the - stream-json `result` event delivers a fresh usage block. +- `token_usage_changed { ctx: TokenUsage, cost: TokenUsage }` + — drives the ctx + cost badges. Emitted from + `Bus::record_turn_usage` at turn-end; `ctx` is the last + inference's usage (current context size), `cost` is the + cumulative across every inference (the `result` event's + totals). - `turn_state_changed { state, since_unix }` — drives the state badge (`idle`/`thinking`/`compacting`). diff --git a/hive-ag3nt/assets/app.js b/hive-ag3nt/assets/app.js index 0fbbd6d..6b8f5d2 100644 --- a/hive-ag3nt/assets/app.js +++ b/hive-ag3nt/assets/app.js @@ -525,30 +525,43 @@ el_.textContent = 'model · ' + model; el_.title = `claude --model ${model}\nset via the operator's /model command; persists across turns until changed`; } - // Context badge — mirrors Claude Code's bottom-right "N tokens" - // indicator. Primary number is total prompt tokens used in the - // current context window (input + both cache axes); hover for the - // breakdown including output. Kept as chrome on the state row so - // the terminal stays the star. - function renderTokenUsage(u) { - const el_ = $('ctx-badge'); + // Token badges — two separate chips: + // ctx · N last inference's prompt size = current context window + // utilisation (what to watch for compaction decisions) + // cost · M cumulative billed tokens across the whole last turn + // (sum across every inference; tool-heavy turns rebill + // the cached prompt per call and blow past the model's + // context window — this is a cost signal, not a size + // signal) + // Both fed by the same `token_usage_changed` SSE event (`{ ctx, cost }`). + const fmtTokens = (n) => { + if (n >= 1_000_000) return (n / 1_000_000).toFixed(1) + 'M'; + if (n >= 1_000) return Math.round(n / 1000) + 'k'; + return String(n); + }; + function renderOneUsage(elId, label, u, blurb) { + const el_ = $(elId); if (!el_) return; if (!u) { el_.hidden = true; return; } - const ctx = u.input_tokens + u.cache_read_input_tokens + u.cache_creation_input_tokens; - const fmt = (n) => { - if (n >= 1_000_000) return (n / 1_000_000).toFixed(1) + 'M'; - if (n >= 1_000) return Math.round(n / 1000) + 'k'; - return String(n); - }; + const total = u.input_tokens + u.cache_read_input_tokens + u.cache_creation_input_tokens; el_.hidden = false; el_.title = [ - 'context window in use', + blurb, 'input: ' + u.input_tokens, 'cache_read: ' + u.cache_read_input_tokens, 'cache_write: ' + u.cache_creation_input_tokens, - 'output (last turn): ' + u.output_tokens, + 'output: ' + u.output_tokens, ].join('\n'); - el_.textContent = 'ctx · ' + fmt(ctx); + el_.textContent = label + ' · ' + fmtTokens(total); + } + function renderTokenUsage(ev) { + // `ev` is `{ ctx, cost }` either off /api/state cold-load (each may + // be null) or off a `token_usage_changed` SSE event (both present + // post-turn). + renderOneUsage('ctx-badge', 'ctx', ev && ev.ctx, + 'last-inference prompt size — the actual context window in use right now'); + renderOneUsage('cost-badge', 'cost', ev && ev.cost, + 'cumulative tokens billed across the last turn (sum across every inference)'); } function renderLastTurn(ms) { const el_ = $('last-turn'); @@ -626,7 +639,7 @@ } renderAliveBadge(s.status); renderModelChip(s.model); - renderTokenUsage(s.token_usage); + renderTokenUsage({ ctx: s.ctx_usage, cost: s.cost_usage }); // Open-threads aren't part of /api/state (kept on the broker // db, fetched via the per-agent socket). Cold-load fetches // it here; turn_end refreshes it via the renderer below. @@ -1026,7 +1039,7 @@ }, model_changed(ev, api) { if (!api.fromHistory) renderModelChip(ev.model); }, token_usage_changed(ev, api) { - if (!api.fromHistory) renderTokenUsage(ev.usage); + if (!api.fromHistory) renderTokenUsage({ ctx: ev.ctx, cost: ev.cost }); }, turn_state_changed(ev, api) { if (!api.fromHistory) setStateAbs(ev.state, ev.since_unix); diff --git a/hive-ag3nt/assets/index.html b/hive-ag3nt/assets/index.html index 7e8dac8..3083990 100644 --- a/hive-ag3nt/assets/index.html +++ b/hive-ag3nt/assets/index.html @@ -18,6 +18,7 @@ … booting +