agent badges: split into ctx (last-inference) + cost (cumulative)
the existing ctx badge was misnamed: it summed `result.usage`, which is
the cumulative tokens billed across every inference in the turn. for
tool-heavy turns that easily exceeds the model's context window (a 600k
cached prefix × 15 sub-calls = 9M cache_read), making it useless as a
"should i compact?" signal.
now two separate badges:
ctx · N — the last inference's prompt size, i.e. the actual context
window in use right now. parsed from each `assistant` event's
`.message.usage`; the harness tracks the most recent one
across the stream and snapshots it when the `result`
event lands.
cost · M — cumulative tokens billed across the whole turn (the
previous behaviour, now correctly labelled).
both update via a single `TokenUsageChanged { ctx, cost }` SSE event at
turn-end. turn_stats grows four columns (`last_input_tokens`,
`last_output_tokens`, `last_cache_read_input_tokens`,
`last_cache_creation_input_tokens`) so the cold-load seed can paint both
badges on page load. migrations run try-and-ignore ALTERs so existing
agent dbs catch up; pre-migration rows have last-inference zeros and
yield no `ctx` seed (badge stays empty until next turn) rather than a
misleading 0.
This commit is contained in:
parent
14549dd8a9
commit
5c6c607e25
9 changed files with 267 additions and 101 deletions
|
|
@ -525,30 +525,43 @@
|
|||
el_.textContent = 'model · ' + model;
|
||||
el_.title = `claude --model ${model}\nset via the operator's /model command; persists across turns until changed`;
|
||||
}
|
||||
// Context badge — mirrors Claude Code's bottom-right "N tokens"
|
||||
// indicator. Primary number is total prompt tokens used in the
|
||||
// current context window (input + both cache axes); hover for the
|
||||
// breakdown including output. Kept as chrome on the state row so
|
||||
// the terminal stays the star.
|
||||
function renderTokenUsage(u) {
|
||||
const el_ = $('ctx-badge');
|
||||
// Token badges — two separate chips:
|
||||
// ctx · N last inference's prompt size = current context window
|
||||
// utilisation (what to watch for compaction decisions)
|
||||
// cost · M cumulative billed tokens across the whole last turn
|
||||
// (sum across every inference; tool-heavy turns rebill
|
||||
// the cached prompt per call and blow past the model's
|
||||
// context window — this is a cost signal, not a size
|
||||
// signal)
|
||||
// Both fed by the same `token_usage_changed` SSE event (`{ ctx, cost }`).
|
||||
const fmtTokens = (n) => {
|
||||
if (n >= 1_000_000) return (n / 1_000_000).toFixed(1) + 'M';
|
||||
if (n >= 1_000) return Math.round(n / 1000) + 'k';
|
||||
return String(n);
|
||||
};
|
||||
function renderOneUsage(elId, label, u, blurb) {
|
||||
const el_ = $(elId);
|
||||
if (!el_) return;
|
||||
if (!u) { el_.hidden = true; return; }
|
||||
const ctx = u.input_tokens + u.cache_read_input_tokens + u.cache_creation_input_tokens;
|
||||
const fmt = (n) => {
|
||||
if (n >= 1_000_000) return (n / 1_000_000).toFixed(1) + 'M';
|
||||
if (n >= 1_000) return Math.round(n / 1000) + 'k';
|
||||
return String(n);
|
||||
};
|
||||
const total = u.input_tokens + u.cache_read_input_tokens + u.cache_creation_input_tokens;
|
||||
el_.hidden = false;
|
||||
el_.title = [
|
||||
'context window in use',
|
||||
blurb,
|
||||
'input: ' + u.input_tokens,
|
||||
'cache_read: ' + u.cache_read_input_tokens,
|
||||
'cache_write: ' + u.cache_creation_input_tokens,
|
||||
'output (last turn): ' + u.output_tokens,
|
||||
'output: ' + u.output_tokens,
|
||||
].join('\n');
|
||||
el_.textContent = 'ctx · ' + fmt(ctx);
|
||||
el_.textContent = label + ' · ' + fmtTokens(total);
|
||||
}
|
||||
function renderTokenUsage(ev) {
|
||||
// `ev` is `{ ctx, cost }` either off /api/state cold-load (each may
|
||||
// be null) or off a `token_usage_changed` SSE event (both present
|
||||
// post-turn).
|
||||
renderOneUsage('ctx-badge', 'ctx', ev && ev.ctx,
|
||||
'last-inference prompt size — the actual context window in use right now');
|
||||
renderOneUsage('cost-badge', 'cost', ev && ev.cost,
|
||||
'cumulative tokens billed across the last turn (sum across every inference)');
|
||||
}
|
||||
function renderLastTurn(ms) {
|
||||
const el_ = $('last-turn');
|
||||
|
|
@ -626,7 +639,7 @@
|
|||
}
|
||||
renderAliveBadge(s.status);
|
||||
renderModelChip(s.model);
|
||||
renderTokenUsage(s.token_usage);
|
||||
renderTokenUsage({ ctx: s.ctx_usage, cost: s.cost_usage });
|
||||
// Open-threads aren't part of /api/state (kept on the broker
|
||||
// db, fetched via the per-agent socket). Cold-load fetches
|
||||
// it here; turn_end refreshes it via the renderer below.
|
||||
|
|
@ -1026,7 +1039,7 @@
|
|||
},
|
||||
model_changed(ev, api) { if (!api.fromHistory) renderModelChip(ev.model); },
|
||||
token_usage_changed(ev, api) {
|
||||
if (!api.fromHistory) renderTokenUsage(ev.usage);
|
||||
if (!api.fromHistory) renderTokenUsage({ ctx: ev.ctx, cost: ev.cost });
|
||||
},
|
||||
turn_state_changed(ev, api) {
|
||||
if (!api.fromHistory) setStateAbs(ev.state, ev.since_unix);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue