agent badges: split into ctx (last-inference) + cost (cumulative)
the existing ctx badge was misnamed: it summed `result.usage`, which holds
the cumulative tokens billed across every inference in the turn. for
tool-heavy turns that total easily exceeds the model's context window
(e.g. a 600k cached prefix × 15 sub-calls = 9M cache_read), making it
useless as a "should i compact?" signal.
now two separate badges:
- ctx · N: the last inference's prompt size, i.e. the actual context
  window in use right now. parsed from each `assistant` event's
  `.message.usage`; the harness tracks the most recent one across the
  stream and snapshots it when the `result` event lands.
- cost · M: cumulative tokens billed across the whole turn (the
  previous behaviour, now correctly labelled).
both update via a single `TokenUsageChanged { ctx, cost }` SSE event at
turn-end. `turn_stats` grows four columns (`last_input_tokens`,
`last_output_tokens`, `last_cache_read_input_tokens`,
`last_cache_creation_input_tokens`) so the cold-load seed can paint both
badges on page load. migrations run try-and-ignore ALTERs so existing
agent dbs catch up; pre-migration rows have zeros in the last-inference
columns and yield no `ctx` seed (the badge stays empty until the next
turn) rather than a misleading 0.
This commit is contained in:
parent
14549dd8a9
commit
5c6c607e25
9 changed files with 267 additions and 101 deletions
|
|
@ -64,10 +64,11 @@ async fn main() -> Result<()> {
|
|||
let login_state = Arc::new(Mutex::new(initial));
|
||||
let bus = Bus::new();
|
||||
let stats = TurnStats::open_default();
|
||||
if let Some(s) = &stats
|
||||
&& let Some(u) = s.last_usage()
|
||||
{
|
||||
bus.seed_usage(u);
|
||||
if let Some(s) = &stats {
|
||||
let (ctx, cost) = s.last_usage();
|
||||
if ctx.is_some() || cost.is_some() {
|
||||
bus.seed_usage(ctx, cost);
|
||||
}
|
||||
}
|
||||
let files = turn::TurnFiles::prepare(&cli.socket, &label, mcp::Flavor::Manager).await?;
|
||||
let turn_lock: TurnLock = Arc::new(tokio::sync::Mutex::new(()));
|
||||
|
|
@ -291,7 +292,8 @@ fn build_row(
|
|||
open_threads_count: Option<u64>,
|
||||
open_reminders_count: Option<u64>,
|
||||
) -> TurnStatRow {
|
||||
let usage = bus.last_usage().unwrap_or_default();
|
||||
let cost = bus.last_cost_usage().unwrap_or_default();
|
||||
let ctx = bus.last_ctx_usage().unwrap_or(cost);
|
||||
let tool_calls = bus.take_tool_calls();
|
||||
let tool_call_count: u64 = tool_calls.values().copied().sum();
|
||||
let tool_call_breakdown_json = if tool_calls.is_empty() {
|
||||
|
|
@ -310,10 +312,14 @@ fn build_row(
|
|||
duration_ms,
|
||||
model,
|
||||
wake_from,
|
||||
input_tokens: usage.input_tokens,
|
||||
output_tokens: usage.output_tokens,
|
||||
cache_read_input_tokens: usage.cache_read_input_tokens,
|
||||
cache_creation_input_tokens: usage.cache_creation_input_tokens,
|
||||
input_tokens: cost.input_tokens,
|
||||
output_tokens: cost.output_tokens,
|
||||
cache_read_input_tokens: cost.cache_read_input_tokens,
|
||||
cache_creation_input_tokens: cost.cache_creation_input_tokens,
|
||||
last_input_tokens: ctx.input_tokens,
|
||||
last_output_tokens: ctx.output_tokens,
|
||||
last_cache_read_input_tokens: ctx.cache_read_input_tokens,
|
||||
last_cache_creation_input_tokens: ctx.cache_creation_input_tokens,
|
||||
tool_call_count,
|
||||
tool_call_breakdown_json,
|
||||
open_threads_count,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue