agent badges: split into ctx (last-inference) + cost (cumulative)
the existing ctx badge was misnamed: it summed `result.usage`, which is
the cumulative tokens billed across every inference in the turn. for
tool-heavy turns that easily exceeds the model's context window (a 600k
cached prefix × 15 sub-calls = 9M cache_read), making it useless as a
"should i compact?" signal.
now two separate badges:
  ctx · N   last inference's prompt size = actual context window in
            use right now. parsed from each `assistant` event's
            `.message.usage`; the harness tracks the most recent one
            across the stream and snapshots it when the `result`
            event lands.
  cost · M  cumulative tokens billed across the whole turn (the
            previous behaviour, now correctly labelled).
both update via a single `TokenUsageChanged { ctx, cost }` SSE event at
turn-end. turn_stats grows four columns (`last_input_tokens`,
`last_output_tokens`, `last_cache_read_input_tokens`,
`last_cache_creation_input_tokens`) so the cold-load seed can paint both
badges on page load. migrations run try-and-ignore ALTERs so existing
agent dbs catch up; pre-migration rows have last-inference zeros and
yield no `ctx` seed (badge stays empty until next turn) rather than a
misleading 0.
This commit is contained in:
parent
14549dd8a9
commit
5c6c607e25
9 changed files with 267 additions and 101 deletions
|
|
@ -130,10 +130,15 @@ pub enum LiveEvent {
|
|||
/// updates the chip + the per-turn stats sink will key off this
|
||||
/// to mark the boundary in its log.
|
||||
ModelChanged { model: String },
|
||||
/// Final-turn `usage` block landed (input + output + cache
|
||||
/// counters). Powers the context-window badge + accumulates into
|
||||
/// the per-turn stats sink.
|
||||
TokenUsageChanged { usage: TokenUsage },
|
||||
/// Token usage for the turn just ended. Carries two snapshots:
|
||||
/// - `ctx` is the LAST inference's usage block (the actual context
|
||||
/// window in use right now — what the operator needs to decide
|
||||
/// whether to compact / reset).
|
||||
/// - `cost` is the cumulative usage across every inference in the
|
||||
/// turn (sum of per-call billed tokens — the cost signal). For
|
||||
/// tool-heavy turns the cumulative blows past the model's window
|
||||
/// because each tool call's prompt is rebilled.
|
||||
TokenUsageChanged { ctx: TokenUsage, cost: TokenUsage },
|
||||
/// Harness's `TurnState` transitioned (idle / thinking /
|
||||
/// compacting). `since_unix` matches `Bus::state_snapshot().1`
|
||||
/// so the client's elapsed-time ticker keeps progressing across
|
||||
|
|
@ -221,15 +226,29 @@ impl TokenUsage {
|
|||
self.input_tokens + self.cache_read_input_tokens + self.cache_creation_input_tokens
|
||||
}
|
||||
|
||||
/// Parse usage from a stream-json event. Returns `Some` only for the
|
||||
/// terminal `result` event (which is the only one that carries `usage`);
|
||||
/// every other event maps to `None`. Missing numeric fields default to 0
|
||||
/// so partial server payloads don't drop the whole snapshot.
|
||||
/// Parse usage from the terminal `result` stream-json event. This is the
|
||||
/// **cumulative** sum across every inference in the turn — useful as a
|
||||
/// cost signal, but NOT the current context size (a tool-heavy turn
|
||||
/// sums per-call cached prompts and easily exceeds the model window).
|
||||
pub fn from_stream_event(v: &serde_json::Value) -> Option<Self> {
|
||||
if v.get("type").and_then(|t| t.as_str()) != Some("result") {
|
||||
return None;
|
||||
}
|
||||
let u = v.get("usage")?;
|
||||
Self::from_usage_obj(v.get("usage")?)
|
||||
}
|
||||
|
||||
/// Parse usage from a per-inference `assistant` event's
|
||||
/// `.message.usage` block. Each turn fires one of these for every
|
||||
/// model call; tracking the LAST one over the turn gives the actual
|
||||
/// conversation context size — the number to watch for compaction.
|
||||
pub fn from_assistant_event(v: &serde_json::Value) -> Option<Self> {
|
||||
if v.get("type").and_then(|t| t.as_str()) != Some("assistant") {
|
||||
return None;
|
||||
}
|
||||
Self::from_usage_obj(v.get("message")?.get("usage")?)
|
||||
}
|
||||
|
||||
fn from_usage_obj(u: &serde_json::Value) -> Option<Self> {
|
||||
let field = |k: &str| u.get(k).and_then(serde_json::Value::as_u64).unwrap_or(0);
|
||||
Some(Self {
|
||||
input_tokens: field("input_tokens"),
|
||||
|
|
@ -281,12 +300,16 @@ pub struct Bus {
|
|||
/// Model name passed to `claude --model`. Default `haiku`; the
|
||||
/// operator can override at runtime via `POST /api/model`.
|
||||
model: Arc<Mutex<String>>,
|
||||
/// Last token usage reported by claude (from the `result` stream-json
|
||||
/// event). `None` until the first turn with usage data completes.
|
||||
/// Updated on every turn; survives across turns within one harness
|
||||
/// process lifetime (resets on container restart, which is fine —
|
||||
/// it's a live indicator, not a cumulative counter).
|
||||
last_usage: Arc<Mutex<Option<TokenUsage>>>,
|
||||
/// Last-inference token usage from the most recent turn's final
|
||||
/// `assistant` event. Represents the actual context window size at
|
||||
/// turn-end — the number the operator watches to decide whether to
|
||||
/// compact. `None` until the first turn completes.
|
||||
last_ctx_usage: Arc<Mutex<Option<TokenUsage>>>,
|
||||
/// Cumulative token usage from the most recent turn's `result`
|
||||
/// event (sum across every inference in the turn). This is the cost
|
||||
/// signal — tool-heavy turns rebill the cached prompt per call and
|
||||
/// blow past the model window. `None` until the first turn completes.
|
||||
last_cost_usage: Arc<Mutex<Option<TokenUsage>>>,
|
||||
/// One-shot: next `run_claude` call drops `--continue`, starting
|
||||
/// a fresh claude session. Set by `POST /api/new-session` from
|
||||
/// the per-agent web UI; consumed (cleared back to false) by the
|
||||
|
|
@ -323,7 +346,8 @@ impl Bus {
|
|||
store,
|
||||
state: Arc::new(Mutex::new((TurnState::Idle, now_unix()))),
|
||||
model: Arc::new(Mutex::new(initial_model)),
|
||||
last_usage: Arc::new(Mutex::new(None)),
|
||||
last_ctx_usage: Arc::new(Mutex::new(None)),
|
||||
last_cost_usage: Arc::new(Mutex::new(None)),
|
||||
skip_continue_once: Arc::new(AtomicBool::new(false)),
|
||||
tool_calls: Arc::new(Mutex::new(std::collections::HashMap::new())),
|
||||
}
|
||||
|
|
@ -378,19 +402,27 @@ impl Bus {
|
|||
self.emit(LiveEvent::ModelChanged { model: value });
|
||||
}
|
||||
|
||||
/// Seed `last_usage` at startup without emitting a SSE event.
|
||||
/// Used by the bin entrypoints to backfill from the most recent
|
||||
/// `turn_stats` row so the per-agent web UI's `ctx-badge` paints
|
||||
/// real numbers on cold load instead of staying empty until the
|
||||
/// next turn finishes.
|
||||
pub fn seed_usage(&self, usage: TokenUsage) {
|
||||
*self.last_usage.lock().unwrap() = Some(usage);
|
||||
/// Seed `last_ctx_usage` + `last_cost_usage` at startup without
|
||||
/// emitting a SSE event. Used by the bin entrypoints to backfill
|
||||
/// from the most recent `turn_stats` row so the per-agent web UI's
|
||||
/// ctx + cost badges paint real numbers on cold load.
|
||||
pub fn seed_usage(&self, ctx: Option<TokenUsage>, cost: Option<TokenUsage>) {
|
||||
if ctx.is_some() {
|
||||
*self.last_ctx_usage.lock().unwrap() = ctx;
|
||||
}
|
||||
if cost.is_some() {
|
||||
*self.last_cost_usage.lock().unwrap() = cost;
|
||||
}
|
||||
}
|
||||
|
||||
/// Record the latest token usage from a completed turn.
|
||||
pub fn record_usage(&self, usage: TokenUsage) {
|
||||
*self.last_usage.lock().unwrap() = Some(usage);
|
||||
self.emit(LiveEvent::TokenUsageChanged { usage });
|
||||
/// Record the just-ended turn's usage. `ctx` is the last inference's
|
||||
/// usage (current context size); `cost` is the cumulative across
|
||||
/// every inference in the turn (cost signal). One SSE event fires
|
||||
/// per turn carrying both.
|
||||
pub fn record_turn_usage(&self, ctx: TokenUsage, cost: TokenUsage) {
|
||||
*self.last_ctx_usage.lock().unwrap() = Some(ctx);
|
||||
*self.last_cost_usage.lock().unwrap() = Some(cost);
|
||||
self.emit(LiveEvent::TokenUsageChanged { ctx, cost });
|
||||
}
|
||||
|
||||
/// Walk a stream-json value for `tool_use` blocks and bump the
|
||||
|
|
@ -430,10 +462,18 @@ impl Bus {
|
|||
std::mem::take(&mut *self.tool_calls.lock().unwrap())
|
||||
}
|
||||
|
||||
/// Last known token usage, or `None` if no turn has completed yet.
|
||||
/// Last context-size snapshot (last inference of the most recent
|
||||
/// turn), or `None` if no turn has completed yet.
|
||||
#[must_use]
|
||||
pub fn last_usage(&self) -> Option<TokenUsage> {
|
||||
*self.last_usage.lock().unwrap()
|
||||
pub fn last_ctx_usage(&self) -> Option<TokenUsage> {
|
||||
*self.last_ctx_usage.lock().unwrap()
|
||||
}
|
||||
|
||||
/// Last cumulative cost snapshot (sum across the most recent turn's
|
||||
/// inferences), or `None` if no turn has completed yet.
|
||||
#[must_use]
|
||||
pub fn last_cost_usage(&self) -> Option<TokenUsage> {
|
||||
*self.last_cost_usage.lock().unwrap()
|
||||
}
|
||||
|
||||
/// Update the harness's authoritative turn-loop state. Records
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue