show token usage on per-agent web ui after each turn

This commit is contained in:
damocles 2026-05-17 02:25:25 +02:00
parent ca86bcf4bd
commit ce740483c6
6 changed files with 91 additions and 1 deletions

View file

@ -180,6 +180,12 @@ pre.diff {
font-size: 0.78em; font-size: 0.78em;
letter-spacing: 0.04em; letter-spacing: 0.04em;
} }
/* Token-usage readout (the #token-usage span): muted colour and slightly
   reduced text. The element only carries a hover tooltip set from JS and
   is not a control, so it keeps the default cursor. */
.token-usage {
color: var(--muted);
font-size: 0.8em;
letter-spacing: 0.04em;
cursor: default;
}
.btn-dashlink { .btn-dashlink {
color: var(--cyan); color: var(--cyan);
border: 1px solid var(--cyan); border: 1px solid var(--cyan);

View file

@ -412,6 +412,21 @@
el_.hidden = false; el_.hidden = false;
el_.textContent = 'model · ' + model; el_.textContent = 'model · ' + model;
} }
// Render the last turn's token usage into the `token-usage` element.
// `u` is the `token_usage` value from the state snapshot; when it is
// falsy the element is hidden. The visible text shows abbreviated
// context-in / output counts, the tooltip shows the four raw counts.
function renderTokenUsage(u) {
  const chip = $('token-usage');
  if (!chip) return;
  if (!u) {
    chip.hidden = true;
    return;
  }
  const {
    input_tokens: input,
    output_tokens: output,
    cache_read_input_tokens: cacheRead,
    cache_creation_input_tokens: cacheWrite,
  } = u;
  // Counts of 1000 and above are abbreviated to one decimal, e.g. "12.3k".
  function abbreviate(n) {
    if (n >= 1000) return (n / 1000).toFixed(1) + 'k';
    return String(n);
  }
  // Context = fresh input plus both cache counters.
  const context = input + cacheRead + cacheWrite;
  chip.hidden = false;
  chip.title = `input: ${input} · output: ${output} · cache_read: ${cacheRead} · cache_write: ${cacheWrite}`;
  chip.textContent = `· ctx ${abbreviate(context)} in · ${abbreviate(output)} out`;
}
function renderLastTurn(ms) { function renderLastTurn(ms) {
const el_ = $('last-turn'); const el_ = $('last-turn');
if (!el_) return; if (!el_) return;
@ -485,6 +500,7 @@
setStateAbs(s.turn_state, s.turn_state_since); setStateAbs(s.turn_state, s.turn_state_since);
} }
renderModelChip(s.model); renderModelChip(s.model);
renderTokenUsage(s.token_usage);
// Skip the re-render if nothing structurally changed. The most // Skip the re-render if nothing structurally changed. The most
// common case is `online` polling itself — without this guard, the // common case is `online` polling itself — without this guard, the
// operator's <input value> gets clobbered every cycle. // operator's <input value> gets clobbered every cycle.

View file

@ -17,6 +17,7 @@
<span id="state-badge" class="state-badge state-loading">… booting</span> <span id="state-badge" class="state-badge state-loading">… booting</span>
<span id="model-chip" class="model-chip" hidden></span> <span id="model-chip" class="model-chip" hidden></span>
<span id="last-turn" class="last-turn" hidden></span> <span id="last-turn" class="last-turn" hidden></span>
<span id="token-usage" class="token-usage" hidden></span>
<button type="button" id="cancel-btn" class="btn-cancel-turn" hidden>■ cancel turn</button> <button type="button" id="cancel-btn" class="btn-cancel-turn" hidden>■ cancel turn</button>
<button type="button" id="new-session-btn" class="btn-new-session" <button type="button" id="new-session-btn" class="btn-new-session"
title="next turn runs without --continue, starting a fresh claude session">↻ new session</button> title="next turn runs without --continue, starting a fresh claude session">↻ new session</button>

View file

@ -156,6 +156,23 @@ impl EventStore {
} }
} }
/// Token usage emitted by claude in the final `result` stream-json event.
/// All counts are in tokens. The fields are plain `u64`s, so an unreported
/// count cannot be represented as `None`: the harness stores `0` for any
/// key that is missing from the event's `usage` object or is not an
/// unsigned integer.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq)]
pub struct TokenUsage {
/// Value of the `input_tokens` key of the `usage` object.
pub input_tokens: u64,
/// Value of the `output_tokens` key of the `usage` object.
pub output_tokens: u64,
/// Value of the `cache_read_input_tokens` key of the `usage` object.
pub cache_read_input_tokens: u64,
/// Value of the `cache_creation_input_tokens` key of the `usage` object.
pub cache_creation_input_tokens: u64,
}
impl TokenUsage {
    /// Total context consumed this turn (input + cache reads + cache writes).
    ///
    /// The counts are parsed from externally supplied JSON, so the sum
    /// saturates at `u64::MAX` instead of overflowing; a plain `+` would
    /// panic in debug builds and silently wrap in release builds.
    #[must_use]
    pub fn context_tokens(&self) -> u64 {
        self.input_tokens
            .saturating_add(self.cache_read_input_tokens)
            .saturating_add(self.cache_creation_input_tokens)
    }
}
/// Authoritative turn-loop state. The harness owns it; the web UI /// Authoritative turn-loop state. The harness owns it; the web UI
/// reads via `/api/state` and renders. Lives alongside the bus /// reads via `/api/state` and renders. Lives alongside the bus
/// because everyone who has a `Bus` already has the right handle to /// because everyone who has a `Bus` already has the right handle to
@ -191,6 +208,12 @@ pub struct Bus {
/// Model name passed to `claude --model`. Default `haiku`; the /// Model name passed to `claude --model`. Default `haiku`; the
/// operator can override at runtime via `POST /api/model`. /// operator can override at runtime via `POST /api/model`.
model: Arc<Mutex<String>>, model: Arc<Mutex<String>>,
/// Last token usage reported by claude (from the `result` stream-json
/// event). `None` until the first turn with usage data completes.
/// Overwritten whenever a turn's `result` event carries a `usage`
/// object; a turn without one leaves the previous value in place.
/// Survives across turns within one harness process lifetime (resets
/// on container restart, which is fine — it's a live indicator, not a
/// cumulative counter).
last_usage: Arc<Mutex<Option<TokenUsage>>>,
/// One-shot: next `run_claude` call drops `--continue`, starting /// One-shot: next `run_claude` call drops `--continue`, starting
/// a fresh claude session. Set by `POST /api/new-session` from /// a fresh claude session. Set by `POST /api/new-session` from
/// the per-agent web UI; consumed (cleared back to false) by the /// the per-agent web UI; consumed (cleared back to false) by the
@ -220,6 +243,7 @@ impl Bus {
store, store,
state: Arc::new(Mutex::new((TurnState::Idle, now_unix()))), state: Arc::new(Mutex::new((TurnState::Idle, now_unix()))),
model: Arc::new(Mutex::new(initial_model)), model: Arc::new(Mutex::new(initial_model)),
last_usage: Arc::new(Mutex::new(None)),
skip_continue_once: Arc::new(AtomicBool::new(false)), skip_continue_once: Arc::new(AtomicBool::new(false)),
} }
} }
@ -258,6 +282,17 @@ impl Bus {
} }
} }
/// Store `usage` as the most recent token usage, replacing whatever an
/// earlier turn recorded.
///
/// # Panics
///
/// Panics if the `last_usage` mutex is poisoned.
pub fn record_usage(&self, usage: TokenUsage) {
    let mut slot = self.last_usage.lock().unwrap();
    *slot = Some(usage);
}
/// Copy of the most recently recorded token usage, or `None` when
/// nothing has been recorded yet.
///
/// # Panics
///
/// Panics if the `last_usage` mutex is poisoned.
#[must_use]
pub fn last_usage(&self) -> Option<TokenUsage> {
    let slot = self.last_usage.lock().unwrap();
    *slot
}
/// Update the harness's authoritative turn-loop state. Records /// Update the harness's authoritative turn-loop state. Records
/// the transition time so `state_snapshot` can return a since-age. /// the transition time so `state_snapshot` can return a since-age.
pub fn set_state(&self, next: TurnState) { pub fn set_state(&self, next: TurnState) {

View file

@ -276,7 +276,34 @@ async fn run_claude(prompt: &str, files: &TurnFiles, bus: &Bus) -> Result<bool>
flag_out.store(true, Ordering::Relaxed); flag_out.store(true, Ordering::Relaxed);
} }
match serde_json::from_str::<serde_json::Value>(&line) { match serde_json::from_str::<serde_json::Value>(&line) {
Ok(v) => bus_out.emit(LiveEvent::Stream(v)), Ok(v) => {
// Extract token usage from the final `result` event and
// store it in the bus for the web UI to surface.
if v.get("type").and_then(|t| t.as_str()) == Some("result") {
if let Some(u) = v.get("usage") {
let usage = crate::events::TokenUsage {
input_tokens: u
.get("input_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0),
output_tokens: u
.get("output_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0),
cache_read_input_tokens: u
.get("cache_read_input_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0),
cache_creation_input_tokens: u
.get("cache_creation_input_tokens")
.and_then(|v| v.as_u64())
.unwrap_or(0),
};
bus_out.record_usage(usage);
}
}
bus_out.emit(LiveEvent::Stream(v));
}
Err(_) => bus_out.emit(LiveEvent::Note(format!("(non-json) {line}"))), Err(_) => bus_out.emit(LiveEvent::Note(format!("(non-json) {line}"))),
} }
} }

View file

@ -196,6 +196,9 @@ struct StateSnapshot {
/// the operator can see what they just switched to (and what's /// the operator can see what they just switched to (and what's
/// in flight). Mutable at runtime via `POST /api/model`. /// in flight). Mutable at runtime via `POST /api/model`.
model: String, model: String,
/// Token usage from the last completed turn. `null` until the
/// first turn with usage data finishes.
token_usage: Option<crate::events::TokenUsage>,
} }
#[derive(Serialize)] #[derive(Serialize)]
@ -232,6 +235,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
let inbox = recent_inbox(&state.socket, state.flavor()).await; let inbox = recent_inbox(&state.socket, state.flavor()).await;
let (turn_state, turn_state_since) = state.bus.state_snapshot(); let (turn_state, turn_state_since) = state.bus.state_snapshot();
let model = state.bus.model(); let model = state.bus.model();
let token_usage = state.bus.last_usage();
axum::Json(StateSnapshot { axum::Json(StateSnapshot {
label: state.label.clone(), label: state.label.clone(),
dashboard_port, dashboard_port,
@ -241,6 +245,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
turn_state, turn_state,
turn_state_since, turn_state_since,
model, model,
token_usage,
}) })
} }