diff --git a/docs/web-ui.md b/docs/web-ui.md
index dd933a9..0aeb840 100644
--- a/docs/web-ui.md
+++ b/docs/web-ui.md
@@ -310,13 +310,22 @@ Layout, top to bottom:
     `turn_state_since`.
   - Model chip: `model · <name>` (e.g. `model · haiku`). Driven
     by `LiveEvent::ModelChanged`; emitted from `Bus::set_model`.
-  - Ctx badge: `ctx · 142k` — total prompt tokens in the
-    current context window (input + cache_read + cache_write),
-    mirroring claude code's bottom-right indicator. Hover for
-    the breakdown including output. Driven by
-    `LiveEvent::TokenUsageChanged`; emitted from
-    `Bus::record_usage` whenever the terminal `result` event
-    delivers a fresh usage block.
+  - Ctx badge: `ctx · 142k` — last inference's prompt size
+    (input + cache_read + cache_write of the most recent
+    model call in the just-ended turn). This is the **actual
+    context window utilisation** — the number to watch when
+    deciding whether to compact.
+  - Cost badge: `cost · 1.3M` — cumulative tokens billed
+    across **every inference** in the last turn (sum of all
+    per-call prompts). Tool-heavy turns rebill the cached
+    prefix per call, so this routinely exceeds the model's
+    window — it's a cost signal, not a size signal.
+  - Both badges are driven by `LiveEvent::TokenUsageChanged {
+    ctx, cost }`, emitted once at turn-end from
+    `Bus::record_turn_usage`. The harness tracks per-inference
+    usage by walking `assistant` events in the stream-json
+    and updating `last_inference` on each one; the `result`
+    event supplies `cost` and triggers the emit.
   - Last-turn chip: `last turn 12.3s` appears after the first
     turn ends, computed from the state-since deltas.
   - `■ cancel turn` button: visible only while state=thinking,
@@ -437,8 +446,11 @@ Bus events (new vocabulary on `/events/stream`):
   `needs_login_idle` / `needs_login_in_progress`. Drives the
   alive-badge.
 - `model_changed { model }` — drives the model chip.
-- `token_usage_changed { usage: TokenUsage }` — drives the
-  ctx-badge. Emitted from `Bus::record_usage` whenever the
-  stream-json `result` event delivers a fresh usage block.
+- `token_usage_changed { ctx: TokenUsage, cost: TokenUsage }`
+  — drives the ctx + cost badges. Emitted from
+  `Bus::record_turn_usage` at turn-end; `ctx` is the last
+  inference's usage (current context size), `cost` is the
+  cumulative across every inference (the `result` event's
+  totals).
 - `turn_state_changed { state, since_unix }` — drives the
   state badge (`idle`/`thinking`/`compacting`).
diff --git a/hive-ag3nt/assets/app.js b/hive-ag3nt/assets/app.js
index 0fbbd6d..6b8f5d2 100644
--- a/hive-ag3nt/assets/app.js
+++ b/hive-ag3nt/assets/app.js
@@ -525,30 +525,43 @@
     el_.textContent = 'model · ' + model;
     el_.title = `claude --model ${model}\nset via the operator's /model command; persists across turns until changed`;
   }
-  // Context badge — mirrors Claude Code's bottom-right "N tokens"
-  // indicator. Primary number is total prompt tokens used in the
-  // current context window (input + both cache axes); hover for the
-  // breakdown including output. Kept as chrome on the state row so
-  // the terminal stays the star.
-  function renderTokenUsage(u) {
-    const el_ = $('ctx-badge');
+  // Token badges — two separate chips:
+  //   ctx · N    last inference's prompt size = current context window
+  //              utilisation (what to watch for compaction decisions)
+  //   cost · M   cumulative billed tokens across the whole last turn
+  //              (sum across every inference; tool-heavy turns rebill
+  //              the cached prompt per call and blow past the model's
+  //              context window — this is a cost signal, not a size
+  //              signal)
+  // Both fed by the same `token_usage_changed` SSE event (`{ ctx, cost }`).
+  const fmtTokens = (n) => {
+    if (n >= 1_000_000) return (n / 1_000_000).toFixed(1) + 'M';
+    if (n >= 1_000) return Math.round(n / 1000) + 'k';
+    return String(n);
+  };
+  function renderOneUsage(elId, label, u, blurb) {
+    const el_ = $(elId);
     if (!el_) return;
     if (!u) { el_.hidden = true; return; }
-    const ctx = u.input_tokens + u.cache_read_input_tokens + u.cache_creation_input_tokens;
-    const fmt = (n) => {
-      if (n >= 1_000_000) return (n / 1_000_000).toFixed(1) + 'M';
-      if (n >= 1_000) return Math.round(n / 1000) + 'k';
-      return String(n);
-    };
+    const total = u.input_tokens + u.cache_read_input_tokens + u.cache_creation_input_tokens;
     el_.hidden = false;
     el_.title = [
-      'context window in use',
+      blurb,
       'input: ' + u.input_tokens,
       'cache_read: ' + u.cache_read_input_tokens,
       'cache_write: ' + u.cache_creation_input_tokens,
-      'output (last turn): ' + u.output_tokens,
+      'output: ' + u.output_tokens,
     ].join('\n');
-    el_.textContent = 'ctx · ' + fmt(ctx);
+    el_.textContent = label + ' · ' + fmtTokens(total);
+  }
+  function renderTokenUsage(ev) {
+    // `ev` is `{ ctx, cost }` either off /api/state cold-load (each may
+    // be null) or off a `token_usage_changed` SSE event (both present
+    // post-turn).
+    renderOneUsage('ctx-badge', 'ctx', ev && ev.ctx,
+      'last-inference prompt size — the actual context window in use right now');
+    renderOneUsage('cost-badge', 'cost', ev && ev.cost,
+      'cumulative tokens billed across the last turn (sum across every inference)');
   }
   function renderLastTurn(ms) {
     const el_ = $('last-turn');
@@ -626,7 +639,7 @@
       }
       renderAliveBadge(s.status);
       renderModelChip(s.model);
-      renderTokenUsage(s.token_usage);
+      renderTokenUsage({ ctx: s.ctx_usage, cost: s.cost_usage });
       // Open-threads aren't part of /api/state (kept on the broker
       // db, fetched via the per-agent socket). Cold-load fetches
       // it here; turn_end refreshes it via the renderer below.
@@ -1026,7 +1039,7 @@
         },
         model_changed(ev, api) { if (!api.fromHistory) renderModelChip(ev.model); },
         token_usage_changed(ev, api) {
-          if (!api.fromHistory) renderTokenUsage(ev.usage);
+          if (!api.fromHistory) renderTokenUsage({ ctx: ev.ctx, cost: ev.cost });
         },
         turn_state_changed(ev, api) {
           if (!api.fromHistory) setStateAbs(ev.state, ev.since_unix);
diff --git a/hive-ag3nt/assets/index.html b/hive-ag3nt/assets/index.html
index 7e8dac8..3083990 100644
--- a/hive-ag3nt/assets/index.html
+++ b/hive-ag3nt/assets/index.html
@@ -18,6 +18,7 @@
     <span id="state-badge" class="state-badge state-loading">… booting</span>
     <span id="model-chip" class="model-chip" hidden></span>
     <span id="ctx-badge" class="ctx-badge" hidden title="tokens used in the current context window"></span>
+    <span id="cost-badge" class="ctx-badge" hidden title="cumulative tokens billed across the last turn (sum across every inference; tool-heavy turns rebill the cached prompt per call)"></span>
     <span id="last-turn" class="last-turn" hidden></span>
     <button type="button" id="cancel-btn" class="btn-cancel-turn" hidden>■ cancel turn</button>
     <button type="button" id="new-session-btn" class="btn-new-session"
diff --git a/hive-ag3nt/src/bin/hive-ag3nt.rs b/hive-ag3nt/src/bin/hive-ag3nt.rs
index 03cce24..a9b9b44 100644
--- a/hive-ag3nt/src/bin/hive-ag3nt.rs
+++ b/hive-ag3nt/src/bin/hive-ag3nt.rs
@@ -74,10 +74,11 @@ async fn main() -> Result<()> {
             let login_state = Arc::new(Mutex::new(initial));
             let bus = Bus::new();
             let stats = TurnStats::open_default();
-            if let Some(s) = &stats
-                && let Some(u) = s.last_usage()
-            {
-                bus.seed_usage(u);
+            if let Some(s) = &stats {
+                let (ctx, cost) = s.last_usage();
+                if ctx.is_some() || cost.is_some() {
+                    bus.seed_usage(ctx, cost);
+                }
             }
             let files = turn::TurnFiles::prepare(&cli.socket, &label, mcp::Flavor::Agent).await?;
             let turn_lock: TurnLock = Arc::new(tokio::sync::Mutex::new(()));
@@ -354,7 +355,8 @@ fn build_row(
     open_threads_count: Option<u64>,
     open_reminders_count: Option<u64>,
 ) -> TurnStatRow {
-    let usage = bus.last_usage().unwrap_or_default();
+    let cost = bus.last_cost_usage().unwrap_or_default();
+    let ctx = bus.last_ctx_usage().unwrap_or(cost);
     let tool_calls = bus.take_tool_calls();
     let tool_call_count: u64 = tool_calls.values().copied().sum();
     let tool_call_breakdown_json = if tool_calls.is_empty() {
@@ -373,10 +375,14 @@ fn build_row(
         duration_ms,
         model,
         wake_from,
-        input_tokens: usage.input_tokens,
-        output_tokens: usage.output_tokens,
-        cache_read_input_tokens: usage.cache_read_input_tokens,
-        cache_creation_input_tokens: usage.cache_creation_input_tokens,
+        input_tokens: cost.input_tokens,
+        output_tokens: cost.output_tokens,
+        cache_read_input_tokens: cost.cache_read_input_tokens,
+        cache_creation_input_tokens: cost.cache_creation_input_tokens,
+        last_input_tokens: ctx.input_tokens,
+        last_output_tokens: ctx.output_tokens,
+        last_cache_read_input_tokens: ctx.cache_read_input_tokens,
+        last_cache_creation_input_tokens: ctx.cache_creation_input_tokens,
         tool_call_count,
         tool_call_breakdown_json,
         open_threads_count,
diff --git a/hive-ag3nt/src/bin/hive-m1nd.rs b/hive-ag3nt/src/bin/hive-m1nd.rs
index 870b41f..911876c 100644
--- a/hive-ag3nt/src/bin/hive-m1nd.rs
+++ b/hive-ag3nt/src/bin/hive-m1nd.rs
@@ -64,10 +64,11 @@ async fn main() -> Result<()> {
             let login_state = Arc::new(Mutex::new(initial));
             let bus = Bus::new();
             let stats = TurnStats::open_default();
-            if let Some(s) = &stats
-                && let Some(u) = s.last_usage()
-            {
-                bus.seed_usage(u);
+            if let Some(s) = &stats {
+                let (ctx, cost) = s.last_usage();
+                if ctx.is_some() || cost.is_some() {
+                    bus.seed_usage(ctx, cost);
+                }
             }
             let files = turn::TurnFiles::prepare(&cli.socket, &label, mcp::Flavor::Manager).await?;
             let turn_lock: TurnLock = Arc::new(tokio::sync::Mutex::new(()));
@@ -291,7 +292,8 @@ fn build_row(
     open_threads_count: Option<u64>,
     open_reminders_count: Option<u64>,
 ) -> TurnStatRow {
-    let usage = bus.last_usage().unwrap_or_default();
+    let cost = bus.last_cost_usage().unwrap_or_default();
+    let ctx = bus.last_ctx_usage().unwrap_or(cost);
     let tool_calls = bus.take_tool_calls();
     let tool_call_count: u64 = tool_calls.values().copied().sum();
     let tool_call_breakdown_json = if tool_calls.is_empty() {
@@ -310,10 +312,14 @@ fn build_row(
         duration_ms,
         model,
         wake_from,
-        input_tokens: usage.input_tokens,
-        output_tokens: usage.output_tokens,
-        cache_read_input_tokens: usage.cache_read_input_tokens,
-        cache_creation_input_tokens: usage.cache_creation_input_tokens,
+        input_tokens: cost.input_tokens,
+        output_tokens: cost.output_tokens,
+        cache_read_input_tokens: cost.cache_read_input_tokens,
+        cache_creation_input_tokens: cost.cache_creation_input_tokens,
+        last_input_tokens: ctx.input_tokens,
+        last_output_tokens: ctx.output_tokens,
+        last_cache_read_input_tokens: ctx.cache_read_input_tokens,
+        last_cache_creation_input_tokens: ctx.cache_creation_input_tokens,
         tool_call_count,
         tool_call_breakdown_json,
         open_threads_count,
diff --git a/hive-ag3nt/src/events.rs b/hive-ag3nt/src/events.rs
index 2fa5b38..2a9c723 100644
--- a/hive-ag3nt/src/events.rs
+++ b/hive-ag3nt/src/events.rs
@@ -130,10 +130,15 @@ pub enum LiveEvent {
     /// updates the chip + the per-turn stats sink will key off this
     /// to mark the boundary in its log.
     ModelChanged { model: String },
-    /// Final-turn `usage` block landed (input + output + cache
-    /// counters). Powers the context-window badge + accumulates into
-    /// the per-turn stats sink.
-    TokenUsageChanged { usage: TokenUsage },
+    /// Token usage for the turn just ended. Carries two snapshots:
+    /// - `ctx` is the LAST inference's usage block (the actual context
+    ///   window in use right now — what the operator needs to decide
+    ///   whether to compact / reset).
+    /// - `cost` is the cumulative usage across every inference in the
+    ///   turn (sum of per-call billed tokens — the cost signal). For
+    ///   tool-heavy turns the cumulative blows past the model's window
+    ///   because each tool call's prompt is rebilled.
+    TokenUsageChanged { ctx: TokenUsage, cost: TokenUsage },
     /// Harness's `TurnState` transitioned (idle / thinking /
     /// compacting). `since_unix` matches `Bus::state_snapshot().1`
     /// so the client's elapsed-time ticker keeps progressing across
@@ -221,15 +226,29 @@ impl TokenUsage {
         self.input_tokens + self.cache_read_input_tokens + self.cache_creation_input_tokens
     }
 
-    /// Parse usage from a stream-json event. Returns `Some` only for the
-    /// terminal `result` event (which is the only one that carries `usage`);
-    /// every other event maps to `None`. Missing numeric fields default to 0
-    /// so partial server payloads don't drop the whole snapshot.
+    /// Parse usage from the terminal `result` stream-json event. This is the
+    /// **cumulative** sum across every inference in the turn — useful as a
+    /// cost signal, but NOT the current context size (a tool-heavy turn
+    /// sums per-call cached prompts and easily exceeds the model window).
     pub fn from_stream_event(v: &serde_json::Value) -> Option<Self> {
         if v.get("type").and_then(|t| t.as_str()) != Some("result") {
             return None;
         }
-        let u = v.get("usage")?;
+        Self::from_usage_obj(v.get("usage")?)
+    }
+
+    /// Parse usage from a per-inference `assistant` event's
+    /// `.message.usage` block. Each turn fires one of these for every
+    /// model call; tracking the LAST one over the turn gives the actual
+    /// conversation context size — the number to watch for compaction.
+    pub fn from_assistant_event(v: &serde_json::Value) -> Option<Self> {
+        if v.get("type").and_then(|t| t.as_str()) != Some("assistant") {
+            return None;
+        }
+        Self::from_usage_obj(v.get("message")?.get("usage")?)
+    }
+
+    fn from_usage_obj(u: &serde_json::Value) -> Option<Self> {
         let field = |k: &str| u.get(k).and_then(serde_json::Value::as_u64).unwrap_or(0);
         Some(Self {
             input_tokens: field("input_tokens"),
@@ -281,12 +300,16 @@ pub struct Bus {
     /// Model name passed to `claude --model`. Default `haiku`; the
     /// operator can override at runtime via `POST /api/model`.
     model: Arc<Mutex<String>>,
-    /// Last token usage reported by claude (from the `result` stream-json
-    /// event). `None` until the first turn with usage data completes.
-    /// Updated on every turn; survives across turns within one harness
-    /// process lifetime (resets on container restart, which is fine —
-    /// it's a live indicator, not a cumulative counter).
-    last_usage: Arc<Mutex<Option<TokenUsage>>>,
+    /// Last-inference token usage from the most recent turn's final
+    /// `assistant` event. Represents the actual context window size at
+    /// turn-end — the number the operator watches to decide whether to
+    /// compact. `None` until the first turn completes.
+    last_ctx_usage: Arc<Mutex<Option<TokenUsage>>>,
+    /// Cumulative token usage from the most recent turn's `result`
+    /// event (sum across every inference in the turn). This is the cost
+    /// signal — tool-heavy turns rebill the cached prompt per call and
+    /// blow past the model window. `None` until the first turn completes.
+    last_cost_usage: Arc<Mutex<Option<TokenUsage>>>,
     /// One-shot: next `run_claude` call drops `--continue`, starting
     /// a fresh claude session. Set by `POST /api/new-session` from
     /// the per-agent web UI; consumed (cleared back to false) by the
@@ -323,7 +346,8 @@ impl Bus {
             store,
             state: Arc::new(Mutex::new((TurnState::Idle, now_unix()))),
             model: Arc::new(Mutex::new(initial_model)),
-            last_usage: Arc::new(Mutex::new(None)),
+            last_ctx_usage: Arc::new(Mutex::new(None)),
+            last_cost_usage: Arc::new(Mutex::new(None)),
             skip_continue_once: Arc::new(AtomicBool::new(false)),
             tool_calls: Arc::new(Mutex::new(std::collections::HashMap::new())),
         }
@@ -378,19 +402,27 @@ impl Bus {
         self.emit(LiveEvent::ModelChanged { model: value });
     }
 
-    /// Seed `last_usage` at startup without emitting a SSE event.
-    /// Used by the bin entrypoints to backfill from the most recent
-    /// `turn_stats` row so the per-agent web UI's `ctx-badge` paints
-    /// real numbers on cold load instead of staying empty until the
-    /// next turn finishes.
-    pub fn seed_usage(&self, usage: TokenUsage) {
-        *self.last_usage.lock().unwrap() = Some(usage);
+    /// Seed `last_ctx_usage` + `last_cost_usage` at startup without
+    /// emitting a SSE event. Used by the bin entrypoints to backfill
+    /// from the most recent `turn_stats` row so the per-agent web UI's
+    /// ctx + cost badges paint real numbers on cold load.
+    pub fn seed_usage(&self, ctx: Option<TokenUsage>, cost: Option<TokenUsage>) {
+        if ctx.is_some() {
+            *self.last_ctx_usage.lock().unwrap() = ctx;
+        }
+        if cost.is_some() {
+            *self.last_cost_usage.lock().unwrap() = cost;
+        }
     }
 
-    /// Record the latest token usage from a completed turn.
-    pub fn record_usage(&self, usage: TokenUsage) {
-        *self.last_usage.lock().unwrap() = Some(usage);
-        self.emit(LiveEvent::TokenUsageChanged { usage });
+    /// Record the just-ended turn's usage. `ctx` is the last inference's
+    /// usage (current context size); `cost` is the cumulative across
+    /// every inference in the turn (cost signal). One SSE event fires
+    /// per turn carrying both.
+    pub fn record_turn_usage(&self, ctx: TokenUsage, cost: TokenUsage) {
+        *self.last_ctx_usage.lock().unwrap() = Some(ctx);
+        *self.last_cost_usage.lock().unwrap() = Some(cost);
+        self.emit(LiveEvent::TokenUsageChanged { ctx, cost });
     }
 
     /// Walk a stream-json value for `tool_use` blocks and bump the
@@ -430,10 +462,18 @@ impl Bus {
         std::mem::take(&mut *self.tool_calls.lock().unwrap())
     }
 
-    /// Last known token usage, or `None` if no turn has completed yet.
+    /// Last context-size snapshot (last inference of the most recent
+    /// turn), or `None` if no turn has completed yet.
     #[must_use]
-    pub fn last_usage(&self) -> Option<TokenUsage> {
-        *self.last_usage.lock().unwrap()
+    pub fn last_ctx_usage(&self) -> Option<TokenUsage> {
+        *self.last_ctx_usage.lock().unwrap()
+    }
+
+    /// Last cumulative cost snapshot (sum across the most recent turn's
+    /// inferences), or `None` if no turn has completed yet.
+    #[must_use]
+    pub fn last_cost_usage(&self) -> Option<TokenUsage> {
+        *self.last_cost_usage.lock().unwrap()
     }
 
     /// Update the harness's authoritative turn-loop state. Records
diff --git a/hive-ag3nt/src/turn.rs b/hive-ag3nt/src/turn.rs
index 901effd..b4ad794 100644
--- a/hive-ag3nt/src/turn.rs
+++ b/hive-ag3nt/src/turn.rs
@@ -279,14 +279,28 @@ async fn run_claude(prompt: &str, files: &TurnFiles, bus: &Bus) -> Result<bool>
     let bus_err = bus.clone();
     let pump_stdout = tokio::spawn(async move {
         let mut reader = BufReader::new(stdout).lines();
+        // Track usage as the turn unfolds. `last_inference` overwrites on
+        // every assistant event so at result-time it holds the most recent
+        // model call's usage — the actual context size. The `result` event
+        // carries the cumulative-across-the-turn usage (cost signal). Both
+        // get handed to `record_turn_usage` together so a single SSE
+        // event updates both badges.
+        let mut last_inference: Option<crate::events::TokenUsage> = None;
         while let Ok(Some(line)) = reader.next_line().await {
             if line.contains(PROMPT_TOO_LONG_MARKER) {
                 flag_out.store(true, Ordering::Relaxed);
             }
             match serde_json::from_str::<serde_json::Value>(&line) {
                 Ok(v) => {
-                    if let Some(usage) = crate::events::TokenUsage::from_stream_event(&v) {
-                        bus_out.record_usage(usage);
+                    if let Some(u) = crate::events::TokenUsage::from_assistant_event(&v) {
+                        last_inference = Some(u);
+                    }
+                    if let Some(cost) = crate::events::TokenUsage::from_stream_event(&v) {
+                        // Fall back to `cost` if the turn somehow produced
+                        // a result without any assistant event — keeps the
+                        // ctx badge from going stale on a degenerate turn.
+                        let ctx = last_inference.unwrap_or(cost);
+                        bus_out.record_turn_usage(ctx, cost);
                     }
                     bus_out.observe_stream(&v);
                     bus_out.emit(LiveEvent::Stream(v));
diff --git a/hive-ag3nt/src/turn_stats.rs b/hive-ag3nt/src/turn_stats.rs
index 4d79f15..899a6e6 100644
--- a/hive-ag3nt/src/turn_stats.rs
+++ b/hive-ag3nt/src/turn_stats.rs
@@ -22,8 +22,9 @@ use anyhow::{Context, Result};
 use rusqlite::{Connection, params};
 
 /// SQL bootstrap. CREATE TABLE IF NOT EXISTS so first-boot agents
-/// and existing ones converge on the same shape; ALTER-style
-/// migrations land here as additional statements once we have any.
+/// and existing ones converge on the same shape. The CREATE TABLE
+/// only takes effect on fresh installs; additive migrations land via
+/// `MIGRATIONS` below as try-and-ignore ALTERs so existing dbs catch up.
 const SCHEMA: &str = "
 CREATE TABLE IF NOT EXISTS turn_stats (
     id                              INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -36,6 +37,10 @@ CREATE TABLE IF NOT EXISTS turn_stats (
     output_tokens                   INTEGER NOT NULL DEFAULT 0,
     cache_read_input_tokens         INTEGER NOT NULL DEFAULT 0,
     cache_creation_input_tokens     INTEGER NOT NULL DEFAULT 0,
+    last_input_tokens               INTEGER NOT NULL DEFAULT 0,
+    last_output_tokens              INTEGER NOT NULL DEFAULT 0,
+    last_cache_read_input_tokens    INTEGER NOT NULL DEFAULT 0,
+    last_cache_creation_input_tokens INTEGER NOT NULL DEFAULT 0,
     tool_call_count                 INTEGER NOT NULL DEFAULT 0,
     tool_call_breakdown_json        TEXT,
     open_threads_count              INTEGER,
@@ -47,6 +52,17 @@ CREATE INDEX IF NOT EXISTS idx_turn_stats_started
     ON turn_stats (started_at DESC);
 ";
 
+/// Additive column migrations. Each runs unconditionally and ignores
+/// `duplicate column name` errors — sqlite has no
+/// `ADD COLUMN IF NOT EXISTS`, so try-and-ignore is the portable path.
+/// New columns MUST carry a default so existing rows decode.
+const MIGRATIONS: &[&str] = &[
+    "ALTER TABLE turn_stats ADD COLUMN last_input_tokens INTEGER NOT NULL DEFAULT 0",
+    "ALTER TABLE turn_stats ADD COLUMN last_output_tokens INTEGER NOT NULL DEFAULT 0",
+    "ALTER TABLE turn_stats ADD COLUMN last_cache_read_input_tokens INTEGER NOT NULL DEFAULT 0",
+    "ALTER TABLE turn_stats ADD COLUMN last_cache_creation_input_tokens INTEGER NOT NULL DEFAULT 0",
+];
+
 /// One row to be inserted. `Option`-wrapped fields default to NULL
 /// when the harness couldn't gather them (e.g. socket roundtrip for
 /// open_threads failed) so a partial row beats no row.
@@ -57,10 +73,16 @@ pub struct TurnStatRow {
     pub duration_ms: i64,
     pub model: String,
     pub wake_from: String,
+    /// Cumulative across every inference in the turn (cost signal).
     pub input_tokens: u64,
     pub output_tokens: u64,
     pub cache_read_input_tokens: u64,
     pub cache_creation_input_tokens: u64,
+    /// Last inference's usage — the actual context size at turn end.
+    pub last_input_tokens: u64,
+    pub last_output_tokens: u64,
+    pub last_cache_read_input_tokens: u64,
+    pub last_cache_creation_input_tokens: u64,
     pub tool_call_count: u64,
     /// Per-tool breakdown as JSON: `{"Read":12,"Bash":3,...}`. None
     /// when no tools were called (saves a sqlite write of `"{}"`).
@@ -107,6 +129,18 @@ impl TurnStats {
             .with_context(|| format!("open turn_stats db {}", path.display()))?;
         conn.execute_batch(SCHEMA)
             .context("apply turn_stats schema")?;
+        for stmt in MIGRATIONS {
+            // Ignore "duplicate column name" — the migration already ran.
+            // Any other error is logged but doesn't fail open() because the
+            // base schema works and we'd rather keep the harness alive than
+            // crash on an upgrade hiccup.
+            if let Err(e) = conn.execute(stmt, []) {
+                let msg = e.to_string();
+                if !msg.contains("duplicate column name") {
+                    tracing::warn!(error = %msg, stmt, "turn_stats migration failed");
+                }
+            }
+        }
         Ok(Self {
             inner: std::sync::Arc::new(Mutex::new(conn)),
         })
@@ -121,6 +155,8 @@ impl TurnStats {
                 started_at, ended_at, duration_ms, model, wake_from,
                 input_tokens, output_tokens,
                 cache_read_input_tokens, cache_creation_input_tokens,
+                last_input_tokens, last_output_tokens,
+                last_cache_read_input_tokens, last_cache_creation_input_tokens,
                 tool_call_count, tool_call_breakdown_json,
                 open_threads_count, open_reminders_count,
                 result_kind, note
@@ -130,7 +166,9 @@ impl TurnStats {
                 ?8, ?9,
                 ?10, ?11,
                 ?12, ?13,
-                ?14, ?15
+                ?14, ?15,
+                ?16, ?17,
+                ?18, ?19
              )",
             params![
                 row.started_at,
@@ -142,6 +180,10 @@ impl TurnStats {
                 i64::try_from(row.output_tokens).unwrap_or(i64::MAX),
                 i64::try_from(row.cache_read_input_tokens).unwrap_or(i64::MAX),
                 i64::try_from(row.cache_creation_input_tokens).unwrap_or(i64::MAX),
+                i64::try_from(row.last_input_tokens).unwrap_or(i64::MAX),
+                i64::try_from(row.last_output_tokens).unwrap_or(i64::MAX),
+                i64::try_from(row.last_cache_read_input_tokens).unwrap_or(i64::MAX),
+                i64::try_from(row.last_cache_creation_input_tokens).unwrap_or(i64::MAX),
                 i64::try_from(row.tool_call_count).unwrap_or(i64::MAX),
                 row.tool_call_breakdown_json,
                 row.open_threads_count
@@ -157,32 +199,58 @@ impl TurnStats {
         }
     }
 
-    /// Token counts from the most recently inserted row, if any. Lets
-    /// the harness seed `Bus::last_usage` on startup so the per-agent
-    /// web UI's `ctx-badge` paints with real numbers on cold load
-    /// instead of waiting for the next `TokenUsageChanged` SSE event.
-    /// Best-effort: any sqlite error returns `None` and the caller
-    /// falls back to the empty state.
+    /// Token counts from the most recently inserted row, if any.
+    /// Returns `(ctx, cost)` — both backfill `Bus` on startup so the
+    /// per-agent web UI's ctx + cost badges paint with real numbers on
+    /// cold load instead of waiting for the next `TokenUsageChanged`
+    /// SSE event. Best-effort: any sqlite error returns `(None, None)`.
+    ///
+    /// Pre-migration rows (before the `last_*_tokens` columns existed)
+    /// have last-inference zeros — those rows yield `ctx = None` so the
+    /// badge stays empty until the next real turn rather than showing a
+    /// misleading 0.
     #[must_use]
-    pub fn last_usage(&self) -> Option<crate::events::TokenUsage> {
+    pub fn last_usage(
+        &self,
+    ) -> (
+        Option<crate::events::TokenUsage>,
+        Option<crate::events::TokenUsage>,
+    ) {
         let conn = self.inner.lock().unwrap();
         conn.query_row(
             "SELECT input_tokens, output_tokens,
-                    cache_read_input_tokens, cache_creation_input_tokens
+                    cache_read_input_tokens, cache_creation_input_tokens,
+                    last_input_tokens, last_output_tokens,
+                    last_cache_read_input_tokens, last_cache_creation_input_tokens
              FROM turn_stats
              ORDER BY started_at DESC
              LIMIT 1",
             [],
             |row| {
-                Ok(crate::events::TokenUsage {
-                    input_tokens: u64::try_from(row.get::<_, i64>(0)?).unwrap_or(0),
-                    output_tokens: u64::try_from(row.get::<_, i64>(1)?).unwrap_or(0),
-                    cache_read_input_tokens: u64::try_from(row.get::<_, i64>(2)?).unwrap_or(0),
-                    cache_creation_input_tokens: u64::try_from(row.get::<_, i64>(3)?).unwrap_or(0),
-                })
+                let g = |i: usize| -> rusqlite::Result<u64> {
+                    Ok(u64::try_from(row.get::<_, i64>(i)?).unwrap_or(0))
+                };
+                let cost = crate::events::TokenUsage {
+                    input_tokens: g(0)?,
+                    output_tokens: g(1)?,
+                    cache_read_input_tokens: g(2)?,
+                    cache_creation_input_tokens: g(3)?,
+                };
+                let last = crate::events::TokenUsage {
+                    input_tokens: g(4)?,
+                    output_tokens: g(5)?,
+                    cache_read_input_tokens: g(6)?,
+                    cache_creation_input_tokens: g(7)?,
+                };
+                let ctx = if last == crate::events::TokenUsage::default() {
+                    None
+                } else {
+                    Some(last)
+                };
+                Ok((ctx, Some(cost)))
             },
         )
-        .ok()
+        .unwrap_or((None, None))
     }
 }
 
diff --git a/hive-ag3nt/src/web_ui.rs b/hive-ag3nt/src/web_ui.rs
index fbf4d5d..2d69153 100644
--- a/hive-ag3nt/src/web_ui.rs
+++ b/hive-ag3nt/src/web_ui.rs
@@ -225,9 +225,13 @@ struct StateSnapshot {
     /// the operator can see what they just switched to (and what's
     /// in flight). Mutable at runtime via `POST /api/model`.
     model: String,
-    /// Token usage from the last completed turn. `null` until the
-    /// first turn with usage data finishes.
-    token_usage: Option<crate::events::TokenUsage>,
+    /// Last-inference token usage from the most recent completed
+    /// turn — represents the current context-window size at turn-end.
+    /// `null` until the first turn finishes.
+    ctx_usage: Option<crate::events::TokenUsage>,
+    /// Cumulative token usage across the most recent turn's inferences
+    /// (cost signal). `null` until the first turn finishes.
+    cost_usage: Option<crate::events::TokenUsage>,
 }
 
 #[derive(Serialize)]
@@ -310,7 +314,8 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
     let inbox = recent_inbox(&state.socket, state.flavor()).await;
     let (turn_state, turn_state_since) = state.bus.state_snapshot();
     let model = state.bus.model();
-    let token_usage = state.bus.last_usage();
+    let ctx_usage = state.bus.last_ctx_usage();
+    let cost_usage = state.bus.last_cost_usage();
     axum::Json(StateSnapshot {
         seq,
         label: state.label.clone(),
@@ -321,7 +326,8 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
         turn_state,
         turn_state_since,
         model,
-        token_usage,
+        ctx_usage,
+        cost_usage,
     })
 }