sse: seq plumbing + subscribe-first dedupe dance

This commit is contained in:
müde 2026-05-17 12:26:00 +02:00
parent 8c186d4fb7
commit 1340a654e7
5 changed files with 197 additions and 37 deletions

View file

@ -9,7 +9,7 @@
//! showing "connecting…" until the first event arrives. //! showing "connecting…" until the first event arrives.
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use rusqlite::{Connection, params}; use rusqlite::{Connection, params};
@ -74,6 +74,18 @@ CREATE TABLE IF NOT EXISTS events (
CREATE INDEX IF NOT EXISTS idx_events_ts ON events (ts); CREATE INDEX IF NOT EXISTS idx_events_ts ON events (ts);
"; ";
/// Envelope carried over the broadcast channel: the `LiveEvent` itself
/// plus a monotonic per-process seq stamped by `Bus::emit`. SSE consumers
/// serialize this directly (seq becomes a sibling of the `kind` tag);
/// clients use seq to dedupe their buffered live traffic against the
/// snapshot/history responses (drop anything with `seq <= snapshot.seq`).
///
/// Serialize-only: the envelope is produced by the bus and written out
/// to consumers; nothing here derives `Deserialize`.
#[derive(Debug, Clone, Serialize)]
pub struct BusEvent {
/// Sequence number assigned when the event is emitted. The counter
/// starts at 0 and is incremented before use, so the first live
/// event carries seq 1.
pub seq: u64,
/// The wrapped event. `flatten` inlines its serialized fields
/// (including the `kind` tag) at the same level as `seq` instead of
/// nesting them under an `event` key.
#[serde(flatten)]
pub event: LiveEvent,
}
/// One row of the agent's live stream. Serialised to JSON for SSE delivery. /// One row of the agent's live stream. Serialised to JSON for SSE delivery.
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "kind", rename_all = "snake_case")] #[serde(tag = "kind", rename_all = "snake_case")]
@ -216,7 +228,13 @@ pub const DEFAULT_MODEL: &str = "haiku";
#[derive(Clone)] #[derive(Clone)]
pub struct Bus { pub struct Bus {
tx: Arc<broadcast::Sender<LiveEvent>>, tx: Arc<broadcast::Sender<BusEvent>>,
/// Monotonic per-process counter stamped onto every `BusEvent`.
/// Persisted nowhere — a harness restart resets seq to 0; clients
/// always treat reconnect as "fresh state, fresh stream of seqs."
/// Historical events served from sqlite carry no seq (they predate
/// the live channel the seq is meant to dedupe against).
event_seq: Arc<AtomicU64>,
/// Persistent event log. `None` only if opening the sqlite db failed /// Persistent event log. `None` only if opening the sqlite db failed
/// at construction — we keep going so the harness doesn't die on a /// at construction — we keep going so the harness doesn't die on a
/// missing state dir mount in dev / test scenarios. /// missing state dir mount in dev / test scenarios.
@ -258,6 +276,7 @@ impl Bus {
let initial_model = load_model().unwrap_or_else(|| DEFAULT_MODEL.to_owned()); let initial_model = load_model().unwrap_or_else(|| DEFAULT_MODEL.to_owned());
Self { Self {
tx: Arc::new(tx), tx: Arc::new(tx),
event_seq: Arc::new(AtomicU64::new(0)),
store, store,
state: Arc::new(Mutex::new((TurnState::Idle, now_unix()))), state: Arc::new(Mutex::new((TurnState::Idle, now_unix()))),
model: Arc::new(Mutex::new(initial_model)), model: Arc::new(Mutex::new(initial_model)),
@ -266,6 +285,20 @@ impl Bus {
} }
} }
/// Current high-water seq. Snapshot endpoints read this before
/// gathering state so the resulting (snapshot.seq, snapshot) pair
/// satisfies: any live event with seq > snapshot.seq is post-snapshot
/// (not yet reflected). Clients dedupe buffered SSE traffic against
/// this value.
#[must_use]
pub fn current_seq(&self) -> u64 {
self.event_seq.load(Ordering::SeqCst)
}
/// Reserves and returns the next seq for an outgoing event.
///
/// The counter is bumped atomically and the post-increment value is
/// returned, so the first caller receives 1.
fn next_seq(&self) -> u64 {
    let previous = self.event_seq.fetch_add(1, Ordering::SeqCst);
    previous + 1
}
/// Arm the one-shot: the next claude invocation will run without /// Arm the one-shot: the next claude invocation will run without
/// `--continue`, dropping any prior session context. Idempotent /// `--continue`, dropping any prior session context. Idempotent
/// — calling twice in a row before the next turn still consumes /// — calling twice in a row before the next turn still consumes
@ -333,11 +366,15 @@ impl Bus {
{ {
tracing::warn!(error = ?e, "events: append failed"); tracing::warn!(error = ?e, "events: append failed");
} }
let envelope = BusEvent {
seq: self.next_seq(),
event,
};
// Lagged subscribers drop events — fine; the UI is a tail, not a log. // Lagged subscribers drop events — fine; the UI is a tail, not a log.
let _ = self.tx.send(event); let _ = self.tx.send(envelope);
} }
pub fn subscribe(&self) -> broadcast::Receiver<LiveEvent> { pub fn subscribe(&self) -> broadcast::Receiver<BusEvent> {
self.tx.subscribe() self.tx.subscribe()
} }

View file

@ -191,6 +191,12 @@ async fn serve_shared_js() -> impl IntoResponse {
#[derive(Serialize)] #[derive(Serialize)]
struct StateSnapshot { struct StateSnapshot {
/// Bus seq at the moment this snapshot was assembled. Clients dedupe
/// their buffered SSE traffic against this value: events with
/// `seq <= snapshot.seq` are already reflected (or pre-date the
/// snapshot); `seq > snapshot.seq` is post-snapshot. Reset to 0 on
/// harness restart — clients treat reconnect as a fresh world.
seq: u64,
label: String, label: String,
dashboard_port: u16, dashboard_port: u16,
/// `"online"` | `"needs_login_idle"` | `"needs_login_in_progress"`. /// `"online"` | `"needs_login_idle"` | `"needs_login_in_progress"`.
@ -226,6 +232,9 @@ struct SessionView {
} }
async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> { async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
// Capture seq *before* any reads so the dedupe contract is
// "events with seq > snapshot.seq are post-snapshot, never missed."
let seq = state.bus.current_seq();
drop_if_finished(&state.session); drop_if_finished(&state.session);
let login = *state.login.lock().unwrap(); let login = *state.login.lock().unwrap();
let session_snapshot = state.session.lock().unwrap().clone(); let session_snapshot = state.session.lock().unwrap().clone();
@ -251,6 +260,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
let model = state.bus.model(); let model = state.bus.model();
let token_usage = state.bus.last_usage(); let token_usage = state.bus.last_usage();
axum::Json(StateSnapshot { axum::Json(StateSnapshot {
seq,
label: state.label.clone(), label: state.label.clone(),
dashboard_port, dashboard_port,
status, status,
@ -338,10 +348,15 @@ async fn post_send(State(state): State<AppState>, Form(form): Form<SendForm>) ->
} }
} }
async fn events_history( async fn events_history(State(state): State<AppState>) -> axum::Json<serde_json::Value> {
State(state): State<AppState>, // Capture seq *before* the read so dedupe is "drop buffered events
) -> axum::Json<Vec<crate::events::LiveEvent>> { // you've already seen in history", never "lose an event that fired
axum::Json(state.bus.history()) // between the read and the timestamp." Historical rows have no
// per-row seq; only the high-water mark matters for the dedupe
// window.
let seq = state.bus.current_seq();
let events = state.bus.history();
axum::Json(serde_json::json!({ "seq": seq, "events": events }))
} }
async fn events_stream( async fn events_stream(

View file

@ -3,6 +3,7 @@
use std::path::Path; use std::path::Path;
use std::sync::Mutex; use std::sync::Mutex;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{SystemTime, UNIX_EPOCH}; use std::time::{SystemTime, UNIX_EPOCH};
use anyhow::{Context, Result}; use anyhow::{Context, Result};
@ -50,12 +51,14 @@ pub type DueReminder = (String, i64, String, Option<String>);
#[serde(rename_all = "snake_case", tag = "kind")] #[serde(rename_all = "snake_case", tag = "kind")]
pub enum MessageEvent { pub enum MessageEvent {
Sent { Sent {
seq: u64,
from: String, from: String,
to: String, to: String,
body: String, body: String,
at: i64, at: i64,
}, },
Delivered { Delivered {
seq: u64,
from: String, from: String,
to: String, to: String,
body: String, body: String,
@ -66,6 +69,13 @@ pub enum MessageEvent {
pub struct Broker { pub struct Broker {
conn: Mutex<Connection>, conn: Mutex<Connection>,
events: broadcast::Sender<MessageEvent>, events: broadcast::Sender<MessageEvent>,
/// Monotonic per-process counter stamped onto every emitted
/// `MessageEvent`. Persisted nowhere — clients always treat a hive-c0re
/// restart as "everything is new" (fresh snapshot, fresh stream of
/// seqs starting at 1). Historical rows replayed via `recent_all`
/// carry `seq = 0` since they predate the live stream the seq is
/// meant to dedupe against.
event_seq: AtomicU64,
} }
impl Broker { impl Broker {
@ -81,6 +91,7 @@ impl Broker {
Ok(Self { Ok(Self {
conn: Mutex::new(conn), conn: Mutex::new(conn),
events, events,
event_seq: AtomicU64::new(0),
}) })
} }
@ -88,6 +99,20 @@ impl Broker {
self.events.subscribe() self.events.subscribe()
} }
/// Current high-water seq (0 until the first event has been stamped).
///
/// Snapshot endpoints read this *before* gathering state so the
/// resulting (snapshot.seq, snapshot) pair satisfies: any live event
/// with seq > snapshot.seq is post-snapshot (not yet reflected); any
/// with seq <= snapshot.seq either pre-dates the snapshot or was already
/// captured by it. Clients dedupe their buffered SSE traffic against
/// this value.
///
/// Marked `#[must_use]` to match `Bus::current_seq`: the call has no
/// side effect, so discarding the result is always a mistake.
#[must_use]
pub fn current_seq(&self) -> u64 {
    self.event_seq.load(Ordering::SeqCst)
}
/// Hands out the seq for the next emitted `MessageEvent`.
///
/// Atomically increments the counter and returns the new value, so
/// live events are numbered from 1; 0 is left for historical rows.
fn next_seq(&self) -> u64 {
    let before = self.event_seq.fetch_add(1, Ordering::SeqCst);
    before + 1
}
pub fn send(&self, message: &Message) -> Result<()> { pub fn send(&self, message: &Message) -> Result<()> {
let conn = self.conn.lock().unwrap(); let conn = self.conn.lock().unwrap();
conn.execute( conn.execute(
@ -96,6 +121,7 @@ impl Broker {
)?; )?;
drop(conn); drop(conn);
let _ = self.events.send(MessageEvent::Sent { let _ = self.events.send(MessageEvent::Sent {
seq: self.next_seq(),
from: message.from.clone(), from: message.from.clone(),
to: message.to.clone(), to: message.to.clone(),
body: message.body.clone(), body: message.body.clone(),
@ -149,6 +175,11 @@ impl Broker {
)?; )?;
let rows = stmt.query_map(params![limit_i], |row| { let rows = stmt.query_map(params![limit_i], |row| {
Ok(MessageEvent::Sent { Ok(MessageEvent::Sent {
// Historical events: seq=0 (never compared against live
// seqs). Live dedupe windows close against
// history_seq = broker.current_seq() captured at fetch
// time, not against per-row seqs.
seq: 0,
from: row.get(0)?, from: row.get(0)?,
to: row.get(1)?, to: row.get(1)?,
body: row.get(2)?, body: row.get(2)?,
@ -256,6 +287,7 @@ impl Broker {
)?; )?;
drop(conn); drop(conn);
let _ = self.events.send(MessageEvent::Delivered { let _ = self.events.send(MessageEvent::Delivered {
seq: self.next_seq(),
from: from.clone(), from: from.clone(),
to: to.clone(), to: to.clone(),
body: body.clone(), body: body.clone(),
@ -332,6 +364,7 @@ impl Broker {
tx.commit()?; tx.commit()?;
drop(conn); drop(conn);
let _ = self.events.send(MessageEvent::Sent { let _ = self.events.send(MessageEvent::Sent {
seq: self.next_seq(),
from: "reminder".to_owned(), from: "reminder".to_owned(),
to: agent.to_owned(), to: agent.to_owned(),
body: message.to_owned(), body: message.to_owned(),

View file

@ -144,6 +144,14 @@ async fn serve_shared_js() -> impl IntoResponse {
#[derive(Serialize)] #[derive(Serialize)]
struct StateSnapshot { struct StateSnapshot {
/// Broker seq at the moment this snapshot was assembled. Clients
/// dedupe their buffered SSE traffic against this value: any
/// `MessageEvent` with `seq <= snapshot.seq` is already reflected in
/// the snapshot (or pre-dates it); anything with `seq > snapshot.seq`
/// is post-snapshot and should be applied. Set to 0 in the
/// pre-emit case (no events ever fired) — clients treat that as
/// "apply everything you've buffered".
seq: u64,
hostname: String, hostname: String,
manager_port: u16, manager_port: u16,
any_stale: bool, any_stale: bool,
@ -285,6 +293,14 @@ async fn api_state(headers: HeaderMap, State(state): State<AppState>) -> axum::J
.unwrap_or("localhost"); .unwrap_or("localhost");
let hostname = host.split(':').next().unwrap_or(host).to_owned(); let hostname = host.split(':').next().unwrap_or(host).to_owned();
// Capture the broker seq *before* any read so the dedupe contract
// is "events with seq > snapshot.seq are post-snapshot, never
// missed." A broker event landing during snapshot construction may
// be doubly applied (snapshot caught the write + client also
// applies the SSE event) — that's a renderer's problem to make
// idempotent, not ours to avoid here.
let seq = state.coord.broker.current_seq();
let raw_containers = log_default("nixos-container list", lifecycle::list().await); let raw_containers = log_default("nixos-container list", lifecycle::list().await);
let current_rev = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake); let current_rev = crate::auto_update::current_flake_rev(&state.coord.hyperhive_flake);
let transient_snapshot = state.coord.transient_snapshot(); let transient_snapshot = state.coord.transient_snapshot();
@ -319,6 +335,7 @@ async fn api_state(headers: HeaderMap, State(state): State<AppState>) -> axum::J
log_default("questions.recent_answered", state.coord.questions.recent_answered(20)); log_default("questions.recent_answered", state.coord.questions.recent_answered(20));
axum::Json(StateSnapshot { axum::Json(StateSnapshot {
seq,
hostname, hostname,
manager_port: MANAGER_PORT, manager_port: MANAGER_PORT,
any_stale, any_stale,
@ -711,15 +728,22 @@ fn dir_size_bytes(root: &Path) -> u64 {
async fn messages_history(State(state): State<AppState>) -> Response { async fn messages_history(State(state): State<AppState>) -> Response {
// Backfill source for the dashboard message-flow terminal. Returns // Backfill source for the dashboard message-flow terminal. Returns
// up to ~200 historical broker messages as `MessageEvent::Sent` JSON // up to ~200 historical broker messages as `MessageEvent::Sent` JSON
// — same shape as the live `/messages/stream`, so the renderer // wrapped in `{ seq, events }`. The seq is the broker's high water
// doesn't branch on history vs. live. // mark at fetch time; clients use it to dedupe their buffered live
// SSE traffic (drop anything with `seq <= history_seq`) so a message
// that lands between SSE-subscribe and history-fetch isn't shown
// twice and isn't lost.
const HISTORY_LIMIT: u64 = 200; const HISTORY_LIMIT: u64 = 200;
// Capture seq *before* the query so the dedupe contract is
// "drop buffered events you've already seen in history" — never
// "lose an event that fired between the read and the timestamp."
let seq = state.coord.broker.current_seq();
match state.coord.broker.recent_all(HISTORY_LIMIT) { match state.coord.broker.recent_all(HISTORY_LIMIT) {
Ok(mut events) => { Ok(mut events) => {
// recent_all returns newest-first; reverse so the replay // recent_all returns newest-first; reverse so the replay
// builds chronologically (matches the agent /events/history). // builds chronologically (matches the agent /events/history).
events.reverse(); events.reverse();
axum::Json(events).into_response() axum::Json(serde_json::json!({ "seq": seq, "events": events })).into_response()
} }
Err(e) => error_response(&format!("messages/history failed: {e:#}")), Err(e) => error_response(&format!("messages/history failed: {e:#}")),
} }

View file

@ -166,36 +166,35 @@
} }
} }
async function backfill() { // Subscribe → buffer → fetch history → dedupe → apply.
if (!opts.historyUrl) { //
if (opts.onBackfillDone) opts.onBackfillDone(0); // Race the SSE subscription opens before the history fetch starts.
return; // Live events that land before history resolves are buffered, not
} // rendered. Once the history response (`{ seq, events }`) arrives we:
try { // 1. Replay `events` (fromHistory=true).
const resp = await fetch(opts.historyUrl); // 2. Drop buffered events with `seq <= history.seq` — they're
if (!resp.ok) { // already reflected in the history rows above.
if (opts.onBackfillDone) opts.onBackfillDone(0); // 3. Apply remaining buffered events (fromHistory=false).
return; // 4. Switch to live mode: each new SSE event dispatches immediately.
} //
const events = await resp.json(); // Without this dance an event that fires between history-fetch and
currentNoAnim = true; // SSE-subscribe goes missing; without seq dedupe the same event
for (const ev of events) dispatch(ev, true); // shows twice (once via history, once via live buffer). Both bugs
currentNoAnim = false; // were latent before.
if (events.length) row('note', '─── live (older above) ───'); //
else placeholder('(connected — waiting for events)'); // If `historyUrl` is unset we skip the dance: buffered events apply
if (opts.onBackfillDone) opts.onBackfillDone(events.length); // as live the moment the buffer flushes (no dedupe possible without
} catch (err) { // a boundary seq).
console.warn('history backfill failed', err); function start() {
if (opts.onBackfillDone) opts.onBackfillDone(0); let live = false;
} let buffered = [];
}
function subscribe() {
const es = new EventSource(opts.streamUrl); const es = new EventSource(opts.streamUrl);
es.onmessage = (e) => { es.onmessage = (e) => {
let ev; let ev;
try { ev = JSON.parse(e.data); } try { ev = JSON.parse(e.data); }
catch (err) { row('note', '[parse err] ' + e.data); return; } catch (err) { row('note', '[parse err] ' + e.data); return; }
if (!live) { buffered.push(ev); return; }
dispatch(ev, false); dispatch(ev, false);
if (opts.onLiveEvent) { if (opts.onLiveEvent) {
try { opts.onLiveEvent(ev); } try { opts.onLiveEvent(ev); }
@ -206,10 +205,62 @@
if (es.readyState === EventSource.CONNECTING) row('note', '[reconnecting…]'); if (es.readyState === EventSource.CONNECTING) row('note', '[reconnecting…]');
else row('note', '[disconnected]'); else row('note', '[disconnected]');
}; };
return es;
// Drain every event that was buffered while history was loading, then
// leave the stream in live mode.
//
// boundarySeq: high-water seq reported by the history response, or
//   null/undefined when no boundary is known (no historyUrl, failed
//   fetch, bare-array response). With no boundary nothing is dropped.
//
// A buffered event is skipped only when it carries a positive numeric
// `seq` at or below the boundary. Events with no seq, or with seq 0,
// are always applied: the server numbers live frames from 1 and uses 0
// for rows that were never part of the live stream, so 0 is not a
// value that can be compared against the boundary.
function flushBuffered(boundarySeq) {
  const pending = buffered;
  buffered = [];
  // From here on `onmessage` dispatches directly instead of buffering.
  live = true;
  for (const ev of pending) {
    const alreadyInHistory = boundarySeq != null
      && typeof ev.seq === 'number'
      && ev.seq > 0
      && ev.seq <= boundarySeq;
    if (alreadyInHistory) continue;
    dispatch(ev, false);
    if (opts.onLiveEvent) {
      // A throwing consumer callback must not stop the drain.
      try { opts.onLiveEvent(ev); }
      catch (err) { console.error('onLiveEvent threw', err); }
    }
  }
}
// Fetch and replay history (when `opts.historyUrl` is set), then flush
// the events buffered from the live stream in the meantime.
//
// Every exit path calls `flushBuffered` exactly as a fallback or as the
// normal hand-over, so the stream always ends up live, and then reports
// the number of replayed history events through `opts.onBackfillDone`
// (0 when there was no history, the fetch failed, or replay threw).
async function backfill() {
  if (!opts.historyUrl) {
    // No history endpoint: nothing to dedupe against, apply the buffer.
    flushBuffered(null);
    if (opts.onBackfillDone) opts.onBackfillDone(0);
    return;
  }
  try {
    const resp = await fetch(opts.historyUrl);
    if (!resp.ok) {
      flushBuffered(null);
      if (opts.onBackfillDone) opts.onBackfillDone(0);
      return;
    }
    const body = await resp.json();
    // Accept the envelope `{ seq, events }`. A bare array means
    // the server hasn't been updated to include seq yet — treat
    // it as "no dedupe possible."
    const events = Array.isArray(body) ? body : (body.events || []);
    const boundarySeq = Array.isArray(body) ? null : (body.seq ?? null);
    // `currentNoAnim` is declared outside this block (presumably it
    // suppresses row animation during replay — confirm at its
    // definition). Reset it in `finally` so a throwing dispatch cannot
    // leave it stuck on for all later live events.
    currentNoAnim = true;
    try {
      for (const ev of events) dispatch(ev, true);
    } finally {
      currentNoAnim = false;
    }
    if (events.length) row('note', '─── live (older above) ───');
    else placeholder('(connected — waiting for events)');
    flushBuffered(boundarySeq);
    if (opts.onBackfillDone) opts.onBackfillDone(events.length);
  } catch (err) {
    console.warn('history backfill failed', err);
    // Still go live so buffered and future events are not stranded.
    flushBuffered(null);
    if (opts.onBackfillDone) opts.onBackfillDone(0);
  }
}
return backfill();
} }
const ready = backfill().then(subscribe); const ready = start();
return { row, details, detailsDiff, placeholder, ready }; return { row, details, detailsDiff, placeholder, ready };
} }