broker: recv_batch(max) — drain a bursty inbox in one round-trip

This commit is contained in:
damocles 2026-05-19 00:40:31 +02:00
parent 96ffb0e39a
commit 77b89bf2c6
9 changed files with 354 additions and 11 deletions

View file

@ -87,6 +87,15 @@ async fn serve(stream: UnixStream, agent: String, coord: Arc<Coordinator>) -> Re
/// positive `wait_seconds`.
const RECV_LONG_POLL_MAX: std::time::Duration = std::time::Duration::from_secs(180);
/// Server-side hard cap on `RecvBatch.max`. Bounds the size of a
/// single round-trip so a confused caller can't drain the entire
/// inbox in one go and blow past wire-buffer sizes. A request above
/// the cap is not an error: `dispatch` silently reduces it to the cap
/// via `min` before calling the broker. 32 is comfortably above the
/// burst sizes we've seen in practice (post-rebuild rescue,
/// multi-agent reply storms) and well under the per-message
/// `MESSAGE_MAX_BYTES` * N envelope budget.
///
/// Typed `u32` so it can be compared directly against the request's
/// `max` field; the clamped value is widened to `usize` afterwards.
const RECV_BATCH_MAX: u32 = 32;
fn recv_timeout(wait_seconds: Option<u64>) -> std::time::Duration {
match wait_seconds {
Some(s) => std::time::Duration::from_secs(s).min(RECV_LONG_POLL_MAX),
@ -114,6 +123,25 @@ async fn dispatch(req: &AgentRequest, agent: &str, coord: &Arc<Coordinator>) ->
message: format!("{e:#}"),
},
},
// Batch receive: pop several pending messages for this agent and
// return them together in one `AgentResponse::Batch`.
AgentRequest::RecvBatch { max } => {
// Clamp the caller-supplied `max` to the server-side cap first, then
// widen to `usize`; an oversized request is reduced, never rejected.
let cap = (*max).min(RECV_BATCH_MAX) as usize;
match broker.recv_batch(agent, cap) {
// Convert each broker delivery into the wire-level
// `DeliveredMessage`. Only sender, body, id and the redelivered flag
// are copied; the stored message's recipient is not sent back.
Ok(deliveries) => AgentResponse::Batch {
messages: deliveries
.into_iter()
.map(|d| hive_sh4re::DeliveredMessage {
from: d.message.from,
body: d.message.body,
id: d.id,
redelivered: d.redelivered,
})
.collect(),
},
// Broker failures are reported to the caller as text, rendered with
// the alternate (`#`) Display form of the error.
// NOTE(review): presumably this prints the full error chain — confirm
// against the error type `recv_batch` returns.
Err(e) => AgentResponse::Err {
message: format!("{e:#}"),
},
}
}
AgentRequest::Status => match broker.count_pending(agent) {
Ok(unread) => AgentResponse::Status { unread },
Err(e) => AgentResponse::Err {

View file

@ -362,6 +362,85 @@ impl Broker {
}))
}
/// Drain up to `max` pending messages addressed to `recipient` in a
/// single call, oldest first (ascending row id).
///
/// Every returned row is stamped `delivered_at = now`, has its id
/// appended to the recipient's `unacked_ids` list (so a later
/// `ack_turn` can close it out), and is reported with
/// `redelivered = true` when its id was present in the recipient's
/// `requeued_ids` set; the id is removed from that set as it is handed
/// out. One `MessageEvent::Delivered` is sent per returned row, after
/// both locks have been released.
///
/// Returns an empty vec without taking any lock or touching the
/// database when `max == 0`, and an empty vec (with no bookkeeping
/// entry created) when nothing is pending.
///
/// # Errors
/// Propagates any error from preparing or running the SELECT, or from
/// running the UPDATE. On error no in-memory bookkeeping is changed.
///
/// # Panics
/// Panics if the `inflight` or `conn` mutex is poisoned.
pub fn recv_batch(&self, recipient: &str, max: usize) -> Result<Vec<Delivery>> {
    if max == 0 {
        return Ok(Vec::new());
    }

    // Lock order matters: `inflight` is always taken before `conn`.
    let mut inflight = self.inflight.lock().unwrap();
    let conn = self.conn.lock().unwrap();

    // A `max` too large for an i64 simply means "no effective limit".
    let limit = i64::try_from(max).unwrap_or(i64::MAX);

    // Step 1: read the oldest undelivered rows for this recipient.
    // (SQL literal kept verbatim.)
    let mut select = conn.prepare(
        "SELECT id, sender, recipient, body
FROM messages
WHERE recipient = ?1 AND delivered_at IS NULL
ORDER BY id ASC
LIMIT ?2",
    )?;
    let pending = select
        .query_map(params![recipient, limit], |row| {
            let id: i64 = row.get(0)?;
            let sender: String = row.get(1)?;
            let to: String = row.get(2)?;
            let body: String = row.get(3)?;
            Ok((id, sender, to, body))
        })?
        .collect::<rusqlite::Result<Vec<_>>>()?;
    // Release the statement's borrow of `conn` before going further.
    drop(select);
    if pending.is_empty() {
        return Ok(Vec::new());
    }

    // Step 2: stamp every selected row with one UPDATE. The first bound
    // value is the timestamp; the rest are the row ids, one `?` each.
    let now = now_unix();
    let ids: Vec<i64> = pending.iter().map(|row| row.0).collect();
    let placeholders = vec!["?"; ids.len()].join(",");
    let sql = format!("UPDATE messages SET delivered_at = ? WHERE id IN ({placeholders})");
    let bound: Vec<&dyn rusqlite::ToSql> = std::iter::once(&now as &dyn rusqlite::ToSql)
        .chain(ids.iter().map(|id| id as &dyn rusqlite::ToSql))
        .collect();
    conn.execute(&sql, bound.as_slice())?;
    drop(conn);

    // Step 3: in-memory bookkeeping. Each id is recorded as unacked
    // first, then its redelivery marker (if any) is consumed.
    let slot = inflight.entry(recipient.to_owned()).or_default();
    let deliveries: Vec<Delivery> = pending
        .into_iter()
        .map(|(id, from, to, body)| {
            slot.unacked_ids.push(id);
            let redelivered = slot.requeued_ids.remove(&id);
            Delivery {
                id,
                redelivered,
                message: Message { from, to, body },
            }
        })
        .collect();
    drop(inflight);

    // Step 4: announce each delivery. A failed send is deliberately
    // ignored so event delivery never fails the receive itself.
    for delivered in &deliveries {
        let message = &delivered.message;
        let _ = self.events.send(MessageEvent::Delivered {
            from: message.from.clone(),
            to: message.to.clone(),
            body: message.body.clone(),
            at: now,
        });
    }

    Ok(deliveries)
}
/// Drain the per-recipient unacked-id list and mark every row
/// `acked_at = NOW`. Fired by the harness after `TurnOutcome::Ok`.
/// Returns the number of rows acked (zero is normal — claude
@ -867,6 +946,70 @@ mod tests {
);
}
/// `recv_batch` hands messages out oldest-first, never returns more
/// than `max`, and leaves the remainder pending for a later call.
#[test]
fn recv_batch_pops_fifo_capped_at_max() {
    let harness = open_broker();
    let broker = &harness.broker;
    (0..5).for_each(|i| {
        broker.send(&msg("a", "b", &format!("m{i}"))).unwrap();
    });

    // First call is capped at three even though five are queued.
    let first = broker.recv_batch("b", 3).unwrap();
    let first_bodies = first
        .iter()
        .map(|d| d.message.body.as_str())
        .collect::<Vec<_>>();
    assert_eq!(first_bodies, vec!["m0", "m1", "m2"]);

    // A generous second call picks up exactly the two left over.
    let second = broker.recv_batch("b", 10).unwrap();
    let second_bodies = second
        .iter()
        .map(|d| d.message.body.as_str())
        .collect::<Vec<_>>();
    assert_eq!(second_bodies, vec!["m3", "m4"]);

    // Both batches landed on the same unacked list: one ack covers all five.
    let acked = broker.ack_turn("b").unwrap();
    assert_eq!(acked, 5);
}
/// With nothing queued for the recipient, `recv_batch` succeeds with an
/// empty vec rather than returning an error.
#[test]
fn recv_batch_returns_empty_when_idle() {
    let harness = open_broker();
    let outcome = harness.broker.recv_batch("ghost", 5);
    assert!(outcome.unwrap().is_empty());
}
/// A `max` of zero returns nothing and must not consume a queued row;
/// verified by checking the row is still available to a plain `recv`.
#[test]
fn recv_batch_zero_max_pops_nothing() {
    let harness = open_broker();
    let broker = &harness.broker;
    broker.send(&msg("a", "b", "stay")).unwrap();

    let nothing = broker.recv_batch("b", 0).unwrap();
    assert!(nothing.is_empty());

    // The message was left pending, so an ordinary recv still gets it.
    let delivery = broker.recv("b").unwrap().expect("still pending");
    assert_eq!(delivery.message.body, "stay");
}
/// Rows put back by `requeue_inflight` come out of `recv_batch` flagged
/// `redelivered: true`, while a brand-new message sent afterwards is
/// delivered without the flag.
#[test]
fn recv_batch_propagates_redelivered_flag() {
    let harness = open_broker();
    let broker = &harness.broker;

    // Deliver two messages once, then push them back onto the queue.
    for body in ["one", "two"] {
        broker.send(&msg("a", "b", body)).unwrap();
    }
    broker.recv("b").unwrap().expect("popped 1");
    broker.recv("b").unwrap().expect("popped 2");
    broker.requeue_inflight("b").unwrap();

    // The batch sees both again, each marked as a redelivery.
    let replayed = broker.recv_batch("b", 5).unwrap();
    assert_eq!(replayed.len(), 2);
    assert!(replayed.iter().all(|d| d.redelivered));

    // A first-time message after the batch carries no redelivered tag.
    broker.send(&msg("a", "b", "three")).unwrap();
    let fresh = broker.recv("b").unwrap().expect("re-pop 3");
    assert_eq!(fresh.message.body, "three");
    assert!(!fresh.redelivered);
}
/// Per-recipient isolation: `requeue_inflight("a")` doesn't touch
/// b's inflight rows.
#[test]

View file

@ -75,6 +75,11 @@ async fn serve(stream: UnixStream, coord: Arc<Coordinator>) -> Result<()> {
/// seconds (clamped at MAX).
const MANAGER_RECV_LONG_POLL_MAX: std::time::Duration = std::time::Duration::from_secs(180);
/// Server-side cap on `ManagerRequest::RecvBatch.max`; `dispatch`
/// clamps the requested value to this with `min` before calling the
/// broker, so oversized requests are reduced rather than rejected.
/// Same shape + rationale as `agent_server::RECV_BATCH_MAX`. Kept
/// numerically aligned across surfaces so a tool description that
/// quotes the cap stays accurate either way.
const MANAGER_RECV_BATCH_MAX: u32 = 32;
fn manager_recv_timeout(wait_seconds: Option<u64>) -> std::time::Duration {
match wait_seconds {
Some(s) => std::time::Duration::from_secs(s).min(MANAGER_RECV_LONG_POLL_MAX),
@ -149,6 +154,25 @@ async fn dispatch(req: &ManagerRequest, coord: &Arc<Coordinator>) -> ManagerResp
message: format!("{e:#}"),
},
},
// Batch receive for the manager: pop several pending messages addressed
// to `MANAGER_AGENT` and return them in one `ManagerResponse::Batch`.
ManagerRequest::RecvBatch { max } => {
// Clamp the caller-supplied `max` to the server-side cap first, then
// widen to `usize`; an oversized request is reduced, never rejected.
let cap = (*max).min(MANAGER_RECV_BATCH_MAX) as usize;
match coord.broker.recv_batch(MANAGER_AGENT, cap) {
// Convert each broker delivery into the wire-level
// `DeliveredMessage`. Only sender, body, id and the redelivered flag
// are copied; the stored message's recipient is not sent back.
Ok(deliveries) => ManagerResponse::Batch {
messages: deliveries
.into_iter()
.map(|d| hive_sh4re::DeliveredMessage {
from: d.message.from,
body: d.message.body,
id: d.id,
redelivered: d.redelivered,
})
.collect(),
},
// Broker failures are reported to the caller as text, rendered with
// the alternate (`#`) Display form of the error.
// NOTE(review): presumably this prints the full error chain — confirm
// against the error type `recv_batch` returns.
Err(e) => ManagerResponse::Err {
message: format!("{e:#}"),
},
}
}
ManagerRequest::RequestSpawn { name, description } => {
tracing::info!(%name, "manager: request_spawn");
match coord.approvals.submit_kind(