broker: lease-style delivery — ack_turn + requeue_inflight close the no-drop loop

This commit is contained in:
damocles 2026-05-18 22:01:48 +02:00
parent 69a3ca7469
commit 690cb5ab5b
8 changed files with 684 additions and 35 deletions

View file

@ -358,6 +358,28 @@ pub enum AgentRequest {
/// row. The manager surface uses the same wire variant but
/// accepts any id.
CancelLooseEnd { kind: CancelLooseEndKind, id: i64 },
/// Mark every message popped by this agent since the last `AckTurn`
/// as fully handled. Fired by the harness after `TurnOutcome::Ok`
/// — claude doesn't see this surface, it's harness↔broker only.
/// On `TurnOutcome::Failed` the harness intentionally skips this
/// call, so the unacked rows stay in-flight in the DB and get
/// requeued by the next `RequeueInflight` on harness boot. The
/// broker tracks the popped-id list in memory, so no payload is
/// needed (it already knows which ids it handed to this
/// recipient).
AckTurn,
/// Requeue every message the broker handed to this agent that
/// never got acked. Fired by the harness exactly once at boot,
/// before entering the serve loop — catches the
/// crashed-mid-turn / OOM-killed / container-restarted cases
/// where a previous harness session popped messages but never
/// drove them to a clean turn-end. Resets `delivered_at` on each
/// row back to NULL (so the next `Recv` pops it) and remembers
/// the id in a per-recipient in-memory set so the next `Recv`
/// can tag the message with `redelivered: true` (the harness
/// then prepends a "may already be handled" hint to the wake
/// prompt). Idempotent + cheap when there's nothing in flight.
RequeueInflight,
}
/// Responses on a per-agent socket.
@ -368,8 +390,22 @@ pub enum AgentResponse {
Ok,
/// Either `Send` failed or `Recv` errored.
Err { message: String },
/// `Recv` produced a message.
Message { from: String, body: String },
/// `Recv` produced a message. `id` is the broker's row id — opaque
/// to claude (the MCP surface strips it before handing the body
/// to the model) but tracked by the harness so the broker's
/// in-memory unacked list can be drained on `AckTurn`. When
/// `redelivered = true` this row was popped earlier, never
/// acked (turn crash / OOM / restart), and resurfaced by
/// `RequeueInflight` — the harness prepends a "may already be
/// handled" hint to the wake prompt so claude can do the right thing.
Message {
from: String,
body: String,
#[serde(default)]
id: i64,
#[serde(default)]
redelivered: bool,
},
/// `Recv` found nothing pending.
Empty,
/// `Status` result: how many pending messages are in this agent's inbox.
@ -668,6 +704,13 @@ pub enum ManagerRequest {
/// can cancel any row (no owner check) — same dispatch as
/// `AgentRequest::CancelLooseEnd` but with privileged auth.
CancelLooseEnd { kind: CancelLooseEndKind, id: i64 },
/// Mirror of `AgentRequest::AckTurn` on the manager surface — fired
/// by the manager harness after `TurnOutcome::Ok` to close out
/// every message popped during the turn.
AckTurn,
/// Mirror of `AgentRequest::RequeueInflight` on the manager
/// surface — fired exactly once on manager harness boot.
RequeueInflight,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@ -677,9 +720,18 @@ pub enum ManagerResponse {
Err {
message: String,
},
/// Same delivery shape as `AgentResponse::Message` — `id` +
/// `redelivered` carry the broker's row id and the
/// "previously popped, not acked" flag through the manager
/// surface so the manager harness drives the same
/// requeue-with-hint flow as a sub-agent.
Message {
from: String,
body: String,
#[serde(default)]
id: i64,
#[serde(default)]
redelivered: bool,
},
Empty,
Status {