add proactive context-size compaction with a notes-checkpoint turn

This commit is contained in:
damocles 2026-05-20 12:38:50 +02:00 committed by Mara
parent f2015954d9
commit 9cbb05bb86
3 changed files with 152 additions and 9 deletions

View file

@ -34,6 +34,33 @@ const CLAUDE_SETTINGS: &str = include_str!("../prompts/claude-settings.json");
/// claude exit with a useful error in the live view.
const PROMPT_TOO_LONG_MARKER: &str = "Prompt is too long";
/// Token watermark for *proactive* compaction. Once a turn finishes
/// cleanly (`TurnOutcome::Ok`) with the last inference's context size at
/// or above this many tokens, `drive_turn` runs one dedicated
/// notes-checkpoint turn (so the agent can flush durable state into
/// `/state`) and then `/compact` — while the session is still healthy
/// enough to run a turn at all. This is distinct from the reactive
/// `PROMPT_TOO_LONG_MARKER` path, which only fires once the session is
/// *already* past the window: at that point no turn can run on it, so the
/// reactive path just compacts + retries with no checkpoint.
///
/// The default is ~75% of a 200k-token window. Override it via
/// `HIVE_COMPACT_WATERMARK_TOKENS`; an unset value, or one that does not
/// parse as a `u64` after trimming whitespace, falls back to this default.
/// Setting the variable to `0` disables proactive compaction entirely (the
/// reactive path always applies).
const DEFAULT_COMPACT_WATERMARK_TOKENS: u64 = 150_000;
/// Synthetic wake prompt for the proactive notes-checkpoint turn. Not an
/// inbox message — the harness injects it directly so the agent gets one
/// turn to persist durable state before `/compact` collapses the
/// turn-by-turn history into a summary.
///
/// The literal is split across source lines with `\` continuations: each
/// continuation swallows the newline and the next line's leading
/// whitespace, so the only line breaks the agent sees are the explicit
/// `\n\n` paragraph separators.
const CHECKPOINT_PROMPT: &str = "[system] Context checkpoint — no inbox message to handle.\n\n\
    Your conversation context has grown large and the harness is about to run `/compact`, \
    which collapses the detailed turn-by-turn history into a short summary. Anything you \
    do not persist now is effectively lost after the next turn.\n\n\
    Use THIS turn to flush anything worth keeping into your durable `/state` files: update \
    your notes / CLAUDE.md / TODO.md with in-flight task state, decisions made, important \
    file paths, and whatever you would need to resume cleanly with only a summary of this \
    conversation to go on. Do not start new work or reply to anyone — just write your notes \
    and end the turn.";
/// The set of files claude reads on every invocation: the MCP server
/// config (`--mcp-config`), static settings (`--settings`), and the
/// pre-rendered role/tools system prompt (`--system-prompt-file`).
@ -129,10 +156,32 @@ pub enum TurnOutcome {
Failed(anyhow::Error),
}
/// Drive one turn end-to-end, transparently compacting + retrying once on
/// `Prompt is too long`. Both the sub-agent and manager loops call this.
/// Resolve the proactive-compaction watermark: `HIVE_COMPACT_WATERMARK_TOKENS`
/// if set to a valid integer, else `DEFAULT_COMPACT_WATERMARK_TOKENS`. A
/// value of `0` disables proactive compaction.
fn compact_watermark_tokens() -> u64 {
std::env::var("HIVE_COMPACT_WATERMARK_TOKENS")
.ok()
.and_then(|s| s.trim().parse::<u64>().ok())
.unwrap_or(DEFAULT_COMPACT_WATERMARK_TOKENS)
}
/// Drive one turn end-to-end. Two compaction paths layer on top of the
/// raw `run_turn`:
///
/// - **Reactive** — `run_turn` returns `PromptTooLong`: the session is
/// already past the context window and *no* turn can run on it, so we
/// compact immediately and retry the same wake-up prompt once. No
/// notes-checkpoint turn is possible here — the detail is gone.
/// - **Proactive** — the turn finished cleanly but its context size has
/// crept past the watermark: while the session is still healthy we
/// give the agent one dedicated turn to checkpoint its `/state` notes,
/// then compact. This keeps a later turn from hitting the reactive
/// path (where there is no chance to save anything first).
///
/// Both the sub-agent and manager loops call this.
pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutcome {
match run_turn(prompt, files, bus).await {
let outcome = match run_turn(prompt, files, bus).await {
TurnOutcome::PromptTooLong => {
if let Err(e) = compact_session(files, bus).await {
tracing::warn!(error = %format!("{e:#}"), "compact failed");
@ -141,6 +190,56 @@ pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutco
run_turn(prompt, files, bus).await
}
other => other,
};
// Proactive: a turn just completed on a still-healthy session. If its
// context crossed the watermark, checkpoint + compact before a later
// turn overflows into the reactive path. Best-effort — never changes
// the outcome of the turn that already succeeded.
if matches!(outcome, TurnOutcome::Ok) {
maybe_checkpoint_and_compact(files, bus).await;
}
outcome
}
/// Proactive post-turn compaction. If the last inference's context size
/// has crossed the watermark, run one notes-checkpoint turn so the agent
/// can persist durable state, then `/compact`. Best-effort: a failed
/// checkpoint or compaction is logged + surfaced as a Note but never
/// fails the turn that already succeeded.
async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
let watermark = compact_watermark_tokens();
if watermark == 0 {
return; // proactive compaction disabled
}
let Some(used) = bus.last_ctx_usage().map(|u| u.context_tokens()) else {
return; // no usage reading yet — nothing to compare against
};
if used < watermark {
return;
}
bus.emit(LiveEvent::Note {
text: format!(
"context at {used} tokens (watermark {watermark}) — running a \
notes-checkpoint turn before /compact"
),
});
// Give the agent one turn to flush durable state into /state. If the
// session is somehow already too far gone to run even this, fall
// through to compaction anyway — the checkpoint is best-effort.
match run_turn(CHECKPOINT_PROMPT, files, bus).await {
TurnOutcome::Ok => {}
TurnOutcome::PromptTooLong => bus.emit(LiveEvent::Note {
text: "checkpoint turn overflowed the window — compacting without it".into(),
}),
TurnOutcome::Failed(e) => bus.emit(LiveEvent::Note {
text: format!("checkpoint turn failed ({e:#}) — compacting anyway"),
}),
}
if let Err(e) = compact_session(files, bus).await {
tracing::warn!(error = %format!("{e:#}"), "post-checkpoint compact failed");
bus.emit(LiveEvent::Note {
text: format!("/compact after checkpoint failed: {e:#}"),
});
}
}