auto session-reset: drop checkpoint turn, reset cold without warming cache

This commit is contained in:
damocles 2026-05-20 14:41:49 +02:00 committed by Mara
parent fb951c1004
commit 25659ee9f3

View file

@@ -56,39 +56,25 @@ const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300;
/// Token watermark for *auto session-reset*. When context is at or above this /// Token watermark for *auto session-reset*. When context is at or above this
/// many tokens AND the prompt cache has gone cold (idle time >= `CACHE_TTL_SECS`), /// many tokens AND the prompt cache has gone cold (idle time >= `CACHE_TTL_SECS`),
/// the harness runs a notes-checkpoint turn (so the agent can persist state) /// the harness drops `--continue` so the next turn starts fresh. Running any
/// then drops `--continue` so the next turn starts with a clean, compact /// turn (even a checkpoint) before the reset would re-upload the full context
/// context. The checkpoint re-uploads the full transcript either way, so the /// and warm the cache, defeating the cost purpose — so the reset happens
/// purpose is state preservation (not cost savings). Default is ~50% of a /// immediately with no preceding turn. Default is ~50% of a 200k-token
/// 200k-token window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set /// window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set to `0`
/// to `0` to disable. /// to disable.
const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000; const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;
/// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on /// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on
/// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the /// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the
/// idle gap exceeds this, the cache prefix has likely expired and the next /// idle gap exceeds this, the cache prefix has likely expired and the next
/// turn re-uploads the full transcript regardless of whether we resume or /// turn re-uploads the full transcript regardless of whether we resume or
/// start fresh. A fresh session with a small context is therefore just as /// start fresh. A fresh session with a small context is therefore equally
/// cheap AND gives the model a clean slate. Default is 3600s (1h) matching /// cheap but gives the model a clean slate. Default is 3600s (1h) matching
/// the subscription TTL. API (pay-per-token) users should set /// the subscription TTL; API (pay-per-token) users should set
/// `HIVE_CACHE_TTL_SECS=300` to match their shorter cache window. Override /// `HIVE_CACHE_TTL_SECS=300`. Override via `HIVE_CACHE_TTL_SECS`; set to
/// via `HIVE_CACHE_TTL_SECS`; set to `0` to disable (always resume). /// `0` to disable (always resume).
const DEFAULT_CACHE_TTL_SECS: u64 = 3600; const DEFAULT_CACHE_TTL_SECS: u64 = 3600;
/// Synthetic checkpoint prompt injected before an auto-reset turn. Unlike the
/// proactive-compaction checkpoint (which is followed by `/compact`), this one
/// is followed by a fresh-session turn, so the wording focuses on "write now
/// so you can read it back cleanly" rather than "/compact is coming".
const AUTO_RESET_CHECKPOINT_PROMPT: &str =
"[system] Context checkpoint before session auto-reset — no inbox message to handle.\n\n\
The harness detected that the prompt-cache TTL has elapsed while your context is large. \
The session will be reset after this turn so you start the next message with a clean, \
compact context instead of a long accumulated transcript.\n\n\
Use THIS turn to flush anything worth keeping into your durable state files: update \
your notes / TODO.md with in-flight task state, open questions, important paths, and \
anything else you would need to resume cleanly from a fresh session. Do not start \
new work or reply to anyone just write your notes and end the turn.";
/// Token watermark for *proactive* compaction. Once a turn finishes with /// Token watermark for *proactive* compaction. Once a turn finishes with
/// the last inference's context size at or above this many tokens, /// the last inference's context size at or above this many tokens,
/// `drive_turn` runs one dedicated notes-checkpoint turn (so the agent /// `drive_turn` runs one dedicated notes-checkpoint turn (so the agent
@@ -275,7 +261,7 @@ fn compact_watermark_tokens() -> u64 {
/// ///
/// Both the sub-agent and manager loops call this. /// Both the sub-agent and manager loops call this.
pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutcome { pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutcome {
maybe_auto_reset(files, bus).await; maybe_auto_reset(bus);
let outcome = match run_turn(prompt, files, bus).await { let outcome = match run_turn(prompt, files, bus).await {
TurnOutcome::PromptTooLong => { TurnOutcome::PromptTooLong => {
if let Err(e) = compact_session(files, bus).await { if let Err(e) = compact_session(files, bus).await {
@@ -345,11 +331,11 @@ async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
} }
/// Pre-turn auto-reset check. If context is large AND the prompt cache has /// Pre-turn auto-reset check. If context is large AND the prompt cache has
/// gone cold (idle time ≥ cache TTL), run a notes-checkpoint turn so the agent /// gone cold (idle time >= cache TTL), arm `request_new_session` so the
/// can persist durable state, then arm `request_new_session` so the actual /// next wake-up turn starts fresh. No preceding checkpoint turn — running
/// wake-up turn starts fresh. Best-effort — a failed checkpoint is noted and /// any turn before the reset would re-upload and re-warm the cache, which
/// the reset proceeds anyway (agent still gets a clean session). /// defeats the cost-optimisation purpose entirely.
async fn maybe_auto_reset(files: &TurnFiles, bus: &Bus) { fn maybe_auto_reset(bus: &Bus) {
let watermark = auto_reset_watermark_tokens(); let watermark = auto_reset_watermark_tokens();
if watermark == 0 { if watermark == 0 {
return; // auto-reset disabled return; // auto-reset disabled
@@ -376,22 +362,10 @@ async fn maybe_auto_reset(files: &TurnFiles, bus: &Bus) {
} }
bus.emit(LiveEvent::Note { bus.emit(LiveEvent::Note {
text: format!( text: format!(
"context {ctx_tokens} tokens, idle {idle_secs}s cache TTL {ttl}s \ "context {ctx_tokens} tokens, idle {idle_secs}s >= cache TTL {ttl}s \
running checkpoint then auto-resetting session" dropping session (cache cold, fresh start is equally cheap)"
), ),
}); });
match run_turn(AUTO_RESET_CHECKPOINT_PROMPT, files, bus).await {
TurnOutcome::Ok => {}
TurnOutcome::PromptTooLong => bus.emit(LiveEvent::Note {
text: "auto-reset checkpoint overflowed — resetting without checkpoint".into(),
}),
TurnOutcome::RateLimited => bus.emit(LiveEvent::Note {
text: "auto-reset checkpoint was rate-limited — resetting anyway".into(),
}),
TurnOutcome::Failed(e) => bus.emit(LiveEvent::Note {
text: format!("auto-reset checkpoint failed ({e:#}) — resetting anyway"),
}),
}
bus.request_new_session(); bus.request_new_session();
} }