From 25659ee9f3e3a094ff8a567fc4ec95d5a8284f4e Mon Sep 17 00:00:00 2001 From: damocles Date: Wed, 20 May 2026 14:41:49 +0200 Subject: [PATCH] auto session-reset: drop checkpoint turn, reset cold without warming cache --- hive-ag3nt/src/turn.rs | 64 +++++++++++++----------------------------- 1 file changed, 19 insertions(+), 45 deletions(-) diff --git a/hive-ag3nt/src/turn.rs b/hive-ag3nt/src/turn.rs index 551a9d6..d42bc7c 100644 --- a/hive-ag3nt/src/turn.rs +++ b/hive-ag3nt/src/turn.rs @@ -56,39 +56,25 @@ const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300; /// Token watermark for *auto session-reset*. When context is at or above this /// many tokens AND the prompt cache has gone cold (idle time >= `CACHE_TTL_SECS`), -/// the harness runs a notes-checkpoint turn (so the agent can persist state) -/// then drops `--continue` so the next turn starts with a clean, compact -/// context. The checkpoint re-uploads the full transcript either way, so the -/// purpose is state preservation (not cost savings). Default is ~50% of a -/// 200k-token window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set -/// to `0` to disable. +/// the harness drops `--continue` so the next turn starts fresh. Running any +/// turn (even a checkpoint) before the reset would re-upload the full context +/// and warm the cache, defeating the cost purpose — so the reset happens +/// immediately with no preceding turn. Default is ~50% of a 200k-token +/// window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set to `0` +/// to disable. const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000; /// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on /// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the /// idle gap exceeds this, the cache prefix has likely expired and the next /// turn re-uploads the full transcript regardless of whether we resume or -/// start fresh. 
A fresh session with a small context is therefore just as -/// cheap AND gives the model a clean slate. Default is 3600s (1h) matching -/// the subscription TTL. API (pay-per-token) users should set -/// `HIVE_CACHE_TTL_SECS=300` to match their shorter cache window. Override -/// via `HIVE_CACHE_TTL_SECS`; set to `0` to disable (always resume). +/// start fresh. A fresh session with a small context is therefore no more +/// expensive, and gives the model a clean slate. Default is 3600s (1h) matching +/// the subscription TTL; API (pay-per-token) users should set +/// `HIVE_CACHE_TTL_SECS=300`. Override via `HIVE_CACHE_TTL_SECS`; set to +/// `0` to disable (always resume). const DEFAULT_CACHE_TTL_SECS: u64 = 3600; -/// Synthetic checkpoint prompt injected before an auto-reset turn. Unlike the -/// proactive-compaction checkpoint (which is followed by `/compact`), this one -/// is followed by a fresh-session turn, so the wording focuses on "write now -/// so you can read it back cleanly" rather than "/compact is coming". -const AUTO_RESET_CHECKPOINT_PROMPT: &str = - "[system] Context checkpoint before session auto-reset — no inbox message to handle.\n\n\ -The harness detected that the prompt-cache TTL has elapsed while your context is large. \ -The session will be reset after this turn so you start the next message with a clean, \ -compact context instead of a long accumulated transcript.\n\n\ -Use THIS turn to flush anything worth keeping into your durable state files: update \ -your notes / TODO.md with in-flight task state, open questions, important paths, and \ -anything else you would need to resume cleanly from a fresh session. Do not start \ -new work or reply to anyone — just write your notes and end the turn."; - /// Token watermark for *proactive* compaction. 
Once a turn finishes with /// the last inference's context size at or above this many tokens, /// `drive_turn` runs one dedicated notes-checkpoint turn (so the agent @@ -275,7 +261,7 @@ fn compact_watermark_tokens() -> u64 { /// /// Both the sub-agent and manager loops call this. pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutcome { - maybe_auto_reset(files, bus).await; + maybe_auto_reset(bus); let outcome = match run_turn(prompt, files, bus).await { TurnOutcome::PromptTooLong => { if let Err(e) = compact_session(files, bus).await { @@ -345,11 +331,11 @@ async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) { } /// Pre-turn auto-reset check. If context is large AND the prompt cache has -/// gone cold (idle time ≥ cache TTL), run a notes-checkpoint turn so the agent -/// can persist durable state, then arm `request_new_session` so the actual -/// wake-up turn starts fresh. Best-effort — a failed checkpoint is noted and -/// the reset proceeds anyway (agent still gets a clean session). -async fn maybe_auto_reset(files: &TurnFiles, bus: &Bus) { +/// gone cold (idle time >= cache TTL), arm `request_new_session` so the +/// next wake-up turn starts fresh. No preceding checkpoint turn — running +/// any turn before the reset would re-upload and re-warm the cache, which +/// defeats the cost-optimisation purpose entirely. 
+fn maybe_auto_reset(bus: &Bus) { let watermark = auto_reset_watermark_tokens(); if watermark == 0 { return; // auto-reset disabled @@ -376,22 +362,10 @@ async fn maybe_auto_reset(files: &TurnFiles, bus: &Bus) { } bus.emit(LiveEvent::Note { text: format!( - "context {ctx_tokens} tokens, idle {idle_secs}s ≥ cache TTL {ttl}s \ - — running checkpoint then auto-resetting session" + "context {ctx_tokens} tokens, idle {idle_secs}s >= cache TTL {ttl}s \ + — dropping session (cache cold, fresh start is equally cheap)" ), }); - match run_turn(AUTO_RESET_CHECKPOINT_PROMPT, files, bus).await { - TurnOutcome::Ok => {} - TurnOutcome::PromptTooLong => bus.emit(LiveEvent::Note { - text: "auto-reset checkpoint overflowed — resetting without checkpoint".into(), - }), - TurnOutcome::RateLimited => bus.emit(LiveEvent::Note { - text: "auto-reset checkpoint was rate-limited — resetting anyway".into(), - }), - TurnOutcome::Failed(e) => bus.emit(LiveEvent::Note { - text: format!("auto-reset checkpoint failed ({e:#}) — resetting anyway"), - }), - } bus.request_new_session(); }