auto session-reset: drop checkpoint turn, reset cold without warming cache

2026-05-20 14:41:49 +02:00 · 2026-05-20 14:41:49 +02:00 · 25659ee9f3
commit 25659ee9f3
parent fb951c1004
1 changed files with 19 additions and 45 deletions
--- a/hive-ag3nt/src/turn.rs
+++ b/hive-ag3nt/src/turn.rs
@@ -56,39 +56,25 @@ const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300;
 /// Token watermark for *auto session-reset*. When context is at or above this
 /// many tokens AND the prompt cache has gone cold (idle time >= `CACHE_TTL_SECS`),
-/// the harness runs a notes-checkpoint turn (so the agent can persist state)
+/// the harness drops `--continue` so the next turn starts fresh. Running any
-/// then drops `--continue` so the next turn starts with a clean, compact
+/// turn (even a checkpoint) before the reset would re-upload the full context
-/// context. The checkpoint re-uploads the full transcript either way, so the
+/// and warm the cache, defeating the cost purpose — so the reset happens
-/// purpose is state preservation (not cost savings). Default is ~50% of a
+/// immediately with no preceding turn. Default is ~50% of a 200k-token
-/// 200k-token window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set
+/// window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set to `0`
-/// to `0` to disable.
+/// to disable.
 const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;
 /// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on
 /// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the
 /// idle gap exceeds this, the cache prefix has likely expired and the next
 /// turn re-uploads the full transcript regardless of whether we resume or
-/// start fresh. A fresh session with a small context is therefore just as
+/// start fresh. A fresh session with a small context is therefore equally
-/// cheap AND gives the model a clean slate. Default is 3600s (1h) matching
+/// cheap but gives the model a clean slate. Default is 3600s (1h) matching
-/// the subscription TTL. API (pay-per-token) users should set
+/// the subscription TTL; API (pay-per-token) users should set
-/// `HIVE_CACHE_TTL_SECS=300` to match their shorter cache window. Override
+/// `HIVE_CACHE_TTL_SECS=300`. Override via `HIVE_CACHE_TTL_SECS`; set to
-/// via `HIVE_CACHE_TTL_SECS`; set to `0` to disable (always resume).
+/// `0` to disable (always resume).
 const DEFAULT_CACHE_TTL_SECS: u64 = 3600;
-/// Synthetic checkpoint prompt injected before an auto-reset turn. Unlike the
-/// proactive-compaction checkpoint (which is followed by `/compact`), this one
-/// is followed by a fresh-session turn, so the wording focuses on "write now
-/// so you can read it back cleanly" rather than "/compact is coming".
-const AUTO_RESET_CHECKPOINT_PROMPT: &str =
-    "[system] Context checkpoint before session auto-reset — no inbox message to handle.\n\n\
-The harness detected that the prompt-cache TTL has elapsed while your context is large. \
-The session will be reset after this turn so you start the next message with a clean, \
-compact context instead of a long accumulated transcript.\n\n\
-Use THIS turn to flush anything worth keeping into your durable state files: update \
-your notes / TODO.md with in-flight task state, open questions, important paths, and \
-anything else you would need to resume cleanly from a fresh session. Do not start \
-new work or reply to anyone — just write your notes and end the turn.";
 /// Token watermark for *proactive* compaction. Once a turn finishes with
 /// the last inference's context size at or above this many tokens,
 /// `drive_turn` runs one dedicated notes-checkpoint turn (so the agent
@@ -275,7 +261,7 @@ fn compact_watermark_tokens() -> u64 {
 ///
 /// Both the sub-agent and manager loops call this.
 pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutcome {
-    maybe_auto_reset(files, bus).await;
+    maybe_auto_reset(bus);
    let outcome = match run_turn(prompt, files, bus).await {
        TurnOutcome::PromptTooLong => {
            if let Err(e) = compact_session(files, bus).await {
@@ -345,11 +331,11 @@ async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
 }
 /// Pre-turn auto-reset check. If context is large AND the prompt cache has
-/// gone cold (idle time ≥ cache TTL), run a notes-checkpoint turn so the agent
+/// gone cold (idle time >= cache TTL), arm `request_new_session` so the
-/// can persist durable state, then arm `request_new_session` so the actual
+/// next wake-up turn starts fresh. No preceding checkpoint turn — running
-/// wake-up turn starts fresh. Best-effort — a failed checkpoint is noted and
+/// any turn before the reset would re-upload and re-warm the cache, which
-/// the reset proceeds anyway (agent still gets a clean session).
+/// defeats the cost-optimisation purpose entirely.
-async fn maybe_auto_reset(files: &TurnFiles, bus: &Bus) {
+fn maybe_auto_reset(bus: &Bus) {
    let watermark = auto_reset_watermark_tokens();
    if watermark == 0 {
        return; // auto-reset disabled
@@ -376,22 +362,10 @@ async fn maybe_auto_reset(files: &TurnFiles, bus: &Bus) {
    }
    bus.emit(LiveEvent::Note {
        text: format!(
-            "context {ctx_tokens} tokens, idle {idle_secs}s ≥ cache TTL {ttl}s \
+            "context {ctx_tokens} tokens, idle {idle_secs}s >= cache TTL {ttl}s \
-             — running checkpoint then auto-resetting session"
+             — dropping session (cache cold, fresh start is equally cheap)"
        ),
    });
-    match run_turn(AUTO_RESET_CHECKPOINT_PROMPT, files, bus).await {
-        TurnOutcome::Ok => {}
-        TurnOutcome::PromptTooLong => bus.emit(LiveEvent::Note {
-            text: "auto-reset checkpoint overflowed — resetting without checkpoint".into(),
-        }),
-        TurnOutcome::RateLimited => bus.emit(LiveEvent::Note {
-            text: "auto-reset checkpoint was rate-limited — resetting anyway".into(),
-        }),
-        TurnOutcome::Failed(e) => bus.emit(LiveEvent::Note {
-            text: format!("auto-reset checkpoint failed ({e:#}) — resetting anyway"),
-        }),
-    }
    bus.request_new_session();
 }