auto session-reset: fix default cache TTL to 1h, clarify state-preservation purpose

2026-05-20 14:32:20 +02:00 · 2026-05-20 14:32:20 +02:00 · fb951c1004
commit fb951c1004
parent 44c903f265
1 changed file with 19 additions and 14 deletions
--- a/hive-ag3nt/src/turn.rs
+++ b/hive-ag3nt/src/turn.rs
@@ -55,20 +55,25 @@ const RATE_LIMIT_MARKERS: &[&str] = &[
 const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300;

 /// Token watermark for *auto session-reset*. When context is at or above this
-/// many tokens AND the prompt cache has gone cold (idle time ≥ `CACHE_TTL_SECS`),
-/// the harness runs a notes-checkpoint turn then drops `--continue` so the
-/// next turn starts fresh. Sending a large context uncached costs the same as
-/// a fresh start; the reset avoids paying for a long history the model won't
-/// benefit from. Default is ~50% of a 200k-token window; override via
-/// `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set to `0` to disable.
+/// many tokens AND the prompt cache has gone cold (idle time >= `CACHE_TTL_SECS`),
+/// the harness runs a notes-checkpoint turn (so the agent can persist state)
+/// then drops `--continue` so the next turn starts with a clean, compact
+/// context. The checkpoint turn itself still re-uploads the full transcript,
+/// so the purpose is state preservation (not cost savings). Default is ~50% of a
+/// 200k-token window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set
+/// to `0` to disable.
 const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;

-/// Assumed prompt-cache TTL. Claude's API caches prompt prefixes for ~5
-/// minutes; after that the prefix must be re-uploaded on the next turn.
-/// When the idle gap exceeds this, a large context costs the same whether
-/// we resume or start fresh — so we start fresh. Override via
-/// `HIVE_CACHE_TTL_SECS`.
-const DEFAULT_CACHE_TTL_SECS: u64 = 300;
+/// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on
+/// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the
+/// idle gap exceeds this, the cached prefix has likely expired, so the next
+/// turn sends its whole context uncached whether we resume or start fresh.
+/// A fresh session with a small context is therefore at least as cheap AND
+/// gives the model a clean slate. Default is 3600s (1h), matching
+/// the subscription TTL. API (pay-per-token) users should set
+/// `HIVE_CACHE_TTL_SECS=300` to match their shorter cache window. Override
+/// via `HIVE_CACHE_TTL_SECS`; set to `0` to disable (always resume).
+const DEFAULT_CACHE_TTL_SECS: u64 = 3600;

 /// Synthetic checkpoint prompt injected before an auto-reset turn. Unlike the
 /// proactive-compaction checkpoint (which is followed by `/compact`), this one
@@ -77,8 +82,8 @@ const DEFAULT_CACHE_TTL_SECS: u64 = 300;
 const AUTO_RESET_CHECKPOINT_PROMPT: &str =
    "[system] Context checkpoint before session auto-reset — no inbox message to handle.\n\n\
 The harness detected that the prompt-cache TTL has elapsed while your context is large. \
-Resuming with --continue would re-upload the full transcript uncached; starting a fresh \
-session is cheaper and equally effective since the cache is already cold.\n\n\
+The session will be reset after this turn so you start the next message with a clean, \
+compact context instead of a long accumulated transcript.\n\n\
 Use THIS turn to flush anything worth keeping into your durable state files: update \
 your notes / TODO.md with in-flight task state, open questions, important paths, and \
 anything else you would need to resume cleanly from a fresh session. Do not start \