From fb951c10047da3772e038571689fb333799977ce Mon Sep 17 00:00:00 2001 From: damocles Date: Wed, 20 May 2026 14:32:20 +0200 Subject: [PATCH] auto session-reset: fix default cache TTL to 1h, clarify state-preservation purpose --- hive-ag3nt/src/turn.rs | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/hive-ag3nt/src/turn.rs b/hive-ag3nt/src/turn.rs index 4ae1fd3..551a9d6 100644 --- a/hive-ag3nt/src/turn.rs +++ b/hive-ag3nt/src/turn.rs @@ -55,20 +55,25 @@ const RATE_LIMIT_MARKERS: &[&str] = &[ const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300; /// Token watermark for *auto session-reset*. When context is at or above this -/// many tokens AND the prompt cache has gone cold (idle time ≥ `CACHE_TTL_SECS`), -/// the harness runs a notes-checkpoint turn then drops `--continue` so the -/// next turn starts fresh. Sending a large context uncached costs the same as -/// a fresh start; the reset avoids paying for a long history the model won't -/// benefit from. Default is ~50% of a 200k-token window; override via -/// `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set to `0` to disable. +/// many tokens AND the prompt cache has gone cold (idle time >= `CACHE_TTL_SECS`), +/// the harness runs a notes-checkpoint turn (so the agent can persist state) +/// then drops `--continue` so the next turn starts with a clean, compact +/// context. The checkpoint re-uploads the full transcript either way, so the +/// purpose is state preservation (not cost savings). Default is ~50% of a +/// 200k-token window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set +/// to `0` to disable. const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000; -/// Assumed prompt-cache TTL. Claude's API caches prompt prefixes for ~5 -/// minutes; after that the prefix must be re-uploaded on the next turn. -/// When the idle gap exceeds this, a large context costs the same whether -/// we resume or start fresh — so we start fresh. Override via -/// `HIVE_CACHE_TTL_SECS`. -const DEFAULT_CACHE_TTL_SECS: u64 = 300; +/// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on +/// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the +/// idle gap exceeds this, the cache prefix has likely expired and the next +/// turn re-uploads the full transcript regardless of whether we resume or +/// start fresh. A fresh session with a small context is therefore just as +/// cheap AND gives the model a clean slate. Default is 3600s (1h) matching +/// the subscription TTL. API (pay-per-token) users should set +/// `HIVE_CACHE_TTL_SECS=300` to match their shorter cache window. Override +/// via `HIVE_CACHE_TTL_SECS`; set to `0` to disable (always resume). +const DEFAULT_CACHE_TTL_SECS: u64 = 3600; /// Synthetic checkpoint prompt injected before an auto-reset turn. Unlike the /// proactive-compaction checkpoint (which is followed by `/compact`), this one @@ -77,8 +82,8 @@ const DEFAULT_CACHE_TTL_SECS: u64 = 300; const AUTO_RESET_CHECKPOINT_PROMPT: &str = "[system] Context checkpoint before session auto-reset — no inbox message to handle.\n\n\ The harness detected that the prompt-cache TTL has elapsed while your context is large. \ -Resuming with --continue would re-upload the full transcript uncached; starting a fresh \ -session is cheaper and equally effective since the cache is already cold.\n\n\ +The session will be reset after this turn so you start the next message with a clean, \ +compact context instead of a long accumulated transcript.\n\n\ Use THIS turn to flush anything worth keeping into your durable state files: update \ your notes / TODO.md with in-flight task state, open questions, important paths, and \ anything else you would need to resume cleanly from a fresh session. Do not start \