auto session-reset: drop checkpoint turn, reset cold without warming cache
This commit is contained in:
parent
fb951c1004
commit
25659ee9f3
1 changed files with 19 additions and 45 deletions
|
|
@ -56,39 +56,25 @@ const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300;
|
||||||
|
|
||||||
/// Token watermark for *auto session-reset*. When context is at or above this
|
/// Token watermark for *auto session-reset*. When context is at or above this
|
||||||
/// many tokens AND the prompt cache has gone cold (idle time >= `CACHE_TTL_SECS`),
|
/// many tokens AND the prompt cache has gone cold (idle time >= `CACHE_TTL_SECS`),
|
||||||
/// the harness runs a notes-checkpoint turn (so the agent can persist state)
|
/// the harness drops `--continue` so the next turn starts fresh. Running any
|
||||||
/// then drops `--continue` so the next turn starts with a clean, compact
|
/// turn (even a checkpoint) before the reset would re-upload the full context
|
||||||
/// context. The checkpoint re-uploads the full transcript either way, so the
|
/// and warm the cache, defeating the cost-saving purpose — so the reset happens
|
||||||
/// purpose is state preservation (not cost savings). Default is ~50% of a
|
/// immediately with no preceding turn. Default is ~50% of a 200k-token
|
||||||
/// 200k-token window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set
|
/// window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set to `0`
|
||||||
/// to `0` to disable.
|
/// to disable.
|
||||||
const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;
|
const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;
|
||||||
|
|
||||||
/// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on
|
/// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on
|
||||||
/// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the
|
/// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the
|
||||||
/// idle gap exceeds this, the cache prefix has likely expired and the next
|
/// idle gap exceeds this, the cache prefix has likely expired and the next
|
||||||
/// turn re-uploads the full transcript regardless of whether we resume or
|
/// turn re-uploads the full transcript regardless of whether we resume or
|
||||||
/// start fresh. A fresh session with a small context is therefore just as
|
/// start fresh. A fresh session with a small context is therefore equally
|
||||||
/// cheap AND gives the model a clean slate. Default is 3600s (1h) matching
|
/// cheap and additionally gives the model a clean slate. Default is 3600s (1h) matching
|
||||||
/// the subscription TTL. API (pay-per-token) users should set
|
/// the subscription TTL; API (pay-per-token) users should set
|
||||||
/// `HIVE_CACHE_TTL_SECS=300` to match their shorter cache window. Override
|
/// `HIVE_CACHE_TTL_SECS=300`. Override via `HIVE_CACHE_TTL_SECS`; set to
|
||||||
/// via `HIVE_CACHE_TTL_SECS`; set to `0` to disable (always resume).
|
/// `0` to disable (always resume).
|
||||||
const DEFAULT_CACHE_TTL_SECS: u64 = 3600;
|
const DEFAULT_CACHE_TTL_SECS: u64 = 3600;
|
||||||
|
|
||||||
/// Synthetic checkpoint prompt injected before an auto-reset turn. Unlike the
|
|
||||||
/// proactive-compaction checkpoint (which is followed by `/compact`), this one
|
|
||||||
/// is followed by a fresh-session turn, so the wording focuses on "write now
|
|
||||||
/// so you can read it back cleanly" rather than "/compact is coming".
|
|
||||||
const AUTO_RESET_CHECKPOINT_PROMPT: &str =
|
|
||||||
"[system] Context checkpoint before session auto-reset — no inbox message to handle.\n\n\
|
|
||||||
The harness detected that the prompt-cache TTL has elapsed while your context is large. \
|
|
||||||
The session will be reset after this turn so you start the next message with a clean, \
|
|
||||||
compact context instead of a long accumulated transcript.\n\n\
|
|
||||||
Use THIS turn to flush anything worth keeping into your durable state files: update \
|
|
||||||
your notes / TODO.md with in-flight task state, open questions, important paths, and \
|
|
||||||
anything else you would need to resume cleanly from a fresh session. Do not start \
|
|
||||||
new work or reply to anyone — just write your notes and end the turn.";
|
|
||||||
|
|
||||||
/// Token watermark for *proactive* compaction. Once a turn finishes with
|
/// Token watermark for *proactive* compaction. Once a turn finishes with
|
||||||
/// the last inference's context size at or above this many tokens,
|
/// the last inference's context size at or above this many tokens,
|
||||||
/// `drive_turn` runs one dedicated notes-checkpoint turn (so the agent
|
/// `drive_turn` runs one dedicated notes-checkpoint turn (so the agent
|
||||||
|
|
@ -275,7 +261,7 @@ fn compact_watermark_tokens() -> u64 {
|
||||||
///
|
///
|
||||||
/// Both the sub-agent and manager loops call this.
|
/// Both the sub-agent and manager loops call this.
|
||||||
pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutcome {
|
pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutcome {
|
||||||
maybe_auto_reset(files, bus).await;
|
maybe_auto_reset(bus);
|
||||||
let outcome = match run_turn(prompt, files, bus).await {
|
let outcome = match run_turn(prompt, files, bus).await {
|
||||||
TurnOutcome::PromptTooLong => {
|
TurnOutcome::PromptTooLong => {
|
||||||
if let Err(e) = compact_session(files, bus).await {
|
if let Err(e) = compact_session(files, bus).await {
|
||||||
|
|
@ -345,11 +331,11 @@ async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Pre-turn auto-reset check. If context is large AND the prompt cache has
|
/// Pre-turn auto-reset check. If context is large AND the prompt cache has
|
||||||
/// gone cold (idle time ≥ cache TTL), run a notes-checkpoint turn so the agent
|
/// gone cold (idle time >= cache TTL), arm `request_new_session` so the
|
||||||
/// can persist durable state, then arm `request_new_session` so the actual
|
/// next wake-up turn starts fresh. No preceding checkpoint turn — running
|
||||||
/// wake-up turn starts fresh. Best-effort — a failed checkpoint is noted and
|
/// any turn before the reset would re-upload the full context and re-warm the cache, which
|
||||||
/// the reset proceeds anyway (agent still gets a clean session).
|
/// defeats the cost-optimisation purpose entirely.
|
||||||
async fn maybe_auto_reset(files: &TurnFiles, bus: &Bus) {
|
fn maybe_auto_reset(bus: &Bus) {
|
||||||
let watermark = auto_reset_watermark_tokens();
|
let watermark = auto_reset_watermark_tokens();
|
||||||
if watermark == 0 {
|
if watermark == 0 {
|
||||||
return; // auto-reset disabled
|
return; // auto-reset disabled
|
||||||
|
|
@ -376,22 +362,10 @@ async fn maybe_auto_reset(files: &TurnFiles, bus: &Bus) {
|
||||||
}
|
}
|
||||||
bus.emit(LiveEvent::Note {
|
bus.emit(LiveEvent::Note {
|
||||||
text: format!(
|
text: format!(
|
||||||
"context {ctx_tokens} tokens, idle {idle_secs}s ≥ cache TTL {ttl}s \
|
"context {ctx_tokens} tokens, idle {idle_secs}s >= cache TTL {ttl}s \
|
||||||
— running checkpoint then auto-resetting session"
|
— dropping session (cache cold, fresh start is equally cheap)"
|
||||||
),
|
),
|
||||||
});
|
});
|
||||||
match run_turn(AUTO_RESET_CHECKPOINT_PROMPT, files, bus).await {
|
|
||||||
TurnOutcome::Ok => {}
|
|
||||||
TurnOutcome::PromptTooLong => bus.emit(LiveEvent::Note {
|
|
||||||
text: "auto-reset checkpoint overflowed — resetting without checkpoint".into(),
|
|
||||||
}),
|
|
||||||
TurnOutcome::RateLimited => bus.emit(LiveEvent::Note {
|
|
||||||
text: "auto-reset checkpoint was rate-limited — resetting anyway".into(),
|
|
||||||
}),
|
|
||||||
TurnOutcome::Failed(e) => bus.emit(LiveEvent::Note {
|
|
||||||
text: format!("auto-reset checkpoint failed ({e:#}) — resetting anyway"),
|
|
||||||
}),
|
|
||||||
}
|
|
||||||
bus.request_new_session();
|
bus.request_new_session();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue