model/context: configurable default model + model-derived context window

2026-05-20 15:12:37 +02:00 · 2026-05-20 15:12:37 +02:00 · 9064cd3c57
commit 9064cd3c57
parent 67f948028c
3 changed files with 117 additions and 43 deletions
--- a/hive-ag3nt/src/turn.rs
+++ b/hive-ag3nt/src/turn.rs
@ -54,16 +54,6 @@ const RATE_LIMIT_MARKERS: &[&str] = &[
 /// capacity limits.
 const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300;

-/// Token watermark for *auto session-reset*. When context is at or above this
-/// many tokens AND the prompt cache has gone cold (idle time >= `CACHE_TTL_SECS`),
-/// the harness drops `--continue` so the next turn starts fresh. Running any
-/// turn (even a checkpoint) before the reset would re-upload the full context
-/// and warm the cache, defeating the cost purpose — so the reset happens
-/// immediately with no preceding turn. Default is ~50% of a 200k-token
-/// window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set to `0`
-/// to disable.
-const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;
-
 /// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on
 /// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the
 /// idle gap exceeds this, the cache prefix has likely expired and the next
@ -75,19 +65,6 @@ const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;
 /// `0` to disable (always resume).
 const DEFAULT_CACHE_TTL_SECS: u64 = 3600;

-/// Token watermark for *proactive* compaction. Once a turn finishes with
-/// the last inference's context size at or above this many tokens,
-/// `drive_turn` runs one dedicated notes-checkpoint turn (so the agent
-/// can flush durable state into `/state`) and then `/compact` — while the
-/// session is still healthy enough to run a turn at all. This is distinct
-/// from the reactive `PROMPT_TOO_LONG_MARKER` path, which only fires once
-/// the session is *already* past the window: at that point no turn can
-/// run on it, so the reactive path just compacts + retries with no
-/// checkpoint. Default is ~75% of a 200k-token window; override via
-/// `HIVE_COMPACT_WATERMARK_TOKENS`, or set that to `0` to disable
-/// proactive compaction entirely (the reactive path always applies).
-const DEFAULT_COMPACT_WATERMARK_TOKENS: u64 = 150_000;
-
 /// Synthetic wake prompt for the proactive notes-checkpoint turn. Not an
 /// inbox message — the harness injects it directly so the agent gets one
 /// turn to persist durable state before `/compact` collapses the
@ -212,14 +189,19 @@ pub fn rate_limit_sleep_secs() -> u64 {
        .unwrap_or(DEFAULT_RATE_LIMIT_SLEEP_SECS)
 }

-/// Resolve the auto-reset watermark: `HIVE_AUTO_RESET_WATERMARK_TOKENS` if
-/// set to a valid integer, else `DEFAULT_AUTO_RESET_WATERMARK_TOKENS`. `0`
-/// disables auto-reset entirely.
-fn auto_reset_watermark_tokens() -> u64 {
-    std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS")
+/// Resolve the auto-reset watermark. Priority order:
+/// 1. `HIVE_AUTO_RESET_WATERMARK_TOKENS` env var, if set to a valid integer (explicit override).
+/// 2. 50% of the model's context window (from `bus.model()` + `events::context_window_tokens`).
+///
+/// `0` disables auto-reset entirely.
+fn auto_reset_watermark_tokens(bus: &Bus) -> u64 {
+    if let Some(v) = std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS")
        .ok()
        .and_then(|s| s.trim().parse::<u64>().ok())
-        .unwrap_or(DEFAULT_AUTO_RESET_WATERMARK_TOKENS)
+    {
+        return v;
+    }
+    crate::events::context_window_tokens(&bus.model()) / 2
 }

 /// Resolve the assumed cache TTL: `HIVE_CACHE_TTL_SECS` if set, else
@ -232,14 +214,19 @@ fn cache_ttl_secs() -> u64 {
        .unwrap_or(DEFAULT_CACHE_TTL_SECS)
 }

-/// Resolve the proactive-compaction watermark: `HIVE_COMPACT_WATERMARK_TOKENS`
-/// if set to a valid integer, else `DEFAULT_COMPACT_WATERMARK_TOKENS`. A
-/// value of `0` disables proactive compaction.
-fn compact_watermark_tokens() -> u64 {
-    std::env::var("HIVE_COMPACT_WATERMARK_TOKENS")
+/// Resolve the proactive-compaction watermark. Priority order:
+/// 1. `HIVE_COMPACT_WATERMARK_TOKENS` env var, if set to a valid integer (explicit override).
+/// 2. 75% of the model's context window (from `bus.model()` + `events::context_window_tokens`).
+///
+/// `0` disables proactive compaction (reactive path still applies).
+fn compact_watermark_tokens(bus: &Bus) -> u64 {
+    if let Some(v) = std::env::var("HIVE_COMPACT_WATERMARK_TOKENS")
        .ok()
        .and_then(|s| s.trim().parse::<u64>().ok())
-        .unwrap_or(DEFAULT_COMPACT_WATERMARK_TOKENS)
+    {
+        return v;
+    }
+    crate::events::context_window_tokens(&bus.model()) * 3 / 4
 }

 /// Drive one turn end-to-end. Three paths layer on top of the raw `run_turn`:
@ -291,7 +278,7 @@ pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutco
 /// checkpoint or compaction is logged + surfaced as a Note but never
 /// fails the turn that already succeeded.
 async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
-    let watermark = compact_watermark_tokens();
+    let watermark = compact_watermark_tokens(bus);
    if watermark == 0 {
        return; // proactive compaction disabled
    }
@ -336,7 +323,7 @@ async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
 /// any turn before the reset would re-upload and re-warm the cache, which
 /// defeats the cost-optimisation purpose entirely.
 fn maybe_auto_reset(bus: &Bus) {
-    let watermark = auto_reset_watermark_tokens();
+    let watermark = auto_reset_watermark_tokens(bus);
    if watermark == 0 {
        return; // auto-reset disabled
    }