model/context: configurable default model + model-derived context window

This commit is contained in:
damocles 2026-05-20 15:12:37 +02:00 committed by Mara
parent 67f948028c
commit 9064cd3c57
3 changed files with 117 additions and 43 deletions

View file

@ -54,16 +54,6 @@ const RATE_LIMIT_MARKERS: &[&str] = &[
/// capacity limits.
const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300;
/// Token watermark for *auto session-reset*. When context is at or above this
/// many tokens AND the prompt cache has gone cold (idle time >= the assumed
/// cache TTL — `DEFAULT_CACHE_TTL_SECS`, overridable via `HIVE_CACHE_TTL_SECS`),
/// the harness drops `--continue` so the next turn starts fresh. Running any
/// turn (even a checkpoint) before the reset would re-upload the full context
/// and warm the cache, defeating the cost purpose — so the reset happens
/// immediately with no preceding turn.
///
/// The default of 100_000 tokens is ~50% of a 200k-token window; override via
/// `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set that to `0` to disable.
const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;
/// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on
/// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the
/// idle gap exceeds this, the cache prefix has likely expired and the next
@ -75,19 +65,6 @@ const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;
/// `0` to disable (always resume).
const DEFAULT_CACHE_TTL_SECS: u64 = 3600;
/// Token watermark for *proactive* compaction. Once a turn finishes with
/// the last inference's context size at or above this many tokens,
/// `drive_turn` runs one dedicated notes-checkpoint turn (so the agent
/// can flush durable state into `/state`) and then `/compact` — while the
/// session is still healthy enough to run a turn at all.
///
/// This is distinct from the reactive `PROMPT_TOO_LONG_MARKER` path, which
/// only fires once the session is *already* past the window: at that point
/// no turn can run on it, so the reactive path just compacts + retries with
/// no checkpoint.
///
/// The default of 150_000 tokens is ~75% of a 200k-token window; override via
/// `HIVE_COMPACT_WATERMARK_TOKENS`, or set that to `0` to disable proactive
/// compaction entirely (the reactive path always applies).
const DEFAULT_COMPACT_WATERMARK_TOKENS: u64 = 150_000;
/// Synthetic wake prompt for the proactive notes-checkpoint turn. Not an
/// inbox message — the harness injects it directly so the agent gets one
/// turn to persist durable state before `/compact` collapses the
@ -212,14 +189,19 @@ pub fn rate_limit_sleep_secs() -> u64 {
.unwrap_or(DEFAULT_RATE_LIMIT_SLEEP_SECS)
}
/// Resolve the auto-reset watermark: `HIVE_AUTO_RESET_WATERMARK_TOKENS` if
/// set to a valid integer, else `DEFAULT_AUTO_RESET_WATERMARK_TOKENS`. `0`
/// disables auto-reset entirely.
fn auto_reset_watermark_tokens() -> u64 {
std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS")
/// Resolve the auto-reset watermark. Priority order:
/// 1. `HIVE_AUTO_RESET_WATERMARK_TOKENS` env var (explicit override).
/// 2. 50% of the model's context window (derived from `bus.model()` +
/// `events::context_window_tokens`).
/// `0` disables auto-reset entirely.
fn auto_reset_watermark_tokens(bus: &Bus) -> u64 {
if let Some(v) = std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS")
.ok()
.and_then(|s| s.trim().parse::<u64>().ok())
.unwrap_or(DEFAULT_AUTO_RESET_WATERMARK_TOKENS)
{
return v;
}
crate::events::context_window_tokens(&bus.model()) / 2
}
/// Resolve the assumed cache TTL: `HIVE_CACHE_TTL_SECS` if set, else
@ -232,14 +214,19 @@ fn cache_ttl_secs() -> u64 {
.unwrap_or(DEFAULT_CACHE_TTL_SECS)
}
/// Resolve the proactive-compaction watermark: `HIVE_COMPACT_WATERMARK_TOKENS`
/// if set to a valid integer, else `DEFAULT_COMPACT_WATERMARK_TOKENS`. A
/// value of `0` disables proactive compaction.
fn compact_watermark_tokens() -> u64 {
std::env::var("HIVE_COMPACT_WATERMARK_TOKENS")
/// Resolve the proactive-compaction watermark. Priority order:
/// 1. `HIVE_COMPACT_WATERMARK_TOKENS` env var (explicit override).
/// 2. 75% of the model's context window (derived from `bus.model()` +
/// `events::context_window_tokens`).
/// `0` disables proactive compaction (reactive path still applies).
fn compact_watermark_tokens(bus: &Bus) -> u64 {
if let Some(v) = std::env::var("HIVE_COMPACT_WATERMARK_TOKENS")
.ok()
.and_then(|s| s.trim().parse::<u64>().ok())
.unwrap_or(DEFAULT_COMPACT_WATERMARK_TOKENS)
{
return v;
}
crate::events::context_window_tokens(&bus.model()) * 3 / 4
}
/// Drive one turn end-to-end. Three paths layer on top of the raw `run_turn`:
@ -291,7 +278,7 @@ pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutco
/// checkpoint or compaction is logged + surfaced as a Note but never
/// fails the turn that already succeeded.
async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
let watermark = compact_watermark_tokens();
let watermark = compact_watermark_tokens(bus);
if watermark == 0 {
return; // proactive compaction disabled
}
@ -336,7 +323,7 @@ async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
/// any turn before the reset would re-upload and re-warm the cache, which
/// defeats the cost-optimisation purpose entirely.
fn maybe_auto_reset(bus: &Bus) {
let watermark = auto_reset_watermark_tokens();
let watermark = auto_reset_watermark_tokens(bus);
if watermark == 0 {
return; // auto-reset disabled
}