model/context: configurable default model + model-derived context window
This commit is contained in:
parent
67f948028c
commit
9064cd3c57
3 changed files with 117 additions and 43 deletions
|
|
@ -275,13 +275,51 @@ pub enum TurnState {
|
||||||
Compacting,
|
Compacting,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Default claude model when nothing's been set at runtime. The
|
/// Default claude model when nothing's been set at runtime. Overridable
|
||||||
/// operator can switch via `/model <name>` in the web terminal; the
|
/// via the `HIVE_DEFAULT_MODEL` env var (set from `hyperhive.model` in
|
||||||
/// chosen model lives in `Bus::model` for the rest of the harness
|
/// the container's `agent.nix`). The operator can also switch at runtime
|
||||||
/// process's life (resets on restart, by design — operator overrides
|
/// via `/model <name>` in the web terminal; the chosen model is persisted
|
||||||
/// shouldn't survive accidentally).
|
/// to the state dir so it survives restarts.
|
||||||
pub const DEFAULT_MODEL: &str = "haiku";
|
pub const DEFAULT_MODEL: &str = "haiku";
|
||||||
|
|
||||||
|
/// Return the initial default model name: `HIVE_DEFAULT_MODEL` env var if
|
||||||
|
/// set to a non-empty string, otherwise `DEFAULT_MODEL`.
|
||||||
|
#[must_use]
|
||||||
|
pub fn default_model() -> &'static str {
|
||||||
|
// Leak once at startup — acceptable for a single config value.
|
||||||
|
std::env::var("HIVE_DEFAULT_MODEL")
|
||||||
|
.ok()
|
||||||
|
.filter(|s| !s.trim().is_empty())
|
||||||
|
.map_or(DEFAULT_MODEL, |s| Box::leak(s.into_boxed_str()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Context-window size, in tokens, to assume for `model`.
///
/// An explicit `HIVE_CONTEXT_WINDOW_TOKENS` wins whenever it parses (after
/// trimming) as a `u64` greater than zero. Unset, unparsable, or `0` values
/// fall through to the model-name heuristic, which is a case-insensitive
/// substring match:
/// - names containing `sonnet` or `opus`: 1 000 000 tokens
/// - everything else, including `haiku` and unknown names: 200 000 tokens
///   (the conservative default)
#[must_use]
pub fn context_window_tokens(model: &str) -> u64 {
    let explicit = std::env::var("HIVE_CONTEXT_WINDOW_TOKENS")
        .ok()
        .and_then(|raw| raw.trim().parse::<u64>().ok())
        .filter(|&tokens| tokens > 0);
    if let Some(tokens) = explicit {
        return tokens;
    }

    let lowered = model.to_ascii_lowercase();
    let large_window = ["sonnet", "opus"]
        .iter()
        .any(|family| lowered.contains(*family));
    if large_window {
        1_000_000
    } else {
        200_000
    }
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Bus {
|
pub struct Bus {
|
||||||
tx: Arc<broadcast::Sender<BusEvent>>,
|
tx: Arc<broadcast::Sender<BusEvent>>,
|
||||||
|
|
@ -351,7 +389,7 @@ impl Bus {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let (tx, _) = broadcast::channel(CHANNEL_CAPACITY);
|
let (tx, _) = broadcast::channel(CHANNEL_CAPACITY);
|
||||||
let initial_model = load_model().unwrap_or_else(|| DEFAULT_MODEL.to_owned());
|
let initial_model = load_model().unwrap_or_else(|| default_model().to_owned());
|
||||||
// Restore rate_limited from the sentinel file — if the harness
|
// Restore rate_limited from the sentinel file — if the harness
|
||||||
// crashed while parked, we should still show the right status on
|
// crashed while parked, we should still show the right status on
|
||||||
// cold load until the next turn clears it.
|
// cold load until the next turn clears it.
|
||||||
|
|
|
||||||
|
|
@ -54,16 +54,6 @@ const RATE_LIMIT_MARKERS: &[&str] = &[
|
||||||
/// capacity limits.
|
/// capacity limits.
|
||||||
const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300;
|
const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300;
|
||||||
|
|
||||||
/// Token watermark for *auto session-reset*. When context is at or above this
|
|
||||||
/// many tokens AND the prompt cache has gone cold (idle time >= `CACHE_TTL_SECS`),
|
|
||||||
/// the harness drops `--continue` so the next turn starts fresh. Running any
|
|
||||||
/// turn (even a checkpoint) before the reset would re-upload the full context
|
|
||||||
/// and warm the cache, defeating the cost purpose — so the reset happens
|
|
||||||
/// immediately with no preceding turn. Default is ~50% of a 200k-token
|
|
||||||
/// window; override via `HIVE_AUTO_RESET_WATERMARK_TOKENS`, or set to `0`
|
|
||||||
/// to disable.
|
|
||||||
const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;
|
|
||||||
|
|
||||||
/// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on
|
/// Assumed prompt-cache TTL. Claude caches prompt prefixes — ~5 minutes on
|
||||||
/// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the
|
/// the API (pay-per-token), ~1 hour on Claude Max (subscription). When the
|
||||||
/// idle gap exceeds this, the cache prefix has likely expired and the next
|
/// idle gap exceeds this, the cache prefix has likely expired and the next
|
||||||
|
|
@ -75,19 +65,6 @@ const DEFAULT_AUTO_RESET_WATERMARK_TOKENS: u64 = 100_000;
|
||||||
/// `0` to disable (always resume).
|
/// `0` to disable (always resume).
|
||||||
const DEFAULT_CACHE_TTL_SECS: u64 = 3600;
|
const DEFAULT_CACHE_TTL_SECS: u64 = 3600;
|
||||||
|
|
||||||
/// Token watermark for *proactive* compaction. Once a turn finishes with
|
|
||||||
/// the last inference's context size at or above this many tokens,
|
|
||||||
/// `drive_turn` runs one dedicated notes-checkpoint turn (so the agent
|
|
||||||
/// can flush durable state into `/state`) and then `/compact` — while the
|
|
||||||
/// session is still healthy enough to run a turn at all. This is distinct
|
|
||||||
/// from the reactive `PROMPT_TOO_LONG_MARKER` path, which only fires once
|
|
||||||
/// the session is *already* past the window: at that point no turn can
|
|
||||||
/// run on it, so the reactive path just compacts + retries with no
|
|
||||||
/// checkpoint. Default is ~75% of a 200k-token window; override via
|
|
||||||
/// `HIVE_COMPACT_WATERMARK_TOKENS`, or set that to `0` to disable
|
|
||||||
/// proactive compaction entirely (the reactive path always applies).
|
|
||||||
const DEFAULT_COMPACT_WATERMARK_TOKENS: u64 = 150_000;
|
|
||||||
|
|
||||||
/// Synthetic wake prompt for the proactive notes-checkpoint turn. Not an
|
/// Synthetic wake prompt for the proactive notes-checkpoint turn. Not an
|
||||||
/// inbox message — the harness injects it directly so the agent gets one
|
/// inbox message — the harness injects it directly so the agent gets one
|
||||||
/// turn to persist durable state before `/compact` collapses the
|
/// turn to persist durable state before `/compact` collapses the
|
||||||
|
|
@ -212,14 +189,19 @@ pub fn rate_limit_sleep_secs() -> u64 {
|
||||||
.unwrap_or(DEFAULT_RATE_LIMIT_SLEEP_SECS)
|
.unwrap_or(DEFAULT_RATE_LIMIT_SLEEP_SECS)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Resolve the auto-reset watermark: `HIVE_AUTO_RESET_WATERMARK_TOKENS` if
|
/// Resolve the auto-reset watermark. Priority order:
|
||||||
/// set to a valid integer, else `DEFAULT_AUTO_RESET_WATERMARK_TOKENS`. `0`
|
/// 1. `HIVE_AUTO_RESET_WATERMARK_TOKENS` env var (explicit override).
|
||||||
/// disables auto-reset entirely.
|
/// 2. 50% of the model's context window (derived from `bus.model()` +
|
||||||
fn auto_reset_watermark_tokens() -> u64 {
|
/// `events::context_window_tokens`).
|
||||||
std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS")
|
/// `0` disables auto-reset entirely.
|
||||||
|
fn auto_reset_watermark_tokens(bus: &Bus) -> u64 {
|
||||||
|
if let Some(v) = std::env::var("HIVE_AUTO_RESET_WATERMARK_TOKENS")
|
||||||
.ok()
|
.ok()
|
||||||
.and_then(|s| s.trim().parse::<u64>().ok())
|
.and_then(|s| s.trim().parse::<u64>().ok())
|
||||||
.unwrap_or(DEFAULT_AUTO_RESET_WATERMARK_TOKENS)
|
{
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
crate::events::context_window_tokens(&bus.model()) / 2
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Resolve the assumed cache TTL: `HIVE_CACHE_TTL_SECS` if set, else
|
/// Resolve the assumed cache TTL: `HIVE_CACHE_TTL_SECS` if set, else
|
||||||
|
|
@ -232,14 +214,19 @@ fn cache_ttl_secs() -> u64 {
|
||||||
.unwrap_or(DEFAULT_CACHE_TTL_SECS)
|
.unwrap_or(DEFAULT_CACHE_TTL_SECS)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Resolve the proactive-compaction watermark: `HIVE_COMPACT_WATERMARK_TOKENS`
|
/// Resolve the proactive-compaction watermark. Priority order:
|
||||||
/// if set to a valid integer, else `DEFAULT_COMPACT_WATERMARK_TOKENS`. A
|
/// 1. `HIVE_COMPACT_WATERMARK_TOKENS` env var (explicit override).
|
||||||
/// value of `0` disables proactive compaction.
|
/// 2. 75% of the model's context window (derived from `bus.model()` +
|
||||||
fn compact_watermark_tokens() -> u64 {
|
/// `events::context_window_tokens`).
|
||||||
std::env::var("HIVE_COMPACT_WATERMARK_TOKENS")
|
/// `0` disables proactive compaction (reactive path still applies).
|
||||||
|
fn compact_watermark_tokens(bus: &Bus) -> u64 {
|
||||||
|
if let Some(v) = std::env::var("HIVE_COMPACT_WATERMARK_TOKENS")
|
||||||
.ok()
|
.ok()
|
||||||
.and_then(|s| s.trim().parse::<u64>().ok())
|
.and_then(|s| s.trim().parse::<u64>().ok())
|
||||||
.unwrap_or(DEFAULT_COMPACT_WATERMARK_TOKENS)
|
{
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
crate::events::context_window_tokens(&bus.model()) * 3 / 4
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Drive one turn end-to-end. Three paths layer on top of the raw `run_turn`:
|
/// Drive one turn end-to-end. Three paths layer on top of the raw `run_turn`:
|
||||||
|
|
@ -291,7 +278,7 @@ pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutco
|
||||||
/// checkpoint or compaction is logged + surfaced as a Note but never
|
/// checkpoint or compaction is logged + surfaced as a Note but never
|
||||||
/// fails the turn that already succeeded.
|
/// fails the turn that already succeeded.
|
||||||
async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
|
async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
|
||||||
let watermark = compact_watermark_tokens();
|
let watermark = compact_watermark_tokens(bus);
|
||||||
if watermark == 0 {
|
if watermark == 0 {
|
||||||
return; // proactive compaction disabled
|
return; // proactive compaction disabled
|
||||||
}
|
}
|
||||||
|
|
@ -336,7 +323,7 @@ async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
|
||||||
/// any turn before the reset would re-upload and re-warm the cache, which
|
/// any turn before the reset would re-upload and re-warm the cache, which
|
||||||
/// defeats the cost-optimisation purpose entirely.
|
/// defeats the cost-optimisation purpose entirely.
|
||||||
fn maybe_auto_reset(bus: &Bus) {
|
fn maybe_auto_reset(bus: &Bus) {
|
||||||
let watermark = auto_reset_watermark_tokens();
|
let watermark = auto_reset_watermark_tokens(bus);
|
||||||
if watermark == 0 {
|
if watermark == 0 {
|
||||||
return; // auto-reset disabled
|
return; // auto-reset disabled
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,45 @@
|
||||||
# only opts in from its own `agent.nix`.
|
# only opts in from its own `agent.nix`.
|
||||||
imports = [ ./weston-vnc.nix ];
|
imports = [ ./weston-vnc.nix ];
|
||||||
|
|
||||||
|
# Per-agent default model name (a plain string, defaulting to "haiku").
options.hyperhive.model = lib.mkOption {
  type = lib.types.str;
  default = "haiku";
  example = "sonnet";
  description = ''
    Default claude model for this agent. Sets the `HIVE_DEFAULT_MODEL`
    environment variable consumed by the harness at boot; if no
    persisted model choice exists in the agent's state dir the harness
    falls back to this value. The operator can still switch the model at
    runtime via the per-agent web UI — that choice is persisted to the
    state dir and takes precedence over this default until the agent is
    purged.

    Valid values are the short model names that `claude --model` accepts:
    `"haiku"`, `"sonnet"`, `"opus"` (or any future identifier). The
    harness derives sensible watermarks from the model family:
    haiku → 200 000 token window; sonnet / opus → 1 000 000 token window.
    Override the derived window via `hyperhive.contextWindowTokens`.
  '';
};
|
||||||
|
|
||||||
|
# Explicit context-window override. Unsigned on purpose: a negative window is
# meaningless and would not parse as an unsigned integer on the harness side,
# so reject it at evaluation time instead of silently ignoring it.
options.hyperhive.contextWindowTokens = lib.mkOption {
  type = lib.types.ints.unsigned;
  default = 0;
  example = 1000000;
  description = ''
    Context-window size in tokens for this agent's model. `0` (the
    default) means "auto-derive from the model name": haiku → 200 000,
    sonnet / opus → 1 000 000. Set an explicit value here when you are
    using a model the harness does not recognise, or when Anthropic
    changes the window for an existing model family.

    Sets the `HIVE_CONTEXT_WINDOW_TOKENS` environment variable; the
    harness reads it at runtime and uses it to compute the default
    compaction and auto-reset watermarks (75% and 50% of the window
    respectively).
  '';
};
|
||||||
|
|
||||||
options.hyperhive.allowedBashPatterns = lib.mkOption {
|
options.hyperhive.allowedBashPatterns = lib.mkOption {
|
||||||
type = lib.types.listOf lib.types.str;
|
type = lib.types.listOf lib.types.str;
|
||||||
default = [ ];
|
default = [ ];
|
||||||
|
|
@ -208,6 +247,16 @@
|
||||||
environment.etc."hyperhive/claude-plugins-auto-update.json".text =
|
environment.etc."hyperhive/claude-plugins-auto-update.json".text =
|
||||||
builtins.toJSON config.hyperhive.claudePluginsAutoUpdate;
|
builtins.toJSON config.hyperhive.claudePluginsAutoUpdate;
|
||||||
|
|
||||||
|
# Model + context-window env vars consumed by the harness at boot.
# HIVE_DEFAULT_MODEL seeds the initial model selection when no persisted
# model choice exists in the state dir. HIVE_CONTEXT_WINDOW_TOKENS
# overrides the auto-derived window size (only set when the NixOS option
# is non-zero so an unset env var lets the harness use its own heuristic).
#
# Both entries go through a single `environment.variables` definition:
# assigning `environment.variables.<name>` and `environment.variables`
# separately in the same attribute set is a duplicate-attribute evaluation
# error.
#
# NOTE(review): `environment.variables` is applied at shell initialisation;
# confirm the harness process actually inherits it (a systemd unit would need
# its own `environment` / `serviceConfig.Environment`).
environment.variables = lib.mkMerge [
  { HIVE_DEFAULT_MODEL = config.hyperhive.model; }
  (lib.mkIf (config.hyperhive.contextWindowTokens != 0) {
    HIVE_CONTEXT_WINDOW_TOKENS = toString config.hyperhive.contextWindowTokens;
  })
];
|
||||||
|
|
||||||
boot.isNspawnContainer = true;
|
boot.isNspawnContainer = true;
|
||||||
|
|
||||||
# Every agent gets flakes + the modern `nix` CLI out of the box.
|
# Every agent gets flakes + the modern `nix` CLI out of the box.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue