detect rate-limit errors; park serve loop instead of crashing
This commit is contained in:
parent
6f7cc6e77d
commit
03db764101
3 changed files with 98 additions and 7 deletions
|
|
@ -34,6 +34,26 @@ const CLAUDE_SETTINGS: &str = include_str!("../prompts/claude-settings.json");
|
|||
/// claude exit with a useful error in the live view.
|
||||
const PROMPT_TOO_LONG_MARKER: &str = "Prompt is too long";
|
||||
|
||||
/// Substrings that indicate the Anthropic API is refusing the request due
|
||||
/// to a rate limit, per-account usage cap, or exhausted credit balance.
|
||||
/// Each is matched as a case-sensitive substring of every stdout and stderr line; any hit returns
|
||||
/// `TurnOutcome::RateLimited` so the serve loop can park + retry instead
|
||||
/// of propagating a hard failure that looks identical to a crash.
|
||||
const RATE_LIMIT_MARKERS: &[&str] = &[
|
||||
"rate_limit_error",
|
||||
"overloaded_error",
|
||||
"Credit balance is too low",
|
||||
"Usage limit reached",
|
||||
"Request rate limit exceeded",
|
||||
];
|
||||
|
||||
/// How long to sleep after detecting a rate-limit before re-entering the
|
||||
/// serve loop, in seconds. Overridable via `HIVE_RATE_LIMIT_SLEEP_SECS`. Default is
|
||||
/// 5 minutes — enough for most short-lived throttles; the operator can
|
||||
/// tune down for tight retry scenarios or up if they're hitting sustained
|
||||
/// capacity limits.
|
||||
const DEFAULT_RATE_LIMIT_SLEEP_SECS: u64 = 300;
|
||||
|
||||
/// Token watermark for *proactive* compaction. Once a turn finishes with
|
||||
/// the last inference's context size at or above this many tokens,
|
||||
/// `drive_turn` runs one dedicated notes-checkpoint turn (so the agent
|
||||
|
|
@ -153,9 +173,24 @@ pub enum TurnOutcome {
|
|||
/// claude saw "Prompt is too long" — the session needs compacting.
|
||||
/// Run `compact_session()` then retry the same wake-up prompt.
|
||||
PromptTooLong,
|
||||
/// The Anthropic API refused the request due to a rate limit, per-account
|
||||
/// usage cap, or exhausted credit balance. The serve loop should park for
|
||||
/// `rate_limit_sleep_secs()` seconds and retry — it must NOT bubble up as a crash.
|
||||
RateLimited,
|
||||
Failed(anyhow::Error),
|
||||
}
|
||||
|
||||
/// How long to sleep after a rate-limit before re-entering the serve loop.
|
||||
/// Reads `HIVE_RATE_LIMIT_SLEEP_SECS` if set to a valid positive integer.
|
||||
#[must_use]
|
||||
pub fn rate_limit_sleep_secs() -> u64 {
|
||||
std::env::var("HIVE_RATE_LIMIT_SLEEP_SECS")
|
||||
.ok()
|
||||
.and_then(|s| s.trim().parse::<u64>().ok())
|
||||
.filter(|&v| v > 0)
|
||||
.unwrap_or(DEFAULT_RATE_LIMIT_SLEEP_SECS)
|
||||
}
|
||||
|
||||
/// Resolve the proactive-compaction watermark: `HIVE_COMPACT_WATERMARK_TOKENS`
|
||||
/// if set to a valid integer, else `DEFAULT_COMPACT_WATERMARK_TOKENS`. A
|
||||
/// value of `0` disables proactive compaction.
|
||||
|
|
@ -189,6 +224,9 @@ pub async fn drive_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutco
|
|||
}
|
||||
run_turn(prompt, files, bus).await
|
||||
}
|
||||
// Rate-limited: no point retrying immediately — bubble up so the
|
||||
// serve loop can park + emit status before the next attempt.
|
||||
TurnOutcome::RateLimited => return TurnOutcome::RateLimited,
|
||||
other => other,
|
||||
};
|
||||
// Proactive: a turn just completed on a still-healthy session. If its
|
||||
|
|
@ -231,6 +269,9 @@ async fn maybe_checkpoint_and_compact(files: &TurnFiles, bus: &Bus) {
|
|||
TurnOutcome::PromptTooLong => bus.emit(LiveEvent::Note {
|
||||
text: "checkpoint turn overflowed the window — compacting without it".into(),
|
||||
}),
|
||||
TurnOutcome::RateLimited => bus.emit(LiveEvent::Note {
|
||||
text: "checkpoint turn was rate-limited — compacting anyway".into(),
|
||||
}),
|
||||
TurnOutcome::Failed(e) => bus.emit(LiveEvent::Note {
|
||||
text: format!("checkpoint turn failed ({e:#}) — compacting anyway"),
|
||||
}),
|
||||
|
|
@ -254,6 +295,13 @@ pub fn emit_turn_end(bus: &Bus, outcome: &TurnOutcome) {
|
|||
});
|
||||
tracing::info!("turn finished");
|
||||
}
|
||||
TurnOutcome::RateLimited => {
|
||||
bus.emit(LiveEvent::TurnEnd {
|
||||
ok: false,
|
||||
note: Some("rate limited — parking until quota resets".into()),
|
||||
});
|
||||
tracing::warn!("turn rate-limited");
|
||||
}
|
||||
TurnOutcome::Failed(e) => {
|
||||
let note = format!("{e:#}");
|
||||
bus.emit(LiveEvent::TurnEnd {
|
||||
|
|
@ -298,7 +346,8 @@ pub async fn wait_for_login(
|
|||
/// doesn't stall mid-turn — hyperhive owns compaction.
|
||||
pub async fn run_turn(prompt: &str, files: &TurnFiles, bus: &Bus) -> TurnOutcome {
|
||||
match run_claude(prompt, files, bus).await {
|
||||
Ok(too_long) if too_long => TurnOutcome::PromptTooLong,
|
||||
Ok((too_long, _)) if too_long => TurnOutcome::PromptTooLong,
|
||||
Ok((_, rate_limited)) if rate_limited => TurnOutcome::RateLimited,
|
||||
Ok(_) => TurnOutcome::Ok,
|
||||
Err(e) => TurnOutcome::Failed(e),
|
||||
}
|
||||
|
|
@ -314,7 +363,7 @@ pub async fn compact_session(files: &TurnFiles, bus: &Bus) -> Result<()> {
|
|||
bus.emit(LiveEvent::Note {
|
||||
text: "context overflow — running /compact on the persistent session".into(),
|
||||
});
|
||||
let _ = run_claude("/compact", files, bus).await?;
|
||||
let (_, _) = run_claude("/compact", files, bus).await?;
|
||||
bus.emit(LiveEvent::Note {
|
||||
text: "/compact done".into(),
|
||||
});
|
||||
|
|
@ -322,7 +371,7 @@ pub async fn compact_session(files: &TurnFiles, bus: &Bus) -> Result<()> {
|
|||
}
|
||||
|
||||
#[allow(clippy::too_many_lines)]
|
||||
async fn run_claude(prompt: &str, files: &TurnFiles, bus: &Bus) -> Result<bool> {
|
||||
async fn run_claude(prompt: &str, files: &TurnFiles, bus: &Bus) -> Result<(bool, bool)> {
|
||||
// Keep the last STDERR_TAIL_LINES of stderr so a non-zero exit can
|
||||
// include real context in the bail message (and downstream in the
|
||||
// failure notification to the manager) instead of just "exit 1".
|
||||
|
|
@ -377,8 +426,11 @@ async fn run_claude(prompt: &str, files: &TurnFiles, bus: &Bus) -> Result<bool>
|
|||
let stderr = child.stderr.take().expect("piped stderr");
|
||||
|
||||
let prompt_too_long = Arc::new(AtomicBool::new(false));
|
||||
let rate_limited = Arc::new(AtomicBool::new(false));
|
||||
let flag_out = prompt_too_long.clone();
|
||||
let flag_err = prompt_too_long.clone();
|
||||
let rate_out = rate_limited.clone();
|
||||
let rate_err = rate_limited.clone();
|
||||
let bus_out = bus.clone();
|
||||
let bus_err = bus.clone();
|
||||
let pump_stdout = tokio::spawn(async move {
|
||||
|
|
@ -394,6 +446,9 @@ async fn run_claude(prompt: &str, files: &TurnFiles, bus: &Bus) -> Result<bool>
|
|||
if line.contains(PROMPT_TOO_LONG_MARKER) {
|
||||
flag_out.store(true, Ordering::Relaxed);
|
||||
}
|
||||
if RATE_LIMIT_MARKERS.iter().any(|m| line.contains(m)) {
|
||||
rate_out.store(true, Ordering::Relaxed);
|
||||
}
|
||||
match serde_json::from_str::<serde_json::Value>(&line) {
|
||||
Ok(v) => {
|
||||
if let Some(u) = crate::events::TokenUsage::from_assistant_event(&v) {
|
||||
|
|
@ -424,6 +479,9 @@ async fn run_claude(prompt: &str, files: &TurnFiles, bus: &Bus) -> Result<bool>
|
|||
if line.contains(PROMPT_TOO_LONG_MARKER) {
|
||||
flag_err.store(true, Ordering::Relaxed);
|
||||
}
|
||||
if RATE_LIMIT_MARKERS.iter().any(|m| line.contains(m)) {
|
||||
rate_err.store(true, Ordering::Relaxed);
|
||||
}
|
||||
// Mirror to journald so post-mortems work without the web UI
|
||||
// or the events sqlite. The bus event is what the dashboard
|
||||
// renders; the tracing line is what `journalctl -M <c> -b`
|
||||
|
|
@ -444,7 +502,8 @@ async fn run_claude(prompt: &str, files: &TurnFiles, bus: &Bus) -> Result<bool>
|
|||
let _ = pump_stdout.await;
|
||||
let _ = pump_stderr.await;
|
||||
let too_long = prompt_too_long.load(Ordering::Relaxed);
|
||||
if !status.success() && !too_long {
|
||||
let is_rate_limited = rate_limited.load(Ordering::Relaxed);
|
||||
if !status.success() && !too_long && !is_rate_limited {
|
||||
let tail = stderr_tail.lock().unwrap();
|
||||
if tail.is_empty() {
|
||||
bail!("claude exited {status} (no stderr)");
|
||||
|
|
@ -452,5 +511,5 @@ async fn run_claude(prompt: &str, files: &TurnFiles, bus: &Bus) -> Result<bool>
|
|||
let tail_str = tail.iter().cloned().collect::<Vec<_>>().join("\n");
|
||||
bail!("claude exited {status}\nstderr tail:\n{tail_str}");
|
||||
}
|
||||
Ok(too_long)
|
||||
Ok((too_long, is_rate_limited))
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue