events_vacuum: drop row cap, age-only retention (7d min)

This commit is contained in:
müde 2026-05-17 23:19:08 +02:00
parent 8f5752980f
commit c2c475bd65

View file

@ -1,11 +1,13 @@
//! Host-side vacuum of every per-agent events.sqlite. The harness
//! writes to `/state/hyperhive-events.sqlite` (bind-mounted from
//! `/var/lib/hyperhive/agents/<name>/state/`); we open the same file
//! from the host every hour and apply the same two-stage delete
//! (drop rows older than `keep_secs`, then trim to `keep_rows`
//! newest). Keeping retention on the host means agents don't need any
//! cleanup wiring of their own, and a misbehaving harness can't
//! disable its own vacuum.
//! from the host every hour and delete rows older than `KEEP_SECS`.
//! Age-only — no row cap — so a chatty turn doesn't lose history
//! sooner than a quiet one; disk pressure on a sustained burst is
//! a cheaper problem than a missing event when the operator is
//! debugging a regression. Keeping retention on the host means
//! agents don't need any cleanup wiring of their own, and a
//! misbehaving harness can't disable its own vacuum.
use std::path::Path;
use std::sync::Arc;
@ -17,7 +19,6 @@ use crate::coordinator::Coordinator;
/// Time between vacuum sweeps: 3600 seconds, i.e. the hourly cadence the
/// module docs describe.
const VACUUM_INTERVAL: Duration = Duration::from_secs(3600);
/// Retention window in seconds (7 days). `vacuum_file` computes
/// `cutoff = now - KEEP_SECS` and deletes every `events` row whose `ts`
/// is below that cutoff.
const KEEP_SECS: i64 = 7 * 24 * 3600;
const KEEP_ROWS: i64 = 2000;
/// Background loop: sweep every existing agent state dir hourly, run
/// the vacuum SQL against its events.sqlite if present. Errors are
@ -56,13 +57,6 @@ fn vacuum_file(path: &Path) -> Result<u64> {
.and_then(|d| i64::try_from(d.as_secs()).ok())
.unwrap_or(0);
let cutoff = now - KEEP_SECS;
let by_age = conn.execute("DELETE FROM events WHERE ts < ?1", params![cutoff])?;
let by_count = conn.execute(
"DELETE FROM events
WHERE id NOT IN (
SELECT id FROM events ORDER BY id DESC LIMIT ?1
)",
params![KEEP_ROWS],
)?;
Ok(u64::try_from(by_age + by_count).unwrap_or(0))
let removed = conn.execute("DELETE FROM events WHERE ts < ?1", params![cutoff])?;
Ok(u64::try_from(removed).unwrap_or(0))
}