diff --git a/hive-c0re/src/main.rs b/hive-c0re/src/main.rs index 028a7d5..df9e989 100644 --- a/hive-c0re/src/main.rs +++ b/hive-c0re/src/main.rs @@ -17,6 +17,7 @@ mod crash_watch; mod dashboard; mod dashboard_events; mod events_vacuum; +mod stats_vacuum; mod forge; mod lifecycle; mod limits; @@ -168,6 +169,9 @@ async fn main() -> Result<()> { // Per-agent events.sqlite vacuum: host-side so the harness // doesn't need any retention wiring of its own. events_vacuum::spawn(coord.clone()); + // Per-agent turn-stats.sqlite vacuum: same pattern, 90-day + // retention so trend analysis has enough history. + stats_vacuum::spawn(coord.clone()); // Container crash watcher: emits HelperEvent::ContainerCrash // when a previously-running container goes away without an // operator-initiated transient state. diff --git a/hive-c0re/src/stats_vacuum.rs b/hive-c0re/src/stats_vacuum.rs new file mode 100644 index 0000000..17db547 --- /dev/null +++ b/hive-c0re/src/stats_vacuum.rs @@ -0,0 +1,60 @@ +//! Host-side vacuum of every per-agent turn-stats.sqlite. The harness +//! writes to `/state/hyperhive-turn-stats.sqlite` (bind-mounted from +//! `/var/lib/hyperhive/agents/<name>/state/`); we open the same file +//! from the host every hour and delete rows older than `KEEP_SECS`. +//! Mirrors `events_vacuum` in structure — host-side so the harness +//! can't disable it, age-only so a chatty burst doesn't evict old +//! rows sooner than expected. 90-day retention keeps enough history +//! for trend analysis without unbounded growth. + +use std::path::Path; +use std::sync::Arc; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; + +use rusqlite::{Connection, Result, params}; + +use crate::coordinator::Coordinator; + +const VACUUM_INTERVAL: Duration = Duration::from_secs(3600); +const KEEP_SECS: i64 = 90 * 24 * 3600; + +/// Background loop: sweep every existing agent state dir hourly, run +/// the vacuum SQL against its turn-stats.sqlite if present. 
Errors +/// are logged but don't tear the loop down. +pub fn spawn(coord: Arc) { + tokio::spawn(async move { + loop { + sweep_once(); + let _ = &coord; + tokio::time::sleep(VACUUM_INTERVAL).await; + } + }); +} + +fn sweep_once() { + for name in Coordinator::kept_state_names() { + let path = + Coordinator::agent_notes_dir(&name).join("hyperhive-turn-stats.sqlite"); + if !path.exists() { + continue; + } + match vacuum_file(&path) { + Ok(0) => {} + Ok(n) => tracing::info!(agent = %name, removed = n, "turn-stats vacuum"), + Err(e) => tracing::warn!(agent = %name, error = ?e, "turn-stats vacuum failed"), + } + } +} + +fn vacuum_file(path: &Path) -> Result { + let conn = Connection::open(path)?; + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .ok() + .and_then(|d| i64::try_from(d.as_secs()).ok()) + .unwrap_or(0); + let cutoff = now - KEEP_SECS; + let removed = + conn.execute("DELETE FROM turn_stats WHERE started_at < ?1", params![cutoff])?; + Ok(u64::try_from(removed).unwrap_or(0)) +}