add host-side turn-stats vacuum (90d retention, hourly sweep)
This commit is contained in:
parent
d0b65b1f47
commit
6f7cc6e77d
2 changed files with 64 additions and 0 deletions
|
|
@ -17,6 +17,7 @@ mod crash_watch;
|
|||
mod dashboard;
|
||||
mod dashboard_events;
|
||||
mod events_vacuum;
|
||||
mod stats_vacuum;
|
||||
mod forge;
|
||||
mod lifecycle;
|
||||
mod limits;
|
||||
|
|
@ -168,6 +169,9 @@ async fn main() -> Result<()> {
|
|||
// Per-agent events.sqlite vacuum: host-side so the harness
|
||||
// doesn't need any retention wiring of its own.
|
||||
events_vacuum::spawn(coord.clone());
|
||||
// Per-agent turn-stats.sqlite vacuum: same pattern, 90-day
|
||||
// retention so trend analysis has enough history.
|
||||
stats_vacuum::spawn(coord.clone());
|
||||
// Container crash watcher: emits HelperEvent::ContainerCrash
|
||||
// when a previously-running container goes away without an
|
||||
// operator-initiated transient state.
|
||||
|
|
|
|||
60
hive-c0re/src/stats_vacuum.rs
Normal file
60
hive-c0re/src/stats_vacuum.rs
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
//! Host-side vacuum of every per-agent turn-stats.sqlite. The harness
|
||||
//! writes to `/state/hyperhive-turn-stats.sqlite` (bind-mounted from
|
||||
//! `/var/lib/hyperhive/agents/<name>/state/`); we open the same file
|
||||
//! from the host every hour and delete rows older than `KEEP_SECS`.
|
||||
//! Mirrors `events_vacuum` in structure — host-side so the harness
|
||||
//! can't disable it, age-only so a chatty burst doesn't evict old
|
||||
//! rows sooner than expected. 90-day retention keeps enough history
|
||||
//! for trend analysis without unbounded growth.
|
||||
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use rusqlite::{Connection, Result, params};
|
||||
|
||||
use crate::coordinator::Coordinator;
|
||||
|
||||
/// Pause between two sweeps of the background loop: one hour. The loop
/// sleeps for this long *after* each sweep finishes, so the effective
/// period is the sweep duration plus this interval.
const VACUUM_INTERVAL: Duration = Duration::from_secs(3600);
|
||||
/// Retention window in seconds (90 days). `vacuum_file` subtracts this
/// from the current Unix time to get the cutoff below which rows are
/// deleted.
const KEEP_SECS: i64 = 90 * 24 * 3600;
|
||||
|
||||
/// Background loop: sweep every existing agent state dir hourly, run
|
||||
/// the vacuum SQL against its turn-stats.sqlite if present. Errors
|
||||
/// are logged but don't tear the loop down.
|
||||
pub fn spawn(coord: Arc<Coordinator>) {
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
sweep_once();
|
||||
let _ = &coord;
|
||||
tokio::time::sleep(VACUUM_INTERVAL).await;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn sweep_once() {
|
||||
for name in Coordinator::kept_state_names() {
|
||||
let path =
|
||||
Coordinator::agent_notes_dir(&name).join("hyperhive-turn-stats.sqlite");
|
||||
if !path.exists() {
|
||||
continue;
|
||||
}
|
||||
match vacuum_file(&path) {
|
||||
Ok(0) => {}
|
||||
Ok(n) => tracing::info!(agent = %name, removed = n, "turn-stats vacuum"),
|
||||
Err(e) => tracing::warn!(agent = %name, error = ?e, "turn-stats vacuum failed"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn vacuum_file(path: &Path) -> Result<u64> {
|
||||
let conn = Connection::open(path)?;
|
||||
let now = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.ok()
|
||||
.and_then(|d| i64::try_from(d.as_secs()).ok())
|
||||
.unwrap_or(0);
|
||||
let cutoff = now - KEEP_SECS;
|
||||
let removed =
|
||||
conn.execute("DELETE FROM turn_stats WHERE started_at < ?1", params![cutoff])?;
|
||||
Ok(u64::try_from(removed).unwrap_or(0))
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue