events.sqlite vacuum moves host-side
retention is a host concern — agents have no business doing their own cleanup, and a misbehaving harness could skip it. drop spawn_events_vacuum from both hive-ag3nt and hive-m1nd, drop the matching Bus::vacuum + EventStore::vacuum methods. new hive_c0re::events_vacuum module sweeps every existing agents/<name>/state/hyperhive-events.sqlite on the same hourly cadence as the broker vacuum. same two-stage delete (older than 7 days, trim to 2000 newest). called from main alongside broker vacuum. also: server-side state badge entered into todo.md (today's badge is derived client-side from sse, fine for idle/thinking but a state machine that grows compacting/napping wants authoritative status from the harness).
This commit is contained in:
parent
897e7c07ae
commit
89ccc5e6c5
6 changed files with 89 additions and 63 deletions
68
hive-c0re/src/events_vacuum.rs
Normal file
68
hive-c0re/src/events_vacuum.rs
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
//! Host-side vacuum of every per-agent events.sqlite. The harness
|
||||
//! writes to `/state/hyperhive-events.sqlite` (bind-mounted from
|
||||
//! `/var/lib/hyperhive/agents/<name>/state/`); we open the same file
|
||||
//! from the host every hour and apply the same two-stage delete
|
||||
//! (drop rows older than `keep_secs`, then trim to `keep_rows`
|
||||
//! newest). Keeping retention on the host means agents don't need any
|
||||
//! cleanup wiring of their own, and a misbehaving harness can't
|
||||
//! disable its own vacuum.
|
||||
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use rusqlite::{Connection, Result, params};
|
||||
|
||||
use crate::coordinator::Coordinator;
|
||||
|
||||
/// Pause between two consecutive sweeps of the background loop (3600 s = one hour).
const VACUUM_INTERVAL: Duration = Duration::from_secs(3600);
|
||||
/// Age limit for the first delete stage, in seconds (7 days): rows whose `ts` is below
/// `now - KEEP_SECS` are removed.
const KEEP_SECS: i64 = 7 * 24 * 3600;
|
||||
/// Row cap for the second delete stage: only the `KEEP_ROWS` rows with the highest `id`
/// survive a vacuum.
const KEEP_ROWS: i64 = 2000;
|
||||
|
||||
/// Background loop: sweep every existing agent state dir hourly, run
|
||||
/// the vacuum SQL against its events.sqlite if present. Errors are
|
||||
/// logged but don't tear the loop down.
|
||||
pub fn spawn(coord: Arc<Coordinator>) {
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
sweep_once();
|
||||
// touching coord keeps the type wired in case future sweeps
|
||||
// need approvals/etc.; the ref is otherwise unused today.
|
||||
let _ = &coord;
|
||||
tokio::time::sleep(VACUUM_INTERVAL).await;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn sweep_once() {
|
||||
for name in Coordinator::kept_state_names() {
|
||||
let path = Coordinator::agent_notes_dir(&name).join("hyperhive-events.sqlite");
|
||||
if !path.exists() {
|
||||
continue;
|
||||
}
|
||||
match vacuum_file(&path) {
|
||||
Ok(0) => {}
|
||||
Ok(n) => tracing::info!(agent = %name, removed = n, "events vacuum"),
|
||||
Err(e) => tracing::warn!(agent = %name, error = ?e, "events vacuum failed"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn vacuum_file(path: &Path) -> Result<u64> {
|
||||
let conn = Connection::open(path)?;
|
||||
let now = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.ok()
|
||||
.and_then(|d| i64::try_from(d.as_secs()).ok())
|
||||
.unwrap_or(0);
|
||||
let cutoff = now - KEEP_SECS;
|
||||
let by_age = conn.execute("DELETE FROM events WHERE ts < ?1", params![cutoff])?;
|
||||
let by_count = conn.execute(
|
||||
"DELETE FROM events
|
||||
WHERE id NOT IN (
|
||||
SELECT id FROM events ORDER BY id DESC LIMIT ?1
|
||||
)",
|
||||
params![KEEP_ROWS],
|
||||
)?;
|
||||
Ok(u64::try_from(by_age + by_count).unwrap_or(0))
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue