//! Per-container state watcher. Polls every managed container on a //! fixed interval, tracks three orthogonal state-sets across ticks, //! and emits a `HelperEvent` to the manager on each transition: //! //! - **running**: container is up. running → stopped without an //! operator-initiated transient (`Stopping` / `Restarting` / //! `Destroying` / `Rebuilding`) → `ContainerCrash`. //! - **logged-in**: claude session dir is populated. ! → ✓ → //! `LoggedIn`; ✓ → ! → `NeedsLogin` (rare — usually only fires //! on a fresh spawn / purge). //! - **up-to-date**: agent's recorded flake rev matches current. ✓ //! → ! → `NeedsUpdate`. The reverse direction (`NeedsUpdate` //! resolved) is covered by `Rebuilt`, so no separate event. //! //! D-Bus subscription would be lower-latency for the first axis, //! but polling is simpler and a 10s detection delay is fine. use std::collections::HashSet; use std::path::Path; use std::sync::Arc; use std::time::Duration; use crate::coordinator::{Coordinator, TransientKind}; use crate::lifecycle::{self, AGENT_PREFIX, MANAGER_NAME}; const POLL_INTERVAL: Duration = Duration::from_secs(10); pub fn spawn(coord: Arc) { tokio::spawn(async move { let mut prev_running: HashSet = HashSet::new(); let mut prev_logged_in: HashSet = HashSet::new(); let mut prev_updated: HashSet = HashSet::new(); let mut seeded = false; loop { let raw = lifecycle::list().await.unwrap_or_default(); let current_rev = crate::auto_update::current_flake_rev(&coord.hyperhive_flake); let mut current_running = HashSet::new(); let mut current_logged_in = HashSet::new(); let mut current_updated = HashSet::new(); let mut sub_agents: Vec = Vec::new(); for c in &raw { let logical = if c == MANAGER_NAME { MANAGER_NAME.to_owned() } else if let Some(n) = c.strip_prefix(AGENT_PREFIX) { n.to_owned() } else { continue; }; if logical != MANAGER_NAME { sub_agents.push(logical.clone()); } if lifecycle::is_running(&logical).await { current_running.insert(logical.clone()); } if logical != MANAGER_NAME && claude_has_session(&Coordinator::agent_claude_dir(&logical)) { current_logged_in.insert(logical.clone()); } if let Some(rev) = current_rev.as_deref() && !crate::auto_update::agent_needs_update(&logical, rev) { current_updated.insert(logical.clone()); } } if seeded { emit_crash_transitions(&coord, &prev_running, ¤t_running); emit_login_transitions(&coord, &prev_logged_in, ¤t_logged_in, &sub_agents); emit_update_transitions(&coord, &prev_updated, ¤t_updated, &sub_agents); } prev_running = current_running; prev_logged_in = current_logged_in; prev_updated = current_updated; seeded = true; tokio::time::sleep(POLL_INTERVAL).await; } }); } fn emit_crash_transitions(coord: &Coordinator, prev: &HashSet, current: &HashSet) { let transients = coord.transient_snapshot(); for stopped in prev.difference(current) { let deliberate = transients.get(stopped).is_some_and(|st| { matches!( st.kind, TransientKind::Stopping | TransientKind::Restarting | TransientKind::Destroying | TransientKind::Rebuilding ) }); if deliberate { continue; } tracing::warn!(agent = %stopped, "container crash detected"); coord.notify_manager(&hive_sh4re::HelperEvent::ContainerCrash { agent: stopped.clone(), note: Some("container stopped without an operator action".into()), }); } } fn emit_login_transitions( coord: &Coordinator, prev: &HashSet, current: &HashSet, sub_agents: &[String], ) { for agent in current.difference(prev) { tracing::info!(%agent, "agent logged in"); coord.notify_manager(&hive_sh4re::HelperEvent::LoggedIn { agent: agent.clone(), }); } // Only count NeedsLogin transitions for agents that exist and // are *not* logged in — the difference set above already gives // us "was in prev, gone from current" but we also want to fire // for agents that newly appeared as not-logged-in (post-spawn / // post-purge). Treat sub_agents minus current as the // currently-needs-login set; emit when an agent enters it. let prev_needs: HashSet<&str> = sub_agents .iter() .map(String::as_str) .filter(|n| !prev.contains(*n)) .collect(); let current_needs: HashSet<&str> = sub_agents .iter() .map(String::as_str) .filter(|n| !current.contains(*n)) .collect(); for agent in current_needs.difference(&prev_needs) { tracing::info!(%agent, "agent needs login"); coord.notify_manager(&hive_sh4re::HelperEvent::NeedsLogin { agent: (*agent).to_owned(), }); } } fn emit_update_transitions( coord: &Coordinator, prev_updated: &HashSet, current_updated: &HashSet, sub_agents: &[String], ) { // Fired on the "was up-to-date, now isn't" transition. The // reverse (rebuilt) is already covered by HelperEvent::Rebuilt. let prev_stale: HashSet<&str> = sub_agents .iter() .map(String::as_str) .filter(|n| !prev_updated.contains(*n)) .collect(); let current_stale: HashSet<&str> = sub_agents .iter() .map(String::as_str) .filter(|n| !current_updated.contains(*n)) .collect(); for agent in current_stale.difference(&prev_stale) { tracing::info!(%agent, "agent needs update"); coord.notify_manager(&hive_sh4re::HelperEvent::NeedsUpdate { agent: (*agent).to_owned(), }); } } /// Mirrors `dashboard::claude_has_session`. Lives here too so the /// watcher doesn't depend on dashboard internals. fn claude_has_session(dir: &Path) -> bool { let Ok(entries) = std::fs::read_dir(dir) else { return false; }; entries .flatten() .any(|e| e.file_type().is_ok_and(|t| t.is_file())) }