model: runtime override via /model slash; fixes for port + bind
- runtime model override: Bus::{model,set_model} + POST /api/model
(form-encoded {model: name}). turn.rs reads bus.model() per turn
so a flip lands on the next claude invocation. /api/state grows
a model field; agent page shows a 'model · <name>' chip in the
state row. '/model <name>' slash command POSTs to the endpoint
and refreshes state.
- port regression fix: agent_web_port no longer probes forward for
*existing* agents (the previous fix shifted ports for any agent
without a port file, including legacy ones whose container was
already bound to the bare hashed port — the dashboard rendered the
new port while the container was still on the old one, causing
connection errors). new
rule: port file exists → use it; absent + applied flake present
→ legacy, persist port_hash without probing; absent + no applied
flake → fresh spawn, probe forward.
- SO_REUSEADDR on both the dashboard and per-agent web UI binds
via tokio::net::TcpSocket. operator hit 12 retries failing on
manager :8000 — REUSEADDR handles the TIME_WAIT case cleanly
without a new dep; retry still covers the genuine
process-still-alive overlap.
todo: drops the model-override entry (shipped); adds two new
items — model persistence (optional, future), and custom
per-agent MCP tools (groundwork for moving bitburner-agent into
hyperhive).
This commit is contained in:
parent
7d93dd9db4
commit
6db38cf70c
9 changed files with 196 additions and 39 deletions
|
|
@ -72,13 +72,13 @@ pub async fn serve(port: u16, coord: Arc<Coordinator>) -> Result<()> {
|
|||
// `/messages/stream` for broker traffic.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Retry-on-AddrInUse bind. Same shape as the per-agent variant —
|
||||
/// `SO_REUSEADDR` bind with retry. Mirrors the per-agent variant —
|
||||
/// hive-c0re restarts also race the previous process's socket release.
|
||||
async fn bind_with_retry(addr: SocketAddr) -> Result<tokio::net::TcpListener> {
|
||||
let mut delay_ms = 250u64;
|
||||
let mut attempts = 0u32;
|
||||
loop {
|
||||
match tokio::net::TcpListener::bind(addr).await {
|
||||
match try_bind(addr) {
|
||||
Ok(l) => return Ok(l),
|
||||
Err(e) if e.kind() == std::io::ErrorKind::AddrInUse && attempts < 12 => {
|
||||
tracing::warn!(
|
||||
|
|
@ -96,6 +96,16 @@ async fn bind_with_retry(addr: SocketAddr) -> Result<tokio::net::TcpListener> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Makes a single attempt to open a listening TCP socket on `addr` with
/// `SO_REUSEADDR` enabled.
///
/// The socket family (IPv4 or IPv6) follows the family of `addr`. The
/// socket is bound and then put into listening mode with a backlog of
/// 1024. Any I/O error from creating the socket, setting the option,
/// binding, or listening is returned to the caller unchanged.
fn try_bind(addr: SocketAddr) -> std::io::Result<tokio::net::TcpListener> {
    use tokio::net::TcpSocket;

    let socket = if addr.is_ipv4() {
        TcpSocket::new_v4()?
    } else {
        TcpSocket::new_v6()?
    };
    // The reuse option is applied to the unbound socket, ahead of `bind`.
    socket.set_reuseaddr(true)?;
    socket.bind(addr)?;
    socket.listen(1024)
}
|
||||
|
||||
async fn serve_index() -> impl IntoResponse {
|
||||
Html(include_str!("../assets/index.html"))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,13 +46,18 @@ const DEFAULT_MEMORY_MAX: &str = "2G";
|
|||
const DEFAULT_CPU_QUOTA: &str = "50%";
|
||||
|
||||
/// Returns the per-agent web UI port. Manager is fixed at `MANAGER_PORT`.
|
||||
/// For sub-agents the port is sticky once chosen: looked up from
|
||||
/// `agent_state_root(name)/port` if present, otherwise derived from
|
||||
/// the FNV-1a hash of the name and *probed forward* through the
|
||||
/// allocated range to skip any port another sub-agent has already
|
||||
/// claimed (birthday-paradox collisions are real even at 2–3
|
||||
/// agents). The chosen port is written back so subsequent calls
|
||||
/// resolve to the same value without re-probing.
|
||||
/// For sub-agents the port is sticky once chosen:
|
||||
///
|
||||
/// - **Port file present** (`state_root/port`): use it. End of story.
|
||||
/// - **Port file absent, applied flake present**: this is a legacy
|
||||
/// agent whose container is already bound to the bare
|
||||
/// `port_hash(name)`. Don't probe; just migrate by writing that
|
||||
/// value to the port file. The container stays where it is and
|
||||
/// subsequent renders agree with it.
|
||||
/// - **Port file absent, no applied flake**: this is a fresh spawn.
|
||||
/// Probe forward from `port_hash(name)` to skip any port another
|
||||
/// sub-agent has already claimed (via port file or legacy hash).
|
||||
/// Write the chosen port back.
|
||||
#[must_use]
|
||||
pub fn agent_web_port(name: &str) -> u16 {
|
||||
if name == MANAGER_NAME {
|
||||
|
|
@ -66,27 +71,36 @@ pub fn agent_web_port(name: &str) -> u16 {
|
|||
{
|
||||
return port;
|
||||
}
|
||||
let taken = scan_taken_ports(name);
|
||||
let start = port_hash(name);
|
||||
let mut port = start;
|
||||
for _ in 0..WEB_PORT_RANGE {
|
||||
if !taken.contains(&port) {
|
||||
break;
|
||||
let applied_exists = crate::coordinator::Coordinator::agent_applied_dir(name).exists();
|
||||
let chosen = if applied_exists {
|
||||
// Legacy agent — container already running on the hashed
|
||||
// port. Don't move it; just persist the value so future
|
||||
// calls bypass this path.
|
||||
port_hash(name)
|
||||
} else {
|
||||
let taken = scan_taken_ports(name);
|
||||
let start = port_hash(name);
|
||||
let mut port = start;
|
||||
for _ in 0..WEB_PORT_RANGE {
|
||||
if !taken.contains(&port) {
|
||||
break;
|
||||
}
|
||||
port = next_port(port);
|
||||
if port == start {
|
||||
// Range fully exhausted (very unlikely — 900 slots) —
|
||||
// give up and use the hashed value; collisions are
|
||||
// surfaced as bind errors by the harness retry loop.
|
||||
tracing::warn!(%name, "agent_web_port: range exhausted, returning hash");
|
||||
break;
|
||||
}
|
||||
}
|
||||
port = next_port(port);
|
||||
if port == start {
|
||||
// Range fully exhausted (very unlikely — 900 slots) —
|
||||
// give up and just use the hashed value; collisions are
|
||||
// surfaced as bind errors by the harness retry loop.
|
||||
tracing::warn!(%name, "agent_web_port: range exhausted, returning hash");
|
||||
return start;
|
||||
}
|
||||
}
|
||||
port
|
||||
};
|
||||
let _ = std::fs::create_dir_all(&state_root);
|
||||
if let Err(e) = std::fs::write(&port_file, format!("{port}\n")) {
|
||||
if let Err(e) = std::fs::write(&port_file, format!("{chosen}\n")) {
|
||||
tracing::warn!(error = ?e, file = %port_file.display(), "persisting agent port failed");
|
||||
}
|
||||
port
|
||||
chosen
|
||||
}
|
||||
|
||||
fn port_hash(name: &str) -> u16 {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue