model: runtime override via /model slash command; fixes for port + bind
- runtime model override: Bus::{model,set_model} + POST /api/model
(form-encoded {model: name}). turn.rs reads bus.model() per turn
so a flip lands on the next claude invocation. /api/state grows
a model field; agent page shows a 'model · <name>' chip in the
state row. '/model <name>' slash command POSTs to the endpoint
and refreshes state.
- port regression fix: agent_web_port no longer probes forward for
*existing* agents (the previous fix shifted ports for any agent
without a port file, including legacy ones whose container was
already bound to the bare hashed port — the dashboard rendered the
new port while the container was still on the old one, causing
connection errors). new
rule: port file exists → use it; absent + applied flake present
→ legacy, persist port_hash without probing; absent + no applied
flake → fresh spawn, probe forward.
- SO_REUSEADDR on both the dashboard and per-agent web UI binds
via tokio::net::TcpSocket. operator hit 12 retries failing on
manager :8000 — REUSEADDR handles the TIME_WAIT case cleanly
without a new dep; retry still covers the genuine
process-still-alive overlap.
todo: drops the model-override entry (shipped); adds two new
items — model persistence (optional, future), and custom
per-agent MCP tools (groundwork for moving bitburner-agent into
hyperhive).
This commit is contained in:
parent
7d93dd9db4
commit
6db38cf70c
9 changed files with 196 additions and 39 deletions
|
|
@ -140,6 +140,13 @@ pub enum TurnState {
|
|||
Compacting,
|
||||
}
|
||||
|
||||
/// Model name used for `claude --model` when nothing has been set at
|
||||
/// runtime. The operator can switch via `/model <name>` in the web
|
||||
/// terminal; the chosen model is held in `Bus::model` for the rest of
|
||||
/// the harness process's life (it resets to this default on restart, by
|
||||
/// design — operator overrides shouldn't survive accidentally).
|
||||
pub const DEFAULT_MODEL: &str = "haiku";
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Bus {
|
||||
tx: Arc<broadcast::Sender<LiveEvent>>,
|
||||
|
|
@ -149,6 +156,9 @@ pub struct Bus {
|
|||
store: Option<Arc<EventStore>>,
|
||||
/// Current turn-loop state + since-when (unix seconds).
|
||||
state: Arc<Mutex<(TurnState, i64)>>,
|
||||
/// Model name passed to `claude --model`. Default `haiku`; the
|
||||
/// operator can override at runtime via `POST /api/model`.
|
||||
model: Arc<Mutex<String>>,
|
||||
}
|
||||
|
||||
impl Bus {
|
||||
|
|
@ -171,9 +181,23 @@ impl Bus {
|
|||
tx: Arc::new(tx),
|
||||
store,
|
||||
state: Arc::new(Mutex::new((TurnState::Idle, now_unix()))),
|
||||
model: Arc::new(Mutex::new(DEFAULT_MODEL.to_owned())),
|
||||
}
|
||||
}
|
||||
|
||||
/// Currently-selected claude model name. Read on every turn so a
|
||||
/// `/model <name>` flip takes effect on the next turn.
|
||||
#[must_use]
|
||||
pub fn model(&self) -> String {
|
||||
self.model.lock().unwrap().clone()
|
||||
}
|
||||
|
||||
/// Switch the model for future turns. The current turn (if any)
|
||||
/// keeps the model it was already running.
|
||||
pub fn set_model(&self, name: impl Into<String>) {
|
||||
*self.model.lock().unwrap() = name.into();
|
||||
}
|
||||
|
||||
/// Update the harness's authoritative turn-loop state. Records
|
||||
/// the transition time so `state_snapshot` can return a since-age.
|
||||
pub fn set_state(&self, next: TurnState) {
|
||||
|
|
|
|||
|
|
@ -227,13 +227,14 @@ async fn run_claude(
|
|||
flavor: mcp::Flavor,
|
||||
mode: ClaudeMode,
|
||||
) -> Result<bool> {
|
||||
let model = bus.model();
|
||||
let mut cmd = Command::new("claude");
|
||||
cmd.arg("--print")
|
||||
.arg("--verbose")
|
||||
.arg("--output-format")
|
||||
.arg("stream-json")
|
||||
.arg("--model")
|
||||
.arg("haiku")
|
||||
.arg(&model)
|
||||
.arg("--continue")
|
||||
.arg("--settings")
|
||||
.arg(settings);
|
||||
|
|
|
|||
|
|
@ -81,6 +81,7 @@ pub async fn serve(
|
|||
.route("/login/cancel", post(post_login_cancel))
|
||||
.route("/api/cancel", post(post_cancel_turn))
|
||||
.route("/api/compact", post(post_compact))
|
||||
.route("/api/model", post(post_set_model))
|
||||
.with_state(state);
|
||||
let addr = SocketAddr::from(([0, 0, 0, 0], port));
|
||||
let listener = bind_with_retry(addr, "web UI").await?;
|
||||
|
|
@ -93,16 +94,17 @@ pub async fn serve(
|
|||
// Static assets + state snapshot
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Bind a TCP listener, retrying on `AddrInUse` for up to ~20s.
|
||||
/// nspawn restarts can race the previous harness's socket release;
|
||||
/// without retry the new harness fails to bind and systemd just
|
||||
/// keeps restarting it. `SO_REUSEADDR` would be the proper fix but
|
||||
/// would require socket2; retry is good enough here.
|
||||
/// Bind a TCP listener with `SO_REUSEADDR` set, retrying on
|
||||
/// `AddrInUse` for up to ~20s. nspawn restarts can race the previous
|
||||
/// harness's socket release; `SO_REUSEADDR` lets us reclaim a port
|
||||
/// still in `TIME_WAIT` from a clean previous exit, and the retry
|
||||
/// covers the case where the previous process is genuinely still
|
||||
/// alive (systemd restart-delay overlap).
|
||||
async fn bind_with_retry(addr: SocketAddr, label: &str) -> Result<tokio::net::TcpListener> {
|
||||
let mut delay_ms = 250u64;
|
||||
let mut attempts = 0u32;
|
||||
loop {
|
||||
match tokio::net::TcpListener::bind(addr).await {
|
||||
match try_bind(addr) {
|
||||
Ok(l) => return Ok(l),
|
||||
Err(e) if e.kind() == std::io::ErrorKind::AddrInUse && attempts < 12 => {
|
||||
tracing::warn!(
|
||||
|
|
@ -120,6 +122,16 @@ async fn bind_with_retry(addr: SocketAddr, label: &str) -> Result<tokio::net::Tc
|
|||
}
|
||||
}
|
||||
|
||||
/// Open a listening TCP socket on `addr` with `SO_REUSEADDR` enabled.
///
/// The socket family (IPv4 / IPv6) is chosen to match `addr`. The
/// reuse-address option is set before binding, and the socket then
/// listens with a backlog of 1024. Any I/O error from creating,
/// configuring, binding, or listening is returned unchanged so the
/// caller can inspect its kind.
fn try_bind(addr: SocketAddr) -> std::io::Result<tokio::net::TcpListener> {
    let socket = if addr.is_ipv4() {
        tokio::net::TcpSocket::new_v4()?
    } else {
        tokio::net::TcpSocket::new_v6()?
    };
    socket.set_reuseaddr(true)?;
    socket.bind(addr)?;
    let listener = socket.listen(1024)?;
    Ok(listener)
}
|
||||
|
||||
async fn serve_index() -> impl IntoResponse {
|
||||
(
|
||||
[("content-type", "text/html; charset=utf-8")],
|
||||
|
|
@ -158,6 +170,10 @@ struct StateSnapshot {
|
|||
/// client-side off this rather than tracking it from SSE events.
|
||||
turn_state: crate::events::TurnState,
|
||||
turn_state_since: i64,
|
||||
/// Currently-active claude model name. Reflected on the page so
|
||||
/// the operator can see what they just switched to (and what's
|
||||
/// in flight). Mutable at runtime via `POST /api/model`.
|
||||
model: String,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
|
|
@ -193,6 +209,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
|
|||
.unwrap_or(7000);
|
||||
let inbox = recent_inbox(&state.socket, state.flavor).await;
|
||||
let (turn_state, turn_state_since) = state.bus.state_snapshot();
|
||||
let model = state.bus.model();
|
||||
axum::Json(StateSnapshot {
|
||||
label: state.label.clone(),
|
||||
dashboard_port,
|
||||
|
|
@ -201,6 +218,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
|
|||
inbox,
|
||||
turn_state,
|
||||
turn_state_since,
|
||||
model,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -351,6 +369,30 @@ async fn post_login_cancel(State(state): State<AppState>) -> Response {
|
|||
/// the "/compact done" note) lands in the live event panel like any
|
||||
/// other turn. If a regular turn is in flight, claude's own session
|
||||
/// lock will reject this one and we surface the error as a Note.
|
||||
// NOTE(review): the `///` lines directly above this struct talk about
// the "/compact done" note and claude's session lock — they read like
// the doc comment for the `/compact` handler, not for this form. They
// appear to have been separated from `post_compact` by this insertion;
// confirm and move them back above that function.
//
// Form body accepted by `POST /api/model`: a single `model` field
// carrying the requested model name.
#[derive(Deserialize)]
|
||||
struct ModelForm {
|
||||
model: String,
|
||||
}
|
||||
|
||||
/// Switch the model for future turns. The current turn (if any)
|
||||
/// keeps its model; `/model <name>` applies starting with the next
|
||||
/// `recv` cycle. Empty / whitespace-only inputs are rejected. No
|
||||
/// claude-side validation — we just hand the string through to
|
||||
/// `claude --model <name>`; an unknown model surfaces as a turn
|
||||
/// failure in the live panel and the operator can revert.
|
||||
async fn post_set_model(State(state): State<AppState>, Form(form): Form<ModelForm>) -> Response {
|
||||
let name = form.model.trim();
|
||||
if name.is_empty() {
|
||||
return error_response("model: name required");
|
||||
}
|
||||
state.bus.set_model(name);
|
||||
state.bus.emit(crate::events::LiveEvent::Note(format!(
|
||||
"operator: /model — claude model set to '{name}' for future turns"
|
||||
)));
|
||||
tracing::info!(%name, "operator set model");
|
||||
Redirect::to("/").into_response()
|
||||
}
|
||||
|
||||
async fn post_compact(State(state): State<AppState>) -> Response {
|
||||
let bus = state.bus.clone();
|
||||
let socket = state.socket.clone();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue