harness: add /screen page and /screen/ws WebSocket VNC relay

Reads /etc/hyperhive/gui.json at startup to get the VNC port written
by the weston-vnc ExecStart script (issue #50). Adds:
- gui_vnc_port: Option<u16> on AppState
- gui_enabled: bool on StateSnapshot (for issue #52 screen link)
- GET /screen: serves a minimal RFB-over-WebSocket viewer (screen.html)
- GET /screen/ws: upgrades to WebSocket and byte-pumps to 127.0.0.1:<vnc_port>

The relay is a pure two-task byte pump (WS→TCP and TCP→WS), transparent
to any RFB variant including VeNCrypt. Returns 404 when gui is not
enabled.

screen.html is a self-contained RFB client: handshake, FramebufferUpdate
(Raw encoding), pointer and keyboard forwarding — enough to display the
desktop and interact with it. noVNC assets (issue #52) replace this.

Closes #51
This commit is contained in:
iris 2026-05-20 14:24:05 +02:00 committed by Mara
parent 29df223650
commit 2027e94432
5 changed files with 651 additions and 1 deletions

View file

@ -9,6 +9,7 @@ workspace = true
[dependencies]
anyhow.workspace = true
axum.workspace = true
futures-util = "0.3"
clap.workspace = true
hive-fr0nt.workspace = true
hive-sh4re.workspace = true

View file

@ -0,0 +1,345 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>screen</title>
<style>
/* Catppuccin Mocha palette (mirrors base.css) */
:root {
--base: #1e1e2e;
--mantle: #181825;
--crust: #11111b;
--text: #cdd6f4;
--subtext0:#a6adc8;
--surface0:#313244;
--surface1:#45475a;
--blue: #89b4fa;
--red: #f38ba8;
--green: #a6e3a1;
--yellow: #f9e2af;
}
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
html, body { height: 100%; background: var(--base); color: var(--text);
font-family: 'JetBrains Mono', 'Fira Code', ui-monospace, monospace;
font-size: 14px; }
#toolbar {
display: flex; align-items: center; gap: 0.75rem;
padding: 0.4rem 0.75rem; background: var(--mantle);
border-bottom: 1px solid var(--surface0);
}
#toolbar a { color: var(--blue); text-decoration: none; font-size: 0.85rem; }
#toolbar a:hover { text-decoration: underline; }
#status { margin-left: auto; font-size: 0.75rem; color: var(--subtext0); }
#status.connected { color: var(--green); }
#status.error { color: var(--red); }
#canvas-wrap {
display: flex; justify-content: center; align-items: flex-start;
width: 100%; height: calc(100% - 36px); overflow: auto;
background: var(--crust);
}
canvas { display: block; cursor: default; }
#msg {
position: fixed; bottom: 1rem; left: 50%; transform: translateX(-50%);
background: var(--surface0); color: var(--yellow); border-radius: 6px;
padding: 0.4rem 0.9rem; font-size: 0.8rem;
opacity: 0; transition: opacity 0.3s;
pointer-events: none;
}
</style>
</head>
<body>
<div id="toolbar">
<strong>🖥 screen</strong>
<a href="/" title="back to agent page">← agent</a>
<span id="status">connecting…</span>
</div>
<div id="canvas-wrap"><canvas id="c"></canvas></div>
<div id="msg"></div>
<script>
// Minimal RFB-over-WebSocket renderer.
// Connects to /screen/ws on the same host; the harness relays raw
// RFB bytes to the VNC server running inside the container.
//
// This is a deliberately thin implementation — enough to display the
// desktop and forward pointer + keyboard events. For a production-grade
// viewer, replace with noVNC (issue #52 vendors the full bundle).
(function () {
'use strict';
// DOM handles shared by the whole viewer.
// `canvas` / `ctx`: the 2D drawing surface the remote framebuffer is painted on.
const canvas = document.getElementById('c');
const ctx = canvas.getContext('2d');
// `status`: toolbar label showing connection state (see setStatus).
const status = document.getElementById('status');
// `msg`: transient toast at the bottom of the page (see flash).
const msg = document.getElementById('msg');
// Update the toolbar status label.
//   text: message to display.
//   cls:  optional CSS class ('connected' or 'error'); any falsy value
//         clears the class so the label falls back to its neutral colour.
function setStatus(text, cls) {
  const className = cls ? cls : '';
  status.className = className;
  status.textContent = text;
}
// Show `text` in the bottom toast, then fade it out again after 2.5 s.
function flash(text) {
  const hide = () => {
    msg.style.opacity = '0';
  };
  msg.textContent = text;
  msg.style.opacity = '1';
  setTimeout(hide, 2500);
}
// --- WebSocket connection ---
// Use wss:// when the page itself was loaded over https, ws:// otherwise.
const proto = location.protocol === 'https:' ? 'wss' : 'ws';
const ws = new WebSocket(`${proto}://${location.host}/screen/ws`);
// Frames are delivered as ArrayBuffer so they can be wrapped in Uint8Array.
ws.binaryType = 'arraybuffer';
// NOTE: 'connected' here only means the WebSocket is open; the RFB
// handshake has not necessarily completed yet.
ws.onopen = () => setStatus('connected', 'connected');
ws.onerror = () => setStatus('connection error', 'error');
ws.onclose = (e) => {
setStatus(`disconnected (${e.code})`, 'error');
flash('VNC disconnected — reload to reconnect');
};
// Accumulate received bytes in a FIFO list of chunks (not a ring buffer):
// `chunks` holds the Uint8Array pieces in arrival order and `totalBytes`
// is the number of unconsumed bytes across all of them.
const chunks = [];
let totalBytes = 0;
ws.onmessage = (ev) => {
chunks.push(new Uint8Array(ev.data));
totalBytes += ev.data.byteLength;
// Parse as much of the buffered stream as is currently complete.
processRfb();
};
// --- Minimal RFB state machine ---
// We implement just enough to handshake and receive FramebufferUpdate
// rectangles encoded as Raw (encoding 0). Other encodings are not
// supported: their payload length is unknown to this client, so they
// cannot be decoded or reliably stepped over.
// Keyboard and pointer events are forwarded.
// Current parser state; one of the `case` labels handled in tryStep().
let state = 'version';
// Framebuffer dimensions in pixels, taken from ServerInit.
let fbW = 0, fbH = 0;
let pixelFormat = null; // set after ServerInit
// Rectangles still expected in the FramebufferUpdate being parsed.
let updateRects = 0;
// Pop exactly `n` bytes off the front of the chunk queue.
// Returns a fresh Uint8Array of length `n`, or null (consuming nothing)
// when fewer than `n` bytes are currently buffered.
function drainTo(n) {
  if (totalBytes < n) return null;
  const out = new Uint8Array(n);
  for (let filled = 0; filled < n; ) {
    const head = chunks[0];
    const want = n - filled;
    if (head.length <= want) {
      // The whole head chunk is needed: copy it and drop it from the queue.
      out.set(head, filled);
      filled += head.length;
      chunks.shift();
    } else {
      // Only a prefix is needed: copy it and leave the remainder queued.
      out.set(head.subarray(0, want), filled);
      filled += want;
      chunks[0] = head.subarray(want);
    }
  }
  totalBytes -= n;
  return out;
}
// Send `data` to the relay; silently dropped unless the socket is open.
function send(data) {
  if (ws.readyState !== WebSocket.OPEN) return;
  ws.send(data);
}
// Read an unsigned 32-bit big-endian integer from byte array `b` at offset `o`.
function u32be(b, o) {
  let value = 0;
  for (let i = 0; i < 4; i++) {
    value = (value << 8) | b[o + i];
  }
  // `>>> 0` reinterprets the signed 32-bit result as unsigned.
  return value >>> 0;
}
// Read an unsigned 16-bit big-endian integer from byte array `b` at offset `o`.
function u16be(b, o) {
  const hi = b[o] << 8;
  const lo = b[o + 1];
  return (hi | lo) >>> 0;
}
// Run the RFB state machine until it can make no further progress with
// the bytes buffered so far.
function processRfb() {
  let progressed = tryStep();
  while (progressed) {
    progressed = tryStep();
  }
}
// Advance the RFB state machine by one protocol element.
//
// Returns true when an element was fully consumed (the caller should call
// again), false when more bytes are needed or the session is in a
// terminal 'error' state. When an element is only partially buffered,
// every byte already drained for it is pushed back onto the queue so the
// next attempt re-parses it from the start.
function tryStep() {
  switch (state) {
    case 'version': {
      // Server ProtocolVersion: always 12 bytes.
      const b = drainTo(12);
      if (!b) return false;
      // Reply with RFB 003.008 regardless of what the server offered.
      send(new TextEncoder().encode('RFB 003.008\n'));
      state = 'security-types';
      return true;
    }
    case 'security-types': {
      const b = drainTo(1);
      if (!b) return false;
      const n = b[0];
      // Zero types means the server refused the connection.
      if (n === 0) {
        setStatus('server refused connection', 'error');
        state = 'error';
        return false;
      }
      const types = drainTo(n);
      if (!types) { chunks.unshift(b); totalBytes += 1; return false; }
      // Prefer type 1 (None), else use first offered
      const prefer = types.indexOf(1) !== -1 ? 1 : types[0];
      send(new Uint8Array([prefer]));
      state = prefer === 1 ? 'security-result' : 'security-vnc-challenge';
      return true;
    }
    case 'security-vnc-challenge': {
      // VNC auth: skip challenge bytes, respond with zeros (will fail,
      // but we're in plain-RFB mode for hyperhive — see weston-vnc.nix)
      const b = drainTo(16);
      if (!b) return false;
      send(new Uint8Array(16));
      state = 'security-result';
      return true;
    }
    case 'security-result': {
      const b = drainTo(4);
      if (!b) return false;
      if (u32be(b, 0) !== 0) {
        setStatus('auth failed', 'error');
        // Terminal: do not reinterpret the failure reason that follows as
        // another SecurityResult.
        state = 'error';
        return false;
      }
      // ClientInit: shared flag = 1
      send(new Uint8Array([1]));
      state = 'server-init';
      return true;
    }
    case 'server-init': {
      // ServerInit fixed part: width(2) height(2) pixel-format(16) name-length(4).
      const b = drainTo(24);
      if (!b) return false;
      const nameLen = u32be(b, 20);
      const nameBytes = drainTo(nameLen);
      if (!nameBytes) { chunks.unshift(b); totalBytes += 24; return false; }
      fbW = u16be(b, 0);
      // Height is the second 16-bit field, i.e. at byte offset 2.
      fbH = u16be(b, 2);
      // pixel format: bpp=b[4], depth=b[5], big-endian=b[6], true-colour=b[7]
      // red/green/blue max at b[8..13], shifts at b[14..16]
      pixelFormat = {
        bpp: b[4], depth: b[5], bigEndian: b[6], trueColour: b[7],
        redMax: u16be(b, 8), greenMax: u16be(b, 10), blueMax: u16be(b, 12),
        redShift: b[14], greenShift: b[15], blueShift: b[16],
        bytesPerPixel: b[4] / 8,
      };
      canvas.width = fbW;
      canvas.height = fbH;
      setStatus('connected', 'connected');
      // Request full framebuffer update
      requestUpdate(0, 0, 0, fbW, fbH);
      state = 'normal';
      return true;
    }
    case 'normal': {
      const b = drainTo(1);
      if (!b) return false;
      const msgType = b[0];
      if (msgType === 0) {
        // FramebufferUpdate: after the type byte come padding(1) + nRects(2).
        const hdr = drainTo(3);
        if (!hdr) { chunks.unshift(b); totalBytes += 1; return false; }
        updateRects = u16be(hdr, 1);
        state = 'rect-header';
      } else if (msgType === 1) {
        // SetColourMapEntries: padding(1) first-colour(2) count(2), then
        // count * 6 bytes of RGB entries. Consumed and ignored.
        const hdr = drainTo(5);
        if (!hdr) { chunks.unshift(b); totalBytes += 1; return false; }
        const entries = drainTo(u16be(hdr, 3) * 6);
        if (!entries) { chunks.unshift(b, hdr); totalBytes += 1 + 5; return false; }
      } else if (msgType === 2) {
        // Bell: ignore
      } else if (msgType === 3) {
        // ServerCutText: padding(3) length(4), then `length` bytes of text.
        const hdr = drainTo(7);
        if (!hdr) { chunks.unshift(b); totalBytes += 1; return false; }
        const len = u32be(hdr, 3);
        const text = drainTo(len);
        // Push back BOTH the type byte and the header so the byte count
        // and the queue contents stay in agreement.
        if (!text) { chunks.unshift(b, hdr); totalBytes += 1 + 7; return false; }
      } else {
        // Unknown message: its length is unknown, so the stream cannot be
        // resynchronised. Stop instead of misparsing what follows.
        setStatus(`unsupported server message ${msgType}`, 'error');
        state = 'error';
        return false;
      }
      return true;
    }
    case 'rect-header': {
      // All rectangles of this update handled: ask for the next
      // (incremental) update and go back to waiting for a message.
      if (updateRects === 0) { state = 'normal'; requestUpdate(1, 0, 0, fbW, fbH); return true; }
      // Rectangle header: x(2) y(2) w(2) h(2) encoding(4).
      const b = drainTo(12);
      if (!b) return false;
      const x = u16be(b, 0), y = u16be(b, 2), w = u16be(b, 4), h = u16be(b, 6);
      const enc = u32be(b, 8);
      if (enc !== 0 || !pixelFormat) {
        // Only Raw is understood; any other payload has unknown length.
        setStatus(`unsupported encoding ${enc}`, 'error');
        state = 'error';
        return false;
      }
      const bytes = w * h * pixelFormat.bytesPerPixel;
      const pixels = drainTo(bytes);
      if (!pixels) { chunks.unshift(b); totalBytes += 12; return false; }
      drawRaw(x, y, w, h, pixels);
      updateRects--;
      return true;
    }
    default: return false;
  }
}
// Paint one Raw-encoded rectangle onto the canvas.
//   x, y: top-left corner of the rectangle in framebuffer coordinates.
//   w, h: rectangle size in pixels.
//   data: w * h pixels in the server's pixel format (see `pixelFormat`).
// Each colour channel is extracted with the server's shift/max and then
// scaled to 0..255, so formats whose channel max is not 255 (e.g. 16 bpp)
// are displayed at full brightness rather than nearly black.
function drawRaw(x, y, w, h, data) {
  if (!pixelFormat || w === 0 || h === 0) return;
  const bpp = pixelFormat.bytesPerPixel;
  const bigEndian = pixelFormat.bigEndian;
  const rs = pixelFormat.redShift, gs = pixelFormat.greenShift, bs = pixelFormat.blueShift;
  const rMax = pixelFormat.redMax, gMax = pixelFormat.greenMax, bMax = pixelFormat.blueMax;
  // Per-channel factor mapping [0, max] onto [0, 255]; exactly 1 for max 255.
  const scale = (max) => (max > 0 ? 255 / max : 0);
  const rk = scale(rMax), gk = scale(gMax), bk = scale(bMax);
  const img = ctx.createImageData(w, h);
  const d = img.data;
  const count = w * h;
  for (let i = 0, o = 0; i < count; i++, o += bpp) {
    let px;
    if (bpp === 4) {
      px = bigEndian
        ? (data[o] << 24 | data[o + 1] << 16 | data[o + 2] << 8 | data[o + 3]) >>> 0
        : (data[o + 3] << 24 | data[o + 2] << 16 | data[o + 1] << 8 | data[o]) >>> 0;
    } else if (bpp === 2) {
      px = bigEndian
        ? (data[o] << 8 | data[o + 1]) >>> 0
        : (data[o + 1] << 8 | data[o]) >>> 0;
    } else {
      px = data[o];
    }
    const j = i * 4;
    // ImageData is a Uint8ClampedArray: fractional values are rounded on store.
    d[j] = ((px >>> rs) & rMax) * rk;
    d[j + 1] = ((px >>> gs) & gMax) * gk;
    d[j + 2] = ((px >>> bs) & bMax) * bk;
    d[j + 3] = 255;
  }
  ctx.putImageData(img, x, y);
}
// Send a FramebufferUpdateRequest (client message type 3).
//   incremental: 0 for a full refresh, 1 for changes only.
//   x, y, w, h:  requested region, each encoded as a big-endian u16.
function requestUpdate(incremental, x, y, w, h) {
  const packet = new Uint8Array([
    3, incremental,
    x >> 8, x & 0xff,
    y >> 8, y & 0xff,
    w >> 8, w & 0xff,
    h >> 8, h & 0xff,
  ]);
  send(packet);
}
// --- Input forwarding ---
canvas.addEventListener('mousemove', sendPointer);
canvas.addEventListener('mousedown', sendPointer);
canvas.addEventListener('mouseup', sendPointer);
// Forward a mouse event as an RFB PointerEvent (client message type 5).
// Coordinates are relative to the canvas and clamped to the framebuffer.
// Nothing is sent until the handshake has finished: bytes written earlier
// would be interpreted by the server as part of the handshake.
function sendPointer(ev) {
  if (state !== 'normal' && state !== 'rect-header') return;
  const r = canvas.getBoundingClientRect();
  const x = Math.floor(Math.max(0, Math.min(fbW - 1, ev.clientX - r.left)));
  const y = Math.floor(Math.max(0, Math.min(fbH - 1, ev.clientY - r.top)));
  // DOM `buttons` bits: 1 = left, 2 = right, 4 = middle.
  // RFB button mask bits: 1 = left, 2 = middle, 4 = right.
  let mask = 0;
  if (ev.buttons & 1) mask |= 1;
  if (ev.buttons & 4) mask |= 2;
  if (ev.buttons & 2) mask |= 4;
  const b = new Uint8Array(6);
  b[0] = 5; b[1] = mask;
  b[2] = x >> 8; b[3] = x & 0xff;
  b[4] = y >> 8; b[5] = y & 0xff;
  send(b);
}
document.addEventListener('keydown', (ev) => sendKey(ev, 1));
document.addEventListener('keyup', (ev) => sendKey(ev, 0));
// Forward a keyboard event as an RFB KeyEvent (client message type 4).
//   ev:   the DOM KeyboardEvent.
//   down: 1 for key press, 0 for key release.
// The browser's default action is suppressed only while a session is
// live, so shortcuts such as reload keep working before the handshake
// completes and after a disconnect. Keys with no keysym mapping are left
// to the browser and not forwarded.
function sendKey(ev, down) {
  const live = ws.readyState === WebSocket.OPEN
    && (state === 'normal' || state === 'rect-header');
  if (!live) return;
  const key = rfbKeysym(ev);
  if (key === 0) return;
  ev.preventDefault();
  const b = new Uint8Array(8);
  b[0] = 4; b[1] = down; b[2] = 0; b[3] = 0;
  b[4] = key >> 24; b[5] = (key >> 16) & 0xff; b[6] = (key >> 8) & 0xff; b[7] = key & 0xff;
  send(b);
}
// Translate a DOM KeyboardEvent into an X11 keysym for an RFB KeyEvent.
// Only `ev.key` is read. Returns 0 when the key has no mapping.
function rfbKeysym(ev) {
  // Named keys, indexed by their DOM `KeyboardEvent.key` value.
  const map = {
    'Backspace': 0xff08, 'Tab': 0xff09, 'Enter': 0xff0d, 'Escape': 0xff1b,
    'Insert': 0xff63, 'Delete': 0xffff,
    'Home': 0xff50, 'End': 0xff57, 'PageUp': 0xff55, 'PageDown': 0xff56,
    'ArrowLeft': 0xff51, 'ArrowUp': 0xff52,
    'ArrowRight': 0xff53, 'ArrowDown': 0xff54,
    'Shift': 0xffe1, 'Control': 0xffe3, 'Alt': 0xffe9, 'Meta': 0xffe7,
    'F1': 0xffbe, 'F2': 0xffbf, 'F3': 0xffc0, 'F4': 0xffc1,
    'F5': 0xffc2, 'F6': 0xffc3, 'F7': 0xffc4, 'F8': 0xffc5,
    'F9': 0xffc6, 'F10': 0xffc7, 'F11': 0xffc8, 'F12': 0xffc9,
  };
  const key = ev.key;
  if (typeof key !== 'string') return 0;
  if (Object.prototype.hasOwnProperty.call(map, key)) return map[key];
  // Printable keys: `key` holds exactly one character (which may be a
  // surrogate pair, hence counting code points rather than code units).
  const chars = Array.from(key);
  if (chars.length !== 1) return 0;
  const cp = chars[0].codePointAt(0);
  // Latin-1 characters are their own keysym; everything else uses the
  // X11 Unicode keysym range 0x01000000 + code point.
  return cp <= 0xff ? cp : (0x01000000 | cp);
}
})();
</script>
</body>
</html>

View file

@ -22,6 +22,7 @@ use axum::{
routing::{get, post},
};
use serde::{Deserialize, Serialize};
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio_stream::{Stream, StreamExt, wrappers::BroadcastStream};
use crate::client;
@ -56,6 +57,9 @@ struct AppState {
files: TurnFiles,
/// Prevents `/api/compact` from racing with an in-flight normal turn.
turn_lock: TurnLock,
/// VNC port read from `/etc/hyperhive/gui.json` at startup.
/// `None` when the file is absent (gui not enabled for this agent).
gui_vnc_port: Option<u16>,
}
impl AppState {
@ -80,6 +84,7 @@ pub async fn serve(
files: TurnFiles,
turn_lock: TurnLock,
) -> Result<()> {
let gui_vnc_port = read_gui_json();
let state = AppState {
label,
login,
@ -88,6 +93,7 @@ pub async fn serve(
socket,
files,
turn_lock,
gui_vnc_port,
};
let app = Router::new()
.route("/", get(serve_index))
@ -110,6 +116,8 @@ pub async fn serve(
.route("/stats", get(serve_stats))
.route("/static/stats.js", get(serve_stats_js))
.route("/api/stats", get(api_stats))
.route("/screen", get(serve_screen))
.route("/screen/ws", get(screen_ws))
.with_state(state);
let addr = SocketAddr::from(([0, 0, 0, 0], port));
let listener = bind_with_retry(addr, "web UI").await?;
@ -215,6 +223,91 @@ async fn serve_stats_js() -> impl IntoResponse {
)
}
/// `GET /screen`: serves the embedded RFB-over-WebSocket viewer page.
async fn serve_screen() -> impl IntoResponse {
    const SCREEN_HTML: &str = include_str!("../assets/screen.html");
    let headers = [("content-type", "text/html; charset=utf-8")];
    (headers, SCREEN_HTML)
}
/// Read `/etc/hyperhive/gui.json` and extract the `vnc_port` field.
/// Returns `None` if the file is absent or unparseable — GUI not enabled.
fn read_gui_json() -> Option<u16> {
let text = std::fs::read_to_string("/etc/hyperhive/gui.json").ok()?;
let val: serde_json::Value = serde_json::from_str(&text).ok()?;
val["vnc_port"].as_u64().and_then(|p| u16::try_from(p).ok())
}
/// `GET /screen/ws`: WebSocket endpoint for the VNC relay.
///
/// When a VNC port is configured, the request is upgraded and the
/// resulting socket is handed to [`relay_ws_vnc`], which pumps bytes to
/// and from `127.0.0.1:<vnc_port>`. Without a configured port the handler
/// answers 404.
async fn screen_ws(
    ws: axum::extract::ws::WebSocketUpgrade,
    State(state): State<AppState>,
) -> Response {
    match state.gui_vnc_port {
        Some(vnc_port) => ws.on_upgrade(move |socket| relay_ws_vnc(socket, vnc_port)),
        None => (StatusCode::NOT_FOUND, "gui not enabled for this agent").into_response(),
    }
}
/// Pure byte pump: forwards raw bytes between the WebSocket client and
/// the VNC TCP stream on `127.0.0.1:<vnc_port>`. The payload is never
/// inspected, so the relay is transparent to the RFB variant in use.
///
/// Two tasks are spawned, one per direction. As soon as either direction
/// ends, the other task is aborted so neither the TCP connection nor the
/// WebSocket outlives the session. When the VNC side closes first, a
/// Close frame is sent so the browser sees the disconnect.
async fn relay_ws_vnc(socket: axum::extract::ws::WebSocket, vnc_port: u16) {
    // Import futures traits locally so they don't conflict with
    // tokio_stream::StreamExt used at module scope.
    use axum::extract::ws::Message;
    use futures_util::{SinkExt, StreamExt as _};

    let addr = format!("127.0.0.1:{vnc_port}");
    let tcp = match tokio::net::TcpStream::connect(&addr).await {
        Ok(tcp) => tcp,
        Err(err) => {
            tracing::warn!(%addr, %err, "screen/ws: could not connect to VNC server");
            return;
        }
    };
    let (mut tcp_rx, mut tcp_tx) = tcp.into_split();
    let (mut ws_tx, mut ws_rx) = socket.split();

    // WS → TCP
    let mut ws_to_tcp = tokio::spawn(async move {
        // Fully qualified: `next` would otherwise be ambiguous with the
        // module-level tokio_stream::StreamExt.
        while let Some(Ok(msg)) = futures_util::StreamExt::next(&mut ws_rx).await {
            match msg {
                Message::Binary(data) => {
                    if tcp_tx.write_all(&data).await.is_err() {
                        break;
                    }
                }
                Message::Close(_) => break,
                _ => {} // ping/pong/text: ignore
            }
        }
    });

    // TCP → WS
    let mut tcp_to_ws = tokio::spawn(async move {
        let mut buf = vec![0u8; 8192];
        loop {
            match tcp_rx.read(&mut buf).await {
                Ok(0) | Err(_) => break,
                Ok(n) => {
                    if ws_tx
                        .send(Message::Binary(buf[..n].to_vec().into()))
                        .await
                        .is_err()
                    {
                        break;
                    }
                }
            }
        }
        // Best effort: tell the browser the session is over. Failure just
        // means the client is already gone.
        let _ = ws_tx.send(Message::Close(None)).await;
    });

    // Wait for either direction to finish, then abort the other one.
    // Dropping a JoinHandle only detaches its task, so without the
    // explicit abort the surviving pump could keep running.
    tokio::select! {
        _ = &mut ws_to_tcp => tcp_to_ws.abort(),
        _ = &mut tcp_to_ws => ws_to_tcp.abort(),
    }
}
#[derive(Deserialize)]
struct StatsQuery {
window: Option<String>,
@ -271,6 +364,10 @@ struct StateSnapshot {
/// Cumulative token usage across the most recent turn's inferences
/// (cost signal). `null` until the first turn finishes.
cost_usage: Option<crate::events::TokenUsage>,
/// Whether the weston VNC compositor is configured for this agent
/// (i.e. `/etc/hyperhive/gui.json` was present at harness startup).
/// When true, the UI may render a `🖥 screen` link to `/screen`.
gui_enabled: bool,
}
#[derive(Serialize)]
@ -367,6 +464,7 @@ async fn api_state(State(state): State<AppState>) -> axum::Json<StateSnapshot> {
model,
ctx_usage,
cost_usage,
gui_enabled: state.gui_vnc_port.is_some(),
})
}