hyperhive/nix/templates/harness-base.nix

{
  pkgs,
  lib,
  config,
  ...
}:
{
  # Shared scaffolding for any hyperhive harness container — both
  # sub-agents (`agent-base.nix`) and the manager (`manager.nix`) extend
  # this. The systemd service that actually runs the harness binary
  # differs per role and lives in the child module.

  # Optional feature modules. Each declares its own `hyperhive.*`
  # option(s), default-off, so every agent has them available but
  # only opts in from its own `agent.nix`.
  imports = [ ./weston-vnc.nix ];

  # Per-agent default model. The `config` section of this module exports
  # the value to the harness as `HIVE_DEFAULT_MODEL`.
  options.hyperhive.model = lib.mkOption {
    description = ''
      Default claude model for this agent. Sets the `HIVE_DEFAULT_MODEL`
      environment variable consumed by the harness at boot; if no
      persisted model choice exists in the agent's state dir the harness
      falls back to this value. The operator can still switch the model at
      runtime via the per-agent web UI — that choice is persisted to the
      state dir and takes precedence over this default until the agent is
      purged.

      Valid values are the short model names that `claude --model` accepts:
      `"haiku"`, `"sonnet"`, `"opus"` (or any future identifier). Context
      window sizes are looked up at runtime from the
      `HIVE_CONTEXT_WINDOW_TOKENS_<KEY_UPPER>` env vars injected by the
      meta flake; override sizes via `services.hive-c0re.contextWindowTokens`
      on the host.
    '';
    type = with lib.types; str;
    default = "haiku";
    example = "sonnet";
  };

  # Allow-list for the `Bash` tool; rendered to
  # /etc/hyperhive/bash-allow.json by the `config` section of this module.
  options.hyperhive.allowedBashPatterns = lib.mkOption {
    description = ''
      Shell command patterns auto-approved for the `Bash` built-in tool.
      Empty list (the default) grants wholesale `Bash` approval —
      claude can run any shell command without a prompt. Non-empty list
      replaces `Bash` in `--allowedTools` with one `Bash(pattern)` entry
      per item; only commands matching a pattern are auto-approved; all
      others require confirmation (which in `--print` mode means they
      will not run). Use to sandbox agents to a known-safe command
      vocabulary.

      Patterns use the same glob syntax claude accepts in `Bash(…)`:
      `*` matches any string within a word, shell-style.
    '';
    type = with lib.types; listOf str;
    default = [ ];
    example = [
      "git *"
      "ls *"
      "cat /agents/*/state/*"
    ];
  };

  # Allow-list for outgoing `send` calls; rendered to
  # /etc/hyperhive/send-allow.json by the `config` section of this module.
  options.hyperhive.allowedRecipients = lib.mkOption {
    description = ''
      Names this agent is allowed to `send` to via
      `mcp__hyperhive__send`. Empty list (the default) means
      unrestricted — the agent can message any peer, the
      operator, or the manager. Non-empty list constrains the
      surface: only the listed names + the manager (always
      allowed) get through; anything else returns an error
      string to claude without touching the broker. The
      operator (`operator`) needs to be in the list if the
      agent should be able to surface output on the
      dashboard.

      Useful for sandboxing untrusted sub-agents — set
      `[ "manager" ]` to scope them to manager-only chatter.
      The manager itself is always exempt; this option only
      affects sub-agent `send`.
    '';
    type = with lib.types; listOf str;
    default = [ ];
    example = [
      "alice"
      "manager"
    ];
  };

  options.hyperhive.extraMcpServers =
    let
      # Schema of a single MCP server entry (one value of the attrset).
      mcpServer = lib.types.submodule {
        options = {
          command = lib.mkOption {
            description = "Absolute path to the MCP server binary. Use `\${pkgs.foo}/bin/foo` or `/run/current-system/sw/bin/foo`.";
            type = lib.types.str;
          };
          args = lib.mkOption {
            description = "Args passed to the MCP server binary.";
            type = with lib.types; listOf str;
            default = [ ];
          };
          env = lib.mkOption {
            description = "Environment variables for the MCP server child process.";
            type = with lib.types; attrsOf str;
            default = { };
          };
          allowedTools = lib.mkOption {
            description = ''
              Tool names this MCP server is auto-approved to call via
              `--allowedTools`. Single entry `"*"` (the default) means
              "every tool from this server" — convenient but trusting.
              Tighten to a specific list when you only want a subset.
              Names are bare (e.g. `send_message`); the harness prepends
              `mcp__<server-key>__` at build time.
            '';
            type = with lib.types; listOf str;
            default = [ "*" ];
            example = [
              "send_message"
              "join_room"
            ];
          };
        };
      };
    in
    # Attrset keyed by server name; serialized to
    # /etc/hyperhive/extra-mcp.json by the `config` section of this module.
    lib.mkOption {
      description = ''
        Extra MCP servers claude sees alongside the hyperhive tool surface.
        Keys are the server names (claude addresses tools as
        `mcp__<key>__<tool>`). Rendered to `/etc/hyperhive/extra-mcp.json`
        at activation time; the harness reads that file at boot and merges
        it into `--mcp-config` + `--allowedTools`. Take effect on the
        agent's next harness restart (no operator approval needed beyond
        whatever brought the new agent.nix into deployed/*).
      '';
      type = lib.types.attrsOf mcpServer;
      default = { };
      example = lib.literalExpression ''
        {
          matrix = {
            command = "/run/current-system/sw/bin/mcp-matrix";
            args = [ "--config" "/state/matrix.toml" ];
            env.MATRIX_HOMESERVER = "https://matrix.example.org";
            allowedTools = [ "send_message" "join_room" ];
          };
        }
      '';
    };

  # Forgejo base URL. Consumed by the `tea-login` and `forge-avatar-sync`
  # oneshot units defined in the `config` section of this module.
  options.hyperhive.forge.url = lib.mkOption {
    type = lib.types.str;
    default = "http://localhost:3000";
    example = "http://forge.internal:3000";
    description = ''
      Base URL of the hyperhive-managed Forgejo. Used at container
      boot by the `tea-login` oneshot systemd unit, which resolves the
      agent's username via `<this>/api/v1/user` and writes a `tea`
      login entry (this URL + the token from the seeded `forge-token`
      file) to `/root/.config/tea/config.yml`, so the agent's claude
      can shell out to `tea` without an extra auth dance. Also used by
      the `forge-avatar-sync` unit to upload the agent icon. Both units
      are a no-op when no `forge-token` file is present at
      `/state/forge-token` or `/agents/<name>/state/forge-token` (i.e.
      hive-forge isn't running on the host).
    '';
  };

  # When enabled, the `config` section of this module exports
  # `HIVE_FORGE_KEEP_SUBSCRIPTIONS=1`.
  options.hyperhive.forge.keepSubscriptions = lib.mkOption {
    description = ''
      When true (the default), the forge notification poller will NOT
      auto-unsubscribe from repo watches after delivering a
      "subscribed"-reason notification. Sub-agents keep their broad
      subscriptions so they stay informed about repos they contribute to.
      Set to false for agents (e.g. the manager) that use reason-based
      filtering and do not need firehose-level repo visibility — they will
      auto-unsubscribe after receiving a watched-repo notification.
    '';
    type = with lib.types; bool;
    default = true;
  };

  options.hyperhive.dashboardLinks =
    let
      # Schema of a single dashboard link entry.
      dashboardLink = lib.types.submodule {
        options = {
          label = lib.mkOption {
            description = "Display label for the link.";
            type = lib.types.str;
          };
          icon = lib.mkOption {
            description = "Optional icon emoji or short glyph.";
            type = lib.types.str;
            default = "";
          };
          url = lib.mkOption {
            description = "Full URL (may include a different port, e.g. http://localhost:9001/stats).";
            type = lib.types.str;
          };
        };
      };
    in
    # Serialized to JSON by the `hive-dashboard-links` unit in the `config`
    # section of this module.
    lib.mkOption {
      description = ''
        Extra navigation links surfaced on the hive-c0re dashboard card for
        this agent. Declare any additional web UI pages the agent exposes —
        stats pages, custom UIs, etc. hive-c0re reads the JSON file this
        option produces at each container-view snapshot and attaches the
        links to the agent card without any code changes.
      '';
      type = lib.types.listOf dashboardLink;
      default = [ ];
      example = lib.literalExpression ''
        [
          { label = "Stats"; icon = "📊"; url = "http://localhost:9001/stats"; }
        ]
      '';
    };

  # Marketplace sources; rendered to
  # /etc/hyperhive/claude-marketplaces.json by the `config` section of
  # this module.
  options.hyperhive.claudeMarketplaces = lib.mkOption {
    description = ''
      Claude Code plugin marketplaces to add at harness boot. Each
      entry is passed to `claude plugin marketplace add <source>`
      (`owner/repo`, full git URL, or local path). Idempotent —
      re-adding an existing marketplace is treated as success.
      Required before `hyperhive.claudePlugins` entries that
      reference a marketplace (e.g. `foo@claude-plugins-official`).
      Rendered to `/etc/hyperhive/claude-marketplaces.json`.

      Defaults to Anthropic's official marketplace; agents get it
      out of the box without any per-agent.nix wiring.
    '';
    type = with lib.types; listOf str;
    default = [ "anthropics/claude-plugins-official" ];
    example = [
      "anthropics/claude-plugins-official"
      "anthropics/claude-plugins-community"
    ];
  };

  # Plugin specs; rendered to /etc/hyperhive/claude-plugins.json by the
  # `config` section of this module.
  options.hyperhive.claudePlugins = lib.mkOption {
    description = ''
      Claude Code plugins to install at harness boot. Each entry is
      passed verbatim to `claude plugin install <spec>` once per
      container start, before the turn loop opens. `claude plugin
      install` is expected to be idempotent, so reinstalling on every
      boot is cheap. Failures log a warning but do not abort boot — a
      missing plugin is preferable to a non-serving agent. Rendered to
      `/etc/hyperhive/claude-plugins.json`; the harness reads it via
      `plugins::install_configured`.
    '';
    type = with lib.types; listOf str;
    default = [ ];
    example = [
      "formatter@my-marketplace"
      "thinking-tools@anthropics"
    ];
  };

  # Boolean flag; rendered to
  # /etc/hyperhive/claude-plugins-auto-update.json by the `config`
  # section of this module.
  options.hyperhive.claudePluginsAutoUpdate = lib.mkOption {
    description = ''
      When true, the harness runs `claude plugin marketplace update`
      before installing plugins at boot, pulling the latest index from
      all configured marketplaces. Disabled by default — most agents
      want pinned plugin versions and the network round-trip adds to
      boot time. Enable for agents that should always install the latest
      available version of their plugins.
    '';
    type = with lib.types; bool;
    default = false;
  };

  # Optional SVG icon; when set, the `config` section of this module
  # installs it at /etc/hyperhive/icon.svg.
  options.hyperhive.icon = lib.mkOption {
    description = ''
      Path to an SVG file used as this agent's icon — shown on the
      dashboard and the per-agent web UI (header + favicon). Commit
      the SVG into the agent's config repo next to `agent.nix` and
      reference it as a relative path (`./icon.svg`).

      When null (the default) the agent falls back to the shared
      hyperhive logo. The harness serves the icon (configured or
      default) at `GET /icon` on the per-agent web port.
    '';
    type = with lib.types; nullOr path;
    default = null;
    example = lib.literalExpression "./icon.svg";
  };

  # When disabled, the `config` section of this module exports
  # `HIVE_COMPACT_WATERMARK_TOKENS=0`.
  options.hyperhive.autoCompact = lib.mkOption {
    description = ''
      Enable proactive watermark-based compaction. When `true` (the
      default) the harness automatically runs a notes-checkpoint turn
      followed by `/compact` once the context window crosses 75% of
      the model's limit, keeping later turns from hitting the hard
      overflow path. Set to `false` to disable proactive compaction
      entirely (`HIVE_COMPACT_WATERMARK_TOKENS=0`); the reactive path
      (compact-on-overflow when the session is already past the limit)
      still applies.

      Disable for agents that run large-context models (sonnet/opus)
      where the heuristic fires too early and discards useful history
      before the session is actually close to the limit.
    '';
    type = with lib.types; bool;
    default = true;
  };

  config = {
    # Files under /etc/hyperhive: each `hyperhive.*` option below is
    # serialized with `builtins.toJSON`; the icon is installed as-is.
    environment.etc =
      let
        hive = config.hyperhive;
        # Build an /etc entry whose body is the JSON encoding of `value`.
        jsonEntry = value: { text = builtins.toJSON value; };
      in
      {
        "hyperhive/extra-mcp.json" = jsonEntry hive.extraMcpServers;

        # Operator-set per-agent icon (hyperhive.icon). When configured, the
        # SVG lands at /etc/hyperhive/icon.svg; the harness serves it at
        # GET /icon, falling back to the bundled hyperhive logo when absent.
        "hyperhive/icon.svg" = lib.mkIf (hive.icon != null) { source = hive.icon; };

        "hyperhive/bash-allow.json" = jsonEntry hive.allowedBashPatterns;
        "hyperhive/send-allow.json" = jsonEntry hive.allowedRecipients;
        "hyperhive/claude-plugins.json" = jsonEntry hive.claudePlugins;
        "hyperhive/claude-marketplaces.json" = jsonEntry hive.claudeMarketplaces;
        "hyperhive/claude-plugins-auto-update.json" = jsonEntry hive.claudePluginsAutoUpdate;
      };

    # Environment for the harness:
    #  - HIVE_DEFAULT_MODEL seeds the initial model selection when no
    #    persisted model choice exists in the state dir.
    #  - SHELL must be set so claude's Bash tool finds a POSIX shell.
    #  - HIVE_CONTEXT_WINDOW_TOKENS_* are injected by the meta flake from
    #    the host-level `services.hive-c0re.contextWindowTokens` option —
    #    not set here.
    environment.variables = lib.mkMerge [
      {
        HIVE_DEFAULT_MODEL = config.hyperhive.model;
        SHELL = "${pkgs.bashInteractive}/bin/bash";
      }
      # Zero watermark disables proactive compaction; the reactive path
      # (compact-on-overflow) still fires when the session is truly full.
      (lib.mkIf (!config.hyperhive.autoCompact) {
        HIVE_COMPACT_WATERMARK_TOKENS = "0";
      })
      # Only exported when the agent keeps its repo subscriptions.
      (lib.mkIf config.hyperhive.forge.keepSubscriptions {
        HIVE_FORGE_KEEP_SUBSCRIPTIONS = "1";
      })
    ];

    boot.isNspawnContainer = true;

    nix.settings = {
      # Every agent gets flakes + the modern `nix` CLI out of the box.
      # Equivalent to passing `--extra-experimental-features 'nix-command
      # flakes'` on every invocation. Agents shell out to `nix build` /
      # `nix flake` constantly (devshells, ad-hoc evals, fetching their
      # own MCP-server flakes); without this they hit the "experimental
      # feature not enabled" wall on the first try.
      experimental-features = [
        "nix-command"
        "flakes"
      ];

      # Containers bind-mount the host's nix-daemon socket. The host daemon
      # may be configured with remote builders or strict sandbox settings
      # (sandbox-fallback = false) that make local `nix build` invocations
      # fail inside the container. Enable sandbox-fallback so builds that
      # can't set up the sandbox (no user-namespaces in nspawn) fall back
      # to unsandboxed local builds rather than failing outright.
      sandbox-fallback = true;
    };

    # `claude-code` is unfree. Each per-agent container's nixosConfiguration
    # evaluates its own `nixpkgs` instance, so the operator's host-level
    # `nixpkgs.config.allowUnfreePredicate` does not propagate into here —
    # we have to allow it inside the container's config as well.
    nixpkgs.config.allowUnfreePredicate =
      let
        # Package names (as reported by `getName`) allowed despite being unfree.
        unfreeAllowList = [ "claude-code" ];
      in
      pkg: builtins.elem (lib.getName pkg) unfreeAllowList;

    # Tooling available on PATH inside every harness container.
    environment.systemPackages = [
      pkgs.hyperhive
      pkgs.claude-code
      pkgs.bashInteractive
      pkgs.coreutils-full
      # procps for pkill — used by the web UI's /api/cancel to SIGINT the
      # in-flight claude turn.
      pkgs.procps
      # tea: gitea/forgejo CLI client. Configured at boot by the
      # tea-login oneshot below if /state/forge-token is present, so
      # claude can `tea repos create`, `tea pulls create`, etc.
      pkgs.tea
      # jq: JSON processing in shell — useful for parsing API responses,
      # forge REST calls, sqlite output, etc.
      pkgs.jq
      # curl: HTTP client for forge REST API and other web requests.
      pkgs.curl
      # hive-forge <verb>: CLI wrapping common Forgejo REST API operations
      # (view, pr, issue, comment, assign, close, labels, branches, etc.)
      (pkgs.callPackage ../packages/hive-forge-tools.nix { })
    ];

    # One-shot: write tea's config.yml from the seeded forge token so
    # the agent can use `tea` without interactive prompts. Runs on
    # every boot so a rotated token (hive-c0re remints on each agent
    # rebuild) is always reflected. *Always* exits 0 — never fail a
    # NixOS switch-to-configuration over a missing/temperamental forge.
    #
    # NOTE: the heredoc body inside `script` is deliberately the
    # least-indented text of the string. Nix strips that common indent, so
    # the YAML and the `EOF` terminator end up at column 0 in the generated
    # script. Keep that layout when editing.
    systemd.services.tea-login = {
      description = "configure tea CLI from hive-forge token (best-effort)";
      wantedBy = [ "multi-user.target" ];
      after = [ "local-fs.target" ];
      serviceConfig = {
        Type = "oneshot";
        RemainAfterExit = true;
      };
      path = [
        pkgs.curl
        pkgs.python3
        pkgs.coreutils
      ];
      script = ''
                # NixOS wraps `script` in a `bash -e` shebang, so errexit is ON
                # by default. Turn it off explicitly: no failure here may
                # propagate. A failed unit aborts `nixos-container update`
                # which blocks rebuilds.
                set +e
                # The generated config holds an API token — keep it (and any
                # directory created for it) private to root.
                umask 077
                FORGE_URL=${lib.escapeShellArg config.hyperhive.forge.url}
                # Manager bind-mounts state at /state; sub-agents at
                # /agents/<name>/state. Glob both — each container only sees
                # its own mount, so there is exactly one hit (or zero when
                # the forge hasn't been seeded yet).
                TOKEN_FILE=""
                for f in /state/forge-token /agents/*/state/forge-token; do
                  if [ -f "$f" ]; then
                    TOKEN_FILE="$f"
                    break
                  fi
                done
                if [ -z "$TOKEN_FILE" ]; then
                  echo "tea-login: no forge-token found; skipping"
                  exit 0
                fi
                TOKEN=$(cat "$TOKEN_FILE" 2>/dev/null)
                if [ -z "$TOKEN" ]; then
                  echo "tea-login: forge-token at $TOKEN_FILE is empty or unreadable; skipping"
                  exit 0
                fi
                # Resolve the agent username from the forge API. Stored in
                # FORGE_USER rather than USER so the conventional login-name
                # variable is not clobbered.
                FORGE_USER=$(curl -sf --max-time 5 \
                  -H "Authorization: token $TOKEN" \
                  "$FORGE_URL/api/v1/user" \
                  | python3 -c 'import sys,json; print(json.load(sys.stdin).get("login",""))' \
                  2>/dev/null || true)
                if [ -z "$FORGE_USER" ]; then
                  echo "tea-login: could not resolve username from forge API; skipping"
                  exit 0
                fi
                # tea reads config from ~/.config/tea/config.yml (for root: /root/.config/tea/config.yml).
                # Write it directly so we control default:true and always
                # refresh a rotated token — no 'tea login add' interactive dance.
                # $HOME is unset in systemd service context (causing writes to
                # /.config/). Hardcode /root — always correct for NixOS containers
                # where the harness runs as root.
                CONFIG="/root/.config/tea/config.yml"
                mkdir -p "$(dirname "$CONFIG")" || true
                if ! cat > "$CONFIG" << EOF
        logins:
            - name: forge
              url: $FORGE_URL
              token: $TOKEN
              default: true
              ssh_host: ""
              ssh_key: ""
              insecure: false
              ssh_agent: false
              user: $FORGE_USER
        preferences:
            editor: false
            flag_defaults:
                remote: ""
        EOF
                then
                  echo "tea-login: could not write $CONFIG; skipping"
                  exit 0
                fi
                # umask only affects newly created files; tighten a config
                # left behind by an earlier boot as well.
                chmod 600 "$CONFIG" 2>/dev/null || true
                echo "tea-login: configured for $FORGE_URL as $FORGE_USER"
                exit 0
      '';
    };

    # One-shot: upload the agent's configured icon to its Forgejo user avatar
    # so the icon shows up on commits / PRs / issue comments in the forge.
    # Only runs when `/etc/hyperhive/icon.svg` is present (set via
    # `hyperhive.icon`). No-op when the forge is unreachable or the icon
    # is not set. *Always* exits 0.
    systemd.services.forge-avatar-sync = {
      description = "sync agent icon to Forgejo user avatar (best-effort)";
      wantedBy = [ "multi-user.target" ];
      after = [ "tea-login.service" ];
      serviceConfig = {
        Type = "oneshot";
        RemainAfterExit = true;
      };
      path = [
        pkgs.curl
        pkgs.coreutils
        pkgs.librsvg
      ];
      script = ''
        # NixOS wraps `script` in a `bash -e` shebang; disable errexit so an
        # unexpected failure (mktemp, base64, ...) cannot fail the unit.
        set +e
        ICON=/etc/hyperhive/icon.svg
        if [ ! -f "$ICON" ]; then
          echo "forge-avatar-sync: no icon configured; skipping"
          exit 0
        fi
        FORGE_URL=${lib.escapeShellArg config.hyperhive.forge.url}
        # Same token lookup as tea-login: /state for the manager,
        # /agents/<name>/state for sub-agents.
        TOKEN_FILE=""
        for f in /state/forge-token /agents/*/state/forge-token; do
          if [ -f "$f" ]; then
            TOKEN_FILE="$f"
            break
          fi
        done
        if [ -z "$TOKEN_FILE" ]; then
          echo "forge-avatar-sync: no forge-token found; skipping"
          exit 0
        fi
        TOKEN=$(cat "$TOKEN_FILE" 2>/dev/null)
        if [ -z "$TOKEN" ]; then
          echo "forge-avatar-sync: forge-token at $TOKEN_FILE is empty or unreadable; skipping"
          exit 0
        fi
        # Rasterize SVG → PNG (Forgejo's Go image library can't decode SVG).
        PNG=$(mktemp --suffix=.png)
        if [ -z "$PNG" ]; then
          echo "forge-avatar-sync: mktemp failed; skipping"
          exit 0
        fi
        if ! rsvg-convert -f png -w 512 -h 512 "$ICON" -o "$PNG" 2>/dev/null; then
          echo "forge-avatar-sync: rsvg-convert failed; skipping"
          rm -f "$PNG"
          exit 0
        fi
        IMAGE=$(base64 -w 0 < "$PNG")
        rm -f "$PNG"
        if [ -z "$IMAGE" ]; then
          echo "forge-avatar-sync: could not encode PNG; skipping"
          exit 0
        fi
        # Stream the JSON body over stdin instead of passing it as a `-d`
        # argument: Linux caps a single argv string at 128 KiB, which a
        # base64-encoded 512x512 PNG can exceed. `printf` is a shell
        # builtin, so it is not subject to that limit.
        RESP=$(printf '{"image":"data:image/png;base64,%s"}' "$IMAGE" \
          | curl -sf --max-time 10 \
            -X POST "$FORGE_URL/api/v1/user/avatar" \
            -H "Authorization: token $TOKEN" \
            -H "Content-Type: application/json" \
            --data-binary @- \
            -w "\n%{http_code}" 2>/dev/null || true)
        # The status code is the last line of the captured output (from -w).
        CODE=$(printf '%s' "$RESP" | tail -1)
        if [ "$CODE" = "204" ] || [ "$CODE" = "200" ]; then
          echo "forge-avatar-sync: avatar uploaded (HTTP $CODE)"
        else
          echo "forge-avatar-sync: upload returned HTTP $CODE — skipping (non-fatal)"
        fi
        exit 0
      '';
    };

    # Write declared dashboardLinks to the state dir so hive-c0re can read
    # them without accessing the container's /etc/ from the host.
    # Runs every boot; idempotent (overwrite). Always exits 0.
    # NOTE(review): only /agents/*/state is probed. Other units in this
    # module also look at /state (the manager's mount) — confirm whether the
    # manager is expected to publish dashboard links too.
    systemd.services.hive-dashboard-links = lib.mkIf (config.hyperhive.dashboardLinks != [ ]) {
      description = "write declarative dashboardLinks to agent state dir";
      wantedBy = [ "multi-user.target" ];
      serviceConfig = {
        Type = "oneshot";
        RemainAfterExit = true;
      };
      # The JSON payload is handed to the script through the unit environment.
      environment.LINKS_JSON = builtins.toJSON config.hyperhive.dashboardLinks;
      script = ''
        # NixOS wraps `script` in a `bash -e` shebang; disable errexit so a
        # failed write (read-only or missing mount) cannot fail the unit.
        set +e
        # Sub-agents have their state dir bind-mounted at /agents/<name>/state.
        # Use a glob — exactly one match per container at runtime. With zero
        # or several matches the expansion is not a directory and we skip.
        STATE_DIR=$(echo /agents/*/state)
        if [ ! -d "$STATE_DIR" ]; then
          echo "hive-dashboard-links: no state dir found at /agents/*/state; skipping"
          exit 0
        fi
        OUT="$STATE_DIR/hyperhive-dashboard-links.json"
        if ! printf '%s' "$LINKS_JSON" > "$OUT"; then
          echo "hive-dashboard-links: could not write $OUT; skipping"
          exit 0
        fi
        echo "hive-dashboard-links: wrote $(printf '%s' "$LINKS_JSON" | wc -c) bytes to $OUT"
        exit 0
      '';
    };

    # Git is needed by claude's Bash tool (for the agent <-> manager config
    # request flow) and by hive-c0re's own setup_applied / setup_proposed.
    programs.git.enable = true;

    # The per-agent `applied/<name>/flake.nix` overrides `user.name` and
    # `user.email` with the agent's identity — values here are `mkDefault`
    # so the per-agent override wins without needing `mkForce`.
    programs.git.config.user.name = lib.mkDefault "hyperhive";
    programs.git.config.user.email = lib.mkDefault "hyperhive@local";
    programs.git.config.init.defaultBranch = lib.mkDefault "main";

    system.stateVersion = "25.11";
  };
}