#!/usr/bin/env bash
#
# Launch a Mosaic agent in a detached tmux session.
#
# Usage: <this-script> [agent-name]
#
# The agent name comes from the first argument, falling back to
# $MOSAIC_AGENT_NAME. Recognised environment variables (all optional):
#   MOSAIC_TMUX_SOCKET         tmux socket name passed via -L (default: none)
#   MOSAIC_AGENT_RUNTIME       runtime handed to `mosaic yolo` (default: pi)
#   MOSAIC_AGENT_MODEL         model passed as --model (default: omitted)
#   MOSAIC_AGENT_WORKDIR       working directory for the pane (default: $HOME)
#   MOSAIC_AGENT_COMMAND       full pane command, overriding the default
#   MOSAIC_HEARTBEAT_RUN_DIR   heartbeat directory
#                              (default: ${MOSAIC_HOME:-$HOME/.config/mosaic}/fleet/run)
#   MOSAIC_HEARTBEAT_INTERVAL  heartbeat period in seconds (default: 15)
#
# Exit status: 64 when no agent name is given, 69 when tmux is missing,
# 0 when the session is already running.
set -euo pipefail

AGENT_NAME=${1:-${MOSAIC_AGENT_NAME:-}}

# Absent socket ⇒ the LITERAL default tmux socket (no -L). The roster's
# socket_name is honored when set; absent never silently becomes mosaic-fleet
# (spawn stays consistent with the onboarding cheat-sheet + fleet ps observe).
: "${MOSAIC_TMUX_SOCKET:=}"
: "${MOSAIC_AGENT_RUNTIME:=pi}"
: "${MOSAIC_AGENT_MODEL:=}"
: "${MOSAIC_AGENT_WORKDIR:=$HOME}"
: "${MOSAIC_AGENT_COMMAND:=}"
: "${MOSAIC_HEARTBEAT_RUN_DIR:=${MOSAIC_HOME:-$HOME/.config/mosaic}/fleet/run}"
: "${MOSAIC_HEARTBEAT_INTERVAL:=15}"

if [[ -z "$AGENT_NAME" ]]; then
  echo "ERROR: agent name argument or MOSAIC_AGENT_NAME is required" >&2
  exit 64
fi

command -v tmux >/dev/null 2>&1 || {
  echo "ERROR: tmux is required" >&2
  exit 69
}

# tmux wrapper: pass -L only when a socket is configured. An absent/empty socket
# means the default tmux socket (no -L), keeping spawn == observe == cheat-sheet.
_tmux() {
  if [[ -z "$MOSAIC_TMUX_SOCKET" ]]; then
    tmux "$@"
    return
  fi
  tmux -L "$MOSAIC_TMUX_SOCKET" "$@"
}

# Nothing to do when the session's first pane already exists.
if _tmux has-session -t "=${AGENT_NAME}:0.0" 2>/dev/null; then
  echo "Mosaic agent session already running: $AGENT_NAME on socket ${MOSAIC_TMUX_SOCKET:-(default)}"
  exit 0
fi

if [[ -z "$MOSAIC_AGENT_COMMAND" ]]; then
  # Map the roster's per-agent model_hint to `--model` so workers launch on the
  # configured model (e.g. pi on openai-codex/gpt-5.5:high). Omitted when unset.
  MOSAIC_AGENT_COMMAND="mosaic yolo $MOSAIC_AGENT_RUNTIME${MOSAIC_AGENT_MODEL:+ --model $MOSAIC_AGENT_MODEL}"
fi

# ── Derive a runtime-bin PATH prefix ─────────────────────────────────────────
# Precedence:
#   1. $MOSAIC_RUNTIME_BIN          (explicit override)
#   2. $(npm config get prefix)/bin (if npm is on PATH)
#   3. Fallbacks: $HOME/.npm-global/bin and $HOME/.local/bin
#
# Only directories that already exist are included.
# The prefix is baked into the pane command regardless of what the LAUNCHER
# process's $PATH contains, because the tmux pane inherits the tmux SERVER
# environment (not this script's environment). A dir on the launcher's PATH may
# be absent from the server PATH, so every existing candidate must always be
# included. Dedup within the constructed prefix avoids listing the same dir
# twice.

#######################################
# Build a colon-separated list of existing runtime bin directories.
# Candidate order: $MOSAIC_RUNTIME_BIN, "$(npm config get prefix)/bin",
# $HOME/.npm-global/bin, $HOME/.local/bin.
# Globals:   MOSAIC_RUNTIME_BIN (read, optional), HOME (read)
# Arguments: none
# Outputs:   the prefix on stdout without a trailing newline; empty when no
#            candidate directory exists
#######################################
_build_runtime_bin_prefix() {
  local -a candidates=()
  # `dir` is declared local so the loop below cannot clobber a caller's variable.
  local npm_prefix="" dir="" prefix=""

  if [[ -n "${MOSAIC_RUNTIME_BIN:-}" ]]; then
    candidates+=("$MOSAIC_RUNTIME_BIN")
  fi

  if command -v npm >/dev/null 2>&1; then
    # Best-effort: a failing npm must not abort the launcher under `set -e`.
    npm_prefix=$(npm config get prefix 2>/dev/null) || true
    if [[ -n "$npm_prefix" ]]; then
      candidates+=("${npm_prefix}/bin")
    fi
  fi

  candidates+=("$HOME/.npm-global/bin" "$HOME/.local/bin")

  for dir in "${candidates[@]}"; do
    [[ -d "$dir" ]] || continue
    case "$dir" in
      # PATH has no escaping for ':'; such a directory would be split into
      # bogus entries, so it cannot be represented and is skipped.
      *:*) continue ;;
    esac
    case ":${prefix}:" in
      *":${dir}:"*) ;; # already in our prefix — skip
      *) prefix="${prefix:+${prefix}:}${dir}" ;;
    esac
  done

  printf '%s' "$prefix"
}

MOSAIC_RUNTIME_BIN_PREFIX=$(_build_runtime_bin_prefix)

# ── Build the pane command ────────────────────────────────────────────────────
# The pane command must:
#   - Export the augmented PATH so the runtime binary is found.
#   - exec the agent command so the runtime is the pane's foreground process
#     (makes `fleet ps` pane_current_command check reliable; no DRIFT false-positive).
#
# Quoting strategy: the snippet is assembled as one double-quoted string.
# MOSAIC_RUNTIME_BIN_PREFIX is expanded NOW (in this script) because the pane
# shell inherits the tmux server environment, not this script's env. The pane's
# own PATH is written as an escaped \${PATH}, so it expands inside the pane
# shell, after the prefix. The text of MOSAIC_AGENT_COMMAND is inserted
# verbatim; variable references inside it are NOT expanded here — they expand
# inside the pane shell.
#
# MOSAIC_AGENT_NAME must also be exported INTO the pane: panes inherit the tmux
# server environment (not this script's, and not the systemd unit's), so the
# name would otherwise be empty in-pane and the runtime's native heartbeat
# (which gates on MOSAIC_AGENT_NAME) would never fire. %q-quote it so it is a
# safe single bash token regardless of the name's characters.
AGENT_NAME_Q=$(printf '%q' "$AGENT_NAME")

if [[ -n "$MOSAIC_RUNTIME_BIN_PREFIX" ]]; then
  PANE_SHELL_SNIPPET="export MOSAIC_AGENT_NAME=${AGENT_NAME_Q}; export PATH=\"${MOSAIC_RUNTIME_BIN_PREFIX}:\${PATH}\"; exec ${MOSAIC_AGENT_COMMAND}"
else
  PANE_SHELL_SNIPPET="export MOSAIC_AGENT_NAME=${AGENT_NAME_Q}; exec ${MOSAIC_AGENT_COMMAND}"
fi

mkdir -p -- "$MOSAIC_AGENT_WORKDIR"

# ── Pre-trust the workdir for the Claude runtime ─────────────────────────────
# Claude Code shows a one-time "Is this a project you trust?" folder-trust gate
# the first time it opens a directory. A fleet-launched agent has no human to
# answer it, so the pane stalls forever at the prompt while its heartbeat keeps
# reporting "healthy" (the pane process IS alive — it's just blocked).
#
# IMPORTANT: --dangerously-skip-permissions does NOT bypass this gate, and
# neither does `trustedProjectDirectories` in settings.json (verified empirically
# 2026-06-24). The ONLY thing the gate honors is the per-project record in
# ~/.claude.json: projects["<resolved-workdir>"].hasTrustDialogAccepted == true
# (exactly what answering the prompt writes). So we pre-seed that record here.
#
# Idempotent, atomic, best-effort: any failure is non-fatal (the agent still
# launches — worst case it stalls on the gate, i.e. the pre-fix status quo).
# Only the claude runtime needs this; codex/pi have no such gate.

#######################################
# Mark a workdir as trusted in the Claude per-project JSON record.
# Globals:   MOSAIC_CLAUDE_JSON, CLAUDE_CONFIG_DIR, HOME (read)
# Arguments: $1 - workdir the agent will be launched in
# Outputs:   warnings on stderr
# Returns:   0 when the record is present or was written; non-zero otherwise
#            (1: python3 missing; 2: file unreadable or of unexpected shape;
#            3: write failed)
#######################################
_ensure_claude_workdir_trusted() {
  local workdir="$1"

  # The path claude keys on is the resolved cwd it is launched in.
  local rp
  rp=$(cd "$workdir" 2>/dev/null && pwd -P) || rp="$workdir"

  # ~/.claude.json lives next to the claude config dir; honor CLAUDE_CONFIG_DIR.
  local claude_json="${MOSAIC_CLAUDE_JSON:-${CLAUDE_CONFIG_DIR:+$CLAUDE_CONFIG_DIR/.claude.json}}"
  claude_json="${claude_json:-$HOME/.claude.json}"

  if ! command -v python3 >/dev/null 2>&1; then
    echo "WARNING: python3 not found; cannot pre-trust '$rp' for claude (agent may stall on the folder-trust gate)" >&2
    return 1
  fi

  # Serialize concurrent agent launches that share ~/.claude.json (flock if available).
  local lock="${claude_json}.mosaic-lock"

  # Paths are handed to Python through the environment so they are never
  # interpolated into the Python source.
  _seed() {
    MOSAIC_CJ="$claude_json" MOSAIC_TRUST_DIR="$rp" python3 - <<'PY'
import json, os, shutil, sys, tempfile

cj = os.environ["MOSAIC_CJ"]
d = os.environ["MOSAIC_TRUST_DIR"]

try:
    if os.path.exists(cj):
        with open(cj) as f:
            data = json.load(f)
    else:
        data = {}
    if not isinstance(data, dict):
        data = {}
except Exception:
    # Never corrupt an unreadable/partial file — bail without writing.
    sys.exit(2)

projects = data.setdefault("projects", {})
if not isinstance(projects, dict):
    # Unexpected shape — leave the file untouched rather than guess.
    sys.exit(2)

entry = projects.get(d)
if not isinstance(entry, dict):
    entry = {}
    projects[d] = entry
if entry.get("hasTrustDialogAccepted") is True:
    sys.exit(0)  # already trusted — nothing to do
entry["hasTrustDialogAccepted"] = True

tmp_dir = os.path.dirname(cj) or "."
tmp = None
try:
    fd, tmp = tempfile.mkstemp(dir=tmp_dir, prefix=".claude.json.mosaic.")
    with os.fdopen(fd, "w") as f:
        json.dump(data, f, indent=2)
    if os.path.exists(cj):
        # mkstemp creates the file 0600; keep the mode the existing file had.
        shutil.copymode(cj, tmp)
    os.replace(tmp, cj)  # atomic
except Exception:
    if tmp is not None:
        try:
            os.unlink(tmp)
        except OSError:
            pass
    sys.exit(3)
PY
  }

  # Seed exactly once. The lock is used only when flock exists and the lock
  # file can be opened; a failing seed is reported, not silently re-run
  # without the lock.
  local rc=0
  if command -v flock >/dev/null 2>&1 && : 2>/dev/null >>"$lock"; then
    (
      flock 9 || true # best-effort: proceed unlocked if the lock cannot be taken
      _seed
    ) 9>"$lock" || rc=$?
  else
    _seed || rc=$?
  fi
  return "$rc"
}

case "$MOSAIC_AGENT_RUNTIME" in
  claude)
    _ensure_claude_workdir_trusted "$MOSAIC_AGENT_WORKDIR" \
      || echo "WARNING: could not pre-trust workdir for claude agent $AGENT_NAME" >&2
    ;;
esac

# ── Launch the tmux session (no exec — we continue to wire the heartbeat) ────
_tmux new-session -d -s "$AGENT_NAME" -c "$MOSAIC_AGENT_WORKDIR" \
  bash -c "$PANE_SHELL_SNIPPET"

# ── Resolve the pane PID (retry briefly to let the session initialise) ────────
PANE_PID=""
for _retry in 1 2 3 4 5; do
  PANE_PID=$(_tmux list-panes \
    -t "=${AGENT_NAME}:0.0" -F '#{pane_pid}' 2>/dev/null || true)
  [[ -n "$PANE_PID" ]] && break
  sleep 0.2
done

# ── Spawn the heartbeat sidecar (detached, best-effort) ──────────────────────
# The sidecar writes <run_dir>/<agent>.hb atomically while the pane process is
# alive, then exits so the file goes stale (fleet ps shows stale then
# PANE=dead). It is runtime-agnostic: it only cares about the pane PID.

#######################################
# Start a detached loop that refreshes the agent's heartbeat file.
# Arguments: $1 - agent name (heartbeat file is <run_dir>/<agent>.hb)
#            $2 - pane PID to watch with `kill -0`
#            $3 - run directory (created if missing)
#            $4 - interval in whole seconds; invalid or < 1 falls back to 15
# Outputs:   warning on stderr when the interval is invalid
#######################################
_start_heartbeat_sidecar() {
  local agent="$1"
  local pane_pid="$2"
  local run_dir="$3"
  local interval="$4"
  local hb_file="${run_dir}/${agent}.hb"

  # A non-numeric or zero interval would make `sleep` fail or return at once
  # and turn the sidecar into a busy loop; the arithmetic below also needs an
  # integer.
  if [[ ! "$interval" =~ ^[0-9]+$ ]] || ((10#$interval < 1)); then
    echo "WARNING: invalid heartbeat interval '$interval'; using 15" >&2
    interval=15
  fi
  interval=$((10#$interval)) # strip leading zeros (avoid octal parsing)

  mkdir -p -- "$run_dir"

  # Write the sidecar as a self-contained bash script so it carries no
  # references to any variables from this script's environment. While a fresh
  # "<hb>.native" file exists (younger than two intervals) the sidecar only
  # sleeps; otherwise it writes the heartbeat via a temp file + mv.
  # File mtime: GNU `stat -c %Y`, then BSD `stat -f %m`, else 0 (= stale).
  local sidecar_script
  sidecar_script=$(printf \
    'hb=%q; pid=%q; iv=%q; mkdir -p "$(dirname "$hb")"; while kill -0 "$pid" 2>/dev/null; do nat="$hb.native"; if [ -f "$nat" ] && [ "$(( $(date +%%s) - $(stat -c %%Y "$nat" 2>/dev/null || stat -f %%m "$nat" 2>/dev/null || echo 0) ))" -lt "$(( iv * 2 ))" ]; then sleep "$iv"; continue; fi; tmp="$hb.tmp.$$"; printf "ts=%%s\npid=%%s\nstatus=ok\n" "$(date +%%Y-%%m-%%dT%%H:%%M:%%S%%z)" "$pid" > "$tmp" && mv "$tmp" "$hb"; sleep "$iv"; done' \
    "$hb_file" "$pane_pid" "$interval")

  # setsid + disown ensures the sidecar survives this script exiting.
  # stdin/stdout/stderr are all attached to /dev/null so the sidecar never
  # holds the launcher's descriptors open; failures are non-fatal.
  if command -v setsid >/dev/null 2>&1; then
    setsid bash -c "$sidecar_script" </dev/null >/dev/null 2>&1 &
  else
    bash -c "$sidecar_script" </dev/null >/dev/null 2>&1 &
  fi
  disown $! 2>/dev/null || true
}

if [[ -n "$PANE_PID" ]]; then
  # Guard: do not let sidecar startup failures abort the launcher (set -e).
  _start_heartbeat_sidecar "$AGENT_NAME" "$PANE_PID" \
    "$MOSAIC_HEARTBEAT_RUN_DIR" "$MOSAIC_HEARTBEAT_INTERVAL" \
    || echo "WARNING: heartbeat sidecar could not be started for $AGENT_NAME" >&2
else
  echo "WARNING: could not resolve pane PID for $AGENT_NAME — heartbeat sidecar not started" >&2
fi