Compare commits
4 Commits
feat/fleet
...
feat/fleet
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
10d65c4a10 | ||
| c2c0b5fe8d | |||
| c9cfe36204 | |||
| fc90c89913 |
@@ -115,6 +115,11 @@ Every artifact, starting Phase 2, MUST:
|
||||
- Observation: **read-only default, opt-in takeover**.
|
||||
- Multi-host: **designed-for from day one**; control plane **rides federation (W1)**.
|
||||
- Delivery: **CLI-first now**, dogfood against the live stub fleet; webUI deferred to Phase 5.
|
||||
- Runtimes: fleet agents default to **Codex / pi-on-Codex**; **Claude is reserved for Claude
|
||||
Code only** (avoid alternate-harness API pricing). Validated durable recipe:
|
||||
`mosaic yolo pi --model openai-codex/gpt-5.5:high`. Durable detached launch requires the
|
||||
runtime-bin on PATH (baked into the pane command) + boot-survival (`enable` + linger),
|
||||
which `fleet init` should automate.
|
||||
|
||||
## Assumptions (veto-able)
|
||||
|
||||
|
||||
@@ -73,3 +73,28 @@ with a second agent on `dragon-lin`.
|
||||
tmux session-name fallback; the systemd/tmux env handoff needs a real fix.
|
||||
- Next: rebase on merged main, open Phase-2 PR, dual-engine review, merge, close
|
||||
`fleet-observability-1`. Defer launch-path + env-propagation fixes to Phase 3.
|
||||
- 2026-06-21 (session 3): Phase-2 PR #579 merged (3 dual-engine rounds hardened
|
||||
verify+watch). Then closed the launch-path question with Jason's input — CORRECTING
|
||||
earlier findings:
|
||||
- The ad-hoc launch deaths were NOT a fundamental TTY blocker: (a) codex was a stale
|
||||
version (Jason updated it); (b) pi was misconfigured to Claude auth (Jason removed it;
|
||||
default is now Codex). The REAL durable-launch bug is **PATH**: the detached tmux
|
||||
launch shell is login+non-interactive, so it misses `~/.npm-global/bin` (added only in
|
||||
`~/.bashrc`) -> `mosaic: command not found` (127) -> pane dies. tmux panes inherit the
|
||||
tmux _server_ env, so PATH must be baked into the pane command.
|
||||
- **Durable real-agent recipe (validated live on gpt-5.5, Claude-free):**
|
||||
`mosaic yolo pi --model openai-codex/gpt-5.5:high` — pi tolerates detached tmux; a raw
|
||||
interactive TUI (codex CLI) exits without an attached client. Status line confirmed
|
||||
`(openai-codex) gpt-5.5 • high`.
|
||||
- PATH fix landed in `start-agent-session.sh` (commit 32efc13, branch
|
||||
feat/fleet-launch-path): derive runtime-bin prefix (MOSAIC_RUNTIME_BIN | npm prefix |
|
||||
~/.npm-global/bin | ~/.local/bin), bake `export PATH=...; exec <cmd>` into the pane;
|
||||
`exec` also fixes the drift false-positive. Live-tested under stripped PATH -> durable.
|
||||
- Boot-survival: Jason ran `systemctl --user enable` (+ linger). TODO: auto-enable in
|
||||
**fleet init** so operators never have to remember it (agentic-enhancement cycle).
|
||||
- Future custom Pi harness build: pi cannot self-report its model (track
|
||||
runtime/model/effort as fleet metadata); drift detection should recognize `node` as
|
||||
pi's pane command (a node-wrapped pane can currently read as drift).
|
||||
- Findings recorded in AI Guide playbooks/tmux-fleet.md (aiguide PR #7, merged).
|
||||
- Policy: avoid Claude outside Claude Code (API pricing for alt-harness use) — fleet
|
||||
runtimes default to Codex / pi-on-Codex; Claude stays in Claude Code only.
|
||||
|
||||
@@ -70,6 +70,9 @@ Skills, hooks, MCP, and plugins are force multipliers you MUST use when applicab
|
||||
## Missing core file
|
||||
|
||||
If `CONSTITUTION.md`, `AGENTS.md`, `SOUL.md`, or the runtime contract is missing, stop and report it.
|
||||
This agent-facing strictness is intentional and stricter than the launcher: the launcher injects
|
||||
`CONSTITUTION.md` tolerantly (skipping it if absent so pre-upgrade hosts keep working), but once a host
|
||||
is re-seeded a genuinely missing core file is a stop-and-report condition — not something to proceed past.
|
||||
|
||||
## Session Closure
|
||||
|
||||
|
||||
@@ -2,8 +2,11 @@
|
||||
|
||||
The irreducible, non-negotiable law for every Mosaic agent on every harness.
|
||||
|
||||
**Framework-owned.** This file is overwritten verbatim on every upgrade — do not edit it. To change
|
||||
behavior, add a `.local.md` overlay or a `policy/` file (tighten-only; see `constitution/LAYER-MODEL.md`).
|
||||
**Framework-owned.** This file is overwritten verbatim on every upgrade — do not edit it. There is
|
||||
**no `CONSTITUTION.local.md`**: hard gates are not locally overridable. A lower layer may only make
|
||||
behavior _stricter_, never relax or override a gate (see Precedence). Operator customization lives in
|
||||
other layers — `SOUL.md` / `USER.md` and the tighten-only overlays `STANDARDS.local.md` /
|
||||
`SOUL.local.md` / `USER.local.md` / `policy/*.md` (see `constitution/LAYER-MODEL.md`).
|
||||
Authored in **capability verbs**: where a gate names a capability ("structured reasoning", "queue
|
||||
guard"), the runtime adapter binds it to a concrete tool and states whether absence is a hard stop.
|
||||
|
||||
|
||||
@@ -26,5 +26,75 @@ if [ -z "$MOSAIC_AGENT_COMMAND" ]; then
|
||||
MOSAIC_AGENT_COMMAND="mosaic yolo $MOSAIC_AGENT_RUNTIME"
|
||||
fi
|
||||
|
||||
# ── Derive a runtime-bin PATH prefix ─────────────────────────────────────────
|
||||
# Precedence:
|
||||
# 1. $MOSAIC_RUNTIME_BIN (explicit override)
|
||||
# 2. $(npm config get prefix)/bin (if npm is on PATH)
|
||||
# 3. Fallbacks: $HOME/.npm-global/bin and $HOME/.local/bin
|
||||
#
|
||||
# Only directories that already exist are included. The prefix is baked into
|
||||
# the pane command regardless of what the LAUNCHER process's $PATH contains,
|
||||
# because the tmux pane inherits the tmux SERVER environment (not this script's
|
||||
# environment). A dir on the launcher's PATH may be absent from the server PATH,
|
||||
# so every existing candidate must always be included. Dedup within the
|
||||
# constructed prefix avoids listing the same dir twice.
|
||||
# Print a colon-separated PATH prefix made of the runtime-bin directories that
# exist on this host.
#
# Candidates, in precedence order:
#   1. $MOSAIC_RUNTIME_BIN            (only when set and non-empty)
#   2. "$(npm config get prefix)/bin" (only when npm is on PATH and prints a prefix)
#   3. $HOME/.npm-global/bin, then $HOME/.local/bin
#
# A candidate that is not an existing directory is dropped, and a directory is
# listed at most once. Prints nothing when no candidate exists. The output has
# no trailing newline.
_build_runtime_bin_prefix() {
  # Every working variable is local so nothing leaks into the calling shell
  # when the function is invoked outside a command substitution.
  local -a candidates=()
  local npm_prefix="" dir prefix=""

  if [ -n "${MOSAIC_RUNTIME_BIN:-}" ]; then
    candidates+=("$MOSAIC_RUNTIME_BIN")
  fi

  if command -v npm >/dev/null 2>&1; then
    # An npm failure is tolerated: an empty prefix simply adds no candidate.
    npm_prefix=$(npm config get prefix 2>/dev/null) || true
    if [ -n "$npm_prefix" ]; then
      candidates+=("${npm_prefix}/bin")
    fi
  fi

  candidates+=("$HOME/.npm-global/bin" "$HOME/.local/bin")

  for dir in "${candidates[@]}"; do
    [ -d "$dir" ] || continue
    # Wrapping both sides in ':' makes this a whole-entry comparison, so
    # /a/bin does not count as already present just because /a/bin2 is.
    case ":${prefix}:" in
      *":${dir}:"*) ;;                            # already in our prefix — skip
      *) prefix="${prefix:+${prefix}:}${dir}" ;;  # first entry gets no leading ':'
    esac
  done

  printf '%s' "$prefix"
}
|
||||
|
||||
MOSAIC_RUNTIME_BIN_PREFIX=$(_build_runtime_bin_prefix)
|
||||
|
||||
# ── Build the pane command ────────────────────────────────────────────────────
# The pane command must:
#   - Export the augmented PATH so the runtime binary is found.
#   - exec the agent command so the runtime is the pane's foreground process
#     (makes `fleet ps` pane_current_command check reliable; no DRIFT false-positive).
#
# Quoting strategy: the snippet is ONE string handed to `bash -c` in the pane.
#   - MOSAIC_RUNTIME_BIN_PREFIX is expanded NOW (in this script), because the
#     pane shell inherits the tmux server environment, not this script's env.
#     It is shell-quoted with `printf %q` so a directory containing spaces,
#     quotes or `$` cannot break or alter the snippet.
#   - ${PATH} is deliberately escaped so it is expanded LATER, inside the pane,
#     and the prefix is prepended to whatever PATH the pane actually has.
#   - MOSAIC_AGENT_COMMAND is spliced in verbatim as a command line; any
#     variable references inside it are expanded by the pane shell.

if [ -n "$MOSAIC_RUNTIME_BIN_PREFIX" ]; then
  printf -v MOSAIC_RUNTIME_BIN_PREFIX_Q '%q' "$MOSAIC_RUNTIME_BIN_PREFIX"
  PANE_SHELL_SNIPPET="export PATH=${MOSAIC_RUNTIME_BIN_PREFIX_Q}:\"\${PATH}\"; exec ${MOSAIC_AGENT_COMMAND}"
else
  PANE_SHELL_SNIPPET="exec ${MOSAIC_AGENT_COMMAND}"
fi

mkdir -p "$MOSAIC_AGENT_WORKDIR"
# Single launch path: the pane always runs the snippet built above. `exec`
# replaces this script with tmux, so nothing after this line would ever run.
exec tmux -L "$MOSAIC_TMUX_SOCKET" new-session -d -s "$AGENT_NAME" -c "$MOSAIC_AGENT_WORKDIR" \
  bash -c "$PANE_SHELL_SNIPPET"
|
||||
|
||||
@@ -6,13 +6,26 @@ START="$SCRIPT_DIR/start-agent-session.sh"
|
||||
SOCKET="mosaic-agent-test-$RANDOM-$$"
|
||||
AGENT="agent-$RANDOM"
|
||||
WORKDIR=$(mktemp -d)
|
||||
trap 'tmux -L "$SOCKET" kill-server >/dev/null 2>&1 || true; rm -rf "$WORKDIR"' EXIT
|
||||
|
||||
# Keep a single cleanup trap that accumulates resources.
|
||||
CLEANUP_DIRS=("$WORKDIR")
|
||||
CLEANUP_SOCKETS=("$SOCKET")
|
||||
trap '_cleanup' EXIT
|
||||
# EXIT-trap handler: kill the tmux server behind every socket name listed in
# CLEANUP_SOCKETS, then remove every directory listed in CLEANUP_DIRS.
# tmux failures are ignored so one missing server cannot abort the rest of the
# cleanup.
_cleanup() {
  local sock dir
  for sock in "${CLEANUP_SOCKETS[@]:-}"; do
    # The ':-' guard turns an empty array into a single empty word; skip it
    # rather than running tmux against an empty socket name.
    [ -n "$sock" ] || continue
    tmux -L "$sock" kill-server >/dev/null 2>&1 || true
  done
  for dir in "${CLEANUP_DIRS[@]:-}"; do
    [ -n "$dir" ] || continue
    rm -rf -- "$dir"
  done
}
|
||||
|
||||
# Report a test failure and abort: writes "FAIL: <all arguments joined by
# spaces>" to stderr, then exits the script with status 1.
fail() {
  printf 'FAIL: %s\n' "$*" >&2
  exit 1
}
|
||||
|
||||
# ── Test 1: basic session creation with workdir check ─────────────────────────
|
||||
MOSAIC_TMUX_SOCKET="$SOCKET" \
|
||||
MOSAIC_AGENT_WORKDIR="$WORKDIR" \
|
||||
MOSAIC_AGENT_COMMAND='bash --noprofile --norc -i' \
|
||||
@@ -22,6 +35,7 @@ tmux -L "$SOCKET" has-session -t "=$AGENT:0.0" || fail "agent session was not cr
|
||||
actual_dir=$(tmux -L "$SOCKET" display-message -p -t "=$AGENT:0.0" '#{pane_current_path}')
|
||||
[ "$actual_dir" = "$WORKDIR" ] || fail "agent workdir mismatch: $actual_dir"
|
||||
|
||||
# ── Test 2: idempotency (duplicate start prints 'already running') ─────────────
|
||||
MOSAIC_TMUX_SOCKET="$SOCKET" \
|
||||
MOSAIC_AGENT_WORKDIR="$WORKDIR" \
|
||||
MOSAIC_AGENT_COMMAND='bash --noprofile --norc -i' \
|
||||
@@ -29,4 +43,166 @@ MOSAIC_AGENT_COMMAND='bash --noprofile --norc -i' \
|
||||
|
||||
grep -qF 'already running' /tmp/mosaic-start-agent-idempotent.out || fail "duplicate start was not idempotent"
|
||||
|
||||
# ── Test 3: runtime-bin PATH prefix is baked into the pane command ────────────
|
||||
#
|
||||
# We capture the command the script would hand to tmux by injecting a fake
|
||||
# 'tmux' shim into PATH. The shim:
|
||||
# - Intercepts 'new-session' calls and records its arguments to a file.
|
||||
# - For 'has-session' calls, exits 1 (session does not exist) so the script
|
||||
# proceeds to launch instead of printing "already running".
|
||||
# - For all other subcommands, exits 0.
|
||||
#
|
||||
# Assertions:
|
||||
# a) 'export PATH=' with the synthetic MOSAIC_RUNTIME_BIN prefix appears.
|
||||
# b) 'exec' appears so the runtime replaces the wrapper shell.
|
||||
# c) MOSAIC_AGENT_COMMAND with flags is forwarded intact.
|
||||
|
||||
FAKE_BIN=$(mktemp -d)
|
||||
FAKE_RUNTIME_BIN=$(mktemp -d)
|
||||
TMUX_ARGS_FILE=$(mktemp)
|
||||
CLEANUP_DIRS+=("$FAKE_BIN" "$FAKE_RUNTIME_BIN")
|
||||
|
||||
# Write the fake tmux shim (uses only positional args, no sourced vars).
|
||||
cat > "$FAKE_BIN/tmux" <<SHIM
|
||||
#!/usr/bin/env bash
|
||||
# Fake tmux: record new-session args; report has-session as missing.
|
||||
subcmd="\$3" # argv: tmux -L <socket> <subcmd> ...
|
||||
if [ "\$subcmd" = "has-session" ]; then
|
||||
exit 1 # session not found → script will attempt new-session
|
||||
fi
|
||||
if [ "\$subcmd" = "new-session" ]; then
|
||||
printf '%s\n' "\$@" > "$TMUX_ARGS_FILE"
|
||||
exit 0
|
||||
fi
|
||||
exit 0
|
||||
SHIM
|
||||
chmod +x "$FAKE_BIN/tmux"
|
||||
|
||||
SOCKET3="mosaic-agent-test3-$RANDOM-$$"
|
||||
AGENT3="agent3-$RANDOM"
|
||||
WORKDIR3=$(mktemp -d)
|
||||
CLEANUP_DIRS+=("$WORKDIR3")
|
||||
|
||||
PATH="$FAKE_BIN:$PATH" \
|
||||
MOSAIC_TMUX_SOCKET="$SOCKET3" \
|
||||
MOSAIC_AGENT_WORKDIR="$WORKDIR3" \
|
||||
MOSAIC_AGENT_RUNTIME="pi" \
|
||||
MOSAIC_RUNTIME_BIN="$FAKE_RUNTIME_BIN" \
|
||||
MOSAIC_AGENT_COMMAND="mosaic yolo pi --model openai-codex/gpt-5.5:high" \
|
||||
"$START" "$AGENT3"
|
||||
|
||||
all_args=$(cat "$TMUX_ARGS_FILE" 2>/dev/null || true)
|
||||
rm -f "$TMUX_ARGS_FILE"
|
||||
|
||||
echo "--- captured tmux new-session args ---"
|
||||
echo "$all_args"
|
||||
echo "--- end args ---"
|
||||
|
||||
# a) PATH prefix containing FAKE_RUNTIME_BIN must appear.
|
||||
echo "$all_args" | grep -qF "export PATH=" || fail "pane command does not export PATH"
|
||||
echo "$all_args" | grep -qF "$FAKE_RUNTIME_BIN" || fail "pane command does not include MOSAIC_RUNTIME_BIN in PATH prefix"
|
||||
|
||||
# b) exec must appear so the runtime replaces the wrapper shell.
|
||||
echo "$all_args" | grep -qF "exec " || fail "pane command does not use exec"
|
||||
|
||||
# c) Full MOSAIC_AGENT_COMMAND (with flags) must be forwarded.
|
||||
echo "$all_args" | grep -qF "mosaic yolo pi --model openai-codex/gpt-5.5:high" || \
|
||||
fail "pane command does not forward MOSAIC_AGENT_COMMAND with flags intact"
|
||||
|
||||
# ── Test 4: when no extra runtime-bin dirs exist, exec still appears ───────────
|
||||
TMUX_ARGS_FILE2=$(mktemp)
|
||||
FAKE_BIN2=$(mktemp -d)
|
||||
CLEANUP_DIRS+=("$FAKE_BIN2")
|
||||
|
||||
cat > "$FAKE_BIN2/tmux" <<SHIM2
|
||||
#!/usr/bin/env bash
|
||||
subcmd="\$3"
|
||||
if [ "\$subcmd" = "has-session" ]; then exit 1; fi
|
||||
if [ "\$subcmd" = "new-session" ]; then
|
||||
printf '%s\n' "\$@" > "$TMUX_ARGS_FILE2"
|
||||
exit 0
|
||||
fi
|
||||
exit 0
|
||||
SHIM2
|
||||
chmod +x "$FAKE_BIN2/tmux"
|
||||
|
||||
SOCKET4="mosaic-agent-test4-$RANDOM-$$"
|
||||
AGENT4="agent4-$RANDOM"
|
||||
WORKDIR4=$(mktemp -d)
|
||||
CLEANUP_DIRS+=("$WORKDIR4")
|
||||
|
||||
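# MOSAIC_RUNTIME_BIN points to a non-existent dir, so it contributes nothing to
# the prefix. The npm prefix, ~/.npm-global/bin and ~/.local/bin may or may not
# exist on this host, so the prefix is NOT guaranteed to be empty; this test
# only asserts that `exec` and the agent command are present either way.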
|
||||
PATH="$FAKE_BIN2:$PATH" \
|
||||
MOSAIC_TMUX_SOCKET="$SOCKET4" \
|
||||
MOSAIC_AGENT_WORKDIR="$WORKDIR4" \
|
||||
MOSAIC_AGENT_RUNTIME="pi" \
|
||||
MOSAIC_RUNTIME_BIN="/nonexistent-dir-$$" \
|
||||
MOSAIC_AGENT_COMMAND="mosaic yolo pi" \
|
||||
"$START" "$AGENT4"
|
||||
|
||||
all_args4=$(cat "$TMUX_ARGS_FILE2" 2>/dev/null || true)
|
||||
rm -f "$TMUX_ARGS_FILE2"
|
||||
rm -rf "$WORKDIR4"
|
||||
|
||||
echo "$all_args4" | grep -qF "exec " || fail "pane command (no prefix dirs) does not use exec"
|
||||
echo "$all_args4" | grep -qF "mosaic yolo pi" || fail "pane command does not include agent command when no prefix"
|
||||
|
||||
# ── Test 5: candidate dir already in LAUNCHER $PATH is still baked into pane ──
|
||||
#
|
||||
# Regression guard for the bug where _build_runtime_bin_prefix() used to skip
|
||||
# a candidate because it was already present in the launcher process's $PATH.
|
||||
# That check was wrong: the pane inherits the tmux SERVER environment, not the
|
||||
# launcher's env. Even if a dir is on the launcher's PATH it must always be
|
||||
# baked into the pane's PATH export.
|
||||
#
|
||||
# We prove this by setting PATH to include FAKE_RUNTIME_BIN5 (the candidate),
|
||||
# then asserting the generated new-session command still exports it.
|
||||
TMUX_ARGS_FILE5=$(mktemp)
|
||||
FAKE_BIN5=$(mktemp -d)
|
||||
FAKE_RUNTIME_BIN5=$(mktemp -d) # this dir IS on the launcher's PATH below
|
||||
CLEANUP_DIRS+=("$FAKE_BIN5" "$FAKE_RUNTIME_BIN5")
|
||||
|
||||
cat > "$FAKE_BIN5/tmux" <<SHIM5
|
||||
#!/usr/bin/env bash
|
||||
subcmd="\$3"
|
||||
if [ "\$subcmd" = "has-session" ]; then exit 1; fi
|
||||
if [ "\$subcmd" = "new-session" ]; then
|
||||
printf '%s\n' "\$@" > "$TMUX_ARGS_FILE5"
|
||||
exit 0
|
||||
fi
|
||||
exit 0
|
||||
SHIM5
|
||||
chmod +x "$FAKE_BIN5/tmux"
|
||||
|
||||
SOCKET5="mosaic-agent-test5-$RANDOM-$$"
|
||||
AGENT5="agent5-$RANDOM"
|
||||
WORKDIR5=$(mktemp -d)
|
||||
CLEANUP_DIRS+=("$WORKDIR5")
|
||||
CLEANUP_SOCKETS+=("$SOCKET5")
|
||||
|
||||
# FAKE_RUNTIME_BIN5 is deliberately placed on the LAUNCHER PATH so that the
|
||||
# old (buggy) code would have skipped it. The correct code must still include
|
||||
# it in the pane PATH export.
|
||||
PATH="$FAKE_BIN5:$FAKE_RUNTIME_BIN5:$PATH" \
|
||||
MOSAIC_TMUX_SOCKET="$SOCKET5" \
|
||||
MOSAIC_AGENT_WORKDIR="$WORKDIR5" \
|
||||
MOSAIC_AGENT_RUNTIME="pi" \
|
||||
MOSAIC_RUNTIME_BIN="$FAKE_RUNTIME_BIN5" \
|
||||
MOSAIC_AGENT_COMMAND="mosaic yolo pi" \
|
||||
"$START" "$AGENT5"
|
||||
|
||||
all_args5=$(cat "$TMUX_ARGS_FILE5" 2>/dev/null || true)
|
||||
rm -f "$TMUX_ARGS_FILE5"
|
||||
rm -rf "$WORKDIR5"
|
||||
|
||||
echo "--- test 5: launcher-PATH candidate must still appear in pane export ---"
|
||||
echo "$all_args5"
|
||||
echo "--- end test 5 args ---"
|
||||
|
||||
echo "$all_args5" | grep -qF "export PATH=" || \
|
||||
fail "test5: pane command does not export PATH when candidate is on launcher PATH"
|
||||
echo "$all_args5" | grep -qF "$FAKE_RUNTIME_BIN5" || \
|
||||
fail "test5: candidate dir (already on launcher PATH) was NOT baked into pane PATH — regression"
|
||||
|
||||
echo "ok - start-agent-session"
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
# 2. STRUCTURAL (private $HOME default in *.sh) — scanned everywhere EXCEPT examples/,
|
||||
# because worked example overlays/personas legitimately show placeholder paths.
|
||||
#
|
||||
# File types: *.md, *.sh, *.ps1, *.json, and the extensionless CLI scripts under
|
||||
# File types: *.md, *.sh, *.ps1, *.json, *.yml/*.yaml, *.toml, *.env, *.service, and the CLI scripts under
|
||||
# tools/_scripts/. Excludes node_modules/ and this gate file.
|
||||
#
|
||||
# NOTE: '\bPDA\b' intentionally matches "PDA-friendly" (the contamination removed in P2);
|
||||
@@ -39,7 +39,7 @@ cd "$FRAMEWORK_ROOT" || { echo "FRAMEWORK_ROOT not found: $FRAMEWORK_ROOT" >&2;
|
||||
# Identity scope = ALL shipped text files (examples/ INCLUDED).
|
||||
# Emit, NUL-delimited, every regular file under the current directory that is
# in identity scope: *.md, *.sh, *.ps1, *.json, *.yml, *.yaml, *.toml, *.env,
# *.service, plus any file under a tools/_scripts/ directory.
# Files under node_modules/ and the gate file itself (./$SELF_REL) are excluded.
_files_identity() {
  # Exactly ONE name group: two consecutive \( ... \) groups are ANDed by find,
  # which would silently narrow the scan back to the shorter extension list.
  find . -type f \
    \( -name '*.md' -o -name '*.sh' -o -name '*.ps1' -o -name '*.json' \
       -o -name '*.yml' -o -name '*.yaml' -o -name '*.toml' -o -name '*.env' \
       -o -name '*.service' -o -path '*/tools/_scripts/*' \) \
    -not -path '*/node_modules/*' -not -path "./$SELF_REL" -print0
}
|
||||
# Structural scope = shipped scripts, examples/ EXCLUDED.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "@mosaicstack/mosaic",
|
||||
"version": "0.0.34",
|
||||
"version": "0.0.35",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://git.mosaicstack.dev/mosaicstack/stack.git",
|
||||
|
||||
@@ -10,11 +10,14 @@ import {
|
||||
buildAgentWatchCreateViewerCommand,
|
||||
buildAgentWatchKillViewerCommand,
|
||||
buildAgentVerifyAcceptedCommand,
|
||||
buildEnableLingerCommand,
|
||||
buildFleetServiceCommand,
|
||||
buildSystemdEnableCommand,
|
||||
buildSystemdShowCommand,
|
||||
buildTmuxListPanesCommand,
|
||||
classifySendResult,
|
||||
detectDrift,
|
||||
enableFleetUnits,
|
||||
generateAgentEnv,
|
||||
getDefaultOperatorSourceLabel,
|
||||
getDefaultTenantAndHost,
|
||||
@@ -28,10 +31,12 @@ import {
|
||||
parseTmuxListPanes,
|
||||
registerFleetCommand,
|
||||
resolveFleetPaths,
|
||||
RUNTIME_ACCEPTABLE_COMMANDS,
|
||||
VERIFY_DEFAULT_TIMEOUT_MS,
|
||||
VERIFY_POLL_INTERVAL_MS,
|
||||
type AgentPsRow,
|
||||
type CommandRunner,
|
||||
type FleetRoster,
|
||||
type InteractiveRunner,
|
||||
type SleepFn,
|
||||
} from './fleet.js';
|
||||
@@ -909,6 +914,118 @@ describe('fleet ps — drift detection', () => {
|
||||
it('does NOT flag drift when pane command is null (pane dead)', () => {
|
||||
expect(detectDrift('pi', null)).toBe(false);
|
||||
});
|
||||
|
||||
it('does NOT flag drift when pane=node for wrapped pi agent (mosaic yolo pi)', () => {
|
||||
expect(detectDrift('pi', 'node')).toBe(false);
|
||||
});
|
||||
|
||||
it('does NOT flag drift when pane=node for wrapped codex agent (mosaic yolo codex)', () => {
|
||||
expect(detectDrift('codex', 'node')).toBe(false);
|
||||
});
|
||||
|
||||
it('flags drift when pane=python3 for pi runtime (canary-pi dogfood regression guard)', () => {
|
||||
expect(detectDrift('pi', 'python3')).toBe(true);
|
||||
});
|
||||
|
||||
it('does NOT flag drift when pane=python3 for dogfood runtime', () => {
|
||||
expect(detectDrift('dogfood', 'python3')).toBe(false);
|
||||
});
|
||||
|
||||
it('flags drift for unknown pane command on known runtime', () => {
|
||||
expect(detectDrift('claude', 'bash')).toBe(true);
|
||||
});
|
||||
|
||||
it('RUNTIME_ACCEPTABLE_COMMANDS is exported and contains expected entries', () => {
|
||||
expect(RUNTIME_ACCEPTABLE_COMMANDS['pi']).toContain('node');
|
||||
expect(RUNTIME_ACCEPTABLE_COMMANDS['pi']).not.toContain('python3');
|
||||
expect(RUNTIME_ACCEPTABLE_COMMANDS['dogfood']).toContain('python3');
|
||||
expect(RUNTIME_ACCEPTABLE_COMMANDS['codex']).toContain('node');
|
||||
});
|
||||
});
|
||||
|
||||
describe('fleet install — auto-enable units for boot-survival', () => {
  /** Builds a fresh single-agent roster for each test so fixtures are never shared. */
  const makeRoster = (): FleetRoster => ({
    version: 1,
    transport: 'tmux',
    tmux: { socketName: 'mosaic-factory', holderSession: '_holder' },
    defaults: { workingDirectory: '~/src' },
    runtimes: { codex: { resetCommand: '/clear' } },
    agents: [{ name: 'coder0', runtime: 'codex', className: 'worker' }],
  });

  /**
   * Creates a runner that records every invocation as a flat argv in `calls`.
   * With `failEnable` set, any `systemctl … enable` call reports exit code 1;
   * every other call reports success.
   */
  const makeRecorder = (failEnable = false): { calls: string[][]; runner: CommandRunner } => {
    const calls: string[][] = [];
    const runner: CommandRunner = async (command, args) => {
      calls.push([command, ...args]);
      if (failEnable && command === 'systemctl' && args.includes('enable')) {
        // Simulate systemctl enable failure
        return { stdout: '', stderr: 'Unit not found', exitCode: 1 };
      }
      return { stdout: '', stderr: '', exitCode: 0 };
    };
    return { calls, runner };
  };

  it('buildSystemdEnableCommand and buildEnableLingerCommand return correct command arrays', () => {
    const enableArgv = buildSystemdEnableCommand('mosaic-tmux-holder.service');
    expect(enableArgv).toEqual(['systemctl', '--user', 'enable', 'mosaic-tmux-holder.service']);

    const lingerArgv = buildEnableLingerCommand('testuser');
    expect(lingerArgv).toEqual(['loginctl', 'enable-linger', 'testuser']);
  });

  it('enables holder and each agent unit via injected runner after install', async () => {
    const { calls, runner } = makeRecorder();

    await enableFleetUnits(runner, makeRoster(), {});

    expect(calls).toContainEqual(['systemctl', '--user', 'enable', 'mosaic-tmux-holder.service']);
    expect(calls).toContainEqual(['systemctl', '--user', 'enable', 'mosaic-agent@coder0.service']);
  });

  it('install still succeeds when systemctl enable returns non-zero (non-fatal)', async () => {
    const { calls, runner } = makeRecorder(true);

    // Must NOT reject/throw even when enable calls fail
    await expect(enableFleetUnits(runner, makeRoster(), {})).resolves.toBeUndefined();

    // The enable attempt must have been made
    expect(calls.some((argv) => argv.includes('enable'))).toBe(true);
  });

  it('--no-enable skips all systemctl enable and loginctl linger calls', async () => {
    const { calls, runner } = makeRecorder();

    await enableFleetUnits(runner, makeRoster(), { enable: false });

    // No calls should include 'enable'
    expect(calls.every((argv) => !argv.includes('enable'))).toBe(true);
    // No loginctl calls at all
    expect(calls.every((argv) => argv[0] !== 'loginctl')).toBe(true);
  });
});
|
||||
|
||||
describe('fleet ps — tenant and host', () => {
|
||||
|
||||
@@ -210,6 +210,93 @@ export function buildFleetServiceCommand(action: FleetServiceAction, agentName?:
|
||||
return ['systemctl', '--user', action, service];
|
||||
}
|
||||
|
||||
/**
 * Builds the argv that enables a user-level systemd unit.
 * The install auto-enable step runs it so the unit persists across reboots.
 *
 * @param unit - Unit name, e.g. `mosaic-tmux-holder.service`.
 * @returns `['systemctl', '--user', 'enable', unit]`.
 */
export function buildSystemdEnableCommand(unit: string): string[] {
  const argv = ['systemctl', '--user', 'enable'];
  argv.push(unit);
  return argv;
}
|
||||
|
||||
/**
 * Builds the argv that turns on systemd "linger" for a user.
 * Linger allows that user's systemd services to survive logout.
 *
 * @param user - Login name to enable linger for.
 * @returns `['loginctl', 'enable-linger', user]`.
 */
export function buildEnableLingerCommand(user: string): string[] {
  const base = ['loginctl', 'enable-linger'];
  return [...base, user];
}
|
||||
|
||||
/**
 * Resolves the login name to enable linger for.
 * Prefers `os.userInfo()`; falls back to `$USER`, then `$LOGNAME`.
 *
 * @returns The user name, or `undefined` when none of the sources yields one.
 */
function resolveCurrentUsername(): string | undefined {
  try {
    const { username } = userInfo();
    if (username) return username;
  } catch {
    // userInfo() can throw when the uid has no passwd entry; fall back to env.
  }
  // `||` rather than `??` so an empty env value is treated as "not set".
  return process.env['USER'] || process.env['LOGNAME'] || undefined;
}

/**
 * Enable fleet units for boot-survival after install.
 *
 * Runs `systemctl --user enable` for `mosaic-tmux-holder.service` and for one
 * `mosaic-agent@<name>.service` per roster agent (holder first, then agents in
 * roster order), then makes a best-effort `loginctl enable-linger` call for
 * the current user.
 *
 * Non-fatal: if systemctl enable returns non-zero, a warning is printed and we
 * continue. An exception thrown while running the step is caught and reported
 * as a warning instead of propagating to the caller.
 *
 * @param runner - Command runner used for every systemctl/loginctl call.
 * @param roster - Fleet roster; one agent unit is enabled per `roster.agents` entry.
 * @param opts - If `opts.enable === false` (--no-enable flag), the whole step
 *   is skipped and no command is run.
 */
export async function enableFleetUnits(
  runner: CommandRunner,
  roster: FleetRoster,
  opts: { enable?: boolean },
): Promise<void> {
  if (opts.enable === false) {
    return;
  }
  try {
    const units = [
      'mosaic-tmux-holder.service',
      ...roster.agents.map((agent) => `mosaic-agent@${agent.name}.service`),
    ];

    let succeeded = 0;
    let failed = 0;

    for (const unit of units) {
      const result = await runner(...splitCommand(buildSystemdEnableCommand(unit)));
      if (result.exitCode === 0) {
        succeeded++;
        continue;
      }
      failed++;
      process.stderr.write(
        `Warning: could not enable ${unit}: ${result.stderr || result.stdout || 'non-zero exit'}\n`,
      );
    }

    if (succeeded > 0) {
      console.log(`Enabled ${succeeded} unit(s) for boot-survival.`);
    }
    if (failed > 0) {
      process.stderr.write(
        `Warning: ${failed} unit(s) could not be enabled (systemctl unavailable?). Run manually if needed.\n`,
      );
    }

    // Best-effort linger. Never guess a user name: enabling linger for a
    // placeholder account would either fail confusingly or target the wrong user.
    const username = resolveCurrentUsername();
    if (!username) {
      process.stderr.write(
        "Hint: could not determine the current user; run 'loginctl enable-linger <user>' to survive logout.\n",
      );
      return;
    }
    const lingerResult = await runner(...splitCommand(buildEnableLingerCommand(username)));
    if (lingerResult.exitCode !== 0) {
      process.stderr.write(
        `Hint: run 'loginctl enable-linger ${username}' as root to survive logout.\n`,
      );
    }
  } catch (err) {
    process.stderr.write(
      `Warning: auto-enable step failed unexpectedly: ${err instanceof Error ? err.message : String(err)}\n`,
    );
  }
}
|
||||
|
||||
export function buildAgentSendCommand(
|
||||
paths: FleetPaths,
|
||||
agentName: string,
|
||||
@@ -437,32 +524,41 @@ export function parseTmuxListPanes(
|
||||
return { pid, command, dead, idleSeconds };
|
||||
}
|
||||
|
||||
/**
 * Maps each known runtime to the set of acceptable pane commands.
 * A pane running any of these commands for the given runtime is NOT considered drifted.
 * Runtimes launched via `mosaic yolo` wrap in node, so 'node' is acceptable for most.
 * The dogfood runtime accepts python3/python (the canary-pi dogfood stub).
 */
export const RUNTIME_ACCEPTABLE_COMMANDS: Record<string, readonly string[]> = {
  claude: ['claude', 'node'],
  codex: ['codex', 'node'],
  opencode: ['opencode', 'node'],
  pi: ['pi', 'node'],
  dogfood: ['python3', 'python'],
};

/**
 * Determine if there is a runtime drift: roster says one runtime but the pane
 * is actually running something from a different runtime. We detect this by
 * checking if the pane command doesn't match a known acceptable command for the
 * roster's declared runtime.
 *
 * Known acceptable commands per runtime (see RUNTIME_ACCEPTABLE_COMMANDS):
 *   claude   → claude, node   (node covers mosaic yolo wrapper)
 *   codex    → codex, node
 *   opencode → opencode, node
 *   pi       → pi, node       (python3 still flags drift for canary-pi dogfood stub)
 *   dogfood  → python3, python
 *
 * If the pane is running something else (e.g., python3/dogfood-agent.py) for
 * an agent whose roster runtime is "pi", that's a drift.
 *
 * @param rosterRuntime - Runtime name declared in the roster.
 * @param paneCommand - Current pane command, or `null` when the pane is dead.
 * @returns `true` only when the runtime is known and the pane command is not
 *   in its acceptable list; `false` for a null/empty pane command or an
 *   unknown runtime.
 */
export function detectDrift(rosterRuntime: string, paneCommand: string | null): boolean {
  if (!paneCommand) return false;
  // Own-property lookup only: a plain index would resolve inherited keys such
  // as "constructor" or "toString" to non-array values and crash on .includes.
  const acceptable = Object.prototype.hasOwnProperty.call(
    RUNTIME_ACCEPTABLE_COMMANDS,
    rosterRuntime,
  )
    ? RUNTIME_ACCEPTABLE_COMMANDS[rosterRuntime]
    : undefined;
  if (!acceptable) return false;
  return !acceptable.includes(paneCommand);
}
|
||||
|
||||
/**
|
||||
@@ -706,12 +802,22 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
|
||||
cmd
|
||||
.command('install')
|
||||
.description('Install local fleet tools and user systemd units')
|
||||
.action(async () => installFleet(cmd, frameworkRoot));
|
||||
.option('--no-enable', 'Skip enabling units for boot-survival')
|
||||
.action(async (opts: { enable?: boolean }) => {
|
||||
await installFleet(cmd, frameworkRoot);
|
||||
const roster = await loadRosterForCommand(cmd);
|
||||
await enableFleetUnits(runner, roster, opts);
|
||||
});
|
||||
|
||||
cmd
|
||||
.command('install-systemd')
|
||||
.description('Install local fleet tools and user systemd units')
|
||||
.action(async () => installFleet(cmd, frameworkRoot));
|
||||
.option('--no-enable', 'Skip enabling units for boot-survival')
|
||||
.action(async (opts: { enable?: boolean }) => {
|
||||
await installFleet(cmd, frameworkRoot);
|
||||
const roster = await loadRosterForCommand(cmd);
|
||||
await enableFleetUnits(runner, roster, opts);
|
||||
});
|
||||
|
||||
for (const action of ['start', 'stop', 'restart'] as const) {
|
||||
cmd
|
||||
|
||||
Reference in New Issue
Block a user