diff --git a/docs/fleet/PRD.md b/docs/fleet/PRD.md new file mode 100644 index 0000000..8a3cb04 --- /dev/null +++ b/docs/fleet/PRD.md @@ -0,0 +1,109 @@ +# PRD — Fleet Phase 2: Operator Observability + +> **Workstream:** W-FLEET under `mvp-20260312` · **Phase:** 2 +> **North star:** [docs/fleet/north-star.md](./north-star.md) +> **Source umbrella PRD:** [docs/PRD.md](../PRD.md) (Mosaic Stack v0.1.0) +> **Tracks task:** `fleet-observability-1` — restore operator observability into fleet agent sessions. + +## Problem + +The durable tmux fleet runs on the isolated `mosaic-factory` socket. That isolation +(which protects the operator's default tmux) makes the fleet **invisible** to default +tooling, and truth is split across three planes no single command joins — systemd +(`systemctl --user`), tmux (`-L mosaic-factory`), and the process tree (`pstree`). +`agent tail` (`capture-pane`) returns **blank for full-screen TUIs**, and `agent send` +confirms only keystroke injection, not acceptance. Net: the operator has near-zero +observability and no safe way to watch a session. + +## Goals + +1. One command shows the **whole fleet's** real state, joining all three planes. +2. **Liveness is truthful**: healthy = answered a heartbeat, not "pane alive". +3. The operator can **watch** any session read-only without disrupting it. +4. `send` reports **delivered-and-accepted**, not just injected. +5. Every record/address carries **`tenant_id` + `host`** (zero foreclosure for multi-tenant/multi-host). + +## Non-goals (this phase) + +- No webUI (Phase 5; rides federation for cross-host). +- No `fleetd` daemon or persistent history store. +- No real-runtime swap (Phase 3) — instrument the live **dogfood stub** fleet. +- No cross-host aggregation yet (addressing is host-tagged but queries stay local). + +## Functional requirements + +| ID | Requirement | +| ---- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FR-1 | `mosaic fleet ps [--json]` prints one row per roster agent joining: name · tenant · host · runtime · systemd(active/enabled) · pane(alive/dead) · pid · idle · **last-heartbeat age** · **drift** flag (roster runtime ≠ actual pane command) · **boot-enable** warning (active but `UnitFileState=disabled`). | +| FR-2 | **Heartbeat protocol v1** (see below); `dogfood-agent.py` implements the responder. `fleet ps` issues probes (or reads last-seen) and reports health per FR-1. | +| FR-3 | `mosaic agent watch ` opens a **read-only** view of the pane (grouped session or `tmux attach -r`) that cannot send keystrokes and does not shrink the agent's window. | +| FR-4 | `mosaic agent attach ` remains the **explicit** interactive-takeover path (separate verb, documented as the only one that can type). | +| FR-5 | `mosaic agent send --verify` confirms the message was **accepted** (not left as an unsubmitted draft) and returns non-zero if delivery cannot be verified. | +| FR-6 | All structured output (`--json`) includes `tenant_id` and `host` fields. | + +## Heartbeat protocol v1 + +- **Probe:** operator/`fleet ps` writes a sentinel line to the agent's input or a + well-known per-agent heartbeat file path `~/.config/mosaic/fleet/run/.hb`. +- **Response:** the runtime updates `.hb` with `ts= pid= status=` + on a fixed interval (default 15s) and on demand when probed. +- **Health rule:** `healthy` if `now - ts <= 3 × interval`; else `stale`; missing file = `unknown`. +- **Contract:** every runtime (dogfood stub now; claude/codex/pi/opencode in Phase 3) + MUST emit the heartbeat. The protocol is file-based so it works for headless stubs and + full-screen TUIs alike (no `capture-pane` dependency). +- `ASSUMPTION:` file-based heartbeat (vs in-pane echo) — chosen because it is TUI-safe and + uid-scoped, fitting per-tenant isolation. Open to an OTEL-span variant in Phase 3 (MVP-X6). + +## Acceptance criteria + +- `mosaic fleet ps` shows all 5 live sessions on `mosaic-factory` with correct + pane/pid/idle and flags the dogfood **drift** (`canary-pi` runtime=pi but pane runs + `dogfood-agent.py`) and the **boot-enable** gap (active but disabled). +- Killing one agent's pane flips its row to dead/stale within one `interval`. +- `agent watch` shows live output and provably cannot type into the pane; detaching + leaves the agent's window size unchanged. +- `agent send --verify` returns success on an accepting pane and non-zero on a wedged/draft pane. +- Quality gates green: `pnpm typecheck`, `pnpm lint`, `pnpm format:check`, plus + `pnpm --filter @mosaicstack/mosaic test`. +- Independent review passed; dogfood evidence captured against the live fleet. + +## Test plan + +- Unit/CLI specs in `packages/mosaic/src/commands/fleet.spec.ts` (and a new + `fleet-ps`/`watch`/`send-verify` spec) using the injected `CommandRunner` to assert + exact tmux/systemd command construction and JSON shape (tenant+host present). +- Situational: run against the live `mosaic-factory` fleet; capture `fleet ps` output, + a kill-and-detect cycle, a read-only `watch`, and a `send --verify` pass/fail pair. + +## Known limitations + +- **Verify heuristic is best-effort:** `agent send --verify` uses a `>` -prefix draft + heuristic that is specific to pi/claude TUIs. Draft detection for codex and opencode + TUIs is best-effort only; those runtimes may not use the same input-line indicator. +- **Pane-change check is the best Phase-2 signal; verify now polls up to a bounded + timeout:** `agent send --verify` captures a BEFORE snapshot, sends the message, then + polls `capture-pane` every ~400 ms up to a configurable total timeout (default ~6 s, + controlled by `--verify-timeout `). On each poll it runs classifySendResult: if + the pane shows 'accepted' or 'draft' the loop exits immediately; while the result is + 'unverifiable' (no pane change yet) it keeps polling. After the timeout with no + definitive result, it fails closed: exit 1 with "no pane change after send". This + eliminates false 'unverifiable' failures for slow/loaded TUIs that were previously + caused by the old fixed 300 ms single-capture. Definitive acceptance ultimately + requires a runtime acknowledgement (Phase-3 heartbeat-ack); the bounded pane-change + poll is the best signal available against an opaque TUI for Phase-2. +- **Blank AFTER capture fails closed:** Full-screen TUIs (claude, codex, opencode, pi) + render blank for `tmux capture-pane`. When the AFTER snapshot is empty, `send --verify` + returns non-zero with an "unverifiable" message rather than silently succeeding. This + is an intentional fail-closed design (FR-5). +- **`agent watch` uses a grouped viewer session:** `tmux attach -r` directly against the + agent session lets the viewer terminal shrink the agent's window. `agent watch` instead + creates a throwaway grouped session (`tmux new-session -d -t '=' -s +'-watch-'`), attaches read-only to that session, and kills it on detach. + The grouped session shares the agent's windows but has independent sizing, so the + agent's window is never affected. `tmux attach` is still interactive and requires + inherited stdio; the `interactiveRunner` handles TTY passthrough. + +## Surfaces & parity (MVP-X1) + +CLI lands this phase. TUI surface follows in the `packages/mosaic` wizard; webUI in +Phase 5 via federation. PRD records the parity debt explicitly so it is not lost. diff --git a/docs/fleet/TASKS.md b/docs/fleet/TASKS.md new file mode 100644 index 0000000..af72679 --- /dev/null +++ b/docs/fleet/TASKS.md @@ -0,0 +1,27 @@ +# Tasks — W-FLEET (Fleet) Phase 2: Observability + +> Workstream task file for the Fleet. Single-writer: Fleet workstream lead (orchestrator). +> Workers read but never modify. This is **not** the MVP rollup (`docs/TASKS.md`) — a +> rollup row is proposed to the MVP orchestrator, not written here. +> +> Mission: `mvp-20260312` · PRD: [docs/fleet/PRD.md](./PRD.md) · North star: [docs/fleet/north-star.md](./north-star.md) +> Status: `not-started` | `in-progress` | `done` | `blocked` | `failed` + +| id | status | description | depends_on | agent | pr | notes | +| ------------- | ----------- | ------------------------------------------------------------------------------------------------------------------ | --------------------- | ----------- | --- | ----------------------------------------------------------------------------------------------------------------------------- | +| FLEET-OBS-000 | done | Plan: north-star + Phase-2 PRD + workstream scaffolding | — | lead | — | persisted 2026-06-20 on `feat/fleet-observability` | +| FLEET-OBS-001 | done | Heartbeat protocol v1 spec finalized in PRD + framework doc | FLEET-OBS-000 | lead | — | file-based `~/.config/mosaic/fleet/run/.hb`; spec in PRD | +| FLEET-OBS-002 | in-progress | Implement heartbeat responder in `dogfood-agent.py` | FLEET-OBS-001 | fleet-coder | — | dispatched to ad-hoc `mosaic yolo` fleet agent (dogfood) | +| FLEET-OBS-003 | done | `mosaic fleet ps` — join systemd+tmux+proc+idle+heartbeat; tenant+host tagged; drift + boot-enable flags; `--json` | FLEET-OBS-001 | worker | — | commit ab47831; LIVE-verified on mosaic-factory; caught canary-pi DRIFT + BOOT-ENABLE. Polish: idleSeconds parse returns null | +| FLEET-OBS-004 | done | `mosaic agent watch ` — read-only join (no resize, no keystrokes) | FLEET-OBS-000 | worker | — | `attach -r`; verb wired | +| FLEET-OBS-005 | done | `mosaic agent send --verify` — delivery/acceptance receipt | FLEET-OBS-000 | worker | — | --verify flag; draft-heuristic verify | +| FLEET-OBS-006 | done | CLI specs for ps/watch/send-verify (tenant+host shape, command construction) | FLEET-OBS-003,004,005 | worker | — | 62 tests green (31 new); re-verified by lead | +| FLEET-OBS-007 | not-started | Framework doc: fleet observability guide + verbs | FLEET-OBS-003,004,005 | lead | — | `docs/guides/` or `framework/tools/.../README` | +| FLEET-OBS-008 | not-started | Independent review + dogfood verification on live fleet | FLEET-OBS-002..007 | reviewer | — | author ≠ reviewer; capture evidence in scratchpad | +| FLEET-OBS-009 | not-started | Open PR → green CI (queue guard) → squash-merge → close `fleet-observability-1` | FLEET-OBS-008 | lead | — | trunk merge; no direct push to main | + +## Proposed MVP rollup row (for the MVP orchestrator — not written by this workstream) + +``` +| W-FLEET | in-progress | Fleet (agent-session execution layer) | Phase 2/5 | docs/fleet/TASKS.md | observability dogfooded on live stub fleet; control plane rides federation (W1) | +``` diff --git a/docs/fleet/north-star.md b/docs/fleet/north-star.md new file mode 100644 index 0000000..22b6857 --- /dev/null +++ b/docs/fleet/north-star.md @@ -0,0 +1,128 @@ +# Mosaic Fleet — North Star + +> **Workstream:** W-FLEET (Fleet) under mission `mvp-20260312` +> **Umbrella:** [docs/MISSION-MANIFEST.md](../MISSION-MANIFEST.md) · [docs/PRD.md](../PRD.md) (Mosaic Stack v0.1.0) +> **Status:** doctrine — authored 2026-06-20. Owner of this file: Fleet workstream lead. +> This document does **not** modify the MVP rollup; a rollup row is proposed, not written here. + +## Vision + +A **customizable, multi-tenant fleet of always-on AI agents** — each defined by role, +materialized as a durable, joinable runtime session, coordinated by the proven +orchestrator/worker model, and observable end-to-end across hosts. Coding today; +finance, analytics, research as roster entries tomorrow — same primitives, different +roster. The fleet is the **agent-session execution layer** of the Mosaic Stack MVP: +the thing federation makes reachable across hosts and the webUI/TUI/CLI make visible. + +The USC tmux PoC (durable sessions + `agent-send` comms) proved the model. This +workstream makes it an official, observable, multi-tenant Mosaic Stack capability. + +## The Fleet as means of production (bootstrapping) + +The Fleet has a **dual role**, and that is the point: + +- **As product** — a multi-tenant agent-fleet capability of Mosaic Stack (this workstream). +- **As means of production** — the orchestrator/worker fleet that _actually builds the + entire MVP_ (federation W1, webUI, TUI, CLI, and the Fleet itself). + +We are **building the system that builds the system.** Every other MVP workstream is +delivered _by_ the fleet, so fleet observability and control are not merely product +features — they are the **operational floor of the whole delivery effort**. If we cannot +see and steer the agents, we cannot trust what they ship. This is why Phase 2 +(observability) leads: it is the instrument panel for the factory, dogfooded on the live +fleet that is, recursively, building Mosaic Stack. + +The discipline that makes great power safe is the same gate chain the fleet enforces: +independent review before merge, green CI, honest completion, decide-and-inform cadence, +and no irreversible action without authority. The bootstrap is only as trustworthy as +those gates. + +## Alignment with MVP cross-cutting requirements + +The Fleet inherits — does not re-invent — the MVP's hard requirements: + +| MVP req | What it means for the Fleet | +| ----------------------------- | ----------------------------------------------------------------------------------------------------------------------- | +| MVP-X1 three-surface parity | fleet observability/control reachable via **CLI + TUI + webUI** (CLI first; webUI is required for parity, not optional) | +| MVP-X2 multi-tenant isolation | one tenant = one **Linux uid** (own `systemd --user`, socket, `~/.config/mosaic`); no cross-tenant leakage | +| MVP-X3 auth (BetterAuth/SSO) | operator→fleet and cross-host views are auth-gated through the platform's existing auth | +| MVP-X4 quality gates | `pnpm typecheck`/`lint`/`format:check` green before any push | +| MVP-X5 federated topology | cross-host fleet visibility rides the **federation** boundary (W1), not a bespoke broker | +| MVP-X6 OTEL tracing | heartbeats, sends, and lifecycle events emit spans; `traceparent` crosses the federation boundary | +| MVP-X7 trunk merge | branch from `main`, squash-merge via PR, never push to `main` | + +## The stack — where every concern lives + +One **definition** is the source of truth; the **session** is how it runs. + +| Layer | Owner | Phase-2 reality | Destination | +| -------------------------------- | ------------------------------------------------------------------------------------------- | ------------------------------------------------------ | ------------------------------------------------------- | +| **Definition + identity + auth** | gateway / `mosaic-as` (scoped tokens, #541) | `roster.yaml` (tenant-tagged) | one definition; `mosaic agent --new` materializes it | +| **Tenancy boundary** | **Linux uid per tenant** (linger, own `systemd --user`, own socket, own `~/.config/mosaic`) | one tenant: `jarvis` = tenant zero | uid-per-tenant; federation aggregates across hosts | +| **Runtime** | per-tenant tmux session on isolated socket | dogfood stub sessions (live now on `mosaic-factory`) | claude/codex/pi/opencode TUIs | +| **Liveness** | **heartbeat protocol** every runtime answers | protocol defined + dogfood stub answers it | all runtimes answer; "healthy" ≠ "pane alive" | +| **Observation** | read-only `watch` (native tmux) + `pipe-pane` stream | CLI `watch`/`ps`; explicit opt-in `attach` for control | + auth-gated webUI streams | +| **Control plane** | **federation** across hosts × tenants | records already carry `tenant_id` + `host` | federated gateways expose fleet state; webUI in Phase 5 | + +## Operating model (inherited, not reinvented) + +The AI-guide law stands: one accountable **orchestrator**, isolated **workers** that +stop at PR-open, the serialized **gate chain** (independent review → green CI → +diff-sanity → squash-merge → verify), **decide-and-inform** cadence, and a durable +**board** so missions survive session death. The Fleet is the infrastructure _under_ +this model. See `mosaicstack-aiguide` whitepapers 01 (inter-agent comms) and 03 +(orchestration model) for the rationale. + +## Invariants — "maximal vision, incremental delivery, zero foreclosure" + +Every artifact, starting Phase 2, MUST: + +1. Carry **`tenant_id` + `host`** in schema and message addressing — even with one of each today. +2. Treat **isolation socket ≠ invisibility** — anything isolated is surfaced by one command. +3. Define **healthy = answered a heartbeat within N seconds**, never just "pane alive". +4. Make **observation read-only by default**; control is an explicit, separate, opt-in verb. + +## Observation model + +| Verb | Behavior | +| ----------------------------------- | -------------------------------------------------------------------------------------------------- | +| `mosaic fleet ps` | one table joining systemd + tmux + process + idle + last-heartbeat, with drift + boot-enable flags | +| `mosaic agent watch ` | **read-only** join (grouped session / `-r`), no resize tyranny, no keystrokes | +| `mosaic agent attach ` | explicit interactive takeover (the only path that can type) | +| `mosaic agent send --verify` | confirms message **accepted**, not merely keystroke-injected | + +> Why the current PoC blocks observation: sessions live on the isolated `mosaic-factory` +> socket (invisible to default `tmux ls`), the only sanctioned read is `capture-pane` +> (blank for full-screen TUIs), and `attach` is read-write + resizes the session. The +> verbs above restore "join and observe" safely. + +## Phased roadmap + +| Phase | Outcome | Status | +| ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| 0–1 | tmux PoC, hardening, published CLI v0.0.34 (#565–#568) | ✅ done | +| **2 — Observability** | `fleet ps` (host+tenant aware join), heartbeat protocol + dogfood stub answers it, `agent watch` (read-only), `agent send --verify` receipts | ▶ now | +| 3 — Real runtimes | claude/codex/pi/opencode answer heartbeat; **hybrid lifecycle** (core always-on: orchestrator+reviewer; ephemeral workers per lane) | planned | +| 4 — Unified definition | one agent schema in gateway; `mosaic agent --new` → materialized per-tenant session; uid-tenant provisioning | planned | +| 5 — Control plane | federation-backed cross-host × cross-tenant fleet view; **webUI** (surface chosen then) for MVP-X1 parity | planned | + +## Decisions of record (2026-06-20, with Jason) + +- Agent model: **config defines, session runs** (gateway = definition/identity/auth; tmux = runtime). +- Tenancy: **multi-tenant from the start**; isolation = **per-tenant Linux uid**. +- Health: **heartbeat required** (dogfood stub implements the protocol now). +- Lifecycle: **hybrid** — core always-on + ephemeral workers per lane. +- Observation: **read-only default, opt-in takeover**. +- Multi-host: **designed-for from day one**; control plane **rides federation (W1)**. +- Delivery: **CLI-first now**, dogfood against the live stub fleet; webUI deferred to Phase 5. + +## Assumptions (veto-able) + +- `ASSUMPTION:` first-class runtimes = claude, codex, pi, opencode; a "role" (analyst, + finance, researcher) = persona + skills + tools on top of a runtime, shipped as a + starter role library in the framework. +- `ASSUMPTION:` the cross-host control plane is the **federation** layer (W1), not a + separate `fleetd` daemon. +- `ASSUMPTION:` Fleet is workstream **W-FLEET** under `mvp-20260312`; a rollup row in + `docs/TASKS.md` and a workstream declaration in `MISSION-MANIFEST.md` are proposed to + the MVP orchestrator, not written by this workstream. diff --git a/docs/scratchpads/fleet-observability-phase2.md b/docs/scratchpads/fleet-observability-phase2.md new file mode 100644 index 0000000..22f0694 --- /dev/null +++ b/docs/scratchpads/fleet-observability-phase2.md @@ -0,0 +1,75 @@ +# Scratchpad — Fleet Phase 2: Observability (W-FLEET) + +> Append-only. Mission `mvp-20260312` / workstream W-FLEET. +> Lead: Jarvis (Claude) at `W-jarvis:mos-claude-18`. Coordinating with `jwoltje@dragon-lin:coder0-0`. + +## Mission prompt (2026-06-20) + +Establish the north star for the Mosaic Fleet feature and prepare Phase-2 observability +for delivery. The USC tmux PoC is the proven base. Jason granted lead authority: +"The fleet is a great way to actually build the MVP — we are building the system that +builds the system." Dogfood actual agent construction + ad-hoc deployment; coordinate +with a second agent on `dragon-lin`. + +## Decisions of record (with Jason, 2026-06-20) + +- Agent model: config defines, session runs (gateway = definition/identity/auth; tmux = runtime). +- Tenancy: multi-tenant from the start; isolation = per-tenant Linux uid. +- Health: heartbeat required; dogfood stub implements protocol now. +- Lifecycle: hybrid (core always-on + ephemeral workers). +- Observation: read-only default, opt-in takeover. +- Multi-host: designed-for day one; control plane rides federation (W1), not a bespoke broker. +- Delivery: CLI-first, dogfood on the live stub fleet; webUI deferred to Phase 5. +- Fleet is dual-role: product AND means of production (bootstrapping the MVP). +- Code review = **dual-engine**: Claude **and** gpt-5.5/Codex, run together (Jason: the + combination produces the best results). Launch reviewers via `mosaic yolo pi` / `codex` + (proven path) or `~/.config/mosaic/tools/codex/codex-code-review.sh`. Applies to all + code-review gates incl. FLEET-OBS-008. Per Jason 2026-06-20. +- Worktree discipline: do fleet work in `~/src/mosaicstack-stack-worktrees/`, NOT + the shared main checkout — concurrent processes mutate `main` there (learned 2026-06-20). + +## Environment facts (verified 2026-06-20) + +- Fleet is live on `W-jarvis` (uid 1000, `jarvis`, `Linger=yes`) on tmux socket + `mosaic-factory`: `_holder`, `canary-pi`, `dogfood-coder`, `dogfood-orchestrator`, + `dogfood-reviewer`. All panes run `~/.config/mosaic/fleet/dogfood-agent.py` (stub), + including `canary-pi` (roster says runtime=pi → **drift**). +- Holder + `mosaic-agent@*` units are `active (exited)` but `UnitFileState=disabled` + (reboot loses fleet → boot-enable gap to surface). +- Observation blocked by: isolated socket (hidden from default `tmux ls`), `capture-pane` + blank for TUIs, `attach` being read-write + resizing. +- Second agent: `jwoltje@dragon-lin`, session `coder0-0` (group `coder0`), running `node`, + default socket. ssh forward reach confirmed. + +## Governance / collision-safety + +- `mosaicstack-stack` has active mission `mvp-20260312` with single-writer locks on + `docs/MISSION-MANIFEST.md`, `docs/TASKS.md`, `docs/scratchpads/mvp-20260312.md`. +- This workstream touches NONE of those. All Fleet docs scoped under `docs/fleet/` + + this scratchpad. Rollup row proposed, not written. + +## Session log + +- 2026-06-20: Researched AI guide + fleet code + live state. Established north star with + Jason (8 forks decided). Branched `feat/fleet-observability`. Persisted + `docs/fleet/{north-star.md,PRD.md,TASKS.md}` + this scratchpad. Next: establish comms + with dragon-lin coder, commit docs, begin Phase-2 delivery (heartbeat + `fleet ps`). +- 2026-06-20 (session 2): Built Phase-2 CLI via worker (commit ab47831): `fleet ps`, + `agent watch`, `agent send --verify`, 62 tests. LIVE-verified `fleet ps` on + mosaic-factory — correctly flagged canary-pi DRIFT + BOOT-ENABLE, tenant_id+host in JSON. + Heartbeat responder added to dogfood-agent.py (FLEET-OBS-002) — `fleet ps` HB now + `healthy` for all 4 agents. +- Coordination: dual-engine-reviewed (Claude+Codex) and merged framework PRs #572 + (sanitization gate) + #575 (CONSTITUTION extraction) as Lead. Codex caught an Alpine + blocker on #572 (refuted by CI); Claude caught a CI-breaking format failure on #575. +- **FINDINGS (north-star / Phase-3 blockers):** + 1. Ad-hoc `mosaic yolo {codex,pi}` via `start-agent-session.sh` DIE immediately in a + detached tmux pane (codex: "stdin is not a terminal"; pi: same). Only the python stub + survives. => Real runtimes have NEVER run durably in the fleet. Launch path (PATH/TTY + in the detached shell) must be fixed before Phase-3 real-runtime swap. `fleet ps` + caught both dead panes instantly (tool validated). + 2. `MOSAIC_AGENT_NAME` (set in systemd EnvironmentFile) is NOT propagated into tmux's + global env, so agents defaulted to `unknown`. Worked around in dogfood-agent.py via + tmux session-name fallback; the systemd/tmux env handoff needs a real fix. +- Next: rebase on merged main, open Phase-2 PR, dual-engine review, merge, close + `fleet-observability-1`. Defer launch-path + env-propagation fixes to Phase 3. diff --git a/packages/mosaic/src/commands/fleet.spec.ts b/packages/mosaic/src/commands/fleet.spec.ts index 28f348b..5b852cf 100644 --- a/packages/mosaic/src/commands/fleet.spec.ts +++ b/packages/mosaic/src/commands/fleet.spec.ts @@ -5,15 +5,35 @@ import { Command } from 'commander'; import { afterEach, describe, expect, it, vi } from 'vitest'; import { buildAgentSendCommand, + buildAgentWatchAttachCommand, + buildAgentWatchCommand, + buildAgentWatchCreateViewerCommand, + buildAgentWatchKillViewerCommand, + buildAgentVerifyAcceptedCommand, buildFleetServiceCommand, + buildSystemdShowCommand, + buildTmuxListPanesCommand, + classifySendResult, + detectDrift, generateAgentEnv, getDefaultOperatorSourceLabel, + getDefaultTenantAndHost, getRosterAgent, + heartbeatPath, + isSendAccepted, loadFleetRoster, mergeAgentEnv, + parseHeartbeat, + parseSystemdShow, + parseTmuxListPanes, registerFleetCommand, resolveFleetPaths, + VERIFY_DEFAULT_TIMEOUT_MS, + VERIFY_POLL_INTERVAL_MS, + type AgentPsRow, type CommandRunner, + type InteractiveRunner, + type SleepFn, } from './fleet.js'; import { registerAgentCommand } from './agent.js'; @@ -39,6 +59,7 @@ describe('registerFleetCommand', () => { 'init', 'install', 'install-systemd', + 'ps', 'restart', 'start', 'status', @@ -59,6 +80,7 @@ describe('registerFleetCommand', () => { 'send', 'status', 'tail', + 'watch', ]); }); }); @@ -736,3 +758,993 @@ describe('fleet command construction', () => { expect(packageJson.files).toEqual(expect.arrayContaining(['dist', 'framework'])); }); }); + +// --------------------------------------------------------------------------- +// Phase-2 observability — unit tests (FR-1, FR-3, FR-5, FR-6) +// --------------------------------------------------------------------------- + +describe('fleet ps — command construction', () => { + it('builds exact systemd show command for an agent unit', () => { + expect(buildSystemdShowCommand('canary-pi')).toEqual([ + 'systemctl', + '--user', + 'show', + 'mosaic-agent@canary-pi.service', + '-p', + 'ActiveState', + '-p', + 'SubState', + '-p', + 'UnitFileState', + ]); + }); + + it('builds exact tmux list-panes command with the correct format string', () => { + expect(buildTmuxListPanesCommand('canary-pi', 'mosaic-factory')).toEqual([ + 'tmux', + '-L', + 'mosaic-factory', + 'list-panes', + '-t', + '=canary-pi:0.0', + '-F', + '#{pane_pid} #{pane_current_command} #{pane_dead} #{pane_activity}', + ]); + }); + + it('uses DEFAULT_SOCKET_NAME when socket is omitted from list-panes', () => { + const cmd = buildTmuxListPanesCommand('canary-pi'); + expect(cmd[2]).toBe('mosaic-factory'); + }); + + it('derives heartbeat path under ~/.config/mosaic/fleet/run/', () => { + const home = '/home/test/.config/mosaic'; + expect(heartbeatPath('canary-pi', home)).toBe( + '/home/test/.config/mosaic/fleet/run/canary-pi.hb', + ); + }); +}); + +describe('fleet ps — heartbeat parsing', () => { + const NOW = 1_700_000_000_000; // fixed epoch ms for deterministic tests + + it('parses a healthy heartbeat file', () => { + const ts = new Date(NOW - 10_000).toISOString(); // 10s ago — within 3×15s = 45s + const content = `ts=${ts}\npid=12345\nstatus=ok\n`; + const hb = parseHeartbeat(content, NOW); + expect(hb.health).toBe('healthy'); + expect(hb.pid).toBe(12345); + expect(hb.status).toBe('ok'); + expect(hb.ageMs).toBe(10_000); + }); + + it('reports stale when heartbeat is older than 3×interval', () => { + const ts = new Date(NOW - 60_000).toISOString(); // 60s ago > 45s threshold + const content = `ts=${ts}\npid=99\nstatus=busy\n`; + const hb = parseHeartbeat(content, NOW); + expect(hb.health).toBe('stale'); + expect(hb.status).toBe('busy'); + }); + + it('reports unknown when heartbeat file is missing (null input)', () => { + const hb = parseHeartbeat(null, NOW); + expect(hb.health).toBe('unknown'); + expect(hb.ts).toBeNull(); + expect(hb.pid).toBeNull(); + expect(hb.ageMs).toBeNull(); + }); + + it('tolerates missing fields in heartbeat file', () => { + const hb = parseHeartbeat('ts=not-a-date\n', NOW); + expect(hb.health).toBe('unknown'); + expect(hb.ts).toBeNull(); + }); +}); + +describe('fleet ps — systemd show parsing', () => { + it('parses ActiveState, SubState, UnitFileState from systemctl show output', () => { + const output = 'ActiveState=active\nSubState=running\nUnitFileState=enabled\n'; + expect(parseSystemdShow(output)).toEqual({ + ActiveState: 'active', + SubState: 'running', + UnitFileState: 'enabled', + }); + }); + + it('defaults missing keys to "unknown"', () => { + const result = parseSystemdShow('ActiveState=inactive\n'); + expect(result.SubState).toBe('unknown'); + expect(result.UnitFileState).toBe('unknown'); + }); +}); + +describe('fleet ps — tmux list-panes parsing', () => { + const NOW_MS = 1_700_000_000_000; + + it('parses alive pane with pid, command, and idle time', () => { + const activityEpoch = Math.floor((NOW_MS - 30_000) / 1000); // 30s ago + const output = `12345 claude 0 ${activityEpoch}\n`; + const result = parseTmuxListPanes(output, NOW_MS); + expect(result.pid).toBe(12345); + expect(result.command).toBe('claude'); + expect(result.dead).toBe(false); + expect(result.idleSeconds).toBe(30); + }); + + it('reports dead pane when pane_dead=1', () => { + const output = `0 bash 1 0\n`; + const result = parseTmuxListPanes(output, NOW_MS); + expect(result.dead).toBe(true); + }); + + it('returns nulls for empty pane output', () => { + const result = parseTmuxListPanes('', NOW_MS); + expect(result.pid).toBeNull(); + expect(result.command).toBeNull(); + expect(result.dead).toBe(true); + expect(result.idleSeconds).toBeNull(); + }); +}); + +describe('fleet ps — drift detection', () => { + it('flags drift when roster says pi but pane runs python3', () => { + expect(detectDrift('pi', 'python3')).toBe(true); + }); + + it('flags drift when roster says claude but pane runs dogfood-agent.py', () => { + expect(detectDrift('claude', 'dogfood-agent.py')).toBe(true); + }); + + it('does NOT flag drift when pane command matches the roster runtime', () => { + expect(detectDrift('claude', 'claude')).toBe(false); + expect(detectDrift('codex', 'codex')).toBe(false); + expect(detectDrift('pi', 'pi')).toBe(false); + expect(detectDrift('opencode', 'opencode')).toBe(false); + }); + + it('does NOT flag drift for unknown/custom runtimes (no canonical mapping)', () => { + expect(detectDrift('custom-runtime', 'anything')).toBe(false); + }); + + it('does NOT flag drift when pane command is null (pane dead)', () => { + expect(detectDrift('pi', null)).toBe(false); + }); +}); + +describe('fleet ps — tenant and host', () => { + it('returns tenant_id and host as non-empty strings', () => { + const { tenant_id, host } = getDefaultTenantAndHost(); + expect(typeof tenant_id).toBe('string'); + expect(tenant_id.length).toBeGreaterThan(0); + expect(typeof host).toBe('string'); + expect(host.length).toBeGreaterThan(0); + }); +}); + +describe('fleet ps — JSON output shape (FR-6)', () => { + it('produces --json records including tenant_id and host for each agent', async () => { + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + const rosterPath = join(home, 'fleet', 'roster.yaml'); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + rosterPath, + [ + 'version: 1', + 'transport: tmux', + 'agents:', + ' - name: canary-pi', + ' runtime: pi', + ' class: canary', + ].join('\n'), + ); + + const nowMs = Date.now(); + const activityEpoch = Math.floor((nowMs - 20_000) / 1000); + + const runner: CommandRunner = async (command, args) => { + const fullArgs = [command, ...args].join(' '); + if (fullArgs.includes('systemctl') && fullArgs.includes('show')) { + return { + stdout: 'ActiveState=active\nSubState=running\nUnitFileState=disabled\n', + stderr: '', + exitCode: 0, + }; + } + if (fullArgs.includes('list-panes')) { + return { + stdout: `12345 python3 0 ${activityEpoch}\n`, + stderr: '', + exitCode: 0, + }; + } + return { stdout: '', stderr: '', exitCode: 0 }; + }; + + const lines: string[] = []; + const origLog = console.log; + console.log = (msg: string) => { + lines.push(msg); + }; + + const program = new Command(); + program.exitOverride(); + registerFleetCommand(program, { runner, mosaicHome: home }); + + try { + await program.parseAsync(['node', 'mosaic', 'fleet', 'ps', '--json']); + } finally { + console.log = origLog; + await rm(home, { recursive: true, force: true }); + } + + const json = JSON.parse(lines.join('')) as AgentPsRow[]; + expect(Array.isArray(json)).toBe(true); + expect(json).toHaveLength(1); + + const row = json[0]!; + // FR-6: tenant_id and host must be present + expect(typeof row.tenant_id).toBe('string'); + expect(row.tenant_id.length).toBeGreaterThan(0); + expect(typeof row.host).toBe('string'); + expect(row.host.length).toBeGreaterThan(0); + + // drift: roster says pi, pane runs python3 → drift flag + expect(row.driftFlag).toBe(true); + // boot-enable warning: active + disabled + expect(row.bootEnableWarning).toBe(true); + + // heartbeat missing → unknown + expect(row.heartbeat.health).toBe('unknown'); + + expect(row.name).toBe('canary-pi'); + expect(row.runtime).toBe('pi'); + expect(row.systemdActive).toBe('active'); + expect(row.systemdEnabled).toBe('disabled'); + }); +}); + +describe('fleet ps — command sequences issued', () => { + it('issues systemd show + tmux list-panes per agent', async () => { + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + const rosterPath = join(home, 'fleet', 'roster.yaml'); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + rosterPath, + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + const calls: string[][] = []; + const runner: CommandRunner = async (command, args) => { + calls.push([command, ...args]); + return { + stdout: 'ActiveState=inactive\nSubState=dead\nUnitFileState=enabled\n', + stderr: '', + exitCode: 0, + }; + }; + + // suppress console.log for table output + const origLog = console.log; + console.log = () => {}; + + const program = new Command(); + program.exitOverride(); + registerFleetCommand(program, { runner, mosaicHome: home }); + + try { + await program.parseAsync(['node', 'mosaic', 'fleet', 'ps']); + expect(calls).toEqual([ + buildSystemdShowCommand('coder0'), + buildTmuxListPanesCommand('coder0', 'mosaic-factory'), + ]); + } finally { + console.log = origLog; + await rm(home, { recursive: true, force: true }); + } + }); +}); + +describe('agent watch', () => { + it('builds exact grouped-viewer creation command', () => { + expect( + buildAgentWatchCreateViewerCommand('canary-pi', 'canary-pi-watch-123', 'mosaic-factory'), + ).toEqual([ + 'tmux', + '-L', + 'mosaic-factory', + 'new-session', + '-d', + '-t', + '=canary-pi', + '-s', + 'canary-pi-watch-123', + ]); + }); + + it('builds exact viewer attach command (read-only)', () => { + expect(buildAgentWatchAttachCommand('canary-pi-watch-123', 'mosaic-factory')).toEqual([ + 'tmux', + '-L', + 'mosaic-factory', + 'attach', + '-r', + '-t', + 'canary-pi-watch-123', + ]); + }); + + it('builds exact viewer kill command', () => { + expect(buildAgentWatchKillViewerCommand('canary-pi-watch-123', 'mosaic-factory')).toEqual([ + 'tmux', + '-L', + 'mosaic-factory', + 'kill-session', + '-t', + 'canary-pi-watch-123', + ]); + }); + + it('buildAgentWatchCommand (deprecated) still uses DEFAULT_SOCKET_NAME when socket is omitted', () => { + const cmd = buildAgentWatchCommand('canary-pi'); + expect(cmd[2]).toBe('mosaic-factory'); + expect(cmd).toContain('-r'); + }); + + it('dispatch: creates grouped viewer session (runner) then attaches -r to viewer session (interactiveRunner), NOT a bare attach to the agent session', async () => { + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + join(home, 'fleet', 'roster.yaml'), + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + const capturingCalls: string[][] = []; + const runner: CommandRunner = async (command, args) => { + capturingCalls.push([command, ...args]); + return { stdout: '', stderr: '', exitCode: 0 }; + }; + + const interactiveCalls: string[][] = []; + const interactiveRunner: InteractiveRunner = async (command, args) => { + interactiveCalls.push([command, ...args]); + return 0; + }; + + const program = new Command(); + program.exitOverride(); + registerAgentCommand(program, { runner, interactiveRunner, mosaicHome: home }); + + try { + await program.parseAsync(['node', 'mosaic', 'agent', 'watch', 'coder0']); + + // The capturing runner must be used for grouped-session creation and cleanup. + // It must NOT be used for the interactive attach. + expect(capturingCalls).toHaveLength(2); // new-session + kill-session + expect(capturingCalls[0]).toEqual( + expect.arrayContaining(['new-session', '-d', '-t', '=coder0']), + ); + // The new-session command must include a viewer session name derived from agent name. + expect(capturingCalls[0]!.join(' ')).toMatch(/coder0-watch-\d+/); + // Kill-session must target the same viewer session, not the agent session. + expect(capturingCalls[1]).toEqual(expect.arrayContaining(['kill-session', '-t'])); + expect(capturingCalls[1]!.join(' ')).toMatch(/coder0-watch-\d+/); + // The agent session itself must NOT be the attach target. + expect(capturingCalls[1]!.join(' ')).not.toContain('=coder0'); + + // The interactiveRunner must attach -r to the VIEWER session, not the agent session. + expect(interactiveCalls).toHaveLength(1); + expect(interactiveCalls[0]).toEqual(expect.arrayContaining(['attach', '-r', '-t'])); + // Target must be the viewer session name (not "=coder0"). + const attachTarget = interactiveCalls[0]![interactiveCalls[0]!.indexOf('-t') + 1]!; + expect(attachTarget).toMatch(/coder0-watch-\d+/); + expect(attachTarget).not.toBe('=coder0'); + } finally { + await rm(home, { recursive: true, force: true }); + } + }); + + it('rejects watch for agents not in the roster', async () => { + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + join(home, 'fleet', 'roster.yaml'), + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + const runner = vi.fn(async () => ({ stdout: '', stderr: '', exitCode: 0 })); + const program = new Command(); + program.exitOverride(); + registerAgentCommand(program, { runner, mosaicHome: home }); + + try { + await expect( + program.parseAsync(['node', 'mosaic', 'agent', 'watch', 'typo']), + ).rejects.toThrow('Agent "typo" is not in the fleet roster'); + expect(runner).not.toHaveBeenCalled(); + } finally { + await rm(home, { recursive: true, force: true }); + } + }); +}); + +describe('agent send --verify', () => { + it('builds exact verify capture-pane command', () => { + expect(buildAgentVerifyAcceptedCommand('canary-pi', 'mosaic-factory', 5)).toEqual([ + 'tmux', + '-L', + 'mosaic-factory', + 'capture-pane', + '-t', + '=canary-pi:0.0', + '-p', + '-S', + '-5', + ]); + }); + + it('isSendAccepted: returns "accepted" for normal response output', () => { + expect(isSendAccepted('Some response text\nAnother line\n')).toBe('accepted'); + }); + + it('isSendAccepted: returns "draft" when last line starts with "> " (draft pattern)', () => { + expect(isSendAccepted('> my unsent message')).toBe('draft'); + }); + + it('isSendAccepted: returns "unverifiable" for blank/empty pane (full-screen TUI case)', () => { + expect(isSendAccepted('')).toBe('unverifiable'); + expect(isSendAccepted(' \n \n')).toBe('unverifiable'); + }); + + // --------------------------------------------------------------------------- + // classifySendResult — BEFORE/AFTER pane-diff classifier (regression suite) + // --------------------------------------------------------------------------- + + describe('classifySendResult (BEFORE/AFTER pane-diff classifier)', () => { + it('returns "accepted" when AFTER differs from BEFORE and AFTER has no draft line', () => { + const before = 'Old content from prior interaction\n'; + const after = 'Old content from prior interaction\nAgent response: task complete.\n'; + expect(classifySendResult(before, after)).toBe('accepted'); + }); + + it('returns "draft" when AFTER differs from BEFORE and AFTER ends in a draft line', () => { + const before = 'Previous output\n'; + const after = 'Previous output\n> unsent message\n'; + expect(classifySendResult(before, after)).toBe('draft'); + }); + + it('returns "unverifiable" when AFTER is blank/empty (full-screen TUI blank render)', () => { + const before = 'Some previous content\n'; + expect(classifySendResult(before, '')).toBe('unverifiable'); + expect(classifySendResult(before, ' \n \n')).toBe('unverifiable'); + }); + + it('returns "unverifiable" when AFTER == BEFORE (stale/wedged pane — no change after send)', () => { + const staleContent = 'Old non-empty content that never changed\n'; + expect(classifySendResult(staleContent, staleContent)).toBe('unverifiable'); + }); + + it('returns "unverifiable" when both BEFORE and AFTER are blank (both blank => no change)', () => { + expect(classifySendResult('', '')).toBe('unverifiable'); + }); + + it('returns "accepted" when BEFORE is blank and AFTER has non-draft content (pane woke up)', () => { + expect(classifySendResult('', 'Agent is now responding.\n')).toBe('accepted'); + }); + }); + + it('issues BEFORE-capture then send then AFTER-capture (3 calls) when --verify is passed and pane changes on first poll', async () => { + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + join(home, 'fleet', 'roster.yaml'), + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + // no-op sleep so the test doesn't take VERIFY_DEFAULT_TIMEOUT_MS + const sleepFn: SleepFn = async () => {}; + + let callIndex = 0; + const calls: string[][] = []; + const runner: CommandRunner = async (command, args) => { + calls.push([command, ...args]); + const idx = callIndex++; + if ([command, ...args].join(' ').includes('agent-send.sh')) { + return { stdout: '', stderr: '', exitCode: 0 }; + } + // BEFORE capture (idx 0): return old content; first AFTER capture (idx 2): return new content + const stdout = idx === 0 ? 'Old pane content\n' : 'New response from agent\n'; + return { stdout, stderr: '', exitCode: 0 }; + }; + + const program = new Command(); + program.exitOverride(); + registerAgentCommand(program, { runner, sleepFn, mosaicHome: home }); + + try { + await program.parseAsync([ + 'node', + 'mosaic', + 'agent', + 'send', + 'coder0', + '--message', + 'hello world', + '--verify', + ]); + + // 3 calls: BEFORE-capture, send, AFTER-capture (pane changed on first poll → accepted immediately) + expect(calls).toHaveLength(3); + expect(calls[0]).toEqual(buildAgentVerifyAcceptedCommand('coder0', 'mosaic-factory', 5)); + expect(calls[1]![0]).toContain('agent-send.sh'); + expect(calls[2]).toEqual(buildAgentVerifyAcceptedCommand('coder0', 'mosaic-factory', 5)); + } finally { + await rm(home, { recursive: true, force: true }); + } + }, 10_000); + + it('does NOT issue capture-pane verify when --verify is not passed', async () => { + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + join(home, 'fleet', 'roster.yaml'), + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + const calls: string[][] = []; + const runner: CommandRunner = async (command, args) => { + calls.push([command, ...args]); + return { stdout: '', stderr: '', exitCode: 0 }; + }; + + const program = new Command(); + program.exitOverride(); + registerAgentCommand(program, { runner, mosaicHome: home }); + + try { + await program.parseAsync([ + 'node', + 'mosaic', + 'agent', + 'send', + 'coder0', + '--message', + 'hello world', + ]); + // Only 1 call: agent-send.sh (no capture-pane) + expect(calls).toHaveLength(1); + expect(calls[0]![0]).toContain('agent-send.sh'); + } finally { + await rm(home, { recursive: true, force: true }); + } + }); + + it('send --verify: AFTER==BEFORE (stale/wedged pane) sets process.exitCode=1 (unverifiable) after timeout', async () => { + const originalExitCode = process.exitCode; + const stderrMessages: string[] = []; + const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((msg) => { + stderrMessages.push(String(msg)); + return true; + }); + + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + join(home, 'fleet', 'roster.yaml'), + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + // Count sleep calls to verify polling happens; use no-op sleep for speed. + let sleepCalls = 0; + const sleepFn: SleepFn = async () => { + sleepCalls++; + }; + + const runner: CommandRunner = async (command, args) => { + const full = [command, ...args].join(' '); + if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 }; + // BEFORE and AFTER are identical non-empty stale content — simulates a wedged pane + return { stdout: 'Stale old content that never changed\n', stderr: '', exitCode: 0 }; + }; + + const program = new Command(); + program.exitOverride(); + registerAgentCommand(program, { runner, sleepFn, mosaicHome: home }); + + try { + // Use a short verify-timeout (one poll interval worth) so the loop exits quickly. + // With a no-op sleep, Date.now() won't advance, so we only get 1 poll before + // deadline is exceeded. Use --verify-timeout=0 to force single-poll timeout. + await program.parseAsync([ + 'node', + 'mosaic', + 'agent', + 'send', + 'coder0', + '--message', + 'hello', + '--verify', + '--verify-timeout', + '0', + ]); + expect(process.exitCode).toBe(1); + // Must mention "no pane change" to distinguish from blank-capture case + expect(stderrMessages.join('')).toMatch(/no pane change after send/i); + // At least one poll should have happened + expect(sleepCalls).toBeGreaterThanOrEqual(1); + } finally { + process.exitCode = originalExitCode; + stderrSpy.mockRestore(); + await rm(home, { recursive: true, force: true }); + } + }, 10_000); + + it('send --verify: blank AFTER capture sets process.exitCode=1 (unverifiable, fails closed) after timeout', async () => { + const originalExitCode = process.exitCode; + const stderrMessages: string[] = []; + const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((msg) => { + stderrMessages.push(String(msg)); + return true; + }); + + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + join(home, 'fleet', 'roster.yaml'), + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + const sleepFn: SleepFn = async () => {}; + + let captureCallCount = 0; + const runner: CommandRunner = async (command, args) => { + const full = [command, ...args].join(' '); + if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 }; + captureCallCount++; + // BEFORE: some content; AFTER: blank (full-screen TUI renders blank after send) + const stdout = captureCallCount === 1 ? 'Previous content\n' : ''; + return { stdout, stderr: '', exitCode: 0 }; + }; + + const program = new Command(); + program.exitOverride(); + registerAgentCommand(program, { runner, sleepFn, mosaicHome: home }); + + try { + await program.parseAsync([ + 'node', + 'mosaic', + 'agent', + 'send', + 'coder0', + '--message', + 'hello', + '--verify', + '--verify-timeout', + '0', + ]); + expect(process.exitCode).toBe(1); + expect(stderrMessages.join('')).toMatch(/could not verify delivery/i); + } finally { + process.exitCode = originalExitCode; + stderrSpy.mockRestore(); + await rm(home, { recursive: true, force: true }); + } + }, 10_000); + + it('send --verify: AFTER differs from BEFORE with draft line sets process.exitCode=1 (returns immediately on first poll)', async () => { + const originalExitCode = process.exitCode; + const stderrMessages: string[] = []; + const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((msg) => { + stderrMessages.push(String(msg)); + return true; + }); + + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + join(home, 'fleet', 'roster.yaml'), + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + let sleepCalls = 0; + const sleepFn: SleepFn = async () => { + sleepCalls++; + }; + + let captureCallCount = 0; + const runner: CommandRunner = async (command, args) => { + const full = [command, ...args].join(' '); + if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 }; + captureCallCount++; + // BEFORE: old content; AFTER: message appeared but ended as a draft line + const stdout = captureCallCount === 1 ? 'Previous output\n' : '> unsent message\n'; + return { stdout, stderr: '', exitCode: 0 }; + }; + + const program = new Command(); + program.exitOverride(); + registerAgentCommand(program, { runner, sleepFn, mosaicHome: home }); + + try { + await program.parseAsync([ + 'node', + 'mosaic', + 'agent', + 'send', + 'coder0', + '--message', + 'hello', + '--verify', + ]); + expect(process.exitCode).toBe(1); + expect(stderrMessages.join('')).toMatch(/unsubmitted draft/i); + // Draft is returned on the first poll — only one sleep call expected + expect(sleepCalls).toBe(1); + } finally { + process.exitCode = originalExitCode; + stderrSpy.mockRestore(); + await rm(home, { recursive: true, force: true }); + } + }, 10_000); + + it('send --verify: AFTER differs from BEFORE with real response content sets exitCode=0 (accepted on first poll)', async () => { + const originalExitCode = process.exitCode; + + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + join(home, 'fleet', 'roster.yaml'), + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + const sleepFn: SleepFn = async () => {}; + + let captureCallCount = 0; + const runner: CommandRunner = async (command, args) => { + const full = [command, ...args].join(' '); + if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 }; + captureCallCount++; + // BEFORE: old content; AFTER: new response content (pane changed) + const stdout = + captureCallCount === 1 + ? 'Old pane content\n' + : 'Old pane content\nAgent response: task completed.\n'; + return { stdout, stderr: '', exitCode: 0 }; + }; + + const program = new Command(); + program.exitOverride(); + registerAgentCommand(program, { runner, sleepFn, mosaicHome: home }); + + try { + await program.parseAsync([ + 'node', + 'mosaic', + 'agent', + 'send', + 'coder0', + '--message', + 'hello', + '--verify', + ]); + // exitCode should remain unchanged (not set to 1) + expect(process.exitCode).toBe(originalExitCode); + } finally { + process.exitCode = originalExitCode; + await rm(home, { recursive: true, force: true }); + } + }, 10_000); + + // --------------------------------------------------------------------------- + // Bounded-polling tests (FR-5 enhancement) + // --------------------------------------------------------------------------- + + it('send --verify: accepted on 2nd poll (pane slow to respond) => exit 0', async () => { + // Simulates a slow/loaded TUI that only updates on the 2nd poll. + const originalExitCode = process.exitCode; + + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + join(home, 'fleet', 'roster.yaml'), + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + let sleepCalls = 0; + const sleepFn: SleepFn = async (ms) => { + sleepCalls++; + expect(ms).toBe(VERIFY_POLL_INTERVAL_MS); + }; + + let captureCallCount = 0; + const runner: CommandRunner = async (command, args) => { + const full = [command, ...args].join(' '); + if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 }; + captureCallCount++; + if (captureCallCount === 1) { + // BEFORE: old content + return { stdout: 'Old pane content\n', stderr: '', exitCode: 0 }; + } else if (captureCallCount === 2) { + // 1st AFTER poll: still unchanged (slow TUI) => unverifiable + return { stdout: 'Old pane content\n', stderr: '', exitCode: 0 }; + } else { + // 2nd AFTER poll: pane changed => accepted + return { stdout: 'Old pane content\nAgent accepted task.\n', stderr: '', exitCode: 0 }; + } + }; + + const program = new Command(); + program.exitOverride(); + registerAgentCommand(program, { runner, sleepFn, mosaicHome: home }); + + try { + // Give enough timeout for at least 2 polls (2 × VERIFY_POLL_INTERVAL_MS). + // With no-op sleep, Date.now() will advance between polls so we use a generous timeout. + await program.parseAsync([ + 'node', + 'mosaic', + 'agent', + 'send', + 'coder0', + '--message', + 'hello', + '--verify', + '--verify-timeout', + String(VERIFY_POLL_INTERVAL_MS * 3), + ]); + // Accepted on 2nd poll — exitCode should remain unchanged + expect(process.exitCode).toBe(originalExitCode); + // At least 2 sleep calls (one per poll until accepted) + expect(sleepCalls).toBeGreaterThanOrEqual(2); + } finally { + process.exitCode = originalExitCode; + await rm(home, { recursive: true, force: true }); + } + }, 10_000); + + it('send --verify: accepted on 3rd poll => exit 0', async () => { + const originalExitCode = process.exitCode; + + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + join(home, 'fleet', 'roster.yaml'), + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + let sleepCalls = 0; + // Real-time advancing sleep needed so deadline check works correctly. + // Use a tiny delay (1ms) so the test runs fast but Date.now() still advances. + const sleepFn: SleepFn = async () => { + sleepCalls++; + await new Promise((r) => setTimeout(r, 1)); + }; + + let captureCallCount = 0; + const runner: CommandRunner = async (command, args) => { + const full = [command, ...args].join(' '); + if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 }; + captureCallCount++; + if (captureCallCount === 1) { + return { stdout: 'Old content\n', stderr: '', exitCode: 0 }; + } else if (captureCallCount <= 3) { + // Polls 1 and 2: unchanged + return { stdout: 'Old content\n', stderr: '', exitCode: 0 }; + } else { + // Poll 3: accepted + return { stdout: 'Old content\nDone!\n', stderr: '', exitCode: 0 }; + } + }; + + const program = new Command(); + program.exitOverride(); + registerAgentCommand(program, { runner, sleepFn, mosaicHome: home }); + + try { + // Long enough timeout to allow at least 3 polls with 1ms sleeps + await program.parseAsync([ + 'node', + 'mosaic', + 'agent', + 'send', + 'coder0', + '--message', + 'hello', + '--verify', + '--verify-timeout', + '500', + ]); + expect(process.exitCode).toBe(originalExitCode); + expect(sleepCalls).toBeGreaterThanOrEqual(3); + } finally { + process.exitCode = originalExitCode; + await rm(home, { recursive: true, force: true }); + } + }, 10_000); + + it('send --verify: pane stays unchanged until timeout => exit 1 (unverifiable)', async () => { + const originalExitCode = process.exitCode; + const stderrMessages: string[] = []; + const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((msg) => { + stderrMessages.push(String(msg)); + return true; + }); + + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + join(home, 'fleet', 'roster.yaml'), + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + let sleepCalls = 0; + const sleepFn: SleepFn = async () => { + sleepCalls++; + }; + + const runner: CommandRunner = async (command, args) => { + const full = [command, ...args].join(' '); + if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 }; + // Always the same content — pane never changes + return { stdout: 'Unchanged pane content\n', stderr: '', exitCode: 0 }; + }; + + const program = new Command(); + program.exitOverride(); + registerAgentCommand(program, { runner, sleepFn, mosaicHome: home }); + + try { + // timeout=0 means deadline is immediately exceeded after the first poll + await program.parseAsync([ + 'node', + 'mosaic', + 'agent', + 'send', + 'coder0', + '--message', + 'hello', + '--verify', + '--verify-timeout', + '0', + ]); + expect(process.exitCode).toBe(1); + expect(stderrMessages.join('')).toMatch(/no pane change after send/i); + expect(sleepCalls).toBeGreaterThanOrEqual(1); + } finally { + process.exitCode = originalExitCode; + stderrSpy.mockRestore(); + await rm(home, { recursive: true, force: true }); + } + }, 10_000); + + it('send --verify: VERIFY_POLL_INTERVAL_MS and VERIFY_DEFAULT_TIMEOUT_MS are exported constants', () => { + expect(typeof VERIFY_POLL_INTERVAL_MS).toBe('number'); + expect(VERIFY_POLL_INTERVAL_MS).toBe(400); + expect(typeof VERIFY_DEFAULT_TIMEOUT_MS).toBe('number'); + expect(VERIFY_DEFAULT_TIMEOUT_MS).toBe(6_000); + }); +}); diff --git a/packages/mosaic/src/commands/fleet.ts b/packages/mosaic/src/commands/fleet.ts index 51ade3f..4e76035 100644 --- a/packages/mosaic/src/commands/fleet.ts +++ b/packages/mosaic/src/commands/fleet.ts @@ -1,12 +1,19 @@ import { constants } from 'node:fs'; import { access, chmod, copyFile, mkdir, readFile, writeFile } from 'node:fs/promises'; -import { homedir, hostname } from 'node:os'; +import { homedir, hostname, userInfo } from 'node:os'; import { dirname, join, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; import { spawn } from 'node:child_process'; import type { Command } from 'commander'; import YAML from 'yaml'; +/** + * A function that spawns a command with inherited stdio (TTY passthrough). + * Used for interactive commands like `tmux attach` that need a real terminal. + * Resolves with the process exit code. + */ +export type InteractiveRunner = (command: string, args: string[]) => Promise; + export interface CommandResult { stdout: string; stderr: string; @@ -15,8 +22,23 @@ export interface CommandResult { export type CommandRunner = (command: string, args: string[]) => Promise; +/** + * Injectable sleep helper used by the send --verify polling loop. + * Tests stub this to avoid real delays; production uses the default + * implementation backed by setTimeout. + */ +export type SleepFn = (ms: number) => Promise; + export interface FleetCommandDeps { runner?: CommandRunner; + /** Injectable interactive runner for commands needing inherited TTY (e.g., `tmux attach`). */ + interactiveRunner?: InteractiveRunner; + /** + * Injectable sleep function for the send --verify polling loop. + * Defaults to a real setTimeout-based sleep. Tests stub this to avoid + * real delays; the default is used in production. + */ + sleepFn?: SleepFn; mosaicHome?: string; frameworkRoot?: string; } @@ -92,6 +114,18 @@ type FleetServiceAction = 'start' | 'stop' | 'restart' | 'status'; const DEFAULT_SOCKET_NAME = 'mosaic-factory'; const DEFAULT_HOLDER_SESSION = '_holder'; const DEFAULT_WORKING_DIRECTORY = '~/src'; + +/** + * Default poll interval (ms) between capture-pane checks in `send --verify`. + * Kept short enough to react quickly while not hammering tmux on busy hosts. + */ +export const VERIFY_POLL_INTERVAL_MS = 400; + +/** + * Default total timeout (ms) for the `send --verify` polling loop. + * Configurable via `--verify-timeout ` on `agent send`. + */ +export const VERIFY_DEFAULT_TIMEOUT_MS = 6_000; const DEFAULT_RUNTIME_RESETS: Record = { claude: { resetCommand: '/clear' }, codex: { resetCommand: '/clear' }, @@ -236,6 +270,401 @@ export function buildAgentTailCommand( ]; } +// --------------------------------------------------------------------------- +// Fleet ps — phase 2 observability helpers +// --------------------------------------------------------------------------- + +export const HEARTBEAT_INTERVAL_MS = 15_000; +export const HEARTBEAT_HEALTHY_MULTIPLIER = 3; + +export interface HeartbeatInfo { + ts: Date | null; + pid: number | null; + status: 'ok' | 'busy' | null; + /** healthy | stale | unknown */ + health: 'healthy' | 'stale' | 'unknown'; + ageMs: number | null; +} + +export interface AgentPsRow { + name: string; + tenant_id: string; + host: string; + runtime: string; + systemdActive: string; + systemdEnabled: string; + paneAlive: boolean; + panePid: number | null; + paneCommand: string | null; + idleSeconds: number | null; + heartbeat: HeartbeatInfo; + /** roster runtime !== actual pane command */ + driftFlag: boolean; + /** active but UnitFileState=disabled */ + bootEnableWarning: boolean; +} + +/** + * Returns the systemd show command for an agent unit (active+enabled state). + * Returns: `systemctl --user show -p ActiveState -p SubState -p UnitFileState` + */ +export function buildSystemdShowCommand(agentName: string): string[] { + const unit = `mosaic-agent@${agentName}.service`; + return [ + 'systemctl', + '--user', + 'show', + unit, + '-p', + 'ActiveState', + '-p', + 'SubState', + '-p', + 'UnitFileState', + ]; +} + +/** + * Returns the tmux list-panes command for an agent pane. + * Format: `#{pane_pid} #{pane_current_command} #{pane_dead} #{pane_activity}` + */ +export function buildTmuxListPanesCommand( + agentName: string, + socketName = DEFAULT_SOCKET_NAME, +): string[] { + return [ + 'tmux', + '-L', + socketName, + 'list-panes', + '-t', + `=${agentName}:0.0`, + '-F', + '#{pane_pid} #{pane_current_command} #{pane_dead} #{pane_activity}', + ]; +} + +/** + * Returns the heartbeat file path for an agent. + */ +export function heartbeatPath(agentName: string, mosaicHome = defaultMosaicHome()): string { + return join(mosaicHome, 'fleet', 'run', `${agentName}.hb`); +} + +/** + * Parse a heartbeat file's contents into a HeartbeatInfo. + * File format (one key=value per line): + * ts= + * pid= + * status= + */ +export function parseHeartbeat(content: string | null, nowMs = Date.now()): HeartbeatInfo { + if (content === null) { + return { ts: null, pid: null, status: null, health: 'unknown', ageMs: null }; + } + const lines = content.split('\n'); + let ts: Date | null = null; + let pid: number | null = null; + let status: 'ok' | 'busy' | null = null; + for (const line of lines) { + const [key, ...rest] = line.split('='); + const val = rest.join('=').trim(); + if (key === 'ts' && val) { + const d = new Date(val); + if (!Number.isNaN(d.getTime())) ts = d; + } else if (key === 'pid' && val) { + const n = Number.parseInt(val, 10); + if (Number.isFinite(n)) pid = n; + } else if (key === 'status' && (val === 'ok' || val === 'busy')) { + status = val; + } + } + const thresholdMs = HEARTBEAT_INTERVAL_MS * HEARTBEAT_HEALTHY_MULTIPLIER; + let health: 'healthy' | 'stale' | 'unknown' = 'unknown'; + let ageMs: number | null = null; + if (ts !== null) { + ageMs = nowMs - ts.getTime(); + health = ageMs <= thresholdMs ? 'healthy' : 'stale'; + } + return { ts, pid, status, health, ageMs }; +} + +/** + * Parse the output of `systemctl --user show ... -p ActiveState -p SubState -p UnitFileState` + * Returns an object with the three properties. + */ +export function parseSystemdShow(output: string): { + ActiveState: string; + SubState: string; + UnitFileState: string; +} { + const result: Record = {}; + for (const line of output.split('\n')) { + const eq = line.indexOf('='); + if (eq !== -1) { + result[line.slice(0, eq)] = line.slice(eq + 1).trim(); + } + } + return { + ActiveState: result['ActiveState'] ?? 'unknown', + SubState: result['SubState'] ?? 'unknown', + UnitFileState: result['UnitFileState'] ?? 'unknown', + }; +} + +/** + * Parse the output of `tmux list-panes -F '#{pane_pid} #{pane_current_command} #{pane_dead} #{pane_activity}'` + * pane_activity is a Unix epoch timestamp (seconds). + */ +export function parseTmuxListPanes( + output: string, + nowMs = Date.now(), +): { pid: number | null; command: string | null; dead: boolean; idleSeconds: number | null } { + const line = output.trim().split('\n')[0]; + if (!line) { + return { pid: null, command: null, dead: true, idleSeconds: null }; + } + // format: + const parts = line.split(' '); + const pid = parts[0] ? (Number.isFinite(Number(parts[0])) ? Number(parts[0]) : null) : null; + const command = parts[1] ?? null; + const dead = parts[2] === '1'; + const activityEpoch = parts[3] ? Number(parts[3]) : NaN; + const idleSeconds = + Number.isFinite(activityEpoch) && activityEpoch > 0 + ? Math.floor((nowMs - activityEpoch * 1000) / 1000) + : null; + return { pid, command, dead, idleSeconds }; +} + +/** + * Determine if there is a runtime drift: roster says one runtime but the pane + * is actually running something from a different runtime. We detect this by + * checking if the pane command doesn't match a known canonical command for the + * roster's declared runtime. + * + * Known canonical commands per runtime: + * claude → claude + * codex → codex + * opencode → opencode + * pi → pi + * + * If the pane is running something else (e.g., python3/dogfood-agent.py) for + * an agent whose roster runtime is "pi", that's a drift. + */ +export function detectDrift(rosterRuntime: string, paneCommand: string | null): boolean { + if (!paneCommand) return false; + const knownCommands: Record = { + claude: ['claude'], + codex: ['codex'], + opencode: ['opencode'], + pi: ['pi'], + }; + const expected = knownCommands[rosterRuntime]; + if (!expected) return false; + return !expected.includes(paneCommand); +} + +/** + * Returns the default tenant_id (OS username) and host (short hostname). + * These MUST appear in every --json record for multi-tenant/multi-host zero-foreclosure. + */ +export function getDefaultTenantAndHost(): { tenant_id: string; host: string } { + let tenant_id: string; + try { + tenant_id = userInfo().username; + } catch { + tenant_id = process.env['USER'] ?? process.env['LOGNAME'] ?? 'unknown'; + } + const host = hostname().split('.')[0] || 'localhost'; + return { tenant_id, host }; +} + +/** + * Builds the command to create a grouped viewer session targeting an agent session. + * A grouped session shares the same windows as the target but gets INDEPENDENT sizing, + * so attaching the viewer never resizes the agent's window. + * + * The viewer session name is derived from the agent name and a unique suffix (typically + * the caller's PID) so multiple concurrent watchers don't collide. + * + * Usage sequence: + * 1. Run buildAgentWatchCreateViewerCommand → create grouped session (via capturing runner). + * 2. Run buildAgentWatchAttachCommand → attach -r to the viewer session (via interactiveRunner). + * 3. Run buildAgentWatchKillViewerCommand → kill the viewer session on detach (via capturing runner). + */ +export function buildAgentWatchCreateViewerCommand( + agentName: string, + viewerSessionName: string, + socketName = DEFAULT_SOCKET_NAME, +): string[] { + return [ + 'tmux', + '-L', + socketName, + 'new-session', + '-d', + '-t', + `=${agentName}`, + '-s', + viewerSessionName, + ]; +} + +/** + * Builds the interactive attach command for a viewer session (read-only). + * Must be run via interactiveRunner (stdio: 'inherit'). + */ +export function buildAgentWatchAttachCommand( + viewerSessionName: string, + socketName = DEFAULT_SOCKET_NAME, +): string[] { + return ['tmux', '-L', socketName, 'attach', '-r', '-t', viewerSessionName]; +} + +/** + * Builds the kill-session command to clean up a viewer session after detach. + * Keeps the agent session intact. + */ +export function buildAgentWatchKillViewerCommand( + viewerSessionName: string, + socketName = DEFAULT_SOCKET_NAME, +): string[] { + return ['tmux', '-L', socketName, 'kill-session', '-t', viewerSessionName]; +} + +/** + * Returns a unique viewer session name for a given agent. + * Uses process.pid so concurrent watchers produce distinct names. + */ +export function buildViewerSessionName(agentName: string): string { + return `${agentName}-watch-${process.pid}`; +} + +/** + * @deprecated Use buildAgentWatchCreateViewerCommand + buildAgentWatchAttachCommand + + * buildAgentWatchKillViewerCommand instead. This bare attach targets the agent session + * directly and can resize it when the viewer terminal is smaller than the agent's window. + * + * Kept for backward compatibility only. + */ +export function buildAgentWatchCommand( + agentName: string, + socketName = DEFAULT_SOCKET_NAME, +): string[] { + return ['tmux', '-L', socketName, 'attach', '-r', '-t', `=${agentName}`]; +} + +/** + * Builds the capture-pane command used to verify that agent send was accepted + * (not left as an unsubmitted draft). Captures the last N lines and checks for + * the draft heuristic. + */ +export function buildAgentVerifyAcceptedCommand( + agentName: string, + socketName = DEFAULT_SOCKET_NAME, + lines = 5, +): string[] { + return [ + 'tmux', + '-L', + socketName, + 'capture-pane', + '-t', + `=${agentName}:0.0`, + '-p', + '-S', + `-${lines}`, + ]; +} + +/** + * Result of a send-verify check. + * - 'accepted': positive evidence that the message was accepted (response content present). + * - 'draft': last non-empty line matches the draft heuristic (unsubmitted input). + * - 'unverifiable': pane did not change after send (stale or blank) — we cannot determine + * acceptance; fails closed per FR-5. + */ +export type SendVerifyResult = 'accepted' | 'draft' | 'unverifiable'; + +/** + * Classify the result of a send-verify check by comparing BEFORE and AFTER pane snapshots. + * + * This is the primary classifier for `send --verify`. It addresses the stale-pane + * false-success problem: if the pane content did not change after the send, the new + * message never registered in the TUI (wedged pane, send dropped, etc.). + * + * Classification logic: + * 'unverifiable' — AFTER is blank/empty OR AFTER == BEFORE (no pane change after send). + * 'draft' — AFTER differs from BEFORE AND the last non-empty line of AFTER starts + * with the draft pattern ("> "); message was typed but not submitted. + * 'accepted' — AFTER differs from BEFORE AND AFTER does not end in a draft line; + * positive evidence that the TUI accepted the message. + * + * NOTE on blank AFTER: Full-screen TUIs (claude, codex, opencode, pi) render blank for + * `tmux capture-pane`. A blank AFTER is indistinguishable from a wedged pane, so it + * is always classified 'unverifiable' (fail-closed). + * + * NOTE on definitive acceptance: Phase-2 can only observe the pane surface — there is no + * runtime acknowledgement (heartbeat-ack) at this phase. The pane-change check is the best + * signal available against an opaque TUI. Definitive acceptance ultimately requires a + * runtime acknowledgement (Phase-3 heartbeat-ack). + * + * Draft heuristic: a last non-empty line (after stripping ANSI escapes) that starts + * with "> " is treated as an unsubmitted input line. This pattern is specific to + * pi/claude TUIs; draft detection for codex/opencode TUIs is best-effort only. + * + * FR-5 requires `send --verify` to return non-zero when delivery cannot be verified. + * + * @param before Pane snapshot captured BEFORE the send command. + * @param after Pane snapshot captured AFTER the send command (after the delay). + */ +export function classifySendResult(before: string, after: string): SendVerifyResult { + const afterLines = after.split('\n').filter((l) => l.trim().length > 0); + // Blank/empty AFTER => full-screen TUI rendered blank, or pane is wedged => unverifiable. + if (afterLines.length === 0) return 'unverifiable'; + // No change => message didn't register in the TUI (stale/wedged pane) => unverifiable. + if (after === before) return 'unverifiable'; + // AFTER differs from BEFORE — check whether the pane is now showing a draft line. + const lastLine = afterLines[afterLines.length - 1]!; + const stripped = lastLine.replace(/\x1b\[[0-9;]*m/g, '').trim(); + // Heuristic: if stripped last line starts with "> " — that's the common draft pattern + // in pi/claude TUIs for showing user input before submission. + // NOTE: this heuristic is pi/claude-specific; draft detection for codex/opencode + // TUIs is best-effort only and may miss other unsubmitted-input indicators. + if (/^>\s/.test(stripped)) return 'draft'; + return 'accepted'; +} + +/** + * Check whether a send was accepted (not left as draft), using only the AFTER snapshot. + * + * @deprecated Prefer classifySendResult(before, after) which guards against stale-pane + * false-successes. This single-snapshot variant cannot detect a wedged pane that still + * shows old non-empty content — it will incorrectly return 'accepted' in that case. + * + * Retained for unit-test compatibility with single-snapshot assertions. + * + * Returns: + * 'unverifiable' — blank/empty capture (full-screen TUIs render blank; we cannot tell). + * 'draft' — last non-empty line matches the draft heuristic. + * 'accepted' — non-blank and not a draft line (but may be stale — see above). + */ +export function isSendAccepted(capturedOutput: string): SendVerifyResult { + const lines = capturedOutput.split('\n').filter((l) => l.trim().length > 0); + // Blank/empty capture => full-screen TUI rendered blank => unverifiable. + // This is the known-unverifiable case; fail closed (not treated as success). + if (lines.length === 0) return 'unverifiable'; + const lastLine = lines[lines.length - 1]!; + const stripped = lastLine.replace(/\x1b\[[0-9;]*m/g, '').trim(); + // Heuristic: if stripped last line starts with "> " — that's the common draft pattern + // in pi/claude TUIs for showing user input before submission. + // NOTE: this heuristic is pi/claude-specific; draft detection for codex/opencode + // TUIs is best-effort only and may miss other unsubmitted-input indicators. + if (/^>\s/.test(stripped)) return 'draft'; + return 'accepted'; +} + export function registerFleetCommand(program: Command, deps: FleetCommandDeps = {}): Command { const runner = deps.runner ?? runCommand; const paths = resolveFleetPaths(deps.mosaicHome); @@ -360,6 +789,113 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps = console.log(`Verified fleet on tmux socket ${socketName}.`); }); + cmd + .command('ps') + .description('Show real-time status for all roster agents (systemd + tmux + heartbeat)') + .option('--json', 'Print JSON array') + .action(async (opts: { json?: boolean }) => { + const commandOpts = cmd.opts<{ mosaicHome: string; roster?: string }>(); + const activePaths = resolveFleetPaths(commandOpts.mosaicHome); + const roster = await loadRosterForCommand(cmd); + const { tenant_id, host } = getDefaultTenantAndHost(); + const nowMs = Date.now(); + + const rows: AgentPsRow[] = []; + + for (const agent of roster.agents) { + // systemd show + const showResult = await runner(...splitCommand(buildSystemdShowCommand(agent.name))); + const sysInfo = parseSystemdShow(showResult.stdout); + + // tmux list-panes + const panesResult = await runner( + ...splitCommand(buildTmuxListPanesCommand(agent.name, roster.tmux.socketName)), + ); + const paneInfo = parseTmuxListPanes(panesResult.stdout, nowMs); + + // heartbeat + const hbFile = heartbeatPath(agent.name, activePaths.mosaicHome); + let hbContent: string | null = null; + try { + hbContent = await readFile(hbFile, 'utf8'); + } catch { + hbContent = null; + } + const hb = parseHeartbeat(hbContent, nowMs); + + // drift and boot-enable + const driftFlag = detectDrift(agent.runtime, paneInfo.command); + const bootEnableWarning = + sysInfo.ActiveState === 'active' && sysInfo.UnitFileState === 'disabled'; + + rows.push({ + name: agent.name, + tenant_id, + host, + runtime: agent.runtime, + systemdActive: sysInfo.ActiveState, + systemdEnabled: sysInfo.UnitFileState, + paneAlive: !paneInfo.dead, + panePid: paneInfo.pid, + paneCommand: paneInfo.command, + idleSeconds: paneInfo.idleSeconds, + heartbeat: hb, + driftFlag, + bootEnableWarning, + }); + } + + if (opts.json) { + console.log(JSON.stringify(rows, null, 2)); + return; + } + + // Table output + const header = [ + 'NAME'.padEnd(18), + 'TENANT'.padEnd(12), + 'HOST'.padEnd(12), + 'RUNTIME'.padEnd(10), + 'SYSTEMD'.padEnd(16), + 'PANE'.padEnd(8), + 'PID'.padEnd(8), + 'IDLE'.padEnd(8), + 'HB'.padEnd(12), + 'FLAGS', + ].join(' '); + console.log(header); + console.log('-'.repeat(header.length)); + + for (const row of rows) { + const systemd = `${row.systemdActive}/${row.systemdEnabled}`; + const pane = row.paneAlive ? 'alive' : 'dead'; + const pid = row.panePid !== null ? String(row.panePid) : '-'; + const idle = row.idleSeconds !== null ? `${row.idleSeconds}s` : '-'; + const hbAge = + row.heartbeat.ageMs !== null + ? `${Math.round(row.heartbeat.ageMs / 1000)}s/${row.heartbeat.health}` + : `unknown`; + const flags: string[] = []; + if (row.driftFlag) flags.push('DRIFT'); + if (row.bootEnableWarning) flags.push('BOOT-ENABLE'); + + console.log( + [ + row.name.padEnd(18), + row.tenant_id.padEnd(12), + row.host.padEnd(12), + row.runtime.padEnd(10), + systemd.padEnd(16), + pane.padEnd(8), + pid.padEnd(8), + idle.padEnd(8), + hbAge.padEnd(12), + flags.join(','), + ].join(' '), + ); + } + }); + return cmd; } @@ -368,6 +904,8 @@ export function registerFleetAgentCommands( deps: FleetCommandDeps = {}, ): void { const runner = deps.runner ?? runCommand; + const iRunner = deps.interactiveRunner ?? spawnInteractive; + const sleepFn = deps.sleepFn ?? defaultSleep; agentCommand .command('roster') @@ -417,21 +955,141 @@ export function registerFleetAgentCommands( .requiredOption('--message ', 'Message text') .option('--source-label