From aec560162b4a5d177fd17ce09d1e049c4e0619fb Mon Sep 17 00:00:00 2001 From: Jarvis Date: Sat, 20 Jun 2026 22:45:22 -0500 Subject: [PATCH] fix(fleet): verify fails-closed on unverifiable + interactive watch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - isSendAccepted now returns 'accepted' | 'draft' | 'unverifiable' (was bool) - Blank/empty capture => 'unverifiable' => process.exitCode=1 with distinct "could not verify delivery (blank/no response captured)" message; previously blank was treated as success, violating FR-5 fail-closed semantics - Draft line ('^> ') => process.exitCode=1 with "left as unsubmitted draft" message; distinct wording from unverifiable case - agent watch now dispatched through injectable InteractiveRunner (stdio:inherit) instead of the capturing CommandRunner; tmux attach requires TTY passthrough - Default spawnInteractive implementation uses node:child_process spawn with stdio:'inherit'; injectable via FleetCommandDeps.interactiveRunner for tests - Removed buildSystemdIsActiveCommand (dead code — exported but unused) - Tests: blank=>exitCode=1, draft=>exitCode=1, real response=>exitCode=0, watch dispatched through interactiveRunner not capturing runner - PRD: added "Known limitations" section (heuristic verify, blank fails closed, non-pi/claude draft detection is best-effort, watch requires TTY passthrough) - Code comment on isSendAccepted notes pi/claude-specific draft heuristic Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01RMoEx7hfdFGjUiCHuN1RRi --- docs/fleet/PRD.md | 13 ++ packages/mosaic/src/commands/fleet.spec.ts | 173 +++++++++++++++++++-- packages/mosaic/src/commands/fleet.ts | 123 +++++++++------ 3 files changed, 251 insertions(+), 58 deletions(-) diff --git a/docs/fleet/PRD.md b/docs/fleet/PRD.md index 2095a50..09e8738 100644 --- a/docs/fleet/PRD.md +++ b/docs/fleet/PRD.md @@ -75,6 +75,19 @@ observability and no safe way to watch a session. - Situational: run against the live `mosaic-factory` fleet; capture `fleet ps` output, a kill-and-detect cycle, a read-only `watch`, and a `send --verify` pass/fail pair. +## Known limitations + +- **Verify heuristic is best-effort:** `agent send --verify` uses a `>` -prefix draft + heuristic that is specific to pi/claude TUIs. Draft detection for codex and opencode + TUIs is best-effort only; those runtimes may not use the same input-line indicator. +- **Blank capture fails closed:** Full-screen TUIs (claude, codex, opencode, pi) render + blank for `tmux capture-pane`. When the captured output is empty, `send --verify` + returns non-zero with an "unverifiable" message rather than silently succeeding. This + is an intentional fail-closed design (FR-5). +- **`agent watch` requires TTY passthrough:** `tmux attach` is interactive and must be + run with inherited stdio. It cannot be captured through a pipe. Tests inject a fake + `interactiveRunner`; the real implementation spawns with `stdio: 'inherit'`. + ## Surfaces & parity (MVP-X1) CLI lands this phase. TUI surface follows in the `packages/mosaic` wizard; webUI in diff --git a/packages/mosaic/src/commands/fleet.spec.ts b/packages/mosaic/src/commands/fleet.spec.ts index 4efe809..b1a5cc7 100644 --- a/packages/mosaic/src/commands/fleet.spec.ts +++ b/packages/mosaic/src/commands/fleet.spec.ts @@ -26,6 +26,7 @@ import { resolveFleetPaths, type AgentPsRow, type CommandRunner, + type InteractiveRunner, } from './fleet.js'; import { registerAgentCommand } from './agent.js'; @@ -1057,7 +1058,7 @@ describe('agent watch', () => { expect(cmd).toContain('-r'); }); - it('issues the read-only attach command through the injected runner', async () => { + it('dispatches the read-only attach command through the interactiveRunner, NOT the capturing runner', async () => { const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); await mkdir(join(home, 'fleet'), { recursive: true }); await writeFile( @@ -1067,19 +1068,29 @@ describe('agent watch', () => { ), ); - const calls: string[][] = []; + const capturingCalls: string[][] = []; const runner: CommandRunner = async (command, args) => { - calls.push([command, ...args]); + capturingCalls.push([command, ...args]); return { stdout: '', stderr: '', exitCode: 0 }; }; + const interactiveCalls: string[][] = []; + const interactiveRunner: InteractiveRunner = async (command, args) => { + interactiveCalls.push([command, ...args]); + return 0; + }; + const program = new Command(); program.exitOverride(); - registerAgentCommand(program, { runner, mosaicHome: home }); + registerAgentCommand(program, { runner, interactiveRunner, mosaicHome: home }); try { await program.parseAsync(['node', 'mosaic', 'agent', 'watch', 'coder0']); - expect(calls).toEqual([['tmux', '-L', 'mosaic-factory', 'attach', '-r', '-t', '=coder0']]); + // Must go through interactiveRunner, not the capturing runner + expect(capturingCalls).toHaveLength(0); + expect(interactiveCalls).toEqual([ + ['tmux', '-L', 'mosaic-factory', 'attach', '-r', '-t', '=coder0'], + ]); } finally { await rm(home, { recursive: true, force: true }); } @@ -1126,17 +1137,17 @@ describe('agent send --verify', () => { ]); }); - it('isSendAccepted: returns true for normal response output', () => { - expect(isSendAccepted('Some response text\nAnother line\n')).toBe(true); + it('isSendAccepted: returns "accepted" for normal response output', () => { + expect(isSendAccepted('Some response text\nAnother line\n')).toBe('accepted'); }); - it('isSendAccepted: returns false when last line starts with "> " (draft pattern)', () => { - expect(isSendAccepted('> my unsent message')).toBe(false); + it('isSendAccepted: returns "draft" when last line starts with "> " (draft pattern)', () => { + expect(isSendAccepted('> my unsent message')).toBe('draft'); }); - it('isSendAccepted: returns true for blank pane (treated as submitted)', () => { - expect(isSendAccepted('')).toBe(true); - expect(isSendAccepted(' \n \n')).toBe(true); + it('isSendAccepted: returns "unverifiable" for blank/empty pane (full-screen TUI case)', () => { + expect(isSendAccepted('')).toBe('unverifiable'); + expect(isSendAccepted(' \n \n')).toBe('unverifiable'); }); it('issues send then verify capture via injected runner when --verify is passed', async () => { @@ -1219,4 +1230,142 @@ describe('agent send --verify', () => { await rm(home, { recursive: true, force: true }); } }); + + it('send --verify: blank capture sets process.exitCode=1 (unverifiable, fails closed)', async () => { + const originalExitCode = process.exitCode; + const stderrMessages: string[] = []; + const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((msg) => { + stderrMessages.push(String(msg)); + return true; + }); + + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + join(home, 'fleet', 'roster.yaml'), + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + const runner: CommandRunner = async (command, args) => { + const full = [command, ...args].join(' '); + if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 }; + // capture-pane returns blank (full-screen TUI) + return { stdout: '', stderr: '', exitCode: 0 }; + }; + + const program = new Command(); + program.exitOverride(); + registerAgentCommand(program, { runner, mosaicHome: home }); + + try { + await program.parseAsync([ + 'node', + 'mosaic', + 'agent', + 'send', + 'coder0', + '--message', + 'hello', + '--verify', + ]); + expect(process.exitCode).toBe(1); + expect(stderrMessages.join('')).toMatch(/could not verify delivery.*blank/i); + } finally { + process.exitCode = originalExitCode; + stderrSpy.mockRestore(); + await rm(home, { recursive: true, force: true }); + } + }, 10_000); + + it('send --verify: draft line sets process.exitCode=1 with distinct wording', async () => { + const originalExitCode = process.exitCode; + const stderrMessages: string[] = []; + const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((msg) => { + stderrMessages.push(String(msg)); + return true; + }); + + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + join(home, 'fleet', 'roster.yaml'), + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + const runner: CommandRunner = async (command, args) => { + const full = [command, ...args].join(' '); + if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 }; + // capture-pane returns a draft line ("> unsent message") + return { stdout: '> unsent message\n', stderr: '', exitCode: 0 }; + }; + + const program = new Command(); + program.exitOverride(); + registerAgentCommand(program, { runner, mosaicHome: home }); + + try { + await program.parseAsync([ + 'node', + 'mosaic', + 'agent', + 'send', + 'coder0', + '--message', + 'hello', + '--verify', + ]); + expect(process.exitCode).toBe(1); + expect(stderrMessages.join('')).toMatch(/unsubmitted draft/i); + } finally { + process.exitCode = originalExitCode; + stderrSpy.mockRestore(); + await rm(home, { recursive: true, force: true }); + } + }, 10_000); + + it('send --verify: real response content sets exitCode=0 (accepted)', async () => { + const originalExitCode = process.exitCode; + + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + join(home, 'fleet', 'roster.yaml'), + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + const runner: CommandRunner = async (command, args) => { + const full = [command, ...args].join(' '); + if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 }; + // capture-pane returns real response content + return { stdout: 'Agent response: task completed.\n', stderr: '', exitCode: 0 }; + }; + + const program = new Command(); + program.exitOverride(); + registerAgentCommand(program, { runner, mosaicHome: home }); + + try { + await program.parseAsync([ + 'node', + 'mosaic', + 'agent', + 'send', + 'coder0', + '--message', + 'hello', + '--verify', + ]); + // exitCode should remain unchanged (not set to 1) + expect(process.exitCode).toBe(originalExitCode); + } finally { + process.exitCode = originalExitCode; + await rm(home, { recursive: true, force: true }); + } + }, 10_000); }); diff --git a/packages/mosaic/src/commands/fleet.ts b/packages/mosaic/src/commands/fleet.ts index 947dae8..3276570 100644 --- a/packages/mosaic/src/commands/fleet.ts +++ b/packages/mosaic/src/commands/fleet.ts @@ -7,6 +7,13 @@ import { spawn } from 'node:child_process'; import type { Command } from 'commander'; import YAML from 'yaml'; +/** + * A function that spawns a command with inherited stdio (TTY passthrough). + * Used for interactive commands like `tmux attach` that need a real terminal. + * Resolves with the process exit code. + */ +export type InteractiveRunner = (command: string, args: string[]) => Promise; + export interface CommandResult { stdout: string; stderr: string; @@ -17,6 +24,8 @@ export type CommandRunner = (command: string, args: string[]) => Promise` - * or prompt indicator on the LAST non-empty line, the send is considered accepted. - * This mirrors the send-message.sh draft check: if the last line looks like an - * unsubmitted input line, it's a draft. - * - * Returns true if accepted (submitted), false if still a draft/unverifiable. + * Result of a send-verify check. + * - 'accepted': positive evidence that the message was accepted (response content present). + * - 'draft': last non-empty line matches the draft heuristic (unsubmitted input). + * - 'unverifiable': blank/empty capture — full-screen TUIs (claude, codex, opencode, pi) + * render blank for capture-pane, so we cannot determine acceptance; fails closed per FR-5. */ -export function isSendAccepted(capturedOutput: string): boolean { +export type SendVerifyResult = 'accepted' | 'draft' | 'unverifiable'; + +/** + * Check whether a send was accepted (not left as draft). + * + * Returns: + * 'unverifiable' — blank/empty capture (full-screen TUIs render blank; we cannot tell). + * 'draft' — last non-empty line matches the draft heuristic. + * 'accepted' — positive evidence of response content; not blank and not draft. + * + * Draft heuristic: a last non-empty line (after stripping ANSI escapes) that starts + * with "> " is treated as an unsubmitted input line. This pattern is specific to + * pi/claude TUIs and may miss drafts in codex/opencode TUIs — draft detection for + * those runtimes is best-effort only. + * + * FR-5 requires `send --verify` to return non-zero when delivery cannot be verified. + * Blank capture (full-screen TUI case) is the known-unverifiable case; it is treated + * as FAILURE (not success) so the caller fails closed rather than silently succeeding. + */ +export function isSendAccepted(capturedOutput: string): SendVerifyResult { const lines = capturedOutput.split('\n').filter((l) => l.trim().length > 0); - if (lines.length === 0) return true; // blank pane — treat as submitted + // Blank/empty capture => full-screen TUI rendered blank => unverifiable. + // This is the known-unverifiable case; fail closed (not treated as success). + if (lines.length === 0) return 'unverifiable'; const lastLine = lines[lines.length - 1]!; - // Heuristic: if last non-empty line is a bare user-typed draft line with no - // AI response yet (starts with or contains typical draft markers), flag as draft. - // Typical draft patterns: line ends with the sent message text with no ">" prefix, - // or the line is identical to a prompt that hasn't been cleared. - // We use a conservative heuristic: a line that is ONLY whitespace/prompt characters - // with no response indicator is suspicious, but since we can't reliably detect - // every TUI's draft state, we check for a specific pattern: - // if last line has trailing `█` (cursor block) or is blank after stripping ANSI, - // treat as submitted. Otherwise if we see the exact sent text repeated, it's draft. - // For robustness, we accept as submitted unless we see clear draft evidence. - // Real implementation: check if last meaningful line is just the input prompt. const stripped = lastLine.replace(/\x1b\[[0-9;]*m/g, '').trim(); - // If the pane shows a line that looks like a pending input (ends with cursor or is empty), - // that means the message was submitted and the pane is waiting for response. - // A draft line typically looks like: "> " without a response. - // For simplicity: if stripped last line starts with "> " — that's a common draft pattern + // Heuristic: if stripped last line starts with "> " — that's the common draft pattern // in pi/claude TUIs for showing user input before submission. - if (/^>\s/.test(stripped)) return false; - return true; + // NOTE: this heuristic is pi/claude-specific; draft detection for codex/opencode + // TUIs is best-effort only and may miss other unsubmitted-input indicators. + if (/^>\s/.test(stripped)) return 'draft'; + return 'accepted'; } export function registerFleetCommand(program: Command, deps: FleetCommandDeps = {}): Command { @@ -766,6 +769,7 @@ export function registerFleetAgentCommands( deps: FleetCommandDeps = {}, ): void { const runner = deps.runner ?? runCommand; + const iRunner = deps.interactiveRunner ?? spawnInteractive; agentCommand .command('roster') @@ -845,11 +849,16 @@ export function registerFleetAgentCommands( `send --verify: could not capture pane output to verify acceptance (tmux exited ${captureResult.exitCode}).`, ); } - const accepted = isSendAccepted(captureResult.stdout); - if (!accepted) { + const verifyResult = isSendAccepted(captureResult.stdout); + if (verifyResult === 'draft') { process.exitCode = 1; process.stderr.write( - `send --verify: message appears to be left as an unsubmitted draft in agent "${agent}".\n`, + `send --verify: message left as unsubmitted draft in agent "${agent}".\n`, + ); + } else if (verifyResult === 'unverifiable') { + process.exitCode = 1; + process.stderr.write( + `send --verify: could not verify delivery (blank/no response captured) for agent "${agent}".\n`, ); } } @@ -862,10 +871,14 @@ export function registerFleetAgentCommands( .action(async (agent: string) => { const roster = await loadRosterFromAgentCommand(agentCommand, deps.mosaicHome); getRosterAgent(roster, agent); - const result = await runner( - ...splitCommand(buildAgentWatchCommand(agent, roster.tmux.socketName)), - ); - writeCommandOutput(result); + // `tmux attach` is interactive and requires inherited TTY/stdin/stdout. + // Route through the interactiveRunner (stdio: 'inherit') instead of the + // capturing runner, which would hang or fail for full-screen TUI commands. + const [bin, args] = splitCommand(buildAgentWatchCommand(agent, roster.tmux.socketName)); + const exitCode = await iRunner(bin, args); + if (exitCode !== 0) { + process.exitCode = exitCode; + } }); agentCommand @@ -1300,6 +1313,24 @@ function resolveFrameworkRoot(): string { return resolve(dirname(currentFile), '..', '..', 'framework'); } +/** + * Default InteractiveRunner implementation: spawns the command with inherited + * stdio so the terminal is passed through to the child process. This is required + * for commands like `tmux attach` that are full-screen interactive and cannot be + * captured through a pipe. + */ +function spawnInteractive(command: string, args: string[]): Promise { + return new Promise((resolvePromise) => { + const child = spawn(command, args, { stdio: 'inherit' }); + child.on('error', () => { + resolvePromise(127); + }); + child.on('close', (code) => { + resolvePromise(code ?? 1); + }); + }); +} + async function canRead(path: string): Promise { try { await access(path, constants.R_OK);