From 8466ca2d813e60fe325ce69aefdf22d12138affa Mon Sep 17 00:00:00 2001 From: Jarvis Date: Sat, 20 Jun 2026 22:57:00 -0500 Subject: [PATCH] fix(fleet): verify via pane-change diff + non-resizing watch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Blocker fix: send --verify now captures a BEFORE snapshot immediately before the send and an AFTER snapshot after the delay, then uses classifySendResult(before, after) to classify. A wedged pane showing stale non-empty content is no longer falsely reported as 'accepted' — BEFORE==AFTER maps to 'unverifiable' (exit 1, "no pane change after send"). Blank AFTER still fails closed as 'unverifiable'. Only AFTER != BEFORE without a draft suffix counts as 'accepted' (exit 0). Should-fix: agent watch now uses a GROUPED VIEWER SESSION instead of a bare 'tmux attach -r' against the agent session. A bare attach lets the viewer terminal shrink the agent's window; a grouped session has independent sizing so the agent's window is never affected. Sequence: new-session -d -t '=<agent>' -s '<agent>-watch-<pid>' (runner), attach -r to viewer session (interactiveRunner), kill-session on detach (runner). New builder functions exported: buildAgentWatchCreateViewerCommand, buildAgentWatchAttachCommand, buildAgentWatchKillViewerCommand, buildViewerSessionName. buildAgentWatchCommand kept but deprecated. New exports: classifySendResult(before, after) — the testable classifier. 
Tests added: - classifySendResult unit suite (6 cases): accepted/draft/unverifiable/ stale-pane/both-blank/before-blank-after-response - send --verify regression: stale (before==after non-empty) => exit 1 - send --verify regression: blank AFTER => exit 1 - send --verify regression: draft after pane change => exit 1 - send --verify regression: changed non-draft => exit 0 - send --verify: 3-call sequence assertion (before-capture, send, after-capture) - watch dispatch: grouped viewer session created/attached/killed; no bare attach against agent session; viewer name matches <agent>-watch-<pid> PRD Known-limitations updated: pane-change check rationale, Phase-3 heartbeat-ack requirement, grouped-session watch design. All gates pass: pnpm typecheck, pnpm lint, pnpm --filter @mosaicstack/mosaic test (382 tests, 74 fleet), prettier --check. Co-Authored-By: Claude Opus 4.8 (1M context) Claude-Session: https://claude.ai/code/session_01RMoEx7hfdFGjUiCHuN1RRi --- docs/fleet/PRD.md | 22 ++- packages/mosaic/src/commands/fleet.spec.ts | 208 +++++++++++++++++--- packages/mosaic/src/commands/fleet.ts | 210 ++++++++++++++++++--- 3 files changed, 377 insertions(+), 63 deletions(-) diff --git a/docs/fleet/PRD.md b/docs/fleet/PRD.md index 09e8738..f250395 100644 --- a/docs/fleet/PRD.md +++ b/docs/fleet/PRD.md @@ -80,13 +80,25 @@ observability and no safe way to watch a session. - **Verify heuristic is best-effort:** `agent send --verify` uses a `>` -prefix draft heuristic that is specific to pi/claude TUIs. Draft detection for codex and opencode TUIs is best-effort only; those runtimes may not use the same input-line indicator. -- **Blank capture fails closed:** Full-screen TUIs (claude, codex, opencode, pi) render - blank for `tmux capture-pane`. When the captured output is empty, `send --verify` +- **Pane-change check is the best Phase-2 signal:** `agent send --verify` compares a + BEFORE snapshot (captured immediately before the send) to an AFTER snapshot (captured + after the send delay). 
A pane that changed and does not end in a draft line is reported + as 'accepted'. A pane that did not change — including a wedged pane showing stale + non-empty content — is reported 'unverifiable' (exit 1, "no pane change after send"). + Definitive acceptance ultimately requires a runtime acknowledgement (Phase-3 + heartbeat-ack); the pane-change check is the best signal available against an opaque + TUI for Phase-2. +- **Blank AFTER capture fails closed:** Full-screen TUIs (claude, codex, opencode, pi) + render blank for `tmux capture-pane`. When the AFTER snapshot is empty, `send --verify` returns non-zero with an "unverifiable" message rather than silently succeeding. This is an intentional fail-closed design (FR-5). -- **`agent watch` requires TTY passthrough:** `tmux attach` is interactive and must be - run with inherited stdio. It cannot be captured through a pipe. Tests inject a fake - `interactiveRunner`; the real implementation spawns with `stdio: 'inherit'`. +- **`agent watch` uses a grouped viewer session:** `tmux attach -r` directly against the + agent session lets the viewer terminal shrink the agent's window. `agent watch` instead + creates a throwaway grouped session (`tmux new-session -d -t '=<agent>' -s +'<agent>-watch-<pid>'`), attaches read-only to that session, and kills it on detach. + The grouped session shares the agent's windows but has independent sizing, so the + agent's window is never affected. `tmux attach` is still interactive and requires + inherited stdio; the `interactiveRunner` handles TTY passthrough. 
## Surfaces & parity (MVP-X1) diff --git a/packages/mosaic/src/commands/fleet.spec.ts b/packages/mosaic/src/commands/fleet.spec.ts index b1a5cc7..09261c7 100644 --- a/packages/mosaic/src/commands/fleet.spec.ts +++ b/packages/mosaic/src/commands/fleet.spec.ts @@ -5,11 +5,15 @@ import { Command } from 'commander'; import { afterEach, describe, expect, it, vi } from 'vitest'; import { buildAgentSendCommand, + buildAgentWatchAttachCommand, buildAgentWatchCommand, + buildAgentWatchCreateViewerCommand, + buildAgentWatchKillViewerCommand, buildAgentVerifyAcceptedCommand, buildFleetServiceCommand, buildSystemdShowCommand, buildTmuxListPanesCommand, + classifySendResult, detectDrift, generateAgentEnv, getDefaultOperatorSourceLabel, @@ -1040,25 +1044,52 @@ describe('fleet ps — command sequences issued', () => { }); describe('agent watch', () => { - it('builds exact read-only tmux attach command', () => { - expect(buildAgentWatchCommand('canary-pi', 'mosaic-factory')).toEqual([ + it('builds exact grouped-viewer creation command', () => { + expect( + buildAgentWatchCreateViewerCommand('canary-pi', 'canary-pi-watch-123', 'mosaic-factory'), + ).toEqual([ + 'tmux', + '-L', + 'mosaic-factory', + 'new-session', + '-d', + '-t', + '=canary-pi', + '-s', + 'canary-pi-watch-123', + ]); + }); + + it('builds exact viewer attach command (read-only)', () => { + expect(buildAgentWatchAttachCommand('canary-pi-watch-123', 'mosaic-factory')).toEqual([ 'tmux', '-L', 'mosaic-factory', 'attach', '-r', '-t', - '=canary-pi', + 'canary-pi-watch-123', ]); }); - it('uses DEFAULT_SOCKET_NAME when socket is omitted', () => { + it('builds exact viewer kill command', () => { + expect(buildAgentWatchKillViewerCommand('canary-pi-watch-123', 'mosaic-factory')).toEqual([ + 'tmux', + '-L', + 'mosaic-factory', + 'kill-session', + '-t', + 'canary-pi-watch-123', + ]); + }); + + it('buildAgentWatchCommand (deprecated) still uses DEFAULT_SOCKET_NAME when socket is omitted', () => { const cmd = 
buildAgentWatchCommand('canary-pi'); expect(cmd[2]).toBe('mosaic-factory'); expect(cmd).toContain('-r'); }); - it('dispatches the read-only attach command through the interactiveRunner, NOT the capturing runner', async () => { + it('dispatch: creates grouped viewer session (runner) then attaches -r to viewer session (interactiveRunner), NOT a bare attach to the agent session', async () => { const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); await mkdir(join(home, 'fleet'), { recursive: true }); await writeFile( @@ -1086,11 +1117,28 @@ describe('agent watch', () => { try { await program.parseAsync(['node', 'mosaic', 'agent', 'watch', 'coder0']); - // Must go through interactiveRunner, not the capturing runner - expect(capturingCalls).toHaveLength(0); - expect(interactiveCalls).toEqual([ - ['tmux', '-L', 'mosaic-factory', 'attach', '-r', '-t', '=coder0'], - ]); + + // The capturing runner must be used for grouped-session creation and cleanup. + // It must NOT be used for the interactive attach. + expect(capturingCalls).toHaveLength(2); // new-session + kill-session + expect(capturingCalls[0]).toEqual( + expect.arrayContaining(['new-session', '-d', '-t', '=coder0']), + ); + // The new-session command must include a viewer session name derived from agent name. + expect(capturingCalls[0]!.join(' ')).toMatch(/coder0-watch-\d+/); + // Kill-session must target the same viewer session, not the agent session. + expect(capturingCalls[1]).toEqual(expect.arrayContaining(['kill-session', '-t'])); + expect(capturingCalls[1]!.join(' ')).toMatch(/coder0-watch-\d+/); + // The agent session itself must NOT be the attach target. + expect(capturingCalls[1]!.join(' ')).not.toContain('=coder0'); + + // The interactiveRunner must attach -r to the VIEWER session, not the agent session. + expect(interactiveCalls).toHaveLength(1); + expect(interactiveCalls[0]).toEqual(expect.arrayContaining(['attach', '-r', '-t'])); + // Target must be the viewer session name (not "=coder0"). 
+ const attachTarget = interactiveCalls[0]![interactiveCalls[0]!.indexOf('-t') + 1]!; + expect(attachTarget).toMatch(/coder0-watch-\d+/); + expect(attachTarget).not.toBe('=coder0'); } finally { await rm(home, { recursive: true, force: true }); } @@ -1150,7 +1198,44 @@ describe('agent send --verify', () => { expect(isSendAccepted(' \n \n')).toBe('unverifiable'); }); - it('issues send then verify capture via injected runner when --verify is passed', async () => { + // --------------------------------------------------------------------------- + // classifySendResult — BEFORE/AFTER pane-diff classifier (regression suite) + // --------------------------------------------------------------------------- + + describe('classifySendResult (BEFORE/AFTER pane-diff classifier)', () => { + it('returns "accepted" when AFTER differs from BEFORE and AFTER has no draft line', () => { + const before = 'Old content from prior interaction\n'; + const after = 'Old content from prior interaction\nAgent response: task complete.\n'; + expect(classifySendResult(before, after)).toBe('accepted'); + }); + + it('returns "draft" when AFTER differs from BEFORE and AFTER ends in a draft line', () => { + const before = 'Previous output\n'; + const after = 'Previous output\n> unsent message\n'; + expect(classifySendResult(before, after)).toBe('draft'); + }); + + it('returns "unverifiable" when AFTER is blank/empty (full-screen TUI blank render)', () => { + const before = 'Some previous content\n'; + expect(classifySendResult(before, '')).toBe('unverifiable'); + expect(classifySendResult(before, ' \n \n')).toBe('unverifiable'); + }); + + it('returns "unverifiable" when AFTER == BEFORE (stale/wedged pane — no change after send)', () => { + const staleContent = 'Old non-empty content that never changed\n'; + expect(classifySendResult(staleContent, staleContent)).toBe('unverifiable'); + }); + + it('returns "unverifiable" when both BEFORE and AFTER are blank (both blank => no change)', () => { + 
expect(classifySendResult('', '')).toBe('unverifiable'); + }); + + it('returns "accepted" when BEFORE is blank and AFTER has non-draft content (pane woke up)', () => { + expect(classifySendResult('', 'Agent is now responding.\n')).toBe('accepted'); + }); + }); + + it('issues BEFORE-capture then send then AFTER-capture (3 calls) when --verify is passed', async () => { const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); await mkdir(join(home, 'fleet'), { recursive: true }); await writeFile( @@ -1160,11 +1245,17 @@ describe('agent send --verify', () => { ), ); + let callIndex = 0; const calls: string[][] = []; const runner: CommandRunner = async (command, args) => { calls.push([command, ...args]); - // For agent-send.sh: success; for capture-pane: return accepted output - return { stdout: 'Response from agent\n', stderr: '', exitCode: 0 }; + const idx = callIndex++; + if ([command, ...args].join(' ').includes('agent-send.sh')) { + return { stdout: '', stderr: '', exitCode: 0 }; + } + // BEFORE capture: return old content; AFTER capture: return new content + const stdout = idx === 0 ? 
'Old pane content\n' : 'New response from agent\n'; + return { stdout, stderr: '', exitCode: 0 }; }; const program = new Command(); @@ -1183,11 +1274,11 @@ describe('agent send --verify', () => { '--verify', ]); - // First call should be agent-send.sh, second call should be capture-pane for verify - expect(calls).toHaveLength(2); - expect(calls[0]![0]).toContain('agent-send.sh'); - const captureCall = calls[1]!; - expect(captureCall).toEqual(buildAgentVerifyAcceptedCommand('coder0', 'mosaic-factory', 5)); + // 3 calls: BEFORE-capture, send, AFTER-capture + expect(calls).toHaveLength(3); + expect(calls[0]).toEqual(buildAgentVerifyAcceptedCommand('coder0', 'mosaic-factory', 5)); + expect(calls[1]![0]).toContain('agent-send.sh'); + expect(calls[2]).toEqual(buildAgentVerifyAcceptedCommand('coder0', 'mosaic-factory', 5)); } finally { await rm(home, { recursive: true, force: true }); } @@ -1231,7 +1322,7 @@ describe('agent send --verify', () => { } }); - it('send --verify: blank capture sets process.exitCode=1 (unverifiable, fails closed)', async () => { + it('send --verify: AFTER==BEFORE (stale/wedged pane) sets process.exitCode=1 (unverifiable)', async () => { const originalExitCode = process.exitCode; const stderrMessages: string[] = []; const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((msg) => { @@ -1251,8 +1342,8 @@ describe('agent send --verify', () => { const runner: CommandRunner = async (command, args) => { const full = [command, ...args].join(' '); if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 }; - // capture-pane returns blank (full-screen TUI) - return { stdout: '', stderr: '', exitCode: 0 }; + // BEFORE and AFTER are identical non-empty stale content — simulates a wedged pane + return { stdout: 'Stale old content that never changed\n', stderr: '', exitCode: 0 }; }; const program = new Command(); @@ -1271,7 +1362,8 @@ describe('agent send --verify', () => { '--verify', ]); 
expect(process.exitCode).toBe(1); - expect(stderrMessages.join('')).toMatch(/could not verify delivery.*blank/i); + // Must mention "no pane change" to distinguish from blank-capture case + expect(stderrMessages.join('')).toMatch(/no pane change after send/i); } finally { process.exitCode = originalExitCode; stderrSpy.mockRestore(); @@ -1279,7 +1371,7 @@ describe('agent send --verify', () => { } }, 10_000); - it('send --verify: draft line sets process.exitCode=1 with distinct wording', async () => { + it('send --verify: blank AFTER capture sets process.exitCode=1 (unverifiable, fails closed)', async () => { const originalExitCode = process.exitCode; const stderrMessages: string[] = []; const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((msg) => { @@ -1296,11 +1388,65 @@ describe('agent send --verify', () => { ), ); + let captureCallCount = 0; const runner: CommandRunner = async (command, args) => { const full = [command, ...args].join(' '); if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 }; - // capture-pane returns a draft line ("> unsent message") - return { stdout: '> unsent message\n', stderr: '', exitCode: 0 }; + captureCallCount++; + // BEFORE: some content; AFTER: blank (full-screen TUI renders blank after send) + const stdout = captureCallCount === 1 ? 
'Previous content\n' : ''; + return { stdout, stderr: '', exitCode: 0 }; + }; + + const program = new Command(); + program.exitOverride(); + registerAgentCommand(program, { runner, mosaicHome: home }); + + try { + await program.parseAsync([ + 'node', + 'mosaic', + 'agent', + 'send', + 'coder0', + '--message', + 'hello', + '--verify', + ]); + expect(process.exitCode).toBe(1); + expect(stderrMessages.join('')).toMatch(/could not verify delivery/i); + } finally { + process.exitCode = originalExitCode; + stderrSpy.mockRestore(); + await rm(home, { recursive: true, force: true }); + } + }, 10_000); + + it('send --verify: AFTER differs from BEFORE with draft line sets process.exitCode=1', async () => { + const originalExitCode = process.exitCode; + const stderrMessages: string[] = []; + const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((msg) => { + stderrMessages.push(String(msg)); + return true; + }); + + const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); + await mkdir(join(home, 'fleet'), { recursive: true }); + await writeFile( + join(home, 'fleet', 'roster.yaml'), + ['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join( + '\n', + ), + ); + + let captureCallCount = 0; + const runner: CommandRunner = async (command, args) => { + const full = [command, ...args].join(' '); + if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 }; + captureCallCount++; + // BEFORE: old content; AFTER: message appeared but ended as a draft line + const stdout = captureCallCount === 1 ? 
'Previous output\n' : '> unsent message\n'; + return { stdout, stderr: '', exitCode: 0 }; }; const program = new Command(); @@ -1327,7 +1473,7 @@ describe('agent send --verify', () => { } }, 10_000); - it('send --verify: real response content sets exitCode=0 (accepted)', async () => { + it('send --verify: AFTER differs from BEFORE with real response content sets exitCode=0 (accepted)', async () => { const originalExitCode = process.exitCode; const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-')); @@ -1339,11 +1485,17 @@ describe('agent send --verify', () => { ), ); + let captureCallCount = 0; const runner: CommandRunner = async (command, args) => { const full = [command, ...args].join(' '); if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 }; - // capture-pane returns real response content - return { stdout: 'Agent response: task completed.\n', stderr: '', exitCode: 0 }; + captureCallCount++; + // BEFORE: old content; AFTER: new response content (pane changed) + const stdout = + captureCallCount === 1 + ? 'Old pane content\n' + : 'Old pane content\nAgent response: task completed.\n'; + return { stdout, stderr: '', exitCode: 0 }; }; const program = new Command(); diff --git a/packages/mosaic/src/commands/fleet.ts b/packages/mosaic/src/commands/fleet.ts index 3276570..630a638 100644 --- a/packages/mosaic/src/commands/fleet.ts +++ b/packages/mosaic/src/commands/fleet.ts @@ -456,8 +456,72 @@ export function getDefaultTenantAndHost(): { tenant_id: string; host: string } { } /** - * Builds the `agent watch` command: read-only tmux attach. - * Uses `-r` flag to prevent keystrokes and `=` exact-match session target. + * Builds the command to create a grouped viewer session targeting an agent session. + * A grouped session shares the same windows as the target but gets INDEPENDENT sizing, + * so attaching the viewer never resizes the agent's window. 
+ * + * The viewer session name is derived from the agent name and a unique suffix (typically + * the caller's PID) so multiple concurrent watchers don't collide. + * + * Usage sequence: + * 1. Run buildAgentWatchCreateViewerCommand → create grouped session (via capturing runner). + * 2. Run buildAgentWatchAttachCommand → attach -r to the viewer session (via interactiveRunner). + * 3. Run buildAgentWatchKillViewerCommand → kill the viewer session on detach (via capturing runner). + */ +export function buildAgentWatchCreateViewerCommand( + agentName: string, + viewerSessionName: string, + socketName = DEFAULT_SOCKET_NAME, +): string[] { + return [ + 'tmux', + '-L', + socketName, + 'new-session', + '-d', + '-t', + `=${agentName}`, + '-s', + viewerSessionName, + ]; +} + +/** + * Builds the interactive attach command for a viewer session (read-only). + * Must be run via interactiveRunner (stdio: 'inherit'). + */ +export function buildAgentWatchAttachCommand( + viewerSessionName: string, + socketName = DEFAULT_SOCKET_NAME, +): string[] { + return ['tmux', '-L', socketName, 'attach', '-r', '-t', viewerSessionName]; +} + +/** + * Builds the kill-session command to clean up a viewer session after detach. + * Keeps the agent session intact. + */ +export function buildAgentWatchKillViewerCommand( + viewerSessionName: string, + socketName = DEFAULT_SOCKET_NAME, +): string[] { + return ['tmux', '-L', socketName, 'kill-session', '-t', viewerSessionName]; +} + +/** + * Returns a unique viewer session name for a given agent. + * Uses process.pid so concurrent watchers produce distinct names. + */ +export function buildViewerSessionName(agentName: string): string { + return `${agentName}-watch-${process.pid}`; +} + +/** + * @deprecated Use buildAgentWatchCreateViewerCommand + buildAgentWatchAttachCommand + + * buildAgentWatchKillViewerCommand instead. 
This bare attach targets the agent session + * directly and can resize it when the viewer terminal is smaller than the agent's window. + * + * Kept for backward compatibility only. */ export function buildAgentWatchCommand( agentName: string, @@ -493,27 +557,73 @@ export function buildAgentVerifyAcceptedCommand( * Result of a send-verify check. * - 'accepted': positive evidence that the message was accepted (response content present). * - 'draft': last non-empty line matches the draft heuristic (unsubmitted input). - * - 'unverifiable': blank/empty capture — full-screen TUIs (claude, codex, opencode, pi) - * render blank for capture-pane, so we cannot determine acceptance; fails closed per FR-5. + * - 'unverifiable': pane did not change after send (stale or blank) — we cannot determine + * acceptance; fails closed per FR-5. */ export type SendVerifyResult = 'accepted' | 'draft' | 'unverifiable'; /** - * Check whether a send was accepted (not left as draft). + * Classify the result of a send-verify check by comparing BEFORE and AFTER pane snapshots. + * + * This is the primary classifier for `send --verify`. It addresses the stale-pane + * false-success problem: if the pane content did not change after the send, the new + * message never registered in the TUI (wedged pane, send dropped, etc.). + * + * Classification logic: + * 'unverifiable' — AFTER is blank/empty OR AFTER == BEFORE (no pane change after send). + * 'draft' — AFTER differs from BEFORE AND the last non-empty line of AFTER starts + * with the draft pattern ("> "); message was typed but not submitted. + * 'accepted' — AFTER differs from BEFORE AND AFTER does not end in a draft line; + * positive evidence that the TUI accepted the message. + * + * NOTE on blank AFTER: Full-screen TUIs (claude, codex, opencode, pi) render blank for + * `tmux capture-pane`. A blank AFTER is indistinguishable from a wedged pane, so it + * is always classified 'unverifiable' (fail-closed). 
+ * + * NOTE on definitive acceptance: Phase-2 can only observe the pane surface — there is no + * runtime acknowledgement (heartbeat-ack) at this phase. The pane-change check is the best + * signal available against an opaque TUI. Definitive acceptance ultimately requires a + * runtime acknowledgement (Phase-3 heartbeat-ack). + * + * Draft heuristic: a last non-empty line (after stripping ANSI escapes) that starts + * with "> " is treated as an unsubmitted input line. This pattern is specific to + * pi/claude TUIs; draft detection for codex/opencode TUIs is best-effort only. + * + * FR-5 requires `send --verify` to return non-zero when delivery cannot be verified. + * + * @param before Pane snapshot captured BEFORE the send command. + * @param after Pane snapshot captured AFTER the send command (after the delay). + */ +export function classifySendResult(before: string, after: string): SendVerifyResult { + const afterLines = after.split('\n').filter((l) => l.trim().length > 0); + // Blank/empty AFTER => full-screen TUI rendered blank, or pane is wedged => unverifiable. + if (afterLines.length === 0) return 'unverifiable'; + // No change => message didn't register in the TUI (stale/wedged pane) => unverifiable. + if (after === before) return 'unverifiable'; + // AFTER differs from BEFORE — check whether the pane is now showing a draft line. + const lastLine = afterLines[afterLines.length - 1]!; + const stripped = lastLine.replace(/\x1b\[[0-9;]*m/g, '').trim(); + // Heuristic: if stripped last line starts with "> " — that's the common draft pattern + // in pi/claude TUIs for showing user input before submission. + // NOTE: this heuristic is pi/claude-specific; draft detection for codex/opencode + // TUIs is best-effort only and may miss other unsubmitted-input indicators. + if (/^>\s/.test(stripped)) return 'draft'; + return 'accepted'; +} + +/** + * Check whether a send was accepted (not left as draft), using only the AFTER snapshot. 
+ * + * @deprecated Prefer classifySendResult(before, after) which guards against stale-pane + * false-successes. This single-snapshot variant cannot detect a wedged pane that still + * shows old non-empty content — it will incorrectly return 'accepted' in that case. + * + * Retained for unit-test compatibility with single-snapshot assertions. * * Returns: * 'unverifiable' — blank/empty capture (full-screen TUIs render blank; we cannot tell). * 'draft' — last non-empty line matches the draft heuristic. - * 'accepted' — positive evidence of response content; not blank and not draft. - * - * Draft heuristic: a last non-empty line (after stripping ANSI escapes) that starts - * with "> " is treated as an unsubmitted input line. This pattern is specific to - * pi/claude TUIs and may miss drafts in codex/opencode TUIs — draft detection for - * those runtimes is best-effort only. - * - * FR-5 requires `send --verify` to return non-zero when delivery cannot be verified. - * Blank capture (full-screen TUI case) is the known-unverifiable case; it is treated - * as FAILURE (not success) so the caller fails closed rather than silently succeeding. + * 'accepted' — non-blank and not a draft line (but may be stale — see above). */ export function isSendAccepted(capturedOutput: string): SendVerifyResult { const lines = capturedOutput.split('\n').filter((l) => l.trim().length > 0); @@ -834,22 +944,37 @@ export function registerFleetAgentCommands( resolveMosaicHomeFromCommand(agentCommand, deps.mosaicHome), ); const sourceLabel = opts.sourceLabel ?? opts.source ?? getDefaultOperatorSourceLabel(); - await runChecked( - runner, - buildAgentSendCommand(paths, agent, opts.message, roster.tmux.socketName, sourceLabel), - ); if (opts.verify) { - // Brief pause to allow the TUI to process the send before capturing - await new Promise((res) => setTimeout(res, 300)); - const captureResult = await runner( + // Capture BEFORE snapshot so we can detect stale-pane false-successes. 
+ // A wedged pane that still shows old non-empty content must not be reported + // as 'accepted' — we compare BEFORE vs AFTER to guard against that case. + const beforeResult = await runner( ...splitCommand(buildAgentVerifyAcceptedCommand(agent, roster.tmux.socketName)), ); - if (captureResult.exitCode !== 0) { + if (beforeResult.exitCode !== 0) { throw new Error( - `send --verify: could not capture pane output to verify acceptance (tmux exited ${captureResult.exitCode}).`, + `send --verify: could not capture pane output before send (tmux exited ${beforeResult.exitCode}).`, ); } - const verifyResult = isSendAccepted(captureResult.stdout); + const beforeSnapshot = beforeResult.stdout; + + await runChecked( + runner, + buildAgentSendCommand(paths, agent, opts.message, roster.tmux.socketName, sourceLabel), + ); + + // Brief pause to allow the TUI to process the send before capturing the AFTER snapshot. + await new Promise((res) => setTimeout(res, 300)); + const afterResult = await runner( + ...splitCommand(buildAgentVerifyAcceptedCommand(agent, roster.tmux.socketName)), + ); + if (afterResult.exitCode !== 0) { + throw new Error( + `send --verify: could not capture pane output to verify acceptance (tmux exited ${afterResult.exitCode}).`, + ); + } + // Classify using BEFORE/AFTER comparison to guard against stale-pane false-successes. 
+ const verifyResult = classifySendResult(beforeSnapshot, afterResult.stdout); if (verifyResult === 'draft') { process.exitCode = 1; process.stderr.write( @@ -858,9 +983,14 @@ export function registerFleetAgentCommands( } else if (verifyResult === 'unverifiable') { process.exitCode = 1; process.stderr.write( - `send --verify: could not verify delivery (blank/no response captured) for agent "${agent}".\n`, + `send --verify: could not verify delivery (no pane change after send) for agent "${agent}".\n`, ); } + } else { + await runChecked( + runner, + buildAgentSendCommand(paths, agent, opts.message, roster.tmux.socketName, sourceLabel), + ); } }, ); @@ -871,11 +1001,31 @@ export function registerFleetAgentCommands( .action(async (agent: string) => { const roster = await loadRosterFromAgentCommand(agentCommand, deps.mosaicHome); getRosterAgent(roster, agent); - // `tmux attach` is interactive and requires inherited TTY/stdin/stdout. - // Route through the interactiveRunner (stdio: 'inherit') instead of the - // capturing runner, which would hang or fail for full-screen TUI commands. - const [bin, args] = splitCommand(buildAgentWatchCommand(agent, roster.tmux.socketName)); + + // Use a GROUPED VIEWER SESSION to prevent the observer from resizing the agent's + // window. A bare `tmux attach -r` against the agent session itself still lets the + // client shrink the session to its terminal size; a grouped session gets INDEPENDENT + // sizing so the agent's window is never affected by the viewer's terminal dimensions. + // + // Sequence: + // 1. Create a throwaway grouped session targeting the agent (capturing runner). + // 2. Attach -r (read-only) to the viewer session (interactiveRunner / TTY). + // 3. Kill the viewer session on detach so stale sessions don't accumulate. 
+ const viewerName = buildViewerSessionName(agent); + const socketName = roster.tmux.socketName; + + await runChecked(runner, buildAgentWatchCreateViewerCommand(agent, viewerName, socketName)); + + const [bin, args] = splitCommand(buildAgentWatchAttachCommand(viewerName, socketName)); const exitCode = await iRunner(bin, args); + + // Best-effort cleanup of the viewer session regardless of how the user detached. + // Errors here are intentionally suppressed — the agent session is unaffected. + const killResult = await runner( + ...splitCommand(buildAgentWatchKillViewerCommand(viewerName, socketName)), + ); + void killResult; // result is intentionally ignored + if (exitCode !== 0) { process.exitCode = exitCode; }