From aec560162b4a5d177fd17ce09d1e049c4e0619fb Mon Sep 17 00:00:00 2001
From: Jarvis <jarvis@woltje.com>
Date: Sat, 20 Jun 2026 22:45:22 -0500
Subject: [PATCH] fix(fleet): verify fails-closed on unverifiable + interactive
 watch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- isSendAccepted now returns 'accepted' | 'draft' | 'unverifiable' (was bool)
- Blank/empty capture => 'unverifiable' => process.exitCode=1 with distinct
  "could not verify delivery (blank/no response captured)" message; previously
  blank was treated as success, violating FR-5 fail-closed semantics
- Draft line ('^> ') => process.exitCode=1 with "left as unsubmitted draft"
  message; distinct wording from unverifiable case
- agent watch now dispatched through injectable InteractiveRunner (stdio:inherit)
  instead of the capturing CommandRunner; tmux attach requires TTY passthrough
- Default spawnInteractive implementation uses node:child_process spawn with
  stdio:'inherit'; injectable via FleetCommandDeps.interactiveRunner for tests
- Removed buildSystemdIsActiveCommand (dead code — exported but unused)
- Tests: blank=>exitCode=1, draft=>exitCode=1, real response=>exitCode=0,
  watch dispatched through interactiveRunner not capturing runner
- PRD: added "Known limitations" section (heuristic verify, blank fails closed,
  non-pi/claude draft detection is best-effort, watch requires TTY passthrough)
- Code comment on isSendAccepted notes pi/claude-specific draft heuristic

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01RMoEx7hfdFGjUiCHuN1RRi
---
 docs/fleet/PRD.md                          |  13 ++
 packages/mosaic/src/commands/fleet.spec.ts | 173 +++++++++++++++++++--
 packages/mosaic/src/commands/fleet.ts      | 123 +++++++++------
 3 files changed, 251 insertions(+), 58 deletions(-)

diff --git a/docs/fleet/PRD.md b/docs/fleet/PRD.md
index 2095a50..09e8738 100644
--- a/docs/fleet/PRD.md
+++ b/docs/fleet/PRD.md
@@ -75,6 +75,19 @@ observability and no safe way to watch a session.
 - Situational: run against the live `mosaic-factory` fleet; capture `fleet ps` output,
   a kill-and-detect cycle, a read-only `watch`, and a `send --verify` pass/fail pair.
 
+## Known limitations
+
+- **Verify heuristic is best-effort:** `agent send --verify` uses a `>` -prefix draft
+  heuristic that is specific to pi/claude TUIs. Draft detection for codex and opencode
+  TUIs is best-effort only; those runtimes may not use the same input-line indicator.
+- **Blank capture fails closed:** Full-screen TUIs (claude, codex, opencode, pi) render
+  blank for `tmux capture-pane`. When the captured output is empty, `send --verify`
+  returns non-zero with an "unverifiable" message rather than silently succeeding. This
+  is an intentional fail-closed design (FR-5).
+- **`agent watch` requires TTY passthrough:** `tmux attach` is interactive and must be
+  run with inherited stdio. It cannot be captured through a pipe. Tests inject a fake
+  `interactiveRunner`; the real implementation spawns with `stdio: 'inherit'`.
+
 ## Surfaces & parity (MVP-X1)
 
 CLI lands this phase. TUI surface follows in the `packages/mosaic` wizard; webUI in
diff --git a/packages/mosaic/src/commands/fleet.spec.ts b/packages/mosaic/src/commands/fleet.spec.ts
index 4efe809..b1a5cc7 100644
--- a/packages/mosaic/src/commands/fleet.spec.ts
+++ b/packages/mosaic/src/commands/fleet.spec.ts
@@ -26,6 +26,7 @@ import {
   resolveFleetPaths,
   type AgentPsRow,
   type CommandRunner,
+  type InteractiveRunner,
 } from './fleet.js';
 import { registerAgentCommand } from './agent.js';
 
@@ -1057,7 +1058,7 @@ describe('agent watch', () => {
     expect(cmd).toContain('-r');
   });
 
-  it('issues the read-only attach command through the injected runner', async () => {
+  it('dispatches the read-only attach command through the interactiveRunner, NOT the capturing runner', async () => {
     const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-'));
     await mkdir(join(home, 'fleet'), { recursive: true });
     await writeFile(
@@ -1067,19 +1068,29 @@ describe('agent watch', () => {
       ),
     );
 
-    const calls: string[][] = [];
+    const capturingCalls: string[][] = [];
     const runner: CommandRunner = async (command, args) => {
-      calls.push([command, ...args]);
+      capturingCalls.push([command, ...args]);
       return { stdout: '', stderr: '', exitCode: 0 };
     };
 
+    const interactiveCalls: string[][] = [];
+    const interactiveRunner: InteractiveRunner = async (command, args) => {
+      interactiveCalls.push([command, ...args]);
+      return 0;
+    };
+
     const program = new Command();
     program.exitOverride();
-    registerAgentCommand(program, { runner, mosaicHome: home });
+    registerAgentCommand(program, { runner, interactiveRunner, mosaicHome: home });
 
     try {
       await program.parseAsync(['node', 'mosaic', 'agent', 'watch', 'coder0']);
-      expect(calls).toEqual([['tmux', '-L', 'mosaic-factory', 'attach', '-r', '-t', '=coder0']]);
+      // Must go through interactiveRunner, not the capturing runner
+      expect(capturingCalls).toHaveLength(0);
+      expect(interactiveCalls).toEqual([
+        ['tmux', '-L', 'mosaic-factory', 'attach', '-r', '-t', '=coder0'],
+      ]);
     } finally {
       await rm(home, { recursive: true, force: true });
     }
@@ -1126,17 +1137,17 @@ describe('agent send --verify', () => {
     ]);
   });
 
-  it('isSendAccepted: returns true for normal response output', () => {
-    expect(isSendAccepted('Some response text\nAnother line\n')).toBe(true);
+  it('isSendAccepted: returns "accepted" for normal response output', () => {
+    expect(isSendAccepted('Some response text\nAnother line\n')).toBe('accepted');
   });
 
-  it('isSendAccepted: returns false when last line starts with "> " (draft pattern)', () => {
-    expect(isSendAccepted('> my unsent message')).toBe(false);
+  it('isSendAccepted: returns "draft" when last line starts with "> " (draft pattern)', () => {
+    expect(isSendAccepted('> my unsent message')).toBe('draft');
   });
 
-  it('isSendAccepted: returns true for blank pane (treated as submitted)', () => {
-    expect(isSendAccepted('')).toBe(true);
-    expect(isSendAccepted('   \n  \n')).toBe(true);
+  it('isSendAccepted: returns "unverifiable" for blank/empty pane (full-screen TUI case)', () => {
+    expect(isSendAccepted('')).toBe('unverifiable');
+    expect(isSendAccepted('   \n  \n')).toBe('unverifiable');
   });
 
   it('issues send then verify capture via injected runner when --verify is passed', async () => {
@@ -1219,4 +1230,142 @@ describe('agent send --verify', () => {
       await rm(home, { recursive: true, force: true });
     }
   });
+
+  it('send --verify: blank capture sets process.exitCode=1 (unverifiable, fails closed)', async () => {
+    const originalExitCode = process.exitCode;
+    const stderrMessages: string[] = [];
+    const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((msg) => {
+      stderrMessages.push(String(msg));
+      return true;
+    });
+
+    const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-'));
+    await mkdir(join(home, 'fleet'), { recursive: true });
+    await writeFile(
+      join(home, 'fleet', 'roster.yaml'),
+      ['version: 1', 'transport: tmux', 'agents:', '  - name: coder0', '    runtime: codex'].join(
+        '\n',
+      ),
+    );
+
+    const runner: CommandRunner = async (command, args) => {
+      const full = [command, ...args].join(' ');
+      if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 };
+      // capture-pane returns blank (full-screen TUI)
+      return { stdout: '', stderr: '', exitCode: 0 };
+    };
+
+    const program = new Command();
+    program.exitOverride();
+    registerAgentCommand(program, { runner, mosaicHome: home });
+
+    try {
+      await program.parseAsync([
+        'node',
+        'mosaic',
+        'agent',
+        'send',
+        'coder0',
+        '--message',
+        'hello',
+        '--verify',
+      ]);
+      expect(process.exitCode).toBe(1);
+      expect(stderrMessages.join('')).toMatch(/could not verify delivery.*blank/i);
+    } finally {
+      process.exitCode = originalExitCode;
+      stderrSpy.mockRestore();
+      await rm(home, { recursive: true, force: true });
+    }
+  }, 10_000);
+
+  it('send --verify: draft line sets process.exitCode=1 with distinct wording', async () => {
+    const originalExitCode = process.exitCode;
+    const stderrMessages: string[] = [];
+    const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((msg) => {
+      stderrMessages.push(String(msg));
+      return true;
+    });
+
+    const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-'));
+    await mkdir(join(home, 'fleet'), { recursive: true });
+    await writeFile(
+      join(home, 'fleet', 'roster.yaml'),
+      ['version: 1', 'transport: tmux', 'agents:', '  - name: coder0', '    runtime: codex'].join(
+        '\n',
+      ),
+    );
+
+    const runner: CommandRunner = async (command, args) => {
+      const full = [command, ...args].join(' ');
+      if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 };
+      // capture-pane returns a draft line ("> unsent message")
+      return { stdout: '> unsent message\n', stderr: '', exitCode: 0 };
+    };
+
+    const program = new Command();
+    program.exitOverride();
+    registerAgentCommand(program, { runner, mosaicHome: home });
+
+    try {
+      await program.parseAsync([
+        'node',
+        'mosaic',
+        'agent',
+        'send',
+        'coder0',
+        '--message',
+        'hello',
+        '--verify',
+      ]);
+      expect(process.exitCode).toBe(1);
+      expect(stderrMessages.join('')).toMatch(/unsubmitted draft/i);
+    } finally {
+      process.exitCode = originalExitCode;
+      stderrSpy.mockRestore();
+      await rm(home, { recursive: true, force: true });
+    }
+  }, 10_000);
+
+  it('send --verify: real response content sets exitCode=0 (accepted)', async () => {
+    const originalExitCode = process.exitCode;
+
+    const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-'));
+    await mkdir(join(home, 'fleet'), { recursive: true });
+    await writeFile(
+      join(home, 'fleet', 'roster.yaml'),
+      ['version: 1', 'transport: tmux', 'agents:', '  - name: coder0', '    runtime: codex'].join(
+        '\n',
+      ),
+    );
+
+    const runner: CommandRunner = async (command, args) => {
+      const full = [command, ...args].join(' ');
+      if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 };
+      // capture-pane returns real response content
+      return { stdout: 'Agent response: task completed.\n', stderr: '', exitCode: 0 };
+    };
+
+    const program = new Command();
+    program.exitOverride();
+    registerAgentCommand(program, { runner, mosaicHome: home });
+
+    try {
+      await program.parseAsync([
+        'node',
+        'mosaic',
+        'agent',
+        'send',
+        'coder0',
+        '--message',
+        'hello',
+        '--verify',
+      ]);
+      // exitCode should remain unchanged (not set to 1)
+      expect(process.exitCode).toBe(originalExitCode);
+    } finally {
+      process.exitCode = originalExitCode;
+      await rm(home, { recursive: true, force: true });
+    }
+  }, 10_000);
 });
diff --git a/packages/mosaic/src/commands/fleet.ts b/packages/mosaic/src/commands/fleet.ts
index 947dae8..3276570 100644
--- a/packages/mosaic/src/commands/fleet.ts
+++ b/packages/mosaic/src/commands/fleet.ts
@@ -7,6 +7,13 @@ import { spawn } from 'node:child_process';
 import type { Command } from 'commander';
 import YAML from 'yaml';
 
+/**
+ * A function that spawns a command with inherited stdio (TTY passthrough).
+ * Used for interactive commands like `tmux attach` that need a real terminal.
+ * Resolves with the process exit code.
+ */
+export type InteractiveRunner = (command: string, args: string[]) => Promise<number>;
+
 export interface CommandResult {
   stdout: string;
   stderr: string;
@@ -17,6 +24,8 @@ export type CommandRunner = (command: string, args: string[]) => Promise<Command
 
 export interface FleetCommandDeps {
   runner?: CommandRunner;
+  /** Injectable interactive runner for commands needing inherited TTY (e.g., `tmux attach`). */
+  interactiveRunner?: InteractiveRunner;
   mosaicHome?: string;
   frameworkRoot?: string;
 }
@@ -290,14 +299,6 @@ export function buildSystemdShowCommand(agentName: string): string[] {
   ];
 }
 
-/**
- * Returns the systemd is-active command for an agent unit.
- */
-export function buildSystemdIsActiveCommand(agentName: string): string[] {
-  const unit = `mosaic-agent@${agentName}.service`;
-  return ['systemctl', '--user', 'is-active', unit];
-}
-
 /**
  * Returns the tmux list-panes command for an agent pane.
  * Format: `#{pane_pid} #{pane_current_command} #{pane_dead} #{pane_activity}`
@@ -489,42 +490,44 @@ export function buildAgentVerifyAcceptedCommand(
 }
 
 /**
- * Check whether a send was accepted (not left as draft).
- * A message is considered NOT accepted (draft) if the captured pane output
- * still shows the message text at the bottom prompt without a newline/submission.
- * We look for the common TUI pattern: the text appears at the last line but
- * hasn't been cleared (which would happen after submission).
- *
- * Heuristic: if pane capture is non-empty and does NOT contain a leading `>`
- * or prompt indicator on the LAST non-empty line, the send is considered accepted.
- * This mirrors the send-message.sh draft check: if the last line looks like an
- * unsubmitted input line, it's a draft.
- *
- * Returns true if accepted (submitted), false if still a draft/unverifiable.
+ * Result of a send-verify check.
+ * - 'accepted': positive evidence that the message was accepted (response content present).
+ * - 'draft': last non-empty line matches the draft heuristic (unsubmitted input).
+ * - 'unverifiable': blank/empty capture — full-screen TUIs (claude, codex, opencode, pi)
+ *   render blank for capture-pane, so we cannot determine acceptance; fails closed per FR-5.
  */
-export function isSendAccepted(capturedOutput: string): boolean {
+export type SendVerifyResult = 'accepted' | 'draft' | 'unverifiable';
+
+/**
+ * Check whether a send was accepted (not left as draft).
+ *
+ * Returns:
+ *   'unverifiable' — blank/empty capture (full-screen TUIs render blank; we cannot tell).
+ *   'draft'        — last non-empty line matches the draft heuristic.
+ *   'accepted'     — positive evidence of response content; not blank and not draft.
+ *
+ * Draft heuristic: a last non-empty line (after stripping ANSI escapes) that starts
+ * with "> " is treated as an unsubmitted input line. This pattern is specific to
+ * pi/claude TUIs and may miss drafts in codex/opencode TUIs — draft detection for
+ * those runtimes is best-effort only.
+ *
+ * FR-5 requires `send --verify` to return non-zero when delivery cannot be verified.
+ * Blank capture (full-screen TUI case) is the known-unverifiable case; it is treated
+ * as FAILURE (not success) so the caller fails closed rather than silently succeeding.
+ */
+export function isSendAccepted(capturedOutput: string): SendVerifyResult {
   const lines = capturedOutput.split('\n').filter((l) => l.trim().length > 0);
-  if (lines.length === 0) return true; // blank pane — treat as submitted
+  // Blank/empty capture => full-screen TUI rendered blank => unverifiable.
+  // This is the known-unverifiable case; fail closed (not treated as success).
+  if (lines.length === 0) return 'unverifiable';
   const lastLine = lines[lines.length - 1]!;
-  // Heuristic: if last non-empty line is a bare user-typed draft line with no
-  // AI response yet (starts with or contains typical draft markers), flag as draft.
-  // Typical draft patterns: line ends with the sent message text with no ">" prefix,
-  // or the line is identical to a prompt that hasn't been cleared.
-  // We use a conservative heuristic: a line that is ONLY whitespace/prompt characters
-  // with no response indicator is suspicious, but since we can't reliably detect
-  // every TUI's draft state, we check for a specific pattern:
-  // if last line has trailing `█` (cursor block) or is blank after stripping ANSI,
-  // treat as submitted. Otherwise if we see the exact sent text repeated, it's draft.
-  // For robustness, we accept as submitted unless we see clear draft evidence.
-  // Real implementation: check if last meaningful line is just the input prompt.
   const stripped = lastLine.replace(/\x1b\[[0-9;]*m/g, '').trim();
-  // If the pane shows a line that looks like a pending input (ends with cursor or is empty),
-  // that means the message was submitted and the pane is waiting for response.
-  // A draft line typically looks like: "> <message text>" without a response.
-  // For simplicity: if stripped last line starts with "> " — that's a common draft pattern
+  // Heuristic: if stripped last line starts with "> " — that's the common draft pattern
   // in pi/claude TUIs for showing user input before submission.
-  if (/^>\s/.test(stripped)) return false;
-  return true;
+  // NOTE: this heuristic is pi/claude-specific; draft detection for codex/opencode
+  // TUIs is best-effort only and may miss other unsubmitted-input indicators.
+  if (/^>\s/.test(stripped)) return 'draft';
+  return 'accepted';
 }
 
 export function registerFleetCommand(program: Command, deps: FleetCommandDeps = {}): Command {
@@ -766,6 +769,7 @@ export function registerFleetAgentCommands(
   deps: FleetCommandDeps = {},
 ): void {
   const runner = deps.runner ?? runCommand;
+  const iRunner = deps.interactiveRunner ?? spawnInteractive;
 
   agentCommand
     .command('roster')
@@ -845,11 +849,16 @@ export function registerFleetAgentCommands(
               `send --verify: could not capture pane output to verify acceptance (tmux exited ${captureResult.exitCode}).`,
             );
           }
-          const accepted = isSendAccepted(captureResult.stdout);
-          if (!accepted) {
+          const verifyResult = isSendAccepted(captureResult.stdout);
+          if (verifyResult === 'draft') {
             process.exitCode = 1;
             process.stderr.write(
-              `send --verify: message appears to be left as an unsubmitted draft in agent "${agent}".\n`,
+              `send --verify: message left as unsubmitted draft in agent "${agent}".\n`,
+            );
+          } else if (verifyResult === 'unverifiable') {
+            process.exitCode = 1;
+            process.stderr.write(
+              `send --verify: could not verify delivery (blank/no response captured) for agent "${agent}".\n`,
             );
           }
         }
@@ -862,10 +871,14 @@ export function registerFleetAgentCommands(
     .action(async (agent: string) => {
       const roster = await loadRosterFromAgentCommand(agentCommand, deps.mosaicHome);
       getRosterAgent(roster, agent);
-      const result = await runner(
-        ...splitCommand(buildAgentWatchCommand(agent, roster.tmux.socketName)),
-      );
-      writeCommandOutput(result);
+      // `tmux attach` is interactive and requires inherited TTY/stdin/stdout.
+      // Route through the interactiveRunner (stdio: 'inherit') instead of the
+      // capturing runner, which would hang or fail for full-screen TUI commands.
+      const [bin, args] = splitCommand(buildAgentWatchCommand(agent, roster.tmux.socketName));
+      const exitCode = await iRunner(bin, args);
+      if (exitCode !== 0) {
+        process.exitCode = exitCode;
+      }
     });
 
   agentCommand
@@ -1300,6 +1313,24 @@ function resolveFrameworkRoot(): string {
   return resolve(dirname(currentFile), '..', '..', 'framework');
 }
 
+/**
+ * Default InteractiveRunner implementation: spawns the command with inherited
+ * stdio so the terminal is passed through to the child process. This is required
+ * for commands like `tmux attach` that are full-screen interactive and cannot be
+ * captured through a pipe.
+ */
+function spawnInteractive(command: string, args: string[]): Promise<number> {
+  return new Promise((resolvePromise) => {
+    const child = spawn(command, args, { stdio: 'inherit' });
+    child.on('error', () => {
+      resolvePromise(127);
+    });
+    child.on('close', (code) => {
+      resolvePromise(code ?? 1);
+    });
+  });
+}
+
 async function canRead(path: string): Promise<boolean> {
   try {
     await access(path, constants.R_OK);