fix(fleet): verify via pane-change diff + non-resizing watch

Blocker fix: send --verify now captures a BEFORE snapshot immediately before the send and an AFTER snapshot after the delay, then uses classifySendResult(before, after) to classify. A wedged pane showing stale non-empty content is no longer falsely reported as 'accepted' — BEFORE==AFTER maps to 'unverifiable' (exit 1, "no pane change after send"). Blank AFTER still fails closed as 'unverifiable'. Only AFTER != BEFORE without a draft suffix counts as 'accepted' (exit 0).

Should-fix: agent watch now uses a GROUPED VIEWER SESSION instead of a bare 'tmux attach -r' against the agent session. A bare attach lets the viewer terminal shrink the agent's window; a grouped session has independent sizing so the agent's window is never affected. Sequence: new-session -d -t '=<agent>' -s '<agent>-watch-<pid>' (runner), attach -r to viewer session (interactiveRunner), kill-session on detach (runner).

New builder functions exported: buildAgentWatchCreateViewerCommand, buildAgentWatchAttachCommand, buildAgentWatchKillViewerCommand, buildViewerSessionName. buildAgentWatchCommand kept but deprecated.

New export: classifySendResult(before, after) — the testable classifier.

Tests added:
- classifySendResult unit suite (6 cases): accepted/draft/unverifiable/stale-pane/both-blank/before-blank-after-response
- send --verify regression: stale (before==after non-empty) => exit 1
- send --verify regression: blank AFTER => exit 1
- send --verify regression: draft after pane change => exit 1
- send --verify regression: changed non-draft => exit 0
- send --verify: 3-call sequence assertion (before-capture, send, after-capture)
- watch dispatch: grouped viewer session created/attached/killed; no bare attach against agent session; viewer name matches <agent>-watch-<pid>

PRD Known-limitations updated: pane-change check rationale, Phase-3 heartbeat-ack requirement, grouped-session watch design.
All gates pass: pnpm typecheck, pnpm lint, pnpm --filter @mosaicstack/mosaic test (382 tests, 74 fleet), prettier --check.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01RMoEx7hfdFGjUiCHuN1RRi
2026-06-20 22:57:00 -05:00
parent aec560162b
commit 8466ca2d81
3 changed files with 377 additions and 63 deletions
--- a/packages/mosaic/src/commands/fleet.spec.ts
+++ b/packages/mosaic/src/commands/fleet.spec.ts
@@ -5,11 +5,15 @@ import { Command } from 'commander';
 import { afterEach, describe, expect, it, vi } from 'vitest';
 import {
  buildAgentSendCommand,
+  buildAgentWatchAttachCommand,
  buildAgentWatchCommand,
+  buildAgentWatchCreateViewerCommand,
+  buildAgentWatchKillViewerCommand,
  buildAgentVerifyAcceptedCommand,
  buildFleetServiceCommand,
  buildSystemdShowCommand,
  buildTmuxListPanesCommand,
+  classifySendResult,
  detectDrift,
  generateAgentEnv,
  getDefaultOperatorSourceLabel,
@@ -1040,25 +1044,52 @@ describe('fleet ps — command sequences issued', () => {
 });

 describe('agent watch', () => {
-  it('builds exact read-only tmux attach command', () => {
-    expect(buildAgentWatchCommand('canary-pi', 'mosaic-factory')).toEqual([
+  it('builds exact grouped-viewer creation command', () => {
+    expect(
+      buildAgentWatchCreateViewerCommand('canary-pi', 'canary-pi-watch-123', 'mosaic-factory'),
+    ).toEqual([
+      'tmux',
+      '-L',
+      'mosaic-factory',
+      'new-session',
+      '-d',
+      '-t',
+      '=canary-pi',
+      '-s',
+      'canary-pi-watch-123',
+    ]);
+  });
+
+  it('builds exact viewer attach command (read-only)', () => {
+    expect(buildAgentWatchAttachCommand('canary-pi-watch-123', 'mosaic-factory')).toEqual([
      'tmux',
      '-L',
      'mosaic-factory',
      'attach',
      '-r',
      '-t',
-      '=canary-pi',
+      'canary-pi-watch-123',
    ]);
  });

-  it('uses DEFAULT_SOCKET_NAME when socket is omitted', () => {
+  it('builds exact viewer kill command', () => {
+    expect(buildAgentWatchKillViewerCommand('canary-pi-watch-123', 'mosaic-factory')).toEqual([
+      'tmux',
+      '-L',
+      'mosaic-factory',
+      'kill-session',
+      '-t',
+      'canary-pi-watch-123',
+    ]);
+  });
+
+  it('buildAgentWatchCommand (deprecated) still uses DEFAULT_SOCKET_NAME when socket is omitted', () => {
    const cmd = buildAgentWatchCommand('canary-pi');
    expect(cmd[2]).toBe('mosaic-factory');
    expect(cmd).toContain('-r');
  });

-  it('dispatches the read-only attach command through the interactiveRunner, NOT the capturing runner', async () => {
+  it('dispatch: creates grouped viewer session (runner) then attaches -r to viewer session (interactiveRunner), NOT a bare attach to the agent session', async () => {
    const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-'));
    await mkdir(join(home, 'fleet'), { recursive: true });
    await writeFile(
@@ -1086,11 +1117,28 @@ describe('agent watch', () => {

    try {
      await program.parseAsync(['node', 'mosaic', 'agent', 'watch', 'coder0']);
-      // Must go through interactiveRunner, not the capturing runner
-      expect(capturingCalls).toHaveLength(0);
-      expect(interactiveCalls).toEqual([
-        ['tmux', '-L', 'mosaic-factory', 'attach', '-r', '-t', '=coder0'],
-      ]);
+
+      // The capturing runner must be used for grouped-session creation and cleanup.
+      // It must NOT be used for the interactive attach.
+      expect(capturingCalls).toHaveLength(2); // new-session + kill-session
+      expect(capturingCalls[0]).toEqual(
+        expect.arrayContaining(['new-session', '-d', '-t', '=coder0']),
+      );
+      // The new-session command must include a viewer session name derived from agent name.
+      expect(capturingCalls[0]!.join(' ')).toMatch(/coder0-watch-\d+/);
+      // Kill-session must target the same viewer session, not the agent session.
+      expect(capturingCalls[1]).toEqual(expect.arrayContaining(['kill-session', '-t']));
+      expect(capturingCalls[1]!.join(' ')).toMatch(/coder0-watch-\d+/);
+      // The agent session itself must NOT be the attach target.
+      expect(capturingCalls[1]!.join(' ')).not.toContain('=coder0');
+
+      // The interactiveRunner must attach -r to the VIEWER session, not the agent session.
+      expect(interactiveCalls).toHaveLength(1);
+      expect(interactiveCalls[0]).toEqual(expect.arrayContaining(['attach', '-r', '-t']));
+      // Target must be the viewer session name (not "=coder0").
+      const attachTarget = interactiveCalls[0]![interactiveCalls[0]!.indexOf('-t') + 1]!;
+      expect(attachTarget).toMatch(/coder0-watch-\d+/);
+      expect(attachTarget).not.toBe('=coder0');
    } finally {
      await rm(home, { recursive: true, force: true });
    }
@@ -1150,7 +1198,44 @@ describe('agent send --verify', () => {
    expect(isSendAccepted('   \n  \n')).toBe('unverifiable');
  });

-  it('issues send then verify capture via injected runner when --verify is passed', async () => {
+  // ---------------------------------------------------------------------------
+  // classifySendResult — BEFORE/AFTER pane-diff classifier (regression suite)
+  // ---------------------------------------------------------------------------
+
+  describe('classifySendResult (BEFORE/AFTER pane-diff classifier)', () => {
+    it('returns "accepted" when AFTER differs from BEFORE and AFTER has no draft line', () => {
+      const before = 'Old content from prior interaction\n';
+      const after = 'Old content from prior interaction\nAgent response: task complete.\n';
+      expect(classifySendResult(before, after)).toBe('accepted');
+    });
+
+    it('returns "draft" when AFTER differs from BEFORE and AFTER ends in a draft line', () => {
+      const before = 'Previous output\n';
+      const after = 'Previous output\n> unsent message\n';
+      expect(classifySendResult(before, after)).toBe('draft');
+    });
+
+    it('returns "unverifiable" when AFTER is blank/empty (full-screen TUI blank render)', () => {
+      const before = 'Some previous content\n';
+      expect(classifySendResult(before, '')).toBe('unverifiable');
+      expect(classifySendResult(before, '   \n  \n')).toBe('unverifiable');
+    });
+
+    it('returns "unverifiable" when AFTER == BEFORE (stale/wedged pane — no change after send)', () => {
+      const staleContent = 'Old non-empty content that never changed\n';
+      expect(classifySendResult(staleContent, staleContent)).toBe('unverifiable');
+    });
+
+    it('returns "unverifiable" when both BEFORE and AFTER are blank (both blank => no change)', () => {
+      expect(classifySendResult('', '')).toBe('unverifiable');
+    });
+
+    it('returns "accepted" when BEFORE is blank and AFTER has non-draft content (pane woke up)', () => {
+      expect(classifySendResult('', 'Agent is now responding.\n')).toBe('accepted');
+    });
+  });
+
+  it('issues BEFORE-capture then send then AFTER-capture (3 calls) when --verify is passed', async () => {
    const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-'));
    await mkdir(join(home, 'fleet'), { recursive: true });
    await writeFile(
@@ -1160,11 +1245,17 @@ describe('agent send --verify', () => {
      ),
    );

+    let callIndex = 0;
    const calls: string[][] = [];
    const runner: CommandRunner = async (command, args) => {
      calls.push([command, ...args]);
-      // For agent-send.sh: success; for capture-pane: return accepted output
-      return { stdout: 'Response from agent\n', stderr: '', exitCode: 0 };
+      const idx = callIndex++;
+      if ([command, ...args].join(' ').includes('agent-send.sh')) {
+        return { stdout: '', stderr: '', exitCode: 0 };
+      }
+      // BEFORE capture: return old content; AFTER capture: return new content
+      const stdout = idx === 0 ? 'Old pane content\n' : 'New response from agent\n';
+      return { stdout, stderr: '', exitCode: 0 };
    };

    const program = new Command();
@@ -1183,11 +1274,11 @@ describe('agent send --verify', () => {
        '--verify',
      ]);

-      // First call should be agent-send.sh, second call should be capture-pane for verify
-      expect(calls).toHaveLength(2);
-      expect(calls[0]![0]).toContain('agent-send.sh');
-      const captureCall = calls[1]!;
-      expect(captureCall).toEqual(buildAgentVerifyAcceptedCommand('coder0', 'mosaic-factory', 5));
+      // 3 calls: BEFORE-capture, send, AFTER-capture
+      expect(calls).toHaveLength(3);
+      expect(calls[0]).toEqual(buildAgentVerifyAcceptedCommand('coder0', 'mosaic-factory', 5));
+      expect(calls[1]![0]).toContain('agent-send.sh');
+      expect(calls[2]).toEqual(buildAgentVerifyAcceptedCommand('coder0', 'mosaic-factory', 5));
    } finally {
      await rm(home, { recursive: true, force: true });
    }
@@ -1231,7 +1322,7 @@ describe('agent send --verify', () => {
    }
  });

-  it('send --verify: blank capture sets process.exitCode=1 (unverifiable, fails closed)', async () => {
+  it('send --verify: AFTER==BEFORE (stale/wedged pane) sets process.exitCode=1 (unverifiable)', async () => {
    const originalExitCode = process.exitCode;
    const stderrMessages: string[] = [];
    const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((msg) => {
@@ -1251,8 +1342,8 @@ describe('agent send --verify', () => {
    const runner: CommandRunner = async (command, args) => {
      const full = [command, ...args].join(' ');
      if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 };
-      // capture-pane returns blank (full-screen TUI)
-      return { stdout: '', stderr: '', exitCode: 0 };
+      // BEFORE and AFTER are identical non-empty stale content — simulates a wedged pane
+      return { stdout: 'Stale old content that never changed\n', stderr: '', exitCode: 0 };
    };

    const program = new Command();
@@ -1271,7 +1362,8 @@ describe('agent send --verify', () => {
        '--verify',
      ]);
      expect(process.exitCode).toBe(1);
-      expect(stderrMessages.join('')).toMatch(/could not verify delivery.*blank/i);
+      // Must mention "no pane change" to distinguish from blank-capture case
+      expect(stderrMessages.join('')).toMatch(/no pane change after send/i);
    } finally {
      process.exitCode = originalExitCode;
      stderrSpy.mockRestore();
@@ -1279,7 +1371,7 @@ describe('agent send --verify', () => {
    }
  }, 10_000);

-  it('send --verify: draft line sets process.exitCode=1 with distinct wording', async () => {
+  it('send --verify: blank AFTER capture sets process.exitCode=1 (unverifiable, fails closed)', async () => {
    const originalExitCode = process.exitCode;
    const stderrMessages: string[] = [];
    const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((msg) => {
@@ -1296,11 +1388,65 @@ describe('agent send --verify', () => {
      ),
    );

+    let captureCallCount = 0;
    const runner: CommandRunner = async (command, args) => {
      const full = [command, ...args].join(' ');
      if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 };
-      // capture-pane returns a draft line ("> unsent message")
-      return { stdout: '> unsent message\n', stderr: '', exitCode: 0 };
+      captureCallCount++;
+      // BEFORE: some content; AFTER: blank (full-screen TUI renders blank after send)
+      const stdout = captureCallCount === 1 ? 'Previous content\n' : '';
+      return { stdout, stderr: '', exitCode: 0 };
+    };
+
+    const program = new Command();
+    program.exitOverride();
+    registerAgentCommand(program, { runner, mosaicHome: home });
+
+    try {
+      await program.parseAsync([
+        'node',
+        'mosaic',
+        'agent',
+        'send',
+        'coder0',
+        '--message',
+        'hello',
+        '--verify',
+      ]);
+      expect(process.exitCode).toBe(1);
+      expect(stderrMessages.join('')).toMatch(/could not verify delivery/i);
+    } finally {
+      process.exitCode = originalExitCode;
+      stderrSpy.mockRestore();
+      await rm(home, { recursive: true, force: true });
+    }
+  }, 10_000);
+
+  it('send --verify: AFTER differs from BEFORE with draft line sets process.exitCode=1', async () => {
+    const originalExitCode = process.exitCode;
+    const stderrMessages: string[] = [];
+    const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((msg) => {
+      stderrMessages.push(String(msg));
+      return true;
+    });
+
+    const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-'));
+    await mkdir(join(home, 'fleet'), { recursive: true });
+    await writeFile(
+      join(home, 'fleet', 'roster.yaml'),
+      ['version: 1', 'transport: tmux', 'agents:', '  - name: coder0', '    runtime: codex'].join(
+        '\n',
+      ),
+    );
+
+    let captureCallCount = 0;
+    const runner: CommandRunner = async (command, args) => {
+      const full = [command, ...args].join(' ');
+      if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 };
+      captureCallCount++;
+      // BEFORE: old content; AFTER: message appeared but ended as a draft line
+      const stdout = captureCallCount === 1 ? 'Previous output\n' : '> unsent message\n';
+      return { stdout, stderr: '', exitCode: 0 };
    };

    const program = new Command();
@@ -1327,7 +1473,7 @@ describe('agent send --verify', () => {
    }
  }, 10_000);

-  it('send --verify: real response content sets exitCode=0 (accepted)', async () => {
+  it('send --verify: AFTER differs from BEFORE with real response content sets exitCode=0 (accepted)', async () => {
    const originalExitCode = process.exitCode;

    const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-'));
@@ -1339,11 +1485,17 @@ describe('agent send --verify', () => {
      ),
    );

+    let captureCallCount = 0;
    const runner: CommandRunner = async (command, args) => {
      const full = [command, ...args].join(' ');
      if (full.includes('agent-send.sh')) return { stdout: '', stderr: '', exitCode: 0 };
-      // capture-pane returns real response content
-      return { stdout: 'Agent response: task completed.\n', stderr: '', exitCode: 0 };
+      captureCallCount++;
+      // BEFORE: old content; AFTER: new response content (pane changed)
+      const stdout =
+        captureCallCount === 1
+          ? 'Old pane content\n'
+          : 'Old pane content\nAgent response: task completed.\n';
+      return { stdout, stderr: '', exitCode: 0 };
    };

    const program = new Command();