fix(fleet): watch viewer-session leak + workdir test settle-race

The two deferred F3 items from the #595/#599 reviews: (1) agent watch: wrap the interactive attach in try/finally and kill the grouped viewer session in the finally block, so an attach that throws or an interrupted process never leaves a stray <agent>-watch-<pid> session behind (the leak you found). (2) test-start-agent-session.sh Test 1: pane_current_path briefly reflects the tmux server's cwd until the pane process establishes its -c start dir, so poll until it settles. This fixes the cwd-dependent false failure that aborted the suite before the heartbeat tests (6/7); those now run. Verified: full shell suite green from /tmp (was failing); prettier clean. Refs #588 #542 Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-21 18:38:06 -05:00
parent 130837365f
commit 234dc56c5f
2 changed files with 22 additions and 11 deletions
--- a/packages/mosaic/framework/tools/fleet/test-start-agent-session.sh
+++ b/packages/mosaic/framework/tools/fleet/test-start-agent-session.sh
@@ -32,8 +32,15 @@ MOSAIC_AGENT_COMMAND='bash --noprofile --norc -i' \
  "$START" "$AGENT"

 tmux -L "$SOCKET" has-session -t "=$AGENT:0.0" || fail "agent session was not created"
-actual_dir=$(tmux -L "$SOCKET" display-message -p -t "=$AGENT:0.0" '#{pane_current_path}')
-[ "$actual_dir" = "$WORKDIR" ] || fail "agent workdir mismatch: $actual_dir"
+# Retry: pane_current_path briefly reflects the tmux server's cwd until the pane
+# process establishes its own cwd (the -c start dir). Poll until it settles.
+actual_dir=""
+for _ in $(seq 1 30); do
+  actual_dir=$(tmux -L "$SOCKET" display-message -p -t "=$AGENT:0.0" '#{pane_current_path}')
+  [ "$actual_dir" = "$WORKDIR" ] && break
+  sleep 0.1
+done
+[ "$actual_dir" = "$WORKDIR" ] || fail "agent workdir mismatch: $actual_dir (expected $WORKDIR)"

 # ── Test 2: idempotency (duplicate start prints 'already running') ─────────────
 MOSAIC_TMUX_SOCKET="$SOCKET" \
--- a/packages/mosaic/src/commands/fleet.ts
+++ b/packages/mosaic/src/commands/fleet.ts
@@ -1438,15 +1438,19 @@ export function registerFleetAgentCommands(

      await runChecked(runner, buildAgentWatchCreateViewerCommand(agent, viewerName, socketName));

-      const [bin, args] = splitCommand(buildAgentWatchAttachCommand(viewerName, socketName));
-      const exitCode = await iRunner(bin, args);
-
-      // Best-effort cleanup of the viewer session regardless of how the user detached.
-      // Errors here are intentionally suppressed — the agent session is unaffected.
-      const killResult = await runner(
-        ...splitCommand(buildAgentWatchKillViewerCommand(viewerName, socketName)),
-      );
-      void killResult; // result is intentionally ignored
+      let exitCode = 0;
+      try {
+        const [bin, args] = splitCommand(buildAgentWatchAttachCommand(viewerName, socketName));
+        exitCode = await iRunner(bin, args);
+      } finally {
+        // ALWAYS clean up the viewer session — even if attach threw or the process was
+        // interrupted — so stale grouped *-watch-* sessions never accumulate. Errors here
+        // are intentionally suppressed; the agent session is unaffected.
+        const killResult = await runner(
+          ...splitCommand(buildAgentWatchKillViewerCommand(viewerName, socketName)),
+        );
+        void killResult;
+      }

      if (exitCode !== 0) {
        process.exitCode = exitCode;