fix(fleet): watch viewer-session leak + workdir test settle-race
The 2 deferred F3 items from the #595/#599 reviews:

- agent watch: wrap the interactive attach in try/finally and kill the grouped viewer session in finally — so an attach that throws or an interrupted process never leaves a stray <agent>-watch-<pid> session behind (the leak you found).
- test-start-agent-session.sh Test 1: pane_current_path briefly reflects the tmux server's cwd until the pane process establishes its -c start dir; poll until it settles. Fixes the cwd-dependent false failure that aborted the suite before the heartbeat tests (6/7) — those now run.

Verified: full shell suite green from /tmp (was failing); prettier clean.

Refs #588 #542

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -32,8 +32,15 @@ MOSAIC_AGENT_COMMAND='bash --noprofile --norc -i' \
|
||||
"$START" "$AGENT"
|
||||
|
||||
tmux -L "$SOCKET" has-session -t "=$AGENT:0.0" || fail "agent session was not created"
|
||||
actual_dir=$(tmux -L "$SOCKET" display-message -p -t "=$AGENT:0.0" '#{pane_current_path}')
|
||||
[ "$actual_dir" = "$WORKDIR" ] || fail "agent workdir mismatch: $actual_dir"
|
||||
# Retry: pane_current_path briefly reflects the tmux server's cwd until the pane
|
||||
# process establishes its own cwd (the -c start dir). Poll until it settles.
|
||||
actual_dir=""
|
||||
for _ in $(seq 1 30); do
|
||||
actual_dir=$(tmux -L "$SOCKET" display-message -p -t "=$AGENT:0.0" '#{pane_current_path}')
|
||||
[ "$actual_dir" = "$WORKDIR" ] && break
|
||||
sleep 0.1
|
||||
done
|
||||
[ "$actual_dir" = "$WORKDIR" ] || fail "agent workdir mismatch: $actual_dir (expected $WORKDIR)"
|
||||
|
||||
# ── Test 2: idempotency (duplicate start prints 'already running') ─────────────
|
||||
MOSAIC_TMUX_SOCKET="$SOCKET" \
|
||||
|
||||
@@ -1438,15 +1438,19 @@ export function registerFleetAgentCommands(
|
||||
|
||||
await runChecked(runner, buildAgentWatchCreateViewerCommand(agent, viewerName, socketName));
|
||||
|
||||
const [bin, args] = splitCommand(buildAgentWatchAttachCommand(viewerName, socketName));
|
||||
const exitCode = await iRunner(bin, args);
|
||||
|
||||
// Best-effort cleanup of the viewer session regardless of how the user detached.
|
||||
// Errors here are intentionally suppressed — the agent session is unaffected.
|
||||
const killResult = await runner(
|
||||
...splitCommand(buildAgentWatchKillViewerCommand(viewerName, socketName)),
|
||||
);
|
||||
void killResult; // result is intentionally ignored
|
||||
let exitCode = 0;
|
||||
try {
|
||||
const [bin, args] = splitCommand(buildAgentWatchAttachCommand(viewerName, socketName));
|
||||
exitCode = await iRunner(bin, args);
|
||||
} finally {
|
||||
// ALWAYS clean up the viewer session — even if attach threw or the process was
|
||||
// interrupted — so stale grouped *-watch-* sessions never accumulate. Errors here
|
||||
// are intentionally suppressed; the agent session is unaffected.
|
||||
const killResult = await runner(
|
||||
...splitCommand(buildAgentWatchKillViewerCommand(viewerName, socketName)),
|
||||
);
|
||||
void killResult;
|
||||
}
|
||||
|
||||
if (exitCode !== 0) {
|
||||
process.exitCode = exitCode;
|
||||
|
||||
Reference in New Issue
Block a user