feat(fleet): phase-2 observability — fleet ps + watch + send --verify
FR-1 fleet ps: joins systemd show (ActiveState/SubState/UnitFileState), tmux list-panes (pid/command/dead/activity), and the file-based heartbeat (~/.config/mosaic/fleet/run/<name>.hb) into one table with a row per roster agent. Flags DRIFT (roster runtime ≠ actual pane command) and BOOT-ENABLE (active but UnitFileState=disabled). --json output includes tenant_id and host on every record (FR-6 zero-foreclosure for multi-tenant/host).

FR-3 agent watch: read-only tmux attach (-r flag) so the operator can observe any session without injecting keystrokes or resizing the window. Registered as a new verb alongside tail/send/reset in registerFleetAgentCommands.

FR-5 agent send --verify: after keystroke injection, captures the last 5 pane lines and applies a draft heuristic (the last non-empty line starts with '> '). Exits non-zero and writes to stderr if the message appears unsubmitted. Default send behavior is unchanged when --verify is omitted.

New pure exported helpers (all unit-testable without real tmux/systemd): buildSystemdShowCommand, buildTmuxListPanesCommand, buildAgentWatchCommand, buildAgentVerifyAcceptedCommand, parseHeartbeat, parseSystemdShow, parseTmuxListPanes, detectDrift, getDefaultTenantAndHost, isSendAccepted, heartbeatPath.

Added 31 new spec cases (62 total) covering exact command construction, JSON shape, heartbeat parsing, drift detection, and verify flow.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_01RMoEx7hfdFGjUiCHuN1RRi
This commit is contained in:
@@ -5,14 +5,26 @@ import { Command } from 'commander';
|
||||
import { afterEach, describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
buildAgentSendCommand,
|
||||
buildAgentWatchCommand,
|
||||
buildAgentVerifyAcceptedCommand,
|
||||
buildFleetServiceCommand,
|
||||
buildSystemdShowCommand,
|
||||
buildTmuxListPanesCommand,
|
||||
detectDrift,
|
||||
generateAgentEnv,
|
||||
getDefaultOperatorSourceLabel,
|
||||
getDefaultTenantAndHost,
|
||||
getRosterAgent,
|
||||
heartbeatPath,
|
||||
isSendAccepted,
|
||||
loadFleetRoster,
|
||||
mergeAgentEnv,
|
||||
parseHeartbeat,
|
||||
parseSystemdShow,
|
||||
parseTmuxListPanes,
|
||||
registerFleetCommand,
|
||||
resolveFleetPaths,
|
||||
type AgentPsRow,
|
||||
type CommandRunner,
|
||||
} from './fleet.js';
|
||||
import { registerAgentCommand } from './agent.js';
|
||||
@@ -39,6 +51,7 @@ describe('registerFleetCommand', () => {
|
||||
'init',
|
||||
'install',
|
||||
'install-systemd',
|
||||
'ps',
|
||||
'restart',
|
||||
'start',
|
||||
'status',
|
||||
@@ -59,6 +72,7 @@ describe('registerFleetCommand', () => {
|
||||
'send',
|
||||
'status',
|
||||
'tail',
|
||||
'watch',
|
||||
]);
|
||||
});
|
||||
});
|
||||
@@ -736,3 +750,473 @@ describe('fleet command construction', () => {
|
||||
expect(packageJson.files).toEqual(expect.arrayContaining(['dist', 'framework']));
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Phase-2 observability — unit tests (FR-1, FR-3, FR-5, FR-6)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Pins the exact argv / path produced by the pure builders used by `fleet ps`.
describe('fleet ps — command construction', () => {
  it('builds exact systemd show command for an agent unit', () => {
    // Every queried unit property is passed as its own `-p <name>` pair.
    const propertyArgs = ['ActiveState', 'SubState', 'UnitFileState'].flatMap((property) => [
      '-p',
      property,
    ]);
    const expectedArgv = [
      'systemctl',
      '--user',
      'show',
      'mosaic-agent@canary-pi.service',
      ...propertyArgs,
    ];

    expect(buildSystemdShowCommand('canary-pi')).toEqual(expectedArgv);
  });

  it('builds exact tmux list-panes command with the correct format string', () => {
    // Four space-separated tmux format fields, in the order the parser tests feed them.
    const paneFormat = '#{pane_pid} #{pane_current_command} #{pane_dead} #{pane_activity}';
    const expectedArgv = [
      'tmux',
      '-L',
      'mosaic-factory',
      'list-panes',
      '-t',
      '=canary-pi:0.0',
      '-F',
      paneFormat,
    ];

    const argv = buildTmuxListPanesCommand('canary-pi', 'mosaic-factory');

    expect(argv).toEqual(expectedArgv);
  });

  it('uses DEFAULT_SOCKET_NAME when socket is omitted from list-panes', () => {
    // Index 2 is the value that follows `-L` in the argv.
    expect(buildTmuxListPanesCommand('canary-pi')[2]).toBe('mosaic-factory');
  });

  it('derives heartbeat path under ~/.config/mosaic/fleet/run/', () => {
    const mosaicHome = '/home/test/.config/mosaic';
    const expectedPath = '/home/test/.config/mosaic/fleet/run/canary-pi.hb';

    expect(heartbeatPath('canary-pi', mosaicHome)).toBe(expectedPath);
  });
});
|
||||
|
||||
// parseHeartbeat is given an explicit "now" so every age computation is deterministic.
describe('fleet ps — heartbeat parsing', () => {
  const NOW = 1_700_000_000_000; // fixed epoch ms for deterministic tests

  /** Renders heartbeat file content whose `ts` lies `ageMs` before NOW. */
  const renderHeartbeat = (ageMs: number, pid: number, status: string): string => {
    const ts = new Date(NOW - ageMs).toISOString();
    return `ts=${ts}\npid=${pid}\nstatus=${status}\n`;
  };

  it('parses a healthy heartbeat file', () => {
    // 10s old — inside the 3×15s = 45s freshness window.
    const { health, pid, status, ageMs } = parseHeartbeat(renderHeartbeat(10_000, 12345, 'ok'), NOW);

    expect(health).toBe('healthy');
    expect(pid).toBe(12345);
    expect(status).toBe('ok');
    expect(ageMs).toBe(10_000);
  });

  it('reports stale when heartbeat is older than 3×interval', () => {
    // 60s old — beyond the 45s threshold.
    const { health, status } = parseHeartbeat(renderHeartbeat(60_000, 99, 'busy'), NOW);

    expect(health).toBe('stale');
    expect(status).toBe('busy');
  });

  it('reports unknown when heartbeat file is missing (null input)', () => {
    const { health, ts, pid, ageMs } = parseHeartbeat(null, NOW);

    expect(health).toBe('unknown');
    expect(ts).toBeNull();
    expect(pid).toBeNull();
    expect(ageMs).toBeNull();
  });

  it('tolerates missing fields in heartbeat file', () => {
    // Only a `ts` line is present, and its value is not a parseable date.
    const { health, ts } = parseHeartbeat('ts=not-a-date\n', NOW);

    expect(health).toBe('unknown');
    expect(ts).toBeNull();
  });
});
|
||||
|
||||
// parseSystemdShow consumes the `Key=Value` lines printed by `systemctl show -p …`.
describe('fleet ps — systemd show parsing', () => {
  it('parses ActiveState, SubState, UnitFileState from systemctl show output', () => {
    // The trailing empty element reproduces the final newline of the real output.
    const showOutput = ['ActiveState=active', 'SubState=running', 'UnitFileState=enabled', ''].join(
      '\n',
    );
    const expectedState = {
      ActiveState: 'active',
      SubState: 'running',
      UnitFileState: 'enabled',
    };

    expect(parseSystemdShow(showOutput)).toEqual(expectedState);
  });

  it('defaults missing keys to "unknown"', () => {
    const { SubState, UnitFileState } = parseSystemdShow('ActiveState=inactive\n');

    expect(SubState).toBe('unknown');
    expect(UnitFileState).toBe('unknown');
  });
});
|
||||
|
||||
// parseTmuxListPanes reads one "<pid> <command> <dead> <activity>" line; the
// activity field is fed here as epoch seconds and compared against an injected "now" (ms).
describe('fleet ps — tmux list-panes parsing', () => {
  const NOW_MS = 1_700_000_000_000;

  it('parses alive pane with pid, command, and idle time', () => {
    const lastActivitySeconds = Math.floor((NOW_MS - 30_000) / 1000); // 30s ago
    const paneLine = `12345 claude 0 ${lastActivitySeconds}\n`;

    const { pid, command, dead, idleSeconds } = parseTmuxListPanes(paneLine, NOW_MS);

    expect(pid).toBe(12345);
    expect(command).toBe('claude');
    expect(dead).toBe(false);
    expect(idleSeconds).toBe(30);
  });

  it('reports dead pane when pane_dead=1', () => {
    const { dead } = parseTmuxListPanes('0 bash 1 0\n', NOW_MS);

    expect(dead).toBe(true);
  });

  it('returns nulls for empty pane output', () => {
    // No output at all is reported as a dead pane with every other field null.
    const { pid, command, dead, idleSeconds } = parseTmuxListPanes('', NOW_MS);

    expect(pid).toBeNull();
    expect(command).toBeNull();
    expect(dead).toBe(true);
    expect(idleSeconds).toBeNull();
  });
});
|
||||
|
||||
// detectDrift(rosterRuntime, paneCommand): true only when a known runtime is
// paired with a pane command that does not match it.
describe('fleet ps — drift detection', () => {
  it('flags drift when roster says pi but pane runs python3', () => {
    const drifted = detectDrift('pi', 'python3');

    expect(drifted).toBe(true);
  });

  it('flags drift when roster says claude but pane runs dogfood-agent.py', () => {
    const drifted = detectDrift('claude', 'dogfood-agent.py');

    expect(drifted).toBe(true);
  });

  it('does NOT flag drift when pane command matches the roster runtime', () => {
    // Each runtime here is checked against a pane command of the same name.
    for (const runtime of ['claude', 'codex', 'pi', 'opencode']) {
      expect(detectDrift(runtime, runtime)).toBe(false);
    }
  });

  it('does NOT flag drift for unknown/custom runtimes (no canonical mapping)', () => {
    const drifted = detectDrift('custom-runtime', 'anything');

    expect(drifted).toBe(false);
  });

  it('does NOT flag drift when pane command is null (pane dead)', () => {
    const drifted = detectDrift('pi', null);

    expect(drifted).toBe(false);
  });
});
|
||||
|
||||
// The default tenant/host pair must always be usable as-is in --json records.
describe('fleet ps — tenant and host', () => {
  it('returns tenant_id and host as non-empty strings', () => {
    const defaults = getDefaultTenantAndHost();

    // Same two checks for both fields, tenant_id first.
    for (const value of [defaults.tenant_id, defaults.host]) {
      expect(typeof value).toBe('string');
      expect(value.length).toBeGreaterThan(0);
    }
  });
});
|
||||
|
||||
// End-to-end check of `fleet ps --json` against a temp roster and a stubbed
// command runner: verifies the record shape plus the DRIFT and BOOT-ENABLE flags.
describe('fleet ps — JSON output shape (FR-6)', () => {
  it('produces --json records including tenant_id and host for each agent', async () => {
    const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-'));
    const origLog = console.log;
    const lines: string[] = [];

    // Everything that can throw runs inside try/finally so a failure during
    // setup can neither leave console.log patched nor leak the temp directory.
    try {
      console.log = (msg: string) => {
        lines.push(msg);
      };

      const rosterPath = join(home, 'fleet', 'roster.yaml');
      await mkdir(join(home, 'fleet'), { recursive: true });
      await writeFile(
        rosterPath,
        [
          'version: 1',
          'transport: tmux',
          'agents:',
          // Continuation keys must line up under `name` for the entry to be valid YAML.
          '  - name: canary-pi',
          '    runtime: pi',
          '    class: canary',
        ].join('\n'),
      );

      const nowMs = Date.now();
      const activityEpoch = Math.floor((nowMs - 20_000) / 1000); // pane last active 20s ago

      // Stub runner: systemd reports active-but-disabled, tmux reports a python3 pane.
      const runner: CommandRunner = async (command, args) => {
        const fullArgs = [command, ...args].join(' ');
        if (fullArgs.includes('systemctl') && fullArgs.includes('show')) {
          return {
            stdout: 'ActiveState=active\nSubState=running\nUnitFileState=disabled\n',
            stderr: '',
            exitCode: 0,
          };
        }
        if (fullArgs.includes('list-panes')) {
          return {
            stdout: `12345 python3 0 ${activityEpoch}\n`,
            stderr: '',
            exitCode: 0,
          };
        }
        return { stdout: '', stderr: '', exitCode: 0 };
      };

      const program = new Command();
      program.exitOverride();
      registerFleetCommand(program, { runner, mosaicHome: home });

      await program.parseAsync(['node', 'mosaic', 'fleet', 'ps', '--json']);
    } finally {
      console.log = origLog;
      await rm(home, { recursive: true, force: true });
    }

    const json = JSON.parse(lines.join('')) as AgentPsRow[];
    expect(Array.isArray(json)).toBe(true);
    expect(json).toHaveLength(1);

    const row = json[0]!;
    // FR-6: tenant_id and host must be present
    expect(typeof row.tenant_id).toBe('string');
    expect(row.tenant_id.length).toBeGreaterThan(0);
    expect(typeof row.host).toBe('string');
    expect(row.host.length).toBeGreaterThan(0);

    // drift: roster says pi, pane runs python3 → drift flag
    expect(row.driftFlag).toBe(true);
    // boot-enable warning: active + disabled
    expect(row.bootEnableWarning).toBe(true);

    // heartbeat missing → unknown (no .hb file was written into the temp home)
    expect(row.heartbeat.health).toBe('unknown');

    expect(row.name).toBe('canary-pi');
    expect(row.runtime).toBe('pi');
    expect(row.systemdActive).toBe('active');
    expect(row.systemdEnabled).toBe('disabled');
  });
});
|
||||
|
||||
// Verifies which external commands `fleet ps` issues, and in what order, for a
// one-agent roster. Only the recorded argv sequence is asserted.
describe('fleet ps — command sequences issued', () => {
  it('issues systemd show + tmux list-panes per agent', async () => {
    const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-'));
    const origLog = console.log;

    // Setup lives inside try/finally so a failure cannot leave console.log
    // silenced for later tests or leak the temp directory.
    try {
      // suppress console.log for table output
      console.log = () => {};

      const rosterPath = join(home, 'fleet', 'roster.yaml');
      await mkdir(join(home, 'fleet'), { recursive: true });
      await writeFile(
        rosterPath,
        [
          'version: 1',
          'transport: tmux',
          'agents:',
          // Continuation keys must line up under `name` for the entry to be valid YAML.
          '  - name: coder0',
          '    runtime: codex',
        ].join('\n'),
      );

      const calls: string[][] = [];
      // Records every invocation; the same canned systemd output is returned for
      // all commands because this test does not inspect the parsed values.
      const runner: CommandRunner = async (command, args) => {
        calls.push([command, ...args]);
        return {
          stdout: 'ActiveState=inactive\nSubState=dead\nUnitFileState=enabled\n',
          stderr: '',
          exitCode: 0,
        };
      };

      const program = new Command();
      program.exitOverride();
      registerFleetCommand(program, { runner, mosaicHome: home });

      await program.parseAsync(['node', 'mosaic', 'fleet', 'ps']);

      expect(calls).toEqual([
        buildSystemdShowCommand('coder0'),
        buildTmuxListPanesCommand('coder0', 'mosaic-factory'),
      ]);
    } finally {
      console.log = origLog;
      await rm(home, { recursive: true, force: true });
    }
  });
});
|
||||
|
||||
// `agent watch`: read-only tmux attach. Covers the pure argv builder and the
// registered CLI verb (roster lookup + runner invocation).
describe('agent watch', () => {
  /**
   * Creates a temp mosaic home containing a one-agent roster (coder0 / codex).
   * The directory is removed again if writing the fixture fails.
   *
   * @returns absolute path of the temp home; the caller is responsible for removing it.
   */
  async function createRosterHome(): Promise<string> {
    const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-'));
    try {
      await mkdir(join(home, 'fleet'), { recursive: true });
      await writeFile(
        join(home, 'fleet', 'roster.yaml'),
        [
          'version: 1',
          'transport: tmux',
          'agents:',
          // Continuation keys must line up under `name` for the entry to be valid YAML.
          '  - name: coder0',
          '    runtime: codex',
        ].join('\n'),
      );
    } catch (error: unknown) {
      await rm(home, { recursive: true, force: true });
      throw error;
    }
    return home;
  }

  it('builds exact read-only tmux attach command', () => {
    expect(buildAgentWatchCommand('canary-pi', 'mosaic-factory')).toEqual([
      'tmux',
      '-L',
      'mosaic-factory',
      'attach',
      '-r', // read-only attach
      '-t',
      '=canary-pi',
    ]);
  });

  it('uses DEFAULT_SOCKET_NAME when socket is omitted', () => {
    const cmd = buildAgentWatchCommand('canary-pi');

    // Index 2 is the value that follows `-L` in the argv.
    expect(cmd[2]).toBe('mosaic-factory');
    expect(cmd).toContain('-r');
  });

  it('issues the read-only attach command through the injected runner', async () => {
    const home = await createRosterHome();

    try {
      const calls: string[][] = [];
      const runner: CommandRunner = async (command, args) => {
        calls.push([command, ...args]);
        return { stdout: '', stderr: '', exitCode: 0 };
      };

      const program = new Command();
      program.exitOverride();
      registerAgentCommand(program, { runner, mosaicHome: home });

      await program.parseAsync(['node', 'mosaic', 'agent', 'watch', 'coder0']);

      expect(calls).toEqual([['tmux', '-L', 'mosaic-factory', 'attach', '-r', '-t', '=coder0']]);
    } finally {
      await rm(home, { recursive: true, force: true });
    }
  });

  it('rejects watch for agents not in the roster', async () => {
    const home = await createRosterHome();

    try {
      const runner = vi.fn<CommandRunner>(async () => ({ stdout: '', stderr: '', exitCode: 0 }));
      const program = new Command();
      program.exitOverride();
      registerAgentCommand(program, { runner, mosaicHome: home });

      await expect(
        program.parseAsync(['node', 'mosaic', 'agent', 'watch', 'typo']),
      ).rejects.toThrow('Agent "typo" is not in the fleet roster');
      // The roster check must fail before any external command is run.
      expect(runner).not.toHaveBeenCalled();
    } finally {
      await rm(home, { recursive: true, force: true });
    }
  });
});
|
||||
|
||||
// `agent send --verify`: covers the capture-pane argv builder, the isSendAccepted
// draft heuristic, and the CLI flow with and without the --verify flag.
describe('agent send --verify', () => {
  /**
   * Creates a temp mosaic home containing a one-agent roster (coder0 / codex).
   * The directory is removed again if writing the fixture fails.
   *
   * @returns absolute path of the temp home; the caller is responsible for removing it.
   */
  async function createRosterHome(): Promise<string> {
    const home = await mkdtemp(join(tmpdir(), 'mosaic-fleet-'));
    try {
      await mkdir(join(home, 'fleet'), { recursive: true });
      await writeFile(
        join(home, 'fleet', 'roster.yaml'),
        [
          'version: 1',
          'transport: tmux',
          'agents:',
          // Continuation keys must line up under `name` for the entry to be valid YAML.
          '  - name: coder0',
          '    runtime: codex',
        ].join('\n'),
      );
    } catch (error: unknown) {
      await rm(home, { recursive: true, force: true });
      throw error;
    }
    return home;
  }

  /**
   * Runs `mosaic agent send coder0 --message "hello world"` plus any extra flags
   * against a recording runner that answers every call with `stdout`.
   *
   * @returns the argv of every command the CLI issued, in order.
   */
  async function runSend(home: string, extraArgs: string[], stdout: string): Promise<string[][]> {
    const calls: string[][] = [];
    const runner: CommandRunner = async (command, args) => {
      calls.push([command, ...args]);
      return { stdout, stderr: '', exitCode: 0 };
    };

    const program = new Command();
    program.exitOverride();
    registerAgentCommand(program, { runner, mosaicHome: home });

    await program.parseAsync([
      'node',
      'mosaic',
      'agent',
      'send',
      'coder0',
      '--message',
      'hello world',
      ...extraArgs,
    ]);
    return calls;
  }

  it('builds exact verify capture-pane command', () => {
    expect(buildAgentVerifyAcceptedCommand('canary-pi', 'mosaic-factory', 5)).toEqual([
      'tmux',
      '-L',
      'mosaic-factory',
      'capture-pane',
      '-t',
      '=canary-pi:0.0',
      '-p',
      '-S',
      '-5', // the line count argument (5) becomes a negative start line
    ]);
  });

  it('isSendAccepted: returns true for normal response output', () => {
    expect(isSendAccepted('Some response text\nAnother line\n')).toBe(true);
  });

  it('isSendAccepted: returns false when last line starts with "> " (draft pattern)', () => {
    expect(isSendAccepted('> my unsent message')).toBe(false);
  });

  it('isSendAccepted: returns true for blank pane (treated as submitted)', () => {
    expect(isSendAccepted('')).toBe(true);
    expect(isSendAccepted(' \n \n')).toBe(true);
  });

  it('issues send then verify capture via injected runner when --verify is passed', async () => {
    const home = await createRosterHome();

    try {
      // The same stdout answers both agent-send.sh and capture-pane; for the
      // capture it reads as accepted (non-draft) pane output.
      const calls = await runSend(home, ['--verify'], 'Response from agent\n');

      // First call should be agent-send.sh, second call should be capture-pane for verify
      expect(calls).toHaveLength(2);
      expect(calls[0]![0]).toContain('agent-send.sh');
      const captureCall = calls[1]!;
      expect(captureCall).toEqual(buildAgentVerifyAcceptedCommand('coder0', 'mosaic-factory', 5));
    } finally {
      await rm(home, { recursive: true, force: true });
    }
    // NOTE(review): extended 10s timeout — presumably --verify waits before capturing; confirm.
  }, 10_000);

  it('does NOT issue capture-pane verify when --verify is not passed', async () => {
    const home = await createRosterHome();

    try {
      const calls = await runSend(home, [], '');

      // Only 1 call: agent-send.sh (no capture-pane)
      expect(calls).toHaveLength(1);
      expect(calls[0]![0]).toContain('agent-send.sh');
    } finally {
      await rm(home, { recursive: true, force: true });
    }
  });
});
|
||||
|
||||
Reference in New Issue
Block a user