Compare commits
1 Commits
release/mo
...
feat/fleet
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
10d65c4a10 |
@@ -10,11 +10,14 @@ import {
|
|||||||
buildAgentWatchCreateViewerCommand,
|
buildAgentWatchCreateViewerCommand,
|
||||||
buildAgentWatchKillViewerCommand,
|
buildAgentWatchKillViewerCommand,
|
||||||
buildAgentVerifyAcceptedCommand,
|
buildAgentVerifyAcceptedCommand,
|
||||||
|
buildEnableLingerCommand,
|
||||||
buildFleetServiceCommand,
|
buildFleetServiceCommand,
|
||||||
|
buildSystemdEnableCommand,
|
||||||
buildSystemdShowCommand,
|
buildSystemdShowCommand,
|
||||||
buildTmuxListPanesCommand,
|
buildTmuxListPanesCommand,
|
||||||
classifySendResult,
|
classifySendResult,
|
||||||
detectDrift,
|
detectDrift,
|
||||||
|
enableFleetUnits,
|
||||||
generateAgentEnv,
|
generateAgentEnv,
|
||||||
getDefaultOperatorSourceLabel,
|
getDefaultOperatorSourceLabel,
|
||||||
getDefaultTenantAndHost,
|
getDefaultTenantAndHost,
|
||||||
@@ -28,10 +31,12 @@ import {
|
|||||||
parseTmuxListPanes,
|
parseTmuxListPanes,
|
||||||
registerFleetCommand,
|
registerFleetCommand,
|
||||||
resolveFleetPaths,
|
resolveFleetPaths,
|
||||||
|
RUNTIME_ACCEPTABLE_COMMANDS,
|
||||||
VERIFY_DEFAULT_TIMEOUT_MS,
|
VERIFY_DEFAULT_TIMEOUT_MS,
|
||||||
VERIFY_POLL_INTERVAL_MS,
|
VERIFY_POLL_INTERVAL_MS,
|
||||||
type AgentPsRow,
|
type AgentPsRow,
|
||||||
type CommandRunner,
|
type CommandRunner,
|
||||||
|
type FleetRoster,
|
||||||
type InteractiveRunner,
|
type InteractiveRunner,
|
||||||
type SleepFn,
|
type SleepFn,
|
||||||
} from './fleet.js';
|
} from './fleet.js';
|
||||||
@@ -909,6 +914,118 @@ describe('fleet ps — drift detection', () => {
|
|||||||
it('does NOT flag drift when pane command is null (pane dead)', () => {
|
it('does NOT flag drift when pane command is null (pane dead)', () => {
|
||||||
expect(detectDrift('pi', null)).toBe(false);
|
expect(detectDrift('pi', null)).toBe(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Each row: test title, roster runtime, observed pane command, expected drift verdict.
const wrappedRuntimeDriftCases: ReadonlyArray<readonly [string, string, string, boolean]> = [
  ['does NOT flag drift when pane=node for wrapped pi agent (mosaic yolo pi)', 'pi', 'node', false],
  [
    'does NOT flag drift when pane=node for wrapped codex agent (mosaic yolo codex)',
    'codex',
    'node',
    false,
  ],
  [
    'flags drift when pane=python3 for pi runtime (canary-pi dogfood regression guard)',
    'pi',
    'python3',
    true,
  ],
  ['does NOT flag drift when pane=python3 for dogfood runtime', 'dogfood', 'python3', false],
  ['flags drift for unknown pane command on known runtime', 'claude', 'bash', true],
];

for (const [title, rosterRuntime, paneCommand, drifted] of wrappedRuntimeDriftCases) {
  it(title, () => {
    expect(detectDrift(rosterRuntime, paneCommand)).toBe(drifted);
  });
}

it('RUNTIME_ACCEPTABLE_COMMANDS is exported and contains expected entries', () => {
  const { pi, dogfood, codex } = RUNTIME_ACCEPTABLE_COMMANDS;
  // pi accepts the node wrapper but must keep rejecting the python3 dogfood stub.
  expect(pi).toContain('node');
  expect(pi).not.toContain('python3');
  expect(dogfood).toContain('python3');
  expect(codex).toContain('node');
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('fleet install — auto-enable units for boot-survival', () => {
  /** Smallest roster used by these tests: one codex worker agent named `coder0`. */
  function makeMinimalRoster(): FleetRoster {
    return {
      version: 1,
      transport: 'tmux',
      tmux: { socketName: 'mosaic-factory', holderSession: '_holder' },
      defaults: { workingDirectory: '~/src' },
      runtimes: { codex: { resetCommand: '/clear' } },
      agents: [{ name: 'coder0', runtime: 'codex', className: 'worker' }],
    };
  }

  /**
   * Build a runner that records every invocation as `[command, ...args]`.
   * With `failSystemctlEnable` set, `systemctl ... enable ...` calls report exit code 1;
   * every other call reports success with empty output.
   */
  function makeRecordingRunner(failSystemctlEnable = false): {
    calls: string[][];
    runner: CommandRunner;
  } {
    const calls: string[][] = [];
    const runner: CommandRunner = async (command, args) => {
      calls.push([command, ...args]);
      if (failSystemctlEnable && command === 'systemctl' && args.includes('enable')) {
        return { stdout: '', stderr: 'Unit not found', exitCode: 1 };
      }
      return { stdout: '', stderr: '', exitCode: 0 };
    };
    return { calls, runner };
  }

  it('buildSystemdEnableCommand and buildEnableLingerCommand return correct command arrays', () => {
    const enableArgv = buildSystemdEnableCommand('mosaic-tmux-holder.service');
    expect(enableArgv).toEqual(['systemctl', '--user', 'enable', 'mosaic-tmux-holder.service']);

    const lingerArgv = buildEnableLingerCommand('testuser');
    expect(lingerArgv).toEqual(['loginctl', 'enable-linger', 'testuser']);
  });

  it('enables holder and each agent unit via injected runner after install', async () => {
    const { calls, runner } = makeRecordingRunner();

    await enableFleetUnits(runner, makeMinimalRoster(), {});

    const expectedUnits = ['mosaic-tmux-holder.service', 'mosaic-agent@coder0.service'];
    for (const unit of expectedUnits) {
      expect(calls).toContainEqual(['systemctl', '--user', 'enable', unit]);
    }
  });

  it('install still succeeds when systemctl enable returns non-zero (non-fatal)', async () => {
    const { calls, runner } = makeRecordingRunner(true);

    // A failing enable must surface as a warning, never as a rejection.
    await expect(enableFleetUnits(runner, makeMinimalRoster(), {})).resolves.toBeUndefined();

    // ...and the enable must actually have been attempted.
    const attemptedEnable = calls.some((argv) => argv.includes('enable'));
    expect(attemptedEnable).toBe(true);
  });

  it('--no-enable skips all systemctl enable and loginctl linger calls', async () => {
    const { calls, runner } = makeRecordingRunner();

    await enableFleetUnits(runner, makeMinimalRoster(), { enable: false });

    // Nothing may mention 'enable', and loginctl must never be invoked.
    const sawEnable = calls.some((argv) => argv.includes('enable'));
    const sawLoginctl = calls.some((argv) => argv[0] === 'loginctl');
    expect(!sawEnable).toBe(true);
    expect(!sawLoginctl).toBe(true);
  });
});
|
||||||
|
|
||||||
describe('fleet ps — tenant and host', () => {
|
describe('fleet ps — tenant and host', () => {
|
||||||
|
|||||||
@@ -210,6 +210,93 @@ export function buildFleetServiceCommand(action: FleetServiceAction, agentName?:
|
|||||||
return ['systemctl', '--user', action, service];
|
return ['systemctl', '--user', action, service];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Build the argv that enables a user-level systemd unit.
 *
 * The install auto-enable step runs this so the unit persists across reboots.
 *
 * @param unit - Unit name to enable, e.g. `mosaic-tmux-holder.service`.
 * @returns `['systemctl', '--user', 'enable', unit]`.
 */
export function buildSystemdEnableCommand(unit: string): string[] {
  const argv: string[] = ['systemctl', '--user', 'enable'];
  argv.push(unit);
  return argv;
}
|
||||||
|
|
||||||
|
/**
 * Build the argv that turns on systemd "linger" for a user.
 *
 * With linger enabled, that user's systemd services survive logout.
 *
 * @param user - Login name to enable linger for.
 * @returns `['loginctl', 'enable-linger', user]`.
 */
export function buildEnableLingerCommand(user: string): string[] {
  const argv: string[] = ['loginctl', 'enable-linger'];
  argv.push(user);
  return argv;
}
|
||||||
|
|
||||||
|
/**
 * Work out which login name to pass to `loginctl enable-linger`.
 *
 * Tries `userInfo()` first, then the `USER` and `LOGNAME` environment variables.
 * Empty strings are treated as missing.
 *
 * @returns The login name, or `undefined` when none could be determined.
 */
function resolveLingerUsername(): string | undefined {
  try {
    const { username } = userInfo();
    if (username) {
      return username;
    }
  } catch {
    // userInfo() can throw (e.g. no passwd entry for the uid); fall back to the environment.
  }
  // `||` rather than `??` on purpose: an empty USER/LOGNAME is as useless as an unset one.
  return process.env['USER'] || process.env['LOGNAME'] || undefined;
}

/**
 * Enable fleet units for boot-survival after install, then try to enable linger
 * for the current user.
 *
 * Best-effort throughout; this function never rejects:
 * - a unit whose `systemctl --user enable` exits non-zero is reported on stderr
 *   and the remaining units are still attempted;
 * - a failed `loginctl enable-linger` only prints a hint;
 * - anything that throws (e.g. the runner itself) is caught and reported as a warning.
 *
 * @param runner - Executes each command; the result's `exitCode`, `stderr` and
 *   `stdout` are inspected.
 * @param roster - Fleet roster. `mosaic-tmux-holder.service` is enabled first,
 *   then one `mosaic-agent@<name>.service` per entry in `roster.agents`.
 * @param opts - When `opts.enable === false` (the `--no-enable` flag) the whole
 *   step is skipped and the runner is never called.
 */
export async function enableFleetUnits(
  runner: CommandRunner,
  roster: FleetRoster,
  opts: { enable?: boolean },
): Promise<void> {
  if (opts.enable === false) {
    return;
  }

  try {
    // Holder first, then every agent instance of the templated unit.
    const units: string[] = ['mosaic-tmux-holder.service'];
    for (const agent of roster.agents) {
      units.push(`mosaic-agent@${agent.name}.service`);
    }

    let succeeded = 0;
    let failed = 0;
    for (const unit of units) {
      const result = await runner(...splitCommand(buildSystemdEnableCommand(unit)));
      if (result.exitCode === 0) {
        succeeded++;
        continue;
      }
      failed++;
      process.stderr.write(
        `Warning: could not enable ${unit}: ${result.stderr || result.stdout || 'non-zero exit'}\n`,
      );
    }

    if (succeeded > 0) {
      console.log(`Enabled ${succeeded} unit(s) for boot-survival.`);
    }
    if (failed > 0) {
      process.stderr.write(
        `Warning: ${failed} unit(s) could not be enabled (systemctl unavailable?). Run manually if needed.\n`,
      );
    }

    // Best-effort linger. Never invoke loginctl with a made-up account name:
    // if the current user cannot be determined, only print the hint.
    const username = resolveLingerUsername();
    if (username === undefined) {
      process.stderr.write(
        `Hint: could not determine the current user; run 'loginctl enable-linger <user>' as root to survive logout.\n`,
      );
      return;
    }

    const lingerResult = await runner(...splitCommand(buildEnableLingerCommand(username)));
    if (lingerResult.exitCode !== 0) {
      process.stderr.write(
        `Hint: run 'loginctl enable-linger ${username}' as root to survive logout.\n`,
      );
    }
  } catch (err) {
    process.stderr.write(
      `Warning: auto-enable step failed unexpectedly: ${err instanceof Error ? err.message : String(err)}\n`,
    );
  }
}
|
||||||
|
|
||||||
export function buildAgentSendCommand(
|
export function buildAgentSendCommand(
|
||||||
paths: FleetPaths,
|
paths: FleetPaths,
|
||||||
agentName: string,
|
agentName: string,
|
||||||
@@ -437,32 +524,41 @@ export function parseTmuxListPanes(
|
|||||||
return { pid, command, dead, idleSeconds };
|
return { pid, command, dead, idleSeconds };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Maps each known runtime to the set of acceptable pane commands.
 * A pane running any of these commands for the given runtime is NOT considered drifted.
 * Runtimes launched via `mosaic yolo` wrap in node, so 'node' is acceptable for most.
 * The dogfood runtime accepts python3/python (the canary-pi dogfood stub).
 */
export const RUNTIME_ACCEPTABLE_COMMANDS: Record<string, readonly string[]> = {
  claude: ['claude', 'node'],
  codex: ['codex', 'node'],
  opencode: ['opencode', 'node'],
  pi: ['pi', 'node'],
  dogfood: ['python3', 'python'],
};

/**
 * Determine if there is a runtime drift: roster says one runtime but the pane
 * is actually running something from a different runtime. We detect this by
 * checking if the pane command doesn't match a known acceptable command for the
 * roster's declared runtime.
 *
 * Known acceptable commands per runtime (see RUNTIME_ACCEPTABLE_COMMANDS):
 *   claude   → claude, node  (node covers mosaic yolo wrapper)
 *   codex    → codex, node
 *   opencode → opencode, node
 *   pi       → pi, node  (python3 still flags drift for canary-pi dogfood stub)
 *   dogfood  → python3, python
 *
 * If the pane is running something else (e.g., python3/dogfood-agent.py) for
 * an agent whose roster runtime is "pi", that's a drift.
 *
 * @param rosterRuntime - Runtime name declared in the roster.
 * @param paneCommand - Command currently running in the pane, or `null` when
 *   there is none (e.g. the pane is dead).
 * @returns `true` only when the runtime is known AND the pane command is not in
 *   its acceptable list. A missing pane command or an unknown runtime returns `false`.
 */
export function detectDrift(rosterRuntime: string, paneCommand: string | null): boolean {
  if (!paneCommand) return false;

  // Own-property check: a plain-object lookup would otherwise resolve inherited
  // keys such as 'constructor' or '__proto__' to non-array values and crash on
  // `.includes` below. Those names are simply unknown runtimes.
  if (!Object.prototype.hasOwnProperty.call(RUNTIME_ACCEPTABLE_COMMANDS, rosterRuntime)) {
    return false;
  }

  const acceptable = RUNTIME_ACCEPTABLE_COMMANDS[rosterRuntime];
  if (!acceptable) return false;
  return !acceptable.includes(paneCommand);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -706,12 +802,22 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
|
|||||||
cmd
|
cmd
|
||||||
.command('install')
|
.command('install')
|
||||||
.description('Install local fleet tools and user systemd units')
|
.description('Install local fleet tools and user systemd units')
|
||||||
.action(async () => installFleet(cmd, frameworkRoot));
|
.option('--no-enable', 'Skip enabling units for boot-survival')
|
||||||
|
.action(async (opts: { enable?: boolean }) => {
|
||||||
|
await installFleet(cmd, frameworkRoot);
|
||||||
|
const roster = await loadRosterForCommand(cmd);
|
||||||
|
await enableFleetUnits(runner, roster, opts);
|
||||||
|
});
|
||||||
|
|
||||||
cmd
|
cmd
|
||||||
.command('install-systemd')
|
.command('install-systemd')
|
||||||
.description('Install local fleet tools and user systemd units')
|
.description('Install local fleet tools and user systemd units')
|
||||||
.action(async () => installFleet(cmd, frameworkRoot));
|
.option('--no-enable', 'Skip enabling units for boot-survival')
|
||||||
|
.action(async (opts: { enable?: boolean }) => {
|
||||||
|
await installFleet(cmd, frameworkRoot);
|
||||||
|
const roster = await loadRosterForCommand(cmd);
|
||||||
|
await enableFleetUnits(runner, roster, opts);
|
||||||
|
});
|
||||||
|
|
||||||
for (const action of ['start', 'stop', 'restart'] as const) {
|
for (const action of ['start', 'stop', 'restart'] as const) {
|
||||||
cmd
|
cmd
|
||||||
|
|||||||
Reference in New Issue
Block a user