From afcbbb302f6e107601c058afce130a1decb91654 Mon Sep 17 00:00:00 2001 From: "jason.woltje" Date: Sun, 21 Jun 2026 20:02:19 +0000 Subject: [PATCH] feat(fleet): auto-enable units on install + drift recognizes wrapped runtimes (#583) --- packages/mosaic/src/commands/fleet.spec.ts | 117 +++++++++++++++++ packages/mosaic/src/commands/fleet.ts | 140 ++++++++++++++++++--- 2 files changed, 240 insertions(+), 17 deletions(-) diff --git a/packages/mosaic/src/commands/fleet.spec.ts b/packages/mosaic/src/commands/fleet.spec.ts index 5b852cf..e51e32e 100644 --- a/packages/mosaic/src/commands/fleet.spec.ts +++ b/packages/mosaic/src/commands/fleet.spec.ts @@ -10,11 +10,14 @@ import { buildAgentWatchCreateViewerCommand, buildAgentWatchKillViewerCommand, buildAgentVerifyAcceptedCommand, + buildEnableLingerCommand, buildFleetServiceCommand, + buildSystemdEnableCommand, buildSystemdShowCommand, buildTmuxListPanesCommand, classifySendResult, detectDrift, + enableFleetUnits, generateAgentEnv, getDefaultOperatorSourceLabel, getDefaultTenantAndHost, @@ -28,10 +31,12 @@ import { parseTmuxListPanes, registerFleetCommand, resolveFleetPaths, + RUNTIME_ACCEPTABLE_COMMANDS, VERIFY_DEFAULT_TIMEOUT_MS, VERIFY_POLL_INTERVAL_MS, type AgentPsRow, type CommandRunner, + type FleetRoster, type InteractiveRunner, type SleepFn, } from './fleet.js'; @@ -909,6 +914,118 @@ describe('fleet ps — drift detection', () => { it('does NOT flag drift when pane command is null (pane dead)', () => { expect(detectDrift('pi', null)).toBe(false); }); + + it('does NOT flag drift when pane=node for wrapped pi agent (mosaic yolo pi)', () => { + expect(detectDrift('pi', 'node')).toBe(false); + }); + + it('does NOT flag drift when pane=node for wrapped codex agent (mosaic yolo codex)', () => { + expect(detectDrift('codex', 'node')).toBe(false); + }); + + it('flags drift when pane=python3 for pi runtime (canary-pi dogfood regression guard)', () => { + expect(detectDrift('pi', 'python3')).toBe(true); + }); + + it('does NOT 
flag drift when pane=python3 for dogfood runtime', () => { + expect(detectDrift('dogfood', 'python3')).toBe(false); + }); + + it('flags drift for unknown pane command on known runtime', () => { + expect(detectDrift('claude', 'bash')).toBe(true); + }); + + it('RUNTIME_ACCEPTABLE_COMMANDS is exported and contains expected entries', () => { + expect(RUNTIME_ACCEPTABLE_COMMANDS['pi']).toContain('node'); + expect(RUNTIME_ACCEPTABLE_COMMANDS['pi']).not.toContain('python3'); + expect(RUNTIME_ACCEPTABLE_COMMANDS['dogfood']).toContain('python3'); + expect(RUNTIME_ACCEPTABLE_COMMANDS['codex']).toContain('node'); + }); +}); + +describe('fleet install — auto-enable units for boot-survival', () => { + it('buildSystemdEnableCommand and buildEnableLingerCommand return correct command arrays', () => { + expect(buildSystemdEnableCommand('mosaic-tmux-holder.service')).toEqual([ + 'systemctl', + '--user', + 'enable', + 'mosaic-tmux-holder.service', + ]); + expect(buildEnableLingerCommand('testuser')).toEqual(['loginctl', 'enable-linger', 'testuser']); + }); + + it('enables holder and each agent unit via injected runner after install', async () => { + const minimalRoster: FleetRoster = { + version: 1, + transport: 'tmux', + tmux: { socketName: 'mosaic-factory', holderSession: '_holder' }, + defaults: { workingDirectory: '~/src' }, + runtimes: { codex: { resetCommand: '/clear' } }, + agents: [{ name: 'coder0', runtime: 'codex', className: 'worker' }], + }; + + const calls: string[][] = []; + const runner: CommandRunner = async (command, args) => { + calls.push([command, ...args]); + return { stdout: '', stderr: '', exitCode: 0 }; + }; + + await enableFleetUnits(runner, minimalRoster, {}); + + expect(calls).toContainEqual(['systemctl', '--user', 'enable', 'mosaic-tmux-holder.service']); + expect(calls).toContainEqual(['systemctl', '--user', 'enable', 'mosaic-agent@coder0.service']); + }); + + it('install still succeeds when systemctl enable returns non-zero (non-fatal)', async () => { + 
const minimalRoster: FleetRoster = { + version: 1, + transport: 'tmux', + tmux: { socketName: 'mosaic-factory', holderSession: '_holder' }, + defaults: { workingDirectory: '~/src' }, + runtimes: { codex: { resetCommand: '/clear' } }, + agents: [{ name: 'coder0', runtime: 'codex', className: 'worker' }], + }; + + const calls: string[][] = []; + const runner: CommandRunner = async (command, args) => { + calls.push([command, ...args]); + // Simulate systemctl enable failure + if (command === 'systemctl' && args.includes('enable')) { + return { stdout: '', stderr: 'Unit not found', exitCode: 1 }; + } + return { stdout: '', stderr: '', exitCode: 0 }; + }; + + // Must NOT reject/throw even when enable calls fail + await expect(enableFleetUnits(runner, minimalRoster, {})).resolves.toBeUndefined(); + + // The enable attempt must have been made + expect(calls.some((c) => c.includes('enable'))).toBe(true); + }); + + it('--no-enable skips all systemctl enable and loginctl linger calls', async () => { + const minimalRoster: FleetRoster = { + version: 1, + transport: 'tmux', + tmux: { socketName: 'mosaic-factory', holderSession: '_holder' }, + defaults: { workingDirectory: '~/src' }, + runtimes: { codex: { resetCommand: '/clear' } }, + agents: [{ name: 'coder0', runtime: 'codex', className: 'worker' }], + }; + + const calls: string[][] = []; + const runner: CommandRunner = async (command, args) => { + calls.push([command, ...args]); + return { stdout: '', stderr: '', exitCode: 0 }; + }; + + await enableFleetUnits(runner, minimalRoster, { enable: false }); + + // No calls should include 'enable' + expect(calls.every((c) => !c.includes('enable'))).toBe(true); + // No loginctl calls at all + expect(calls.every((c) => c[0] !== 'loginctl')).toBe(true); + }); }); describe('fleet ps — tenant and host', () => { diff --git a/packages/mosaic/src/commands/fleet.ts b/packages/mosaic/src/commands/fleet.ts index 4e76035..7845cb6 100644 --- a/packages/mosaic/src/commands/fleet.ts +++ 
b/packages/mosaic/src/commands/fleet.ts @@ -210,6 +210,93 @@ export function buildFleetServiceCommand(action: FleetServiceAction, agentName?: return ['systemctl', '--user', action, service]; } +/** + * Returns the systemctl --user enable command for a given unit. + * Used by the install auto-enable step to persist units across reboots. + */ +export function buildSystemdEnableCommand(unit: string): string[] { + return ['systemctl', '--user', 'enable', unit]; +} + +/** + * Returns the loginctl enable-linger command for a given user. + * Linger allows user systemd services to survive logout. + */ +export function buildEnableLingerCommand(user: string): string[] { + return ['loginctl', 'enable-linger', user]; +} + +/** + * Enable fleet units for boot-survival after install. + * Non-fatal: if systemctl enable returns non-zero, a warning is printed and we continue. + * If opts.enable === false (--no-enable flag), the whole step is skipped. + */ +export async function enableFleetUnits( + runner: CommandRunner, + roster: FleetRoster, + opts: { enable?: boolean }, +): Promise<void> { + if (opts.enable === false) { + return; + } + try { + let succeeded = 0; + let failed = 0; + + const holderResult = await runner( + ...splitCommand(buildSystemdEnableCommand('mosaic-tmux-holder.service')), + ); + if (holderResult.exitCode === 0) { + succeeded++; + } else { + failed++; + process.stderr.write( + `Warning: could not enable mosaic-tmux-holder.service: ${holderResult.stderr || holderResult.stdout || 'non-zero exit'}\n`, + ); + } + + for (const agent of roster.agents) { + const unit = `mosaic-agent@${agent.name}.service`; + const result = await runner(...splitCommand(buildSystemdEnableCommand(unit))); + if (result.exitCode === 0) { + succeeded++; + } else { + failed++; + process.stderr.write( + `Warning: could not enable ${unit}: ${result.stderr || result.stdout || 'non-zero exit'}\n`, + ); + } + } + + if (succeeded > 0) { + console.log(`Enabled ${succeeded} unit(s) for boot-survival.`); 
+ } + if (failed > 0) { + process.stderr.write( + `Warning: ${failed} unit(s) could not be enabled (systemctl unavailable?). Run manually if needed.\n`, + ); + } + + // Best-effort linger + let username: string; + try { + username = userInfo().username; + } catch { + username = process.env['USER'] ?? process.env['LOGNAME'] ?? 'unknown'; + } + const lingerResult = await runner(...splitCommand(buildEnableLingerCommand(username))); + if (lingerResult.exitCode !== 0) { + process.stderr.write( + `Hint: run 'loginctl enable-linger ${username}' as root to survive logout.\n`, + ); + } + } catch (err) { + process.stderr.write( + `Warning: auto-enable step failed unexpectedly: ${err instanceof Error ? err.message : String(err)}\n`, + ); + } +} + export function buildAgentSendCommand( paths: FleetPaths, agentName: string, @@ -437,32 +524,41 @@ export function parseTmuxListPanes( return { pid, command, dead, idleSeconds }; } +/** + * Maps each known runtime to the set of acceptable pane commands. + * A pane running any of these commands for the given runtime is NOT considered drifted. + * Runtimes launched via `mosaic yolo` wrap in node, so 'node' is acceptable for most. + * The dogfood runtime accepts python3/python (the canary-pi dogfood stub). + */ +export const RUNTIME_ACCEPTABLE_COMMANDS: Record<string, string[]> = { + claude: ['claude', 'node'], + codex: ['codex', 'node'], + opencode: ['opencode', 'node'], + pi: ['pi', 'node'], + dogfood: ['python3', 'python'], +}; + /** * Determine if there is a runtime drift: roster says one runtime but the pane * is actually running something from a different runtime. We detect this by - * checking if the pane command doesn't match a known canonical command for the + * checking if the pane command doesn't match a known acceptable command for the * roster's declared runtime. 
* * - * Known canonical commands per runtime: - * claude → claude - * codex → codex - * opencode → opencode - * pi → pi + * Known acceptable commands per runtime (see RUNTIME_ACCEPTABLE_COMMANDS): + * claude → claude, node (node covers mosaic yolo wrapper) + * codex → codex, node + * opencode → opencode, node + * pi → pi, node (python3 still flags drift for canary-pi dogfood stub) + * dogfood → python3, python * * If the pane is running something else (e.g., python3/dogfood-agent.py) for * an agent whose roster runtime is "pi", that's a drift. */ export function detectDrift(rosterRuntime: string, paneCommand: string | null): boolean { if (!paneCommand) return false; - const knownCommands: Record<string, string[]> = { - claude: ['claude'], - codex: ['codex'], - opencode: ['opencode'], - pi: ['pi'], - }; - const expected = knownCommands[rosterRuntime]; - if (!expected) return false; - return !expected.includes(paneCommand); + const acceptable = RUNTIME_ACCEPTABLE_COMMANDS[rosterRuntime]; + if (!acceptable) return false; + return !acceptable.includes(paneCommand); } /** @@ -706,12 +802,22 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps = cmd .command('install') .description('Install local fleet tools and user systemd units') - .action(async () => installFleet(cmd, frameworkRoot)); + .option('--no-enable', 'Skip enabling units for boot-survival') + .action(async (opts: { enable?: boolean }) => { + await installFleet(cmd, frameworkRoot); + const roster = await loadRosterForCommand(cmd); + await enableFleetUnits(runner, roster, opts); + }); cmd .command('install-systemd') .description('Install local fleet tools and user systemd units') - .action(async () => installFleet(cmd, frameworkRoot)); + .option('--no-enable', 'Skip enabling units for boot-survival') + .action(async (opts: { enable?: boolean }) => { + await installFleet(cmd, frameworkRoot); + const roster = await loadRosterForCommand(cmd); + await enableFleetUnits(runner, roster, opts); + }); for 
(const action of ['start', 'stop', 'restart'] as const) { cmd