fix(fleet): boot-survival symmetry — disable-on-remove + add-enable + init-R5 (#612)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful

Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
This commit was merged in pull request #612.
This commit is contained in:
2026-06-22 08:12:58 +00:00
committed by jason.woltje
parent 8ddd48c843
commit d46ac40890
4 changed files with 203 additions and 6 deletions

View File

@@ -227,6 +227,15 @@ export function buildSystemdEnableCommand(unit: string): string[] {
return ['systemctl', '--user', 'enable', unit];
}
/**
* Returns the systemctl --user disable command for a given unit.
* Used by `fleet remove` so a removed agent's enabled unit cannot resurrect on
* boot pointing at deleted config (boot-survival symmetry with enable-on-add).
*/
export function buildSystemdDisableCommand(unit: string): string[] {
return ['systemctl', '--user', 'disable', unit];
}
/**
* Returns the loginctl enable-linger command for a given user.
* Linger allows user systemd services to survive logout.
@@ -872,15 +881,19 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
await mkdir(dirname(destination), { recursive: true });
await writeFile(destination, content);
// Validate: exactly one orchestrator required (R5) — friendly summary on success.
// Guarantee R5: exactly one orchestrator for every profile except the
// sanctioned no-orchestrator `minimal` preset. A mismatch means a
// corrupted/edited preset — fail hard rather than write a malformed fleet.
const written = await loadFleetRoster(destination);
const orchCount = countOrchestrators(written);
if (orchCount !== 1) {
process.stderr.write(
`Warning: fleet roster at ${destination} has ${orchCount} orchestrator agent(s) (expected exactly 1).\n`,
);
if (profile === 'minimal') {
console.log(
`Initialized ${profile} fleet: ${written.agents.length} agent(s). Next: mosaic fleet install`,
`Initialized ${profile} fleet: ${written.agents.length} agent(s) (no orchestrator). Next: mosaic fleet install`,
);
} else if (orchCount !== 1) {
throw new Error(
`Fleet init failed: the "${profile}" roster has ${orchCount} orchestrator agent(s), ` +
`expected exactly 1 (R5). The preset may be corrupted — re-install the framework.`,
);
} else {
const workerCount = written.agents.length - 1;
@@ -1218,6 +1231,24 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
console.log(`Added ${name} (${opts.runtime}/${opts.class}) to the fleet.`);
// Enable the unit for boot-survival (non-fatal) — symmetry with
// disable-on-remove. Independent of --start so a queued agent still
// survives a reboot once its unit exists.
try {
const enableResult = await runner(
...splitCommand(buildSystemdEnableCommand(`mosaic-agent@${name}.service`)),
);
if (enableResult.exitCode !== 0) {
process.stderr.write(
`Warning: could not enable mosaic-agent@${name}.service: ${enableResult.stderr || enableResult.stdout || 'non-zero exit'}\n`,
);
}
} catch (err) {
process.stderr.write(
`Warning: enable command failed for ${name}: ${err instanceof Error ? err.message : String(err)}\n`,
);
}
if (opts.start !== false) {
await runChecked(runner, buildFleetServiceCommand('start', name));
console.log(`Started mosaic-agent@${name}.service.`);
@@ -1254,6 +1285,26 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
);
}
// Disable the unit (non-fatal) so an enabled instance cannot resurrect on
// boot pointing at the now-deleted config — boot-survival symmetry with
// enable-on-add. Skipped only when --keep-files keeps the config in place.
if (!opts.keepFiles) {
try {
const disableResult = await runner(
...splitCommand(buildSystemdDisableCommand(`mosaic-agent@${name}.service`)),
);
if (disableResult.exitCode !== 0) {
process.stderr.write(
`Warning: could not disable mosaic-agent@${name}.service: ${disableResult.stderr || disableResult.stdout || 'non-zero exit'}\n`,
);
}
} catch (err) {
process.stderr.write(
`Warning: disable command failed for ${name}: ${err instanceof Error ? err.message : String(err)}\n`,
);
}
}
// Write updated roster
await writeFile(rosterPath, serializeRosterToYaml(updatedRoster));