fix(install): preserve user fleet data on re-seed + refresh active units (CRITICAL) #632

Merged
jason.woltje merged 1 commits from fix/631-reseed-preserves-fleet-data into main 2026-06-22 21:38:10 +00:00
9 changed files with 216 additions and 13 deletions

View File

@@ -82,3 +82,7 @@ Active workstream is **W1 — Federation v1**. Workers should:
## north-star doctrine consolidation — doc PR — feat/north-star-doctrine
- Status: applied Mos's consolidated merge-map to docs/fleet/north-star.md (budget governance + control plane/central register + 200k cap + delegation + unified-identity Fleet + role-based naming + tmux security + drift re-captures). Doctrine only; #622/#623/#625/#628 out-of-scope. Conflict checklist green. Detail: scratchpads/north-star-doctrine.md.
## #631 — re-seed preserves user fleet data (CRITICAL) — fix/631-reseed-preserves-fleet-data
- Status: implemented + tested. PRIMARY: install.sh PRESERVE_PATHS += fleet/\*.yaml + fleet/agents + fleet/run (glob-aware cp-fallback); TS parity. SECONDARY: refreshActiveFleetUnits propagates unit fixes to ~/.config/systemd/user on mosaic update. bash F6 + TS + unit tests green. Detail: scratchpads/631-reseed-preserves-fleet.md.

View File

@@ -0,0 +1,32 @@
# #631 — re-seed must preserve user fleet data (CRITICAL data-loss)
- **Issue:** #631 · **Branch:** `fix/631-reseed-preserves-fleet-data`
## Root cause
`mosaic update` auto-runs `install.sh` keep-mode sync (#610). install.sh's rsync `--delete` (keep mode)
honored PRESERVE_PATHS, but `fleet/` wasn't listed → the sync WIPED `~/.config/mosaic/fleet/roster.yaml`
(+ run/, agents/). Any user running `mosaic update` lost their roster. (overwrite mode wipes by design;
the live loss was keep mode.)
## Fix (PRIMARY)
- install.sh PRESERVE_PATHS += `fleet/*.yaml`, `fleet/agents`, `fleet/run` — the framework still SEEDS
fleet/examples + fleet/roles + fleet/roster.schema.json (synced), but user files survive.
- Made the cp-fallback (no-rsync) GLOB-AWARE so `fleet/*.yaml` preserves every user roster there too;
fixed the restore to re-glob per-pattern (so only the user file is restored, not the whole fleet/ dir).
- file-adapter.ts (TS installer): mirrored the preserve list for parity. (TS syncDirectory is copy-only,
never --delete, so it never had the bug — belt-and-suspenders + parity.)
## Fix (SECONDARY)
- `refreshActiveFleetUnits()` (update-checker.ts): the re-seed updates ~/.config/mosaic/systemd/user but
systemd runs ~/.config/systemd/user, so unit fixes (#627) didn't take effect. After the re-seed,
`mosaic update` now copies the fresh mosaic-\*.service → the active dir + daemon-reload (best-effort,
only when a fleet is already installed). Wired into the cli.ts update flow.
## Verification
- bash F6 fixture (6 checks: roster/custom-yaml/agents/run survive + examples refreshed + schema seeded);
20/20 migration matrix green. TS file-adapter test (roster/run/agents survive keep sync). 2 unit tests
for refreshActiveFleetUnits. tsc/eslint/prettier/sanitize clean.

View File

@@ -23,7 +23,15 @@ INSTALL_MODE="${MOSAIC_INSTALL_MODE:-prompt}"
# entries (CONSTITUTION/AGENTS/STANDARDS) ARE re-applied afterward by
# reconcile_framework_files (overwrite + backup-once); the rest stay user-owned.
# User-created content in these paths survives rsync --delete.
PRESERVE_PATHS=("CONSTITUTION.md" "AGENTS.md" "SOUL.md" "USER.md" "TOOLS.md" "STANDARDS.md" "memory" "sources" "credentials")
#
# fleet/* — the framework SEEDS only fleet/examples, fleet/roles, and
# fleet/roster.schema.json (synced normally). The user's own fleet files MUST
# survive `mosaic update` (which runs this sync automatically): the active
# roster (`fleet/roster.yaml` + any other `fleet/*.yaml`), per-agent env
# (`fleet/agents/`), and heartbeat run dir (`fleet/run/`). Without these, an
# update wipes the operator's fleet. Glob entries are honored by both the rsync
# path (`--exclude`) and the glob-aware cp fallback below.
PRESERVE_PATHS=("CONSTITUTION.md" "AGENTS.md" "SOUL.md" "USER.md" "TOOLS.md" "STANDARDS.md" "memory" "sources" "credentials" "fleet/*.yaml" "fleet/agents" "fleet/run")
# Framework-owned contract files: re-copied from defaults/ on every upgrade (the
# user must not edit them; a divergent copy is backed up once before overwrite).
@@ -179,15 +187,23 @@ sync_framework() {
return
fi
# Fallback: cp-based sync
# Fallback: cp-based sync. Glob-aware so entries like "fleet/*.yaml" preserve
# every matching user file (parity with the rsync --exclude path above).
local preserve_tmp=""
if [[ "$INSTALL_MODE" == "keep" ]]; then
preserve_tmp="$(mktemp -d "${TMPDIR:-/tmp}/mosaic-preserve-XXXXXX")"
local match rel
for path in "${PRESERVE_PATHS[@]}"; do
if [[ -e "$TARGET_DIR/$path" ]]; then
mkdir -p "$preserve_tmp/$(dirname "$path")"
cp -R "$TARGET_DIR/$path" "$preserve_tmp/$path"
fi
# Unquoted $path lets the glob expand against TARGET_DIR; nullglob makes a
# non-matching pattern vanish instead of staying literal.
shopt -s nullglob
for match in "$TARGET_DIR/"$path; do
[[ -e "$match" ]] || continue
rel="${match#"$TARGET_DIR/"}"
mkdir -p "$preserve_tmp/$(dirname "$rel")"
cp -R "$match" "$preserve_tmp/$rel"
done
shopt -u nullglob
done
fi
@@ -196,12 +212,19 @@ sync_framework() {
rm -rf "$TARGET_DIR/.git"
if [[ -n "$preserve_tmp" ]]; then
# Restore by re-globbing the SAME patterns against preserve_tmp, so each
# preserved item is restored at its own relative path (e.g. only
# fleet/roster.yaml is replaced — the freshly-synced fleet/examples stays).
for path in "${PRESERVE_PATHS[@]}"; do
if [[ -e "$preserve_tmp/$path" ]]; then
rm -rf "$TARGET_DIR/$path"
mkdir -p "$TARGET_DIR/$(dirname "$path")"
cp -R "$preserve_tmp/$path" "$TARGET_DIR/$path"
fi
shopt -s nullglob
for match in "$preserve_tmp/"$path; do
[[ -e "$match" ]] || continue
rel="${match#"$preserve_tmp/"}"
rm -rf "$TARGET_DIR/$rel"
mkdir -p "$TARGET_DIR/$(dirname "$rel")"
cp -R "$match" "$TARGET_DIR/$rel"
done
shopt -u nullglob
done
rm -rf "$preserve_tmp"
fi

View File

@@ -61,7 +61,25 @@ MOSAIC_HOME="$T5" MOSAIC_INSTALL_MODE=bogus MOSAIC_SYNC_ONLY=1 bash "$INSTALL" >
chk "F5 failure: invalid mode rejected (nonzero exit)" "[ $rc -ne 0 ]"
chk "F5 failure: SOUL + credentials intact" "grep -q orig '$T5/SOUL.md' && grep -q keepme '$T5/credentials/c.json'"
rm -rf "$T1" "$T2" "$T3" "$T4" "$T5"
# F6 — keep-mode re-seed (the `mosaic update` path) MUST NOT wipe user fleet data.
# Regression for the roster-loss bug: fleet/ was not in PRESERVE_PATHS.
T6=$(mktemp -d); mkdir -p "$T6/fleet/examples" "$T6/fleet/run" "$T6/fleet/agents"
printf '# persona\n' > "$T6/SOUL.md" # makes it a recognized existing install (→ keep mode)
printf 'version: 1\nagents:\n - name: coder0\n' > "$T6/fleet/roster.yaml"
printf 'version: 1\nagents:\n - name: custom\n' > "$T6/fleet/my-fleet.yaml"
printf 'ts=x\n' > "$T6/fleet/run/coder0.hb"
printf 'MOSAIC_AGENT_NAME=coder0\n' > "$T6/fleet/agents/coder0.env"
printf '# stale preset\n' > "$T6/fleet/examples/general.yaml"
echo 3 > "$T6/.framework-version"
run "$T6" keep
chk "F6 reseed: user roster.yaml SURVIVES keep-mode sync" "grep -q coder0 '$T6/fleet/roster.yaml'"
chk "F6 reseed: other user fleet/*.yaml survives (glob)" "[ -f '$T6/fleet/my-fleet.yaml' ]"
chk "F6 reseed: per-agent env (fleet/agents) survives" "[ -f '$T6/fleet/agents/coder0.env' ]"
chk "F6 reseed: heartbeat run dir (fleet/run) survives" "[ -f '$T6/fleet/run/coder0.hb' ]"
chk "F6 reseed: framework examples ARE refreshed (not preserved stale)" "grep -q orchestrator '$T6/fleet/examples/general.yaml'"
chk "F6 reseed: framework roster.schema.json seeded" "[ -f '$T6/fleet/roster.schema.json' ]"
rm -rf "$T1" "$T2" "$T3" "$T4" "$T5" "$T6"
echo
echo "RESULT: $pass passed, $fail failed"
[ "$fail" -eq 0 ]

View File

@@ -27,6 +27,7 @@ import {
formatAllPackagesTable,
getInstallAllCommand,
runFrameworkReseed,
refreshActiveFleetUnits,
readRosterAgentNames,
buildRelaunchCommands,
FRAMEWORK_RESEED_PACKAGE,
@@ -466,6 +467,12 @@ program
const reseed = runFrameworkReseed();
if (reseed.ok) {
console.log('✔ Framework re-seeded.');
// Propagate shipped systemd unit fixes to the ACTIVE units (re-seed only
// touches ~/.config/mosaic/systemd/user; systemd runs ~/.config/systemd/user).
const units = refreshActiveFleetUnits();
if (units.refreshed.length > 0) {
console.log(`✔ Refreshed ${units.refreshed.length} active systemd unit(s).`);
}
const agents = readRosterAgentNames();
if (agents.length > 0) {
if (opts.relaunch) {

View File

@@ -153,6 +153,30 @@ describe('FileConfigAdapter.syncFramework — defaults seeding', () => {
expect(readFileSync(join(fixture.mosaicHome, 'AGENTS.md'), 'utf-8')).toBe('# AGENTS default\n');
});
it('preserves user fleet data (roster.yaml, agents/, run/) through a keep-mode sync', async () => {
// Regression for the roster-loss bug (#631): user-authored fleet files must
// survive the framework re-seed that `mosaic update` runs.
mkdirSync(join(fixture.mosaicHome, 'fleet', 'run'), { recursive: true });
mkdirSync(join(fixture.mosaicHome, 'fleet', 'agents'), { recursive: true });
writeFileSync(join(fixture.mosaicHome, 'fleet', 'roster.yaml'), 'version: 1\nMINE\n');
writeFileSync(join(fixture.mosaicHome, 'fleet', 'run', 'a.hb'), 'ts=x\n');
writeFileSync(join(fixture.mosaicHome, 'fleet', 'agents', 'a.env'), 'X=1\n');
// The framework ships fleet/examples — it should still seed/refresh.
mkdirSync(join(fixture.sourceDir, 'fleet', 'examples'), { recursive: true });
writeFileSync(join(fixture.sourceDir, 'fleet', 'examples', 'general.yaml'), '# preset\n');
const adapter = new FileConfigAdapter(fixture.mosaicHome, fixture.sourceDir);
await adapter.syncFramework('keep');
expect(readFileSync(join(fixture.mosaicHome, 'fleet', 'roster.yaml'), 'utf-8')).toBe(
'version: 1\nMINE\n',
);
expect(existsSync(join(fixture.mosaicHome, 'fleet', 'run', 'a.hb'))).toBe(true);
expect(existsSync(join(fixture.mosaicHome, 'fleet', 'agents', 'a.env'))).toBe(true);
// framework-owned fleet/examples is seeded
expect(existsSync(join(fixture.mosaicHome, 'fleet', 'examples', 'general.yaml'))).toBe(true);
});
it('is a no-op for seeding when defaults/ dir does not exist', async () => {
rmSync(fixture.defaultsDir, { recursive: true });

View File

@@ -173,6 +173,13 @@ export class FileConfigAdapter implements ConfigService {
'memory',
'sources',
'credentials',
// User-authored fleet data MUST survive `mosaic update`'s re-seed.
// The framework seeds only fleet/examples + fleet/roles +
// fleet/roster.schema.json; the operator's roster, per-agent env, and
// heartbeat run dir stay user-owned. (Mirror of install.sh PRESERVE_PATHS.)
'fleet/*.yaml',
'fleet/agents',
'fleet/run',
]
: [];

View File

@@ -7,7 +7,9 @@ import {
buildRelaunchCommands,
readRosterAgentNames,
runFrameworkReseed,
refreshActiveFleetUnits,
} from './update-checker.js';
import { existsSync, readFileSync } from 'node:fs';
/**
* F3-m3 / R13: `mosaic update` re-seeds the framework + (opt-in) relaunches
@@ -83,3 +85,41 @@ describe('runFrameworkReseed', () => {
rmSync(missing, { recursive: true, force: true });
});
});
describe('refreshActiveFleetUnits', () => {
let root: string;
let mosaicHome: string;
let configHome: string;
beforeEach(() => {
root = mkdtempSync(join(tmpdir(), 'mosaic-units-'));
mosaicHome = join(root, 'mosaic');
configHome = join(root, 'config');
mkdirSync(join(mosaicHome, 'systemd', 'user'), { recursive: true });
mkdirSync(join(configHome, 'systemd', 'user'), { recursive: true });
// Freshly re-seeded units (new content).
writeFileSync(join(mosaicHome, 'systemd', 'user', 'mosaic-agent@.service'), 'NEW\n');
writeFileSync(join(mosaicHome, 'systemd', 'user', 'mosaic-tmux-holder.service'), 'NEW\n');
});
afterEach(() => rmSync(root, { recursive: true, force: true }));
it('refreshes active units when a fleet is already installed', () => {
// Active dir already carries mosaic units (stale) → fleet is installed.
writeFileSync(join(configHome, 'systemd', 'user', 'mosaic-agent@.service'), 'OLD\n');
const res = refreshActiveFleetUnits(mosaicHome, {
XDG_CONFIG_HOME: configHome,
} as NodeJS.ProcessEnv);
expect(res.refreshed).toContain('mosaic-agent@.service');
expect(
readFileSync(join(configHome, 'systemd', 'user', 'mosaic-agent@.service'), 'utf-8'),
).toBe('NEW\n');
});
it('is a no-op when no fleet is installed (active dir has no mosaic units)', () => {
const res = refreshActiveFleetUnits(mosaicHome, {
XDG_CONFIG_HOME: configHome,
} as NodeJS.ProcessEnv);
expect(res.refreshed).toEqual([]);
expect(existsSync(join(configHome, 'systemd', 'user', 'mosaic-agent@.service'))).toBe(false);
});
});

View File

@@ -14,7 +14,14 @@
*/
import { execSync } from 'node:child_process';
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import {
existsSync,
mkdirSync,
readFileSync,
writeFileSync,
readdirSync,
copyFileSync,
} from 'node:fs';
import { homedir } from 'node:os';
import { dirname, join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
@@ -536,6 +543,47 @@ export function readRosterAgentNames(mosaicHome = join(homedir(), '.config', 'mo
return names;
}
/**
* Refresh the ACTIVE systemd user units from the freshly re-seeded copies.
*
* The re-seed updates `~/.config/mosaic/systemd/user/*.service`, but the units
* systemd actually runs live at `~/.config/systemd/user/`. Without this copy,
* shipped unit fixes (e.g. the socket-env change) never take effect after
* `mosaic update` until `mosaic fleet install` is re-run. Best-effort + scoped:
* only refreshes when a fleet is already installed (the active dir already
* carries `mosaic-*` units), so non-fleet hosts are untouched.
*/
export function refreshActiveFleetUnits(
mosaicHome = join(homedir(), '.config', 'mosaic'),
env: NodeJS.ProcessEnv = process.env,
): { refreshed: string[]; ok: boolean; reason?: string } {
const src = join(mosaicHome, 'systemd', 'user');
const configHome = env['XDG_CONFIG_HOME'] ?? join(homedir(), '.config');
const dest = join(configHome, 'systemd', 'user');
if (!existsSync(src)) return { refreshed: [], ok: true };
// Only refresh when a fleet is already installed (active dir has mosaic units).
const fleetInstalled =
existsSync(dest) &&
readdirSync(dest).some((f) => f.startsWith('mosaic-') && f.endsWith('.service'));
if (!fleetInstalled) return { refreshed: [], ok: true };
const units = readdirSync(src).filter((f) => f.startsWith('mosaic-') && f.endsWith('.service'));
const refreshed: string[] = [];
for (const unit of units) {
try {
copyFileSync(join(src, unit), join(dest, unit));
refreshed.push(unit);
} catch {
// best-effort per unit
}
}
try {
execSync('systemctl --user daemon-reload', { stdio: 'ignore', timeout: 15_000 });
} catch {
// non-systemd host or no session bus — non-fatal
}
return { refreshed, ok: true };
}
/** Build the per-agent systemd relaunch commands (drain+relaunch via restart). */
export function buildRelaunchCommands(agentNames: string[]): string[][] {
return agentNames.map((name) => [