diff --git a/docs/TASKS.md b/docs/TASKS.md index 15ac354..1ccf253 100644 --- a/docs/TASKS.md +++ b/docs/TASKS.md @@ -82,3 +82,7 @@ Active workstream is **W1 — Federation v1**. Workers should: ## north-star doctrine consolidation — doc PR — feat/north-star-doctrine - Status: applied Mos's consolidated merge-map to docs/fleet/north-star.md (budget governance + control plane/central register + 200k cap + delegation + unified-identity Fleet + role-based naming + tmux security + drift re-captures). Doctrine only; #622/#623/#625/#628 out-of-scope. Conflict checklist green. Detail: scratchpads/north-star-doctrine.md. + +## #631 — re-seed preserves user fleet data (CRITICAL) — fix/631-reseed-preserves-fleet-data + +- Status: implemented + tested. PRIMARY: install.sh PRESERVE_PATHS += fleet/\*.yaml + fleet/agents + fleet/run (glob-aware cp-fallback); TS parity. SECONDARY: refreshActiveFleetUnits propagates unit fixes to ~/.config/systemd/user on mosaic update. bash F6 + TS + unit tests green. Detail: scratchpads/631-reseed-preserves-fleet.md. diff --git a/docs/scratchpads/631-reseed-preserves-fleet.md b/docs/scratchpads/631-reseed-preserves-fleet.md new file mode 100644 index 0000000..504c184 --- /dev/null +++ b/docs/scratchpads/631-reseed-preserves-fleet.md @@ -0,0 +1,32 @@ +# #631 — re-seed must preserve user fleet data (CRITICAL data-loss) + +- **Issue:** #631 · **Branch:** `fix/631-reseed-preserves-fleet-data` + +## Root cause + +`mosaic update` auto-runs `install.sh` keep-mode sync (#610). install.sh's rsync `--delete` (keep mode) +honored PRESERVE_PATHS, but `fleet/` wasn't listed → the sync WIPED `~/.config/mosaic/fleet/roster.yaml` +(+ run/, agents/). Any user running `mosaic update` lost their roster. (overwrite mode wipes by design; +the live loss was keep mode.) 
+ +## Fix (PRIMARY) + +- install.sh PRESERVE_PATHS += `fleet/*.yaml`, `fleet/agents`, `fleet/run` — the framework still SEEDS + fleet/examples + fleet/roles + fleet/roster.schema.json (synced), but user files survive. +- Made the cp-fallback (no-rsync) GLOB-AWARE so `fleet/*.yaml` preserves every user roster there too; + fixed the restore to re-glob per-pattern (so only the user file is restored, not the whole fleet/ dir). +- file-adapter.ts (TS installer): mirrored the preserve list for parity. (TS syncDirectory is copy-only, + never --delete, so it never had the bug — belt-and-suspenders + parity.) + +## Fix (SECONDARY) + +- `refreshActiveFleetUnits()` (update-checker.ts): the re-seed updates ~/.config/mosaic/systemd/user but + systemd runs ~/.config/systemd/user, so unit fixes (#627) didn't take effect. After the re-seed, + `mosaic update` now copies the fresh mosaic-\*.service → the active dir + daemon-reload (best-effort, + only when a fleet is already installed). Wired into the cli.ts update flow. + +## Verification + +- bash F6 fixture (6 checks: roster/custom-yaml/agents/run survive + examples refreshed + schema seeded); + 20/20 migration matrix green. TS file-adapter test (roster/run/agents survive keep sync). 2 unit tests + for refreshActiveFleetUnits. tsc/eslint/prettier/sanitize clean. diff --git a/packages/mosaic/framework/install.sh b/packages/mosaic/framework/install.sh index da5d596..52d31b4 100755 --- a/packages/mosaic/framework/install.sh +++ b/packages/mosaic/framework/install.sh @@ -23,7 +23,15 @@ INSTALL_MODE="${MOSAIC_INSTALL_MODE:-prompt}" # entries (CONSTITUTION/AGENTS/STANDARDS) ARE re-applied afterward by # reconcile_framework_files (overwrite + backup-once); the rest stay user-owned. # User-created content in these paths survives rsync --delete. 
-PRESERVE_PATHS=("CONSTITUTION.md" "AGENTS.md" "SOUL.md" "USER.md" "TOOLS.md" "STANDARDS.md" "memory" "sources" "credentials") +# +# fleet/* — the framework SEEDS only fleet/examples, fleet/roles, and +# fleet/roster.schema.json (synced normally). The user's own fleet files MUST +# survive `mosaic update` (which runs this sync automatically): the active +# roster (`fleet/roster.yaml` + any other `fleet/*.yaml`), per-agent env +# (`fleet/agents/`), and heartbeat run dir (`fleet/run/`). Without these, an +# update wipes the operator's fleet. Glob entries are honored by both the rsync +# path (`--exclude`) and the glob-aware cp fallback below. +PRESERVE_PATHS=("CONSTITUTION.md" "AGENTS.md" "SOUL.md" "USER.md" "TOOLS.md" "STANDARDS.md" "memory" "sources" "credentials" "fleet/*.yaml" "fleet/agents" "fleet/run") # Framework-owned contract files: re-copied from defaults/ on every upgrade (the # user must not edit them; a divergent copy is backed up once before overwrite). @@ -179,15 +187,23 @@ sync_framework() { return fi - # Fallback: cp-based sync + # Fallback: cp-based sync. Glob-aware so entries like "fleet/*.yaml" preserve + # every matching user file (parity with the rsync --exclude path above). local preserve_tmp="" if [[ "$INSTALL_MODE" == "keep" ]]; then preserve_tmp="$(mktemp -d "${TMPDIR:-/tmp}/mosaic-preserve-XXXXXX")" + local match rel for path in "${PRESERVE_PATHS[@]}"; do - if [[ -e "$TARGET_DIR/$path" ]]; then - mkdir -p "$preserve_tmp/$(dirname "$path")" - cp -R "$TARGET_DIR/$path" "$preserve_tmp/$path" - fi + # Unquoted $path lets the glob expand against TARGET_DIR; nullglob makes a + # non-matching pattern vanish instead of staying literal. 
+ shopt -s nullglob + for match in "$TARGET_DIR/"$path; do + [[ -e "$match" ]] || continue + rel="${match#"$TARGET_DIR/"}" + mkdir -p "$preserve_tmp/$(dirname "$rel")" + cp -R "$match" "$preserve_tmp/$rel" + done + shopt -u nullglob done fi @@ -196,12 +212,19 @@ sync_framework() { rm -rf "$TARGET_DIR/.git" if [[ -n "$preserve_tmp" ]]; then + # Restore by re-globbing the SAME patterns against preserve_tmp, so each + # preserved item is restored at its own relative path (e.g. only + # fleet/roster.yaml is replaced — the freshly-synced fleet/examples stays). for path in "${PRESERVE_PATHS[@]}"; do - if [[ -e "$preserve_tmp/$path" ]]; then - rm -rf "$TARGET_DIR/$path" - mkdir -p "$TARGET_DIR/$(dirname "$path")" - cp -R "$preserve_tmp/$path" "$TARGET_DIR/$path" - fi + shopt -s nullglob + for match in "$preserve_tmp/"$path; do + [[ -e "$match" ]] || continue + rel="${match#"$preserve_tmp/"}" + rm -rf "$TARGET_DIR/$rel" + mkdir -p "$TARGET_DIR/$(dirname "$rel")" + cp -R "$match" "$TARGET_DIR/$rel" + done + shopt -u nullglob done rm -rf "$preserve_tmp" fi diff --git a/packages/mosaic/framework/tools/quality/scripts/test-install-migration.sh b/packages/mosaic/framework/tools/quality/scripts/test-install-migration.sh index 9b8647a..e584b2d 100755 --- a/packages/mosaic/framework/tools/quality/scripts/test-install-migration.sh +++ b/packages/mosaic/framework/tools/quality/scripts/test-install-migration.sh @@ -61,7 +61,25 @@ MOSAIC_HOME="$T5" MOSAIC_INSTALL_MODE=bogus MOSAIC_SYNC_ONLY=1 bash "$INSTALL" > chk "F5 failure: invalid mode rejected (nonzero exit)" "[ $rc -ne 0 ]" chk "F5 failure: SOUL + credentials intact" "grep -q orig '$T5/SOUL.md' && grep -q keepme '$T5/credentials/c.json'" -rm -rf "$T1" "$T2" "$T3" "$T4" "$T5" +# F6 — keep-mode re-seed (the `mosaic update` path) MUST NOT wipe user fleet data. +# Regression for the roster-loss bug: fleet/ was not in PRESERVE_PATHS. 
+T6=$(mktemp -d); mkdir -p "$T6/fleet/examples" "$T6/fleet/run" "$T6/fleet/agents" +printf '# persona\n' > "$T6/SOUL.md" # makes it a recognized existing install (→ keep mode) +printf 'version: 1\nagents:\n - name: coder0\n' > "$T6/fleet/roster.yaml" +printf 'version: 1\nagents:\n - name: custom\n' > "$T6/fleet/my-fleet.yaml" +printf 'ts=x\n' > "$T6/fleet/run/coder0.hb" +printf 'MOSAIC_AGENT_NAME=coder0\n' > "$T6/fleet/agents/coder0.env" +printf '# stale preset\n' > "$T6/fleet/examples/general.yaml" +echo 3 > "$T6/.framework-version" +run "$T6" keep +chk "F6 reseed: user roster.yaml SURVIVES keep-mode sync" "grep -q coder0 '$T6/fleet/roster.yaml'" +chk "F6 reseed: other user fleet/*.yaml survives (glob)" "[ -f '$T6/fleet/my-fleet.yaml' ]" +chk "F6 reseed: per-agent env (fleet/agents) survives" "[ -f '$T6/fleet/agents/coder0.env' ]" +chk "F6 reseed: heartbeat run dir (fleet/run) survives" "[ -f '$T6/fleet/run/coder0.hb' ]" +chk "F6 reseed: framework examples ARE refreshed (not preserved stale)" "grep -q orchestrator '$T6/fleet/examples/general.yaml'" +chk "F6 reseed: framework roster.schema.json seeded" "[ -f '$T6/fleet/roster.schema.json' ]" + +rm -rf "$T1" "$T2" "$T3" "$T4" "$T5" "$T6" echo echo "RESULT: $pass passed, $fail failed" [ "$fail" -eq 0 ] diff --git a/packages/mosaic/src/cli.ts b/packages/mosaic/src/cli.ts index 187bbbd..43906eb 100644 --- a/packages/mosaic/src/cli.ts +++ b/packages/mosaic/src/cli.ts @@ -27,6 +27,7 @@ import { formatAllPackagesTable, getInstallAllCommand, runFrameworkReseed, + refreshActiveFleetUnits, readRosterAgentNames, buildRelaunchCommands, FRAMEWORK_RESEED_PACKAGE, @@ -466,6 +467,12 @@ program const reseed = runFrameworkReseed(); if (reseed.ok) { console.log('✔ Framework re-seeded.'); + // Propagate shipped systemd unit fixes to the ACTIVE units (re-seed only + // touches ~/.config/mosaic/systemd/user; systemd runs ~/.config/systemd/user). 
+ const units = refreshActiveFleetUnits(); + if (units.refreshed.length > 0) { + console.log(`✔ Refreshed ${units.refreshed.length} active systemd unit(s).`); + } const agents = readRosterAgentNames(); if (agents.length > 0) { if (opts.relaunch) { diff --git a/packages/mosaic/src/config/file-adapter.test.ts b/packages/mosaic/src/config/file-adapter.test.ts index b8629d7..94f3c26 100644 --- a/packages/mosaic/src/config/file-adapter.test.ts +++ b/packages/mosaic/src/config/file-adapter.test.ts @@ -153,6 +153,30 @@ describe('FileConfigAdapter.syncFramework — defaults seeding', () => { expect(readFileSync(join(fixture.mosaicHome, 'AGENTS.md'), 'utf-8')).toBe('# AGENTS default\n'); }); + it('preserves user fleet data (roster.yaml, agents/, run/) through a keep-mode sync', async () => { + // Regression for the roster-loss bug (#631): user-authored fleet files must + // survive the framework re-seed that `mosaic update` runs. + mkdirSync(join(fixture.mosaicHome, 'fleet', 'run'), { recursive: true }); + mkdirSync(join(fixture.mosaicHome, 'fleet', 'agents'), { recursive: true }); + writeFileSync(join(fixture.mosaicHome, 'fleet', 'roster.yaml'), 'version: 1\nMINE\n'); + writeFileSync(join(fixture.mosaicHome, 'fleet', 'run', 'a.hb'), 'ts=x\n'); + writeFileSync(join(fixture.mosaicHome, 'fleet', 'agents', 'a.env'), 'X=1\n'); + // The framework ships fleet/examples — it should still seed/refresh. 
+ mkdirSync(join(fixture.sourceDir, 'fleet', 'examples'), { recursive: true }); + writeFileSync(join(fixture.sourceDir, 'fleet', 'examples', 'general.yaml'), '# preset\n'); + + const adapter = new FileConfigAdapter(fixture.mosaicHome, fixture.sourceDir); + await adapter.syncFramework('keep'); + + expect(readFileSync(join(fixture.mosaicHome, 'fleet', 'roster.yaml'), 'utf-8')).toBe( + 'version: 1\nMINE\n', + ); + expect(existsSync(join(fixture.mosaicHome, 'fleet', 'run', 'a.hb'))).toBe(true); + expect(existsSync(join(fixture.mosaicHome, 'fleet', 'agents', 'a.env'))).toBe(true); + // framework-owned fleet/examples is seeded + expect(existsSync(join(fixture.mosaicHome, 'fleet', 'examples', 'general.yaml'))).toBe(true); + }); + it('is a no-op for seeding when defaults/ dir does not exist', async () => { rmSync(fixture.defaultsDir, { recursive: true }); diff --git a/packages/mosaic/src/config/file-adapter.ts b/packages/mosaic/src/config/file-adapter.ts index 8e45617..b81260d 100644 --- a/packages/mosaic/src/config/file-adapter.ts +++ b/packages/mosaic/src/config/file-adapter.ts @@ -173,6 +173,13 @@ export class FileConfigAdapter implements ConfigService { 'memory', 'sources', 'credentials', + // User-authored fleet data MUST survive `mosaic update`'s re-seed. + // The framework seeds only fleet/examples + fleet/roles + + // fleet/roster.schema.json; the operator's roster, per-agent env, and + // heartbeat run dir stay user-owned. (Mirror of install.sh PRESERVE_PATHS.) 
+ 'fleet/*.yaml', + 'fleet/agents', + 'fleet/run', ] : []; diff --git a/packages/mosaic/src/runtime/update-checker.reseed.spec.ts b/packages/mosaic/src/runtime/update-checker.reseed.spec.ts index 932cdd4..c51b74b 100644 --- a/packages/mosaic/src/runtime/update-checker.reseed.spec.ts +++ b/packages/mosaic/src/runtime/update-checker.reseed.spec.ts @@ -7,7 +7,9 @@ import { buildRelaunchCommands, readRosterAgentNames, runFrameworkReseed, + refreshActiveFleetUnits, } from './update-checker.js'; +import { existsSync, readFileSync } from 'node:fs'; /** * F3-m3 / R13: `mosaic update` re-seeds the framework + (opt-in) relaunches @@ -83,3 +85,41 @@ describe('runFrameworkReseed', () => { rmSync(missing, { recursive: true, force: true }); }); }); + +describe('refreshActiveFleetUnits', () => { + let root: string; + let mosaicHome: string; + let configHome: string; + + beforeEach(() => { + root = mkdtempSync(join(tmpdir(), 'mosaic-units-')); + mosaicHome = join(root, 'mosaic'); + configHome = join(root, 'config'); + mkdirSync(join(mosaicHome, 'systemd', 'user'), { recursive: true }); + mkdirSync(join(configHome, 'systemd', 'user'), { recursive: true }); + // Freshly re-seeded units (new content). + writeFileSync(join(mosaicHome, 'systemd', 'user', 'mosaic-agent@.service'), 'NEW\n'); + writeFileSync(join(mosaicHome, 'systemd', 'user', 'mosaic-tmux-holder.service'), 'NEW\n'); + }); + afterEach(() => rmSync(root, { recursive: true, force: true })); + + it('refreshes active units when a fleet is already installed', () => { + // Active dir already carries mosaic units (stale) → fleet is installed. 
+ writeFileSync(join(configHome, 'systemd', 'user', 'mosaic-agent@.service'), 'OLD\n'); + const res = refreshActiveFleetUnits(mosaicHome, { + XDG_CONFIG_HOME: configHome, + } as NodeJS.ProcessEnv); + expect(res.refreshed).toContain('mosaic-agent@.service'); + expect( + readFileSync(join(configHome, 'systemd', 'user', 'mosaic-agent@.service'), 'utf-8'), + ).toBe('NEW\n'); + }); + + it('is a no-op when no fleet is installed (active dir has no mosaic units)', () => { + const res = refreshActiveFleetUnits(mosaicHome, { + XDG_CONFIG_HOME: configHome, + } as NodeJS.ProcessEnv); + expect(res.refreshed).toEqual([]); + expect(existsSync(join(configHome, 'systemd', 'user', 'mosaic-agent@.service'))).toBe(false); + }); +}); diff --git a/packages/mosaic/src/runtime/update-checker.ts b/packages/mosaic/src/runtime/update-checker.ts index d56836e..243cf90 100644 --- a/packages/mosaic/src/runtime/update-checker.ts +++ b/packages/mosaic/src/runtime/update-checker.ts @@ -14,7 +14,14 @@ */ import { execSync } from 'node:child_process'; -import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; +import { + existsSync, + mkdirSync, + readFileSync, + writeFileSync, + readdirSync, + copyFileSync, +} from 'node:fs'; import { homedir } from 'node:os'; import { dirname, join, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; @@ -536,6 +543,47 @@ export function readRosterAgentNames(mosaicHome = join(homedir(), '.config', 'mo return names; } +/** + * Refresh the ACTIVE systemd user units from the freshly re-seeded copies. + * + * The re-seed updates `~/.config/mosaic/systemd/user/*.service`, but the units + * systemd actually runs live at `~/.config/systemd/user/`. Without this copy, + * shipped unit fixes (e.g. the socket-env change) never take effect after + * `mosaic update` until `mosaic fleet install` is re-run. 
/**
 * Refresh the ACTIVE systemd user units from the freshly re-seeded copies.
 *
 * The re-seed updates `<mosaicHome>/systemd/user/*.service`, but the units
 * systemd actually runs live in `<config home>/systemd/user/`. Without this
 * copy, shipped unit fixes never take effect after `mosaic update` until
 * `mosaic fleet install` is re-run.
 *
 * Best-effort + scoped: only refreshes when a fleet is already installed (the
 * active dir already carries `mosaic-*.service` units), so non-fleet hosts are
 * untouched. This function never throws; problems are reported through
 * `ok` / `reason` so the caller can decide whether to surface them.
 *
 * @param mosaicHome - Mosaic config root holding the re-seeded `systemd/user` dir.
 * @param env - Environment consulted for `XDG_CONFIG_HOME` (injectable for tests).
 * @param reloadDaemon - Hook that makes systemd re-read unit files. Defaults to
 *   `systemctl --user daemon-reload`; injectable so tests do not shell out.
 * @returns `refreshed` lists the unit file names that were copied. `ok` is
 *   `false` (with a `reason`) when the unit directories could not be listed or
 *   at least one unit failed to copy. A failing `reloadDaemon` is non-fatal and
 *   does not affect `ok`.
 */
export function refreshActiveFleetUnits(
  mosaicHome = join(homedir(), '.config', 'mosaic'),
  env: NodeJS.ProcessEnv = process.env,
  reloadDaemon: () => void = () => {
    execSync('systemctl --user daemon-reload', { stdio: 'ignore', timeout: 15_000 });
  },
): { refreshed: string[]; ok: boolean; reason?: string } {
  const isMosaicUnit = (name: string): boolean =>
    name.startsWith('mosaic-') && name.endsWith('.service');
  const errorText = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  const src = join(mosaicHome, 'systemd', 'user');
  // `||` (not `??`) on purpose: an EMPTY XDG_CONFIG_HOME must be treated like an
  // unset one, otherwise `dest` silently becomes the cwd-relative "systemd/user".
  const configHome = env['XDG_CONFIG_HOME'] || join(homedir(), '.config');
  const dest = join(configHome, 'systemd', 'user');
  if (!existsSync(src) || !existsSync(dest)) return { refreshed: [], ok: true };

  let units: string[];
  try {
    // Only refresh when a fleet is already installed (active dir has mosaic units).
    if (!readdirSync(dest).some(isMosaicUnit)) return { refreshed: [], ok: true };
    units = readdirSync(src).filter(isMosaicUnit);
  } catch (err: unknown) {
    // e.g. EACCES, or the path exists but is not a directory — report, don't throw.
    return { refreshed: [], ok: false, reason: `cannot list systemd units: ${errorText(err)}` };
  }

  const refreshed: string[] = [];
  const failures: string[] = [];
  for (const unit of units) {
    try {
      copyFileSync(join(src, unit), join(dest, unit));
      refreshed.push(unit);
    } catch (err: unknown) {
      // Best-effort per unit: keep going, but remember what went wrong.
      failures.push(`${unit}: ${errorText(err)}`);
    }
  }

  // A reload is only useful when at least one unit file actually changed on disk.
  if (refreshed.length > 0) {
    try {
      reloadDaemon();
    } catch {
      // non-systemd host or no session bus — non-fatal
    }
  }

  if (failures.length > 0) {
    return {
      refreshed,
      ok: false,
      reason: `failed to refresh ${failures.length} unit(s): ${failures.join('; ')}`,
    };
  }
  return { refreshed, ok: true };
}