Compare commits

...

12 Commits

Author SHA1 Message Date
Jarvis
af7dd3fa7c ci: re-trigger pipeline (flaky pglite WASM OOM in packages/db, unrelated)
All checks were successful
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
The push/ci run for the prior commit failed only in packages/db's
src/migrate.test.ts with 'memory access out of bounds' inside the pglite
WASM module — a known-flaky in-memory-Postgres crash under CI memory
pressure. The pr/ci pipeline passed on the identical tree, and this PR
changes only a bash launcher script (no TS / no db package), so the
failure cannot originate here. Empty commit to re-run CI.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-24 00:04:41 -05:00
Jarvis
644d2805d2 fix(fleet): pre-trust claude agent workdir to clear the folder-trust gate (#644)
Fleet-launched Claude agents stall forever at Claude Code's one-time
"Is this a project you trust?" folder-trust prompt: there is no human in
the pane to answer it, yet the heartbeat keeps reporting "healthy" because
the pane process is alive — it's just blocked. This is the most common
fleet outage (F1 / premature stop).

--dangerously-skip-permissions does NOT bypass this gate, and neither does
`trustedProjectDirectories` in settings.json (both verified empirically on
2026-06-24). The only record the gate honors is the per-project entry in
~/.claude.json: projects["<dir>"].hasTrustDialogAccepted == true — exactly
what answering the prompt writes.

start-agent-session.sh now pre-seeds that record for the claude runtime
before launching the pane. The seeding is:
- claude-only (codex/pi have no such gate),
- idempotent (no-op when already trusted),
- atomic (tempfile + os.replace; never corrupts a partial/unreadable file),
- flock-serialized across concurrent agent launches sharing ~/.claude.json,
- best-effort (any failure is non-fatal — the agent still launches, worst
  case it falls back to the pre-fix behavior).

Verified end-to-end: with /home/jarvis untrusted, the modified launcher
flips hasTrustDialogAccepted to true and Claude boots straight to the ready
prompt with no gate.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-24 00:04:41 -05:00
16ae809442 fix(update): re-seed framework on version drift, not just in-command updates (#642) (#646)
All checks were successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
2026-06-24 05:04:34 +00:00
6980e40e51 fix(db): stop pglite migration tests flaking CI (timeout + WASM OOM) (#647)
Some checks failed
ci/woodpecker/push/ci Pipeline was canceled
ci/woodpecker/push/publish Pipeline was canceled
2026-06-24 05:04:28 +00:00
e6b53ea103 fix(tools): default AGENT_WORK_ROOT to $HOME/mosaic/agent-work (#641)
Some checks failed
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/push/ci Pipeline was canceled
2026-06-23 13:40:13 +00:00
4da87640e8 feat(tmux): agent-send.sh --class triage tag for the comms daemon (#552)
All checks were successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
2026-06-23 03:25:16 +00:00
a38a491403 chore(release): mosaic CLI 0.0.41 (#640)
All checks were successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
2026-06-23 02:21:04 +00:00
78d67c6261 chore(ci): bump ci-base image node 22 → 24-alpine (#639)
All checks were successful
ci/woodpecker/push/ci-image Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
2026-06-23 00:59:39 +00:00
94e5cd7a81 ci: eliminate cold pnpm install via pre-baked CI base image (Phase 1) (#635)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
2026-06-22 22:50:21 +00:00
4e84f8e850 feat(fleet): comms-block emitter + FLEET-LAUNCH runbook (#633) (#638)
Some checks failed
ci/woodpecker/push/ci Pipeline was canceled
ci/woodpecker/push/publish Pipeline was canceled
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-06-22 22:23:50 +00:00
cf8ceb3095 CI: add pre-baked ci-base image (producer) [Phase 1a] (#637)
Some checks failed
ci/woodpecker/push/ci-image Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was canceled
2026-06-22 22:20:48 +00:00
bf2a6745c8 fix(install): preserve user fleet data on re-seed + refresh active units (CRITICAL) (#632)
All checks were successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-06-22 21:38:09 +00:00
30 changed files with 1079 additions and 75 deletions

7
.gitignore vendored
View File

@@ -15,3 +15,10 @@ infra/step-ca/dev-password
# Scratch dirs created by the framework git-wrapper shell test harnesses # Scratch dirs created by the framework git-wrapper shell test harnesses
.mosaic-test-work/ .mosaic-test-work/
# Transient config files vite/vitest/esbuild write next to a *.config.ts while
# loading it, then unlink. They are untracked but were not ignored, so turbo's
# package traversal hashed them and intermittently failed CI with "Package
# traversal error: ... .timestamp-*.mjs: No such file or directory" when the
# file vanished mid-scan. Ignoring them removes the race.
*.timestamp-*.mjs

4
.npmrc
View File

@@ -1 +1,5 @@
@mosaicstack:registry=https://git.mosaicstack.dev/api/packages/mosaicstack/npm/ @mosaicstack:registry=https://git.mosaicstack.dev/api/packages/mosaicstack/npm/
# Pin the pnpm store to the same path the ci-base image warms (Dockerfile.ci),
# so the pipeline `pnpm install --prefer-offline` consumes the baked store
# instead of repopulating a fresh one.
store-dir=/root/.local/share/pnpm/store

40
.woodpecker/ci-image.yml Normal file
View File

@@ -0,0 +1,40 @@
# Build & push the pre-baked CI base image (Dockerfile.ci) to the Gitea
# registry CI already publishes to. Reuses the exact kaniko + auth pattern
# from publish.yml (REGISTRY_USER/REGISTRY_PASS from_secret, /kaniko/.docker
# config.json). Other pipelines (ci.yml, publish.yml) pull `ci-base:latest`
# for their install step.
#
# Rebuild ONLY when the dependency set or the image recipe changes — a normal
# code push must not trigger a 25-min image build. `path` applies to push/PR
# events; `event: tag` (releases) rebuilds unconditionally so a tagged release
# always ships a fresh base.
when:
- event: tag
- event: [push, manual]
branch: main
path:
include:
- 'pnpm-lock.yaml'
- 'Dockerfile.ci'
steps:
build-ci-base:
image: gcr.io/kaniko-project/executor:debug
environment:
REGISTRY_USER:
from_secret: gitea_username
REGISTRY_PASS:
from_secret: gitea_password
CI_COMMIT_BRANCH: ${CI_COMMIT_BRANCH}
CI_COMMIT_TAG: ${CI_COMMIT_TAG}
CI_COMMIT_SHA: ${CI_COMMIT_SHA}
commands:
- mkdir -p /kaniko/.docker
- echo "{\"auths\":{\"git.mosaicstack.dev\":{\"username\":\"$REGISTRY_USER\",\"password\":\"$REGISTRY_PASS\"}}}" > /kaniko/.docker/config.json
- |
# Lockfile-hash tag: an immutable identity for the exact dep set baked
# into this image. `:latest` is the mutable pointer pipelines consume.
LOCK_HASH=$(sha256sum pnpm-lock.yaml | cut -c1-12)
DESTINATIONS="--destination git.mosaicstack.dev/mosaicstack/stack/ci-base:latest"
DESTINATIONS="$DESTINATIONS --destination git.mosaicstack.dev/mosaicstack/stack/ci-base:lock-$LOCK_HASH"
/kaniko/executor --context . --dockerfile Dockerfile.ci $DESTINATIONS

View File

@@ -1,5 +1,9 @@
# &node_image is the pre-baked CI base built by .woodpecker/ci-image.yml:
# node:24-alpine + python3/make/g++/postgresql-client + pnpm + a warm pnpm
# store. The install step resolves from the baked store (--prefer-offline)
# instead of paying a ~731s cold fetch + native compile every run.
variables: variables:
- &node_image 'node:22-alpine' - &node_image 'git.mosaicstack.dev/mosaicstack/stack/ci-base:latest'
- &enable_pnpm 'corepack enable' - &enable_pnpm 'corepack enable'
when: when:
@@ -15,8 +19,9 @@ steps:
image: *node_image image: *node_image
commands: commands:
- corepack enable - corepack enable
- apk add --no-cache python3 make g++ # python3/make/g++ are baked into ci-base; --prefer-offline resolves from
- pnpm install --frozen-lockfile # the baked pnpm store.
- pnpm install --frozen-lockfile --prefer-offline
# Blocking gate: public framework package must contain no operator-specific # Blocking gate: public framework package must contain no operator-specific
# personal data or private $HOME defaults. Runs early (no node_modules needed). # personal data or private $HOME defaults. Runs early (no node_modules needed).
@@ -64,8 +69,7 @@ steps:
DATABASE_URL: postgresql://mosaic:mosaic@ci-postgres:5432/mosaic DATABASE_URL: postgresql://mosaic:mosaic@ci-postgres:5432/mosaic
commands: commands:
- *enable_pnpm - *enable_pnpm
# Install postgresql-client for pg_isready # postgresql-client (pg_isready) is baked into ci-base.
- apk add --no-cache postgresql-client
# Wait up to 60s for CI postgres to be ready; fail fast if it never comes up. # Wait up to 60s for CI postgres to be ready; fail fast if it never comes up.
- | - |
ready=0 ready=0

View File

@@ -2,7 +2,9 @@
# Runs only on main branch push/tag # Runs only on main branch push/tag
variables: variables:
- &node_image 'node:22-alpine' # Pre-baked CI base (see .woodpecker/ci-image.yml): node:24-alpine +
# toolchain + warm pnpm store. Kills the second cold install publish pays.
- &node_image 'git.mosaicstack.dev/mosaicstack/stack/ci-base:latest'
- &enable_pnpm 'corepack enable' - &enable_pnpm 'corepack enable'
# Heavy kaniko image builds (~25 min) — gate them so a merge that only touches # Heavy kaniko image builds (~25 min) — gate them so a merge that only touches
# the npm-only CLI (@mosaicstack/mosaic) or docs does NOT rebuild the platform # the npm-only CLI (@mosaicstack/mosaic) or docs does NOT rebuild the platform
@@ -31,7 +33,8 @@ steps:
image: *node_image image: *node_image
commands: commands:
- corepack enable - corepack enable
- pnpm install --frozen-lockfile # Resolve from the baked pnpm store instead of a cold network fetch.
- pnpm install --frozen-lockfile --prefer-offline
build: build:
image: *node_image image: *node_image

45
Dockerfile.ci Normal file
View File

@@ -0,0 +1,45 @@
# Pre-baked CI base image for Woodpecker pipelines.
#
# Purpose: eliminate the cold `pnpm install` that dominates every pipeline
# (~731s median). This image ships the native toolchain (no per-run `apk add`)
# AND a warm, content-addressable pnpm store with the dependency-tree tarballs
# already fetched at build time. `pnpm fetch` only populates the store from the
# lockfile — it does NOT run the native node-gyp builds (better-sqlite3,
# node-pty, sqlite3, canvas, sharp); those still compile at `pnpm install`,
# which is exactly why the musl toolchain stays baked into this image. A
# pipeline `pnpm install --frozen-lockfile --prefer-offline` then resolves
# tarballs from local hard-links (no network) and compiles natives against the
# already-present toolchain, in tens of seconds instead of ~731s.
#
# Rebuilt only when `pnpm-lock.yaml` or this Dockerfile change
# (see .woodpecker/ci-image.yml).
#
# Node version is pinned to 24 (Active LTS). This is the follow-up bump from
# node:22 — sequenced AFTER the CI cache work landed so the runtime change
# carries zero cache variables. node:26 stays held until it reaches LTS
# (Oct 2026); the Current line risks native-module (node-gyp) breakage on a
# runner that compiles better-sqlite3 / canvas / sharp / node-pty from source.
FROM node:24-alpine
# Native toolchain required to compile node-gyp deps on musl, plus the
# postgresql-client used by the test step's pg_isready readiness probe. `bash`
# is baked here too — the sanitization step in ci.yml otherwise does a per-run
# `apk add bash`.
RUN apk add --no-cache python3 make g++ postgresql-client bash
# Pin pnpm to the repo's packageManager version via corepack.
RUN corepack enable && corepack prepare pnpm@10.6.2 --activate
WORKDIR /app
# Pin the store location so the pipeline can point `store-dir` at the same path.
ENV PNPM_HOME=/root/.local/share/pnpm
RUN pnpm config set store-dir /root/.local/share/pnpm/store
# Warm the store. `pnpm fetch` populates the content-addressable store with the
# dependency tarballs directly from the lockfile (no package.json / workspace
# needed), so a baked store stays valid until the lockfile changes. Note:
# `fetch` does NOT compile native modules — that happens later at `pnpm install`
# in the pipeline, against the toolchain baked above.
COPY pnpm-lock.yaml ./
RUN pnpm fetch --frozen-lockfile

View File

@@ -82,3 +82,11 @@ Active workstream is **W1 — Federation v1**. Workers should:
## north-star doctrine consolidation — doc PR — feat/north-star-doctrine ## north-star doctrine consolidation — doc PR — feat/north-star-doctrine
- Status: applied Mos's consolidated merge-map to docs/fleet/north-star.md (budget governance + control plane/central register + 200k cap + delegation + unified-identity Fleet + role-based naming + tmux security + drift re-captures). Doctrine only; #622/#623/#625/#628 out-of-scope. Conflict checklist green. Detail: scratchpads/north-star-doctrine.md. - Status: applied Mos's consolidated merge-map to docs/fleet/north-star.md (budget governance + control plane/central register + 200k cap + delegation + unified-identity Fleet + role-based naming + tmux security + drift re-captures). Doctrine only; #622/#623/#625/#628 out-of-scope. Conflict checklist green. Detail: scratchpads/north-star-doctrine.md.
## #631 — re-seed preserves user fleet data (CRITICAL) — fix/631-reseed-preserves-fleet-data
- Status: implemented + tested. PRIMARY: install.sh PRESERVE_PATHS += fleet/\*.yaml + fleet/agents + fleet/run (glob-aware cp-fallback); TS parity. SECONDARY: refreshActiveFleetUnits propagates unit fixes to ~/.config/systemd/user on mosaic update. bash F6 + TS + unit tests green. Detail: scratchpads/631-reseed-preserves-fleet.md.
## #633 — comms-block emitter + FLEET-LAUNCH runbook — feat/633-comms-block-runbook
- Status: implemented + tested (TDD). `mosaic fleet comms-block <role> [--host]` wraps resolveCommsBlock → readFleetCommsBlock; fails loud (stderr + exit 1) on unknown role / missing roster instead of silent empty. docs/fleet/FLEET-LAUNCH.md runbook: worker path + orchestrator .env fold (MOSAIC_AGENT_COMMAND; line-41 [-z] short-circuits line-44 yolo hardcode) + 3 launch gotchas + #632 preserve note + North-Star 4-field arc (harness ✅/model ✅ roster-native today; yolo + command/channels = PATH B #636). 177 fleet+comms tests green (6 new resolveCommsBlock cases). PATH A of the A→B→webUI arc. Detail: scratchpads/633-comms-block-runbook.md.

114
docs/fleet/FLEET-LAUNCH.md Normal file
View File

@@ -0,0 +1,114 @@
# Fleet Launch Runbook
How every Mosaic fleet agent — workers **and** the orchestrator — is launched, and how to
configure each one. The guiding principle: **one roster-driven launcher**. There is no bespoke
per-agent launch script; the roster plus per-agent `.env` files are the single source of launch
config.
## The launch chain
| Layer | File | Responsibility |
| ---------------- | ------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------- |
| systemd unit | `mosaic-agent@<role>.service` | One templated unit per role; `ExecStart` runs the session launcher with the instance name `%i`. Defaults `MOSAIC_AGENT_RUNTIME=pi`, `MOSAIC_AGENT_NAME=%i`. |
| session launcher | `tools/fleet/start-agent-session.sh <role>` | Builds the launch command, opens the tmux pane, wires the heartbeat. |
| launch command | `mosaic yolo <runtime>` (or a per-agent override) | Replaces the pane's foreground process with the runtime, fully seeded. |
| seeding | `mosaic`'s `composeContract()` | Injects the Constitution/USER/TOOLS/runtime contract, `*.local` overlays, **and** the Fleet-Comms cheat-sheet — all via `--append-system-prompt`. |
Per-agent overrides live in `fleet/agents/<role>.env`, generated from `roster.yaml` by
`generateAgentEnv` (`packages/mosaic/src/commands/fleet.ts`) and consumed by the launcher.
## Worker launch path (default)
1. `roster.yaml` carries each agent's `runtime` and optional `model_hint`.
2. `generateAgentEnv` emits `fleet/agents/<role>.env` with `MOSAIC_AGENT_NAME`,
`MOSAIC_AGENT_RUNTIME`, and `MOSAIC_AGENT_MODEL`.
3. With no `MOSAIC_AGENT_COMMAND` set in its environment, `start-agent-session.sh` falls through to
its default (line ~44):
```sh
MOSAIC_AGENT_COMMAND="mosaic yolo $MOSAIC_AGENT_RUNTIME${MOSAIC_AGENT_MODEL:+ --model $MOSAIC_AGENT_MODEL}"
```
4. The launcher bakes `MOSAIC_AGENT_NAME` into the pane command (line ~118), so `composeContract`
can inject the Fleet-Comms cheat-sheet for that role.
That is the whole worker path: roster → `.env` → `mosaic yolo <runtime>` → seeded pane.
## Orchestrator fold (PATH A — ships today)
The orchestrator is **just another roster agent** launched through the canonical path — not a
snowflake script.
| Piece | Value |
| ------------------ | ----------------------------------- |
| host-side launcher | `orchestrator-launch.sh` |
| systemd unit | `mosaic-fleet-orchestrator.service` |
| tmux session | `orchestrator` (role-named) |
Set its launch command via `fleet/agents/orchestrator.env`:
```sh
MOSAIC_AGENT_COMMAND='mosaic yolo claude --channels plugin:discord@<channel>'
```
When `MOSAIC_AGENT_COMMAND` is set, `start-agent-session.sh`'s `if [ -z "$MOSAIC_AGENT_COMMAND" ]`
guard (line ~41) is false, so the line-44 default — **including its hardcoded `yolo`** — is skipped
entirely. The override fully controls the runtime and flags. Routing through `mosaic yolo claude`
(rather than a raw `claude` invocation) is what gives the orchestrator the same full
`composeContract` seeding + Fleet-Comms cheat-sheet as every worker, with `--channels` and any
other flags passed straight through to the `claude` binary.
## Launch gotchas
1. **Flag conflict.** `mosaic yolo claude` already injects `--dangerously-skip-permissions`. Do
**not** also pass `--permission-mode bypassPermissions` — the `claude` binary would receive both.
Use `mosaic yolo claude …` alone (yolo covers the unattended posture), **or** non-yolo
`mosaic claude --permission-mode bypassPermissions …`. Never mix the two.
2. **`MOSAIC_AGENT_NAME` must reach the pane.** The launcher bakes it from the instance name, and
`composeContract` (in `launch.ts`) gates the Fleet-Comms block on it — **and**
the role must be a member of `roster.yaml`, or the block resolves empty.
3. **`launchRuntime` guards.** `mosaic yolo claude` runs `checkSoul` / `checkRuntime` /
`checkSequentialThinking`. The host needs `SOUL.md` and the sequential-thinking MCP, or the
launch aborts (a raw `claude` invocation skipped these checks). Dry-run the composed command in a
throwaway tmux session before swapping a live launcher.
## Why per-agent `.env` survives upgrades (#632)
`install.sh` `PRESERVE_PATHS` includes `fleet/*.yaml`, `fleet/agents`, and `fleet/run`, so
`mosaic update`'s framework re-seed **preserves** your roster and per-agent `.env` overrides
(glob-aware `cp` fallback; matching TS parity in `file-adapter.ts`). Before #632, an auto re-seed
could wipe them — which is exactly why PATH A's `.env` override is safe to rely on now.
## Inspecting the comms wiring
- `mosaic fleet comms-block <role>` prints the Fleet-Comms cheat-sheet a given role receives at
launch — its `[host:session]` identity, the exact `agent-send.sh` command for each peer, and the
FLIP / `--verify` conventions. `--host <h>` previews a cross-host view. An unknown role or missing
roster **fails loud** (stderr + non-zero exit), so a typo is never a silent no-op.
- Versus `mosaic compose-contract <runtime>`: that emits the **whole** system prompt and reads the
role from `MOSAIC_AGENT_NAME` (a full-prompt smoke test). `comms-block` is the targeted,
explicit-arg, comms-only view — e.g. `mosaic fleet comms-block coder0-0` to preview a peer.
## North Star / future direction
**Vision:** a webUI lets the user edit each agent's launch config — switch **harness**
(claude / pi / codex / opencode), toggle **yolo**, pick a **model**, set a **command/channels**
override — with no terminal.
**Continuity — this is not a new launch path.** It is a data-model + UI-binding layer over the
existing roster-driven launcher. Field-by-field status today:
| Launch-config field | Roster-native today? | Mechanism / gap |
| ------------------------ | -------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **harness** (`runtime`) | ✅ end-to-end | `roster.runtime` → `generateAgentEnv` emits `MOSAIC_AGENT_RUNTIME` → launcher line 44. UI just writes the field. |
| **model** (`model_hint`) | ✅ end-to-end | `roster.model_hint` → `MOSAIC_AGENT_MODEL` → launcher line 44 `--model`. UI just writes the field. |
| **yolo** | ❌ new | Launcher line 44 **hardcodes** `mosaic yolo`. A non-yolo toggle needs a roster `yolo` field → emit `MOSAIC_AGENT_YOLO` → make line 44 conditional. |
| **command / channels** | ❌ new | `MOSAIC_AGENT_COMMAND` is **consumed** (launcher line ~12) but `generateAgentEnv` does not emit it. Needs a roster `command`/`channels` field → emitted. |
**The arc:**
- **A** — `.env` `MOSAIC_AGENT_COMMAND` hatch: manual, ships now, kept safe across upgrades by #632.
- **B** — roster-native launch-config: harness + model are already there; add the **yolo** toggle
(line-44 conditional) and **command/channels** emission to complete the data model.
- **webUI** — binds dropdowns/toggles directly to those four roster fields.
PATH A's `.env` override is the **manual form** of exactly what PATH B makes roster-native and the
webUI edits — one continuous arc, not three separate features. PATH B is tracked as #636.

View File

@@ -0,0 +1,32 @@
# #631 — re-seed must preserve user fleet data (CRITICAL data-loss)
- **Issue:** #631 · **Branch:** `fix/631-reseed-preserves-fleet-data`
## Root cause
`mosaic update` auto-runs `install.sh` keep-mode sync (#610). install.sh's rsync `--delete` (keep mode)
honored PRESERVE_PATHS, but `fleet/` wasn't listed → the sync WIPED `~/.config/mosaic/fleet/roster.yaml`
(+ run/, agents/). Any user running `mosaic update` lost their roster. (Overwrite mode wipes by design;
the live loss was in keep mode.)
## Fix (PRIMARY)
- install.sh PRESERVE_PATHS += `fleet/*.yaml`, `fleet/agents`, `fleet/run` — the framework still SEEDS
fleet/examples + fleet/roles + fleet/roster.schema.json (synced), but user files survive.
- Made the cp-fallback (no-rsync) GLOB-AWARE so `fleet/*.yaml` preserves every user roster there too;
fixed the restore to re-glob per-pattern (so only the user file is restored, not the whole fleet/ dir).
- file-adapter.ts (TS installer): mirrored the preserve list for parity. (TS syncDirectory is copy-only,
never --delete, so it never had the bug — belt-and-suspenders + parity.)
## Fix (SECONDARY)
- `refreshActiveFleetUnits()` (update-checker.ts): the re-seed updates ~/.config/mosaic/systemd/user but
systemd runs ~/.config/systemd/user, so unit fixes (#627) didn't take effect. After the re-seed,
`mosaic update` now copies the fresh mosaic-\*.service → the active dir + daemon-reload (best-effort,
only when a fleet is already installed). Wired into the cli.ts update flow.
## Verification
- bash F6 fixture (6 checks: roster/custom-yaml/agents/run survive + examples refreshed + schema seeded);
20/20 migration matrix green. TS file-adapter test (roster/run/agents survive keep sync). 2 unit tests
for refreshActiveFleetUnits. tsc/eslint/prettier/sanitize clean.

View File

@@ -0,0 +1,54 @@
# #633 — comms-block emitter + FLEET-LAUNCH runbook
Branch: `feat/633-comms-block-runbook` (off `bf2a6745`, post-#632 merge)
Issue: #633 · Follow-up filed: #636 (PATH B)
## Goal
PATH A of the orchestrator-launch fix: give every launch path the Fleet-Comms onboarding, and
document the canonical roster-driven launcher so the orchestrator stops being a bespoke snowflake.
## Deliverables
1. **`mosaic fleet comms-block <role> [--host <h>]`** — explicit-arg, comms-block-only emitter.
- Backed by new `resolveCommsBlock(mosaicHome, role, fleetHost?)` in `fleet/comms-onboarding.ts`
returning `{ ok, output, error }`.
- Unlike `readFleetCommsBlock` (returns `''` on any miss so `composeContract` can no-op silently
during launch), the emitter **fails loud**: unknown role / missing roster → `ok:false` → CLI
prints to stderr + sets `process.exitCode = 1`. A typo is never a silent no-op.
- Distinct from `mosaic compose-contract <runtime>` (whole prompt, env-coupled via
`MOSAIC_AGENT_NAME`); comms-block is the targeted, explicit-arg, comms-only view.
2. **`docs/fleet/FLEET-LAUNCH.md`** — worker path + orchestrator `.env` fold + 3 launch gotchas +
#632 preserve note + North-Star 4-field arc.
## Key findings (drove the design)
- `mosaic yolo claude` **already** forwards `--channels`/`--permission-mode` to the binary
(`launch.ts` claude case `cliArgs.push(...args)`) AND injects the comms block via
`composeContract` → `readFleetCommsBlock(home, env.MOSAIC_AGENT_NAME)`. So no `launch.ts` change
was needed — PATH A is `.env` + doc only.
- `start-agent-session.sh` line ~41 `[ -z "$MOSAIC_AGENT_COMMAND" ]` short-circuits the line-44
default, so an `.env` `MOSAIC_AGENT_COMMAND` override bypasses the hardcoded `yolo` entirely — the
yolo-conditional is therefore a PATH B (default-path) concern, not PATH A.
- `generateAgentEnv` (`fleet.ts` ~202-207) emits NAME/RUNTIME/MODEL but **not** `MOSAIC_AGENT_COMMAND`
— the seam PATH B (#636) closes.
## A → B → webUI arc (North Star)
- A = `.env` `MOSAIC_AGENT_COMMAND` hatch (manual, ships now, #632-safe).
- B (#636) = roster-native launch-config: harness ✅ + model ✅ already there; add **yolo** (line-44
conditional `MOSAIC_AGENT_YOLO`) + **command/channels** (`generateAgentEnv` emission).
- webUI binds dropdowns/toggles to those four roster fields. One launcher, no new launch path.
## Results
- TDD: spec first (`comms-onboarding.spec.ts`, 6 new `resolveCommsBlock` cases) → red → implement → green.
- `fleet.spec.ts` subcommand-list assertion extended with `comms-block`.
- 177 fleet+comms tests green; typecheck clean; eslint clean; prettier clean.
## Risks / notes
- Pre-existing local-only failure `uninstall.spec.ts > removeFramework > handles missing mosaicHome
gracefully` (EACCES on `/nonexistent` as non-root) — unrelated to #633, passes in CI as root.
- Did NOT run `mosaic update` or anything else that auto-re-seeds: the installed CLI is still 0.0.40 (the
roster-wipe bug stays live until mos-claude-0 ships 0.0.41). All work is in-repo + vitest and never
touches the live mosaic home.

View File

@@ -28,6 +28,7 @@ export default tseslint.config(
'apps/web/e2e/helpers/*.ts', 'apps/web/e2e/helpers/*.ts',
'apps/web/playwright.config.ts', 'apps/web/playwright.config.ts',
'apps/gateway/vitest.config.ts', 'apps/gateway/vitest.config.ts',
'packages/db/vitest.config.ts',
'packages/storage/vitest.config.ts', 'packages/storage/vitest.config.ts',
'packages/mosaic/__tests__/*.ts', 'packages/mosaic/__tests__/*.ts',
'tools/federation-harness/*.ts', 'tools/federation-harness/*.ts',

View File

@@ -4,5 +4,22 @@ export default defineConfig({
test: { test: {
globals: true, globals: true,
environment: 'node', environment: 'node',
// The migration suite spins up a real PGlite (WASM Postgres) instance per
// test and applies the full drizzle migration set. Each case legitimately
// takes ~5s locally and considerably longer on CI, where turbo runs many
// packages' test suites concurrently. The 5s vitest default then expires
// mid-migration and the run fails as a phantom "Test timed out in 5000ms"
// (often surfacing the underlying WASM `memory access out of bounds` when
// the heap is starved). Give migrations real headroom.
testTimeout: 120_000,
hookTimeout: 120_000,
// Each PGlite instance carries a multi-hundred-MB WASM heap. Running test
// files in parallel forks multiplies that peak and is what tips the CI
// runner into the WASM OOM. A single fork keeps only one instance resident
// at a time — slightly slower, but deterministic.
pool: 'forks',
poolOptions: {
forks: { singleFork: true },
},
}, },
}); });

View File

@@ -23,7 +23,15 @@ INSTALL_MODE="${MOSAIC_INSTALL_MODE:-prompt}"
# entries (CONSTITUTION/AGENTS/STANDARDS) ARE re-applied afterward by # entries (CONSTITUTION/AGENTS/STANDARDS) ARE re-applied afterward by
# reconcile_framework_files (overwrite + backup-once); the rest stay user-owned. # reconcile_framework_files (overwrite + backup-once); the rest stay user-owned.
# User-created content in these paths survives rsync --delete. # User-created content in these paths survives rsync --delete.
PRESERVE_PATHS=("CONSTITUTION.md" "AGENTS.md" "SOUL.md" "USER.md" "TOOLS.md" "STANDARDS.md" "memory" "sources" "credentials") #
# fleet/* — the framework SEEDS only fleet/examples, fleet/roles, and
# fleet/roster.schema.json (synced normally). The user's own fleet files MUST
# survive `mosaic update` (which runs this sync automatically): the active
# roster (`fleet/roster.yaml` + any other `fleet/*.yaml`), per-agent env
# (`fleet/agents/`), and heartbeat run dir (`fleet/run/`). Without these, an
# update wipes the operator's fleet. Glob entries are honored by both the rsync
# path (`--exclude`) and the glob-aware cp fallback below.
PRESERVE_PATHS=("CONSTITUTION.md" "AGENTS.md" "SOUL.md" "USER.md" "TOOLS.md" "STANDARDS.md" "memory" "sources" "credentials" "fleet/*.yaml" "fleet/agents" "fleet/run")
# Framework-owned contract files: re-copied from defaults/ on every upgrade (the # Framework-owned contract files: re-copied from defaults/ on every upgrade (the
# user must not edit them; a divergent copy is backed up once before overwrite). # user must not edit them; a divergent copy is backed up once before overwrite).
@@ -179,15 +187,23 @@ sync_framework() {
return return
fi fi
# Fallback: cp-based sync # Fallback: cp-based sync. Glob-aware so entries like "fleet/*.yaml" preserve
# every matching user file (parity with the rsync --exclude path above).
local preserve_tmp="" local preserve_tmp=""
if [[ "$INSTALL_MODE" == "keep" ]]; then if [[ "$INSTALL_MODE" == "keep" ]]; then
preserve_tmp="$(mktemp -d "${TMPDIR:-/tmp}/mosaic-preserve-XXXXXX")" preserve_tmp="$(mktemp -d "${TMPDIR:-/tmp}/mosaic-preserve-XXXXXX")"
local match rel
for path in "${PRESERVE_PATHS[@]}"; do for path in "${PRESERVE_PATHS[@]}"; do
if [[ -e "$TARGET_DIR/$path" ]]; then # Unquoted $path lets the glob expand against TARGET_DIR; nullglob makes a
mkdir -p "$preserve_tmp/$(dirname "$path")" # non-matching pattern vanish instead of staying literal.
cp -R "$TARGET_DIR/$path" "$preserve_tmp/$path" shopt -s nullglob
fi for match in "$TARGET_DIR/"$path; do
[[ -e "$match" ]] || continue
rel="${match#"$TARGET_DIR/"}"
mkdir -p "$preserve_tmp/$(dirname "$rel")"
cp -R "$match" "$preserve_tmp/$rel"
done
shopt -u nullglob
done done
fi fi
@@ -196,12 +212,19 @@ sync_framework() {
rm -rf "$TARGET_DIR/.git" rm -rf "$TARGET_DIR/.git"
if [[ -n "$preserve_tmp" ]]; then if [[ -n "$preserve_tmp" ]]; then
# Restore by re-globbing the SAME patterns against preserve_tmp, so each
# preserved item is restored at its own relative path (e.g. only
# fleet/roster.yaml is replaced — the freshly-synced fleet/examples stays).
for path in "${PRESERVE_PATHS[@]}"; do for path in "${PRESERVE_PATHS[@]}"; do
if [[ -e "$preserve_tmp/$path" ]]; then shopt -s nullglob
rm -rf "$TARGET_DIR/$path" for match in "$preserve_tmp/"$path; do
mkdir -p "$TARGET_DIR/$(dirname "$path")" [[ -e "$match" ]] || continue
cp -R "$preserve_tmp/$path" "$TARGET_DIR/$path" rel="${match#"$preserve_tmp/"}"
fi rm -rf "$TARGET_DIR/$rel"
mkdir -p "$TARGET_DIR/$(dirname "$rel")"
cp -R "$match" "$TARGET_DIR/$rel"
done
shopt -u nullglob
done done
rm -rf "$preserve_tmp" rm -rf "$preserve_tmp"
fi fi

View File

@@ -122,6 +122,85 @@ fi
mkdir -p "$MOSAIC_AGENT_WORKDIR" mkdir -p "$MOSAIC_AGENT_WORKDIR"
# ── Pre-trust the workdir for the Claude runtime ─────────────────────────────
# Claude Code shows a one-time "Is this a project you trust?" folder-trust gate
# the first time it opens a directory. A fleet-launched agent has no human to
# answer it, so the pane stalls forever at the prompt while its heartbeat keeps
# reporting "healthy" (the pane process IS alive — it's just blocked).
#
# IMPORTANT: --dangerously-skip-permissions does NOT bypass this gate, and
# neither does `trustedProjectDirectories` in settings.json (verified empirically
# 2026-06-24). The ONLY thing the gate honors is the per-project record in
# ~/.claude.json: projects["<dir>"].hasTrustDialogAccepted == true (exactly what
# answering the prompt writes). So we pre-seed that record here.
#
# Idempotent, atomic, best-effort: any failure is non-fatal (the agent still
# launches — worst case it stalls on the gate, i.e. the pre-fix status quo).
# Only the claude runtime needs this; codex/pi have no such gate.
# _ensure_claude_workdir_trusted WORKDIR
#
# Pre-seed projects["<resolved WORKDIR>"].hasTrustDialogAccepted = true in the
# claude JSON state file so a fleet-launched Claude agent does not block on the
# folder-trust prompt.
#
# State file resolution (first non-empty wins):
#   $MOSAIC_CLAUDE_JSON, $CLAUDE_CONFIG_DIR/.claude.json, $HOME/.claude.json
#
# Returns 0 when the record is present (already trusted or freshly written),
# non-zero otherwise. Callers treat failure as non-fatal.
#   1 = python3 missing          2 = state file unreadable / not valid JSON
#   3 = could not write the updated state file
_ensure_claude_workdir_trusted() {
  local workdir="$1"
  # The path claude keys on is the resolved cwd it is launched in; fall back to
  # the literal argument when the directory cannot be entered.
  local rp
  rp=$(cd "$workdir" 2>/dev/null && pwd -P) || rp="$workdir"
  # ~/.claude.json lives next to the claude config dir; honor CLAUDE_CONFIG_DIR.
  local claude_json="${MOSAIC_CLAUDE_JSON:-${CLAUDE_CONFIG_DIR:+$CLAUDE_CONFIG_DIR/.claude.json}}"
  claude_json="${claude_json:-$HOME/.claude.json}"
  if ! command -v python3 >/dev/null 2>&1; then
    echo "WARNING: python3 not found; cannot pre-trust '$rp' for claude (agent may stall on the folder-trust gate)" >&2
    return 1
  fi
  # Serialize concurrent agent launches that share the state file (flock if available).
  local lock="${claude_json}.mosaic-lock"
  # Sentinel exit status meaning "the lock could not be taken" — distinct from
  # every status the seeding script itself can produce.
  local lock_failed=97
  local rc=0

  # Read-modify-write of the state file. The heredoc body must stay at column 0.
  _seed() {
    MOSAIC_CJ="$claude_json" MOSAIC_TRUST_DIR="$rp" python3 - <<'PY'
import json, os, sys, tempfile

cj = os.environ["MOSAIC_CJ"]
d = os.environ["MOSAIC_TRUST_DIR"]

try:
    if os.path.exists(cj):
        with open(cj) as fh:
            data = json.load(fh)
    else:
        data = {}
except Exception:
    # Never corrupt an unreadable/partial file — bail without writing.
    sys.exit(2)

if not isinstance(data, dict):
    data = {}

# A missing or malformed "projects" value is replaced by an empty mapping
# instead of crashing on .get().
projects = data.get("projects")
if not isinstance(projects, dict):
    projects = {}
    data["projects"] = projects

entry = projects.get(d)
if not isinstance(entry, dict):
    entry = {}
    projects[d] = entry

if entry.get("hasTrustDialogAccepted") is True:
    sys.exit(0)  # already trusted — nothing to do

entry["hasTrustDialogAccepted"] = True

tmp = None
try:
    # Temp file in the SAME directory so os.replace() is an atomic rename.
    fd, tmp = tempfile.mkstemp(
        dir=os.path.dirname(cj) or ".", prefix=".claude.json.mosaic."
    )
    with os.fdopen(fd, "w") as fh:
        json.dump(data, fh, indent=2)
    os.replace(tmp, cj)  # atomic
except Exception:
    if tmp is not None:
        try:
            os.unlink(tmp)
        except OSError:
            pass
    sys.exit(3)
PY
  }

  if command -v flock >/dev/null 2>&1 && touch "$lock" 2>/dev/null; then
    ( flock 9 2>/dev/null || exit "$lock_failed"; _seed ) 9>"$lock"
    rc=$?
    # Fall back to an unlocked attempt ONLY when the lock itself failed. A
    # seeding failure under the lock is final: retrying it unlocked would race
    # with other launchers and cannot succeed where the locked attempt did not.
    if [ "$rc" -eq "$lock_failed" ]; then
      _seed
      rc=$?
    fi
  else
    _seed
    rc=$?
  fi
  return "$rc"
}
# Only the claude runtime is pre-trusted; every other runtime skips this step.
# A seeding failure is reported but never aborts the launch.
if [ "$MOSAIC_AGENT_RUNTIME" = "claude" ]; then
  if ! _ensure_claude_workdir_trusted "$MOSAIC_AGENT_WORKDIR"; then
    echo "WARNING: could not pre-trust workdir for claude agent $AGENT_NAME" >&2
  fi
fi
# ── Launch the tmux session (no exec — we continue to wire the heartbeat) ──── # ── Launch the tmux session (no exec — we continue to wire the heartbeat) ────
_tmux new-session -d -s "$AGENT_NAME" -c "$MOSAIC_AGENT_WORKDIR" \ _tmux new-session -d -s "$AGENT_NAME" -c "$MOSAIC_AGENT_WORKDIR" \
bash -c "$PANE_SHELL_SNIPPET" bash -c "$PANE_SHELL_SNIPPET"

View File

@@ -128,8 +128,8 @@ PY
merge_gitea_with_api() { merge_gitea_with_api() {
local host="$1" api_url token basic_auth body_file raw_code payload local host="$1" api_url token basic_auth body_file raw_code payload
api_url="https://${host}/api/v1/repos/${OWNER}/${REPO}/pulls/${PR_NUMBER}/merge" api_url="https://${host}/api/v1/repos/${OWNER}/${REPO}/pulls/${PR_NUMBER}/merge"
mkdir -p "${AGENT_WORK_ROOT:-/home/hermes/agent-work}" mkdir -p "${AGENT_WORK_ROOT:-${HOME:-/tmp}/mosaic/agent-work}"
body_file=$(mktemp "${AGENT_WORK_ROOT:-/home/hermes/agent-work}/pr-merge-api-response.XXXXXX") body_file=$(mktemp "${AGENT_WORK_ROOT:-${HOME:-/tmp}/mosaic/agent-work}/pr-merge-api-response.XXXXXX")
payload='{"Do":"squash"}' payload='{"Do":"squash"}'
token=$(get_gitea_token "$host" || true) token=$(get_gitea_token "$host" || true)
@@ -214,8 +214,8 @@ case "$PLATFORM" in
TEA_LOGIN="$(get_gitea_login_for_host "$HOST" || true)" TEA_LOGIN="$(get_gitea_login_for_host "$HOST" || true)"
if [[ -n "$TEA_LOGIN" ]]; then if [[ -n "$TEA_LOGIN" ]]; then
mkdir -p "${AGENT_WORK_ROOT:-/home/hermes/agent-work}" mkdir -p "${AGENT_WORK_ROOT:-${HOME:-/tmp}/mosaic/agent-work}"
TEA_ERROR_FILE=$(mktemp "${AGENT_WORK_ROOT:-/home/hermes/agent-work}/pr-merge-tea-error.XXXXXX") TEA_ERROR_FILE=$(mktemp "${AGENT_WORK_ROOT:-${HOME:-/tmp}/mosaic/agent-work}/pr-merge-tea-error.XXXXXX")
if tea pr merge "$PR_NUMBER" --style squash --repo "$OWNER/$REPO" --login "$TEA_LOGIN" 2> "$TEA_ERROR_FILE"; then if tea pr merge "$PR_NUMBER" --style squash --repo "$OWNER/$REPO" --login "$TEA_LOGIN" 2> "$TEA_ERROR_FILE"; then
rm -f "$TEA_ERROR_FILE" rm -f "$TEA_ERROR_FILE"
elif is_known_tea_empty_identity_failure "$TEA_ERROR_FILE"; then elif is_known_tea_empty_identity_failure "$TEA_ERROR_FILE"; then

View File

@@ -4,7 +4,7 @@
set -euo pipefail set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
WORK_ROOT="${AGENT_WORK_ROOT:-/home/hermes/agent-work}" WORK_ROOT="${AGENT_WORK_ROOT:-${HOME:-/tmp}/mosaic/agent-work}"
SANDBOX="$WORK_ROOT/pr-merge-empty-uid-test-$$" SANDBOX="$WORK_ROOT/pr-merge-empty-uid-test-$$"
MOCK_BIN="$SANDBOX/bin" MOCK_BIN="$SANDBOX/bin"
REPO_DIR="$SANDBOX/repo" REPO_DIR="$SANDBOX/repo"

View File

@@ -61,7 +61,25 @@ MOSAIC_HOME="$T5" MOSAIC_INSTALL_MODE=bogus MOSAIC_SYNC_ONLY=1 bash "$INSTALL" >
chk "F5 failure: invalid mode rejected (nonzero exit)" "[ $rc -ne 0 ]" chk "F5 failure: invalid mode rejected (nonzero exit)" "[ $rc -ne 0 ]"
chk "F5 failure: SOUL + credentials intact" "grep -q orig '$T5/SOUL.md' && grep -q keepme '$T5/credentials/c.json'" chk "F5 failure: SOUL + credentials intact" "grep -q orig '$T5/SOUL.md' && grep -q keepme '$T5/credentials/c.json'"
rm -rf "$T1" "$T2" "$T3" "$T4" "$T5" # F6 — keep-mode re-seed (the `mosaic update` path) MUST NOT wipe user fleet data.
# Regression for the roster-loss bug: fleet/ was not in PRESERVE_PATHS.
T6=$(mktemp -d); mkdir -p "$T6/fleet/examples" "$T6/fleet/run" "$T6/fleet/agents"
printf '# persona\n' > "$T6/SOUL.md" # makes it a recognized existing install (→ keep mode)
printf 'version: 1\nagents:\n - name: coder0\n' > "$T6/fleet/roster.yaml"
printf 'version: 1\nagents:\n - name: custom\n' > "$T6/fleet/my-fleet.yaml"
printf 'ts=x\n' > "$T6/fleet/run/coder0.hb"
printf 'MOSAIC_AGENT_NAME=coder0\n' > "$T6/fleet/agents/coder0.env"
printf '# stale preset\n' > "$T6/fleet/examples/general.yaml"
echo 3 > "$T6/.framework-version"
run "$T6" keep
chk "F6 reseed: user roster.yaml SURVIVES keep-mode sync" "grep -q coder0 '$T6/fleet/roster.yaml'"
chk "F6 reseed: other user fleet/*.yaml survives (glob)" "[ -f '$T6/fleet/my-fleet.yaml' ]"
chk "F6 reseed: per-agent env (fleet/agents) survives" "[ -f '$T6/fleet/agents/coder0.env' ]"
chk "F6 reseed: heartbeat run dir (fleet/run) survives" "[ -f '$T6/fleet/run/coder0.hb' ]"
chk "F6 reseed: framework examples ARE refreshed (not preserved stale)" "grep -q orchestrator '$T6/fleet/examples/general.yaml'"
chk "F6 reseed: framework roster.schema.json seeded" "[ -f '$T6/fleet/roster.schema.json' ]"
rm -rf "$T1" "$T2" "$T3" "$T4" "$T5" "$T6"
echo echo
echo "RESULT: $pass passed, $fail failed" echo "RESULT: $pass passed, $fail failed"
[ "$fail" -eq 0 ] [ "$fail" -eq 0 ]

View File

@@ -2,12 +2,20 @@
when: when:
- event: [push, pull_request, manual] - event: [push, pull_request, manual]
# Dependencies are installed ONCE in the `install` step and every downstream
# step depends on it, reusing the populated node_modules from the shared
# workspace volume. Do NOT re-run `npm ci` per step — that pays the full cold
# install (network fetch + native rebuilds) N times and is the dominant cost
# in a pipeline.
#
# For best results, replace `&node_image` with a pre-baked CI base image that
# ships your toolchain (python3/make/g++ for native modules) and a warm npm
# cache, then keep `--prefer-offline` so installs resolve from the cache. See
# the Mosaic Stack repo's Dockerfile.ci + .woodpecker/ci-image.yml for the
# baked-image pattern.
variables: variables:
- &node_image 'node:20-alpine' - &node_image 'node:20-alpine'
- &gitleaks_image 'ghcr.io/gitleaks/gitleaks:v8.24.0' - &gitleaks_image 'ghcr.io/gitleaks/gitleaks:v8.24.0'
- &install_deps |
corepack enable
npm ci --ignore-scripts
steps: steps:
# Secret scanning (runs in parallel with install, no deps) # Secret scanning (runs in parallel with install, no deps)
@@ -17,15 +25,18 @@ steps:
- gitleaks git --redact --verbose --log-opts="HEAD~1..HEAD" - gitleaks git --redact --verbose --log-opts="HEAD~1..HEAD"
depends_on: [] depends_on: []
# Single cached install. Every other step depends on this and reuses the
# node_modules it produces in the shared workspace.
install: install:
image: *node_image image: *node_image
commands: commands:
- *install_deps - corepack enable
- npm ci --ignore-scripts --prefer-offline
depends_on: []
security-audit: security-audit:
image: *node_image image: *node_image
commands: commands:
- *install_deps
- npm audit --audit-level=high - npm audit --audit-level=high
depends_on: depends_on:
- install - install
@@ -35,7 +46,6 @@ steps:
environment: environment:
SKIP_ENV_VALIDATION: 'true' SKIP_ENV_VALIDATION: 'true'
commands: commands:
- *install_deps
- npm run lint - npm run lint
depends_on: depends_on:
- install - install
@@ -45,7 +55,6 @@ steps:
environment: environment:
SKIP_ENV_VALIDATION: 'true' SKIP_ENV_VALIDATION: 'true'
commands: commands:
- *install_deps
- npm run type-check - npm run type-check
depends_on: depends_on:
- install - install
@@ -55,7 +64,6 @@ steps:
environment: environment:
SKIP_ENV_VALIDATION: 'true' SKIP_ENV_VALIDATION: 'true'
commands: commands:
- *install_deps
- npm run test -- --coverage --coverageThreshold='{"global":{"branches":80,"functions":80,"lines":80,"statements":80}}' - npm run test -- --coverage --coverageThreshold='{"global":{"branches":80,"functions":80,"lines":80,"statements":80}}'
depends_on: depends_on:
- install - install
@@ -66,7 +74,6 @@ steps:
SKIP_ENV_VALIDATION: 'true' SKIP_ENV_VALIDATION: 'true'
NODE_ENV: 'production' NODE_ENV: 'production'
commands: commands:
- *install_deps
- npm run build - npm run build
depends_on: depends_on:
- lint - lint

View File

@@ -12,6 +12,10 @@
# ambiguity about lanes or origin. Recipients replying should FLIP the # ambiguity about lanes or origin. Recipients replying should FLIP the
# preamble: [<dst> -> <src>] ... (this tool sends; it does not auto-reply). # preamble: [<dst> -> <src>] ... (this tool sends; it does not auto-reply).
# #
# Optionally tags the message with a TRIAGE CLASS (see -C / --class) so a
# comms daemon can route it (deliver-to-agent vs log-and-drop) from an exact
# field instead of re-deriving intent from the body.
#
# WHY A WRAPPER # WHY A WRAPPER
# Reliable submission into an interactive REPL (Claude Code / Codex) is fiddly: # Reliable submission into an interactive REPL (Claude Code / Codex) is fiddly:
# a trailing Enter is often swallowed and the message sits as an unsubmitted # a trailing Enter is often swallowed and the message sits as an unsubmitted
@@ -26,6 +30,7 @@
# agent-send.sh [-L socket] -s <dst_session> -m "message" # local target # agent-send.sh [-L socket] -s <dst_session> -m "message" # local target
# agent-send.sh [-L socket] -H user@host -s <dst_session> -m "message" # remote target # agent-send.sh [-L socket] -H user@host -s <dst_session> -m "message" # remote target
# agent-send.sh [-L socket] -H user@host -n <dst_hostname> -s <sess> -f msg.txt # agent-send.sh [-L socket] -H user@host -n <dst_hostname> -s <sess> -f msg.txt
# agent-send.sh -s mos-claude --class terminal-log -m "ACK — received"
# echo "msg" | agent-send.sh [-L socket] -H user@host -s <dst_session> # echo "msg" | agent-send.sh [-L socket] -H user@host -s <dst_session>
# #
# OPTIONS # OPTIONS
@@ -36,27 +41,61 @@
# Default: local hostname, or (remote) resolved via one ssh. # Default: local hostname, or (remote) resolved via one ssh.
# -m MESSAGE message text (single- or multi-line) # -m MESSAGE message text (single- or multi-line)
# -f FILE read message from FILE instead of -m # -f FILE read message from FILE instead of -m
# -C CLASS triage class for a comms daemon. One of:
# terminal-log log-only; never needs the agent's attention
# actionable carries a decision/blocker/gate — deliver
# human from a human operator — deliver
# reaction an emoji/ack reaction
# Long form: --class CLASS (or --class=CLASS). When SET, the
# preamble carries a ` class=<CLASS>` token INSIDE the bracket:
# [<src> -> <dst> class=terminal-log] <message>
# When OMITTED, NO token is emitted and the preamble is
# byte-for-byte identical to the classic format. Consumers MUST
# treat an absent class as 'actionable' (fail-safe: agent sees it).
# -S SRC_LABEL override source label "<host>:<session>" (default: auto) # -S SRC_LABEL override source label "<host>:<session>" (default: auto)
# -r N Enter-flush attempts passed through (default 2) # -r N Enter-flush attempts passed through (default 2)
# -v verbose: print pane tail after delivery # -v verbose: print pane tail after delivery
# -h help # -h help
# #
# PREAMBLE GRAMMAR (for consumers / daemons mirroring this producer)
# ^\[(\S+) -> (\S+?)(?: class=(terminal-log|actionable|human|reaction))?\] (.*)$
# group 1 = src label group 2 = dst host:session
# group 3 = class (absent => actionable) group 4 = message body
#
# EXIT CODES (passed through from send-message.sh) # EXIT CODES (passed through from send-message.sh)
# 0 delivered/queued · 1 target not found · 2 still draft · 3 usage error # 0 delivered/queued · 1 target not found · 2 still draft · 3 usage error
set -uo pipefail set -uo pipefail
SELF_DIR=$(cd -- "$(dirname -- "$0")" && pwd) SELF_DIR=$(cd -- "$(dirname -- "$0")" && pwd)
SENDER="$SELF_DIR/send-message.sh" # Sender is overridable via env purely for testing (inject a capture stub). The
# default is the canonical send-message.sh beside this script; production callers
# never set AGENT_SEND_SENDER, so behavior is unchanged.
SENDER="${AGENT_SEND_SENDER:-$SELF_DIR/send-message.sh}"
# Translate the long option --class[=value] into "-C value" so getopts (which is
# short-option-only) can parse it. Only arguments in OPTION position are
# rewritten:
#   * the value of a value-taking short option is copied verbatim, so a message
#     such as  -m "--class=human"  is delivered as text instead of being
#     mistaken for the option;
#   * everything after a literal "--" is copied verbatim (getopts stops there).
# Callers that never use --class reach getopts with their arguments unchanged.
#
# _translate_long_opts ARGS...
#   Fills the global array `args` with the translated argument list.
#   Exits 3 (usage error) when --class is given without a value.
_translate_long_opts() {
  local cluster ch takes_next
  args=()
  while [ $# -gt 0 ]; do
    case "$1" in
      --)
        args+=("$@")
        return 0 ;;
      --class)
        [ $# -ge 2 ] || { echo "ERROR: --class requires a value" >&2; exit 3; }
        args+=(-C "$2"); shift 2 ;;
      --class=*)
        args+=(-C "${1#*=}"); shift ;;
      -[!-]*)
        # Short option or cluster (e.g. -v, -m, -vm, -mTEXT). Walk the letters:
        # the first value-taking one ends the cluster. If it is the LAST letter
        # its value is the next argument; otherwise the value is attached.
        # NOTE: this letter set must mirror the value-taking options in the
        # getopts string used by the option loop that follows.
        takes_next=0
        cluster="${1#-}"
        while [ -n "$cluster" ]; do
          ch="${cluster:0:1}"
          cluster="${cluster:1}"
          case "$ch" in
            [LsHnmfSrC])
              [ -n "$cluster" ] || takes_next=1
              break ;;
          esac
        done
        if [ "$takes_next" -eq 1 ] && [ $# -ge 2 ]; then
          args+=("$1" "$2"); shift 2
        else
          args+=("$1"); shift
        fi ;;
      *)
        args+=("$1"); shift ;;
    esac
  done
}
args=()
_translate_long_opts "$@"
# ${args[@]+...} keeps `set -u` happy on bash versions that treat an empty
# array as unset.
set -- ${args[@]+"${args[@]}"}
DST_SESSION=""; SSH_TARGET=""; DST_HOST=""; MSG=""; FILE=""; SOCKET_NAME="" DST_SESSION=""; SSH_TARGET=""; DST_HOST=""; MSG=""; FILE=""; SOCKET_NAME=""
SRC_LABEL=""; RETRIES=2; VERBOSE=0 SRC_LABEL=""; RETRIES=2; VERBOSE=0; CLASS=""
usage() { sed -n '2,44p' "$0"; exit "${1:-3}"; } usage() { sed -n '2,/^set -uo pipefail/{/^set -uo pipefail/d;p}' "$0"; exit "${1:-3}"; }
while getopts "L:s:H:n:m:f:S:r:vh" o; do while getopts "L:s:H:n:m:f:S:r:C:vh" o; do
case "$o" in case "$o" in
L) SOCKET_NAME=$OPTARG ;; L) SOCKET_NAME=$OPTARG ;;
s) DST_SESSION=$OPTARG ;; H) SSH_TARGET=$OPTARG ;; n) DST_HOST=$OPTARG ;; s) DST_SESSION=$OPTARG ;; H) SSH_TARGET=$OPTARG ;; n) DST_HOST=$OPTARG ;;
m) MSG=$OPTARG ;; f) FILE=$OPTARG ;; S) SRC_LABEL=$OPTARG ;; m) MSG=$OPTARG ;; f) FILE=$OPTARG ;; S) SRC_LABEL=$OPTARG ;;
C) CLASS=$OPTARG ;;
r) RETRIES=$OPTARG ;; v) VERBOSE=1 ;; h) usage 0 ;; *) usage 3 ;; r) RETRIES=$OPTARG ;; v) VERBOSE=1 ;; h) usage 0 ;; *) usage 3 ;;
esac esac
done done
@@ -64,6 +103,17 @@ done
[ -n "$DST_SESSION" ] || { echo "ERROR: -s DST_SESSION is required" >&2; usage 3; } [ -n "$DST_SESSION" ] || { echo "ERROR: -s DST_SESSION is required" >&2; usage 3; }
[ -x "$SENDER" ] || { echo "ERROR: send-message.sh not found beside this script" >&2; exit 3; } [ -x "$SENDER" ] || { echo "ERROR: send-message.sh not found beside this script" >&2; exit 3; }
# Validate the triage class only when one was given. An absent class emits NO
# token (preamble byte-identical to the classic format); the consumer defaults
# absent => actionable.
CLASS_TOKEN=""
if [ -n "$CLASS" ]; then
case "$CLASS" in
terminal-log|actionable|human|reaction) CLASS_TOKEN=" class=${CLASS}" ;;
*) echo "ERROR: invalid --class '$CLASS' (allowed: terminal-log, actionable, human, reaction)" >&2; exit 3 ;;
esac
fi
# Message body from -f / -m / stdin. # Message body from -f / -m / stdin.
if [ -n "$FILE" ]; then [ -r "$FILE" ] || { echo "ERROR: cannot read $FILE" >&2; exit 3; }; MSG=$(cat -- "$FILE") if [ -n "$FILE" ]; then [ -r "$FILE" ] || { echo "ERROR: cannot read $FILE" >&2; exit 3; }; MSG=$(cat -- "$FILE")
elif [ -z "$MSG" ] && [ ! -t 0 ]; then MSG=$(cat) elif [ -z "$MSG" ] && [ ! -t 0 ]; then MSG=$(cat)
@@ -90,7 +140,7 @@ if [ -z "$DST_HOST" ]; then
fi fi
fi fi
PREAMBLE="[${SRC_LABEL} -> ${DST_HOST}:${DST_SESSION}]" PREAMBLE="[${SRC_LABEL} -> ${DST_HOST}:${DST_SESSION}${CLASS_TOKEN}]"
FULL="${PREAMBLE} ${MSG}" FULL="${PREAMBLE} ${MSG}"
B64=$(printf '%s' "$FULL" | base64 -w0) B64=$(printf '%s' "$FULL" | base64 -w0)

View File

@@ -0,0 +1,97 @@
#!/usr/bin/env bash
# agent-send.test.sh — regression + grammar lock for agent-send.sh --class.
#
# Strategy: inject a capture stub via AGENT_SEND_SENDER that decodes the -b
# base64 payload and prints the FULL message (preamble + body) so we can assert
# the exact bytes on the wire. Local path only (no ssh), -n pins the dst host so
# the preamble is deterministic across machines.
#
# Guarantees locked here:
# 1. REGRESSION BAR — no --class => preamble byte-for-byte identical to classic.
# 2. --class <c> => ` class=<c>` token emitted inside the bracket.
# 3. --class=<c> (equals form) parses identically to the space form.
# 4. -C <c> short form parses identically.
# 5. invalid class => exit 3, nothing sent.
# 6. --class with no value => exit 3.
# 7. the documented consumer regex parses producer output for every class.
set -uo pipefail
HERE=$(cd -- "$(dirname -- "$0")" && pwd)
TOOL="$HERE/agent-send.sh"
# Capture stub: stands in for send-message.sh. Decodes -b and prints the payload.
STUB=$(mktemp)
trap 'rm -f "$STUB"' EXIT
cat >"$STUB" <<'STUB_EOF'
#!/usr/bin/env bash
set -uo pipefail
b64=""
while getopts "t:b:r:v" o; do case "$o" in b) b64=$OPTARG ;; *) : ;; esac; done
printf '%s' "$b64" | base64 -d
STUB_EOF
chmod +x "$STUB"
PASS=0; FAIL=0
ok() { PASS=$((PASS+1)); printf 'ok %s\n' "$1"; }
no() { FAIL=$((FAIL+1)); printf 'FAIL %s\n %s\n' "$1" "$2"; }
# Run the tool with the stub injected; echoes captured payload on stdout.
run() { AGENT_SEND_SENDER="$STUB" bash "$TOOL" -S a:src -n dsthost "$@"; }
# Consumer grammar, expressed for bash's `[[ =~ ]]` (POSIX ERE). The grammar
# documented in agent-send.sh's header is a single pattern using a lazy
# quantifier and a non-capturing group, neither of which ERE supports, so it is
# split here into two equivalent patterns:
#   GRAMMAR          — preamble WITH a class token
#                      (1=src, 2=dst, 3=class, 4=body)
#   GRAMMAR_NOCLASS  — classic preamble without one
#                      (1=src, 2=dst, 3=body)
# NOTE(review): `\S` is a regex-library extension rather than strict POSIX ERE;
# confirm it is supported on every platform this test is expected to run on.
GRAMMAR='^\[(\S+) -> (\S+) class=(terminal-log|actionable|human|reaction)\] (.*)$'
GRAMMAR_NOCLASS='^\[(\S+) -> (\S+)\] (.*)$'
# 1. REGRESSION BAR: classic preamble, byte-for-byte.
got=$(run -s mos -m "hello world")
want='[a:src -> dsthost:mos] hello world'
[ "$got" = "$want" ] && ok "regression: no --class is byte-identical" \
|| no "regression: no --class is byte-identical" "got=[$got] want=[$want]"
# 2. --class space form emits the token.
got=$(run -s mos --class terminal-log -m "ACK")
want='[a:src -> dsthost:mos class=terminal-log] ACK'
[ "$got" = "$want" ] && ok "--class terminal-log emits token" \
|| no "--class terminal-log emits token" "got=[$got] want=[$want]"
# 3. --class=value equals form.
got=$(run -s mos --class=actionable -m "decide X")
want='[a:src -> dsthost:mos class=actionable] decide X'
[ "$got" = "$want" ] && ok "--class=actionable (equals form)" \
|| no "--class=actionable (equals form)" "got=[$got] want=[$want]"
# 4. -C short form.
got=$(run -s mos -C human -m "from a person")
want='[a:src -> dsthost:mos class=human] from a person'
[ "$got" = "$want" ] && ok "-C human (short form)" \
|| no "-C human (short form)" "got=[$got] want=[$want]"
# 5. invalid class => exit 3, no send.
if out=$(run -s mos --class bogus -m "x" 2>/dev/null); then
no "invalid class rejected" "expected non-zero exit, got 0 (out=[$out])"
else
rc=$?
[ "$rc" = 3 ] && [ -z "$out" ] && ok "invalid class => exit 3, nothing sent" \
|| no "invalid class => exit 3, nothing sent" "rc=$rc out=[$out]"
fi
# 6. --class with no value => exit 3.
if run -s mos -m "x" --class 2>/dev/null; then
no "--class with no value rejected" "expected non-zero exit, got 0"
else
[ "$?" = 3 ] && ok "--class with no value => exit 3" || no "--class with no value => exit 3" "wrong rc"
fi
# 7. consumer grammar parses every class + classic line.
for c in terminal-log actionable human reaction; do
line=$(run -s mos --class "$c" -m "body $c")
[[ "$line" =~ $GRAMMAR ]] && [ "${BASH_REMATCH[3]}" = "$c" ] && [ "${BASH_REMATCH[4]}" = "body $c" ] \
&& ok "grammar parses class=$c" || no "grammar parses class=$c" "line=[$line]"
done
classic=$(run -s mos -m "plain body")
[[ "$classic" =~ $GRAMMAR_NOCLASS ]] && [ "${BASH_REMATCH[3]}" = "plain body" ] \
&& ok "grammar (no-class) parses classic line" || no "grammar (no-class) parses classic line" "line=[$classic]"
echo "---"
echo "PASS=$PASS FAIL=$FAIL"
[ "$FAIL" -eq 0 ]

View File

@@ -1,6 +1,6 @@
{ {
"name": "@mosaicstack/mosaic", "name": "@mosaicstack/mosaic",
"version": "0.0.40", "version": "0.0.41",
"repository": { "repository": {
"type": "git", "type": "git",
"url": "https://git.mosaicstack.dev/mosaicstack/stack.git", "url": "https://git.mosaicstack.dev/mosaicstack/stack.git",

View File

@@ -27,8 +27,10 @@ import {
formatAllPackagesTable, formatAllPackagesTable,
getInstallAllCommand, getInstallAllCommand,
runFrameworkReseed, runFrameworkReseed,
refreshActiveFleetUnits,
readRosterAgentNames, readRosterAgentNames,
buildRelaunchCommands, buildRelaunchCommands,
checkFrameworkDrift,
FRAMEWORK_RESEED_PACKAGE, FRAMEWORK_RESEED_PACKAGE,
} from './runtime/update-checker.js'; } from './runtime/update-checker.js';
import { runWizard } from './wizard.js'; import { runWizard } from './wizard.js';
@@ -417,6 +419,48 @@ program
// checkForAllUpdates imported statically above // checkForAllUpdates imported statically above
const { execSync } = await import('node:child_process'); const { execSync } = await import('node:child_process');
/**
 * Re-seed the framework from the freshly-installed package, propagate shipped
 * systemd unit fixes to the active units, and (opt-in via `--relaunch`)
 * relaunch the roster's agents. Shared by the "packages updated" and the
 * "framework drift" paths.
 *
 * Never throws for a failed re-seed or a failed agent restart: both are
 * reported on stderr together with the command to run manually.
 *
 * @param reason - Line printed before the re-seed starts, explaining why it runs.
 */
const reseedFramework = (reason: string): void => {
  console.log(reason);

  const reseed = runFrameworkReseed();
  if (!reseed.ok) {
    console.error(
      `\n⚠ Framework re-seed skipped: ${reseed.reason ?? 'unknown'}.\n` +
        ' Activate manually: bash "$(npm root -g)/@mosaicstack/mosaic/framework/install.sh" ' +
        '(MOSAIC_SYNC_ONLY=1 MOSAIC_INSTALL_MODE=keep)',
    );
    return;
  }
  console.log('✔ Framework re-seeded.');

  // Propagate shipped systemd unit fixes to the ACTIVE units (re-seed only
  // touches ~/.config/mosaic/systemd/user; systemd runs ~/.config/systemd/user).
  const units = refreshActiveFleetUnits();
  if (units.refreshed.length > 0) {
    console.log(`✔ Refreshed ${units.refreshed.length} active systemd unit(s).`);
  }

  const agents = readRosterAgentNames();
  if (agents.length === 0) return;

  if (!opts.relaunch) {
    console.log(
      `\n ${agents.length} fleet agent(s) are still running the previous runtime. ` +
        'Restart them to activate the update:\n mosaic update --relaunch ' +
        '(or: mosaic fleet restart <agent>)',
    );
    return;
  }

  console.log(`\nRelaunching ${agents.length} fleet agent(s) to pick up the new runtime…`);
  // Count outcomes so the summary reflects what actually happened instead of
  // unconditionally claiming success.
  let attempted = 0;
  let failed = 0;
  for (const restart of buildRelaunchCommands(agents)) {
    const command = restart.join(' ');
    attempted += 1;
    try {
      execSync(command, { stdio: 'inherit', timeout: 30_000 });
    } catch {
      // Best-effort: keep going so one stuck agent does not block the rest.
      failed += 1;
      console.error(` ⚠ failed to restart agent — run: ${command}`);
    }
  }
  if (failed === 0) {
    console.log('✔ Agents relaunched.');
  } else {
    console.error(
      `⚠ ${failed} of ${attempted} agent restart(s) failed — run the command(s) listed above.`,
    );
  }
};
console.log('Checking for updates…'); console.log('Checking for updates…');
const results = checkForAllUpdates({ skipCache: true }); const results = checkForAllUpdates({ skipCache: true });
@@ -431,6 +475,18 @@ program
process.exit(1); process.exit(1);
} }
console.log('\n✔ All packages up to date.'); console.log('\n✔ All packages up to date.');
// #642: the CLI may have been upgraded outside `mosaic update` (e.g. a
// direct `npm i -g`), leaving the framework files stale even though no
// package is reported outdated. Detect that via the framework version and
// re-seed so shipped launcher/runtime fixes still activate.
const drift = checkFrameworkDrift();
if (drift.drifted && opts.reseed !== false) {
reseedFramework(
`\nFramework drift detected (on-disk v${drift.installed} < bundled v${drift.bundled}) — ` +
'the CLI was updated outside `mosaic update`. Re-seeding framework files into ' +
'~/.config/mosaic (data-safe; keeps your edits)…',
);
}
return; return;
} }
@@ -455,46 +511,17 @@ program
// F3-m3 / R13: the CLI is updated, but the framework files in // F3-m3 / R13: the CLI is updated, but the framework files in
// ~/.config/mosaic/ are still the previous version. Re-seed them from the // ~/.config/mosaic/ are still the previous version. Re-seed them from the
// freshly-installed package so shipped launcher/runtime changes ACTIVATE. // freshly-installed package so shipped launcher/runtime changes ACTIVATE.
// Only when the framework-bearing package itself updated. // Re-seed when the framework-bearing package itself updated OR the on-disk
// framework is older than the freshly-installed one (#642 — e.g. only
// sibling packages were outdated but the CLI was already ahead).
const mosaicUpdated = outdated.some( const mosaicUpdated = outdated.some(
(r: { package: string }) => r.package === FRAMEWORK_RESEED_PACKAGE, (r: { package: string }) => r.package === FRAMEWORK_RESEED_PACKAGE,
); );
if (mosaicUpdated && opts.reseed !== false) { const drift = checkFrameworkDrift();
console.log( if ((mosaicUpdated || drift.drifted) && opts.reseed !== false) {
reseedFramework(
'\nRe-seeding framework files into ~/.config/mosaic (data-safe; keeps your edits)…', '\nRe-seeding framework files into ~/.config/mosaic (data-safe; keeps your edits)…',
); );
const reseed = runFrameworkReseed();
if (reseed.ok) {
console.log('✔ Framework re-seeded.');
const agents = readRosterAgentNames();
if (agents.length > 0) {
if (opts.relaunch) {
console.log(
`\nRelaunching ${agents.length} fleet agent(s) to pick up the new runtime…`,
);
for (const restart of buildRelaunchCommands(agents)) {
try {
execSync(restart.join(' '), { stdio: 'inherit', timeout: 30_000 });
} catch {
console.error(` ⚠ failed to restart agent — run: ${restart.join(' ')}`);
}
}
console.log('✔ Agents relaunched.');
} else {
console.log(
`\n ${agents.length} fleet agent(s) are still running the previous runtime. ` +
'Restart them to activate the update:\n mosaic update --relaunch ' +
'(or: mosaic fleet restart <agent>)',
);
}
}
} else {
console.error(
`\n⚠ Framework re-seed skipped: ${reseed.reason ?? 'unknown'}.\n` +
' Activate manually: bash "$(npm root -g)/@mosaicstack/mosaic/framework/install.sh" ' +
'(MOSAIC_SYNC_ONLY=1 MOSAIC_INSTALL_MODE=keep)',
);
}
} }
}); });

View File

@@ -95,6 +95,7 @@ describe('registerFleetCommand', () => {
expect(agent).toBeDefined(); expect(agent).toBeDefined();
expect(agent!.options.map((option) => option.long)).toContain('--list'); expect(agent!.options.map((option) => option.long)).toContain('--list');
expect(agent!.commands.map((command) => command.name()).sort()).toEqual([ expect(agent!.commands.map((command) => command.name()).sort()).toEqual([
'comms-block',
'reset', 'reset',
'roster', 'roster',
'send', 'send',

View File

@@ -7,6 +7,7 @@ import { spawn } from 'node:child_process';
import * as readline from 'node:readline'; import * as readline from 'node:readline';
import type { Command } from 'commander'; import type { Command } from 'commander';
import YAML from 'yaml'; import YAML from 'yaml';
import { resolveCommsBlock } from '../fleet/comms-onboarding.js';
/** /**
* A function that spawns a command with inherited stdio (TTY passthrough). * A function that spawns a command with inherited stdio (TTY passthrough).
@@ -1359,6 +1360,23 @@ export function registerFleetAgentCommands(
} }
}); });
// `mosaic fleet comms-block <role>` — print the comms block resolved for a
// roster role. A failed resolution is reported on stderr and sets exit code 1
// rather than printing nothing.
agentCommand
  .command('comms-block <role>')
  .description(
    "Print the Fleet Comms cheat-sheet for a roster role (preview a peer's peer-reach view)",
  )
  .option('--host <host>', 'Override the fleet host (preview a cross-host peer view)')
  .action((role: string, { host }: { host?: string }) => {
    const home = resolveMosaicHomeFromCommand(agentCommand, deps.mosaicHome);
    const result = resolveCommsBlock(home, role, host);
    if (result.ok) {
      console.log(result.output);
      return;
    }
    console.error(`[mosaic] comms-block: ${result.error}`);
    process.exitCode = 1;
  });
agentCommand agentCommand
.command('status [agent]') .command('status [agent]')
.description('Show tmux status for the local fleet or one agent') .description('Show tmux status for the local fleet or one agent')

View File

@@ -153,6 +153,30 @@ describe('FileConfigAdapter.syncFramework — defaults seeding', () => {
expect(readFileSync(join(fixture.mosaicHome, 'AGENTS.md'), 'utf-8')).toBe('# AGENTS default\n'); expect(readFileSync(join(fixture.mosaicHome, 'AGENTS.md'), 'utf-8')).toBe('# AGENTS default\n');
}); });
it('preserves user fleet data (roster.yaml, agents/, run/) through a keep-mode sync', async () => {
  // Regression guard for the roster-loss bug (#631): files the operator wrote
  // under fleet/ must come through the framework re-seed that `mosaic update`
  // performs untouched.
  const userFleetDir = join(fixture.mosaicHome, 'fleet');
  const rosterFile = join(userFleetDir, 'roster.yaml');
  const heartbeatFile = join(userFleetDir, 'run', 'a.hb');
  const agentEnvFile = join(userFleetDir, 'agents', 'a.env');
  const rosterBody = 'version: 1\nMINE\n';

  // User-owned fleet state already present in the mosaic home.
  mkdirSync(join(userFleetDir, 'run'), { recursive: true });
  mkdirSync(join(userFleetDir, 'agents'), { recursive: true });
  writeFileSync(rosterFile, rosterBody);
  writeFileSync(heartbeatFile, 'ts=x\n');
  writeFileSync(agentEnvFile, 'X=1\n');

  // Framework-shipped fleet/examples: this part is still expected to be seeded.
  const shippedExamplesDir = join(fixture.sourceDir, 'fleet', 'examples');
  mkdirSync(shippedExamplesDir, { recursive: true });
  writeFileSync(join(shippedExamplesDir, 'general.yaml'), '# preset\n');

  await new FileConfigAdapter(fixture.mosaicHome, fixture.sourceDir).syncFramework('keep');

  // The user's files are intact...
  expect(readFileSync(rosterFile, 'utf-8')).toBe(rosterBody);
  expect(existsSync(heartbeatFile)).toBe(true);
  expect(existsSync(agentEnvFile)).toBe(true);
  // ...and the framework-owned examples were copied in.
  expect(existsSync(join(userFleetDir, 'examples', 'general.yaml'))).toBe(true);
});
it('is a no-op for seeding when defaults/ dir does not exist', async () => { it('is a no-op for seeding when defaults/ dir does not exist', async () => {
rmSync(fixture.defaultsDir, { recursive: true }); rmSync(fixture.defaultsDir, { recursive: true });

View File

@@ -173,6 +173,13 @@ export class FileConfigAdapter implements ConfigService {
'memory', 'memory',
'sources', 'sources',
'credentials', 'credentials',
// User-authored fleet data MUST survive `mosaic update`'s re-seed.
// The framework seeds only fleet/examples + fleet/roles +
// fleet/roster.schema.json; the operator's roster, per-agent env, and
// heartbeat run dir stay user-owned. (Mirror of install.sh PRESERVE_PATHS.)
'fleet/*.yaml',
'fleet/agents',
'fleet/run',
] ]
: []; : [];

View File

@@ -7,6 +7,7 @@ import {
buildFleetCommsBlock, buildFleetCommsBlock,
renderPeerReach, renderPeerReach,
readFleetCommsBlock, readFleetCommsBlock,
resolveCommsBlock,
type CommsPeer, type CommsPeer,
} from './comms-onboarding.js'; } from './comms-onboarding.js';
@@ -185,3 +186,53 @@ describe('readFleetCommsBlock — situational (the context a spawned agent gets)
expect(readFleetCommsBlock(mkdtempSync(join(tmpdir(), 'noroster-')), 'orchestrator')).toBe(''); expect(readFleetCommsBlock(mkdtempSync(join(tmpdir(), 'noroster-')), 'orchestrator')).toBe('');
}); });
}); });
describe('resolveCommsBlock — `mosaic fleet comms-block <role>` emitter semantics', () => {
  // resolveCommsBlock wraps readFleetCommsBlock for the CLI emitter. Its contract
  // is that it never yields an empty block silently: an unknown role or a missing
  // roster comes back as !ok with a reason (the caller turns that into stderr +
  // exit 1), while the success path hands the block back verbatim so
  // `mosaic fleet comms-block <peer>` can be used as a preview.
  let mosaicHome: string;

  beforeEach(() => {
    mosaicHome = mkdtempSync(join(tmpdir(), 'mosaic-commsblk-'));
    const fleetDir = join(mosaicHome, 'fleet');
    mkdirSync(fleetDir, { recursive: true });
    writeFileSync(join(fleetDir, 'roster.yaml'), ROSTER);
  });

  afterEach(() => {
    rmSync(mosaicHome, { recursive: true, force: true });
  });

  it('returns ok + the cheat-sheet for a roster member', () => {
    const { ok, output, error } = resolveCommsBlock(mosaicHome, 'orchestrator', 'w-jarvis');
    expect(ok).toBe(true);
    expect(output).toContain('# Fleet Comms');
    expect(output).toContain('| enhancer |');
    expect(error).toBeUndefined();
  });

  it('fails loud (not ok + error naming the role) for a non-member — never silently empty', () => {
    const { ok, output, error } = resolveCommsBlock(mosaicHome, 'stranger', 'w-jarvis');
    expect(ok).toBe(false);
    expect(output).toBe('');
    expect(error).toContain('stranger');
  });

  it('fails loud when no roster exists at the mosaic home', () => {
    // A fresh temp dir with no fleet/roster.yaml inside it.
    const emptyHome = mkdtempSync(join(tmpdir(), 'mosaic-noroster-'));
    const outcome = resolveCommsBlock(emptyHome, 'orchestrator', 'w-jarvis');
    expect(outcome.ok).toBe(false);
    expect(outcome.error).toBeTruthy();
    rmSync(emptyHome, { recursive: true, force: true });
  });

  it('fails loud for a missing role argument', () => {
    const outcome = resolveCommsBlock(mosaicHome, undefined, 'w-jarvis');
    expect(outcome.ok).toBe(false);
    expect(outcome.error).toBeTruthy();
  });

  it('honors a host override so a peer can preview its own cross-host view', () => {
    // coder0-0 asks with its own host, so its self-identity line carries that host.
    const { ok, output } = resolveCommsBlock(mosaicHome, 'coder0-0', '10.1.10.37');
    expect(ok).toBe(true);
    expect(output).toContain('`[10.1.10.37:coder0-0]`');
  });
});

View File

@@ -179,5 +179,48 @@ export function readFleetCommsBlock(
}); });
} }
/**
 * Outcome of resolving a comms-block emit request for `mosaic fleet comms-block`.
 */
export interface CommsBlockResult {
  /** Whether a cheat-sheet was produced. A false value maps to stderr + a non-zero exit. */
  ok: boolean;
  /** The Fleet-Comms cheat-sheet text; empty unless `ok` is true. */
  output: string;
  /** Operator-facing explanation, provided when `ok` is false. */
  error?: string;
}
/**
 * Produce the Fleet-Comms cheat-sheet for an explicitly named <role>; this is
 * the function behind `mosaic fleet comms-block <role>`.
 *
 * readFleetCommsBlock answers '' on any miss so composeContract can quietly
 * no-op during a launch. This wrapper turns that empty answer into a visible
 * failure instead: a missing role argument, an unknown role, or an absent roster
 * all come back as `ok: false` with an operator-facing reason, which the CLI
 * reports on stderr with exit 1. That makes previewing any peer's view safe,
 * e.g. `mosaic fleet comms-block coder0-0`.
 *
 * @param mosaicHome - Mosaic home directory; the roster is looked up at
 *   `<mosaicHome>/fleet/roster.yaml` when composing the failure reason.
 * @param role - Role to resolve the cheat-sheet for; a falsy value is rejected.
 * @param fleetHost - Optional host override, forwarded to readFleetCommsBlock
 *   only when it is truthy.
 * @returns `{ ok: true, output }` with the block, or `{ ok: false, output: '', error }`.
 */
export function resolveCommsBlock(
  mosaicHome: string,
  role: string | undefined,
  fleetHost?: string,
): CommsBlockResult {
  const refuse = (error: string): CommsBlockResult => ({ ok: false, output: '', error });

  if (!role) return refuse('comms-block requires a <role> argument');

  // Leave the host argument off entirely when none was given, so
  // readFleetCommsBlock falls back to whatever default it applies itself.
  const sheet = !fleetHost
    ? readFleetCommsBlock(mosaicHome, role)
    : readFleetCommsBlock(mosaicHome, role, fleetHost);

  if (sheet) return { ok: true, output: sheet };

  // Empty result: tell "no roster at all" apart from "roster lacks this role".
  const rosterPath = join(mosaicHome, 'fleet', 'roster.yaml');
  const reason = existsSync(rosterPath)
    ? `role "${role}" is not a member of the fleet roster at ${rosterPath}`
    : `no fleet roster at ${rosterPath}`;
  return refuse(reason);
}
/** Default mosaic home (mirrors launch.ts), for callers that don't pass one. */ /** Default mosaic home (mirrors launch.ts), for callers that don't pass one. */
export const DEFAULT_MOSAIC_HOME_FOR_COMMS = join(homedir(), '.config', 'mosaic'); export const DEFAULT_MOSAIC_HOME_FOR_COMMS = join(homedir(), '.config', 'mosaic');

View File

@@ -7,7 +7,12 @@ import {
buildRelaunchCommands, buildRelaunchCommands,
readRosterAgentNames, readRosterAgentNames,
runFrameworkReseed, runFrameworkReseed,
refreshActiveFleetUnits,
readInstalledFrameworkVersion,
readBundledFrameworkVersion,
checkFrameworkDrift,
} from './update-checker.js'; } from './update-checker.js';
import { existsSync, readFileSync } from 'node:fs';
/** /**
* F3-m3 / R13: `mosaic update` re-seeds the framework + (opt-in) relaunches * F3-m3 / R13: `mosaic update` re-seeds the framework + (opt-in) relaunches
@@ -83,3 +88,111 @@ describe('runFrameworkReseed', () => {
rmSync(missing, { recursive: true, force: true }); rmSync(missing, { recursive: true, force: true });
}); });
}); });
describe('refreshActiveFleetUnits', () => {
  const AGENT_UNIT = 'mosaic-agent@.service';

  let sandbox: string;
  let mosaicHome: string;
  let configHome: string;
  let seededUnitsDir: string; // <mosaicHome>/systemd/user — the re-seeded copies
  let activeUnitsDir: string; // <configHome>/systemd/user — the dir being refreshed

  // Environment that points XDG_CONFIG_HOME at the sandboxed config dir.
  const sandboxEnv = (): NodeJS.ProcessEnv => ({ XDG_CONFIG_HOME: configHome });

  beforeEach(() => {
    sandbox = mkdtempSync(join(tmpdir(), 'mosaic-units-'));
    mosaicHome = join(sandbox, 'mosaic');
    configHome = join(sandbox, 'config');
    seededUnitsDir = join(mosaicHome, 'systemd', 'user');
    activeUnitsDir = join(configHome, 'systemd', 'user');
    mkdirSync(seededUnitsDir, { recursive: true });
    mkdirSync(activeUnitsDir, { recursive: true });
    // The re-seeded side carries the new unit content.
    writeFileSync(join(seededUnitsDir, AGENT_UNIT), 'NEW\n');
    writeFileSync(join(seededUnitsDir, 'mosaic-tmux-holder.service'), 'NEW\n');
  });

  afterEach(() => {
    rmSync(sandbox, { recursive: true, force: true });
  });

  it('refreshes active units when a fleet is already installed', () => {
    // A stale mosaic unit in the active dir is what marks the fleet as installed.
    const activeAgentUnit = join(activeUnitsDir, AGENT_UNIT);
    writeFileSync(activeAgentUnit, 'OLD\n');

    const outcome = refreshActiveFleetUnits(mosaicHome, sandboxEnv());

    expect(outcome.refreshed).toContain('mosaic-agent@.service');
    expect(readFileSync(activeAgentUnit, 'utf-8')).toBe('NEW\n');
  });

  it('is a no-op when no fleet is installed (active dir has no mosaic units)', () => {
    const outcome = refreshActiveFleetUnits(mosaicHome, sandboxEnv());

    expect(outcome.refreshed).toEqual([]);
    expect(existsSync(join(activeUnitsDir, AGENT_UNIT))).toBe(false);
  });
});
/**
 * #642: the framework must be re-seeded when the on-disk copy is older than the
 * bundled one, even when no package is reported outdated (i.e. the CLI was
 * upgraded outside `mosaic update`).
 */
describe('framework drift detection', () => {
  let sandbox: string;
  let home: string; // plays the role of ~/.config/mosaic
  let fw: string; // plays the role of the bundled framework root

  // Fixture writers: the on-disk version file and a minimal bundled install.sh.
  function writeInstalled(v: string): void {
    writeFileSync(join(home, '.framework-version'), v);
  }
  function writeBundled(v: string): void {
    writeFileSync(join(fw, 'install.sh'), `#!/usr/bin/env bash\nFRAMEWORK_VERSION=${v}\n`);
  }

  beforeEach(() => {
    sandbox = mkdtempSync(join(tmpdir(), 'mosaic-drift-'));
    home = join(sandbox, 'mosaic');
    fw = join(sandbox, 'framework');
    for (const dir of [home, fw]) {
      mkdirSync(dir, { recursive: true });
    }
  });

  afterEach(() => {
    // `home` sits directly under the sandbox, so this removes both stand-ins.
    rmSync(join(home, '..'), { recursive: true, force: true });
  });

  describe('readInstalledFrameworkVersion', () => {
    it('returns undefined when the version file is absent', () => {
      expect(readInstalledFrameworkVersion(home)).toBeUndefined();
    });

    it('parses the integer (tolerating surrounding whitespace)', () => {
      writeInstalled(' 3\n');
      expect(readInstalledFrameworkVersion(home)).toBe(3);
    });

    it('returns undefined for non-numeric content', () => {
      writeInstalled('not-a-number\n');
      expect(readInstalledFrameworkVersion(home)).toBeUndefined();
    });
  });

  describe('readBundledFrameworkVersion', () => {
    it('returns undefined when install.sh is absent', () => {
      expect(readBundledFrameworkVersion(fw)).toBeUndefined();
    });

    it('parses FRAMEWORK_VERSION=<n> from install.sh', () => {
      writeBundled('4');
      expect(readBundledFrameworkVersion(fw)).toBe(4);
    });
  });

  describe('checkFrameworkDrift', () => {
    it('reports drift when on-disk is older than bundled', () => {
      writeInstalled('3');
      writeBundled('4');
      const report = checkFrameworkDrift(home, fw);
      expect(report).toEqual({ drifted: true, installed: 3, bundled: 4 });
    });

    it('no drift when versions match', () => {
      writeInstalled('4');
      writeBundled('4');
      const report = checkFrameworkDrift(home, fw);
      expect(report).toMatchObject({ drifted: false });
    });

    it('no drift when on-disk is newer than bundled', () => {
      writeInstalled('5');
      writeBundled('4');
      const report = checkFrameworkDrift(home, fw);
      expect(report).toMatchObject({ drifted: false });
    });

    it('no drift (conservative) when a version cannot be read', () => {
      // Only the bundled side exists; the installed version file is missing.
      writeBundled('4');
      const report = checkFrameworkDrift(home, fw);
      expect(report).toMatchObject({ drifted: false, bundled: 4 });
    });
  });
});

View File

@@ -14,7 +14,14 @@
*/ */
import { execSync } from 'node:child_process'; import { execSync } from 'node:child_process';
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs'; import {
existsSync,
mkdirSync,
readFileSync,
writeFileSync,
readdirSync,
copyFileSync,
} from 'node:fs';
import { homedir } from 'node:os'; import { homedir } from 'node:os';
import { dirname, join, resolve } from 'node:path'; import { dirname, join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url'; import { fileURLToPath } from 'node:url';
@@ -514,6 +521,75 @@ export function runFrameworkReseed(
} }
} }
// ─── Framework drift detection (#642) ────────────────────────────────────────
//
// `mosaic update` only re-seeds the framework when the @mosaicstack/mosaic
// package itself is upgraded *within that command*. When the CLI is upgraded
// some OTHER way — a direct `npm i -g @mosaicstack/mosaic`, or an upgrade run
// where only sibling packages were outdated — the framework files in
// ~/.config/mosaic stay stale and shipped launcher/runtime fixes never
// activate. Comparing the on-disk framework schema version against the version
// bundled in the installed package detects exactly that situation.
/**
 * Read the framework schema version recorded on disk
 * (`<mosaicHome>/.framework-version`, by default ~/.config/mosaic/.framework-version).
 *
 * The file must hold a single non-negative integer, optionally surrounded by
 * whitespace. Anything else — trailing garbage such as `3abc`, a decimal, a
 * sign, an empty file — is treated as unreadable instead of being partially
 * parsed, so a corrupted file can never pass for a valid version.
 *
 * @param mosaicHome - Mosaic home directory that contains `.framework-version`.
 * @returns The recorded version, or `undefined` when the file is missing,
 *   cannot be read, or does not contain a plain integer.
 */
export function readInstalledFrameworkVersion(
  mosaicHome = join(homedir(), '.config', 'mosaic'),
): number | undefined {
  const versionFile = join(mosaicHome, '.framework-version');
  if (!existsSync(versionFile)) return undefined;
  try {
    const raw = readFileSync(versionFile, 'utf-8').trim();
    // Digits only: parseInt() would silently accept "3abc" or "4.9" by
    // stopping at the first non-digit, and would let negative numbers through.
    if (!/^\d+$/.test(raw)) return undefined;
    const version = Number(raw);
    return Number.isSafeInteger(version) ? version : undefined;
  } catch {
    // The file vanished or is unreadable between the check and the read.
    return undefined;
  }
}
/**
 * Read the framework schema version shipped in the installed package by parsing
 * the `FRAMEWORK_VERSION` assignment out of the bundled install.sh (the
 * authoritative source the installer writes to .framework-version).
 *
 * The first line-leading assignment wins. Besides the bare `FRAMEWORK_VERSION=4`
 * form, the equivalent shell spellings are accepted: a quoted value
 * (`FRAMEWORK_VERSION="4"` / `'4'`) and an `export`, `readonly` or `declare …`
 * prefix. Commented-out assignments are ignored because only whitespace (or one
 * of those keywords) may precede the name.
 *
 * @param frameworkRoot - Directory expected to contain `install.sh`.
 * @returns The bundled version, or `undefined` when install.sh is missing,
 *   unreadable, or has no recognisable integer assignment.
 */
export function readBundledFrameworkVersion(
  frameworkRoot = resolveBundledFrameworkRoot(),
): number | undefined {
  const installer = join(frameworkRoot, 'install.sh');
  if (!existsSync(installer)) return undefined;
  try {
    // Group 1 is the optional opening quote (which must be closed by the same
    // character), group 2 the digits of the version.
    const assignment =
      /^\s*(?:(?:export|readonly|declare(?:[ \t]+-[A-Za-z]+)*)[ \t]+)?FRAMEWORK_VERSION=(["']?)(\d+)\1/m;
    const digits = readFileSync(installer, 'utf-8').match(assignment)?.[2];
    if (!digits) return undefined;
    const version = parseInt(digits, 10);
    return Number.isFinite(version) ? version : undefined;
  } catch {
    // The installer vanished or is unreadable between the check and the read.
    return undefined;
  }
}
/** Comparison of the on-disk framework version against the bundled one. */
export interface FrameworkDrift {
  /** Set only when both versions could be determined and the on-disk one is the older. */
  drifted: boolean;
  /** Version found on disk, when it could be read. */
  installed?: number;
  /** Version shipped with the installed package, when it could be read. */
  bundled?: number;
}
/**
 * Report whether the framework on disk lags behind the framework bundled with
 * the installed CLI (#642).
 *
 * Deliberately conservative: when either version cannot be determined the
 * answer is "no drift", so a missing or unreadable version file never sets off
 * an unexpected re-seed.
 *
 * @param mosaicHome - Mosaic home directory holding the on-disk version record.
 * @param frameworkRoot - Root of the bundled framework.
 * @returns The drift flag together with whichever versions could be read.
 */
export function checkFrameworkDrift(
  mosaicHome = join(homedir(), '.config', 'mosaic'),
  frameworkRoot = resolveBundledFrameworkRoot(),
): FrameworkDrift {
  const installed = readInstalledFrameworkVersion(mosaicHome);
  const bundled = readBundledFrameworkVersion(frameworkRoot);

  // Without both numbers there is nothing to compare.
  if (installed === undefined || bundled === undefined) {
    return { drifted: false, installed, bundled };
  }
  return { drifted: installed < bundled, installed, bundled };
}
/** /**
* Best-effort parse of the fleet roster for agent names (used to relaunch * Best-effort parse of the fleet roster for agent names (used to relaunch
* durable agents after a re-seed). Returns [] when no roster exists. * durable agents after a re-seed). Returns [] when no roster exists.
@@ -536,6 +612,47 @@ export function readRosterAgentNames(mosaicHome = join(homedir(), '.config', 'mo
return names; return names;
} }
/**
 * Refresh the ACTIVE systemd user units from the freshly re-seeded copies.
 *
 * The re-seed updates `<mosaicHome>/systemd/user/*.service`, but the units
 * systemd actually runs live under `$XDG_CONFIG_HOME/systemd/user/`
 * (`~/.config/systemd/user/` by default). Without this copy, shipped unit fixes
 * (e.g. the socket-env change) never take effect after `mosaic update` until
 * `mosaic fleet install` is re-run.
 *
 * Best-effort and scoped:
 * - Nothing is touched unless a fleet is already installed, i.e. the active
 *   directory already carries `mosaic-*.service` units.
 * - The function does not throw for filesystem errors; they are reported
 *   through `ok: false` and `reason` instead.
 * - `systemctl --user daemon-reload` runs only when at least one unit was
 *   copied, and its failure (non-systemd host, no session bus) is non-fatal.
 *
 * @param mosaicHome - Mosaic home directory holding the re-seeded units.
 * @param env - Environment consulted for `XDG_CONFIG_HOME`.
 * @returns `refreshed` lists the unit files copied. `ok` is false when the unit
 *   directories could not be listed or any unit failed to copy; `reason` then
 *   says why.
 */
export function refreshActiveFleetUnits(
  mosaicHome = join(homedir(), '.config', 'mosaic'),
  env: NodeJS.ProcessEnv = process.env,
): { refreshed: string[]; ok: boolean; reason?: string } {
  const isMosaicUnit = (file: string): boolean =>
    file.startsWith('mosaic-') && file.endsWith('.service');

  const src = join(mosaicHome, 'systemd', 'user');
  // `||` rather than `??` on purpose: the XDG spec treats an EMPTY
  // XDG_CONFIG_HOME like an unset one. With `??` an empty value produced the
  // cwd-relative destination "systemd/user".
  const configHome = env['XDG_CONFIG_HOME'] || join(homedir(), '.config');
  const dest = join(configHome, 'systemd', 'user');

  if (!existsSync(src) || !existsSync(dest)) return { refreshed: [], ok: true };

  let units: string[];
  try {
    // Only refresh when a fleet is already installed (active dir has mosaic units).
    if (!readdirSync(dest).some(isMosaicUnit)) return { refreshed: [], ok: true };
    units = readdirSync(src).filter(isMosaicUnit);
  } catch (err: unknown) {
    // E.g. one of the paths exists but is not a readable directory.
    const detail = err instanceof Error ? err.message : String(err);
    return { refreshed: [], ok: false, reason: `cannot list systemd user units: ${detail}` };
  }

  const refreshed: string[] = [];
  const failed: string[] = [];
  for (const unit of units) {
    try {
      copyFileSync(join(src, unit), join(dest, unit));
      refreshed.push(unit);
    } catch {
      // Best-effort per unit: keep going, but remember what could not be copied.
      failed.push(unit);
    }
  }

  if (refreshed.length > 0) {
    try {
      execSync('systemctl --user daemon-reload', { stdio: 'ignore', timeout: 15_000 });
    } catch {
      // non-systemd host or no session bus — non-fatal
    }
  }

  if (failed.length > 0) {
    return { refreshed, ok: false, reason: `failed to refresh: ${failed.join(', ')}` };
  }
  return { refreshed, ok: true };
}
/** Build the per-agent systemd relaunch commands (drain+relaunch via restart). */ /** Build the per-agent systemd relaunch commands (drain+relaunch via restart). */
export function buildRelaunchCommands(agentNames: string[]): string[][] { export function buildRelaunchCommands(agentNames: string[]): string[][] {
return agentNames.map((name) => [ return agentNames.map((name) => [