Compare commits

..

13 Commits

Author SHA1 Message Date
Jarvis
9da71bd861 ci: switch pipelines to pre-baked ci-base image (consumer) [Phase 1b]
Some checks failed
ci/woodpecker/push/ci Pipeline failed
ci/woodpecker/pr/ci Pipeline was successful
Consumer half of the Woodpecker CI cache work (#634). Re-scoped from the
original combined change: the image recipe (Dockerfile.ci, ci-image.yml)
now lives in the producer PR #637. This branch only flips the consumers.

- ci.yml / publish.yml: pull
  git.mosaicstack.dev/mosaicstack/stack/ci-base:latest for the install step
  and resolve from the baked pnpm store via --prefer-offline (drops the
  per-run apk add + cold network fetch).
- framework monorepo template: single cached install instead of npm ci per
  step, so scaffolded repos inherit the fix.

B2 fix (blocker): pin store-dir in root .npmrc to
/root/.local/share/pnpm/store — the exact path Dockerfile.ci warms — so the
pipeline install actually consumes the baked store instead of repopulating
a fresh one. The existing @mosaicstack registry line is preserved.

BLOCKED ON: PR #637 merge + a manual ci-image prime of ci-base:latest on
main. Until the image is primed this branch's CI is red (it pulls an image
that does not exist yet). Do not merge until a green re-run after priming.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-22 16:50:39 -05:00
bf2a6745c8 fix(install): preserve user fleet data on re-seed + refresh active units (CRITICAL) (#632)
All checks were successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-06-22 21:38:09 +00:00
d539d61e0e refactor(fleet): rename tmux socket mosaic-factory → mosaic-fleet (#630)
All checks were successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-06-22 21:08:43 +00:00
3f69d45334 docs(fleet): consolidate north-star doctrine (budget + control plane + identity) (#629)
Some checks failed
ci/woodpecker/push/publish Pipeline was canceled
ci/woodpecker/push/ci Pipeline was canceled
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-06-22 21:08:41 +00:00
e2336bb0ca chore(release): mosaic CLI 0.0.40 (#624)
All checks were successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
2026-06-22 19:49:45 +00:00
7342415a32 fix(fleet): consume model_hint + fix socket-default trap (stand-up fixes) (#627)
All checks were successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-06-22 19:18:01 +00:00
095e19443b feat(fleet): onboarding-injection — comms cheat-sheet + peer roster per agent (#621)
All checks were successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-06-22 17:54:54 +00:00
fabc413407 feat(fleet): F4 Phase 2a — Matrix CS-API connector client + factory (#618)
All checks were successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-06-22 16:48:17 +00:00
858d90329d feat(fleet): F4 Phase 1 — chat connector abstraction + Matrix design (#617)
All checks were successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-06-22 16:14:32 +00:00
2bf66136e4 feat(fleet): enhancer role + two-agent floor (orchestrator + enhancer) (#615)
All checks were successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-06-22 13:15:59 +00:00
4434c3c481 docs(fleet): orchestrator+enhancer two-agent floor + role library + Discord plugin north-star (#613)
Some checks failed
ci/woodpecker/push/publish Pipeline was canceled
ci/woodpecker/push/ci Pipeline was canceled
2026-06-22 13:15:05 +00:00
dd0a0d38c6 ci(publish): gate kaniko image builds + publish on changed paths (CI throughput) (#619)
Some checks failed
ci/woodpecker/push/publish Pipeline was canceled
ci/woodpecker/push/ci Pipeline was canceled
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-06-22 13:14:31 +00:00
d46ac40890 fix(fleet): boot-survival symmetry — disable-on-remove + add-enable + init-R5 (#612)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
Co-authored-by: Jason Woltje <jason@diversecanvas.com>
Co-committed-by: Jason Woltje <jason@diversecanvas.com>
2026-06-22 08:12:58 +00:00
45 changed files with 1698 additions and 201 deletions

4
.npmrc
View File

@@ -1 +1,5 @@
@mosaicstack:registry=https://git.mosaicstack.dev/api/packages/mosaicstack/npm/
# Pin the pnpm store to the same path the ci-base image warms (Dockerfile.ci),
# so the pipeline `pnpm install --prefer-offline` consumes the baked store
# instead of repopulating a fresh one.
store-dir=/root/.local/share/pnpm/store

View File

@@ -1,5 +1,9 @@
# &node_image is the pre-baked CI base built by .woodpecker/ci-image.yml:
# node:22-alpine + python3/make/g++/postgresql-client + pnpm + a warm pnpm
# store. The install step resolves from the baked store (--prefer-offline)
# instead of paying a ~731s cold fetch + native compile every run.
variables:
- &node_image 'node:22-alpine'
- &node_image 'git.mosaicstack.dev/mosaicstack/stack/ci-base:latest'
- &enable_pnpm 'corepack enable'
when:
@@ -15,8 +19,9 @@ steps:
image: *node_image
commands:
- corepack enable
- apk add --no-cache python3 make g++
- pnpm install --frozen-lockfile
# python3/make/g++ are baked into ci-base; --prefer-offline resolves from
# the baked pnpm store.
- pnpm install --frozen-lockfile --prefer-offline
# Blocking gate: public framework package must contain no operator-specific
# personal data or private $HOME defaults. Runs early (no node_modules needed).
@@ -64,8 +69,7 @@ steps:
DATABASE_URL: postgresql://mosaic:mosaic@ci-postgres:5432/mosaic
commands:
- *enable_pnpm
# Install postgresql-client for pg_isready
- apk add --no-cache postgresql-client
# postgresql-client (pg_isready) is baked into ci-base.
# Wait up to 60s for CI postgres to be ready; fail fast if it never comes up.
- |
ready=0

View File

@@ -2,8 +2,27 @@
# Runs only on main branch push/tag
variables:
- &node_image 'node:22-alpine'
# Pre-baked CI base (see .woodpecker/ci-image.yml): node:22-alpine +
# toolchain + warm pnpm store. Eliminates the second cold install that the
# publish pipeline would otherwise pay.
- &node_image 'git.mosaicstack.dev/mosaicstack/stack/ci-base:latest'
- &enable_pnpm 'corepack enable'
# Heavy kaniko image builds (~25 min) — gate them so a merge that only touches
# the npm-only CLI (@mosaicstack/mosaic) or docs does NOT rebuild the platform
# images (gateway/appservice/web do not depend on @mosaicstack/mosaic). Releases
# (tags) always build everything. Exclude-list keeps the default SAFE: any
# non-excluded change still builds, so no transitive dep can silently go stale.
# (Woodpecker: `when` entries are OR'd; `path` applies to push/PR only — hence
# the separate `event: tag` entry.)
- &image_build_when
- event: tag
- event: [push, manual]
branch: main
path:
exclude:
- 'packages/mosaic/**'
- 'docs/**'
- '**/*.md'
- '.woodpecker/**'
when:
- branch: [main]
@@ -14,7 +33,8 @@ steps:
image: *node_image
commands:
- corepack enable
- pnpm install --frozen-lockfile
# Resolve from the baked pnpm store instead of a cold network fetch.
- pnpm install --frozen-lockfile --prefer-offline
build:
image: *node_image
@@ -26,6 +46,15 @@ steps:
publish-npm:
image: *node_image
# Publish only when a publishable package changed (or on a release tag); a
# pure-docs merge runs no publish. Cheap step, but gated for cleanliness.
when:
- event: tag
- event: [push, manual]
branch: main
path:
include:
- 'packages/**'
environment:
NPM_TOKEN:
from_secret: gitea_token
@@ -91,6 +120,7 @@ steps:
build-gateway:
image: gcr.io/kaniko-project/executor:debug
when: *image_build_when
environment:
REGISTRY_USER:
from_secret: gitea_username
@@ -116,6 +146,7 @@ steps:
build-appservice:
image: gcr.io/kaniko-project/executor:debug
when: *image_build_when
environment:
REGISTRY_USER:
from_secret: gitea_username
@@ -141,6 +172,7 @@ steps:
build-web:
image: gcr.io/kaniko-project/executor:debug
when: *image_build_when
environment:
REGISTRY_USER:
from_secret: gitea_username

View File

@@ -59,6 +59,30 @@ Active workstream is **W1 — Federation v1**. Workers should:
- Status: implemented + tested. Closes R13: `mosaic update` now re-seeds the framework (data-safe MOSAIC_SYNC_ONLY) after the CLI install so shipped launcher/runtime changes activate; `--relaunch` restarts rostered agents; `--no-reseed` opts out. Detail: scratchpads/f3-m3-update-reseed.md.
## Fleet-polish bundle — boot-survival symmetry (#611) — feat/fleet-polish-bundle
- Status: MERGED to main. disable-on-remove (boot-resurrection bug, TDD) + add-enable + init-R5 hard guarantee. 4 new + 147 existing fleet tests green. Detail: scratchpads/fleet-polish-bundle.md.
## Fleet enhancer role + two-agent floor (#614) — feat/fleet-enhancer-floor
- Status: MERGED to main. enhancer added to 4 presets; init guarantees 1 orchestrator + >=1 enhancer; remove protects the sole enhancer; enhancer role doc. 155 fleet tests green. Detail: scratchpads/fleet-enhancer-floor.md.
## F4 — Orchestrator chat connector + Matrix (#616) — feat/f4-matrix-connector
- Status: Phase 1 done (abstraction + scaffold). Connector interface (send/subscribe/health) + registry + roster connector schema + design doc; tmux default/back-compat; matrix factory + CS-API client landed (Phase 2a, #617-stacked); 20 connector tests green; no fleet.ts changes (independent of #615). Detail: scratchpads/f4-matrix-connector.md.
- Status: Phase 1 MERGED (#617: connector interface send/subscribe/health + registry + roster schema + design). Phase 2a (#618): Matrix CS-API client + factory. 20 connector tests green; no fleet.ts changes. Remaining Phase 2: init/configure connector-selection UX + roster wiring, systemd launch wiring, Conduit deploy guide. Detail: scratchpads/f4-matrix-connector.md.
## Fleet onboarding-injection — comms cheat-sheet + peer roster (#620) — feat/fleet-comms-onboarding
- Status: implemented + tested. Injects # Fleet Comms (peer roster + cross-host agent-send commands + FLIP-reply + --verify) into each spawned fleet agent via composeContract; optional per-agent host/ssh/socket roster fields (socket: named → -L, unset → default socket no -L). 10 + 2 tests green. Detail: scratchpads/fleet-comms-onboarding.md.
## Fleet stand-up fixes — model_hint→--model + socket-default trap (#626) — feat/fleet-standup-fixes
- Status: implemented + tested. FIX1 model_hint→MOSAIC_AGENT_MODEL→--model. FIX2 absent socket = default tmux socket (no -L) across parse/spawn/systemd-unit/observe (socketArgs helper, bare-empty shellEnvValue, conditional -L). 158 fleet tests green; shipped presets unaffected (explicit socket_name). Detail: scratchpads/fleet-standup-fixes.md.
## north-star doctrine consolidation — doc PR — feat/north-star-doctrine
- Status: applied Mos's consolidated merge-map to docs/fleet/north-star.md (budget governance + control plane/central register + 200k cap + delegation + unified-identity Fleet + role-based naming + tmux security + drift re-captures). Doctrine only; #622/#623/#625/#628 out-of-scope. Conflict checklist green. Detail: scratchpads/north-star-doctrine.md.
## #631 — re-seed preserves user fleet data (CRITICAL) — fix/631-reseed-preserves-fleet-data
- Status: implemented + tested. PRIMARY: install.sh PRESERVE_PATHS += fleet/\*.yaml + fleet/agents + fleet/run (glob-aware cp-fallback); TS parity. SECONDARY: refreshActiveFleetUnits propagates unit fixes to ~/.config/systemd/user on mosaic update. bash F6 + TS + unit tests green. Detail: scratchpads/631-reseed-preserves-fleet.md.

View File

@@ -7,10 +7,10 @@
## Problem
The durable tmux fleet runs on the isolated `mosaic-factory` socket. That isolation
The durable tmux fleet runs on the isolated `mosaic-fleet` socket. That isolation
(which protects the operator's default tmux) makes the fleet **invisible** to default
tooling, and truth is split across three planes no single command joins — systemd
(`systemctl --user`), tmux (`-L mosaic-factory`), and the process tree (`pstree`).
(`systemctl --user`), tmux (`-L mosaic-fleet`), and the process tree (`pstree`).
`agent tail` (`capture-pane`) returns **blank for full-screen TUIs**, and `agent send`
confirms only keystroke injection, not acceptance. Net: the operator has near-zero
observability and no safe way to watch a session.
@@ -56,7 +56,7 @@ observability and no safe way to watch a session.
## Acceptance criteria
- `mosaic fleet ps` shows all 5 live sessions on `mosaic-factory` with correct
- `mosaic fleet ps` shows all 5 live sessions on `mosaic-fleet` with correct
pane/pid/idle and flags the dogfood **drift** (`canary-pi` runtime=pi but pane runs
`dogfood-agent.py`) and the **boot-enable** gap (active but disabled).
- Killing one agent's pane flips its row to dead/stale within one `interval`.
@@ -72,7 +72,7 @@ observability and no safe way to watch a session.
- Unit/CLI specs in `packages/mosaic/src/commands/fleet.spec.ts` (and a new
`fleet-ps`/`watch`/`send-verify` spec) using the injected `CommandRunner` to assert
exact tmux/systemd command construction and JSON shape (tenant+host present).
- Situational: run against the live `mosaic-factory` fleet; capture `fleet ps` output,
- Situational: run against the live `mosaic-fleet` fleet; capture `fleet ps` output,
a kill-and-detect cycle, a read-only `watch`, and a `send --verify` pass/fail pair.
## Known limitations

View File

@@ -7,18 +7,18 @@
> Mission: `mvp-20260312` · PRD: [docs/fleet/PRD.md](./PRD.md) · North star: [docs/fleet/north-star.md](./north-star.md)
> Status: `not-started` | `in-progress` | `done` | `blocked` | `failed`
| id | status | description | depends_on | agent | pr | notes |
| ------------- | ----------- | ------------------------------------------------------------------------------------------------------------------ | --------------------- | ----------- | --- | ----------------------------------------------------------------------------------------------------------------------------- |
| FLEET-OBS-000 | done | Plan: north-star + Phase-2 PRD + workstream scaffolding | — | lead | — | persisted 2026-06-20 on `feat/fleet-observability` |
| FLEET-OBS-001 | done | Heartbeat protocol v1 spec finalized in PRD + framework doc | FLEET-OBS-000 | lead | — | file-based `~/.config/mosaic/fleet/run/<agent>.hb`; spec in PRD |
| FLEET-OBS-002 | in-progress | Implement heartbeat responder in `dogfood-agent.py` | FLEET-OBS-001 | fleet-coder | — | dispatched to ad-hoc `mosaic yolo` fleet agent (dogfood) |
| FLEET-OBS-003 | done | `mosaic fleet ps` — join systemd+tmux+proc+idle+heartbeat; tenant+host tagged; drift + boot-enable flags; `--json` | FLEET-OBS-001 | worker | — | commit ab47831; LIVE-verified on mosaic-factory; caught canary-pi DRIFT + BOOT-ENABLE. Polish: idleSeconds parse returns null |
| FLEET-OBS-004 | done | `mosaic agent watch <name>` — read-only join (no resize, no keystrokes) | FLEET-OBS-000 | worker | — | `attach -r`; verb wired |
| FLEET-OBS-005 | done | `mosaic agent send --verify` — delivery/acceptance receipt | FLEET-OBS-000 | worker | — | --verify flag; draft-heuristic verify |
| FLEET-OBS-006 | done | CLI specs for ps/watch/send-verify (tenant+host shape, command construction) | FLEET-OBS-003,004,005 | worker | — | 62 tests green (31 new); re-verified by lead |
| FLEET-OBS-007 | not-started | Framework doc: fleet observability guide + verbs | FLEET-OBS-003,004,005 | lead | — | `docs/guides/` or `framework/tools/.../README` |
| FLEET-OBS-008 | not-started | Independent review + dogfood verification on live fleet | FLEET-OBS-002..007 | reviewer | — | author ≠ reviewer; capture evidence in scratchpad |
| FLEET-OBS-009 | not-started | Open PR → green CI (queue guard) → squash-merge → close `fleet-observability-1` | FLEET-OBS-008 | lead | — | trunk merge; no direct push to main |
| id | status | description | depends_on | agent | pr | notes |
| ------------- | ----------- | ------------------------------------------------------------------------------------------------------------------ | --------------------- | ----------- | --- | --------------------------------------------------------------------------------------------------------------------------- |
| FLEET-OBS-000 | done | Plan: north-star + Phase-2 PRD + workstream scaffolding | — | lead | — | persisted 2026-06-20 on `feat/fleet-observability` |
| FLEET-OBS-001 | done | Heartbeat protocol v1 spec finalized in PRD + framework doc | FLEET-OBS-000 | lead | — | file-based `~/.config/mosaic/fleet/run/<agent>.hb`; spec in PRD |
| FLEET-OBS-002 | in-progress | Implement heartbeat responder in `dogfood-agent.py` | FLEET-OBS-001 | fleet-coder | — | dispatched to ad-hoc `mosaic yolo` fleet agent (dogfood) |
| FLEET-OBS-003 | done | `mosaic fleet ps` — join systemd+tmux+proc+idle+heartbeat; tenant+host tagged; drift + boot-enable flags; `--json` | FLEET-OBS-001 | worker | — | commit ab47831; LIVE-verified on mosaic-fleet; caught canary-pi DRIFT + BOOT-ENABLE. Polish: idleSeconds parse returns null |
| FLEET-OBS-004 | done | `mosaic agent watch <name>` — read-only join (no resize, no keystrokes) | FLEET-OBS-000 | worker | — | `attach -r`; verb wired |
| FLEET-OBS-005 | done | `mosaic agent send --verify` — delivery/acceptance receipt | FLEET-OBS-000 | worker | — | --verify flag; draft-heuristic verify |
| FLEET-OBS-006 | done | CLI specs for ps/watch/send-verify (tenant+host shape, command construction) | FLEET-OBS-003,004,005 | worker | — | 62 tests green (31 new); re-verified by lead |
| FLEET-OBS-007 | not-started | Framework doc: fleet observability guide + verbs | FLEET-OBS-003,004,005 | lead | — | `docs/guides/` or `framework/tools/.../README` |
| FLEET-OBS-008 | not-started | Independent review + dogfood verification on live fleet | FLEET-OBS-002..007 | reviewer | — | author ≠ reviewer; capture evidence in scratchpad |
| FLEET-OBS-009 | not-started | Open PR → green CI (queue guard) → squash-merge → close `fleet-observability-1` | FLEET-OBS-008 | lead | — | trunk merge; no direct push to main |
## Proposed MVP rollup row (for the MVP orchestrator — not written by this workstream)

View File

@@ -55,14 +55,22 @@ The Fleet inherits — does not re-invent — the MVP's hard requirements:
One **definition** is the source of truth; the **session** is how it runs.
| Layer | Owner | Phase-2 reality | Destination |
| -------------------------------- | ------------------------------------------------------------------------------------------- | ------------------------------------------------------ | ------------------------------------------------------- |
| **Definition + identity + auth** | gateway / `mosaic-as` (scoped tokens, #541) | `roster.yaml` (tenant-tagged) | one definition; `mosaic agent --new` materializes it |
| **Tenancy boundary** | **Linux uid per tenant** (linger, own `systemd --user`, own socket, own `~/.config/mosaic`) | one tenant: `jarvis` = tenant zero | uid-per-tenant; federation aggregates across hosts |
| **Runtime** | per-tenant tmux session on isolated socket | dogfood stub sessions (live now on `mosaic-factory`) | claude/codex/pi/opencode TUIs |
| **Liveness** | **heartbeat protocol** every runtime answers | protocol defined + dogfood stub answers it | all runtimes answer; "healthy" ≠ "pane alive" |
| **Observation** | read-only `watch` (native tmux) + `pipe-pane` stream | CLI `watch`/`ps`; explicit opt-in `attach` for control | + auth-gated webUI streams |
| **Control plane** | **federation** across hosts × tenants | records already carry `tenant_id` + `host` | federated gateways expose fleet state; webUI in Phase 5 |
| Layer | Owner | Phase-2 reality | Destination |
| -------------------------------- | ------------------------------------------------------------------------------------------- | ------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------- |
| **Definition + identity + auth** | gateway / `mosaic-as` (scoped tokens, #541) | `roster.yaml` (tenant-tagged) | one definition; `mosaic agent --new` materializes it |
| **Tenancy boundary** | **Linux uid per tenant** (linger, own `systemd --user`, own socket, own `~/.config/mosaic`) | one tenant: `jarvis` = tenant zero | uid-per-tenant; federation aggregates across hosts |
| **Runtime** | per-tenant tmux session on isolated socket | dogfood stub sessions (live now on `mosaic-factory`) | claude/codex/pi/opencode TUIs |
| **Liveness** | **heartbeat protocol** every runtime answers | protocol defined + dogfood stub answers it | all runtimes answer; "healthy" ≠ "pane alive" |
| **Observation** | read-only `watch` (native tmux) + `pipe-pane` stream | CLI `watch`/`ps`; explicit opt-in `attach` for control | + auth-gated webUI streams |
| **Control plane** | **federation** across hosts × tenants | records already carry `tenant_id` + `host` | federated gateways expose fleet state; webUI in Phase 5 |
| **Central register** | Postgres `fleet` schema (gateway instance); access via gateway API only | _none in PoC_ (files + `roster.yaml`) | agents, missions, tasks, heartbeats, spend — single network-accessible SSOT; docs = generated projections |
| **Budget / spend governance** | **per-tenant budget policy** ingested by the orchestrator + routing layer | none today (spend is unmetered) | usage-vs-limit feedback ingested; spend auto-paced to the limit window; per-provider/per-account/concurrency/API-$ budgets enforced |
> **PoC socket hygiene:** the PoC fleet runs on the **default tmux socket** (no `-L`).
> The named production-isolation socket is **`mosaic-fleet`** (matches the product brand);
> an absent roster `socket_name` means the default socket everywhere (spawn, `fleet ps`,
> onboarding cheat-sheet). The legacy dogfood canary still runs on the old `mosaic-factory`
> socket pending migration.
## Operating model (inherited, not reinvented)
@@ -73,6 +81,37 @@ diff-sanity → squash-merge → verify), **decide-and-inform** cadence, and a d
this model. See `mosaicstack-aiguide` whitepapers 01 (inter-agent comms) and 03
(orchestration model) for the rationale.
## Fleet roster — the two-agent floor and the role library
A fleet is **never a single agent**. The minimum viable fleet is **two**:
| Role | Mandate | Boundaries |
| ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------ |
| **Orchestrator** | The user's **single point of contact**. Owns the general flow, keeps agentic actions on-target, and **adds/removes agents from the fleet at will** to meet goals and user needs. Exactly **one** per fleet (the existing R5 invariant). | Delegates source work; never the sole worker. |
| **Enhancer** | The fleet's **continuous-improvement loop**. Monitors fleet activity, analyzes for enhancements/optimizations, builds a **plan of remediation**, and — **with the orchestrator** — upgrades fleet capability: tool creation/repair, skills, harness improvements, and **bug reports filed to Mosaic Stack** for proper remediation. Recommends which agents are needed. | **Does not code, review code, or perform delivery tasks.** Improvement and diagnosis only. |
> **Why two, not one:** the orchestrator drives delivery; the enhancer makes the fleet
> _get better at delivering_ over time. The enhancer is how the fleet self-heals its tools,
> skills, and harnesses, and how real defects flow back to Mosaic Stack as bug reports.
> Together they are the irreducible core — every other role is added on demand.
A **general** fleet starts at this floor: the orchestrator (advised by the enhancer)
materializes whatever roles prove necessary over the mission's life. Specialized presets
(coding, research, etc.) seed additional roles up front, but all reduce to the same two-agent
spine plus an on-demand **role library**:
| Role profile | Purpose |
| ------------------- | --------------------------------------------------------------------------------- |
| **orchestrator** | point of contact, flow control, fleet composition (1 per fleet) |
| **enhancer** | fleet monitoring, optimization, tool/skill/harness upgrades, upstream bug reports |
| **coder** | implementation (worker; stops at PR-open) |
| **code review** | independent code review gate |
| **security review** | security/auth/secret review gate |
| **research** | investigation, synthesis, options analysis |
| **board** | deliberation panel — moonshot, contrarian, technical, business, financial lenses |
| **operations** | infra, deploy, health, incident response |
| _…extensible_ | new profiles added as missions demand (orchestrator + enhancer decide) |
## Invariants — "maximal vision, incremental delivery, zero foreclosure"
Every artifact, starting Phase 2, MUST:
@@ -82,6 +121,67 @@ Every artifact, starting Phase 2, MUST:
3. Define **healthy = answered a heartbeat within N seconds**, never just "pane alive".
4. Make **observation read-only by default**; control is an explicit, separate, opt-in verb.
> **OPS INVARIANT — runtime agents need a real TTY.** Claude/Codex/pi/opencode agents
> cannot be bare-launched from a systemd `ExecStart`; a durable harness with a real PTY is
> required. This is **why `start-agent-session.sh` launches into tmux** and uses a
> `MOSAIC_AGENT_COMMAND` override rather than running the runtime directly under systemd.
## Budget & token governance (first-class fleet concern)
Spend is a fleet-level resource, not a per-agent afterthought. The fleet treats token
and API-dollar budget the way it treats liveness: a signal every runtime exposes and the
control plane is accountable for. This rides the same primitives as everything else —
`tenant_id` + `host` on every spend record, **read-only metering by default**, and the
**federation** layer as the cross-host aggregation point (W1) — so budgeting is zero-foreclosure
from day one even while one tenant exists.
**Two spend regimes, one policy surface:**
| Regime | Feedback signal | Fleet obligation |
| ------------------------------------------------------- | ------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------- |
| **OAuth-subscription runtimes** (Claude sub, Codex sub) | runtime exposes **current-usage-vs-limit** within a rolling limit window | **ingest** the signal per sub-account; **auto-pace** agentic spend so the window is not exhausted early |
| **API-token runtimes** (metered per token) | provider billing / token counts | enforce **hard $-spend ceilings**; on breach, **downgrade → queue → refuse** (below) |
**Auto-pacing law (OAuth subs) — EVEN-SPREAD default (Jason override, 2026-06-22):** the fleet
paces agentic token spend to consume the limit window **evenly over remaining time**:
target rate = _(remaining usage available)_ ÷ _(remaining time in the window)_. Example: 100% of
a 7-day window = **~14.285%/day**; the system tracks current usage and continuously re-splits the
remainder evenly to hold pace. **Anticipated token-spend-per-task is the budgeting informant** —
tasks are scheduled against the daily pace, not run until the quota is gone. Rationale: spreading
delivery evenly beats rapidly exhausting usage and losing **multiple days of momentum**.
**Rapid pacing / overspend requires EXPLICIT user authorization;** absent it, even-spread holds.
Pacing is a control-plane decision, surfaced read-only before it throttles a lane.
**Hard-cap breach behavior (ladder):** when a budget ceiling is hit mid-work, the fleet
**downgrades first** (opus → sonnet → haiku, then Claude → Codex), **queues** the lane at the
cheapest floor until the window resets, and **refuses** only as a last resort. Refusal is never
the first response to a breach.
**Spend accounting, learning & telemetry:**
- **Multi-subscription auto-routing:** a tenant with multiple subscriptions may let the fleet
**auto-route work to the account with the most available usage** (within budget policy).
- **Historical spend learning:** every task's token spend is **recorded**; historical data
continuously updates known **spend-per-task**, **typical daily spend**, and projections — so
estimates self-correct and pacing stays on target.
- **Projected + actual spend on artifacts (Mosaic Stack mandate):** PRDs, missions, and task
decomposition **MUST note projected AND actual token spend** — a Mosaic Stack process standard
(template-level), tracked separately as **#622**.
- **Anonymized telemetry → mosaicstack.dev:** spend data is reported (anonymous) to the
mosaicstack.dev telemetry endpoint so other agents/fleets budget and optimize from real,
anonymized data. Product workstream, tracked separately as **#623**.
**User-settable budgets (the policy surface).** A tenant operator can set budgets for every
configured **provider** (per-provider ceilings), the **account-to-task mapping**, the **agentic
routing flow**, **concurrency** (the spend multiplier), and **hard API-token $-limits**. Budgets
are enforced at the orchestrator + routing boundary, not inside individual workers (a worker never
decides its own budget — see delegation discipline).
**Budget CLI UX (#558):** `mosaic budget set --reset-at` sets the window reset; reset-datetimes
carry **confidence tags** (`user` / `provider` / `estimated` / `unknown`); and **urgency/criticality
is a dispatch-gate modifier** — high-urgency work may override even-spread pacing **within
authorization**. (Also feeds the budgeting workstream, not only this doc.)
## Observation model
| Verb | Behavior |
@@ -96,15 +196,83 @@ Every artifact, starting Phase 2, MUST:
> (blank for full-screen TUIs), and `attach` is read-write + resizes the session. The
> verbs above restore "join and observe" safely.
## Control plane & central register
### Why the register must be Postgres
The fleet is multi-host (w-jarvis + dragon-lin + future). A SQLite file is a local
file — it is not a network service and cannot be shared across hosts. Beyond topology,
Postgres MVCC eliminates the concurrent-writer corruption class Hermes hit with SQLite
under multi-agent access.
Access is exclusively through the **gateway API** (`apps/gateway` — typed, auth-gated,
scoped tokens). No agent or dispatcher pane ever holds a raw DB credential; a
compromised pane cannot corrupt or exfiltrate the register.
### Architecture (layers)
| Layer | Responsibility | Implementation |
| ---------------------- | ------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **Register** | Source of truth: agents, missions, tasks, heartbeats, spend | Postgres `fleet` schema — existing stack instance (`@mosaicstack/db`) |
| **Access** | Typed, auth-gated API | Gateway `fleet/*` routes |
| **Dispatcher** | Brief classification, BOD review, planning/coding/review/test/deploy sequencing + gates → fleet task dispatch | **forge pipeline engine** (`runPipeline`/`resumePipeline`, brief classifier, BOD) **+ thin `forge-exec` adapter → `agent-send.sh`**; NOT a new daemon — forge is reused, only stage→agent dispatch is new |
| **Orchestrator (Mos)** | Goals, missions, judgment, user/PA interface | Context-light; sets intent → re-engages only for decisions |
### Dispatcher = forge (reuse, do not rebuild)
The dispatcher is **not new work**: it is `@mosaicstack/forge`, a fully-implemented
software-factory pipeline engine (brief → Board-of-Directors review → 3 planning stages →
coding → review/remediation → testing → deploy). Forge already provides
`runPipeline`/`resumePipeline`, a brief classifier, and a BOD persona loader, so the fleet
does **not** re-implement sequencing, gate logic, or brief classification. The only new
fleet-owned code is a thin **`forge-exec` TaskExecutor adapter** (`ForgeTask` →
`agent-send.sh` to a named agent) — forge's single missing piece — tracked as a Gitea
issue and built post-PoC. The Postgres register backs forge's pipeline state (durable
`resumePipeline`, cross-host) in addition to cross-project missions/tasks/Kanban. The
north-star **'board' role IS forge's Board-of-Directors** — reused from forge, not a new
role implementation.
### Docs as projections
`docs/TASKS.md` and `MISSION-MANIFEST.md` are **generated projections** of the DB,
not hand-maintained. The dispatcher (or a scheduled job) renders Markdown from
`fleet.*` tables and commits the output. DB is authoritative; docs are for human
reference.
### Spend
`fleet.spend_ledger` records projected and actual token spend per agent/mission/task
(ties to issue #622). The dispatcher enforces budget caps before dispatching. Mos reads
the roll-up via API — no raw DB access, no context-bloating dumps.
### Federation
Cross-host fleet state flows through federated gateway queries (existing
`federation_peers` / `federation_grants` machinery). This is the existing north-star
invariant: **control plane rides federation (W1), not a bespoke broker.** No new
broker introduced.
### Scope
This is Phase 4–5 of this roadmap, materialized. It MUST NOT block the PoC (which
runs correctly on files + `roster.yaml`). Begin when Phase 2 heartbeat protocol is
stable and concurrent-agent count makes file coordination the bottleneck.
### Open sub-decision
Dedicated Postgres **instance** vs. dedicated **schema** in the existing instance.
Recommendation: dedicated schema, existing instance (a migration file, not new infra);
re-evaluate if isolation or write-volume demands it.
## Phased roadmap
| Phase | Outcome | Status |
| ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
| 0–1 | tmux PoC, hardening, published CLI v0.0.34 (#565–#568) | ✅ done |
| **2 — Observability** | `fleet ps` (host+tenant aware join), heartbeat protocol + dogfood stub answers it, `agent watch` (read-only), `agent send --verify` receipts | ▶ now |
| 3 — Real runtimes | claude/codex/pi/opencode answer heartbeat; **hybrid lifecycle** (core always-on: orchestrator+reviewer; ephemeral workers per lane) | planned |
| 4 — Unified definition | one agent schema in gateway; `mosaic agent --new` → materialized per-tenant session; uid-tenant provisioning | planned |
| 5 — Control plane | federation-backed cross-host × cross-tenant fleet view; **webUI** (surface chosen then) for MVP-X1 parity | planned |
| Phase | Outcome | Status |
| ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
| 0–1 | tmux PoC, hardening, published CLI v0.0.34 (#565–#568) | ✅ done |
| **2 — Observability** | `fleet ps` (host+tenant aware join), heartbeat protocol + dogfood stub answers it, `agent watch` (read-only), `agent send --verify` receipts | ▶ now |
| 3 — Real runtimes | claude/codex/pi/opencode answer heartbeat; **hybrid lifecycle** (core always-on: **orchestrator + enhancer**; ephemeral workers per lane) | planned |
| 4 — Unified definition | one agent schema in gateway; `mosaic agent --new` → materialized per-tenant session; uid-tenant provisioning; **`fleet` schema migration + `forge-exec` TaskExecutor adapter (forge → `agent-send.sh`)** | planned |
| 5 — Control plane | federation-backed cross-host × cross-tenant fleet view; **webUI** (surface chosen then) for MVP-X1 parity; **central register live (spend ledger, docs-as-projections, multi-host Kanban)** | planned |
## Decisions of record (2026-06-20, with Jason)
@@ -121,6 +289,89 @@ Every artifact, starting Phase 2, MUST:
runtime-bin on PATH (baked into the pane command) + boot-survival (`enable` + linger),
which `fleet init` should automate.
## Decisions of record (2026-06-22, with Jason)
- **Two-agent floor:** every fleet has, at minimum, an **orchestrator** and an **enhancer**.
The orchestrator is the user's point of contact and composes the fleet; the enhancer runs the
continuous-improvement loop (monitor → analyze → remediate → upgrade tools/skills/harness →
file Mosaic Stack bug reports) and **does not code or review**.
- **Role library:** orchestrator, enhancer, coder, code review, security review, research,
board (moonshot/contrarian/technical/business/financial), operations — extensible; the
orchestrator (advised by the enhancer) adds roles as missions demand.
- **Orchestrator chat connector:** the orchestrator is reachable over a user-chosen connector
(tmux now; Telegram/Discord/Matrix/Slack configurable). Validated live: **"Mos" orchestrator
on Discord** via the Claude Code discord channel plugin (w-jarvis).
- **Session context cap = 200k tokens (GLOBAL to all Claude sessions):** Claude Code sessions are
capped at a **max 200k-token context window**. Long-running sessions extended toward 1M tokens
have proven **worse in practice** (degraded steering, off-plan divergence); 200k is the standard.
**Enforcement split:** the _window_ lives in **`~/.claude/settings.json`** (host-global) as
`"autoCompactWindow": 200000` + `"autoCompactEnabled": true`; the _1M-disable_ lives in **launch
ENV** (`CLAUDE_CODE_DISABLE_1M_CONTEXT=1`, plus `CLAUDE_CODE_AUTO_COMPACT_WINDOW=200000`) wherever
a `[1m]` model can be selected (`mos-claude.service` + the fleet Claude launcher), so every Claude
agent is capped at spawn. (settings = window; env = 1M-disable.)
- **Worker context bound (#8):** workers are kept context-bounded via the **ephemeral-per-lane
lifecycle + native compaction**, not via the 200k knob. The explicit `autoCompactWindow` 200k knob
**stays Claude-specific** — the _principle_ (bounded context) extends to workers, the _knob_ does not.
- **Orchestrator delegation discipline:** the orchestrator **delegates all delivery work** to
subagents / workflows / ultracode / coder agents and confines its own context to **orchestration +
the personal-assistant lane**. Keeping delivery out of the orchestrator's window keeps its
context unpolluted and measurably reduces off-plan divergence. The orchestrator coordinates and
decides; it does not implement.
- **Budget governance is fleet doctrine:** token/API-dollar budgeting is a first-class fleet concern
(see "Budget & token governance"). OAuth-sub usage-vs-limit feedback is ingested per account, spend
is **auto-paced EVEN-SPREAD over remaining time** (rapid/overspend only on explicit authorization),
spend is **tracked historically** to self-correct per-task/daily estimates, multi-sub tenants may
**auto-route by available usage**, and operators set budgets per provider, per account-to-task
mapping, per routing flow, per concurrency level, and as hard API-$ ceilings.
- **Spend accounting is a Mosaic Stack process mandate:** PRDs, missions, and task decomposition
**MUST carry projected + actual token spend**; used locally for pacing and reported as **anonymized
telemetry to mosaicstack.dev**. The template standard (#622) and telemetry product (#623) are
tracked separately.
- **Unified identity = "Fleet" (Jason, 2026-06-22):** the product is **Mosaic Fleet** — one unified
user-facing identity and CLI surface. **forge** is the Fleet's **internal** delivery/orchestration
engine (not a separate product); the control-plane **Postgres register is the Fleet's register**;
workers/runtime are the **Fleet substrate**. **"factory" is RETIRED as a product term** — it was
only ever the software-factory concept (which forge implements) and the old `mosaic-factory` tmux
socket name. The production-isolation socket is now **`mosaic-fleet`** (matches the product brand);
the legacy dogfood canary remains on the old `mosaic-factory` socket pending migration. **Code stays
layered** (forge + fleet + control-plane as internal layers);
only the **identity + CLI surface unify under Fleet.**
- **Role-based session naming (Jason, 2026-06-22):** agent tmux sessions are named by **role**
(`orchestrator`, `enhancer`, `research`, `coder0-0`, …), not by persona. **Persona lives in
`SOUL.md`**; the front-end / Discord presents a **friendly alias** (e.g. "Mos" = the orchestrator's
alias). The session name is the stable addressing handle; the alias is presentation.
### Control plane & central register
- **Store:** Postgres (existing stack instance, dedicated `fleet` schema via `@mosaicstack/db`). SQLite rejected: (1) it is a local file — structurally incompatible with a multi-host fleet; (2) concurrent multi-agent writes caused repeated corruption in Hermes. "SQLite + access service" rejected as reinventing a DB server badly; "LLM agent gating DB access" rejected as slow, expensive, and a single point of failure.
- **Access:** gateway API only (`apps/gateway`, `fleet/*` routes). No raw DB credentials in any agent/dispatcher pane — directly mitigates the tmux attack-surface concern.
- **Dispatcher = forge (reuse, not a new build):** the dispatcher IS `@mosaicstack/forge`'s pipeline engine (`runPipeline`/`resumePipeline` + brief classifier + BOD persona loader), a fully-implemented software-factory pipeline (brief → BOD review → 3 planning stages → coding → review/remediation → testing → deploy). We do **not** design/build a new dispatcher and do **not** re-implement sequencing, gate logic, or brief classification. The only new fleet-owned piece is a thin **`forge-exec` TaskExecutor adapter** (suggested package `packages/forge-exec`) mapping a `ForgeTask` → `agent-send.sh` dispatch to a named fleet agent — forge's single missing piece. It is tracked as a Gitea issue and built **post-PoC** (not now).
- **Register backs forge:** the Postgres `fleet` register is genuinely new (neither forge nor the fleet has cross-project state). It BACKS forge's pipeline state (durable `resumePipeline`, cross-host) plus cross-project missions/tasks/Kanban.
- **'board' role = forge BOD:** the north-star role-library 'board' role IS forge's Board-of-Directors — reused, not reinvented.
- **Orchestration vs. dispatch:** Orchestrator (Mos) sets intent and handles judgment; forge works the mechanical pipeline (sequencing, gates, status transitions, spend ledger). LLM escalation reserved for judgment: mission decomposition, re-planning on failure.
- **Spend in the register:** `fleet.spend_ledger` tracks projected vs. actual tokens per agent/mission/task; ties to issue #622.
- **Docs as projections:** `docs/TASKS.md` and `MISSION-MANIFEST.md` become generated exports of the DB, not hand-maintained.
- **Sub-decision pending:** dedicated schema in existing PG instance (recommended) vs. dedicated PG instance. Revisit if isolation or write-volume demands it.
## Future enhancements (north-star, post-MVP — not on the MVP track)
- **Mosaic Claude Discord Plugin** — a first-party Mosaic Discord connector that properly
implements the basic Discord functions **and native Discord threads**. Threads let a user
separate conversation topics with the orchestrator (the pattern proven by the Hermes agent).
A major enhancement over the current third-party channel plugin; **not required for the MVP**,
but a committed north-star target. `ASSUMPTION:` ships as a Mosaic-owned plugin so the fleet
controls Discord UX (threads, reactions, attachments, per-thread context) end-to-end.
- **Matrix on a local homeserver — strategic future transport.** **F4 (in progress) IS the Matrix
connector**: an orchestrator chat connector speaking the Matrix client-server API against a
self-hosted homeserver (Conduit default, Synapse alt). Matrix is named here as the strategic
future transport — peer to tmux/Discord, not superseded by them.
- **tmux fleet attack-surface hardening.** Many always-on tmux sessions are an attack surface;
`tmux send-keys` / socket access could enable malicious action against agents directly.
Mitigations to build toward: socket ownership/perms, per-tenant socket isolation (already an
invariant), authenticated `agent-send`, and an audit of who can write to any pane. **Post-MVP
unless a P0 surfaces.** The control-plane register reinforces this (gateway-API access = no raw
DB creds in panes). A not-started risk-assessment + mitigation-plan task rides the Fleet `TASKS.md`.
## Assumptions (veto-able)
- `ASSUMPTION:` first-class runtimes = claude, codex, pi, opencode; a "role" (analyst,
@@ -131,3 +382,30 @@ Every artifact, starting Phase 2, MUST:
- `ASSUMPTION:` Fleet is workstream **W-FLEET** under `mvp-20260312`; a rollup row in
`docs/TASKS.md` and a workstream declaration in `MISSION-MANIFEST.md` are proposed to
the MVP orchestrator, not written by this workstream.
- `ASSUMPTION:` OAuth-subscription runtimes (Claude sub, Codex sub) expose a machine-readable
current-usage-vs-limit signal the fleet can poll/ingest; if a provider exposes no such signal,
that provider's accounts fall back to API-style hard-ceiling budgeting only (no auto-pacing).
- `ASSUMPTION:` budget policy lives at the orchestrator + routing layer and is surfaced through the
same CLI→TUI→webUI parity (MVP-X1) as the rest of fleet state — not a separate budgeting daemon.
- `ASSUMPTION:` the 200k session cap is enforced by Claude Code settings/env composition (model
variant + `autoCompactWindow`), not by a Mosaic wrapper; a wrapper is the fallback only if the
harness later removes those knobs.
- `ASSUMPTION:` The central register (Postgres `fleet` schema + gateway API + forge as dispatcher) is
the Phase 4–5 control plane, begun after Phase 2 observability is proven. It is a dedicated
**W-FLEET** sub-workstream entry, not a separate mission. The dispatcher is `@mosaicstack/forge`
(reused, not a new daemon); the only new fleet-owned code is the thin **`forge-exec` TaskExecutor
adapter** (suggested package `packages/forge-exec`, `ForgeTask` → `agent-send.sh`), tracked as a
Gitea issue and built post-PoC.
---
> **Release procedure (drift re-capture, 2026-06-22):** `mosaic update` only propagates new fleet
> commands when the **CLI version is bumped** — without a version bump, fleet command changes never
> reach installed hosts. The release/version-bump procedure (bump → publish → `mosaic update`
> [→ `--relaunch`]) must be documented so fleet changes actually land. (Also feeds the budgeting
> workstream.)
>
> **Tracked separately (not in scope for this doc PR):** **#622** PRD/mission/task projected+actual
> spend template standard · **#623** anonymized spend telemetry → mosaicstack.dev (product) ·
> **#625** `tenant_id` roster-schema field (multi-tenant; invariant #1 home) · **#628** `forge-exec`
> TaskExecutor adapter (post-PoC). This PR records **doctrine only** — no implementation.

View File

@@ -1,7 +1,7 @@
# Local Fleet Canary
The local fleet canary runs a small tmux-backed Mosaic agent fleet on an
isolated tmux socket. The default socket is `mosaic-factory`; the commands do
isolated tmux socket. The default socket is `mosaic-fleet`; the commands do
not use or stop the default tmux server.
## Files
@@ -67,7 +67,7 @@ mosaic agent tail canary-pi -n 80
These commands read the roster and target the configured tmux socket. The
generated systemd agent services use `start-agent-session.sh`; message delivery
uses the tmux send tools with `-L mosaic-factory`.
uses the tmux send tools with `-L mosaic-fleet`.
`mosaic agent send` is operator-origin traffic unless a caller explicitly says
otherwise. The CLI always passes a deterministic source label to
@@ -82,7 +82,7 @@ impersonating a known handoff lane. The lower-level inter-agent wrapper
Use these checks before expanding the roster:
```bash
tmux -L mosaic-factory ls
tmux -L mosaic-fleet ls
tmux ls
mosaic fleet verify
systemctl --user status mosaic-tmux-holder.service
@@ -90,7 +90,7 @@ systemctl --user status mosaic-tmux-holder.service
Expected results:
- `tmux -L mosaic-factory ls` shows `_holder` and roster agent sessions.
- `tmux -L mosaic-fleet ls` shows `_holder` and roster agent sessions.
- `tmux ls` shows only the default tmux server sessions and is not changed by
fleet start/stop operations.
- `mosaic fleet verify` checks exact session targets on the isolated socket.
@@ -108,7 +108,7 @@ Run this checklist before cutting or dogfooding a fleet release:
repeated `start` against the named socket; verify the default tmux server is
unchanged.
- Liveness verification: run `mosaic fleet verify` and confirm roster sessions
with `tmux -L mosaic-factory ls` or exact `has-session` checks.
with `tmux -L mosaic-fleet ls` or exact `has-session` checks.
- Package dry-run: run `npm pack --dry-run --json` from `packages/mosaic` and
confirm `framework/fleet`, `framework/systemd/user`,
`framework/tools/fleet`, and `framework/tools/tmux` assets are included.
@@ -140,5 +140,5 @@ This rollback leaves the default tmux server untouched. If a canary session is
still present after service stop, remove only the isolated socket server:
```bash
tmux -L mosaic-factory kill-server
tmux -L mosaic-fleet kill-server
```

View File

@@ -17,7 +17,7 @@ Implement enough product surface to use the fleet locally:
- roster schema and examples
- local canary docs and rollback instructions
- tests for CLI behavior where practical
- canary verification on named tmux socket `mosaic-factory`
- canary verification on named tmux socket `mosaic-fleet`
## Non-goals
@@ -30,7 +30,7 @@ Implement enough product surface to use the fleet locally:
- CLI can initialize a minimal roster outside product defaults.
- CLI can install user systemd units and fleet helper scripts to a configurable Mosaic home.
- CLI can start/stop/status/verify a canary fleet using `mosaic-factory`.
- CLI can start/stop/status/verify a canary fleet using `mosaic-fleet`.
- `mosaic agent send` uses existing named-socket/exact-target tmux tooling.
- `mosaic agent reset` targets only the named agent session on the named socket.
- Verification proves default tmux sessions remain untouched.

View File

@@ -0,0 +1,32 @@
# #631 — re-seed must preserve user fleet data (CRITICAL data-loss)
- **Issue:** #631 · **Branch:** `fix/631-reseed-preserves-fleet-data`
## Root cause
`mosaic update` auto-runs `install.sh` keep-mode sync (#610). install.sh's rsync `--delete` (keep mode)
honored PRESERVE_PATHS, but `fleet/` wasn't listed → the sync WIPED `~/.config/mosaic/fleet/roster.yaml`
(+ run/, agents/). Any user running `mosaic update` lost their roster. (overwrite mode wipes by design;
the live loss was keep mode.)
## Fix (PRIMARY)
- install.sh PRESERVE_PATHS += `fleet/*.yaml`, `fleet/agents`, `fleet/run` — the framework still SEEDS
fleet/examples + fleet/roles + fleet/roster.schema.json (synced), but user files survive.
- Made the cp-fallback (no-rsync) GLOB-AWARE so `fleet/*.yaml` preserves every user roster there too;
fixed the restore to re-glob per-pattern (so only the user file is restored, not the whole fleet/ dir).
- file-adapter.ts (TS installer): mirrored the preserve list for parity. (TS syncDirectory is copy-only,
never --delete, so it never had the bug — belt-and-suspenders + parity.)
## Fix (SECONDARY)
- `refreshActiveFleetUnits()` (update-checker.ts): the re-seed updates ~/.config/mosaic/systemd/user but
systemd runs ~/.config/systemd/user, so unit fixes (#627) didn't take effect. After the re-seed,
`mosaic update` now copies the fresh mosaic-\*.service → the active dir + daemon-reload (best-effort,
only when a fleet is already installed). Wired into the cli.ts update flow.
## Verification
- bash F6 fixture (6 checks: roster/custom-yaml/agents/run survive + examples refreshed + schema seeded);
20/20 migration matrix green. TS file-adapter test (roster/run/agents survive keep sync). 2 unit tests
for refreshActiveFleetUnits. tsc/eslint/prettier/sanitize clean.

View File

@@ -0,0 +1,31 @@
# Fleet onboarding-injection — comms cheat-sheet + peer roster (#620)
- **Issue:** #620 · **Branch:** `feat/fleet-comms-onboarding` (off main). Root cause of Mos's failed first send.
## What
Inject a `# Fleet Comms` block into each spawned fleet agent's system prompt (via composeContract — the
runtime-agnostic path every `mosaic yolo <runtime>` agent hits), so it boots knowing how to reach peers.
- `src/fleet/comms-onboarding.ts` (standalone, no fleet.ts coupling):
- `parseRosterAgents` (name/class/host/ssh, lenient), `renderPeerReach` (same-host `-s` vs cross-host
`-H <ssh> -s`), `buildFleetCommsBlock` (self [host:session] identity + agent-send path + peer table +
FLIP-to-reply + `agent send --verify`=ACCEPTED), `readFleetCommsBlock` (reads roster.yaml; '' if not a member).
- `composeContract` appends it only when MOSAIC_AGENT_NAME is set + the agent is in the roster.
- `roster.schema.json`: optional per-agent `host` + `ssh` (cross-host addresses; manual = pre-federation
stopgap, federation/W1 auto-discovers later).
## Acceptance criteria (Mos) — all covered
1. own [host:session] + agent-send path + peer roster ✓
2. cross-host correctness: local→`-s` (no -H); remote→`-H <ssh> -s` ✓ (concrete coder0-0@dragon-lin)
3. FLIP-the-preamble reply rule ✓
4. `agent send --verify` = ACCEPTED ✓
5. no `-L` (default socket); matches live tooling ✓
## Verification
- 10 onboarding unit tests (parse, render local/remote/fallback/equal-host, build, situational read) +
2 composeContract situational tests (injects for fleet agent w/ correct cross-host addr; no-op when
MOSAIC_AGENT_NAME unset). tsc/eslint/prettier/sanitize clean.
- Post-merge validation: Mos spawns a real w-jarvis agent → first-try reach to coder0-0@dragon-lin + a local peer.

View File

@@ -0,0 +1,26 @@
# Fleet enhancer role + two-agent floor (#614)
- **Issue:** #614 · **Branch:** `feat/fleet-enhancer-floor` (stacked on #612 `feat/fleet-polish-bundle`)
- **Doctrine:** `docs/fleet/north-star.md` (PR #613) — every fleet = orchestrator + enhancer minimum.
## Changes
- **Presets** (general, coding, research, hybrid): add `enhancer` (claude, `class: enhancer`,
`persistent_persona: true`) as a core always-on agent alongside the orchestrator. minimal/local-canary
unchanged.
- **fleet.ts**: `countEnhancers` helper; init guarantee extended — non-minimal profiles must yield
exactly 1 orchestrator AND >=1 enhancer (hard-fail otherwise); `removeAgentFromRoster` refuses to drop
the sole enhancer (symmetric with the sole-orchestrator guard) so the floor holds at runtime, not just init.
- **Role doc**: `framework/fleet/roles/enhancer.md` — the enhancer mandate (monitor → analyze → plan →
upgrade tools/skills/harness WITH orchestrator → file Mosaic Stack bug reports) + boundaries (does NOT
code or review).
## Verification
- 155 fleet tests green (new: countEnhancers; remove-sole-enhancer guard; remove-allows-when-another;
init two-agent-floor; every-non-minimal-preset-has-enhancer; updated preset rosters). tsc/eslint/
prettier/sanitize clean. TDD on the init guarantee + remove protection.
## Stacking
Built on #612's init-R5 code. PR shows #612 + enhancer until #612 merges; then rebase onto main → clean.

View File

@@ -31,7 +31,7 @@ with a second agent on `dragon-lin`.
## Environment facts (verified 2026-06-20)
- Fleet is live on `W-jarvis` (uid 1000, `jarvis`, `Linger=yes`) on tmux socket
`mosaic-factory`: `_holder`, `canary-pi`, `dogfood-coder`, `dogfood-orchestrator`,
`mosaic-fleet`: `_holder`, `canary-pi`, `dogfood-coder`, `dogfood-orchestrator`,
`dogfood-reviewer`. All panes run `~/.config/mosaic/fleet/dogfood-agent.py` (stub),
including `canary-pi` (roster says runtime=pi → **drift**).
- Holder + `mosaic-agent@*` units are `active (exited)` but `UnitFileState=disabled`
@@ -56,7 +56,7 @@ with a second agent on `dragon-lin`.
with dragon-lin coder, commit docs, begin Phase-2 delivery (heartbeat + `fleet ps`).
- 2026-06-20 (session 2): Built Phase-2 CLI via worker (commit ab47831): `fleet ps`,
`agent watch`, `agent send --verify`, 62 tests. LIVE-verified `fleet ps` on
mosaic-factory — correctly flagged canary-pi DRIFT + BOOT-ENABLE, tenant_id+host in JSON.
mosaic-fleet — correctly flagged canary-pi DRIFT + BOOT-ENABLE, tenant_id+host in JSON.
Heartbeat responder added to dogfood-agent.py (FLEET-OBS-002) — `fleet ps` HB now
`healthy` for all 4 agents.
- Coordination: dual-engine-reviewed (Claude+Codex) and merged framework PRs #572

View File

@@ -0,0 +1,20 @@
# Fleet-polish bundle — boot-survival symmetry (#611)
- **Issue:** #611 · **Branch:** `feat/fleet-polish-bundle` · From the Lead's Codex symmetry-gap finding.
## Three fixes
1. **disable-on-remove (BUG, TDD).** `fleet remove` stopped + deleted roster/env/heartbeat but never
`systemctl --user disable mosaic-agent@NAME.service` → a removed-but-enabled unit could resurrect on
reboot pointing at deleted config. Fix: `buildSystemdDisableCommand` + disable in `remove`
(best-effort, gated on !--keep-files).
2. **add-enable.** `fleet add` now enables the new agent's unit for boot-survival (best-effort,
independent of --start) — symmetry with disable-on-remove.
3. **init-R5 guarantee.** `fleet init --write` now FAILS HARD when a non-minimal profile doesn't yield
exactly one orchestrator (was a soft warning). `minimal` (sanctioned no-orchestrator) still allowed.
## Verification
- 4 new tests (disable builder; remove-invokes-disable; add-invokes-enable; init general → exactly 1
orchestrator) + 147 existing fleet tests green (151 total). tsc/eslint/prettier clean.
- TDD on the disable bug per contract.

View File

@@ -0,0 +1,28 @@
# Fleet stand-up fixes — model_hint→--model + socket-default trap (#626)
- **Issue:** #626 · **Branch:** `feat/fleet-standup-fixes` (off main). PoC-blocking, before doctrine doc.
## FIX 1 — model_hint consumed
- generateAgentEnv emits `MOSAIC_AGENT_MODEL=<modelHint>` (bare empty when unset).
- start-agent-session.sh default command → `mosaic yolo $RUNTIME ${MOSAIC_AGENT_MODEL:+--model $MOSAIC_AGENT_MODEL}`.
→ pi workers launch with `--model openai-codex/gpt-5.5:high`.
## FIX 2 — socket default trap (absent ⇒ literal default socket, no -L everywhere)
- THE TRAP (3 sites): parseRosterText fallback was DEFAULT_SOCKET_NAME; systemd unit had
`Environment=MOSAIC_TMUX_SOCKET=mosaic-fleet` + `ExecStop ${…:-mosaic-fleet}`; start-agent-session
defaulted `:-mosaic-fleet`. All fixed → absent socket = '' = default tmux socket (no -L).
- `socketArgs(name)` helper → `name ? ['-L', name] : []`; replaced all ~15 -L render sites in fleet.ts.
- shellEnvValue('') now emits a **bare** `VAR=` (not `''`) — unambiguous empty in systemd EnvironmentFile
(a quoted '' could become a literal socket named "''").
- start-agent-session.sh: `_tmux` wrapper passes -L only when socket set; mosaic-agent@.service: dropped the
socket default + conditional ExecStop. So spawn == observe == onboarding cheat-sheet.
- CONTAINMENT: all 6 shipped presets set socket_name: mosaic-fleet explicitly → unaffected; only
socket-less rosters (the PoC) get default-socket behavior. DEFAULT_SOCKET_NAME exported for explicit use.
## Verification
- 158 fleet + 201 fleet-adjacent tests green; new: socketArgs none/named, model_hint→env, explicit-socket
renders -L, socket-less env bare. tsc/eslint/prettier/sanitize clean. Shell bash -n + end-to-end sim
(socket-less→no -L, model→--model).

View File

@@ -0,0 +1,19 @@
# north-star doctrine consolidation (#620-adjacent doc PR)
- **Branch:** `feat/north-star-doctrine` (off main). Source: Mos's consolidated handoff + 2 drafts (budgeting/200k/delegation + control-plane). ONE conflict-free PR per the merge-map.
## Applied (merge-map, in order)
1. Stack table: +2 rows (Central register, Budget/spend governance) after Control plane + PoC-socket-hygiene note.
2. `## Budget & token governance` after Invariants (even-spread pacing [Jason override], hard-cap ladder, multi-sub auto-routing, historical learning, #558 CLI UX) + TTY OPS INVARIANT note.
3. `## Control plane & central register` after Observation model (Postgres fleet schema, gateway-API access, dispatcher = forge pipeline engine + forge-exec adapter [NOT a daemon], register backs forge, board = forge BOD).
4. Phased roadmap Phase 4/5 annotated (fleet schema migration + forge-exec; central register live).
5. Decisions of record (2026-06-22): doctrine §1(c) bullets (200k cap, worker bound #8, delegation, budget, spend mandate, unified identity Fleet, role-based session naming) + control-plane 6c `### Control plane & central register` subgroup.
6. Future enhancements: Matrix-future-transport (#10, F4 IS Matrix) + tmux security hardening (§5).
7. Assumptions: doctrine §1(d) (3) + control-plane 6e (1) + release-procedure note + tracked-separately note.
## Conflict checklist: all ✓
1 Decisions-2026-06-22; order Invariants→Budget→Observation→Control plane→Roadmap; 2 stack rows; even-spread (no opportunistic/HOLD); control-plane UNHELD; forge-exec = tracked #628 post-PoC; §7 drift re-captures all present (#8/#10/#558/TTY/release).
## Out of scope (cited in doc + PR): #622 (spend template std), #623 (telemetry product), #625 (tenant_id schema), #628 (forge-exec adapter). Doctrine only — no implementation.

View File

@@ -8,7 +8,7 @@ package, normally at:
~/.config/mosaic/fleet/roster.yaml
```
The default tmux socket is `mosaic-factory` so fleet commands do not touch the
The default tmux socket is `mosaic-fleet` so fleet commands do not touch the
default tmux server.
## Examples

View File

@@ -1,7 +1,7 @@
version: 1
transport: tmux
tmux:
socket_name: mosaic-factory
socket_name: mosaic-fleet
holder_session: _holder
defaults:
working_directory: ~
@@ -15,6 +15,10 @@ agents:
runtime: claude
class: orchestrator
persistent_persona: true
- name: enhancer
runtime: claude
class: enhancer
persistent_persona: true
- name: coder0
runtime: pi
class: implementer

View File

@@ -1,7 +1,7 @@
version: 1
transport: tmux
tmux:
socket_name: mosaic-factory
socket_name: mosaic-fleet
holder_session: _holder
defaults:
working_directory: ~
@@ -15,6 +15,10 @@ agents:
runtime: claude
class: orchestrator
persistent_persona: true
- name: enhancer
runtime: claude
class: enhancer
persistent_persona: true
- name: generalist
runtime: pi
class: worker

View File

@@ -1,7 +1,7 @@
version: 1
transport: tmux
tmux:
socket_name: mosaic-factory
socket_name: mosaic-fleet
holder_session: _holder
defaults:
working_directory: ~
@@ -15,6 +15,10 @@ agents:
runtime: claude
class: orchestrator
persistent_persona: true
- name: enhancer
runtime: claude
class: enhancer
persistent_persona: true
- name: coder0
runtime: pi
class: implementer

View File

@@ -1,7 +1,7 @@
version: 1
transport: tmux
tmux:
socket_name: mosaic-factory
socket_name: mosaic-fleet
holder_session: _holder
defaults:
working_directory: ~/src

View File

@@ -1,7 +1,7 @@
version: 1
transport: tmux
tmux:
socket_name: mosaic-factory
socket_name: mosaic-fleet
holder_session: _holder
defaults:
working_directory: ~/src

View File

@@ -1,7 +1,7 @@
version: 1
transport: tmux
tmux:
socket_name: mosaic-factory
socket_name: mosaic-fleet
holder_session: _holder
defaults:
working_directory: ~
@@ -15,6 +15,10 @@ agents:
runtime: claude
class: orchestrator
persistent_persona: true
- name: enhancer
runtime: claude
class: enhancer
persistent_persona: true
- name: researcher0
runtime: pi
class: researcher

View File

@@ -0,0 +1,41 @@
# Enhancer — fleet role definition
The **enhancer** is one half of the fleet's two-agent floor: every fleet runs, at
minimum, an **orchestrator** and an **enhancer**. The orchestrator drives delivery;
the enhancer makes the fleet _get better at delivering_ over time.
It is a **core, always-on** agent (`class: enhancer`, `persistent_persona: true`),
not an ephemeral per-lane worker.
## Mandate
The enhancer runs the fleet's **continuous-improvement loop**:
1. **Monitor** fleet activity — agents, heartbeats, sessions, throughput, failures.
2. **Analyze** for enhancements and optimizations — friction, gaps, recurring defects,
missing or broken tools, skill/harness shortfalls.
3. **Plan** a remediation: a concrete improvement with rationale and expected effect.
4. **Upgrade fleet capability — with the orchestrator** — tool creation/repair, skills,
harness improvements. The orchestrator owns fleet composition; the enhancer advises and
implements improvements to the _means of production_, not the product.
5. **File upstream bug reports** to Mosaic Stack for real defects, so they flow back to the
framework for proper remediation rather than being patched over locally.
6. **Recommend which agents are needed** — advise the orchestrator on roles to add/remove as
the mission evolves.
## Boundaries
- **Does NOT write product/source code.**
- **Does NOT review code** (that is the job of the code-review / security-review roles).
- **Does NOT perform delivery tasks.**
Improvement and diagnosis only. When the enhancer finds work that requires coding or review,
it files it (bug report / recommendation) and the orchestrator materializes the right worker.
## Why two, not one
The orchestrator alone optimizes for _this_ delivery; the enhancer optimizes for _every future_
delivery — self-healing the fleet's tools, skills, and harnesses, and routing real defects
upstream. Together they are the irreducible core; every other role is added on demand.
> Doctrine: `docs/fleet/north-star.md` (two-agent floor + role library).

View File

@@ -18,11 +18,11 @@
"properties": {
"socket_name": {
"type": "string",
"default": "mosaic-factory"
"default": "mosaic-fleet"
},
"socketName": {
"type": "string",
"default": "mosaic-factory"
"default": "mosaic-fleet"
},
"holder_session": {
"type": "string",
@@ -81,6 +81,18 @@
"class": {
"type": "string"
},
"host": {
"description": "Host the agent runs on (hostname or IP). Absent = the fleet host. Used by onboarding-injection to render cross-host comms addresses. Manual cross-host listing is a pre-federation stopgap; federation (W1) auto-discovers later.",
"type": "string"
},
"ssh": {
"description": "SSH target (user@host) for a cross-host peer, so onboarding renders the `agent-send.sh -H <user@host>` form. Optional; only needed for agents on a different host than the fleet.",
"type": "string"
},
"socket": {
"description": "tmux socket the agent's session runs on. Onboarding renders `-L <socket>` when set; absent = the default socket (no `-L`). Must match the LIVE socket, not blindly inherit the roster's tmux.socket_name.",
"type": "string"
},
"working_directory": {
"type": "string"
},

View File

@@ -23,7 +23,15 @@ INSTALL_MODE="${MOSAIC_INSTALL_MODE:-prompt}"
# entries (CONSTITUTION/AGENTS/STANDARDS) ARE re-applied afterward by
# reconcile_framework_files (overwrite + backup-once); the rest stay user-owned.
# User-created content in these paths survives rsync --delete.
PRESERVE_PATHS=("CONSTITUTION.md" "AGENTS.md" "SOUL.md" "USER.md" "TOOLS.md" "STANDARDS.md" "memory" "sources" "credentials")
#
# fleet/* — the framework SEEDS only fleet/examples, fleet/roles, and
# fleet/roster.schema.json (synced normally). The user's own fleet files MUST
# survive `mosaic update` (which runs this sync automatically): the active
# roster (`fleet/roster.yaml` + any other `fleet/*.yaml`), per-agent env
# (`fleet/agents/`), and heartbeat run dir (`fleet/run/`). Without these entries, an
# update would wipe the operator's fleet data. Glob entries are honored by both the rsync
# path (`--exclude`) and the glob-aware cp fallback below.
PRESERVE_PATHS=("CONSTITUTION.md" "AGENTS.md" "SOUL.md" "USER.md" "TOOLS.md" "STANDARDS.md" "memory" "sources" "credentials" "fleet/*.yaml" "fleet/agents" "fleet/run")
# Framework-owned contract files: re-copied from defaults/ on every upgrade (the
# user must not edit them; a divergent copy is backed up once before overwrite).
@@ -179,15 +187,23 @@ sync_framework() {
return
fi
# Fallback: cp-based sync
# Fallback: cp-based sync. Glob-aware so entries like "fleet/*.yaml" preserve
# every matching user file (parity with the rsync --exclude path above).
local preserve_tmp=""
if [[ "$INSTALL_MODE" == "keep" ]]; then
preserve_tmp="$(mktemp -d "${TMPDIR:-/tmp}/mosaic-preserve-XXXXXX")"
local match rel
for path in "${PRESERVE_PATHS[@]}"; do
if [[ -e "$TARGET_DIR/$path" ]]; then
mkdir -p "$preserve_tmp/$(dirname "$path")"
cp -R "$TARGET_DIR/$path" "$preserve_tmp/$path"
fi
# Unquoted $path lets the glob expand against TARGET_DIR; nullglob makes a
# non-matching pattern vanish instead of staying literal.
shopt -s nullglob
for match in "$TARGET_DIR/"$path; do
[[ -e "$match" ]] || continue
rel="${match#"$TARGET_DIR/"}"
mkdir -p "$preserve_tmp/$(dirname "$rel")"
cp -R "$match" "$preserve_tmp/$rel"
done
shopt -u nullglob
done
fi
@@ -196,12 +212,19 @@ sync_framework() {
rm -rf "$TARGET_DIR/.git"
if [[ -n "$preserve_tmp" ]]; then
# Restore by re-globbing the SAME patterns against preserve_tmp, so each
# preserved item is restored at its own relative path (e.g. only
# fleet/roster.yaml is replaced — the freshly-synced fleet/examples stays).
for path in "${PRESERVE_PATHS[@]}"; do
if [[ -e "$preserve_tmp/$path" ]]; then
rm -rf "$TARGET_DIR/$path"
mkdir -p "$TARGET_DIR/$(dirname "$path")"
cp -R "$preserve_tmp/$path" "$TARGET_DIR/$path"
fi
shopt -s nullglob
for match in "$preserve_tmp/"$path; do
[[ -e "$match" ]] || continue
rel="${match#"$preserve_tmp/"}"
rm -rf "$TARGET_DIR/$rel"
mkdir -p "$TARGET_DIR/$(dirname "$rel")"
cp -R "$match" "$TARGET_DIR/$rel"
done
shopt -u nullglob
done
rm -rf "$preserve_tmp"
fi

View File

@@ -33,7 +33,7 @@ Per-agent overrides live outside the package in:
Example:
```dotenv
MOSAIC_TMUX_SOCKET=mosaic-factory
MOSAIC_TMUX_SOCKET=mosaic-fleet
MOSAIC_AGENT_RUNTIME=claude
MOSAIC_AGENT_WORKDIR=$HOME/src/your-project
# Optional escape hatch for PoC/canary agents:
@@ -50,8 +50,8 @@ chmod +x ~/.config/mosaic/tools/fleet/start-agent-session.sh
systemctl --user daemon-reload
systemctl --user start mosaic-tmux-holder.service
systemctl --user start mosaic-agent@canary.service
tmux -L mosaic-factory ls
tmux -L mosaic-fleet ls
```
Do not use `tmux kill-server` without `-L mosaic-factory`; this pattern is meant
Do not use `tmux kill-server` without `-L mosaic-fleet`; this pattern is meant
to avoid disturbing the user's default tmux server.

View File

@@ -8,13 +8,15 @@ PartOf=mosaic-tmux-holder.service
[Service]
Type=oneshot
RemainAfterExit=yes
Environment=MOSAIC_TMUX_SOCKET=mosaic-factory
# No default MOSAIC_TMUX_SOCKET: an absent roster socket means the literal
# default tmux socket (no -L). The per-agent .env sets it when the roster names
# one; otherwise it stays unset and start-agent-session.sh uses the default socket.
Environment=MOSAIC_AGENT_NAME=%i
Environment=MOSAIC_AGENT_RUNTIME=pi
Environment=MOSAIC_AGENT_WORKDIR=%h
EnvironmentFile=-%h/.config/mosaic/fleet/agents/%i.env
ExecStart=/bin/bash %h/.config/mosaic/tools/fleet/start-agent-session.sh %i
ExecStop=-/bin/bash -lc 'tmux -L "${MOSAIC_TMUX_SOCKET:-mosaic-factory}" kill-session -t "=%i"'
ExecStop=-/bin/bash -lc 'if [ -n "${MOSAIC_TMUX_SOCKET:-}" ]; then tmux -L "$MOSAIC_TMUX_SOCKET" kill-session -t "=%i"; else tmux kill-session -t "=%i"; fi'
[Install]
WantedBy=default.target

View File

@@ -6,7 +6,7 @@ After=default.target
[Service]
Type=oneshot
RemainAfterExit=yes
Environment=MOSAIC_TMUX_SOCKET=mosaic-factory
Environment=MOSAIC_TMUX_SOCKET=mosaic-fleet
Environment=MOSAIC_TMUX_HOLDER=_holder
ExecStart=/bin/bash -lc 'tmux -L "$MOSAIC_TMUX_SOCKET" has-session -t "=${MOSAIC_TMUX_HOLDER}:0.0" 2>/dev/null || tmux -L "$MOSAIC_TMUX_SOCKET" new-session -d -s "$MOSAIC_TMUX_HOLDER" "while true; do sleep 3600; done"'
ExecStop=-/bin/bash -lc 'tmux -L "$MOSAIC_TMUX_SOCKET" kill-server'

View File

@@ -2,8 +2,12 @@
set -euo pipefail
AGENT_NAME=${1:-${MOSAIC_AGENT_NAME:-}}
MOSAIC_TMUX_SOCKET=${MOSAIC_TMUX_SOCKET:-mosaic-factory}
# Absent socket ⇒ the LITERAL default tmux socket (no -L). The roster's
# socket_name is honored when set; absent never silently becomes mosaic-fleet
# (spawn stays consistent with the onboarding cheat-sheet + fleet ps observe).
MOSAIC_TMUX_SOCKET=${MOSAIC_TMUX_SOCKET:-}
MOSAIC_AGENT_RUNTIME=${MOSAIC_AGENT_RUNTIME:-pi}
MOSAIC_AGENT_MODEL=${MOSAIC_AGENT_MODEL:-}
MOSAIC_AGENT_WORKDIR=${MOSAIC_AGENT_WORKDIR:-$HOME}
MOSAIC_AGENT_COMMAND=${MOSAIC_AGENT_COMMAND:-}
MOSAIC_HEARTBEAT_RUN_DIR=${MOSAIC_HEARTBEAT_RUN_DIR:-${MOSAIC_HOME:-$HOME/.config/mosaic}/fleet/run}
@@ -19,13 +23,25 @@ if ! command -v tmux >/dev/null 2>&1; then
exit 69
fi
if tmux -L "$MOSAIC_TMUX_SOCKET" has-session -t "=${AGENT_NAME}:0.0" 2>/dev/null; then
echo "Mosaic agent session already running: $AGENT_NAME on socket $MOSAIC_TMUX_SOCKET"
# Run tmux against the configured fleet server. A non-empty MOSAIC_TMUX_SOCKET
# is forwarded as `-L <socket>`; an empty one adds no `-L`, so tmux talks to its
# default socket. All arguments are passed through unchanged and the tmux exit
# status is returned to the caller.
_tmux() {
  if [ -z "$MOSAIC_TMUX_SOCKET" ]; then
    tmux "$@"
    return
  fi
  tmux -L "$MOSAIC_TMUX_SOCKET" "$@"
}
if _tmux has-session -t "=${AGENT_NAME}:0.0" 2>/dev/null; then
echo "Mosaic agent session already running: $AGENT_NAME on socket ${MOSAIC_TMUX_SOCKET:-(default)}"
exit 0
fi
if [ -z "$MOSAIC_AGENT_COMMAND" ]; then
MOSAIC_AGENT_COMMAND="mosaic yolo $MOSAIC_AGENT_RUNTIME"
# Map the roster's per-agent model_hint to `--model` so workers launch on the
# configured model (e.g. pi on openai-codex/gpt-5.5:high). Omitted when unset.
MOSAIC_AGENT_COMMAND="mosaic yolo $MOSAIC_AGENT_RUNTIME${MOSAIC_AGENT_MODEL:+ --model $MOSAIC_AGENT_MODEL}"
fi
# ── Derive a runtime-bin PATH prefix ─────────────────────────────────────────
@@ -107,13 +123,13 @@ fi
mkdir -p "$MOSAIC_AGENT_WORKDIR"
# ── Launch the tmux session (no exec — we continue to wire the heartbeat) ────
tmux -L "$MOSAIC_TMUX_SOCKET" new-session -d -s "$AGENT_NAME" -c "$MOSAIC_AGENT_WORKDIR" \
_tmux new-session -d -s "$AGENT_NAME" -c "$MOSAIC_AGENT_WORKDIR" \
bash -c "$PANE_SHELL_SNIPPET"
# ── Resolve the pane PID (retry briefly to let the session initialise) ────────
PANE_PID=""
for _retry in 1 2 3 4 5; do
PANE_PID=$(tmux -L "$MOSAIC_TMUX_SOCKET" list-panes \
PANE_PID=$(_tmux list-panes \
-t "=${AGENT_NAME}:0.0" -F '#{pane_pid}' 2>/dev/null || true)
[ -n "$PANE_PID" ] && break
sleep 0.2

View File

@@ -61,7 +61,25 @@ MOSAIC_HOME="$T5" MOSAIC_INSTALL_MODE=bogus MOSAIC_SYNC_ONLY=1 bash "$INSTALL" >
chk "F5 failure: invalid mode rejected (nonzero exit)" "[ $rc -ne 0 ]"
chk "F5 failure: SOUL + credentials intact" "grep -q orig '$T5/SOUL.md' && grep -q keepme '$T5/credentials/c.json'"
rm -rf "$T1" "$T2" "$T3" "$T4" "$T5"
# F6 — keep-mode re-seed (the `mosaic update` path) MUST NOT wipe user fleet data.
# Regression for the roster-loss bug: fleet/ was not in PRESERVE_PATHS.
T6=$(mktemp -d); mkdir -p "$T6/fleet/examples" "$T6/fleet/run" "$T6/fleet/agents"
printf '# persona\n' > "$T6/SOUL.md" # makes it a recognized existing install (→ keep mode)
printf 'version: 1\nagents:\n - name: coder0\n' > "$T6/fleet/roster.yaml"
printf 'version: 1\nagents:\n - name: custom\n' > "$T6/fleet/my-fleet.yaml"
printf 'ts=x\n' > "$T6/fleet/run/coder0.hb"
printf 'MOSAIC_AGENT_NAME=coder0\n' > "$T6/fleet/agents/coder0.env"
printf '# stale preset\n' > "$T6/fleet/examples/general.yaml"
echo 3 > "$T6/.framework-version"
run "$T6" keep
chk "F6 reseed: user roster.yaml SURVIVES keep-mode sync" "grep -q coder0 '$T6/fleet/roster.yaml'"
chk "F6 reseed: other user fleet/*.yaml survives (glob)" "[ -f '$T6/fleet/my-fleet.yaml' ]"
chk "F6 reseed: per-agent env (fleet/agents) survives" "[ -f '$T6/fleet/agents/coder0.env' ]"
chk "F6 reseed: heartbeat run dir (fleet/run) survives" "[ -f '$T6/fleet/run/coder0.hb' ]"
chk "F6 reseed: framework examples ARE refreshed (not preserved stale)" "grep -q orchestrator '$T6/fleet/examples/general.yaml'"
chk "F6 reseed: framework roster.schema.json seeded" "[ -f '$T6/fleet/roster.schema.json' ]"
rm -rf "$T1" "$T2" "$T3" "$T4" "$T5" "$T6"
echo
echo "RESULT: $pass passed, $fail failed"
[ "$fail" -eq 0 ]

View File

@@ -2,12 +2,20 @@
when:
- event: [push, pull_request, manual]
# Dependencies are installed ONCE in the `install` step and every downstream
# step depends on it, reusing the populated node_modules from the shared
# workspace volume. Do NOT re-run `npm ci` per step — that pays the full cold
# install (network fetch + native rebuilds) N times and is the dominant cost
# in a pipeline.
#
# For best results, replace `&node_image` with a pre-baked CI base image that
# ships your toolchain (python3/make/g++ for native modules) and a warm npm
# cache, then keep `--prefer-offline` so installs resolve from the cache. See
# the Mosaic Stack repo's Dockerfile.ci + .woodpecker/ci-image.yml for the
# baked-image pattern.
variables:
- &node_image 'node:20-alpine'
- &gitleaks_image 'ghcr.io/gitleaks/gitleaks:v8.24.0'
- &install_deps |
corepack enable
npm ci --ignore-scripts
steps:
# Secret scanning (runs in parallel with install, no deps)
@@ -17,15 +25,18 @@ steps:
- gitleaks git --redact --verbose --log-opts="HEAD~1..HEAD"
depends_on: []
# Single cached install. Every other step depends on this and reuses the
# node_modules it produces in the shared workspace.
install:
image: *node_image
commands:
- *install_deps
- corepack enable
- npm ci --ignore-scripts --prefer-offline
depends_on: []
security-audit:
image: *node_image
commands:
- *install_deps
- npm audit --audit-level=high
depends_on:
- install
@@ -35,7 +46,6 @@ steps:
environment:
SKIP_ENV_VALIDATION: 'true'
commands:
- *install_deps
- npm run lint
depends_on:
- install
@@ -45,7 +55,6 @@ steps:
environment:
SKIP_ENV_VALIDATION: 'true'
commands:
- *install_deps
- npm run type-check
depends_on:
- install
@@ -55,7 +64,6 @@ steps:
environment:
SKIP_ENV_VALIDATION: 'true'
commands:
- *install_deps
- npm run test -- --coverage --coverageThreshold='{"global":{"branches":80,"functions":80,"lines":80,"statements":80}}'
depends_on:
- install
@@ -66,7 +74,6 @@ steps:
SKIP_ENV_VALIDATION: 'true'
NODE_ENV: 'production'
commands:
- *install_deps
- npm run build
depends_on:
- lint

View File

@@ -35,7 +35,7 @@ delivers reliably to local OR remote panes.
agent-send.sh -s <dst_session> -m "message"
# Local target on a Mosaic fleet socket
agent-send.sh -L mosaic-factory -s '=coder0' -m "message"
agent-send.sh -L mosaic-fleet -s '=coder0' -m "message"
# Remote target (over ssh)
agent-send.sh -H user@host -s <dst_session> -m "message"
@@ -58,9 +58,9 @@ commands do not fall back to tmux's prefix matching behavior.
Durable Mosaic fleets should use a dedicated tmux socket, for example:
```bash
tmux -L mosaic-factory ls
agent-send.sh -L mosaic-factory -s '=coder0' -m "status?"
send-message.sh -L mosaic-factory -t '=coder0' -m "raw pane message"
tmux -L mosaic-fleet ls
agent-send.sh -L mosaic-fleet -s '=coder0' -m "status?"
send-message.sh -L mosaic-fleet -t '=coder0' -m "raw pane message"
```
This keeps fleet operations away from the user's default tmux server. It is the

View File

@@ -1,6 +1,6 @@
{
"name": "@mosaicstack/mosaic",
"version": "0.0.39",
"version": "0.0.40",
"repository": {
"type": "git",
"url": "https://git.mosaicstack.dev/mosaicstack/stack.git",

View File

@@ -27,6 +27,7 @@ import {
formatAllPackagesTable,
getInstallAllCommand,
runFrameworkReseed,
refreshActiveFleetUnits,
readRosterAgentNames,
buildRelaunchCommands,
FRAMEWORK_RESEED_PACKAGE,
@@ -466,6 +467,12 @@ program
const reseed = runFrameworkReseed();
if (reseed.ok) {
console.log('✔ Framework re-seeded.');
// Propagate shipped systemd unit fixes to the ACTIVE units (re-seed only
// touches ~/.config/mosaic/systemd/user; systemd runs ~/.config/systemd/user).
const units = refreshActiveFleetUnits();
if (units.refreshed.length > 0) {
console.log(`✔ Refreshed ${units.refreshed.length} active systemd unit(s).`);
}
const agents = readRosterAgentNames();
if (agents.length > 0) {
if (opts.relaunch) {

View File

@@ -56,6 +56,55 @@ describe('composeContract — overlay composer', () => {
rmSync(cwdDir, { recursive: true, force: true });
});
it('injects the fleet comms cheat-sheet for a spawned fleet agent (situational)', () => {
// A spawned agent has MOSAIC_AGENT_NAME set + is a member of the roster.
mkdirSync(join(fixture.home, 'fleet'), { recursive: true });
writeFileSync(
join(fixture.home, 'fleet', 'roster.yaml'),
[
'version: 1',
'transport: tmux',
'agents:',
' - name: orchestrator',
' runtime: claude',
' class: orchestrator',
' - name: enhancer',
' runtime: claude',
' class: enhancer',
' - name: coder0-0',
' runtime: claude',
' class: implementer',
' host: 10.1.10.37',
' ssh: jwoltje@10.1.10.37',
'',
].join('\n'),
);
const prev = process.env['MOSAIC_AGENT_NAME'];
try {
process.env['MOSAIC_AGENT_NAME'] = 'enhancer';
const out = composeContract('claude', fixture.home);
expect(out).toContain('# Fleet Comms');
expect(out).toMatch(/`\[[^\]]+:enhancer\]`/); // own [host:session] identity (host machine-dependent)
// local peer → no -H; cross-host peer → -H ssh
expect(out).toContain('-s orchestrator -m "…"');
expect(out).toContain('-H jwoltje@10.1.10.37 -s coder0-0 -m "…"');
expect(out).not.toContain('-H jwoltje@10.1.10.37 -s orchestrator'); // local stays local
} finally {
if (prev === undefined) delete process.env['MOSAIC_AGENT_NAME'];
else process.env['MOSAIC_AGENT_NAME'] = prev;
}
});
it('does NOT inject fleet comms when MOSAIC_AGENT_NAME is unset (non-fleet launch)', () => {
const prev = process.env['MOSAIC_AGENT_NAME'];
try {
delete process.env['MOSAIC_AGENT_NAME'];
expect(composeContract('claude', fixture.home)).not.toContain('# Fleet Comms');
} finally {
if (prev !== undefined) process.env['MOSAIC_AGENT_NAME'] = prev;
}
});
it('includes the per-tier anchors and the selected harness runtime', () => {
const out = composeContract('claude', fixture.home);
expect(out).toContain('GATE-1: the non-negotiable law.'); // L0

View File

@@ -14,11 +14,14 @@ import {
buildEnableLingerCommand,
buildFleetServiceCommand,
buildSystemdEnableCommand,
buildSystemdDisableCommand,
socketArgs,
buildSystemdShowCommand,
buildTmuxListPanesCommand,
buildTmuxListSessionsCommand,
classifySendResult,
countOrchestrators,
countEnhancers,
detectDrift,
enableFleetUnits,
FLEET_PROFILES,
@@ -112,7 +115,7 @@ describe('fleet roster parsing', () => {
}
});
it('defaults local canary rosters to the isolated mosaic-factory socket', async () => {
it('defaults a socket-less roster to the literal default tmux socket (empty, no -L)', async () => {
cleanup = await tempDir();
const rosterPath = join(cleanup, 'roster.yaml');
await writeFile(
@@ -129,12 +132,55 @@ describe('fleet roster parsing', () => {
const roster = await loadFleetRoster(rosterPath);
expect(roster.tmux.socketName).toBe('mosaic-factory');
expect(roster.tmux.socketName).toBe(''); // absent ⇒ default socket (no -L), not mosaic-fleet
expect(roster.tmux.holderSession).toBe('_holder');
expect(roster.agents).toHaveLength(1);
expect(getRosterAgent(roster, 'canary-pi').runtime).toBe('pi');
});
it('socketArgs: named socket → -L <name>; empty → no -L (default socket)', () => {
expect(socketArgs('mosaic-fleet')).toEqual(['-L', 'mosaic-fleet']);
expect(socketArgs('')).toEqual([]);
});
it('honors an explicit socket_name (renders -L) — containment for shipped presets', async () => {
cleanup = await tempDir();
const rosterPath = join(cleanup, 'roster.yaml');
await writeFile(
rosterPath,
[
'version: 1',
'transport: tmux',
'tmux:',
' socket_name: mosaic-fleet',
'agents:',
' - name: canary-pi',
' runtime: pi',
].join('\n'),
);
const roster = await loadFleetRoster(rosterPath);
expect(roster.tmux.socketName).toBe('mosaic-fleet');
expect(buildTmuxListSessionsCommand(roster.tmux.socketName)).toContain('-L');
});
it('maps a per-agent model_hint into MOSAIC_AGENT_MODEL', async () => {
cleanup = await tempDir();
const rosterPath = join(cleanup, 'roster.json');
await writeFile(
rosterPath,
JSON.stringify({
version: 1,
transport: 'tmux',
agents: [{ name: 'coder0', runtime: 'pi', model_hint: 'openai-codex/gpt-5.5:high' }],
}),
);
const roster = await loadFleetRoster(rosterPath);
const env = generateAgentEnv(roster, getRosterAgent(roster, 'coder0'));
expect(env).toContain('MOSAIC_AGENT_MODEL=openai-codex/gpt-5.5:high');
// socket-less roster ⇒ a bare empty socket (no quotes), so spawn uses no -L
expect(env).toContain('MOSAIC_TMUX_SOCKET=\n');
});
it('generates deterministic per-agent EnvironmentFile content', async () => {
cleanup = await tempDir();
const rosterPath = join(cleanup, 'roster.json');
@@ -143,7 +189,7 @@ describe('fleet roster parsing', () => {
JSON.stringify({
version: 1,
transport: 'tmux',
tmux: { socket_name: 'mosaic-factory' },
tmux: { socket_name: 'mosaic-fleet' },
defaults: { working_directory: '/srv/mosaic' },
agents: [{ name: 'coder0', runtime: 'codex', class: 'implementer' }],
}),
@@ -154,8 +200,9 @@ describe('fleet roster parsing', () => {
[
'MOSAIC_AGENT_NAME=coder0',
'MOSAIC_AGENT_RUNTIME=codex',
'MOSAIC_AGENT_MODEL=',
'MOSAIC_AGENT_WORKDIR=/srv/mosaic',
'MOSAIC_TMUX_SOCKET=mosaic-factory',
'MOSAIC_TMUX_SOCKET=mosaic-fleet',
'',
].join('\n'),
);
@@ -166,7 +213,7 @@ describe('fleet roster parsing', () => {
'MOSAIC_AGENT_NAME=coder0',
'MOSAIC_AGENT_RUNTIME=codex',
'MOSAIC_AGENT_WORKDIR=/srv/new',
'MOSAIC_TMUX_SOCKET=mosaic-factory',
'MOSAIC_TMUX_SOCKET=mosaic-fleet',
'',
].join('\n');
const existing = [
@@ -184,7 +231,7 @@ describe('fleet roster parsing', () => {
'MOSAIC_AGENT_NAME=coder0',
'MOSAIC_AGENT_RUNTIME=codex',
'MOSAIC_AGENT_WORKDIR=/srv/new',
'MOSAIC_TMUX_SOCKET=mosaic-factory',
'MOSAIC_TMUX_SOCKET=mosaic-fleet',
'MOSAIC_AGENT_COMMAND=/home/jarvis/.config/mosaic/fleet/canary.sh',
'# site note',
'',
@@ -277,7 +324,7 @@ describe('fleet roster parsing', () => {
const localCanary = await loadFleetRoster(join(examplesDir, 'local-canary.yaml'));
expect(minimal.agents.map((agent) => agent.name)).toEqual(['canary-pi']);
expect(localCanary.tmux.socketName).toBe('mosaic-factory');
expect(localCanary.tmux.socketName).toBe('mosaic-fleet');
expect(localCanary.agents.map((agent) => agent.name)).toEqual(['lead', 'coder0', 'reviewer0']);
expect(localCanaryText).not.toMatch(/usc|ultron|secrev/i);
});
@@ -302,11 +349,11 @@ describe('fleet command construction', () => {
it('builds socket-scoped agent send commands', () => {
const paths = resolveFleetPaths('/home/test/.config/mosaic');
expect(
buildAgentSendCommand(paths, 'coder0', 'hello', 'mosaic-factory', 'operator:mosaic-cli'),
buildAgentSendCommand(paths, 'coder0', 'hello', 'mosaic-fleet', 'operator:mosaic-cli'),
).toEqual([
'/home/test/.config/mosaic/tools/tmux/agent-send.sh',
'-L',
'mosaic-factory',
'mosaic-fleet',
'-S',
'operator:mosaic-cli',
'-s',
@@ -353,8 +400,9 @@ describe('fleet command construction', () => {
try {
await program.parseAsync(['node', 'mosaic', 'fleet', 'verify']);
expect(calls).toEqual([
['tmux', '-L', 'mosaic-factory', 'has-session', '-t', '=_holder:0.0'],
['tmux', '-L', 'mosaic-factory', 'has-session', '-t', '=coder0:0.0'],
// socket-less roster ⇒ default tmux socket (no -L)
['tmux', 'has-session', '-t', '=_holder:0.0'],
['tmux', 'has-session', '-t', '=coder0:0.0'],
]);
} finally {
await rm(home, { recursive: true, force: true });
@@ -635,7 +683,7 @@ describe('fleet command construction', () => {
try {
await program.parseAsync(['node', 'mosaic', 'agent', 'status', 'json-agent']);
expect(calls).toEqual([
['tmux', '-L', 'mosaic-factory', 'has-session', '-t', '=json-agent:0.0'],
['tmux', 'has-session', '-t', '=json-agent:0.0'], // socket-less ⇒ no -L
]);
} finally {
await rm(home, { recursive: true, force: true });
@@ -675,8 +723,6 @@ describe('fleet command construction', () => {
expect(calls).toEqual([
[
join(home, 'tools', 'tmux', 'agent-send.sh'),
'-L',
'mosaic-factory',
'-S',
getDefaultOperatorSourceLabel(),
'-s',
@@ -725,8 +771,6 @@ describe('fleet command construction', () => {
expect(calls).toEqual([
[
join(home, 'tools', 'tmux', 'agent-send.sh'),
'-L',
'mosaic-factory',
'-S',
'lead:manual',
'-s',
@@ -797,10 +841,10 @@ describe('fleet ps — command construction', () => {
});
it('builds exact tmux list-panes command with the correct format string', () => {
expect(buildTmuxListPanesCommand('canary-pi', 'mosaic-factory')).toEqual([
expect(buildTmuxListPanesCommand('canary-pi', 'mosaic-fleet')).toEqual([
'tmux',
'-L',
'mosaic-factory',
'mosaic-fleet',
'list-panes',
'-t',
'=canary-pi:0.0',
@@ -809,9 +853,11 @@ describe('fleet ps — command construction', () => {
]);
});
it('uses DEFAULT_SOCKET_NAME when socket is omitted from list-panes', () => {
it('uses the default tmux socket (no -L) when socket is omitted from list-panes', () => {
const cmd = buildTmuxListPanesCommand('canary-pi');
expect(cmd[2]).toBe('mosaic-factory');
expect(cmd).not.toContain('-L'); // omitted socket ⇒ default socket
expect(cmd[0]).toBe('tmux');
expect(cmd[1]).toBe('list-panes');
});
it('derives heartbeat path under ~/.config/mosaic/fleet/run/', () => {
@@ -983,6 +1029,129 @@ describe('fleet ps — drift detection', () => {
});
});
describe('fleet-polish bundle — boot-survival symmetry', () => {
/**
 * Creates a throwaway Mosaic home containing `fleet/roster.yaml`.
 *
 * @param agents - Full text written verbatim to `fleet/roster.yaml`.
 * @returns The path of the temporary home directory (from `tempDir()`).
 */
async function rosterHome(agents: string): Promise<string> {
  const home = await tempDir();
  const fleetDir = join(home, 'fleet');
  // Make sure the fleet directory exists before writing the roster into it.
  await mkdir(fleetDir, { recursive: true });
  await writeFile(join(fleetDir, 'roster.yaml'), agents);
  return home;
}
it('buildSystemdDisableCommand returns the systemctl --user disable array', () => {
expect(buildSystemdDisableCommand('mosaic-agent@coder0.service')).toEqual([
'systemctl',
'--user',
'disable',
'mosaic-agent@coder0.service',
]);
});
it('fleet remove DISABLES the unit so a removed agent cannot resurrect on boot', async () => {
const home = await rosterHome(
[
'version: 1',
'transport: tmux',
'agents:',
' - name: orchestrator',
' runtime: pi',
' class: orchestrator',
' - name: coder0',
' runtime: codex',
' class: worker',
].join('\n') + '\n',
);
const calls: string[][] = [];
const runner: CommandRunner = async (command, args) => {
calls.push([command, ...args]);
return { stdout: '', stderr: '', exitCode: 0 };
};
const program = new Command();
program.exitOverride();
registerFleetCommand(program, { runner, mosaicHome: home });
try {
await program.parseAsync(['node', 'mosaic', 'fleet', 'remove', 'coder0']);
expect(calls).toContainEqual([
'systemctl',
'--user',
'disable',
'mosaic-agent@coder0.service',
]);
// stop must still happen too
expect(calls).toContainEqual(['systemctl', '--user', 'stop', 'mosaic-agent@coder0.service']);
} finally {
await rm(home, { recursive: true, force: true });
}
});
it('fleet add ENABLES the new agent unit for boot-survival', async () => {
const home = await rosterHome(
['version: 1', 'transport: tmux', 'agents:', ' - name: coder0', ' runtime: codex'].join(
'\n',
) + '\n',
);
const calls: string[][] = [];
const runner: CommandRunner = async (command, args) => {
calls.push([command, ...args]);
return { stdout: '', stderr: '', exitCode: 0 };
};
const program = new Command();
program.exitOverride();
registerFleetCommand(program, { runner, mosaicHome: home });
try {
await program.parseAsync([
'node',
'mosaic',
'fleet',
'add',
'coder1',
'--runtime',
'codex',
'--class',
'worker',
'--no-start',
]);
expect(calls).toContainEqual([
'systemctl',
'--user',
'enable',
'mosaic-agent@coder1.service',
]);
} finally {
await rm(home, { recursive: true, force: true });
}
});
it('fleet init --write enforces the two-agent floor (1 orchestrator + >=1 enhancer)', async () => {
// The general profile must yield exactly one orchestrator AND at least one
// enhancer; the guarantee is enforced (not just warned). Happy path writes cleanly.
const home = await tempDir();
const program = new Command();
program.exitOverride();
registerFleetCommand(program, {
runner: async () => ({ stdout: '', stderr: '', exitCode: 0 }),
mosaicHome: home,
});
try {
await program.parseAsync([
'node',
'mosaic',
'fleet',
'init',
'--profile',
'general',
'--write',
]);
const written = await readFile(join(home, 'fleet', 'roster.yaml'), 'utf8');
const orchestrators = (written.match(/class:\s*orchestrator/g) ?? []).length;
const enhancers = (written.match(/class:\s*enhancer/g) ?? []).length;
expect(orchestrators).toBe(1);
expect(enhancers).toBeGreaterThanOrEqual(1);
} finally {
await rm(home, { recursive: true, force: true });
}
});
});
describe('fleet install — auto-enable units for boot-survival', () => {
it('buildSystemdEnableCommand and buildEnableLingerCommand return correct command arrays', () => {
expect(buildSystemdEnableCommand('mosaic-tmux-holder.service')).toEqual([
@@ -998,7 +1167,7 @@ describe('fleet install — auto-enable units for boot-survival', () => {
const minimalRoster: FleetRoster = {
version: 1,
transport: 'tmux',
tmux: { socketName: 'mosaic-factory', holderSession: '_holder' },
tmux: { socketName: 'mosaic-fleet', holderSession: '_holder' },
defaults: { workingDirectory: '~/src' },
runtimes: { codex: { resetCommand: '/clear' } },
agents: [{ name: 'coder0', runtime: 'codex', className: 'worker' }],
@@ -1020,7 +1189,7 @@ describe('fleet install — auto-enable units for boot-survival', () => {
const minimalRoster: FleetRoster = {
version: 1,
transport: 'tmux',
tmux: { socketName: 'mosaic-factory', holderSession: '_holder' },
tmux: { socketName: 'mosaic-fleet', holderSession: '_holder' },
defaults: { workingDirectory: '~/src' },
runtimes: { codex: { resetCommand: '/clear' } },
agents: [{ name: 'coder0', runtime: 'codex', className: 'worker' }],
@@ -1047,7 +1216,7 @@ describe('fleet install — auto-enable units for boot-survival', () => {
const minimalRoster: FleetRoster = {
version: 1,
transport: 'tmux',
tmux: { socketName: 'mosaic-factory', holderSession: '_holder' },
tmux: { socketName: 'mosaic-fleet', holderSession: '_holder' },
defaults: { workingDirectory: '~/src' },
runtimes: { codex: { resetCommand: '/clear' } },
agents: [{ name: 'coder0', runtime: 'codex', className: 'worker' }],
@@ -1206,8 +1375,9 @@ describe('fleet ps — command sequences issued', () => {
await program.parseAsync(['node', 'mosaic', 'fleet', 'ps']);
expect(calls).toEqual([
buildSystemdShowCommand('coder0'),
buildTmuxListPanesCommand('coder0', 'mosaic-factory'),
buildTmuxListSessionsCommand('mosaic-factory'),
// socket-less roster ⇒ default socket (no -L)
buildTmuxListPanesCommand('coder0'),
buildTmuxListSessionsCommand(),
]);
} finally {
console.log = origLog;
@@ -1218,19 +1388,20 @@ describe('fleet ps — command sequences issued', () => {
describe('buildTmuxListSessionsCommand', () => {
it('builds exact list-sessions command with session_name format', () => {
expect(buildTmuxListSessionsCommand('mosaic-factory')).toEqual([
expect(buildTmuxListSessionsCommand('mosaic-fleet')).toEqual([
'tmux',
'-L',
'mosaic-factory',
'mosaic-fleet',
'list-sessions',
'-F',
'#{session_name}',
]);
});
it('uses DEFAULT_SOCKET_NAME when socket is omitted', () => {
it('uses the default tmux socket (no -L) when socket is omitted', () => {
const cmd = buildTmuxListSessionsCommand();
expect(cmd[2]).toBe('mosaic-factory');
expect(cmd).not.toContain('-L');
expect(cmd).toEqual(['tmux', 'list-sessions', '-F', '#{session_name}']);
});
});
@@ -1471,11 +1642,11 @@ describe('fleet ps — unmanaged socket sessions', () => {
describe('agent watch', () => {
it('builds exact grouped-viewer creation command', () => {
expect(
buildAgentWatchCreateViewerCommand('canary-pi', 'canary-pi-watch-123', 'mosaic-factory'),
buildAgentWatchCreateViewerCommand('canary-pi', 'canary-pi-watch-123', 'mosaic-fleet'),
).toEqual([
'tmux',
'-L',
'mosaic-factory',
'mosaic-fleet',
'new-session',
'-d',
'-t',
@@ -1486,10 +1657,10 @@ describe('agent watch', () => {
});
it('builds exact viewer attach command (read-only)', () => {
expect(buildAgentWatchAttachCommand('canary-pi-watch-123', 'mosaic-factory')).toEqual([
expect(buildAgentWatchAttachCommand('canary-pi-watch-123', 'mosaic-fleet')).toEqual([
'tmux',
'-L',
'mosaic-factory',
'mosaic-fleet',
'attach',
'-r',
'-t',
@@ -1498,19 +1669,20 @@ describe('agent watch', () => {
});
it('builds exact viewer kill command', () => {
expect(buildAgentWatchKillViewerCommand('canary-pi-watch-123', 'mosaic-factory')).toEqual([
expect(buildAgentWatchKillViewerCommand('canary-pi-watch-123', 'mosaic-fleet')).toEqual([
'tmux',
'-L',
'mosaic-factory',
'mosaic-fleet',
'kill-session',
'-t',
'canary-pi-watch-123',
]);
});
it('buildAgentWatchCommand (deprecated) still uses DEFAULT_SOCKET_NAME when socket is omitted', () => {
it('buildAgentWatchCommand (deprecated) uses the default tmux socket (no -L) when socket is omitted', () => {
const cmd = buildAgentWatchCommand('canary-pi');
expect(cmd[2]).toBe('mosaic-factory');
expect(cmd).not.toContain('-L'); // omitted socket ⇒ default socket
expect(cmd[0]).toBe('tmux');
expect(cmd).toContain('-r');
});
@@ -1597,10 +1769,10 @@ describe('agent watch', () => {
describe('agent send --verify', () => {
it('builds exact verify capture-pane command', () => {
expect(buildAgentVerifyAcceptedCommand('canary-pi', 'mosaic-factory', 5)).toEqual([
expect(buildAgentVerifyAcceptedCommand('canary-pi', 'mosaic-fleet', 5)).toEqual([
'tmux',
'-L',
'mosaic-factory',
'mosaic-fleet',
'capture-pane',
'-t',
'=canary-pi:0.0',
@@ -1704,9 +1876,10 @@ describe('agent send --verify', () => {
// 3 calls: BEFORE-capture, send, AFTER-capture (pane changed on first poll → accepted immediately)
expect(calls).toHaveLength(3);
expect(calls[0]).toEqual(buildAgentVerifyAcceptedCommand('coder0', 'mosaic-factory', 5));
// socket-less roster ⇒ default socket (no -L)
expect(calls[0]).toEqual(buildAgentVerifyAcceptedCommand('coder0', '', 5));
expect(calls[1]![0]).toContain('agent-send.sh');
expect(calls[2]).toEqual(buildAgentVerifyAcceptedCommand('coder0', 'mosaic-factory', 5));
expect(calls[2]).toEqual(buildAgentVerifyAcceptedCommand('coder0', '', 5));
} finally {
await rm(home, { recursive: true, force: true });
}
@@ -2188,47 +2361,63 @@ describe('fleet preset rosters', () => {
},
);
it('general preset: orchestrator + one generalist worker', async () => {
it('general preset: orchestrator + enhancer + one generalist worker', async () => {
const roster = await loadFleetRoster(join(examplesDir, 'general.yaml'));
expect(roster.agents.map((a) => a.name)).toEqual(['orchestrator', 'generalist']);
expect(roster.agents.map((a) => a.name)).toEqual(['orchestrator', 'enhancer', 'generalist']);
expect(roster.agents.find((a) => a.name === 'orchestrator')?.runtime).toBe('claude');
expect(roster.agents.find((a) => a.name === 'enhancer')?.className).toBe('enhancer');
expect(roster.agents.find((a) => a.name === 'generalist')?.runtime).toBe('pi');
});
it('coding preset: orchestrator + coder0 + coder1 + reviewer', async () => {
it('coding preset: orchestrator + enhancer + coder0 + coder1 + reviewer', async () => {
const roster = await loadFleetRoster(join(examplesDir, 'coding.yaml'));
expect(roster.agents.map((a) => a.name)).toEqual([
'orchestrator',
'enhancer',
'coder0',
'coder1',
'reviewer',
]);
});
it('research preset: orchestrator + researcher0 + researcher1 + analyst', async () => {
it('research preset: orchestrator + enhancer + researcher0 + researcher1 + analyst', async () => {
const roster = await loadFleetRoster(join(examplesDir, 'research.yaml'));
expect(roster.agents.map((a) => a.name)).toEqual([
'orchestrator',
'enhancer',
'researcher0',
'researcher1',
'analyst',
]);
});
it('hybrid preset: orchestrator + coder0 + researcher0 + reviewer', async () => {
it('hybrid preset: orchestrator + enhancer + coder0 + researcher0 + reviewer', async () => {
const roster = await loadFleetRoster(join(examplesDir, 'hybrid.yaml'));
expect(roster.agents.map((a) => a.name)).toEqual([
'orchestrator',
'enhancer',
'coder0',
'researcher0',
'reviewer',
]);
});
it('every non-minimal preset carries an enhancer (two-agent floor)', async () => {
  // Each listed preset must load with exactly one orchestrator, at least one
  // enhancer, and its first enhancer on the claude runtime.
  const presetNames = ['general', 'coding', 'research', 'hybrid'] as FleetProfile[];
  for (const presetName of presetNames) {
    const presetPath = join(examplesDir, `${presetName}.yaml`);
    const loaded = await loadFleetRoster(presetPath);
    expect(countOrchestrators(loaded)).toBe(1);
    expect(countEnhancers(loaded)).toBeGreaterThanOrEqual(1);
    const firstEnhancer = loaded.agents.find((candidate) => candidate.className === 'enhancer');
    expect(firstEnhancer?.runtime).toBe('claude');
  }
});
it('worker agents in new presets use pi runtime with model_hint openai-codex/gpt-5.5:high', async () => {
for (const preset of ['general', 'coding', 'research', 'hybrid'] as FleetProfile[]) {
const roster = await loadFleetRoster(join(examplesDir, `${preset}.yaml`));
const workers = roster.agents.filter((a) => a.name !== 'orchestrator');
// Core agents (orchestrator + enhancer) run claude; only ephemeral workers are pi.
const workers = roster.agents.filter(
(a) => a.className !== 'orchestrator' && a.className !== 'enhancer',
);
for (const worker of workers) {
expect(worker.runtime).toBe('pi');
expect(worker.modelHint).toBe('openai-codex/gpt-5.5:high');
@@ -2295,7 +2484,7 @@ describe('fleet add/remove — pure helpers', () => {
const baseRoster: FleetRoster = {
version: 1,
transport: 'tmux',
tmux: { socketName: 'mosaic-factory', holderSession: '_holder' },
tmux: { socketName: 'mosaic-fleet', holderSession: '_holder' },
defaults: { workingDirectory: '~/src' },
runtimes: { codex: { resetCommand: '/clear' } },
agents: [
@@ -2370,6 +2559,43 @@ describe('fleet add/remove — pure helpers', () => {
expect(updated.agents.map((a) => a.name)).toEqual(['orchestrator2', 'coder0']);
});
it('countEnhancers counts enhancer-class agents (two-agent floor)', () => {
  // One enhancer among three agents; the worker and orchestrator must not count.
  const mixedAgents: FleetRoster['agents'] = [
    { name: 'orchestrator', runtime: 'claude', className: 'orchestrator' },
    { name: 'enhancer', runtime: 'claude', className: 'enhancer' },
    { name: 'coder0', runtime: 'codex', className: 'worker' },
  ];
  const withEnhancer: FleetRoster = { ...baseRoster, agents: mixedAgents };
  expect(countEnhancers(withEnhancer)).toBe(1);
  // The shared base fixture is expected to report zero enhancers.
  expect(countEnhancers(baseRoster)).toBe(0);
});
it('removeAgentFromRoster throws when removing the sole enhancer (two-agent floor)', () => {
  // Roster with exactly one enhancer: removing it must be rejected.
  const floorAgents: FleetRoster['agents'] = [
    { name: 'orchestrator', runtime: 'claude', className: 'orchestrator' },
    { name: 'enhancer', runtime: 'claude', className: 'enhancer' },
  ];
  const floorRoster: FleetRoster = { ...baseRoster, agents: floorAgents };
  const removeLastEnhancer = (): FleetRoster => removeAgentFromRoster(floorRoster, 'enhancer');
  expect(removeLastEnhancer).toThrow('sole enhancer');
});
it('removeAgentFromRoster allows removing an enhancer when another remains', () => {
  // Two enhancers: dropping one still leaves an enhancer in the roster.
  const twoEnhancers: FleetRoster['agents'] = [
    { name: 'orchestrator', runtime: 'claude', className: 'orchestrator' },
    { name: 'enhancer', runtime: 'claude', className: 'enhancer' },
    { name: 'enhancer2', runtime: 'claude', className: 'enhancer' },
  ];
  const before: FleetRoster = { ...baseRoster, agents: twoEnhancers };
  const after = removeAgentFromRoster(before, 'enhancer');
  const survivingNames = after.agents.map((entry) => entry.name);
  expect(survivingNames).toEqual(['orchestrator', 'enhancer2']);
});
it('serializeRosterToYaml produces YAML that round-trips through loadFleetRoster', async () => {
const yaml = serializeRosterToYaml(baseRoster);
expect(typeof yaml).toBe('string');
@@ -2384,7 +2610,7 @@ describe('fleet add/remove — pure helpers', () => {
await writeFile(rosterPath, yaml);
const loaded = await loadFleetRoster(rosterPath);
expect(loaded.agents.map((a) => a.name)).toEqual(['orchestrator', 'coder0']);
expect(loaded.tmux.socketName).toBe('mosaic-factory');
expect(loaded.tmux.socketName).toBe('mosaic-fleet');
expect(loaded.agents[0]!.className).toBe('orchestrator');
} finally {
await rm(dir, { recursive: true, force: true });

View File

@@ -117,10 +117,26 @@ export interface FleetPaths {
type FleetServiceAction = 'start' | 'stop' | 'restart' | 'status';
const DEFAULT_SOCKET_NAME = 'mosaic-factory';
/**
* The named tmux socket the canonical fleet uses. Kept as a public constant for
* rosters/callers that explicitly want isolation; it is NO LONGER the silent
* fallback for a socket-less roster (that now resolves to the default socket).
*/
export const DEFAULT_SOCKET_NAME = 'mosaic-fleet';
const DEFAULT_HOLDER_SESSION = '_holder';
const DEFAULT_WORKING_DIRECTORY = '~/src';
/**
 * Build the tmux `-L` argument pair for a socket name.
 *
 * @param socketName - Named tmux socket, or an empty string for the default
 *   tmux socket.
 * @returns `['-L', socketName]` for a non-empty name; an empty array otherwise,
 *   so the resulting tmux invocation carries no `-L` flag at all.
 */
export function socketArgs(socketName: string): string[] {
  // Empty name ⇒ no flag: tmux then talks to its default socket.
  if (!socketName) {
    return [];
  }
  return ['-L', socketName];
}
/**
* Default poll interval (ms) between capture-pane checks in `send --verify`.
* Kept short enough to react quickly while not hammering tmux on busy hosts.
@@ -185,6 +201,10 @@ export function generateAgentEnv(roster: FleetRoster, agent: FleetAgent): string
return [
`MOSAIC_AGENT_NAME=${shellEnvValue(agent.name)}`,
`MOSAIC_AGENT_RUNTIME=${shellEnvValue(agent.runtime)}`,
// Per-agent model hint → start-agent-session.sh appends `--model <hint>` to
// the `mosaic yolo` launch so workers run on the roster's model (e.g. pi on
// openai-codex/gpt-5.5:high). Empty when the agent declares no model_hint.
`MOSAIC_AGENT_MODEL=${shellEnvValue(agent.modelHint ?? '')}`,
`MOSAIC_AGENT_WORKDIR=${shellEnvValue(expandHome(workingDirectory))}`,
`MOSAIC_TMUX_SOCKET=${shellEnvValue(roster.tmux.socketName)}`,
'',
@@ -227,6 +247,15 @@ export function buildSystemdEnableCommand(unit: string): string[] {
return ['systemctl', '--user', 'enable', unit];
}
/**
 * Build the argv for `systemctl --user disable <unit>`.
 *
 * @param unit - Full systemd unit name to disable.
 * @returns The command as an argument array, program name first.
 */
export function buildSystemdDisableCommand(unit: string): string[] {
  const systemctlDisable = ['systemctl', '--user', 'disable'];
  return [...systemctlDisable, unit];
}
/**
* Returns the loginctl enable-linger command for a given user.
* Linger allows user systemd services to survive logout.
@@ -310,13 +339,12 @@ export function buildAgentSendCommand(
paths: FleetPaths,
agentName: string,
message: string,
socketName = DEFAULT_SOCKET_NAME,
socketName = '',
sourceLabel = getDefaultOperatorSourceLabel(),
): string[] {
return [
join(paths.tmuxToolsDir, 'agent-send.sh'),
'-L',
socketName,
...socketArgs(socketName),
'-S',
sourceLabel,
'-s',
@@ -335,12 +363,11 @@ export function buildAgentResetCommand(
paths: FleetPaths,
agentName: string,
resetCommand: string,
socketName = DEFAULT_SOCKET_NAME,
socketName = '',
): string[] {
return [
join(paths.tmuxToolsDir, 'send-message.sh'),
'-L',
socketName,
...socketArgs(socketName),
'-t',
`=${agentName}`,
'-m',
@@ -348,15 +375,10 @@ export function buildAgentResetCommand(
];
}
export function buildAgentTailCommand(
agentName: string,
lines: number,
socketName = DEFAULT_SOCKET_NAME,
): string[] {
export function buildAgentTailCommand(agentName: string, lines: number, socketName = ''): string[] {
return [
'tmux',
'-L',
socketName,
...socketArgs(socketName),
'capture-pane',
'-t',
`=${agentName}:0.0`,
@@ -440,14 +462,10 @@ export function buildSystemdShowCommand(agentName: string): string[] {
* Returns the tmux list-panes command for an agent pane.
* Format: `#{pane_pid} #{pane_current_command} #{pane_dead} #{pane_activity}`
*/
export function buildTmuxListPanesCommand(
agentName: string,
socketName = DEFAULT_SOCKET_NAME,
): string[] {
export function buildTmuxListPanesCommand(agentName: string, socketName = ''): string[] {
return [
'tmux',
'-L',
socketName,
...socketArgs(socketName),
'list-panes',
'-t',
`=${agentName}:0.0`,
@@ -461,8 +479,8 @@ export function buildTmuxListPanesCommand(
* Format: `tmux [-L <socket>] list-sessions -F '#{session_name}'` (`-L` is omitted when the socket name is empty).
* Used to discover ad-hoc sessions that are not in the roster.
*/
export function buildTmuxListSessionsCommand(socketName = DEFAULT_SOCKET_NAME): string[] {
return ['tmux', '-L', socketName, 'list-sessions', '-F', '#{session_name}'];
export function buildTmuxListSessionsCommand(socketName = ''): string[] {
return ['tmux', ...socketArgs(socketName), 'list-sessions', '-F', '#{session_name}'];
}
/**
@@ -644,12 +662,11 @@ export function getDefaultTenantAndHost(): { tenant_id: string; host: string } {
export function buildAgentWatchCreateViewerCommand(
agentName: string,
viewerSessionName: string,
socketName = DEFAULT_SOCKET_NAME,
socketName = '',
): string[] {
return [
'tmux',
'-L',
socketName,
...socketArgs(socketName),
'new-session',
'-d',
'-t',
@@ -663,11 +680,8 @@ export function buildAgentWatchCreateViewerCommand(
* Builds the interactive attach command for a viewer session (read-only).
* Must be run via interactiveRunner (stdio: 'inherit').
*/
export function buildAgentWatchAttachCommand(
viewerSessionName: string,
socketName = DEFAULT_SOCKET_NAME,
): string[] {
return ['tmux', '-L', socketName, 'attach', '-r', '-t', viewerSessionName];
export function buildAgentWatchAttachCommand(viewerSessionName: string, socketName = ''): string[] {
return ['tmux', ...socketArgs(socketName), 'attach', '-r', '-t', viewerSessionName];
}
/**
@@ -676,9 +690,9 @@ export function buildAgentWatchAttachCommand(
*/
export function buildAgentWatchKillViewerCommand(
viewerSessionName: string,
socketName = DEFAULT_SOCKET_NAME,
socketName = '',
): string[] {
return ['tmux', '-L', socketName, 'kill-session', '-t', viewerSessionName];
return ['tmux', ...socketArgs(socketName), 'kill-session', '-t', viewerSessionName];
}
/**
@@ -696,11 +710,8 @@ export function buildViewerSessionName(agentName: string): string {
*
* Kept for backward compatibility only.
*/
export function buildAgentWatchCommand(
agentName: string,
socketName = DEFAULT_SOCKET_NAME,
): string[] {
return ['tmux', '-L', socketName, 'attach', '-r', '-t', `=${agentName}`];
export function buildAgentWatchCommand(agentName: string, socketName = ''): string[] {
return ['tmux', ...socketArgs(socketName), 'attach', '-r', '-t', `=${agentName}`];
}
/**
@@ -710,13 +721,12 @@ export function buildAgentWatchCommand(
*/
export function buildAgentVerifyAcceptedCommand(
agentName: string,
socketName = DEFAULT_SOCKET_NAME,
socketName = '',
lines = 5,
): string[] {
return [
'tmux',
'-L',
socketName,
...socketArgs(socketName),
'capture-pane',
'-t',
`=${agentName}:0.0`,
@@ -872,20 +882,33 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
await mkdir(dirname(destination), { recursive: true });
await writeFile(destination, content);
// Validate: exactly one orchestrator required (R5) — friendly summary on success.
// Guarantee the two-agent floor: exactly one orchestrator AND at least
// one enhancer for every profile except the sanctioned no-orchestrator
// `minimal` preset. A mismatch means a corrupted/edited preset — fail hard
// rather than write a malformed fleet.
const written = await loadFleetRoster(destination);
const orchCount = countOrchestrators(written);
if (orchCount !== 1) {
process.stderr.write(
`Warning: fleet roster at ${destination} has ${orchCount} orchestrator agent(s) (expected exactly 1).\n`,
);
const enhancerCount = countEnhancers(written);
if (profile === 'minimal') {
console.log(
`Initialized ${profile} fleet: ${written.agents.length} agent(s). Next: mosaic fleet install`,
`Initialized ${profile} fleet: ${written.agents.length} agent(s) (no orchestrator). Next: mosaic fleet install`,
);
} else if (orchCount !== 1) {
throw new Error(
`Fleet init failed: the "${profile}" roster has ${orchCount} orchestrator agent(s), ` +
`expected exactly 1 (R5). The preset may be corrupted — re-install the framework.`,
);
} else if (enhancerCount < 1) {
throw new Error(
`Fleet init failed: the "${profile}" roster has no enhancer agent. Every fleet keeps an ` +
`orchestrator + enhancer minimum (two-agent floor). The preset may be corrupted — ` +
`re-install the framework.`,
);
} else {
const workerCount = written.agents.length - 1;
const workerCount = written.agents.length - 1 - enhancerCount;
console.log(
`Initialized ${profile} fleet: 1 orchestrator + ${workerCount} agent(s). Next: mosaic fleet install`,
`Initialized ${profile} fleet: 1 orchestrator + ${enhancerCount} enhancer(s) + ` +
`${workerCount} worker(s). Next: mosaic fleet install`,
);
}
});
@@ -967,8 +990,7 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
const socketName = roster.tmux.socketName;
await runChecked(runner, [
'tmux',
'-L',
socketName,
...socketArgs(socketName),
'has-session',
'-t',
`=${roster.tmux.holderSession}:0.0`,
@@ -976,8 +998,7 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
for (const agent of roster.agents) {
await runChecked(runner, [
'tmux',
'-L',
socketName,
...socketArgs(socketName),
'has-session',
'-t',
`=${agent.name}:0.0`,
@@ -1218,6 +1239,24 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
console.log(`Added ${name} (${opts.runtime}/${opts.class}) to the fleet.`);
// Enable the unit for boot-survival (non-fatal) — symmetry with
// disable-on-remove. Independent of --start so a queued agent still
// survives a reboot once its unit exists.
try {
const enableResult = await runner(
...splitCommand(buildSystemdEnableCommand(`mosaic-agent@${name}.service`)),
);
if (enableResult.exitCode !== 0) {
process.stderr.write(
`Warning: could not enable mosaic-agent@${name}.service: ${enableResult.stderr || enableResult.stdout || 'non-zero exit'}\n`,
);
}
} catch (err) {
process.stderr.write(
`Warning: enable command failed for ${name}: ${err instanceof Error ? err.message : String(err)}\n`,
);
}
if (opts.start !== false) {
await runChecked(runner, buildFleetServiceCommand('start', name));
console.log(`Started mosaic-agent@${name}.service.`);
@@ -1254,6 +1293,26 @@ export function registerFleetCommand(program: Command, deps: FleetCommandDeps =
);
}
// Disable the unit (non-fatal) so an enabled instance cannot resurrect on
// boot pointing at the now-deleted config — boot-survival symmetry with
// enable-on-add. Skipped only when --keep-files keeps the config in place.
if (!opts.keepFiles) {
try {
const disableResult = await runner(
...splitCommand(buildSystemdDisableCommand(`mosaic-agent@${name}.service`)),
);
if (disableResult.exitCode !== 0) {
process.stderr.write(
`Warning: could not disable mosaic-agent@${name}.service: ${disableResult.stderr || disableResult.stdout || 'non-zero exit'}\n`,
);
}
} catch (err) {
process.stderr.write(
`Warning: disable command failed for ${name}: ${err instanceof Error ? err.message : String(err)}\n`,
);
}
}
// Write updated roster
await writeFile(rosterPath, serializeRosterToYaml(updatedRoster));
@@ -1310,8 +1369,8 @@ export function registerFleetAgentCommands(
getRosterAgent(roster, agent);
}
const command = agent
? ['tmux', '-L', roster.tmux.socketName, 'has-session', '-t', `=${agent}:0.0`]
: ['tmux', '-L', roster.tmux.socketName, 'ls'];
? ['tmux', ...socketArgs(roster.tmux.socketName), 'has-session', '-t', `=${agent}:0.0`]
: ['tmux', ...socketArgs(roster.tmux.socketName), 'ls'];
const result = await runner(...splitCommand(command));
if (opts.json) {
console.log(
@@ -1629,9 +1688,12 @@ function normalizeRoster(raw: RawFleetRoster): FleetRoster {
version: 1,
transport: 'tmux',
tmux: {
// Absent socket_name ⇒ '' (the literal default tmux socket, no -L) — NOT
// mosaic-fleet. Shipped presets set socket_name explicitly, so they are
// unaffected; only socket-less rosters get default-socket behavior.
socketName: stringValue(
raw.tmux?.socket_name ?? raw.tmux?.socketName,
DEFAULT_SOCKET_NAME,
'',
'Fleet roster tmux socket_name',
),
holderSession: stringValue(
@@ -1797,6 +1859,12 @@ function expandHome(path: string): string {
}
function shellEnvValue(value: string): string {
// Empty ⇒ a bare `VAR=` (unambiguous empty in a systemd EnvironmentFile and
// when shell-sourced). Quoting it as '' risks a literal two-char value (e.g.
// a tmux socket named "''"), which would defeat the default-socket behavior.
if (value === '') {
return '';
}
if (/^[A-Za-z0-9_./:=@+-]+$/.test(value)) {
return value;
}
@@ -1894,6 +1962,15 @@ export function countOrchestrators(roster: FleetRoster): number {
return roster.agents.filter((a) => a.className === 'orchestrator').length;
}
/**
 * Count the agents in a parsed roster whose `className` is `'enhancer'`.
 *
 * @param roster - Parsed fleet roster.
 * @returns Number of enhancer-class agents (0 when there are none).
 */
export function countEnhancers(roster: FleetRoster): number {
  let enhancers = 0;
  for (const agent of roster.agents) {
    if (agent.className === 'enhancer') {
      enhancers += 1;
    }
  }
  return enhancers;
}
/** Valid runtime identifiers for fleet agents. */
export const VALID_FLEET_RUNTIMES: readonly string[] = [
'pi',
@@ -1936,6 +2013,15 @@ export function removeAgentFromRoster(roster: FleetRoster, name: string): FleetR
`Cannot remove agent "${name}": it is the sole orchestrator. Add another orchestrator first (R5).`,
);
}
// Two-agent floor: never drop the last enhancer (the continuous-improvement
// loop). Symmetric with the sole-orchestrator guard.
const remainingEnhancerCount = remaining.filter((a) => a.className === 'enhancer').length;
if (remainingEnhancerCount === 0 && agent.className === 'enhancer') {
throw new Error(
`Cannot remove agent "${name}": it is the sole enhancer. Every fleet keeps at least one ` +
`enhancer (two-agent floor). Add another enhancer first.`,
);
}
return {
...roster,
agents: remaining,

View File

@@ -19,6 +19,7 @@ import { createRequire } from 'node:module';
import { homedir } from 'node:os';
import { join, dirname } from 'node:path';
import type { Command } from 'commander';
import { readFleetCommsBlock } from '../fleet/comms-onboarding.js';
const MOSAIC_HOME = process.env['MOSAIC_HOME'] ?? join(homedir(), '.config', 'mosaic');
@@ -383,6 +384,12 @@ For required push/merge/issue-close/release actions, execute without routine con
// Runtime-specific contract
parts.push('\n\n# Runtime-Specific Contract\n\n' + readFileSync(runtimeFile, 'utf-8'));
// Fleet onboarding: when this is a spawned fleet agent (MOSAIC_AGENT_NAME set
// and present in the roster), inject a comms cheat-sheet + peer roster so it
// knows how to reach the orchestrator and its peers from its first turn.
const fleetComms = readFleetCommsBlock(mosaicHome, process.env['MOSAIC_AGENT_NAME']);
if (fleetComms) parts.push('\n\n' + fleetComms);
return parts.join('\n');
}

View File

@@ -153,6 +153,30 @@ describe('FileConfigAdapter.syncFramework — defaults seeding', () => {
expect(readFileSync(join(fixture.mosaicHome, 'AGENTS.md'), 'utf-8')).toBe('# AGENTS default\n');
});
it('preserves user fleet data (roster.yaml, agents/, run/) through a keep-mode sync', async () => {
  // Regression for the roster-loss bug (#631): files the user authored under
  // fleet/ must still be there, unchanged, after a keep-mode framework sync.
  const userFleetDir = join(fixture.mosaicHome, 'fleet');
  const rosterFile = join(userFleetDir, 'roster.yaml');
  const runFile = join(userFleetDir, 'run', 'a.hb');
  const agentEnvFile = join(userFleetDir, 'agents', 'a.env');
  const rosterBody = 'version: 1\nMINE\n';

  mkdirSync(join(userFleetDir, 'run'), { recursive: true });
  mkdirSync(join(userFleetDir, 'agents'), { recursive: true });
  writeFileSync(rosterFile, rosterBody);
  writeFileSync(runFile, 'ts=x\n');
  writeFileSync(agentEnvFile, 'X=1\n');

  // The framework source ships fleet/examples — that part should still be seeded.
  const shippedExamplesDir = join(fixture.sourceDir, 'fleet', 'examples');
  mkdirSync(shippedExamplesDir, { recursive: true });
  writeFileSync(join(shippedExamplesDir, 'general.yaml'), '# preset\n');

  const adapter = new FileConfigAdapter(fixture.mosaicHome, fixture.sourceDir);
  await adapter.syncFramework('keep');

  // User-owned files: content intact / still present.
  expect(readFileSync(rosterFile, 'utf-8')).toBe(rosterBody);
  expect(existsSync(runFile)).toBe(true);
  expect(existsSync(agentEnvFile)).toBe(true);
  // Framework-owned fleet/examples is seeded.
  expect(existsSync(join(userFleetDir, 'examples', 'general.yaml'))).toBe(true);
});
it('is a no-op for seeding when defaults/ dir does not exist', async () => {
rmSync(fixture.defaultsDir, { recursive: true });

View File

@@ -173,6 +173,13 @@ export class FileConfigAdapter implements ConfigService {
'memory',
'sources',
'credentials',
// User-authored fleet data MUST survive `mosaic update`'s re-seed.
// The framework seeds only fleet/examples + fleet/roles +
// fleet/roster.schema.json; the operator's roster, per-agent env, and
// heartbeat run dir stay user-owned. (Mirror of install.sh PRESERVE_PATHS.)
'fleet/*.yaml',
'fleet/agents',
'fleet/run',
]
: [];

View File

@@ -0,0 +1,187 @@
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { mkdtempSync, mkdirSync, writeFileSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import {
parseRosterAgents,
buildFleetCommsBlock,
renderPeerReach,
readFleetCommsBlock,
type CommsPeer,
} from './comms-onboarding.js';
/**
 * Roster fixture shared by the suites below: three agents with no host/ssh
 * plus one manually listed cross-host peer. Ends with a trailing newline.
 */
const ROSTER =
  [
    ['version: 1', 'transport: tmux', 'agents:'],
    [' - name: orchestrator', ' runtime: claude', ' class: orchestrator'],
    [' - name: enhancer', ' runtime: claude', ' class: enhancer'],
    [' - name: coder0', ' runtime: pi', ' class: implementer'],
    [
      ' # a manually-listed cross-host peer (pre-federation stopgap)',
      ' - name: coder0-0',
      ' runtime: claude',
      ' class: implementer',
      ' host: 10.1.10.37',
      ' ssh: jwoltje@10.1.10.37',
    ],
  ]
    .flat()
    .join('\n') + '\n';
describe('parseRosterAgents', () => {
  it('parses name + class + optional host/ssh', () => {
    const parsed = parseRosterAgents(ROSTER);
    const byName = (wanted: string) => parsed.find((entry) => entry.name === wanted);

    expect(parsed.map((entry) => entry.name)).toEqual([
      'orchestrator',
      'enhancer',
      'coder0',
      'coder0-0',
    ]);
    expect(byName('coder0')).toMatchObject({ className: 'implementer' });
    expect(byName('coder0-0')).toMatchObject({
      className: 'implementer',
      host: '10.1.10.37',
      ssh: 'jwoltje@10.1.10.37',
    });
    // Agents listed without host/ssh must not gain a host.
    expect(byName('orchestrator')!.host).toBeUndefined();
  });

  it('parses an optional per-agent socket', () => {
    const rosterText = ['agents:', ' - name: a', ' class: worker', ' socket: mosaic-fleet'].join(
      '\n',
    );
    const [firstPeer] = parseRosterAgents(rosterText);
    expect(firstPeer).toMatchObject({ name: 'a', socket: 'mosaic-fleet' });
  });

  it('stops at the next top-level key', () => {
    // `defaults:` is a sibling of `agents:`; nothing under it may be read as an agent.
    const rosterText = [
      'agents:',
      ' - name: a',
      ' class: worker',
      'defaults:',
      ' working_directory: ~',
    ].join('\n');
    const names = parseRosterAgents(rosterText).map((entry) => entry.name);
    expect(names).toEqual(['a']);
  });
});
describe('renderPeerReach — same-host vs cross-host', () => {
  const send = '/home/u/.config/mosaic/tools/tmux/agent-send.sh';
  // Every case renders from the point of view of the same fleet host.
  const reachFrom = (peer: CommsPeer): string => renderPeerReach(peer, 'w-jarvis', send);

  it('renders the short form for a same-host peer', () => {
    const rendered = reachFrom({ name: 'enhancer', className: 'enhancer' });
    expect(rendered).toBe(`${send} -s enhancer -m "…"`);
  });

  it('renders the -H form for a cross-host peer using ssh', () => {
    const rendered = reachFrom({
      name: 'coder0-0',
      className: 'implementer',
      host: '10.1.10.37',
      ssh: 'jwoltje@10.1.10.37',
    });
    expect(rendered).toBe(`${send} -H jwoltje@10.1.10.37 -s coder0-0 -m "…"`);
  });

  it('falls back to host when a cross-host peer has no ssh', () => {
    const rendered = reachFrom({ name: 'x', className: 'worker', host: '10.0.0.9' });
    expect(rendered).toBe(`${send} -H 10.0.0.9 -s x -m "…"`);
  });

  it('treats a peer whose host equals the fleet host as same-host', () => {
    const rendered = reachFrom({ name: 'y', className: 'worker', host: 'w-jarvis' });
    expect(rendered).toBe(`${send} -s y -m "…"`);
  });

  it('emits NO -L for an unset/default socket', () => {
    const rendered = reachFrom({ name: 'lead', className: 'orchestrator' });
    expect(rendered).toBe(`${send} -s lead -m "…"`);
  });

  it('emits -L <socket> for a named socket', () => {
    const rendered = reachFrom({
      name: 'coder0',
      className: 'implementer',
      socket: 'mosaic-fleet',
    });
    expect(rendered).toBe(`${send} -L mosaic-fleet -s coder0 -m "…"`);
  });

  it('combines -L (named socket) and -H (cross-host) in order', () => {
    const rendered = reachFrom({
      name: 'coder0-0',
      className: 'implementer',
      host: '10.1.10.37',
      ssh: 'jwoltje@10.1.10.37',
      socket: 'mosaic-fleet',
    });
    expect(rendered).toBe(`${send} -L mosaic-fleet -H jwoltje@10.1.10.37 -s coder0-0 -m "…"`);
  });
});
describe('buildFleetCommsBlock', () => {
  const send = '/h/.config/mosaic/tools/tmux/agent-send.sh';
  const agents = parseRosterAgents(ROSTER);

  it('excludes self, lists peers, flags the orchestrator, and emits both address forms', () => {
    const rendered = buildFleetCommsBlock({
      selfName: 'enhancer',
      agents,
      fleetHost: 'w-jarvis',
      agentSendPath: send,
    });
    const expectFragment = (fragment: string): void => {
      expect(rendered).toContain(fragment);
    };

    expectFragment('# Fleet Comms');
    expectFragment('You are **enhancer**');
    // criterion 1: the agent's own [host:session] identity
    expectFragment('`[w-jarvis:enhancer]`');
    // self must not appear as a peer table row
    expect(rendered).not.toMatch(/\|\s*enhancer\s*\|/);
    // peers present, orchestrator flagged
    expectFragment('| orchestrator |');
    expectFragment('point of contact');
    // same-host peer: short form
    expectFragment(`${send} -s coder0 -m "…"`);
    // cross-host peer: -H form plus host annotation
    expectFragment(`${send} -H jwoltje@10.1.10.37 -s coder0-0 -m "…"`);
    expectFragment('host `10.1.10.37`');
    // conventions
    expectFragment('FLIP the preamble');
    expectFragment('ACCEPTED');
  });

  it('returns empty when the agent has no peers', () => {
    const soloBlock = buildFleetCommsBlock({
      selfName: 'solo',
      agents: [{ name: 'solo', className: 'orchestrator' }],
      fleetHost: 'h',
      agentSendPath: send,
    });
    expect(soloBlock).toBe('');
  });
});
describe('readFleetCommsBlock — situational (the context a spawned agent gets)', () => {
  // Fresh temp MOSAIC_HOME per test, seeded with fleet/roster.yaml from ROSTER.
  let home: string;

  beforeEach(() => {
    home = mkdtempSync(join(tmpdir(), 'mosaic-comms-'));
    mkdirSync(join(home, 'fleet'), { recursive: true });
    writeFileSync(join(home, 'fleet', 'roster.yaml'), ROSTER);
  });

  afterEach(() => rmSync(home, { recursive: true, force: true }));

  it('builds the cheat-sheet with correct peer addresses for a fleet member', () => {
    const block = readFleetCommsBlock(home, 'orchestrator', 'w-jarvis');
    expect(block).toContain('# Fleet Comms');
    expect(block).toContain('| enhancer |');
    expect(block).toContain(`${join(home, 'tools', 'tmux', 'agent-send.sh')} -s coder0 -m "…"`);
    expect(block).toContain('-H jwoltje@10.1.10.37 -s coder0-0');
    expect(block).not.toMatch(/\|\s*orchestrator\s*\|/); // self excluded
  });

  it('returns empty when MOSAIC_AGENT_NAME is unset, no roster, or agent not a member', () => {
    expect(readFleetCommsBlock(home, undefined, 'w-jarvis')).toBe('');
    expect(readFleetCommsBlock(home, 'stranger', 'w-jarvis')).toBe('');

    // The "no roster" case needs its own empty home. afterEach only removes
    // `home`, so this directory is cleaned up here — even if the assertion fails.
    const rosterlessHome = mkdtempSync(join(tmpdir(), 'noroster-'));
    try {
      expect(readFleetCommsBlock(rosterlessHome, 'orchestrator')).toBe('');
    } finally {
      rmSync(rosterlessHome, { recursive: true, force: true });
    }
  });
});

View File

@@ -0,0 +1,183 @@
/**
* Fleet onboarding-injection (#620).
*
* Fleet agents are born not knowing how to reach their peers — the root cause of
* a spawned agent's failed first send. When an agent boots via `mosaic yolo
* <runtime>` (→ composeContract → system prompt), we append a comms cheat-sheet
* + peer roster so it can talk to the orchestrator and other agents immediately.
*
* Cross-host aware: a peer may carry `host`/`ssh` (a deliberate pre-federation
* stopgap — manual cross-host listing; federation/W1 auto-discovers later), so a
* w-jarvis agent is born knowing the exact `-H` command to reach a dragon-lin
* peer. Same-host peers render the short form.
*
* Standalone (no fleet.ts import) to keep launch.ts's prompt path free of the
* heavy fleet command module. The roster is parsed leniently — the cheat-sheet
* is best-effort onboarding, never a hard dependency.
*/
import { existsSync, readFileSync } from 'node:fs';
import { homedir, hostname } from 'node:os';
import { join } from 'node:path';
/**
 * One roster agent as the comms cheat-sheet sees it: the fields
 * `parseRosterAgents` extracts and `renderPeerReach` turns into an
 * agent-send command.
 */
export interface CommsPeer {
  /** Agent name; also used as the tmux session name passed to `-s`. */
  name: string;
  /** Roster `class` (orchestrator | enhancer | implementer | worker | …). */
  className: string;
  /** Host the peer runs on; absent ⇒ the fleet host (same host). */
  host?: string;
  /** SSH target (user@host) for a cross-host peer; renders the `-H` form. */
  ssh?: string;
  /** tmux socket the peer's session lives on; absent ⇒ default socket (no `-L`). */
  socket?: string;
}
/**
 * Lenient parse of a fleet `roster.yaml` for agent name/class/host/ssh/socket.
 * Avoids a dependency on the full fleet roster parser; the expected format is
 * `- name:` list items with `class:`/`host:`/`ssh:`/`socket:` siblings under a
 * top-level `agents:` key.
 *
 * Leniency rules:
 * - The agent list may be indented or start at column 0 (both are valid YAML).
 * - Blank lines, comment-only lines and trailing ` # comments` are ignored.
 * - An entry whose `- name:` line cannot be parsed is dropped together with its
 *   fields; they are never attributed to the preceding agent.
 * - List items nested deeper than the agent entries (e.g. a `tools:` list) are
 *   ignored rather than treated as agents.
 * - Field keys are matched at any indentation, so a nested mapping that reuses
 *   one of the field names is still picked up (best-effort, not a YAML parser).
 *
 * @param yamlText Raw roster file contents.
 * @returns Agents in file order; `className` defaults to `'worker'`. Never throws.
 */
export function parseRosterAgents(yamlText: string): CommsPeer[] {
  // Compiled once per call instead of once per key per line.
  const agentsHeader = /^agents:(?:\s*|\s+#.*)$/;
  const nameItem = /^\s*-\s*name:\s*["']?([A-Za-z0-9._-]+)["']?\s*(?:\s#.*)?$/;
  const field = /^\s*(class|host|ssh|socket):\s*["']?([^"'#]+?)["']?\s*(?:\s#.*)?$/;

  const peers: CommsPeer[] = [];
  let current: CommsPeer | null = null;
  let inAgents = false;
  // Column of the `-` that opens agent entries; -1 until the first one is seen.
  let entryIndent = -1;

  for (const rawLine of yamlText.split('\n')) {
    const line = rawLine.replace(/\s+$/, ''); // also strips the `\r` of CRLF files
    if (!inAgents) {
      inAgents = agentsHeader.test(line);
      continue;
    }

    const body = line.trimStart();
    if (body === '' || body.startsWith('#')) continue; // blank or comment-only

    const indent = line.length - body.length;
    const isListItem = body.startsWith('-') && !body.startsWith('---');

    // A new top-level key (or a `---` document marker) ends the agents block;
    // a column-0 list item is still part of it.
    if (indent === 0 && !isListItem) break;

    if (isListItem) {
      if (entryIndent < 0) entryIndent = indent;
      // Deeper list items belong to a nested list inside the current entry.
      if (indent > entryIndent) continue;
      entryIndent = indent;
      if (current) peers.push(current);
      const nameMatch = nameItem.exec(line);
      // An unparseable entry resets `current` so its fields are skipped.
      current = nameMatch ? { name: nameMatch[1] as string, className: 'worker' } : null;
      continue;
    }

    if (!current) continue;
    const fieldMatch = field.exec(line);
    if (!fieldMatch) continue;
    const value = (fieldMatch[2] as string).trim();
    if (!value) continue;
    switch (fieldMatch[1]) {
      case 'class':
        current.className = value;
        break;
      case 'host':
        current.host = value;
        break;
      case 'ssh':
        current.ssh = value;
        break;
      case 'socket':
        current.socket = value;
        break;
    }
  }

  if (current) peers.push(current);
  return peers;
}
/** Inputs to `buildFleetCommsBlock`. */
export interface FleetCommsOptions {
  /** This agent's name (it is excluded from its own peer list). */
  selfName: string;
  /** All roster agents (including self; filtered out internally). */
  agents: CommsPeer[];
  /** Host the fleet runs on (short hostname) — the same-host baseline. */
  fleetHost: string;
  /** Absolute path to agent-send.sh in this install. */
  agentSendPath: string;
}
/**
 * True when the peer declares a `host` and it differs from the fleet baseline.
 * A peer with no `host` is treated as living on the fleet host.
 */
function isRemote(peer: CommsPeer, fleetHost: string): boolean {
  const { host } = peer;
  if (host === undefined) return false;
  return host !== fleetHost;
}
/**
 * Produce the agent-send invocation that reaches `peer`; the tmux session is
 * the peer's name. Flags are emitted only when the peer's data calls for them:
 * `-L <socket>` when a socket is named (otherwise the default tmux socket is
 * used), and `-H <ssh|host>` when the peer is on another host, preferring the
 * `ssh` target over the bare `host`.
 */
export function renderPeerReach(peer: CommsPeer, fleetHost: string, agentSendPath: string): string {
  const socketArgs = peer.socket ? ['-L', peer.socket] : [];
  const hostArgs = isRemote(peer, fleetHost) ? ['-H', peer.ssh ?? (peer.host as string)] : [];
  const command = [agentSendPath, ...socketArgs, ...hostArgs, '-s', peer.name, '-m', '"…"'];
  return command.join(' ');
}
/**
 * Build the `# Fleet Comms` onboarding block (pure markdown). Returns '' when
 * the agent has no peers (a single-agent roster has no one to talk to).
 *
 * The block contains an intro naming the agent's `[host:session]` identity, a
 * peer table with the exact reach command per peer, a point-of-contact line,
 * and the messaging conventions. Headings, the table and the paragraph after
 * it are separated by blank lines so that GFM renderers do not absorb the
 * point-of-contact sentence into the table as an extra row.
 *
 * @param opts Agent identity, roster, fleet host and agent-send path.
 * @returns The markdown block, or '' when every roster agent is `selfName`.
 */
export function buildFleetCommsBlock(opts: FleetCommsOptions): string {
  const peers = opts.agents.filter((a) => a.name !== opts.selfName);
  if (peers.length === 0) return '';

  const orchestrator = peers.find((p) => p.className === 'orchestrator');

  const rows = peers
    .map((p) => {
      const where = isRemote(p, opts.fleetHost)
        ? `${p.className} · host \`${p.host}\``
        : p.className;
      const role = p.className === 'orchestrator' ? `${where} ← point of contact` : where;
      return `| ${p.name} | ${role} | \`${renderPeerReach(p, opts.fleetHost, opts.agentSendPath)}\` |`;
    })
    .join('\n');

  const orchLine = orchestrator
    ? `Your point of contact is **${orchestrator.name}** (the orchestrator) — route questions, ` +
      `status, and decisions there.`
    : `This fleet has no orchestrator in its roster; coordinate with your peers directly.`;

  // Assembled line by line so the emitted markdown does not depend on how this
  // source file is indented.
  return [
    '# Fleet Comms — reach your peers',
    '',
    `You are **${opts.selfName}** in this fleet. Your comms identity is \`[${opts.fleetHost}:${opts.selfName}]\` —`,
    'that is the `<src>` other agents see and reply to. Reach other agents (durable tmux sessions) with the',
    `Mosaic comms tool at \`${opts.agentSendPath}\`. The **Reach** column below is the exact command per peer:`,
    'same-host peers use the short form (no `-H`); cross-host peers include `-H <user@host>`.',
    '',
    '## Peers',
    '',
    '| Agent | Role | Reach (session = agent name) |',
    '| ----- | ---- | ---------------------------- |',
    rows,
    '',
    orchLine,
    '',
    '## Conventions',
    '',
    // The " — " keeps the two code spans apart; adjacent spans would mis-pair their backticks.
    '- Every message carries a self-identifying preamble `[<src_host>:<src_session> -> <dst_host>:<dst_session>]` — `agent-send.sh` adds it automatically.',
    "- **To reply, FLIP the preamble:** address your reply to the sender's `src` (their host:session becomes your `-s`/`-H`).",
    '- `agent-send.sh` (a.k.a. `agent send --verify`) confirms the message was **ACCEPTED** at the destination prompt — not merely injected. Prefer it for anything that matters.',
  ].join('\n');
}
/**
 * Load `<mosaicHome>/fleet/roster.yaml` and render the comms block for
 * `selfName`. Onboarding is best-effort: the result is '' when no agent name
 * is given, the roster is missing or unreadable, the agent is not listed in
 * it, or it has no peers.
 *
 * @param mosaicHome Mosaic install root holding `fleet/` and `tools/`.
 * @param selfName This agent's roster name; falsy ⇒ ''.
 * @param fleetHost Same-host baseline; defaults to the short local hostname,
 *   or 'localhost' when that is empty.
 */
export function readFleetCommsBlock(
  mosaicHome: string,
  selfName: string | undefined,
  fleetHost: string = hostname().split('.')[0] || 'localhost',
): string {
  if (!selfName) return '';

  const rosterPath = join(mosaicHome, 'fleet', 'roster.yaml');
  if (!existsSync(rosterPath)) return '';

  // Read failures (permissions, races with deletion, …) degrade to "no block".
  const loadRoster = (): string | null => {
    try {
      return readFileSync(rosterPath, 'utf-8');
    } catch {
      return null;
    }
  };
  const rosterText = loadRoster();
  if (rosterText === null) return '';

  const agents = parseRosterAgents(rosterText);
  const isMember = agents.some((agent) => agent.name === selfName);
  if (!isMember) return ''; // not a member of this fleet

  const agentSendPath = join(mosaicHome, 'tools', 'tmux', 'agent-send.sh');
  return buildFleetCommsBlock({ selfName, agents, fleetHost, agentSendPath });
}
/**
 * Default mosaic home (mirrors launch.ts), for callers that don't pass one.
 * Resolves to `<home>/.config/mosaic`, with `<home>` taken from `os.homedir()`
 * once at module load.
 */
export const DEFAULT_MOSAIC_HOME_FOR_COMMS = join(homedir(), '.config', 'mosaic');

View File

@@ -7,7 +7,9 @@ import {
buildRelaunchCommands,
readRosterAgentNames,
runFrameworkReseed,
refreshActiveFleetUnits,
} from './update-checker.js';
import { existsSync, readFileSync } from 'node:fs';
/**
* F3-m3 / R13: `mosaic update` re-seeds the framework + (opt-in) relaunches
@@ -83,3 +85,41 @@ describe('runFrameworkReseed', () => {
rmSync(missing, { recursive: true, force: true });
});
});
// Spec for the unit refresh: seeded units live under <mosaicHome>/systemd/user,
// active units under <XDG_CONFIG_HOME>/systemd/user.
describe('refreshActiveFleetUnits', () => {
  let root: string;
  let mosaicHome: string;
  let configHome: string;

  const seededUnit = (name: string): string => join(mosaicHome, 'systemd', 'user', name);
  const activeUnit = (name: string): string => join(configHome, 'systemd', 'user', name);
  const runRefresh = () =>
    refreshActiveFleetUnits(mosaicHome, {
      XDG_CONFIG_HOME: configHome,
    } as NodeJS.ProcessEnv);

  beforeEach(() => {
    root = mkdtempSync(join(tmpdir(), 'mosaic-units-'));
    mosaicHome = join(root, 'mosaic');
    configHome = join(root, 'config');
    mkdirSync(join(mosaicHome, 'systemd', 'user'), { recursive: true });
    mkdirSync(join(configHome, 'systemd', 'user'), { recursive: true });
    // Freshly re-seeded units (new content).
    writeFileSync(seededUnit('mosaic-agent@.service'), 'NEW\n');
    writeFileSync(seededUnit('mosaic-tmux-holder.service'), 'NEW\n');
  });

  afterEach(() => rmSync(root, { recursive: true, force: true }));

  it('refreshes active units when a fleet is already installed', () => {
    // Active dir already carries mosaic units (stale) → fleet is installed.
    writeFileSync(activeUnit('mosaic-agent@.service'), 'OLD\n');

    const res = runRefresh();

    expect(res.refreshed).toContain('mosaic-agent@.service');
    const activeContent = readFileSync(activeUnit('mosaic-agent@.service'), 'utf-8');
    expect(activeContent).toBe('NEW\n');
  });

  it('is a no-op when no fleet is installed (active dir has no mosaic units)', () => {
    const res = runRefresh();

    expect(res.refreshed).toEqual([]);
    expect(existsSync(activeUnit('mosaic-agent@.service'))).toBe(false);
  });
});

View File

@@ -14,7 +14,14 @@
*/
import { execSync } from 'node:child_process';
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import {
existsSync,
mkdirSync,
readFileSync,
writeFileSync,
readdirSync,
copyFileSync,
} from 'node:fs';
import { homedir } from 'node:os';
import { dirname, join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
@@ -536,6 +543,47 @@ export function readRosterAgentNames(mosaicHome = join(homedir(), '.config', 'mo
return names;
}
/**
 * Refresh the ACTIVE systemd user units from the freshly re-seeded copies.
 *
 * The re-seed updates `~/.config/mosaic/systemd/user/*.service`, but the units
 * systemd actually runs live at `~/.config/systemd/user/`. Without this copy,
 * shipped unit fixes (e.g. the socket-env change) never take effect after
 * `mosaic update` until `mosaic fleet install` is re-run. Best-effort + scoped:
 * only refreshes when a fleet is already installed (the active dir already
 * carries `mosaic-*` units), so non-fleet hosts are untouched.
 *
 * @param mosaicHome Mosaic install root; seeded units are read from
 *   `<mosaicHome>/systemd/user`.
 * @param env Environment consulted for `XDG_CONFIG_HOME`. Unset or empty falls
 *   back to `<home>/.config`, as the XDG Base Directory spec requires.
 * @returns `refreshed` lists the unit files copied. `ok` is false, with a
 *   `reason`, when a directory could not be listed or any unit failed to copy.
 *   Never throws.
 */
export function refreshActiveFleetUnits(
  mosaicHome = join(homedir(), '.config', 'mosaic'),
  env: NodeJS.ProcessEnv = process.env,
): { refreshed: string[]; ok: boolean; reason?: string } {
  const isMosaicUnit = (file: string): boolean =>
    file.startsWith('mosaic-') && file.endsWith('.service');
  const describeError = (err: unknown): string =>
    err instanceof Error ? err.message : String(err);

  const src = join(mosaicHome, 'systemd', 'user');
  // `||`, not `??`: an empty XDG_CONFIG_HOME must be treated as unset, otherwise
  // `dest` silently becomes the cwd-relative path `systemd/user`.
  const configHome = env['XDG_CONFIG_HOME'] || join(homedir(), '.config');
  const dest = join(configHome, 'systemd', 'user');

  let units: string[];
  try {
    if (!existsSync(src) || !existsSync(dest)) return { refreshed: [], ok: true };
    // Only refresh when a fleet is already installed (active dir has mosaic units).
    if (!readdirSync(dest).some(isMosaicUnit)) return { refreshed: [], ok: true };
    units = readdirSync(src).filter(isMosaicUnit);
  } catch (err: unknown) {
    // e.g. one of the paths is not a directory, or is unreadable.
    return {
      refreshed: [],
      ok: false,
      reason: `could not list systemd unit directories: ${describeError(err)}`,
    };
  }

  const refreshed: string[] = [];
  const failures: string[] = [];
  for (const unit of units) {
    try {
      copyFileSync(join(src, unit), join(dest, unit));
      refreshed.push(unit);
    } catch (err: unknown) {
      // Best-effort per unit: keep going, but report the failure to the caller.
      failures.push(`${unit}: ${describeError(err)}`);
    }
  }

  // Reloading is pointless when nothing on disk changed.
  if (refreshed.length > 0) {
    try {
      execSync('systemctl --user daemon-reload', { stdio: 'ignore', timeout: 15_000 });
    } catch {
      // non-systemd host or no session bus — non-fatal
    }
  }

  if (failures.length > 0) {
    return { refreshed, ok: false, reason: `failed to refresh ${failures.join('; ')}` };
  }
  return { refreshed, ok: true };
}
/** Build the per-agent systemd relaunch commands (drain+relaunch via restart). */
export function buildRelaunchCommands(agentNames: string[]): string[][] {
return agentNames.map((name) => [