feat(fleet): add durable tmux fleet poc

docs: plan durable tmux fleet install
fix(launch): include Pi native skill roots in 'all' mode; dedup 'discover' force-loads (#556)
2026-06-19 15:50:35 -05:00 · 2026-06-19 15:10:36 -05:00 · 2026-06-19 19:58:09 +00:00 · 2026-06-19 18:31:02 +00:00 · 2026-06-19 18:03:03 +00:00 · 2026-06-18 22:39:35 +00:00
62 changed files with 4215 additions and 145 deletions
--- a/apps/appservice/src/tests/server.test.ts
+++ b/apps/appservice/src/tests/server.test.ts
@@ -3,6 +3,8 @@ import { describe, expect, it, vi } from 'vitest';
 import { AppserviceDaemon } from '../server.js';
 import type { DaemonConfig, DaemonRequest } from '../server.js';

+const AGENTS_TYPE = 'org.uscllc.mosaic_as.agents';
+
 const cfg: DaemonConfig = {
  homeserverUrl: 'https://hs.example',
  domain: 'hs.example',
@@ -228,6 +230,149 @@ describe('AppserviceDaemon routing', () => {
    expect(bad.status).toBe(400);
  });

+  // A daemon whose fetch mock backs account_data with a mutable in-test object,
+  // so register/verify/revoke round-trip through the (faked) homeserver.
+  const makeAgentDaemon = () => {
+    const accountData: { value: Record<string, unknown> | null } = { value: null };
+    const fetchMock = vi.fn(async (input: URL | string, init?: RequestInit) => {
+      const url = new URL(String(input));
+      const path = url.pathname;
+      if (path.includes(`/account_data/${AGENTS_TYPE}`)) {
+        if (init?.method === 'PUT') {
+          accountData.value = JSON.parse(String(init.body)) as Record<string, unknown>;
+          return jsonResponse(200, {});
+        }
+        if (accountData.value === null) {
+          return jsonResponse(404, { errcode: 'M_NOT_FOUND', error: 'not found' });
+        }
+        return jsonResponse(200, accountData.value);
+      }
+      if (path.endsWith('/register')) return jsonResponse(200, { user_id: 'whatever' });
+      if (path.includes('/send/m.room.message/')) return jsonResponse(200, { event_id: '$sent' });
+      return jsonResponse(200, {});
+    });
+    const daemon = new AppserviceDaemon(cfg, fetchMock as unknown as typeof fetch, () => {});
+    return { daemon, fetchMock };
+  };
+
+  const registerAgent = async (
+    daemon: AppserviceDaemon,
+    body: Record<string, unknown> = { alias: 'pi0', host: 'web1' },
+  ) =>
+    daemon.handle(
+      request({
+        method: 'POST',
+        path: '/bridge/v1/agents',
+        authorizationHeader: 'Bearer bridge-secret',
+        body,
+      }),
+    );
+
+  it('host token registers an agent and returns agent_user_id + bridge_token', async () => {
+    const { daemon, fetchMock } = makeAgentDaemon();
+    const res = await registerAgent(daemon, { alias: 'pi0', host: 'web1' });
+    expect(res.status).toBe(200);
+    expect(res.body.agent_user_id).toBe('@agent-pi0-web1:hs.example');
+    expect(String(res.body.bridge_token).startsWith('magt_')).toBe(true);
+    const registerCall = fetchMock.mock.calls
+      .map((c) => new URL(String(c[0])))
+      .find((u) => u.pathname.endsWith('/register'));
+    expect(registerCall).toBeDefined();
+  });
+
+  it('register requires a HOST token (agent token and no token are 403)', async () => {
+    const { daemon } = makeAgentDaemon();
+    const minted = await registerAgent(daemon);
+    const agentToken = String(minted.body.bridge_token);
+
+    const asAgent = await daemon.handle(
+      request({
+        method: 'POST',
+        path: '/bridge/v1/agents',
+        authorizationHeader: `Bearer ${agentToken}`,
+        body: { alias: 'pi1', host: 'web2' },
+      }),
+    );
+    expect(asAgent.status).toBe(403);
+
+    const noAuth = await daemon.handle(
+      request({ method: 'POST', path: '/bridge/v1/agents', body: { alias: 'pi1', host: 'web2' } }),
+    );
+    expect(noAuth.status).toBe(403);
+  });
+
+  it('agent-scoped token may send as itself but not as another agent', async () => {
+    const { daemon } = makeAgentDaemon();
+    const minted = await registerAgent(daemon, { alias: 'pi0', host: 'web1' });
+    const agentToken = String(minted.body.bridge_token);
+
+    const self = await daemon.handle(
+      request({
+        method: 'POST',
+        path: '/bridge/v1/messages',
+        authorizationHeader: `Bearer ${agentToken}`,
+        body: { room_id: '!r:hs.example', agent: 'pi0-web1', body: 'hi' },
+      }),
+    );
+    expect(self.status).toBe(200);
+
+    const other = await daemon.handle(
+      request({
+        method: 'POST',
+        path: '/bridge/v1/messages',
+        authorizationHeader: `Bearer ${agentToken}`,
+        body: { room_id: '!r:hs.example', agent: 'pi9-web9', body: 'hi' },
+      }),
+    );
+    expect(other.status).toBe(403);
+    expect(other.body.error).toBe('token not scoped to this agent');
+  });
+
+  it('revoked agent token is rejected on messages', async () => {
+    const { daemon } = makeAgentDaemon();
+    const minted = await registerAgent(daemon, { alias: 'pi0', host: 'web1' });
+    const agentToken = String(minted.body.bridge_token);
+
+    const revoke = await daemon.handle(
+      request({
+        method: 'POST',
+        path: '/bridge/v1/agents/revoke',
+        authorizationHeader: 'Bearer bridge-secret',
+        body: { agent_user_id: '@agent-pi0-web1:hs.example' },
+      }),
+    );
+    expect(revoke.status).toBe(200);
+    expect(revoke.body.revoked).toBe(1);
+
+    const afterRevoke = await daemon.handle(
+      request({
+        method: 'POST',
+        path: '/bridge/v1/messages',
+        authorizationHeader: `Bearer ${agentToken}`,
+        body: { room_id: '!r:hs.example', agent: 'pi0-web1', body: 'hi' },
+      }),
+    );
+    expect(afterRevoke.status).toBe(403);
+  });
+
+  it('GET /bridge/v1/agents lists registered agents (host only)', async () => {
+    const { daemon } = makeAgentDaemon();
+    await registerAgent(daemon, { alias: 'pi0', host: 'web1', display_name: 'Pi Zero' });
+
+    const res = await daemon.handle(
+      request({
+        method: 'GET',
+        path: '/bridge/v1/agents',
+        authorizationHeader: 'Bearer bridge-secret',
+      }),
+    );
+    expect(res.status).toBe(200);
+    const agents = res.body.agents as Array<Record<string, unknown>>;
+    expect(agents).toHaveLength(1);
+    expect(agents[0]?.agent_user_id).toBe('@agent-pi0-web1:hs.example');
+    expect(agents[0]?.display_name).toBe('Pi Zero');
+  });
+
  it('empty bridge token list denies everything', async () => {
    const daemon = new AppserviceDaemon({ ...cfg, bridgeTokens: [] }, undefined, () => {});
    const res = await daemon.handle(
--- a/apps/appservice/src/server.ts
+++ b/apps/appservice/src/server.ts
@@ -1,11 +1,14 @@
 import { createHmac, randomBytes, timingSafeEqual } from 'node:crypto';

 import {
+  AgentTokenStore,
  AppserviceIntent,
  TransactionHandler,
  validateBridgeMessage,
  validateBridgeTyping,
  validateProvisionRoom,
+  validateRegisterAgent,
+  validateRevokeAgent,
 } from '@mosaicstack/appservice';
 import type { AppserviceConfig, MatrixEvent } from '@mosaicstack/appservice';

@@ -37,6 +40,13 @@ const safeEqual = (a: string, b: string): boolean => timingSafeEqual(digest(a),

 const TXN_PATH = /^\/_matrix\/app\/v1\/transactions\/([^/]+)$/;

+/**
+ * Resolved identity for an authenticated /bridge/v1/* caller. Host principals
+ * (the agent-comms host daemons) are unrestricted; agent principals are scoped
+ * to a single virtual user and may only act as themselves.
+ */
+export type BridgePrincipal = { kind: 'host' } | { kind: 'agent'; agentUserId: string } | null;
+
 /**
 * HTTP-framework-agnostic request router for the mosaic-as daemon: the
 * Application Service transactions endpoint (Synapse-facing) plus the
@@ -46,6 +56,7 @@ const TXN_PATH = /^\/_matrix\/app\/v1\/transactions\/([^/]+)$/;
 export class AppserviceDaemon {
  readonly intent: AppserviceIntent;
  private readonly transactions: TransactionHandler;
+  private readonly agents: AgentTokenStore;

  constructor(
    private readonly cfg: DaemonConfig,
@@ -53,6 +64,7 @@ export class AppserviceDaemon {
    private readonly log: (line: string) => void = (line) => console.log(line),
  ) {
    this.intent = new AppserviceIntent(cfg, fetchImpl);
+    this.agents = new AgentTokenStore(this.intent);
    this.transactions = new TransactionHandler({
      hsToken: cfg.hsToken,
      onEvent: (event) => this.onEvent(event),
@@ -69,10 +81,20 @@ export class AppserviceDaemon {
    }
  }

-  private bridgeAuthorized(authorizationHeader: string | undefined): boolean {
-    if (!authorizationHeader?.startsWith('Bearer ')) return false;
+  /** Resolve the calling principal, or null when unauthorized. Fail-closed:
+   * host tokens win (timing-safe compare); otherwise a magt_* bearer is looked
+   * up in the agent token store; anything else is rejected. */
+  private async bridgeAuthorized(
+    authorizationHeader: string | undefined,
+  ): Promise<BridgePrincipal> {
+    if (!authorizationHeader?.startsWith('Bearer ')) return null;
    const presented = authorizationHeader.slice('Bearer '.length);
-    return this.cfg.bridgeTokens.some((token) => safeEqual(presented, token));
+    if (this.cfg.bridgeTokens.some((token) => safeEqual(presented, token))) {
+      return { kind: 'host' };
+    }
+    const agentUserId = await this.agents.verifyToken(presented);
+    if (agentUserId) return { kind: 'agent', agentUserId };
+    return null;
  }

  async handle(req: DaemonRequest): Promise<DaemonResponse> {
@@ -89,12 +111,60 @@ export class AppserviceDaemon {
    }

    if (req.path.startsWith('/bridge/v1/')) {
-      if (!this.bridgeAuthorized(req.authorizationHeader)) {
+      const principal = await this.bridgeAuthorized(req.authorizationHeader);
+      if (!principal) {
        return { status: 403, body: { errcode: 'M_FORBIDDEN', error: 'bad bridge token' } };
      }
      try {
+        if (req.method === 'POST' && req.path === '/bridge/v1/agents') {
+          if (principal.kind !== 'host') {
+            return {
+              status: 403,
+              body: { errcode: 'M_FORBIDDEN', error: 'agents cannot register agents' },
+            };
+          }
+          validateRegisterAgent(req.body);
+          const { agentUserId, token } = await this.agents.register({
+            alias: req.body.alias,
+            host: req.body.host,
+            displayName: req.body.display_name,
+          });
+          this.log(`registered agent ${agentUserId}`);
+          return { status: 200, body: { agent_user_id: agentUserId, bridge_token: token } };
+        }
+        if (req.method === 'POST' && req.path === '/bridge/v1/agents/revoke') {
+          if (principal.kind !== 'host') {
+            return {
+              status: 403,
+              body: { errcode: 'M_FORBIDDEN', error: 'agents cannot revoke agents' },
+            };
+          }
+          validateRevokeAgent(req.body);
+          const revoked = await this.agents.revoke(req.body.agent_user_id);
+          this.log(`revoked ${revoked} token(s) for ${req.body.agent_user_id}`);
+          return { status: 200, body: { revoked } };
+        }
+        if (req.method === 'GET' && req.path === '/bridge/v1/agents') {
+          if (principal.kind !== 'host') {
+            return {
+              status: 403,
+              body: { errcode: 'M_FORBIDDEN', error: 'agents cannot list agents' },
+            };
+          }
+          const agents = await this.agents.list();
+          return { status: 200, body: { agents } };
+        }
        if (req.method === 'POST' && req.path === '/bridge/v1/messages') {
          validateBridgeMessage(req.body);
+          if (
+            principal.kind === 'agent' &&
+            this.intent.agentUserId(req.body.agent) !== principal.agentUserId
+          ) {
+            return {
+              status: 403,
+              body: { errcode: 'M_FORBIDDEN', error: 'token not scoped to this agent' },
+            };
+          }
          const eventId = await this.intent.sendAsAgent({
            roomId: req.body.room_id,
            agent: req.body.agent,
@@ -107,6 +177,15 @@ export class AppserviceDaemon {
        }
        if (req.method === 'POST' && req.path === '/bridge/v1/typing') {
          validateBridgeTyping(req.body);
+          if (
+            principal.kind === 'agent' &&
+            this.intent.agentUserId(req.body.agent) !== principal.agentUserId
+          ) {
+            return {
+              status: 403,
+              body: { errcode: 'M_FORBIDDEN', error: 'token not scoped to this agent' },
+            };
+          }
          await this.intent.setTyping(req.body.room_id, req.body.agent, req.body.typing);
          return { status: 200, body: {} };
        }
--- a/docs/plans/agent-reflection-loop-PRD.md
+++ b/docs/plans/agent-reflection-loop-PRD.md
@@ -0,0 +1,173 @@
+# PRD — Agent Reflection Loop (durable kernel)
+
+**Issue:** [#544](http://git.mosaicstack.dev/mosaicstack/stack/issues/544)
+**Source design:** jarvis-brain `docs/planning/AGENT-REFLECTION-LOOP.md` (commit df6576fc, debate-hardened v2)
+**Status:** in-progress
+**Scope rule:** Build the **durable kernel** only. The closed calibration/skill-synthesis loop
+(design §7–§8) is **gated** behind Phase-0 experiments P1/P2/P3 and is explicitly out of scope here.
+
+---
+
+## 1. Problem
+
+At end-of-run an agent holds context that never reaches the diff or the "done" message —
+assumptions, shortcuts, untested paths, the single most-likely way the work is wrong. That context
+is what a lead/human needs to judge trust, and it evaporates when the session ends. Capture it
+mechanically as **structured data** (`reflection.v1`), and derive a **review risk-floor** from the
+change surface so risky diffs are flagged for independent review.
+
+## 2. Non-goals (gated on Phase-0)
+
+- No closed calibration loop (predicted-vs-actual scoring as a routing input).
+- No skill synthesis.
+- No automated reviewer routing/dispatch. The kernel **writes** the sidecar; pickup is future work.
+
+## 3. Components & exact placement (main-branch truth)
+
+| #   | Component            | Path                                                                                             | Mirror                              |
+| --- | -------------------- | ------------------------------------------------------------------------------------------------ | ----------------------------------- |
+| a   | Stop hook (capture)  | `packages/mosaic/framework/tools/qa/reflect-stop-hook.sh`                                        | `tools/qa/prevent-memory-write.sh`  |
+| a   | Hook registration    | `packages/mosaic/framework/runtime/claude/settings.json` (`hooks.Stop`)                          | existing `PreToolUse`/`PostToolUse` |
+| b   | JSON Schema          | `packages/macp/src/schemas/reflection.v1.schema.json`                                            | `schemas/task.schema.json`          |
+| b   | TS types (zod) + DTO | `packages/types/src/reflection/{index.ts,reflection.dto.ts}` + re-export from `src/index.ts`     | `packages/types/src/federation/*`   |
+| c   | Diff risk-floor      | `packages/macp/src/risk-floor.ts` (+ `__tests__/risk-floor.test.ts`, export from `src/index.ts`) | `packages/macp/src/gate-runner.ts`  |
+| d   | Phase-0 scripts      | `scripts/analysis/reflect-{git-history,board-history,calibration}.sh`                            | `scripts/publish-npmjs.sh`          |
+
+**Activation note (deliberate deviation):** the `settings-overlays/` directory has **no merge
+mechanism** (referenced only in docs), so a hooks overlay there would be inert. The Stop hook is
+registered in the canonical `runtime/claude/settings.json` — the same file the `mosaic` launcher
+reflects into `~/.claude/settings.json` (verified byte-identical hooks live there). Still fully
+vendored in-repo.
+
+## 4. `reflection.v1` schema (authoritative field list)
+
+```jsonc
+{
+  "schema": "reflection.v1", // literal
+  "task_ref": "string", // canonical task ref; kernel derives from REFLECTION_TASK_REF or repo+branch
+  "agent": "string", // persona/runtime id (REFLECTION_AGENT or "unknown")
+  "session_id": "string", // from Stop payload session_id, else "unknown"
+  "timestamp": "string", // ISO-8601 UTC
+  "repo": "string", // repo root basename
+  "confidence": 0.0, // FLOAT [0,1] — SELF-REPORTED (optional; null if not supplied)
+  "most_likely_wrong": {
+    // SELF-REPORTED (optional)
+    "surface": "auth|data|infra|ui|build|test|docs|none",
+    "description": "string",
+  },
+  "known_not_in_diff": "string|null", // SELF-REPORTED: "what I know that isn't visible in the diff"
+  "risk": {
+    // MECHANICAL — from risk-floor
+    "needs_review": true,
+    "score": 0.0, // [0,1]
+    "surface": "auth|data|infra|ui|build|test|docs|none",
+    "reason": "string",
+  },
+  "files_changed": ["string"], // MECHANICAL — git diff name-only
+  "provenance": {
+    "source": "stop-hook",
+    "reflection_attempt": 1,
+    "degraded": false, // true if self-report inputs missing/unreadable
+    "reflection_mode": "off|solo|orchestrated",
+  },
+}
+```
+
+**Mechanical vs self-reported.** A bash Stop hook cannot author the agent's self-assessment. The
+hook populates the **mechanical** fields deterministically (risk, files_changed, provenance, ids).
+The **self-reported** fields are read from an optional agent-supplied input file
+(`$REFLECTION_INPUT`, default `<repo>/.mosaic/reflection-input.json`) and merged if present;
+absent/unreadable → those fields null and `provenance.degraded=true`. This realizes the design's
+"hook is a pre-seed, not the asker" (§4).
+
+## 5. Stop hook behavior (fail-closed, non-blocking)
+
+1. Read Stop payload JSON from stdin.
+2. **Fail-closed:** if `REFLECTION_MODE` is unset or `off` → `exit 0` immediately (strict no-op). This
+   is the global-registration safety guarantee.
+3. **Sentinel guard:** if `<sidecar>.lock` exists → `exit 0` (prevents re-fire loops). Otherwise
+   create it and `trap` its cleanup.
+4. Determine output dir: `$REFLECTION_DIR` else `<repo>/.mosaic/reflections/`. `mkdir -p`.
+5. Compute mechanical fields: `git diff --name-only` (HEAD + staged + worktree, best-effort),
+   call risk-floor logic (inline bash port OR `node -e` into `@mosaicstack/macp` — see §6), session
+   ids from payload + env.
+6. Merge optional `$REFLECTION_INPUT` self-report if readable JSON.
+7. Write `reflection.v1` to a temp file, `mv` (atomic) to `<dir>/<session>-<ts>.reflection.json`.
+8. Always `exit 0`. **Never** emit a `decision` field (Stop hooks are observational).
+
+Hook must never fail the session: wrap risky steps, default to `degraded:true` on any error, exit 0.
+
+## 6. Risk-floor (`packages/macp/src/risk-floor.ts`)
+
+Pure, deterministic, no IO. Single source of truth for the verdict; the hook calls it via
+`node --input-type=module -e` (importing the built package) **or**, to avoid a node dependency in the
+hook path, the hook ports the same surface table. **Decision:** implement the canonical logic in TS
+(tested), and have the hook shell out to node when available, else fall back to a minimal inline
+classifier flagged `degraded:true`. (Keep the TS the authority; the inline path is a safety net.)
+
+```ts
+export type ReviewSurface = 'auth' | 'data' | 'infra' | 'ui' | 'build' | 'test' | 'docs' | 'none';
+export interface RiskFloorInput {
+  filesChanged: string[];
+  insertions?: number;
+  deletions?: number;
+}
+export interface RiskFloorVerdict {
+  needs_review: boolean;
+  score: number;
+  surface: ReviewSurface;
+  reason: string;
+}
+export function evaluateRiskFloor(input: RiskFloorInput): RiskFloorVerdict;
+```
+
+Surface classification by path regex (per file, first match in the order below wins; across files, the highest-risk surface dominates):
+
+- `auth` (weight 1.0): `auth`, `login`, `session`, `token`, `permission`, `rbac`, `credential`, `secret`
+- `data` (0.9): `migration`, `prisma`, `schema`, `\.sql`, `entity`, `repository`, `seed`
+- `infra` (0.85): `docker`, `\.woodpecker`, `compose`, `traefik`, `deploy`, `helm`, `k8s`, `terraform`
+- `build` (0.6): `package.json`, `tsconfig`, `turbo.json`, `pnpm-`, `\.config\.`, `eslint`, `vite`
+- `ui` (0.4): `\.tsx`, `\.css`, `components/`, `apps/web/`
+- `test` (0.2): `\.spec\.`, `\.test\.`, `__tests__/`
+- `docs` (0.1): `\.md`, `docs/`
+- `none` (0.0): anything else
+
+`needs_review = score >= THRESHOLD` (default `0.5`, overridable). `reason` names the files+surface
+that tripped it. **Subordinate to CI:** this is a _floor_ (minimum review requirement) only;
+consumers MUST treat CI/tests as authoritative above the floor (precedence: CI/tests > human merge >
+reviewer verdict > self-reflection). Documented in the module header.
+
+## 7. Phase-0 experiment scripts (`scripts/analysis/`)
+
+Offline, no-infra bash. Each script: `#!/usr/bin/env bash`, `set -euo pipefail`, header `Usage:` +
+`Requirements:`, flag parsing, **prints its pre-registered kill condition**, emits structured
+(JSON/markdown) output. They are harnesses + rubrics — real corpora are wired later.
+
+- `reflect-git-history.sh` (**P2** — only-self-reflection bucket): scan `git log` for failure signals
+  (reverts, `fix:`/`hotfix` shortly after a feature merge) over a window; classify each by which gate
+  would catch it (CI / human-review / only-self-reflection) via a pre-registered heuristic; tally.
+  Kill: bucket-3 near-empty → no §7/§8.
+- `reflect-board-history.sh` (**P3** — outcome detectability): given a task/board export (or the
+  git history of `data/` task files), measure the fraction of completed tasks with a
+  machine-detectable correct/wrong signal within 30 days. Kill: base-rate < 20% → caveat-notes only.
+- `reflect-calibration.sh` (**P1** — confidence signal): consume a labeled corpus (JSONL of
+  `{confidence, correct}`), compute discrimination (AUC/lift) on the self-rated-high subset, print
+  the metric vs the pre-registered chance threshold. Kill: AUC ≈ chance on the high subset → no §7/§8.
+
+## 8. CI / quality gates
+
+- TS packages: `pnpm typecheck` (tsc --noEmit), `pnpm lint` (eslint), `pnpm format:check`
+  (prettier), `pnpm test` (vitest). ESM, NodeNext, `.js` import specifiers, `*.dto.ts` at boundaries.
+- New files in existing packages need no CI config change; add ≥1 vitest spec per new TS module.
+- Bash scripts/hook are dev/runtime tooling, not CI-built; keep them `shellcheck`-clean.
+
+## 9. Acceptance criteria
+
+1. `REFLECTION_MODE` unset → hook is a strict no-op (`exit 0`, no file written). **(test)**
+2. With `REFLECTION_MODE=solo`, hook writes a schema-valid `reflection.v1` with correct mechanical
+   fields; self-report merged when `$REFLECTION_INPUT` present, `degraded:true` when absent.
+3. `evaluateRiskFloor` deterministic across all surfaces; unit-tested incl. auth/data/infra → review,
+   docs/test → no review, empty → `none`/no review.
+4. `reflection.v1` zod type + JSON Schema agree; sidecar validates against the schema.
+5. Phase-0 scripts run offline, print kill conditions, emit structured output, shellcheck-clean.
+6. `pnpm typecheck && pnpm lint && pnpm format:check && pnpm test` green; independent review passed.
--- a/docs/scratchpads/544-agent-reflection-loop.md
+++ b/docs/scratchpads/544-agent-reflection-loop.md
@@ -0,0 +1,55 @@
+# Scratchpad — #544 Agent Reflection Loop (durable kernel)
+
+**Started:** 2026-06-16 · **Branch:** `feat/agent-reflection-loop` · **Base:** `main` @ c461380
+
+## Goal
+
+Bake the durable kernel of the agent reflection loop into the Mosaic Stack
+monorepo through full delivery gates. Kernel only; closed loop (§7–§8) gated on
+Phase-0. Authoritative spec: `docs/plans/agent-reflection-loop-PRD.md`. Task
+breakdown: `docs/tasks/544-agent-reflection-loop.md`.
+
+## Timeline / decisions
+
+- Mapped house style against `main` truth (the earlier recon had mapped a dirty
+  feature branch and returned non-existent paths; re-cloned `main` clean).
+- macp uses co-located `*.spec.ts`; types uses `src/<mod>/{*.ts, *.dto.ts, __tests__/*.spec.ts}`.
+- zod v4 + class-validator/class-transformer present in `@mosaicstack/types`;
+  `packages/types/tsconfig.json` enables `experimentalDecorators`/`emitDecoratorMetadata`.
+- **Gotcha (fixed):** `class-transformer`'s `@Type` calls `Reflect.getMetadata`
+  at module-load time; the types vitest env has no `reflect-metadata`, so any test
+  importing the reflection barrel crashed on import. `chat.dto.ts` avoids this by
+  using class-validator only. Fix: dropped `@Type`/`@ValidateNested` from the DTO;
+  zod owns deep nested validation.
+- **Gotcha (fixed):** Stop hook `EXIT` trap referenced a `main`-local `lock` →
+  `unbound variable` under `set -u` at exit. Promoted to a global `LOCKFILE`.
+- **Gotcha (fixed):** the hook's own lock + `.mosaic/` scratch leaked into
+  `files_changed`. Excluded `^\.mosaic/` from the change-surface scan.
+
+## Verification evidence
+
+- macp: typecheck OK, lint OK, **88 tests pass** (15 new risk-floor).
+- types: typecheck OK, lint OK, **64 tests pass** (10 new reflection).
+- Root: `pnpm typecheck` (41 tasks), `pnpm lint` (23), `pnpm format:check`, `pnpm build` (23) — all green.
+- Stop hook smoke (throwaway git repo): TEST1 no-op (mode unset, 0 files);
+  TEST2 solo degraded, `.mosaic/` excluded, auth→needs_review; TEST3 self-report
+  merged, degraded=false; TEST4 lock suppresses re-fire. All pass, always exit 0.
+- shellcheck clean: hook + `reflect-{git-history,board-history,calibration}.sh`.
+- Phase-0 smoke: P2 on this repo (142 failures classified), P1 AUC=0.875 on a
+  synthetic fixture, P3 base-rate on a synthetic board — all emit structured
+  output plus kill conditions.
+
+## Open risks / follow-ups
+
+- Full `pnpm test` (DB-bound packages) validated via CI's postgres service, not
+  locally; affected packages (macp, types) are DB-independent and green here.
+- sequential-thinking MCP was registered mid-session (effective next session);
+  this session compensated with the written PRD as the planning artifact.
+- Phase-0 corpora are not yet wired — scripts are harnesses + pre-registered
+  rubrics (P1/P2/P3 tasks tracked in jarvis-brain `agent-reflection-loop` project).
+
+## Gate status
+
+- [x] PRD authored · [x] issue #544 created + linked · [x] code + tests
+- [x] local gates green · [ ] independent code review · [ ] PR opened
+- [ ] CI terminal green · [ ] merged to main · [ ] issue closed
--- a/docs/scratchpads/t-a292e96f-gitea-pr-metadata.md
+++ b/docs/scratchpads/t-a292e96f-gitea-pr-metadata.md
@@ -51,3 +51,48 @@ This repository currently has no root `CHANGELOG.md`; the scratchpad and `docs/T
  - PR #1908: `Dry run: would merge PR #1908 on git.uscllc.com with authenticated Gitea API fallback (base=main, method=squash).`
 - PR: `https://git.mosaicstack.dev/mosaicstack/stack/pulls/518`, branch `fix/t-a292e96f-gitea-pr-metadata`.
 - CI: Recent PR/push pipelines failed before clone/test execution due Woodpecker/Kubernetes PVC API timeout: `dial tcp 10.43.0.1:443: i/o timeout`. No repository test step executed in CI; local targeted verification above remains clean.
+
+## 2026-06-18 — PR #549 functional blocker remediation
+
+### Assignment
+
+Coordinator `mos-claude` assigned remediation for PR #549: fix `packages/mosaic/framework/tools/git/pr-metadata.sh` tmpfile cleanup where an `EXIT` trap references function-local `body_file` after the function returns inside `RAW=$(...)`, producing `body_file: unbound variable` on the authenticated success path and failing to clean up safely on early `set -e` exits.
+
+### Plan
+
+1. Add a non-vacuous Gitea test that exercises `curl_gitea_pull` with stubbed `curl` and `GITEA_TOKEN` instead of `MOSAIC_GITEA_PR_METADATA_RAW_FILE`.
+2. Prove the new test is RED against the current PR head.
+3. Replace the function-local `EXIT` cleanup with robust function-scoped tmpfile cleanup.
+4. Re-run targeted tests, `bash -n`, and review gates; commit and push branch only. Do not merge.
+
+### Constraints / assumptions
+
+- Do not modify prior injection/JSON fixes in `issue-edit`, `issue-assign`, or `milestone-create`.
+- Worker role: do not modify `docs/TASKS.md`; orchestrator remains the single writer.
+- Budget: no explicit token cap provided; keep scope to shell wrapper + targeted regression harness.
+
+### Remediation results
+
+- Rebased `fix/tooling-eval-injection-jq-json` onto `origin/main`; branch was already current.
+- Added a curl-stub regression path that does not use `MOSAIC_GITEA_PR_METADATA_RAW_FILE`, so it exercises `curl_gitea_pull` and its temp body file.
+- RED evidence: copied the new harness next to the pre-fix `HEAD` version of `pr-metadata.sh`; `MOSAIC_TEST_WORK_DIR=$PWD/.mosaic-test-work/pr-metadata-red-work .../test-pr-metadata-gitea.sh` failed with `body_file: unbound variable` on the curl success path.
+- Fix: replaced `EXIT` temp-file cleanup with a `RETURN`-scoped cleanup function that removes the body file while the function-local variable is still in scope, preserves the original return status, and clears the `RETURN` trap.
+- GREEN evidence:
+  - `MOSAIC_TEST_WORK_DIR=$PWD/.mosaic-test-work/pr-metadata-gitea-current packages/mosaic/framework/tools/git/test-pr-metadata-gitea.sh` passed.
+  - `bash -n packages/mosaic/framework/tools/git/pr-metadata.sh packages/mosaic/framework/tools/git/test-pr-metadata-gitea.sh` passed.
+  - `shellcheck -x -P . -e SC1090 packages/mosaic/framework/tools/git/pr-metadata.sh packages/mosaic/framework/tools/git/test-pr-metadata-gitea.sh` passed.
+
+### Review remediation
+
+- Codex review returned one should-fix: the early-exit test used `chmod 000`, which is not root-safe in container CI.
+- Remediation: changed the stubbed 2xx/cat-failure mode to replace the curl output with a broken symlink, which fails deterministically even as root and still validates cleanup via `rm -f -- "$body_file"`.
+
+### Second review remediation
+
+- Codex review found the 2xx `cat "$body_file"` read could be masked under command substitution semantics because the branch returned 0 unconditionally.
+- Remediation: both authenticated 2xx branches now use `cat "$body_file" || return $?` before returning success.
+- Strengthened the broken-symlink test to require the body-read failure and reject the later `Gitea API returned non-JSON` parse-failure path, so the test verifies the helper-level failure propagation rather than eventual downstream failure.
+
+### Final review gate
+
+- Codex review after remediation: approved (`0 blockers, 0 should-fix, 0 suggestions`).
--- a/docs/tasks/544-agent-reflection-loop.md
+++ b/docs/tasks/544-agent-reflection-loop.md
@@ -0,0 +1,67 @@
+# 544: Agent Reflection Loop — durable kernel
+
+**Issue:** [#544](http://git.mosaicstack.dev/mosaicstack/stack/issues/544)
+**PRD:** [`docs/plans/agent-reflection-loop-PRD.md`](../plans/agent-reflection-loop-PRD.md)
+**Branch:** `feat/agent-reflection-loop`
+
+## Context
+
+Build the **durable kernel** of the agent reflection loop: passive end-of-run
+capture of the doer's end-state as structured `reflection.v1` data, plus a
+deterministic diff **review risk-floor**. The closed calibration / skill-synthesis
+loop (design §7–§8) stays **gated** behind Phase-0 experiments P1/P2/P3 and is
+explicitly out of scope here. Source design: jarvis-brain
+`docs/planning/AGENT-REFLECTION-LOOP.md` (debate-hardened v2).
+
+Scope rule, non-goals, the full `reflection.v1` field list, and acceptance
+criteria live in the PRD. This file is the task breakdown + status.
+
+## Work items
+
+| #   | Item                                                  | Path                                                      | Status |
+| --- | ----------------------------------------------------- | --------------------------------------------------------- | ------ |
+| 1   | Diff risk-floor (pure, deterministic) + unit tests    | `packages/macp/src/risk-floor.ts`, `risk-floor.spec.ts`   | done   |
+| 2   | `reflection.v1` JSON Schema (documented contract)     | `packages/macp/src/schemas/reflection.v1.schema.json`     | done   |
+| 3   | `reflection.v1` zod schemas + self-report DTO + tests | `packages/types/src/reflection/*`                         | done   |
+| 4   | Stop hook (fail-closed capture)                       | `packages/mosaic/framework/tools/qa/reflect-stop-hook.sh` | done   |
+| 5   | Hook registration (`hooks.Stop`)                      | `packages/mosaic/framework/runtime/claude/settings.json`  | done   |
+| 6   | Phase-0 experiment harnesses (P1/P2/P3)               | `scripts/analysis/reflect-*.sh`                           | done   |
+
+## Design decisions (this implementation)
+
+- **Mechanical vs self-reported split.** A bash Stop hook cannot author the
+  agent's self-assessment, so it writes the mechanical fields (risk-floor verdict,
+  `files_changed`, ids, provenance) and merges an optional agent-supplied
+  `$REFLECTION_INPUT` self-report; when that input is absent or unreadable, the
+  self-reported fields are written as `null` and `provenance.degraded` is `true`.
+- **Risk-floor authority.** `evaluateRiskFloor` (TS, tested) is the source of
+  truth. The hook ports the same surface table inline to avoid a node/build
+  dependency on the hook path; the two are documented as kept in sync.
+- **Hook registration deviation.** `settings-overlays/` has no merge mechanism
+  (docs-only), so a hooks overlay there would be inert. The Stop hook is
+  registered in the canonical `runtime/claude/settings.json` — the same file the
+  `mosaic` launcher reflects into `~/.claude/settings.json`. Still vendored in-repo.
+- **DTO without class-transformer.** `reflection.dto.ts` uses class-validator only
+  (no `@Type`), matching `chat.dto.ts`, so the module imports without a
+  `reflect-metadata` shim in the types-package test env. Deep nested validation is
+  owned by the zod `ReflectionSelfReportSchema` (the runtime authority the hook uses).
+- **`.mosaic/` excluded** from the change surface — it is agent scratch
+  (reflections, locks, self-report input), not part of the diff under review.
+
+## Verification
+
+- `pnpm --filter @mosaicstack/macp test` → 88 passed (15 new risk-floor).
+- `pnpm --filter @mosaicstack/types test` → 64 passed (10 new reflection).
+- Root `pnpm typecheck`, `pnpm lint`, `pnpm format:check`, `pnpm build` → green.
+- Stop hook smoke: fail-closed no-op (mode unset), solo capture (degraded),
+  self-report merge (degraded=false), re-fire lock guard — all pass.
+- All bash (hook + 3 Phase-0 scripts) shellcheck-clean; Phase-0 scripts emit
+  structured JSON/markdown and print their pre-registered kill conditions.
+
+## Activation (post-merge, deployment concern — not a blocker)
+
+The Stop hook only activates when a launcher/profile sets
+`REFLECTION_MODE=solo|orchestrated`; unset/`off` is a strict no-op, so global
+registration is safe. `framework/install.sh` rsyncs the hook into
+`~/.config/mosaic/tools/qa/`, and the `mosaic` launcher reflects the updated
+`settings.json` (`hooks.Stop`) into `~/.claude/settings.json`.
--- a/packages/appservice/src/tests/agent-store.test.ts
+++ b/packages/appservice/src/tests/agent-store.test.ts
@@ -0,0 +1,116 @@
+import { describe, expect, it } from 'vitest';
+
+import { AGENTS_ACCOUNT_DATA_TYPE, AgentTokenStore } from '../agent-store.js';
+import type { AppserviceIntent } from '../intent.js';
+
+/**
+ * Build a stand-in for AppserviceIntent that keeps account_data in a plain
+ * in-memory map and treats user provisioning as a no-op. Only the members
+ * AgentTokenStore calls are provided. The backing map is returned alongside
+ * the intent so tests can inspect what was persisted.
+ */
+const makeFakeIntent = () => {
+  const store: Record<string, Record<string, unknown>> = {};
+  const fake = {
+    domain: 'hs.example',
+    async getSenderAccountData(type: string): Promise<Record<string, unknown> | null> {
+      // Hands back the stored object itself (not a copy), or null when unset.
+      const found = store[type];
+      return found ?? null;
+    },
+    async setSenderAccountData(type: string, content: Record<string, unknown>): Promise<void> {
+      // Writes are deep-copied so later caller mutations do not leak into the map.
+      store[type] = structuredClone(content);
+    },
+    async ensureRegistered(agent: string): Promise<string> {
+      return `@agent-${agent}:hs.example`;
+    },
+    async setDisplayName(): Promise<void> {},
+  };
+  return { intent: fake as unknown as AppserviceIntent, store };
+};
+
+// Contract tests for AgentTokenStore, each run against a fresh in-memory fake
+// intent from makeFakeIntent(): token minting and hashing, verification,
+// revocation, re-registration, per-agent isolation, slug-collision rejection,
+// and display-name round-tripping through list().
+describe('AgentTokenStore', () => {
+  it('mints a magt_ token and stores only its sha256 (never plaintext)', async () => {
+    const { intent, store } = makeFakeIntent();
+    const s = new AgentTokenStore(intent);
+    const { agentUserId, token } = await s.register({ alias: 'pi0', host: 'web1' });
+
+    expect(agentUserId).toBe('@agent-pi0-web1:hs.example');
+    expect(token.startsWith('magt_')).toBe(true);
+
+    // Inspect the serialized account_data payload the fake intent captured.
+    const raw = JSON.stringify(store[AGENTS_ACCOUNT_DATA_TYPE]);
+    expect(raw).not.toContain(token);
+    // The stored hash is sha256hex(token), 64 hex chars.
+    const { createHash } = await import('node:crypto');
+    const hash = createHash('sha256').update(token).digest('hex');
+    expect(raw).toContain(hash);
+  });
+
+  it('verifyToken returns the agentUserId for a fresh token, null otherwise', async () => {
+    const { intent } = makeFakeIntent();
+    const s = new AgentTokenStore(intent);
+    const { agentUserId, token } = await s.register({ alias: 'pi0', host: 'web1' });
+
+    expect(await s.verifyToken(token)).toBe(agentUserId);
+    // Correct prefix but unknown hash, wrong prefix, and empty input all miss.
+    expect(await s.verifyToken('magt_garbage')).toBeNull();
+    expect(await s.verifyToken('not-a-token')).toBeNull();
+    expect(await s.verifyToken('')).toBeNull();
+  });
+
+  it('revoke invalidates tokens, returns count, and hides agent from list', async () => {
+    const { intent } = makeFakeIntent();
+    const s = new AgentTokenStore(intent);
+    const { agentUserId, token } = await s.register({ alias: 'pi0', host: 'web1' });
+
+    expect((await s.list()).map((a) => a.agent_user_id)).toContain(agentUserId);
+
+    const count = await s.revoke(agentUserId);
+    expect(count).toBe(1);
+    expect(await s.verifyToken(token)).toBeNull();
+    expect((await s.list()).map((a) => a.agent_user_id)).not.toContain(agentUserId);
+
+    // Idempotent on unknown / already-revoked.
+    expect(await s.revoke(agentUserId)).toBe(0);
+    expect(await s.revoke('@agent-nope:hs.example')).toBe(0);
+  });
+
+  it('re-register after revoke yields a working token and the agent reappears', async () => {
+    const { intent } = makeFakeIntent();
+    const s = new AgentTokenStore(intent);
+    const { agentUserId, token: t1 } = await s.register({ alias: 'pi0', host: 'web1' });
+    await s.revoke(agentUserId);
+
+    // The pre-revocation token must stay dead; only the newly minted one works.
+    const { token: t2 } = await s.register({ alias: 'pi0', host: 'web1' });
+    expect(await s.verifyToken(t1)).toBeNull();
+    expect(await s.verifyToken(t2)).toBe(agentUserId);
+    expect((await s.list()).map((a) => a.agent_user_id)).toContain(agentUserId);
+  });
+
+  it('agent A token never verifies as agent B', async () => {
+    const { intent } = makeFakeIntent();
+    const s = new AgentTokenStore(intent);
+    const a = await s.register({ alias: 'pi0', host: 'web1' });
+    const b = await s.register({ alias: 'pi1', host: 'web2' });
+
+    expect(await s.verifyToken(a.token)).toBe(a.agentUserId);
+    expect(await s.verifyToken(b.token)).toBe(b.agentUserId);
+    expect(a.agentUserId).not.toBe(b.agentUserId);
+  });
+
+  it('rejects an ambiguous re-registration that collides on one Matrix id', async () => {
+    const { intent } = makeFakeIntent();
+    const s = new AgentTokenStore(intent);
+    // alias="a-b",host="c" and alias="a",host="b-c" both -> @agent-a-b-c.
+    const first = await s.register({ alias: 'a-b', host: 'c' });
+    expect(first.agentUserId).toBe('@agent-a-b-c:hs.example');
+
+    await expect(s.register({ alias: 'a', host: 'b-c' })).rejects.toThrow(/collision/);
+
+    // The original registration is untouched: still one active token, correct pair.
+    expect(await s.verifyToken(first.token)).toBe(first.agentUserId);
+    const summary = (await s.list()).find((x) => x.agent_user_id === first.agentUserId);
+    expect(summary?.alias).toBe('a-b');
+    expect(summary?.host).toBe('c');
+    expect(summary?.active_token_count).toBe(1);
+  });
+
+  it('display_name is stored and surfaced in list', async () => {
+    const { intent } = makeFakeIntent();
+    const s = new AgentTokenStore(intent);
+    await s.register({ alias: 'pi0', host: 'web1', displayName: 'Pi Zero' });
+    const summary = (await s.list())[0];
+    expect(summary?.display_name).toBe('Pi Zero');
+    expect(summary?.active_token_count).toBe(1);
+  });
+});
--- a/packages/appservice/src/agent-registry.dto.ts
+++ b/packages/appservice/src/agent-registry.dto.ts
@@ -0,0 +1,63 @@
+/** DTOs for agent registration + scoped/revocable bridge tokens (US-007). */
+
+/** Request payload for registering an agent; checked by validateRegisterAgent. */
+export interface RegisterAgentDto {
+  /** Agent alias slug, e.g. "pi0". Combined with host into the agent slug. */
+  alias: string;
+  /** Host slug, e.g. "web1". Combined with alias into the agent slug. */
+  host: string;
+  /** Optional display name; when present it must be a non-empty string of at most 100 chars. */
+  display_name?: string;
+}
+
+/** Request payload for revoking an agent's tokens; checked by validateRevokeAgent. */
+export interface RevokeAgentDto {
+  /** Agent user id; validation only requires a string beginning with "@". */
+  agent_user_id: string;
+}
+
+/** Response to a successful registration. */
+export interface RegisterAgentResponse {
+  agent_user_id: string;
+  /** NOTE(review): presumably the plaintext token returned once by AgentTokenStore.register — confirm against the route handler. */
+  bridge_token: string;
+}
+
+/** One entry of the agent listing produced by AgentTokenStore.list(). */
+export interface AgentSummary {
+  agent_user_id: string;
+  alias: string;
+  host: string;
+  /** Present only when a display name was stored for the agent. */
+  display_name?: string;
+  /** Creation timestamp copied from the stored agent record. */
+  created_at: string;
+  /** Number of token hashes currently stored for the agent. */
+  active_token_count: number;
+}
+
+/** Slug shape: one lowercase letter or digit, then any run of lowercase letters, digits, `_`, `.` or `-`. */
+const SLUG_RE = /^[a-z0-9][a-z0-9_.-]*$/;
+
+/**
+ * Join alias and host with a single `-` to form the combined agent slug.
+ * Example: alias="pi0", host="web1" -> "pi0-web1".
+ */
+export function agentSlug(alias: string, host: string): string {
+  return alias + '-' + host;
+}
+
+/**
+ * Throw unless `value` is a non-empty string matching SLUG_RE.
+ * `field` is used only to name the offending property in the error message.
+ */
+const assertSlug = (value: unknown, field: string): void => {
+  const isSlug = typeof value === 'string' && value.length > 0 && SLUG_RE.test(value);
+  if (isSlug) return;
+  throw new Error(`${field} must match [a-z0-9][a-z0-9_.-]* (lowercase, non-empty)`);
+};
+
+/**
+ * Assert that `input` is a well-formed RegisterAgentDto.
+ *
+ * Checks run in order and the first failure throws: payload must be a non-null
+ * object, `alias` and `host` must be slugs, and `display_name` (when present)
+ * must be a non-empty string of at most 100 characters.
+ *
+ * @throws Error describing the first violated constraint.
+ */
+export function validateRegisterAgent(input: unknown): asserts input is RegisterAgentDto {
+  const candidate = input as Partial<RegisterAgentDto> | null | undefined;
+  if (candidate === null || typeof candidate !== 'object') {
+    throw new Error('payload must be an object');
+  }
+
+  assertSlug(candidate.alias, 'alias');
+  assertSlug(candidate.host, 'host');
+
+  const { display_name: displayName } = candidate;
+  if (displayName === undefined) return;
+  if (typeof displayName !== 'string' || displayName.length === 0) {
+    throw new Error('display_name must be a non-empty string');
+  }
+  if (displayName.length > 100) {
+    throw new Error('display_name must be at most 100 chars');
+  }
+}
+
+/**
+ * Assert that `input` is a RevokeAgentDto: a non-null object whose
+ * `agent_user_id` is a string starting with "@". No further structure of the
+ * id is checked here.
+ *
+ * @throws Error when the payload is not an object or the id check fails.
+ */
+export function validateRevokeAgent(input: unknown): asserts input is RevokeAgentDto {
+  const candidate = input as Partial<RevokeAgentDto> | null | undefined;
+  if (candidate === null || typeof candidate !== 'object') {
+    throw new Error('payload must be an object');
+  }
+
+  const id = candidate.agent_user_id;
+  const looksLikeUserId = typeof id === 'string' && id.startsWith('@');
+  if (!looksLikeUserId) {
+    throw new Error('agent_user_id must be a Matrix user id');
+  }
+}
--- a/packages/appservice/src/agent-store.ts
+++ b/packages/appservice/src/agent-store.ts
@@ -0,0 +1,160 @@
+import { createHash, randomBytes, timingSafeEqual } from 'node:crypto';
+
+import { agentSlug } from './agent-registry.dto.js';
+import type { AgentSummary } from './agent-registry.dto.js';
+import type { AppserviceIntent } from './intent.js';
+
+/** account_data type holding the agent registry on the AS sender user. */
+export const AGENTS_ACCOUNT_DATA_TYPE = 'org.uscllc.mosaic_as.agents';
+
+/** Prefix of every minted token; verifyToken rejects input that does not start with it. */
+const TOKEN_PREFIX = 'magt_';
+
+/** Persisted record for one agent, stored under its agent user id. */
+interface StoredAgent {
+  alias: string;
+  host: string;
+  display_name?: string;
+  /** ISO-8601 timestamp set when the record is first created. */
+  created_at: string;
+  /** sha256hex of each active token. Plaintext tokens are NEVER stored. */
+  token_hashes: string[];
+  /** ISO-8601 timestamp set by revoke(); removed again when the agent re-registers. */
+  revoked_at?: string;
+}
+
+/** Shape of the account_data payload: agent records keyed by agent user id. */
+interface AgentRegistry {
+  agents: Record<string, StoredAgent>;
+}
+
+/** Hex-encoded SHA-256 digest of `value`. */
+const sha256hex = (value: string): string => {
+  const hasher = createHash('sha256');
+  hasher.update(value);
+  return hasher.digest('hex');
+};
+
+/** Mint a new token: TOKEN_PREFIX followed by 32 random bytes encoded as base64url. */
+const mintToken = (): string => {
+  const entropy = randomBytes(32);
+  return TOKEN_PREFIX + entropy.toString('base64url');
+};
+
+/**
+ * Persists scoped/revocable bridge tokens for agent virtual users in Matrix
+ * account_data on the AS sender user (no new infra; survives restart).
+ *
+ * Tokens are stored only as sha256 hashes (the high-entropy `magt_` token makes
+ * plain sha256 safe — no salt/KDF needed since brute force is infeasible).
+ *
+ * Every mutation is a read-modify-write of one account_data object and no
+ * locking is done here, so concurrent writers can overwrite each other.
+ *
+ * KNOWN v1 LIMIT: Synapse caps a single account_data object (default
+ * max_account_data_size, ~100KB). Each agent + hash entry is small, so this
+ * supports thousands of agents, but a very large fleet would eventually need a
+ * dedicated store. Revoked agents with no active tokens are pruned of hashes
+ * (kept as tombstones) to bound growth.
+ */
+export class AgentTokenStore {
+  constructor(private readonly intent: AppserviceIntent) {}
+
+  /** Read the registry fresh from account_data (low-frequency ops favor
+   * correctness over caching; verifyToken/list also read fresh). A missing key,
+   * or an `agents` value that is not a plain object (arrays included), yields
+   * an empty registry. */
+  private async read(): Promise<AgentRegistry> {
+    const data = await this.intent.getSenderAccountData(AGENTS_ACCOUNT_DATA_TYPE);
+    const agents = data?.agents;
+    if (agents && typeof agents === 'object' && !Array.isArray(agents)) {
+      return { agents: agents as Record<string, StoredAgent> };
+    }
+    return { agents: {} };
+  }
+
+  /** Replace the stored registry with `registry`. */
+  private async write(registry: AgentRegistry): Promise<void> {
+    await this.intent.setSenderAccountData(AGENTS_ACCOUNT_DATA_TYPE, {
+      agents: registry.agents,
+    });
+  }
+
+  /**
+   * Ensure the virtual user exists, mint a fresh token, store its hash, and
+   * return the plaintext token ONCE. Clears any prior revocation.
+   *
+   * @param opts alias/host pair identifying the agent, plus an optional display name.
+   * @returns the agent's Matrix user id and the newly minted plaintext token.
+   * @throws Error containing "collision" when the id is already registered
+   *   under a different alias/host pair; nothing is modified in that case
+   *   beyond the idempotent ensureRegistered call.
+   */
+  async register(opts: {
+    alias: string;
+    host: string;
+    displayName?: string;
+  }): Promise<{ agentUserId: string; token: string }> {
+    const slug = agentSlug(opts.alias, opts.host);
+    const agentUserId = await this.intent.ensureRegistered(slug);
+
+    const registry = await this.read();
+    const existing = registry.agents[agentUserId];
+    // The agent slug `<alias>-<host>` joins with a `-`, which is also a legal
+    // slug char, so distinct pairs can collide on one Matrix id (e.g.
+    // a/b-c and a-b/c both -> @agent-a-b-c). They ARE the same Matrix user,
+    // but silently overwriting the stored alias/host of a different pair
+    // would conflate two logical agents into one token bucket. Reject the
+    // ambiguous re-registration instead of overwriting.
+    if (existing && (existing.alias !== opts.alias || existing.host !== opts.host)) {
+      throw new Error(
+        `agent id collision: ${agentUserId} already registered as ` +
+          `${existing.alias}/${existing.host}, refusing ${opts.alias}/${opts.host}`,
+      );
+    }
+
+    // Update the profile only after the collision check has passed, so a
+    // rejected registration cannot rename the agent that already owns the id.
+    if (opts.displayName !== undefined) {
+      await this.intent.setDisplayName(slug, opts.displayName);
+    }
+
+    const token = mintToken();
+    const hash = sha256hex(token);
+    if (existing) {
+      if (opts.displayName !== undefined) existing.display_name = opts.displayName;
+      existing.token_hashes = [...existing.token_hashes, hash];
+      delete existing.revoked_at;
+    } else {
+      registry.agents[agentUserId] = {
+        alias: opts.alias,
+        host: opts.host,
+        ...(opts.displayName !== undefined ? { display_name: opts.displayName } : {}),
+        created_at: new Date().toISOString(),
+        token_hashes: [hash],
+      };
+    }
+    await this.write(registry);
+    return { agentUserId, token };
+  }
+
+  /** Return the agentUserId bound to an active (non-revoked) token, else null.
+   * Constant-time hash comparison; no early-out on match. Input without the
+   * token prefix is rejected before the registry is read. */
+  async verifyToken(token: string): Promise<string | null> {
+    if (!token.startsWith(TOKEN_PREFIX)) return null;
+    const presented = Buffer.from(sha256hex(token), 'hex');
+
+    const registry = await this.read();
+    let matched: string | null = null;
+    for (const [agentUserId, agent] of Object.entries(registry.agents)) {
+      if (agent.revoked_at) continue;
+      for (const stored of agent.token_hashes) {
+        const candidate = Buffer.from(stored, 'hex');
+        // timingSafeEqual throws on unequal lengths, hence the length guard.
+        if (candidate.length === presented.length && timingSafeEqual(candidate, presented)) {
+          // No early break: keep scanning so timing does not reveal match position.
+          matched = agentUserId;
+        }
+      }
+    }
+    return matched;
+  }
+
+  /** Revoke all active tokens for an agent. Idempotent; returns count revoked.
+   * Unknown agents and agents that are already revoked with no tokens return 0
+   * without writing, so the original `revoked_at` timestamp is preserved. */
+  async revoke(agentUserId: string): Promise<number> {
+    const registry = await this.read();
+    const agent = registry.agents[agentUserId];
+    if (!agent) return 0;
+    const count = agent.token_hashes.length;
+    if (count === 0 && agent.revoked_at) return 0;
+    agent.token_hashes = [];
+    agent.revoked_at = new Date().toISOString();
+    await this.write(registry);
+    return count;
+  }
+
+  /** List agents with at least one active token (never advertise revoked/phantom). */
+  async list(): Promise<AgentSummary[]> {
+    const registry = await this.read();
+    const out: AgentSummary[] = [];
+    for (const [agentUserId, agent] of Object.entries(registry.agents)) {
+      if (agent.revoked_at || agent.token_hashes.length === 0) continue;
+      out.push({
+        agent_user_id: agentUserId,
+        alias: agent.alias,
+        host: agent.host,
+        ...(agent.display_name !== undefined ? { display_name: agent.display_name } : {}),
+        created_at: agent.created_at,
+        active_token_count: agent.token_hashes.length,
+      });
+    }
+    return out;
+  }
+}
--- a/packages/appservice/src/index.ts
+++ b/packages/appservice/src/index.ts
@@ -10,6 +10,14 @@ export {
  validateProvisionRoom,
 } from './bridge.dto.js';
 export type { BridgeMessageDto, BridgeTypingDto, ProvisionRoomDto } from './bridge.dto.js';
+export { agentSlug, validateRegisterAgent, validateRevokeAgent } from './agent-registry.dto.js';
+export type {
+  RegisterAgentDto,
+  RevokeAgentDto,
+  RegisterAgentResponse,
+  AgentSummary,
+} from './agent-registry.dto.js';
+export { AgentTokenStore, AGENTS_ACCOUNT_DATA_TYPE } from './agent-store.js';
 export type {
  AppserviceConfig,
  EventHandler,
--- a/packages/appservice/src/intent.ts
+++ b/packages/appservice/src/intent.ts
@@ -233,4 +233,30 @@ export class AppserviceIntent {
      body: { displayname: displayName },
    });
  }
+
+  /**
+   * Fetch one account_data object stored on the AS sender user.
+   *
+   * @param type account_data event type; URL-encoded into the request path.
+   * @returns the stored content, or null when the homeserver answers
+   *   M_NOT_FOUND (the key has never been written), so callers can treat that
+   *   as an empty store.
+   * @throws any other request error, unchanged.
+   */
+  async getSenderAccountData(type: string): Promise<Record<string, unknown> | null> {
+    const userPart = encodeURIComponent(this.senderUserId);
+    const typePart = encodeURIComponent(type);
+    const endpoint = `/_matrix/client/v3/user/${userPart}/account_data/${typePart}`;
+    try {
+      // Await inside the try so a rejected request reaches the catch below.
+      return await this.request('GET', endpoint, { userId: this.senderUserId });
+    } catch (err) {
+      const neverWritten = err instanceof MatrixApiError && err.errcode === 'M_NOT_FOUND';
+      if (!neverWritten) throw err;
+      return null;
+    }
+  }
+
+  /**
+   * Store `content` as the account_data object of the given type on the AS
+   * sender user, via a PUT acting as that user.
+   *
+   * @param type account_data event type; URL-encoded into the request path.
+   * @param content object sent as the request body.
+   */
+  async setSenderAccountData(type: string, content: Record<string, unknown>): Promise<void> {
+    const userPart = encodeURIComponent(this.senderUserId);
+    const typePart = encodeURIComponent(type);
+    const endpoint = `/_matrix/client/v3/user/${userPart}/account_data/${typePart}`;
+    await this.request('PUT', endpoint, { userId: this.senderUserId, body: content });
+  }
 }
--- a/packages/macp/src/index.ts
+++ b/packages/macp/src/index.ts
@@ -39,6 +39,11 @@ export { normalizeGate, runShell, countAIFindings, runGate, runGates } from './g

 export type { NormalizedGate } from './gate-runner.js';

+// Risk-floor (agent reflection loop — diff review classifier)
+export { evaluateRiskFloor, DEFAULT_RISK_THRESHOLD } from './risk-floor.js';
+
+export type { ReviewSurface, RiskFloorInput, RiskFloorVerdict } from './risk-floor.js';
+
 // Event emitter
 export { nowISO, appendEvent, emitEvent } from './event-emitter.js';

--- a/packages/macp/src/risk-floor.spec.ts
+++ b/packages/macp/src/risk-floor.spec.ts
@@ -0,0 +1,87 @@
+import { describe, expect, it } from 'vitest';
+
+import { DEFAULT_RISK_THRESHOLD, evaluateRiskFloor, type ReviewSurface } from './risk-floor.js';
+
+// Unit tests for evaluateRiskFloor: empty input, per-surface classification,
+// dominance of the highest-weight surface, tie reporting, the default and a
+// custom threshold, and independence from input order.
+describe('evaluateRiskFloor', () => {
+  it('returns a no-review "none" verdict for an empty diff', () => {
+    const v = evaluateRiskFloor({ filesChanged: [] });
+    expect(v).toEqual({
+      needs_review: false,
+      score: 0,
+      surface: 'none',
+      reason: 'no files changed',
+    });
+  });
+
+  it('ignores empty/non-string entries', () => {
+    const v = evaluateRiskFloor({ filesChanged: ['', '   ' as unknown as string].filter(Boolean) });
+    // only the whitespace string survives the Boolean filter; it classifies to none
+    // NOTE(review): despite the title, no non-string entry is passed here, and the
+    // empty string is removed by the test's own filter before the call.
+    expect(v.surface).toBe('none');
+    expect(v.needs_review).toBe(false);
+  });
+
+  // Row shape: [label, file path, expected surface, expected needs_review].
+  it.each<[string, string, ReviewSurface, boolean]>([
+    ['auth', 'apps/api/src/auth/session.guard.ts', 'auth', true],
+    ['data', 'packages/db/migrations/0007_add_users.sql', 'data', true],
+    ['infra', '.woodpecker/deploy.yml', 'infra', true],
+    ['build', 'packages/types/tsconfig.json', 'build', true],
+    ['ui', 'apps/web/src/components/Button.tsx', 'ui', false],
+    ['test', 'packages/macp/src/risk-floor.spec.ts', 'test', false],
+    ['docs', 'docs/plans/agent-reflection-loop-PRD.md', 'docs', false],
+    ['none', 'README', 'none', false],
+  ])(
+    'classifies a single %s file → surface=%s needs_review=%s',
+    (_label, file, surface, needsReview) => {
+      const v = evaluateRiskFloor({ filesChanged: [file] });
+      expect(v.surface).toBe(surface);
+      expect(v.needs_review).toBe(needsReview);
+      // The reason names the surface, except for 'none' where it uses fixed wording.
+      expect(v.reason).toContain(
+        file === 'README' ? 'no sensitive surface' : surface === 'none' ? '' : surface,
+      );
+    },
+  );
+
+  it('lets the highest-risk surface dominate a mixed diff', () => {
+    const v = evaluateRiskFloor({
+      filesChanged: [
+        'docs/readme.md',
+        'apps/web/src/components/Nav.tsx',
+        'apps/api/src/auth/token.service.ts',
+      ],
+    });
+    expect(v.surface).toBe('auth');
+    expect(v.score).toBe(1.0);
+    expect(v.needs_review).toBe(true);
+    // Only files on the dominant surface are named in the reason.
+    expect(v.reason).toContain('token.service.ts');
+    expect(v.reason).not.toContain('readme.md');
+  });
+
+  it('names every file that ties at the dominant surface', () => {
+    const v = evaluateRiskFloor({
+      filesChanged: ['src/login.ts', 'src/permission-check.ts'],
+    });
+    expect(v.surface).toBe('auth');
+    expect(v.reason).toContain('src/login.ts');
+    expect(v.reason).toContain('src/permission-check.ts');
+  });
+
+  it('treats docs+test-only diffs as below the floor', () => {
+    const v = evaluateRiskFloor({
+      filesChanged: ['docs/guide.md', 'packages/x/src/x.test.ts'],
+    });
+    expect(v.needs_review).toBe(false);
+    expect(v.surface).toBe('test'); // higher weight than docs
+  });
+
+  it('honors a custom threshold', () => {
+    const docsOnly = { filesChanged: ['docs/guide.md'] };
+    // docs weighs 0.1: above a 0.05 cutoff, below the default.
+    expect(evaluateRiskFloor(docsOnly, 0.05).needs_review).toBe(true);
+    expect(evaluateRiskFloor(docsOnly, DEFAULT_RISK_THRESHOLD).needs_review).toBe(false);
+  });
+
+  it('is deterministic across call order', () => {
+    const a = evaluateRiskFloor({ filesChanged: ['a.md', 'auth/x.ts', 'b.tsx'] });
+    const b = evaluateRiskFloor({ filesChanged: ['b.tsx', 'a.md', 'auth/x.ts'] });
+    expect(a).toEqual(b);
+  });
+});
--- a/packages/macp/src/risk-floor.ts
+++ b/packages/macp/src/risk-floor.ts
@@ -0,0 +1,138 @@
+/**
+ * Diff risk-floor — deterministic review-need classifier.
+ *
+ * Given the set of changed files in a diff, derive a *minimum* review
+ * requirement ("floor") from the change surface. This is the mechanical half
+ * of the agent reflection loop (design §6): risky surfaces (auth, data, infra)
+ * trip a review requirement regardless of what the agent self-reports.
+ *
+ * Precedence (authoritative ordering, see design §5):
+ *   CI/tests  >  human merge  >  reviewer verdict  >  self-reflection
+ * This module sits at the *floor*. It NEVER overrides CI or a human; a
+ * `needs_review: false` verdict means "no surface tripped the floor", not
+ * "safe to merge". Consumers MUST keep CI/tests authoritative above it.
+ *
+ * Pure and deterministic: no IO, no clock, no randomness. Same input → same
+ * verdict. Safe to call from a Stop hook via `node -e` or to port inline.
+ */
+
+/** Review surfaces, ordered most- to least-sensitive. */
+export type ReviewSurface = 'auth' | 'data' | 'infra' | 'build' | 'ui' | 'test' | 'docs' | 'none';
+
+/** Input to evaluateRiskFloor. */
+export interface RiskFloorInput {
+  /** Paths of changed files, repo-relative. Order-insensitive. */
+  filesChanged: string[];
+  /** Optional diff size signals; reserved for future weighting (not read by evaluateRiskFloor today). */
+  insertions?: number;
+  deletions?: number;
+}
+
+/** Result of evaluateRiskFloor. */
+export interface RiskFloorVerdict {
+  /** True when the change surface meets/exceeds the review threshold. */
+  needs_review: boolean;
+  /** Aggregate risk score in [0, 1] — the max surface weight across files. */
+  score: number;
+  /** The dominant (highest-weight) surface across all changed files. */
+  surface: ReviewSurface;
+  /** Human-readable explanation naming the surface and tripping files. */
+  reason: string;
+}
+
+/** Default review threshold; `score >= THRESHOLD` ⇒ `needs_review`. */
+export const DEFAULT_RISK_THRESHOLD = 0.5;
+
+/** One row of the surface classification table. */
+interface SurfaceRule {
+  surface: ReviewSurface;
+  /** Risk weight reported as the score when this rule is the dominant match. */
+  weight: number;
+  /**
+   * Regex tested against the file path. Case sensitivity depends on the flags
+   * of each rule's literal: auth/data/infra/build set `i`, ui/test/docs do not.
+   */
+  pattern: RegExp;
+}
+
+/**
+ * Surface classification rules, listed highest-weight first. classify() walks
+ * this array in order and the first rule whose pattern matches a path decides
+ * that file's surface, so a path matching several rules gets the riskiest one.
+ *
+ * Patterns are unanchored, so they match anywhere in the path (apart from the
+ * `$` in the docs rule).
+ *
+ * NOTE(review): the ui/test/docs patterns carry no `i` flag, unlike the four
+ * rules above them, so e.g. an upper-case extension will not match — confirm
+ * whether that asymmetry is intended.
+ */
+const SURFACE_RULES: readonly SurfaceRule[] = [
+  {
+    surface: 'auth',
+    weight: 1.0,
+    pattern: /auth|login|session|token|permission|rbac|credential|secret/i,
+  },
+  {
+    surface: 'data',
+    weight: 0.9,
+    pattern: /migration|prisma|schema|\.sql|entity|repository|seed/i,
+  },
+  {
+    surface: 'infra',
+    weight: 0.85,
+    pattern: /docker|\.woodpecker|compose|traefik|deploy|helm|k8s|terraform/i,
+  },
+  {
+    surface: 'build',
+    weight: 0.6,
+    pattern: /package\.json|tsconfig|turbo\.json|pnpm-|\.config\.|eslint|vite/i,
+  },
+  { surface: 'ui', weight: 0.4, pattern: /\.tsx|\.css|components\/|apps\/web\// },
+  { surface: 'test', weight: 0.2, pattern: /\.spec\.|\.test\.|__tests__\// },
+  { surface: 'docs', weight: 0.1, pattern: /\.md$|docs\// },
+];
+
+/** Weight assigned to a path that matches no rule (surface 'none'). */
+const NONE_WEIGHT = 0.0;
+
+/**
+ * Map one path to a surface and weight: the first entry of SURFACE_RULES whose
+ * pattern matches wins; a path matching nothing is 'none' with NONE_WEIGHT.
+ */
+function classify(path: string): { surface: ReviewSurface; weight: number } {
+  const hit = SURFACE_RULES.find((rule) => rule.pattern.test(path));
+  if (hit === undefined) {
+    return { surface: 'none', weight: NONE_WEIGHT };
+  }
+  return { surface: hit.surface, weight: hit.weight };
+}
+
+/**
+ * Evaluate the review risk-floor for a diff.
+ *
+ * Each path is classified to a surface; the verdict reports the highest weight
+ * seen, the surface that produced it, and the files on that surface. Empty and
+ * non-string entries are dropped, duplicate paths are collapsed, and the paths
+ * are sorted before evaluation, so the whole verdict — including the file list
+ * in `reason` — does not depend on the order of `filesChanged`.
+ *
+ * @param input         changed files (+ optional size signals, currently unread)
+ * @param threshold     review cutoff; defaults to {@link DEFAULT_RISK_THRESHOLD}
+ * @returns             verdict whose `needs_review` is `score >= threshold`
+ */
+export function evaluateRiskFloor(
+  input: RiskFloorInput,
+  threshold: number = DEFAULT_RISK_THRESHOLD,
+): RiskFloorVerdict {
+  const usable = (input.filesChanged ?? []).filter((f) => typeof f === 'string' && f.length > 0);
+  // Dedupe + sort: `filesChanged` is documented as order-insensitive, and the
+  // tripping files are echoed into `reason` in iteration order.
+  const files = [...new Set(usable)].sort();
+
+  if (files.length === 0) {
+    return {
+      needs_review: false,
+      score: 0,
+      surface: 'none',
+      reason: 'no files changed',
+    };
+  }
+
+  let topSurface: ReviewSurface = 'none';
+  let topWeight = NONE_WEIGHT;
+  const tripping: string[] = [];
+
+  for (const file of files) {
+    const { surface, weight } = classify(file);
+    if (weight > topWeight) {
+      // A strictly riskier surface replaces everything collected so far.
+      topWeight = weight;
+      topSurface = surface;
+      tripping.length = 0;
+      tripping.push(file);
+    } else if (weight === topWeight && surface === topSurface && surface !== 'none') {
+      // Tie on the current dominant surface: report this file too.
+      tripping.push(file);
+    }
+  }
+
+  const needs_review = topWeight >= threshold;
+  const reason =
+    topSurface === 'none'
+      ? `no sensitive surface in ${files.length} changed file(s)`
+      : `${topSurface} surface (weight ${topWeight}) in: ${tripping.join(', ')}`;
+
+  return { needs_review, score: topWeight, surface: topSurface, reason };
+}
--- a/packages/macp/src/schemas/reflection.v1.schema.json
+++ b/packages/macp/src/schemas/reflection.v1.schema.json
@@ -0,0 +1,105 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://mosaicstack.dev/schemas/reflection/reflection.v1.schema.json",
+  "title": "Agent Reflection (v1)",
+  "description": "End-of-run reflection sidecar. Mechanical fields are written by the Stop hook; self-reported fields are merged from an optional agent-supplied input and are null when absent (provenance.degraded=true).",
+  "type": "object",
+  "required": [
+    "schema",
+    "task_ref",
+    "agent",
+    "session_id",
+    "timestamp",
+    "repo",
+    "risk",
+    "files_changed",
+    "provenance"
+  ],
+  "properties": {
+    "schema": {
+      "const": "reflection.v1"
+    },
+    "task_ref": {
+      "type": "string",
+      "description": "Canonical task ref; derived from REFLECTION_TASK_REF or repo+branch."
+    },
+    "agent": {
+      "type": "string",
+      "description": "Persona/runtime id (REFLECTION_AGENT or 'unknown')."
+    },
+    "session_id": {
+      "type": "string",
+      "description": "From the Stop payload session_id, else 'unknown'."
+    },
+    "timestamp": {
+      "type": "string",
+      "format": "date-time",
+      "description": "ISO-8601 UTC capture time."
+    },
+    "repo": {
+      "type": "string",
+      "description": "Repo root basename."
+    },
+    "confidence": {
+      "type": ["number", "null"],
+      "minimum": 0,
+      "maximum": 1,
+      "description": "SELF-REPORTED. Agent's overall confidence; null when not supplied."
+    },
+    "most_likely_wrong": {
+      "type": ["object", "null"],
+      "description": "SELF-REPORTED. The single most-likely way the work is wrong.",
+      "required": ["surface", "description"],
+      "properties": {
+        "surface": { "$ref": "#/$defs/surface" },
+        "description": { "type": "string" }
+      },
+      "additionalProperties": false
+    },
+    "known_not_in_diff": {
+      "type": ["string", "null"],
+      "description": "SELF-REPORTED. What the agent knows that isn't visible in the diff."
+    },
+    "risk": {
+      "type": "object",
+      "description": "MECHANICAL. Output of the diff risk-floor.",
+      "required": ["needs_review", "score", "surface", "reason"],
+      "properties": {
+        "needs_review": { "type": "boolean" },
+        "score": { "type": "number", "minimum": 0, "maximum": 1 },
+        "surface": { "$ref": "#/$defs/surface" },
+        "reason": { "type": "string" }
+      },
+      "additionalProperties": false
+    },
+    "files_changed": {
+      "type": "array",
+      "items": { "type": "string" },
+      "description": "MECHANICAL. git diff name-only."
+    },
+    "provenance": {
+      "type": "object",
+      "required": ["source", "reflection_attempt", "degraded", "reflection_mode"],
+      "properties": {
+        "source": { "const": "stop-hook" },
+        "reflection_attempt": { "type": "integer", "minimum": 1 },
+        "degraded": {
+          "type": "boolean",
+          "description": "True when self-report inputs were missing/unreadable."
+        },
+        "reflection_mode": {
+          "type": "string",
+          "enum": ["off", "solo", "orchestrated"]
+        }
+      },
+      "additionalProperties": false
+    }
+  },
+  "additionalProperties": false,
+  "$defs": {
+    "surface": {
+      "type": "string",
+      "enum": ["auth", "data", "infra", "build", "ui", "test", "docs", "none"]
+    }
+  }
+}
--- a/packages/mosaic/framework/defaults/TOOLS.md
+++ b/packages/mosaic/framework/defaults/TOOLS.md
@@ -5,10 +5,39 @@ Tool suites live at `~/.config/mosaic/tools/<suite>/`. This is the index only.
 read it (or the relevant service guide) when your task actually touches that service.
 Project-specific tooling belongs in the project's `AGENTS.md`, not here.

+## ⚡ Most-used fleet tools (reach for these FIRST — don't hand-roll)
+
+You are a Mosaic fleet agent. These cover the highest-frequency cross-agent and git-provider
+tasks — use them before improvising with raw `tmux send-keys`, raw `tea`/`gh`/`glab`, or `curl`.
+
+**1. Message another agent** → `tools/tmux/agent-send.sh` (NOT raw `tmux send-keys`):
+
+```bash
+tools/tmux/agent-send.sh -s <target-session> -m "message"   # or -f <file> to send a file's contents
+```
+
+The coordinator session is `mos-claude` — send status, findings, and questions there.
+
+**2. Issues / PRs / milestones** → `tools/git/*.sh` wrappers (before raw `tea`/`gh`/`glab`):
+
+```bash
+tools/git/pr-create.sh ...   tools/git/issue-create.sh ...   tools/git/pr-merge.sh ...
+tools/git/ci-queue-wait.sh --purpose <push|merge>   # REQUIRED before any push/merge (pass exactly one purpose)
+```
+
+**GITEA_LOGIN gotcha** — the wrappers default to login `mosaicstack`; on a USC repo that fails with
+`gitea / Error: GetUserByName ... not found`. Pick the login from the repo's `origin` host first:
+
+| origin host           | login                                    |
+| --------------------- | ---------------------------------------- |
+| `git.uscllc.com`      | `export GITEA_LOGIN=usc`                 |
+| `git.mosaicstack.dev` | default `mosaicstack` (no export needed) |
+
 ## Suites (use wrappers first)

 | Suite      | Path                                             | Purpose                                                                  |
 | ---------- | ------------------------------------------------ | ------------------------------------------------------------------------ |
+| tmux       | `tools/tmux/agent-send.sh`                       | inter-agent messaging (see "Most-used" above)                            |
 | git        | `tools/git/*.sh`                                 | issues, PRs, milestones, CI queue guard (platform-auto-detected)         |
 | woodpecker | `tools/woodpecker/*.sh`                          | CI pipelines (`-a mosaic`\|`usc`; match git remote host)                 |
 | portainer  | `tools/portainer/*.sh`                           | Docker Swarm stacks (status/redeploy/list)                               |
--- a/packages/mosaic/framework/runtime/claude/settings.json
+++ b/packages/mosaic/framework/runtime/claude/settings.json
@@ -34,6 +34,17 @@
          }
        ]
      }
+    ],
+    "Stop": [
+      {
+        "hooks": [
+          {
+            "type": "command",
+            "command": "~/.config/mosaic/tools/qa/reflect-stop-hook.sh",
+            "timeout": 15
+          }
+        ]
+      }
    ]
  },
  "enabledPlugins": {
--- a/packages/mosaic/framework/runtime/pi/RUNTIME.md
+++ b/packages/mosaic/framework/runtime/pi/RUNTIME.md
@@ -29,7 +29,21 @@ Pi supports `--models` for Ctrl+P model cycling during a session. Use cheaper mo

 ### Skills

-Mosaic skills are loaded natively via Pi's `--skill` flag. Skills are discovered from:
+By default the launcher starts Pi with `--no-skills` to keep startup context small, then
+force-loads a small set of fleet-critical skills via explicit `--skill` flags (an explicit
+`--skill` overrides `--no-skills` for that path). The default forced set is `mosaic-tools`
+(the must-use `~/.config/mosaic/tools/` cheatsheet: inter-agent messaging + git wrappers).
+
+Tune skill loading with environment variables:
+
+- `MOSAIC_PI_FORCE_SKILLS` — colon-separated skill dir names to force-load (default: `mosaic-tools`;
+  set to an empty string to disable force-loading). Missing skills are skipped silently.
+- `MOSAIC_PI_SKILL_MODE=all` — link every skill found in `~/.config/mosaic/{skills,skills-local}/`
+  (full catalog; larger context).
+- `MOSAIC_PI_SKILL_MODE=discover` — let Pi discover skills natively (no `--no-skills`), still
+  force-loading the fleet set on top.
+
+Skills are discovered from:

 - `~/.config/mosaic/skills/` (Mosaic global skills)
 - `~/.pi/agent/skills/` (Pi global skills)
--- a/packages/mosaic/framework/systemd/user/README.md
+++ b/packages/mosaic/framework/systemd/user/README.md
@@ -0,0 +1,57 @@
+# Mosaic tmux Fleet PoC
+
+This directory contains the first durable tmux-backed fleet primitives for the
+Mosaic software-factory model.
+
+The lifecycle model follows the organization-neutral AI Guide playbook
+`mosaicstack/aiguide:playbooks/tmux-fleet.md` (commit `2a0b0b5`): a dedicated
+holder owns the tmux server/socket; agent units join it and stop only their own
+exact-match session.
+
+## Layout
+
+- `mosaic-tmux-holder.service` — user-mode holder that owns the named tmux server.
+- `mosaic-agent@.service` — user-mode template for one reusable agent session.
+- `test-fleet-units.sh` — validates unit syntax and required relationships.
+
+The agent template calls:
+
+```text
+~/.config/mosaic/tools/fleet/start-agent-session.sh <agent-name>
+```
+
+which starts or reuses a tmux session on the tmux socket named by `MOSAIC_TMUX_SOCKET`.
+
+## Local customization
+
+Per-agent overrides live outside the package in:
+
+```text
+~/.config/mosaic/fleet/agents/<agent>.env
+```
+
+Example:
+
+```dotenv
+MOSAIC_TMUX_SOCKET=mosaic-factory
+MOSAIC_AGENT_RUNTIME=claude
+MOSAIC_AGENT_WORKDIR=/home/jarvis/src/mosaic-stack
+# Optional escape hatch for PoC/canary agents:
+# MOSAIC_AGENT_COMMAND=mosaic yolo claude
+```
+
+## Manual canary sequence
+
+```bash
+mkdir -p ~/.config/systemd/user ~/.config/mosaic/tools/fleet ~/.config/mosaic/fleet/agents
+cp packages/mosaic/framework/systemd/user/mosaic-*.service ~/.config/systemd/user/
+cp packages/mosaic/framework/tools/fleet/start-agent-session.sh ~/.config/mosaic/tools/fleet/
+chmod +x ~/.config/mosaic/tools/fleet/start-agent-session.sh
+systemctl --user daemon-reload
+systemctl --user start mosaic-tmux-holder.service
+systemctl --user start mosaic-agent@canary.service
+tmux -L mosaic-factory ls
+```
+
+Do not use `tmux kill-server` without `-L mosaic-factory`; this pattern is meant
+to avoid disturbing the user's default tmux server.
--- a/packages/mosaic/framework/systemd/user/mosaic-agent@.service
+++ b/packages/mosaic/framework/systemd/user/mosaic-agent@.service
@@ -0,0 +1,20 @@
+# Template unit: one instance per agent (%i = agent name). Each instance manages
+# the tmux session named %i on the socket owned by mosaic-tmux-holder.service.
+[Unit]
+Description=Mosaic tmux fleet agent %i
+Documentation=https://git.mosaicstack.dev/mosaicstack/stack
+# Requires + After: the holder is pulled in and started before this agent.
+# PartOf: stopping or restarting the holder also stops or restarts this agent.
+Requires=mosaic-tmux-holder.service
+After=mosaic-tmux-holder.service
+PartOf=mosaic-tmux-holder.service
+
+[Service]
+# oneshot + RemainAfterExit: the unit stays active after the start script exits,
+# so ExecStop runs when the unit is later stopped.
+Type=oneshot
+RemainAfterExit=yes
+# Defaults; the optional per-agent env file below may override any of them.
+Environment=MOSAIC_TMUX_SOCKET=mosaic-factory
+Environment=MOSAIC_AGENT_NAME=%i
+Environment=MOSAIC_AGENT_RUNTIME=pi
+Environment=MOSAIC_AGENT_WORKDIR=%h
+# Leading "-": a missing env file is not an error.
+EnvironmentFile=-%h/.config/mosaic/fleet/agents/%i.env
+ExecStart=/bin/bash %h/.config/mosaic/tools/fleet/start-agent-session.sh %i
+# Kill only this agent's session ("=" forces an exact session-name match). The
+# leading "-" ignores a non-zero exit, e.g. when the session is already gone.
+ExecStop=-/bin/bash -lc 'tmux -L "${MOSAIC_TMUX_SOCKET:-mosaic-factory}" kill-session -t "=%i"'
+
+[Install]
+WantedBy=default.target
--- a/packages/mosaic/framework/systemd/user/mosaic-tmux-holder.service
+++ b/packages/mosaic/framework/systemd/user/mosaic-tmux-holder.service
@@ -0,0 +1,15 @@
+# User-mode holder unit: owns the named tmux server/socket that the
+# mosaic-agent@ instances join.
+[Unit]
+Description=Mosaic tmux fleet holder
+Documentation=https://git.mosaicstack.dev/mosaicstack/stack
+After=default.target
+
+[Service]
+# oneshot + RemainAfterExit: ExecStart returns once the detached holder session
+# exists, and the unit stays active so ExecStop can tear the server down later.
+Type=oneshot
+RemainAfterExit=yes
+Environment=MOSAIC_TMUX_SOCKET=mosaic-factory
+Environment=MOSAIC_TMUX_HOLDER=_holder
+# Idempotent start: reuse the holder session if it exists, otherwise create it.
+# The has-session target names the session only ("=" forces an exact match).
+# A ":0.0" window/pane suffix would also require window 0 and pane 0 to exist,
+# which fails when the user's tmux config sets base-index/pane-base-index to 1;
+# the fallback new-session would then abort on the duplicate session name.
+ExecStart=/bin/bash -lc 'tmux -L "$MOSAIC_TMUX_SOCKET" has-session -t "=${MOSAIC_TMUX_HOLDER}" 2>/dev/null || tmux -L "$MOSAIC_TMUX_SOCKET" new-session -d -s "$MOSAIC_TMUX_HOLDER" "while true; do sleep 3600; done"'
+# Kills only the server on the named socket. The leading "-" ignores a non-zero
+# exit, e.g. when the server is already gone.
+ExecStop=-/bin/bash -lc 'tmux -L "$MOSAIC_TMUX_SOCKET" kill-server'
+
+[Install]
+WantedBy=default.target
--- a/packages/mosaic/framework/systemd/user/test-fleet-units.sh
+++ b/packages/mosaic/framework/systemd/user/test-fleet-units.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR=$(cd -- "$(dirname -- "$0")" && pwd)
+HOLDER="$SCRIPT_DIR/mosaic-tmux-holder.service"
+AGENT="$SCRIPT_DIR/mosaic-agent@.service"
+
+fail() {
+  echo "FAIL: $*" >&2
+  exit 1
+}
+
+[ -f "$HOLDER" ] || fail "missing mosaic-tmux-holder.service"
+[ -f "$AGENT" ] || fail "missing mosaic-agent@.service"
+
+grep -qF 'ExecStart=' "$HOLDER" || fail "holder has no ExecStart"
+grep -qF 'tmux -L' "$HOLDER" || fail "holder does not use named tmux socket"
+grep -qF '_holder' "$HOLDER" || fail "holder session is not explicit"
+grep -qF 'Requires=mosaic-tmux-holder.service' "$AGENT" || fail "agent does not require holder"
+grep -qF 'start-agent-session.sh' "$AGENT" || fail "agent unit does not call start-agent-session.sh"
+grep -qF 'kill-session -t "=%i"' "$AGENT" || fail "agent stop does not exact-match its session"
+
+if command -v systemd-analyze >/dev/null 2>&1; then
+  systemd-analyze verify --user "$HOLDER" "$AGENT" >/tmp/mosaic-fleet-systemd-verify.log 2>&1 || {
+    cat /tmp/mosaic-fleet-systemd-verify.log >&2
+    fail "systemd-analyze verify failed"
+  }
+fi
+
+echo "ok - fleet systemd unit templates"
--- a/packages/mosaic/framework/templates/agent/AGENTS.md.template
+++ b/packages/mosaic/framework/templates/agent/AGENTS.md.template
@@ -9,8 +9,8 @@
 2. Do NOT ask for routine confirmation before required push/merge/issue-close/release/tag actions.
 3. Completion is forbidden at PR-open stage.
 4. Completion requires merged PR to `main` + terminal green CI + linked issue/internal task closed.
-5. Before push or merge, run queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push|merge -B main`.
-6. For issue/PR/milestone operations, use Mosaic wrappers first (`~/.config/mosaic/rails/git/*.sh`).
+5. Before push or merge, run queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push|merge -B main`.
+6. For issue/PR/milestone operations, use Mosaic wrappers first (`~/.config/mosaic/tools/git/*.sh`).
 7. If any required wrapper command fails: report `blocked` with the exact failed wrapper command and stop.
 8. Do NOT stop at "PR created" and do NOT ask "should I merge?" for routine flow.

@@ -58,7 +58,7 @@ ${QUALITY_GATES}
 2. If external git provider is available (Gitea/GitHub/GitLab), create/update issue(s) before coding and map them in `docs/TASKS.md`.
 3. If no external provider is available, use internal refs in `docs/TASKS.md` (example: `TASKS:T1`).
 4. Keep `docs/TASKS.md` status in sync with actual progress until completion.
-5. For issue/PR/milestone actions, detect platform and use `~/.config/mosaic/rails/git/*.sh` wrappers first (no raw `gh`/`tea`/`glab` as first choice).
+5. For issue/PR/milestone actions, detect platform and use `~/.config/mosaic/tools/git/*.sh` wrappers first (no raw `gh`/`tea`/`glab` as first choice).
 6. If wrapper-driven merge/CI/issue-closure fails, report blocker with the exact failed wrapper command and stop (do not claim completion).

 ## Documentation Contract
@@ -88,7 +88,7 @@ Reference:
 5. Do not mark implementation complete until PR is merged.
 6. Do not mark implementation complete until CI/pipeline status is terminal green.
 7. Close linked issues/tasks only after merge + green CI.
-8. Before push or merge, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push|merge -B main`.
+8. Before push or merge, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push|merge -B main`.

 ## Container Release Strategy (When Applicable)

@@ -138,8 +138,8 @@ When completing an orchestrated task:
 ### Post-Coding Review
 After implementing changes, code review is REQUIRED for any source-code modification.
 For orchestrated tasks, the orchestrator will run:
-1. **Codex code review** — `~/.config/mosaic/rails/codex/codex-code-review.sh --uncommitted`
-2. **Codex security review** — `~/.config/mosaic/rails/codex/codex-security-review.sh --uncommitted`
+1. **Codex code review** — `~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted`
+2. **Codex security review** — `~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted`
 3. If blockers/critical findings: remediation task created
 4. If clean: task marked done

--- a/packages/mosaic/framework/templates/agent/CLAUDE.md.template
+++ b/packages/mosaic/framework/templates/agent/CLAUDE.md.template
@@ -135,7 +135,7 @@ ${QUALITY_GATES}
 ## Issue Tracking

 Use external git provider issues when available. If no external provider exists, `docs/TASKS.md` is the canonical tracker for tasks, milestones, and issue-equivalent work.
-For issue/PR/milestone operations, detect platform and use `~/.config/mosaic/rails/git/*.sh` wrappers first; do not use raw `gh`/`tea`/`glab` as first choice.
+For issue/PR/milestone operations, detect platform and use `~/.config/mosaic/tools/git/*.sh` wrappers first; do not use raw `gh`/`tea`/`glab` as first choice.
 If wrapper-driven merge/CI/issue-closure fails, report blocker with exact failed wrapper command and stop.
 Do NOT stop at "PR created" and do NOT ask "should I merge?" or "should I close the issue?" for routine delivery flow.

@@ -147,9 +147,9 @@ Do NOT stop at "PR created" and do NOT ask "should I merge?" or "should I close
 5. Ensure `docs/PRD.md` or `docs/PRD.json` exists and is current before coding.
 6. Create scratchpad: `docs/scratchpads/{task-id}-{short-name}.md` and include issue/internal ref.
 7. Update `docs/TASKS.md` status + issue/internal ref before coding.
-8. Before push, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push -B main`.
+8. Before push, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push -B main`.
 9. Open PR to `main` for delivery changes (no direct push to `main`).
-10. Before merge, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose merge -B main`.
+10. Before merge, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose merge -B main`.
 11. Merge PRs that pass required checks and review gates with squash strategy only.
 12. Reference issues/internal refs in commits (`Fixes #123`, `Refs #123`, or `Refs TASKS:T1`).
 13. Close issue/internal task only after testing and documentation gates pass, PR merge is complete, and CI/pipeline status is terminal green.
@@ -176,10 +176,10 @@ Run independent reviews:

 ```bash
 # Code quality review (Codex)
-~/.config/mosaic/rails/codex/codex-code-review.sh --uncommitted
+~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted

 # Security review (Codex)
-~/.config/mosaic/rails/codex/codex-security-review.sh --uncommitted
+~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted
 ```

 **Fallback:** If Codex is unavailable, use Claude's built-in review skills.
--- a/packages/mosaic/framework/templates/agent/projects/django/AGENTS.md.template
+++ b/packages/mosaic/framework/templates/agent/projects/django/AGENTS.md.template
@@ -9,8 +9,8 @@
 2. Do NOT ask for routine confirmation before required push/merge/issue-close/release/tag actions.
 3. Completion is forbidden at PR-open stage.
 4. Completion requires merged PR to `main` + terminal green CI + linked issue/internal task closed.
-5. Before push or merge, run queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push|merge -B main`.
-6. For issue/PR/milestone operations, use Mosaic wrappers first (`~/.config/mosaic/rails/git/*.sh`).
+5. Before push or merge, run queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push|merge -B main`.
+6. For issue/PR/milestone operations, use Mosaic wrappers first (`~/.config/mosaic/tools/git/*.sh`).
 7. If any required wrapper command fails: report `blocked` with the exact failed wrapper command and stop.
 8. Do NOT stop at "PR created" and do NOT ask "should I merge?" for routine flow.

@@ -68,7 +68,7 @@ ruff check . && mypy . && pytest tests/
 2. If external git provider is available (Gitea/GitHub/GitLab), create/update issue(s) before coding and map them in `docs/TASKS.md`.
 3. If no external provider is available, use internal refs in `docs/TASKS.md` (example: `TASKS:T1`).
 4. Keep `docs/TASKS.md` status in sync with actual progress until completion.
-5. For issue/PR/milestone actions, detect platform and use `~/.config/mosaic/rails/git/*.sh` wrappers first (no raw `gh`/`tea`/`glab` as first choice).
+5. For issue/PR/milestone actions, detect platform and use `~/.config/mosaic/tools/git/*.sh` wrappers first (no raw `gh`/`tea`/`glab` as first choice).
 6. If wrapper-driven merge/CI/issue-closure fails, report blocker with the exact failed wrapper command and stop (do not claim completion).

 ## Documentation Contract
@@ -97,7 +97,7 @@ Reference:
 5. Do not mark implementation complete until PR is merged.
 6. Do not mark implementation complete until CI/pipeline status is terminal green.
 7. Close linked issues/tasks only after merge + green CI.
-8. Before push or merge, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push|merge -B main`.
+8. Before push or merge, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push|merge -B main`.


 ## Container Release Strategy (When Applicable)
@@ -139,8 +139,8 @@ Use `${TASK_PREFIX}` for orchestrated tasks (e.g., `${TASK_PREFIX}-SEC-001`).
 ### Post-Coding Review
 After implementing changes, code review is REQUIRED for any source-code modification.
 For orchestrated tasks, the orchestrator will run:
-1. **Codex code review** — `~/.config/mosaic/rails/codex/codex-code-review.sh --uncommitted`
-2. **Codex security review** — `~/.config/mosaic/rails/codex/codex-security-review.sh --uncommitted`
+1. **Codex code review** — `~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted`
+2. **Codex security review** — `~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted`
 3. If blockers/critical findings: remediation task created
 4. If clean: task marked done

--- a/packages/mosaic/framework/templates/agent/projects/django/CLAUDE.md.template
+++ b/packages/mosaic/framework/templates/agent/projects/django/CLAUDE.md.template
@@ -159,10 +159,10 @@ Run independent reviews:

 ```bash
 # Code quality review (Codex)
-~/.config/mosaic/rails/codex/codex-code-review.sh --uncommitted
+~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted

 # Security review (Codex)
-~/.config/mosaic/rails/codex/codex-security-review.sh --uncommitted
+~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted
 ```

 See `~/.config/mosaic/guides/CODE-REVIEW.md` for the full review checklist.
@@ -186,7 +186,7 @@ See `~/.config/mosaic/guides/DOCUMENTATION.md` for required documentation delive
 ## Issue Tracking

 Use external git provider issues when available. If no external provider exists, `docs/TASKS.md` is the canonical tracker for tasks, milestones, and issue-equivalent work.
-For issue/PR/milestone operations, detect platform and use `~/.config/mosaic/rails/git/*.sh` wrappers first; do not use raw `gh`/`tea`/`glab` as first choice.
+For issue/PR/milestone operations, detect platform and use `~/.config/mosaic/tools/git/*.sh` wrappers first; do not use raw `gh`/`tea`/`glab` as first choice.
 If wrapper-driven merge/CI/issue-closure fails, report blocker with exact failed wrapper command and stop.
 Do NOT stop at "PR created" and do NOT ask "should I merge?" or "should I close the issue?" for routine delivery flow.

@@ -198,9 +198,9 @@ Do NOT stop at "PR created" and do NOT ask "should I merge?" or "should I close
 5. Ensure `docs/PRD.md` or `docs/PRD.json` exists and is current before coding.
 6. Create scratchpad: `docs/scratchpads/{task-id}-{short-name}.md` and include issue/internal ref.
 7. Update `docs/TASKS.md` status + issue/internal ref before coding.
-8. Before push, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push -B main`.
+8. Before push, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push -B main`.
 9. Open PR to `main` for delivery changes (no direct push to `main`).
-10. Before merge, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose merge -B main`.
+10. Before merge, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose merge -B main`.
 11. Merge PRs that pass required checks and review gates with squash strategy only.
 12. Reference issues/internal refs in commits (`Fixes #123`, `Refs #123`, or `Refs TASKS:T1`).
 13. Close issue/internal task only after testing and documentation gates pass, PR merge is complete, and CI/pipeline status is terminal green.
--- a/packages/mosaic/framework/templates/agent/projects/nestjs-nextjs/AGENTS.md.template
+++ b/packages/mosaic/framework/templates/agent/projects/nestjs-nextjs/AGENTS.md.template
@@ -9,8 +9,8 @@
 2. Do NOT ask for routine confirmation before required push/merge/issue-close/release/tag actions.
 3. Completion is forbidden at PR-open stage.
 4. Completion requires merged PR to `main` + terminal green CI + linked issue/internal task closed.
-5. Before push or merge, run queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push|merge -B main`.
-6. For issue/PR/milestone operations, use Mosaic wrappers first (`~/.config/mosaic/rails/git/*.sh`).
+5. Before push or merge, run queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push|merge -B main`.
+6. For issue/PR/milestone operations, use Mosaic wrappers first (`~/.config/mosaic/tools/git/*.sh`).
 7. If any required wrapper command fails: report `blocked` with the exact failed wrapper command and stop.
 8. Do NOT stop at "PR created" and do NOT ask "should I merge?" for routine flow.

@@ -72,7 +72,7 @@ pnpm typecheck && pnpm lint && pnpm test
 2. If external git provider is available (Gitea/GitHub/GitLab), create/update issue(s) before coding and map them in `docs/TASKS.md`.
 3. If no external provider is available, use internal refs in `docs/TASKS.md` (example: `TASKS:T1`).
 4. Keep `docs/TASKS.md` status in sync with actual progress until completion.
-5. For issue/PR/milestone actions, detect platform and use `~/.config/mosaic/rails/git/*.sh` wrappers first (no raw `gh`/`tea`/`glab` as first choice).
+5. For issue/PR/milestone actions, detect platform and use `~/.config/mosaic/tools/git/*.sh` wrappers first (no raw `gh`/`tea`/`glab` as first choice).
 6. If wrapper-driven merge/CI/issue-closure fails, report blocker with the exact failed wrapper command and stop (do not claim completion).

 ## Documentation Contract
@@ -101,7 +101,7 @@ Reference:
 5. Do not mark implementation complete until PR is merged.
 6. Do not mark implementation complete until CI/pipeline status is terminal green.
 7. Close linked issues/tasks only after merge + green CI.
-8. Before push or merge, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push|merge -B main`.
+8. Before push or merge, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push|merge -B main`.


 ## Container Release Strategy (When Applicable)
@@ -143,8 +143,8 @@ Use `${TASK_PREFIX}` for orchestrated tasks (e.g., `${TASK_PREFIX}-SEC-001`).
 ### Post-Coding Review
 After implementing changes, code review is REQUIRED for any source-code modification.
 For orchestrated tasks, the orchestrator will run:
-1. **Codex code review** — `~/.config/mosaic/rails/codex/codex-code-review.sh --uncommitted`
-2. **Codex security review** — `~/.config/mosaic/rails/codex/codex-security-review.sh --uncommitted`
+1. **Codex code review** — `~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted`
+2. **Codex security review** — `~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted`
 3. If blockers/critical findings: remediation task created
 4. If clean: task marked done

--- a/packages/mosaic/framework/templates/agent/projects/nestjs-nextjs/CLAUDE.md.template
+++ b/packages/mosaic/framework/templates/agent/projects/nestjs-nextjs/CLAUDE.md.template
@@ -191,10 +191,10 @@ Run independent reviews:

 ```bash
 # Code quality review (Codex)
-~/.config/mosaic/rails/codex/codex-code-review.sh --uncommitted
+~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted

 # Security review (Codex)
-~/.config/mosaic/rails/codex/codex-security-review.sh --uncommitted
+~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted
 ```

 See `~/.config/mosaic/guides/CODE-REVIEW.md` for the full review checklist.
@@ -218,7 +218,7 @@ See `~/.config/mosaic/guides/DOCUMENTATION.md` for required documentation delive
 ## Issue Tracking

 Use external git provider issues when available. If no external provider exists, `docs/TASKS.md` is the canonical tracker for tasks, milestones, and issue-equivalent work.
-For issue/PR/milestone operations, detect platform and use `~/.config/mosaic/rails/git/*.sh` wrappers first; do not use raw `gh`/`tea`/`glab` as first choice.
+For issue/PR/milestone operations, detect platform and use `~/.config/mosaic/tools/git/*.sh` wrappers first; do not use raw `gh`/`tea`/`glab` as first choice.
 If wrapper-driven merge/CI/issue-closure fails, report blocker with exact failed wrapper command and stop.
 Do NOT stop at "PR created" and do NOT ask "should I merge?" or "should I close the issue?" for routine delivery flow.

@@ -230,9 +230,9 @@ Do NOT stop at "PR created" and do NOT ask "should I merge?" or "should I close
 5. Ensure `docs/PRD.md` or `docs/PRD.json` exists and is current before coding.
 6. Create scratchpad: `docs/scratchpads/{task-id}-{short-name}.md` and include issue/internal ref.
 7. Update `docs/TASKS.md` status + issue/internal ref before coding.
-8. Before push, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push -B main`.
+8. Before push, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push -B main`.
 9. Open PR to `main` for delivery changes (no direct push to `main`).
-10. Before merge, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose merge -B main`.
+10. Before merge, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose merge -B main`.
 11. Merge PRs that pass required checks and review gates with squash strategy only.
 12. Reference issues/internal refs in commits (`Fixes #123`, `Refs #123`, or `Refs TASKS:T1`).
 13. Close issue/internal task only after testing and documentation gates pass, PR merge is complete, and CI/pipeline status is terminal green.
--- a/packages/mosaic/framework/templates/agent/projects/python-fastapi/AGENTS.md.template
+++ b/packages/mosaic/framework/templates/agent/projects/python-fastapi/AGENTS.md.template
@@ -9,8 +9,8 @@
 2. Do NOT ask for routine confirmation before required push/merge/issue-close/release/tag actions.
 3. Completion is forbidden at PR-open stage.
 4. Completion requires merged PR to `main` + terminal green CI + linked issue/internal task closed.
-5. Before push or merge, run queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push|merge -B main`.
-6. For issue/PR/milestone operations, use Mosaic wrappers first (`~/.config/mosaic/rails/git/*.sh`).
+5. Before push or merge, run queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push|merge -B main`.
+6. For issue/PR/milestone operations, use Mosaic wrappers first (`~/.config/mosaic/tools/git/*.sh`).
 7. If any required wrapper command fails: report `blocked` with the exact failed wrapper command and stop.
 8. Do NOT stop at "PR created" and do NOT ask "should I merge?" for routine flow.

@@ -58,7 +58,7 @@ uv run ruff check src/ tests/ && uv run ruff format --check src/ && uv run mypy
 2. If external git provider is available (Gitea/GitHub/GitLab), create/update issue(s) before coding and map them in `docs/TASKS.md`.
 3. If no external provider is available, use internal refs in `docs/TASKS.md` (example: `TASKS:T1`).
 4. Keep `docs/TASKS.md` status in sync with actual progress until completion.
-5. For issue/PR/milestone actions, detect platform and use `~/.config/mosaic/rails/git/*.sh` wrappers first (no raw `gh`/`tea`/`glab` as first choice).
+5. For issue/PR/milestone actions, detect platform and use `~/.config/mosaic/tools/git/*.sh` wrappers first (no raw `gh`/`tea`/`glab` as first choice).
 6. If wrapper-driven merge/CI/issue-closure fails, report blocker with the exact failed wrapper command and stop (do not claim completion).

 ## Documentation Contract
@@ -87,7 +87,7 @@ Reference:
 5. Do not mark implementation complete until PR is merged.
 6. Do not mark implementation complete until CI/pipeline status is terminal green.
 7. Close linked issues/tasks only after merge + green CI.
-8. Before push or merge, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push|merge -B main`.
+8. Before push or merge, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push|merge -B main`.

 ## Container Release Strategy (When Applicable)

--- a/packages/mosaic/framework/templates/agent/projects/python-fastapi/CLAUDE.md.template
+++ b/packages/mosaic/framework/templates/agent/projects/python-fastapi/CLAUDE.md.template
@@ -135,7 +135,7 @@ uv run ruff check src/ tests/ && uv run ruff format --check src/ && uv run mypy
 ## Issue Tracking

 Use external git provider issues when available. If no external provider exists, `docs/TASKS.md` is the canonical tracker for tasks, milestones, and issue-equivalent work.
-For issue/PR/milestone operations, detect platform and use `~/.config/mosaic/rails/git/*.sh` wrappers first; do not use raw `gh`/`tea`/`glab` as first choice.
+For issue/PR/milestone operations, detect platform and use `~/.config/mosaic/tools/git/*.sh` wrappers first; do not use raw `gh`/`tea`/`glab` as first choice.
 If wrapper-driven merge/CI/issue-closure fails, report blocker with exact failed wrapper command and stop.
 Do NOT stop at "PR created" and do NOT ask "should I merge?" or "should I close the issue?" for routine delivery flow.

@@ -146,9 +146,9 @@ Do NOT stop at "PR created" and do NOT ask "should I merge?" or "should I close
 5. Ensure `docs/PRD.md` or `docs/PRD.json` exists and is current before coding.
 6. Create scratchpad: `docs/scratchpads/{task-id}-{short-name}.md` and include issue/internal ref.
 7. Update `docs/TASKS.md` status + issue/internal ref before coding.
-8. Before push, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push -B main`.
+8. Before push, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push -B main`.
 9. Open PR to `main` for delivery changes (no direct push to `main`).
-10. Before merge, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose merge -B main`.
+10. Before merge, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose merge -B main`.
 11. Merge PRs that pass required checks and review gates with squash strategy only.
 12. Reference issues/internal refs in commits (`Fixes #123`, `Refs #123`, or `Refs TASKS:T1`).
 13. Close issue/internal task only after testing and documentation gates pass, PR merge is complete, and CI/pipeline status is terminal green.
@@ -171,8 +171,8 @@ If you modify source code, independent code review is REQUIRED before completion
 Run independent reviews:

 ```bash
-~/.config/mosaic/rails/codex/codex-code-review.sh --uncommitted
-~/.config/mosaic/rails/codex/codex-security-review.sh --uncommitted
+~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted
+~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted
 ```

 See `~/.config/mosaic/guides/CODE-REVIEW.md` for the full review checklist.
--- a/packages/mosaic/framework/templates/agent/projects/python-library/AGENTS.md.template
+++ b/packages/mosaic/framework/templates/agent/projects/python-library/AGENTS.md.template
@@ -9,8 +9,8 @@
 2. Do NOT ask for routine confirmation before required push/merge/issue-close/release/tag actions.
 3. Completion is forbidden at PR-open stage.
 4. Completion requires merged PR to `main` + terminal green CI + linked issue/internal task closed.
-5. Before push or merge, run queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push|merge -B main`.
-6. For issue/PR/milestone operations, use Mosaic wrappers first (`~/.config/mosaic/rails/git/*.sh`).
+5. Before push or merge, run queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push|merge -B main`.
+6. For issue/PR/milestone operations, use Mosaic wrappers first (`~/.config/mosaic/tools/git/*.sh`).
 7. If any required wrapper command fails: report `blocked` with the exact failed wrapper command and stop.
 8. Do NOT stop at "PR created" and do NOT ask "should I merge?" for routine flow.

@@ -55,7 +55,7 @@ uv run ruff check src/ tests/ && uv run ruff format --check src/ && uv run mypy
 2. If external git provider is available (Gitea/GitHub/GitLab), create/update issue(s) before coding and map them in `docs/TASKS.md`.
 3. If no external provider is available, use internal refs in `docs/TASKS.md` (example: `TASKS:T1`).
 4. Keep `docs/TASKS.md` status in sync with actual progress until completion.
-5. For issue/PR/milestone actions, detect platform and use `~/.config/mosaic/rails/git/*.sh` wrappers first (no raw `gh`/`tea`/`glab` as first choice).
+5. For issue/PR/milestone actions, detect platform and use `~/.config/mosaic/tools/git/*.sh` wrappers first (no raw `gh`/`tea`/`glab` as first choice).
 6. If wrapper-driven merge/CI/issue-closure fails, report blocker with the exact failed wrapper command and stop (do not claim completion).

 ## Documentation Contract
@@ -84,7 +84,7 @@ Reference:
 5. Do not mark implementation complete until PR is merged.
 6. Do not mark implementation complete until CI/pipeline status is terminal green.
 7. Close linked issues/tasks only after merge + green CI.
-8. Before push or merge, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push|merge -B main`.
+8. Before push or merge, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push|merge -B main`.

 ## Container Release Strategy (When Applicable)

--- a/packages/mosaic/framework/templates/agent/projects/python-library/CLAUDE.md.template
+++ b/packages/mosaic/framework/templates/agent/projects/python-library/CLAUDE.md.template
@@ -125,7 +125,7 @@ uv run ruff check src/ tests/ && uv run ruff format --check src/ && uv run mypy
 ## Issue Tracking

 Use external git provider issues when available. If no external provider exists, `docs/TASKS.md` is the canonical tracker for tasks, milestones, and issue-equivalent work.
-For issue/PR/milestone operations, detect platform and use `~/.config/mosaic/rails/git/*.sh` wrappers first; do not use raw `gh`/`tea`/`glab` as first choice.
+For issue/PR/milestone operations, detect platform and use `~/.config/mosaic/tools/git/*.sh` wrappers first; do not use raw `gh`/`tea`/`glab` as first choice.
 If wrapper-driven merge/CI/issue-closure fails, report blocker with exact failed wrapper command and stop.
 Do NOT stop at "PR created" and do NOT ask "should I merge?" or "should I close the issue?" for routine delivery flow.

@@ -136,9 +136,9 @@ Do NOT stop at "PR created" and do NOT ask "should I merge?" or "should I close
 5. Ensure `docs/PRD.md` or `docs/PRD.json` exists and is current before coding.
 6. Create scratchpad: `docs/scratchpads/{task-id}-{short-name}.md` and include issue/internal ref.
 7. Update `docs/TASKS.md` status + issue/internal ref before coding.
-8. Before push, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push -B main`.
+8. Before push, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push -B main`.
 9. Open PR to `main` for delivery changes (no direct push to `main`).
-10. Before merge, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose merge -B main`.
+10. Before merge, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose merge -B main`.
 11. Merge PRs that pass required checks and review gates with squash strategy only.
 12. Reference issues/internal refs in commits (`Fixes #123`, `Refs #123`, or `Refs TASKS:T1`).
 13. Close issue/internal task only after testing and documentation gates pass, PR merge is complete, and CI/pipeline status is terminal green.
@@ -161,8 +161,8 @@ If you modify source code, independent code review is REQUIRED before completion
 Run independent reviews:

 ```bash
-~/.config/mosaic/rails/codex/codex-code-review.sh --uncommitted
-~/.config/mosaic/rails/codex/codex-security-review.sh --uncommitted
+~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted
+~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted
 ```

 See `~/.config/mosaic/guides/CODE-REVIEW.md` for the full review checklist.
--- a/packages/mosaic/framework/templates/agent/projects/typescript/AGENTS.md.template
+++ b/packages/mosaic/framework/templates/agent/projects/typescript/AGENTS.md.template
@@ -9,8 +9,8 @@
 2. Do NOT ask for routine confirmation before required push/merge/issue-close/release/tag actions.
 3. Completion is forbidden at PR-open stage.
 4. Completion requires merged PR to `main` + terminal green CI + linked issue/internal task closed.
-5. Before push or merge, run queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push|merge -B main`.
-6. For issue/PR/milestone operations, use Mosaic wrappers first (`~/.config/mosaic/rails/git/*.sh`).
+5. Before push or merge, run queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push|merge -B main`.
+6. For issue/PR/milestone operations, use Mosaic wrappers first (`~/.config/mosaic/tools/git/*.sh`).
 7. If any required wrapper command fails: report `blocked` with the exact failed wrapper command and stop.
 8. Do NOT stop at "PR created" and do NOT ask "should I merge?" for routine flow.

@@ -56,7 +56,7 @@ ${QUALITY_GATES}
 2. If external git provider is available (Gitea/GitHub/GitLab), create/update issue(s) before coding and map them in `docs/TASKS.md`.
 3. If no external provider is available, use internal refs in `docs/TASKS.md` (example: `TASKS:T1`).
 4. Keep `docs/TASKS.md` status in sync with actual progress until completion.
-5. For issue/PR/milestone actions, detect platform and use `~/.config/mosaic/rails/git/*.sh` wrappers first (no raw `gh`/`tea`/`glab` as first choice).
+5. For issue/PR/milestone actions, detect platform and use `~/.config/mosaic/tools/git/*.sh` wrappers first (no raw `gh`/`tea`/`glab` as first choice).
 6. If wrapper-driven merge/CI/issue-closure fails, report blocker with the exact failed wrapper command and stop (do not claim completion).

 ## Documentation Contract
@@ -85,7 +85,7 @@ Reference:
 5. Do not mark implementation complete until PR is merged.
 6. Do not mark implementation complete until CI/pipeline status is terminal green.
 7. Close linked issues/tasks only after merge + green CI.
-8. Before push or merge, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push|merge -B main`.
+8. Before push or merge, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push|merge -B main`.

 ## Container Release Strategy (When Applicable)

--- a/packages/mosaic/framework/templates/agent/projects/typescript/CLAUDE.md.template
+++ b/packages/mosaic/framework/templates/agent/projects/typescript/CLAUDE.md.template
@@ -122,7 +122,7 @@ ${QUALITY_GATES}
 ## Issue Tracking

 Use external git provider issues when available. If no external provider exists, `docs/TASKS.md` is the canonical tracker for tasks, milestones, and issue-equivalent work.
-For issue/PR/milestone operations, detect platform and use `~/.config/mosaic/rails/git/*.sh` wrappers first; do not use raw `gh`/`tea`/`glab` as first choice.
+For issue/PR/milestone operations, detect platform and use `~/.config/mosaic/tools/git/*.sh` wrappers first; do not use raw `gh`/`tea`/`glab` as first choice.
 If wrapper-driven merge/CI/issue-closure fails, report blocker with exact failed wrapper command and stop.
 Do NOT stop at "PR created" and do NOT ask "should I merge?" or "should I close the issue?" for routine delivery flow.

@@ -133,9 +133,9 @@ Do NOT stop at "PR created" and do NOT ask "should I merge?" or "should I close
 5. Ensure `docs/PRD.md` or `docs/PRD.json` exists and is current before coding.
 6. Create scratchpad: `docs/scratchpads/{task-id}-{short-name}.md` and include issue/internal ref.
 7. Update `docs/TASKS.md` status + issue/internal ref before coding.
-8. Before push, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose push -B main`.
+8. Before push, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push -B main`.
 9. Open PR to `main` for delivery changes (no direct push to `main`).
-10. Before merge, run CI queue guard: `~/.config/mosaic/rails/git/ci-queue-wait.sh --purpose merge -B main`.
+10. Before merge, run CI queue guard: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose merge -B main`.
 11. Merge PRs that pass required checks and review gates with squash strategy only.
 12. Reference issues/internal refs in commits (`Fixes #123`, `Refs #123`, or `Refs TASKS:T1`).
 13. Close issue/internal task only after testing and documentation gates pass, PR merge is complete, and CI/pipeline status is terminal green.
@@ -159,10 +159,10 @@ Run independent reviews:

 ```bash
 # Code quality review (Codex)
-~/.config/mosaic/rails/codex/codex-code-review.sh --uncommitted
+~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted

 # Security review (Codex)
-~/.config/mosaic/rails/codex/codex-security-review.sh --uncommitted
+~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted
 ```

 **Fallback:** If Codex is unavailable, use Claude's built-in review skills.
--- a/packages/mosaic/framework/tools/fleet/start-agent-session.sh
+++ b/packages/mosaic/framework/tools/fleet/start-agent-session.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+#
+# start-agent-session.sh — idempotently start a detached tmux session for one
+# Mosaic agent on a dedicated tmux socket.
+#
+# Usage: start-agent-session.sh [agent-name]
+#
+# Environment:
+#   MOSAIC_AGENT_NAME     session name used when no argument is given
+#   MOSAIC_TMUX_SOCKET    socket name passed to `tmux -L` (default: mosaic-factory)
+#   MOSAIC_AGENT_RUNTIME  runtime appended to the default command (default: pi)
+#   MOSAIC_AGENT_WORKDIR  session start directory, created if missing (default: $HOME)
+#   MOSAIC_AGENT_COMMAND  command run in the session (default: mosaic yolo <runtime>)
+#
+# Exits 64 when no agent name is given, 69 when tmux is missing, and 0 when the
+# session already exists; otherwise the script is replaced by `tmux new-session`.
+set -euo pipefail
+
+AGENT_NAME=${1:-${MOSAIC_AGENT_NAME:-}}
+MOSAIC_TMUX_SOCKET=${MOSAIC_TMUX_SOCKET:-mosaic-factory}
+MOSAIC_AGENT_RUNTIME=${MOSAIC_AGENT_RUNTIME:-pi}
+MOSAIC_AGENT_WORKDIR=${MOSAIC_AGENT_WORKDIR:-$HOME}
+MOSAIC_AGENT_COMMAND=${MOSAIC_AGENT_COMMAND:-}
+
+if [ -z "$AGENT_NAME" ]; then
+  echo "ERROR: agent name argument or MOSAIC_AGENT_NAME is required" >&2
+  exit 64
+fi
+
+if ! command -v tmux >/dev/null 2>&1; then
+  echo "ERROR: tmux is required" >&2
+  exit 69
+fi
+
+# Probe the session by exact name ("=" prefix) only. Addressing window/pane
+# "0.0" would report "missing" for a live session whenever the user's tmux
+# config sets base-index or pane-base-index to 1, and the new-session call
+# below would then fail with a duplicate-session error instead of exiting 0.
+if tmux -L "$MOSAIC_TMUX_SOCKET" has-session -t "=${AGENT_NAME}" 2>/dev/null; then
+  echo "Mosaic agent session already running: $AGENT_NAME on socket $MOSAIC_TMUX_SOCKET"
+  exit 0
+fi
+
+if [ -z "$MOSAIC_AGENT_COMMAND" ]; then
+  MOSAIC_AGENT_COMMAND="mosaic yolo $MOSAIC_AGENT_RUNTIME"
+fi
+
+mkdir -p "$MOSAIC_AGENT_WORKDIR"
+exec tmux -L "$MOSAIC_TMUX_SOCKET" new-session -d -s "$AGENT_NAME" -c "$MOSAIC_AGENT_WORKDIR" "$MOSAIC_AGENT_COMMAND"
--- a/packages/mosaic/framework/tools/fleet/test-start-agent-session.sh
+++ b/packages/mosaic/framework/tools/fleet/test-start-agent-session.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+# Smoke test for start-agent-session.sh: the first start creates the session in
+# the requested workdir, and a second start reports "already running".
+# Runs against a throwaway tmux socket and temp dir, both removed on exit.
+set -euo pipefail
+
+SCRIPT_DIR=$(cd -- "$(dirname -- "$0")" && pwd)
+START="$SCRIPT_DIR/start-agent-session.sh"
+SOCKET="mosaic-agent-test-$RANDOM-$$"
+AGENT="agent-$RANDOM"
+WORKDIR=$(mktemp -d)
+# Second-start output lives inside WORKDIR so the EXIT trap removes it and
+# concurrent runs never share a fixed /tmp path.
+IDEMPOTENT_OUT="$WORKDIR/idempotent.out"
+trap 'tmux -L "$SOCKET" kill-server >/dev/null 2>&1 || true; rm -rf "$WORKDIR"' EXIT
+
+fail() {
+  echo "FAIL: $*" >&2
+  exit 1
+}
+
+MOSAIC_TMUX_SOCKET="$SOCKET" \
+MOSAIC_AGENT_WORKDIR="$WORKDIR" \
+MOSAIC_AGENT_COMMAND='bash --noprofile --norc -i' \
+  "$START" "$AGENT"
+
+# Address the session by exact name only (trailing ":" = its current window and
+# pane) so the checks do not depend on the user's base-index/pane-base-index.
+tmux -L "$SOCKET" has-session -t "=$AGENT" || fail "agent session was not created"
+actual_dir=$(tmux -L "$SOCKET" display-message -p -t "=$AGENT:" '#{pane_current_path}')
+[ "$actual_dir" = "$WORKDIR" ] || fail "agent workdir mismatch: $actual_dir"
+
+MOSAIC_TMUX_SOCKET="$SOCKET" \
+MOSAIC_AGENT_WORKDIR="$WORKDIR" \
+MOSAIC_AGENT_COMMAND='bash --noprofile --norc -i' \
+  "$START" "$AGENT" >"$IDEMPOTENT_OUT"
+
+grep -qF 'already running' "$IDEMPOTENT_OUT" || fail "duplicate start was not idempotent"
+
+echo "ok - start-agent-session"
--- a/packages/mosaic/framework/tools/git/issue-assign.sh
+++ b/packages/mosaic/framework/tools/git/issue-assign.sh
@@ -98,27 +98,32 @@ case "$PLATFORM" in
        ;;
    gitea)
        # tea issue edit syntax
-        REPO_ARGS=$(get_gitea_repo_args) || {
-            echo "Error: Could not resolve Gitea repo/login args for remote host" >&2
+        REPO_SLUG=$(get_repo_slug) || {
+            echo "Error: Could not resolve Gitea repo slug from remote" >&2
            exit 1
        }
-        CMD="tea issue edit $ISSUE $REPO_ARGS"
+        REPO_LOGIN=$(get_gitea_login) || {
+            echo "Error: Could not resolve Gitea login for remote host" >&2
+            exit 1
+        }
+        REPO_ARGS=(--repo "$REPO_SLUG" --login "$REPO_LOGIN")
+        CMD=(tea issue edit "$ISSUE" "${REPO_ARGS[@]}")
        NEEDS_EDIT=false

        if [[ -n "$ASSIGNEE" ]]; then
            # tea uses --assignees flag
-            CMD="$CMD --assignees \"$ASSIGNEE\""
+            CMD+=(--assignees "$ASSIGNEE")
            NEEDS_EDIT=true
        fi
        if [[ -n "$LABELS" ]]; then
            # tea uses --labels flag (replaces existing)
-            CMD="$CMD --labels \"$LABELS\""
+            CMD+=(--labels "$LABELS")
            NEEDS_EDIT=true
        fi
        if [[ -n "$MILESTONE" ]]; then
-            MILESTONE_ID=$(tea milestones list $REPO_ARGS 2>/dev/null | grep -E "^\s*[0-9]+" | grep "$MILESTONE" | awk '{print $1}' | head -1)
+            MILESTONE_ID=$(tea milestones list "${REPO_ARGS[@]}" 2>/dev/null | grep -E "^\s*[0-9]+" | grep -F -- "$MILESTONE" | awk '{print $1}' | head -1)  # -F/--: the title is literal text, not a regex or an option
            if [[ -n "$MILESTONE_ID" ]]; then
-                CMD="$CMD --milestone $MILESTONE_ID"
+                CMD+=(--milestone "$MILESTONE_ID")
                NEEDS_EDIT=true
            else
                echo "Warning: Could not find milestone '$MILESTONE'" >&2
@@ -126,7 +131,7 @@ case "$PLATFORM" in
        fi

        if [[ "$NEEDS_EDIT" == true ]]; then
-            eval "$CMD"
+            "${CMD[@]}"
            echo "Issue #$ISSUE updated successfully"
        else
            echo "No changes specified"
--- a/packages/mosaic/framework/tools/git/issue-edit.sh
+++ b/packages/mosaic/framework/tools/git/issue-edit.sh
@@ -63,24 +63,28 @@ fi
 detect_platform >/dev/null

 if [[ "$PLATFORM" == "github" ]]; then
-    CMD="gh issue edit $ISSUE_NUMBER"
-    [[ -n "$TITLE" ]] && CMD="$CMD --title \"$TITLE\""
-    [[ -n "$BODY" ]] && CMD="$CMD --body \"$BODY\""
-    [[ -n "$LABELS" ]] && CMD="$CMD --add-label \"$LABELS\""
-    [[ -n "$MILESTONE" ]] && CMD="$CMD --milestone \"$MILESTONE\""
-    eval $CMD
+    CMD=(gh issue edit "$ISSUE_NUMBER")
+    [[ -n "$TITLE" ]] && CMD+=(--title "$TITLE")
+    [[ -n "$BODY" ]] && CMD+=(--body "$BODY")
+    [[ -n "$LABELS" ]] && CMD+=(--add-label "$LABELS")
+    [[ -n "$MILESTONE" ]] && CMD+=(--milestone "$MILESTONE")
+    "${CMD[@]}"
    echo "Updated GitHub issue #$ISSUE_NUMBER"
 elif [[ "$PLATFORM" == "gitea" ]]; then
-    REPO_ARGS=$(get_gitea_repo_args) || {
-        echo "Error: Could not resolve Gitea repo/login args for remote host" >&2
+    REPO_SLUG=$(get_repo_slug) || {
+        echo "Error: Could not resolve Gitea repo slug from remote" >&2
        exit 1
    }
-    CMD="tea issue edit $ISSUE_NUMBER $REPO_ARGS"
-    [[ -n "$TITLE" ]] && CMD="$CMD --title \"$TITLE\""
-    [[ -n "$BODY" ]] && CMD="$CMD --description \"$BODY\""
-    [[ -n "$LABELS" ]] && CMD="$CMD --add-labels \"$LABELS\""
-    [[ -n "$MILESTONE" ]] && CMD="$CMD --milestone \"$MILESTONE\""
-    eval $CMD
+    REPO_LOGIN=$(get_gitea_login) || {
+        echo "Error: Could not resolve Gitea login for remote host" >&2
+        exit 1
+    }
+    CMD=(tea issue edit "$ISSUE_NUMBER" --repo "$REPO_SLUG" --login "$REPO_LOGIN")
+    [[ -n "$TITLE" ]] && CMD+=(--title "$TITLE")
+    [[ -n "$BODY" ]] && CMD+=(--description "$BODY")
+    [[ -n "$LABELS" ]] && CMD+=(--add-labels "$LABELS")
+    [[ -n "$MILESTONE" ]] && CMD+=(--milestone "$MILESTONE")
+    "${CMD[@]}"
    echo "Updated Gitea issue #$ISSUE_NUMBER"
 else
    echo "Error: Unknown platform"
--- a/packages/mosaic/framework/tools/git/lane-brief.sh
+++ b/packages/mosaic/framework/tools/git/lane-brief.sh
@@ -0,0 +1,129 @@
+#!/usr/bin/env bash
+#
+# lane-brief.sh — live dispatch brief for a repo "lane" (milestone/label), straight
+# from current Gitea state. Defeats stale worker self-report: workers brief from
+# static notes and routinely report issues "todo" that are already CLOSED, forcing
+# the orchestrator to re-verify each one before dispatch. This returns the CURRENT
+# open set, classified for dispatch, in one call.
+#
+# Usage:
+#   lane-brief.sh -r <owner/repo> [-m <milestone>] [-l <label>] [-L <login>] [-n <limit>]
+#   lane-brief.sh -r usc/uconnect -m "M2M Part Search (0.0.45)"
+#   lane-brief.sh -r usc/uconnect -l domain/6-security
+#
+# Reliable signals (closed issues are excluded by definition — that's the point):
+#   - open-vs-closed : authoritative; this is the stale-intake failure mode.
+#   - PR-linkage     : an open PR referencing the issue = work underway.
+# Assignees/dependencies are intentionally NOT trusted as "available" signals —
+# fleets that track work-state out-of-band (tmux board, issue text) leave them
+# empty in Gitea. Output therefore partitions by PR presence and the OPEN-NO-PR set
+# is "dispatch candidates to cross-check against the live fleet", not a blind list.
+#
+# Login resolution order: -L flag > $GITEA_LOGIN > owner inference (usc->usc,
+# mosaicstack/mosaic->mosaicstack) > detect-platform.sh default-login fallback.
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=/dev/null
+source "$SCRIPT_DIR/detect-platform.sh"
+
+REPO="" MILESTONE="" LABEL="" LOGIN="" LIMIT=100
+while getopts "r:m:l:L:n:h" opt; do
+  case "$opt" in
+    r) REPO="$OPTARG" ;;
+    m) MILESTONE="$OPTARG" ;;
+    l) LABEL="$OPTARG" ;;
+    L) LOGIN="$OPTARG" ;;
+    n) LIMIT="$OPTARG" ;;
+    h) grep '^#' "$0" | sed 's/^# \?//'; exit 0 ;;
+    *) echo "see -h" >&2; exit 2 ;;
+  esac
+done
+[[ -n "$REPO" ]] || { echo "FATAL: -r <owner/repo> required" >&2; exit 2; }
+
+# Resolve login: explicit -L, then $GITEA_LOGIN, then owner inference, then the
+# shared default-login resolver. Owner inference comes before the shared fallback
+# because the latter is not owner-aware (picks the default tea login), which is
+# wrong for cross-instance lanes.
+if [[ -z "$LOGIN" ]]; then
+  if [[ -n "${GITEA_LOGIN:-}" ]]; then
+    LOGIN="$GITEA_LOGIN"
+  else
+    case "${REPO%%/*}" in
+      usc|USC) LOGIN=usc ;;
+      mosaicstack|mosaic) LOGIN=mosaicstack ;;
+      *) LOGIN="$(get_gitea_login_for_repo_override 2>/dev/null || true)" ;;
+    esac
+  fi
+fi
+[[ -n "$LOGIN" ]] || { echo "FATAL: could not resolve a Gitea login for $REPO (pass -L or set GITEA_LOGIN)" >&2; exit 2; }
+
+command -v tea >/dev/null || { echo "FATAL: tea not found" >&2; exit 1; }
+command -v jq  >/dev/null || { echo "FATAL: jq not found" >&2; exit 1; }
+
+ISSUES_JSON="$(tea issues list --repo "$REPO" --login "$LOGIN" --state open --limit "$LIMIT" \
+  --fields index,title,assignees,milestone,labels --output json 2>/dev/null)" || {
+  echo "FATAL: tea issues list failed for $REPO (login=$LOGIN)" >&2; exit 1; }
+
+# Open PRs, to cross-ref which issues already have work in flight. An issue is
+# "work underway" if an open PR links to it. Two link signals are honored:
+#   (a) a closing keyword in the PR BODY — Gitea's auto-close set (close/closes/
+#       closed, fix/fixes/fixed, resolve/resolves/resolved), case-insensitive,
+#       directly preceding `#N`. This is the AUTHORITATIVE link Gitea itself uses
+#       to associate a PR with the issue it resolves; a body-only "Closes #546"
+#       is the common case and MUST count. The earlier version inspected only the
+#       PR index/title/head TSV (never the body or Gitea linkage), so a body-only
+#       reference was invisible and the linked OPEN issue was misclassified as a
+#       dispatch candidate — re-dispatchable in-flight work (the #546/#547 defect).
+#   (b) a bare #N in the PR title, or an issue number embedded in the head branch
+#       (feat/546-x, fix-546) — the weaker heuristic preserved from prior behavior.
+# Bare #N mentions in the BODY are deliberately NOT treated as links: PR bodies
+# routinely name unrelated issues in prose ("relevant to the #538 line of work"),
+# and counting those would wrongly mark live, dispatchable issues as in-flight.
+# Only the closing-keyword form is a commitment to resolve that issue. Requiring
+# `#` to directly follow the keyword also keeps cross-repo `owner/repo#N` forms
+# from leaking a foreign issue number into this per-repo lane (cross-repo lanes
+# are run per-repo). JSON (not TSV) is used so multi-line bodies parse cleanly.
+PRS_JSON="$(tea pulls list --repo "$REPO" --login "$LOGIN" --state open --limit "$LIMIT" \
+  --fields index,title,head,body --output json 2>/dev/null || echo '[]')"
+[[ -n "$PRS_JSON" ]] || PRS_JSON='[]'  # empty output is treated as "no open PRs"
+
+# \b anchors the keyword to a word start so embedded substrings do not match
+# (e.g. "prefix #5", "disclosed #7" must NOT be read as "fix #5" / "closed #7").
+GITEA_CLOSE_KW='close[sd]?|fix(e[sd])?|resolve[sd]?'
+PR_BODY_REFS="$(printf '%s' "$PRS_JSON" | jq -r '.[] | .body // ""' 2>/dev/null \
+  | grep -oiE "\\b(${GITEA_CLOSE_KW})[[:space:]:]+#[0-9]+" | grep -oE '[0-9]+' || true)"
+PR_TITLE_HEAD_REFS="$(printf '%s' "$PRS_JSON" \
+  | jq -r '.[] | [ (.title // ""), (.head // "" | if type=="object" then (.ref // "") else . end) ] | join(" ")' 2>/dev/null \
+  | grep -oE '#[0-9]+|[/-][0-9]{3,}' | grep -oE '[0-9]+' || true)"
+PR_ISSUE_REFS="$(printf '%s\n%s\n' "$PR_BODY_REFS" "$PR_TITLE_HEAD_REFS" | grep -E '^[0-9]+$' | sort -u || true)"
+
+ts="$(date -u '+%Y-%m-%d %H:%MZ' 2>/dev/null || echo '?')"
+filt="$REPO"; [[ -n "$MILESTONE" ]] && filt="$filt · milestone:'$MILESTONE'"; [[ -n "$LABEL" ]] && filt="$filt · label:'$LABEL'"
+echo "LANE BRIEF — $filt · $ts (login=$LOGIN)"
+echo "(open issues only; closed are excluded by definition — that's the point)"
+echo
+
+# Label match is exact-token against tea's space-separated labels string (so -l
+# "security" does NOT match label "domain/6-security"). Caveat: label names that
+# themselves contain spaces aren't distinguishable in tea's string form.
+printf '%s' "$ISSUES_JSON" | jq -r --arg ms "$MILESTONE" --arg lb "$LABEL" --arg prs "$PR_ISSUE_REFS" '
+  ($prs | split("\n") | map(select(length>0))) as $prrefs
+  | map(
+      select( ($ms=="" or .milestone==$ms)
+          and ($lb=="" or ((.labels//"") | split(" ") | index($lb) != null)) )
+      | . + { assigned: ((.assignees//"")|length>0),
+              haspr: (.index as $ix | ($prrefs | index($ix)) != null) }
+    )
+  | (map(select(.haspr|not)))  as $candidates
+  | (map(select(.haspr)))      as $inflight
+  | "DISPATCH CANDIDATES (open · no open PR) — \($candidates|length)  [cross-check vs live fleet]:",
+    ( $candidates[] | "  #\(.index)  \(.title[0:90])\(if .assigned then "  (gitea-assignee set)" else "" end)" ),
+    "",
+    "WORK UNDERWAY (open · PR in flight) — \($inflight|length):",
+    ( $inflight[] | "  #\(.index)  \(.title[0:80])  [PR open]" )
+'
+echo
+echo "Closed issues are excluded — do NOT take a worker's self-reported 'todo' on faith."
+echo "Candidates = open + no PR; confirm against the live fleet before dispatch"
+echo "(fleets that don't self-assign in Gitea leave 'unassigned' meaningless)."
--- a/packages/mosaic/framework/tools/git/milestone-create.sh
+++ b/packages/mosaic/framework/tools/git/milestone-create.sh
@@ -99,10 +99,15 @@ fi
 case "$PLATFORM" in
    github)
        # GitHub uses the API for milestone creation
-        JSON_PAYLOAD="{\"title\":\"$TITLE\""
-        [[ -n "$DESCRIPTION" ]] && JSON_PAYLOAD="$JSON_PAYLOAD,\"description\":\"$DESCRIPTION\""
-        [[ -n "$DUE_DATE" ]] && JSON_PAYLOAD="$JSON_PAYLOAD,\"due_on\":\"${DUE_DATE}T00:00:00Z\""
-        JSON_PAYLOAD="$JSON_PAYLOAD}"
+        # Use jq to safely construct JSON so titles/descriptions containing
+        # quotes or special characters do not corrupt the payload (F-07).
+        JSON_PAYLOAD=$(jq -n \
+            --arg t "$TITLE" \
+            --arg d "$DESCRIPTION" \
+            --arg due "${DUE_DATE}" \
+            '{"title": $t}
+             + (if $d != "" then {"description": $d} else {} end)
+             + (if $due != "" then {"due_on": ($due + "T00:00:00Z")} else {} end)')

        gh api repos/:owner/:repo/milestones --method POST --input - <<< "$JSON_PAYLOAD"
        echo "Milestone '$TITLE' created successfully"
--- a/packages/mosaic/framework/tools/git/pr-metadata.sh
+++ b/packages/mosaic/framework/tools/git/pr-metadata.sh
@@ -57,12 +57,20 @@ curl_gitea_pull() {
    local token basic_auth raw_code body_file http_code
    body_file=$(mktemp)

+    # shellcheck disable=SC2329 # Invoked by the RETURN trap below.
+    cleanup_gitea_pull_body() {
+        local status=$?
+        rm -f -- "$body_file"
+        trap - RETURN
+        return "$status"
+    }
+    trap cleanup_gitea_pull_body RETURN
+
    token=$(get_gitea_token "$HOST" || true)
    if [[ -n "$token" ]]; then
        raw_code=$(curl -sS -w '%{http_code}' -o "$body_file" -H "User-Agent: curl/8" -H "Authorization: token $token" "$api_url" || true)
        if [[ "$raw_code" =~ ^2 ]]; then
-            cat "$body_file"
-            rm -f "$body_file"
+            cat "$body_file" || return $?
            return 0
        fi
        http_code="$raw_code"
@@ -72,8 +80,7 @@ curl_gitea_pull() {
    if [[ -n "$basic_auth" ]]; then
        raw_code=$(curl -sS -w '%{http_code}' -o "$body_file" -u "$basic_auth" -H "User-Agent: curl/8" "$api_url" || true)
        if [[ "$raw_code" =~ ^2 ]]; then
-            cat "$body_file"
-            rm -f "$body_file"
+            cat "$body_file" || return $?
            return 0
        fi
        http_code="$raw_code"
@@ -96,7 +103,6 @@ except Exception:
    message = open(path, encoding="utf-8", errors="replace").read()[:200] or "empty response"
 print(f"Error: Gitea pull request API request failed with HTTP {code}: {message}")
 PY
-    rm -f "$body_file"
    return 1
 }

--- a/packages/mosaic/framework/tools/git/test-lane-brief-pr-linkage.sh
+++ b/packages/mosaic/framework/tools/git/test-lane-brief-pr-linkage.sh
@@ -0,0 +1,114 @@
+#!/usr/bin/env bash
+# Regression harness for lane-brief.sh PR->issue linkage classification.
+#
+# Covers the #546/#547 defect: lane-brief.sh inspected only the PR index/title/head
+# fields and never the PR BODY, so an open PR whose body says "Closes #546" did not
+# mark issue #546 as work-underway — #546 was listed as a DISPATCH CANDIDATE and was
+# re-dispatchable in-flight work.
+#
+# Asserts:
+#   1. an open issue closed-keyword-linked from a PR BODY ("Closes #546") is
+#      classified WORK UNDERWAY, not a dispatch candidate.
+#   2. a BARE "#777" prose mention in a PR body does NOT classify #777 as
+#      work-underway (only Gitea closing keywords are a real link) — #777 stays a
+#      dispatch candidate.
+#   3. NON-VACUITY / RED-ON-REVERT: a copy of the script with the body-scan removed
+#      misclassifies #546 as a dispatch candidate — proving the body-scan is exactly
+#      what fixes the defect and that assertion 1 fails if the fix is reverted.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+LANE_BRIEF="$SCRIPT_DIR/lane-brief.sh"
+WORK_DIR="${MOSAIC_TEST_WORK_DIR:-$PWD/.mosaic-test-work/lane-brief-pr-linkage}"
+BIN_DIR="$WORK_DIR/bin"
+
+rm -rf "$WORK_DIR"
+mkdir -p "$BIN_DIR"
+
+# --- fake `tea`: serves a fixed open-issue set and one open PR. ----------------
+# PR #547 body uses a closing keyword for #546 ("Closes #546") and a BARE mention
+# of #777 ("the #777 line of work"). #777 must NOT be treated as linked.
+cat > "$BIN_DIR/tea" <<'SH'
+#!/usr/bin/env bash
+set -euo pipefail
+case "${1:-} ${2:-}" in
+  "issues list")
+    cat <<'JSON'
+[
+  {"index":"546","title":"lane-brief + ci-wait orchestration tooling","assignees":[],"milestone":null,"labels":""},
+  {"index":"777","title":"unrelated downstream item","assignees":[],"milestone":null,"labels":""},
+  {"index":"999","title":"item only named inside the word hotfix","assignees":[],"milestone":null,"labels":""}
+]
+JSON
+    ;;
+  "pulls list")
+    cat <<'JSON'
+[
+  {"index":"547","title":"feat(framework/tools): orchestration helpers","head":"feat/orchestration-tools-lane-brief-ci-wait","body":"Two additive orchestration tools.\n\nCloses #546.\n\nLogin resolution is relevant to the #777 line of work but does not touch it.\nThis shipped as a hotfix #999 earlier — that bare reference must not link it.\n\nFixes #546\n"}
+]
+JSON
+    ;;
+  *)
+    echo "fake-tea: unhandled: $*" >&2; exit 1 ;;
+esac
+SH
+chmod +x "$BIN_DIR/tea"
+
+run_brief() { # $1 = script path
+  PATH="$BIN_DIR:$PATH" "$1" -r mosaic/stack -L test-login 2>/dev/null
+}
+
+# Extract the issue numbers under a named section header until the next blank line.
+section_nums() { # $1 = output  $2 = header-prefix
+  printf '%s\n' "$1" | awk -v h="$2" '
+    index($0,h)==1 {grab=1; next}
+    grab && /^[[:space:]]*$/ {grab=0}
+    grab && match($0, /#[0-9]+/) { print substr($0, RSTART+1, RLENGTH-1) }
+  '
+}
+
+fail() { echo "FAIL: $1" >&2; exit 1; }
+contains() { printf '%s\n' "$1" | grep -qx "$2"; }
+
+# ---------------------------------------------------------------------------
+# Fixed (current) script behavior
+# ---------------------------------------------------------------------------
+OUT="$(run_brief "$LANE_BRIEF")"
+CAND="$(section_nums "$OUT" 'DISPATCH CANDIDATES')"
+UNDER="$(section_nums "$OUT" 'WORK UNDERWAY')"
+
+echo "--- lane-brief output (fixed) ---"; printf '%s\n' "$OUT"
+echo "--- candidates: [$(printf '%s' "$CAND" | tr '\n' ' ')] underway: [$(printf '%s' "$UNDER" | tr '\n' ' ')] ---"
+
+contains "$UNDER" 546 || fail "#546 (PR body 'Closes #546') should be WORK UNDERWAY"
+contains "$CAND" 546 && fail "#546 must NOT be a dispatch candidate (it has an open PR)"
+contains "$CAND" 777 || fail "#777 (only a bare prose mention) should remain a dispatch candidate"
+contains "$UNDER" 777 && fail "#777 must NOT be work-underway — bare body mentions are not links"
+contains "$CAND" 999 || fail "#999 ('hotfix #999' — keyword is a substring) should remain a candidate"
+contains "$UNDER" 999 && fail "#999 must NOT be work-underway — word-boundary must reject 'hotfix'"
+echo "PASS: body closing-keyword link classifies #546 underway; bare #777 / substring #999 stay candidates"
+
+# ---------------------------------------------------------------------------
+# NON-VACUITY: revert the body-scan and prove #546 regresses to a candidate.
+# ---------------------------------------------------------------------------
+REVERTED="$SCRIPT_DIR/.lane-brief.reverted.$$.sh"
+trap 'rm -f "$REVERTED"' EXIT
+# Drop the PR_BODY_REFS contribution from the union (simulates the pre-fix script
+# that only looked at index/title/head). Sibling `source detect-platform.sh` still
+# resolves because the copy lives in the same dir.
+# shellcheck disable=SC2016  # single-quoted on purpose: sed needs the literal $PR_BODY_REFS
+sed 's/"\$PR_BODY_REFS"/""/' "$LANE_BRIEF" > "$REVERTED"
+chmod +x "$REVERTED"
+cmp -s "$LANE_BRIEF" "$REVERTED" && fail "revert sed anchor not found — test is stale"  # byte-identical copy means sed matched nothing
+
+ROUT="$(run_brief "$REVERTED")"
+RCAND="$(section_nums "$ROUT" 'DISPATCH CANDIDATES')"
+RUNDER="$(section_nums "$ROUT" 'WORK UNDERWAY')"
+echo "--- candidates(reverted): [$(printf '%s' "$RCAND" | tr '\n' ' ')] underway: [$(printf '%s' "$RUNDER" | tr '\n' ' ')] ---"
+
+contains "$RCAND" 546 || fail "non-vacuity broken: reverted script should misclassify #546 as a candidate"
+contains "$RUNDER" 546 && fail "non-vacuity broken: reverted script should NOT mark #546 underway"
+echo "PASS (RED-on-revert): without the body-scan, #546 regresses to a dispatch candidate"
+
+echo "ALL PASS: test-lane-brief-pr-linkage.sh"
--- a/packages/mosaic/framework/tools/git/test-pr-metadata-gitea.sh
+++ b/packages/mosaic/framework/tools/git/test-pr-metadata-gitea.sh
@@ -7,9 +7,10 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 WORK_DIR="${MOSAIC_TEST_WORK_DIR:-$PWD/.mosaic-test-work/pr-metadata-gitea}"
 REPO_DIR="$WORK_DIR/repo"
 FIXTURE_DIR="$WORK_DIR/fixtures"
+STUB_DIR="$WORK_DIR/stubs"

 rm -rf "$WORK_DIR"
-mkdir -p "$REPO_DIR" "$FIXTURE_DIR"
+mkdir -p "$REPO_DIR" "$FIXTURE_DIR" "$STUB_DIR"

 git -C "$REPO_DIR" init -q
 git -C "$REPO_DIR" remote add origin https://git.uscllc.com/USC/uconnect.git
@@ -56,6 +57,150 @@ cat > "$FIXTURE_DIR/gitea-error.json" <<'JSON'
 {"message": "user does not exist [uid: 0, name: ]", "url": "https://git.uscllc.com/api/swagger"}
 JSON

+cat > "$STUB_DIR/curl" <<'SH'
+#!/usr/bin/env bash
+set -euo pipefail
+
+output_file=""
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -o)
+            output_file="$2"
+            shift 2
+            ;;
+        -w|-H|-u)
+            shift 2
+            ;;
+        -s|-S|-sS)
+            shift
+            ;;
+        *)
+            shift
+            ;;
+    esac
+done
+
+if [[ -z "$output_file" ]]; then
+    echo "curl stub expected -o <output_file>" >&2
+    exit 2
+fi
+
+case "${MOSAIC_STUB_CURL_MODE:-success}" in
+    success)
+        cat > "$output_file" <<'JSON'
+{
+  "number": 1910,
+  "title": "Live curl path",
+  "state": "open",
+  "user": {"login": "edith"},
+  "head": {"ref": "fix/live-curl-path"},
+  "base": {"ref": "main"},
+  "html_url": "https://git.example.test/acme/widgets/pulls/1910"
+}
+JSON
+        printf '200'
+        ;;
+    cat-fails-after-2xx)
+        rm -f -- "$output_file"
+        ln -s /nonexistent/pr-metadata-body "$output_file"
+        printf '200'
+        ;;
+    *)
+        echo "unknown MOSAIC_STUB_CURL_MODE=${MOSAIC_STUB_CURL_MODE:-}" >&2
+        exit 2
+        ;;
+esac
+SH
+chmod +x "$STUB_DIR/curl"
+
+assert_tmpdir_empty() {
+    # Abort the test run if anything at all is left inside the given directory.
+    local dir="$1" first_hit
+    first_hit=$(find "$dir" -mindepth 1 -print -quit)
+    [[ -z "$first_hit" ]] && return 0
+    echo "Expected tmpfile cleanup, found leftover: $first_hit" >&2
+    find "$dir" -mindepth 1 -maxdepth 1 -ls >&2
+    exit 1
+}
+
+run_curl_success_case() {
+    local tmpdir="$WORK_DIR/tmp-success" stderr_file="$WORK_DIR/curl-success.stderr"
+    local output status
+    mkdir -p "$tmpdir"
+
+    set +e
+    output=$(cd "$REPO_DIR" && \
+        PATH="$STUB_DIR:$PATH" \
+        TMPDIR="$tmpdir" \
+        GITEA_TOKEN="stub-token" \
+        GITEA_URL="https://git.example.test" \
+        MOSAIC_STUB_CURL_MODE="success" \
+        "$SCRIPT_DIR/pr-metadata.sh" -n 1910 2>"$stderr_file")
+    status=$?
+    set -e
+
+    if [[ "$status" -ne 0 ]]; then
+        echo "Expected curl success path to pass, got status $status" >&2
+        cat "$stderr_file" >&2
+        exit 1
+    fi
+    if grep -q "unbound variable" "$stderr_file"; then
+        echo "curl success path emitted unbound-variable cleanup noise" >&2
+        cat "$stderr_file" >&2
+        exit 1
+    fi
+    assert_tmpdir_empty "$tmpdir"
+
+    PR_METADATA_OUTPUT="$output" python3 - <<'PY'
+import json
+import os
+
+data = json.loads(os.environ["PR_METADATA_OUTPUT"])
+assert data["number"] == 1910, data
+assert data["baseRefName"] == "main", data
+assert data["headRefName"] == "fix/live-curl-path", data
+PY
+}
+
+run_curl_early_exit_cleanup_case() {
+    local tmpdir="$WORK_DIR/tmp-early-exit" stderr_file="$WORK_DIR/curl-early-exit.stderr"
+    local output status
+    mkdir -p "$tmpdir"
+    # LC_ALL=C pins the strerror text so the "No such file" grep is locale-proof.
+    set +e
+    output=$(cd "$REPO_DIR" && \
+        PATH="$STUB_DIR:$PATH" \
+        TMPDIR="$tmpdir" \
+        GITEA_TOKEN="stub-token" \
+        GITEA_URL="https://git.example.test" \
+        MOSAIC_STUB_CURL_MODE="cat-fails-after-2xx" LC_ALL=C \
+        "$SCRIPT_DIR/pr-metadata.sh" -n 1910 2>"$stderr_file")
+    status=$?
+    set -e
+
+    if [[ "$status" -eq 0 ]]; then
+        echo "Expected unreadable 2xx body path to fail" >&2
+        printf '%s\n' "$output" >&2
+        exit 1
+    fi
+    if grep -q "unbound variable" "$stderr_file"; then
+        echo "curl early-exit path emitted unbound-variable cleanup noise" >&2
+        cat "$stderr_file" >&2
+        exit 1
+    fi
+    if ! grep -q "No such file or directory" "$stderr_file"; then
+        echo "Expected body-read failure from broken symlink path" >&2
+        cat "$stderr_file" >&2
+        exit 1
+    fi
+    if grep -q "Gitea API returned non-JSON" "$stderr_file"; then
+        echo "curl helper masked body-read failure as later JSON parsing failure" >&2
+        cat "$stderr_file" >&2
+        exit 1
+    fi
+    assert_tmpdir_empty "$tmpdir"
+}
+
 run_case() {
    local fixture="$1" expected_number="$2" expected_head="$3"
    local output
@@ -77,6 +222,8 @@ PY
 run_case "$FIXTURE_DIR/gitea-standard.json" 1905 edith/t_39ce717c-authentik-smoke-gate
 run_case "$FIXTURE_DIR/gitea-fallback.json" 1908 fix/fallback-head
 run_case "$FIXTURE_DIR/gitea-refs-pull-label.json" 1908 fix/t_23fa9e1d-portal-health-backend
+run_curl_success_case
+run_curl_early_exit_cleanup_case

 if cd "$REPO_DIR" && MOSAIC_GITEA_PR_METADATA_RAW_FILE="$FIXTURE_DIR/gitea-error.json" "$SCRIPT_DIR/pr-metadata.sh" -n 1909 >/dev/null 2>"$WORK_DIR/error.log"; then
    echo "Expected API error fixture to fail" >&2
--- a/packages/mosaic/framework/tools/qa/reflect-stop-hook.sh
+++ b/packages/mosaic/framework/tools/qa/reflect-stop-hook.sh
@@ -0,0 +1,197 @@
+#!/usr/bin/env bash
+# reflect-stop-hook.sh — Stop hook (agent reflection loop, durable kernel)
+#
+# At end-of-run, capture the doer's end-state as a structured `reflection.v1`
+# sidecar: the mechanical diff risk-floor plus any self-report the agent left
+# behind. This is the passive capture half of the design (§10 step 1). It does
+# NOT route, score, or gate — it only writes the sidecar; pickup is future work.
+#
+# FAIL-CLOSED: if REFLECTION_MODE is unset or "off", this is a strict no-op.
+# Global registration is therefore safe; the feature only activates when a
+# launcher/profile explicitly sets REFLECTION_MODE=solo|orchestrated.
+#
+# NON-BLOCKING: Stop hooks are observational. This script NEVER emits a
+# `decision` field and ALWAYS exits 0 — it can never fail or stall a session.
+#
+# Environment contract:
+#   REFLECTION_MODE            off|solo|orchestrated   (default: off → no-op)
+#   REFLECTION_DIR             output dir              (default: <repo>/.mosaic/reflections)
+#   REFLECTION_INPUT           self-report JSON        (default: <repo>/.mosaic/reflection-input.json)
+#   REFLECTION_TASK_REF        canonical task ref      (default: <repo>#<branch>)
+#   REFLECTION_AGENT           persona/runtime id      (default: unknown)
+#   REFLECTION_RISK_THRESHOLD  review cutoff [0,1]     (default: 0.5)
+#
+# Risk-floor surface table is kept in sync with the authoritative TS
+# implementation at packages/macp/src/risk-floor.ts (evaluateRiskFloor).
+#
+# Exit codes: always 0 (observational hook).
+
+set -euo pipefail
+
+# ---- fail-closed gate -------------------------------------------------------
+MODE="${REFLECTION_MODE:-off}"
+if [[ "$MODE" != "solo" && "$MODE" != "orchestrated" ]]; then
+  exit 0
+fi
+
+# Read the Stop payload (best-effort; never required).
+INPUT="$(cat || true)"
+
+# Sentinel lock path (global so the EXIT trap can clean it after main returns).
+LOCKFILE=""
+trap 'rm -f "${LOCKFILE:-}" 2>/dev/null || true' EXIT
+
+main() {
+  # Capture one reflection.v1 sidecar for this session: changed files, the
+  # mechanical risk-floor, and the agent's optional self-report. Best-effort:
+  # every failure path returns 0 so the Stop hook stays observational.
+  command -v jq >/dev/null 2>&1 || return 0   # no jq → silently no-op
+  local session_id payload_cwd repo_dir repo_name branch task_ref agent
+  session_id="$(printf '%s' "$INPUT" | jq -r '.session_id // "unknown"' 2>/dev/null || echo unknown)"
+  # Sanitize: session_id is interpolated into file/lock paths — allow safe
+  # filename chars only (defends against ../ or / in the payload).
+  session_id="${session_id//[^a-zA-Z0-9_-]/}"
+  session_id="${session_id:-unknown}"
+  payload_cwd="$(printf '%s' "$INPUT" | jq -r '.cwd // empty' 2>/dev/null || true)"
+
+  # Resolve repo root: prefer git toplevel from the payload cwd, else PWD.
+  local start_dir="${payload_cwd:-$PWD}"
+  repo_dir="$(git -C "$start_dir" rev-parse --show-toplevel 2>/dev/null || echo "$start_dir")"
+  repo_name="$(basename "$repo_dir")"
+  branch="$(git -C "$repo_dir" rev-parse --abbrev-ref HEAD 2>/dev/null || echo detached)"
+
+  task_ref="${REFLECTION_TASK_REF:-${repo_name}#${branch}}"
+  agent="${REFLECTION_AGENT:-unknown}"
+
+  # ---- sentinel guard: avoid re-fire loops --------------------------------
+  local out_dir lock
+  out_dir="${REFLECTION_DIR:-${repo_dir}/.mosaic/reflections}"
+  mkdir -p "$out_dir" 2>/dev/null || return 0
+  lock="${out_dir}/.${session_id}.lock"
+  # noclobber makes the create atomic (fails if the file exists), so two hooks
+  # racing on one session cannot both pass a separate exists-then-create check.
+  if ! ( set -o noclobber; : > "$lock" ) 2>/dev/null; then return 0; fi
+  LOCKFILE="$lock"
+
+  # ---- mechanical: changed files ------------------------------------------
+  # Working surface only (staged + unstaged + untracked, best-effort);
+  # committed history is out of scope. .mosaic/ is excluded: it is agent
+  # scratch (reflections, locks, self-report input), not diff under review.
+  local files
+  files="$(
+    {
+      git -C "$repo_dir" diff --name-only HEAD 2>/dev/null || true
+      git -C "$repo_dir" diff --name-only --staged 2>/dev/null || true
+      git -C "$repo_dir" ls-files --others --exclude-standard 2>/dev/null || true
+    } | sed '/^$/d' | grep -v '^\.mosaic/' | sort -u || true
+  )"
+
+  # ---- mechanical: risk-floor (inline port of evaluateRiskFloor) ----------
+  local threshold="${REFLECTION_RISK_THRESHOLD:-0.5}"
+  [[ "$threshold" =~ ^([0-9]+([.][0-9]*)?|[.][0-9]+)$ ]] || threshold="0.5"   # junk → documented default
+  local top_surface="none" top_weight="0.0" tripping="" f surface weight
+  while IFS= read -r f; do
+    [[ -z "$f" ]] && continue
+    surface="$(classify_surface "$f")"
+    weight="$(surface_weight "$surface")"
+    if awk "BEGIN{exit !($weight > $top_weight)}"; then
+      top_weight="$weight"; top_surface="$surface"; tripping="$f"
+    elif [[ "$surface" == "$top_surface" && "$surface" != "none" ]] && awk "BEGIN{exit !($weight == $top_weight)}"; then
+      tripping="${tripping:+$tripping, }$f"
+    fi
+  done <<< "$files"
+
+  local needs_review reason file_count
+  file_count="$(printf '%s\n' "$files" | sed '/^$/d' | wc -l | tr -d ' ')"
+  if awk -v w="$top_weight" -v t="$threshold" 'BEGIN{exit !(w+0 >= t+0)}'; then needs_review=true; else needs_review=false; fi
+  if [[ "$top_surface" == "none" ]]; then
+    if [[ "$file_count" -eq 0 ]]; then reason="no files changed"; else reason="no sensitive surface in ${file_count} changed file(s)"; fi
+  else
+    reason="${top_surface} surface (weight ${top_weight}) in: ${tripping}"
+  fi
+
+  # ---- self-report merge (optional) ---------------------------------------
+  local input_file degraded self_json
+  input_file="${REFLECTION_INPUT:-${repo_dir}/.mosaic/reflection-input.json}"
+  degraded=true   # stays true unless the input file is exactly one JSON object
+  self_json='{"confidence":null,"most_likely_wrong":null,"known_not_in_diff":null}'
+  if [[ -r "$input_file" ]] && jq -e -s 'length == 1 and (.[0] | type == "object")' "$input_file" >/dev/null 2>&1; then
+    self_json="$(jq '{
+      confidence: (.confidence // null),
+      most_likely_wrong: (.most_likely_wrong // null),
+      known_not_in_diff: (.known_not_in_diff // null)
+    }' "$input_file" 2>/dev/null || echo "$self_json")"
+    degraded=false
+  fi
+
+  # ---- assemble + atomic write --------------------------------------------
+  local ts files_json record tmp final
+  ts="$(date -u +%Y-%m-%dT%H:%M:%S.000Z)"
+  files_json="$(printf '%s\n' "$files" | jq -R . | jq -s 'map(select(length>0))')"
+
+  record="$(jq -n \
+    --arg task_ref "$task_ref" \
+    --arg agent "$agent" \
+    --arg session_id "$session_id" \
+    --arg ts "$ts" \
+    --arg repo "$repo_name" \
+    --argjson needs_review "$needs_review" \
+    --argjson score "$top_weight" \
+    --arg surface "$top_surface" \
+    --arg reason "$reason" \
+    --argjson files "$files_json" \
+    --argjson self "$self_json" \
+    --argjson degraded "$degraded" \
+    --arg mode "$MODE" \
+    '{
+      schema: "reflection.v1",
+      task_ref: $task_ref,
+      agent: $agent,
+      session_id: $session_id,
+      timestamp: $ts,
+      repo: $repo,
+      confidence: $self.confidence,
+      most_likely_wrong: $self.most_likely_wrong,
+      known_not_in_diff: $self.known_not_in_diff,
+      risk: { needs_review: $needs_review, score: $score, surface: $surface, reason: $reason },
+      files_changed: $files,
+      provenance: { source: "stop-hook", reflection_attempt: 1, degraded: $degraded, reflection_mode: $mode }
+    }' 2>/dev/null || true)"
+
+  [[ -z "$record" ]] && return 0
+
+  final="${out_dir}/${session_id}-${ts//[:]/}.reflection.json"
+  tmp="${final}.tmp"
+  printf '%s\n' "$record" > "$tmp" 2>/dev/null || { rm -f "$tmp" 2>/dev/null; return 0; }
+  mv -f "$tmp" "$final" 2>/dev/null || rm -f "$tmp" 2>/dev/null || true
+}
+
+# classify_surface PATH → first matching surface, checked riskiest-first (mirrors TS)
+classify_surface() {
+  local path="$1"
+  grep -qiE 'auth|login|session|token|permission|rbac|credential|secret' <<< "$path" && { echo auth; return; }
+  grep -qiE 'migration|prisma|schema|\.sql|entity|repository|seed' <<< "$path" && { echo data; return; }
+  grep -qiE 'docker|\.woodpecker|compose|traefik|deploy|helm|k8s|terraform' <<< "$path" && { echo infra; return; }
+  grep -qiE 'package\.json|tsconfig|turbo\.json|pnpm-|\.config\.|eslint|vite' <<< "$path" && { echo build; return; }
+  grep -qE '\.tsx|\.css|components/|apps/web/' <<< "$path" && { echo ui; return; }
+  grep -qE '\.spec\.|\.test\.|__tests__/' <<< "$path" && { echo test; return; }
+  grep -qE '\.md$|docs/' <<< "$path" && { echo docs; return; }
+  echo none
+}
+
+# surface_weight SURFACE → numeric risk weight (mirrors TS SURFACE_RULES)
+surface_weight() {
+  local weight="0.0"          # any unrecognised surface carries no weight
+  if   [[ "$1" == "auth"  ]]; then weight="1.0"
+  elif [[ "$1" == "data"  ]]; then weight="0.9"
+  elif [[ "$1" == "infra" ]]; then weight="0.85"
+  elif [[ "$1" == "build" ]]; then weight="0.6"
+  elif [[ "$1" == "ui"    ]]; then weight="0.4"
+  elif [[ "$1" == "test"  ]]; then weight="0.2"
+  elif [[ "$1" == "docs"  ]]; then weight="0.1"
+  fi
+  echo "$weight"
+}
+
+main || true
+exit 0
--- a/packages/mosaic/framework/tools/tmux/README.md
+++ b/packages/mosaic/framework/tools/tmux/README.md
@@ -31,9 +31,12 @@ Prepends the preamble automatically (auto-detecting your own `host:session`) and
 delivers reliably to local OR remote panes.

 ```bash
-# Local target (same host)
+# Local target (same host, default tmux server)
 agent-send.sh -s <dst_session> -m "message"

+# Local target on a Mosaic fleet socket
+agent-send.sh -L mosaic-factory -s '=coder0' -m "message"
+
 # Remote target (over ssh)
 agent-send.sh -H user@host -s <dst_session> -m "message"

@@ -42,10 +45,27 @@ agent-send.sh -H user@host -s <dst_session> -f msg.txt
 echo "msg" | agent-send.sh -s <dst_session>
 ```

-Key flags: `-s` dst session (required) · `-H` ssh target for remote · `-n` dst
+Key flags: `-L` named tmux socket · `-s` dst session (required) · `-H` ssh target for remote · `-n` dst
 hostname for the preamble (else auto-resolved) · `-m`/`-f`/stdin body · `-S`
 override source label · `-v` verbose · `-r N` Enter-flush attempts.

+For durable fleet use, prefer exact tmux targets such as `=coder0`. The helper
+normalizes exact session targets to pane-qualified targets internally so pane
+commands do not fall back to tmux's prefix matching behavior.
+
+## Named socket isolation
+
+Durable Mosaic fleets should use a dedicated tmux socket, for example:
+
+```bash
+tmux -L mosaic-factory ls
+agent-send.sh -L mosaic-factory -s '=coder0' -m "status?"
+send-message.sh -L mosaic-factory -t '=coder0' -m "raw pane message"
+```
+
+This keeps fleet operations away from the user's default tmux server. It is the
+safe rollout path on hosts that already have manual tmux sessions.
+
 ## Why a helper exists (the submission gotcha)

 Pasting into an interactive REPL via raw `tmux send-keys` is unreliable: a
@@ -67,6 +87,7 @@ message crosses the wire as base64 (`-b`) to avoid all shell-quoting hazards.

 - `agent-send.sh` — inter-agent wrapper (preamble + local/remote dispatch).
 - `send-message.sh` — low-level reliable single-pane submitter (`-b` base64 input).
+- `test-send-message-socket.sh` — smoke test for named-socket isolation.

 ## Distribution

--- a/packages/mosaic/framework/tools/tmux/agent-send.sh
+++ b/packages/mosaic/framework/tools/tmux/agent-send.sh
@@ -23,12 +23,13 @@
 #   the remote host; only bash + tmux + base64 (standard).
 #
 # USAGE
-#   agent-send.sh -s <dst_session> -m "message"                 # local target
-#   agent-send.sh -H user@host -s <dst_session> -m "message"    # remote target
-#   agent-send.sh -H user@host -n <dst_hostname> -s <sess> -f msg.txt
-#   echo "msg" | agent-send.sh -H user@host -s <dst_session>
+#   agent-send.sh [-L socket] -s <dst_session> -m "message"                 # local target
+#   agent-send.sh [-L socket] -H user@host -s <dst_session> -m "message"    # remote target
+#   agent-send.sh [-L socket] -H user@host -n <dst_hostname> -s <sess> -f msg.txt
+#   echo "msg" | agent-send.sh [-L socket] -H user@host -s <dst_session>
 #
 # OPTIONS
+#   -L NAME         tmux socket name passed to `tmux -L NAME` on the target host
 #   -s DST_SESSION  target tmux session (or session:window.pane)   [required]
 #   -H SSH_TARGET   ssh target (user@host) for a remote pane; omit for local
 #   -n DST_HOST     hostname to show in the preamble for the target.
@@ -47,12 +48,13 @@ set -uo pipefail
 SELF_DIR=$(cd -- "$(dirname -- "$0")" && pwd)
 SENDER="$SELF_DIR/send-message.sh"

-DST_SESSION=""; SSH_TARGET=""; DST_HOST=""; MSG=""; FILE=""
+DST_SESSION=""; SSH_TARGET=""; DST_HOST=""; MSG=""; FILE=""; SOCKET_NAME=""
 SRC_LABEL=""; RETRIES=2; VERBOSE=0
 usage() { sed -n '2,44p' "$0"; exit "${1:-3}"; }

-while getopts "s:H:n:m:f:S:r:vh" o; do
+while getopts "L:s:H:n:m:f:S:r:vh" o; do
  case "$o" in
+    L) SOCKET_NAME=$OPTARG ;;
    s) DST_SESSION=$OPTARG ;; H) SSH_TARGET=$OPTARG ;; n) DST_HOST=$OPTARG ;;
    m) MSG=$OPTARG ;; f) FILE=$OPTARG ;; S) SRC_LABEL=$OPTARG ;;
    r) RETRIES=$OPTARG ;; v) VERBOSE=1 ;; h) usage 0 ;; *) usage 3 ;;
@@ -70,8 +72,12 @@ fi

 # Source label: this agent's host:session (auto-detected, overridable).
 if [ -z "$SRC_LABEL" ]; then
+  # The sender's session lives on the server this shell runs in ($TMUX), which
+  # plain `tmux` already targets. -L names the *destination* server, so only
+  # fall back to it when we are not inside any tmux (e.g. a fleet supervisor).
+  tmux_cmd=(tmux); [ -n "${TMUX:-}" ] || [ -z "$SOCKET_NAME" ] || tmux_cmd+=(-L "$SOCKET_NAME")
   src_host=$(hostname -s 2>/dev/null || echo "?")
-  src_sess=$(tmux display-message -p '#S' 2>/dev/null || echo "?")
+  src_sess=$("${tmux_cmd[@]}" display-message -p '#S' 2>/dev/null || echo "?")
   SRC_LABEL="${src_host}:${src_sess}"
 fi

@@ -89,12 +95,16 @@ FULL="${PREAMBLE} ${MSG}"
 B64=$(printf '%s' "$FULL" | base64 -w0)

 vflag=""; [ "$VERBOSE" = 1 ] && vflag="-v"
+socket_args=()
+if [ -n "$SOCKET_NAME" ]; then
+  socket_args=(-L "$SOCKET_NAME")
+fi

 if [ -z "$SSH_TARGET" ]; then
  # Local pane: call the canonical sender directly.
-  exec "$SENDER" -t "$DST_SESSION" -b "$B64" -r "$RETRIES" $vflag
+  exec "$SENDER" "${socket_args[@]}" -t "$DST_SESSION" -b "$B64" -r "$RETRIES" $vflag
 else
  # Remote pane: ship the sender over ssh and run it local to the target.
  ssh -o ConnectTimeout=10 "$SSH_TARGET" \
-      "bash -s -- -t '$DST_SESSION' -b '$B64' -r '$RETRIES' $vflag" < "$SENDER"
+      "bash -s -- ${socket_args[*]@Q} -t '$DST_SESSION' -b '$B64' -r '$RETRIES' $vflag" < "$SENDER"
 fi
--- a/packages/mosaic/framework/tools/tmux/send-message.sh
+++ b/packages/mosaic/framework/tools/tmux/send-message.sh
@@ -13,12 +13,13 @@
 #   no-op in Claude Code, so the double-Enter is safe.
 #
 # USAGE
-#   send-message.sh -t <target> -m "message"
-#   send-message.sh -t <target> -f <file>
-#   echo "message" | send-message.sh -t <target>
-#   ssh host bash -s -- -t <target> -b "$(base64 -w0 <<<msg)" < send-message.sh
+#   send-message.sh [-L socket_name] -t <target> -m "message"
+#   send-message.sh [-L socket_name] -t <target> -f <file>
+#   echo "message" | send-message.sh [-L socket_name] -t <target>
+#   ssh host bash -s -- -L socket -t <target> -b "$(base64 -w0 <<<msg)" < send-message.sh
 #
 # OPTIONS
+#   -L NAME     tmux socket name passed to `tmux -L NAME` (optional)
 #   -t TARGET   tmux target: session, or session:window.pane  [required]
 #   -m MESSAGE  message text (single- or multi-line)
 #   -f FILE     read message from FILE instead of -m
@@ -34,11 +35,12 @@
 #   3  usage error
 set -uo pipefail

-TARGET=""; MSG=""; FILE=""; B64=""; RETRIES=2; VERBOSE=0
+SOCKET_NAME=""; TARGET=""; MSG=""; FILE=""; B64=""; RETRIES=2; VERBOSE=0
 usage() { sed -n '2,34p' "$0"; exit "${1:-3}"; }

-while getopts "t:m:f:b:r:vh" o; do
+while getopts "L:t:m:f:b:r:vh" o; do
  case "$o" in
+    L) SOCKET_NAME=$OPTARG ;;
    t) TARGET=$OPTARG ;; m) MSG=$OPTARG ;; f) FILE=$OPTARG ;; b) B64=$OPTARG ;;
    r) RETRIES=$OPTARG ;; v) VERBOSE=1 ;; h) usage 0 ;; *) usage 3 ;;
  esac
@@ -51,8 +53,21 @@ elif [ -z "$MSG" ] && [ ! -t 0 ]; then MSG=$(cat)
 fi
 [ -n "$MSG" ] || { echo "ERROR: empty message (use -m, -f, or stdin)" >&2; exit 3; }

+tmux_cmd=(tmux)
+if [ -n "$SOCKET_NAME" ]; then
+  tmux_cmd+=(-L "$SOCKET_NAME")
+fi
+
+# Pane-level commands (capture-pane, send-keys, ...) do not reliably accept a bare
+# `=session`. Appending ":" keeps the exact-name match and resolves to the
+# session's current window / active pane — no base-index assumption like :0.0.
+EFFECTIVE_TARGET=$TARGET
+if [[ "$TARGET" == =* && "$TARGET" != *:* ]]; then
+  EFFECTIVE_TARGET="${TARGET}:"
+fi
+
 # Target must resolve to a live pane.
-if ! tmux list-panes -t "$TARGET" >/dev/null 2>&1; then
+if ! "${tmux_cmd[@]}" list-panes -t "$EFFECTIVE_TARGET" >/dev/null 2>&1; then
  echo "ERROR: tmux target not found: $TARGET" >&2; exit 1
 fi

@@ -62,18 +77,18 @@ snippet=$(printf '%s' "$MSG" | tr '\n' ' ' | tr -s ' ' | sed 's/[^[:print:]]//g'

 # 1) Paste the body as a bracketed paste so multi-line content does not submit
 #    line-by-line. load-buffer/paste-buffer is far safer than `send-keys -l`.
-printf '%s' "$MSG" | tmux load-buffer -b __mosaic_send -
+printf '%s' "$MSG" | "${tmux_cmd[@]}" load-buffer -b __mosaic_send -
 # -p = bracketed paste when the client supports it; fall back if not.
-tmux paste-buffer -d -p -b __mosaic_send -t "$TARGET" 2>/dev/null \
-  || tmux paste-buffer -d -b __mosaic_send -t "$TARGET"
+"${tmux_cmd[@]}" paste-buffer -d -p -b __mosaic_send -t "$EFFECTIVE_TARGET" 2>/dev/null \
+  || "${tmux_cmd[@]}" paste-buffer -d -b __mosaic_send -t "$EFFECTIVE_TARGET"
 sleep 0.5

 # 2) Submit, then verify; flush with another Enter if it is still a draft.
 status="sent"
 for attempt in $(seq 1 $((RETRIES + 1))); do
-  tmux send-keys -t "$TARGET" Enter
+  "${tmux_cmd[@]}" send-keys -t "$EFFECTIVE_TARGET" Enter
  sleep 1.2
-  pane=$(tmux capture-pane -t "$TARGET" -p 2>/dev/null)
+  pane=$("${tmux_cmd[@]}" capture-pane -t "$EFFECTIVE_TARGET" -p 2>/dev/null)

  if printf '%s' "$pane" | grep -qF "$QUEUED_RE"; then
    status="queued"; break
--- a/packages/mosaic/framework/tools/tmux/test-send-message-socket.sh
+++ b/packages/mosaic/framework/tools/tmux/test-send-message-socket.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+SCRIPT_DIR=$(cd -- "$(dirname -- "$0")" && pwd)
+SEND_MESSAGE="$SCRIPT_DIR/send-message.sh"
+AGENT_SEND="$SCRIPT_DIR/agent-send.sh"
+SOCKET="mosaic-test-$RANDOM-$$"
+TARGET="target-$RANDOM"
+DEFAULT_TARGET="default-target-$RANDOM"
+TMPDIR=$(mktemp -d)
+trap 'tmux -L "$SOCKET" kill-server >/dev/null 2>&1 || true; tmux kill-session -t "$DEFAULT_TARGET" >/dev/null 2>&1 || true; rm -rf "$TMPDIR"' EXIT
+
+fail() {
+  echo "FAIL: $*" >&2
+  exit 1
+}
+
+require_tmux() {
+  command -v tmux >/dev/null 2>&1 || fail "tmux is required"
+}
+
+capture_named() {
+  tmux -L "$SOCKET" capture-pane -t "=$TARGET:0.0" -p
+}
+
+capture_default() {
+  tmux capture-pane -t "=$DEFAULT_TARGET:0.0" -p
+}
+
+require_tmux
+
+tmux -L "$SOCKET" new-session -d -s "$TARGET" -c "$TMPDIR" 'bash --noprofile --norc -i'
+tmux new-session -d -s "$DEFAULT_TARGET" -c "$TMPDIR" 'bash --noprofile --norc -i'
+# Tool output goes under $TMPDIR (removed by the EXIT trap), not a shared /tmp path.
+"$SEND_MESSAGE" -L "$SOCKET" -t "=$TARGET" -m "named socket hello" >"$TMPDIR/send-message-named.out"
+sleep 0.2
+capture_named | grep -qF "named socket hello" || fail "send-message.sh did not deliver to named socket"
+if capture_default | grep -qF "named socket hello"; then
+  fail "send-message.sh leaked named-socket message to default tmux server"
+fi
+
+"$AGENT_SEND" -L "$SOCKET" -S "tester:source" -s "=$TARGET" -m "agent socket hello" >"$TMPDIR/agent-send-named.out"
+sleep 0.2
+capture_named | grep -qF "[tester:source ->" || fail "agent-send.sh did not include preamble"
+capture_named | grep -qF "agent socket hello" || fail "agent-send.sh did not deliver to named socket"
+if capture_default | grep -qF "agent socket hello"; then
+  fail "agent-send.sh leaked named-socket message to default tmux server"
+fi
+
+echo "ok - named tmux socket send tools"
--- a/packages/mosaic/framework/tools/woodpecker/README.md
+++ b/packages/mosaic/framework/tools/woodpecker/README.md
@@ -26,11 +26,12 @@ A Woodpecker API token is required. To configure:

 ## Scripts

-| Script                | Purpose                                     |
-| --------------------- | ------------------------------------------- |
-| `pipeline-list.sh`    | List recent pipelines for a repo            |
-| `pipeline-status.sh`  | Get status of a specific or latest pipeline |
-| `pipeline-trigger.sh` | Trigger a new pipeline build                |
+| Script                | Purpose                                      |
+| --------------------- | -------------------------------------------- |
+| `pipeline-list.sh`    | List recent pipelines for a repo             |
+| `pipeline-status.sh`  | Get status of a specific or latest pipeline  |
+| `pipeline-trigger.sh` | Trigger a new pipeline build                 |
+| `ci-wait.sh`          | Block until pipeline(s) reach terminal state |

 ## Common Options

@@ -55,4 +56,7 @@ A Woodpecker API token is required. To configure:

 # Trigger a build on a specific branch
 ~/.config/mosaic/tools/woodpecker/pipeline-trigger.sh -b feature/my-branch
+
+# Block until one or more pipelines finish (event-driven CI wait)
+~/.config/mosaic/tools/woodpecker/ci-wait.sh -r usc/uconnect -n 3917 -n 3918
 ```
--- a/packages/mosaic/framework/tools/woodpecker/ci-wait.sh
+++ b/packages/mosaic/framework/tools/woodpecker/ci-wait.sh
@@ -0,0 +1,86 @@
+#!/usr/bin/env bash
+# ci-wait.sh — block until one or more Woodpecker pipelines reach terminal state.
+#
+# Problem it solves: orchestrators hand-author a `while true; curl .../repos/1/pipelines/$n
+# ...; sleep` loop for every CI wait. Those loops HARDCODE Woodpecker repo id 1 (only
+# correct for whichever repo happens to be id 1), re-implement URL building with raw
+# curl, and tend to get armed as tight <300s ScheduleWakeup polls (each poll = a full
+# wake+reload+recheck cycle). This encapsulates the loop once, on top of the existing
+# `pipeline-status.sh` wrapper (which resolves repo->id correctly and is instance-aware),
+# so a CI wait becomes a one-liner.
+#
+# Intended use: as the COMMAND of a Monitor / event-driven re-invoke (primary), paired
+# with a single long (>=1500s) timed fallback — NOT as a tight standalone poll.
+#
+# Usage:
+#   ci-wait.sh -r <owner/repo> -n <num> [-n <num> ...] [-a <instance>] [-i <interval>] [-t <timeout>]
+#   ci-wait.sh -r usc/uconnect -n 3917 -n 3918              # wait for both, infer instance
+#   ci-wait.sh -r usc/uconnect -n 3922 -a usc -i 30 -t 2400
+#
+# Instance is inferred from the owner (usc->usc, mosaicstack/mosaic->mosaic) unless -a given.
+# Exit: 0 = all pipelines terminal AND all 'success'; 1 = >=1 terminal non-success;
+#       2 = usage/precondition error; 3 = timeout before all terminal.
+set -euo pipefail
+
+# Resolve pipeline-status.sh as a sibling, matching how the woodpecker tools source
+# _lib.sh — works under the installed runtime AND an in-repo checkout, no MOSAIC_HOME dep.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PS="$SCRIPT_DIR/pipeline-status.sh"
+
+REPO="" INSTANCE="" INTERVAL=30 TIMEOUT=3600
+NUMS=()
+while getopts "r:n:a:i:t:h" opt; do
+  case "$opt" in
+    r) REPO="$OPTARG" ;;
+    n) [[ " ${NUMS[*]-} " == *" $OPTARG "* ]] || NUMS+=("$OPTARG") ;;   # drop repeats: they could never "all finish"
+    a) INSTANCE="$OPTARG" ;;
+    i) INTERVAL="$OPTARG" ;;
+    t) TIMEOUT="$OPTARG" ;;
+    h) grep '^#' "$0" | sed -e 1d -e 's/^# \{0,1\}//'; exit 0 ;;   # 1d skips the shebang
+    *) echo "see -h" >&2; exit 2 ;;
+  esac
+done
+[[ -n "$REPO" ]] || { echo "FATAL: -r <owner/repo> required" >&2; exit 2; }
+[[ ${#NUMS[@]} -gt 0 ]] || { echo "FATAL: at least one -n <pipeline-number> required" >&2; exit 2; }
+[[ -x "$PS" ]] || { echo "FATAL: pipeline-status.sh not found/executable at $PS" >&2; exit 2; }
+[[ "$TIMEOUT" =~ ^[0-9]+$ && "$INTERVAL" =~ ^[0-9]+([.][0-9]+)?$ ]] || { echo "FATAL: -t needs whole seconds, -i a non-negative number" >&2; exit 2; }
+
+# Infer Woodpecker instance from owner unless overridden (matches the git-wrapper convention).
+if [[ -z "$INSTANCE" ]]; then
+  case "${REPO%%/*}" in
+    usc|USC) INSTANCE=usc ;;
+    mosaicstack|mosaic) INSTANCE=mosaic ;;
+    *) echo "FATAL: cannot infer Woodpecker instance for owner '${REPO%%/*}' — pass -a <instance>" >&2; exit 2 ;;
+  esac
+fi
+
+command -v jq >/dev/null || { echo "FATAL: jq not found" >&2; exit 2; }
+
+TERMINAL_RE='^(success|failure|error|killed|declined|blocked)$'
+declare -A STATE=()        # num -> terminal status, once reached
+SECONDS=0                  # bash's built-in elapsed-seconds counter; timeout no longer depends on date(1)
+
+echo "ci-wait: $REPO pipelines [${NUMS[*]}] (instance=$INSTANCE, every ${INTERVAL}s, timeout ${TIMEOUT}s)"
+while true; do
+  for n in "${NUMS[@]}"; do
+    [[ -n "${STATE[$n]:-}" ]] && continue
+    s=$("$PS" -r "$REPO" -n "$n" -a "$INSTANCE" -f json 2>/dev/null | jq -r '.status // empty' 2>/dev/null || true)
+    if [[ "$s" =~ $TERMINAL_RE ]]; then
+      STATE[$n]="$s"
+      echo "  pipeline $n TERMINAL: $s"
+    fi
+  done
+  # all terminal?
+  if [[ ${#STATE[@]} -eq ${#NUMS[@]} ]]; then
+    bad=0
+    for n in "${NUMS[@]}"; do [[ "${STATE[$n]}" == "success" ]] || bad=1; done
+    if [[ $bad -eq 0 ]]; then echo "ci-wait: ALL SUCCESS"; exit 0; fi
+    echo "ci-wait: all terminal, NOT all success — $(for n in "${NUMS[@]}"; do printf '%s=%s ' "$n" "${STATE[$n]}"; done)"
+    exit 1
+  fi
+  if [[ $SECONDS -ge $TIMEOUT ]]; then
+    echo "ci-wait: TIMEOUT after ${TIMEOUT}s — pending: $(for n in "${NUMS[@]}"; do [[ -z "${STATE[$n]:-}" ]] && printf '%s ' "$n"; done)"
+    exit 3
+  fi
+  sleep "$INTERVAL"
+done
--- a/packages/mosaic/framework/tools/woodpecker/test-ci-wait-exit-matrix.sh
+++ b/packages/mosaic/framework/tools/woodpecker/test-ci-wait-exit-matrix.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+# Regression harness for ci-wait.sh terminal-state aggregation and exit codes.
+#
+# ci-wait.sh wraps pipeline-status.sh and blocks until every requested pipeline
+# reaches a terminal Woodpecker state, then maps the aggregate to an exit code.
+# That contract is what callers arm a Monitor/timed-fallback around, so it must be
+# exact. This harness drives ci-wait.sh against a stub pipeline-status.sh whose
+# per-pipeline status is fixture-controlled, and asserts the full exit matrix:
+#
+#   0 = every pipeline terminal AND all 'success'
+#   1 = every pipeline terminal, at least one non-success
+#   2 = usage/precondition error (missing -n)
+#   3 = timeout before all pipelines terminal
+#
+# Non-vacuity: each case pins a DISTINCT exit code to a distinct fixture, so a
+# regression in success-aggregation (case 0 vs 1), terminal detection (case 3),
+# or arg validation (case 2) flips exactly one assertion RED.
+
+set -euo pipefail
+
+CIW_SRC="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/ci-wait.sh"
+WORK_DIR="${MOSAIC_TEST_WORK_DIR:-$PWD/.mosaic-test-work/ci-wait-exit-matrix}"
+TOOL_DIR="$WORK_DIR/tool"
+
+rm -rf "$WORK_DIR"
+mkdir -p "$TOOL_DIR"
+
+# ci-wait.sh resolves pipeline-status.sh as a sibling ($SCRIPT_DIR/pipeline-status.sh),
+# so we run a COPY of ci-wait.sh next to a stub sibling we control.
+cp "$CIW_SRC" "$TOOL_DIR/ci-wait.sh"
+chmod +x "$TOOL_DIR/ci-wait.sh"
+
+# Stub pipeline-status.sh: emits {"status":"<s>"} where <s> comes from env
+# CIW_STATUS_<num> (default "running" = non-terminal, drives the timeout path).
+cat > "$TOOL_DIR/pipeline-status.sh" <<'SH'
+#!/usr/bin/env bash
+set -euo pipefail
+num=""
+while getopts "r:n:a:f:" opt; do case "$opt" in n) num="$OPTARG" ;; *) : ;; esac; done
+var="CIW_STATUS_${num}"
+printf '{"status":"%s"}\n' "${!var:-running}"
+SH
+chmod +x "$TOOL_DIR/pipeline-status.sh"
+
+CIW="$TOOL_DIR/ci-wait.sh"
+
+run_expect() { # $1 = expected exit  $2 = label ; rest = args
+  local want="$1" label="$2"; shift 2
+  local rc=0
+  "$CIW" "$@" >/dev/null 2>&1 || rc=$?
+  if [[ "$rc" -ne "$want" ]]; then
+    echo "FAIL [$label]: expected exit $want, got $rc" >&2; exit 1
+  fi
+  echo "PASS [$label]: exit $rc"
+}
+
+# 0 — both pipelines terminal + success
+CIW_STATUS_100=success CIW_STATUS_101=success \
+  run_expect 0 "all-success" -r mosaic/stack -n 100 -n 101 -a mosaic -i 1 -t 30
+
+# 1 — both terminal, one failure
+CIW_STATUS_100=success CIW_STATUS_101=failure \
+  run_expect 1 "terminal-not-success" -r mosaic/stack -n 100 -n 101 -a mosaic -i 1 -t 30
+
+# 1 — other terminal non-success states still map to 1 (error/killed)
+CIW_STATUS_100=error CIW_STATUS_101=killed \
+  run_expect 1 "terminal-error-killed" -r mosaic/stack -n 100 -n 101 -a mosaic -i 1 -t 30
+
+# 3 — a pipeline never reaches terminal state before timeout
+CIW_STATUS_100=success CIW_STATUS_101=running \
+  run_expect 3 "timeout-pending" -r mosaic/stack -n 100 -n 101 -a mosaic -i 1 -t 0
+
+# 2 — usage error: no -n
+run_expect 2 "usage-missing-n" -r mosaic/stack -a mosaic
+
+echo "ALL PASS: test-ci-wait-exit-matrix.sh"
--- a/packages/mosaic/src/commands/launch.spec.ts
+++ b/packages/mosaic/src/commands/launch.spec.ts
@@ -1,6 +1,15 @@
 import { describe, it, expect, vi, beforeEach, afterEach, type MockInstance } from 'vitest';
 import { Command } from 'commander';
-import { buildPiSkillArgs, registerRuntimeLaunchers, type RuntimeLaunchHandler } from './launch.js';
+import { mkdtempSync, mkdirSync, writeFileSync, symlinkSync, rmSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import {
+  buildPiSkillArgs,
+  enumerateSkillDirs,
+  piForceSkillNames,
+  registerRuntimeLaunchers,
+  type RuntimeLaunchHandler,
+} from './launch.js';

 /**
 * Tests for the commander wiring between `mosaic <runtime>` / `mosaic yolo <runtime>`
@@ -23,6 +32,7 @@ function buildProgram(handler: RuntimeLaunchHandler): Command {
 }

 const fakeSkills = ['--skill', '/skills/test-driven-development', '--skill', '/skills/pdf'];
+const fakeForced = ['--skill', '/skills/mosaic-tools'];

 // `process.exit` returns `never`, so vi.spyOn demands a replacement with the
 // same signature. We throw from the mock to short-circuit into test-land.
@@ -66,16 +76,42 @@ describe('registerRuntimeLaunchers — non-yolo subcommands', () => {
 });

 describe('buildPiSkillArgs', () => {
-  it('defaults to disabling Pi skill discovery to keep startup context small', () => {
-    expect(buildPiSkillArgs([], {}, fakeSkills)).toEqual(['--no-skills']);
+  it('disables auto-discovery but force-loads fleet-critical skills by default', () => {
+    expect(buildPiSkillArgs([], {}, fakeSkills, fakeForced)).toEqual([
+      '--no-skills',
+      '--skill',
+      '/skills/mosaic-tools',
+    ]);
  });

-  it('keeps explicit user skills while disabling automatic discovery', () => {
-    expect(buildPiSkillArgs(['--skill', '/tmp/custom'], {}, fakeSkills)).toEqual(['--no-skills']);
+  it('ignores _runtimeArgs (user --skill flags reach Pi via the launch handler, not here)', () => {
+    expect(buildPiSkillArgs(['--skill', '/tmp/custom'], {}, fakeSkills, fakeForced)).toEqual([
+      '--no-skills',
+      '--skill',
+      '/skills/mosaic-tools',
+    ]);
  });

-  it('supports legacy all-skills mode without double-loading settings skills', () => {
-    expect(buildPiSkillArgs([], { MOSAIC_PI_SKILL_MODE: 'all' }, fakeSkills)).toEqual([
+  it('emits only --no-skills when no forced skills are present on disk', () => {
+    expect(buildPiSkillArgs([], {}, fakeSkills, [])).toEqual(['--no-skills']);
+  });
+
+  it('all-skills mode merges the forced set in without duplicating discovered skills', () => {
+    expect(buildPiSkillArgs([], { MOSAIC_PI_SKILL_MODE: 'all' }, fakeSkills, fakeForced)).toEqual([
+      '--no-skills',
+      '--skill',
+      '/skills/test-driven-development',
+      '--skill',
+      '/skills/pdf',
+      '--skill',
+      '/skills/mosaic-tools',
+    ]);
+  });
+
+  it('all-skills mode does not double-load a forced skill already discovered', () => {
+    expect(
+      buildPiSkillArgs([], { MOSAIC_PI_SKILL_MODE: 'all' }, fakeSkills, ['--skill', '/skills/pdf']),
+    ).toEqual([
      '--no-skills',
      '--skill',
      '/skills/test-driven-development',
@@ -84,8 +120,117 @@ describe('buildPiSkillArgs', () => {
    ]);
  });

-  it('supports native Pi discovery when explicitly requested', () => {
-    expect(buildPiSkillArgs([], { MOSAIC_PI_SKILL_MODE: 'discover' }, fakeSkills)).toEqual([]);
+  it('force-loads fleet skills under native Pi discovery when not already discoverable', () => {
+    // Empty native set => Pi would not find mosaic-tools on its own, so force it.
+    expect(
+      buildPiSkillArgs([], { MOSAIC_PI_SKILL_MODE: 'discover' }, fakeSkills, fakeForced, new Set()),
+    ).toEqual(['--skill', '/skills/mosaic-tools']);
+  });
+
+  it('discover mode drops a forced skill Pi already discovers natively (no double-load)', () => {
+    // mosaic-tools is reachable from a Pi native root, so native discovery
+    // covers it — forcing it again would register the same skill twice.
+    expect(
+      buildPiSkillArgs(
+        [],
+        { MOSAIC_PI_SKILL_MODE: 'discover' },
+        fakeSkills,
+        fakeForced,
+        new Set(['/skills/mosaic-tools']),
+      ),
+    ).toEqual([]);
+  });
+
+  it('discover mode keeps a forced skill that no native root provides', () => {
+    expect(
+      buildPiSkillArgs(
+        [],
+        { MOSAIC_PI_SKILL_MODE: 'discover' },
+        fakeSkills,
+        fakeForced,
+        new Set(['/skills/some-other-skill']),
+      ),
+    ).toEqual(['--skill', '/skills/mosaic-tools']);
+  });
+
+  it('discover mode collapses a forced skill listed twice to a single --skill', () => {
+    // Mirror 'all' mode: intra-forced-set duplicates (same realpath) dedup.
+    expect(
+      buildPiSkillArgs(
+        [],
+        { MOSAIC_PI_SKILL_MODE: 'discover' },
+        fakeSkills,
+        ['--skill', '/skills/mosaic-tools', '--skill', '/skills/mosaic-tools'],
+        new Set(),
+      ),
+    ).toEqual(['--skill', '/skills/mosaic-tools']);
+  });
+});
+
+describe('enumerateSkillDirs (real FS)', () => {
+  let root: string;
+
+  beforeEach(() => {
+    root = mkdtempSync(join(tmpdir(), 'mosaic-skills-'));
+  });
+
+  afterEach(() => {
+    rmSync(root, { recursive: true, force: true });
+  });
+
+  function makeSkill(parent: string, name: string): string {
+    const dir = join(parent, name);
+    mkdirSync(dir, { recursive: true });
+    writeFileSync(join(dir, 'SKILL.md'), `# ${name}\n`);
+    return dir;
+  }
+
+  it('accepts a symlinked skill dir (regression: synced fleet skills are symlinks)', () => {
+    // Real skill lives under `canonical/`; the scanned root only has a symlink to it.
+    const canonical = makeSkill(join(root, 'canonical'), 'mosaic-tools');
+    const scanned = join(root, 'scanned');
+    mkdirSync(scanned, { recursive: true });
+    symlinkSync(canonical, join(scanned, 'mosaic-tools'), 'dir');
+
+    expect(enumerateSkillDirs([scanned])).toEqual(['--skill', join(scanned, 'mosaic-tools')]);
+  });
+
+  it('dedups by real path when the same skill is reachable from two roots', () => {
+    // Root A holds the real dir; root B symlinks to it — one --skill, not two.
+    const rootA = join(root, 'a');
+    const rootB = join(root, 'b');
+    const real = makeSkill(rootA, 'mosaic-tools');
+    mkdirSync(rootB, { recursive: true });
+    symlinkSync(real, join(rootB, 'mosaic-tools'), 'dir');
+
+    expect(enumerateSkillDirs([rootA, rootB])).toEqual(['--skill', real]);
+  });
+
+  it('skips directories without a SKILL.md and missing roots', () => {
+    mkdirSync(join(root, 'present', 'not-a-skill'), { recursive: true });
+    makeSkill(join(root, 'present'), 'real-skill');
+
+    expect(enumerateSkillDirs([join(root, 'present'), join(root, 'does-not-exist')])).toEqual([
+      '--skill',
+      join(root, 'present', 'real-skill'),
+    ]);
+  });
+});
+
+describe('piForceSkillNames', () => {
+  it('defaults to mosaic-tools when MOSAIC_PI_FORCE_SKILLS is unset', () => {
+    expect(piForceSkillNames({})).toEqual(['mosaic-tools']);
+  });
+
+  it('treats an empty string as "disable force-loading" (distinct from unset)', () => {
+    expect(piForceSkillNames({ MOSAIC_PI_FORCE_SKILLS: '' })).toEqual([]);
+  });
+
+  it('parses a colon list, trimming blanks and whitespace', () => {
+    expect(piForceSkillNames({ MOSAIC_PI_FORCE_SKILLS: 'mosaic-tools: mosaic-gitea ::' })).toEqual([
+      'mosaic-tools',
+      'mosaic-gitea',
+    ]);
  });
 });

--- a/packages/mosaic/src/commands/launch.ts
+++ b/packages/mosaic/src/commands/launch.ts
@@ -6,7 +6,15 @@
 */

 import { execFileSync, execSync, spawnSync } from 'node:child_process';
-import { existsSync, mkdirSync, readFileSync, writeFileSync, readdirSync, rmSync } from 'node:fs';
+import {
+  existsSync,
+  mkdirSync,
+  readFileSync,
+  writeFileSync,
+  readdirSync,
+  realpathSync,
+  rmSync,
+} from 'node:fs';
 import { createRequire } from 'node:module';
 import { homedir } from 'node:os';
 import { join, dirname } from 'node:path';
@@ -428,25 +436,74 @@ function ensureRuntimeConfig(runtime: RuntimeName, destPath: string): void {

 // ─── Pi skill/extension discovery ────────────────────────────────────────────

-function discoverPiSkills(): string[] {
+/** Resolve a skill dir to its canonical real path so symlinked duplicates
+ * (e.g. ~/.pi/agent/skills/X -> ~/.config/mosaic/skills/X) collapse to one key.
+ * Falls back to the literal path if it can't be resolved (e.g. broken link). */
+function skillRealPath(dir: string): string {
+  try {
+    return realpathSync(dir);
+  } catch {
+    return dir;
+  }
+}
+
+/** Skill roots Pi auto-discovers natively (no `--skill` needed): its global
+ * skills dir and the project-local one relative to the launch cwd. */
+function piNativeSkillRoots(cwd: string = process.cwd()): string[] {
+  return [join(homedir(), '.pi', 'agent', 'skills'), join(cwd, '.pi', 'skills')];
+}
+
+/** Enumerate skill dirs under a set of roots, deduped by real path. A directory
+ * counts as a skill when it (or its symlink target) contains a SKILL.md.
+ * Exported for tests (real-FS coverage of symlink acceptance + realpath dedup). */
+export function enumerateSkillDirs(roots: string[]): string[] {
+  const seen = new Set<string>();
  const args: string[] = [];
-  for (const skillsRoot of [join(MOSAIC_HOME, 'skills'), join(MOSAIC_HOME, 'skills-local')]) {
+  for (const skillsRoot of roots) {
    if (!existsSync(skillsRoot)) continue;
    try {
      for (const entry of readdirSync(skillsRoot, { withFileTypes: true })) {
-        if (!entry.isDirectory()) continue;
+        // Synced fleet skills land as symlinks, so accept both dirs and links.
+        if (!entry.isDirectory() && !entry.isSymbolicLink()) continue;
        const skillDir = join(skillsRoot, entry.name);
-        if (existsSync(join(skillDir, 'SKILL.md'))) {
-          args.push('--skill', skillDir);
-        }
+        if (!existsSync(join(skillDir, 'SKILL.md'))) continue;
+        const key = skillRealPath(skillDir);
+        if (seen.has(key)) continue;
+        seen.add(key);
+        args.push('--skill', skillDir);
      }
    } catch {
-      // skip
+      // skip unreadable roots
    }
  }
  return args;
 }

+/** Every skill dir Pi would link under `MOSAIC_PI_SKILL_MODE=all`: the Mosaic
+ * global/local catalog plus Pi's own native roots. `--no-skills` suppresses
+ * native auto-discovery, so 'all' must re-add the native roots explicitly or
+ * they would be silently dropped. Deduped by real path. */
+function discoverPiSkills(cwd: string = process.cwd()): string[] {
+  return enumerateSkillDirs([
+    join(MOSAIC_HOME, 'skills'),
+    join(MOSAIC_HOME, 'skills-local'),
+    ...piNativeSkillRoots(cwd),
+  ]);
+}
+
+/** Real paths of skills Pi will auto-discover from its native roots. Used to
+ * drop redundant force-loads in 'discover' mode (which keeps native discovery
+ * on) so the same skill is not registered twice. */
+function piNativeSkillRealPaths(cwd: string = process.cwd()): Set<string> {
+  const args = enumerateSkillDirs(piNativeSkillRoots(cwd));
+  const set = new Set<string>();
+  for (let i = 1; i < args.length; i += 2) {
+    const dir = args[i];
+    if (dir !== undefined) set.add(skillRealPath(dir));
+  }
+  return set;
+}
+
 type PiSkillMode = 'none' | 'all' | 'discover';

 function normalizePiSkillMode(env: NodeJS.ProcessEnv): PiSkillMode {
@@ -455,22 +512,96 @@ function normalizePiSkillMode(env: NodeJS.ProcessEnv): PiSkillMode {
  return 'none';
 }

+/**
+ * Fleet-critical Pi skills that are force-loaded on every Pi launch regardless
+ * of MOSAIC_PI_SKILL_MODE. They cover the highest-frequency cross-agent and
+ * git-provider operations where Pi workers historically improvised raw CLIs
+ * (raw `tmux send-keys`, raw `tea`/`gh`/`glab`) instead of the maintained
+ * `~/.config/mosaic/tools/` wrappers.
+ *
+ * An explicit `--skill <dir>` overrides `--no-skills` for that path, so forcing
+ * a single targeted skill surfaces the must-use toolkit without loading the full
+ * ~100-skill catalog (context bloat). Missing skills are skipped silently, so
+ * this is a no-op until the named skill is synced into ~/.config/mosaic/skills/.
+ *
+ * Override with MOSAIC_PI_FORCE_SKILLS (colon-separated skill dir names; set to
+ * an empty string to disable force-loading entirely).
+ */
+const DEFAULT_PI_FORCE_SKILLS = ['mosaic-tools'];
+
+export function piForceSkillNames(env: NodeJS.ProcessEnv): string[] {
+  const override = env['MOSAIC_PI_FORCE_SKILLS'];
+  // Unset => defaults; return a copy so callers cannot mutate the shared module-level list.
+  if (override === undefined) return [...DEFAULT_PI_FORCE_SKILLS];
+  const names = override.split(':').map((name) => name.trim());
+  // Drop blank segments and repeated names, keeping first-seen order ('' => []).
+  return [...new Set(names)].filter(Boolean);
+}
+
+function forcedPiSkillArgs(env: NodeJS.ProcessEnv = process.env): string[] {
+  const args: string[] = [];
+  for (const name of piForceSkillNames(env)) {
+    const skillDir = join(MOSAIC_HOME, 'skills', name);
+    if (existsSync(join(skillDir, 'SKILL.md'))) {
+      args.push('--skill', skillDir);
+    }
+  }
+  return args;
+}
+
+/** Merge `--skill <dir>` pair lists in order, keeping the first directory seen
+ * per real path (a symlinked duplicate collapses to one `--skill`). Pairs whose
+ * flag is not `--skill`, or that lack a directory, are dropped silently. */
+function mergeSkillArgs(...groups: string[][]): string[] {
+  const seen = new Set<string>();
+  const out: string[] = [];
+  for (const group of groups) {
+    for (let i = 0; i < group.length; i += 2) {
+      const dir = group[i + 1];
+      if (group[i] !== '--skill' || dir === undefined) continue;
+      const key = skillRealPath(dir);
+      if (seen.has(key)) continue;
+      seen.add(key);
+      out.push('--skill', dir);
+    }
+  }
+  return out;
+}
+
 export function buildPiSkillArgs(
  _runtimeArgs: string[],
  env: NodeJS.ProcessEnv = process.env,
  discoveredSkillArgs: string[] = discoverPiSkills(),
+  forcedSkillArgs: string[] = forcedPiSkillArgs(env),
+  nativeSkillRealPaths: Set<string> = piNativeSkillRealPaths(),
 ): string[] {
  const mode = normalizePiSkillMode(env);

  if (mode === 'discover') {
-    return [];
+    // Native Pi discovery stays on, so only force-load fleet skills it will NOT
+    // already find under its native roots — otherwise the same skill is
+    // registered twice (once natively, once via --skill). mergeSkillArgs first
+    // collapses any intra-forced-set realpath duplicates, mirroring 'all' mode.
+    const deduped = mergeSkillArgs(forcedSkillArgs);
+    const out: string[] = [];
+    for (let i = 0; i < deduped.length; i += 2) {
+      const dir = deduped[i + 1];
+      if (deduped[i] !== '--skill' || dir === undefined) continue;
+      if (nativeSkillRealPaths.has(skillRealPath(dir))) continue;
+      out.push('--skill', dir);
+    }
+    return out;
  }

  if (mode === 'all') {
-    return ['--no-skills', ...discoveredSkillArgs];
+    // 'all' links the full catalog; merge in the forced set (deduped by real
+    // path) so fleet-critical skills are present even when the discovered list
+    // passed in omits them. discoverPiSkills already covers Pi's native roots,
+    // which `--no-skills` would otherwise suppress.
+    return ['--no-skills', ...mergeSkillArgs(discoveredSkillArgs, forcedSkillArgs)];
  }

-  return ['--no-skills'];
+  return ['--no-skills', ...mergeSkillArgs(forcedSkillArgs)]; // dedup, as in 'all'/'discover'
 }

 function discoverPiExtension(): string[] {
--- a/packages/types/src/index.ts
+++ b/packages/types/src/index.ts
@@ -6,3 +6,4 @@ export * from './provider/index.js';
 export * from './routing/index.js';
 export * from './commands/index.js';
 export * from './federation/index.js';
+export * from './reflection/index.js';
--- a/packages/types/src/reflection/tests/reflection.spec.ts
+++ b/packages/types/src/reflection/tests/reflection.spec.ts
@@ -0,0 +1,146 @@
+/**
+ * Unit tests for the reflection.v1 schema + self-report boundary.
+ *
+ * The runtime source of truth is the zod schema set in `reflection.ts`. The
+ * class-validator `ReflectionSelfReportDto` is the NestJS-side boundary type
+ * (exercised under the gateway app's reflect-metadata runtime, mirroring how
+ * `chat.dto.ts` is tested in apps/gateway); here we validate the self-report
+ * input with its zod counterpart, which is what the Stop hook actually uses.
+ *
+ * Coverage:
+ *  - REVIEW_SURFACES canonical ordering (the enum both zod + JSON Schema mirror)
+ *  - ReflectionV1Schema accepts a fully-populated record
+ *  - ReflectionV1Schema accepts a degraded record (self-report fields null)
+ *  - ReflectionV1Schema rejects bad schema literal / out-of-range confidence / bad surface
+ *  - ReflectionSelfReportSchema accepts valid + empty, rejects bad input
+ */
+
+import { describe, expect, it } from 'vitest';
+
+import {
+  REVIEW_SURFACES,
+  ReflectionV1Schema,
+  ReflectionSelfReportSchema,
+  type ReflectionV1,
+} from '../index.js';
+
+const baseMechanical = {
+  schema: 'reflection.v1' as const,
+  task_ref: 'stack#544',
+  agent: 'claude',
+  session_id: 'sess-abc',
+  timestamp: '2026-06-16T00:00:00.000Z',
+  repo: 'stack',
+  risk: {
+    needs_review: true,
+    score: 1.0,
+    surface: 'auth' as const,
+    reason: 'auth surface (weight 1) in: src/auth.ts',
+  },
+  files_changed: ['src/auth.ts'],
+  provenance: {
+    source: 'stop-hook' as const,
+    reflection_attempt: 1,
+    degraded: false,
+    reflection_mode: 'solo' as const,
+  },
+};
+
+describe('REVIEW_SURFACES', () => {
+  it('keeps the canonical most→least-sensitive ordering', () => {
+    expect(REVIEW_SURFACES).toEqual([
+      'auth',
+      'data',
+      'infra',
+      'build',
+      'ui',
+      'test',
+      'docs',
+      'none',
+    ]);
+  });
+});
+
+describe('ReflectionV1Schema', () => {
+  it('accepts a fully-populated record', () => {
+    const rec: ReflectionV1 = {
+      ...baseMechanical,
+      confidence: 0.7,
+      most_likely_wrong: { surface: 'auth', description: 'token refresh untested' },
+      known_not_in_diff: 'manual QA only on the happy path',
+    };
+    expect(() => ReflectionV1Schema.parse(rec)).not.toThrow();
+  });
+
+  it('accepts a degraded record with null self-report fields', () => {
+    const rec: ReflectionV1 = {
+      ...baseMechanical,
+      confidence: null,
+      most_likely_wrong: null,
+      known_not_in_diff: null,
+      provenance: { ...baseMechanical.provenance, degraded: true },
+    };
+    expect(() => ReflectionV1Schema.parse(rec)).not.toThrow();
+  });
+
+  it('rejects a wrong schema literal', () => {
+    const bad = {
+      ...baseMechanical,
+      schema: 'reflection.v2',
+      confidence: null,
+      most_likely_wrong: null,
+      known_not_in_diff: null,
+    };
+    expect(() => ReflectionV1Schema.parse(bad)).toThrow();
+  });
+
+  it('rejects out-of-range confidence', () => {
+    const bad = {
+      ...baseMechanical,
+      confidence: 1.5,
+      most_likely_wrong: null,
+      known_not_in_diff: null,
+    };
+    expect(() => ReflectionV1Schema.parse(bad)).toThrow();
+  });
+
+  it('rejects an unknown surface', () => {
+    const bad = {
+      ...baseMechanical,
+      risk: { ...baseMechanical.risk, surface: 'network' },
+      confidence: null,
+      most_likely_wrong: null,
+      known_not_in_diff: null,
+    };
+    expect(() => ReflectionV1Schema.parse(bad)).toThrow();
+  });
+});
+
+describe('ReflectionSelfReportSchema', () => {
+  it('accepts a valid self-report', () => {
+    const ok = ReflectionSelfReportSchema.safeParse({
+      confidence: 0.8,
+      most_likely_wrong: {
+        surface: 'data',
+        description: 'migration not run against prod-sized data',
+      },
+      known_not_in_diff: 'rollback path untested',
+    });
+    expect(ok.success).toBe(true);
+  });
+
+  it('accepts an empty self-report (all optional)', () => {
+    expect(ReflectionSelfReportSchema.safeParse({}).success).toBe(true);
+  });
+
+  it('rejects confidence above 1', () => {
+    expect(ReflectionSelfReportSchema.safeParse({ confidence: 2 }).success).toBe(false);
+  });
+
+  it('rejects an unknown most_likely_wrong.surface', () => {
+    const res = ReflectionSelfReportSchema.safeParse({
+      most_likely_wrong: { surface: 'network', description: 'x' },
+    });
+    expect(res.success).toBe(false);
+  });
+});
--- a/packages/types/src/reflection/index.ts
+++ b/packages/types/src/reflection/index.ts
@@ -0,0 +1,30 @@
+/**
+ * Agent reflection (v1) — public barrel.
+ *
+ * reflection.ts      — zod schemas (runtime source of truth) + inferred types
+ * reflection.dto.ts  — class-validator DTO for the agent self-report input
+ */
+
+export {
+  REVIEW_SURFACES,
+  ReviewSurfaceSchema,
+  MostLikelyWrongSchema,
+  ReflectionRiskSchema,
+  ReflectionModeSchema,
+  ReflectionProvenanceSchema,
+  ReflectionSelfReportSchema,
+  ReflectionV1Schema,
+  REFLECTION_SCHEMA_ID,
+} from './reflection.js';
+
+export type {
+  ReviewSurface,
+  MostLikelyWrong,
+  ReflectionRisk,
+  ReflectionMode,
+  ReflectionProvenance,
+  ReflectionSelfReport,
+  ReflectionV1,
+} from './reflection.js';
+
+export { MostLikelyWrongDto, ReflectionSelfReportDto } from './reflection.dto.js';
--- a/packages/types/src/reflection/reflection.dto.ts
+++ b/packages/types/src/reflection/reflection.dto.ts
@@ -0,0 +1,55 @@
+/**
+ * Reflection self-report DTO — class-validator boundary.
+ *
+ * Validates the agent-supplied self-report input (the optional
+ * `$REFLECTION_INPUT` file, default `<repo>/.mosaic/reflection-input.json`)
+ * before it is merged into a `reflection.v1` record. This is the only
+ * externally-authored input on the reflection path, so it gets a DTO per the
+ * Mosaic module-boundary rule.
+ *
+ * Class-validator only (no class-transformer `@Type`) — matching `chat.dto.ts`
+ * — so the module is safe to import without a `reflect-metadata` shim. Deep
+ * nested validation of `most_likely_wrong` is owned by the zod
+ * `ReflectionSelfReportSchema` in `reflection.ts`, which is what the Stop hook
+ * actually enforces at runtime.
+ */
+
+import {
+  IsIn,
+  IsNumber,
+  IsObject,
+  IsOptional,
+  IsString,
+  Max,
+  Min,
+  MaxLength,
+} from 'class-validator';
+
+import { REVIEW_SURFACES } from './reflection.js';
+
+/** Shape of `most_likely_wrong`; validated structurally by zod at runtime. */
+export class MostLikelyWrongDto {
+  @IsIn(REVIEW_SURFACES as unknown as string[])
+  surface!: string;
+
+  @IsString()
+  @MaxLength(4_000)
+  description!: string;
+}
+
+export class ReflectionSelfReportDto {
+  @IsOptional()
+  @IsNumber()
+  @Min(0)
+  @Max(1)
+  confidence?: number;
+
+  @IsOptional()
+  @IsObject()
+  most_likely_wrong?: MostLikelyWrongDto;
+
+  @IsOptional()
+  @IsString()
+  @MaxLength(8_000)
+  known_not_in_diff?: string;
+}
--- a/packages/types/src/reflection/reflection.ts
+++ b/packages/types/src/reflection/reflection.ts
@@ -0,0 +1,90 @@
+/**
+ * Agent reflection (v1) — wire schema.
+ *
+ * Runtime source of truth for the `reflection.v1` sidecar emitted at end-of-run
+ * by the Stop hook (design §10 step 1). The JSON Schema artifact at
+ * `@mosaicstack/macp` `src/schemas/reflection.v1.schema.json` is the documented
+ * contract; this zod schema is the executable one and MUST agree with it.
+ *
+ * Field provenance:
+ *   - MECHANICAL  (risk, files_changed, ids, provenance): written by the hook.
+ *   - SELF-REPORTED (confidence, most_likely_wrong, known_not_in_diff): merged
+ *     from an optional agent-supplied input; null when absent.
+ *
+ * Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
+ */
+
+import { z } from 'zod';
+
+/** Review surfaces, ordered most- to least-sensitive. Mirrors macp risk-floor. */
+export const REVIEW_SURFACES = [
+  'auth',
+  'data',
+  'infra',
+  'build',
+  'ui',
+  'test',
+  'docs',
+  'none',
+] as const;
+
+export const ReviewSurfaceSchema = z.enum(REVIEW_SURFACES);
+export type ReviewSurface = z.infer<typeof ReviewSurfaceSchema>;
+
+/** SELF-REPORTED: the single most-likely way the work is wrong. */
+export const MostLikelyWrongSchema = z.object({
+  surface: ReviewSurfaceSchema,
+  description: z.string(),
+});
+export type MostLikelyWrong = z.infer<typeof MostLikelyWrongSchema>;
+
+/** MECHANICAL: output of the diff risk-floor (see `@mosaicstack/macp`). */
+export const ReflectionRiskSchema = z.object({
+  needs_review: z.boolean(),
+  score: z.number().min(0).max(1),
+  surface: ReviewSurfaceSchema,
+  reason: z.string(),
+});
+export type ReflectionRisk = z.infer<typeof ReflectionRiskSchema>;
+
+export const ReflectionModeSchema = z.enum(['off', 'solo', 'orchestrated']);
+export type ReflectionMode = z.infer<typeof ReflectionModeSchema>;
+
+export const ReflectionProvenanceSchema = z.object({
+  source: z.literal('stop-hook'),
+  reflection_attempt: z.number().int().min(1),
+  degraded: z.boolean(),
+  reflection_mode: ReflectionModeSchema,
+});
+export type ReflectionProvenance = z.infer<typeof ReflectionProvenanceSchema>;
+
+/**
+ * The self-reported half of a reflection. Supplied by the agent out-of-band
+ * (e.g. `<repo>/.mosaic/reflection-input.json`) and merged by the hook. All
+ * fields optional; missing fields become `null` in the assembled record.
+ */
+export const ReflectionSelfReportSchema = z.object({
+  confidence: z.number().min(0).max(1).nullable().optional(),
+  most_likely_wrong: MostLikelyWrongSchema.nullable().optional(),
+  known_not_in_diff: z.string().nullable().optional(),
+});
+export type ReflectionSelfReport = z.infer<typeof ReflectionSelfReportSchema>;
+
+/** The full assembled `reflection.v1` sidecar. */
+export const ReflectionV1Schema = z.object({
+  schema: z.literal('reflection.v1'),
+  task_ref: z.string(),
+  agent: z.string(),
+  session_id: z.string(),
+  timestamp: z.string(),
+  repo: z.string(),
+  confidence: z.number().min(0).max(1).nullable(),
+  most_likely_wrong: MostLikelyWrongSchema.nullable(),
+  known_not_in_diff: z.string().nullable(),
+  risk: ReflectionRiskSchema,
+  files_changed: z.array(z.string()),
+  provenance: ReflectionProvenanceSchema,
+});
+export type ReflectionV1 = z.infer<typeof ReflectionV1Schema>;
+
+export const REFLECTION_SCHEMA_ID = 'reflection.v1' as const;
--- a/scratchpads/2026-06-19-tmux-fleet-durable-install-plan.md
+++ b/scratchpads/2026-06-19-tmux-fleet-durable-install-plan.md
@@ -0,0 +1,755 @@
+# Durable tmux Fleet Installation Plan
+
+> **For Mosaic/Hermes:** This is an implementation plan for making the tmux-backed Mosaic software-factory fleet durable on this server and reusable in generic Mosaic Stack installs. Keep local USC/Mosaic defaults in profiles; keep framework behavior customizable.
+
+**Goal:** Add a supported Mosaic tmux-fleet installation path: holder-owned tmux server, per-agent reusable sessions, reliable send/reset/status tools, local roster customization, and a documented cutover for this server.
+
+**Architecture:** Mosaic should ship generic tmux fleet primitives in the framework, then layer local rosters through configuration. The holder service owns the tmux socket; each agent service joins the holder-owned server and runs `mosaic yolo <runtime>`. The orchestrator addresses agents through `mosaic agent ...` abstractions so tmux can later be replaced by Matrix-backed agent comms without changing mission flow.
+
+**Reference:** AI Guide `playbooks/tmux-fleet.md` at commit `2a0b0b5` documents the organization-neutral holder-service pattern, exact-match `=<name>` stop targets, and coupled-server cutover/verification sequence. The Stack implementation should treat that as the lifecycle model and keep concrete Mosaic unit/tooling details here.
+
+**Tech Stack:** Bash, tmux, user systemd units, Mosaic CLI/framework installer, JSON/YAML roster config, existing `packages/mosaic/framework/tools/tmux/{agent-send.sh,send-message.sh}`.
+
+---
+
+## Current evidence from this server
+
+Checked 2026-06-19:
+
+- Host: `W-jarvis`
+- User: `jarvis`
+- tmux: `/usr/bin/tmux`, version `3.4`
+- user systemd: active
+- existing tmux sessions: `ai-bma-0`, `dyor-1`, `melaniewoltje-3`, `sage-2`
+- existing Mosaic runtime: `/home/jarvis/.npm-global/bin/mosaic`, version `0.0.31`
+- installed `~/.config/mosaic/tools/tmux` was not present even though the stack repo contains `packages/mosaic/framework/tools/tmux/`
+
+Implication: do not kill the current tmux server casually. This server has active ad-hoc/service sessions. The durable fleet cutover must be planned, with either a separate socket first or a scheduled fleet recycle.
+
+## Design decisions
+
+### 1. Generic framework, local profile
+
+The Mosaic framework should ship:
+
+- systemd unit templates;
+- tmux fleet CLI wrappers;
+- roster schema and examples;
+- install/enable/status/reset commands;
+- docs and verification scripts.
+
+Local environments should provide:
+
+- agent names;
+- runtime per slot (`claude`, `pi`, `codex`, etc.);
+- default role class;
+- launch directory;
+- optional kickstart prompt;
+- model/provider hints;
+- transport selection (`tmux` now, `matrix` later).
+
+Do not bake the USC roster into generic install code. Ship it as an example profile.
+
+### 2. Durable sessions, disposable task context
+
+Session names are durable operational addresses. Task persona is disposable. Reusable worker slots should be reset with `/clear` or `/new` and then receive a fresh task kickstart.
+
+Persistent/semi-persistent personas:
+
+- lead orchestrator;
+- final/adversarial reviewer;
+- architecture/enhancement lane.
+
+Disposable slots:
+
+- implementers;
+- ordinary reviewers;
+- security reviewers unless actively holding a security mission.
+
+### 3. Transport abstraction now
+
+Add commands around tmux instead of calling tmux directly from orchestration:
+
+```bash
+mosaic agent send <agent> --message "..."
+mosaic agent status [--json]
+mosaic agent reset <agent> [--clear|--new]
+mosaic agent roster [--json]
+mosaic fleet install|start|stop|restart|status|verify
+```
+
+Today these call tmux/systemd. Later the same command surface can target Matrix or per-agent gateways.
+
+### 4. Avoid shared-server ownership bug
+
+Use the AI Guide holder pattern:
+
+```text
+mosaic-tmux-holder.service owns the tmux server/socket
+mosaic-agent@<name>.service joins the existing holder-owned socket
+ExecStop kills only session =<name>
+```
+
+Use exact tmux targets: `=<session>`.
+
+### 5. Prefer separate named socket for Mosaic factory
+
+To avoid disturbing existing tmux work, the default fleet should use a named socket such as:
+
+```text
+$XDG_RUNTIME_DIR/mosaic-factory.tmux
+```
+
+or tmux socket name:
+
+```bash
+tmux -L mosaic-factory ...
+```
+
+This avoids collision with ordinary `tmux ls` sessions. The send tools need socket support.
+
+---
+
+## Target USC-style roster example
+
+Ship as example only, not default:
+
+```yaml
+version: 1
+transport: tmux
+tmux:
+  socket_name: mosaic-factory
+  holder_session: _holder
+  working_directory: ~/src
+agents:
+  - name: mos-claude
+    runtime: claude
+    class: orchestrator
+    model_hint: Claude Opus
+    persistent_persona: true
+  - name: coder0
+    runtime: claude
+    class: implementer
+    model_hint: Claude Opus
+    reset_between_tasks: true
+  - name: coder1
+    runtime: claude
+    class: implementer
+    model_hint: Claude Opus
+    reset_between_tasks: true
+  - name: coder2
+    runtime: pi
+    class: implementer
+    model_hint: Pi GPT-5.5
+    reset_between_tasks: true
+  - name: coder3
+    runtime: pi
+    class: implementer
+    model_hint: Pi GPT-5.5
+    reset_between_tasks: true
+  - name: coder4
+    runtime: claude
+    class: implementer
+    model_hint: Claude Opus
+    reset_between_tasks: true
+  - name: coder5
+    runtime: claude
+    class: implementer
+    model_hint: Claude Opus
+    reset_between_tasks: true
+  - name: enhance
+    runtime: claude
+    class: enhancer
+    model_hint: Claude Opus
+    persistent_persona: semi
+  - name: rev0
+    runtime: pi
+    class: reviewer
+    model_hint: Pi GPT-5.5
+    reset_between_tasks: true
+  - name: rev1
+    runtime: pi
+    class: reviewer
+    model_hint: Pi GPT-5.5
+    reset_between_tasks: true
+  - name: secrev0
+    runtime: pi
+    class: security_reviewer
+    model_hint: Pi GPT-5.5
+    reset_between_tasks: true
+  - name: secrev1
+    runtime: pi
+    class: security_reviewer
+    model_hint: Pi GPT-5.5
+    reset_between_tasks: true
+  - name: ultron
+    runtime: pi
+    class: final_reviewer
+    model_hint: Pi GPT-5.5
+    persistent_persona: semi
+```
+
+---
+
+## Phase 0 — Confirm install surfaces
+
+### Task 0.1: Inspect installer copy behavior
+
+**Objective:** Confirm how framework files under `packages/mosaic/framework/` become installed under `~/.config/mosaic/`.
+
+**Files:**
+
+- Read: `tools/install.sh`
+- Read: `packages/mosaic/framework/install.sh`
+- Read: `packages/mosaic/src/runtime/install-manifest.ts`
+
+**Steps:**
+
+1. Verify `packages/mosaic/framework/install.sh` rsyncs `tools/tmux`.
+2. Verify whether npm-packaged installs include `framework/tools/tmux`.
+3. Confirm whether installed hosts should run `mosaic update`, `bash tools/install.sh`, or `packages/mosaic/framework/install.sh` to receive new tmux tools.
+4. Record exact propagation command in docs.
+
+**Verification:**
+
+```bash
+bash packages/mosaic/framework/install.sh --help || true
+npm pack --dry-run --json | jq '.[0].files[].path' | grep 'framework/tools/tmux'
+```
+
+Expected: tmux tools are included in installable package or packaging fix is identified.
+
+### Task 0.2: Inspect current yolo launch semantics
+
+**Objective:** Confirm `mosaic yolo claude` and `mosaic yolo pi` accept optional initial prompt text and behave well under systemd/tmux.
+
+**Files:**
+
+- Read: `packages/mosaic/src/**`
+- Read: `packages/mosaic/framework/runtime/claude/RUNTIME.md`
+- Read: `packages/mosaic/framework/runtime/pi/RUNTIME.md`
+
+**Verification commands:**
+
+```bash
+mosaic yolo claude --help
+mosaic yolo pi --help
+```
+
+Expected: a systemd `ExecStart` can launch the runtime either with no prompt or with a kickstart prompt file/string.
+
+---
+
+## Phase 1 — Framework tmux primitives
+
+### Task 1.1: Add socket support to send tools
+
+**Objective:** Allow `agent-send.sh` and `send-message.sh` to target a named Mosaic tmux socket without affecting default tmux sessions.
+
+**Files:**
+
+- Modify: `packages/mosaic/framework/tools/tmux/send-message.sh`
+- Modify: `packages/mosaic/framework/tools/tmux/agent-send.sh`
+- Modify: `packages/mosaic/framework/tools/tmux/README.md`
+- Test: `packages/mosaic/framework/tools/tmux/test-send-message.sh` (new)
+
+**Design:**
+
+Add optional flags:
+
+```bash
+-L SOCKET_NAME      # tmux -L socket name
+--socket-path PATH  # optional later if needed; do not reuse -S, which is already the source label in agent-send
+```
+
+Because `agent-send.sh` already uses `-S` for the source label, use `-L` for the socket name. Add a socket-path option only if it is needed: `-T` as a short flag, or `--socket-path` once long-option parsing is added.
+
+**Implementation notes:**
+
+- Build a tmux command array:
+
+```bash
+tmux_cmd=(tmux)
+if [ -n "$SOCKET_NAME" ]; then tmux_cmd+=( -L "$SOCKET_NAME" ); fi
+```
+
+- Replace raw `tmux ...` calls with `"${tmux_cmd[@]}" ...`.
+- Pass `-L` through remote ssh invocation.
+- Include socket name in verbose output.
+
+**Verification:**
+
+```bash
+tmux -L mosaic-test new-session -d -s target 'cat'
+packages/mosaic/framework/tools/tmux/send-message.sh -L mosaic-test -t target -m 'hello'
+tmux -L mosaic-test capture-pane -t target -p | grep hello
+tmux -L mosaic-test kill-server
+```
+
+Expected: message lands in the named socket session; default `tmux ls` is untouched.
+
+### Task 1.2: Add exact target validation helper
+
+**Objective:** Prevent accidental prefix targeting in all tmux fleet operations.
+
+**Files:**
+
+- Create: `packages/mosaic/framework/tools/tmux/_lib.sh`
+- Modify: `send-message.sh`
+- Modify: `agent-send.sh`
+
+**Behavior:**
+
+- For session-only agent names, normalize target to `=<name>` before kill/status/reset operations.
+- For explicit pane targets like `session:window.pane`, allow as advanced path but document the risk.
+
+**Verification:**
+
+Create sessions `agent` and `agent0`; verify killing/resetting `agent` does not affect `agent0`.
+
+---
+
+## Phase 2 — systemd unit templates
+
+### Task 2.1: Add holder service template
+
+**Objective:** Ship a user systemd unit template that owns the Mosaic factory tmux server.
+
+**Files:**
+
+- Create: `packages/mosaic/framework/systemd/user/mosaic-tmux-holder.service`
+- Create: `packages/mosaic/framework/tools/fleet/install-user-units.sh`
+
+**Unit shape:**
+
+```ini
+[Unit]
+Description=Mosaic tmux fleet holder
+Documentation=https://git.mosaicstack.dev/mosaicstack/aiguide
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+Environment=MOSAIC_TMUX_SOCKET=mosaic-factory
+ExecStart=/usr/bin/tmux -L ${MOSAIC_TMUX_SOCKET} new-session -d -s _holder 'while true; do sleep 3600; done'
+ExecStop=-/usr/bin/tmux -L ${MOSAIC_TMUX_SOCKET} kill-server
+
+[Install]
+WantedBy=default.target
+```
+
+**Important:** systemd environment expansion in `ExecStart` is limited (systemd substitutes `${VAR}` itself; no shell is involved). Verify syntax; if specifier or environment expansion is awkward, generate concrete units from config instead of relying on dynamic expansion.
+
+**Verification:**
+
+```bash
+systemd-analyze --user verify ~/.config/systemd/user/mosaic-tmux-holder.service
+systemctl --user daemon-reload
+systemctl --user start mosaic-tmux-holder.service
+tmux -L mosaic-factory ls | grep _holder
+```
+
+### Task 2.2: Add agent service template
+
+**Objective:** Ship a user systemd template that starts one configured agent slot.
+
+**Files:**
+
+- Create: `packages/mosaic/framework/systemd/user/mosaic-agent@.service`
+- Modify: `packages/mosaic/framework/tools/fleet/install-user-units.sh`
+
+**Unit shape:**
+
+```ini
+[Unit]
+Description=Mosaic agent session %i
+Requires=mosaic-tmux-holder.service
+After=mosaic-tmux-holder.service
+PartOf=mosaic-tmux-holder.service
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+WorkingDirectory=%h/src
+Environment=MOSAIC_TMUX_SOCKET=mosaic-factory
+ExecStart=/bin/bash -lc 'tmux -L "$MOSAIC_TMUX_SOCKET" new-session -d -s "%i" "mosaic yolo $(mosaic fleet runtime %i)"'
+ExecStop=-/usr/bin/tmux -L ${MOSAIC_TMUX_SOCKET} kill-session -t '=%i'
+
+[Install]
+WantedBy=default.target
+```
+
+**Design warning:** command substitution in unit files can become brittle. Prefer a generated per-agent EnvironmentFile:
+
+```text
+~/.config/mosaic/fleet/agents/coder0.env
+```
+
+with:
+
+```bash
+MOSAIC_AGENT_NAME=coder0
+MOSAIC_AGENT_RUNTIME=claude
+MOSAIC_AGENT_WORKDIR=/home/jarvis/src
+MOSAIC_TMUX_SOCKET=mosaic-factory
+```
+
+Then `ExecStart` calls a wrapper:
+
+```bash
+~/.config/mosaic/tools/fleet/start-agent-session.sh
+```
+
+**Verification:**
+
+```bash
+systemd-analyze --user verify ~/.config/systemd/user/mosaic-agent@.service
+systemctl --user start mosaic-agent@coder0.service
+tmux -L mosaic-factory has-session -t '=coder0'
+systemctl --user restart mosaic-agent@coder0.service
+```
+
+Expected: holder server PID remains unchanged; only `coder0` session recycles.
+
+### Task 2.3: Add start-agent wrapper
+
+**Objective:** Keep systemd units simple by moving config lookup and launch command construction into a script.
+
+**Files:**
+
+- Create: `packages/mosaic/framework/tools/fleet/start-agent-session.sh`
+
+**Behavior:**
+
+Inputs:
+
+```bash
+start-agent-session.sh <agent-name>
+```
+
+Reads:
+
+```text
+$MOSAIC_HOME/fleet/agents/<agent-name>.env
+```
+
+Starts:
+
+```bash
+tmux -L "$MOSAIC_TMUX_SOCKET" new-session -d -s "$MOSAIC_AGENT_NAME" -c "$MOSAIC_AGENT_WORKDIR" "mosaic yolo $MOSAIC_AGENT_RUNTIME"
+```
+
+Guardrails:
+
+- fail if runtime is empty;
+- fail if workdir does not exist;
+- no duplicate sessions unless `--replace` is passed;
+- exact session names only.
+
+---
+
+## Phase 3 — roster config and CLI wrappers
+
+### Task 3.1: Add fleet config schema and examples
+
+**Objective:** Define customizable install-time roster without hardcoding USC.
+
+**Files:**
+
+- Create: `packages/mosaic/framework/fleet/roster.schema.json`
+- Create: `packages/mosaic/framework/fleet/examples/minimal.yaml`
+- Create: `packages/mosaic/framework/fleet/examples/usc-software-factory.yaml`
+- Create: `packages/mosaic/framework/fleet/README.md`
+
+**Schema concepts:**
+
+- `transport`: `tmux` now; `matrix` later.
+- `tmux.socket_name`
+- `tmux.holder_session`
+- `defaults.working_directory`
+- `agents[].name`
+- `agents[].runtime`
+- `agents[].class`
+- `agents[].model_hint`
+- `agents[].persistent_persona`
+- `agents[].reset_between_tasks`
+- `agents[].kickstart_template`
+
+**Verification:**
+
+Use `jq` for JSON examples or add a small Python/YAML validator if YAML is chosen. If no YAML parser is guaranteed, store examples as JSON or support both with Python stdlib JSON first.
+
+### Task 3.2: Add `mosaic fleet` commands
+
+**Objective:** Provide operator-safe commands for install/status/start/stop/restart/verify.
+
+**Files:**
+
+- Modify: `packages/mosaic/src/cli.ts` or the current commander entrypoint.
+- Create scripts under: `packages/mosaic/framework/tools/fleet/`
+
+**Commands:**
+
+```bash
+mosaic fleet init --profile minimal|usc --write
+mosaic fleet install-systemd
+mosaic fleet start [agent]
+mosaic fleet stop [agent]
+mosaic fleet restart [agent]
+mosaic fleet status --json
+mosaic fleet verify
+```
+
+**Implementation path:**
+
+Start by wrapping framework shell scripts from the TypeScript CLI. Do not overbuild a TypeScript service manager in the first pass.
+
+### Task 3.3: Add `mosaic agent` commands
+
+**Objective:** Provide transport-stable per-agent operations.
+
+**Files:**
+
+- Modify: Mosaic CLI entrypoint.
+- Create: `packages/mosaic/framework/tools/agent/` or reuse `tools/tmux` + `tools/fleet`.
+
+**Commands:**
+
+```bash
+mosaic agent roster [--json]
+mosaic agent status [agent] [--json]
+mosaic agent send <agent> --message "..."
+mosaic agent reset <agent> --clear|--new
+mosaic agent tail <agent> [-n 80]
+```
+
+**Reset behavior:**
+
+For tmux transport, `reset --clear` sends `/clear` then Enter through `send-message.sh`.
+
+For Claude/Pi differences, keep reset command configurable per runtime:
+
+```yaml
+runtimes:
+  claude:
+    reset_command: /clear
+  pi:
+    reset_command: /new
+```
+
+If a runtime does not support a known reset command, restart the service and send a fresh kickstart.
+
+---
+
+## Phase 4 — this-server rollout strategy
+
+### Task 4.1: Install on separate socket first
+
+**Objective:** Prove the holder pattern without disturbing existing sessions.
+
+**Commands after implementation lands locally:**
+
+```bash
+mosaic fleet init --profile minimal --write
+mosaic fleet install-systemd
+systemctl --user daemon-reload
+systemctl --user start mosaic-tmux-holder.service
+mosaic fleet verify
+```
+
+Expected:
+
+- `tmux -L mosaic-factory ls` shows `_holder`.
+- normal `tmux ls` still shows existing sessions unchanged.
+
+### Task 4.2: Start one canary agent
+
+**Objective:** Validate single-agent start/restart isolation.
+
+Use a harmless canary first, not the full fleet.
+
+Example roster addition:
+
+```yaml
+- name: canary-pi
+  runtime: pi
+  class: canary
+  working_directory: /home/jarvis/src
+```
+
+Commands:
+
+```bash
+systemctl --user start mosaic-agent@canary-pi.service
+SRV=$(tmux -L mosaic-factory display-message -p '#{pid}')
+systemctl --user restart mosaic-agent@canary-pi.service
+test "$SRV" = "$(tmux -L mosaic-factory display-message -p '#{pid}')"
+tmux -L mosaic-factory ls
+```
+
+Expected: holder PID unchanged; `_holder` remains; `canary-pi` recreated.
+
+### Task 4.3: Configure local Mosaic factory roster
+
+**Objective:** Create the actual local roster for this server after canary passes.
+
+Do not assume USC exact roster is desired here. Create a local profile such as:
+
+```text
+~/.config/mosaic/fleet/roster.yaml
+```
+
+Initial local recommendation:
+
+- `mos-claude` orchestrator
+- `coder0` / `coder1` implementers
+- `rev0` reviewer
+- `secrev0` security reviewer
+- `ultron` final/adversarial reviewer
+
+Scale to full USC-style pool only after resource/budget behavior is understood.
+
+### Task 4.4: Cut over existing ad-hoc tmux sessions only if desired
+
+**Objective:** Avoid data loss.
+
+Existing sessions on this server are not on the proposed `mosaic-factory` socket. They can remain untouched. If we later want them under Mosaic fleet control:
+
+1. list sessions;
+2. capture logs/handoffs;
+3. stop old processes intentionally;
+4. recreate as configured `mosaic-agent@...` services;
+5. verify comms and state.
+
+Do not run `tmux kill-server` on the default socket unless Jason explicitly approves that outage.
+
+---
+
+## Phase 5 — docs and AI Guide backfill
+
+### Task 5.1: Stack docs
+
+**Objective:** Document install and customization for Mosaic Stack users.
+
+**Files:**
+
+- Create: `docs/fleet/tmux-fleet.md` or `packages/mosaic/framework/tools/fleet/README.md`
+- Modify: top-level `README.md` if appropriate.
+
+Must cover:
+
+- what problem holder service solves;
+- install commands;
+- customization file;
+- example rosters;
+- reset/reuse lifecycle;
+- exact-target safety;
+- separate socket default;
+- Matrix migration path.
+
+### Task 5.2: AI Guide docs
+
+**Objective:** Keep generic guidance in AI Guide and implementation details in Stack.
+
+**Files in `mosaicstack/aiguide`:**
+
+- Update: `playbooks/tmux-fleet.md` with named socket, roster/profile, and resettable-slot pattern.
+- Add or update: `reference/agent-role-matrix.md` if PR #5 lands.
+
+Do not put Mosaic install commands as the only path in AI Guide. Present them as one implementation profile.
+
+---
+
+## Phase 6 — Matrix migration seam
+
+### Task 6.1: Add transport enum but implement tmux only
+
+**Objective:** Avoid hardcoding tmux into orchestration semantics.
+
+Roster:
+
+```yaml
+transport: tmux
+```
+
+Future:
+
+```yaml
+transport: matrix
+matrix:
+  homeserver: https://matrix.example
+  room_prefix: mosaic-factory
+```
+
+### Task 6.2: Define transport interface docs
+
+**Objective:** Make Matrix plugin work a transport swap, not a rewrite.
+
+Minimum operations:
+
+```text
+send(agent, message)
+reset(agent, mode)
+status(agent)
+tail(agent)
+listAgents()
+```
+
+Any tmux-specific concept must stay below this line.
+
+---
+
+## Acceptance criteria
+
+The implementation is complete when:
+
+- `mosaic fleet init` can write a minimal roster.
+- `mosaic fleet install-systemd` installs holder and agent units without hand editing.
+- `mosaic fleet start` starts the holder and configured agents on a named tmux socket.
+- Restarting one `mosaic-agent@name.service` does not change holder server PID or kill sibling sessions.
+- `mosaic agent send` can deliver a message to a named agent with a self-identifying preamble.
+- `mosaic agent reset` can reset a reusable slot (`--clear` or `--new`) and send a fresh kickstart.
+- `mosaic fleet verify` proves holder ownership, exact-target safety, and per-agent restart isolation.
+- Existing default tmux sessions on this server are not disturbed by default install.
+- Docs explain generic customization and include USC-style roster only as an example.
+- AI Guide remains generic; Mosaic Stack docs carry the concrete install path.
+
+## Risks and mitigations
+
+| Risk                                                | Mitigation                                                                        |
+| --------------------------------------------------- | --------------------------------------------------------------------------------- |
+| Killing existing tmux sessions                      | Use named `mosaic-factory` socket; no default `tmux kill-server`.                 |
+| systemd unit quoting/env expansion bugs             | Move logic into shell wrappers; verify with `systemd-analyze --user verify`.      |
+| Runtime reset command mismatch                      | Make reset command runtime-configurable; fallback to service restart + kickstart. |
+| Tool install drift                                  | Ensure npm package includes framework tmux/fleet tools; add packaging test.       |
+| Mosaic-specific assumptions leak into generic guide | Keep USC roster as example profile; AI Guide documents pattern/options.           |
+| Matrix migration blocked by tmux coupling           | Add `mosaic agent` abstraction now; keep tmux details below transport layer.      |
+
+## Suggested first PR split
+
+1. **PR A — tmux tool hardening**
+   - socket support;
+   - exact target helpers;
+   - tests/docs.
+
+2. **PR B — fleet systemd primitives**
+   - holder unit;
+   - agent unit;
+   - start-agent wrapper;
+   - install-user-units script;
+   - verify script.
+
+3. **PR C — roster and CLI**
+   - roster schema/examples;
+   - `mosaic fleet ...` commands;
+   - `mosaic agent ...` commands.
+
+4. **PR D — local rollout and docs**
+   - local roster for this server;
+   - run canary;
+   - document verification evidence;
+   - update AI Guide with generic lessons.
+
+## Immediate next action
+
+Implement PR A first. It is low-risk, improves existing tools, and is required for a safe named-socket rollout on this server.
--- a/scripts/analysis/reflect-board-history.sh
+++ b/scripts/analysis/reflect-board-history.sh
@@ -0,0 +1,111 @@
+#!/usr/bin/env bash
+# reflect-board-history.sh — Phase-0 experiment P3 (outcome detectability)
+#
+# Question: for completed tasks, how often does a machine-detectable
+# correct/wrong outcome signal appear within a follow-up window (default 30d)?
+# If the base rate is too low, predicted-vs-actual calibration (design §7) has
+# nothing to score against, so the kernel should capture caveat-notes only.
+#
+# Method: consume a board/task export (JSONL, one task object per line) OR fall
+# back to scanning the git history of a `data/` task directory. For each task
+# that reached a "done"-like state, decide whether a later signal marks it
+# correct or wrong (reopen, revert, follow-up "fix"/"regression", explicit
+# outcome field). Emit the detectable-outcome base rate. HARNESS + RUBRIC.
+#
+# Usage:
+#   scripts/analysis/reflect-board-history.sh --jsonl FILE [--window-days N] [--json|--md]
+#   scripts/analysis/reflect-board-history.sh --data-dir DIR [--window-days N] [--json|--md]
+#
+# JSONL fields used (best-effort): .id .status .completed_at .outcome
+#   .reopened_at .followups[] (free-form). Missing fields are tolerated. --window-days only affects --data-dir.
+#
+# Requirements: jq (for --jsonl), git (for --data-dir), awk.
+#
+# PRE-REGISTERED KILL CONDITION:
+#   detectable-outcome base rate < 20% ⇒ do NOT build §7 calibration loop;
+#   capture caveat-notes only.
+
+set -euo pipefail
+
+JSONL=""
+DATA_DIR=""
+WINDOW_DAYS=30
+FORMAT="json"
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --jsonl) JSONL="${2:?--jsonl requires a FILE}"; shift 2 ;;
+    --data-dir) DATA_DIR="${2:?--data-dir requires a DIR}"; shift 2 ;;
+    --window-days) WINDOW_DAYS="${2:?--window-days requires N}"; shift 2 ;;
+    --json) FORMAT="json"; shift ;;
+    --md) FORMAT="md"; shift ;;
+    -h|--help) sed -n '2,26p' "$0"; exit 0 ;;  # lines 2-26 = header comment block only
+    *) echo "unknown arg: $1" >&2; exit 2 ;;
+  esac
+done
+
+KILL_CONDITION='detectable-outcome base rate < 20% ⇒ do NOT build §7 calibration loop'
+echo "# pre-registered kill condition: ${KILL_CONDITION}" >&2
+
+done_total=0
+detectable=0
+
+if [[ -n "$JSONL" ]]; then
+  command -v jq >/dev/null 2>&1 || { echo "jq required for --jsonl" >&2; exit 3; }
+  [[ -r "$JSONL" ]] || { echo "cannot read $JSONL" >&2; exit 3; }
+  # Count done tasks and those with a machine-detectable outcome signal.
+  done_total="$(jq -rs '[.[] | select((.status // "") | test("done|complete|closed"; "i"))] | length' "$JSONL" 2>/dev/null || echo 0)"
+  detectable="$(jq -rs '
+    [ .[]
+      | select((.status // "") | test("done|complete|closed"; "i"))
+      | select(
+          (.outcome // null) != null
+          or (.reopened_at // null) != null
+          or ((.followups // []) | length) > 0
+        )
+    ] | length' "$JSONL" 2>/dev/null || echo 0)"
+elif [[ -n "$DATA_DIR" ]]; then
+  command -v git >/dev/null 2>&1 || { echo "git required for --data-dir" >&2; exit 3; }
+  [[ -d "$DATA_DIR" ]] || { echo "no such dir: $DATA_DIR" >&2; exit 3; }
+  # Proxy: a task file later touched by a commit whose subject signals a
+  # correction is a "detectable outcome". Files are listed relative to DATA_DIR.
+  while IFS= read -r file; do
+    [[ -z "$file" ]] && continue
+    done_total=$((done_total + 1))
+    if git -C "$DATA_DIR" log --since="${WINDOW_DAYS} days ago" --pretty='%s' -- "$file" 2>/dev/null \
+         | grep -iE 'reopen|revert|fix|regression|wrong|incorrect|redo' >/dev/null; then  # no -q: drain input so pipefail never sees SIGPIPE
+      detectable=$((detectable + 1))
+    fi
+  done < <(cd -- "$DATA_DIR" && find . -type f -name '*.json' 2>/dev/null)  # relative paths, so git -C resolves them correctly
+else
+  echo "provide --jsonl FILE or --data-dir DIR" >&2
+  exit 2
+fi
+
+rate="$(awk "BEGIN{ if ($done_total==0) print \"0.0\"; else printf \"%.1f\", 100*$detectable/$done_total }")"
+verdict="$(awk "BEGIN{print ($rate < 20.0) ? \"KILL §7 — caveat-notes only\" : \"signal present — proceed\"}")"
+
+if [[ "$FORMAT" == "md" ]]; then
+  cat <<EOF
+## P3 — outcome detectability
+
+- done-like tasks: **${done_total}**
+- with machine-detectable outcome (window ${WINDOW_DAYS}d): **${detectable}**
+- base rate: **${rate}%**
+- kill condition: ${KILL_CONDITION}
+- verdict: **${verdict}**
+EOF
+else
+  awk -v dt="$done_total" -v d="$detectable" -v r="$rate" -v w="$WINDOW_DAYS" \
+      -v v="$verdict" -v kc="$KILL_CONDITION" 'BEGIN{
+    printf "{\n"
+    printf "  \"experiment\": \"P3-board-history\",\n"
+    printf "  \"window_days\": %d,\n", w
+    printf "  \"done_tasks\": %d,\n", dt
+    printf "  \"detectable_outcomes\": %d,\n", d
+    printf "  \"base_rate_pct\": %s,\n", r
+    printf "  \"kill_condition\": \"%s\",\n", kc
+    printf "  \"verdict\": \"%s\"\n", v
+    printf "}\n"
+  }'
+fi
--- a/scripts/analysis/reflect-calibration.sh
+++ b/scripts/analysis/reflect-calibration.sh
@@ -0,0 +1,117 @@
+#!/usr/bin/env bash
+# reflect-calibration.sh — Phase-0 experiment P1 (confidence signal)
+#
+# Question: does an agent's self-reported confidence discriminate correct from
+# incorrect work — especially on the self-rated-HIGH subset, where a closed
+# loop would actually trust it? If confidence ≈ chance on the high subset, the
+# signal is useless and design §7–§8 should not be built.
+#
+# Method: consume a labelled corpus — JSONL of {confidence: 0..1, correct:
+# true|false}. Compute discrimination as ROC AUC over all rows, plus the
+# correct-rate (lift) on the high-confidence subset (>= threshold), and compare
+# to the pre-registered chance baseline (the overall correct-rate). HARNESS +
+# RUBRIC; the labelled corpus is supplied later.
+#
+# Usage:
+#   scripts/analysis/reflect-calibration.sh --jsonl FILE [--high 0.8] [--json|--md]
+#
+# Requirements: jq, awk.
+#
+# PRE-REGISTERED KILL CONDITION:
+#   AUC <= 0.60 OR high-subset lift <= +5pp over base rate
+#   ⇒ confidence is not a usable routing signal; do NOT build §7–§8.
+
+set -euo pipefail
+
+JSONL=""
+HIGH=0.8
+FORMAT="json"
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --jsonl) JSONL="${2:?--jsonl requires a FILE}"; shift 2 ;;
+    --high) HIGH="${2:?--high requires a threshold}"; shift 2 ;;
+    --json) FORMAT="json"; shift ;;
+    --md) FORMAT="md"; shift ;;
+    -h|--help) sed -n '2,22p' "$0"; exit 0 ;;  # lines 2-22 = header comment block only
+    *) echo "unknown arg: $1" >&2; exit 2 ;;
+  esac
+done
+
+KILL_CONDITION='AUC <= 0.60 OR high-subset lift <= +5pp ⇒ do NOT build §7–§8'
+echo "# pre-registered kill condition: ${KILL_CONDITION}" >&2
+
+command -v jq >/dev/null 2>&1 || { echo "jq required" >&2; exit 3; }
+[[ -r "$JSONL" ]] || { echo "provide a readable --jsonl FILE" >&2; exit 2; }
+
+# Normalise to "<confidence> <0|1>" rows; tolerate bad lines.
+ROWS="$(jq -rs '
+  [ .[] | select((.confidence|type)=="number") |
+    "\(.confidence) \((.correct==true) | if . then 1 else 0 end)" ]
+  | .[]' "$JSONL" 2>/dev/null || true)"
+
+if [[ -z "$ROWS" ]]; then
+  echo '{ "experiment": "P1-calibration", "error": "no usable rows" }'
+  exit 0
+fi
+
+# AUC via the Mann–Whitney U relation (rank-based); base rate; high-subset lift.
+read -r N POS BASE AUC HIGH_N HIGH_CORRECT HIGH_RATE LIFT <<EOF
+$(printf '%s\n' "$ROWS" | awk -v high="$HIGH" '
+  { c=$1; y=$2; conf[NR]=c; lab[NR]=y; n++;
+    if (y==1) pos++; else neg++;
+    if (c>=high) { hn++; if (y==1) hc++ } }
+  END{
+    base = (n>0)? pos/n : 0;
+    # Rank-sum AUC: average ranks (ties → average rank).
+    # sort indices by confidence
+    for (i=1;i<=n;i++) idx[i]=i;
+    for (i=1;i<=n;i++) for (j=i+1;j<=n;j++) if (conf[idx[i]]>conf[idx[j]]) { t=idx[i]; idx[i]=idx[j]; idx[j]=t }
+    i=1;
+    while (i<=n) {
+      j=i; while (j<n && conf[idx[j+1]]==conf[idx[i]]) j++;
+      avg=(i+j)/2.0;
+      for (k=i;k<=j;k++) rank[idx[k]]=avg;
+      i=j+1;
+    }
+    rsum=0; for (i=1;i<=n;i++) if (lab[i]==1) rsum+=rank[i];
+    if (pos>0 && neg>0) auc=(rsum - pos*(pos+1)/2.0)/(pos*neg); else auc=0.5;
+    hrate=(hn>0)? hc/hn : 0;
+    lift=hrate-base;
+    printf "%d %d %.4f %.4f %d %d %.4f %.4f", n, pos, base, auc, hn, hc, hrate, lift
+  }')
+EOF
+
+verdict="$(awk -v auc="$AUC" -v lift="$LIFT" 'BEGIN{
+  print (auc <= 0.60 || lift <= 0.05) ? "KILL §7–§8 — confidence not usable" : "signal present — proceed"
+}')"
+
+if [[ "$FORMAT" == "md" ]]; then
+  cat <<EOF
+## P1 — confidence calibration
+
+- rows: **${N}** (positives ${POS}) · base correct-rate **$(awk "BEGIN{printf \"%.1f\", 100*${BASE}}")%**
+- ROC AUC: **${AUC}**
+- high-confidence subset (>= ${HIGH}): n=${HIGH_N}, correct=${HIGH_CORRECT}, rate=$(awk "BEGIN{printf \"%.1f\", 100*${HIGH_RATE}}")%
+- lift over base: **$(awk "BEGIN{printf \"%+.1f\", 100*${LIFT}}")pp**
+- kill condition: ${KILL_CONDITION}
+- verdict: **${verdict}**
+EOF
+else
+  awk -v n="$N" -v pos="$POS" -v base="$BASE" -v auc="$AUC" -v hn="$HIGH_N" \
+      -v hc="$HIGH_CORRECT" -v hr="$HIGH_RATE" -v lift="$LIFT" -v high="$HIGH" \
+      -v v="$verdict" -v kc="$KILL_CONDITION" 'BEGIN{
+    printf "{\n"
+    printf "  \"experiment\": \"P1-calibration\",\n"
+    printf "  \"rows\": %d,\n", n
+    printf "  \"positives\": %d,\n", pos
+    printf "  \"base_rate\": %.4f,\n", base
+    printf "  \"auc\": %.4f,\n", auc
+    printf "  \"high_threshold\": %s,\n", high
+    printf "  \"high_subset\": { \"n\": %d, \"correct\": %d, \"rate\": %.4f },\n", hn, hc, hr
+    printf "  \"lift_over_base\": %.4f,\n", lift
+    printf "  \"kill_condition\": \"%s\",\n", kc
+    printf "  \"verdict\": \"%s\"\n", v
+    printf "}\n"
+  }'
+fi
--- a/scripts/analysis/reflect-git-history.sh
+++ b/scripts/analysis/reflect-git-history.sh
@@ -0,0 +1,110 @@
+#!/usr/bin/env bash
+# reflect-git-history.sh — Phase-0 experiment P2 ("only-self-reflection" bucket)
+#
+# Question: of the failures visible in git history, what fraction would ONLY
+# have been caught by end-of-run self-reflection — i.e. NOT by CI and NOT by
+# independent human review? If that bucket is near-empty, the closed
+# calibration / skill-synthesis loop (design §7–§8) is not worth building.
+#
+# Method: scan `git log` over a window for failure signals (reverts, and
+# fix:/hotfix commits landing shortly after a feature merge). Classify each by
+# the gate most likely to have caught it, using a pre-registered heuristic.
+# This is a HARNESS + RUBRIC; the classifier is deliberately simple and the
+# real corpus/labelling is wired later. It emits a structured tally.
+#
+# Usage:
+#   scripts/analysis/reflect-git-history.sh [--repo PATH] [--since SINCE] [--json|--md]
+#
+# Options:
+#   --repo PATH   repo to analyse (default: current repo)
+#   --since SINCE git log --since value (default: "6 months ago")
+#   --json        emit JSON (default)
+#   --md          emit markdown
+#
+# Requirements: bash >= 4 (mapfile), git, grep, sed, awk.
+#
+# PRE-REGISTERED KILL CONDITION:
+#   bucket "only_self_reflection" is near-empty (< 10% of classified failures)
+#   ⇒ do NOT build design §7–§8 (closed loop). Caveat-notes capture only.
+
+set -euo pipefail
+
+# Defaults; each can be overridden by the matching command-line option.
+REPO="."
+SINCE="6 months ago"
+FORMAT="json"
+
+# need_value OPT ARGC — exit 2 with a usage error when option OPT is the last
+# argument, instead of letting `set -u` abort on the unset "$2".
+need_value() {
+  [[ "$2" -ge 2 ]] || { echo "missing value for $1" >&2; exit 2; }
+}
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --repo) need_value "$1" "$#"; REPO="$2"; shift 2 ;;
+    --since) need_value "$1" "$#"; SINCE="$2"; shift 2 ;;
+    --json) FORMAT="json"; shift ;;
+    --md) FORMAT="md"; shift ;;
+    # Help is the header comment only: file lines 2-28. Line 29 is blank and
+    # line 30 is `set -euo pipefail`, so the range must stop at 28.
+    -h|--help) sed -n '2,28p' "$0"; exit 0 ;;
+    *) echo "unknown arg: $1" >&2; exit 2 ;;
+  esac
+done
+
+KILL_CONDITION='bucket only_self_reflection < 10% of classified failures ⇒ do NOT build §7–§8'
+echo "# pre-registered kill condition: ${KILL_CONDITION}" >&2
+
+command -v git >/dev/null 2>&1 || { echo "git required" >&2; exit 3; }
+
+# Collect candidate failure commits: reverts + fix/hotfix subjects.
+mapfile -t LINES < <(
+  git -C "$REPO" log --since="$SINCE" --pretty='%H%x09%s' 2>/dev/null \
+    | grep -iE 'revert|hotfix|hot-fix|regression|fix(\(|:|!| )' || true
+)
+
+total=0; ci=0; human=0; selfonly=0
+for line in "${LINES[@]}"; do
+  [[ -z "$line" ]] && continue
+  subj="${line#*$'\t'}"
+  total=$((total + 1))
+  # Pre-registered classification heuristic (gate most likely to have caught it):
+  #   - build/test/lint/type/ci signals → CI would have caught it
+  #   - security/auth/permission/data/migration → human review would flag it
+  #   - everything else (logic/UX/assumption/edge) → only-self-reflection bucket
+  if printf '%s' "$subj" | grep -qiE 'test|lint|type|build|ci|compile|typo'; then
+    ci=$((ci + 1))
+  elif printf '%s' "$subj" | grep -qiE 'security|auth|permission|rbac|secret|migration|data|sql|injection'; then
+    human=$((human + 1))
+  else
+    selfonly=$((selfonly + 1))
+  fi
+done
+
+pct() { awk "BEGIN{ if ($2==0) print \"0.0\"; else printf \"%.1f\", 100*$1/$2 }"; }
+self_pct="$(pct "$selfonly" "$total")"
+verdict="$(awk "BEGIN{print ($self_pct < 10.0) ? \"KILL §7–§8\" : \"signal present — proceed to deeper labelling\"}")"
+
+# Emit the tally in the requested format: markdown when FORMAT is "md",
+# JSON otherwise.
+if [[ "$FORMAT" == "md" ]]; then
+  # Markdown report: one table row per gate.
+  cat <<EOF
+## P2 — git-history failure-gate attribution
+
+- window: \`${SINCE}\` · repo: \`${REPO}\`
+- classified failures: **${total}**
+
+| gate | count | share |
+|---|---:|---:|
+| CI would catch | ${ci} | $(pct "$ci" "$total")% |
+| human review would catch | ${human} | $(pct "$human" "$total")% |
+| only-self-reflection | ${selfonly} | ${self_pct}% |
+
+- kill condition: ${KILL_CONDITION}
+- verdict: **${verdict}**
+EOF
+else
+  # JSON report. The free-text fields travel through the environment rather
+  # than `awk -v` (which interprets backslash escapes in the value) and are
+  # escaped by jesc(), so a quote or backslash in --repo/--since cannot break
+  # the document. Numeric fields are still passed with -v.
+  P2_REPO="$REPO" P2_SINCE="$SINCE" P2_KC="$KILL_CONDITION" P2_VERDICT="$verdict" \
+  awk -v t="$total" -v c="$ci" -v h="$human" -v s="$selfonly" -v sp="$self_pct" '
+  # jesc(s): s with backslash, double quote, newline, tab and CR JSON-escaped.
+  # Other control characters are passed through unchanged.
+  function jesc(s,    i, ch, out) {
+    out = ""
+    for (i = 1; i <= length(s); i++) {
+      ch = substr(s, i, 1)
+      if (ch == "\\" || ch == "\"") out = out "\\" ch
+      else if (ch == "\n") out = out "\\n"
+      else if (ch == "\t") out = out "\\t"
+      else if (ch == "\r") out = out "\\r"
+      else out = out ch
+    }
+    return out
+  }
+  BEGIN{
+    printf "{\n"
+    printf "  \"experiment\": \"P2-git-history\",\n"
+    printf "  \"repo\": \"%s\",\n", jesc(ENVIRON["P2_REPO"])
+    printf "  \"since\": \"%s\",\n", jesc(ENVIRON["P2_SINCE"])
+    printf "  \"classified_failures\": %d,\n", t
+    printf "  \"buckets\": { \"ci\": %d, \"human_review\": %d, \"only_self_reflection\": %d },\n", c, h, s
+    printf "  \"only_self_reflection_pct\": %s,\n", sp
+    printf "  \"kill_condition\": \"%s\",\n", jesc(ENVIRON["P2_KC"])
+    printf "  \"verdict\": \"%s\"\n", jesc(ENVIRON["P2_VERDICT"])
+    printf "}\n"
+  }'
+fi
Author	SHA1	Message	Date
Jarvis	757f5e6998	feat(fleet): add durable tmux fleet poc All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/pr/ci Pipeline was successful Details	2026-06-19 15:50:35 -05:00
Jarvis	250d3da12d	docs: plan durable tmux fleet install Some checks failed ci/woodpecker/push/ci Pipeline was canceled Details ci/woodpecker/pr/ci Pipeline was canceled Details	2026-06-19 15:10:36 -05:00
jason.woltje	87f561c1f8	fix(launch): include Pi native skill roots in 'all' mode; dedup 'discover' force-loads (#556 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-06-19 19:58:09 +00:00
jason.woltje	8c45857859	feat(launch): force-load fleet-critical Pi skills + reconcile skill docs (#555 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-06-19 18:31:02 +00:00
jason.woltje	605221d42f	docs(framework/tools): lead TOOLS.md with high-salience fleet-tools cheatsheet (#554 ) Some checks failed ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was canceled Details	2026-06-19 18:03:03 +00:00
jason.woltje	ee584ab48c	fix(framework/tools): prettier-format woodpecker README — restore main format gate (#553 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-06-18 22:39:35 +00:00
jason.woltje	ab4e138003	feat(framework/tools): orchestration helpers — lane-brief.sh + ci-wait.sh (#547 ) Some checks failed ci/woodpecker/push/ci Pipeline failed Details ci/woodpecker/push/publish Pipeline was canceled Details	2026-06-18 22:08:40 +00:00
jason.woltje	719c6ac3db	fix(framework/tools): eval injection, broken JSON, tmpfile leak (#549 ) Some checks failed ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was canceled Details	2026-06-18 21:35:32 +00:00
jason.woltje	b8807e60df	feat(agent-reflection): durable kernel — reflection.v1 capture + risk-floor + Phase-0 (#545 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-06-16 21:35:40 +00:00
jason.woltje	c461380a4a	feat(mosaic-as): agent registration + scoped/revocable tokens (US-007) (#541 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-06-16 01:10:44 +00:00