docs(framework): canonize merge-authority policy (hard gate 13 + E2E gate note)

Coordinated work: post-review merge go-ahead belongs to the active coordinator/orchestrator session; solo delivery merges without routine confirmation as before. 'No self-merge' means no UNREVIEWED self-merge. Previously this policy existed only as per-host local patches to the preserved ~/.config/mosaic/AGENTS.md (web1 + sb-it-mgr-0-lt rule 38) and was lost from E2E-DELIVERY.md on every framework sync. Shipping it in defaults/AGENTS.md + guides/E2E-DELIVERY.md makes it permanent for fresh installs and upgrades. Policy: Jason, 2026-06-11. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
release(mosaic): bump @mosaicstack/mosaic 0.0.30 -> 0.0.31 (#534)
2026-06-11 18:41:59 -05:00 · 2026-06-11 19:55:43 +00:00 · 2026-06-11 19:50:55 +00:00 · 2026-06-11 19:07:00 +00:00 · 2026-06-11 19:06:35 +00:00 · 2026-06-11 18:10:42 +00:00
152 changed files with 26728 additions and 964 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,6 @@ coverage
 *.tsbuildinfo
 .pnpm-store
 docs/reports/
 # Step-CA dev password — real file is gitignored; commit only the .example
 infra/step-ca/dev-password
--- a/.woodpecker/ci.yml
+++ b/.woodpecker/ci.yml
@@ -46,18 +46,28 @@ steps:
  test:
    image: *node_image
    environment:
-      DATABASE_URL: postgresql://mosaic:mosaic@postgres:5432/mosaic
+      # Avoid the namespace-level Woodpecker DB service named "postgres".
      # The Kubernetes backend exposes service containers by step name.
      DATABASE_URL: postgresql://mosaic:mosaic@ci-postgres:5432/mosaic
    commands:
      - *enable_pnpm
      # Install postgresql-client for pg_isready
      - apk add --no-cache postgresql-client
-      # Wait up to 30s for postgres to be ready
+      # Wait up to 60s for CI postgres to be ready; fail fast if it never comes up.
      - |
-        for i in $(seq 1 30); do
+        ready=0
-          pg_isready -h postgres -p 5432 -U mosaic && break
+        for i in $(seq 1 60); do
-          echo "Waiting for postgres ($i/30)..."
+          if pg_isready -h ci-postgres -p 5432 -U mosaic; then
            ready=1
            break
          fi
          echo "Waiting for ci-postgres ($i/60)..."
          sleep 1
        done
        if [ "$ready" -ne 1 ]; then
          echo "ci-postgres did not become ready" >&2
          exit 1
        fi
      # Run migrations (DATABASE_URL is set in environment above)
      - pnpm --filter @mosaicstack/db run db:migrate
      # Run all tests
@@ -66,7 +76,7 @@ steps:
      - typecheck
 services:
-  postgres:
+  ci-postgres:
    image: pgvector/pgvector:pg17
    environment:
      POSTGRES_USER: mosaic
--- a/.woodpecker/publish.yml
+++ b/.woodpecker/publish.yml
@@ -114,6 +114,31 @@ steps:
    depends_on:
      - build
  # Builds docker/appservice.Dockerfile with kaniko and pushes the image to
  # git.mosaicstack.dev/mosaicstack/stack/appservice. Runs after the `build` step.
  build-appservice:
    image: gcr.io/kaniko-project/executor:debug
    environment:
      # Registry credentials come from pipeline secrets.
      REGISTRY_USER:
        from_secret: gitea_username
      REGISTRY_PASS:
        from_secret: gitea_password
      # Commit metadata is passed through so the shell script below can pick tags.
      CI_COMMIT_BRANCH: ${CI_COMMIT_BRANCH}
      CI_COMMIT_TAG: ${CI_COMMIT_TAG}
      CI_COMMIT_SHA: ${CI_COMMIT_SHA}
    commands:
      # Write the registry auth file that kaniko reads from /kaniko/.docker.
      - mkdir -p /kaniko/.docker
      - echo "{\"auths\":{\"git.mosaicstack.dev\":{\"username\":\"$REGISTRY_USER\",\"password\":\"$REGISTRY_PASS\"}}}" > /kaniko/.docker/config.json
      # Tagging rules: always `sha-<first 7 chars of the commit SHA>`; additionally
      # `latest` when the branch is main; additionally the tag name when
      # CI_COMMIT_TAG is non-empty.
      - |
        DESTINATIONS="--destination git.mosaicstack.dev/mosaicstack/stack/appservice:sha-${CI_COMMIT_SHA:0:7}"
        if [ "$CI_COMMIT_BRANCH" = "main" ]; then
          DESTINATIONS="$DESTINATIONS --destination git.mosaicstack.dev/mosaicstack/stack/appservice:latest"
        fi
        if [ -n "$CI_COMMIT_TAG" ]; then
          DESTINATIONS="$DESTINATIONS --destination git.mosaicstack.dev/mosaicstack/stack/appservice:$CI_COMMIT_TAG"
        fi
        /kaniko/executor --context . --dockerfile docker/appservice.Dockerfile $DESTINATIONS
    depends_on:
      - build
  build-web:
    image: gcr.io/kaniko-project/executor:debug
    environment:
--- a/README.md
+++ b/README.md
@@ -58,6 +58,8 @@ mosaic yolo pi               # Pi in yolo mode
 The launcher verifies your config, checks for `SOUL.md`, injects your `AGENTS.md` standards into the runtime, and forwards all arguments.
 Pi launches default to a token-lean skill posture: `mosaic pi` passes `--no-skills` so Pi does not preload every global skill description into the system prompt. Use `MOSAIC_PI_SKILL_MODE=all mosaic pi` for the legacy all-skills catalog, or `MOSAIC_PI_SKILL_MODE=discover mosaic pi` to let Pi use its native settings/project skill discovery.
 ### TUI & Gateway
 ```bash
--- a/apps/appservice/package.json
+++ b/apps/appservice/package.json
@@ -0,0 +1,35 @@
 {
  "name": "@mosaicstack/mosaic-as",
  "version": "0.0.1",
  "type": "module",
  "private": true,
  "repository": {
    "type": "git",
    "url": "https://git.mosaicstack.dev/mosaicstack/stack.git",
    "directory": "apps/appservice"
  },
  "main": "dist/main.js",
  "bin": {
    "mosaic-as": "dist/main.js",
    "mosaic-as-registration": "dist/registration-main.js"
  },
  "scripts": {
    "build": "tsc",
    "lint": "eslint src",
    "typecheck": "tsc --noEmit",
    "test": "vitest run --passWithNoTests",
    "dev": "tsx watch src/main.ts"
  },
  "dependencies": {
    "@mosaicstack/appservice": "workspace:*"
  },
  "devDependencies": {
    "@types/node": "^22.0.0",
    "tsx": "^4.19.0",
    "typescript": "^5.8.0",
    "vitest": "^2.0.0"
  },
  "files": [
    "dist"
  ]
 }
--- a/apps/appservice/src/tests/server.test.ts
+++ b/apps/appservice/src/tests/server.test.ts
@@ -0,0 +1,243 @@
 import { describe, expect, it, vi } from 'vitest';
 import { AppserviceDaemon } from '../server.js';
 import type { DaemonConfig, DaemonRequest } from '../server.js';
 // Daemon config shared by every test. The as/hs/bridge secrets are distinct
 // strings so the auth tests can present one token class where another is expected.
 const cfg: DaemonConfig = {
  homeserverUrl: 'https://hs.example',
  domain: 'hs.example',
  asToken: 'as-secret',
  hsToken: 'hs-secret',
  bridgeTokens: ['bridge-secret'],
 };
 /** Builds a `Response` carrying `body` as JSON with the given HTTP status. */
 function jsonResponse(status: number, body: unknown): Response {
  const headers = { 'Content-Type': 'application/json' };
  const payload = JSON.stringify(body);
  return new Response(payload, { status, headers });
 }
 /**
  * Builds a DaemonRequest for tests: starts from `GET /` with an empty query
  * and no body, then lets `overrides` replace any of those fields.
  */
 const request = (overrides: Partial<DaemonRequest>): DaemonRequest => {
  const defaults: DaemonRequest = {
    method: 'GET',
    path: '/',
    searchParams: new URLSearchParams(),
    body: undefined,
  };
  return { ...defaults, ...overrides };
 };
 /**
  * Creates a daemon whose homeserver calls go to a fetch mock that answers
  * every request with `200 { event_id: '$sent' }`, and whose logger is a no-op.
  * Returns both so tests can inspect the recorded fetch calls.
  */
 function makeDaemon() {
  const fetchMock = vi.fn(async (_input: URL | string) => jsonResponse(200, { event_id: '$sent' }));
  const silentLog = () => {};
  const daemon = new AppserviceDaemon(cfg, fetchMock as unknown as typeof fetch, silentLog);
  return { daemon, fetchMock };
 }
 // Routing/auth tests for AppserviceDaemon. Every test calls daemon.handle()
 // directly (no HTTP server) and, where relevant, inspects the outgoing
 // homeserver requests recorded by the fetch mock.
 describe('AppserviceDaemon routing', () => {
  it('serves health unauthenticated', async () => {
    const { daemon } = makeDaemon();
    expect((await daemon.handle(request({ path: '/health' }))).status).toBe(200);
  });
  it('404s unknown paths', async () => {
    const { daemon } = makeDaemon();
    expect((await daemon.handle(request({ path: '/nope' }))).status).toBe(404);
  });
  it('transactions require the hs_token', async () => {
    const { daemon } = makeDaemon();
    // Wrong bearer token first: must be refused.
    const bad = await daemon.handle(
      request({
        method: 'PUT',
        path: '/_matrix/app/v1/transactions/t1',
        authorizationHeader: 'Bearer wrong',
        body: { events: [] },
      }),
    );
    expect(bad.status).toBe(403);
    // Same transaction path with the configured hs_token: accepted.
    const ok = await daemon.handle(
      request({
        method: 'PUT',
        path: '/_matrix/app/v1/transactions/t1',
        authorizationHeader: 'Bearer hs-secret',
        body: { events: [{ type: 'm.room.message', event_id: '$e' }] },
      }),
    );
    expect(ok.status).toBe(200);
  });
  it('bridge requires a bridge token (hs/as tokens do not work)', async () => {
    const { daemon } = makeDaemon();
    // undefined = no Authorization header; the other entries are valid-looking
    // bearer values that are not bridge tokens.
    for (const token of [undefined, 'Bearer hs-secret', 'Bearer as-secret', 'Bearer nope']) {
      const res = await daemon.handle(
        request({
          method: 'POST',
          path: '/bridge/v1/messages',
          authorizationHeader: token,
          body: {},
        }),
      );
      expect(res.status).toBe(403);
    }
  });
  it('bridge message sends as the agent and returns the event id', async () => {
    const { daemon, fetchMock } = makeDaemon();
    const res = await daemon.handle(
      request({
        method: 'POST',
        path: '/bridge/v1/messages',
        authorizationHeader: 'Bearer bridge-secret',
        body: { room_id: '!r:hs.example', agent: 'pi0-web1', body: 'hi', thread_root: '$req' },
      }),
    );
    expect(res.status).toBe(200);
    // '$sent' is the event_id the default fetch mock returns for every call.
    expect(res.body.event_id).toBe('$sent');
    // Find the outgoing send request and check it was made on behalf of the
    // agent's Matrix user (user_id query parameter).
    const sendCall = fetchMock.mock.calls
      .map((c) => new URL(String(c[0])))
      .find((u) => u.pathname.includes('/send/m.room.message/'));
    expect(sendCall).toBeDefined();
    expect(sendCall!.searchParams.get('user_id')).toBe('@agent-pi0-web1:hs.example');
  });
  it('bridge rejects invalid payloads with 400', async () => {
    const { daemon } = makeDaemon();
    // NOTE(review): 'bad' is presumably rejected as a malformed room id; the
    // validation rules live in @mosaicstack/appservice, not in this file.
    const res = await daemon.handle(
      request({
        method: 'POST',
        path: '/bridge/v1/messages',
        authorizationHeader: 'Bearer bridge-secret',
        body: { room_id: 'bad', agent: 'pi0', body: 'x' },
      }),
    );
    expect(res.status).toBe(400);
  });
  it('bridge typing endpoint works', async () => {
    const { daemon, fetchMock } = makeDaemon();
    const res = await daemon.handle(
      request({
        method: 'POST',
        path: '/bridge/v1/typing',
        authorizationHeader: 'Bearer bridge-secret',
        body: { room_id: '!r:hs.example', agent: 'pi0-web1', typing: true },
      }),
    );
    expect(res.status).toBe(200);
    // A request to a /typing/ homeserver path must have been issued.
    const typingCall = fetchMock.mock.calls
      .map((c) => new URL(String(c[0])))
      .find((u) => u.pathname.includes('/typing/'));
    expect(typingCall).toBeDefined();
  });
  it('authenticated unknown bridge sub-paths return 405, never fall through', async () => {
    const { daemon } = makeDaemon();
    const res = await daemon.handle(
      request({
        method: 'GET',
        path: '/bridge/v1/unknown',
        authorizationHeader: 'Bearer bridge-secret',
      }),
    );
    expect(res.status).toBe(405);
  });
  it('provisions a room as the AS sender with space linking', async () => {
    // Record every outgoing call (URL + parsed JSON body) so the createRoom
    // request and the space state writes can be inspected afterwards.
    const calls: Array<{ url: URL; body: unknown }> = [];
    const fetchMock = vi.fn(async (input: URL | string, init?: RequestInit) => {
      const url = new URL(String(input));
      calls.push({ url, body: init?.body ? JSON.parse(String(init.body)) : undefined });
      if (url.pathname.endsWith('/createRoom'))
        return jsonResponse(200, { room_id: '!new:hs.example' });
      return jsonResponse(200, {});
    });
    const daemon = new AppserviceDaemon(cfg, fetchMock as unknown as typeof fetch, () => {});
    const res = await daemon.handle(
      request({
        method: 'POST',
        path: '/bridge/v1/provision/rooms',
        authorizationHeader: 'Bearer bridge-secret',
        body: {
          name: 'proj-x',
          alias: 'mosaic-proj-x',
          invite: ['@jason.woltje:hs.example'],
          space_id: '!space:hs.example',
        },
      }),
    );
    expect(res.status).toBe(200);
    expect(res.body.room_id).toBe('!new:hs.example');
    expect(res.body.space_linked).toBe(true);
    // The room must be created as the appservice sender user, with that user
    // as the only entry in the power-level override.
    const create = calls.find((c) => c.url.pathname.endsWith('/createRoom'));
    expect(create!.url.searchParams.get('user_id')).toBe('@mosaic-as:hs.example');
    const body = create!.body as Record<string, unknown>;
    expect(body.room_alias_name).toBe('mosaic-proj-x');
    expect((body.power_level_content_override as Record<string, unknown>).users).toEqual({
      '@mosaic-as:hs.example': 100,
    });
    // Space linking writes both directions: child on the space, parent on the room.
    expect(calls.some((c) => c.url.pathname.includes('/state/m.space.child/'))).toBe(true);
    expect(calls.some((c) => c.url.pathname.includes('/state/m.space.parent/'))).toBe(true);
  });
  it('space-link failure still returns the room id (no orphan)', async () => {
    // createRoom succeeds, but the m.space.child state write is refused with 403.
    const fetchMock = vi.fn(async (input: URL | string) => {
      const url = new URL(String(input));
      if (url.pathname.endsWith('/createRoom'))
        return jsonResponse(200, { room_id: '!new:hs.example' });
      if (url.pathname.includes('/state/m.space.child/'))
        return jsonResponse(403, { errcode: 'M_FORBIDDEN', error: 'no PL in space' });
      return jsonResponse(200, {});
    });
    const daemon = new AppserviceDaemon(cfg, fetchMock as unknown as typeof fetch, () => {});
    const res = await daemon.handle(
      request({
        method: 'POST',
        path: '/bridge/v1/provision/rooms',
        authorizationHeader: 'Bearer bridge-secret',
        body: { name: 'proj-x', space_id: '!space:hs.example' },
      }),
    );
    // The call still succeeds and reports the created room; the link failure is
    // surfaced through space_linked=false plus a space_error mentioning the 403.
    expect(res.status).toBe(200);
    expect(res.body.room_id).toBe('!new:hs.example');
    expect(res.body.space_linked).toBe(false);
    expect(String(res.body.space_error)).toContain('403');
  });
  it('invite list cap enforced', async () => {
    const { daemon } = makeDaemon();
    // 51 invitees must be rejected. NOTE(review): this implies a cap of 50 in
    // validateProvisionRoom — confirm against @mosaicstack/appservice.
    const res = await daemon.handle(
      request({
        method: 'POST',
        path: '/bridge/v1/provision/rooms',
        authorizationHeader: 'Bearer bridge-secret',
        body: { name: 'x', invite: Array.from({ length: 51 }, (_, i) => `@u${i}:hs`) },
      }),
    );
    expect(res.status).toBe(400);
  });
  it('provision rejects bad payloads and requires auth', async () => {
    const { daemon } = makeDaemon();
    // No Authorization header at all → 403 before any payload validation.
    const noAuth = await daemon.handle(
      request({ method: 'POST', path: '/bridge/v1/provision/rooms', body: { name: 'x' } }),
    );
    expect(noAuth.status).toBe(403);
    // Authenticated but with an empty name and a malformed alias → 400.
    const bad = await daemon.handle(
      request({
        method: 'POST',
        path: '/bridge/v1/provision/rooms',
        authorizationHeader: 'Bearer bridge-secret',
        body: { name: '', alias: 'BAD ALIAS' },
      }),
    );
    expect(bad.status).toBe(400);
  });
  it('empty bridge token list denies everything', async () => {
    // With no configured bridge tokens even the otherwise-valid token is
    // refused. No fetch mock is passed; the expected 403 comes from the
    // daemon's auth guard.
    const daemon = new AppserviceDaemon({ ...cfg, bridgeTokens: [] }, undefined, () => {});
    const res = await daemon.handle(
      request({
        method: 'POST',
        path: '/bridge/v1/typing',
        authorizationHeader: 'Bearer bridge-secret',
        body: {},
      }),
    );
    expect(res.status).toBe(403);
  });
 });
--- a/apps/appservice/src/config.ts
+++ b/apps/appservice/src/config.ts
@@ -0,0 +1,23 @@
 import type { DaemonConfig } from './server.js';
 /**
  * Returns the value of environment variable `name`.
  *
  * @throws Error when the variable is unset or empty.
  */
 const required = (name: string): string => {
  const value = process.env[name];
  if (!value) throw new Error(`missing required env var ${name}`);
  return value;
 };
 /**
  * Parses a TCP port from an environment value.
  *
  * An unset or blank value yields `fallback`. This matters because `??` only
  * catches `undefined`: a blank variable (e.g. an empty compose substitution)
  * would otherwise become `Number('') === 0`, which makes `listen()` bind a
  * random OS-assigned port without any warning.
  *
  * @throws Error when the value is not an integer in 0-65535.
  */
 const parsePort = (raw: string | undefined, fallback: number): number => {
  const text = raw?.trim();
  if (!text) return fallback;
  const port = Number(text);
  if (!Number.isInteger(port) || port < 0 || port > 65535) {
    throw new Error(
      `invalid MOSAIC_AS_PORT ${JSON.stringify(raw)}: expected an integer in 0-65535`,
    );
  }
  return port;
 };
 /**
  * Builds the daemon configuration from the process environment.
  *
  * Required: MOSAIC_AS_HOMESERVER_URL, MOSAIC_AS_DOMAIN, MOSAIC_AS_TOKEN,
  * MOSAIC_HS_TOKEN. Optional: MOSAIC_AS_USER_PREFIX (default 'agent-'),
  * MOSAIC_AS_SENDER_LOCALPART (default 'mosaic-as'), MOSAIC_AS_BRIDGE_TOKENS
  * (comma-separated; entries are trimmed and blanks dropped, default empty),
  * MOSAIC_AS_PORT (default 8008).
  *
  * @throws Error when a required variable is missing or MOSAIC_AS_PORT is invalid.
  */
 export function configFromEnv(): DaemonConfig & { port: number } {
  return {
    homeserverUrl: required('MOSAIC_AS_HOMESERVER_URL'),
    domain: required('MOSAIC_AS_DOMAIN'),
    asToken: required('MOSAIC_AS_TOKEN'),
    hsToken: required('MOSAIC_HS_TOKEN'),
    userPrefix: process.env.MOSAIC_AS_USER_PREFIX ?? 'agent-',
    senderLocalpart: process.env.MOSAIC_AS_SENDER_LOCALPART ?? 'mosaic-as',
    bridgeTokens: (process.env.MOSAIC_AS_BRIDGE_TOKENS ?? '')
      .split(',')
      .map((t) => t.trim())
      .filter(Boolean),
    // Evaluated last so missing-required-variable errors are still reported first.
    port: parsePort(process.env.MOSAIC_AS_PORT, 8008),
  };
 }
--- a/apps/appservice/src/main.ts
+++ b/apps/appservice/src/main.ts
@@ -0,0 +1,67 @@
 import http from 'node:http';
 import { configFromEnv } from './config.js';
 import { AppserviceDaemon } from './server.js';
 // Entry point: reads configuration from the environment, builds the daemon and
 // exposes it over node:http. All routing lives in AppserviceDaemon.handle();
 // this file only adapts HTTP requests/responses to it.
 const cfg = configFromEnv();
 const daemon = new AppserviceDaemon(cfg);
 // Upper bound on an accepted request body (1 MiB).
 const MAX_BODY_BYTES = 1024 * 1024;
 const server = http.createServer((req, res) => {
  const chunks: Buffer[] = [];
  let received = 0;
  // Set once the body exceeded MAX_BODY_BYTES and a 413 was already sent.
  let rejected = false;
  req.on('data', (chunk: Buffer) => {
    received += chunk.length;
    if (received > MAX_BODY_BYTES) {
      rejected = true;
      res.writeHead(413, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ errcode: 'M_TOO_LARGE', error: 'request body too large' }));
      // Stop reading the oversized body.
      // NOTE(review): destroying the request also tears down its socket —
      // confirm the 413 response is reliably flushed before that happens.
      req.destroy();
      return;
    }
    chunks.push(chunk);
  });
  req.on('end', () => {
    // Guard: never answer twice if the request was already rejected above.
    if (rejected) return;
    // The listener itself is synchronous; the async work runs in an IIFE whose
    // rejection is handled by the .catch() below.
    void (async () => {
      // req.url holds only path + query, so a dummy base is supplied for URL parsing.
      const url = new URL(req.url ?? '/', 'http://localhost');
      let body: unknown;
      try {
        const raw = Buffer.concat(chunks).toString();
        // An empty body is passed on as undefined rather than parsed.
        body = raw ? JSON.parse(raw) : undefined;
      } catch {
        res.writeHead(400, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify({ errcode: 'M_NOT_JSON', error: 'invalid json' }));
        return;
      }
      const result = await daemon.handle({
        method: req.method ?? 'GET',
        path: url.pathname,
        searchParams: url.searchParams,
        authorizationHeader: req.headers.authorization,
        body,
      });
      res.writeHead(result.status, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify(result.body));
    })().catch((error: unknown) => {
      console.error('request failed:', error);
      // If part of a response already went out, a clean 500 is no longer
      // possible; drop the connection instead.
      if (res.headersSent) {
        res.destroy();
        return;
      }
      res.writeHead(500, { 'Content-Type': 'application/json' });
      res.end(JSON.stringify({ error: 'internal error' }));
    });
  });
 });
 server.listen(cfg.port, () => {
  console.log(
    `mosaic-as listening on :${cfg.port} (homeserver ${cfg.homeserverUrl}, domain ${cfg.domain})`,
  );
  // An empty token list is allowed, but it means every /bridge/v1/* call is refused.
  if (cfg.bridgeTokens.length === 0) {
    console.warn('WARNING: MOSAIC_AS_BRIDGE_TOKENS is empty — bridge API will deny all requests');
  }
 });
--- a/apps/appservice/src/registration-main.ts
+++ b/apps/appservice/src/registration-main.ts
@@ -0,0 +1,10 @@
 import { buildRegistration, registrationToYaml } from '@mosaicstack/appservice';
 import { configFromEnv } from './config.js';
 // Prints the Synapse registration YAML (mosaic-as.yaml) for the current env.
 // Usage: MOSAIC_AS_URL=http://mosaic-as:8008 mosaic-as-registration > mosaic-as.yaml
 //
 // The same environment variables as the daemon are read via configFromEnv(),
 // so a missing required variable fails here as well.
 const cfg = configFromEnv();
 // MOSAIC_AS_URL is not part of configFromEnv(); it is needed only for the
 // registration, so it is read and checked here.
 const url = process.env.MOSAIC_AS_URL;
 if (!url) throw new Error('missing required env var MOSAIC_AS_URL');
 // Written to stdout so the result can be redirected into a file.
 process.stdout.write(registrationToYaml(buildRegistration(cfg, { url })));
--- a/apps/appservice/src/server.ts
+++ b/apps/appservice/src/server.ts
@@ -0,0 +1,146 @@
 import { createHmac, randomBytes, timingSafeEqual } from 'node:crypto';
 import {
  AppserviceIntent,
  TransactionHandler,
  validateBridgeMessage,
  validateBridgeTyping,
  validateProvisionRoom,
 } from '@mosaicstack/appservice';
 import type { AppserviceConfig, MatrixEvent } from '@mosaicstack/appservice';
 /** Daemon configuration: the appservice settings plus the bridge API tokens. */
 export interface DaemonConfig extends AppserviceConfig {
  /** Bearer tokens accepted on /bridge/v1/* (one per agent-comms host daemon). */
  bridgeTokens: string[];
 }
 /** Framework-neutral description of one incoming request, as consumed by AppserviceDaemon.handle(). */
 export interface DaemonRequest {
  /** HTTP method, compared against upper-case names such as 'GET', 'PUT', 'POST'. */
  method: string;
  /** URL path without query string. */
  path: string;
  /** Parsed query parameters; `access_token` is read from here for transactions. */
  searchParams: URLSearchParams;
  /** Raw Authorization header value (e.g. 'Bearer <token>'), if one was sent. */
  authorizationHeader?: string;
  /** Request payload; validated per route before its fields are used. */
  body: unknown;
 }
 /** Result of AppserviceDaemon.handle(): an HTTP status plus an object body. */
 export interface DaemonResponse {
  /** HTTP status code to send. */
  status: number;
  /** Response payload as a plain object. */
  body: Record<string, unknown>;
 }
 // Secrets are never compared byte-for-byte directly. Each side is first run
 // through HMAC-SHA256 under a random per-process key, so timingSafeEqual
 // always receives two digests of identical length and neither the content
 // nor the LENGTH of the stored secret is observable through timing.
 const HMAC_KEY = randomBytes(32);
 /** HMAC-SHA256 of `value` under the per-process key. */
 function digest(value: string): Buffer {
  const mac = createHmac('sha256', HMAC_KEY);
  mac.update(value);
  return mac.digest();
 }
 /** True when `a` and `b` are the same string, compared via their digests. */
 function safeEqual(a: string, b: string): boolean {
  const left = digest(a);
  const right = digest(b);
  return timingSafeEqual(left, right);
 }
 const TXN_PATH = /^\/_matrix\/app\/v1\/transactions\/([^/]+)$/;
 /**
 * HTTP-framework-agnostic request router for the mosaic-as daemon: the
 * Application Service transactions endpoint (Synapse-facing) plus the
 * internal bridge API v1 (agent-comms daemon-facing). main.ts binds this to
 * node:http; tests drive it directly.
 *
 * Routes handled by handle():
 *   GET  /health                              unauthenticated liveness check
 *   PUT  /_matrix/app/v1/transactions/{txnId} delegated to TransactionHandler
 *   POST /bridge/v1/messages                  send a message as an agent
 *   POST /bridge/v1/typing                    set an agent's typing state
 *   POST /bridge/v1/provision/rooms           create a room (optional space link)
 * Everything under /bridge/v1/ requires a configured bridge bearer token.
 */
 export class AppserviceDaemon {
  /** Client used for all homeserver-side actions (send, typing, createRoom). */
  readonly intent: AppserviceIntent;
  private readonly transactions: TransactionHandler;
  /**
   * @param cfg       Daemon configuration (appservice settings + bridge tokens).
   * @param fetchImpl Optional fetch implementation, forwarded to AppserviceIntent
   *                  (tests pass a mock).
   * @param log       Line logger; defaults to console.log.
   */
  constructor(
    private readonly cfg: DaemonConfig,
    fetchImpl?: typeof fetch,
    private readonly log: (line: string) => void = (line) => console.log(line),
  ) {
    this.intent = new AppserviceIntent(cfg, fetchImpl);
    this.transactions = new TransactionHandler({
      hsToken: cfg.hsToken,
      onEvent: (event) => this.onEvent(event),
      onError: (error, txnId) => this.log(`txn ${txnId} handler error: ${String(error)}`),
    });
  }
  /** v1: the daemon only observes; room logic lives in the agent-comms daemons. */
  private onEvent(event: MatrixEvent): void {
    // Only m.room.message events are logged; every other event type is ignored.
    if (event.type === 'm.room.message') {
      this.log(
        `event ${event.event_id ?? '?'} in ${event.room_id ?? '?'} from ${event.sender ?? '?'}`,
      );
    }
  }
  /**
   * True when the Authorization header is `Bearer <token>` and the token
   * matches one of cfg.bridgeTokens. An empty token list therefore denies
   * every request.
   */
  private bridgeAuthorized(authorizationHeader: string | undefined): boolean {
    if (!authorizationHeader?.startsWith('Bearer ')) return false;
    const presented = authorizationHeader.slice('Bearer '.length);
    return this.cfg.bridgeTokens.some((token) => safeEqual(presented, token));
  }
  /**
   * Routes one request and returns the response to send.
   *
   * Status codes produced directly by this method: 200 on success, 400 when a
   * bridge route throws, 403 for a missing/unknown bridge token, 405 for an
   * authenticated but unsupported bridge method/path, 404 for anything else.
   * Transaction requests return whatever TransactionHandler.handle() returns.
   */
  async handle(req: DaemonRequest): Promise<DaemonResponse> {
    if (req.method === 'GET' && req.path === '/health') {
      return { status: 200, body: { ok: true } };
    }
    // The transaction path is only matched for PUT; the captured group is the txn id.
    const txnMatch = req.method === 'PUT' ? TXN_PATH.exec(req.path) : null;
    if (txnMatch?.[1] !== undefined) {
      // Both credential sources are forwarded; TransactionHandler decides which
      // one to check against hsToken.
      return this.transactions.handle(txnMatch[1], req.body, {
        authorizationHeader: req.authorizationHeader,
        accessTokenParam: req.searchParams.get('access_token') ?? undefined,
      });
    }
    if (req.path.startsWith('/bridge/v1/')) {
      // Auth comes first so that no bridge sub-path is reachable without a token.
      if (!this.bridgeAuthorized(req.authorizationHeader)) {
        return { status: 403, body: { errcode: 'M_FORBIDDEN', error: 'bad bridge token' } };
      }
      try {
        if (req.method === 'POST' && req.path === '/bridge/v1/messages') {
          // The validate* helpers throw on a bad payload; the field accesses
          // below rely on them having checked req.body first.
          validateBridgeMessage(req.body);
          const eventId = await this.intent.sendAsAgent({
            roomId: req.body.room_id,
            agent: req.body.agent,
            body: req.body.body,
            threadRoot: req.body.thread_root,
            msgtype: req.body.msgtype,
            extraContent: req.body.extra_content,
          });
          return { status: 200, body: { event_id: eventId ?? null } };
        }
        if (req.method === 'POST' && req.path === '/bridge/v1/typing') {
          validateBridgeTyping(req.body);
          await this.intent.setTyping(req.body.room_id, req.body.agent, req.body.typing);
          return { status: 200, body: {} };
        }
        if (req.method === 'POST' && req.path === '/bridge/v1/provision/rooms') {
          validateProvisionRoom(req.body);
          const result = await this.intent.createRoom({
            name: req.body.name,
            alias: req.body.alias,
            topic: req.body.topic,
            invite: req.body.invite,
            spaceId: req.body.space_id,
          });
          this.log(
            `provisioned room ${result.roomId} (${req.body.name}) space_linked=${result.spaceLinked}`,
          );
          // space_error is included only when the intent reported one, so a
          // failed space link still returns the created room id.
          return {
            status: 200,
            body: {
              room_id: result.roomId,
              space_linked: result.spaceLinked,
              ...(result.spaceError ? { space_error: result.spaceError } : {}),
            },
          };
        }
      } catch (error) {
        // NOTE(review): this catch covers the intent calls as well as payload
        // validation, so a homeserver-side failure is also reported as 400 —
        // confirm that is intended rather than a 5xx.
        const message = error instanceof Error ? error.message : String(error);
        this.log(`bridge error ${req.method} ${req.path}: ${message}`);
        return { status: 400, body: { error: message } };
      }
      // Explicit: never fall out of the authenticated bridge block, so future
      // sub-paths cannot accidentally route around the auth guard above.
      return { status: 405, body: { error: 'unsupported bridge method/path' } };
    }
    return { status: 404, body: { error: 'not found' } };
  }
 }
--- a/apps/appservice/tsconfig.json
+++ b/apps/appservice/tsconfig.json
@@ -0,0 +1,9 @@
 {
  "extends": "../../tsconfig.base.json",
  "compilerOptions": {
    "outDir": "dist",
    "rootDir": "src"
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules", "dist"]
 }
--- a/apps/gateway/package.json
+++ b/apps/gateway/package.json
@@ -56,6 +56,7 @@
    "@opentelemetry/sdk-metrics": "^2.6.0",
    "@opentelemetry/sdk-node": "^0.213.0",
    "@opentelemetry/semantic-conventions": "^1.40.0",
    "@peculiar/x509": "^2.0.0",
    "@sinclair/typebox": "^0.34.48",
    "better-auth": "^1.5.5",
    "bullmq": "^5.71.0",
@@ -64,6 +65,7 @@
    "dotenv": "^17.3.1",
    "fastify": "^5.0.0",
    "ioredis": "^5.10.0",
    "jose": "^6.2.2",
    "node-cron": "^4.2.1",
    "openai": "^6.32.0",
    "postgres": "^3.4.8",
@@ -71,6 +73,7 @@
    "rxjs": "^7.8.0",
    "socket.io": "^4.8.0",
    "uuid": "^11.0.0",
    "undici": "^7.24.6",
    "zod": "^4.3.6"
  },
  "devDependencies": {
--- a/apps/gateway/src/tests/integration/federation-m2-e2e.integration.test.ts
+++ b/apps/gateway/src/tests/integration/federation-m2-e2e.integration.test.ts
@@ -0,0 +1,243 @@
 /**
 * Federation M2 E2E test — peer-add enrollment flow (FED-M2-10).
 *
 * Covers MILESTONES.md acceptance test #6:
 *   "`peer add <url>` on Server A yields an `active` peer record with a valid cert + key"
 *
 * This test simulates two gateways using a single bootstrapped NestJS app:
 *   - "Server A": the admin API that generates a keypair and stores the cert
 *   - "Server B": the enrollment endpoint that signs the CSR
 *   Both share the same DB + Step-CA in the test environment.
 *
 * Prerequisites:
 *   docker compose -f docker-compose.federated.yml --profile federated up -d
 *
 * Run:
 *   FEDERATED_INTEGRATION=1 STEP_CA_AVAILABLE=1 \
 *   STEP_CA_URL=https://localhost:9000 \
 *   STEP_CA_PROVISIONER_KEY_JSON="$(docker exec $(docker ps -qf name=step-ca) cat /home/step/secrets/mosaic-fed.json)" \
 *   STEP_CA_ROOT_CERT_PATH=/tmp/step-ca-root.crt \
 *   pnpm --filter @mosaicstack/gateway test \
 *     src/__tests__/integration/federation-m2-e2e.integration.test.ts
 *
 * Obtaining Step-CA credentials:
 *   # Extract provisioner key from running container:
 *   #   docker exec $(docker ps -qf name=step-ca) cat /home/step/secrets/mosaic-fed.json
 *   # Copy root cert from container:
 *   #   docker cp $(docker ps -qf name=step-ca):/home/step/certs/root_ca.crt /tmp/step-ca-root.crt
 *   # Then: export STEP_CA_ROOT_CERT_PATH=/tmp/step-ca-root.crt
 *
 * Skipped unless both FEDERATED_INTEGRATION=1 and STEP_CA_AVAILABLE=1 are set.
 */
 import * as crypto from 'node:crypto';
 import { afterAll, beforeAll, describe, expect, it } from 'vitest';
 import { Test } from '@nestjs/testing';
 import { ValidationPipe } from '@nestjs/common';
 import { FastifyAdapter, type NestFastifyApplication } from '@nestjs/platform-fastify';
 import supertest from 'supertest';
 import {
  createDb,
  type Db,
  type DbHandle,
  federationPeers,
  federationGrants,
  federationEnrollmentTokens,
  inArray,
  eq,
 } from '@mosaicstack/db';
 import * as schema from '@mosaicstack/db';
 import { DB } from '../../database/database.module.js';
 import { AdminGuard } from '../../admin/admin.guard.js';
 import { FederationModule } from '../../federation/federation.module.js';
 import { GrantsService } from '../../federation/grants.service.js';
 import { EnrollmentService } from '../../federation/enrollment.service.js';
 // Opt-in switch for all federated integration tests.
 const run = process.env['FEDERATED_INTEGRATION'] === '1';
 // This suite additionally needs Step-CA: the flag plus all three Step-CA
 // variables must be set, otherwise the describe block below is skipped.
 const stepCaRun =
  run &&
  process.env['STEP_CA_AVAILABLE'] === '1' &&
  !!process.env['STEP_CA_URL'] &&
  !!process.env['STEP_CA_PROVISIONER_KEY_JSON'] &&
  !!process.env['STEP_CA_ROOT_CERT_PATH'];
 // Hard-coded test database URL.
 // NOTE(review): presumably the Postgres from docker-compose.federated.yml — confirm the port mapping.
 const PG_URL = 'postgresql://mosaic:mosaic@localhost:5433/mosaic';
 // Random id embedded in the names/emails of inserted rows for this run.
 const RUN_ID = crypto.randomUUID();
 // End-to-end peer-add flow against a real database and Step-CA. One NestJS app
 // plays both "Server A" (admin API) and "Server B" (enrollment endpoint).
 describe.skipIf(!stepCaRun)('federation M2 E2E — peer add enrollment flow', () => {
  let handle: DbHandle;
  let db: Db;
  let app: NestFastifyApplication;
  let agent: ReturnType<typeof supertest>;
  let grantsService: GrantsService;
  let enrollmentService: EnrollmentService;
  // Ids of rows created by the test, collected so afterAll can delete them.
  const createdTokenGrantIds: string[] = [];
  const createdGrantIds: string[] = [];
  const createdPeerIds: string[] = [];
  const createdUserIds: string[] = [];
  beforeAll(async () => {
    // Only set when the caller did not provide one.
    // NOTE(review): presumably the key used to seal the client private key — confirm.
    process.env['BETTER_AUTH_SECRET'] ??= 'test-e2e-sealing-key';
    handle = createDb(PG_URL);
    db = handle.db;
    // Real FederationModule on the real DB; only the admin guard is stubbed so
    // the admin endpoints can be called without a session.
    const moduleRef = await Test.createTestingModule({
      imports: [FederationModule],
      providers: [{ provide: DB, useValue: db }],
    })
      .overrideGuard(AdminGuard)
      .useValue({ canActivate: () => true })
      .compile();
    app = moduleRef.createNestApplication<NestFastifyApplication>(new FastifyAdapter());
    app.useGlobalPipes(new ValidationPipe({ whitelist: true, transform: true }));
    await app.init();
    // Wait for the underlying Fastify instance before issuing requests.
    await app.getHttpAdapter().getInstance().ready();
    agent = supertest(app.getHttpServer());
    grantsService = moduleRef.get(GrantsService);
    enrollmentService = moduleRef.get(EnrollmentService);
  }, 30_000);
  afterAll(async () => {
    // Best-effort cleanup: each step logs its own failure and lets the next
    // one run. Deletion order is tokens, grants, peers, users.
    // NOTE(review): presumably chosen to satisfy foreign keys — confirm against the schema.
    if (db && createdTokenGrantIds.length > 0) {
      await db
        .delete(federationEnrollmentTokens)
        .where(inArray(federationEnrollmentTokens.grantId, createdTokenGrantIds))
        .catch((e: unknown) => console.error('[federation-m2-e2e cleanup]', e));
    }
    if (db && createdGrantIds.length > 0) {
      await db
        .delete(federationGrants)
        .where(inArray(federationGrants.id, createdGrantIds))
        .catch((e: unknown) => console.error('[federation-m2-e2e cleanup]', e));
    }
    if (db && createdPeerIds.length > 0) {
      await db
        .delete(federationPeers)
        .where(inArray(federationPeers.id, createdPeerIds))
        .catch((e: unknown) => console.error('[federation-m2-e2e cleanup]', e));
    }
    if (db && createdUserIds.length > 0) {
      await db
        .delete(schema.users)
        .where(inArray(schema.users.id, createdUserIds))
        .catch((e: unknown) => console.error('[federation-m2-e2e cleanup]', e));
    }
    if (app)
      await app.close().catch((e: unknown) => console.error('[federation-m2-e2e cleanup]', e));
    if (handle)
      await handle.close().catch((e: unknown) => console.error('[federation-m2-e2e cleanup]', e));
  });
  // -------------------------------------------------------------------------
  // #6 — peer add: keypair → enrollment → cert storage → active peer record
  // -------------------------------------------------------------------------
  it('#6 — peer add flow: keypair → enrollment → cert storage → active peer record', async () => {
    // Create a subject user to satisfy FK on federation_grants.subject_user_id
    const userId = crypto.randomUUID();
    await db
      .insert(schema.users)
      .values({
        id: userId,
        name: `e2e-user-${RUN_ID}`,
        email: `e2e-${RUN_ID}@federation-test.invalid`,
        emailVerified: false,
      })
      .onConflictDoNothing();
    createdUserIds.push(userId);
    // ── Step A: "Server B" setup ─────────────────────────────────────────
    // Server B admin creates a grant and generates an enrollment token to
    // share out-of-band with Server A's operator.
    // Insert a placeholder peer on "Server B" to satisfy the grant FK
    const serverBPeerId = crypto.randomUUID();
    await db
      .insert(federationPeers)
      .values({
        id: serverBPeerId,
        commonName: `server-b-peer-${RUN_ID}`,
        displayName: 'Server B Placeholder',
        certPem: '-----BEGIN CERTIFICATE-----\nMOCK\n-----END CERTIFICATE-----\n',
        certSerial: `serial-b-${serverBPeerId}`,
        certNotAfter: new Date(Date.now() + 365 * 24 * 60 * 60 * 1000),
        state: 'pending',
      })
      .onConflictDoNothing();
    createdPeerIds.push(serverBPeerId);
    const grant = await grantsService.createGrant({
      subjectUserId: userId,
      scope: { resources: ['tasks'], excluded_resources: [], max_rows_per_query: 100 },
      peerId: serverBPeerId,
    });
    // The grant id is tracked twice: once for the grant row itself and once
    // for the enrollment tokens that reference it.
    createdGrantIds.push(grant.id);
    createdTokenGrantIds.push(grant.id);
    const { token } = await enrollmentService.createToken({
      grantId: grant.id,
      peerId: serverBPeerId,
      ttlSeconds: 900,
    });
    // ── Step B: "Server A" generates keypair ─────────────────────────────
    const keypairRes = await agent
      .post('/api/admin/federation/peers/keypair')
      .send({
        commonName: `e2e-peer-${RUN_ID.slice(0, 8)}`,
        displayName: 'E2E Test Peer',
        endpointUrl: 'https://test.invalid',
      })
      .set('Content-Type', 'application/json');
    expect(keypairRes.status).toBe(201);
    const { peerId, csrPem } = keypairRes.body as { peerId: string; csrPem: string };
    expect(typeof peerId).toBe('string');
    expect(csrPem).toContain('-----BEGIN CERTIFICATE REQUEST-----');
    // NOTE(review): peerId is registered for cleanup only after the assertions
    // above; if one of them fails the created peer row is not cleaned up.
    createdPeerIds.push(peerId);
    // ── Step C: Enrollment (simulates Server A sending CSR to Server B) ──
    const enrollRes = await agent
      .post(`/api/federation/enrollment/${token}`)
      .send({ csrPem })
      .set('Content-Type', 'application/json');
    expect(enrollRes.status).toBe(200);
    const { certPem, certChainPem } = enrollRes.body as {
      certPem: string;
      certChainPem: string;
    };
    expect(certPem).toContain('-----BEGIN CERTIFICATE-----');
    expect(certChainPem).toContain('-----BEGIN CERTIFICATE-----');
    // ── Step D: "Server A" stores the cert ───────────────────────────────
    const storeRes = await agent
      .patch(`/api/admin/federation/peers/${peerId}/cert`)
      .send({ certPem })
      .set('Content-Type', 'application/json');
    expect(storeRes.status).toBe(200);
    // ── Step E: Verify peer record in DB ─────────────────────────────────
    const [peer] = await db
      .select()
      .from(federationPeers)
      .where(eq(federationPeers.id, peerId))
      .limit(1);
    expect(peer).toBeDefined();
    expect(peer?.state).toBe('active');
    expect(peer?.certPem).toContain('-----BEGIN CERTIFICATE-----');
    expect(typeof peer?.certSerial).toBe('string');
    expect((peer?.certSerial ?? '').length).toBeGreaterThan(0);
    // clientKeyPem is a sealed ciphertext — must not be a raw PEM
    expect(peer?.clientKeyPem?.startsWith('-----BEGIN')).toBe(false);
    // certNotAfter must be in the future
    expect(peer?.certNotAfter?.getTime()).toBeGreaterThan(Date.now());
  }, 60_000);
 });
--- a/apps/gateway/src/tests/integration/federation-m2.integration.test.ts
+++ b/apps/gateway/src/tests/integration/federation-m2.integration.test.ts
@@ -0,0 +1,483 @@
 /**
 * Federation M2 integration tests (FED-M2-09).
 *
 * Covers MILESTONES.md acceptance tests #1, #2, #3, #5, #7, #8.
 *
 * Prerequisites:
 *   docker compose -f docker-compose.federated.yml --profile federated up -d
 *
 * Run DB-only tests (no Step-CA):
 *   FEDERATED_INTEGRATION=1 BETTER_AUTH_SECRET=test-secret pnpm --filter @mosaicstack/gateway test \
 *     src/__tests__/integration/federation-m2.integration.test.ts
 *
 * Run all tests including Step-CA-dependent ones:
 *   FEDERATED_INTEGRATION=1 STEP_CA_AVAILABLE=1 \
 *   STEP_CA_URL=https://localhost:9000 \
 *   STEP_CA_PROVISIONER_KEY_JSON="$(docker exec $(docker ps -qf name=step-ca) cat /home/step/secrets/mosaic-fed.json)" \
 *   STEP_CA_ROOT_CERT_PATH=/tmp/step-ca-root.crt \
 *   pnpm --filter @mosaicstack/gateway test \
 *     src/__tests__/integration/federation-m2.integration.test.ts
 *
 * Obtaining Step-CA credentials:
 *   # Extract provisioner key from running container:
 *   #   docker exec $(docker ps -qf name=step-ca) cat /home/step/secrets/mosaic-fed.json
 *   # Copy root cert from container:
 *   #   docker cp $(docker ps -qf name=step-ca):/home/step/certs/root_ca.crt /tmp/step-ca-root.crt
 *   # Then: export STEP_CA_ROOT_CERT_PATH=/tmp/step-ca-root.crt
 */
 import * as crypto from 'node:crypto';
 import { afterAll, beforeAll, describe, expect, it } from 'vitest';
 import { Test } from '@nestjs/testing';
 import { GoneException } from '@nestjs/common';
 import { Pkcs10CertificateRequestGenerator, X509Certificate as PeculiarX509 } from '@peculiar/x509';
 import {
  createDb,
  type Db,
  type DbHandle,
  federationPeers,
  federationGrants,
  federationEnrollmentTokens,
  inArray,
  eq,
 } from '@mosaicstack/db';
 import * as schema from '@mosaicstack/db';
 import { seal } from '@mosaicstack/auth';
 import { DB } from '../../database/database.module.js';
 import { GrantsService } from '../../federation/grants.service.js';
 import { EnrollmentService } from '../../federation/enrollment.service.js';
 import { CaService } from '../../federation/ca.service.js';
 import { FederationScopeError } from '../../federation/scope-schema.js';
 // Master switch: both suites below are skipped unless FEDERATED_INTEGRATION=1.
 const run = process.env['FEDERATED_INTEGRATION'] === '1';
 // The Step-CA suite additionally requires STEP_CA_AVAILABLE=1.
 const stepCaRun = run && process.env['STEP_CA_AVAILABLE'] === '1';
 // Connection string handed to createDb() by both suites.
 // NOTE(review): port 5433 presumably matches the Postgres published by
 // docker-compose.federated.yml — confirm.
 const PG_URL = 'postgresql://mosaic:mosaic@localhost:5433/mosaic';
 // ---------------------------------------------------------------------------
 // Helpers for test data isolation
 // ---------------------------------------------------------------------------
 /** Unique run prefix to identify rows created by this test run. */
 const RUN_ID = crypto.randomUUID();
 /**
 * Seed a bare-bones `users` row so grants created by a test have a valid
 * subject (FK: federation_grants.subject_user_id).
 *
 * The insert is a no-op when a conflicting row already exists.
 *
 * @param db - Database handle to insert through.
 * @param id - Primary key for the user; also embedded in the name and email.
 */
 async function insertTestUser(db: Db, id: string): Promise<void> {
  const userRow = {
    id,
    name: `test-user-${id}`,
    email: `test-${id}@federation-test.invalid`,
    emailVerified: false,
  };
  await db.insert(schema.users).values(userRow).onConflictDoNothing();
 }
 /**
 * Seed a placeholder `federation_peers` row so grants created by a test have a
 * valid peer (FK: federation_grants.peer_id).
 *
 * The row is inserted in state 'pending' with a mock certificate PEM and an
 * expiry one year from now. The insert is a no-op on conflict.
 *
 * @param db - Database handle to insert through.
 * @param id - Primary key for the peer; also embedded in the cert serial.
 * @param suffix - Label appended to the common name and display name.
 */
 async function insertTestPeer(db: Db, id: string, suffix: string = ''): Promise<void> {
  const ONE_YEAR_MS = 365 * 24 * 60 * 60 * 1000;
  const notAfter = new Date(Date.now() + ONE_YEAR_MS);
  const insertion = db.insert(federationPeers).values({
    id,
    commonName: `test-peer-${RUN_ID}-${suffix}`,
    displayName: `Test Peer ${suffix}`,
    certPem: '-----BEGIN CERTIFICATE-----\nMOCK\n-----END CERTIFICATE-----\n',
    certSerial: `test-serial-${id}`,
    certNotAfter: notAfter,
    state: 'pending',
  });
  await insertion.onConflictDoNothing();
 }
 // ---------------------------------------------------------------------------
 // DB-only test module (CaService mocked so env vars not required)
 // ---------------------------------------------------------------------------
 /**
 * Compile a Nest testing module wired to the given database, with CaService
 * replaced by a stub whose issueCert() always throws. DB-only tests therefore
 * fail loudly if they ever reach certificate issuance.
 *
 * @param db - Database handle provided under the DB injection token.
 * @returns The promise returned by the testing-module builder's compile().
 */
 function buildDbModule(db: Db) {
  const unreachableCaService = {
    issueCert: async () => {
      throw new Error('CaService.issueCert should not be called in DB-only tests');
    },
  };
  const builder = Test.createTestingModule({
    providers: [
      { provide: DB, useValue: db },
      GrantsService,
      { provide: CaService, useValue: unreachableCaService },
      EnrollmentService,
    ],
  });
  return builder.compile();
 }
 // ---------------------------------------------------------------------------
 // Test suite — DB-only (no Step-CA)
 // ---------------------------------------------------------------------------
 describe.skipIf(!run)('federation M2 — DB-only tests', () => {
  let handle: DbHandle;
  let db: Db;
  let grantsService: GrantsService;
  /** IDs created during this run — cleaned up in afterAll. */
  const createdGrantIds: string[] = [];
  const createdPeerIds: string[] = [];
  const createdUserIds: string[] = [];
  beforeAll(async () => {
    // Provide a throwaway secret only when the caller did not export one.
    // NOTE(review): presumably consumed by seal() in test #5 — confirm.
    if (process.env['BETTER_AUTH_SECRET'] === undefined) {
      process.env['BETTER_AUTH_SECRET'] = 'test-integration-sealing-key-not-for-prod';
    }
    handle = createDb(PG_URL);
    ({ db } = handle);
    const testingModule = await buildDbModule(db);
    grantsService = testingModule.get(GrantsService);
  });
  afterAll(async () => {
    // Cleanup is best-effort: every failure is logged and the next step still runs.
    const reportCleanupError = (e: unknown) => console.error('[federation-m2-test cleanup]', e);
    if (db) {
      // Delete children before parents (FK-safe): tokens → grants → peers → users
      if (createdGrantIds.length > 0) {
        await db
          .delete(federationEnrollmentTokens)
          .where(inArray(federationEnrollmentTokens.grantId, createdGrantIds))
          .catch(reportCleanupError);
        await db
          .delete(federationGrants)
          .where(inArray(federationGrants.id, createdGrantIds))
          .catch(reportCleanupError);
      }
      if (createdPeerIds.length > 0) {
        await db
          .delete(federationPeers)
          .where(inArray(federationPeers.id, createdPeerIds))
          .catch(reportCleanupError);
      }
      if (createdUserIds.length > 0) {
        await db
          .delete(schema.users)
          .where(inArray(schema.users.id, createdUserIds))
          .catch(reportCleanupError);
      }
    }
    if (handle) {
      await handle.close().catch(reportCleanupError);
    }
  });
  // -------------------------------------------------------------------------
  // #1 — grant create writes a pending row
  // -------------------------------------------------------------------------
  it('#1 — createGrant writes a pending row to DB', async () => {
    const subjectUserId = crypto.randomUUID();
    const peerId = crypto.randomUUID();
    // Seed the FK targets, then remember them for afterAll cleanup.
    await insertTestUser(db, subjectUserId);
    await insertTestPeer(db, peerId, 'test1');
    createdUserIds.push(subjectUserId);
    createdPeerIds.push(peerId);
    const { id: grantId } = await grantsService.createGrant({
      subjectUserId,
      scope: { resources: ['tasks'], excluded_resources: [], max_rows_per_query: 100 },
      peerId,
    });
    createdGrantIds.push(grantId);
    // Read the row back straight from the table rather than trusting the return value.
    const persisted = await db
      .select()
      .from(federationGrants)
      .where(eq(federationGrants.id, grantId))
      .limit(1);
    const stored = persisted[0];
    expect(stored).toBeDefined();
    expect(stored?.status).toBe('pending');
    expect(stored?.peerId).toBe(peerId);
    expect(stored?.subjectUserId).toBe(subjectUserId);
    const scopeJson = stored?.scope as Record<string, unknown>;
    expect(scopeJson['resources']).toEqual(['tasks']);
    expect(scopeJson['max_rows_per_query']).toBe(100);
  }, 15_000);
  // -------------------------------------------------------------------------
  // #7 — scope with unknown resource type rejected
  // -------------------------------------------------------------------------
  it('#7 — createGrant rejects scope with unknown resource type', async () => {
    const userId = crypto.randomUUID();
    const peerId = crypto.randomUUID();
    const invalidScope = {
      resources: ['totally_unknown_resource'],
      excluded_resources: [],
      max_rows_per_query: 100,
    };
    // Register IDs BEFORE inserting: if the peer insert throws, afterAll still
    // removes the user row. Deleting an ID that was never inserted is harmless.
    createdUserIds.push(userId);
    createdPeerIds.push(peerId);
    await insertTestUser(db, userId);
    await insertTestPeer(db, peerId, 'test7');
    const attempt = grantsService.createGrant({
      subjectUserId: userId,
      scope: invalidScope,
      peerId,
    });
    // If validation ever regresses and the grant IS created, track it so afterAll
    // can delete it. Otherwise the stray grant's FK would block peer/user cleanup.
    // The rejection branch is a deliberate no-op; the assertion below handles it.
    void attempt.then(
      (unexpectedGrant) => {
        createdGrantIds.push(unexpectedGrant.id);
      },
      () => undefined,
    );
    await expect(attempt).rejects.toThrow(FederationScopeError);
  }, 15_000);
  // -------------------------------------------------------------------------
  // #8 — listGrants returns accurate status for grants in various states
  // -------------------------------------------------------------------------
  it('#8 — listGrants returns accurate status for grants in various states', async () => {
    const userId = crypto.randomUUID();
    const peerId = crypto.randomUUID();
    const validScope = {
      resources: ['notes'],
      excluded_resources: [],
      max_rows_per_query: 50,
    };
    // Register IDs before inserting so a failure part-way through seeding still
    // leaves everything visible to afterAll cleanup.
    createdUserIds.push(userId);
    createdPeerIds.push(peerId);
    await insertTestUser(db, userId);
    await insertTestPeer(db, peerId, 'test8');
    // Create two pending grants via GrantsService. Each ID is tracked right away,
    // so a failure creating grantB does not orphan grantA.
    const grantA = await grantsService.createGrant({
      subjectUserId: userId,
      scope: validScope,
      peerId,
    });
    createdGrantIds.push(grantA.id);
    const grantB = await grantsService.createGrant({
      subjectUserId: userId,
      scope: { resources: ['tasks'], excluded_resources: [], max_rows_per_query: 50 },
      peerId,
    });
    createdGrantIds.push(grantB.id);
    // Insert a third grant directly in 'revoked' state to test status variety
    const [grantC] = await db
      .insert(federationGrants)
      .values({
        id: crypto.randomUUID(),
        subjectUserId: userId,
        peerId,
        scope: validScope,
        status: 'revoked',
        revokedAt: new Date(),
      })
      .returning();
    // Fail with a clear message instead of a non-null assertion on a missing row.
    if (!grantC) {
      throw new Error('insert of the revoked grant returned no row');
    }
    createdGrantIds.push(grantC.id);
    // List all grants for this peer
    const allForPeer = await grantsService.listGrants({ peerId });
    const ourGrantIds = new Set([grantA.id, grantB.id, grantC.id]);
    const ourGrants = allForPeer.filter((g) => ourGrantIds.has(g.id));
    expect(ourGrants).toHaveLength(3);
    const pendingGrants = ourGrants.filter((g) => g.status === 'pending');
    const revokedGrants = ourGrants.filter((g) => g.status === 'revoked');
    expect(pendingGrants).toHaveLength(2);
    expect(revokedGrants).toHaveLength(1);
    // Status-filtered query
    const pendingOnly = await grantsService.listGrants({ peerId, status: 'pending' });
    const ourPending = pendingOnly.filter((g) => ourGrantIds.has(g.id));
    // `.every()` is vacuously true on an empty array, so first pin WHICH grants
    // came back: exactly the two pending ones, never the revoked one.
    expect(ourPending.map((g) => g.id).sort()).toEqual([grantA.id, grantB.id].sort());
    expect(ourPending.every((g) => g.status === 'pending')).toBe(true);
    // Verify peer list from DB also shows the peer rows with correct state
    const peers = await db.select().from(federationPeers).where(eq(federationPeers.id, peerId));
    expect(peers).toHaveLength(1);
    expect(peers[0]?.state).toBe('pending');
  }, 15_000);
  // -------------------------------------------------------------------------
  // #5 — client_key_pem encrypted at rest
  // -------------------------------------------------------------------------
  it('#5 — clientKeyPem stored in DB is a sealed ciphertext (not a valid PEM)', async () => {
    const peerId = crypto.randomUUID();
    const plaintextKeyPem = '-----BEGIN PRIVATE KEY-----\nMOCK\n-----END PRIVATE KEY-----\n';
    const sealedKey = seal(plaintextKeyPem);
    const ONE_YEAR_MS = 365 * 24 * 60 * 60 * 1000;
    await db.insert(federationPeers).values({
      id: peerId,
      commonName: `test-peer-${RUN_ID}-sealed`,
      displayName: 'Sealed Key Test Peer',
      certPem: '-----BEGIN CERTIFICATE-----\nMOCK\n-----END CERTIFICATE-----\n',
      certSerial: `test-serial-sealed-${peerId}`,
      certNotAfter: new Date(Date.now() + ONE_YEAR_MS),
      state: 'pending',
      clientKeyPem: sealedKey,
    });
    createdPeerIds.push(peerId);
    const matches = await db
      .select()
      .from(federationPeers)
      .where(eq(federationPeers.id, peerId))
      .limit(1);
    const stored = matches[0];
    expect(stored).toBeDefined();
    const storedKey = stored?.clientKeyPem;
    // What landed in the column must be the sealed blob, never the raw PEM.
    expect(storedKey).toBeDefined();
    expect(storedKey?.startsWith('-----BEGIN')).toBe(false);
    // Sanity check that the blob is non-trivial (more than 20 chars).
    expect((storedKey ?? '').length).toBeGreaterThan(20);
  }, 15_000);
 });
 // ---------------------------------------------------------------------------
 // Test suite — Step-CA gated
 // ---------------------------------------------------------------------------
 describe.skipIf(!stepCaRun)('federation M2 — Step-CA tests', () => {
  let handle: DbHandle;
  let db: Db;
  let grantsService: GrantsService;
  let enrollmentService: EnrollmentService;
  const createdGrantIds: string[] = [];
  const createdPeerIds: string[] = [];
  const createdUserIds: string[] = [];
  beforeAll(async () => {
    // Mirror the DB-only suite's default so this suite does not depend on that
    // suite's hook having run first (e.g. when tests are filtered by name).
    // NOTE(review): assumes the enrollment path may read BETTER_AUTH_SECRET — confirm.
    process.env['BETTER_AUTH_SECRET'] ??= 'test-integration-sealing-key-not-for-prod';
    handle = createDb(PG_URL);
    db = handle.db;
    // Use real CaService — env vars (STEP_CA_URL, STEP_CA_PROVISIONER_KEY_JSON,
    // STEP_CA_ROOT_CERT_PATH) must be set when STEP_CA_AVAILABLE=1
    const moduleRef = await Test.createTestingModule({
      providers: [{ provide: DB, useValue: db }, CaService, GrantsService, EnrollmentService],
    }).compile();
    grantsService = moduleRef.get(GrantsService);
    enrollmentService = moduleRef.get(EnrollmentService);
  });
  afterAll(async () => {
    // Best-effort teardown: each step logs its own failure and never aborts the rest.
    const logCleanupFailure = (e: unknown) => console.error('[federation-m2-test cleanup]', e);
    const haveGrants = createdGrantIds.length > 0;
    const havePeers = createdPeerIds.length > 0;
    const haveUsers = createdUserIds.length > 0;
    // Children first, parents last: tokens → grants → peers → users.
    if (db && haveGrants) {
      const tokensOfOurGrants = inArray(federationEnrollmentTokens.grantId, createdGrantIds);
      await db.delete(federationEnrollmentTokens).where(tokensOfOurGrants).catch(logCleanupFailure);
      const ourGrants = inArray(federationGrants.id, createdGrantIds);
      await db.delete(federationGrants).where(ourGrants).catch(logCleanupFailure);
    }
    if (db && havePeers) {
      const ourPeers = inArray(federationPeers.id, createdPeerIds);
      await db.delete(federationPeers).where(ourPeers).catch(logCleanupFailure);
    }
    if (db && haveUsers) {
      const ourUsers = inArray(schema.users.id, createdUserIds);
      await db.delete(schema.users).where(ourUsers).catch(logCleanupFailure);
    }
    if (handle) {
      await handle.close().catch(logCleanupFailure);
    }
  });
  /**
   * Create a fresh ECDSA P-256 key pair and a PKCS#10 certificate signing
   * request for it.
   *
   * @param cn - Common name placed in the CSR subject (`CN=<cn>`).
   * @returns The CSR serialized as PEM. The key pair itself is not returned.
   */
  async function generateCsrPem(cn: string): Promise<string> {
    const ecdsaP256 = { name: 'ECDSA', namedCurve: 'P-256', hash: 'SHA-256' };
    const keys = await crypto.subtle.generateKey(ecdsaP256, true, ['sign', 'verify']);
    const request = await Pkcs10CertificateRequestGenerator.create({
      name: `CN=${cn}`,
      keys,
      signingAlgorithm: ecdsaP256,
    });
    return request.toString('pem');
  }
  // -------------------------------------------------------------------------
  // #2 — enrollment signs CSR and returns cert
  // -------------------------------------------------------------------------
  it('#2 — redeem returns a certPem containing a valid PEM certificate', async () => {
    const subjectUserId = crypto.randomUUID();
    const peerId = crypto.randomUUID();
    await insertTestUser(db, subjectUserId);
    await insertTestPeer(db, peerId, 'ca-test2');
    createdUserIds.push(subjectUserId);
    createdPeerIds.push(peerId);
    const { id: grantId } = await grantsService.createGrant({
      subjectUserId,
      scope: { resources: ['tasks'], excluded_resources: [], max_rows_per_query: 100 },
      peerId,
    });
    createdGrantIds.push(grantId);
    const enrollment = await enrollmentService.createToken({ grantId, peerId, ttlSeconds: 900 });
    const commonName = `gateway-test-${RUN_ID.slice(0, 8)}`;
    const csrPem = await generateCsrPem(commonName);
    const { certPem, certChainPem } = await enrollmentService.redeem(enrollment.token, csrPem);
    expect(certPem).toContain('-----BEGIN CERTIFICATE-----');
    expect(certChainPem).toContain('-----BEGIN CERTIFICATE-----');
    // The leaf must parse as X.509 and carry a serial number.
    const issued = new PeculiarX509(certPem);
    expect(issued.serialNumber).toBeTruthy();
  }, 30_000);
  // -------------------------------------------------------------------------
  // #3 — token single-use; second attempt returns GoneException
  // -------------------------------------------------------------------------
  it('#3 — second redeem of the same token throws GoneException', async () => {
    const subjectUserId = crypto.randomUUID();
    const peerId = crypto.randomUUID();
    await insertTestUser(db, subjectUserId);
    await insertTestPeer(db, peerId, 'ca-test3');
    createdUserIds.push(subjectUserId);
    createdPeerIds.push(peerId);
    const { id: grantId } = await grantsService.createGrant({
      subjectUserId,
      scope: { resources: ['notes'], excluded_resources: [], max_rows_per_query: 50 },
      peerId,
    });
    createdGrantIds.push(grantId);
    const { token } = await enrollmentService.createToken({ grantId, peerId, ttlSeconds: 900 });
    const csrPem = await generateCsrPem(`gateway-test-replay-${RUN_ID.slice(0, 8)}`);
    // The first use of the token has to go through...
    const firstAttempt = await enrollmentService.redeem(token, csrPem);
    expect(firstAttempt.certPem).toContain('-----BEGIN CERTIFICATE-----');
    // ...and replaying the very same token has to be refused.
    const replay = enrollmentService.redeem(token, csrPem);
    await expect(replay).rejects.toThrow(GoneException);
  }, 30_000);
 });
--- a/apps/gateway/src/agent/provider-credentials.service.ts
+++ b/apps/gateway/src/agent/provider-credentials.service.ts
@@ -1,62 +1,10 @@
 import { Inject, Injectable, Logger } from '@nestjs/common';
-import { createCipheriv, createDecipheriv, createHash, randomBytes } from 'node:crypto';
+import { seal, unseal } from '@mosaicstack/auth';
 import type { Db } from '@mosaicstack/db';
 import { providerCredentials, eq, and } from '@mosaicstack/db';
 import { DB } from '../database/database.module.js';
 import type { ProviderCredentialSummaryDto } from './provider-credentials.dto.js';
 const ALGORITHM = 'aes-256-gcm';
 const IV_LENGTH = 12; // 96-bit IV for GCM
 const TAG_LENGTH = 16; // 128-bit auth tag
 /**
 * Produce the 32-byte AES-256 key by hashing BETTER_AUTH_SECRET with SHA-256.
 *
 * @returns The raw 32-byte digest.
 * @throws Error when BETTER_AUTH_SECRET is unset or empty.
 */
 function deriveEncryptionKey(): Buffer {
  const authSecret = process.env['BETTER_AUTH_SECRET'];
  if (authSecret) {
    const hasher = createHash('sha256');
    hasher.update(authSecret);
    return hasher.digest();
  }
  throw new Error('BETTER_AUTH_SECRET is not set — cannot derive encryption key');
 }
 /**
 * Seal a UTF-8 string with AES-256-GCM under the derived key and a fresh
 * random IV.
 *
 * @param plaintext - Text to encrypt.
 * @returns base64 of IV (12 bytes) ‖ auth tag (16 bytes) ‖ ciphertext.
 */
 function encrypt(plaintext: string): string {
  const key = deriveEncryptionKey();
  const nonce = randomBytes(IV_LENGTH);
  const gcm = createCipheriv(ALGORITHM, key, nonce);
  const head = gcm.update(plaintext, 'utf8');
  const tail = gcm.final();
  // The tag is only available once final() has run.
  const tag = gcm.getAuthTag();
  return Buffer.concat([nonce, tag, head, tail]).toString('base64');
 }
 /**
 * Decrypt a value produced by `encrypt()`.
 *
 * Expected layout after base64-decoding: IV (12 bytes) ‖ auth tag (16 bytes) ‖
 * ciphertext.
 *
 * @param encoded - base64 string as returned by `encrypt()`.
 * @returns The original UTF-8 plaintext.
 * @throws Error when the payload is too short to hold an IV and a full tag,
 *   when BETTER_AUTH_SECRET is unset, or when GCM authentication fails
 *   (tampered data or wrong key).
 */
 function decrypt(encoded: string): string {
  const key = deriveEncryptionKey();
  const combined = Buffer.from(encoded, 'base64');
  // A short payload would otherwise be sliced into a truncated tag, and GCM
  // may accept shorter tags when no tag length is pinned — reject it up front.
  if (combined.length < IV_LENGTH + TAG_LENGTH) {
    throw new Error('Encrypted value is truncated or malformed');
  }
  const iv = combined.subarray(0, IV_LENGTH);
  const authTag = combined.subarray(IV_LENGTH, IV_LENGTH + TAG_LENGTH);
  const ciphertext = combined.subarray(IV_LENGTH + TAG_LENGTH);
  // Pin the tag length so only full 128-bit tags are ever accepted.
  const decipher = createDecipheriv(ALGORITHM, key, iv, { authTagLength: TAG_LENGTH });
  decipher.setAuthTag(authTag);
  // final() throws if the tag does not authenticate the ciphertext.
  const decrypted = Buffer.concat([decipher.update(ciphertext), decipher.final()]);
  return decrypted.toString('utf8');
 }
@Injectable()
 export class ProviderCredentialsService {
  private readonly logger = new Logger(ProviderCredentialsService.name);
@@ -74,7 +22,7 @@ export class ProviderCredentialsService {
    value: string,
    metadata?: Record<string, unknown>,
  ): Promise<void> {
-    const encryptedValue = encrypt(value);
+    const encryptedValue = seal(value);
    await this.db
      .insert(providerCredentials)
@@ -122,7 +70,7 @@ export class ProviderCredentialsService {
    }
    try {
-      return decrypt(row.encryptedValue);
+      return unseal(row.encryptedValue);
    } catch (err) {
      this.logger.error(
        `Failed to decrypt credential for user=${userId} provider=${provider}`,
--- a/apps/gateway/src/app.module.ts
+++ b/apps/gateway/src/app.module.ts
@@ -24,6 +24,7 @@ import { GCModule } from './gc/gc.module.js';
 import { ReloadModule } from './reload/reload.module.js';
 import { WorkspaceModule } from './workspace/workspace.module.js';
 import { QueueModule } from './queue/queue.module.js';
 import { FederationModule } from './federation/federation.module.js';
 import { ThrottlerGuard, ThrottlerModule } from '@nestjs/throttler';
@Module({
@@ -52,6 +53,7 @@ import { ThrottlerGuard, ThrottlerModule } from '@nestjs/throttler';
    QueueModule,
    ReloadModule,
    WorkspaceModule,
    FederationModule,
  ],
  controllers: [HealthController],
  providers: [
--- a/apps/gateway/src/database/database.module.ts
+++ b/apps/gateway/src/database/database.module.ts
@@ -1,8 +1,21 @@
 import { mkdirSync } from 'node:fs';
 import { homedir } from 'node:os';
 import { join } from 'node:path';
-import { Global, Inject, Module, type OnApplicationShutdown } from '@nestjs/common';
+import {
-import { createDb, createPgliteDb, type Db, type DbHandle } from '@mosaicstack/db';
+  Global,
  Inject,
  Logger,
  Module,
  type OnApplicationShutdown,
  type OnModuleInit,
 } from '@nestjs/common';
 import {
  createDb,
  createPgliteDb,
  runPgliteMigrations,
  type Db,
  type DbHandle,
 } from '@mosaicstack/db';
 import { createStorageAdapter, type StorageAdapter } from '@mosaicstack/storage';
 import type { MosaicConfig } from '@mosaicstack/config';
 import { MOSAIC_CONFIG } from '../config/config.module.js';
@@ -39,12 +52,37 @@ export const STORAGE_ADAPTER = 'STORAGE_ADAPTER';
  ],
  exports: [DB, STORAGE_ADAPTER],
 })
-export class DatabaseModule implements OnApplicationShutdown {
+export class DatabaseModule implements OnApplicationShutdown, OnModuleInit {
  private readonly logger = new Logger(DatabaseModule.name);
  constructor(
    @Inject(DB_HANDLE) private readonly handle: DbHandle,
    @Inject(STORAGE_ADAPTER) private readonly storageAdapter: StorageAdapter,
    @Inject(MOSAIC_CONFIG) private readonly config: MosaicConfig,
  ) {}
  // Migrations must complete before any module that injects DB starts serving
  // requests. NestJS awaits onModuleInit before app.listen(), and modules that
  // inject DB are initialized after this one — so all DB-dependent code sees a
  // populated schema before the first HTTP request lands.
  //
  // Local (PGlite) tier: we run gateway-DB migrations explicitly here. The
  // storage adapter writes to a separate PGlite directory and only manages its
  // own KV tables, so we still call its migrate() afterwards.
  //
  // Postgres tier: PostgresAdapter.migrate() already calls runMigrations() on
  // the same DATABASE_URL, so a single call covers both the gateway DB and
  // the storage tables. We deliberately do NOT call runMigrations() here to
  // avoid opening a second short-lived connection and doubling startup cost.
  async onModuleInit(): Promise<void> {
    const { tier } = this.config;
    // Only the local tier runs the PGlite migrations from here.
    if (tier === 'local') {
      this.logger.log('Applying PGlite schema migrations...');
      await runPgliteMigrations(this.handle);
    }
    // The storage adapter migrates on every tier.
    const adapter = this.storageAdapter;
    this.logger.log(`Initializing storage adapter (${adapter.name})...`);
    await adapter.migrate();
  }
  /**
   * Close the DB handle and the storage adapter on application shutdown.
   *
   * Both close() calls are started together and awaited to completion even if
   * one of them rejects, so a failure in one never leaves the other half-closed
   * when this method returns. The first rejection (in array order) is rethrown.
   */
  async onApplicationShutdown(): Promise<void> {
    const outcomes = await Promise.allSettled([this.handle.close(), this.storageAdapter.close()]);
    for (const outcome of outcomes) {
      if (outcome.status === 'rejected') {
        throw outcome.reason;
      }
    }
  }
--- a/apps/gateway/src/federation/tests/enrollment.service.spec.ts
+++ b/apps/gateway/src/federation/tests/enrollment.service.spec.ts
@@ -0,0 +1,401 @@
 /**
 * Unit tests for EnrollmentService — federation enrollment token flow (FED-M2-07).
 *
 * Coverage:
 *  createToken:
 *   - inserts token row with correct grantId, peerId, and future expiresAt
 *   - returns { token, expiresAt } with a 64-char hex token
 *   - clamps ttlSeconds to 900
 *
 *  redeem — error paths:
 *   - NotFoundException when token row not found
 *   - GoneException when token already used (usedAt set)
 *   - GoneException when token expired (expiresAt < now)
 *   - GoneException when grant status is not pending
 *
 *  redeem — success path:
 *   - atomically claims token BEFORE cert issuance (claim → issueCert → tx)
 *   - calls CaService.issueCert with correct args
 *   - activates grant + updates peer + writes audit log inside a transaction
 *   - returns { certPem, certChainPem }
 *
 *  redeem — replay protection:
 *   - GoneException when claim UPDATE returns empty array (concurrent request won)
 */
 import 'reflect-metadata';
 import { describe, it, expect, vi, beforeEach, beforeAll } from 'vitest';
 import { GoneException, NotFoundException } from '@nestjs/common';
 import type { Db } from '@mosaicstack/db';
 import { EnrollmentService } from '../enrollment.service.js';
 import { makeSelfSignedCert } from './helpers/test-cert.js';
 // ---------------------------------------------------------------------------
 // Test constants
 // ---------------------------------------------------------------------------
 const GRANT_ID = 'g1111111-1111-1111-1111-111111111111';
 const PEER_ID = 'p2222222-2222-2222-2222-222222222222';
 const USER_ID = 'u3333333-3333-3333-3333-333333333333';
 const TOKEN = 'a'.repeat(64); // 64-char hex
 // Real self-signed EC P-256 cert — populated once in beforeAll.
 // Required because EnrollmentService.extractCertNotAfter calls new X509Certificate(certPem)
 // with strict parsing (PR #501 HIGH-2: no silent fallback).
 let REAL_CERT_PEM: string;
 const MOCK_CHAIN_PEM = () => REAL_CERT_PEM + REAL_CERT_PEM;
 const MOCK_SERIAL = 'ABCD1234';
 beforeAll(async () => {
  REAL_CERT_PEM = await makeSelfSignedCert();
 });
 // ---------------------------------------------------------------------------
 // Factory helpers
 // ---------------------------------------------------------------------------
 /**
 * Build an enrollment-token row fixture: unused, expiring one minute from now.
 *
 * @param overrides - Fields that replace the defaults (e.g. `usedAt`, `expiresAt`).
 */
 function makeTokenRow(overrides: Partial<Record<string, unknown>> = {}) {
  const ONE_MINUTE_MS = 60_000;
  const defaults = {
    token: TOKEN,
    grantId: GRANT_ID,
    peerId: PEER_ID,
    expiresAt: new Date(Date.now() + ONE_MINUTE_MS),
    usedAt: null,
    createdAt: new Date(),
  };
  return { ...defaults, ...overrides };
 }
 /**
 * Build a grant fixture: status 'pending', no expiry, not revoked, scoped to
 * `tasks` with a 100-row query cap.
 *
 * @param overrides - Fields that replace the defaults (e.g. `status`).
 */
 function makeGrant(overrides: Partial<Record<string, unknown>> = {}) {
  const defaultScope = { resources: ['tasks'], excluded_resources: [], max_rows_per_query: 100 };
  const defaults = {
    id: GRANT_ID,
    peerId: PEER_ID,
    subjectUserId: USER_ID,
    scope: defaultScope,
    status: 'pending',
    expiresAt: null,
    createdAt: new Date(),
    revokedAt: null,
    revokedReason: null,
  };
  return { ...defaults, ...overrides };
 }
 // ---------------------------------------------------------------------------
 // Mock DB builder
 // ---------------------------------------------------------------------------
 /**
 * Build a hand-rolled mock of the `Db` query-builder surface.
 *
 * The returned object exposes four entry points — `insert`, `select`, `update`
 * and `transaction` — each a `vi.fn()` wired to a chain of further mocks that
 * mirrors one builder call sequence. Every link in every chain is also exposed
 * under `_mocks`, so tests can assert on individual calls.
 *
 * @param tokenRows - Rows resolved by `select().from().where().limit()`.
 *   Defaults to a single row from `makeTokenRow()`.
 * @param claimedRows - Rows resolved by `update().set().where().returning()`.
 *   Defaults to one claimed token; pass `[]` to simulate a lost race.
 */
 function makeDb({
  tokenRows = [makeTokenRow()],
  // claimedRows is returned by the .returning() on the token-claim UPDATE.
  // Empty array = concurrent request won the race (GoneException).
  claimedRows = [{ token: TOKEN }],
 }: {
  tokenRows?: unknown[];
  claimedRows?: unknown[];
 } = {}) {
  // insert().values() — for createToken (outer db, not tx)
  const insertValues = vi.fn().mockResolvedValue(undefined);
  const insertMock = vi.fn().mockReturnValue({ values: insertValues });
  // select().from().where().limit() — for fetching the token row
  const limitSelect = vi.fn().mockResolvedValue(tokenRows);
  const whereSelect = vi.fn().mockReturnValue({ limit: limitSelect });
  const fromSelect = vi.fn().mockReturnValue({ where: whereSelect });
  const selectMock = vi.fn().mockReturnValue({ from: fromSelect });
  // update().set().where().returning() — for the atomic token claim (outer db)
  const returningMock = vi.fn().mockResolvedValue(claimedRows);
  const whereClaimUpdate = vi.fn().mockReturnValue({ returning: returningMock });
  const setClaimMock = vi.fn().mockReturnValue({ where: whereClaimUpdate });
  const claimUpdateMock = vi.fn().mockReturnValue({ set: setClaimMock });
  // transaction(cb) — cb receives txMock; txMock has update + insert
  //
  // The tx mock must support two tx.update() call patterns (CRIT-2, PR #501):
  //   1. Grant activation:  .update().set().where().returning() → resolves to [{ id }]
  //   2. Peer update:       .update().set().where()             → resolves to undefined
  //
  // We achieve this by making txWhereUpdate return an object with BOTH a thenable
  // interface (so `await tx.update().set().where()` works) AND a .returning() method.
  const txGrantActivatedRow = { id: GRANT_ID };
  const txReturningMock = vi.fn().mockResolvedValue([txGrantActivatedRow]);
  const txWhereUpdate = vi.fn().mockReturnValue({
    // .returning() for grant activation (first tx.update call)
    returning: txReturningMock,
    // thenables so `await tx.update().set().where()` also works for peer update
    then: (resolve: (v: undefined) => void) => resolve(undefined),
    catch: () => undefined,
    finally: () => undefined,
  });
  const txSetMock = vi.fn().mockReturnValue({ where: txWhereUpdate });
  const txUpdateMock = vi.fn().mockReturnValue({ set: txSetMock });
  const txInsertValues = vi.fn().mockResolvedValue(undefined);
  const txInsertMock = vi.fn().mockReturnValue({ values: txInsertValues });
  const txMock = { update: txUpdateMock, insert: txInsertMock };
  // The transaction mock runs the callback immediately with txMock and returns
  // whatever the callback returns; no commit or rollback is simulated.
  const transactionMock = vi
    .fn()
    .mockImplementation(async (cb: (tx: typeof txMock) => Promise<void>) => cb(txMock));
  return {
    insert: insertMock,
    select: selectMock,
    update: claimUpdateMock,
    transaction: transactionMock,
    // Every link of every chain, exposed for call assertions in tests.
    _mocks: {
      insertValues,
      insertMock,
      limitSelect,
      whereSelect,
      fromSelect,
      selectMock,
      returningMock,
      whereClaimUpdate,
      setClaimMock,
      claimUpdateMock,
      txInsertValues,
      txInsertMock,
      txWhereUpdate,
      txReturningMock,
      txSetMock,
      txUpdateMock,
      txMock,
      transactionMock,
    },
  };
 }
 // ---------------------------------------------------------------------------
 // Mock CaService
 // ---------------------------------------------------------------------------
 /**
 * Build a CaService stand-in whose `issueCert` resolves to the real
 * self-signed cert, a two-cert chain built from it, and a fixed serial.
 *
 * The PEM values are read when `issueCert` is invoked, not when this factory
 * runs, so the mock picks up whatever REAL_CERT_PEM holds at call time.
 */
 function makeCaService() {
  const issueCert = vi.fn().mockImplementation(async () => {
    const issued = {
      certPem: REAL_CERT_PEM,
      certChainPem: MOCK_CHAIN_PEM(),
      serialNumber: MOCK_SERIAL,
    };
    return issued;
  });
  return { issueCert };
 }
 // ---------------------------------------------------------------------------
 // Mock GrantsService
 // ---------------------------------------------------------------------------
 /**
 * Build a GrantsService stand-in.
 *
 * `getGrant` resolves to a fixture grant with `grantOverrides` applied;
 * `activateGrant` resolves to a fixture grant whose status is 'active'.
 *
 * @param grantOverrides - Fields overriding the grant returned by `getGrant`.
 */
 function makeGrantsService(grantOverrides: Partial<Record<string, unknown>> = {}) {
  const fetchedGrant = makeGrant(grantOverrides);
  const activatedGrant = makeGrant({ status: 'active' });
  const getGrant = vi.fn().mockResolvedValue(fetchedGrant);
  const activateGrant = vi.fn().mockResolvedValue(activatedGrant);
  return { getGrant, activateGrant };
 }
 // ---------------------------------------------------------------------------
 // Helper: build service under test
 // ---------------------------------------------------------------------------
 /**
 * Construct the EnrollmentService under test from mock collaborators.
 * Any collaborator that is not supplied falls back to a freshly built default mock.
 *
 * @param deps - Optional `db`, `caService` and `grantsService` mocks.
 */
 function buildService(
  deps: {
    db?: ReturnType<typeof makeDb>;
    caService?: ReturnType<typeof makeCaService>;
    grantsService?: ReturnType<typeof makeGrantsService>;
  } = {},
 ) {
  const {
    db = makeDb(),
    caService = makeCaService(),
    grantsService = makeGrantsService(),
  } = deps;
  // The mocks only implement the members the service touches, hence the casts.
  return new EnrollmentService(db as unknown as Db, caService as never, grantsService as never);
 }
 // ---------------------------------------------------------------------------
 // Tests: createToken
 // ---------------------------------------------------------------------------
 describe('EnrollmentService.createToken', () => {
  it('inserts a token row and returns { token, expiresAt }', async () => {
    const mockDb = makeDb();
    const sut = buildService({ db: mockDb });
    const { token, expiresAt } = await sut.createToken({
      grantId: GRANT_ID,
      peerId: PEER_ID,
      ttlSeconds: 900,
    });
    // 32 bytes rendered as hex → 64 characters
    expect(token).toHaveLength(64);
    expect(expiresAt).toBeDefined();
    const expiryMs = new Date(expiresAt).getTime();
    expect(expiryMs).toBeGreaterThan(Date.now());
    // The persisted row must be tied to the requested grant and peer.
    const expectedRow = expect.objectContaining({ grantId: GRANT_ID, peerId: PEER_ID });
    expect(mockDb._mocks.insertValues).toHaveBeenCalledWith(expectedRow);
  });
  it('clamps ttlSeconds to 900', async () => {
    const sut = buildService({ db: makeDb() });
    const startedAt = Date.now();
    const { expiresAt } = await sut.createToken({
      grantId: GRANT_ID,
      peerId: PEER_ID,
      ttlSeconds: 9999,
    });
    const finishedAt = Date.now();
    const expiryMs = new Date(expiresAt).getTime();
    // Upper bound: no more than 900s (plus 100ms of slack) after the call started.
    expect(expiryMs - startedAt).toBeLessThanOrEqual(900_000 + 100);
    // Lower bound: the token has not already expired once the call returns.
    expect(expiryMs - finishedAt).toBeGreaterThanOrEqual(0);
  });
 });
 // ---------------------------------------------------------------------------
 // Tests: redeem — error paths
 // ---------------------------------------------------------------------------
 describe('EnrollmentService.redeem — error paths', () => {
  // Every case redeems the same token with the same placeholder CSR.
  const CSR = '---CSR---';
  const redeemVia = (svc: EnrollmentService) => svc.redeem(TOKEN, CSR);
  it('throws NotFoundException when token row not found', async () => {
    const svc = buildService({ db: makeDb({ tokenRows: [] }) });
    await expect(redeemVia(svc)).rejects.toBeInstanceOf(NotFoundException);
  });
  it('throws GoneException when usedAt is set (already redeemed)', async () => {
    const usedRow = makeTokenRow({ usedAt: new Date(Date.now() - 1000) });
    const svc = buildService({ db: makeDb({ tokenRows: [usedRow] }) });
    await expect(redeemVia(svc)).rejects.toBeInstanceOf(GoneException);
  });
  it('throws GoneException when token has expired', async () => {
    const expiredRow = makeTokenRow({ expiresAt: new Date(Date.now() - 1000) });
    const svc = buildService({ db: makeDb({ tokenRows: [expiredRow] }) });
    await expect(redeemVia(svc)).rejects.toBeInstanceOf(GoneException);
  });
  it('throws GoneException when grant status is not pending', async () => {
    const svc = buildService({
      db: makeDb(),
      grantsService: makeGrantsService({ status: 'active' }),
    });
    await expect(redeemVia(svc)).rejects.toBeInstanceOf(GoneException);
  });
  it('throws GoneException when token claim UPDATE returns empty array (concurrent replay)', async () => {
    // An empty claimedRows list makes the claim UPDATE report that no row was claimed.
    const svc = buildService({ db: makeDb({ claimedRows: [] }) });
    await expect(redeemVia(svc)).rejects.toBeInstanceOf(GoneException);
  });
  it('does NOT call issueCert when token claim fails (no double minting)', async () => {
    const ca = makeCaService();
    const svc = buildService({ db: makeDb({ claimedRows: [] }), caService: ca });
    await expect(redeemVia(svc)).rejects.toBeInstanceOf(GoneException);
    expect(ca.issueCert).not.toHaveBeenCalled();
  });
 });
 // ---------------------------------------------------------------------------
 // Tests: redeem — success path
 // ---------------------------------------------------------------------------
 // Happy-path suite: each test starts from the default mocks built in beforeEach
 // and redeems TOKEN with the placeholder CSR string '---CSR---'.
 describe('EnrollmentService.redeem — success path', () => {
  let db: ReturnType<typeof makeDb>;
  let caService: ReturnType<typeof makeCaService>;
  let grantsService: ReturnType<typeof makeGrantsService>;
  let service: EnrollmentService;
  // Build fresh mocks and a fresh service for every test.
  beforeEach(() => {
    db = makeDb();
    caService = makeCaService();
    grantsService = makeGrantsService();
    service = buildService({ db, caService, grantsService });
  });
  it('claims token BEFORE calling issueCert (prevents double minting)', async () => {
    // Both mocks append to this list when invoked, so it records their relative order.
    const callOrder: string[] = [];
    // NOTE(review): returningMock is presumably the .returning() step of the
    // token-claim UPDATE built by makeDb() — confirm against makeDb.
    db._mocks.returningMock.mockImplementation(async () => {
      callOrder.push('claim');
      return [{ token: TOKEN }];
    });
    caService.issueCert.mockImplementation(async () => {
      callOrder.push('issueCert');
      return { certPem: REAL_CERT_PEM, certChainPem: MOCK_CHAIN_PEM(), serialNumber: MOCK_SERIAL };
    });
    await service.redeem(TOKEN, '---CSR---');
    expect(callOrder).toEqual(['claim', 'issueCert']);
  });
  it('calls CaService.issueCert with grantId, subjectUserId, csrPem, ttlSeconds=300', async () => {
    await service.redeem(TOKEN, '---CSR---');
    // objectContaining: extra fields in the issueCert argument are tolerated.
    expect(caService.issueCert).toHaveBeenCalledWith(
      expect.objectContaining({
        grantId: GRANT_ID,
        subjectUserId: USER_ID,
        csrPem: '---CSR---',
        ttlSeconds: 300,
      }),
    );
  });
  it('runs activate grant + peer update + audit inside a transaction', async () => {
    await service.redeem(TOKEN, '---CSR---');
    expect(db._mocks.transactionMock).toHaveBeenCalledOnce();
    // tx.update called twice: activate grant + update peer
    expect(db._mocks.txUpdateMock).toHaveBeenCalledTimes(2);
    // tx.insert called once: audit log
    expect(db._mocks.txInsertMock).toHaveBeenCalledOnce();
  });
  it('activates grant (sets status=active) inside the transaction', async () => {
    await service.redeem(TOKEN, '---CSR---');
    // Passes if any one of the recorded txSetMock calls carried status 'active'.
    expect(db._mocks.txSetMock).toHaveBeenCalledWith(expect.objectContaining({ status: 'active' }));
  });
  it('updates the federationPeers row with certPem, certSerial, state=active inside the transaction', async () => {
    await service.redeem(TOKEN, '---CSR---');
    // The values asserted here are the ones the CaService mock hands back from issueCert.
    expect(db._mocks.txSetMock).toHaveBeenCalledWith(
      expect.objectContaining({
        certPem: REAL_CERT_PEM,
        certSerial: MOCK_SERIAL,
        state: 'active',
      }),
    );
  });
  it('inserts an audit log row inside the transaction', async () => {
    await service.redeem(TOKEN, '---CSR---');
    expect(db._mocks.txInsertValues).toHaveBeenCalledWith(
      expect.objectContaining({
        peerId: PEER_ID,
        grantId: GRANT_ID,
        verb: 'enrollment',
      }),
    );
  });
  it('returns { certPem, certChainPem } from CaService', async () => {
    const result = await service.redeem(TOKEN, '---CSR---');
    // toEqual is an exact match: serialNumber must not appear in the returned object.
    expect(result).toEqual({
      certPem: REAL_CERT_PEM,
      certChainPem: MOCK_CHAIN_PEM(),
    });
  });
 });
--- a/apps/gateway/src/federation/tests/federation.controller.spec.ts
+++ b/apps/gateway/src/federation/tests/federation.controller.spec.ts
@@ -0,0 +1,212 @@
 /**
 * Unit tests for FederationController (FED-M2-08).
 *
 * Coverage:
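 *  - listGrants: delegates to GrantsService with query params
 *  - createGrant: delegates to GrantsService and returns the created grant
 *  - getGrant: delegates to GrantsService with the provided ID
 *  - revokeGrant: delegates to GrantsService with id and optional reason
 *  - generateToken: returns enrollmentUrl containing the token; propagates NotFoundException
 *  - listPeers: returns DB rows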
 */
 import 'reflect-metadata';
 import { describe, it, expect, vi, beforeEach } from 'vitest';
 import { NotFoundException } from '@nestjs/common';
 import type { Db } from '@mosaicstack/db';
 import { FederationController } from '../federation.controller.js';
 import type { GrantsService } from '../grants.service.js';
 import type { EnrollmentService } from '../enrollment.service.js';
 // ---------------------------------------------------------------------------
 // Constants
 // ---------------------------------------------------------------------------
 // Fixture identifiers shared by every test in this file.
 // NOTE(review): the leading 'g' / 'p' / 'u' are not hex digits, so these are UUID-shaped
 // labels rather than valid UUIDs. Both services are mocked in this spec; confirm nothing
 // on the tested path validates the ID format.
 const GRANT_ID = 'g1111111-1111-1111-1111-111111111111';
 const PEER_ID = 'p2222222-2222-2222-2222-222222222222';
 const USER_ID = 'u3333333-3333-3333-3333-333333333333';
 // Pending grant fixture; the GrantsService stubs below resolve to this object.
 const MOCK_GRANT = {
  id: GRANT_ID,
  peerId: PEER_ID,
  subjectUserId: USER_ID,
  scope: { resources: ['tasks'], operations: ['list'] },
  status: 'pending' as const,
  expiresAt: null,
  createdAt: new Date('2026-01-01T00:00:00Z'),
  revokedAt: null,
  revokedReason: null,
 };
 // Peer row fixture in the 'pending' state: empty certPem, placeholder serial 'pending',
 // and an epoch certNotAfter. Used as the canned result of the DB mock.
 const MOCK_PEER = {
  id: PEER_ID,
  commonName: 'test-peer',
  displayName: 'Test Peer',
  certPem: '',
  certSerial: 'pending',
  certNotAfter: new Date(0),
  clientKeyPem: null,
  state: 'pending' as const,
  endpointUrl: null,
  createdAt: new Date('2026-01-01T00:00:00Z'),
  updatedAt: new Date('2026-01-01T00:00:00Z'),
 };
 // ---------------------------------------------------------------------------
 // DB mock builder
 // ---------------------------------------------------------------------------
 function makeDbMock(rows: unknown[] = []) {
  // Terminal step of the chain: awaiting orderBy() yields the canned rows.
  const orderByFn = vi.fn().mockResolvedValue(rows);
  // where() is optional in the chain, so from() offers both where() and orderBy().
  const whereFn = vi.fn().mockReturnValue({ orderBy: orderByFn });
  const fromFn = vi.fn().mockReturnValue({ where: whereFn, orderBy: orderByFn });
  const selectFn = vi.fn().mockReturnValue({ from: fromFn });
  // Write operations are inert stubs with no configured return value.
  const writeStubs = { insert: vi.fn(), update: vi.fn(), delete: vi.fn() };
  const mock = {
    select: selectFn,
    from: fromFn,
    where: whereFn,
    orderBy: orderByFn,
    ...writeStubs,
  };
  return mock as unknown as Db;
 }
 // ---------------------------------------------------------------------------
 // Tests
 // ---------------------------------------------------------------------------
 describe('FederationController', () => {
  // Canned enrollment-token payload used by the default createToken stub and the URL test.
  const SAMPLE_TOKEN = 'abc123def456abc123def456abc123def456abc123def456abc123def456ab12';
  const SAMPLE_EXPIRES_AT = '2026-01-01T00:15:00.000Z';
  let db: Db;
  let grantsService: GrantsService;
  let enrollmentService: EnrollmentService;
  let controller: FederationController;
  beforeEach(() => {
    // Fresh stubs per test so recorded calls never leak between cases.
    const grantStubs = {
      createGrant: vi.fn().mockResolvedValue(MOCK_GRANT),
      getGrant: vi.fn().mockResolvedValue(MOCK_GRANT),
      listGrants: vi.fn().mockResolvedValue([MOCK_GRANT]),
      revokeGrant: vi.fn().mockResolvedValue({ ...MOCK_GRANT, status: 'revoked' }),
      activateGrant: vi.fn(),
      expireGrant: vi.fn(),
    };
    const enrollmentStubs = {
      createToken: vi.fn().mockResolvedValue({
        token: SAMPLE_TOKEN,
        expiresAt: SAMPLE_EXPIRES_AT,
      }),
      redeem: vi.fn(),
    };
    db = makeDbMock([MOCK_PEER]);
    grantsService = grantStubs as unknown as GrantsService;
    enrollmentService = enrollmentStubs as unknown as EnrollmentService;
    controller = new FederationController(db, grantsService, enrollmentService);
  });
  // ─── Grant management ──────────────────────────────────────────────────
  describe('listGrants', () => {
    it('delegates to GrantsService with provided query params', async () => {
      const filters = { peerId: PEER_ID, status: 'pending' as const };
      const grants = await controller.listGrants(filters);
      expect(grants).toEqual([MOCK_GRANT]);
      expect(grantsService.listGrants).toHaveBeenCalledWith(filters);
    });
    it('delegates to GrantsService with empty filters', async () => {
      const grants = await controller.listGrants({});
      expect(grants).toEqual([MOCK_GRANT]);
      expect(grantsService.listGrants).toHaveBeenCalledWith({});
    });
  });
  describe('createGrant', () => {
    it('delegates to GrantsService and returns created grant', async () => {
      const payload = {
        peerId: PEER_ID,
        subjectUserId: USER_ID,
        scope: { resources: ['tasks'], operations: ['list'] },
      };
      const created = await controller.createGrant(payload);
      expect(created).toEqual(MOCK_GRANT);
      expect(grantsService.createGrant).toHaveBeenCalledWith(payload);
    });
  });
  describe('getGrant', () => {
    it('delegates to GrantsService with provided ID', async () => {
      const found = await controller.getGrant(GRANT_ID);
      expect(found).toEqual(MOCK_GRANT);
      expect(grantsService.getGrant).toHaveBeenCalledWith(GRANT_ID);
    });
  });
  describe('revokeGrant', () => {
    it('delegates to GrantsService with id and reason', async () => {
      const revoked = await controller.revokeGrant(GRANT_ID, { reason: 'test reason' });
      expect(revoked).toMatchObject({ status: 'revoked' });
      expect(grantsService.revokeGrant).toHaveBeenCalledWith(GRANT_ID, 'test reason');
    });
    it('delegates without reason when omitted', async () => {
      await controller.revokeGrant(GRANT_ID, {});
      // An absent reason is forwarded as undefined rather than being dropped.
      expect(grantsService.revokeGrant).toHaveBeenCalledWith(GRANT_ID, undefined);
    });
  });
  describe('generateToken', () => {
    it('returns enrollmentUrl containing the token', async () => {
      vi.mocked(enrollmentService.createToken).mockResolvedValueOnce({
        token: SAMPLE_TOKEN,
        expiresAt: SAMPLE_EXPIRES_AT,
      });
      const issued = await controller.generateToken(GRANT_ID, { ttlSeconds: 900 });
      expect(issued.token).toBe(SAMPLE_TOKEN);
      // The URL must point at the enrollment endpoint and embed the token itself.
      expect(issued.enrollmentUrl).toContain(SAMPLE_TOKEN);
      expect(issued.enrollmentUrl).toContain('/api/federation/enrollment/');
    });
    it('creates token via EnrollmentService with correct grantId and peerId', async () => {
      await controller.generateToken(GRANT_ID, { ttlSeconds: 300 });
      // peerId is not part of the request; it comes from the grant the GrantsService stub returns.
      const expectedArgs = { grantId: GRANT_ID, peerId: PEER_ID, ttlSeconds: 300 };
      expect(enrollmentService.createToken).toHaveBeenCalledWith(expectedArgs);
    });
    it('throws NotFoundException when grant does not exist', async () => {
      const missing = new NotFoundException(`Grant ${GRANT_ID} not found`);
      vi.mocked(grantsService.getGrant).mockRejectedValueOnce(missing);
      const attempt = controller.generateToken(GRANT_ID, { ttlSeconds: 900 });
      await expect(attempt).rejects.toThrow(NotFoundException);
    });
  });
  // ─── Peer management ───────────────────────────────────────────────────
  describe('listPeers', () => {
    it('returns DB rows ordered by commonName', async () => {
      const peers = await controller.listPeers();
      expect(db.select).toHaveBeenCalled();
      // The DB mock resolves its query chain to [MOCK_PEER].
      expect(peers).toEqual([MOCK_PEER]);
    });
  });
 });
--- a/apps/gateway/src/federation/tests/grants.service.spec.ts
+++ b/apps/gateway/src/federation/tests/grants.service.spec.ts
@@ -0,0 +1,351 @@
 /**
 * Unit tests for GrantsService — federation grants CRUD + status transitions (FED-M2-06).
 *
 * Coverage:
 *  - createGrant: validates scope via parseFederationScope
 *  - createGrant: inserts with status 'pending'
 *  - getGrant: returns grant when found
 *  - getGrant: throws NotFoundException when not found
 *  - listGrants: no filters returns all grants
 *  - listGrants: filters by peerId
 *  - listGrants: filters by subjectUserId
 *  - listGrants: filters by status
 *  - listGrants: multiple filters combined
 *  - activateGrant: pending → active works
 *  - activateGrant: non-pending throws ConflictException
 *  - revokeGrant: active → revoked works, sets revokedAt
 *  - revokeGrant: non-active throws ConflictException
 *  - expireGrant: active → expired works
 *  - expireGrant: non-active throws ConflictException
 */
 import 'reflect-metadata';
 import { describe, it, expect, vi, beforeEach } from 'vitest';
 import { ConflictException, NotFoundException } from '@nestjs/common';
 import type { Db } from '@mosaicstack/db';
 import { GrantsService } from '../grants.service.js';
 import { FederationScopeError } from '../scope-schema.js';
 // ---------------------------------------------------------------------------
 // Minimal valid federation scope for testing
 // ---------------------------------------------------------------------------
 // Scope passed to createGrant in the tests that expect the call to succeed.
 const VALID_SCOPE = {
  resources: ['tasks'] as const,
  excluded_resources: [],
  max_rows_per_query: 100,
 };
 // Fixture identifiers used to build mock grants and to drive the service calls.
 // NOTE(review): 'u…' and 'g…' start with non-hex characters, so those two are
 // UUID-shaped labels rather than valid UUIDs; the DB is mocked in this spec.
 const PEER_ID = 'a1111111-1111-1111-1111-111111111111';
 const USER_ID = 'u2222222-2222-2222-2222-222222222222';
 const GRANT_ID = 'g3333333-3333-3333-3333-333333333333';
 // ---------------------------------------------------------------------------
 // Build a mock DB that mimics chained Drizzle query builder calls
 // ---------------------------------------------------------------------------
 function makeMockGrant(overrides: Partial<Record<string, unknown>> = {}) {
  // Baseline fixture: a pending grant with no expiry and no revocation data.
  const baseline = {
    id: GRANT_ID,
    peerId: PEER_ID,
    subjectUserId: USER_ID,
    scope: VALID_SCOPE,
    status: 'pending',
    expiresAt: null,
    createdAt: new Date('2026-01-01T00:00:00Z'),
    revokedAt: null,
    revokedReason: null,
  };
  // Caller-supplied fields take precedence over the baseline.
  return { ...baseline, ...overrides };
 }
 /**
 * Build a mock Db whose insert / select / update entry points return chainable
 * vi.fn() builders resolving to canned rows.
 *
 * @param overrides Optional canned results:
 *   - insertReturning: rows resolved by insert().values().returning()  (default: one pending grant)
 *   - selectRows:      rows resolved by every select chain             (default: one pending grant)
 *   - updateReturning: rows resolved by update().set().where().returning() (default: one active grant)
 * @returns `{ insert, select, update, _mocks }`, where `_mocks` exposes the individual
 *   chain-step mocks for assertions.
 */
 function makeDb(
  overrides: {
    insertReturning?: unknown[];
    selectRows?: unknown[];
    updateReturning?: unknown[];
  } = {},
 ) {
  const insertReturning = overrides.insertReturning ?? [makeMockGrant()];
  const selectRows = overrides.selectRows ?? [makeMockGrant()];
  const updateReturning = overrides.updateReturning ?? [makeMockGrant({ status: 'active' })];
  // Drizzle returns a chainable builder; we need to mock the full chain.
  // insert().values().returning()
  const returningInsert = vi.fn().mockResolvedValue(insertReturning);
  const valuesInsert = vi.fn().mockReturnValue({ returning: returningInsert });
  const insertMock = vi.fn().mockReturnValue({ values: valuesInsert });
  // Minimal thenable hook: lets a builder step be awaited directly and resolve to the rows.
  const resolveSelectRows = (resolve: (v: unknown) => unknown) => resolve(selectRows);
  // select().from().where().limit()
  const limitSelect = vi.fn().mockResolvedValue(selectRows);
  // where() serves two shapes: `.where(...).limit(n)` and a directly awaited `.where(...)`.
  // Without `then`, awaiting the filtered query would yield this builder object, not rows.
  const whereSelect = vi.fn().mockReturnValue({
    limit: limitSelect,
    then: resolveSelectRows,
  });
  // from returns something that is both thenable (for full-table select) and has .where()
  const fromSelect = vi.fn().mockReturnValue({
    where: whereSelect,
    limit: limitSelect,
    // Make it thenable for listGrants with no filters (await db.select().from(federationGrants))
    then: resolveSelectRows,
  });
  const selectMock = vi.fn().mockReturnValue({ from: fromSelect });
  // update().set().where().returning()
  const returningUpdate = vi.fn().mockResolvedValue(updateReturning);
  const whereUpdate = vi.fn().mockReturnValue({ returning: returningUpdate });
  const setMock = vi.fn().mockReturnValue({ where: whereUpdate });
  const updateMock = vi.fn().mockReturnValue({ set: setMock });
  return {
    insert: insertMock,
    select: selectMock,
    update: updateMock,
    // Expose internals for assertions
    _mocks: {
      // Canned rows array (not a mock); kept under its original key for compatibility.
      insertReturning,
      // The insert .returning() mock itself, mirroring returningUpdate below.
      returningInsert,
      valuesInsert,
      insertMock,
      limitSelect,
      whereSelect,
      fromSelect,
      selectMock,
      returningUpdate,
      whereUpdate,
      setMock,
      updateMock,
    },
  };
 }
 // ---------------------------------------------------------------------------
 // Tests
 // ---------------------------------------------------------------------------
 describe('GrantsService', () => {
  let db: ReturnType<typeof makeDb>;
  let service: GrantsService;
  // Swap in a freshly built mock DB (optionally customised) and a service bound to it.
  const rebuild = (overrides?: Parameters<typeof makeDb>[0]) => {
    db = makeDb(overrides);
    service = new GrantsService(db as unknown as Db);
  };
  // Shorthand: the mock SELECT finds exactly one grant in the given status.
  const withStoredStatus = (status: string) => {
    rebuild({ selectRows: [makeMockGrant({ status })] });
  };
  // Asserts that the in-flight operation rejects with a ConflictException.
  const expectConflict = (operation: Promise<unknown>) =>
    expect(operation).rejects.toBeInstanceOf(ConflictException);
  beforeEach(() => {
    rebuild();
  });
  // ─── createGrant ──────────────────────────────────────────────────────────
  describe('createGrant', () => {
    it('calls parseFederationScope — rejects an invalid scope', async () => {
      const badScope = { resources: [], max_rows_per_query: 0 };
      const attempt = service.createGrant({
        peerId: PEER_ID,
        subjectUserId: USER_ID,
        scope: badScope,
      });
      await expect(attempt).rejects.toBeInstanceOf(FederationScopeError);
    });
    it('inserts a grant with status pending and returns it', async () => {
      const created = await service.createGrant({
        peerId: PEER_ID,
        subjectUserId: USER_ID,
        scope: VALID_SCOPE,
      });
      const expectedRow = expect.objectContaining({
        status: 'pending',
        peerId: PEER_ID,
        subjectUserId: USER_ID,
      });
      expect(db._mocks.valuesInsert).toHaveBeenCalledWith(expectedRow);
      expect(created.status).toBe('pending');
    });
    it('passes expiresAt as a Date when provided', async () => {
      await service.createGrant({
        peerId: PEER_ID,
        subjectUserId: USER_ID,
        scope: VALID_SCOPE,
        expiresAt: '2027-01-01T00:00:00Z',
      });
      // The ISO string from the request must reach the insert as a Date instance.
      const expectedRow = expect.objectContaining({ expiresAt: expect.any(Date) });
      expect(db._mocks.valuesInsert).toHaveBeenCalledWith(expectedRow);
    });
    it('sets expiresAt to null when not provided', async () => {
      await service.createGrant({ peerId: PEER_ID, subjectUserId: USER_ID, scope: VALID_SCOPE });
      const expectedRow = expect.objectContaining({ expiresAt: null });
      expect(db._mocks.valuesInsert).toHaveBeenCalledWith(expectedRow);
    });
  });
  // ─── getGrant ─────────────────────────────────────────────────────────────
  describe('getGrant', () => {
    it('returns the grant when found', async () => {
      const found = await service.getGrant(GRANT_ID);
      expect(found.id).toBe(GRANT_ID);
    });
    it('throws NotFoundException when no rows returned', async () => {
      rebuild({ selectRows: [] });
      await expect(service.getGrant(GRANT_ID)).rejects.toBeInstanceOf(NotFoundException);
    });
  });
  // ─── listGrants ───────────────────────────────────────────────────────────
  describe('listGrants', () => {
    it('queries without where clause when no filters provided', async () => {
      const grants = await service.listGrants({});
      expect(Array.isArray(grants)).toBe(true);
    });
    it('applies peerId filter', async () => {
      await service.listGrants({ peerId: PEER_ID });
      expect(db._mocks.whereSelect).toHaveBeenCalled();
    });
    it('applies subjectUserId filter', async () => {
      await service.listGrants({ subjectUserId: USER_ID });
      expect(db._mocks.whereSelect).toHaveBeenCalled();
    });
    it('applies status filter', async () => {
      await service.listGrants({ status: 'active' });
      expect(db._mocks.whereSelect).toHaveBeenCalled();
    });
    it('applies multiple filters combined', async () => {
      await service.listGrants({ peerId: PEER_ID, status: 'pending' });
      expect(db._mocks.whereSelect).toHaveBeenCalled();
    });
  });
  // ─── activateGrant ────────────────────────────────────────────────────────
  describe('activateGrant', () => {
    it('transitions pending → active and returns updated grant', async () => {
      rebuild({
        selectRows: [makeMockGrant({ status: 'pending' })],
        updateReturning: [makeMockGrant({ status: 'active' })],
      });
      const activated = await service.activateGrant(GRANT_ID);
      // Exact match: the update must set the status field and nothing else.
      expect(db._mocks.setMock).toHaveBeenCalledWith({ status: 'active' });
      expect(activated.status).toBe('active');
    });
    it('throws ConflictException when grant is already active', async () => {
      withStoredStatus('active');
      await expectConflict(service.activateGrant(GRANT_ID));
    });
    it('throws ConflictException when grant is revoked', async () => {
      withStoredStatus('revoked');
      await expectConflict(service.activateGrant(GRANT_ID));
    });
    it('throws ConflictException when grant is expired', async () => {
      withStoredStatus('expired');
      await expectConflict(service.activateGrant(GRANT_ID));
    });
  });
  // ─── revokeGrant ──────────────────────────────────────────────────────────
  describe('revokeGrant', () => {
    it('transitions active → revoked and sets revokedAt', async () => {
      const revokedAt = new Date();
      rebuild({
        selectRows: [makeMockGrant({ status: 'active' })],
        updateReturning: [makeMockGrant({ status: 'revoked', revokedAt })],
      });
      const revoked = await service.revokeGrant(GRANT_ID, 'test reason');
      const expectedUpdate = expect.objectContaining({
        status: 'revoked',
        revokedAt: expect.any(Date),
        revokedReason: 'test reason',
      });
      expect(db._mocks.setMock).toHaveBeenCalledWith(expectedUpdate);
      expect(revoked.status).toBe('revoked');
    });
    it('sets revokedReason to null when not provided', async () => {
      rebuild({
        selectRows: [makeMockGrant({ status: 'active' })],
        updateReturning: [makeMockGrant({ status: 'revoked', revokedAt: new Date() })],
      });
      await service.revokeGrant(GRANT_ID);
      const expectedUpdate = expect.objectContaining({ revokedReason: null });
      expect(db._mocks.setMock).toHaveBeenCalledWith(expectedUpdate);
    });
    it('throws ConflictException when grant is pending', async () => {
      withStoredStatus('pending');
      await expectConflict(service.revokeGrant(GRANT_ID));
    });
    it('throws ConflictException when grant is already revoked', async () => {
      withStoredStatus('revoked');
      await expectConflict(service.revokeGrant(GRANT_ID));
    });
    it('throws ConflictException when grant is expired', async () => {
      withStoredStatus('expired');
      await expectConflict(service.revokeGrant(GRANT_ID));
    });
  });
  // ─── expireGrant ──────────────────────────────────────────────────────────
  describe('expireGrant', () => {
    it('transitions active → expired and returns updated grant', async () => {
      rebuild({
        selectRows: [makeMockGrant({ status: 'active' })],
        updateReturning: [makeMockGrant({ status: 'expired' })],
      });
      const expired = await service.expireGrant(GRANT_ID);
      // Exact match: the update must set the status field and nothing else.
      expect(db._mocks.setMock).toHaveBeenCalledWith({ status: 'expired' });
      expect(expired.status).toBe('expired');
    });
    it('throws ConflictException when grant is pending', async () => {
      withStoredStatus('pending');
      await expectConflict(service.expireGrant(GRANT_ID));
    });
    it('throws ConflictException when grant is already expired', async () => {
      withStoredStatus('expired');
      await expectConflict(service.expireGrant(GRANT_ID));
    });
    it('throws ConflictException when grant is revoked', async () => {
      withStoredStatus('revoked');
      await expectConflict(service.expireGrant(GRANT_ID));
    });
  });
 });
--- a/apps/gateway/src/federation/tests/helpers/test-cert.ts
+++ b/apps/gateway/src/federation/tests/helpers/test-cert.ts
@@ -0,0 +1,138 @@
 /**
 * Test helpers for generating real X.509 PEM certificates in unit tests.
 *
 * PR #501 (FED-M2-11) introduced strict `new X509Certificate(certPem)` parsing
 * in both EnrollmentService.extractCertNotAfter and CaService.issueCert — dummy
 * cert strings now throw `error:0680007B:asn1 encoding routines::header too long`.
 *
 * These helpers produce minimal but cryptographically valid self-signed EC P-256
 * certificates via @peculiar/x509 + Node.js webcrypto, suitable for test mocks.
 *
 * Two variants:
 *  - makeSelfSignedCert()          Plain cert — satisfies node:crypto X509Certificate parse.
 *  - makeMosaicIssuedCert(opts)    Cert with custom Mosaic OID extensions — satisfies the
 *                                  CRIT-1 OID presence + value checks in CaService.issueCert.
 */
 import { webcrypto } from 'node:crypto';
 import {
  X509CertificateGenerator,
  Extension,
  KeyUsagesExtension,
  KeyUsageFlags,
  BasicConstraintsExtension,
  cryptoProvider,
 } from '@peculiar/x509';
 // ---------------------------------------------------------------------------
 // Internal helpers
 // ---------------------------------------------------------------------------
 /**
 * Wrap a string in a short-form ASN.1 UTF8String TLV:
 *   tag byte 0x0C, one length byte, then the UTF-8 bytes of the value.
 *
 * Only values whose UTF-8 encoding is at most 127 bytes are supported, because
 * the length is always written as a single byte; longer values throw.
 *
 * Why exactly two header bytes: CaService.issueCert reads the extension value as
 *   decoder.decode(grantIdExt.value.slice(2))
 * i.e. it drops the tag and length bytes and decodes the rest as UTF-8, so the
 * OCTET STRING content produced here has to follow that layout.
 *
 * @param value Text to encode.
 * @returns A new Uint8Array of length `2 + <UTF-8 byte length of value>`.
 * @throws Error when the UTF-8 encoding of `value` exceeds 127 bytes.
 */
 function encodeUtf8String(value: string): Uint8Array {
  const UTF8STRING_TAG = 0x0c;
  const MAX_SHORT_FORM_LENGTH = 127;
  const payload = new TextEncoder().encode(value);
  const payloadLength = payload.length;
  if (payloadLength > MAX_SHORT_FORM_LENGTH) {
    throw new Error('encodeUtf8String: value too long for single-byte length encoding');
  }
  // Header (tag + length) occupies the first two bytes; the payload follows.
  const tlv = new Uint8Array(payloadLength + 2);
  tlv.set([UTF8STRING_TAG, payloadLength], 0);
  tlv.set(payload, 2);
  return tlv;
 }
 // ---------------------------------------------------------------------------
 // Mosaic OID constants (must match production CaService)
 // ---------------------------------------------------------------------------
 // Extension OID carrying the grant ID (mosaic_grant_id).
 const OID_MOSAIC_GRANT_ID = '1.3.6.1.4.1.99999.1';
 // Extension OID carrying the subject user ID (mosaic_subject_user_id).
 const OID_MOSAIC_SUBJECT_USER_ID = '1.3.6.1.4.1.99999.2';
 // ---------------------------------------------------------------------------
 // Public API
 // ---------------------------------------------------------------------------
 /**
 * Shared generator behind the public helpers: creates a fresh EC P-256 key pair
 * and a self-signed certificate valid from now until 24 hours from now.
 *
 * Every certificate gets serial '01', BasicConstraints(false) and a
 * digitalSignature KeyUsage extension; `extraExtensions` are appended after those.
 *
 * @param name Subject distinguished name, e.g. 'CN=harness-test'.
 * @param extraExtensions Additional extensions to embed (default: none).
 * @returns The certificate serialized as PEM.
 */
 async function buildSelfSignedPem(
  name: string,
  extraExtensions: Extension[] = [],
 ): Promise<string> {
  // Ensure @peculiar/x509 uses Node.js webcrypto (available as globalThis.crypto in Node 19+,
  // but we set it explicitly here to be safe on all Node 18+ versions).
  cryptoProvider.set(webcrypto as unknown as Parameters<typeof cryptoProvider.set>[0]);
  const alg = { name: 'ECDSA', namedCurve: 'P-256', hash: 'SHA-256' } as const;
  // The key pair is generated as non-extractable and is used only to sign this certificate.
  const keys = await webcrypto.subtle.generateKey(alg, false, ['sign', 'verify']);
  const now = new Date();
  const tomorrow = new Date(now.getTime() + 86_400_000);
  const cert = await X509CertificateGenerator.createSelfSigned({
    serialNumber: '01',
    name,
    notBefore: now,
    notAfter: tomorrow,
    signingAlgorithm: alg,
    keys,
    extensions: [
      new BasicConstraintsExtension(false),
      new KeyUsagesExtension(KeyUsageFlags.digitalSignature),
      ...extraExtensions,
    ],
  });
  return cert.toString('pem');
 }
 /**
 * Generate a minimal self-signed EC P-256 certificate valid for 1 day.
 * CN=harness-test, no custom extensions.
 *
 * Suitable for:
 *  - EnrollmentService.extractCertNotAfter (just needs parseable PEM)
 *  - Any mock that returns certPem / certChainPem without OID checks
 *
 * @returns The certificate serialized as PEM.
 */
 export async function makeSelfSignedCert(): Promise<string> {
  return buildSelfSignedPem('CN=harness-test');
 }
 /**
 * Generate a self-signed EC P-256 certificate (CN=mosaic-issued-test, valid for
 * 1 day) that contains the two custom Mosaic OID extensions required by
 * CaService.issueCert's CRIT-1 check:
 *   OID 1.3.6.1.4.1.99999.1  → mosaic_grant_id   (value = grantId)
 *   OID 1.3.6.1.4.1.99999.2  → mosaic_subject_user_id (value = subjectUserId)
 *
 * The extension value encoding matches the production parser's `.slice(2)` assumption:
 * each extension value is an OCTET STRING wrapping an ASN.1 UTF8String TLV.
 *
 * @param opts.grantId Value embedded in the mosaic_grant_id extension.
 * @param opts.subjectUserId Value embedded in the mosaic_subject_user_id extension.
 * @returns The certificate serialized as PEM.
 * @throws Error (from encodeUtf8String) when either value exceeds 127 UTF-8 bytes.
 */
 export async function makeMosaicIssuedCert(opts: {
  grantId: string;
  subjectUserId: string;
 }): Promise<string> {
  // Both extensions are marked non-critical (second constructor argument is false).
  const mosaicExtensions = [
    // mosaic_grant_id — OID 1.3.6.1.4.1.99999.1
    new Extension(OID_MOSAIC_GRANT_ID, false, encodeUtf8String(opts.grantId)),
    // mosaic_subject_user_id — OID 1.3.6.1.4.1.99999.2
    new Extension(OID_MOSAIC_SUBJECT_USER_ID, false, encodeUtf8String(opts.subjectUserId)),
  ];
  return buildSelfSignedPem('CN=mosaic-issued-test', mosaicExtensions);
 }
--- a/apps/gateway/src/federation/tests/peer-key.spec.ts
+++ b/apps/gateway/src/federation/tests/peer-key.spec.ts
@@ -0,0 +1,63 @@
 import { describe, it, expect, beforeEach, afterEach } from 'vitest';
 import { sealClientKey, unsealClientKey } from '../peer-key.util.js';
 const TEST_SECRET = 'test-secret-for-peer-key-unit-tests-only';
 const TEST_PEM = `-----BEGIN PRIVATE KEY-----
 MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQC7o4qne60TB3wo
 pCOW8QqstpxEBpnFo37JxLYEJbpE3gUlJajsHv9UWRQ7m5B7n+MBXwTCQqMEY8Wl
 kHv9tGgz1YGwzBjNKxPJXE6pPTXQ1Oa0VB9l3qHdqF5HtZoJzE0c6dO8HJ5YUVL
 -----END PRIVATE KEY-----`;
 // Value of BETTER_AUTH_SECRET as it was before the current test started, so the
 // surrounding environment (set or unset) can be put back afterwards.
 let secretBeforeTest: string | undefined;
 beforeEach(() => {
  secretBeforeTest = process.env['BETTER_AUTH_SECRET'];
  process.env['BETTER_AUTH_SECRET'] = TEST_SECRET;
 });
 afterEach(() => {
  // An originally-unset variable must be deleted rather than assigned:
  // assigning `undefined` to process.env stores the string "undefined".
  if (secretBeforeTest !== undefined) {
    process.env['BETTER_AUTH_SECRET'] = secretBeforeTest;
    return;
  }
  delete process.env['BETTER_AUTH_SECRET'];
 });
 // Behavioural checks for sealClientKey / unsealClientKey. Every test runs with
 // BETTER_AUTH_SECRET set to TEST_SECRET by the surrounding hooks.
 describe('peer-key seal/unseal', () => {
  it('round-trip: unsealClientKey(sealClientKey(pem)) returns original pem', () => {
    const recovered = unsealClientKey(sealClientKey(TEST_PEM));
    expect(recovered).toBe(TEST_PEM);
  });
  it('non-determinism: sealClientKey produces different ciphertext each call', () => {
    // Sealing the same plaintext twice must not yield the same output.
    const [first, second] = [sealClientKey(TEST_PEM), sealClientKey(TEST_PEM)];
    expect(first).not.toBe(second);
  });
  it('at-rest: sealed output does not contain plaintext PEM content', () => {
    const ciphertext = sealClientKey(TEST_PEM);
    // The PEM label and the first line of the key body must both be absent.
    const plaintextFragments = [
      'PRIVATE KEY',
      'MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQC7o4qne60TB3wo',
    ];
    for (const fragment of plaintextFragments) {
      expect(ciphertext).not.toContain(fragment);
    }
  });
  it('tamper: flipping a byte in the sealed payload causes unseal to throw', () => {
    const bytes = Buffer.from(sealClientKey(TEST_PEM), 'base64');
    // Invert every bit of the middle byte. NOTE(review): assumed to land in the
    // ciphertext region (past IV and authTag); the sealed layout is not visible here.
    const target = Math.floor(bytes.length / 2);
    bytes[target] = bytes[target]! ^ 0xff;
    const corrupted = bytes.toString('base64');
    expect(() => unsealClientKey(corrupted)).toThrow();
  });
  it('missing secret: unsealClientKey throws when BETTER_AUTH_SECRET is unset', () => {
    // Seal while the secret is still present, then remove it before unsealing.
    const sealedWithSecret = sealClientKey(TEST_PEM);
    delete process.env['BETTER_AUTH_SECRET'];
    expect(() => unsealClientKey(sealedWithSecret)).toThrow('BETTER_AUTH_SECRET is not set');
  });
 });
--- a/apps/gateway/src/federation/ca.dto.ts
+++ b/apps/gateway/src/federation/ca.dto.ts
@@ -0,0 +1,57 @@
 /**
 * DTOs for the Step-CA client service (FED-M2-04).
 *
 * IssueCertRequestDto  — input to CaService.issueCert()
 * IssuedCertDto        — output from CaService.issueCert()
 */
 import { IsInt, IsNotEmpty, IsOptional, IsString, IsUUID, Max, Min } from 'class-validator';
 /**
 * Input to CaService.issueCert().
 *
 * Field constraints are declared with class-validator decorators, so they are
 * enforced only where an instance is actually run through class-validator.
 */
 export class IssueCertRequestDto {
  /**
   * PEM-encoded PKCS#10 Certificate Signing Request.
   * The CSR must already include the desired SANs.
   * Validated as a non-empty string only; the PEM/PKCS#10 structure is not
   * checked by this DTO.
   */
  @IsString()
  @IsNotEmpty()
  csrPem!: string;
  /**
   * UUID of the federation_grants row this certificate is being issued for.
   * Embedded as the `mosaic_grant_id` custom OID extension.
   */
  @IsUUID()
  grantId!: string;
  /**
   * UUID of the local user on whose behalf the cert is being issued.
   * Embedded as the `mosaic_subject_user_id` custom OID extension.
   */
  @IsUUID()
  subjectUserId!: string;
  /**
   * Requested certificate validity in seconds.
   * Validation range: 60 s to 900 s (15 minutes), integers only.
   * Default: 300 s (5 minutes), supplied by the field initializer.
   * The service will always clamp to 900 s regardless of this value.
   */
  @IsOptional()
  @IsInt()
  @Min(60)
  @Max(15 * 60)
  ttlSeconds: number = 300;
 }
 /**
 * Output of CaService.issueCert(). Carries no validation decorators — it is
 * produced by the service, not accepted from callers.
 */
 export class IssuedCertDto {
  /** PEM-encoded leaf certificate returned by step-ca. */
  certPem!: string;
  /**
   * PEM-encoded certificate chain, starting with the leaf.
   * Built from step-ca's `certChain` when present; otherwise from `crt` + `ca`;
   * and it is just `certPem` when the response has neither `certChain` nor `ca`.
   */
  certChainPem!: string;
  /**
   * Serial number string of the issued certificate.
   * NOTE(review): previously documented as decimal. If the service derives this
   * from Node's `crypto.X509Certificate#serialNumber`, the value is a
   * hexadecimal string — confirm the representation consumers rely on.
   */
  serialNumber!: string;
 }
--- a/apps/gateway/src/federation/ca.service.spec.ts
+++ b/apps/gateway/src/federation/ca.service.spec.ts
@@ -0,0 +1,592 @@
 /**
 * Unit tests for CaService — Step-CA client (FED-M2-04).
 *
 * Coverage:
 *  - Happy path: returns IssuedCertDto with certPem, certChainPem, serialNumber
 *  - certChainPem fallback: falls back to certPem when certChain absent
 *  - certChainPem from ca field: uses crt+ca when certChain absent but ca present
 *  - HTTP 401: throws CaServiceError with cause + remediation
 *  - HTTP non-401 error: throws CaServiceError
 *  - Malformed CSR: throws before HTTP call (INVALID_CSR)
 *  - Non-JSON response: throws CaServiceError
 *  - HTTPS connection error: throws CaServiceError
 *  - JWT custom claims: mosaic_grant_id and mosaic_subject_user_id present in OTT payload
 *    verified with jose.jwtVerify (real signature check)
 *  - CaServiceError: has cause + remediation properties
 *  - Missing crt in response: throws CaServiceError
 *  - Real CSR validation: valid P-256 CSR passes; malformed CSR fails with INVALID_CSR
 *  - provisionerPassword never appears in CaServiceError messages
 *  - HTTPS-only enforcement: http:// URL throws in constructor
 *  - TTL clamp: a ttlSeconds above 900 is sent to step-ca as a 900 s validity
 */
 import 'reflect-metadata';
 import { describe, it, expect, vi, beforeEach, beforeAll, type Mock } from 'vitest';
 import { jwtVerify, exportJWK, generateKeyPair } from 'jose';
 import { Pkcs10CertificateRequestGenerator } from '@peculiar/x509';
 import { makeMosaicIssuedCert } from './__tests__/helpers/test-cert.js';
 // ---------------------------------------------------------------------------
 // Mock node:https BEFORE importing CaService so the mock is in place when
 // the module is loaded. Vitest/ESM require vi.mock at the top level.
 // ---------------------------------------------------------------------------
 // Replace node:https with bare mocks. The same two vi.fn() instances are
 // exposed both as named exports and on `default`, so whichever import style the
 // code under test uses, it ends up calling the mocks the tests configure.
 vi.mock('node:https', () => {
  // Configured per test (e.g. via mockImplementation) to simulate step-ca.
  const mockRequest = vi.fn();
  // Constructing an Agent yields an empty object; no sockets or TLS state are created.
  const mockAgent = vi.fn().mockImplementation(() => ({}));
  return {
    default: { request: mockRequest, Agent: mockAgent },
    request: mockRequest,
    Agent: mockAgent,
  };
 });
 // Replace node:fs so that readFileSync returns a fixed placeholder PEM instead
 // of touching the disk (STEP_CA_ROOT_CERT_PATH points at a path that does not exist).
 // NOTE(review): presumably consumed when the service loads its root cert — the
 // read itself is not visible in this file.
 vi.mock('node:fs', () => {
  const mockReadFileSync = vi
    .fn()
    .mockReturnValue('-----BEGIN CERTIFICATE-----\nFAKEROOT\n-----END CERTIFICATE-----\n');
  return {
    default: { readFileSync: mockReadFileSync },
    readFileSync: mockReadFileSync,
  };
 });
 // ---------------------------------------------------------------------------
 // Helpers
 // ---------------------------------------------------------------------------
 // Real self-signed EC P-256 certificate generated with openssl for testing.
 // openssl req -x509 -newkey ec -pkeyopt ec_paramgen_curve:P-256 -nodes -keyout /dev/null \
 //   -out /dev/stdout -subj "/CN=test" -days 1
 const FAKE_CERT_PEM = `-----BEGIN CERTIFICATE-----
 MIIBdDCCARmgAwIBAgIUM+iUJSayN+PwXkyVN6qwSY7sr6gwCgYIKoZIzj0EAwIw
 DzENMAsGA1UEAwwEdGVzdDAeFw0yNjA0MjIwMzE5MTlaFw0yNjA0MjMwMzE5MTla
 MA8xDTALBgNVBAMMBHRlc3QwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAAR21kHL
 n1GmFQ4TEBw3EA53pD+2McIBf5WcoHE+x0eMz5DpRKJe0ksHwOVN5Yev5d57kb+4
 MvG1LhbHCB/uQo8So1MwUTAdBgNVHQ4EFgQUPq0pdIGiQ7pLBRXICS8GTliCrLsw
 HwYDVR0jBBgwFoAUPq0pdIGiQ7pLBRXICS8GTliCrLswDwYDVR0TAQH/BAUwAwEB
 /zAKBggqhkjOPQQDAgNJADBGAiEAypJqyC6S77aQ3eEXokM6sgAsD7Oa3tJbCbVm
 zG3uJb0CIQC1w+GE+Ad0OTR5Quja46R1RjOo8ydpzZ7Fh4rouAiwEw==
 -----END CERTIFICATE-----
 `;
 // Use a second copy of the same cert for the CA field in tests.
 const FAKE_CA_PEM = FAKE_CERT_PEM;
 const GRANT_ID = 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11';
 const SUBJECT_USER_ID = 'b1ffcd00-0d1c-5f09-cc7e-7cc0ce491b22';
 // Real self-signed cert containing both Mosaic OID extensions — populated in beforeAll.
 // Required because CaService.issueCert performs CRIT-1 OID presence/value checks on the
 // response cert (PR #501 — strict parsing, no silent fallback).
 let realIssuedCertPem: string;
 // ---------------------------------------------------------------------------
 // Generate a real EC P-256 key pair and CSR for integration-style tests
 // ---------------------------------------------------------------------------
 // `realCsrPem` is declared at module level so that every test can share it.
 // It is filled in lazily: the first test that needs a CSR calls generateRealCsr().
 let realCsrPem: string;
 /**
 * Create a fresh ES256 key pair and return a PKCS#10 CSR for
 * `CN=test.federation.local`, signed with that key and PEM-encoded.
 */
 async function generateRealCsr(): Promise<string> {
  const keyPair = await generateKeyPair('ES256');
  // Sanity check only: confirms the public key can be exported as a JWK.
  // The exported value itself is not used.
  await exportJWK(keyPair.publicKey);
  // @peculiar/x509 assembles and signs the actual request.
  const request = await Pkcs10CertificateRequestGenerator.create({
    name: 'CN=test.federation.local',
    signingAlgorithm: { name: 'ECDSA', hash: 'SHA-256' },
    keys: { privateKey: keyPair.privateKey, publicKey: keyPair.publicKey },
  });
  return request.toString('pem');
 }
 // ---------------------------------------------------------------------------
 // Setup env before importing service
 // We use an EC P-256 key pair here so the JWK-based signing works.
 // The key pair is generated once and stored in module-level vars.
 // ---------------------------------------------------------------------------
 // Real EC P-256 test JWK (test-only, never used in production).
 // Generated with node webcrypto for use in unit tests.
 const TEST_EC_PRIVATE_JWK = {
  key_ops: ['sign'],
  ext: true,
  kty: 'EC',
  x: 'Xq2RjZctcPcUMU14qfjs3MtZTmFk8z1lFGQyypgXZOU',
  y: 't8w9Cbt4RVmR47Wnb_i5cLwefEnMcvwse049zu9Rl_E',
  crv: 'P-256',
  d: 'TM6N79w1HE-PiML5Td4mbXfJaLHEaZrVyVrrwlJv7q8',
  kid: 'test-ec-kid',
 };
 const TEST_EC_PUBLIC_JWK = {
  key_ops: ['verify'],
  ext: true,
  kty: 'EC',
  x: 'Xq2RjZctcPcUMU14qfjs3MtZTmFk8z1lFGQyypgXZOU',
  y: 't8w9Cbt4RVmR47Wnb_i5cLwefEnMcvwse049zu9Rl_E',
  crv: 'P-256',
  kid: 'test-ec-kid',
 };
 process.env['STEP_CA_URL'] = 'https://step-ca:9000';
 process.env['STEP_CA_PROVISIONER_KEY_JSON'] = JSON.stringify(TEST_EC_PRIVATE_JWK);
 process.env['STEP_CA_ROOT_CERT_PATH'] = '/fake/root.pem';
 // Import AFTER env is set and mocks are registered
 import * as httpsModule from 'node:https';
 import { CaService, CaServiceError } from './ca.service.js';
 import type { IssueCertRequestDto } from './ca.dto.js';
 // ---------------------------------------------------------------------------
 // Helper to build a mock https.request that simulates step-ca
 // ---------------------------------------------------------------------------
 /**
 * Program the mocked `https.request` to behave like step-ca for subsequent calls.
 *
 * @param statusCode  Status reported on the fake response.
 * @param body        Response payload: strings are sent verbatim, anything else
 *                    is JSON-stringified, `undefined` emits no data at all.
 * @param errorMsg    When given, no response is delivered; instead the request's
 *                    'error' listener is invoked with `new Error(errorMsg)`.
 */
 function makeHttpsMock(statusCode: number, body: unknown, errorMsg?: string): void {
  type ResListener = (chunk?: Buffer) => void;
  type FakeRes = { statusCode: number; on: (event: string, cb: ResListener) => void };
  const fakeReq = {
    write: vi.fn(),
    end: vi.fn(),
    on: vi.fn(),
    setTimeout: vi.fn(),
  };
  // Listeners fire synchronously at registration time: 'data' once with the
  // whole body, 'end' immediately.
  const fakeRes: FakeRes = {
    statusCode,
    on(event, cb) {
      if (event === 'data' && body !== undefined) {
        cb(Buffer.from(typeof body === 'string' ? body : JSON.stringify(body)));
      } else if (event === 'end') {
        cb();
      }
    },
  };
  const simulateRequest = (_options: unknown, onResponse: (res: FakeRes) => void) => {
    if (!errorMsg) {
      // Normal flow: hand the response to the caller on the next tick.
      setImmediate(() => onResponse(fakeRes));
      return fakeReq;
    }
    // Failure flow: as soon as the caller subscribes to 'error', schedule it.
    fakeReq.on.mockImplementation((event: string, cb: (err: Error) => void) => {
      if (event === 'error') {
        setImmediate(() => cb(new Error(errorMsg)));
      }
    });
    return fakeReq;
  };
  (httpsModule.request as unknown as Mock).mockImplementation(simulateRequest);
 }
 // ---------------------------------------------------------------------------
 // Tests
 // ---------------------------------------------------------------------------
 describe('CaService', () => {
  let service: CaService;
  beforeAll(async () => {
    // The mocked step-ca returns this cert as `crt`; it must carry both Mosaic
    // OIDs with these exact values for issueCert's CRIT-1 checks to pass.
    const mosaicIds = { grantId: GRANT_ID, subjectUserId: SUBJECT_USER_ID };
    realIssuedCertPem = await makeMosaicIssuedCert(mosaicIds);
  });
  beforeEach(() => {
    // Reset recorded mock calls, then build a fresh service from the env
    // variables set at module level.
    vi.clearAllMocks();
    service = new CaService();
  });
  /**
   * Build an issueCert request with sensible defaults; `overrides` wins field by field.
   * The CSR defaults to the shared real CSR once a test has generated it, and to
   * a placeholder from makeFakeCsr() before that.
   */
  function makeReq(overrides: Partial<IssueCertRequestDto> = {}): IssueCertRequestDto {
    const baseline: IssueCertRequestDto = {
      csrPem: realCsrPem ?? makeFakeCsr(),
      grantId: GRANT_ID,
      subjectUserId: SUBJECT_USER_ID,
      ttlSeconds: 300,
    };
    return { ...baseline, ...overrides };
  }
  /**
   * Placeholder CSR: correct PEM header/footer around a body that is not a
   * valid PKCS#10 request (it will fail crypto verification).
   */
  function makeFakeCsr(): string {
    const pemLines = [
      '-----BEGIN CERTIFICATE REQUEST-----',
      'MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0000000000000000AAAA',
      '-----END CERTIFICATE REQUEST-----',
      '', // produces the trailing newline
    ];
    return pemLines.join('\n');
  }
  // -------------------------------------------------------------------------
  // Real CSR generation — runs once and populates realCsrPem
  // -------------------------------------------------------------------------
  it('generates a real P-256 CSR that passes validateCsr', async () => {
    // Populates the shared realCsrPem as a side effect.
    realCsrPem = await generateRealCsr();
    expect(realCsrPem).toMatch(/BEGIN CERTIFICATE REQUEST/);
    // validateCsr is exercised indirectly: issueCert is called against a
    // successful step-ca mock and must return the mocked leaf cert.
    const stepCaReply = { crt: realIssuedCertPem, certChain: [realIssuedCertPem, FAKE_CA_PEM] };
    makeHttpsMock(200, stepCaReply);
    const issued = await service.issueCert(makeReq({ csrPem: realCsrPem }));
    expect(issued.certPem).toBe(realIssuedCertPem);
  });
  it('throws INVALID_CSR for a malformed PEM-shaped CSR', async () => {
    // Well-formed PEM armor around a base64 body ("NotARealCSR") that is not PKCS#10.
    const malformedCsr =
      '-----BEGIN CERTIFICATE REQUEST-----\nTm90QVJlYWxDU1I=\n-----END CERTIFICATE REQUEST-----\n';
    const isInvalidCsrError = (err: unknown): boolean => {
      if (!(err instanceof CaServiceError)) return false;
      expect(err.code).toBe('INVALID_CSR');
      return true;
    };
    const attempt = service.issueCert(makeReq({ csrPem: malformedCsr }));
    await expect(attempt).rejects.toSatisfy(isInvalidCsrError);
  });
  // -------------------------------------------------------------------------
  // Happy path
  // -------------------------------------------------------------------------
  it('returns IssuedCertDto on success (certChain present)', async () => {
    realCsrPem ||= await generateRealCsr();
    const stepCaReply = {
      crt: realIssuedCertPem,
      certChain: [realIssuedCertPem, FAKE_CA_PEM],
    };
    makeHttpsMock(200, stepCaReply);
    const { certPem, certChainPem, serialNumber } = await service.issueCert(makeReq());
    expect(certPem).toBe(realIssuedCertPem);
    // The chain must contain both entries of certChain.
    expect(certChainPem).toContain(realIssuedCertPem);
    expect(certChainPem).toContain(FAKE_CA_PEM);
    expect(typeof serialNumber).toBe('string');
  });
  // -------------------------------------------------------------------------
  // certChainPem fallback — certChain absent, ca field present
  // -------------------------------------------------------------------------
  it('builds certChainPem from crt+ca when certChain is absent', async () => {
    realCsrPem ||= await generateRealCsr();
    // Response carries the leaf and a separate `ca` field, but no `certChain`.
    const stepCaReply = { crt: realIssuedCertPem, ca: FAKE_CA_PEM };
    makeHttpsMock(200, stepCaReply);
    const { certPem, certChainPem } = await service.issueCert(makeReq());
    expect(certPem).toBe(realIssuedCertPem);
    expect(certChainPem).toContain(realIssuedCertPem);
    expect(certChainPem).toContain(FAKE_CA_PEM);
  });
  // -------------------------------------------------------------------------
  // certChainPem fallback — no certChain, no ca field
  // -------------------------------------------------------------------------
  it('falls back to certPem alone when certChain and ca are absent', async () => {
    realCsrPem ||= await generateRealCsr();
    // Only `crt` is returned, so the chain can be nothing but the leaf itself.
    makeHttpsMock(200, { crt: realIssuedCertPem });
    const { certPem, certChainPem } = await service.issueCert(makeReq());
    expect(certPem).toBe(realIssuedCertPem);
    expect(certChainPem).toBe(realIssuedCertPem);
  });
  // -------------------------------------------------------------------------
  // HTTP 401
  // -------------------------------------------------------------------------
  it('throws CaServiceError on HTTP 401', async () => {
    realCsrPem ||= await generateRealCsr();
    makeHttpsMock(401, { message: 'Unauthorized' });
    // The error must name the status and tell the operator what to do about it.
    const isDescriptive401 = (err: unknown): boolean => {
      if (!(err instanceof CaServiceError)) return false;
      expect(err.message).toMatch(/401/);
      expect(err.remediation).toBeTruthy();
      return true;
    };
    await expect(service.issueCert(makeReq())).rejects.toSatisfy(isDescriptive401);
  });
  // -------------------------------------------------------------------------
  // HTTP non-401 error (e.g. 422)
  // -------------------------------------------------------------------------
  it('throws CaServiceError on HTTP 422', async () => {
    realCsrPem ||= await generateRealCsr();
    // Any non-401 error status must still surface as a CaServiceError.
    makeHttpsMock(422, { message: 'Unprocessable Entity' });
    const attempt = service.issueCert(makeReq());
    await expect(attempt).rejects.toBeInstanceOf(CaServiceError);
  });
  // -------------------------------------------------------------------------
  // Malformed CSR — throws before HTTP call
  // -------------------------------------------------------------------------
  it('throws CaServiceError for malformed CSR without making HTTP call', async () => {
    const requestSpy = vi.spyOn(httpsModule, 'request');
    const attempt = service.issueCert(makeReq({ csrPem: 'not-a-valid-csr' }));
    await expect(attempt).rejects.toBeInstanceOf(CaServiceError);
    // CSR validation must reject before any network traffic is attempted.
    expect(requestSpy).not.toHaveBeenCalled();
  });
  // -------------------------------------------------------------------------
  // Non-JSON response
  // -------------------------------------------------------------------------
  it('throws CaServiceError when step-ca returns non-JSON', async () => {
    realCsrPem ||= await generateRealCsr();
    // A string body is sent verbatim by the mock, so the service receives unparsable text.
    makeHttpsMock(200, 'this is not json');
    const isNonJsonError = (err: unknown): boolean => {
      if (!(err instanceof CaServiceError)) return false;
      expect(err.message).toMatch(/non-JSON/);
      return true;
    };
    await expect(service.issueCert(makeReq())).rejects.toSatisfy(isNonJsonError);
  });
  // -------------------------------------------------------------------------
  // HTTPS connection error
  // -------------------------------------------------------------------------
  it('throws CaServiceError on HTTPS connection error', async () => {
    realCsrPem ||= await generateRealCsr();
    // With an error message the mock never delivers a response, so the status
    // code and body passed here are irrelevant.
    makeHttpsMock(0, undefined, 'connect ECONNREFUSED 127.0.0.1:9000');
    const wrapsConnectionFailure = (err: unknown): boolean => {
      if (!(err instanceof CaServiceError)) return false;
      expect(err.message).toMatch(/HTTPS connection/);
      // The original socket error must be preserved for diagnostics.
      expect(err.cause).toBeInstanceOf(Error);
      return true;
    };
    await expect(service.issueCert(makeReq())).rejects.toSatisfy(wrapsConnectionFailure);
  });
  // -------------------------------------------------------------------------
  // JWT custom claims: mosaic_grant_id and mosaic_subject_user_id
  // Verified with jose.jwtVerify for real signature verification (M6)
  // -------------------------------------------------------------------------
  it('OTT contains mosaic_grant_id, mosaic_subject_user_id, and jti; signature verifies with jose', async () => {
    realCsrPem ||= await generateRealCsr();
    type ResListener = (chunk?: Buffer) => void;
    type FakeRes = { statusCode: number; on: (event: string, cb: ResListener) => void };
    // The request mock records the JSON body the service writes to step-ca.
    let capturedBody: Record<string, unknown> | undefined;
    const fakeReq = {
      write: vi.fn((data: string) => {
        capturedBody = JSON.parse(data) as Record<string, unknown>;
      }),
      end: vi.fn(),
      on: vi.fn(),
      setTimeout: vi.fn(),
    };
    const fakeRes: FakeRes = {
      statusCode: 200,
      on: (event, cb) => {
        if (event === 'data') {
          cb(Buffer.from(JSON.stringify({ crt: realIssuedCertPem })));
        }
        if (event === 'end') {
          cb();
        }
      },
    };
    (httpsModule.request as unknown as Mock).mockImplementation(
      (_options: unknown, onResponse: (res: FakeRes) => void) => {
        setImmediate(() => onResponse(fakeRes));
        return fakeReq;
      },
    );
    await service.issueCert(makeReq({ csrPem: realCsrPem }));
    expect(capturedBody).toBeDefined();
    const ott = capturedBody!['ott'] as string;
    expect(typeof ott).toBe('string');
    // A compact JWS has exactly three dot-separated segments.
    const segments = ott.split('.');
    expect(segments).toHaveLength(3);
    // First inspect the claims without verifying the signature.
    const claims = JSON.parse(Buffer.from(segments[1]!, 'base64url').toString('utf8')) as Record<
      string,
      unknown
    >;
    expect(claims['mosaic_grant_id']).toBe(GRANT_ID);
    expect(claims['mosaic_subject_user_id']).toBe(SUBJECT_USER_ID);
    expect(typeof claims['jti']).toBe('string'); // M2: jti present
    expect(claims['jti']).toMatch(/^[0-9a-f-]{36}$/); // UUID format
    // M3: the CSR hash lives under `step.sha`, never as a top-level `sha`.
    expect(claims['sha']).toBeUndefined();
    const stepClaim = claims['step'] as Record<string, unknown> | undefined;
    expect(stepClaim?.['sha']).toBeDefined();
    // M6: real signature verification against the matching public JWK.
    const { importJWK: importJose } = await import('jose');
    const verificationKey = await importJose(TEST_EC_PUBLIC_JWK, 'ES256');
    const { payload: verifiedClaims } = await jwtVerify(ott, verificationKey);
    expect(verifiedClaims['mosaic_grant_id']).toBe(GRANT_ID);
  });
  // -------------------------------------------------------------------------
  // CaServiceError has cause + remediation
  // -------------------------------------------------------------------------
  it('CaServiceError carries cause and remediation', () => {
    const rootCause = new Error('original error');
    const error = new CaServiceError('something went wrong', 'fix it like this', rootCause);
    // Prototype chain is intact for both the base and the custom class.
    expect(error).toBeInstanceOf(Error);
    expect(error).toBeInstanceOf(CaServiceError);
    // Every constructor argument is exposed unchanged.
    expect(error.message).toBe('something went wrong');
    expect(error.remediation).toBe('fix it like this');
    expect(error.cause).toBe(rootCause);
    expect(error.name).toBe('CaServiceError');
  });
  // -------------------------------------------------------------------------
  // Missing crt in response
  // -------------------------------------------------------------------------
  it('throws CaServiceError when response is missing the crt field', async () => {
    realCsrPem ||= await generateRealCsr();
    // A 200 response that has a `ca` but no leaf certificate.
    makeHttpsMock(200, { ca: FAKE_CA_PEM });
    const reportsMissingCrt = (err: unknown): boolean => {
      if (!(err instanceof CaServiceError)) return false;
      expect(err.message).toMatch(/missing the "crt" field/);
      return true;
    };
    await expect(service.issueCert(makeReq())).rejects.toSatisfy(reportsMissingCrt);
  });
  // -------------------------------------------------------------------------
  // M6: provisionerPassword must never appear in CaServiceError messages
  // -------------------------------------------------------------------------
  it('provisionerPassword does not appear in any CaServiceError message', async () => {
    // Recognizable value that must never show up in an error.
    const probePassword = 'super-secret-password-12345';
    const originalPassword = process.env['STEP_CA_PROVISIONER_PASSWORD'];
    // NOTE(review): `service` was constructed in beforeEach, before this value is
    // set — confirm whether the service reads the password lazily, otherwise the
    // probe value never reaches it.
    process.env['STEP_CA_PROVISIONER_PASSWORD'] = probePassword;
    try {
      const caughtErrors: CaServiceError[] = [];
      // Error path 1: malformed CSR.
      try {
        await service.issueCert(makeReq({ csrPem: 'not-a-csr' }));
      } catch (err) {
        if (err instanceof CaServiceError) {
          caughtErrors.push(err);
        }
      }
      // Error path 2: step-ca answers HTTP 401.
      if (!realCsrPem) realCsrPem = await generateRealCsr();
      makeHttpsMock(401, { message: 'Unauthorized' });
      try {
        await service.issueCert(makeReq({ csrPem: realCsrPem }));
      } catch (err) {
        if (err instanceof CaServiceError) {
          caughtErrors.push(err);
        }
      }
      // Both paths must really have failed; otherwise the loop below would
      // pass without checking anything.
      expect(caughtErrors).toHaveLength(2);
      for (const err of caughtErrors) {
        expect(err.message).not.toContain(probePassword);
        if (err.remediation) {
          expect(err.remediation).not.toContain(probePassword);
        }
      }
    } finally {
      // Restore even when an assertion fails. An originally-unset variable must
      // be deleted: assigning `undefined` would store the string "undefined".
      if (originalPassword === undefined) {
        delete process.env['STEP_CA_PROVISIONER_PASSWORD'];
      } else {
        process.env['STEP_CA_PROVISIONER_PASSWORD'] = originalPassword;
      }
    }
  });
  // -------------------------------------------------------------------------
  // M7: HTTPS-only enforcement in constructor
  // -------------------------------------------------------------------------
  it('throws in constructor if STEP_CA_URL uses http://', () => {
    const originalUrl = process.env['STEP_CA_URL'];
    process.env['STEP_CA_URL'] = 'http://step-ca:9000';
    try {
      expect(() => new CaService()).toThrow(CaServiceError);
    } finally {
      // Always put the URL back: if the assertion fails and the http:// value
      // stays, every later beforeEach (`new CaService()`) would throw as well.
      if (originalUrl === undefined) {
        delete process.env['STEP_CA_URL'];
      } else {
        process.env['STEP_CA_URL'] = originalUrl;
      }
    }
  });
  // -------------------------------------------------------------------------
  // TTL clamp: ttlSeconds is clamped to 900 s (15 min) maximum
  // -------------------------------------------------------------------------
  it('clamps ttlSeconds to 900 s regardless of input', async () => {
    realCsrPem ||= await generateRealCsr();
    type ResListener = (chunk?: Buffer) => void;
    type FakeRes = { statusCode: number; on: (event: string, cb: ResListener) => void };
    // Record the JSON body sent to step-ca so the requested validity can be inspected.
    let capturedBody: Record<string, unknown> | undefined;
    const fakeReq = {
      write: vi.fn((data: string) => {
        capturedBody = JSON.parse(data) as Record<string, unknown>;
      }),
      end: vi.fn(),
      on: vi.fn(),
      setTimeout: vi.fn(),
    };
    const fakeRes: FakeRes = {
      statusCode: 200,
      on: (event, cb) => {
        if (event === 'data') {
          cb(Buffer.from(JSON.stringify({ crt: realIssuedCertPem })));
        }
        if (event === 'end') {
          cb();
        }
      },
    };
    (httpsModule.request as unknown as Mock).mockImplementation(
      (_options: unknown, onResponse: (res: FakeRes) => void) => {
        setImmediate(() => onResponse(fakeRes));
        return fakeReq;
      },
    );
    // Ask for a full day (86400 s); the service must cap this at 900 s.
    const oversizedRequest = makeReq({ ttlSeconds: 86400 });
    await service.issueCert(oversizedRequest);
    expect(capturedBody).toBeDefined();
    const validity = capturedBody!['validity'] as Record<string, unknown>;
    expect(validity['duration']).toBe('900s');
  });
 });
--- a/apps/gateway/src/federation/ca.service.ts
+++ b/apps/gateway/src/federation/ca.service.ts
@@ -0,0 +1,680 @@
 /**
 * CaService — Step-CA client for federation grant certificate issuance.
 *
 * Responsibilities:
 *  1. Build a JWK-provisioner One-Time Token (OTT) signed with the provisioner
 *     private key (ES256/ES384/RS256 per JWK kty/crv) carrying Mosaic-specific
 *     claims (`mosaic_grant_id`, `mosaic_subject_user_id`, `step.sha`) per the
 *     step-ca JWK provisioner protocol.
 *  2. POST the CSR + OTT to the step-ca `/1.0/sign` endpoint over HTTPS,
 *     pinning the trust to the CA root cert supplied via env.
 *  3. Return an IssuedCertDto containing the leaf cert, full chain, and
 *     serial number.
 *
 * Environment variables (all required at runtime — validated in constructor):
 *   STEP_CA_URL                   https://step-ca:9000
 *   STEP_CA_PROVISIONER_KEY_JSON  JWK provisioner private key (JSON)
 *   STEP_CA_ROOT_CERT_PATH        Absolute path to the CA root PEM
 *
 * Optional (only used for JWK PBES2 decrypt at startup if key is encrypted):
 *   STEP_CA_PROVISIONER_PASSWORD  JWK provisioner password (raw string)
 *
 * Custom OID registry (PRD §6, docs/federation/SETUP.md):
 *   1.3.6.1.4.1.99999.1  — mosaic_grant_id
 *   1.3.6.1.4.1.99999.2  — mosaic_subject_user_id
 *
 * Fail-loud contract:
 *   Every error path throws CaServiceError with a human-readable `remediation`
 *   field. Silent OID-stripping is NEVER allowed — if the sign response does
 *   not include the cert, we throw rather than return a cert that may be
 *   missing the custom extensions.
 */
 import { Injectable, Logger } from '@nestjs/common';
 import * as crypto from 'node:crypto';
 import * as fs from 'node:fs';
 import * as https from 'node:https';
 import { SignJWT, importJWK } from 'jose';
 import { Pkcs10CertificateRequest, X509Certificate } from '@peculiar/x509';
 import type { IssueCertRequestDto } from './ca.dto.js';
 import { IssuedCertDto } from './ca.dto.js';
 // ---------------------------------------------------------------------------
 // Custom error class
 // ---------------------------------------------------------------------------
 /**
 * Error thrown for every failure in this module.
 *
 * Besides the usual `message`, each instance carries operator-facing guidance
 * (`remediation`), the underlying error if there was one (`cause`), and an
 * optional machine-readable `code` (values seen in this file: 'CERT_PARSE';
 * the spec also checks for 'INVALID_CSR').
 */
 export class CaServiceError extends Error {
  // Underlying error, if any. Stored by this constructor; it is not forwarded
  // to the Error constructor's options argument.
  readonly cause: unknown;
  // Human-readable hint on how to fix the problem.
  readonly remediation: string;
  // Optional short identifier for programmatic handling.
  readonly code?: string;
  /**
   * @param message      What went wrong.
   * @param remediation  What the operator should do about it.
   * @param cause        Original error being wrapped (optional).
   * @param code         Machine-readable error code (optional).
   */
  constructor(message: string, remediation: string, cause?: unknown, code?: string) {
    super(message);
    // Without this, `name` would be inherited as 'Error'.
    this.name = 'CaServiceError';
    this.cause = cause;
    this.remediation = remediation;
    this.code = code;
  }
 }
 // ---------------------------------------------------------------------------
 // Internal types
 // ---------------------------------------------------------------------------
 /**
 * Shape the parsed step-ca sign response is cast to. The cast is unchecked:
 * `crt` is typed as required but may be absent at runtime, so callers must
 * verify it.
 */
 interface StepSignResponse {
  // PEM of the issued leaf certificate.
  crt: string;
  // PEM of the issuing CA certificate, when step-ca includes it.
  ca?: string;
  // Chain as a list of PEM strings, when step-ca includes it.
  certChain?: string[];
 }
 /**
 * Loose shape of the provisioner JWK parsed from STEP_CA_PROVISIONER_KEY_JSON.
 * Only `kty` is required; which of the remaining members are present depends
 * on the key type.
 */
 interface JwkKey {
  // Key type, e.g. 'EC' or 'RSA'.
  kty: string;
  // Key identifier.
  kid?: string;
  use?: string;
  // Explicit JWS algorithm; when set it takes precedence over kty/crv inference.
  alg?: string;
  k?: string; // symmetric
  n?: string; // RSA
  e?: string;
  // Private key material (EC private scalar / RSA private exponent).
  d?: string;
  x?: string; // EC
  y?: string;
  // EC curve name, e.g. 'P-256'.
  crv?: string;
  // Any further JWK members (e.g. key_ops, ext) are accepted untyped.
  [key: string]: unknown;
 }
 // ---------------------------------------------------------------------------
 // Helpers
 // ---------------------------------------------------------------------------
 /** UUID regex for validation */
 const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
 /**
 * Derive the JWS algorithm name used to sign with the given JWK.
 *
 * Resolution order:
 *  1. An explicit `alg` member on the JWK wins.
 *  2. EC keys map by curve: P-256 → ES256, P-384 → ES384, P-521 → ES512.
 *     An EC key without `crv` keeps the historic default of ES256.
 *  3. RSA keys → RS256.
 *
 * Unknown EC curves are rejected instead of being signed as ES256, which
 * could never produce a valid signature for such a key.
 *
 * @param jwk  Provisioner key as parsed from STEP_CA_PROVISIONER_KEY_JSON.
 * @returns The JWS `alg` value.
 * @throws CaServiceError for an unsupported EC curve or an unsupported `kty`.
 */
 function algFromJwk(jwk: JwkKey): string {
  if (jwk.alg) return jwk.alg;
  if (jwk.kty === 'EC') {
    switch (jwk.crv) {
      case undefined: // historic default when the curve is not stated
      case 'P-256':
        return 'ES256';
      case 'P-384':
        return 'ES384';
      case 'P-521':
        return 'ES512';
      default:
        throw new CaServiceError(
          `Unsupported JWK EC curve: ${jwk.crv}`,
          'STEP_CA_PROVISIONER_KEY_JSON must use curve P-256, P-384 or P-521 when kty is EC.',
        );
    }
  }
  if (jwk.kty === 'RSA') return 'RS256';
  throw new CaServiceError(
    `Unsupported JWK kty: ${jwk.kty}`,
    'STEP_CA_PROVISIONER_KEY_JSON must be an EC (P-256/P-384) or RSA JWK private key.',
  );
 }
 /**
 * Compute the SHA-256 fingerprint (lower-case hex) of the DER-encoded CSR body.
 * step-ca uses this as the `step.sha` claim to bind the OTT to a specific CSR.
 *
 * Both PEM labels for PKCS#10 requests are accepted: `CERTIFICATE REQUEST` and
 * the legacy `NEW CERTIFICATE REQUEST`.
 *
 * @param csrPem  PEM-encoded PKCS#10 certificate request.
 * @returns Hex-encoded SHA-256 digest of the decoded bytes.
 * @throws CaServiceError when nothing decodable remains after removing the armor.
 */
 function csrFingerprint(csrPem: string): string {
  // Drop the PEM armor lines and all whitespace, leaving only the base64 body.
  const b64 = csrPem
    .replace(/-----(?:BEGIN|END) (?:NEW )?CERTIFICATE REQUEST-----/g, '')
    .replace(/\s+/g, '');
  // Buffer.from(str, 'base64') never throws — invalid characters are skipped —
  // so an empty result is the only malformed-input signal available here.
  const derBuf = Buffer.from(b64, 'base64');
  if (derBuf.length === 0) {
    throw new CaServiceError(
      'CSR PEM decoded to empty buffer — malformed input',
      'Provide a valid non-empty PKCS#10 PEM-encoded certificate request.',
    );
  }
  return crypto.createHash('sha256').update(derBuf).digest('hex');
 }
 /**
 * Send a JSON POST to the step-ca sign endpoint.
 *
 * @param url    Absolute endpoint URL; the port defaults to 443 when omitted.
 * @param body   Value serialized with JSON.stringify as the request body.
 * @param agent  HTTPS agent used for the connection.
 * @returns The parsed JSON response, cast (unchecked) to StepSignResponse.
 * @throws CaServiceError (via promise rejection) on HTTP 401, on any status
 *         >= 400, on a non-JSON body, on a 5000 ms timeout, and on request or
 *         response stream errors.
 */
 function httpsPost(url: string, body: unknown, agent: https.Agent): Promise<StepSignResponse> {
  return new Promise((resolve, reject) => {
    const timeoutMs = 5000;
    const bodyStr = JSON.stringify(body);
    const target = new URL(url);
    const options: https.RequestOptions = {
      hostname: target.hostname,
      port: target.port ? parseInt(target.port, 10) : 443,
      // Include the query string; `pathname` alone would silently drop it.
      path: `${target.pathname}${target.search}`,
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        // Byte length, not string length: the body may contain multi-byte characters.
        'Content-Length': Buffer.byteLength(bodyStr),
      },
      agent,
      timeout: timeoutMs,
    };
    // Shared by request-level and response-level stream failures.
    const rejectConnectionError = (err: Error): void => {
      reject(
        new CaServiceError(
          `HTTPS connection to step-ca failed: ${err.message}`,
          'Ensure STEP_CA_URL is reachable and STEP_CA_ROOT_CERT_PATH points to the correct CA root certificate.',
          err,
        ),
      );
    };
    const req = https.request(options, (res) => {
      const chunks: Buffer[] = [];
      res.on('data', (chunk: Buffer) => chunks.push(chunk));
      // A connection dropped mid-body is reported on the response stream, not
      // on the request; without this listener the promise would not be
      // rejected by that failure.
      res.on('error', rejectConnectionError);
      res.on('end', () => {
        const raw = Buffer.concat(chunks).toString('utf8');
        if (res.statusCode === 401) {
          reject(
            new CaServiceError(
              `step-ca returned HTTP 401 — invalid or expired OTT`,
              'Check STEP_CA_PROVISIONER_KEY_JSON. Ensure the mosaic-fed provisioner is configured in the CA.',
            ),
          );
          return;
        }
        if (res.statusCode && res.statusCode >= 400) {
          reject(
            new CaServiceError(
              // Only the first 256 characters of the body are echoed.
              `step-ca returned HTTP ${res.statusCode}: ${raw.slice(0, 256)}`,
              `Review the step-ca logs. Status ${res.statusCode} may indicate a CSR policy violation or misconfigured provisioner.`,
            ),
          );
          return;
        }
        let payload: unknown;
        try {
          payload = JSON.parse(raw) as unknown;
        } catch (err) {
          reject(
            new CaServiceError(
              'step-ca returned a non-JSON response',
              'Verify STEP_CA_URL points to a running step-ca instance and that TLS is properly configured.',
              err,
            ),
          );
          return;
        }
        resolve(payload as StepSignResponse);
      });
    });
    // On timeout, destroy the request with an error so it surfaces through the
    // 'error' handler below.
    req.setTimeout(timeoutMs, () => {
      req.destroy(new Error(`Request timed out after ${timeoutMs}ms`));
    });
    req.on('error', rejectConnectionError);
    req.write(bodyStr);
    req.end();
  });
 }
 /**
 * Read the serial number of a PEM-encoded certificate.
 *
 * The value is returned exactly as Node's `crypto.X509Certificate#serialNumber`
 * reports it, which per the Node.js documentation is a hexadecimal string,
 * not a decimal one.
 * NOTE(review): confirm consumers of the serial (e.g. the IssuedCertDto
 * documentation) expect the hexadecimal form.
 *
 * @param certPem  PEM-encoded X.509 certificate.
 * @returns The certificate's serial number string.
 * @throws CaServiceError (code `CERT_PARSE`) when the PEM cannot be parsed —
 *         never silently returns 'unknown'.
 */
 function extractSerial(certPem: string): string {
  const parseCertificate = (): crypto.X509Certificate => {
    try {
      return new crypto.X509Certificate(certPem);
    } catch (err) {
      throw new CaServiceError(
        'Failed to parse the issued certificate PEM',
        'The certificate returned by step-ca could not be parsed. Check that step-ca is returning a valid PEM certificate.',
        err,
        'CERT_PARSE',
      );
    }
  };
  return parseCertificate().serialNumber;
 }
 // ---------------------------------------------------------------------------
 // Service
 // ---------------------------------------------------------------------------
@Injectable()
 /**
  * NestJS provider that submits CSRs to a step-ca instance and returns the
  * issued certificate. All configuration is read from environment variables
  * in the constructor; failures are reported as CaServiceError.
  */
 export class CaService {
   // Nest logger tagged with this class name.
   private readonly logger = new Logger(CaService.name);
   // Base URL of the step-ca instance (from STEP_CA_URL).
   private readonly caUrl: string;
   // Filesystem path of the step-ca root certificate (from STEP_CA_ROOT_CERT_PATH).
   private readonly rootCertPath: string;
   // HTTPS agent that trusts only the root certificate read from rootCertPath.
   private readonly httpsAgent: https.Agent;
   // Provisioner key parsed from STEP_CA_PROVISIONER_KEY_JSON (the raw JSON string is not kept).
   private readonly jwk: JwkKey;
   // Imported signing key; stays null until getPrivateKey() succeeds for the first time.
   private cachedPrivateKey: crypto.KeyObject | null = null;
   // JWS algorithm derived from the JWK by algFromJwk().
   private readonly jwtAlg: string;
   // Key id used as the OTT issuer and header kid; defaults to 'mosaic-fed' when the JWK has none.
   private readonly kid: string;
  constructor() {
    const caUrl = process.env['STEP_CA_URL'];
    const provisionerKeyJson = process.env['STEP_CA_PROVISIONER_KEY_JSON'];
    const rootCertPath = process.env['STEP_CA_ROOT_CERT_PATH'];
    if (!caUrl) {
      throw new CaServiceError(
        'STEP_CA_URL is not set',
        'Set STEP_CA_URL to the base URL of the step-ca instance, e.g. https://step-ca:9000',
      );
    }
    // Enforce HTTPS-only URL
    let parsedUrl: URL;
    try {
      parsedUrl = new URL(caUrl);
    } catch (err) {
      throw new CaServiceError(
        `STEP_CA_URL is not a valid URL: ${caUrl}`,
        'Set STEP_CA_URL to a valid HTTPS URL, e.g. https://step-ca:9000',
        err,
      );
    }
    if (parsedUrl.protocol !== 'https:') {
      throw new CaServiceError(
        `STEP_CA_URL must use HTTPS — got: ${parsedUrl.protocol}`,
        'Set STEP_CA_URL to an https:// URL. Unencrypted connections to the CA are not permitted.',
      );
    }
    if (!provisionerKeyJson) {
      throw new CaServiceError(
        'STEP_CA_PROVISIONER_KEY_JSON is not set',
        'Set STEP_CA_PROVISIONER_KEY_JSON to the JSON-encoded JWK for the mosaic-fed provisioner.',
      );
    }
    if (!rootCertPath) {
      throw new CaServiceError(
        'STEP_CA_ROOT_CERT_PATH is not set',
        'Set STEP_CA_ROOT_CERT_PATH to the absolute path of the step-ca root CA certificate PEM file.',
      );
    }
    // Parse JWK once — do NOT store the raw JSON string as a class field
    let jwk: JwkKey;
    try {
      jwk = JSON.parse(provisionerKeyJson) as JwkKey;
    } catch (err) {
      throw new CaServiceError(
        'STEP_CA_PROVISIONER_KEY_JSON is not valid JSON',
        'Set STEP_CA_PROVISIONER_KEY_JSON to the JSON-serialised JWK object for the mosaic-fed provisioner.',
        err,
      );
    }
    // Derive algorithm from JWK metadata
    const jwtAlg = algFromJwk(jwk);
    const kid = jwk.kid ?? 'mosaic-fed';
    // Import the JWK into a native KeyObject — fail loudly if it cannot be loaded.
    // We do this synchronously here by calling the async importJWK via a blocking workaround.
    // Actually importJWK is async, so we store it for use during token building.
    // We keep the raw jwk object for later async import inside buildOtt.
    // NOTE: We do NOT store provisionerKeyJson string as a class field.
    this.jwk = jwk;
    this.jwtAlg = jwtAlg;
    this.kid = kid;
    this.caUrl = caUrl;
    this.rootCertPath = rootCertPath;
    // Read the root cert and pin it for all HTTPS connections.
    let rootCert: string;
    try {
      rootCert = fs.readFileSync(this.rootCertPath, 'utf8');
    } catch (err) {
      throw new CaServiceError(
        `Cannot read STEP_CA_ROOT_CERT_PATH: ${rootCertPath}`,
        'Ensure the file exists and is readable by the gateway process.',
        err,
      );
    }
    this.httpsAgent = new https.Agent({
      ca: rootCert,
      rejectUnauthorized: true,
    });
    this.logger.log(`CaService initialised — CA URL: ${this.caUrl}`);
  }
  /**
   * Lazily import the private key from JWK on first use.
   * The key is cached in cachedPrivateKey after first import.
   */
  private async getPrivateKey(): Promise<crypto.KeyObject> {
    if (this.cachedPrivateKey !== null) return this.cachedPrivateKey;
    try {
      const key = await importJWK(this.jwk, this.jwtAlg);
      // importJWK returns KeyLike (crypto.KeyObject | Uint8Array) — in Node.js it's KeyObject
      this.cachedPrivateKey = key as unknown as crypto.KeyObject;
      return this.cachedPrivateKey;
    } catch (err) {
      throw new CaServiceError(
        'Failed to import STEP_CA_PROVISIONER_KEY_JSON as a cryptographic key',
        'Ensure STEP_CA_PROVISIONER_KEY_JSON contains a valid JWK private key (EC P-256/P-384 or RSA).',
        err,
      );
    }
  }
  /**
   * Build the JWK-provisioner OTT signed with the provisioner private key.
   * Algorithm is derived from the JWK kty/crv fields.
   */
  private async buildOtt(params: {
    csrPem: string;
    grantId: string;
    subjectUserId: string;
    ttlSeconds: number;
    csrCn: string;
  }): Promise<string> {
    const { csrPem, grantId, subjectUserId, ttlSeconds, csrCn } = params;
    // Validate UUID shape for grant id and subject user id
    if (!UUID_RE.test(grantId)) {
      throw new CaServiceError(
        `grantId is not a valid UUID: ${grantId}`,
        'Provide a valid UUID (RFC 4122) for grantId.',
        undefined,
        'INVALID_GRANT_ID',
      );
    }
    if (!UUID_RE.test(subjectUserId)) {
      throw new CaServiceError(
        `subjectUserId is not a valid UUID: ${subjectUserId}`,
        'Provide a valid UUID (RFC 4122) for subjectUserId.',
        undefined,
        'INVALID_GRANT_ID',
      );
    }
    const sha = csrFingerprint(csrPem);
    const now = Math.floor(Date.now() / 1000);
    const privateKey = await this.getPrivateKey();
    const ott = await new SignJWT({
      iss: this.kid,
      sub: csrCn, // M1: set sub to identity from CSR CN
      aud: [`${this.caUrl}/1.0/sign`],
      iat: now,
      nbf: now - 30, // 30 s clock-skew tolerance
      exp: now + Math.min(ttlSeconds, 3600), // OTT validity ≤ 1 h
      jti: crypto.randomUUID(), // M2: unique token ID
      // step.sha is the canonical field name used in the template — M3: keep only step.sha
      step: { sha },
      // Mosaic custom claims consumed by federation.tpl
      mosaic_grant_id: grantId,
      mosaic_subject_user_id: subjectUserId,
    })
      .setProtectedHeader({ alg: this.jwtAlg, typ: 'JWT', kid: this.kid })
      .sign(privateKey);
    return ott;
  }
  /**
   * Validate a PEM-encoded CSR using @peculiar/x509.
   * Verifies the self-signature, key type/size, and signature algorithm.
   * Optionally verifies that the CSR's SANs match the expected set.
   *
   * Throws CaServiceError with code 'INVALID_CSR' on failure.
   */
  private async validateCsr(pem: string, expectedSans?: string[]): Promise<string> {
    let csr: Pkcs10CertificateRequest;
    try {
      csr = new Pkcs10CertificateRequest(pem);
    } catch (err) {
      throw new CaServiceError(
        'Failed to parse CSR PEM as a valid PKCS#10 certificate request',
        'Provide a valid PEM-encoded PKCS#10 CSR.',
        err,
        'INVALID_CSR',
      );
    }
    // Verify self-signature
    let valid: boolean;
    try {
      valid = await csr.verify();
    } catch (err) {
      throw new CaServiceError(
        'CSR signature verification threw an error',
        'The CSR self-signature could not be verified. Ensure the CSR is properly formed.',
        err,
        'INVALID_CSR',
      );
    }
    if (!valid) {
      throw new CaServiceError(
        'CSR self-signature is invalid',
        'The CSR must be self-signed with the corresponding private key.',
        undefined,
        'INVALID_CSR',
      );
    }
    // Validate signature algorithm — reject MD5 and SHA-1
    // signatureAlgorithm is HashedAlgorithm which extends Algorithm.
    // Cast through unknown to access .name and .hash.name without DOM lib globals.
    const sigAlgAny = csr.signatureAlgorithm as unknown as {
      name?: string;
      hash?: { name?: string };
    };
    const sigAlgName = (sigAlgAny.name ?? '').toLowerCase();
    const hashName = (sigAlgAny.hash?.name ?? '').toLowerCase();
    if (
      sigAlgName.includes('md5') ||
      sigAlgName.includes('sha1') ||
      hashName === 'sha-1' ||
      hashName === 'sha1'
    ) {
      throw new CaServiceError(
        `CSR uses a forbidden signature algorithm: ${sigAlgAny.name ?? 'unknown'}`,
        'Use SHA-256 or stronger. MD5 and SHA-1 are not permitted.',
        undefined,
        'INVALID_CSR',
      );
    }
    // Validate public key algorithm and strength via the algorithm descriptor on the key.
    // csr.publicKey.algorithm is type Algorithm (WebCrypto) — use name-based checks.
    // We cast to an extended interface to access curve/modulus info without DOM globals.
    const pubKeyAlgo = csr.publicKey.algorithm as {
      name: string;
      namedCurve?: string;
      modulusLength?: number;
    };
    const keyAlgoName = pubKeyAlgo.name;
    if (keyAlgoName === 'RSASSA-PKCS1-v1_5' || keyAlgoName === 'RSA-PSS') {
      const modulusLength = pubKeyAlgo.modulusLength ?? 0;
      if (modulusLength < 2048) {
        throw new CaServiceError(
          `CSR RSA key is too short: ${modulusLength} bits (minimum 2048)`,
          'Use an RSA key of at least 2048 bits.',
          undefined,
          'INVALID_CSR',
        );
      }
    } else if (keyAlgoName === 'ECDSA') {
      const namedCurve = pubKeyAlgo.namedCurve ?? '';
      const allowedCurves = new Set(['P-256', 'P-384']);
      if (!allowedCurves.has(namedCurve)) {
        throw new CaServiceError(
          `CSR EC key uses disallowed curve: ${namedCurve}`,
          'Use EC P-256 or P-384. Other curves are not permitted.',
          undefined,
          'INVALID_CSR',
        );
      }
    } else if (keyAlgoName === 'Ed25519') {
      // Ed25519 is explicitly allowed
    } else {
      throw new CaServiceError(
        `CSR uses unsupported key algorithm: ${keyAlgoName}`,
        'Use EC (P-256/P-384), Ed25519, or RSA (≥2048 bit) keys.',
        undefined,
        'INVALID_CSR',
      );
    }
    // Extract SANs if expectedSans provided
    if (expectedSans && expectedSans.length > 0) {
      // Get SANs from CSR extensions
      const sanExtension = csr.extensions?.find(
        (ext) => ext.type === '2.5.29.17', // Subject Alternative Name OID
      );
      const csrSans: string[] = [];
      if (sanExtension) {
        // Parse the raw SAN extension — store as stringified for comparison
        // @peculiar/x509 exposes SANs through the parsed extension
        const sanExt = sanExtension as { names?: Array<{ type: string; value: string }> };
        if (sanExt.names) {
          for (const name of sanExt.names) {
            csrSans.push(name.value);
          }
        }
      }
      const csrSanSet = new Set(csrSans);
      const expectedSanSet = new Set(expectedSans);
      const missing = expectedSans.filter((s) => !csrSanSet.has(s));
      const extra = csrSans.filter((s) => !expectedSanSet.has(s));
      if (missing.length > 0 || extra.length > 0) {
        throw new CaServiceError(
          `CSR SANs do not match expected set. Missing: [${missing.join(', ')}], Extra: [${extra.join(', ')}]`,
          'The CSR must include exactly the SANs specified in the issuance request.',
          undefined,
          'INVALID_CSR',
        );
      }
    }
    // Return the CN from the CSR subject for use as JWT sub
    const cn = csr.subjectName.getField('CN')?.[0] ?? '';
    return cn;
  }
  /**
   * Submit a CSR to step-ca and return the issued certificate.
   *
   * Throws `CaServiceError` on any failure (network, auth, malformed input).
   * Never silently swallows errors — fail-loud is a hard contract per M2-02 review.
   */
  async issueCert(req: IssueCertRequestDto): Promise<IssuedCertDto> {
    // Clamp TTL to 15-minute maximum (H2)
    const ttl = Math.min(req.ttlSeconds ?? 300, 900);
    this.logger.debug(
      `issueCert — grantId=${req.grantId} subjectUserId=${req.subjectUserId} ttl=${ttl}s`,
    );
    // Validate CSR — real cryptographic validation (H3)
    const csrCn = await this.validateCsr(req.csrPem);
    const ott = await this.buildOtt({
      csrPem: req.csrPem,
      grantId: req.grantId,
      subjectUserId: req.subjectUserId,
      ttlSeconds: ttl,
      csrCn,
    });
    const signUrl = `${this.caUrl}/1.0/sign`;
    const requestBody = {
      csr: req.csrPem,
      ott,
      validity: {
        duration: `${ttl}s`,
      },
    };
    this.logger.debug(`Posting CSR to ${signUrl}`);
    const response = await httpsPost(signUrl, requestBody, this.httpsAgent);
    if (!response.crt) {
      throw new CaServiceError(
        'step-ca sign response missing the "crt" field',
        'This is unexpected — the step-ca instance may be misconfigured or running an incompatible version.',
      );
    }
    // Build certChainPem: prefer certChain array, fall back to ca field, fall back to crt alone.
    let certChainPem: string;
    if (response.certChain && response.certChain.length > 0) {
      certChainPem = response.certChain.join('\n');
    } else if (response.ca) {
      certChainPem = response.crt + '\n' + response.ca;
    } else {
      certChainPem = response.crt;
    }
    const serialNumber = extractSerial(response.crt);
    // CRIT-1: Verify the issued certificate contains both Mosaic OID extensions
    // with the correct values. Step-CA's federation.tpl encodes each as an ASN.1
    // UTF8String TLV: tag 0x0C + 1-byte length + UUID bytes. We skip 2 bytes
    // (tag + length) to extract the raw UUID string.
    const issuedCert = new X509Certificate(response.crt);
    const decoder = new TextDecoder();
    const grantIdExt = issuedCert.getExtension('1.3.6.1.4.1.99999.1');
    if (!grantIdExt) {
      throw new CaServiceError(
        'Issued certificate is missing required Mosaic OID: mosaic_grant_id',
        'The Step-CA federation.tpl template did not embed OID 1.3.6.1.4.1.99999.1. Check the provisioner template configuration.',
        undefined,
        'OID_MISSING',
      );
    }
    const grantIdInCert = decoder.decode(grantIdExt.value.slice(2));
    if (grantIdInCert !== req.grantId) {
      throw new CaServiceError(
        `Issued certificate mosaic_grant_id mismatch: expected ${req.grantId}, got ${grantIdInCert}`,
        'The Step-CA issued a certificate with a different grant ID than requested. This may indicate a provisioner misconfiguration or a MITM.',
        undefined,
        'OID_MISMATCH',
      );
    }
    const subjectUserIdExt = issuedCert.getExtension('1.3.6.1.4.1.99999.2');
    if (!subjectUserIdExt) {
      throw new CaServiceError(
        'Issued certificate is missing required Mosaic OID: mosaic_subject_user_id',
        'The Step-CA federation.tpl template did not embed OID 1.3.6.1.4.1.99999.2. Check the provisioner template configuration.',
        undefined,
        'OID_MISSING',
      );
    }
    const subjectUserIdInCert = decoder.decode(subjectUserIdExt.value.slice(2));
    if (subjectUserIdInCert !== req.subjectUserId) {
      throw new CaServiceError(
        `Issued certificate mosaic_subject_user_id mismatch: expected ${req.subjectUserId}, got ${subjectUserIdInCert}`,
        'The Step-CA issued a certificate with a different subject user ID than requested. This may indicate a provisioner misconfiguration or a MITM.',
        undefined,
        'OID_MISMATCH',
      );
    }
    this.logger.log(`Certificate issued — serial=${serialNumber} grantId=${req.grantId}`);
    const result = new IssuedCertDto();
    result.certPem = response.crt;
    result.certChainPem = certChainPem;
    result.serialNumber = serialNumber;
    return result;
  }
 }
--- a/apps/gateway/src/federation/client/tests/federation-client.service.spec.ts
+++ b/apps/gateway/src/federation/client/tests/federation-client.service.spec.ts
@@ -0,0 +1,553 @@
 /**
 * Unit tests for FederationClientService (FED-M3-08).
 *
 * HTTP mocking strategy:
 *   undici MockAgent is used to intercept outbound HTTP requests.  The service
 *   uses `undici.fetch` with a `dispatcher` option and builds one
 *   `undici.Agent` per peer, passing it as the dispatcher on every fetch call,
 *   so installing a MockAgent as the global dispatcher is not enough on its
 *   own: the per-peer Agent would bypass it.
 *
 *   Instead, `makeService()` spies on the service's private `resolveEntry`
 *   method and returns an entry whose `agent` is the MockAgent pool for the
 *   peer endpoint.  Every verb call is then dispatched through that pool and
 *   can be intercepted per test.
 *
 *   For the cert/key wiring, we use the real `sealClientKey` function from
 *   peer-key.util.ts with a test secret — no stubs.
 *
 * Sealed-key setup:
 *   Each test (or beforeAll) calls `sealClientKey(TEST_PRIVATE_KEY_PEM)` with
 *   BETTER_AUTH_SECRET set to a deterministic test value so that
 *   `unsealClientKey` in the service recovers the original PEM.
 */
 import 'reflect-metadata';
 import { describe, it, expect, vi, beforeEach, afterEach, beforeAll, afterAll } from 'vitest';
 import { MockAgent, setGlobalDispatcher, getGlobalDispatcher } from 'undici';
 import type { Dispatcher } from 'undici';
 import { writeFileSync, unlinkSync } from 'node:fs';
 import { tmpdir } from 'node:os';
 import { join } from 'node:path';
 import type { Db } from '@mosaicstack/db';
 import { FederationClientService, FederationClientError } from '../federation-client.service.js';
 import { sealClientKey } from '../../peer-key.util.js';
 // ---------------------------------------------------------------------------
 // Test constants
 // ---------------------------------------------------------------------------
 const TEST_SECRET = 'test-secret-for-federation-client-spec-only';
 const PEER_ID = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa';
 const ENDPOINT = 'https://peer.example.com';
 // Dummy PEM-shaped private key — it does NOT need to be a real key for unit
 // tests because it is only sealed with `sealClientKey`; it is never used to
 // negotiate TLS (MockAgent handles the HTTP layer).
 const TEST_PRIVATE_KEY_PEM = `-----BEGIN PRIVATE KEY-----
 MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDummyKeyForTests
 -----END PRIVATE KEY-----`;
 // Minimal self-signed cert PEM (dummy — only used for mTLS Agent construction)
 const TEST_CERT_PEM = `-----BEGIN CERTIFICATE-----
 MIIBdummyCertForFederationClientTests==
 -----END CERTIFICATE-----`;
 const TEST_CERT_SERIAL = 'ABCDEF1234567890';
 // ---------------------------------------------------------------------------
 // Sealed key (computed once in beforeAll)
 // ---------------------------------------------------------------------------
 let SEALED_KEY: string;
 // Path to a stub Step-CA root cert file written in beforeAll. The cert is never
 // actually used to negotiate TLS in unit tests (MockAgent + spy on resolveEntry
 // short-circuit the network), but loadStepCaRoot() requires the file to exist.
 const STUB_CA_PEM_PATH = join(tmpdir(), 'federation-client-spec-ca.pem');
 const STUB_CA_PEM = `-----BEGIN CERTIFICATE-----
 MIIBdummyCAforFederationClientSpecOnly==
 -----END CERTIFICATE-----
 `;
 // ---------------------------------------------------------------------------
 // Peer row factory
 // ---------------------------------------------------------------------------
 /**
  * Build an active peer row with test defaults; any field can be replaced via
  * `overrides`, which are applied last.
  */
 function makePeerRow(overrides: Partial<Record<string, unknown>> = {}) {
   const defaults = {
     id: PEER_ID,
     commonName: 'peer-example-com',
     displayName: 'Test Peer',
     certPem: TEST_CERT_PEM,
     certSerial: TEST_CERT_SERIAL,
     certNotAfter: new Date('2030-01-01T00:00:00Z'),
     // SEALED_KEY is read at call time, after beforeAll has populated it.
     clientKeyPem: SEALED_KEY,
     state: 'active' as const,
     endpointUrl: ENDPOINT,
     lastSeenAt: null,
     createdAt: new Date('2026-01-01T00:00:00Z'),
     revokedAt: null,
   };
   return { ...defaults, ...overrides };
 }
 // ---------------------------------------------------------------------------
 // Mock DB builder
 // ---------------------------------------------------------------------------
 /**
  * Build a Db stub whose `select().from().where().limit()` chain resolves to
  * `selectRows`. The remaining Db methods are bare mocks.
  */
 function makeDb(selectRows: unknown[] = [makePeerRow()]): Db {
   // Assemble the query chain from the innermost call outwards.
   const limit = vi.fn().mockResolvedValue(selectRows);
   const where = vi.fn().mockReturnValue({ limit });
   const from = vi.fn().mockReturnValue({ where });
   const select = vi.fn().mockReturnValue({ from });
   const stub = {
     select,
     insert: vi.fn(),
     update: vi.fn(),
     delete: vi.fn(),
     transaction: vi.fn(),
   };
   return stub as unknown as Db;
 }
 // ---------------------------------------------------------------------------
 // Helpers for MockAgent HTTP interception
 // ---------------------------------------------------------------------------
 /**
  * Install a fresh MockAgent (real network disabled) as the global dispatcher
  * and return it together with its pool for the peer endpoint, so each test
  * can register its own interceptors.
  */
 function makeMockAgent() {
   const agent = new MockAgent({ connections: 1 });
   agent.disableNetConnect();
   setGlobalDispatcher(agent);
   return { mockAgent: agent, pool: agent.get(ENDPOINT) };
 }
 /**
  * Build a FederationClientService whose private `resolveEntry` is stubbed.
  *
  * The service normally dispatches through a per-peer mTLS undici.Agent. The
  * stub returns a controlled entry whose `agent` is `mockPool`, so requests go
  * through the MockAgent interceptors instead. The stub does not touch the DB
  * and ignores the peer id it is given.
  */
 function makeService(db: Db, mockPool: Dispatcher): FederationClientService {
   type PrivateApi = { resolveEntry: (peerId: string) => Promise<unknown> };
   const service = new FederationClientService(db);
   // A new entry object is produced on every call.
   const stubEntry = () => ({
     agent: mockPool,
     endpointUrl: ENDPOINT,
     certPem: TEST_CERT_PEM,
     certSerial: TEST_CERT_SERIAL,
   });
   vi.spyOn(service as unknown as PrivateApi, 'resolveEntry').mockImplementation(async () =>
     stubEntry(),
   );
   return service;
 }
 // ---------------------------------------------------------------------------
 // Test setup
 // ---------------------------------------------------------------------------
 let originalDispatcher: Dispatcher;
 // Values the managed env vars held before each test, so afterEach can put
 // them back instead of deleting whatever the surrounding environment had set.
 let savedAuthSecret: string | undefined;
 let savedRootCertPath: string | undefined;
 /** Restore an env var to a captured value, deleting it when it was originally unset. */
 function restoreEnv(name: string, value: string | undefined): void {
   if (value === undefined) {
     delete process.env[name];
   } else {
     process.env[name] = value;
   }
 }
 beforeAll(() => {
   // Seal the test key once — requires BETTER_AUTH_SECRET
   const saved = process.env['BETTER_AUTH_SECRET'];
   process.env['BETTER_AUTH_SECRET'] = TEST_SECRET;
   try {
     SEALED_KEY = sealClientKey(TEST_PRIVATE_KEY_PEM);
   } finally {
     restoreEnv('BETTER_AUTH_SECRET', saved);
   }
   writeFileSync(STUB_CA_PEM_PATH, STUB_CA_PEM, 'utf8');
 });
 afterAll(() => {
   try {
     unlinkSync(STUB_CA_PEM_PATH);
   } catch {
     // best-effort cleanup
   }
 });
 beforeEach(() => {
   originalDispatcher = getGlobalDispatcher();
   savedAuthSecret = process.env['BETTER_AUTH_SECRET'];
   savedRootCertPath = process.env['STEP_CA_ROOT_CERT_PATH'];
   process.env['BETTER_AUTH_SECRET'] = TEST_SECRET;
   process.env['STEP_CA_ROOT_CERT_PATH'] = STUB_CA_PEM_PATH;
 });
 afterEach(() => {
   setGlobalDispatcher(originalDispatcher);
   vi.restoreAllMocks();
   // Restore rather than delete: a test may also have removed a variable itself.
   restoreEnv('BETTER_AUTH_SECRET', savedAuthSecret);
   restoreEnv('STEP_CA_ROOT_CERT_PATH', savedRootCertPath);
 });
 // ---------------------------------------------------------------------------
 // Helpers
 // ---------------------------------------------------------------------------
 /** Body replied by the mocked peer for a successful `list` call: one task item, no next cursor. */
 const LIST_BODY = {
   items: [{ id: '1', title: 'Task One' }],
   nextCursor: undefined,
   _partial: false,
 };
 /** Body replied by the mocked peer for a successful `get` call: a single task item. */
 const GET_BODY = {
   item: { id: '1', title: 'Task One' },
   _partial: false,
 };
 /** Body replied by the mocked peer for a successful `capabilities` call. */
 const CAP_BODY = {
   resources: ['tasks'],
   excluded_resources: [],
   max_rows_per_query: 100,
   // `as const` keeps the verbs as a readonly tuple of string literals.
   supported_verbs: ['list', 'get', 'capabilities'] as const,
 };
 // ---------------------------------------------------------------------------
 // Tests
 // ---------------------------------------------------------------------------
 describe('FederationClientService', () => {
  // ─── Successful verb calls ─────────────────────────────────────────────────
  describe('list()', () => {
    it('returns parsed typed response on success', async () => {
      const db = makeDb();
      const { mockAgent, pool } = makeMockAgent();
      const svc = makeService(db, pool);
      pool
        .intercept({
          path: '/api/federation/v1/list/tasks',
          method: 'POST',
        })
        .reply(200, LIST_BODY, { headers: { 'content-type': 'application/json' } });
      const result = await svc.list(PEER_ID, 'tasks', {});
      expect(result.items).toHaveLength(1);
      expect(result.items[0]).toMatchObject({ id: '1', title: 'Task One' });
      await mockAgent.close();
    });
  });
  describe('get()', () => {
    it('returns parsed typed response on success', async () => {
      const db = makeDb();
      const { mockAgent, pool } = makeMockAgent();
      const svc = makeService(db, pool);
      pool
        .intercept({
          path: '/api/federation/v1/get/tasks/1',
          method: 'POST',
        })
        .reply(200, GET_BODY, { headers: { 'content-type': 'application/json' } });
      const result = await svc.get(PEER_ID, 'tasks', '1', {});
      expect(result.item).toMatchObject({ id: '1', title: 'Task One' });
      await mockAgent.close();
    });
  });
  describe('capabilities()', () => {
    it('returns parsed capabilities response on success', async () => {
      const db = makeDb();
      const { mockAgent, pool } = makeMockAgent();
      const svc = makeService(db, pool);
      pool
        .intercept({
          path: '/api/federation/v1/capabilities',
          method: 'GET',
        })
        .reply(200, CAP_BODY, { headers: { 'content-type': 'application/json' } });
      const result = await svc.capabilities(PEER_ID);
      expect(result.resources).toContain('tasks');
      expect(result.max_rows_per_query).toBe(100);
      await mockAgent.close();
    });
  });
  // ─── HTTP error surfaces ──────────────────────────────────────────────────
  describe('non-2xx responses', () => {
    it('surfaces 403 as FederationClientError({ status: 403, code: "FORBIDDEN" })', async () => {
      const db = makeDb();
      const { mockAgent, pool } = makeMockAgent();
      const svc = makeService(db, pool);
      pool.intercept({ path: '/api/federation/v1/list/tasks', method: 'POST' }).reply(
        403,
        { error: { code: 'forbidden', message: 'Access denied' } },
        {
          headers: { 'content-type': 'application/json' },
        },
      );
      await expect(svc.list(PEER_ID, 'tasks', {})).rejects.toMatchObject({
        status: 403,
        code: 'FORBIDDEN',
        peerId: PEER_ID,
      });
      await mockAgent.close();
    });
    it('surfaces 404 as FederationClientError({ status: 404, code: "HTTP_404" })', async () => {
      const db = makeDb();
      const { mockAgent, pool } = makeMockAgent();
      const svc = makeService(db, pool);
      pool.intercept({ path: '/api/federation/v1/get/tasks/999', method: 'POST' }).reply(
        404,
        { error: { code: 'not_found', message: 'Not found' } },
        {
          headers: { 'content-type': 'application/json' },
        },
      );
      await expect(svc.get(PEER_ID, 'tasks', '999', {})).rejects.toMatchObject({
        status: 404,
        code: 'HTTP_404',
        peerId: PEER_ID,
      });
      await mockAgent.close();
    });
  });
  // ─── Network error ─────────────────────────────────────────────────────────
  describe('network errors', () => {
    it('surfaces network error as FederationClientError({ code: "NETWORK" })', async () => {
      const db = makeDb();
      const { mockAgent, pool } = makeMockAgent();
      const svc = makeService(db, pool);
      pool
        .intercept({ path: '/api/federation/v1/capabilities', method: 'GET' })
        .replyWithError(new Error('ECONNREFUSED'));
      await expect(svc.capabilities(PEER_ID)).rejects.toMatchObject({
        code: 'NETWORK',
        peerId: PEER_ID,
      });
      await mockAgent.close();
    });
  });
  // ─── Invalid response body ─────────────────────────────────────────────────
  describe('invalid response body', () => {
    it('surfaces as FederationClientError({ code: "INVALID_RESPONSE" }) when body shape is wrong', async () => {
      const db = makeDb();
      const { mockAgent, pool } = makeMockAgent();
      const svc = makeService(db, pool);
      // capabilities returns wrong shape (missing required fields)
      pool
        .intercept({ path: '/api/federation/v1/capabilities', method: 'GET' })
        .reply(200, { totally: 'wrong' }, { headers: { 'content-type': 'application/json' } });
      await expect(svc.capabilities(PEER_ID)).rejects.toMatchObject({
        code: 'INVALID_RESPONSE',
        peerId: PEER_ID,
      });
      await mockAgent.close();
    });
  });
  // ─── Peer DB validation ────────────────────────────────────────────────────
  describe('peer validation (without resolveEntry spy)', () => {
    /**
     * These tests exercise the real `resolveEntry` path — no spy on resolveEntry.
     */
    it('throws PEER_NOT_FOUND when peer is not in DB', async () => {
      // DB returns empty array (peer not found)
      const db = makeDb([]);
      const svc = new FederationClientService(db);
      await expect(svc.capabilities(PEER_ID)).rejects.toMatchObject({
        code: 'PEER_NOT_FOUND',
        peerId: PEER_ID,
      });
    });
    it('throws PEER_INACTIVE when peer state is not "active"', async () => {
      const db = makeDb([makePeerRow({ state: 'suspended' })]);
      const svc = new FederationClientService(db);
      await expect(svc.capabilities(PEER_ID)).rejects.toMatchObject({
        code: 'PEER_INACTIVE',
        peerId: PEER_ID,
      });
    });
  });
  // ─── Cache behaviour ───────────────────────────────────────────────────────
  describe('cache behaviour', () => {
    it('hits cache on second call — only one DB lookup happens', async () => {
      // Verify cache by calling the private resolveEntry directly twice and
      // asserting the DB was queried only once. This avoids the HTTP layer,
      // which would require either a real network or per-peer Agent rewiring
      // that the cache invariant doesn't depend on.
      const db = makeDb();
      const selectSpy = vi.spyOn(db, 'select');
      const svc = new FederationClientService(db);
      const resolveEntry = (
        svc as unknown as { resolveEntry: (peerId: string) => Promise<unknown> }
      ).resolveEntry.bind(svc);
      const first = await resolveEntry(PEER_ID);
      const second = await resolveEntry(PEER_ID);
      expect(first).toBe(second);
      expect(selectSpy).toHaveBeenCalledTimes(1);
    });
    it('serializes concurrent resolveEntry calls — only one DB lookup', async () => {
      const db = makeDb();
      const selectSpy = vi.spyOn(db, 'select');
      const svc = new FederationClientService(db);
      const resolveEntry = (
        svc as unknown as {
          resolveEntry: (peerId: string) => Promise<unknown>;
        }
      ).resolveEntry.bind(svc);
      const [a, b] = await Promise.all([resolveEntry(PEER_ID), resolveEntry(PEER_ID)]);
      expect(a).toBe(b);
      expect(selectSpy).toHaveBeenCalledTimes(1);
    });
    it('flushPeer destroys the evicted Agent so old TLS connections close', async () => {
      const db = makeDb();
      const svc = new FederationClientService(db);
      const resolveEntry = (
        svc as unknown as {
          resolveEntry: (peerId: string) => Promise<{ agent: { destroy: () => Promise<void> } }>;
        }
      ).resolveEntry.bind(svc);
      const entry = await resolveEntry(PEER_ID);
      const destroySpy = vi.spyOn(entry.agent, 'destroy').mockResolvedValue();
      svc.flushPeer(PEER_ID);
      expect(destroySpy).toHaveBeenCalledTimes(1);
    });
    it('flushPeer() invalidates cache — next call re-reads DB', async () => {
      const db = makeDb();
      const { mockAgent, pool } = makeMockAgent();
      const svc = makeService(db, pool);
      pool
        .intercept({ path: '/api/federation/v1/capabilities', method: 'GET' })
        .reply(200, CAP_BODY, { headers: { 'content-type': 'application/json' } })
        .times(2);
      // First call — populates cache (via mock resolveEntry)
      await svc.capabilities(PEER_ID);
      // Flush the cache
      svc.flushPeer(PEER_ID);
      // The spy on resolveEntry is still active — check it's called again after flush
      const resolveEntrySpy = vi.spyOn(
        svc as unknown as { resolveEntry: (peerId: string) => Promise<unknown> },
        'resolveEntry',
      );
      // Second call after flush — should call resolveEntry again
      await svc.capabilities(PEER_ID);
      // resolveEntry should have been called once after we started spying (post-flush)
      expect(resolveEntrySpy).toHaveBeenCalledTimes(1);
      await mockAgent.close();
    });
  });
  // ─── loadStepCaRoot env-var guard ─────────────────────────────────────────
  describe('loadStepCaRoot() env-var guard', () => {
    it('throws PEER_MISCONFIGURED when STEP_CA_ROOT_CERT_PATH is not set', async () => {
      delete process.env['STEP_CA_ROOT_CERT_PATH'];
      const db = makeDb();
      const svc = new FederationClientService(db);
      const resolveEntry = (
        svc as unknown as {
          resolveEntry: (peerId: string) => Promise<unknown>;
        }
      ).resolveEntry.bind(svc);
      await expect(resolveEntry(PEER_ID)).rejects.toMatchObject({
        code: 'PEER_MISCONFIGURED',
      });
    });
  });
  // ─── FederationClientError class ──────────────────────────────────────────
  describe('FederationClientError', () => {
    it('is instanceof Error and FederationClientError', () => {
      const err = new FederationClientError({
        code: 'PEER_NOT_FOUND',
        message: 'test',
        peerId: PEER_ID,
      });
      expect(err).toBeInstanceOf(Error);
      expect(err).toBeInstanceOf(FederationClientError);
      expect(err.name).toBe('FederationClientError');
    });
    it('carries status, code, and peerId', () => {
      const err = new FederationClientError({
        status: 403,
        code: 'FORBIDDEN',
        message: 'forbidden',
        peerId: PEER_ID,
      });
      expect(err.status).toBe(403);
      expect(err.code).toBe('FORBIDDEN');
      expect(err.peerId).toBe(PEER_ID);
    });
  });
 });
--- a/apps/gateway/src/federation/client/federation-client.service.ts
+++ b/apps/gateway/src/federation/client/federation-client.service.ts
@@ -0,0 +1,500 @@
 /**
 * FederationClientService — outbound mTLS client for federation requests (FED-M3-08).
 *
 * Dials peer gateways over mTLS using the cert+sealed-key stored in `federation_peers`,
 * invokes federation verbs (list / get / capabilities), and surfaces all failure modes
 * as typed `FederationClientError` instances.
 *
 * ## Error code taxonomy
 *
 * | Code               | When                                                          |
 * | ------------------ | ------------------------------------------------------------- |
 * | PEER_NOT_FOUND     | No row in federation_peers for the given peerId               |
 * | PEER_INACTIVE      | Peer row exists but state !== 'active'                        |
 * | PEER_MISCONFIGURED | Peer row is active but missing endpointUrl or clientKeyPem    |
 * | NETWORK            | undici threw a connection / TLS / timeout error               |
 * | HTTP_{status}      | Peer returned a non-2xx, non-403 response (e.g. HTTP_404)     |
 * | FORBIDDEN          | Peer returned 403 (reported instead of HTTP_403)              |
 * | INVALID_RESPONSE   | Response body failed Zod schema validation                    |
 *
 * ## Cache strategy
 *
 * Per-peer `undici.Agent` instances are cached in a `Map<peerId, AgentCacheEntry>` for
 * the lifetime of the service instance.  The cache is keyed on peerId (UUID).
 *
 * Cache invalidation:
 *  - `flushPeer(peerId)` — removes the entry immediately.  M5/M6 MUST call this on
 *    cert rotation or peer revocation events so the next request re-reads the DB and
 *    builds a fresh TLS Agent with the new cert material.
 *  - On cache miss: re-reads the DB, checks state === 'active', rebuilds Agent.
 *
 * Cache does NOT auto-expire.  The service is expected to be a singleton scoped to the
 * NestJS application lifecycle; flushing on revocation/rotation is the only invalidation
 * path by design (avoids redundant DB round-trips on the hot path).
 */
 import { Injectable, Inject, Logger } from '@nestjs/common';
 import { readFileSync } from 'node:fs';
 import { Agent, fetch as undiciFetch } from 'undici';
 import type { Dispatcher } from 'undici';
 import { z } from 'zod';
 import { type Db, eq, federationPeers } from '@mosaicstack/db';
 import {
  FederationListResponseSchema,
  FederationGetResponseSchema,
  FederationCapabilitiesResponseSchema,
  FederationErrorEnvelopeSchema,
  type FederationListResponse,
  type FederationGetResponse,
  type FederationCapabilitiesResponse,
 } from '@mosaicstack/types';
 import { DB } from '../../database/database.module.js';
 import { unsealClientKey } from '../peer-key.util.js';
 // ---------------------------------------------------------------------------
 // Error taxonomy
 // ---------------------------------------------------------------------------
 /**
 * Client-side error code set.  Distinct from the server-side `FederationErrorCode`
 * (which lives in `@mosaicstack/types`) because the client has additional failure
 * modes (PEER_NOT_FOUND, PEER_INACTIVE, PEER_MISCONFIGURED, NETWORK) that the
 * server never emits.
 */
 export type FederationClientErrorCode =
  // No `federation_peers` row matched the requested peerId.
  | 'PEER_NOT_FOUND'
  // Peer row exists but its state is not 'active'.
  | 'PEER_INACTIVE'
  // Peer or local configuration is unusable: missing endpointUrl/clientKeyPem,
  // client key could not be unsealed, or the Step-CA root cert could not be loaded.
  | 'PEER_MISCONFIGURED'
  // The fetch call itself threw before a response was received.
  | 'NETWORK'
  // Peer answered with HTTP 403.
  | 'FORBIDDEN'
  // Body was not JSON, or did not match the expected response schema.
  | 'INVALID_RESPONSE'
  // Any other non-2xx status, e.g. `HTTP_404`.
  | `HTTP_${number}`;
 /** Constructor options for `FederationClientError`. */
 export interface FederationClientErrorOptions {
  /** HTTP status of the peer response, when the error originates from one. */
  status?: number;
  /** Machine-readable failure category callers can dispatch on. */
  code: FederationClientErrorCode;
  /** Human-readable description; becomes `Error.message`. */
  message: string;
  /** Id of the peer the failed operation targeted. */
  peerId: string;
  /** Underlying error or value that triggered this failure, if any. */
  cause?: unknown;
 }
 /**
 * Thrown by FederationClientService on every failure path.
 * Callers can dispatch on `error.code` for programmatic handling.
 */
 export class FederationClientError extends Error {
  readonly status?: number;
  readonly code: FederationClientErrorCode;
  readonly peerId: string;
  readonly cause?: unknown;
  /**
   * Copies every option onto the instance and fixes `name` so the error is
   * identifiable in logs without an `instanceof` check.
   */
  constructor(opts: FederationClientErrorOptions) {
    super(opts.message);
    const { status, code, peerId, cause } = opts;
    this.name = 'FederationClientError';
    this.status = status;
    this.code = code;
    this.peerId = peerId;
    this.cause = cause;
  }
 }
 // ---------------------------------------------------------------------------
 // Internal cache types
 // ---------------------------------------------------------------------------
 /** What the service keeps per peer once its connection material has been built. */
 interface AgentCacheEntry {
  /** undici Agent configured with the peer's client cert/key and the pinned CA. */
  agent: Agent;
  /** Peer base URL; the federation verb paths are appended to it. */
  endpointUrl: string;
  /** Certificate PEM copied from the peer row when the entry was built. */
  certPem: string;
  /** Certificate serial copied from the peer row; used in the cache log line. */
  certSerial: string;
 }
 // ---------------------------------------------------------------------------
 // Service
 // ---------------------------------------------------------------------------
@Injectable()
 export class FederationClientService {
  private readonly logger = new Logger(FederationClientService.name);
  /**
   * Per-peer undici Agent cache.
   * Key = peerId (UUID string).
   *
   * Values are either a resolved `AgentCacheEntry` or an in-flight
   * `Promise<AgentCacheEntry>` (promise-cache pattern).  Storing the promise
   * prevents duplicate DB lookups and duplicate key-unseal operations when two
   * requests for the same peer arrive before the first build completes.
   *
   * Flush via `flushPeer(peerId)` on cert rotation / peer revocation (M5/M6).
   */
  private readonly cache = new Map<string, AgentCacheEntry | Promise<AgentCacheEntry>>();
  /**
   * Step-CA root cert PEM, loaded once from `STEP_CA_ROOT_CERT_PATH`.
   * Used as the trust anchor for peer server certificates so federation TLS is
   * pinned to our PKI, not the public trust store. Lazily loaded on first use
   * so unit tests that don't exercise the agent path can run without the env var.
   */
  private cachedCaPem: string | null = null;
  constructor(@Inject(DB) private readonly db: Db) {}
  // -------------------------------------------------------------------------
  // Public verb API
  // -------------------------------------------------------------------------
  /**
   * Invoke the `list` verb on a remote peer.
   *
   * @param peerId   UUID of the peer row in `federation_peers`.
   * @param resource Resource path, e.g. "tasks".
   * @param request  Free-form body sent as JSON in the POST body.
   * @returns Parsed `FederationListResponse<T>`.
   */
  async list<T>(
    peerId: string,
    resource: string,
    request: Record<string, unknown>,
  ): Promise<FederationListResponse<T>> {
    const entry = await this.resolveEntry(peerId);
    // The resource is caller-supplied, so it is percent-encoded before it
    // becomes a path segment.
    const target = [entry.endpointUrl, 'api/federation/v1/list', encodeURIComponent(resource)].join(
      '/',
    );
    const payload = await this.doPost(peerId, target, entry.agent, request);
    // Only the envelope is validated here; item shape is left to the caller via T.
    const envelopeSchema = FederationListResponseSchema(z.unknown());
    return this.parseWith<FederationListResponse<T>>(peerId, payload, envelopeSchema);
  }
  /**
   * Invoke the `get` verb on a remote peer.
   *
   * @param peerId   UUID of the peer row in `federation_peers`.
   * @param resource Resource path, e.g. "tasks".
   * @param id       Resource identifier.
   * @param request  Free-form body sent as JSON in the POST body.
   * @returns Parsed `FederationGetResponse<T>`.
   */
  async get<T>(
    peerId: string,
    resource: string,
    id: string,
    request: Record<string, unknown>,
  ): Promise<FederationGetResponse<T>> {
    const entry = await this.resolveEntry(peerId);
    // Both caller-supplied segments are percent-encoded before joining.
    const target = [
      entry.endpointUrl,
      'api/federation/v1/get',
      encodeURIComponent(resource),
      encodeURIComponent(id),
    ].join('/');
    const payload = await this.doPost(peerId, target, entry.agent, request);
    // Only the envelope is validated here; item shape is left to the caller via T.
    const envelopeSchema = FederationGetResponseSchema(z.unknown());
    return this.parseWith<FederationGetResponse<T>>(peerId, payload, envelopeSchema);
  }
  /**
   * Invoke the `capabilities` verb on a remote peer.
   *
   * @param peerId UUID of the peer row in `federation_peers`.
   * @returns Parsed `FederationCapabilitiesResponse`.
   */
  async capabilities(peerId: string): Promise<FederationCapabilitiesResponse> {
    const entry = await this.resolveEntry(peerId);
    // Capabilities is the only verb issued as a GET; it carries no body.
    const target = `${entry.endpointUrl}/api/federation/v1/capabilities`;
    const payload = await this.doGet(peerId, target, entry.agent);
    return this.parseWith<FederationCapabilitiesResponse>(
      peerId,
      payload,
      FederationCapabilitiesResponseSchema,
    );
  }
  // -------------------------------------------------------------------------
  // Cache management
  // -------------------------------------------------------------------------
  /**
   * Flush the cached Agent for a specific peer.
   *
   * M5/M6 MUST call this on:
   *  - cert rotation events (so new cert material is picked up)
   *  - peer revocation events (so future requests fail at PEER_INACTIVE)
   *
   * After flushing, the next call to `list`, `get`, or `capabilities` for
   * this peer will re-read the DB and rebuild the Agent.
   */
  flushPeer(peerId: string): void {
    const cachedValue = this.cache.get(peerId);
    if (cachedValue !== undefined) {
      this.cache.delete(peerId);
      // A slot holding a Promise is a build still in flight: there is no
      // resolved Agent to tear down yet, so only concrete entries are destroyed.
      const resolved = cachedValue instanceof Promise ? null : cachedValue;
      if (resolved !== null) {
        resolved.agent.destroy().catch(() => {
          // best-effort: a failed destroy is not actionable
        });
      }
      this.logger.log(`Cache flushed for peer ${peerId}`);
    }
  }
  // -------------------------------------------------------------------------
  // Internal helpers
  // -------------------------------------------------------------------------
  /**
   * Load and cache the Step-CA root cert PEM from `STEP_CA_ROOT_CERT_PATH`.
   * Throws `FederationClientError` if the env var is unset or the file cannot
   * be read — mTLS to a peer without a pinned trust anchor would silently
   * fall back to the public trust store.
   */
  private loadStepCaRoot(): string {
    // Serve from memory after the first successful load.
    if (this.cachedCaPem !== null) {
      return this.cachedCaPem;
    }
    const path = process.env['STEP_CA_ROOT_CERT_PATH'];
    if (!path) {
      throw new FederationClientError({
        code: 'PEER_MISCONFIGURED',
        message: 'STEP_CA_ROOT_CERT_PATH is not set; refusing to dial peer without pinned CA trust',
        peerId: '',
      });
    }
    let pem: string;
    try {
      pem = readFileSync(path, 'utf8');
    } catch (err) {
      throw new FederationClientError({
        code: 'PEER_MISCONFIGURED',
        message: `Failed to read STEP_CA_ROOT_CERT_PATH (${path})`,
        peerId: '',
        cause: err,
      });
    }
    // An empty `ca` value is falsy, so the TLS layer would treat it as "no CA
    // override" and trust the default public roots — exactly the silent
    // fallback this method exists to prevent.  Reject it instead of caching it.
    if (pem.trim() === '') {
      throw new FederationClientError({
        code: 'PEER_MISCONFIGURED',
        message: `STEP_CA_ROOT_CERT_PATH (${path}) is empty; refusing to dial peer without pinned CA trust`,
        peerId: '',
      });
    }
    this.cachedCaPem = pem;
    return pem;
  }
  /**
   * Resolve the cache entry for a peer, reading DB on miss.
   *
   * Uses a promise-cache pattern: concurrent callers for the same uncached
   * `peerId` all `await` the same in-flight `Promise<AgentCacheEntry>` so
   * only one DB lookup and one key-unseal ever runs per peer per cache miss.
   * The promise is replaced with the concrete entry on success, or deleted on
   * rejection so a transient error does not poison the cache permanently.
   *
   * Throws `FederationClientError` with appropriate code if the peer is not
   * found, is inactive, or is missing required fields.
   */
  private async resolveEntry(peerId: string): Promise<AgentCacheEntry> {
    const cached = this.cache.get(peerId);
    if (cached) {
      return cached; // Promise or concrete entry — both are awaitable
    }
    // Explicitly typed because the settle handlers below refer back to it.
    const inflight: Promise<AgentCacheEntry> = this.buildEntry(peerId).then(
      (entry) => {
        // Promote to a concrete entry only if this build still owns the slot.
        // If flushPeer() ran while the build was in flight, the slot is now
        // empty or owned by a newer build; writing here would resurrect
        // pre-flush cert material.  Callers already awaiting this promise
        // still receive the entry for their one request.
        if (this.cache.get(peerId) === inflight) {
          this.cache.set(peerId, entry);
        }
        return entry;
      },
      (err: unknown) => {
        // Don't poison the cache with a rejected promise — but also don't
        // evict a newer build that replaced this one after a flush.
        if (this.cache.get(peerId) === inflight) {
          this.cache.delete(peerId);
        }
        throw err;
      },
    );
    this.cache.set(peerId, inflight);
    return inflight;
  }
  /**
   * Build the `AgentCacheEntry` for a peer by reading the DB, validating the
   * peer's state, unsealing the private key, and constructing the mTLS Agent.
   *
   * Throws `FederationClientError` with appropriate code if the peer is not
   * found, is inactive, or is missing required fields.
   */
  private async buildEntry(peerId: string): Promise<AgentCacheEntry> {
    // DB lookup
    const [peer] = await this.db
      .select()
      .from(federationPeers)
      .where(eq(federationPeers.id, peerId))
      .limit(1);
    if (!peer) {
      throw new FederationClientError({
        code: 'PEER_NOT_FOUND',
        message: `Federation peer ${peerId} not found`,
        peerId,
      });
    }
    if (peer.state !== 'active') {
      throw new FederationClientError({
        code: 'PEER_INACTIVE',
        message: `Federation peer ${peerId} is not active (state: ${peer.state})`,
        peerId,
      });
    }
    if (!peer.endpointUrl || !peer.clientKeyPem) {
      throw new FederationClientError({
        code: 'PEER_MISCONFIGURED',
        message: `Federation peer ${peerId} is missing endpointUrl or clientKeyPem`,
        peerId,
      });
    }
    // Unseal the private key
    let privateKeyPem: string;
    try {
      privateKeyPem = unsealClientKey(peer.clientKeyPem);
    } catch (err) {
      throw new FederationClientError({
        code: 'PEER_MISCONFIGURED',
        message: `Failed to unseal client key for peer ${peerId}`,
        peerId,
        cause: err,
      });
    }
    // Load the pinned trust anchor.  The loader has no peer context and
    // reports an empty peerId, so re-issue its error with this peer's id to
    // keep `error.peerId` meaningful for callers and logs.
    let caPem: string;
    try {
      caPem = this.loadStepCaRoot();
    } catch (err) {
      if (err instanceof FederationClientError) {
        throw new FederationClientError({
          code: err.code,
          message: err.message,
          peerId,
          cause: err.cause ?? err,
        });
      }
      throw err;
    }
    // Build mTLS agent — pin trust to Step-CA root so we never accept
    // a peer cert signed by a public CA (defense against MITM with a
    // publicly-trusted DV cert for the peer's hostname).
    const agent = new Agent({
      connect: {
        cert: peer.certPem,
        key: privateKeyPem,
        ca: caPem,
        // rejectUnauthorized: true is the undici default for HTTPS
      },
    });
    const entry: AgentCacheEntry = {
      agent,
      // Verb URLs are built as `${endpointUrl}/api/...`; dropping trailing
      // slashes here avoids `//api/...` when the stored URL ends with one.
      endpointUrl: peer.endpointUrl.replace(/\/+$/, ''),
      certPem: peer.certPem,
      certSerial: peer.certSerial,
    };
    this.logger.log(`Agent cached for peer ${peerId} (serial: ${peer.certSerial})`);
    return entry;
  }
  /**
   * Execute a POST request with a JSON body.
   * Returns the parsed response body as an unknown value.
   * Throws `FederationClientError` on network errors and non-2xx responses.
   */
  private async doPost(
    peerId: string,
    url: string,
    agent: Dispatcher,
    body: Record<string, unknown>,
  ): Promise<unknown> {
    // Serialise up front so the request init is plain data.
    const serialized = JSON.stringify(body);
    const jsonHeaders = { 'Content-Type': 'application/json' };
    return this.doRequest(peerId, url, agent, {
      method: 'POST',
      headers: jsonHeaders,
      body: serialized,
    });
  }
  /**
   * Execute a GET request.
   * Returns the parsed response body as an unknown value.
   * Throws `FederationClientError` on network errors and non-2xx responses.
   */
  private async doGet(peerId: string, url: string, agent: Dispatcher): Promise<unknown> {
    // GET carries neither headers nor a body; everything else is shared.
    const init = { method: 'GET' };
    return this.doRequest(peerId, url, agent, init);
  }
  /**
   * Shared transport for `doGet` / `doPost`.
   *
   * Sends the request through the peer's dispatcher and returns the decoded
   * JSON body.  Throws `FederationClientError` with code `NETWORK` when the
   * fetch itself fails, `FORBIDDEN` / `HTTP_{status}` for non-2xx responses,
   * and `INVALID_RESPONSE` when a 2xx body is not JSON.
   */
  private async doRequest(
    peerId: string,
    url: string,
    agent: Dispatcher,
    init: { method: string; headers?: Record<string, string>; body?: string },
  ): Promise<unknown> {
    let response: Awaited<ReturnType<typeof undiciFetch>>;
    try {
      response = await undiciFetch(url, { ...init, dispatcher: agent });
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new FederationClientError({
        code: 'NETWORK',
        message: `Network error calling peer ${peerId} at ${url}: ${reason}`,
        peerId,
        cause: err,
      });
    }
    // A body that cannot be read is treated as empty rather than as a failure.
    const text = await response.text().catch(() => '');
    if (response.ok) {
      try {
        return JSON.parse(text) as unknown;
      } catch (err) {
        throw new FederationClientError({
          code: 'INVALID_RESPONSE',
          message: `Peer ${peerId} returned non-JSON body`,
          peerId,
          cause: err,
        });
      }
    }
    // Non-2xx: prefer the message from a federation error envelope, falling
    // back to a generic one when the body is not JSON or not an envelope.
    const { status } = response;
    let detail = `HTTP ${status}`;
    try {
      const parsedBody: unknown = JSON.parse(text);
      const envelope = FederationErrorEnvelopeSchema.safeParse(parsedBody);
      if (envelope.success) {
        detail = envelope.data.error.message;
      }
    } catch {
      // keep the generic message
    }
    // 403 gets its own code; every other status becomes HTTP_{n}.
    const code: FederationClientErrorCode = status === 403 ? 'FORBIDDEN' : `HTTP_${status}`;
    throw new FederationClientError({
      status,
      code,
      message: `Peer ${peerId} returned ${status}: ${detail}`,
      peerId,
    });
  }
  /**
   * Parse and validate a response body against a Zod schema.
   *
   * For list/get, callers pass the result of `FederationListResponseSchema(z.unknown())`
   * so that the envelope structure is validated without requiring a concrete item schema
   * at the client level.  The generic `T` provides compile-time typing.
   *
   * Throws `FederationClientError({ code: 'INVALID_RESPONSE' })` on parse failure.
   */
  private parseWith<T>(peerId: string, body: unknown, schema: z.ZodTypeAny): T {
    const parsed = schema.safeParse(body);
    if (parsed.success) {
      return parsed.data as T;
    }
    // Render each issue as "[path] message"; an empty path is shown as "root".
    const problems: string[] = [];
    for (const issue of parsed.error.issues) {
      const where = issue.path.join('.') || 'root';
      problems.push(`[${where}] ${issue.message}`);
    }
    throw new FederationClientError({
      code: 'INVALID_RESPONSE',
      message: `Peer ${peerId} returned invalid response shape: ${problems.join('; ')}`,
      peerId,
    });
  }
 }
--- a/apps/gateway/src/federation/client/index.ts
+++ b/apps/gateway/src/federation/client/index.ts
@@ -0,0 +1,13 @@
 /**
 * Federation client barrel — re-exports for FederationModule consumers.
 *
 * M3-09 (QuerySourceService) and future milestones should import from here,
 * not directly from the implementation file.
 */
 export {
  FederationClientService,
  FederationClientError,
  type FederationClientErrorCode,
  type FederationClientErrorOptions,
 } from './federation-client.service.js';
--- a/apps/gateway/src/federation/enrollment.controller.ts
+++ b/apps/gateway/src/federation/enrollment.controller.ts
@@ -0,0 +1,54 @@
 /**
 * EnrollmentController — federation enrollment HTTP layer (FED-M2-07).
 *
 * Routes:
 *   POST /api/federation/enrollment/tokens   — admin creates a single-use token
 *   POST /api/federation/enrollment/:token   — unauthenticated; token IS the auth
 */
 import {
  Body,
  Controller,
  HttpCode,
  HttpStatus,
  Inject,
  Param,
  Post,
  UseGuards,
 } from '@nestjs/common';
 import { AdminGuard } from '../admin/admin.guard.js';
 import { EnrollmentService } from './enrollment.service.js';
 import { CreateEnrollmentTokenDto, RedeemEnrollmentTokenDto } from './enrollment.dto.js';
@Controller('api/federation/enrollment')
 export class EnrollmentController {
  constructor(@Inject(EnrollmentService) private readonly enrollmentService: EnrollmentService) {}
  /**
   * POST /api/federation/enrollment/tokens
   *
   * Admin-only (AdminGuard).  Creates a single-use enrollment token from the
   * request body and responds 201 with the service result.  The token is
   * meant to be handed to the remote peer operator out-of-band.
   */
  @Post('tokens')
  @UseGuards(AdminGuard)
  @HttpCode(HttpStatus.CREATED)
  async createToken(@Body() dto: CreateEnrollmentTokenDto) {
    const created = await this.enrollmentService.createToken(dto);
    return created;
  }
  /**
   * POST /api/federation/enrollment/:token
   *
   * No guard: the path token is the only credential.  The remote peer sends
   * its CSR in the body and receives the signed leaf cert plus chain PEM.
   * A token that was already used or has expired yields 410 Gone.
   */
  @Post(':token')
  @HttpCode(HttpStatus.OK)
  async redeem(@Param('token') token: string, @Body() dto: RedeemEnrollmentTokenDto) {
    const { csrPem } = dto;
    const redeemed = await this.enrollmentService.redeem(token, csrPem);
    return redeemed;
  }
 }
--- a/apps/gateway/src/federation/enrollment.dto.ts
+++ b/apps/gateway/src/federation/enrollment.dto.ts
@@ -0,0 +1,35 @@
 /**
 * DTOs for the federation enrollment flow (FED-M2-07).
 *
 * CreateEnrollmentTokenDto  — admin generates a single-use enrollment token
 * RedeemEnrollmentTokenDto  — remote peer submits CSR to redeem the token
 */
 import { IsInt, IsNotEmpty, IsOptional, IsString, IsUUID, Max, Min } from 'class-validator';
 /** Request body for creating a single-use enrollment token. */
 export class CreateEnrollmentTokenDto {
  /** UUID of the federation grant this token will activate on redemption. */
  @IsUUID()
  grantId!: string;
  /** UUID of the peer record that will receive the issued cert on redemption. */
  @IsUUID()
  peerId!: string;
  /**
   * Token lifetime in seconds. Default 900 (15 min). Min 60. Max 900.
   * After this time the token is rejected even if unused.
   *
   * NOTE(review): the `= 900` default only exists on instances created from
   * this class — confirm the validation pipe transforms request bodies.
   */
  @IsOptional()
  @IsInt()
  @Min(60)
  @Max(900)
  ttlSeconds: number = 900;
 }
 /** Request body a remote peer submits when redeeming an enrollment token. */
 export class RedeemEnrollmentTokenDto {
  /** PEM-encoded PKCS#10 Certificate Signing Request from the remote peer. */
  // Validation only requires a non-empty string; the PEM content itself is not checked here.
  @IsString()
  @IsNotEmpty()
  csrPem!: string;
 }
--- a/apps/gateway/src/federation/enrollment.service.ts
+++ b/apps/gateway/src/federation/enrollment.service.ts
@@ -0,0 +1,281 @@
 /**
 * EnrollmentService — single-use enrollment token lifecycle (FED-M2-07).
 *
 * Responsibilities:
 *  1. Generate time-limited single-use enrollment tokens (admin action).
 *  2. Redeem a token: validate → atomically claim token → issue cert via
 *     CaService → transactionally activate grant + update peer + write audit.
 *
 * Replay protection: the token is claimed (UPDATE WHERE used_at IS NULL) BEFORE
 * cert issuance. This prevents double cert minting on concurrent requests.
 * If cert issuance fails after claim, the token is consumed and the grant
 * stays pending — admin must create a new grant.
 */
 import {
  BadRequestException,
  ConflictException,
  GoneException,
  Inject,
  Injectable,
  Logger,
  NotFoundException,
 } from '@nestjs/common';
 import * as crypto from 'node:crypto';
 // X509Certificate is available as a named export in Node.js ≥ 15.6
 const { X509Certificate } = crypto;
 import {
  type Db,
  and,
  eq,
  isNull,
  sql,
  federationEnrollmentTokens,
  federationGrants,
  federationPeers,
  federationAuditLog,
 } from '@mosaicstack/db';
 import { DB } from '../database/database.module.js';
 import { CaService } from './ca.service.js';
 import { GrantsService } from './grants.service.js';
 import { FederationScopeError } from './scope-schema.js';
 import type { CreateEnrollmentTokenDto } from './enrollment.dto.js';
 /** Result of `EnrollmentService.createToken`. */
 export interface EnrollmentTokenResult {
  /** The enrollment token: 32 random bytes, hex-encoded. */
  token: string;
  /** Expiry instant as an ISO-8601 string. */
  expiresAt: string;
 }
 /** Result of `EnrollmentService.redeem`: the cert material returned by the CA service. */
 export interface RedeemResult {
  /** Issued certificate, PEM-encoded. */
  certPem: string;
  /** Certificate chain PEM as returned alongside the issued certificate. */
  certChainPem: string;
 }
@Injectable()
 export class EnrollmentService {
  private readonly logger = new Logger(EnrollmentService.name);
  constructor(
    @Inject(DB) private readonly db: Db,
    private readonly caService: CaService,
    private readonly grantsService: GrantsService,
  ) {}
  /**
   * Generate a single-use enrollment token for an admin to distribute
   * out-of-band to the remote peer operator.
   */
  async createToken(dto: CreateEnrollmentTokenDto): Promise<EnrollmentTokenResult> {
    // Fall back to the documented default when ttlSeconds is absent or not a
    // finite number: Math.min(undefined, 900) is NaN, which would yield an
    // Invalid Date below and make toISOString() throw.
    const requestedTtl = Number.isFinite(dto.ttlSeconds) ? dto.ttlSeconds : 900;
    // Never exceed the 900s ceiling, regardless of what upstream validation allowed.
    const ttl = Math.min(requestedTtl, 900);
    // MED-3: Verify the grantId ↔ peerId binding — prevents attacker from
    // cross-wiring grants to attacker-controlled peers.
    const [grant] = await this.db
      .select({ peerId: federationGrants.peerId })
      .from(federationGrants)
      .where(eq(federationGrants.id, dto.grantId))
      .limit(1);
    if (!grant) {
      throw new NotFoundException(`Grant ${dto.grantId} not found`);
    }
    if (grant.peerId !== dto.peerId) {
      throw new BadRequestException(`peerId does not match the grant's registered peer`);
    }
    // 32 random bytes, hex-encoded.
    const token = crypto.randomBytes(32).toString('hex');
    const expiresAt = new Date(Date.now() + ttl * 1000);
    await this.db.insert(federationEnrollmentTokens).values({
      token,
      grantId: dto.grantId,
      peerId: dto.peerId,
      expiresAt,
    });
    // The token value itself is deliberately absent from the log line.
    this.logger.log(
      `Enrollment token created — grantId=${dto.grantId} peerId=${dto.peerId} expiresAt=${expiresAt.toISOString()}`,
    );
    return { token, expiresAt: expiresAt.toISOString() };
  }
  /**
   * Redeem an enrollment token.
   *
   * Full flow:
   *  1. Fetch token row — NotFoundException if not found
   *  2. usedAt set → GoneException (already used)
   *  3. expiresAt < now → GoneException (expired)
   *  4. Load grant — verify status is 'pending'
   *  5. Atomically claim token (UPDATE WHERE used_at IS NULL RETURNING token)
   *     — if no rows returned, concurrent request won → GoneException
   *  6. Issue cert via CaService (network call, outside transaction)
   *     — if this fails, token is consumed; grant stays pending; admin must recreate
   *  7. Transaction: activate grant + update peer record + write audit log
   *  8. Return { certPem, certChainPem }
   */
  async redeem(token: string, csrPem: string): Promise<RedeemResult> {
    // HIGH-5: Track outcome so we can write a failure audit row on any error.
    let outcome: 'allowed' | 'denied' = 'denied';
    // row may be undefined if the token is not found — used defensively in catch.
    let row: typeof federationEnrollmentTokens.$inferSelect | undefined;
    try {
      // 1. Fetch token row
      const [fetchedRow] = await this.db
        .select()
        .from(federationEnrollmentTokens)
        .where(eq(federationEnrollmentTokens.token, token))
        .limit(1);
      if (!fetchedRow) {
        throw new NotFoundException('Enrollment token not found');
      }
      row = fetchedRow;
      // 2. Already used?
      if (row.usedAt !== null) {
        throw new GoneException('Enrollment token has already been used');
      }
      // 3. Expired?
      if (row.expiresAt < new Date()) {
        throw new GoneException('Enrollment token has expired');
      }
      // 4. Load grant and verify it is still pending
      let grant;
      try {
        grant = await this.grantsService.getGrant(row.grantId);
      } catch (err) {
        // Scope errors are surfaced to the client as 400; anything else propagates.
        if (err instanceof FederationScopeError) {
          throw new BadRequestException(err.message);
        }
        throw err;
      }
      if (grant.status !== 'pending') {
        throw new GoneException(
          `Grant ${row.grantId} is no longer pending (status: ${grant.status})`,
        );
      }
      // 5. Atomically claim the token BEFORE cert issuance to prevent double-minting.
      // WHERE used_at IS NULL ensures only one concurrent request wins.
      // Using .returning() works on both node-postgres and PGlite without rowCount inspection.
      const claimed = await this.db
        .update(federationEnrollmentTokens)
        .set({ usedAt: sql`NOW()` })
        .where(
          and(
            eq(federationEnrollmentTokens.token, token),
            isNull(federationEnrollmentTokens.usedAt),
          ),
        )
        .returning({ token: federationEnrollmentTokens.token });
      if (claimed.length === 0) {
        throw new GoneException('Enrollment token has already been used (concurrent request)');
      }
      // 6. Issue certificate via CaService (network call — outside any transaction).
      // If this throws, the token is already consumed. The grant stays pending.
      // Admin must revoke the grant and create a new one.
      let issued;
      try {
        issued = await this.caService.issueCert({
          csrPem,
          grantId: row.grantId,
          subjectUserId: grant.subjectUserId,
          ttlSeconds: 300,
        });
      } catch (err) {
        // HIGH-4: Log only the first 8 hex chars of the token for correlation — never log the full token.
        this.logger.error(
          `issueCert failed after token ${token.slice(0, 8)}... was claimed — grant ${row.grantId} is stranded pending`,
          err instanceof Error ? err.stack : String(err),
        );
        if (err instanceof FederationScopeError) {
          throw new BadRequestException((err as Error).message);
        }
        throw err;
      }
      // 7. Atomically activate grant, update peer record, and write audit log.
      const certNotAfter = this.extractCertNotAfter(issued.certPem);
      await this.db.transaction(async (tx) => {
        // CRIT-2: Guard activation with WHERE status='pending' to prevent double-activation.
        const [activated] = await tx
          .update(federationGrants)
          .set({ status: 'active' })
          .where(and(eq(federationGrants.id, row!.grantId), eq(federationGrants.status, 'pending')))
          .returning({ id: federationGrants.id });
        if (!activated) {
          throw new ConflictException(
            `Grant ${row!.grantId} is no longer pending — cannot activate`,
          );
        }
        // CRIT-2: Guard peer update with WHERE state='pending'.
        await tx
          .update(federationPeers)
          .set({
            certPem: issued.certPem,
            certSerial: issued.serialNumber,
            certNotAfter,
            state: 'active',
          })
          .where(and(eq(federationPeers.id, row!.peerId), eq(federationPeers.state, 'pending')));
        await tx.insert(federationAuditLog).values({
          requestId: crypto.randomUUID(),
          peerId: row!.peerId,
          grantId: row!.grantId,
          verb: 'enrollment',
          resource: 'federation_grant',
          statusCode: 200,
          outcome: 'allowed',
        });
      });
      this.logger.log(
        `Enrollment complete — peerId=${row.peerId} grantId=${row.grantId} serial=${issued.serialNumber}`,
      );
      outcome = 'allowed';
      // 8. Return cert material
      return {
        certPem: issued.certPem,
        certChainPem: issued.certChainPem,
      };
    } catch (err) {
      // HIGH-5: Best-effort audit write on failure — do not let this throw.
      if (outcome === 'denied') {
        // Record the status the client will actually see.  Every exception
        // type thrown above is mapped; anything else is an unexpected 500.
        const statusCode =
          err instanceof GoneException
            ? 410
            : err instanceof NotFoundException
              ? 404
              : err instanceof BadRequestException
                ? 400
                : err instanceof ConflictException
                  ? 409
                  : 500;
        await this.db
          .insert(federationAuditLog)
          .values({
            requestId: crypto.randomUUID(),
            peerId: row?.peerId ?? null,
            grantId: row?.grantId ?? null,
            verb: 'enrollment',
            resource: 'federation_grant',
            statusCode,
            outcome: 'denied',
          })
          .catch(() => {});
      }
      throw err;
    }
  }
  /**
   * Extract the notAfter date from a PEM certificate.
   * HIGH-2: No silent fallback — a cert that cannot be parsed should fail loud.
   */
  private extractCertNotAfter(certPem: string): Date {
    // Parsing is intentionally unguarded: a malformed PEM throws from the
    // X509Certificate constructor and the caller sees the failure.
    const { validTo } = new X509Certificate(certPem);
    return new Date(validTo);
  }
 }
--- a/apps/gateway/src/federation/federation-admin.dto.ts
+++ b/apps/gateway/src/federation/federation-admin.dto.ts
@@ -0,0 +1,39 @@
 /**
 * DTOs for the federation admin controller (FED-M2-08).
 */
 import { IsInt, IsNotEmpty, IsOptional, IsString, IsUrl, Max, Min } from 'class-validator';
 /** Request body for the admin "create peer keypair" operation. */
 export class CreatePeerKeypairDto {
  /** Required, non-empty string. */
  @IsString()
  @IsNotEmpty()
  commonName!: string;
  /** Required, non-empty string. */
  @IsString()
  @IsNotEmpty()
  displayName!: string;
  /** Optional; when present it must validate as a URL. */
  @IsOptional()
  @IsUrl()
  endpointUrl?: string;
 }
 /** Request body for storing a certificate on a peer. */
 export class StorePeerCertDto {
  /** Required, non-empty string; the PEM content itself is not validated here. */
  @IsString()
  @IsNotEmpty()
  certPem!: string;
 }
 /** Request body for generating an enrollment token from the admin API. */
 export class GenerateEnrollmentTokenDto {
  /** Optional token lifetime in seconds: integer, 60–900 inclusive, class default 900. */
  @IsOptional()
  @IsInt()
  @Min(60)
  @Max(900)
  ttlSeconds: number = 900;
 }
 /** Request body for revoking a grant. */
 export class RevokeGrantBodyDto {
  /** Optional free-text reason; must be a string when supplied. */
  @IsOptional()
  @IsString()
  reason?: string;
 }
--- a/apps/gateway/src/federation/federation.controller.ts
+++ b/apps/gateway/src/federation/federation.controller.ts
@@ -0,0 +1,266 @@
 /**
 * FederationController — admin REST API for federation management (FED-M2-08).
 *
 * Routes (all under /api/admin/federation, all require AdminGuard):
 *
 *   Grant management:
 *     POST   /api/admin/federation/grants
 *     GET    /api/admin/federation/grants
 *     GET    /api/admin/federation/grants/:id
 *     PATCH  /api/admin/federation/grants/:id/revoke
 *     POST   /api/admin/federation/grants/:id/tokens
 *
 *   Peer management:
 *     GET    /api/admin/federation/peers
 *     POST   /api/admin/federation/peers/keypair
 *     PATCH  /api/admin/federation/peers/:id/cert
 *
 * NOTE: The enrollment REDEMPTION endpoint (POST /api/federation/enrollment/:token)
 * is handled by EnrollmentController — not duplicated here.
 */
 import {
  BadRequestException,
  Body,
  Controller,
  Get,
  HttpCode,
  HttpStatus,
  Inject,
  NotFoundException,
  Param,
  Patch,
  Post,
  Query,
  UseGuards,
 } from '@nestjs/common';
 import { webcrypto } from 'node:crypto';
 import { X509Certificate } from 'node:crypto';
 import { Pkcs10CertificateRequestGenerator } from '@peculiar/x509';
 import { type Db, eq, federationPeers } from '@mosaicstack/db';
 import { DB } from '../database/database.module.js';
 import { AdminGuard } from '../admin/admin.guard.js';
 import { GrantsService } from './grants.service.js';
 import { EnrollmentService } from './enrollment.service.js';
 import { sealClientKey } from './peer-key.util.js';
 import { CreateGrantDto, ListGrantsDto } from './grants.dto.js';
 import {
  CreatePeerKeypairDto,
  GenerateEnrollmentTokenDto,
  RevokeGrantBodyDto,
  StorePeerCertDto,
 } from './federation-admin.dto.js';
 // ---------------------------------------------------------------------------
 // Helpers
 // ---------------------------------------------------------------------------
 /**
 * Convert an ArrayBuffer to a Base64 string (for PEM encoding).
 *
 * @param buf Raw bytes to encode.
 * @returns Padded Base64 text with no line breaks; '' for an empty buffer.
 */
 function arrayBufferToBase64(buf: ArrayBuffer): string {
  // Buffer.from(ArrayBuffer) wraps the bytes directly, so there is no need to
  // build an intermediate "binary" string one character at a time.
  return Buffer.from(buf).toString('base64');
 }
 /**
 * Build a PEM block from already-encoded Base64 text.
 *
 * The Base64 payload is broken into rows of at most 64 characters and placed
 * between `-----BEGIN <label>-----` / `-----END <label>-----` marker lines.
 * The result ends with a trailing newline.
 *
 * @param label PEM label, e.g. 'PRIVATE KEY'.
 * @param b64   Base64 payload (without line breaks).
 */
 function toPem(label: string, b64: string): string {
  const header = `-----BEGIN ${label}-----`;
  const footer = `-----END ${label}-----`;
  // No match (empty payload) yields an empty body line between the markers.
  const payload = (b64.match(/.{1,64}/g) ?? []).join('\n');
  return [header, payload, footer, ''].join('\n');
 }
 // ---------------------------------------------------------------------------
 // Controller
 // ---------------------------------------------------------------------------
@Controller('api/admin/federation')
@UseGuards(AdminGuard)
 export class FederationController {
  constructor(
    @Inject(DB) private readonly db: Db,
    @Inject(GrantsService) private readonly grantsService: GrantsService,
    @Inject(EnrollmentService) private readonly enrollmentService: EnrollmentService,
  ) {}
  // ─── Grant management ────────────────────────────────────────────────────
  /**
   * POST /api/admin/federation/grants
   * Create a new grant in pending state.
   *
   * @param body Validated grant payload; forwarded unchanged to GrantsService.
   * @returns The grant row produced by GrantsService.createGrant.
   */
  @Post('grants')
  @HttpCode(HttpStatus.CREATED)
  async createGrant(@Body() body: CreateGrantDto) {
    return this.grantsService.createGrant(body);
  }
  /**
   * GET /api/admin/federation/grants
   * List grants with optional filters.
   *
   * @param query Optional peerId / subjectUserId / status filters.
   */
  @Get('grants')
  async listGrants(@Query() query: ListGrantsDto) {
    return this.grantsService.listGrants(query);
  }
  /**
   * GET /api/admin/federation/grants/:id
   * Get a single grant by ID.
   *
   * GrantsService.getGrant throws NotFoundException when the ID is unknown.
   */
  @Get('grants/:id')
  async getGrant(@Param('id') id: string) {
    return this.grantsService.getGrant(id);
  }
  /**
   * PATCH /api/admin/federation/grants/:id/revoke
   * Revoke an active grant.
   *
   * @param id   Grant ID from the route.
   * @param body Optional free-text revocation reason.
   */
  @Patch('grants/:id/revoke')
  async revokeGrant(@Param('id') id: string, @Body() body: RevokeGrantBodyDto) {
    return this.grantsService.revokeGrant(id, body.reason);
  }
  /**
   * POST /api/admin/federation/grants/:id/tokens
   * Generate a single-use enrollment token for a pending grant.
   * Returns the token plus an enrollmentUrl the operator shares out-of-band.
   *
   * @param id   Grant ID from the route; looked up first so an unknown ID
   *             fails with NotFoundException before any token is created.
   * @param body Optional ttlSeconds (falls back to 900 when absent).
   * @returns `{ token, expiresAt, enrollmentUrl }`.
   */
  @Post('grants/:id/tokens')
  @HttpCode(HttpStatus.CREATED)
  async generateToken(@Param('id') id: string, @Body() body: GenerateEnrollmentTokenDto) {
    const grant = await this.grantsService.getGrant(id);
    const result = await this.enrollmentService.createToken({
      grantId: id,
      peerId: grant.peerId,
      // Explicit fallback in case the body was not instantiated as the DTO
      // class (in which case the class-level default would not apply).
      ttlSeconds: body.ttlSeconds ?? 900,
    });
    // The URL base comes from BETTER_AUTH_URL, with a localhost default.
    const baseUrl = process.env['BETTER_AUTH_URL'] ?? 'http://localhost:14242';
    const enrollmentUrl = `${baseUrl}/api/federation/enrollment/${result.token}`;
    return {
      token: result.token,
      expiresAt: result.expiresAt,
      enrollmentUrl,
    };
  }
  // ─── Peer management ─────────────────────────────────────────────────────
  /**
   * GET /api/admin/federation/peers
   * List all federation peer rows, ordered by commonName.
   *
   * NOTE(review): select() is called without a projection, so the response
   * presumably contains every column of the peer row — including the sealed
   * clientKeyPem written by createPeerKeypair. Confirm that is intended.
   */
  @Get('peers')
  async listPeers() {
    return this.db.select().from(federationPeers).orderBy(federationPeers.commonName);
  }
  /**
   * POST /api/admin/federation/peers/keypair
   * Generate a new peer entry with EC P-256 key pair and a PKCS#10 CSR.
   *
   * Flow:
   *  1. Generate EC P-256 key pair via webcrypto
   *  2. Generate a self-signed CSR via @peculiar/x509
   *  3. Export private key as PEM
   *  4. sealClientKey(privatePem) → sealed blob
   *  5. Insert pending peer row
   *  6. Return { peerId, csrPem }
   *
   * The private key itself is never returned; only its sealed form is stored.
   */
  @Post('peers/keypair')
  @HttpCode(HttpStatus.CREATED)
  async createPeerKeypair(@Body() body: CreatePeerKeypairDto) {
    // 1. Generate EC P-256 key pair via Web Crypto
    const keyPair = await webcrypto.subtle.generateKey(
      { name: 'ECDSA', namedCurve: 'P-256' },
      true, // extractable
      ['sign', 'verify'],
    );
    // 2. Generate PKCS#10 CSR
    // NOTE(review): commonName is interpolated into the DN string unescaped;
    // characters such as ',', '+' or '=' may be interpreted as DN syntax.
    // Confirm whether the DTO should restrict the allowed characters.
    const csr = await Pkcs10CertificateRequestGenerator.create({
      name: `CN=${body.commonName}`,
      keys: keyPair,
      signingAlgorithm: { name: 'ECDSA', hash: 'SHA-256' },
    });
    const csrPem = csr.toString('pem');
    // 3. Export private key as PKCS#8 PEM
    const pkcs8Der = await webcrypto.subtle.exportKey('pkcs8', keyPair.privateKey);
    const privatePem = toPem('PRIVATE KEY', arrayBufferToBase64(pkcs8Der));
    // 4. Seal the private key
    const sealed = sealClientKey(privatePem);
    // 5. Insert pending peer row. The cert fields hold placeholders ('',
    //    'pending', epoch) until storePeerCert supplies the real certificate.
    const [peer] = await this.db
      .insert(federationPeers)
      .values({
        commonName: body.commonName,
        displayName: body.displayName,
        certPem: '',
        certSerial: 'pending',
        certNotAfter: new Date(0),
        clientKeyPem: sealed,
        state: 'pending',
        endpointUrl: body.endpointUrl,
      })
      .returning();
    return {
      peerId: peer!.id,
      csrPem,
    };
  }
  /**
   * PATCH /api/admin/federation/peers/:id/cert
   * Store a signed certificate after enrollment completes.
   *
   * Flow:
   *  1. Parse the cert to extract serial and notAfter
   *  2. Update the peer row with cert data + state='active'
   *  3. Return the updated peer row
   *
   * @throws NotFoundException   when no peer row has the given ID.
   * @throws BadRequestException when certPem cannot be parsed as X.509.
   */
  @Patch('peers/:id/cert')
  async storePeerCert(@Param('id') id: string, @Body() body: StorePeerCertDto) {
    // Ensure peer exists
    const [existing] = await this.db
      .select({ id: federationPeers.id })
      .from(federationPeers)
      .where(eq(federationPeers.id, id))
      .limit(1);
    if (!existing) {
      throw new NotFoundException(`Peer ${id} not found`);
    }
    // 1. Parse cert. The PEM comes straight from the request body, so a
    //    malformed value is a client error (400) rather than a server fault.
    let x509: X509Certificate;
    try {
      x509 = new X509Certificate(body.certPem);
    } catch {
      throw new BadRequestException('certPem is not a valid PEM-encoded X.509 certificate');
    }
    const certSerial = x509.serialNumber;
    const certNotAfter = new Date(x509.validTo);
    // 2. Update peer
    const [updated] = await this.db
      .update(federationPeers)
      .set({
        certPem: body.certPem,
        certSerial,
        certNotAfter,
        state: 'active',
      })
      .where(eq(federationPeers.id, id))
      .returning();
    // The row can disappear between the existence check and the update.
    if (!updated) {
      throw new NotFoundException(`Peer ${id} not found`);
    }
    return updated;
  }
 }
--- a/apps/gateway/src/federation/federation.module.ts
+++ b/apps/gateway/src/federation/federation.module.ts
@@ -0,0 +1,29 @@
 import { Module } from '@nestjs/common';
 import { AdminGuard } from '../admin/admin.guard.js';
 import { CaService } from './ca.service.js';
 import { EnrollmentController } from './enrollment.controller.js';
 import { EnrollmentService } from './enrollment.service.js';
 import { FederationController } from './federation.controller.js';
 import { GrantsService } from './grants.service.js';
 import { FederationClientService } from './client/index.js';
 import { FederationAuthGuard } from './server/index.js';
@Module({
  // HTTP entry points: enrollment endpoints and the admin federation API.
  controllers: [EnrollmentController, FederationController],
  // AdminGuard is registered as a provider but is not re-exported below.
  providers: [
    AdminGuard,
    CaService,
    EnrollmentService,
    GrantsService,
    FederationClientService,
    FederationAuthGuard,
  ],
  // Providers made available to modules that import FederationModule.
  exports: [
    CaService,
    EnrollmentService,
    GrantsService,
    FederationClientService,
    FederationAuthGuard,
  ],
 })
 export class FederationModule {}
--- a/apps/gateway/src/federation/grants.dto.ts
+++ b/apps/gateway/src/federation/grants.dto.ts
@@ -0,0 +1,36 @@
 import { IsDateString, IsIn, IsObject, IsOptional, IsString, IsUUID } from 'class-validator';
 /**
 * Payload for creating a federation grant (consumed by GrantsService.createGrant).
 */
 export class CreateGrantDto {
  // UUID stored as the grant's peerId.
  @IsUUID()
  peerId!: string;
  // UUID stored as the grant's subjectUserId.
  @IsUUID()
  subjectUserId!: string;
  // Only "is an object" is checked here; GrantsService.createGrant runs the
  // full scope validation via parseFederationScope.
  @IsObject()
  scope!: Record<string, unknown>;
  // Optional date string; the service converts it with `new Date(...)` and
  // stores null when it is absent.
  @IsOptional()
  @IsDateString()
  expiresAt?: string;
 }
 /**
 * Optional filters for listing grants. Every field may be omitted;
 * GrantsService.listGrants ANDs together whichever filters are present.
 */
 export class ListGrantsDto {
  // Restrict to grants for this peer (UUID).
  @IsOptional()
  @IsUUID()
  peerId?: string;
  // Restrict to grants for this subject user (UUID).
  @IsOptional()
  @IsUUID()
  subjectUserId?: string;
  // Restrict to one of the four grant statuses.
  @IsOptional()
  @IsIn(['pending', 'active', 'revoked', 'expired'])
  status?: 'pending' | 'active' | 'revoked' | 'expired';
 }
 /**
 * Optional revocation reason payload.
 * NOTE(review): has the same shape as RevokeGrantBodyDto in
 * federation-admin.dto.ts, which is the one the admin controller uses —
 * confirm whether both are needed.
 */
 export class RevokeGrantDto {
  // Optional free-text reason.
  @IsOptional()
  @IsString()
  reason?: string;
 }
--- a/apps/gateway/src/federation/grants.service.ts
+++ b/apps/gateway/src/federation/grants.service.ts
@@ -0,0 +1,190 @@
 /**
 * Federation grants service — CRUD + status transitions (FED-M2-06).
 *
 * Business logic only. CSR/cert work is handled by M2-07.
 *
 * Status lifecycle:
 *   pending → active   (activateGrant, called by M2-07 enrollment controller after cert signed)
 *   active  → revoked  (revokeGrant)
 *   active  → expired  (expireGrant, called by M6 scheduler)
 */
 import { ConflictException, Inject, Injectable, NotFoundException } from '@nestjs/common';
 import { type Db, and, eq, federationGrants, federationPeers } from '@mosaicstack/db';
 import { DB } from '../database/database.module.js';
 import { parseFederationScope } from './scope-schema.js';
 import type { CreateGrantDto, ListGrantsDto } from './grants.dto.js';
 // Row type selected from the federationGrants table.
 export type Grant = typeof federationGrants.$inferSelect;
 // Row type selected from the federationPeers table.
 export type Peer = typeof federationPeers.$inferSelect;
 // A grant row with its associated peer row attached under `peer`.
 export type GrantWithPeer = Grant & { peer: Peer };
@Injectable()
 export class GrantsService {
  constructor(@Inject(DB) private readonly db: Db) {}
  /**
   * Create a new grant in `pending` state.
   * Validates the scope against the federation scope JSON schema before inserting.
   *
   * NOTE(review): the parsed result is discarded and `dto.scope` is stored
   * as received, so schema defaults are not persisted — confirm that is
   * intended.
   *
   * @throws FederationScopeError when the scope fails validation.
   */
  async createGrant(dto: CreateGrantDto): Promise<Grant> {
    // Throws FederationScopeError (a plain Error subclass) on invalid scope.
    parseFederationScope(dto.scope);
    const [grant] = await this.db
      .insert(federationGrants)
      .values({
        peerId: dto.peerId,
        subjectUserId: dto.subjectUserId,
        scope: dto.scope,
        status: 'pending',
        expiresAt: dto.expiresAt != null ? new Date(dto.expiresAt) : null,
      })
      .returning();
    return grant!;
  }
  /**
   * Fetch a single grant by ID. Throws NotFoundException if not found.
   */
  async getGrant(id: string): Promise<Grant> {
    const [grant] = await this.db
      .select()
      .from(federationGrants)
      .where(eq(federationGrants.id, id))
      .limit(1);
    if (!grant) {
      throw new NotFoundException(`Grant ${id} not found`);
    }
    return grant;
  }
  /**
   * Fetch a single grant by ID, joined with its associated peer row.
   * Used by FederationAuthGuard to perform grant status + cert serial checks
   * in a single DB round-trip.
   *
   * Because this is an inner join, a missing grant and a missing peer row
   * (data integrity issue) are indistinguishable here: both yield no row and
   * both raise the same NotFoundException.
   */
  async getGrantWithPeer(id: string): Promise<GrantWithPeer> {
    const rows = await this.db
      .select()
      .from(federationGrants)
      .innerJoin(federationPeers, eq(federationGrants.peerId, federationPeers.id))
      .where(eq(federationGrants.id, id))
      .limit(1);
    const row = rows[0];
    if (!row) {
      throw new NotFoundException(`Grant ${id} not found`);
    }
    return {
      ...row.federation_grants,
      peer: row.federation_peers,
    };
  }
  /**
   * List grants with optional filters for peerId, subjectUserId, and status.
   * Filters that are present are combined with AND; with no filters every
   * grant is returned.
   */
  async listGrants(filters: ListGrantsDto): Promise<Grant[]> {
    const conditions = [];
    if (filters.peerId != null) {
      conditions.push(eq(federationGrants.peerId, filters.peerId));
    }
    if (filters.subjectUserId != null) {
      conditions.push(eq(federationGrants.subjectUserId, filters.subjectUserId));
    }
    if (filters.status != null) {
      conditions.push(eq(federationGrants.status, filters.status));
    }
    if (conditions.length === 0) {
      return this.db.select().from(federationGrants);
    }
    return this.db
      .select()
      .from(federationGrants)
      .where(and(...conditions));
  }
  /**
   * Transition a grant from `pending` → `active`.
   * Called by M2-07 enrollment controller after cert is signed.
   *
   * @throws NotFoundException if the grant does not exist.
   * @throws ConflictException if the grant is not in `pending` state, either
   *         at the initial check or by the time the update is applied.
   */
  async activateGrant(id: string): Promise<Grant> {
    const grant = await this.getGrant(id);
    if (grant.status !== 'pending') {
      throw new ConflictException(
        `Grant ${id} cannot be activated: expected status 'pending', got '${grant.status}'`,
      );
    }
    // The expected status is repeated in the WHERE clause so a concurrent
    // transition between the read above and this write cannot be overwritten.
    const [updated] = await this.db
      .update(federationGrants)
      .set({ status: 'active' })
      .where(and(eq(federationGrants.id, id), eq(federationGrants.status, 'pending')))
      .returning();
    if (!updated) {
      throw new ConflictException(
        `Grant ${id} cannot be activated: it is no longer in status 'pending'`,
      );
    }
    return updated;
  }
  /**
   * Transition a grant from `active` → `revoked`.
   * Sets revokedAt and optionally revokedReason.
   *
   * @throws NotFoundException if the grant does not exist.
   * @throws ConflictException if the grant is not in `active` state, either
   *         at the initial check or by the time the update is applied.
   */
  async revokeGrant(id: string, reason?: string): Promise<Grant> {
    const grant = await this.getGrant(id);
    if (grant.status !== 'active') {
      throw new ConflictException(
        `Grant ${id} cannot be revoked: expected status 'active', got '${grant.status}'`,
      );
    }
    // Conditional update: only rows still in `active` are touched.
    const [updated] = await this.db
      .update(federationGrants)
      .set({
        status: 'revoked',
        revokedAt: new Date(),
        revokedReason: reason ?? null,
      })
      .where(and(eq(federationGrants.id, id), eq(federationGrants.status, 'active')))
      .returning();
    if (!updated) {
      throw new ConflictException(
        `Grant ${id} cannot be revoked: it is no longer in status 'active'`,
      );
    }
    return updated;
  }
  /**
   * Transition a grant from `active` → `expired`.
   * Intended for use by the M6 scheduler.
   *
   * @throws NotFoundException if the grant does not exist.
   * @throws ConflictException if the grant is not in `active` state, either
   *         at the initial check or by the time the update is applied.
   */
  async expireGrant(id: string): Promise<Grant> {
    const grant = await this.getGrant(id);
    if (grant.status !== 'active') {
      throw new ConflictException(
        `Grant ${id} cannot be expired: expected status 'active', got '${grant.status}'`,
      );
    }
    // Conditional update: prevents flipping a concurrently revoked grant to
    // `expired`.
    const [updated] = await this.db
      .update(federationGrants)
      .set({ status: 'expired' })
      .where(and(eq(federationGrants.id, id), eq(federationGrants.status, 'active')))
      .returning();
    if (!updated) {
      throw new ConflictException(
        `Grant ${id} cannot be expired: it is no longer in status 'active'`,
      );
    }
    return updated;
  }
 }
--- a/apps/gateway/src/federation/oid.util.ts
+++ b/apps/gateway/src/federation/oid.util.ts
@@ -0,0 +1,146 @@
 /**
 * Shared OID extraction helpers for Mosaic federation certificates.
 *
 * Custom OID registry (PRD §6, docs/federation/SETUP.md):
 *   1.3.6.1.4.1.99999.1  — mosaic_grant_id
 *   1.3.6.1.4.1.99999.2  — mosaic_subject_user_id
 *
 * The encoding convention: each extension value is an OCTET STRING wrapping
 * an ASN.1 UTF8String TLV:
 *   0x0C (tag) + 1-byte length + UTF-8 bytes
 *
 * CaService encodes values this way via encodeUtf8String(), and this module
 * decodes them with the corresponding `.slice(2)` to skip tag + length byte.
 *
 * This module is intentionally pure — no NestJS, no DB, no network I/O.
 */
 import { X509Certificate } from '@peculiar/x509';
 // ---------------------------------------------------------------------------
 // OID constants
 // ---------------------------------------------------------------------------
 // Certificate extension OID carrying the Mosaic grant ID.
 export const OID_MOSAIC_GRANT_ID = '1.3.6.1.4.1.99999.1';
 // Certificate extension OID carrying the Mosaic subject user ID.
 export const OID_MOSAIC_SUBJECT_USER_ID = '1.3.6.1.4.1.99999.2';
 // ---------------------------------------------------------------------------
 // Extraction result types
 // ---------------------------------------------------------------------------
 // Decoded values of the two Mosaic custom extensions.
 export interface MosaicOids {
  grantId: string;
  subjectUserId: string;
 }
 // Discriminated union on `ok`: either the decoded values, or an error code
 // with an optional human-readable detail message.
 export type OidExtractionResult =
  | { ok: true; value: MosaicOids }
  | {
      ok: false;
      error: 'MISSING_GRANT_ID' | 'MISSING_SUBJECT_USER_ID' | 'PARSE_ERROR';
      detail?: string;
    };
 // ---------------------------------------------------------------------------
 // Helpers
 // ---------------------------------------------------------------------------
 // Strict decoder: with `fatal: true`, decode() throws a TypeError on invalid
 // UTF-8 instead of silently substituting U+FFFD replacement characters.
 const decoder = new TextDecoder('utf-8', { fatal: true });
 /**
 * Decode an extension value encoded as ASN.1 UTF8String TLV
 * (tag 0x0C + 1-byte length + UTF-8 bytes).
 * Validates tag, length byte, buffer bounds, and UTF-8 well-formedness.
 * Throws a descriptive Error (or a TypeError for invalid UTF-8) on malformed
 * input; caller wraps in try/catch.
 *
 * @param value Raw extension value bytes.
 * @returns The decoded string payload (may be empty when the length is 0).
 */
 function decodeUtf8StringTlv(value: ArrayBuffer): string {
  const bytes = new Uint8Array(value);
  // Need at least tag + length bytes
  if (bytes.length < 2) {
    throw new Error(`UTF8String TLV too short: expected at least 2 bytes, got ${bytes.length}`);
  }
  // Tag byte must be 0x0C (ASN.1 UTF8String)
  if (bytes[0] !== 0x0c) {
    throw new Error(
      `UTF8String TLV tag mismatch: expected 0x0C, got 0x${bytes[0]!.toString(16).toUpperCase()}`,
    );
  }
  // Only single-byte length form is supported (values 0–127); long form not needed
  // for OID strings of this length.
  const declaredLength = bytes[1]!;
  if (declaredLength > 127) {
    throw new Error(
      `UTF8String TLV uses long-form length (0x${declaredLength.toString(16).toUpperCase()}), which is not supported`,
    );
  }
  // Declared length must match actual remaining bytes
  if (declaredLength !== bytes.length - 2) {
    throw new Error(
      `UTF8String TLV length mismatch: declared ${declaredLength}, actual ${bytes.length - 2}`,
    );
  }
  // Skip: tag (1 byte) + length (1 byte). subarray() is a view, so the
  // payload is not copied before decoding.
  return decoder.decode(bytes.subarray(2));
 }
 // ---------------------------------------------------------------------------
 // Public API
 // ---------------------------------------------------------------------------
 /**
 * Read the two Mosaic custom extensions (grant ID and subject user ID) from
 * a certificate that has already been parsed with @peculiar/x509.
 *
 * Never throws. The grant-ID extension is looked up first, so when both
 * extensions are absent the result is `MISSING_GRANT_ID`. Any exception
 * raised while looking up or decoding is reported as `PARSE_ERROR` with the
 * exception message in `detail`.
 *
 * @param cert Parsed certificate to inspect.
 * @returns `{ ok: true, value }` on success, `{ ok: false, error, detail? }` otherwise.
 */
 export function extractMosaicOids(cert: X509Certificate): OidExtractionResult {
  try {
    const grantExtension = cert.getExtension(OID_MOSAIC_GRANT_ID);
    if (!grantExtension) {
      return { ok: false, error: 'MISSING_GRANT_ID' };
    }

    const subjectExtension = cert.getExtension(OID_MOSAIC_SUBJECT_USER_ID);
    if (!subjectExtension) {
      return { ok: false, error: 'MISSING_SUBJECT_USER_ID' };
    }

    const value: MosaicOids = {
      grantId: decodeUtf8StringTlv(grantExtension.value),
      subjectUserId: decodeUtf8StringTlv(subjectExtension.value),
    };
    return { ok: true, value };
  } catch (cause) {
    const detail = cause instanceof Error ? cause.message : String(cause);
    return { ok: false, error: 'PARSE_ERROR', detail };
  }
 }
 /**
 * PEM convenience wrapper around extractMosaicOids.
 *
 * A certificate that cannot be constructed from the PEM text is reported as
 * `PARSE_ERROR` (with the exception message in `detail`); otherwise the
 * result of extractMosaicOids is returned as-is. Never throws.
 *
 * @param certPem PEM-encoded certificate text.
 */
 export function extractMosaicOidsFromPem(certPem: string): OidExtractionResult {
  let parsed: X509Certificate;
  try {
    parsed = new X509Certificate(certPem);
  } catch (cause) {
    const detail = cause instanceof Error ? cause.message : String(cause);
    return { ok: false, error: 'PARSE_ERROR', detail };
  }
  return extractMosaicOids(parsed);
 }
--- a/apps/gateway/src/federation/peer-key.util.ts
+++ b/apps/gateway/src/federation/peer-key.util.ts
@@ -0,0 +1,9 @@
 import { seal, unseal } from '@mosaicstack/auth';
 /**
 * Seal a peer's private key PEM by delegating to `seal` from
 * @mosaicstack/auth.
 *
 * @param privateKeyPem Private key in PEM text form.
 * @returns Whatever string `seal` produces for that input.
 */
 export function sealClientKey(privateKeyPem: string): string {
  const sealedKey = seal(privateKeyPem);
  return sealedKey;
 }
 /**
 * Reverse of sealClientKey: delegates to `unseal` from @mosaicstack/auth.
 *
 * @param sealedKey A value previously produced by sealClientKey.
 * @returns Whatever string `unseal` produces for that input.
 */
 export function unsealClientKey(sealedKey: string): string {
  const privateKeyPem = unseal(sealedKey);
  return privateKeyPem;
 }
--- a/apps/gateway/src/federation/scope-schema.spec.ts
+++ b/apps/gateway/src/federation/scope-schema.spec.ts
@@ -0,0 +1,187 @@
 /**
 * Unit tests for FederationScopeSchema and parseFederationScope.
 *
 * Coverage:
 *  - Valid: minimal scope
 *  - Valid: full PRD §8.1 example
 *  - Valid: resources + excluded_resources (no overlap)
 *  - Invalid: empty resources
 *  - Invalid: unknown resource value
 *  - Invalid: resources / excluded_resources intersection
 *  - Invalid: filter key not in resources
 *  - Invalid: max_rows_per_query = 0
 *  - Invalid: max_rows_per_query = 10001
 *  - Invalid: not an object / null
 *  - Defaults: include_personal defaults to true; excluded_resources defaults to []
 *  - Sentinel: console.warn fires for sensitive resources
 */
 import { describe, it, expect, vi, afterEach } from 'vitest';
 import {
  parseFederationScope,
  FederationScopeError,
  FederationScopeSchema,
 } from './scope-schema.js';
 // Undo every vi.spyOn() installed by a test (the console.warn spies below)
 // so spies do not leak between tests.
 afterEach(() => {
  vi.restoreAllMocks();
 });
 // Inputs that must parse successfully, with spot checks on the parsed value.
 describe('parseFederationScope — valid inputs', () => {
  it('accepts a minimal scope (resources + max_rows_per_query only)', () => {
    const scope = parseFederationScope({
      resources: ['tasks'],
      max_rows_per_query: 100,
    });
    expect(scope.resources).toEqual(['tasks']);
    expect(scope.max_rows_per_query).toBe(100);
    // Omitted optional fields: excluded_resources gets its default, filters stays unset.
    expect(scope.excluded_resources).toEqual([]);
    expect(scope.filters).toBeUndefined();
  });
  it('accepts the full PRD §8.1 example', () => {
    const scope = parseFederationScope({
      resources: ['tasks', 'notes', 'memory'],
      filters: {
        tasks: { include_teams: ['team_uuid_1', 'team_uuid_2'], include_personal: true },
        notes: { include_personal: true, include_teams: [] },
        memory: { include_personal: true },
      },
      excluded_resources: ['credentials', 'api_keys'],
      max_rows_per_query: 500,
    });
    expect(scope.resources).toEqual(['tasks', 'notes', 'memory']);
    expect(scope.excluded_resources).toEqual(['credentials', 'api_keys']);
    expect(scope.filters?.tasks?.include_teams).toEqual(['team_uuid_1', 'team_uuid_2']);
    expect(scope.max_rows_per_query).toBe(500);
  });
  // Here resources and excluded_resources are disjoint, so the intersection
  // guard must not fire.
  it('accepts a scope with excluded_resources and no filter overlap', () => {
    const scope = parseFederationScope({
      resources: ['tasks', 'notes'],
      excluded_resources: ['memory'],
      max_rows_per_query: 250,
    });
    expect(scope.resources).toEqual(['tasks', 'notes']);
    expect(scope.excluded_resources).toEqual(['memory']);
  });
 });
 // Schema defaults applied when optional fields are omitted.
 describe('parseFederationScope — defaults', () => {
  it('defaults excluded_resources to []', () => {
    const scope = parseFederationScope({ resources: ['tasks'], max_rows_per_query: 1 });
    expect(scope.excluded_resources).toEqual([]);
  });
  it('defaults include_personal to true when filter is provided without it', () => {
    const scope = parseFederationScope({
      resources: ['tasks'],
      filters: { tasks: { include_teams: ['t1'] } },
      max_rows_per_query: 10,
    });
    expect(scope.filters?.tasks?.include_personal).toBe(true);
  });
 });
 // Every rejection path must surface as FederationScopeError.
 describe('parseFederationScope — invalid inputs', () => {
  it('throws FederationScopeError for empty resources array', () => {
    expect(() => parseFederationScope({ resources: [], max_rows_per_query: 100 })).toThrow(
      FederationScopeError,
    );
  });
  it('throws for unknown resource value in resources', () => {
    expect(() =>
      parseFederationScope({ resources: ['unknown_resource'], max_rows_per_query: 100 }),
    ).toThrow(FederationScopeError);
  });
  // 'memory' is both granted and excluded.
  it('throws when resources and excluded_resources intersect', () => {
    expect(() =>
      parseFederationScope({
        resources: ['tasks', 'memory'],
        excluded_resources: ['memory'],
        max_rows_per_query: 100,
      }),
    ).toThrow(FederationScopeError);
  });
  // The filter key 'notes' is not listed in resources.
  it('throws when filters references a resource not in resources', () => {
    expect(() =>
      parseFederationScope({
        resources: ['tasks'],
        filters: { notes: { include_personal: true } },
        max_rows_per_query: 100,
      }),
    ).toThrow(FederationScopeError);
  });
  // One below the lower bound (1).
  it('throws for max_rows_per_query = 0', () => {
    expect(() => parseFederationScope({ resources: ['tasks'], max_rows_per_query: 0 })).toThrow(
      FederationScopeError,
    );
  });
  // One above the upper bound (10000).
  it('throws for max_rows_per_query = 10001', () => {
    expect(() => parseFederationScope({ resources: ['tasks'], max_rows_per_query: 10001 })).toThrow(
      FederationScopeError,
    );
  });
  it('throws for null input', () => {
    expect(() => parseFederationScope(null)).toThrow(FederationScopeError);
  });
  it('throws for non-object input (string)', () => {
    expect(() => parseFederationScope('not-an-object')).toThrow(FederationScopeError);
  });
 });
 // console.warn is replaced with a silent spy in each test so the warning can
 // be asserted on without printing to the test output.
 describe('parseFederationScope — sentinel warning', () => {
  it('emits console.warn when resources includes "credentials"', () => {
    const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
    parseFederationScope({
      resources: ['tasks', 'credentials'],
      max_rows_per_query: 100,
    });
    expect(warnSpy).toHaveBeenCalledWith(
      expect.stringContaining(
        '[FederationScope] WARNING: scope grants sensitive resource "credentials"',
      ),
    );
  });
  it('emits console.warn when resources includes "api_keys"', () => {
    const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
    parseFederationScope({
      resources: ['tasks', 'api_keys'],
      max_rows_per_query: 100,
    });
    expect(warnSpy).toHaveBeenCalledWith(
      expect.stringContaining(
        '[FederationScope] WARNING: scope grants sensitive resource "api_keys"',
      ),
    );
  });
  it('does NOT emit console.warn for non-sensitive resources', () => {
    const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
    parseFederationScope({ resources: ['tasks', 'notes', 'memory'], max_rows_per_query: 100 });
    expect(warnSpy).not.toHaveBeenCalled();
  });
 });
 // Inclusive bounds of max_rows_per_query, checked directly against the
 // schema with safeParse (no exception involved).
 describe('FederationScopeSchema — boundary values', () => {
  it('accepts max_rows_per_query = 1 (lower bound)', () => {
    const result = FederationScopeSchema.safeParse({ resources: ['tasks'], max_rows_per_query: 1 });
    expect(result.success).toBe(true);
  });
  it('accepts max_rows_per_query = 10000 (upper bound)', () => {
    const result = FederationScopeSchema.safeParse({
      resources: ['tasks'],
      max_rows_per_query: 10000,
    });
    expect(result.success).toBe(true);
  });
 });
--- a/apps/gateway/src/federation/scope-schema.ts
+++ b/apps/gateway/src/federation/scope-schema.ts
@@ -0,0 +1,147 @@
 /**
 * Federation grant scope schema and validator.
 *
 * Source of truth: docs/federation/PRD.md §8.1
 *
 * This module is intentionally pure — no DB, no NestJS, no CA wiring.
 * It is reusable from grant CRUD (M2-06) and scope enforcement (M3+).
 */
 import { z } from 'zod';
 // ---------------------------------------------------------------------------
 // Allowlist of federation resources (canonical — M3+ will extend this list)
 // ---------------------------------------------------------------------------
 // Every resource name the scope schema accepts in `resources` and
 // `excluded_resources`. Declared `as const` so the union type below can be
 // derived from it.
 export const FEDERATION_RESOURCE_VALUES = [
  'tasks',
  'notes',
  'memory',
  'credentials',
  'api_keys',
 ] as const;
 // Union of the literal resource names listed above.
 export type FederationResource = (typeof FEDERATION_RESOURCE_VALUES)[number];
 /**
 * Sensitive resources require explicit admin approval (PRD §8.4).
 * The parser warns when these appear in `resources`; M2-06 grant CRUD
 * will add a hard gate on top of this warning.
 * NOTE(review): this module only warns and never rejects these resources —
 * confirm where the hard gate is (or will be) enforced.
 */
 const SENSITIVE_RESOURCES: ReadonlySet<FederationResource> = new Set(['credentials', 'api_keys']);
 // ---------------------------------------------------------------------------
 // Sub-schemas
 // ---------------------------------------------------------------------------
 // `resources`: a non-empty array of allowlisted resource names with no
 // duplicates (checked by comparing Set size with array length).
 const ResourceArraySchema = z
  .array(z.enum(FEDERATION_RESOURCE_VALUES))
  .nonempty({ message: 'resources must contain at least one value' })
  .refine((arr) => new Set(arr).size === arr.length, {
    message: 'resources must not contain duplicate values',
  });
 // Per-resource filter: an optional list of team identifier strings, plus
 // `include_personal`, which defaults to true when omitted.
 const ResourceFilterSchema = z.object({
  include_teams: z.array(z.string()).optional(),
  include_personal: z.boolean().default(true),
 });
 // ---------------------------------------------------------------------------
 // Top-level schema
 // ---------------------------------------------------------------------------
 /**
 * Zod schema for a federation grant scope.
 *
 * Field rules:
 *  - resources: required, non-empty, unique, allowlisted names
 *  - excluded_resources: allowlisted names, unique, defaults to []
 *  - filters: optional record of per-resource filters keyed by string
 *  - max_rows_per_query: required integer between 1 and 10000 inclusive
 *
 * Cross-field rules (superRefine):
 *  - a name may not appear in both resources and excluded_resources
 *  - every filters key must also appear in resources
 */
 export const FederationScopeSchema = z
  .object({
    resources: ResourceArraySchema,
    excluded_resources: z
      .array(z.enum(FEDERATION_RESOURCE_VALUES))
      .default([])
      .refine((arr) => new Set(arr).size === arr.length, {
        message: 'excluded_resources must not contain duplicate values',
      }),
    // Keys are plain strings here; membership in `resources` is enforced in
    // superRefine below.
    filters: z.record(z.string(), ResourceFilterSchema).optional(),
    max_rows_per_query: z
      .number()
      .int({ message: 'max_rows_per_query must be an integer' })
      .min(1, { message: 'max_rows_per_query must be at least 1' })
      .max(10000, { message: 'max_rows_per_query must be at most 10000' }),
  })
  .superRefine((data, ctx) => {
    const resourceSet = new Set(data.resources);
    // Intersection guard: a resource cannot be both granted and excluded
    for (const r of data.excluded_resources) {
      if (resourceSet.has(r)) {
        ctx.addIssue({
          code: z.ZodIssueCode.custom,
          message: `Resource "${r}" appears in both resources and excluded_resources`,
          path: ['excluded_resources'],
        });
      }
    }
    // Filter keys must be a subset of resources
    if (data.filters) {
      for (const key of Object.keys(data.filters)) {
        if (!resourceSet.has(key as FederationResource)) {
          ctx.addIssue({
            code: z.ZodIssueCode.custom,
            message: `filters key "${key}" references a resource not present in resources`,
            path: ['filters', key],
          });
        }
      }
    }
  });
 // Parsed (output) shape of the schema, i.e. with defaults applied.
 export type FederationScope = z.infer<typeof FederationScopeSchema>;
 // ---------------------------------------------------------------------------
 // Error class
 // ---------------------------------------------------------------------------
 /**
 * Error raised by parseFederationScope when a scope fails validation.
 * A plain Error subclass whose `name` is set to 'FederationScopeError'.
 */
 export class FederationScopeError extends Error {
  /**
   * @param message Human-readable description of the validation failure.
   */
  constructor(message: string) {
    super(message);
    // Overwrite the inherited 'Error' name so logs identify this error type.
    this.name = 'FederationScopeError';
  }
 }
 // ---------------------------------------------------------------------------
 // Typed parser
 // ---------------------------------------------------------------------------
 /**
 * Validate an arbitrary value against FederationScopeSchema and return the
 * typed, default-filled scope.
 *
 * On failure, every Zod issue is rendered as `  - [path] message` (with
 * `root` standing in for an empty path) and the lines are joined into a
 * single FederationScopeError.
 *
 * On success, one `console.warn` is emitted per sensitive resource
 * (`credentials`, `api_keys`) found in `resources`, in the order they appear.
 * Per PRD §8.4 these require explicit admin approval; this function only
 * warns and does not reject them.
 *
 * @param input Untrusted value to validate.
 * @returns The parsed scope.
 * @throws FederationScopeError when validation fails.
 */
 export function parseFederationScope(input: unknown): FederationScope {
  const parsed = FederationScopeSchema.safeParse(input);
  if (parsed.success) {
    const validated = parsed.data;
    // Sentinel warning for sensitive resources (PRD §8.4)
    validated.resources
      .filter((name) => SENSITIVE_RESOURCES.has(name))
      .forEach((name) => {
        console.warn(
          `[FederationScope] WARNING: scope grants sensitive resource "${name}". Per PRD §8.4 this requires explicit admin approval and is logged.`,
        );
      });
    return validated;
  }
  const issueLines = parsed.error.issues.map(
    (issue) => `  - [${issue.path.join('.') || 'root'}] ${issue.message}`,
  );
  throw new FederationScopeError(`Invalid federation scope:\n${issueLines.join('\n')}`);
 }
--- a/apps/gateway/src/federation/server/tests/federation-auth.guard.spec.ts
+++ b/apps/gateway/src/federation/server/tests/federation-auth.guard.spec.ts
@@ -0,0 +1,521 @@
 /**
 * Unit tests for FederationAuthGuard (FED-M3-03).
 *
 * Coverage:
 *  - No TLS socket (raw socket has no getPeerCertificate) → 401
 *  - Cert not presented (peer certificate has no raw DER bytes) → 401
 *  - Cert parse failure (corrupt DER raw bytes) → 401
 *  - Missing grantId OID → 401
 *  - Missing subjectUserId OID → 401
 *  - Grant not found (GrantsService throws NotFoundException) → 403
 *  - Grant in `pending` status → 403
 *  - Grant in `revoked` status → 403
 *  - Grant in `expired` status → 403
 *  - Cert serial mismatch → 403
 *  - Cert subjectUserId does not match the DB grant (CRIT-1 regression) → 403
 *  - Happy path: active grant + matching cert serial → context attached, returns true
 */
 import 'reflect-metadata';
 import { describe, it, expect, vi, beforeEach } from 'vitest';
 import type { ExecutionContext } from '@nestjs/common';
 import { NotFoundException } from '@nestjs/common';
 import { FederationAuthGuard } from '../federation-auth.guard.js';
 import { makeMosaicIssuedCert } from '../../__tests__/helpers/test-cert.js';
 import type { GrantsService, GrantWithPeer } from '../../grants.service.js';
 // ---------------------------------------------------------------------------
 // Test constants
 // ---------------------------------------------------------------------------
 const GRANT_ID = 'a1111111-1111-1111-1111-111111111111';
 const USER_ID = 'b2222222-2222-2222-2222-222222222222';
 const PEER_ID = 'c3333333-3333-3333-3333-333333333333';
 // Node.js TLS serialNumber is uppercase hex (no colons)
 const CERT_SERIAL_HEX = '01';
 const VALID_SCOPE = { resources: ['tasks'], max_rows_per_query: 100 };
 // ---------------------------------------------------------------------------
 // Mock builders
 // ---------------------------------------------------------------------------
 /**
 * Create a `GrantWithPeer` fixture: an active grant for GRANT_ID / USER_ID
 * whose peer row stores CERT_SERIAL_HEX as its certificate serial.
 *
 * `overrides` is merged shallowly over the defaults, so passing `peer`
 * replaces the whole nested peer object rather than patching single fields.
 */
 function makeGrantWithPeer(overrides: Partial<GrantWithPeer> = {}): GrantWithPeer {
  const peer: GrantWithPeer['peer'] = {
    id: PEER_ID,
    commonName: 'test-peer',
    displayName: 'Test Peer',
    certPem: '',
    certSerial: CERT_SERIAL_HEX,
    // Expires one day from now
    certNotAfter: new Date(Date.now() + 86_400_000),
    clientKeyPem: null,
    state: 'active',
    endpointUrl: null,
    lastSeenAt: null,
    createdAt: new Date('2026-01-01T00:00:00Z'),
    revokedAt: null,
  };
  const defaults: GrantWithPeer = {
    id: GRANT_ID,
    peerId: PEER_ID,
    subjectUserId: USER_ID,
    scope: VALID_SCOPE,
    status: 'active',
    expiresAt: null,
    createdAt: new Date('2026-01-01T00:00:00Z'),
    revokedAt: null,
    revokedReason: null,
    peer,
  };
  return { ...defaults, ...overrides };
 }
 /**
 * Create a fake Nest `ExecutionContext` whose HTTP request exposes a stubbed
 * socket and whose reply records the `status(...).header(...).send(...)` chain.
 *
 * Options:
 *  - `certPem`: PEM certificate to present; it is decoded to DER and exposed
 *    as `raw`. Pass null to simulate "no cert presented".
 *  - `certSerialHex`: value reported as `serialNumber` by the stubbed socket
 *    (defaults to CERT_SERIAL_HEX).
 *  - `hasTlsSocket`: when false the socket has no `getPeerCertificate`,
 *    i.e. a plain HTTP connection (defaults to true).
 *
 * Returns the context plus the `status` and `send` mocks so tests can assert
 * on the response that was written.
 */
 function makeContext(opts: {
  certPem: string | null;
  certSerialHex?: string;
  hasTlsSocket?: boolean;
 }): {
  ctx: ExecutionContext;
  statusMock: ReturnType<typeof vi.fn>;
  sendMock: ReturnType<typeof vi.fn>;
 } {
  const { certPem, certSerialHex = CERT_SERIAL_HEX, hasTlsSocket = true } = opts;
  // PEM → DER: drop the armor lines and all whitespace, then base64-decode.
  const pemToDer = (pem: string): Buffer =>
    Buffer.from(
      pem
        .replace(/-----BEGIN CERTIFICATE-----/, '')
        .replace(/-----END CERTIFICATE-----/, '')
        .replace(/\s+/g, ''),
      'base64',
    );
  // Value handed back by the stubbed socket.getPeerCertificate().
  const peerCert: Record<string, unknown> =
    certPem === null
      ? { raw: null, serialNumber: '' } // no certificate presented
      : { raw: pemToDer(certPem), serialNumber: certSerialHex };
  // A socket without getPeerCertificate stands in for a non-TLS connection.
  const socket = hasTlsSocket ? { getPeerCertificate: vi.fn().mockReturnValue(peerCert) } : {};
  // Fastify reply chain: status() → header() → send()
  const sendMock = vi.fn().mockReturnValue(undefined);
  const statusMock = vi.fn().mockReturnValue({
    header: vi.fn().mockReturnValue({ send: sendMock }),
  });
  const request = { raw: { socket } };
  const reply = { status: statusMock };
  const http = {
    getRequest: () => request,
    getResponse: () => reply,
  };
  const ctx = { switchToHttp: () => http } as unknown as ExecutionContext;
  return { ctx, statusMock, sendMock };
 }
 /**
 * Create a `GrantsService` stand-in. By default `getGrantWithPeer` resolves
 * with the fixture from `makeGrantWithPeer()`; pass `overrides` to swap it.
 */
 function makeGrantsService(
  overrides: Partial<Pick<GrantsService, 'getGrantWithPeer'>> = {},
 ): GrantsService {
  const defaults = {
    getGrantWithPeer: vi.fn().mockResolvedValue(makeGrantWithPeer()),
  };
  const service = { ...defaults, ...overrides };
  return service as unknown as GrantsService;
 }
 // ---------------------------------------------------------------------------
 // Test suite
 // ---------------------------------------------------------------------------
 describe('FederationAuthGuard', () => {
  type Mock = ReturnType<typeof vi.fn>;
  let certPem: string;
  beforeEach(async () => {
    // Fresh Mosaic-issued cert carrying both standard OIDs for every test
    certPem = await makeMosaicIssuedCert({ grantId: GRANT_ID, subjectUserId: USER_ID });
  });
  // ── Shared helpers ────────────────────────────────────────────────────────
  /** Instantiate the guard around `grantsService` and evaluate it for `ctx`. */
  const runGuard = (
    ctx: ExecutionContext,
    grantsService: GrantsService = makeGrantsService(),
  ): Promise<boolean> => new FederationAuthGuard(grantsService).canActivate(ctx);
  /** GrantsService whose lookup resolves with `grant`. */
  const serviceReturning = (grant: GrantWithPeer): GrantsService =>
    makeGrantsService({ getGrantWithPeer: vi.fn().mockResolvedValue(grant) });
  /** Assert the reply was a 401 carrying an `unauthorized` error envelope. */
  const expectUnauthorized = (statusMock: Mock, sendMock: Mock): void => {
    expect(statusMock).toHaveBeenCalledWith(401);
    expect(sendMock).toHaveBeenCalledWith(
      expect.objectContaining({
        error: expect.objectContaining({ code: 'unauthorized', message: expect.any(String) }),
      }),
    );
  };
  /** Assert the reply was a 403 carrying the generic `forbidden` envelope. */
  const expectForbidden = (statusMock: Mock, sendMock: Mock): void => {
    expect(statusMock).toHaveBeenCalledWith(403);
    expect(sendMock).toHaveBeenCalledWith(
      expect.objectContaining({
        error: expect.objectContaining({ code: 'forbidden', message: 'Federation access denied' }),
      }),
    );
  };
  // ── 401: No TLS socket ────────────────────────────────────────────────────
  it('returns 401 when there is no TLS socket (plain HTTP connection)', async () => {
    const { ctx, statusMock, sendMock } = makeContext({ certPem, hasTlsSocket: false });
    const allowed = await runGuard(ctx);
    expect(allowed).toBe(false);
    expectUnauthorized(statusMock, sendMock);
  });
  // ── 401: Cert not presented ───────────────────────────────────────────────
  it('returns 401 when the peer did not present a certificate', async () => {
    const { ctx, statusMock, sendMock } = makeContext({ certPem: null });
    const allowed = await runGuard(ctx);
    expect(allowed).toBe(false);
    expectUnauthorized(statusMock, sendMock);
  });
  // ── 401: Cert parse failure ───────────────────────────────────────────────
  it('returns 401 when the certificate DER bytes are corrupt', async () => {
    // Valid PEM armor around base64 that does not decode to a certificate
    const corruptPem = '-----BEGIN CERTIFICATE-----\naW52YWxpZA==\n-----END CERTIFICATE-----';
    const { ctx, statusMock, sendMock } = makeContext({ certPem: corruptPem });
    const allowed = await runGuard(ctx);
    expect(allowed).toBe(false);
    expectUnauthorized(statusMock, sendMock);
  });
  // ── 401: Missing grantId OID ─────────────────────────────────────────────
  it('returns 401 when the cert is missing the grantId OID', async () => {
    // makeSelfSignedCert produces a cert without any Mosaic OIDs
    const { makeSelfSignedCert } = await import('../../__tests__/helpers/test-cert.js');
    const { ctx, statusMock, sendMock } = makeContext({ certPem: await makeSelfSignedCert() });
    const allowed = await runGuard(ctx);
    expect(allowed).toBe(false);
    expectUnauthorized(statusMock, sendMock);
  });
  // ── 401: Missing subjectUserId OID ───────────────────────────────────────
  it('returns 401 when the cert has grantId OID but is missing subjectUserId OID', async () => {
    // Hand-build a cert that carries only the grantId extension
    const { webcrypto } = await import('node:crypto');
    const {
      X509CertificateGenerator,
      Extension,
      KeyUsagesExtension,
      KeyUsageFlags,
      BasicConstraintsExtension,
      cryptoProvider,
    } = await import('@peculiar/x509');
    cryptoProvider.set(webcrypto as unknown as Parameters<typeof cryptoProvider.set>[0]);
    const alg = { name: 'ECDSA', namedCurve: 'P-256', hash: 'SHA-256' } as const;
    const keys = await webcrypto.subtle.generateKey(alg, false, ['sign', 'verify']);
    const notBefore = new Date();
    const notAfter = new Date(notBefore.getTime() + 86_400_000);
    // Extension value: tag byte 0x0c, one length byte, then the UTF-8 bytes
    const grantIdBytes = new TextEncoder().encode(GRANT_ID);
    const grantIdValue = new Uint8Array(2 + grantIdBytes.length);
    grantIdValue.set([0x0c, grantIdBytes.length], 0);
    grantIdValue.set(grantIdBytes, 2);
    const partialCert = await X509CertificateGenerator.createSelfSigned({
      serialNumber: '01',
      name: 'CN=partial-oid-test',
      notBefore,
      notAfter,
      signingAlgorithm: alg,
      keys,
      extensions: [
        new BasicConstraintsExtension(false),
        new KeyUsagesExtension(KeyUsageFlags.digitalSignature),
        new Extension('1.3.6.1.4.1.99999.1', false, grantIdValue), // grantId only
      ],
    });
    const { ctx, statusMock, sendMock } = makeContext({ certPem: partialCert.toString('pem') });
    const allowed = await runGuard(ctx);
    expect(allowed).toBe(false);
    expectUnauthorized(statusMock, sendMock);
  });
  // ── 403: Grant not found ─────────────────────────────────────────────────
  it('returns 403 when the grantId from the cert does not exist in DB', async () => {
    const lookup = vi.fn().mockRejectedValue(new NotFoundException(`Grant ${GRANT_ID} not found`));
    const { ctx, statusMock, sendMock } = makeContext({ certPem });
    const allowed = await runGuard(ctx, makeGrantsService({ getGrantWithPeer: lookup }));
    expect(allowed).toBe(false);
    expectForbidden(statusMock, sendMock);
  });
  // ── 403: Grant in `pending` / `revoked` / `expired` status ───────────────
  const inactiveGrants: Array<Partial<GrantWithPeer>> = [
    { status: 'pending' },
    { status: 'revoked', revokedAt: new Date() },
    { status: 'expired' },
  ];
  for (const overrides of inactiveGrants) {
    it(`returns 403 when the grant is in ${overrides.status} status`, async () => {
      const { ctx, statusMock, sendMock } = makeContext({ certPem });
      const allowed = await runGuard(ctx, serviceReturning(makeGrantWithPeer(overrides)));
      expect(allowed).toBe(false);
      expectForbidden(statusMock, sendMock);
    });
  }
  // ── 403: Cert serial mismatch ─────────────────────────────────────────────
  it('returns 403 when the cert serial does not match the registered peer cert serial', async () => {
    // Stored serial differs from CERT_SERIAL_HEX='01' presented by the context
    const grant = makeGrantWithPeer({
      peer: { ...makeGrantWithPeer().peer, certSerial: 'DEADBEEF' },
    });
    const { ctx, statusMock, sendMock } = makeContext({ certPem, certSerialHex: '01' });
    const allowed = await runGuard(ctx, serviceReturning(grant));
    expect(allowed).toBe(false);
    expectForbidden(statusMock, sendMock);
  });
  // ── 403: subjectUserId cert/DB mismatch (CRIT-1 regression test) ─────────
  it('returns 403 when the cert subjectUserId does not match the DB grant subjectUserId', async () => {
    // Cert claims an attacker's subjectUserId while the DB grant holds USER_ID
    const attackerCertPem = await makeMosaicIssuedCert({
      grantId: GRANT_ID,
      subjectUserId: 'attacker-user-id',
    });
    const { ctx, statusMock, sendMock } = makeContext({
      certPem: attackerCertPem,
      certSerialHex: CERT_SERIAL_HEX,
    });
    const allowed = await runGuard(
      ctx,
      serviceReturning(makeGrantWithPeer({ subjectUserId: USER_ID })),
    );
    expect(allowed).toBe(false);
    expectForbidden(statusMock, sendMock);
  });
  // ── Happy path ────────────────────────────────────────────────────────────
  it('returns true and attaches federationContext on happy path', async () => {
    // Default fixture: active grant whose peer stores CERT_SERIAL_HEX
    const grant = makeGrantWithPeer({ status: 'active' });
    const { ctx, statusMock } = makeContext({ certPem, certSerialHex: CERT_SERIAL_HEX });
    // Same request object the guard will receive — lets us inspect what it attaches
    const request = ctx.switchToHttp().getRequest<Record<string, unknown>>();
    const allowed = await runGuard(ctx, serviceReturning(grant));
    expect(allowed).toBe(true);
    expect(statusMock).not.toHaveBeenCalled();
    // Verify the context was attached correctly
    expect(request['federationContext']).toEqual({
      grantId: GRANT_ID,
      subjectUserId: USER_ID,
      peerId: PEER_ID,
      scope: VALID_SCOPE,
    });
  });
 });
--- a/apps/gateway/src/federation/server/federation-auth.guard.ts
+++ b/apps/gateway/src/federation/server/federation-auth.guard.ts
@@ -0,0 +1,212 @@
 /**
 * FederationAuthGuard — NestJS CanActivate guard for inbound federation requests.
 *
 * Validates the mTLS client certificate presented by a peer gateway, extracts
 * custom OIDs to identify the grant + subject user, loads the grant from DB,
 * asserts it is active, and verifies the cert serial against the registered peer
 * cert serial as a defense-in-depth measure.
 *
 * On success, attaches `request.federationContext` for downstream verb controllers.
 * On failure, responds with the federation wire-format error envelope (not raw
 * NestJS exception JSON) to match the federation protocol contract.
 *
 * ## Cert-serial check decision
 * The guard validates that the inbound client cert's serial number matches the
 * `certSerial` stored on the associated `federation_peers` row. This is a
 * defense-in-depth measure: even if the mTLS handshake is compromised at the
 * transport layer (e.g. misconfigured TLS terminator that forwards arbitrary
 * client certs), an attacker cannot replay a cert with a different serial than
 * what was registered during enrollment. This check is NOT loosened because:
 *  1. It is O(1) — no additional DB round-trip (peerId is on the grant row,
 *     so we join to federationPeers in the same query).
 *  2. Cert renewal MUST update the stored serial — enforced by M6 scheduler.
 *  3. The OID-only path (without serial check) would allow any cert from the
 *     same CA bearing the same grantId OID to succeed after cert compromise.
 *
 * ## FastifyRequest typing path
 * NestJS + Fastify wraps the raw Node.js IncomingMessage in a FastifyRequest.
 * The underlying TLS socket is accessed via `request.raw.socket`, which is a
 * `tls.TLSSocket` when the server is listening on HTTPS. In development/test
 * the gateway may run over plain HTTP, in which case `getPeerCertificate` is
 * not available. The guard safely handles both cases by checking for the
 * method's existence before calling it.
 *
 * Note: The guard reads the peer certificate from the *already-completed*
 * TLS handshake via `socket.getPeerCertificate(detailed=true)`. This relies
 * on the server being configured with `requestCert: true` at the TLS level
 * so Fastify/Node.js requests the client cert during the handshake.
 * The guard does NOT verify the cert chain itself — that is handled by the
 * TLS layer (Node.js `rejectUnauthorized: true` with the CA cert pinned).
 */
 import {
  type CanActivate,
  type ExecutionContext,
  Inject,
  Injectable,
  Logger,
 } from '@nestjs/common';
 import type { FastifyReply, FastifyRequest } from 'fastify';
 import * as tls from 'node:tls';
 import { X509Certificate } from '@peculiar/x509';
 import { FederationForbiddenError, FederationUnauthorizedError } from '@mosaicstack/types';
 import { extractMosaicOids } from '../oid.util.js';
 import { GrantsService } from '../grants.service.js';
 import type { FederationContext } from './federation-context.js';
 import './federation-context.js'; // side-effect import: applies FastifyRequest module augmentation
 // ---------------------------------------------------------------------------
 // Internal helpers
 // ---------------------------------------------------------------------------
 /**
 * Write `error` to the Fastify reply as a JSON federation error envelope.
 *
 * The HTTP status is 401 when `error.code` is `'unauthorized'` and 403
 * otherwise. Always returns false so a caller can `return` the result
 * straight out of `canActivate`.
 */
 function sendFederationError(
  reply: FastifyReply,
  error: FederationUnauthorizedError | FederationForbiddenError,
 ): boolean {
  const isUnauthorized = error.code === 'unauthorized';
  void reply
    .status(isUnauthorized ? 401 : 403)
    .header('content-type', 'application/json')
    .send(error.toEnvelope());
  return false;
 }
 // ---------------------------------------------------------------------------
 // Guard
 // ---------------------------------------------------------------------------
@Injectable()
 export class FederationAuthGuard implements CanActivate {
  private readonly logger = new Logger(FederationAuthGuard.name);
  constructor(@Inject(GrantsService) private readonly grantsService: GrantsService) {}
  /**
   * Authenticate an inbound federation request from its mTLS client cert.
   *
   * Responds 401 when no usable certificate / OIDs are available and 403 when
   * the grant cannot be loaded, is not active, or does not match the cert.
   * In every rejection case the federation error envelope is written to the
   * reply and `false` is returned. On success `request.federationContext` is
   * set and `true` is returned.
   */
  async canActivate(context: ExecutionContext): Promise<boolean> {
    const http = context.switchToHttp();
    const request = http.getRequest<FastifyRequest>();
    const reply = http.getResponse<FastifyReply>();
    // ── Step 1: Extract peer certificate from TLS socket ────────────────────
    const rawSocket = request.raw.socket;
    // Check TLS socket: getPeerCertificate is only available on TLS connections.
    if (
      !rawSocket ||
      typeof (rawSocket as Partial<tls.TLSSocket>).getPeerCertificate !== 'function'
    ) {
      this.logger.warn('No TLS socket — client cert unavailable (non-mTLS connection)');
      return sendFederationError(
        reply,
        new FederationUnauthorizedError('Client certificate required'),
      );
    }
    const tlsSocket = rawSocket as tls.TLSSocket;
    const peerCert = tlsSocket.getPeerCertificate(true);
    // Node.js returns an empty object when the peer presented no certificate,
    // so the absence of `raw` (the DER bytes) is the signal we check for.
    if (!peerCert || !peerCert.raw) {
      this.logger.warn('Peer certificate not presented (mTLS handshake did not supply cert)');
      return sendFederationError(
        reply,
        new FederationUnauthorizedError('Client certificate required'),
      );
    }
    // ── Step 2: Parse the DER-encoded certificate via @peculiar/x509 ────────
    let cert: X509Certificate;
    try {
      // peerCert.raw is a Buffer containing the DER-encoded cert
      cert = new X509Certificate(peerCert.raw);
    } catch (err) {
      this.logger.warn(
        `Failed to parse peer certificate: ${err instanceof Error ? err.message : String(err)}`,
      );
      return sendFederationError(
        reply,
        new FederationUnauthorizedError('Client certificate could not be parsed'),
      );
    }
    // ── Step 3: Extract Mosaic custom OIDs ──────────────────────────────────
    const oidResult = extractMosaicOids(cert);
    if (!oidResult.ok) {
      const message =
        oidResult.error === 'MISSING_GRANT_ID'
          ? 'Client certificate is missing required OID: mosaic_grant_id (1.3.6.1.4.1.99999.1)'
          : oidResult.error === 'MISSING_SUBJECT_USER_ID'
            ? 'Client certificate is missing required OID: mosaic_subject_user_id (1.3.6.1.4.1.99999.2)'
            : `Client certificate OID extraction failed: ${oidResult.detail ?? 'unknown error'}`;
      this.logger.warn(`OID extraction failure [${oidResult.error}]: ${message}`);
      return sendFederationError(reply, new FederationUnauthorizedError(message));
    }
    const { grantId, subjectUserId } = oidResult.value;
    // ── Step 4: Load grant from DB ───────────────────────────────────────────
    let grant: Awaited<ReturnType<GrantsService['getGrantWithPeer']>>;
    try {
      grant = await this.grantsService.getGrantWithPeer(grantId);
    } catch (err) {
      // Fail closed on EVERY lookup failure. The expected case is the
      // NotFoundException thrown for an unknown grant, but the real cause is
      // logged so an unexpected failure (e.g. a DB error) is not misreported
      // as a missing grant. The wire response stays the generic 403.
      const reason = err instanceof Error ? err.message : String(err);
      this.logger.warn(`Grant lookup failed for ${grantId}: ${reason}`);
      return sendFederationError(reply, new FederationForbiddenError('Federation access denied'));
    }
    // ── Step 5: Assert grant is active ──────────────────────────────────────
    if (grant.status !== 'active') {
      this.logger.warn(`Grant ${grantId} is not active — status=${grant.status}`);
      return sendFederationError(reply, new FederationForbiddenError('Federation access denied'));
    }
    // ── Step 5b: Validate cert-extracted subjectUserId against DB (CRIT-1) ──
    // The cert claim is untrusted input; the DB row is authoritative.
    if (subjectUserId !== grant.subjectUserId) {
      this.logger.warn(`subjectUserId mismatch for grant ${grantId}`);
      return sendFederationError(reply, new FederationForbiddenError('Federation access denied'));
    }
    // ── Step 6: Defense-in-depth — cert serial must match registered peer ───
    // The inbound serial is taken from the Node.js TLS layer
    // (peerCert.serialNumber: hex without colons) rather than from the
    // @peculiar/x509 parse above, so it is in the same representation as the
    // stored value (see the normalization note below).
    const inboundSerial: string = peerCert.serialNumber ?? '';
    if (!grant.peer.certSerial) {
      // Peer row exists but has no stored serial — something is wrong with enrollment
      this.logger.error(`Peer ${grant.peerId} has no stored certSerial — enrollment incomplete`);
      return sendFederationError(reply, new FederationForbiddenError('Federation access denied'));
    }
    // Normalize both to uppercase for comparison (Node.js serialNumber is
    // already uppercase hex; DB value was stored from extractSerial() which
    // returns crypto.X509Certificate.serialNumber — also uppercase hex).
    if (inboundSerial.toUpperCase() !== grant.peer.certSerial.toUpperCase()) {
      this.logger.warn(
        `Cert serial mismatch for grant ${grantId}: ` +
          `inbound=${inboundSerial} registered=${grant.peer.certSerial}`,
      );
      return sendFederationError(reply, new FederationForbiddenError('Federation access denied'));
    }
    // ── Step 7: Attach FederationContext to request ──────────────────────────
    // Use grant.subjectUserId from DB (authoritative) — not the cert-extracted value.
    const federationContext: FederationContext = {
      grantId,
      subjectUserId: grant.subjectUserId,
      peerId: grant.peerId,
      scope: grant.scope as Record<string, unknown>,
    };
    request.federationContext = federationContext;
    this.logger.debug(
      `Federation auth OK — grantId=${grantId} peerId=${grant.peerId} subjectUserId=${grant.subjectUserId}`,
    );
    return true;
  }
 }
--- a/apps/gateway/src/federation/server/federation-context.ts
+++ b/apps/gateway/src/federation/server/federation-context.ts
@@ -0,0 +1,39 @@
 /**
 * FederationContext — attached to inbound federation requests after successful
 * mTLS + grant validation by FederationAuthGuard.
 *
 * Downstream verb controllers access this via `request.federationContext`.
 */
 /**
 * Augment FastifyRequest so TypeScript knows about the federation context
 * property that FederationAuthGuard attaches on success.
 *
 * The property is optional because the guard only assigns it after every
 * check has passed; handlers must not assume it is present unless the guard
 * has accepted the request.
 */
 declare module 'fastify' {
  interface FastifyRequest {
    federationContext?: FederationContext;
  }
 }
 /**
 * Typed context object attached to the request by FederationAuthGuard.
 * Carries all data extracted from the mTLS cert + grant DB row needed
 * by downstream federation verb handlers.
 */
 export interface FederationContext {
  /** The federation grant ID extracted from OID 1.3.6.1.4.1.99999.1 */
  grantId: string;
  /**
   * The local subject user whose data is accessible under this grant.
   * Populated from the grant's DB row (authoritative), not from the
   * subject-user OID claimed by the client certificate.
   */
  subjectUserId: string;
  /** The peer gateway ID (from the grant's peerId FK) */
  peerId: string;
  /**
   * Grant scope — determines which resources the peer may query.
   * Typed as Record<string, unknown> because the full scope schema lives in
   * scope-schema.ts; downstream handlers should narrow via parseFederationScope.
   */
  scope: Record<string, unknown>;
 }
--- a/apps/gateway/src/federation/server/index.ts
+++ b/apps/gateway/src/federation/server/index.ts
@@ -0,0 +1,13 @@
 /**
 * Federation server-side barrel — inbound request handling.
 *
 * Exports the mTLS auth guard and the FederationContext interface
 * for use by verb controllers (M3-05/06/07).
 *
 * Usage:
 *   import { FederationAuthGuard } from './server/index.js';
 *   @UseGuards(FederationAuthGuard)
 */
 export { FederationAuthGuard } from './federation-auth.guard.js';
 export type { FederationContext } from './federation-context.js';
--- a/deploy/portainer/federated-test.stack.yml
+++ b/deploy/portainer/federated-test.stack.yml
@@ -30,9 +30,18 @@
 #   DNS A record        ${HOST_FQDN} → Swarm ingress IP (or Cloudflare proxy).
 #
 # IMAGE
-#   Pinned to digest fed-v0.1.0-m1 (DEPLOY-01 verified).
+#   Pinned to sha-9f1a081 (main HEAD post-#488 Dockerfile fix). The previous
 #   pin (fed-v0.1.0-m1, sha256:9b72e2...) had a broken pnpm copy and could
 #   not resolve @mosaicstack/storage at runtime. The new digest was smoke-
 #   tested locally — gateway boots, imports resolve, tier-detector runs.
 #   Update digest here when promoting a new build.
 #
 # HEALTHCHECK NOTE (2026-04-21)
 #   Switched from busybox wget to node http.get on 127.0.0.1 (not localhost) to
 #   avoid IPv6 resolution issues on Alpine. Retries increased to 5 and
 #   start_period to 60s to cover the NestJS/GC cold-start window (~40-50s).
 #   restart_policy set to `any` so SIGTERM/clean-exit also triggers restart.
 #
 # NOTE: This is a TEST template — production deployments use a separate
 #       parameterised template with stricter resource limits and secrets.
@@ -40,8 +49,8 @@ version: '3.9'
 services:
  gateway:
-    image: git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:9b72e202a9eecc27d31920b87b475b9e96e483c0323acc57856be4b1355db1ec
+    image: git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
-    # Tag for human reference: fed-v0.1.0-m1
+    # Tag for human reference: sha-9f1a081 (post-#488 Dockerfile fix; smoke-tested locally)
    environment:
      # ── Tier ───────────────────────────────────────────────────────────────
      MOSAIC_TIER: federated
@@ -73,7 +82,7 @@ services:
    deploy:
      replicas: 1
      restart_policy:
-        condition: on-failure
+        condition: any
        delay: 5s
        max_attempts: 3
      labels:
@@ -85,11 +94,15 @@ services:
        - 'traefik.http.routers.${STACK_NAME}.tls.certresolver=letsencrypt'
        - 'traefik.http.services.${STACK_NAME}.loadbalancer.server.port=3000'
    healthcheck:
-      test: ['CMD', 'wget', '-qO-', 'http://localhost:3000/health']
+      test:
        - 'CMD'
        - 'node'
        - '-e'
        - "require('http').get('http://127.0.0.1:3000/health',r=>process.exit(r.statusCode===200?0:1)).on('error',()=>process.exit(1))"
      interval: 30s
      timeout: 5s
-      retries: 3
+      retries: 5
-      start_period: 20s
+      start_period: 60s
    depends_on:
      - postgres
      - valkey
--- a/docker-compose.federated.yml
+++ b/docker-compose.federated.yml
@@ -27,6 +27,7 @@ services:
  postgres-federated:
    image: pgvector/pgvector:pg17
    profiles: [federated]
    restart: unless-stopped
    ports:
      - '${PG_FEDERATED_HOST_PORT:-5433}:5432'
    environment:
@@ -45,6 +46,7 @@ services:
  valkey-federated:
    image: valkey/valkey:8-alpine
    profiles: [federated]
    restart: unless-stopped
    ports:
      - '${VALKEY_FEDERATED_HOST_PORT:-6380}:6379'
    volumes:
@@ -55,6 +57,64 @@ services:
      timeout: 3s
      retries: 5
  # ---------------------------------------------------------------------------
  # Step-CA — Mosaic Federation internal certificate authority
  #
  # Image: pinned to 0.27.4 (latest stable as of late 2025).
  # `latest` is forbidden per Mosaic image policy (immutable tag required for
  # reproducible deployments and digest-first promotion in CI).
  #
  # Profile: `federated` — this service must not start in non-federated dev.
  #
  # Password:
  #   Dev:  bind-mount ./infra/step-ca/dev-password (gitignored; copy from
  #         ./infra/step-ca/dev-password.example and customise locally).
  #   Prod: replace the bind-mount with a Docker secret:
  #           secrets:
  #             ca_password:
  #               external: true
  #         and reference it as `/run/secrets/ca_password` (same path the
  #         init script already uses).
  #
  # Provisioner: "mosaic-fed" (consumed by apps/gateway/src/federation/ca.service.ts)
  # ---------------------------------------------------------------------------
   step-ca:
     image: smallstep/step-ca:0.27.4
     profiles: [federated]
     restart: unless-stopped
     ports:
       # Host-side port is overridable via STEP_CA_HOST_PORT (defaults to 9000);
       # the container side is always 9000, matching the healthcheck --ca-url.
       - '${STEP_CA_HOST_PORT:-9000}:9000'
     volumes:
       # Named volume mounted at /home/step; the healthcheck below reads the
       # root cert from /home/step/certs/root_ca.crt inside it.
       - step_ca_data:/home/step
       # init script — executed as the container entrypoint
       - ./infra/step-ca/init.sh:/usr/local/bin/mosaic-step-ca-init.sh:ro
       # X.509 template skeleton (wired in M2-04)
       - ./infra/step-ca/templates:/etc/step-ca-templates:ro
       # Dev password file — GITIGNORED; copy from dev-password.example
       # In production, replace this with a Docker secret (see comment above).
       - ./infra/step-ca/dev-password:/run/secrets/ca_password:ro
     # Run the bind-mounted init script via /bin/sh instead of the image's
     # default entrypoint.
     entrypoint: ['/bin/sh', '/usr/local/bin/mosaic-step-ca-init.sh']
     healthcheck:
       # The healthcheck requires the root cert to exist, which is only true
       # after init.sh has completed on first boot. start_period gives init
       # time to finish before Docker starts counting retries.
       test:
         [
           'CMD',
           'step',
           'ca',
           'health',
           '--ca-url',
           'https://localhost:9000',
           '--root',
           '/home/step/certs/root_ca.crt',
         ]
       interval: 10s
       timeout: 5s
       retries: 5
       start_period: 30s
 volumes:
  pg_federated_data:
  valkey_federated_data:
  step_ca_data:
--- a/docker/appservice.Dockerfile
+++ b/docker/appservice.Dockerfile
@@ -0,0 +1,28 @@
 # Multi-stage image for the appservice (@mosaicstack/mosaic-as).
 # base: Node 22 on Alpine with corepack enabled and PNPM_HOME on PATH.
 FROM node:22-alpine AS base
 ENV PNPM_HOME="/pnpm"
 ENV PATH="$PNPM_HOME:$PATH"
 RUN corepack enable
 # builder: installs the workspace, builds the appservice, and writes a
 # production-only deploy artifact to /deploy.
 FROM base AS builder
 WORKDIR /app
 # Copy workspace manifests first for layer-cached install
 COPY pnpm-workspace.yaml pnpm-lock.yaml package.json ./
 COPY apps/appservice/package.json ./apps/appservice/
 COPY packages/ ./packages/
 COPY plugins/ ./plugins/
 RUN pnpm install --frozen-lockfile
 COPY . .
 # Build the appservice; the trailing "..." on the filter also selects its
 # workspace dependencies (same pattern as docker/gateway.Dockerfile).
 RUN pnpm turbo run build --filter @mosaicstack/mosaic-as...
 # Emit the --prod deploy output for the package into /deploy.
 # NOTE(review): docker/gateway.Dockerfile documents --legacy as required for
 # pnpm v10 when inject-workspace-packages is not set — presumably the same
 # reason applies here; confirm.
 RUN pnpm --filter @mosaicstack/mosaic-as --prod deploy --legacy /deploy
 # runner: final image; takes node_modules and package.json from /deploy and
 # the compiled dist from the builder's app directory.
 FROM base AS runner
 WORKDIR /app
 ENV NODE_ENV=production
 COPY --from=builder /deploy/node_modules ./node_modules
 COPY --from=builder /deploy/package.json ./package.json
 COPY --from=builder /app/apps/appservice/dist ./dist
 # Run as the `node` user rather than root.
 USER node
 # Port 8008 is the same port the healthcheck below probes.
 EXPOSE 8008
 # Healthcheck: GET /health on 127.0.0.1:8008 using Node's http module.
 # Exits 0 only on HTTP 200; any other status or a request error exits 1.
 HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=5 \
  CMD ["node", "-e", "require('http').get('http://127.0.0.1:8008/health',r=>process.exit(r.statusCode===200?0:1)).on('error',()=>process.exit(1))"]
 CMD ["node", "dist/main.js"]
--- a/docker/gateway.Dockerfile
+++ b/docker/gateway.Dockerfile
@@ -5,18 +5,27 @@ RUN corepack enable
 FROM base AS builder
 WORKDIR /app
 # Copy workspace manifests first for layer-cached install
 COPY pnpm-workspace.yaml pnpm-lock.yaml package.json ./
 COPY apps/gateway/package.json ./apps/gateway/
 COPY packages/ ./packages/
 COPY plugins/ ./plugins/
 RUN pnpm install --frozen-lockfile
 COPY . .
-RUN pnpm --filter @mosaic/gateway build
+# Build gateway and all of its workspace dependencies via turbo dependency graph
 RUN pnpm turbo run build --filter @mosaicstack/gateway...
 # Produce a self-contained deploy artifact: flat node_modules, no pnpm symlinks
 # --legacy is required for pnpm v10 when inject-workspace-packages is not set
 RUN pnpm --filter @mosaicstack/gateway --prod deploy --legacy /deploy
 FROM base AS runner
 WORKDIR /app
 ENV NODE_ENV=production
 # Use the pnpm deploy output — resolves all deps into a flat, self-contained node_modules
 COPY --from=builder /deploy/node_modules ./node_modules
 COPY --from=builder /deploy/package.json ./package.json
 # dist is declared in package.json "files" so pnpm deploy copies it into /deploy;
 # copy from builder explicitly as belt-and-suspenders
 COPY --from=builder /app/apps/gateway/dist ./dist
 COPY --from=builder /app/apps/gateway/package.json ./package.json
 COPY --from=builder /app/node_modules ./node_modules
 EXPOSE 4000
 CMD ["node", "dist/main.js"]
--- a/docs/TASKS.md
+++ b/docs/TASKS.md
@@ -22,14 +22,15 @@
 These are MVP-level checks that don't belong to any single workstream. Updated by the orchestrator at each session.
-| id      | status      | description                                                                                              | notes                                                                                   |
+| id         | status      | description                                                                                              | notes                                                                                                                         |
-| ------- | ----------- | -------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------- |
+| ---------- | ----------- | -------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------- |
-| MVP-T01 | done        | Author MVP-level manifest at `docs/MISSION-MANIFEST.md`                                                  | This session (2026-04-19); PR pending                                                   |
+| MVP-T01    | done        | Author MVP-level manifest at `docs/MISSION-MANIFEST.md`                                                  | This session (2026-04-19); PR pending                                                                                         |
-| MVP-T02 | done        | Archive install-ux-v2 mission state to `docs/archive/missions/install-ux-v2-20260405/`                   | IUV-M03 retroactively closed (shipped via PR #446 + releases 0.0.27→0.0.29)             |
+| MVP-T02    | done        | Archive install-ux-v2 mission state to `docs/archive/missions/install-ux-v2-20260405/`                   | IUV-M03 retroactively closed (shipped via PR #446 + releases 0.0.27→0.0.29)                                                   |
-| MVP-T03 | done        | Land federation v1 planning artifacts on `main`                                                          | PR #468 merged 2026-04-19 (commit `66512550`)                                           |
+| MVP-T03    | done        | Land federation v1 planning artifacts on `main`                                                          | PR #468 merged 2026-04-19 (commit `66512550`)                                                                                 |
-| MVP-T04 | not-started | Sync `.mosaic/orchestrator/mission.json` MVP slot with this manifest (milestone enumeration, etc.)       | Coord state file; consider whether to repopulate via `mosaic coord` or accept hand-edit |
+| MVP-T04    | not-started | Sync `.mosaic/orchestrator/mission.json` MVP slot with this manifest (milestone enumeration, etc.)       | Coord state file; consider whether to repopulate via `mosaic coord` or accept hand-edit                                       |
-| MVP-T05 | in-progress | Kick off W1 / FED-M1 — federated tier infrastructure                                                     | Session 16 (2026-04-19): FED-M1-01 in-progress on `feat/federation-m1-tier-config`      |
+| MVP-T05    | in-progress | Kick off W1 / FED-M1 — federated tier infrastructure                                                     | Session 16 (2026-04-19): FED-M1-01 in-progress on `feat/federation-m1-tier-config`                                            |
-| MVP-T06 | not-started | Declare additional workstreams (web dashboard, TUI/CLI parity, remote control, etc.) as scope solidifies | Track each new workstream by adding a row to the Workstream Rollup                      |
+| MVP-T06    | not-started | Declare additional workstreams (web dashboard, TUI/CLI parity, remote control, etc.) as scope solidifies | Track each new workstream by adding a row to the Workstream Rollup                                                            |
 | T-A292E96F | in-progress | Fix Mosaic Gitea PR metadata/login wrapper regression for U-Connect merge preflight                      | Kanban `t_a292e96f`; branch `fix/t-a292e96f-gitea-pr-metadata`; scratchpad `docs/scratchpads/t-a292e96f-gitea-pr-metadata.md` |
 ## Pointer to Active Workstream
@@ -38,3 +39,9 @@ Active workstream is **W1 — Federation v1**. Workers should:
 1. Read [docs/federation/MISSION-MANIFEST.md](./federation/MISSION-MANIFEST.md) for workstream scope
 2. Read [docs/federation/TASKS.md](./federation/TASKS.md) for the next pending task
 3. Follow per-task agent + tier guidance from the workstream manifest
 ## Thin-core prompt diet (#528) — feat/contract-thin-core
 - Status: PR open, awaiting maintainer merge ratification (fleet-governing change).
 - Cut always-injected contract AGENTS+TOOLS+RUNTIME 8,827→4,122 tok (−53%); all 12 hard gates intact.
 - Validation: deterministic gate-checklist PASS; headless A/B thin 7/9 vs monolith 5/9. Detail: scratchpads/contract-thin-core.md.
--- a/docs/federation/ADMIN-CLI.md
+++ b/docs/federation/ADMIN-CLI.md
@@ -0,0 +1,106 @@
 # Mosaic Federation — Admin CLI Reference
 Available since: FED-M2
 ## Grant Management
 ### Create a grant
 ```bash
 mosaic federation grant create --user <userId> --peer <peerId> --scope <scope-file.json>
 ```
 The scope file defines what resources and rows the peer may access:
 ```json
 {
  "resources": ["tasks", "notes"],
  "excluded_resources": ["credentials"],
  "max_rows_per_query": 100
 }
 ```
 Valid resource values: `tasks`, `notes`, `credentials`, `teams`, `users`
 ### List grants
 ```bash
 mosaic federation grant list [--peer <peerId>] [--status pending|active|revoked|expired]
 ```
 Shows all federation grants, optionally filtered by peer or status.
 ### Show a grant
 ```bash
 mosaic federation grant show <grantId>
 ```
 Display details of a single grant, including its scope, activation timestamp, and status.
 ### Revoke a grant
 ```bash
 mosaic federation grant revoke <grantId> [--reason "Reason text"]
 ```
 Revoke an active grant immediately. Revoked grants cannot be reactivated. The optional reason is stored in the audit log.
 ### Generate enrollment token
 ```bash
 mosaic federation grant token <grantId> [--ttl <seconds>]
 ```
 Generate a single-use enrollment token for the grant. The default TTL is 900 seconds (15 minutes), which is also the maximum; use `--ttl` to request a shorter lifetime.
 Output includes the token and the full enrollment URL for the peer to use.
 ## Peer Management
 ### Add a peer (remote enrollment)
 ```bash
 mosaic federation peer add <enrollment-url>
 ```
 Enroll a remote peer using the enrollment URL obtained from a grant token. The command:
 1. Generates a P-256 ECDSA keypair locally
 2. Creates a certificate signing request (CSR)
 3. Submits the CSR to the enrollment URL
 4. Verifies the returned certificate includes the correct custom OIDs (grant ID and subject user ID)
 5. Seals the private key at rest using `BETTER_AUTH_SECRET`
 6. Stores the peer record and sealed key in the local gateway database
 Once enrollment completes, the peer can authenticate using the certificate and private key.
 ### List peers
 ```bash
 mosaic federation peer list
 ```
 Shows all enrolled peers, including their certificate fingerprints and activation status.
 ## REST API Reference
 All CLI commands call the local gateway admin API. Equivalent REST endpoints:
 | CLI Command  | REST Endpoint                                                                               | Method            |
 | ------------ | ------------------------------------------------------------------------------------------- | ----------------- |
 | grant create | `/api/admin/federation/grants`                                                              | POST              |
 | grant list   | `/api/admin/federation/grants`                                                              | GET               |
 | grant show   | `/api/admin/federation/grants/:id`                                                          | GET               |
 | grant revoke | `/api/admin/federation/grants/:id/revoke`                                                   | PATCH             |
 | grant token  | `/api/admin/federation/grants/:id/tokens`                                                   | POST              |
 | peer list    | `/api/admin/federation/peers`                                                               | GET               |
 | peer add     | `/api/admin/federation/peers/keypair` + enrollment + `/api/admin/federation/peers/:id/cert` | POST, POST, PATCH |
 ## Security Notes
 - **Enrollment tokens** are single-use and expire after at most 15 minutes (the default TTL; `--ttl` can shorten it but not extend it)
 - **Peer private keys** are encrypted at rest using AES-256-GCM, keyed from `BETTER_AUTH_SECRET`
 - **Custom OIDs** in issued certificates are verified post-issuance: the grant ID and subject user ID must match the certificate extensions
 - **Grant activation** is atomic — concurrent enrollment attempts for the same grant are rejected
 - **Revoked grants** cannot be activated; peers attempting to use a revoked grant's token will be rejected
--- a/docs/federation/MISSION-MANIFEST.md
+++ b/docs/federation/MISSION-MANIFEST.md
@@ -7,11 +7,11 @@
 **ID:** federation-v1-20260419
 **Statement:** Jarvis operates across 3–4 workstations in two physical locations (home, USC). The user currently reaches back to a single jarvis-brain checkout from every session; a prior OpenBrain attempt caused cache, latency, and opacity pain. This mission builds asymmetric federation between Mosaic Stack gateways so that a session on a user's home gateway can query their work gateway in real time without data ever persisting across the boundary, with full multi-tenant isolation and standard-PKI (X.509 / Step-CA) trust management.
-**Phase:** M2 active — Step-CA + grant schema + admin CLI; parallel test-deploy workstream stood up
+**Phase:** M3 active — mTLS handshake + list/get/capabilities verbs + scope enforcement
-**Current Milestone:** FED-M2
+**Current Milestone:** FED-M3
-**Progress:** 1 / 7 milestones
+**Progress:** 2 / 7 milestones
 **Status:** active
-**Last Updated:** 2026-04-21 (M2 decomposed; mos-test-1/-2 designated as federation E2E test hosts)
+**Last Updated:** 2026-04-21 (M2 closed via PR #503, tag `fed-v0.2.0-m2`, issue #461 closed; M3 decomposed into 14 tasks)
 **Parent Mission:** None — new mission
 ## Test Infrastructure
@@ -63,8 +63,8 @@ Key design references:
 | #   | ID     | Name                                          | Status      | Branch             | Issue | Started    | Completed  |
 | --- | ------ | --------------------------------------------- | ----------- | ------------------ | ----- | ---------- | ---------- |
 | 1   | FED-M1 | Federated tier infrastructure                 | done        | (12 PRs #470-#481) | #460  | 2026-04-19 | 2026-04-19 |
-| 2   | FED-M2 | Step-CA + grant schema + admin CLI            | in-progress | (decomposition)    | #461  | 2026-04-21 | —          |
+| 2   | FED-M2 | Step-CA + grant schema + admin CLI            | done        | (PRs #483-#503)    | #461  | 2026-04-21 | 2026-04-21 |
-| 3   | FED-M3 | mTLS handshake + list/get + scope enforcement | not-started | —                  | #462  | —          | —          |
+| 3   | FED-M3 | mTLS handshake + list/get + scope enforcement | in-progress | (decomposition)    | #462  | 2026-04-21 | —          |
 | 4   | FED-M4 | search verb + audit log + rate limit          | not-started | —                  | #463  | —          | —          |
 | 5   | FED-M5 | Cache + offline degradation + OTEL            | not-started | —                  | #464  | —          | —          |
 | 6   | FED-M6 | Revocation + auto-renewal + CRL               | not-started | —                  | #465  | —          | —          |
@@ -85,17 +85,24 @@ Key design references:
 ## Session History
-| Session | Date       | Runtime | Outcome                                                               |
+| Session | Date                    | Runtime | Outcome                                                                                                                               |
-| ------- | ---------- | ------- | --------------------------------------------------------------------- |
+| ------- | ----------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------- |
-| S1      | 2026-04-19 | claude  | PRD authored, MILESTONES decomposed, 7 issues filed                   |
+| S1      | 2026-04-19              | claude  | PRD authored, MILESTONES decomposed, 7 issues filed                                                                                   |
-| S2-S4   | 2026-04-19 | claude  | FED-M1 complete: 12 tasks (PRs #470-#481) merged; tag `fed-v0.1.0-m1` |
+| S2-S4   | 2026-04-19              | claude  | FED-M1 complete: 12 tasks (PRs #470-#481) merged; tag `fed-v0.1.0-m1`                                                                 |
 | S5-S22  | 2026-04-19 → 2026-04-21 | claude  | FED-M2 complete: 13 tasks (PRs #483-#503) merged; tag `fed-v0.2.0-m2`; issue #461 closed. Step-CA + grant schema + admin CLI shipped. |
 | S23     | 2026-04-21              | claude  | M3 decomposed into 14 tasks in `docs/federation/TASKS.md`. Manifest M3 row → in-progress. Next: kickoff M3-01.                        |
 ## Next Step
-FED-M2 active. Decomposition landed in `docs/federation/TASKS.md` (M2-01..M2-13 code workstream + DEPLOY-01..DEPLOY-05 parallel test-deploy workstream, ~88K total). Tracking issue #482.
+FED-M3 active. Decomposition landed in `docs/federation/TASKS.md` (M3-01..M3-14, ~100K estimate). Tracking issue #462.
-Parallel execution plan:
+Execution plan (parallel where possible):
- **CODE workstream**: M2-01 (DB migration) starts immediately — sonnet subagent on `feat/federation-m2-schema`. Then M2-02 → M2-09 sequentially with M2-04/M2-05/M2-06/M2-07 having interleaved CA/storage/grant dependencies.
+- **Foundation**: M3-01 (DTOs in `packages/types/src/federation/`) starts immediately — sonnet subagent on `feat/federation-m3-types`. Blocks all server + client work.
- **DEPLOY workstream**: DEPLOY-01 (image verify) → DEPLOY-02 (stack template) → DEPLOY-03/04 (mos-test-1/-2 deploy) → DEPLOY-05 (TEST-INFRA.md). Gated on Portainer wrapper PR (`PORTAINER_INSECURE` flag) merging first.
+- **Server stream** (after M3-01): M3-03 (AuthGuard) + M3-04 (ScopeService) in series, then M3-05 / M3-06 / M3-07 (verbs) in parallel.
- **Re-converge** at M2-10 (E2E test) once both workstreams ready.
+- **Client stream** (after M3-01, parallel with server): M3-08 (FederationClient) → M3-09 (QuerySourceService).
 - **Harness** (parallel with everything): M3-02 (`tools/federation-harness/`) — needed for M3-11.
 - **Test gates**: M3-10 (Integration) → M3-11 (E2E with harness) → M3-12 (Independent security review, two rounds budgeted).
 - **Close**: M3-13 (Docs) → M3-14 (release tag `fed-v0.3.0-m3`, close #462).
 **Test-bed fallback:** `mos-test-1/-2` deploy is still blocked on `FED-M2-DEPLOY-IMG-FIX`. The harness in M3-02 ships a local two-gateway docker-compose so M3-11 is not blocked. Production-host validation is M7's responsibility (PRD AC-12).
--- a/docs/federation/SETUP.md
+++ b/docs/federation/SETUP.md
@@ -70,6 +70,96 @@ For JSON output (useful in CI/automation):
 mosaic gateway doctor --json
 ```
 ## Step 2: Step-CA Bootstrap
 Step-CA is a certificate authority that issues X.509 certificates for federation peers. In Mosaic federation, it signs peer certificates with custom OIDs that embed grant and user identities, enforcing authorization at the certificate level.
 ### Prerequisites for Step-CA
 Before starting the CA, you must set up the dev password:
 ```bash
 cp infra/step-ca/dev-password.example infra/step-ca/dev-password
 # Edit dev-password and set your CA password (minimum 16 characters)
 ```
 The password is required for the CA to boot and derive the provisioner key used by the gateway.
 ### Start the Step-CA service
 Start the step-ca service from the federated Compose stack:
 ```bash
 docker compose -f docker-compose.federated.yml --profile federated up -d step-ca
 ```
 On first boot, the init script (`infra/step-ca/init.sh`) runs automatically. It:
 - Generates the CA root key and certificate in the Docker volume
 - Creates the `mosaic-fed` JWK provisioner
 - Applies the X.509 template from `infra/step-ca/templates/federation.tpl`
 The volume is persistent, so subsequent boots reuse the existing CA keys.
 Verify the CA is healthy:
 ```bash
 curl https://localhost:9000/health --cacert /tmp/step-ca-root.crt
 ```
 (If the root cert file doesn't exist yet, see the extraction steps below.)
 ### Extract credentials for the gateway
 The gateway requires two credentials from the running CA:
 **1. Provisioner key (for `STEP_CA_PROVISIONER_KEY_JSON`)**
 ```bash
 docker exec $(docker ps -qf name=step-ca) cat /home/step/secrets/mosaic-fed.json > /tmp/step-ca-provisioner.json
 ```
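 This JSON file contains the JWK public and private keys for the `mosaic-fed` provisioner. Because it holds private key material, restrict its permissions (e.g. `chmod 600`) and do not leave it in `/tmp` outside local development. Pass its contents to the gateway via the `STEP_CA_PROVISIONER_KEY_JSON` environment variable.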
 **2. Root certificate (for `STEP_CA_ROOT_CERT_PATH`)**
 ```bash
 docker cp $(docker ps -qf name=step-ca):/home/step/certs/root_ca.crt /tmp/step-ca-root.crt
 ```
 This PEM file is the CA's root certificate, used to verify peer certificates issued by step-ca. Pass its path to the gateway via `STEP_CA_ROOT_CERT_PATH`.
 ### Custom OID Registry
 Federation certificates include custom OIDs in the certificate extension. These encode authorization metadata:
 | OID                 | Name                   | Description           |
 | ------------------- | ---------------------- | --------------------- |
 | 1.3.6.1.4.1.99999.1 | mosaic_grant_id        | Federation grant UUID |
 | 1.3.6.1.4.1.99999.2 | mosaic_subject_user_id | Subject user UUID     |
 These OIDs are verified by the gateway after the CSR is signed, ensuring the certificate was issued with the correct grant and user context.
 ### Environment Variables
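 Configure the gateway with the following environment variables before startup (the "CA Environment Variables" section later in this document additionally lists `STEP_CA_PROVISIONER_PASSWORD` as required by `CaService`):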
 | Variable                       | Required | Description                                                                                               |
 | ------------------------------ | -------- | --------------------------------------------------------------------------------------------------------- |
 | `STEP_CA_URL`                  | Yes      | Base URL of the step-ca instance, e.g. `https://step-ca:9000` (use `https://localhost:9000` in local dev) |
 | `STEP_CA_PROVISIONER_KEY_JSON` | Yes      | JSON-encoded JWK from `/home/step/secrets/mosaic-fed.json`                                                |
 | `STEP_CA_ROOT_CERT_PATH`       | Yes      | Absolute path to the root CA certificate (e.g. `/tmp/step-ca-root.crt`)                                   |
 | `BETTER_AUTH_SECRET`           | Yes      | Secret used to seal peer private keys at rest; already required for M1                                    |
 Example environment setup:
 ```bash
 export STEP_CA_URL="https://localhost:9000"
 export STEP_CA_PROVISIONER_KEY_JSON="$(cat /tmp/step-ca-provisioner.json)"
 export STEP_CA_ROOT_CERT_PATH="/tmp/step-ca-root.crt"
 export BETTER_AUTH_SECRET="<your-secret>"
 ```
 ## Troubleshooting
 ### Port conflicts
@@ -117,3 +207,74 @@ docker compose -f docker-compose.federated.yml logs valkey-federated
 ```
 If Valkey is running, verify your firewall allows 6380. On macOS, Docker Desktop may require binding to `host.docker.internal` instead of `localhost`.
 ## Key rotation (deferred)
 Federation peer private keys (`federation_peers.client_key_pem`) are sealed at rest using AES-256-GCM with a key derived from `BETTER_AUTH_SECRET` via SHA-256. If `BETTER_AUTH_SECRET` is rotated, all sealed `client_key_pem` values in the database become unreadable and must be re-sealed with the new key before rotation completes.
 The full key rotation procedure (decrypt all rows with old key, re-encrypt with new key, atomically swap the secret) is out of scope for M2. Operators must not rotate `BETTER_AUTH_SECRET` without a migration plan for all sealed federation peer keys.
 ## OID Assignments — Mosaic Internal OID Arc
 Mosaic uses the private enterprise arc `1.3.6.1.4.1.99999` for custom X.509
 certificate extensions in federation grant certificates.
 **IMPORTANT:** This is a development/internal OID arc. Before deploying to a
 production environment accessible by external parties, register a proper IANA
 Private Enterprise Number (PEN) at <https://pen.iana.org/pen/PenApplication.page>
 and update these assignments accordingly.
 ### Assigned OIDs
 | OID                   | Symbolic name                     | Description                                               |
 | --------------------- | --------------------------------- | --------------------------------------------------------- |
 | `1.3.6.1.4.1.99999.1` | `mosaic.federation.grantId`       | UUID of the `federation_grants` row authorising this cert |
 | `1.3.6.1.4.1.99999.2` | `mosaic.federation.subjectUserId` | UUID of the local user on whose behalf the cert is issued |
 ### Encoding
 Each extension value is DER-encoded as an ASN.1 **UTF8String**:
 ```
 Tag    0x0C        (UTF8String)
 Length 0x24        (36 decimal — fixed length of a UUID string)
 Value  <36 ASCII bytes of the UUID>
 ```
 The step-ca X.509 template at `infra/step-ca/templates/federation.tpl`
 produces this encoding via the Go template expression:
 ```
 {{ printf "\x0c\x24%s" .Token.mosaic_grant_id | b64enc }}
 ```
 The resulting base64 value is passed as the `value` field of the extension
 object in the template JSON.
 ### CA Environment Variables
 The `CaService` (`apps/gateway/src/federation/ca.service.ts`) requires the
 following environment variables at gateway startup:
 | Variable                       | Required | Description                                                          |
 | ------------------------------ | -------- | -------------------------------------------------------------------- |
 | `STEP_CA_URL`                  | Yes      | Base URL of the step-ca instance, e.g. `https://step-ca:9000`        |
 | `STEP_CA_PROVISIONER_PASSWORD` | Yes      | JWK provisioner password for the `mosaic-fed` provisioner            |
 | `STEP_CA_PROVISIONER_KEY_JSON` | Yes      | JSON-encoded JWK (public + private) for the `mosaic-fed` provisioner |
 | `STEP_CA_ROOT_CERT_PATH`       | Yes      | Absolute path to the step-ca root CA certificate PEM file            |
 Set these variables in your environment or secret manager before starting
 the gateway. In the federated Docker Compose stack they are expected to be
 injected via Docker secrets and environment variable overrides.
 ### Fail-loud contract
 The CA service (and the X.509 template) are designed to fail loudly if the
 custom OIDs cannot be embedded:
 - The template produces a malformed extension value (zero-length UTF8String
  body) when the JWT claims `mosaic_grant_id` or `mosaic_subject_user_id` are
  absent. step-ca rejects the CSR rather than issuing a cert without the OIDs.
 - `CaService.issueCert()` throws a `CaServiceError` on every error path with
  a human-readable `remediation` string. It never silently returns a cert that
  may be missing the required extensions.
--- a/docs/federation/TASKS.md
+++ b/docs/federation/TASKS.md
@@ -46,13 +46,14 @@ Goal: Two federated-tier gateways stood up on Portainer at `mos-test-1.woltje.co
 > **Tracking issue:** #482.
-| id               | status      | description                                                                                                                                                                                                             | issue | agent  | branch                                | depends_on   | estimate | notes                                                                                                                                              |
+| id                    | status      | description                                                                                                                                                                                                             | issue | agent  | branch                                | depends_on   | estimate | notes                                                                                                                                              |
-| ---------------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- | ------ | ------------------------------------- | ------------ | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------- |
+| --------------------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- | ------ | ------------------------------------- | ------------ | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------- |
-| FED-M2-DEPLOY-01 | not-started | Verify `gateway:fed-v0.1.0-m1` image was published by `.woodpecker/publish.yml` on tag push; if not, investigate and remediate. Document image URI in deployment artifact.                                              | #482  | sonnet | feat/federation-deploy-image-verify   | —            | 2K       | publish.yml registers `gateway:$CI_COMMIT_TAG` destination; should already exist at `git.mosaicstack.dev/mosaicstack/stack/gateway:fed-v0.1.0-m1`. |
+| FED-M2-DEPLOY-01      | done        | Verify `gateway:fed-v0.1.0-m1` image was published by `.woodpecker/publish.yml` on tag push; if not, investigate and remediate. Document image URI in deployment artifact.                                              | #482  | sonnet | (verified inline, no PR)              | —            | 2K       | Tag exists; digest `sha256:9b72e202a9eecc27d31920b87b475b9e96e483c0323acc57856be4b1355db1ec` captured for digest-pinned deploys.                   |
-| FED-M2-DEPLOY-02 | not-started | Author Portainer git-stack compose file `deploy/portainer/federated-test.stack.yml` (gateway + PG-pgvector + Valkey, env-driven). Use immutable tag, not `latest`.                                                      | #482  | sonnet | feat/federation-deploy-stack-template | DEPLOY-01    | 5K       | Stack must be parameterizable via env (`STACK_DOMAIN`, `BETTERAUTH_SECRET`, etc.) so one template serves both hosts.                               |
+| FED-M2-DEPLOY-02      | done        | Author Portainer git-stack compose file `deploy/portainer/federated-test.stack.yml` (gateway + PG-pgvector + Valkey, env-driven). Use immutable tag, not `latest`.                                                      | #482  | sonnet | feat/federation-deploy-stack-template | DEPLOY-01    | 5K       | Shipped in PR #485. Digest-pinned. Env: STACK_NAME, HOST_FQDN, POSTGRES_PASSWORD, BETTER_AUTH_SECRET, BETTER_AUTH_URL.                             |
-| FED-M2-DEPLOY-03 | not-started | Deploy stack to mos-test-1.woltje.com via `~/.config/mosaic/tools/portainer/`. Verify M1 acceptance: federated-tier boot succeeds; `mosaic gateway doctor --json` returns green; pgvector `vector(3)` round-trip works. | #482  | sonnet | feat/federation-deploy-test-1         | DEPLOY-02    | 3K       | Requires `PORTAINER_URL` + `PORTAINER_API_KEY` env (vault-loaded). DNS for mos-test-1 must resolve before deploy.                                  |
+| FED-M2-DEPLOY-IMG-FIX | in-progress | Gateway image runtime broken (ERR_MODULE_NOT_FOUND for `dotenv`); Dockerfile copies `.pnpm/` store but not `apps/gateway/node_modules` symlinks. Switch to `pnpm deploy` for self-contained runtime.                    | #482  | sonnet | (subagent in flight)                  | DEPLOY-02    | 4K       | Subagent `a78a9ab0ddae91fbc` in flight. Triggers Kaniko rebuild on merge; capture new digest; bump stack template in follow-up PR before redeploy. |
-| FED-M2-DEPLOY-04 | not-started | Deploy stack to mos-test-2.woltje.com via Portainer wrapper. Same M1 acceptance probes as DEPLOY-03.                                                                                                                    | #482  | sonnet | feat/federation-deploy-test-2         | DEPLOY-02    | 3K       | Independent of DEPLOY-03 (parallelizable). Same secret material with distinct domain + secrets per host.                                           |
+| FED-M2-DEPLOY-03      | blocked     | Deploy stack to mos-test-1.woltje.com via `~/.config/mosaic/tools/portainer/`. Verify M1 acceptance: federated-tier boot succeeds; `mosaic gateway doctor --json` returns green; pgvector `vector(3)` round-trip works. | #482  | sonnet | feat/federation-deploy-test-1         | IMG-FIX      | 3K       | Stack created on Portainer endpoint 3 (Swarm `local`), but blocked on image fix. Container fails on boot until IMG-FIX merges + redeploy.          |
-| FED-M2-DEPLOY-05 | not-started | Document deployment in `docs/federation/TEST-INFRA.md`: hosts, image tags, secrets sourcing, redeploy procedure, teardown. Update MISSION-MANIFEST with deployment status.                                              | #482  | haiku  | feat/federation-deploy-docs           | DEPLOY-03,04 | 3K       | Operator-facing doc; mentions but does not duplicate `tools/portainer/README.md`.                                                                  |
+| FED-M2-DEPLOY-04      | blocked     | Deploy stack to mos-test-2.woltje.com via Portainer wrapper. Same M1 acceptance probes as DEPLOY-03.                                                                                                                    | #482  | sonnet | feat/federation-deploy-test-2         | IMG-FIX      | 3K       | Same status as DEPLOY-03. Stack created; blocked on image fix.                                                                                     |
 | FED-M2-DEPLOY-05      | not-started | Document deployment in `docs/federation/TEST-INFRA.md`: hosts, image tags, secrets sourcing, redeploy procedure, teardown. Update MISSION-MANIFEST with deployment status.                                              | #482  | haiku  | feat/federation-deploy-docs           | DEPLOY-03,04 | 3K       | Operator-facing doc; mentions but does not duplicate `tools/portainer/README.md`.                                                                  |
 **Deploy workstream estimate:** ~20K tokens (includes 4K for the added `FED-M2-DEPLOY-IMG-FIX` task)
@@ -62,21 +63,21 @@ Goal: Two federated-tier gateways stood up on Portainer at `mos-test-1.woltje.co
 Goal: An admin can create a federation grant; counterparty enrolls; cert is signed by Step-CA with SAN OIDs for `grantId` + `subjectUserId`. No runtime federation traffic flows yet (that's M3).
-| id        | status      | description                                                                                                                                                                                      | issue | agent  | branch                             | depends_on       | estimate | notes                                                                                                                                     |
+| id        | status | description                                                                                                                                                                                      | issue | agent  | branch                             | depends_on       | estimate | notes                                                                                                                                                        |
-| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ---------------------------------- | ---------------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
+| --------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ---------------------------------- | ---------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| FED-M2-01 | not-started | DB migration: `federation_grants`, `federation_peers`, `federation_audit_log` tables + enum types (`grant_status`, `peer_state`). Drizzle schema + migration generation; migration tests.        | #461  | sonnet | feat/federation-m2-schema          | —                | 5K       | `federation_audit_log` is created but not yet written to (audit logic is M4). Reserve `query_hash`, `outcome`, `bytes_out` columns.       |
+| FED-M2-01 | done   | DB migration: `federation_grants`, `federation_peers`, `federation_audit_log` tables + enum types (`grant_status`, `peer_state`). Drizzle schema + migration generation; migration tests.        | #461  | sonnet | feat/federation-m2-schema          | —                | 5K       | Shipped in PR #486. DESC indexes + reserved cols added after first review; migration tests green.                                                            |
-| FED-M2-02 | not-started | Add Step-CA sidecar to `docker-compose.federated.yml`: official `smallstep/step-ca` image, persistent CA volume, JWK provisioner config baked into init script.                                  | #461  | sonnet | feat/federation-m2-stepca          | DEPLOY-02        | 4K       | Profile-gated under `federated`. CA password from secret; dev compose uses dev-only password file.                                        |
+| FED-M2-02 | done   | Add Step-CA sidecar to `docker-compose.federated.yml`: official `smallstep/step-ca` image, persistent CA volume, JWK provisioner config baked into init script.                                  | #461  | sonnet | feat/federation-m2-stepca          | DEPLOY-02        | 4K       | Shipped in PR #494. Profile-gated under `federated`; CA password from secret; dev compose uses dev-only password file.                                       |
-| FED-M2-03 | not-started | Scope JSON schema + validator: `resources` allowlist, `excluded_resources`, `include_teams`, `include_personal`, `max_rows_per_query`. Vitest unit tests for valid + invalid scopes.             | #461  | sonnet | feat/federation-m2-scope-schema    | —                | 4K       | Validator independent of CA — reusable from grant CRUD + (later) M3 scope enforcement.                                                    |
+| FED-M2-03 | done   | Scope JSON schema + validator: `resources` allowlist, `excluded_resources`, `include_teams`, `include_personal`, `max_rows_per_query`. Vitest unit tests for valid + invalid scopes.             | #461  | sonnet | feat/federation-m2-scope-schema    | —                | 4K       | Shipped in PR #496 (bundled with grants service). Validator independent of CA; reusable from grant CRUD + M3 scope enforcement.                              |
-| FED-M2-04 | not-started | `apps/gateway/src/federation/ca.service.ts`: Step-CA client (CSR submission, OID-bearing cert retrieval). Mocked + integration tests against real Step-CA container.                             | #461  | sonnet | feat/federation-m2-ca-service      | M2-02            | 6K       | SAN OIDs: `grantId` (custom OID 1.3.6.1.4.1.99999.1) + `subjectUserId` (1.3.6.1.4.1.99999.2). Document OID assignments in PRD/SETUP.      |
+| FED-M2-04 | done   | `apps/gateway/src/federation/ca.service.ts`: Step-CA client (CSR submission, OID-bearing cert retrieval). Mocked + integration tests against real Step-CA container.                             | #461  | sonnet | feat/federation-m2-ca-service      | M2-02            | 6K       | Shipped in PR #494. SAN OIDs 1.3.6.1.4.1.99999.1 (grantId) + 1.3.6.1.4.1.99999.2 (subjectUserId); integration test asserts both OIDs present in issued cert. |
-| FED-M2-05 | not-started | Sealed storage for `client_key_pem` reusing existing `provider_credentials` sealing key. Tests prove DB-at-rest is ciphertext, not PEM. Key rotation path documented (deferred impl).            | #461  | sonnet | feat/federation-m2-key-sealing     | M2-01            | 5K       | Separate from M2-06 to keep crypto seam isolated; reviewer focus is sealing only.                                                         |
+| FED-M2-05 | done   | Sealed storage for `client_key_pem` reusing existing `provider_credentials` sealing key. Tests prove DB-at-rest is ciphertext, not PEM. Key rotation path documented (deferred impl).            | #461  | sonnet | feat/federation-m2-key-sealing     | M2-01            | 5K       | Shipped in PR #495. Crypto seam isolated; tests confirm ciphertext-at-rest; key rotation deferred to M6.                                                     |
-| FED-M2-06 | not-started | `grants.service.ts`: CRUD + status transitions (`pending` → `active` → `revoked`); integrates M2-03 (scope) + M2-05 (sealing). Unit tests cover all transitions including invalid ones.          | #461  | sonnet | feat/federation-m2-grants-service  | M2-03, M2-05     | 6K       | Business logic only — CSR + cert work delegated to M2-04. Revocation handler is M6.                                                       |
+| FED-M2-06 | done   | `grants.service.ts`: CRUD + status transitions (`pending` → `active` → `revoked`); integrates M2-03 (scope) + M2-05 (sealing). Unit tests cover all transitions including invalid ones.          | #461  | sonnet | feat/federation-m2-grants-service  | M2-03, M2-05     | 6K       | Shipped in PR #496. All status transitions covered; invalid transition tests green; revocation handler deferred to M6.                                       |
-| FED-M2-07 | not-started | `enrollment.controller.ts`: short-lived single-use token endpoint; CSR signing; updates grant `pending` → `active`; emits enrollment audit (table-only write, M4 tightens).                      | #461  | sonnet | feat/federation-m2-enrollment      | M2-04, M2-06     | 6K       | Tokens single-use with 410 on replay; tokens TTL'd at 15min; rate-limited at request layer (M4 introduces guard, M2 uses simple lock).    |
+| FED-M2-07 | done   | `enrollment.controller.ts`: short-lived single-use token endpoint; CSR signing; updates grant `pending` → `active`; emits enrollment audit (table-only write, M4 tightens).                      | #461  | sonnet | feat/federation-m2-enrollment      | M2-04, M2-06     | 6K       | Shipped in PR #497. Tokens single-use with 410 on replay; TTL 15min; rate-limited at request layer.                                                          |
-| FED-M2-08 | not-started | Admin CLI: `mosaic federation grant create/list/show` + `peer add/list`. Integration with grants.service (no API duplication). Help output + machine-readable JSON option.                       | #461  | sonnet | feat/federation-m2-cli             | M2-06, M2-07     | 7K       | `peer add <enrollment-url>` is the client-side flow; resolves enrollment URL → CSR → store sealed key + cert.                             |
+| FED-M2-08 | done   | Admin CLI: `mosaic federation grant create/list/show` + `peer add/list`. Integration with grants.service (no API duplication). Help output + machine-readable JSON option.                       | #461  | sonnet | feat/federation-m2-cli             | M2-06, M2-07     | 7K       | Shipped in PR #498. `peer add <enrollment-url>` client-side flow; JSON output flag; admin REST controller co-shipped.                                        |
-| FED-M2-09 | not-started | Integration tests covering MILESTONES.md M2 acceptance tests #1, #2, #3, #5, #7, #8 (single-gateway suite). Real Step-CA container; vitest profile gated by `FEDERATED_INTEGRATION=1`.           | #461  | sonnet | feat/federation-m2-integration     | M2-08            | 8K       | Tests #4 (cert OID match) + #6 (two-gateway peer-add) handled separately by M2-10 (E2E).                                                  |
+| FED-M2-09 | done   | Integration tests covering MILESTONES.md M2 acceptance tests #1, #2, #3, #5, #7, #8 (single-gateway suite). Real Step-CA container; vitest profile gated by `FEDERATED_INTEGRATION=1`.           | #461  | sonnet | feat/federation-m2-integration     | M2-08            | 8K       | Shipped in PR #499. All 6 acceptance tests green; gated by FEDERATED_INTEGRATION=1.                                                                          |
-| FED-M2-10 | not-started | E2E test against deployed mos-test-1 + mos-test-2 (or local two-gateway docker-compose if Portainer not ready): MILESTONES test #6 `peer add` yields `active` peer record with valid cert + key. | #461  | sonnet | feat/federation-m2-e2e             | M2-08, DEPLOY-04 | 6K       | Falls back to local docker-compose-two-gateways if remote test hosts not yet available. Documents both paths.                             |
+| FED-M2-10 | done   | E2E test against deployed mos-test-1 + mos-test-2 (or local two-gateway docker-compose if Portainer not ready): MILESTONES test #6 `peer add` yields `active` peer record with valid cert + key. | #461  | sonnet | feat/federation-m2-e2e             | M2-08, DEPLOY-04 | 6K       | Shipped in PR #500. Local two-gateway docker-compose path used; `peer add` yields active peer with valid cert + sealed key.                                  |
-| FED-M2-11 | not-started | Independent security review (sonnet, not author of M2-04/05/06/07): focus on single-use token replay, sealing leak surfaces, OID match enforcement, scope schema bypass paths.                   | #461  | sonnet | feat/federation-m2-security-review | M2-10            | 8K       | Apply M1 two-round pattern. Reviewer should explicitly attempt enrollment-token replay, OID-spoofing CSR, and key leak in error messages. |
+| FED-M2-11 | done   | Independent security review (sonnet, not author of M2-04/05/06/07): focus on single-use token replay, sealing leak surfaces, OID match enforcement, scope schema bypass paths.                   | #461  | sonnet | feat/federation-m2-security-review | M2-10            | 8K       | Shipped in PR #501. Two-round review; enrollment-token replay, OID-spoofing CSR, and key leak in error messages all verified and hardened.                   |
-| FED-M2-12 | not-started | Docs update: `docs/federation/SETUP.md` Step-CA section; new `docs/federation/ADMIN-CLI.md` with grant/peer commands; scope schema reference; OID registration note. Runbook still M7-deferred.  | #461  | haiku  | feat/federation-m2-docs            | M2-11            | 4K       | Adds CA bootstrap section to SETUP.md with `docker compose --profile federated up step-ca` example.                                       |
+| FED-M2-12 | done   | Docs update: `docs/federation/SETUP.md` Step-CA section; new `docs/federation/ADMIN-CLI.md` with grant/peer commands; scope schema reference; OID registration note. Runbook still M7-deferred.  | #461  | haiku  | feat/federation-m2-docs            | M2-11            | 4K       | Shipped in PR #502. SETUP.md CA bootstrap section added; ADMIN-CLI.md created; scope schema reference and OID note included.                                 |
-| FED-M2-13 | not-started | PR aggregate close, CI green, merge to main, close #461. Release tag `fed-v0.2.0-m2`. Mark deploy stream complete. Update mission manifest M2 row.                                               | #461  | sonnet | feat/federation-m2-close           | M2-12            | 3K       | Same close pattern as M1-12; queue-guard before merge; tea release-create with notes including deploy-stream PRs.                         |
+| FED-M2-13 | done   | PR aggregate close, CI green, merge to main, close #461. Release tag `fed-v0.2.0-m2`. Mark deploy stream complete. Update mission manifest M2 row.                                               | #461  | sonnet | chore/federation-m2-close          | M2-12            | 3K       | Release tag `fed-v0.2.0-m2` created; issue #461 closed; all M2 PRs (#486, #494–#502) merged to main.                                                         |
 **M2 code workstream estimate:** ~72K tokens (vs MILESTONES.md 30K — same over-budget pattern as M1, where per-task breakdown including tests/review/docs catches the real cost).
@@ -84,7 +85,38 @@ Goal: An admin can create a federation grant; counterparty enrolls; cert is sign
 ## Milestone 3 — mTLS handshake + list/get + scope enforcement (FED-M3)
-_Deferred. Issue #462._
+Goal: Two federated gateways exchange real data over mTLS. Inbound requests pass through cert validation → grant lookup → scope enforcement → native RBAC → response. `list`, `get`, and `capabilities` verbs land. The federation E2E harness (`tools/federation-harness/`) is the new permanent test bed for M3+ and serves as a required gate for every milestone going forward.
 > **Critical trust boundary.** Every 401/403 path needs a test. Code review is non-negotiable; M3-12 budgets two review rounds.
 >
 > **Tracking issue:** #462.
 | id        | status      | description                                                                                                                                                                                                                                                                                            | issue | agent  | branch                               | depends_on       | estimate | notes                                                                                                                                                    |
 | --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ------------------------------------ | ---------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | FED-M3-01 | not-started | `packages/types/src/federation/` — request/response DTOs for `list`, `get`, `capabilities` verbs. Wire-format zod schemas + inferred TS types. Includes `FederationRequest`, `FederationListResponse<T>`, `FederationGetResponse<T>`, `FederationCapabilitiesResponse`, error envelope, `_source` tag. | #462  | sonnet | feat/federation-m3-types             | —                | 4K       | Reusable from gateway server + client + harness. Pure types — no I/O, no NestJS.                                                                         |
 | FED-M3-02 | not-started | `tools/federation-harness/` scaffold: `docker-compose.two-gateways.yml` (Server A + Server B + step-CA), `seed.ts` (provisions grants, peers, sample tasks/notes/credentials per scope variant), `harness.ts` helper (boots stack, returns typed clients). README documents harness use.               | #462  | sonnet | feat/federation-m3-harness           | DEPLOY-04 (soft) | 8K       | Falls back to local docker-compose if `mos-test-1/-2` not yet redeployed (DEPLOY chain blocked on IMG-FIX). Permanent test infra used by M3+.            |
 | FED-M3-03 | not-started | `apps/gateway/src/federation/server/federation-auth.guard.ts` (NestJS guard). Validates inbound client cert from Fastify TLS context, extracts `grantId` + `subjectUserId` from custom OIDs, loads grant from DB, asserts `status='active'`, attaches `FederationContext` to request.                  | #462  | sonnet | feat/federation-m3-auth-guard        | M3-01            | 8K       | Reuses OID parsing logic mirrored from `ca.service.ts` post-issuance verification. 401 on malformed/missing OIDs; 403 on revoked/expired/missing grant.  |
 | FED-M3-04 | not-started | `apps/gateway/src/federation/server/scope.service.ts`. Pipeline: (1) resource allowlist + excluded check, (2) native RBAC eval as `subjectUserId`, (3) scope filter intersection (`include_teams`, `include_personal`), (4) `max_rows_per_query` cap. Pure service — DB calls injected.                | #462  | sonnet | feat/federation-m3-scope-service     | M3-01            | 10K      | Hardest correctness target in M3. Reuses `parseFederationScope` (M2-03). Returns either `{ allowed: true, filter }` or structured deny reason for audit. |
 | FED-M3-05 | not-started | `apps/gateway/src/federation/server/verbs/list.controller.ts`. Wires AuthGuard → ScopeService → tasks/notes/memory query layer; applies row cap; tags rows with `_source`. Resource selector via path param.                                                                                           | #462  | sonnet | feat/federation-m3-verb-list         | M3-03, M3-04     | 6K       | Routes: `POST /api/federation/v1/list/:resource`. No body persistence. Audit write deferred to M4.                                                       |
 | FED-M3-06 | not-started | `apps/gateway/src/federation/server/verbs/get.controller.ts`. Single-resource fetch by id; same pipeline as list. 404 on not-found, 403 on RBAC/scope deny — both audited the same way.                                                                                                                | #462  | sonnet | feat/federation-m3-verb-get          | M3-03, M3-04     | 6K       | `POST /api/federation/v1/get/:resource/:id`. Mirrors list controller patterns.                                                                           |
 | FED-M3-07 | not-started | `apps/gateway/src/federation/server/verbs/capabilities.controller.ts`. Read-only enumeration: returns `{ resources, excluded_resources, max_rows_per_query, supported_verbs }` derived from grant scope. Always allowed for an active grant — no RBAC eval.                                            | #462  | sonnet | feat/federation-m3-verb-capabilities | M3-03            | 4K       | `GET /api/federation/v1/capabilities`. Smallest verb; useful sanity check that mTLS + auth guard work end-to-end.                                        |
 | FED-M3-08 | not-started | `apps/gateway/src/federation/client/federation-client.service.ts`. Outbound mTLS dialer: picks `(certPem, sealed clientKey)` from `federation_peers`, unwraps key, builds undici Agent with mTLS, calls peer verb, parses typed response, wraps non-2xx into `FederationClientError`.                  | #462  | sonnet | feat/federation-m3-client            | M3-01            | 8K       | Independent of server stream — can land in parallel with M3-03/04. Cert/key cached per-peer; flushed by future M5/M6 logic.                              |
 | FED-M3-09 | not-started | `apps/gateway/src/federation/client/query-source.service.ts`. Accepts `source: "local" \| "federated:<host>" \| "all"` from gateway query layer; for `"all"` fans out to local + each peer in parallel; merges results; tags every row with `_source`.                                                 | #462  | sonnet | feat/federation-m3-query-source      | M3-08            | 8K       | Per-peer failure surfaces as `_partial: true` in response, not hard failure (sets up M5 offline UX). M5 adds caching + circuit breaker on top.           |
 | FED-M3-10 | not-started | Integration tests for MILESTONES.md M3 acceptance #6 (malformed OIDs → 401; valid cert + revoked grant → 403) and #7 (`max_rows_per_query` cap). Real PG, mocked TLS context (Fastify req shim).                                                                                                       | #462  | sonnet | feat/federation-m3-integration       | M3-05, M3-06     | 8K       | Vitest profile gated by `FEDERATED_INTEGRATION=1`. Single-gateway suite; no harness required.                                                            |
 | FED-M3-11 | not-started | E2E tests for MILESTONES.md M3 acceptance #1, #2, #3, #4, #5, #8, #9, #10 (8 cases). Uses harness from M3-02; two real gateways, real Step-CA, real mTLS. Each test asserts both happy-path response and audit/no-persist invariants.                                                                  | #462  | sonnet | feat/federation-m3-e2e               | M3-02, M3-09     | 12K      | Largest single task. Each acceptance gets its own `it(...)` for clear failure attribution.                                                               |
 | FED-M3-12 | not-started | Independent security review (sonnet, not author of M3-03/04/05/06/07/08/09): focus on cert-SAN spoofing, OID extraction edge cases, scope-bypass via filter manipulation, RBAC-bypass via subjectUser swap, response leakage on scope denial.                                                          | #462  | sonnet | feat/federation-m3-security-review   | M3-11            | 10K      | Two review rounds budgeted. PRD requires explicit test for every 401/403 path — review verifies coverage.                                                |
 | FED-M3-13 | not-started | Docs update: `docs/federation/SETUP.md` mTLS handshake section, new `docs/federation/HARNESS.md` for federation-harness usage, OID reference table in SETUP.md, scope enforcement pipeline diagram. Runbook still M7-deferred.                                                                         | #462  | haiku  | feat/federation-m3-docs              | M3-12            | 5K       | One ASCII diagram for the auth-guard → scope → RBAC pipeline; helps future reviewers reason about denial paths.                                          |
 | FED-M3-14 | not-started | PR aggregate close, CI green, merge to main, close #462. Release tag `fed-v0.3.0-m3`. Update mission manifest M3 row → done; M4 row → in-progress when work begins.                                                                                                                                    | #462  | sonnet | chore/federation-m3-close            | M3-13            | 3K       | Same close pattern as M1-12 / M2-13.                                                                                                                     |
 **M3 estimate:** ~100K tokens (vs MILESTONES.md 40K — same per-task breakdown pattern as M1/M2: tests, review, and docs split out from implementation cost). Largest milestone in the federation mission.
 **Parallelization opportunities:**
 - M3-08 (client) can land in parallel with M3-03/M3-04 (server pipeline) — they only share DTOs from M3-01.
 - M3-02 (harness) can land in parallel with everything except M3-11.
 - M3-05/M3-06/M3-07 (verbs) are independent of each other once M3-03/M3-04 land.
 **Test bed fallback:** If `mos-test-1.woltje.com` / `mos-test-2.woltje.com` are still blocked on `FED-M2-DEPLOY-IMG-FIX` when M3-11 is ready to run, the harness's local `docker-compose.two-gateways.yml` is a sufficient stand-in. Production-host validation moves to the M7 acceptance suite (PRD AC-12).
 ## Milestone 4 — search + audit + rate limit (FED-M4)
--- a/docs/mission-control/BOARD.md
+++ b/docs/mission-control/BOARD.md
@@ -0,0 +1,101 @@
 # Mission Control Plane — Feature Board
 > Discussion board for the combined PRD / mission / Kanban workflow.
 > Use this to decide scope before implementation.
 ## Board Legend
 - **Must-have** — required for the first usable version
 - **Should-have** — strongly preferred, but can ship after the core path
 - **Could-have** — valuable later if time permits
 - **Won't-have** — explicitly deferred
 ---
 ## Feature Board
 | Feature Card                   | Need                                                          | Priority    | Decision / Notes                                                            |
 | ------------------------------ | ------------------------------------------------------------- | ----------- | --------------------------------------------------------------------------- |
 | Canonical mission manifest     | One durable root object for goal, PRD, board, session         | Must-have   | Mission manifest becomes the anchor for all downstream state                |
 | PRD generator integration      | PRD should be generated from a feature idea and saved in docs | Must-have   | Use Mosaic PRDy format and keep the file human-reviewable                   |
 | Board atomization              | Break PRD into assignable tasks with dependencies             | Must-have   | Each user story should map to one or more tasks                             |
 | Short-cycle detector           | Detect compaction churn and repeated tool loops               | Must-have   | Coordinator should track churn score per session                            |
 | Handoff packet                 | Preserve actionable context across rotations                  | Must-have   | Use a compact structured summary, not a raw transcript                      |
 | Auto-resume workers            | Let new sessions read mission + board on start                | Should-have | Makes overnight autonomy realistic                                          |
 | Mission status view            | Show current phase, blockers, and active session              | Should-have | Expose through CLI first, dashboard later                                   |
 | Worktree root convention       | Keep worktrees off `/tmp` and on the larger persistent drive  | Should-have | Prefer `/src/<repo>-worktrees` for repo worktrees and long-lived agent work |
 | Review gate                    | Prevent autonomous work from shipping unreviewed              | Should-have | Use reviewer tasks before mission close                                     |
 | Rotation policy config         | Configure thresholds per mission/profile                      | Could-have  | Keep v1 simple, add tuning later                                            |
 | Goal decomposition suggestions | Suggest sub-goals from the PRD                                | Could-have  | Good for planning, not necessary for core path                              |
 | Cross-channel continuity       | Continue a mission across CLI/gateway/remote channels         | Could-have  | Important later, not required for MVP                                       |
 | Automatic board sync           | Mirror git docs into DB and back                              | Could-have  | Nice-to-have after the file-first flow stabilizes                           |
 | Fully autonomous closeout      | Let mission finish without human intervention                 | Won't-have  | Keep an operator-visible review step                                        |
 ---
 ## Needs Discussion
 ### 1) Canonical source of truth
 **Question:** Should the PRD, mission manifest, and board all live in git, or should one be the database source of truth?
 **Proposed answer:** Keep the human-readable artifacts in git and sync the mission runtime state to the database.
 ### 2) Scope of automation
 **Question:** Should the first version auto-create the board from the PRD, or require a human/orchestrator to approve the split?
 **Proposed answer:** Auto-create a draft board, then let the orchestrator approve or adjust it.
 ### 3) Rotation triggers
 **Question:** What should trigger a forced session rotation?
 **Candidate signals:**
 - repeated compaction
 - repeated prompts for permission
 - identical tool loops
 - no new file/task state after several turns
 - task blocked on a missing prerequisite
 **Proposed answer:** Use a weighted churn score with a small hard cap on repeated compactions.
 ### 4) Handoff format
 **Question:** What should the next session receive?
 **Proposed answer:**
 - Mission ID
 - PRD path
 - Active board task
 - Completed work
 - Blockers
 - Next 3 actions
 - Non-negotiable constraints
 ### 5) Operator control
 **Question:** Should the operator be able to force a rotation or pause the mission?
 **Proposed answer:** Yes. Human override should win.
 ---
 ## Draft Decisions
 1. File-first artifacts, DB-backed runtime state.
 2. PRD-first planning, board-second execution.
 3. Auto-rotation on churn, but human override remains available.
 4. Structured handoff packets required on every rotation.
 5. Mission close requires a reviewer task.
 ---
 ## Open Questions
 - What exact data fields belong in the mission manifest?
 - Should rotation thresholds vary by agent profile?
 - What is the minimum viable status surface for v1?
 - Should the board support milestones in addition to tasks?
--- a/docs/mission-control/MISSION-MANIFEST.md
+++ b/docs/mission-control/MISSION-MANIFEST.md
@@ -0,0 +1,95 @@
 # Mission Manifest — Mosaic Mission Control Plane
 > Persistent document tracking scope, status, and handoff history for the combined PRD / mission / Kanban workflow.
 ## Mission
 **ID:** mission-control-plane-20260506
 **Statement:** Combine Mosaic PRDy, coord, and Kanban into one durable workflow so an agent can move from feature idea to PRD to mission to task board and keep working across session rotation, compaction, and restarts with minimal context loss.
 **Phase:** planning — MC-01 complete, MC-02 next
 **Current Milestone:** MC-02
 **Progress:** 1 / 6 milestones
 **Status:** active
 **Last Updated:** 2026-05-06
 **Parent Mission:** None — new mission
 ---
 ## Context
 This mission exists because overnight autonomy breaks when the working session short-cycles. The system needs durable artifacts and a mechanical coordinator that can:
 1. keep a canonical PRD,
 2. atomize the PRD into board tasks,
 3. track mission state separately from the chat session,
 4. detect churn or compaction pressure,
 5. rotate to a fresh session, and
 6. re-enter from a structured handoff.
 Operational convention: repo worktrees and long-lived working directories should use `/src/<repo>-worktrees` instead of `/tmp`.
 Design references:
 - `docs/mission-control/PRD.md` — product requirements
 - `docs/mission-control/BOARD.md` — feature discussion board
 - `docs/mission-control/TASKS.md` — atomized execution plan
 ---
 ## Success Criteria
 - [ ] AC-1: A feature idea can be converted into a PRD, mission, and task board.
 - [ ] AC-2: The coordinator can load a mission and its board from durable storage.
 - [ ] AC-3: The coordinator can detect short-cycling and rotate sessions automatically.
 - [ ] AC-4: A rotated session can resume from a handoff packet without manual re-prompting.
 - [ ] AC-5: The board remains traceable back to the PRD user stories.
 - [ ] AC-6: Operators can inspect mission state, task state, and latest handoff from one place.
 - [ ] AC-7: The system can run overnight without losing the mission goal.
 ---
 ## Milestones
 | #   | ID    | Name                                     | Status      | Branch                  | Started    | Completed  |
 | --- | ----- | ---------------------------------------- | ----------- | ----------------------- | ---------- | ---------- |
 | 1   | MC-01 | PRD + mission schema foundation          | done        | docs/mission-control-\* | 2026-05-06 | 2026-05-06 |
 | 2   | MC-02 | Mission runtime model                    | not-started | —                       | —          | —          |
 | 3   | MC-03 | Board atomization and task linkage       | not-started | —                       | —          | —          |
 | 4   | MC-04 | Short-cycle detector and rotation engine | not-started | —                       | —          | —          |
 | 5   | MC-05 | Handoff generation and re-entry          | not-started | —                       | —          | —          |
 | 6   | MC-06 | Operator surface and E2E validation      | not-started | —                       | —          | —          |
 ---
 ## Budget
 | Milestone | Est. tokens | Parallelizable?    |
 | --------- | ----------- | ------------------ |
 | MC-01     | 16K         | No                 |
 | MC-02     | 20K         | No                 |
 | MC-03     | 24K         | Mostly after MC-01 |
 | MC-04     | 20K         | After MC-02        |
 | MC-05     | 18K         | After MC-04        |
 | MC-06     | 26K         | After MC-04/05     |
 | **Total** | **~124K**   |                    |
 ---
 ## Session History
 | Session | Date       | Runtime | Outcome                                                                  |
 | ------- | ---------- | ------- | ------------------------------------------------------------------------ |
 | S1      | 2026-05-06 | hermes  | PRD, board, task plan, mission manifest, and worktree convention drafted |
 ---
 ## Next Step
 Kick off MC-02: implement the durable mission runtime model and wire the mission state into the coordinator.
--- a/docs/mission-control/PRD.md
+++ b/docs/mission-control/PRD.md
@@ -0,0 +1,205 @@
 # PRD: Mosaic Mission Control Plane
 ## Metadata
 - **Owner:** Jason Woltje
 - **Date:** 2026-05-06
 - **Status:** draft
 - **Framework:** Mosaic PRDy + coord + Kanban
 - **Target Repo:** `git.mosaicstack.dev/mosaic/mosaic-stack`
 - **Primary Modules:** `packages/prdy`, `packages/coord`, `packages/queue`, `apps/gateway`, `packages/brain`, `packages/cli`
 ---
 ## Problem Statement
 Mosaic already has the ingredients for durable agent work: PRD generation (`prdy`), mission coordination (`coord`), and task execution boards (`Kanban` / `TASKS.md`). Today those systems can still drift apart:
 - A PRD can exist without a mission record.
 - A mission can exist without a machine-readable execution board.
 - Agents can short-cycle or compact repeatedly without a durable handoff.
 - The next session may know the goal, but not the exact next step.
 The result is brittle overnight autonomy: work continues only as long as a single session remains healthy.
 This feature unifies those layers into one durable workflow so a mission can survive session rotation, compaction, and restarts with minimal state loss.
 ---
 ## Goals
 1. Create one canonical pipeline from idea → PRD → mission → board → execution.
 2. Let `prdy` generate a PRD that is immediately usable as a mission input.
 3. Let `coord` own mission state, handoffs, and session rotation.
 4. Let the board hold atomized tasks with dependencies and assignees.
 5. Let agents read the mission and board to learn the next action without extra prompting.
 6. Detect short-cycling and rotate sessions before quality degrades.
 7. Preserve useful context across handoffs with a structured summary packet.
 8. Give operators a single place to see mission status, task state, and the current session.
 ---
 ## Non-Goals
 1. Replacing the Mosaic agent runtime or gateway architecture.
 2. Rewriting `prdy` or `coord` from scratch.
 3. Turning the board into a general project-management system.
 4. Building a full Gantt/charting product.
 5. Removing human review or approval gates.
 6. Allowing agents to create arbitrary mission state without schema.
 ---
 ## User Stories
 ### US-001: Create a mission from a feature idea
 **Description:** As an orchestrator, I want to turn a feature idea into a PRD and mission so that agents can work from a durable spec instead of a chat transcript.
 **Acceptance Criteria:**
 - [ ] `prdy` can emit a PRD with goals, non-goals, and requirements.
 - [ ] The PRD is linked to a mission ID.
 - [ ] The mission manifest references the PRD path.
 - [ ] The mission is readable by downstream agent sessions.
 ### US-002: Atomize work into a board
 **Description:** As an orchestrator, I want to split a PRD into board tasks so that work can be assigned to specialists.
 **Acceptance Criteria:**
 - [ ] Each user story can become one or more tasks.
 - [ ] Tasks have assignees, dependencies, and estimates.
 - [ ] Tasks are machine-readable and durable.
 - [ ] The board can be regenerated from the PRD without ambiguity.
 ### US-003: Rotate sessions without losing the mission
 **Description:** As a coordinator, I want to restart or rotate a session when it short-cycles so that the mission continues with minimal loss.
 **Acceptance Criteria:**
 - [ ] The coordinator detects compaction pressure or repeated loops.
 - [ ] The coordinator writes a handoff summary before rotation.
 - [ ] A new session can resume from the handoff packet.
 - [ ] The mission state remains intact across the rotation.
 ### US-004: Let workers read the next step automatically
 **Description:** As a worker agent, I want to read the mission and board at startup so I can do the next useful thing without waiting for a human prompt.
 **Acceptance Criteria:**
 - [ ] Startup loads the active mission manifest.
 - [ ] Startup loads the current board/task row.
 - [ ] Startup exposes the next action clearly in the prompt.
 - [ ] The agent can continue after compaction using the same mission context.
 ### US-005: Observe mission health from one place
 **Description:** As an operator, I want a single view of mission health so that I can see progress, blocked tasks, and session churn.
 **Acceptance Criteria:**
 - [ ] Mission state shows current phase and progress.
 - [ ] Board state shows task status by assignee.
 - [ ] Short-cycle/rotation events are visible.
 - [ ] Handoffs are inspectable.
 ---
 ## Functional Requirements
 FR-1. The system must represent a mission as a durable object with an ID, goal, current phase, PRD path, board path, and active session ID.
 FR-2. The system must represent a PRD as a markdown document with goals, user stories, functional requirements, non-goals, technical considerations, and success metrics.
 FR-3. The system must represent execution work as a board of atomized tasks with status, assignee, dependency, and estimate fields.
 FR-4. The coordinator must be able to derive a task board from a PRD.
 FR-5. The coordinator must be able to write a handoff packet that includes goal, current state, completed work, blocked work, next steps, and constraints.
 FR-6. The coordinator must detect short-cycling signals such as repeated compactions, repeated tool loops, repeated approval prompts, or no progress across several turns.
 FR-7. The coordinator must rotate the session when the short-cycle threshold is exceeded.
 FR-8. The coordinator must preserve mission continuity across session rotation.
 FR-9. The worker session must read the mission state and board state at startup.
 FR-10. The worker session must be able to resume from the last handoff summary without the operator rewriting the goal manually.
 FR-11. The operator must be able to inspect the mission state, PRD, board, and latest handoff from one place.
 FR-12. The mission system must keep a traceable link between PRD requirements and board tasks.
 FR-13. The system must not allow a task to become active without a valid mission context.
 FR-14. The system must keep durable history for rotation and handoff events.
 ---
 ## Board Discussion: Features and Needs
 This is the feature discussion board that should drive the mission design.
 | Card                     | Need                                         | Why it matters                               | Proposed decision                                            |
 | ------------------------ | -------------------------------------------- | -------------------------------------------- | ------------------------------------------------------------ |
 | Canonical mission record | One source of truth for goal/state           | Prevents drift between chat, docs, and queue | Make mission manifest the durable root object                |
 | PRD → board derivation   | Break feature ideas into executable work     | Lets the plan be assigned and tracked        | Keep PRD as the spec, generate board tasks from user stories |
 | Session watchdog         | Detect churn/short-cycling                   | Keeps overnight runs productive              | Add short-cycle scoring and forced rotation                  |
 | Structured handoff       | Preserve context across session changes      | Minimizes restart loss                       | Use a compact JSON/MD handoff packet                         |
 | Worker auto-read         | Let agents resume without human re-prompting | Reduces operator overhead                    | Load mission + board on session start                        |
 | Status surface           | Show progress and blockers clearly           | Operators need confidence                    | Expose mission state via CLI and dashboard                   |
 | Review gate              | Keep quality high on autonomous work         | Prevents silent regressions                  | Require review tasks before close                            |
 | Recoverability           | Resume after failure or restart              | Mission should outlive a process             | Persist session and handoff history                          |
 ---
 ## Design Considerations
 1. The PRD should stay human-readable markdown, because the board and mission references need to be reviewable in git.
 2. The board should be machine-readable enough for automation but still readable by humans.
 3. The mission manifest should point to the PRD and board, not duplicate them.
 4. Handoff packets should be compact and structured so they can be injected into a new session with minimal token cost.
 5. The coordinator should prefer rotation over forced context growth once the session is near the compaction threshold.
 6. Existing Mosaic commands should be extended, not replaced, wherever possible.
 7. The same mission should be resumable across CLI, gateway, and remote channels.
 ---
 ## Technical Considerations
 - Likely storage split:
   - PRD/board/manifest in git-backed docs
   - mission/session state in the Mosaic data layer
   - runtime health in queue/session state
 - Worktrees and long-lived agent working directories should live under `/src/<repo>-worktrees` rather than `/tmp` so they sit on the larger persistent drive and survive longer-running missions.
 - The coordinator needs a stable mission-level identity that persists even when the active session changes.
 - Task dependencies must be enforced so workers do not start early.
 - The handoff packet should include the top 3 immediate actions and the strongest constraints.
 - Rotation triggers should be configurable per profile or per mission.
 - The initial version can be file-first, with dashboard sync added later.
 ---
 ## Success Metrics
 - A mission can rotate sessions without losing the active goal.
 - A new session can resume from the latest handoff within a single turn.
 - Board tasks remain aligned to PRD user stories.
 - Short-cycling sessions are replaced before repeated compaction harms quality.
 - Operators can find mission state without spelunking across multiple chat logs.
 ---
 ## Open Questions
 1. What should the canonical mission ID format be?
 2. Should the board live only in git, or also in the database?
 3. Should rotation be automatic by default, or opt-in per mission?
 4. What should the short-cycle threshold be initially?
 5. Should handoffs be pure text, structured JSON, or both?
 6. Which CLI command should be the primary mission entrypoint: `mosaic mission`, `mosaic coord`, or `mosaic prdy`?
--- a/docs/mission-control/TASKS.md
+++ b/docs/mission-control/TASKS.md
@@ -0,0 +1,113 @@
 # Tasks — Mosaic Mission Control Plane
 > Single-writer: orchestrator only. Workers read but never modify.
 >
 > **Mission:** mission-control-plane-20260506
 > **Schema:** `| id | status | description | issue | agent | branch | depends_on | estimate | notes |`
 > **Status values:** `not-started` | `in-progress` | `done` | `blocked` | `failed` | `needs-qa`
 > **Agent values:** `codex` | `glm-5.1` | `haiku` | `sonnet` | `opus` | `—` (auto)
 >
 > Scope: this file decomposes the combined PRD / mission / board workflow into atomized tasks.
 ---
 ## Milestone 1 — PRD + mission schema foundation
 Goal: create the durable doc structure and the minimal mission metadata needed to keep PRD, board, and mission aligned.
 | id       | status      | description                                                                                              | issue | agent  | branch                        | depends_on         | estimate | notes                                       |
 | -------- | ----------- | -------------------------------------------------------------------------------------------------------- | ----- | ------ | ----------------------------- | ------------------ | -------- | ------------------------------------------- |
 | MC-01-01 | done        | Write `docs/mission-control/PRD.md` with goals, non-goals, functional requirements, and success metrics. | —     | sonnet | docs/mission-control-prd      | —                  | 5K       | Human-readable PRD becomes the spec anchor. |
 | MC-01-02 | done        | Write `docs/mission-control/BOARD.md` as a decision board for scope, priority, and open questions.       | —     | haiku  | docs/mission-control-board    | MC-01-01           | 3K       | Keeps discussion separate from the spec.    |
 | MC-01-03 | done        | Write `docs/mission-control/MISSION-MANIFEST.md` linking PRD, board, tasks, and mission identity.        | —     | sonnet | docs/mission-control-manifest | MC-01-01, MC-01-02 | 4K       | Durable mission root object.                |
 | MC-01-04 | done        | Write `docs/mission-control/TASKS.md` with the atomized execution plan and dependency graph.             | —     | sonnet | docs/mission-control-tasks    | MC-01-03           | 4K       | Board-backed execution plan.                |
 **Milestone 1 estimate:** ~16K tokens
 ---
 ## Milestone 2 — Mission runtime model
 Goal: make missions first-class runtime objects that can survive session restarts and compaction.
 | id       | status      | description                                                                                                                               | issue | agent | branch                                 | depends_on | estimate | notes                                                |
 | -------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------- | ----- | ----- | -------------------------------------- | ---------- | -------- | ---------------------------------------------------- |
 | MC-02-01 | not-started | Define mission schema in the data layer: mission ID, goal, phase, PRD path, board path, active session ID, last handoff, and churn score. | —     | codex | feat/mission-control-schema            | MC-01-03   | 6K       | This is the durable root state.                      |
 | MC-02-02 | not-started | Add mission read/write services to `packages/coord` so the coordinator can load and persist mission state.                                | —     | codex | feat/mission-control-coord-store       | MC-02-01   | 6K       | Keep storage simple and explicit.                    |
 | MC-02-03 | not-started | Add mission status reporting to `mosaic mission` and `mosaic coord status`.                                                               | —     | codex | feat/mission-control-status-cli        | MC-02-02   | 4K       | Operators need one obvious status command.           |
 | MC-02-04 | not-started | Add tests for mission persistence and recovery after restart.                                                                             | —     | haiku | feat/mission-control-persistence-tests | MC-02-02   | 4K       | Verify mission survives process churn.               |
 | MC-02-05 | done        | Add a worktree-root convention to the mission runtime notes and startup guidance so agents prefer `/src/<repo>-worktrees` over `/tmp`.    | —     | haiku | docs/mission-control-worktree-root     | MC-01-03   | 3K       | Keep long-lived work on the larger persistent drive. |
 **Milestone 2 estimate:** ~20K tokens
 ---
 ## Milestone 3 — Board atomization and task linkage
 Goal: derive assignable tasks from the PRD and keep them linked to mission state.
 | id       | status      | description                                                                                 | issue | agent  | branch                           | depends_on         | estimate | notes                                       |
 | -------- | ----------- | ------------------------------------------------------------------------------------------- | ----- | ------ | -------------------------------- | ------------------ | -------- | ------------------------------------------- |
 | MC-03-01 | not-started | Add a PRD-to-task decomposition rule set: every user story maps to one or more board tasks. | —     | sonnet | feat/mission-control-decompose   | MC-01-01           | 5K       | Start simple and deterministic.             |
 | MC-03-02 | not-started | Implement board generation from the PRD in a machine-readable format.                       | —     | codex  | feat/mission-control-board-gen   | MC-03-01           | 6K       | Output should be usable by the coordinator. |
 | MC-03-03 | not-started | Add dependency validation so tasks cannot start before parent tasks complete.               | —     | codex  | feat/mission-control-deps        | MC-03-02           | 5K       | Enforces ordering.                          |
 | MC-03-04 | not-started | Add review-task support so a mission cannot close without a reviewer step.                  | —     | sonnet | feat/mission-control-review-gate | MC-03-03           | 4K       | Preserves quality.                          |
 | MC-03-05 | not-started | Add tests proving the board stays traceable back to the PRD user stories.                   | —     | haiku  | feat/mission-control-trace-tests | MC-03-02, MC-03-03 | 4K       | Traceability is the point.                  |
 **Milestone 3 estimate:** ~24K tokens
 ---
 ## Milestone 4 — Short-cycle detector and rotation engine
 Goal: detect when a session is stuck and rotate to a fresh session before quality falls off.
 | id       | status      | description                                                                                                                         | issue | agent  | branch                              | depends_on | estimate | notes                                          |
 | -------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------------- | ----- | ------ | ----------------------------------- | ---------- | -------- | ---------------------------------------------- |
 | MC-04-01 | not-started | Define churn signals: repeated compaction, identical tool loops, repeated permission prompts, and no progress across several turns. | —     | sonnet | feat/mission-control-churn-signals  | MC-02-01   | 4K       | Keep the rules explicit.                       |
 | MC-04-02 | not-started | Implement churn scoring in the coordinator with configurable thresholds.                                                            | —     | codex  | feat/mission-control-churn-score    | MC-04-01   | 6K       | Weighted score makes tuning easier.            |
 | MC-04-03 | not-started | Implement automatic session rotation when churn crosses the threshold.                                                              | —     | codex  | feat/mission-control-rotate-session | MC-04-02   | 6K       | The session is disposable; the mission is not. |
 | MC-04-04 | not-started | Add tests for rotation triggers and for avoiding premature rotation.                                                                | —     | haiku  | feat/mission-control-rotation-tests | MC-04-03   | 4K       | Prevent flapping.                              |
 **Milestone 4 estimate:** ~20K tokens
 ---
 ## Milestone 5 — Handoff generation and re-entry
 Goal: preserve the best context from the old session and inject it into the new session cleanly.
 | id       | status      | description                                                                                                          | issue | agent  | branch                              | depends_on         | estimate | notes                                    |
 | -------- | ----------- | -------------------------------------------------------------------------------------------------------------------- | ----- | ------ | ----------------------------------- | ------------------ | -------- | ---------------------------------------- |
 | MC-05-01 | not-started | Define the handoff packet schema: mission ID, session ID, completed work, blockers, next 3 actions, and constraints. | —     | sonnet | feat/mission-control-handoff-schema | MC-02-01           | 4K       | Keep it compact and structured.          |
 | MC-05-02 | not-started | Implement handoff packet writing during rotation.                                                                    | —     | codex  | feat/mission-control-handoff-write  | MC-05-01, MC-04-03 | 5K       | Persist before the old session exits.    |
 | MC-05-03 | not-started | Implement handoff packet loading at session startup.                                                                 | —     | codex  | feat/mission-control-handoff-load   | MC-05-01, MC-04-03 | 5K       | New session should know the next action. |
 | MC-05-04 | not-started | Add tests proving a rotated session can continue the mission without manual re-prompting.                            | —     | haiku  | feat/mission-control-handoff-tests  | MC-05-02, MC-05-03 | 4K       | Resume quality is the key metric.        |
 **Milestone 5 estimate:** ~18K tokens
 ---
 ## Milestone 6 — Operator surface and E2E validation
 Goal: expose the whole workflow through commands and verify it end-to-end.
 | id       | status      | description                                                                                               | issue | agent  | branch                           | depends_on         | estimate | notes                                        |
 | -------- | ----------- | --------------------------------------------------------------------------------------------------------- | ----- | ------ | -------------------------------- | ------------------ | -------- | -------------------------------------------- |
 | MC-06-01 | not-started | Add a CLI command to inspect the active mission, PRD path, board path, task statuses, and latest handoff. | —     | codex  | feat/mission-control-inspect-cli | MC-02-03, MC-05-03 | 5K       | One place to inspect the whole stack.        |
 | MC-06-02 | not-started | Add a compact dashboard or TUI summary view for mission health.                                           | —     | codex  | feat/mission-control-summary-ui  | MC-06-01           | 6K       | Nice to have, but not before the core works. |
 | MC-06-03 | not-started | Build an E2E harness that simulates compaction / rotation and verifies the mission can continue.          | —     | sonnet | feat/mission-control-e2e-harness | MC-04-03, MC-05-03 | 8K       | This is the proof that the design works.     |
 | MC-06-04 | not-started | Add final docs for operators explaining how PRD, mission, and board fit together.                         | —     | haiku  | feat/mission-control-ops-docs    | MC-06-03           | 4K       | Make it usable by humans.                    |
 | MC-06-05 | not-started | Consolidate review findings and close the mission with a release note.                                    | —     | sonnet | chore/mission-control-close      | MC-06-04           | 3K       | Only after the E2E passes.                   |
 **Milestone 6 estimate:** ~26K tokens
 ---
 ## Execution Notes
 - `sonnet` is best for planning, decomposition, and the review-gate tasks.
 - `codex` is best for schema, coordinator, and CLI implementation.
 - `haiku` is best for validation, traceability checks, and docs.
 - The first implementation pass should stay file-first and keep the runtime state thin.
 - The mission should not close until the PRD, board, mission manifest, and E2E harness all agree.
--- a/docs/plans/2026-05-06-hermes-mosaic-alignment.md
+++ b/docs/plans/2026-05-06-hermes-mosaic-alignment.md
@@ -0,0 +1,238 @@
 # Hermes-Mosaic Alignment Plan
 > **For Hermes:** Use the `subagent-driven-development` skill to implement this plan task-by-task.
 **Goal:** Package Mosaic's mechanical coordination primitives as a native Hermes toolset so any Hermes profile gets mission management, task decomposition, handoff, and session continuity without depending on the Mosaic gateway or OpenClaw runtime.
 **Architecture:** Extract the coordination logic from Mosaic's `packages/coord` (TypeScript, file-first) into a Hermes Python toolset that wraps the same file conventions. The Mosaic Stack repo remains the canonical upstream for the file formats (TASKS.md schema, mission.json schema, handoff packet schema). Hermes implements native Python tools that read/write those same files, plus a handoff-generation tool-call and turn-loop churn-detection middleware, neither of which has a Mosaic equivalent today.
 **Tech Stack:** Python (Hermes toolset), SQLite (Hermes Kanban), JSON + Markdown (Mosaic file conventions)
 ---
 ## Alignment Map
 ### What Mosaic has that Hermes needs
 | Mosaic Component                 | What it does                                              | Natural Hermes home                          | Why                                                                                                                                                                              |
 | -------------------------------- | --------------------------------------------------------- | -------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `packages/coord` (mission.ts)    | Mission CRUD, session tracking, milestone state           | **Hermes toolset: `mission`**                | Mission state is session-scoped, not gateway-scoped. Hermes sessions already have identity, process tracking, and context windows.                                               |
 | `packages/coord` (tasks-file.ts) | Parse/write TASKS.md tables                               | **Hermes toolset: `mission`** (same)         | Hermes already reads/writes files. The TASKS.md parser is ~300 lines of pure string manipulation — trivial Python port.                                                          |
 | `packages/coord` (runner.ts)     | Spawn claude/codex workers with continuation prompts      | **Already covered by `delegate_task`**       | Hermes delegate_task already does isolated subagent spawning with restricted toolsets. The runner's "find next task and build continuation prompt" logic moves into a tool-call. |
 | `packages/coord` (status.ts)     | Mission health, task progress, next task                  | **Hermes toolset: `mission`** (same)         | Status readout fits naturally as a tool-call. No gateway needed.                                                                                                                 |
 | `packages/prdy`                  | PRD generation wizard                                     | **Hermes skill: `prdy`**                     | PRD generation is a prompt + template problem, not infrastructure. A Hermes skill with templates is the right fit.                                                               |
 | `plugins/mosaic-framework`       | before_agent_start + subagent_spawning hooks              | **Hermes system prompt injection**           | Hermes already injects system context via skills and config. The framework preamble and worktree rules become standard Hermes skills loaded by the orchestrator profile.         |
 | `plugins/macp`                   | OpenClaw ACP bridge (spawn codex/claude)                  | **Already covered by `delegate_task` + ACP** | Hermes already has ACP support and delegate_task. The MACP bridge is redundant when running natively in Hermes.                                                                  |
 | Churn detection (planned)        | Detect compaction loops, repeated tool calls, no progress | **Hermes middleware**                        | This needs to live inside Hermes's turn loop where it can observe tool-call patterns. Mosaic can't see this from outside.                                                        |
 | Handoff packet (planned)         | Structured context summary for session rotation           | **Hermes toolset: `mission`**                | Handoff is a serialization of mission + session state. Hermes owns the session, so it should own the handoff.                                                                    |
 ### What Hermes already has that replaces Mosaic infrastructure
 | Mosaic concept       | Hermes equivalent                     | Notes                                                                                                    |
 | -------------------- | ------------------------------------- | -------------------------------------------------------------------------------------------------------- |
 | Gateway (NestJS)     | Hermes gateway                        | Hermes already has a gateway with WebSocket, Discord, Telegram, CLI. No need for a second one.           |
 | Pi SDK agent runtime | Hermes agent loop                     | Hermes IS the agent runtime. OpenClaw's Pi SDK is a different runtime that Mosaic targets.               |
 | MACP ACP bridge      | `delegate_task` + ACP tools           | Same capability, already native.                                                                         |
 | Session identity     | Hermes session IDs + process_registry | Hermes already tracks session identity, PIDs, and background processes.                                  |
 | Task execution board | Hermes Kanban                         | Fully functional SQLite-backed Kanban with dispatcher, triage, events, comments.                         |
 | Worker spawning      | Hermes dispatcher + cron              | Kanban dispatcher + cron already handle this.                                                            |
 | Context injection    | Hermes skills + system prompt         | Skills are loaded at session start and injected into context. Exactly what mosaic-framework plugin does. |
 | File checkpoints     | Hermes checkpoint_manager             | Already tracks file mutations with shadow git.                                                           |
 ### What Mosaic keeps as its own entity
 | Component             | Why it stays in Mosaic                              |
 | --------------------- | --------------------------------------------------- |
 | `apps/gateway`        | NestJS API surface — Mosaic's web platform offering |
 | `apps/web`            | Next.js dashboard — Mosaic's UI offering            |
 | `packages/types`      | Shared TS contracts for Mosaic gateway plugins      |
 | `packages/db`         | Drizzle ORM + PG — Mosaic's data layer              |
 | `packages/auth`       | BetterAuth — Mosaic's auth system                   |
 | `packages/brain`      | PG-backed data layer for Mosaic web app             |
 | `packages/queue`      | Valkey task queue for Mosaic gateway                |
 | `plugins/discord`     | OpenClaw Discord plugin                             |
 | `plugins/telegram`    | OpenClaw Telegram plugin                            |
 | `packages/mosaic` CLI | The `mosaic` CLI — Mosaic's own command surface     |
 ---
 ## Architecture: `mission` Toolset for Hermes
 ### New files under `/opt/hermes/tools/`
 ```
 mission_tools.py      — Tool-call surface (mission_create, mission_status,
                         mission_next_task, mission_update_task, mission_handoff,
                         mission_resume)
 mission_state.py       — State management (read/write mission.json, parse TASKS.md,
                         parse MISSION-MANIFEST.md)
 mission_churn.py       — Churn detection (tool-loop counter, compaction counter,
                         progress scorer)
 mission_handoff.py     — Handoff packet generation and loading
 ```
 ### Tool-calls exposed to the agent
 | Tool                  | What it does                                                                      | When the agent calls it                     |
 | --------------------- | --------------------------------------------------------------------------------- | ------------------------------------------- |
 | `mission_create`      | Initialize mission.json + TASKS.md + MISSION-MANIFEST.md in a project dir         | When starting a new mission                 |
 | `mission_status`      | Read current mission state, milestone progress, next task, active session         | At session start, or when checking progress |
 | `mission_next_task`   | Find the next `not-started` task whose dependencies are met, return its full spec | When the agent needs work to do             |
 | `mission_update_task` | Update a task row status in TASKS.md                                              | When completing or blocking a task          |
 | `mission_handoff`     | Generate a handoff packet from current session context + mission state            | Before session rotation or at session end   |
 | `mission_resume`      | Load a handoff packet and inject it as context for the new session                | At session start after rotation             |
 ### Toolset registration
 The `mission` toolset follows the same pattern as `kanban`:
 1. **Gating**: Tools are available when:
   - The profile has `mission` in its toolsets config, OR
   - A `HERMES_MISSION_DIR` env var is set (cron/dispatcher spawned workers)
 2. **File conventions**: The toolset reads/writes the same file formats as Mosaic `packages/coord`:
   - `.mosaic/orchestrator/mission.json` — mission state
   - `docs/TASKS.md` — task table
   - `docs/MISSION-MANIFEST.md` — mission manifest
   - `docs/scratchpads/<id>.md` — session scratchpad
 3. **Kanban bridge**: Optional sync from mission TASKS.md rows to Kanban task cards (one-way first; reverse sync is deferred — see Open Questions), so the dashboard sees mission tasks.
 ### Churn detection (middleware)
 Churn detection lives in Hermes's turn loop, NOT as a tool-call. It observes:
 - Repeated compaction events (context window pressure)
 - Identical tool-call sequences (loop detection)
 - No file state changes across N turns
 - Repeated permission denials
 When churn score exceeds threshold:
 1. `mission_handoff` is called automatically
 2. Session is rotated (fresh context window)
 3. `mission_resume` is called in the new session
 This is new infrastructure that only Hermes can provide (Mosaic runs outside the agent loop).
 ---
 ## Implementation Tasks
 ### Phase 1: Core state management (Python port of coord)
 | Task                                               | Files                         | Estimate |
 | -------------------------------------------------- | ----------------------------- | -------- |
 | 1.1 Port mission.json read/write to Python         | `mission_state.py`            | 2h       |
 | 1.2 Port TASKS.md parser to Python                 | `mission_state.py`            | 2h       |
 | 1.3 Port MISSION-MANIFEST.md reader to Python      | `mission_state.py`            | 1h       |
 | 1.4 Implement `mission_create` tool-call           | `mission_tools.py`            | 1h       |
 | 1.5 Implement `mission_status` tool-call           | `mission_tools.py`            | 1h       |
 | 1.6 Implement `mission_next_task` tool-call        | `mission_tools.py`            | 1h       |
 | 1.7 Implement `mission_update_task` tool-call      | `mission_tools.py`            | 1h       |
 | 1.8 Register `mission` toolset in Hermes registry  | `tools/registry.py`           | 30m      |
 | 1.9 Add `mission` to orchestrator profile toolsets | `config.yaml`                 | 10m      |
 | 1.10 Write unit tests for mission_state            | `tests/test_mission_state.py` | 2h       |
 | 1.11 Write unit tests for TASKS.md parser          | `tests/test_tasks_parser.py`  | 1h       |
 **Phase 1 estimate:** ~13h
 ### Phase 2: Handoff and session continuity
 | Task                                              | Files                                    | Estimate |
 | ------------------------------------------------- | ---------------------------------------- | -------- |
 | 2.1 Define handoff packet schema (JSON)           | `mission_handoff.py`                     | 1h       |
 | 2.2 Implement `mission_handoff` tool-call         | `mission_handoff.py`, `mission_tools.py` | 2h       |
 | 2.3 Implement `mission_resume` tool-call          | `mission_handoff.py`, `mission_tools.py` | 2h       |
 | 2.4 Wire handoff into session start (auto-resume) | agent loop hook                          | 2h       |
 | 2.5 Write tests for handoff round-trip            | `tests/test_mission_handoff.py`          | 1h       |
 **Phase 2 estimate:** ~8h
 ### Phase 3: Churn detection
 | Task                                                           | Files                         | Estimate |
 | -------------------------------------------------------------- | ----------------------------- | -------- |
 | 3.1 Define churn signal weights and thresholds                 | `mission_churn.py`            | 1h       |
 | 3.2 Implement tool-loop detector (consecutive identical calls) | `mission_churn.py`            | 2h       |
 | 3.3 Implement compaction pressure detector                     | `mission_churn.py`            | 1h       |
 | 3.4 Implement progress scorer (file state delta)               | `mission_churn.py`            | 2h       |
 | 3.5 Wire churn scoring into agent turn loop                    | agent loop middleware         | 2h       |
 | 3.6 Implement auto-rotation trigger                            | agent loop + handoff          | 2h       |
 | 3.7 Write tests for churn scoring                              | `tests/test_mission_churn.py` | 1h       |
 **Phase 3 estimate:** ~11h
 ### Phase 4: Kanban bridge + CLI surface
 | Task                                                 | Files                    | Estimate |
 | ---------------------------------------------------- | ------------------------ | -------- |
 | 4.1 Implement TASKS.md → Kanban sync (one-way first) | `mission_kanban_sync.py` | 2h       |
 | 4.2 Add `hermes mission` CLI subcommand              | `mission_cli.py`         | 2h       |
 | 4.3 Add `hermes mission status` command              | `mission_cli.py`         | 1h       |
 | 4.4 Add `hermes mission init` command                | `mission_cli.py`         | 1h       |
 | 4.5 Add `hermes mission handoff` command             | `mission_cli.py`         | 1h       |
 | 4.6 Add `hermes mission resume` command              | `mission_cli.py`         | 1h       |
 **Phase 4 estimate:** ~8h
 ---
 ## File Format Compatibility
 The Python implementation MUST read and write the exact same file formats as Mosaic's TypeScript `packages/coord`. This means:
 1. **mission.json** schema is identical to `Mission` type in `packages/coord/src/types.ts`
 2. **TASKS.md** table format is identical to what `packages/coord/src/tasks-file.ts` parses
 3. **MISSION-MANIFEST.md** is free-form markdown (no parser needed — just read the file)
 4. **Handoff packets** are a new JSON format defined in this toolset (Mosaic doesn't have them yet)
 This way a project can use Hermes mission tools OR Mosaic `mosaic coord` commands interchangeably. The files are the contract.
 ---
 ## Relationship Diagram
 ```
 Mosaic Stack (TypeScript)          Hermes Agent (Python)
 ┌─────────────────────────┐        ┌─────────────────────────┐
 │  packages/coord          │        │  tools/ (mission)        │
 │  ├─ mission.ts           │◄──────►│  ├─ mission_state.py     │
 │  ├─ tasks-file.ts        │  same  │  ├─ mission_handoff.py   │
 │  ├─ status.ts            │  files │  ├─ mission_churn.py     │
 │  └─ runner.ts            │        │  └─ mission_tools.py     │
 │                          │        │                          │
 │  packages/prdy           │        │  skills/prdy/            │
 │  └─ templates, wizard    │◄──────►│  └─ SKILL.md + templates │
 │                          │        │                          │
 │  plugins/mosaic-framework│        │  skills/ (existing)      │
 │  └─ context injection    │◄──────►│  └─ kanban-orchestrator  │
 │                          │        │     + mosaic-coding-*    │
 │  plugins/macp            │        │  tools/delegate_task.py  │
 │  └─ ACP bridge           │◄──────►│  └─ already covers this  │
 │                          │        │                          │
 │  (stays in Mosaic)       │        │  tools/kanban_tools.py   │
 │  apps/gateway            │        │  └─ Hermes Kanban DB     │
 │  apps/web                │        │                          │
 │  packages/db             │        │  tools/cronjob_tools.py  │
 │  packages/queue          │        │  └─ already covers cron  │
 └─────────────────────────┘        └─────────────────────────┘
 ```
 ---
 ## Open Questions
 1. **Should the `mission` toolset ship with Hermes core, or as a plugin?**
   - Recommendation: ship as a **built-in toolset** (like `kanban`) since mission coordination is a core agent capability, not an optional integration. The file formats are stable and the code is small.
 2. **Should churn detection be per-profile configurable?**
   - Recommendation: yes. Add `mission.churn_threshold` and `mission.churn_weights` to profile config.yaml. Default threshold = 5 consecutive no-progress turns.
 3. **Should handoff packets live in the project dir or in Hermes home?**
   - Recommendation: **project dir** (`.mosaic/handoffs/<session-id>.json`). This keeps them version-controlled and accessible regardless of which agent runtime picks up the project.
 4. **Bidirectional Kanban sync?**
   - Recommendation: **one-way first** (TASKS.md → Kanban). Bidirectional adds conflict resolution complexity. Ship one-way, add reverse sync in v2 if needed.
 5. **PRD generation — skill or tool-call?**
   - Recommendation: **skill** (`prdy`). PRD generation is a prompt engineering problem with templates. Skills already handle this pattern perfectly.
--- a/docs/plans/2026-05-07-coordination-resilience.md
+++ b/docs/plans/2026-05-07-coordination-resilience.md
@@ -0,0 +1,236 @@
 # Mosaic Stack ↔ Hermes Coordination Resilience
 > Purpose: document the self-healing coordination patterns that emerged while implementing the Hermes mission toolset, distress-card protocol, and auto-heal watchers, so the same mechanics can be reimplemented in Mosaic Stack or any similar agent platform.
 ## Summary
 The coordination layer should be treated as a system of mechanical recovery loops rather than a single interactive agent session.
 ## SIBKISS operational summary
 - mission on
 - heartbeat always
 - resume from packet
 - block with `[BLOCKED]`
 - reassign
 - keep tasks tiny
 - auto-heal dead workers
 The design has four parts:
 1. Atomic task decomposition — workers operate only within a small, explicit scope.
 2. Distress signaling — workers create a standardized `[BLOCKED]` card when they encounter a blocker outside their scope.
 3. Mechanical fallback — if the worker cannot phone home because of rate limits or dead context, a cron-style watcher synthesizes the distress card for them.
 4. Auto-heal / reassignment — stale workers are reaped, crash-loops are reset, and rate-limited work is reassigned to a different profile/provider.
 ## Why this exists
 Observed failure modes:
 - Scope creep: a worker completes the target fix, then spends the rest of its budget chasing downstream cascade work.
 - Silent failure / dead worker: the worker PID is gone, but the task remains running or blocked.
 - Rate-limited worker: the worker is too constrained to create a help card itself, so it spins or fails without a clean handoff.
 The answer is not to raise iteration caps or ask the worker to keep trying longer. The answer is to make the coordination layer self-healing and the work items atomic.
 ## Core workflow
 ### 1) Atomic task boundaries
 Every task should have:
 - one concern
 - explicit files/packages in scope
 - explicit files/packages out of scope
 - a maximum file count if possible
 - a stated expected iteration budget
 When a worker discovers work outside scope, it must stop fixing it and hand off.
 ### 2) Worker-authored distress card
 If the worker can still report status, it creates a card like:
 - Title: `[BLOCKED] t_<source_task_id> <blocker_type>`
 - Assignee: `tuesday` / orchestrator role
 - Status: `ready`
 - Body: standardized distress template with source task, blocker type, completed work, cannot-touch scope, and needed action
 The orchestrator receives the card, acts on it, and closes the loop.
 #### Distress card routing
 - Title: `[BLOCKED] t_<source_task_id> <blocker_type>`
 - Assignee: `tuesday` / orchestrator role
 - Status: `ready`
 - Body: standardized distress template with source task, blocker type, completed work, cannot-touch scope, and needed action
 - Source task stays linked to the distress card so the recovery trail is auditable
 The orchestrator receives the card, acts on it, and closes the loop.
 ### 3) Mechanical fallback for rate-limited workers
 If the worker is too rate-limited or unstable to create the distress card itself, a no-agent watcher must synthesize the card from the task row and failure metadata.
 That watcher should:
 - inspect running / blocked tasks
 - detect repeated 429 / 503 / overload errors
 - create the same standardized `[BLOCKED]` card on behalf of the worker
 - link the distress card to the source task
 - add a comment to the source task
 - allow the dispatcher to pick up the new card immediately
 This is the key fix for the rate-limited failure mode, where a worker is too constrained to report its own blocker: the worker does not need to be able to phone home if the watcher can do it mechanically.
 ### 4) Auto-heal for dead workers
 A separate no-agent watcher should:
 - reap dead PIDs stuck in `running`
 - reset crash-loops whose failures are infrastructure-related
 - escalate tasks that have been reset too many times
 This watcher prevents stale tasks from clogging the board and keeps the dispatch queue moving.
 ## Distress card contract
 ### Canonical title
 ```text
 [BLOCKED] t_<source_task_id> <blocker_type>
 ```
 ### Canonical blocker types
 - `scope_boundary`
 - `env_blocker`
 - `credential_failure`
 - `dependency`
 - `iteration_budget`
 - `rate_limited`
 ### Canonical body
 ```markdown
 ## Distress Signal
 - Blocked task: t_xxx
 - Worker: <profile_name>
 - Branch: <git_branch_name>
 - Workspace: <path>
 - Blocker type: <type>
 - Completed: <what was done>
 - Cannot touch: <out-of-scope packages/files>
 - Needs: <what the orchestrator should do>
 - State: committed | uncommitted | stashed(<stash_name>)
 ## Scope Guard
 DO NOT touch: anything outside diagnosing and remediating the blocker described above
 Only fix: assign, split, reassign, or unblock the source task
 ```
 ## Routing rules
 ### Distress card routing
 - `[BLOCKED]` title prefix should bypass normal triage.
 - The card should go directly to the orchestration profile.
 - The orchestrator should start from a clean session each time.
 ### Rate-limit fallback
 When the source task is rate-limited:
 - do not keep retrying in the worker
 - let the watcher synthesize the distress card
 - have the orchestrator reassign the source task to a different profile/provider combo
 ### Provider fallback principle
 If the failure was caused by provider pressure (rate limits or overload), never reassign the work back to the same provider; move it to a different provider.
 ### Suggested fallback order
 1. Keep the current task body and scope guards intact.
 2. Reassign to a different profile on a different provider.
 3. If that is impossible, reassign to a different profile on the same provider only for non-rate-limit blockers.
 4. If repeated failures continue, split the task into a narrower atomic card.
 ## Related recovery docs
 - Mission packet recovery contract: `/opt/hermes/docs/mission-toolset-heartbeat.md`
 - Hermes mission implementation plan: `/opt/hermes/docs/plans/mission-toolset-implementation.md`
 - The same packet-first resume rule applies: inspect the latest packet before re-reading mission files.
 - New-session trigger: when a profile config changes, start a fresh session or `/reset` so the updated toolset is actually loaded.
 ## Watchers to implement
 ### Auto-heal watcher
 Responsibilities:
 - reap stale workers
 - reset dead-PID crash loops
 - track reset counts
 - escalate after repeated resets
 ### Distress synthesizer watcher
 Responsibilities:
 - detect rate-limited / stuck workers
 - create `[BLOCKED]` cards mechanically
 - link the card to the source task
 - leave a comment for traceability
 ### Iteration-budget watcher
 Responsibilities:
 - detect long-running tasks and repeated failure patterns
 - recommend splits when a task is clearly over-scoped
 - report tasks that need human review after multiple resets
 ## Operational principle
 If a task cannot cleanly finish within its atomic scope, the right response is to surface a smaller coordination problem, not to keep burning context.
 This is what makes the system robust across compaction, rate limits, and dead workers.
 ## Suggested implementation order
 1. Atomic task metadata in task bodies
 2. Worker-authored distress card protocol
 3. Mechanical distress synthesizer watcher
 4. Auto-heal watcher for dead workers
 5. Orchestrator routing rules for `[BLOCKED]`
 6. Rate-limit fallback / model reassignment table
 ## Where this fits in Hermes
 - Kanban = durable work graph and status engine
 - Watchers = mechanical healing and distress synthesis
 - Orchestrator = split / reassign / unblock decision-maker
 - Workers = execution inside atomic task boundaries
 ## Where this fits in Mosaic Stack
 - PRD / coordination infra should encode the same patterns
 - Mosaic can use the same distress-card contract and watcher logic
 - The coordination model should be runtime-agnostic: any agent system can use it if it can write a task card and react to a ready queue
 ## Cross-project takeaway
 The important pattern is not the specific tool names. It is the mechanical feedback loop:
 - detect failure without requiring the failing worker to succeed
 - create a standardized help artifact
 - route that artifact to a fresh orchestrator context
 - repair the assignment graph
 - continue the mission
 That pattern is reusable anywhere.
--- a/docs/scratchpads/git-wrapper-rollup-20260526.md
+++ b/docs/scratchpads/git-wrapper-rollup-20260526.md
@@ -0,0 +1,33 @@
 # Git Wrapper Rollup — 2026-05-26
 ## Objective
 Consolidate pending Mosaic wrapper fixes after `mosaic update` reported the local framework package was already current (`@mosaicstack/mosaic 0.0.30`) but the installed `~/.config/mosaic/tools` wrappers still lacked the open Gitea/Woodpecker wrapper patches.
 ## Scope
 Roll up the open wrapper-related Gitea PR branches into one integration branch:
 - PR #513: `pr-ci-wait.sh` stdin collision fix.
 - PR #518: Gitea PR metadata/merge preflight hardening.
 - PR #521: Gitea merge fallback + unsafe PR-number rejection.
 - PR #522: Woodpecker credential/pagination fixes and CI Postgres service collision fix.
 - PR #523: explicit Gitea repo/login args and `eval` removal for PR/issue creation.
 ## Conflict resolutions
 - Kept array-based command construction where possible instead of reintroducing `eval`.
 - Kept explicit `--repo OWNER/REPO --login mosaicstack` Gitea arguments for `tea` calls.
 - Combined PR merge API fallback behavior from metadata hardening and empty-identity fallback branches.
 - Preserved numeric PR-number validation for `pr-merge.sh`.
 ## Verification checklist
 - `bash -n` on changed shell scripts.
 - Wrapper smoke checks from a clean worktree.
 - Gitea PR verification after push.
 - CI status checked through Gitea/Woodpecker.
 ## Notes
 `mosaic update` did not install these fixes because the package registry still reports `@mosaicstack/mosaic 0.0.30` as current. The source patches must merge/release before a normal framework update will carry them.
--- a/docs/scratchpads/mvp-20260312.md
+++ b/docs/scratchpads/mvp-20260312.md
@@ -523,3 +523,133 @@ Independent security review surfaced three high-impact and four medium findings;
 - #8: confirm `packages/config/dist` not git-tracked
 **Next mission step:** FED-M2 (Step-CA + grant schema + admin CLI). Per TASKS.md scope rule, M2 will be decomposed when it enters active planning. Issue #461 tracks scope.
 ## Session 20 — 2026-04-21 — FED-M2 kickoff
 ### Decisions
 - **Workstream split**: parallel CODE (M2-01..M2-13, ~72K) + DEPLOY (DEPLOY-01..DEPLOY-05, ~16K) tracks; re-converge at M2-10 E2E.
 - **Test hosts**: `mos-test-1.woltje.com` (querying side / Server A), `mos-test-2.woltje.com` (serving side / Server B). Wildcard `*.woltje.com` A→174.137.97.162 already exists; Traefik wildcard cert covers both subdomains. No DNS or cert work needed pre-deploy.
 - **Portainer access**: requires `PORTAINER_INSECURE=1` flag added to mosaic wrappers (self-signed cert at `https://10.1.1.43:9443`). PR pending on `feat/mosaic-portainer-tls-flag`.
 - **Image policy**: deploy by digest (immutable) per Mosaic policy. `gateway:fed-v0.1.0-m1` digest = `sha256:9b72e202a9eecc27d31920b87b475b9e96e483c0323acc57856be4b1355db1ec`.
 ### DEPLOY-01 — image manifest verified
 - Tag `fed-v0.1.0-m1` exists at `git.mosaicstack.dev/mosaicstack/stack/gateway`
 - Digest: `sha256:9b72e202a9eecc27d31920b87b475b9e96e483c0323acc57856be4b1355db1ec`
 - 9 layers, ~530MB total
 - Use this digest in DEPLOY-02 stack template (do NOT reference `:fed-v0.1.0-m1` tag in stack — pin to digest)
 ### Registry auth note
 - Gitea container registry uses Bearer token flow (`/v2/token?service=container_registry&scope=repository:<repo>:pull`)
 - Username: `jarvis` (NOT `mosaicstack`); password: `gitea.mosaicstack.token` from credentials.json
 - Direct `Authorization: Bearer <pat>` does NOT work — must exchange PAT for registry token first
 ### Active PRs
 - #483 — docs: M2 mission planning (TASKS decomposition + manifest update) — CI running
 - (pending) `feat/mosaic-portainer-tls-flag` — wrapper PORTAINER_INSECURE flag (sonnet subagent in progress)
 - (pending) `feat/federation-m2-schema` — FED-M2-01 DB schema migration (sonnet subagent in progress)
 ### MISSION-MANIFEST layout fix
 - Initial M2 commit had the Test Infrastructure block inserted by lint-staged prettier between "Last Updated" and "Parent Mission", which split the mission frontmatter
 - Fixed in 3d001fdb: moved Parent Mission back to frontmatter, kept Test Infrastructure as standalone H2 between Mission and Context
 ## Session 21 — 2026-04-21/22 — DEPLOY-02 merged, gateway image bug discovered, M2-01 in remediation
 ### PRs merged
 - **#483** — docs(federation): M2 mission planning (TASKS decomposition + manifest update)
 - **#484** — feat(mosaic-portainer): PORTAINER_INSECURE flag for self-signed TLS (wrapper sync to `~/.config/mosaic/tools/portainer/` done manually due to broken `mosaic upgrade` `set -o pipefail` on dash)
 - **#485** — feat(deploy): portainer stack template `deploy/portainer/federated-test.stack.yml` for federation test instances [DEPLOY-02]
 ### Stack deployed (mos-test-1, mos-test-2)
 - Both stacks created on Portainer endpoint 3 (`local` Swarm @ 10.1.1.43, the only endpoint with traefik-public + woltje.com wildcard cert)
 - Swarm ID `l7z67tfpd4bvj4979ufpkyi50`
 - Image pinned to digest `sha256:9b72e202a9eecc27d31920b87b475b9e96e483c0323acc57856be4b1355db1ec`
 - Traefik labels target `${HOST_FQDN}` per env
 ### CRITICAL FINDING — gateway image runtime-broken
 - `docker run` against `gateway:fed-v0.1.0-m1` fails immediately:  
  `Error [ERR_MODULE_NOT_FOUND]: Cannot find package 'dotenv' imported from /app/dist/main.js`
 - Root cause: `docker/gateway.Dockerfile` copies `/app/node_modules` from builder — but pnpm puts deps in the content-addressed `.pnpm/` store with symlinks at `apps/gateway/node_modules/*`. The runner stage misses the symlinks → Node can't resolve workspace deps.
 - M1 release was never runtime-tested as a stripped container; CI passed because tests run in dev tree where pnpm symlinks are intact.
 - **Fix in flight** (subagent `a78a9ab0ddae91fbc`): switch builder to `pnpm --filter @mosaic/gateway --prod deploy /deploy`, then runner copies `/deploy/node_modules` + `/deploy/dist` + `/deploy/package.json`.
 ### M2-01 schema review verdict — NEEDS CHANGES
 - PR #486 (`feat/federation-m2-schema`) — independent reviewer (sonnet) found 2 real issues:
  1. `federation_audit_log` time-range indexes missing `.desc()` on `created_at` (3 places)
  2. Reserved columns missing per TASKS.md M2-01 spec: `query_hash`, `outcome`, `bytes_out` (M4 will write; spec said reserve now)
 - Reviewer also noted (advisory): `subject_user_id` is correctly `text` (matches BetterAuth `users.id`; spec defect, not code defect); a peer→grant cascade test is not present (would be trivial to add)
 - **Remediation in flight** (subagent `a673dd9355dc26f82` in worktree `agent-a4404ac1`): apply DESC + reserved cols, regenerate migration in place (preferred) or stack 0009 (fallback), force-push, post PR comment.
 ### Process notes
 - Branch race incident: schema subagent + wrapper subagent both ran in main checkout → schema files appeared on wrapper branch. Recovered by TaskStop, `git checkout --` to clean, respawned schema subagent with `isolation: "worktree"`. **Rule going forward:** any subagent doing code edits gets `isolation: "worktree"` unless work is single-file and the orchestrator confirms no other branch will touch overlapping files.
 - `pr-create.sh` shell-quotes backticks badly → use `tea pr create --repo mosaicstack/stack` directly (matches CLI-skill behavior). Will leave a followup to harden pr-create.sh.
 - Gitea registry auth: bearer-token exchange flow (`/v2/token?service=container_registry&scope=repository:<repo>:pull`) — direct `Authorization: Bearer <pat>` returns 401.
 - Portainer Swarm stack create endpoint: `POST /api/stacks/create/swarm/string?endpointId=<id>` (NOT `/api/stacks?type=1` — deprecated and rejected with 400).
 ### In-flight at compaction boundary
 - Subagent `a78a9ab0ddae91fbc` — Dockerfile pnpm-deploy fix → PR (not yet opened at handoff)
 - Subagent `a673dd9355dc26f82` — M2-01 schema remediation (DESC + reserved cols) → force-push to PR #486
 - Both will trigger CI; orchestrator must independently re-review fixes (especially the security-adjacent schema work) per "always verify subagent claims" rule.
 ### Next after subagents return
 1. Independent re-review of schema remediation (different subagent, fresh context)
 2. Merge #486 if green
 3. Merge Dockerfile fix PR if green → triggers Kaniko CI rebuild → capture new digest
 4. Update `deploy/portainer/federated-test.stack.yml` to new digest in a small PR
 5. Redeploy mos-test-1 + mos-test-2 (Portainer stack update via API)
 6. Verify HTTPS reachability + `/health` endpoint at both hosts
 7. DEPLOY-03/04 acceptance probes (`mosaic gateway doctor --json`, pgvector `vector(3)` round-trip)
 8. DEPLOY-05: author `docs/federation/TEST-INFRA.md`
 9. M2-02 (Step-CA sidecar) kicks off after image health is green
 ## Session 23 — 2026-04-21 — M2 close + M3 decomposition
 **Closed at compaction boundary:** all 13 M2 tasks done, PRs #494–#503 merged to `main`, tag `fed-v0.2.0-m2` published, Gitea release notes posted, issue #461 closed. Main at `4ece6dc6`.
 **M2 hardening landed in PR #501** (security review remediation):
 - CRIT-1: post-issuance OID verification in `ca.service.ts` (rejects cert if `mosaic_grant_id` / `mosaic_subject_user_id` extensions missing or mismatched)
 - CRIT-2: atomic activation guard `WHERE status='pending'` on grant + `WHERE state='pending'` on peer; throws `ConflictException` if lost race
 - HIGH-2: removed try/catch fallback in `extractCertNotAfter` — parse failures propagate as 500 (no silent 90-day default)
 - HIGH-4: token slice for logging (`${token.slice(0, 8)}...`) — no full token in stdout
 - HIGH-5: `redeem()` wrapped in try/catch with best-effort failure audit; uses `null` (not `'unknown'`) for nullable UUID FK fallback
 - MED-3: `createToken` validates `grant.peerId === dto.peerId`; `BadRequestException` on mismatch
 **Remaining M2 security findings deferred to M3+:**
 - HIGH-1: peerId/subjectUserId tenancy validation on `createGrant` (M3 ScopeService work surfaces this)
 - HIGH-3: Step-CA cert SHA-256 fingerprint pinning (M5 cert handling)
 - MED-1: token entropy already 32 bytes — wontfix
 - MED-2: per-route rate limit on enrollment endpoint (M4 rate limit work)
 - MED-4: CSR CN binding to peer's commonName (M3 AuthGuard work)
 **M3 decomposition landed in this session:**
 - 14 tasks (M3-01..M3-14), ~100K estimate
 - Structure mirrors M1/M2 pattern: foundation → server stream + client stream + harness in parallel → integration → E2E → security review → docs → close
 - M3-02 ships local two-gateway docker-compose (`tools/federation-harness/`) so M3-11 E2E is not blocked on the Portainer test bed (which is still blocked on `FED-M2-DEPLOY-IMG-FIX`)
 **Subagent doctrine retained from M2:**
 - All worker subagents use `isolation: "worktree"` to prevent branch-race incidents
 - Code review is independent (different subagent, no overlap with author of work)
 - `tea pr create --repo mosaicstack/stack --login mosaicstack` is the working PR-create path; `pr-create.sh` has shell-quoting bugs (followup #45 if not already filed)
 - Cost tier: foundational implementation = sonnet, docs = haiku, complex multi-file architecture (security review, scope service) = sonnet with two review rounds
 **Next concrete step:**
 1. PR for the M3 planning artifact (this commit) — branch `docs/federation-m3-planning`
 2. After merge, kickoff M3-01 (DTOs) on `feat/federation-m3-types` with sonnet subagent in worktree
 3. Once M3-01 lands, fan out: M3-02 (harness) || M3-03 (AuthGuard) → M3-04 (ScopeService) || M3-08 (FederationClient)
 4. Re-converge at M3-10 (Integration) → M3-11 (E2E)
--- a/docs/scratchpads/t-a292e96f-gitea-pr-metadata.md
+++ b/docs/scratchpads/t-a292e96f-gitea-pr-metadata.md
@@ -0,0 +1,53 @@
 # t_a292e96f — Gitea PR metadata wrapper fix
 ## Objective
 Repair Mosaic git wrappers so Gitea PR metadata and merge preflight work for U-Connect PRs on `git.uscllc.com` without selecting the unrelated `git.mosaicstack.dev` tea login.
 ## Findings
 - Reproduced the failure from `/src/uconnect-worktrees/t_39ce717c-authentik-smoke-gate` with the current `pr-metadata.sh`:
  - PR #1905 returned JSON with `number=null`, `baseRefName=""`, `headRefName=""`.
  - PR #1908 returned JSON with `number=null`, `baseRefName=""`, `headRefName=""`.
 - Root cause: the wrapper treated HTTP/API error payloads as PR payloads and normalized missing fields to empty strings.
 - The credential loader can return a non-working `git.uscllc.com` API token in this environment, while host-specific `~/.git-credentials` basic auth succeeds. The wrapper now falls back by host before normalization.
 - `tea login list` has only `git.mosaicstack.dev` configured here; `pr-merge.sh` previously forced `--login mosaicstack`, which is invalid for `git.uscllc.com` and caused `Login name mosaicstack does not exist`.
 ## Changes
 - `packages/mosaic/framework/tools/git/detect-platform.sh`
  - Added `get_gitea_basic_auth <host>` to retrieve host-specific HTTPS credentials from `~/.git-credentials` without printing secrets.
 - `packages/mosaic/framework/tools/git/pr-metadata.sh`
  - Uses strict bash mode.
  - Checks Gitea HTTP status and fails nonzero on API errors/non-JSON instead of emitting empty branch fields.
  - Falls back from token auth to host-specific basic auth.
  - Normalizes standard `head.ref`/`base.ref` and fallback branch fields.
  - Requires non-empty `headRefName` and `baseRefName`.
  - Preserves GitHub `gh pr view` behavior.
 - `packages/mosaic/framework/tools/git/pr-merge.sh`
  - Reads metadata once for base-branch policy preflight.
  - Selects a `tea` login only when its configured URL matches the repo host.
  - Falls back to authenticated Gitea merge API when no matching `tea` login exists, avoiding the wrong `mosaicstack` login for USC repos.
  - Keeps squash-only and main-only merge policy.
 - `packages/mosaic/framework/tools/git/test-pr-metadata-gitea.sh`
  - Added fixture-based regression harness for standard Gitea fields, fallback branch fields, `refs/pull/<n>/head` plus `head.label` normalization, and API error payloads.
 ## Documentation / changelog note
 This repository currently has no root `CHANGELOG.md`; the scratchpad and `docs/TASKS.md` carry the task-level change record for this wrapper fix.
 ## Verification log
 - Red regression check: copied the new `test-pr-metadata-gitea.sh` harness next to `origin/main` wrapper scripts and ran it with `MOSAIC_TEST_WORK_DIR=$PWD/.mosaic-test-work/pr-metadata-gitea-red`; it failed as expected with `headRefName=''` and `baseRefName=''` on the fixture API-error path.
 - `bash -n packages/mosaic/framework/tools/git/{detect-platform.sh,pr-metadata.sh,pr-merge.sh,test-pr-metadata-gitea.sh}`: passed.
 - `shellcheck -x -P . -e SC1090 packages/mosaic/framework/tools/git/{detect-platform.sh,pr-metadata.sh,pr-merge.sh,test-pr-metadata-gitea.sh}`: passed.
 - `MOSAIC_TEST_WORK_DIR=$PWD/.mosaic-test-work/pr-metadata-gitea packages/mosaic/framework/tools/git/test-pr-metadata-gitea.sh`: passed; verifies standard Gitea fields, fallback branch fields, `refs/pull/<n>/head` label normalization, and nonzero API-error handling.
 - Installed wrapper parity: `/home/hermes/.config/mosaic/tools/git/{detect-platform.sh,pr-metadata.sh,pr-merge.sh}` byte-match the PR source copies after validation, so active U-Connect wrapper invocations use the same fix while source PR review runs.
 - Live sanitized U-Connect metadata from `/src/uconnect` with `MOSAIC_CREDENTIALS_FILE=/src/jarvis-brain/credentials.json`:
  - PR #1905: `number=1905`, `baseRefName=main`, `headRefName=edith/t_39ce717c-authentik-smoke-gate`, `state=open`, `host=git.uscllc.com`.
  - PR #1908: `number=1908`, `baseRefName=main`, `headRefName=fix/t_23fa9e1d-portal-health-backend`, `state=closed`, `host=git.uscllc.com`.
 - Merge preflight dry runs from installed wrappers:
  - PR #1905: `Dry run: would merge PR #1905 on git.uscllc.com with authenticated Gitea API fallback (base=main, method=squash).`
  - PR #1908: `Dry run: would merge PR #1908 on git.uscllc.com with authenticated Gitea API fallback (base=main, method=squash).`
 - PR: `https://git.mosaicstack.dev/mosaicstack/stack/pulls/518`, branch `fix/t-a292e96f-gitea-pr-metadata`.
 - CI: Recent PR/push pipelines failed before clone/test execution due to a Woodpecker/Kubernetes PVC API timeout: `dial tcp 10.43.0.1:443: i/o timeout`. No repository test step executed in CI; local targeted verification above remains clean.
--- a/docs/scratchpads/t_301e4e3b-pr-merge-gitea-empty-uid.md
+++ b/docs/scratchpads/t_301e4e3b-pr-merge-gitea-empty-uid.md
@@ -0,0 +1,31 @@
 # Scratchpad: t_301e4e3b pr-merge.sh Gitea empty-uid fallback
 ## Task
 Implement a narrow hardening in `packages/mosaic/framework/tools/git/pr-merge.sh` so Gitea merges recover from the known non-interactive `tea pr merge` identity failure: `user does not exist [uid: 0, name: ]`.
 ## Constraints
 - Preserve Mosaic policy gates: squash-only, base branch `main`, queue guard unless explicitly skipped.
 - Preserve the existing authenticated Gitea API fallback when no tea login exists.
 - Do not fall back on arbitrary tea failures.
 - Do not expose tokens or credential-bearing remotes.
 - Scope is limited to the merge wrapper plus focused test/support/scratchpad files.
 ## External issue
 - Gitea issue #520: Harden pr-merge.sh Gitea empty-uid fallback
 ## Plan
 1. Add a focused shell regression harness with mocked `tea` and `curl` proving the known empty uid/name failure must fall back to Gitea API.
 2. Watch the harness fail on current code.
 3. Implement helper functions in `pr-merge.sh` for redacted command display, known failure classification, and authenticated Gitea API merge fallback.
 4. Keep unknown `tea` failures blocking by replaying stderr and exiting non-zero.
 5. Run syntax, shellcheck if available, focused regression, and repo quality gates before push/PR.
 ## Session log
 - 2026-05-22: Read Kanban context, Mosaic global/repo instructions, created isolated branch `fix/t_301e4e3b-pr-merge-gitea-empty-uid`, and opened Gitea issue #520 using the Mosaic issue wrapper/API fallback.
 - 2026-05-22: Added regression harness and watched it fail on current behavior with `user does not exist [uid: 0, name: ]`; implemented narrow fallback and verified known-empty-identity fallback, arbitrary tea failure blocking, and no-tea-login API fallback paths.
 - 2026-05-22: Validation passed for `bash -n`, `shellcheck -x`, focused shell harness, `pnpm typecheck`, `pnpm lint`, `pnpm format:check`, and `pnpm --filter @mosaicstack/mosaic test`. Full `pnpm test` exposed an out-of-scope gateway DB setup failure (`relation "messages" does not exist`) in `apps/gateway/src/__tests__/cross-user-isolation.test.ts`.
--- a/docs/scratchpads/t_5aab9cc8-pr-merge-eval-injection.md
+++ b/docs/scratchpads/t_5aab9cc8-pr-merge-eval-injection.md
@@ -0,0 +1,48 @@
 # t_5aab9cc8 — pr-merge.sh eval injection remediation
 ## Objective
 Remediate PR #521 review blocker: `packages/mosaic/framework/tools/git/pr-merge.sh` must reject non-numeric PR numbers before metadata lookup/merge and must not use `eval` for GitHub merge execution.
 ## Scope
 - Shell wrapper only: `packages/mosaic/framework/tools/git/pr-merge.sh`
 - Focused regression harness: `packages/mosaic/framework/tools/git/test-pr-merge-gitea-empty-uid.sh`
 - No API/frontend/infra surfaces.
 ## Acceptance Criteria
 - AC1: `PR_NUMBER` is validated as digits-only immediately after required-argument parsing, before metadata lookup.
 - AC2: GitHub merge path uses a quoted argv array, not command-string construction plus `eval`.
 - AC3: Focused tests prove PR-number metacharacters are rejected and cannot execute injected shell commands on GitHub path.
 - AC4: Focused tests prove PR-number metacharacters are rejected on Gitea path before tea/curl merge calls.
 - AC5: Existing Gitea empty-uid fallback behavior remains green.
 - AC6: Syntax, shellcheck where available, focused harness, and relevant repo gates are rerun or absence documented.
 ## Plan
 1. Add failing regression tests for GitHub eval injection and Gitea invalid PR rejection.
 2. Implement fail-closed PR number validation before metadata lookup.
 3. Replace GitHub `eval` command with argv array execution.
 4. Run required validation and update this scratchpad with evidence.
 5. Commit, queue-guard, push branch, update PR #521.
 ## TDD Log
 - RED: `AGENT_WORK_ROOT="$HERMES_KANBAN_WORKSPACE/work" bash packages/mosaic/framework/tools/git/test-pr-merge-gitea-empty-uid.sh` failed on vulnerable code with `Expected GitHub metacharacter PR number to be rejected` and showed the injected PR number reached the GitHub merge path.
 - GREEN: Added digits-only validation before metadata lookup and replaced GitHub `eval` with an argv array. The focused harness now passes and verifies invalid PR numbers are rejected before GitHub `gh` calls and before Gitea `tea`/`curl` calls.
 ## Validation Evidence
 - PASS: `AGENT_WORK_ROOT="$HERMES_KANBAN_WORKSPACE/work" bash -n packages/mosaic/framework/tools/git/pr-merge.sh packages/mosaic/framework/tools/git/test-pr-merge-gitea-empty-uid.sh`
 - PASS: `shellcheck -x packages/mosaic/framework/tools/git/pr-merge.sh packages/mosaic/framework/tools/git/test-pr-merge-gitea-empty-uid.sh`
 - PASS: `AGENT_WORK_ROOT="$HERMES_KANBAN_WORKSPACE/work" bash packages/mosaic/framework/tools/git/test-pr-merge-gitea-empty-uid.sh`
 - PASS: `pnpm --filter @mosaicstack/mosaic... build`
 - PASS: `pnpm --filter @mosaicstack/mosaic lint`
 - PASS: `pnpm --filter @mosaicstack/mosaic typecheck`
 - PASS: `pnpm --filter @mosaicstack/mosaic test` — 32 files / 291 tests passed.
 - REVIEW: `/home/hermes/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted` could not run due to a Codex 401 Unauthorized error. Independent delegate review completed read-only with PASS / no blockers; the non-blocking suggestion to assert that the GitHub mock log remains empty was applied.
 ## Risks / Blockers
 - No active blockers.
--- a/eslint.config.mjs
+++ b/eslint.config.mjs
@@ -30,6 +30,7 @@ export default tseslint.config(
            'apps/gateway/vitest.config.ts',
            'packages/storage/vitest.config.ts',
            'packages/mosaic/__tests__/*.ts',
            'tools/federation-harness/*.ts',
          ],
        },
      },
--- a/guides/BOOTSTRAP.md
+++ b/guides/BOOTSTRAP.md
@@ -453,6 +453,26 @@ Initialize standard labels and the first pre-MVP milestone:
 ---
 ## Secrets Bootstrap (Required for Every New App)
 Every new application MUST complete the following secrets bootstrap before deploying to any non-local environment. This is a hard gate — deployment without completed secrets bootstrap is forbidden.
 ### Secrets bootstrap checklist
 - [ ] Vault path created: `vault kv put secret/k3s/<app> ...` with all required secret fields
 - [ ] Required secrets listed in project README under a "Secrets architecture" section, including:
  - Vault path(s) used
  - All required secret keys and their purpose
  - Whether the app uses ESO bridge (default) or Direct-Vault (opt-in, with justification)
 - [ ] `external-secret.yaml` manifest committed to repo's `deploy/` or `k8s/` directory
 - [ ] Deployment YAML references the synced k8s Secret via `secretKeyRef` (not raw env vars or `.env` files)
 - [ ] App startup has schema-based validation for all required env vars (zod / pydantic / envconfig equivalent) that exits non-zero on missing required values
 - [ ] Direct-Vault opt-in (if applicable): justification documented in README + AppRole provisioned + bootstrap credentials stored in Vault and synced via a separate `ExternalSecret`
 See `~/.config/mosaic/guides/VAULT-SECRETS.md` for full worked examples of the ESO bridge pattern, the Direct-Vault opt-in pattern, and the forbidden antipatterns.
 ---
 ## Checklist
 After bootstrapping, verify:
--- a/guides/VAULT-SECRETS.md
+++ b/guides/VAULT-SECRETS.md
@@ -203,3 +203,374 @@ Error: token expired
 3. **Audit logging** - All access is logged; act accordingly
 4. **No local copies** - Don't store secrets in files or env vars long-term
 5. **Rotate on compromise** - Immediately rotate any exposed secrets
 ---
 ## Secrets Architecture Decision Matrix
 Use this table to choose between the ESO bridge (default) and Direct-Vault (opt-in) patterns for every new app or integration.
 | Factor                      | ESO Bridge (default)                                                          | Direct-Vault (opt-in)                                                                                                   |
 | --------------------------- | ----------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------- |
 | **Use-case**                | All static secrets (DB creds, API keys, signing keys, OAuth secrets)          | Dynamic creds with short TTLs (DB rotation, AWS STS, PKI), per-request audit trails, or lease renewal mid-pod-lifecycle |
 | **App code change**         | None — reads standard env vars via `secretKeyRef`                             | Requires Vault client (`hvac`, `node-vault`, `vault/api`) in application code                                           |
 | **Secret rotation**         | ESO re-syncs on Vault write; pod restart or secret refresh picks up new value | App manages lease renewal or re-auth within the running process                                                         |
 | **Audit granularity**       | Access logged at Vault when ESO syncs; no per-request app audit               | Every app request to Vault is a separate audit log entry                                                                |
 | **Operational burden**      | Low — ESO handles polling, sync, and k8s Secret lifecycle                     | Higher — app must handle auth, lease renewal, error paths, and token rotation                                           |
 | **Justification required?** | No — this is the default                                                      | Yes — document in project README under "Secrets architecture"                                                           |
 | **Example use cases**       | Web app DB password, OAuth client secret, JWT signing key, API token          | HashiCorp DB secrets engine with 15-min TTL leases, AWS STS assume-role, Vault PKI short-lived certs                    |
 **Decision rule:** If you are unsure, use ESO. Only justify Direct-Vault when the secret cannot be safely stored in a k8s Secret (too short-lived, per-request TTL required, or mid-lifecycle renewal needed).
 ---
 ## ESO Bridge Pattern (Default)
 This is the required default for all k8s workloads. Follow this exact pattern unless a documented dynamic-secrets requirement justifies Direct-Vault.
 ### 1. Provision Vault path
 ```bash
 # Write the secrets for the app (run once; use IaC/Terraform for repeatable provisioning)
 vault kv put secret/k3s/<app> \
  db_password="..." \
  api_key="..." \
  jwt_secret="..."
 ```
 Use the canonical path structure: `secret/k3s/<app>` for k3s cluster workloads.
 ### 2. ExternalSecret manifest
 Commit this to the repo's `deploy/` or `k8s/` directory:
 ```yaml
 # deploy/external-secret.yaml
 apiVersion: external-secrets.io/v1beta1
 kind: ExternalSecret
 metadata:
  name: <app>-secrets
  namespace: <namespace>
 spec:
  refreshInterval: 1h
  secretStoreRef:
    name: vault-backend # ClusterSecretStore name — verify with cluster admin
    kind: ClusterSecretStore
  target:
    name: <app>-secrets # k8s Secret name that will be created
    creationPolicy: Owner
  data:
    - secretKey: DB_PASSWORD # key in the k8s Secret
      remoteRef:
        key: secret/k3s/<app> # Vault path
        property: db_password # field within the Vault secret
    - secretKey: API_KEY
      remoteRef:
        key: secret/k3s/<app>
        property: api_key
    - secretKey: JWT_SECRET
      remoteRef:
        key: secret/k3s/<app>
        property: jwt_secret
 ```
 ### 3. Deployment manifest — reference synced k8s Secret
 ```yaml
 # deploy/deployment.yaml (env section)
 env:
  - name: DB_PASSWORD
    valueFrom:
      secretKeyRef:
        name: <app>-secrets # matches ExternalSecret target.name
        key: DB_PASSWORD
  - name: API_KEY
    valueFrom:
      secretKeyRef:
        name: <app>-secrets
        key: API_KEY
  - name: JWT_SECRET
    valueFrom:
      secretKeyRef:
        name: <app>-secrets
        key: JWT_SECRET
  - name: PORT
    value: '3000' # safe-default: non-secret, no Vault needed
 ```
 ### 4. App-side schema validation — TypeScript (zod)
 Validate all required env vars at startup. Exit non-zero on missing values.
 ```typescript
 // src/env.ts
 import { z } from 'zod';
 const envSchema = z.object({
  DB_PASSWORD: z.string().min(1, 'DB_PASSWORD is required'),
  API_KEY: z.string().min(1, 'API_KEY is required'),
  JWT_SECRET: z.string().min(32, 'JWT_SECRET must be at least 32 chars'),
  PORT: z.coerce.number().default(3000),
  NODE_ENV: z.enum(['development', 'production', 'test']).default('production'),
 });
 const result = envSchema.safeParse(process.env);
 if (!result.success) {
  console.error('Missing or invalid environment variables:');
  console.error(result.error.flatten().fieldErrors);
  process.exit(1);
 }
 export const env = result.data;
 ```
 ### 4b. App-side schema validation — Python (pydantic)
 ```python
 # src/config.py
 from pydantic_settings import BaseSettings, SettingsConfigDict
 class Settings(BaseSettings):
    db_password: str
    api_key: str
    jwt_secret: str
    port: int = 3000
    node_env: str = "production"
    model_config = SettingsConfigDict(env_file=None)  # no .env in prod
 try:
    settings = Settings()
 except Exception as e:
    import sys
    print(f"Missing or invalid environment variables: {e}", file=sys.stderr)
    sys.exit(1)
 ```
 ### 4c. App-side schema validation — Go (envconfig)
 ```go
 // config/config.go
 package config
 import (
    "fmt"
    "github.com/kelseyhightower/envconfig"
 )
 type Config struct {
    DBPassword string `envconfig:"DB_PASSWORD" required:"true"`
    APIKey     string `envconfig:"API_KEY" required:"true"`
    JWTSecret  string `envconfig:"JWT_SECRET" required:"true"`
    Port       int    `envconfig:"PORT" default:"3000"`
 }
 func Load() (*Config, error) {
    var cfg Config
    if err := envconfig.Process("", &cfg); err != nil {
        return nil, fmt.Errorf("invalid environment: %w", err)
    }
    return &cfg, nil
 }
 ```
 In your `main.go`:
 ```go
 cfg, err := config.Load()
 if err != nil {
    fmt.Fprintln(os.Stderr, err)
    os.Exit(1)
 }
 ```
 ---
 ## Direct-Vault Opt-In Pattern
 Use this pattern ONLY when a documented dynamic-secrets requirement applies (DB rotation with short TTLs, AWS STS, PKI, per-request audit). Document the justification in the project README under "Secrets architecture" before implementing.
 ### When it is justified
 - Vault DB secrets engine with lease TTLs shorter than a typical pod lifecycle (< 1 hour)
 - AWS STS assume-role tokens generated per-request
 - Vault PKI short-lived certificates (< 24 hours) that must be renewed within a running pod
 - Per-request audit trail requirement (each app call must appear separately in Vault audit log)
 ### Provision an AppRole for the app
 ```bash
 # Enable AppRole auth (if not already enabled)
 vault auth enable approle
 # Create a Vault policy for the app
 # Note: KV v2 paths require both the exact path (for the top-level secret) and the
 # wildcard (for sub-paths). Always include both to avoid permission denied errors.
 vault policy write <app>-policy - <<EOF
 path "secret/data/k3s/<app>" {
  capabilities = ["read"]
 }
 path "secret/data/k3s/<app>/*" {
  capabilities = ["read"]
 }
 path "database/creds/<app>-role" {
  capabilities = ["read"]
 }
 EOF
 # Create the AppRole
 vault write auth/approle/role/<app>-role \
  token_policies="<app>-policy" \
  token_ttl=1h \
  token_max_ttl=4h \
  secret_id_ttl=0
 # Retrieve role-id and secret-id
 vault read auth/approle/role/<app>-role/role-id
 vault write -f auth/approle/role/<app>-role/secret-id
 ```
 ### Bootstrap AppRole credentials via ESO (solving the chicken-and-egg problem)
 The AppRole `role-id` and `secret-id` are themselves secrets. Store them in Vault at a bootstrap path, then use ESO to sync them into a k8s Secret. The app reads that k8s Secret at startup to authenticate with Vault directly.
 ```bash
 # Store the bootstrap credentials in Vault
 vault kv put secret/k3s/<app>-bootstrap \
  role_id="<role-id>" \
  secret_id="<secret-id>"
 ```
 ```yaml
 # deploy/external-secret-bootstrap.yaml
 apiVersion: external-secrets.io/v1beta1
 kind: ExternalSecret
 metadata:
  name: <app>-vault-auth
  namespace: <namespace>
 spec:
  refreshInterval: 24h
  secretStoreRef:
    name: vault-backend
    kind: ClusterSecretStore
  target:
    name: <app>-vault-auth
    creationPolicy: Owner
  data:
    - secretKey: VAULT_ROLE_ID
      remoteRef:
        key: secret/k3s/<app>-bootstrap
        property: role_id
    - secretKey: VAULT_SECRET_ID
      remoteRef:
        key: secret/k3s/<app>-bootstrap
        property: secret_id
 ```
 ```yaml
 # deploy/deployment.yaml (env section for Direct-Vault app)
 env:
  - name: VAULT_ADDR
    value: 'https://vault.example.com' # safe-default: non-secret cluster address
  - name: VAULT_ROLE_ID
    valueFrom:
      secretKeyRef:
        name: <app>-vault-auth
        key: VAULT_ROLE_ID
  - name: VAULT_SECRET_ID
    valueFrom:
      secretKeyRef:
        name: <app>-vault-auth
        key: VAULT_SECRET_ID
 ```
 ### App-side Vault client pattern
 ```typescript
 // src/vault-client.ts — only exists in Direct-Vault apps
 import vault from 'node-vault';
 import { z } from 'zod';
 const bootstrapSchema = z.object({
  VAULT_ADDR: z.string().url(),
  VAULT_ROLE_ID: z.string().min(1),
  VAULT_SECRET_ID: z.string().min(1),
 });
 const bootstrap = bootstrapSchema.parse(process.env);
 const client = vault({ endpoint: bootstrap.VAULT_ADDR });
 export async function getVaultClient() {
  const { auth } = await client.approleLogin({
    role_id: bootstrap.VAULT_ROLE_ID,
    secret_id: bootstrap.VAULT_SECRET_ID,
  });
  client.token = auth.client_token;
  return client;
 }
 ```
 Document in README under "Secrets architecture": the Vault path, why Direct-Vault is required, and the lease/renewal strategy.
 ---
 ## Forbidden Patterns (CI Lint Targets)
 The following patterns are forbidden in all Mosaic projects. CI lint SHOULD catch these automatically (implementation tracked separately). Agents MUST NOT introduce these patterns.
 ### 1. Untagged fallback defaults for required values
 ```yaml
 # FORBIDDEN — required secret with silent fallback
 environment:
  - DB_PASSWORD=${DB_PASSWORD:-changeme}
  - API_KEY=${API_KEY:-}
 # REQUIRED — fast-fail on missing required values
 environment:
  - DB_PASSWORD=${DB_PASSWORD:?DB_PASSWORD is required}
  - API_KEY=${API_KEY:?API_KEY is required}
 # ALLOWED — true convenience default, tagged
 environment:
  - PORT=${PORT:-3000}  # safe-default: non-secret, app works at any port
 ```
 This applies to: `docker-compose.yml`, k8s manifests, Helm `values.yaml`, any env file committed to git.
 ### 2. Vault KV calls in application source code (ESO-default projects)
 ```python
 # FORBIDDEN in ESO-default apps — direct Vault client in app source
 import hvac
 client = hvac.Client(url=os.environ['VAULT_ADDR'])
 secret = client.secrets.kv.v2.read_secret_version(path='myapp/db')
 ```
 ESO-default apps read env vars only. Direct-Vault clients belong only in apps with a documented dynamic-secrets justification in README.
 ### 3. Hardcoded secrets or API keys in committed files
 ```python
 # FORBIDDEN — hardcoded credential
 DB_PASSWORD = "supersecret123"
 API_KEY = "sk-live-abc123"
 ```
 No exceptions. CI lint must flag any assignment of a literal (non-env-var) value to a name matching common secret patterns (`password`, `secret`, `api_key`, `token`).
 ### 4. `.env` files in production deployment paths
 ```
 # FORBIDDEN — .env file in a production deploy path
 deploy/.env
 k8s/.env
 docker/.env
 # ALLOWED — local dev only
 .env.example          # template only, no real values
 .env                  # local dev, must be in .gitignore
 ```
 `.env` files are acceptable in local-dev contexts only and MUST be in `.gitignore`. They are forbidden in any path that a CI pipeline or production deployment process reads directly.
--- a/infra/step-ca/dev-password.example
+++ b/infra/step-ca/dev-password.example
@@ -0,0 +1 @@
 dev-only-step-ca-password-do-not-use-in-production
--- a/infra/step-ca/init.sh
+++ b/infra/step-ca/init.sh
@@ -0,0 +1,90 @@
 #!/bin/sh
 # infra/step-ca/init.sh
 #
 # Idempotent first-boot initialiser for the Mosaic Federation CA.
 #
 # On the first run (no /home/step/config/ca.json present) this script:
 #   1. Initialises Step-CA with a JWK provisioner named "mosaic-fed".
 #   2. Writes the CA configuration to the persistent volume at /home/step.
 #   3. Copies the federation X.509 template into the CA config directory.
 #   4. Patches the mosaic-fed provisioner entry in ca.json to reference the
 #      template via options.x509.templateFile (using jq — must be installed
 #      in the container image).
 #
 # On subsequent runs (config already exists) this script skips init and
 # starts the CA directly.
 #
 # The provisioner name "mosaic-fed" is consumed by:
 #   apps/gateway/src/federation/ca.service.ts  (added in M2-04)
 #
 # Password source:
 #   Dev:  mounted from ./infra/step-ca/dev-password via bind mount.
 #   Prod: mounted from a Docker secret at /run/secrets/ca_password.
 #
 # OID template:
 #   infra/step-ca/templates/federation.tpl emits custom OID extensions:
 #     1.3.6.1.4.1.99999.1  — mosaic_grant_id
 #     1.3.6.1.4.1.99999.2  — mosaic_subject_user_id
 set -e
 # Paths inside the step-ca container.
 CA_CONFIG="/home/step/config/ca.json"
 PASSWORD_FILE="/run/secrets/ca_password"
 TEMPLATE_SRC="/etc/step-ca-templates/federation.tpl"
 TEMPLATE_DEST="/home/step/templates/federation.tpl"
 # First boot is detected by the absence of ca.json; everything inside this
 # branch therefore runs at most once per persistent volume.
 if [ ! -f "${CA_CONFIG}" ]; then
  echo "[step-ca init] First boot detected — initialising Mosaic Federation CA..."
  step ca init \
    --name "Mosaic Federation CA" \
    --dns "localhost" \
    --dns "step-ca" \
    --address ":9000" \
    --provisioner "mosaic-fed" \
    --password-file "${PASSWORD_FILE}" \
    --provisioner-password-file "${PASSWORD_FILE}" \
    --no-db
  echo "[step-ca init] CA initialised."
  # Copy the X.509 template into the Step-CA config directory.
  if [ -f "${TEMPLATE_SRC}" ]; then
    mkdir -p /home/step/templates
    cp "${TEMPLATE_SRC}" "${TEMPLATE_DEST}"
    echo "[step-ca init] Federation X.509 template copied to ${TEMPLATE_DEST}."
  else
    echo "[step-ca init] WARNING: Template source ${TEMPLATE_SRC} not found — skipping copy."
  fi
  # Wire the template into the mosaic-fed provisioner via jq.
  # This is idempotent: the block only runs once (first boot).
  #
  # jq filter: find the provisioner entry with name "mosaic-fed" and set
  # .options.x509.templateFile to the absolute path of the template.
  # All other provisioners and config keys are left unchanged.
  if [ -f "${TEMPLATE_DEST}" ] && command -v jq > /dev/null 2>&1; then
    echo "[step-ca init] Patching mosaic-fed provisioner with X.509 template..."
    TEMP_CONFIG="${CA_CONFIG}.tmp"
    # `set -e` does not abort on a failing non-final command of an `&&` list,
    # so the result is tested explicitly. Success is only reported when both
    # the jq rewrite and the rename succeeded; ca.json is replaced via rename
    # and is therefore never left half-written.
    if jq --arg tpl "${TEMPLATE_DEST}" '
      .authority.provisioners |= map(
        if .name == "mosaic-fed" then
          .options.x509.templateFile = $tpl
        else
          .
        end
      )
    ' "${CA_CONFIG}" > "${TEMP_CONFIG}" && mv "${TEMP_CONFIG}" "${CA_CONFIG}"; then
      echo "[step-ca init] Provisioner patched."
    else
      # Drop the partial output so it cannot be mistaken for a valid config.
      rm -f "${TEMP_CONFIG}"
      echo "[step-ca init] WARNING: failed to patch mosaic-fed provisioner — template is NOT wired into ${CA_CONFIG}." >&2
    fi
  elif ! command -v jq > /dev/null 2>&1; then
    echo "[step-ca init] WARNING: jq not found — skipping provisioner template patch."
    echo "[step-ca init] Install jq in the step-ca image to enable automatic template wiring."
  fi
  echo "[step-ca init] Startup complete."
 else
  echo "[step-ca init] Config already exists — skipping init."
 fi
 echo "[step-ca init] Starting Step-CA on :9000..."
 # exec replaces this shell so step-ca receives container signals directly.
 exec step-ca /home/step/config/ca.json --password-file "${PASSWORD_FILE}"
--- a/infra/step-ca/templates/federation.tpl
+++ b/infra/step-ca/templates/federation.tpl
@@ -0,0 +1,56 @@
 {
  "subject": {{ toJson .Subject }},
  "sans": {{ toJson .SANs }},
  {{- /*
    Mosaic Federation X.509 Certificate Template
    ============================================
    Provisioner: mosaic-fed (JWK)
    Implemented: FED-M2-04
    This template emits two custom OID extensions carrying Mosaic federation
    identifiers. The OTT token (built by CaService.buildOtt) MUST include the
    claims `mosaic_grant_id` and `mosaic_subject_user_id` as top-level JWT
    claims. step-ca exposes them under `.Token.<claim>` in this template.
    OID Registry (Mosaic Internal Arc — 1.3.6.1.4.1.99999):
      1.3.6.1.4.1.99999.1  mosaic_grant_id        (UUID, 36 ASCII chars)
      1.3.6.1.4.1.99999.2  mosaic_subject_user_id  (UUID, 36 ASCII chars)
    DER encoding for each extension value (ASN.1 UTF8String):
      Tag    = 0x0C  (UTF8String)
      Length = 0x24  (decimal 36 — the fixed length of a UUID string)
      Value  = 36 ASCII bytes of the UUID
    The `printf` below builds the raw TLV bytes then base64-encodes them.
    step-ca expects the `value` field to be base64-encoded raw DER bytes.
    Fail-loud contract:
      If either claim is missing from the token the printf will produce a
      zero-length UUID field, making the extension malformed. step-ca will
      reject the certificate rather than issuing one without the required OIDs.
      Silent OID stripping is NEVER tolerated.
    NOTE(review): please confirm the contract above against the printf below.
      - An empty-string claim renders as the two bytes 0x0C 0x00, which is a
        well-formed zero-length UTF8String rather than a malformed extension;
        rejection in that case presumably has to be enforced elsewhere.
      - The length is emitted with `%c` as a single character, so this only
        yields a valid DER length for short values such as the 36-char UUIDs
        described above — TODO confirm inputs are always UUIDs.
    Step-CA template reference:
      https://smallstep.com/docs/step-ca/templates
  */ -}}
  "extensions": [
    {
      "id": "1.3.6.1.4.1.99999.1",
      "critical": false,
      "value": "{{ printf "\x0c%c%s" (len .Token.mosaic_grant_id) .Token.mosaic_grant_id | b64enc }}"
    },
    {
      "id": "1.3.6.1.4.1.99999.2",
      "critical": false,
      "value": "{{ printf "\x0c%c%s" (len .Token.mosaic_subject_user_id) .Token.mosaic_subject_user_id | b64enc }}"
    }
  ],
  "keyUsage": ["digitalSignature"],
  "extKeyUsage": ["clientAuth"],
  "basicConstraints": {
    "isCA": false
  }
 }
--- a/packages/appservice/package.json
+++ b/packages/appservice/package.json
@@ -0,0 +1,36 @@
 {
  "name": "@mosaicstack/appservice",
  "version": "0.0.1",
  "type": "module",
  "repository": {
    "type": "git",
    "url": "https://git.mosaicstack.dev/mosaicstack/stack.git",
    "directory": "packages/appservice"
  },
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
  "exports": {
    ".": {
      "types": "./dist/index.d.ts",
      "default": "./dist/index.js"
    }
  },
  "scripts": {
    "build": "tsc",
    "lint": "eslint src",
    "typecheck": "tsc --noEmit",
    "test": "vitest run --passWithNoTests"
  },
  "devDependencies": {
    "@types/node": "^22.0.0",
    "typescript": "^5.8.0",
    "vitest": "^2.0.0"
  },
  "publishConfig": {
    "registry": "https://git.mosaicstack.dev/api/packages/mosaicstack/npm/",
    "access": "public"
  },
  "files": [
    "dist"
  ]
 }
--- a/packages/appservice/src/tests/appservice.test.ts
+++ b/packages/appservice/src/tests/appservice.test.ts
@@ -0,0 +1,230 @@
 import { describe, expect, it, vi } from 'vitest';
 import { validateBridgeMessage, validateBridgeTyping } from '../bridge.dto.js';
 import { AppserviceIntent, MatrixApiError } from '../intent.js';
 import { buildRegistration, registrationToYaml } from '../registration.js';
 import { TransactionHandler } from '../transactions.js';
 import type { AppserviceConfig, MatrixEvent } from '../types.js';
 // Shared appservice fixture. The token literals here are the same ones the
 // suites below pass as hs_token and expect in the Authorization header.
 const cfg: AppserviceConfig = {
  homeserverUrl: 'https://hs.example',
  domain: 'hs.example',
  asToken: 'as-secret',
  hsToken: 'hs-secret',
 };
 /** Build a JSON `Response` with the given status, for use as a fetch-mock return value. */
 const jsonResponse = (status: number, body: unknown): Response => {
  const payload = JSON.stringify(body);
  const headers = { 'Content-Type': 'application/json' };
  return new Response(payload, { status, headers });
 };
 describe('TransactionHandler', () => {
  // Fresh auth object per call so no two handle() invocations share state.
  const validAuth = () => ({ authorizationHeader: 'Bearer hs-secret' });
  // Builds a handler around a spy so tests can assert on event delivery.
  const makeHandler = (onEvent = vi.fn()) => {
    const handler = new TransactionHandler({ hsToken: 'hs-secret', onEvent });
    return { onEvent, handler };
  };
  it('rejects a bad hs_token with M_FORBIDDEN', async () => {
    const { handler, onEvent } = makeHandler();
    const transaction = { events: [{ type: 'm.room.message' }] };
    const res = await handler.handle('t1', transaction, { authorizationHeader: 'Bearer wrong' });
    expect(res.status).toBe(403);
    expect(res.body.errcode).toBe('M_FORBIDDEN');
    // A rejected transaction must never reach the event callback.
    expect(onEvent).not.toHaveBeenCalled();
  });
  it('accepts Bearer auth and legacy access_token param', async () => {
    const { handler } = makeHandler();
    const viaHeader = await handler.handle('t1', { events: [] }, validAuth());
    expect(viaHeader.status).toBe(200);
    const viaQuery = await handler.handle('t2', { events: [] }, { accessTokenParam: 'hs-secret' });
    expect(viaQuery.status).toBe(200);
  });
  it('processes events once per txnId (idempotent retries)', async () => {
    const { handler, onEvent } = makeHandler();
    const transaction = { events: [{ type: 'm.room.message', event_id: '$e1' }] };
    await handler.handle('t1', transaction, validAuth());
    // Same txnId again: acknowledged, but the event is not delivered twice.
    const retry = await handler.handle('t1', transaction, validAuth());
    expect(retry.status).toBe(200);
    expect(onEvent).toHaveBeenCalledTimes(1);
  });
  it('a throwing event handler does not fail the transaction', async () => {
    const onError = vi.fn();
    const alwaysThrows = () => {
      throw new Error('boom');
    };
    const handler = new TransactionHandler({
      hsToken: 'hs-secret',
      onEvent: alwaysThrows,
      onError,
    });
    const transaction = { events: [{ type: 'x' }, { type: 'y' }] };
    const res = await handler.handle('t1', transaction, validAuth());
    expect(res.status).toBe(200);
    // One onError call per failing event.
    expect(onError).toHaveBeenCalledTimes(2);
  });
 });
 // Exercises AppserviceIntent against an injected fetch mock; no network I/O.
 describe('AppserviceIntent', () => {
  // Slugs are lowercased before validation, so mixed case maps to the same id.
  it('derives namespaced user ids and rejects bad slugs', () => {
    const intent = new AppserviceIntent(cfg);
    expect(intent.agentUserId('pi0-web1')).toBe('@agent-pi0-web1:hs.example');
    expect(intent.agentUserId('Pi0-Web1')).toBe('@agent-pi0-web1:hs.example');
    expect(() => intent.agentUserId('../evil')).toThrow();
    expect(() => intent.agentUserId('')).toThrow();
  });
  // The send path must end in "mosaic-as-" followed by a lowercase UUID.
  it('uses uuid transaction ids', async () => {
    const calls: string[] = [];
    const fetchMock = vi.fn(async (input: URL | string) => {
      calls.push(new URL(String(input)).pathname);
      return jsonResponse(200, {});
    });
    const intent = new AppserviceIntent(cfg, fetchMock as unknown as typeof fetch);
    await intent.sendAsAgent({ roomId: '!r:hs.example', agent: 'pi0', body: 'x' });
    const send = calls.find((p) => p.includes('/send/m.room.message/'));
    expect(send).toMatch(/mosaic-as-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/);
  });
  // Two sends for the same agent/room: register and join happen once (cached),
  // every request carries the as_token, and the first send is threaded.
  it('registers once, impersonates via user_id, threads replies', async () => {
    const calls: Array<{ url: URL; init: RequestInit }> = [];
    const fetchMock = vi.fn(async (input: URL | string, init?: RequestInit) => {
      calls.push({ url: new URL(String(input)), init: init ?? {} });
      return jsonResponse(200, { event_id: '$sent' });
    });
    const intent = new AppserviceIntent(cfg, fetchMock as unknown as typeof fetch);
    const eventId = await intent.sendAsAgent({
      roomId: '!room:hs.example',
      agent: 'pi0-web1',
      body: 'hello',
      threadRoot: '$req',
    });
    await intent.sendAsAgent({ roomId: '!room:hs.example', agent: 'pi0-web1', body: 'again' });
    expect(eventId).toBe('$sent');
    const paths = calls.map((c) => c.url.pathname);
    expect(paths.filter((p) => p.endsWith('/register'))).toHaveLength(1); // cached
    expect(paths.filter((p) => p.includes('/join'))).toHaveLength(1); // cached
    // `find` returns the first send, i.e. the threaded one.
    const send = calls.find((c) => c.url.pathname.includes('/send/m.room.message/'));
    expect(send).toBeDefined();
    expect(send!.url.searchParams.get('user_id')).toBe('@agent-pi0-web1:hs.example');
    const content = JSON.parse(String(send!.init.body)) as Record<string, unknown>;
    const rel = content['m.relates_to'] as Record<string, unknown>;
    expect(rel.rel_type).toBe('m.thread');
    expect(rel.event_id).toBe('$req');
    expect(rel.is_falling_back).toBe(true);
    expect(
      calls.every(
        (c) => (c.init.headers as Record<string, string>).Authorization === 'Bearer as-secret',
      ),
    ).toBe(true);
  });
  // M_USER_IN_USE means the user already exists and is treated as success;
  // any other register failure must propagate as MatrixApiError.
  it('tolerates M_USER_IN_USE and surfaces other register errors', async () => {
    const inUse = vi.fn(async () =>
      jsonResponse(400, { errcode: 'M_USER_IN_USE', error: 'taken' }),
    );
    const intent = new AppserviceIntent(cfg, inUse as unknown as typeof fetch);
    await expect(intent.ensureRegistered('pi0-web1')).resolves.toBe('@agent-pi0-web1:hs.example');
    const denied = vi.fn(async () =>
      jsonResponse(401, { errcode: 'M_UNKNOWN_TOKEN', error: 'nope' }),
    );
    const intent2 = new AppserviceIntent(cfg, denied as unknown as typeof fetch);
    await expect(intent2.ensureRegistered('pi0-web1')).rejects.toThrow(MatrixApiError);
  });
  // The mock fails only the first /join with M_FORBIDDEN; the expected
  // sequence is join (403) -> invite -> join (200).
  it('invites then joins on M_FORBIDDEN join', async () => {
    const paths: string[] = [];
    const fetchMock = vi.fn(async (input: URL | string) => {
      const url = new URL(String(input));
      paths.push(url.pathname);
      // The current path is already recorded, so a count of 1 means "first join".
      if (url.pathname.endsWith('/join') && paths.filter((p) => p.endsWith('/join')).length === 1) {
        return jsonResponse(403, { errcode: 'M_FORBIDDEN', error: 'not invited' });
      }
      return jsonResponse(200, {});
    });
    const intent = new AppserviceIntent(cfg, fetchMock as unknown as typeof fetch);
    await intent.ensureJoined('!room:hs.example', 'pi0-web1');
    expect(paths.filter((p) => p.endsWith('/invite'))).toHaveLength(1);
    expect(paths.filter((p) => p.endsWith('/join'))).toHaveLength(2);
  });
 });
 describe('registration', () => {
  it('builds an exclusive escaped user namespace', () => {
    const registration = buildRegistration(cfg, { url: 'http://mosaic-as:8008' });
    // The dot in the domain must arrive regex-escaped.
    const [userNamespace] = registration.namespaces.users;
    expect(userNamespace).toEqual({
      regex: '@agent-.*:hs\\.example',
      exclusive: true,
    });
    expect(registration.rate_limited).toBe(false);
    const serialized = registrationToYaml(registration);
    const expectedFragments = [
      "sender_localpart: 'mosaic-as'",
      "as_token: 'as-secret'",
      'exclusive: true',
    ];
    for (const fragment of expectedFragments) {
      expect(serialized).toContain(fragment);
    }
  });
 });
 describe('registration hardening', () => {
  const target = { url: 'http://mosaic-as:8008' };
  it('rejects control characters in registration values', () => {
    // A newline in a token is an attempt to smuggle an extra YAML key.
    const poisoned = { ...cfg, asToken: 'abc\nhttp_injected: true' };
    const reg = buildRegistration(poisoned, target);
    expect(() => registrationToYaml(reg)).toThrow(/control characters/);
  });
  it('escapes single quotes in token values', () => {
    const quoted = { ...cfg, asToken: "it's" };
    const yaml = registrationToYaml(buildRegistration(quoted, target));
    // YAML single-quoted scalars escape a quote by doubling it.
    expect(yaml).toContain("as_token: 'it''s'");
  });
 });
 describe('bridge DTOs', () => {
  it('validates message and typing payloads', () => {
    // Start from a valid payload and break one field at a time.
    const goodMessage = { room_id: '!r:hs', agent: 'pi0', body: 'x' };
    expect(() => validateBridgeMessage(goodMessage)).not.toThrow();
    expect(() => validateBridgeMessage({ ...goodMessage, room_id: 'bad' })).toThrow();
    expect(() => validateBridgeMessage({ ...goodMessage, agent: '' })).toThrow();
    expect(() => validateBridgeMessage({ ...goodMessage, agent: '../evil' })).toThrow(
      /agent must match/,
    );
    const goodTyping = { room_id: '!r:hs', agent: 'pi0', typing: true };
    expect(() => validateBridgeTyping(goodTyping)).not.toThrow();
    expect(() => validateBridgeTyping({ ...goodTyping, typing: 'yes' })).toThrow();
  });
 });
 describe('event shape', () => {
  it('transaction events flow through to the handler', async () => {
    // Collect whatever the handler delivers so its shape can be inspected.
    const received: MatrixEvent[] = [];
    const handler = new TransactionHandler({
      hsToken: 'hs-secret',
      onEvent: (event) => {
        received.push(event);
      },
    });
    const transaction = {
      events: [
        { type: 'm.room.message', room_id: '!r:hs', sender: '@u:hs', content: { body: 'hi' } },
      ],
    };
    await handler.handle('t1', transaction, { authorizationHeader: 'Bearer hs-secret' });
    expect(received).toHaveLength(1);
    expect(received[0]!.content?.body).toBe('hi');
  });
 });
--- a/packages/appservice/src/bridge.dto.ts
+++ b/packages/appservice/src/bridge.dto.ts
@@ -0,0 +1,83 @@
 /** DTOs for the internal bridge API consumed by agent-comms host daemons. */
 /** Payload accepted by validateBridgeMessage: one message to send as an agent. */
 export interface BridgeMessageDto {
  /** Matrix room id; validation requires a string starting with "!". */
  room_id: string;
  /** Agent slug (localpart suffix), e.g. "pi0-web1". */
  agent: string;
  /** Message text; validation only requires it to be a string. */
  body: string;
  /** Optional event id to thread the message under; must be a string when present. */
  thread_root?: string;
  /** Optional Matrix msgtype. NOTE(review): presumably defaults to m.text downstream — confirm. */
  msgtype?: string;
  /** Optional protocol payload merged into content (e.g. org.uscllc.agent). */
  extra_content?: Record<string, unknown>;
 }
 /** Payload accepted by validateBridgeTyping: a typing-indicator change for an agent. */
 export interface BridgeTypingDto {
  /** Matrix room id; validation requires a string starting with "!". */
  room_id: string;
  /** Agent slug, validated with the same rule as BridgeMessageDto.agent. */
  agent: string;
  /** Desired typing state; must be a real boolean (e.g. the string "yes" is rejected). */
  typing: boolean;
 }
 /** Agent slug shape: one lowercase alphanumeric, then alphanumerics, `_`, `.` or `-`. */
 const AGENT_SLUG_RE = /^[a-z0-9][a-z0-9_.-]*$/;
 /**
 * Throw unless `agent` is a string whose lowercased form matches AGENT_SLUG_RE.
 * Uppercase input is therefore accepted; path-like or empty values are not.
 */
 const assertAgentSlug = (agent: unknown): void => {
  const isValid = typeof agent === 'string' && AGENT_SLUG_RE.test(agent.toLowerCase());
  if (isValid) {
    return;
  }
  throw new Error('agent must match [a-z0-9][a-z0-9_.-]*');
 };
 /**
 * Assert that `input` is a well-formed BridgeMessageDto.
 *
 * Checks, in order: payload is an object, `room_id` starts with "!", `agent`
 * is a valid slug, `body` is a string, and the optional `thread_root`,
 * `msgtype` and `extra_content` have the declared types when present.
 *
 * @param input Untrusted request payload.
 * @throws Error naming the first field that fails validation.
 */
 export function validateBridgeMessage(input: unknown): asserts input is BridgeMessageDto {
  const o = input as Partial<BridgeMessageDto> | null | undefined;
  if (!o || typeof o !== 'object') throw new Error('payload must be an object');
  if (typeof o.room_id !== 'string' || !o.room_id.startsWith('!'))
    throw new Error('room_id must be a Matrix room id');
  assertAgentSlug(o.agent);
  if (typeof o.body !== 'string') throw new Error('body must be a string');
  if (o.thread_root !== undefined && typeof o.thread_root !== 'string')
    throw new Error('thread_root must be a string');
  // msgtype is declared as an optional string; without this check a number or
  // object would pass the assertion and be forwarded as the message type.
  if (o.msgtype !== undefined && typeof o.msgtype !== 'string')
    throw new Error('msgtype must be a string');
  // Arrays are `typeof 'object'` too, but spreading one into message content
  // would add numeric index keys, so only plain key/value objects are allowed.
  if (
    o.extra_content !== undefined &&
    (typeof o.extra_content !== 'object' ||
      o.extra_content === null ||
      Array.isArray(o.extra_content))
  ) {
    throw new Error('extra_content must be an object');
  }
 }
 /**
 * Assert that `input` is a well-formed BridgeTypingDto.
 *
 * @param input Untrusted request payload.
 * @throws Error naming the first field that fails validation.
 */
 export function validateBridgeTyping(input: unknown): asserts input is BridgeTypingDto {
  const payload = input as Partial<BridgeTypingDto> | null | undefined;
  if (!payload || typeof payload !== 'object') {
    throw new Error('payload must be an object');
  }
  const hasRoomId = typeof payload.room_id === 'string' && payload.room_id.startsWith('!');
  if (!hasRoomId) {
    throw new Error('room_id must be a Matrix room id');
  }
  assertAgentSlug(payload.agent);
  if (typeof payload.typing !== 'boolean') {
    throw new Error('typing must be a boolean');
  }
 }
 /** Payload accepted by validateProvisionRoom: a request to create a room. */
 export interface ProvisionRoomDto {
  /** Room name; must be a non-empty string. */
  name: string;
  /** Optional alias localpart; validation allows [a-z0-9_.-]+ up to 200 characters. */
  alias?: string;
  /** Optional room topic. */
  topic?: string;
  /** Optional user ids to invite; each must start with "@", at most 50 entries. */
  invite?: string[];
  /** Optional room id of a space; must start with "!" when present. */
  space_id?: string;
 }
 /**
 * Assert that `input` is a well-formed ProvisionRoomDto.
 *
 * @param input Untrusted request payload.
 * @throws Error naming the first field that fails validation.
 */
 export function validateProvisionRoom(input: unknown): asserts input is ProvisionRoomDto {
  const o = input as Partial<ProvisionRoomDto> | null | undefined;
  if (!o || typeof o !== 'object') throw new Error('payload must be an object');
  if (typeof o.name !== 'string' || o.name.length === 0) throw new Error('name is required');
  // The typeof check must come first: RegExp.test() stringifies its argument,
  // so a number such as 123 or an array such as ['ops'] would otherwise pass.
  if (
    o.alias !== undefined &&
    (typeof o.alias !== 'string' || !/^[a-z0-9_.-]+$/.test(o.alias) || o.alias.length > 200)
  ) {
    throw new Error('alias must match [a-z0-9_.-]+ (max 200 chars)');
  }
  // topic is declared as an optional string, so enforce it like the other fields.
  if (o.topic !== undefined && typeof o.topic !== 'string') {
    throw new Error('topic must be a string');
  }
  if (o.invite !== undefined) {
    if (
      !Array.isArray(o.invite) ||
      o.invite.some((u) => typeof u !== 'string' || !u.startsWith('@'))
    ) {
      throw new Error('invite must be a list of Matrix user ids');
    }
    if (o.invite.length > 50) {
      throw new Error('invite list exceeds maximum of 50');
    }
  }
  if (o.space_id !== undefined && (typeof o.space_id !== 'string' || !o.space_id.startsWith('!'))) {
    throw new Error('space_id must be a Matrix room id');
  }
 }
--- a/packages/appservice/src/index.ts
+++ b/packages/appservice/src/index.ts
@@ -0,0 +1,19 @@
 export { AppserviceIntent, MatrixApiError } from './intent.js';
 export type { SendMessageOptions } from './intent.js';
 export { TransactionHandler } from './transactions.js';
 export type { TransactionHandlerOptions } from './transactions.js';
 export { buildRegistration, registrationToYaml } from './registration.js';
 export type { RegistrationOptions } from './registration.js';
 export {
  validateBridgeMessage,
  validateBridgeTyping,
  validateProvisionRoom,
 } from './bridge.dto.js';
 export type { BridgeMessageDto, BridgeTypingDto, ProvisionRoomDto } from './bridge.dto.js';
 export type {
  AppserviceConfig,
  EventHandler,
  HandlerResult,
  MatrixEvent,
  Transaction,
 } from './types.js';
--- a/packages/appservice/src/intent.ts
+++ b/packages/appservice/src/intent.ts
@@ -0,0 +1,236 @@
 import crypto from 'node:crypto';
 import type { AppserviceConfig } from './types.js';
 /** Arguments for AppserviceIntent.sendAsAgent. */
 export interface SendMessageOptions {
  /** Target room id; it is URL-encoded into the request path. */
  roomId: string;
  /** Agent slug, e.g. "pi0-web1" -> @agent-pi0-web1:domain */
  agent: string;
  /** Message text, sent as the content `body`. */
  body: string;
  /** Request event id to thread off (m.thread, spec v1.4). */
  threadRoot?: string;
  /** Matrix msgtype; "m.text" is used when omitted. */
  msgtype?: string;
  /** Extra content keys merged into the message content (e.g. org.uscllc.agent). */
  extraContent?: Record<string, unknown>;
 }
 /**
 * Error raised for a failed homeserver request. Carries the HTTP status and,
 * when the response provided one, the Matrix `errcode`.
 */
 export class MatrixApiError extends Error {
  /** HTTP status code of the failed response. */
  readonly status: number;
  /** Matrix error code (e.g. "M_FORBIDDEN"), or undefined when none was given. */
  readonly errcode: string | undefined;
  constructor(status: number, errcode: string | undefined, message: string) {
    super(message);
    this.status = status;
    this.errcode = errcode;
    this.name = 'MatrixApiError';
  }
 }
 /** Signature of the global `fetch`; lets callers inject a replacement implementation. */
 type FetchLike = typeof fetch;
 /**
 * Acts on the homeserver as appservice-namespaced virtual users
 * (Application Service API: as_token auth + user_id impersonation).
 *
 * Registration and room membership are cached in memory per instance, so
 * repeated calls for the same agent/room do not repeat those requests.
 */
 export class AppserviceIntent {
  /** User ids this instance has already registered (or found already taken). */
  private readonly registered = new Set<string>();
  /** "<userId> <roomId>" keys for joins this instance has already completed. */
  private readonly joined = new Set<string>();
  private readonly fetchImpl: FetchLike;
  /**
   * @param cfg Homeserver URL, domain and tokens used for every request.
   * @param fetchImpl Optional fetch implementation; the global `fetch` is used when omitted.
   */
  constructor(
    private readonly cfg: AppserviceConfig,
    fetchImpl?: FetchLike,
  ) {
    this.fetchImpl = fetchImpl ?? fetch;
  }
  /** Localpart prefix for agent users: `cfg.userPrefix`, or "agent-" when unset. */
  get userPrefix(): string {
    return this.cfg.userPrefix ?? 'agent-';
  }
  /** Full user id of the appservice sender; the localpart defaults to "mosaic-as". */
  get senderUserId(): string {
    return `@${this.cfg.senderLocalpart ?? 'mosaic-as'}:${this.cfg.domain}`;
  }
  /**
   * Map an agent slug to its namespaced localpart (prefix + lowercased slug).
   * @throws Error when the lowercased slug does not match [a-z0-9][a-z0-9_.-]*.
   */
  agentLocalpart(agent: string): string {
    const slug = agent.toLowerCase();
    if (!/^[a-z0-9][a-z0-9_.-]*$/.test(slug)) {
      throw new Error(`invalid agent slug: ${agent}`);
    }
    return `${this.userPrefix}${slug}`;
  }
  /** Full Matrix user id for an agent slug: `@<localpart>:<domain>`. */
  agentUserId(agent: string): string {
    return `@${this.agentLocalpart(agent)}:${this.cfg.domain}`;
  }
  /**
   * Issue one as_token-authenticated JSON request against the homeserver.
   *
   * @param method HTTP method.
   * @param path API path appended to `cfg.homeserverUrl` (one trailing slash stripped).
   * @param options `userId` is sent as the `user_id` query parameter
   *   (impersonation); `body` is JSON-encoded when defined.
   * @returns The decoded JSON object, or `{}` for an empty or non-object body.
   * @throws MatrixApiError when the response is not ok, and when an ok
   *   response carries a body that is not valid JSON.
   */
  private async request(
    method: string,
    path: string,
    options: { userId?: string; body?: unknown } = {},
  ): Promise<Record<string, unknown>> {
    const url = new URL(this.cfg.homeserverUrl.replace(/\/$/, '') + path);
    if (options.userId) {
      url.searchParams.set('user_id', options.userId);
    }
    const res = await this.fetchImpl(url, {
      method,
      headers: {
        Authorization: `Bearer ${this.cfg.asToken}`,
        'Content-Type': 'application/json',
      },
      body: options.body === undefined ? undefined : JSON.stringify(options.body),
    });
    const text = await res.text();
    // Intermediaries can answer with HTML or plain text. Decode defensively so
    // such responses still surface as MatrixApiError carrying the HTTP status
    // instead of a bare SyntaxError from JSON.parse.
    let data: Record<string, unknown> = {};
    let malformed = false;
    if (text) {
      try {
        const parsed: unknown = JSON.parse(text);
        // Only a JSON object can carry errcode/event_id/room_id; null, arrays
        // and scalars are treated as an empty object.
        if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
          data = parsed as Record<string, unknown>;
        }
      } catch {
        malformed = true;
      }
    }
    if (!res.ok) {
      throw new MatrixApiError(
        res.status,
        typeof data.errcode === 'string' ? data.errcode : undefined,
        `${method} ${path} -> ${res.status}: ${text.slice(0, 300)}`,
      );
    }
    if (malformed) {
      throw new MatrixApiError(
        res.status,
        undefined,
        `${method} ${path} -> ${res.status}: response body is not valid JSON`,
      );
    }
    return data;
  }
  /**
   * Register the virtual user if it does not exist yet. Idempotent.
   * @returns The agent's full user id.
   * @throws MatrixApiError for any register failure other than M_USER_IN_USE.
   */
  async ensureRegistered(agent: string): Promise<string> {
    const localpart = this.agentLocalpart(agent);
    const userId = this.agentUserId(agent);
    if (this.registered.has(userId)) return userId;
    try {
      await this.request('POST', '/_matrix/client/v3/register', {
        body: { type: 'm.login.application_service', username: localpart },
      });
    } catch (err) {
      // M_USER_IN_USE means the user already exists, which is the goal state.
      if (!(err instanceof MatrixApiError && err.errcode === 'M_USER_IN_USE')) {
        throw err;
      }
    }
    this.registered.add(userId);
    return userId;
  }
  /** Join the agent to a room; on invite-only rooms the AS sender invites first. */
  async ensureJoined(roomId: string, agent: string): Promise<void> {
    const userId = await this.ensureRegistered(agent);
    const key = `${userId} ${roomId}`;
    if (this.joined.has(key)) return;
    const room = encodeURIComponent(roomId);
    try {
      await this.request('POST', `/_matrix/client/v3/rooms/${room}/join`, { userId, body: {} });
    } catch (err) {
      // Only M_FORBIDDEN triggers the invite-then-retry path; anything else propagates.
      if (!(err instanceof MatrixApiError && err.errcode === 'M_FORBIDDEN')) throw err;
      await this.request('POST', `/_matrix/client/v3/rooms/${room}/invite`, {
        userId: this.senderUserId,
        body: { user_id: userId },
      });
      await this.request('POST', `/_matrix/client/v3/rooms/${room}/join`, { userId, body: {} });
    }
    this.joined.add(key);
  }
  /**
   * Send a message AS the agent's virtual user.
   * @returns The `event_id` from the response, or undefined when it is not a string.
   */
  async sendAsAgent(options: SendMessageOptions): Promise<string | undefined> {
    const userId = this.agentUserId(options.agent);
    await this.ensureJoined(options.roomId, options.agent);
    // extraContent is spread last, so its keys win over msgtype/body;
    // m.relates_to is assigned afterwards and wins over extraContent.
    const content: Record<string, unknown> = {
      msgtype: options.msgtype ?? 'm.text',
      body: options.body,
      ...options.extraContent,
    };
    if (options.threadRoot) {
      content['m.relates_to'] = {
        rel_type: 'm.thread',
        event_id: options.threadRoot,
        is_falling_back: true,
        'm.in_reply_to': { event_id: options.threadRoot },
      };
    }
    // A fresh random transaction id per call.
    const txn = `mosaic-as-${crypto.randomUUID()}`;
    const room = encodeURIComponent(options.roomId);
    const res = await this.request(
      'PUT',
      `/_matrix/client/v3/rooms/${room}/send/m.room.message/${txn}`,
      { userId, body: content },
    );
    return typeof res.event_id === 'string' ? res.event_id : undefined;
  }
  /**
   * Set the agent's typing indicator in a room.
   * @param timeoutMs Sent as `timeout` only when `typing` is true.
   */
  async setTyping(
    roomId: string,
    agent: string,
    typing: boolean,
    timeoutMs = 30000,
  ): Promise<void> {
    const userId = await this.ensureRegistered(agent);
    const room = encodeURIComponent(roomId);
    const user = encodeURIComponent(userId);
    await this.request('PUT', `/_matrix/client/v3/rooms/${room}/typing/${user}`, {
      userId,
      body: typing ? { typing: true, timeout: timeoutMs } : { typing: false },
    });
  }
  /**
   * Create a private room as the AS sender, who is given power level 100, and
   * invite the listed users. Optionally link the room into a space
   * (m.space.child + m.space.parent).
   *
   * @returns The room id, whether the space link succeeded, and the error
   *   message when it did not.
   * @throws Error when the createRoom response has no string `room_id`;
   *   MatrixApiError when the createRoom request itself fails.
   */
  async createRoom(options: {
    name: string;
    alias?: string;
    topic?: string;
    invite?: string[];
    spaceId?: string;
  }): Promise<{ roomId: string; spaceLinked: boolean; spaceError?: string }> {
    const body: Record<string, unknown> = {
      name: options.name,
      preset: 'private_chat',
      invite: options.invite ?? [],
      power_level_content_override: {
        users: { [this.senderUserId]: 100 },
        // state_default 50 stays; the AS sender can grant agents as needed.
      },
    };
    if (options.alias) body.room_alias_name = options.alias;
    if (options.topic) body.topic = options.topic;
    const res = await this.request('POST', '/_matrix/client/v3/createRoom', {
      userId: this.senderUserId,
      body,
    });
    const roomId = res.room_id;
    if (typeof roomId !== 'string') throw new Error('createRoom returned no room_id');
    if (!options.spaceId) {
      return { roomId, spaceLinked: false };
    }
    // Space-link failures must NOT throw: the room already exists, and an
    // exception would hide the room_id (orphaned room, no recovery path).
    const encodedSpaceId = encodeURIComponent(options.spaceId);
    const encodedRoomId = encodeURIComponent(roomId);
    try {
      await this.request(
        'PUT',
        `/_matrix/client/v3/rooms/${encodedSpaceId}/state/m.space.child/${encodedRoomId}`,
        { userId: this.senderUserId, body: { via: [this.cfg.domain], suggested: true } },
      );
      await this.request(
        'PUT',
        `/_matrix/client/v3/rooms/${encodedRoomId}/state/m.space.parent/${encodedSpaceId}`,
        { userId: this.senderUserId, body: { via: [this.cfg.domain], canonical: true } },
      );
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      return { roomId, spaceLinked: false, spaceError: message };
    }
    return { roomId, spaceLinked: true };
  }
  /** Set display name for an agent's virtual user (registering it first if needed). */
  async setDisplayName(agent: string, displayName: string): Promise<void> {
    const userId = await this.ensureRegistered(agent);
    const user = encodeURIComponent(userId);
    await this.request('PUT', `/_matrix/client/v3/profile/${user}/displayname`, {
      userId,
      body: { displayname: displayName },
    });
  }
 }
--- a/packages/appservice/src/registration.ts
+++ b/packages/appservice/src/registration.ts
@@ -0,0 +1,76 @@
 import type { AppserviceConfig } from './types.js';
 /** Deployment-specific inputs to buildRegistration that are not part of AppserviceConfig. */
 export interface RegistrationOptions {
  /** Unique appservice id in Synapse. Default: "mosaic-as". */
  id?: string;
  /** URL where Synapse reaches the appservice, e.g. http://mosaic-as:8008 */
  url: string;
  /** Alias namespace regex prefix (regex-escaped before use). Default: "#mosaic-". */
  aliasPrefix?: string;
 }
 /** Backslash-escape every regex metacharacter in `value` so it matches literally. */
 const escapeRegex = (value: string): string => {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metacharacters, (match) => `\\${match}`);
 };
 /**
 * Assemble the appservice registration document (mosaic-as.yaml) for Synapse.
 * The infrastructure repo serializes the result to YAML and mounts it through
 * app_service_config_files.
 *
 * The user namespace is exclusive and the alias namespace is not; the prefix
 * and domain are regex-escaped before they are placed in the patterns.
 */
 export function buildRegistration(cfg: AppserviceConfig, options: RegistrationOptions) {
  const domainPattern = escapeRegex(cfg.domain);
  const userPattern = escapeRegex(cfg.userPrefix ?? 'agent-');
  const aliasPattern = escapeRegex(options.aliasPrefix ?? '#mosaic-');
  const users = [{ regex: `@${userPattern}.*:${domainPattern}`, exclusive: true }];
  const aliases = [{ regex: `${aliasPattern}.*:${domainPattern}`, exclusive: false }];
  return {
    id: options.id ?? 'mosaic-as',
    url: options.url,
    as_token: cfg.asToken,
    hs_token: cfg.hsToken,
    sender_localpart: cfg.senderLocalpart ?? 'mosaic-as',
    rate_limited: false,
    namespaces: { users, aliases, rooms: [] },
  };
 }
 /**
 * Validate a value for embedding in a YAML single-quoted scalar and escape it.
 *
 * @param field Field name, used only in the error message.
 * @param value Raw single-line value (token, url, id, ...).
 * @returns `value` with every single quote doubled (the only escape that
 *   single-quoted YAML scalars support).
 * @throws Error when `value` contains a control or line-break character.
 */
 const assertYamlSafe = (field: string, value: string): string => {
  // Tokens/urls/ids are single-line opaque strings; control characters would
  // let a crafted value terminate the scalar and inject YAML keys.
  // Rejected: C0 controls except TAB (includes \n and \r), DEL, the C1 range
  // (includes NEL, U+0085) and the Unicode line/paragraph separators, which
  // YAML 1.1 parsers also treat as line breaks.
  if (/[\x00-\x08\x0a-\x1f\x7f-\x9f\u2028\u2029]/.test(value)) {
    throw new Error(`registration field ${field} contains control characters`);
  }
  return value.replace(/'/g, "''");
 };
 /**
 * Minimal YAML serialization for the flat registration document.
 *
 * Every string value is emitted as a single-quoted scalar after passing
 * through assertYamlSafe, including the namespace regexes, which are built
 * from configuration (prefix, domain) and need the same guard as the tokens.
 *
 * @param registration Document produced by buildRegistration.
 * @returns YAML text ending in a newline.
 * @throws Error when any string value contains control characters.
 */
 export function registrationToYaml(registration: ReturnType<typeof buildRegistration>): string {
  const ns = registration.namespaces;
  // Empty namespaces render inline as " []"; otherwise one list item per entry.
  const nsBlock = (entries: Array<{ regex: string; exclusive: boolean }>): string =>
    entries.length === 0
      ? ' []'
      : '\n' +
        entries
          .map(
            (e) =>
              `    - regex: '${assertYamlSafe('namespaces regex', e.regex)}'\n      exclusive: ${e.exclusive}`,
          )
          .join('\n');
  return [
    `id: '${assertYamlSafe('id', registration.id)}'`,
    `url: '${assertYamlSafe('url', registration.url)}'`,
    `as_token: '${assertYamlSafe('as_token', registration.as_token)}'`,
    `hs_token: '${assertYamlSafe('hs_token', registration.hs_token)}'`,
    `sender_localpart: '${assertYamlSafe('sender_localpart', registration.sender_localpart)}'`,
    `rate_limited: ${registration.rate_limited}`,
    'namespaces:',
    `  users:${nsBlock(ns.users)}`,
    `  aliases:${nsBlock(ns.aliases)}`,
    `  rooms:${nsBlock(ns.rooms)}`,
    '',
  ].join('\n');
 }
--- a/packages/appservice/src/transactions.ts
+++ b/packages/appservice/src/transactions.ts
@@ -0,0 +1,89 @@
 import { timingSafeEqual } from 'node:crypto';
 import type { EventHandler, HandlerResult, Transaction } from './types.js';
 // Cap on remembered transaction IDs; once exceeded, the oldest IDs are evicted
 // from the dedupe ring (see TransactionHandler.markSeen).
 const MAX_SEEN_TXN_IDS = 1000;
 /**
 * Compare a presented token with the expected one using `timingSafeEqual`.
 *
 * @param presented Token supplied by the caller; `undefined` never matches.
 * @param expected Token to match against.
 * @returns `true` only when both tokens encode to identical bytes.
 */
 function safeTokenCompare(presented: string | undefined, expected: string): boolean {
   if (presented === undefined) return false;
   const presentedBytes = Buffer.from(presented);
   const expectedBytes = Buffer.from(expected);
   const sameLength = presentedBytes.length === expectedBytes.length;
   // On a length mismatch still run one comparison against a zero-filled buffer
   // of the presented length, so length is not a timing oracle.
   const reference = sameLength ? expectedBytes : Buffer.alloc(presentedBytes.length);
   const bytesMatch = timingSafeEqual(presentedBytes, reference);
   return sameLength && bytesMatch;
 }
 /** Construction options for `TransactionHandler`. */
 export interface TransactionHandlerOptions {
   /** Expected hs_token; the presented bearer / access_token is compared against it. */
   hsToken: string;
   /** Invoked (and awaited) once per event of an accepted, not-yet-seen transaction. */
   onEvent: EventHandler;
   /** Called for handler errors; events are at-most-once, errors must not 500. */
   onError?: (error: unknown, txnId: string) => void;
 }
 /**
 * Framework-agnostic handler for the Application Service transactions API
 * (PUT /_matrix/app/v1/transactions/{txnId}). Host apps (Fastify/Nest) wrap
 * this in a route.
 *
 * Spec requirements covered: hs_token verification (Authorization: Bearer,
 * with legacy ?access_token fallback), txnId idempotency, always-200 on
 * accepted transactions (homeserver retries on any other status).
 *
 * KNOWN LIMITATION: the txnId dedupe ring is in-process memory only. After a
 * restart the homeserver may redeliver pending transactions — event handlers
 * must be idempotent (delivery is at-least-once across process lifetimes).
 */
 export class TransactionHandler {
   // Insertion-ordered txnIds (oldest first); drives eviction from `seenSet`.
   private readonly seen: string[] = [];
   // Same txnIds as `seen`, kept as a set for constant-time duplicate lookup.
   private readonly seenSet = new Set<string>();
   constructor(private readonly options: TransactionHandlerOptions) {}
   /**
    * Check the presented credential against the configured hs_token.
    *
    * @param authorizationHeader Raw Authorization header; used only when it starts with `Bearer `.
    * @param accessTokenParam Legacy `access_token` query value, used when no bearer token is present.
    * @returns `true` when the presented token equals the hs_token.
    */
   authorized(
     authorizationHeader: string | undefined,
     accessTokenParam: string | undefined,
   ): boolean {
     const bearer = authorizationHeader?.startsWith('Bearer ')
       ? authorizationHeader.slice('Bearer '.length)
       : undefined;
     const presented = bearer ?? accessTokenParam;
     return safeTokenCompare(presented, this.options.hsToken);
   }
   /**
    * Process one transaction.
    *
    * @param txnId Transaction ID from the route; used for de-duplication.
    * @param body Parsed request body; anything without an `events` array is treated as empty.
    * @param auth Credentials taken from the request.
    * @returns 403 with `M_FORBIDDEN` on a bad token, otherwise 200 with an empty body.
    */
   async handle(
     txnId: string,
     body: unknown,
     auth: { authorizationHeader?: string; accessTokenParam?: string },
   ): Promise<HandlerResult> {
     if (!this.authorized(auth.authorizationHeader, auth.accessTokenParam)) {
       return { status: 403, body: { errcode: 'M_FORBIDDEN', error: 'bad hs_token' } };
     }
     if (this.seenSet.has(txnId)) {
       return { status: 200, body: {} };
     }
     // Marked before dispatch: a transaction is never replayed to handlers
     // within this process, even if one of them fails.
     this.markSeen(txnId);
     const txn = (body ?? {}) as Partial<Transaction>;
     // `body` is unvalidated input: a non-array `events` (number, object, string)
     // would otherwise throw or iterate garbage outside the per-event try/catch
     // and turn an accepted transaction into a non-200 response.
     const events = Array.isArray(txn.events) ? txn.events : [];
     for (const event of events) {
       try {
         await this.options.onEvent(event);
       } catch (error) {
         // A failing handler must not fail the transaction: the homeserver
         // would retry the whole batch forever.
         this.options.onError?.(error, txnId);
       }
     }
     return { status: 200, body: {} };
   }
   /** Remember `txnId`, evicting the oldest entries beyond MAX_SEEN_TXN_IDS. */
   private markSeen(txnId: string): void {
     this.seen.push(txnId);
     this.seenSet.add(txnId);
     while (this.seen.length > MAX_SEEN_TXN_IDS) {
       const evicted = this.seen.shift();
       if (evicted !== undefined) this.seenSet.delete(evicted);
     }
   }
 }
--- a/packages/appservice/src/types.ts
+++ b/packages/appservice/src/types.ts
@@ -0,0 +1,35 @@
 /** Connection and identity settings for the appservice. */
 export interface AppserviceConfig {
   /** Homeserver client-server API base, e.g. https://chat.uscllc.com */
   homeserverUrl: string;
   /** Server name used in user IDs, e.g. chat.uscllc.com */
   domain: string;
   /** Token the appservice presents to the homeserver (as_token). */
   asToken: string;
   /** Token the homeserver presents to the appservice (hs_token). */
   hsToken: string;
   /** Localpart prefix owned by this appservice. Default: "agent-". */
   userPrefix?: string;
   /** The appservice's own sender user localpart. Default: "mosaic-as". */
   senderLocalpart?: string;
 }
 /**
 * A single event as passed to an `EventHandler`. Only `type` is required;
 * every other field is optional and may be absent on a given event.
 */
 export interface MatrixEvent {
   type: string;
   event_id?: string;
   room_id?: string;
   sender?: string;
   state_key?: string;
   /** Event payload; keys and value types are not constrained here. */
   content?: Record<string, unknown>;
   // NOTE(review): presumably milliseconds since the Unix epoch — confirm against the spec.
   origin_server_ts?: number;
 }
 /** Body of a transactions request: the batch of events to dispatch. */
 export interface Transaction {
   events: MatrixEvent[];
 }
 /** Callback that receives one event; may be synchronous or return a promise. */
 export type EventHandler = (event: MatrixEvent) => void | Promise<void>;
 /** Framework-neutral response: HTTP status code plus the JSON body to send. */
 export interface HandlerResult {
   status: number;
   body: Record<string, unknown>;
 }
--- a/packages/appservice/tsconfig.json
+++ b/packages/appservice/tsconfig.json
@@ -0,0 +1,9 @@
 {
  "extends": "../../tsconfig.base.json",
  "compilerOptions": {
    "outDir": "dist",
    "rootDir": "src"
  },
  "include": ["src/**/*"],
  "exclude": ["node_modules", "dist"]
 }
--- a/packages/auth/src/index.ts
+++ b/packages/auth/src/index.ts
@@ -10,3 +10,4 @@ export {
  type SsoTeamSyncConfig,
  type SupportedSsoProviderId,
 } from './sso.js';
 export { seal, unseal } from './seal.js';
--- a/packages/auth/src/seal.ts
+++ b/packages/auth/src/seal.ts
@@ -0,0 +1,52 @@
 import { createCipheriv, createDecipheriv, createHash, randomBytes } from 'node:crypto';
 // Cipher and layout constants shared by seal() and unseal(); the sealed blob
 // is laid out as IV (IV_LENGTH bytes) || auth tag (TAG_LENGTH bytes) || ciphertext.
 const ALGORITHM = 'aes-256-gcm';
 const IV_LENGTH = 12; // 96-bit IV for GCM
 const TAG_LENGTH = 16; // 128-bit auth tag
 /**
 * Derive a 32-byte AES-256 key from BETTER_AUTH_SECRET using SHA-256.
 * Throws if BETTER_AUTH_SECRET is not set.
 */
 function deriveKey(): Buffer {
   const secret = process.env['BETTER_AUTH_SECRET'];
   if (secret) {
     // SHA-256 yields exactly the 32 bytes AES-256 needs.
     const hasher = createHash('sha256');
     hasher.update(secret);
     return hasher.digest();
   }
   throw new Error('BETTER_AUTH_SECRET is not set — cannot derive encryption key');
 }
 /**
 * Seal a plaintext string using AES-256-GCM.
 * Output format: base64(IV || authTag || ciphertext)
 */
 export function seal(plaintext: string): string {
   const secretKey = deriveKey();
   // Fresh random IV per call; it is stored in front of the tag and ciphertext.
   const nonce = randomBytes(IV_LENGTH);
   const gcm = createCipheriv(ALGORITHM, secretKey, nonce);
   const head = gcm.update(plaintext, 'utf8');
   const tail = gcm.final();
   // The auth tag is only available after final().
   const tag = gcm.getAuthTag();
   return Buffer.concat([nonce, tag, head, tail]).toString('base64');
 }
 /**
 * Unseal a value sealed by `seal()`.
 * Throws on authentication failure (tampered data) or if BETTER_AUTH_SECRET is unset.
 */
 export function unseal(encoded: string): string {
   const key = deriveKey();
   const combined = Buffer.from(encoded, 'base64');
   // A blob shorter than IV + tag cannot come from seal(). Without this check
   // the slices below silently shrink and a truncated tag reaches setAuthTag.
   if (combined.length < IV_LENGTH + TAG_LENGTH) {
     throw new Error('sealed value is malformed — too short to contain IV and auth tag');
   }
   const iv = combined.subarray(0, IV_LENGTH);
   const authTag = combined.subarray(IV_LENGTH, IV_LENGTH + TAG_LENGTH);
   const ciphertext = combined.subarray(IV_LENGTH + TAG_LENGTH);
   // Pin the tag length so only full 128-bit tags are ever accepted.
   const decipher = createDecipheriv(ALGORITHM, key, iv, { authTagLength: TAG_LENGTH });
   decipher.setAuthTag(authTag);
   // final() throws when the tag does not authenticate the ciphertext.
   const decrypted = Buffer.concat([decipher.update(ciphertext), decipher.final()]);
   return decrypted.toString('utf8');
 }
--- a/packages/db/drizzle/0008_smart_lyja.sql
+++ b/packages/db/drizzle/0008_smart_lyja.sql
@@ -0,0 +1,75 @@
 CREATE TYPE "public"."grant_status" AS ENUM('active', 'revoked', 'expired');--> statement-breakpoint
 CREATE TYPE "public"."peer_state" AS ENUM('pending', 'active', 'suspended', 'revoked');--> statement-breakpoint
 CREATE TABLE "admin_tokens" (
 	"id" text PRIMARY KEY NOT NULL,
 	"user_id" text NOT NULL,
 	"token_hash" text NOT NULL,
 	"label" text NOT NULL,
 	"scope" text DEFAULT 'admin' NOT NULL,
 	"expires_at" timestamp with time zone,
 	"last_used_at" timestamp with time zone,
 	"created_at" timestamp with time zone DEFAULT now() NOT NULL
 );
 --> statement-breakpoint
 CREATE TABLE "federation_audit_log" (
 	"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
 	"request_id" text NOT NULL,
 	"peer_id" uuid,
 	"subject_user_id" text,
 	"grant_id" uuid,
 	"verb" text NOT NULL,
 	"resource" text NOT NULL,
 	"status_code" integer NOT NULL,
 	"result_count" integer,
 	"denied_reason" text,
 	"latency_ms" integer,
 	"created_at" timestamp with time zone DEFAULT now() NOT NULL,
 	"query_hash" text,
 	"outcome" text,
 	"bytes_out" integer
 );
 --> statement-breakpoint
 CREATE TABLE "federation_grants" (
 	"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
 	"subject_user_id" text NOT NULL,
 	"peer_id" uuid NOT NULL,
 	"scope" jsonb NOT NULL,
 	"status" "grant_status" DEFAULT 'active' NOT NULL,
 	"expires_at" timestamp with time zone,
 	"created_at" timestamp with time zone DEFAULT now() NOT NULL,
 	"revoked_at" timestamp with time zone,
 	"revoked_reason" text
 );
 --> statement-breakpoint
 CREATE TABLE "federation_peers" (
 	"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
 	"common_name" text NOT NULL,
 	"display_name" text NOT NULL,
 	"cert_pem" text NOT NULL,
 	"cert_serial" text NOT NULL,
 	"cert_not_after" timestamp with time zone NOT NULL,
 	"client_key_pem" text,
 	"state" "peer_state" DEFAULT 'pending' NOT NULL,
 	"endpoint_url" text,
 	"last_seen_at" timestamp with time zone,
 	"created_at" timestamp with time zone DEFAULT now() NOT NULL,
 	"revoked_at" timestamp with time zone,
 	CONSTRAINT "federation_peers_common_name_unique" UNIQUE("common_name"),
 	CONSTRAINT "federation_peers_cert_serial_unique" UNIQUE("cert_serial")
 );
 --> statement-breakpoint
 ALTER TABLE "admin_tokens" ADD CONSTRAINT "admin_tokens_user_id_users_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."users"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
 ALTER TABLE "federation_audit_log" ADD CONSTRAINT "federation_audit_log_peer_id_federation_peers_id_fk" FOREIGN KEY ("peer_id") REFERENCES "public"."federation_peers"("id") ON DELETE set null ON UPDATE no action;--> statement-breakpoint
 ALTER TABLE "federation_audit_log" ADD CONSTRAINT "federation_audit_log_subject_user_id_users_id_fk" FOREIGN KEY ("subject_user_id") REFERENCES "public"."users"("id") ON DELETE set null ON UPDATE no action;--> statement-breakpoint
 ALTER TABLE "federation_audit_log" ADD CONSTRAINT "federation_audit_log_grant_id_federation_grants_id_fk" FOREIGN KEY ("grant_id") REFERENCES "public"."federation_grants"("id") ON DELETE set null ON UPDATE no action;--> statement-breakpoint
 ALTER TABLE "federation_grants" ADD CONSTRAINT "federation_grants_subject_user_id_users_id_fk" FOREIGN KEY ("subject_user_id") REFERENCES "public"."users"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
 ALTER TABLE "federation_grants" ADD CONSTRAINT "federation_grants_peer_id_federation_peers_id_fk" FOREIGN KEY ("peer_id") REFERENCES "public"."federation_peers"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
 CREATE INDEX "admin_tokens_user_id_idx" ON "admin_tokens" USING btree ("user_id");--> statement-breakpoint
 CREATE UNIQUE INDEX "admin_tokens_hash_idx" ON "admin_tokens" USING btree ("token_hash");--> statement-breakpoint
 CREATE INDEX "federation_audit_log_peer_created_at_idx" ON "federation_audit_log" USING btree ("peer_id","created_at" DESC NULLS LAST);--> statement-breakpoint
 CREATE INDEX "federation_audit_log_subject_created_at_idx" ON "federation_audit_log" USING btree ("subject_user_id","created_at" DESC NULLS LAST);--> statement-breakpoint
 CREATE INDEX "federation_audit_log_created_at_idx" ON "federation_audit_log" USING btree ("created_at" DESC NULLS LAST);--> statement-breakpoint
 CREATE INDEX "federation_grants_subject_status_idx" ON "federation_grants" USING btree ("subject_user_id","status");--> statement-breakpoint
 CREATE INDEX "federation_grants_peer_status_idx" ON "federation_grants" USING btree ("peer_id","status");--> statement-breakpoint
 CREATE INDEX "federation_peers_cert_serial_idx" ON "federation_peers" USING btree ("cert_serial");--> statement-breakpoint
 CREATE INDEX "federation_peers_state_idx" ON "federation_peers" USING btree ("state");
--- a/packages/db/drizzle/0009_federation_grant_pending.sql
+++ b/packages/db/drizzle/0009_federation_grant_pending.sql
@@ -0,0 +1,2 @@
 ALTER TYPE "public"."grant_status" ADD VALUE 'pending' BEFORE 'active';--> statement-breakpoint
 ALTER TABLE "federation_grants" ALTER COLUMN "status" SET DEFAULT 'pending';
--- a/packages/db/drizzle/0010_federation_enrollment_tokens.sql
+++ b/packages/db/drizzle/0010_federation_enrollment_tokens.sql
@@ -0,0 +1,11 @@
 CREATE TABLE "federation_enrollment_tokens" (
 	"token" text PRIMARY KEY NOT NULL,
 	"grant_id" uuid NOT NULL,
 	"peer_id" uuid NOT NULL,
 	"expires_at" timestamp with time zone NOT NULL,
 	"used_at" timestamp with time zone,
 	"created_at" timestamp with time zone DEFAULT now() NOT NULL
 );
 --> statement-breakpoint
 ALTER TABLE "federation_enrollment_tokens" ADD CONSTRAINT "federation_enrollment_tokens_grant_id_federation_grants_id_fk" FOREIGN KEY ("grant_id") REFERENCES "public"."federation_grants"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
 ALTER TABLE "federation_enrollment_tokens" ADD CONSTRAINT "federation_enrollment_tokens_peer_id_federation_peers_id_fk" FOREIGN KEY ("peer_id") REFERENCES "public"."federation_peers"("id") ON DELETE cascade ON UPDATE no action;
--- a/packages/db/drizzle/meta/0008_snapshot.json
+++ b/packages/db/drizzle/meta/0008_snapshot.json
--- a/packages/db/drizzle/meta/0009_snapshot.json
+++ b/packages/db/drizzle/meta/0009_snapshot.json
--- a/packages/db/drizzle/meta/0010_snapshot.json
+++ b/packages/db/drizzle/meta/0010_snapshot.json
--- a/packages/db/drizzle/meta/_journal.json
+++ b/packages/db/drizzle/meta/_journal.json
@@ -57,6 +57,27 @@
      "when": 1774227064500,
      "tag": "0006_swift_shen",
      "breakpoints": true
    },
    {
      "idx": 8,
      "version": "7",
      "when": 1776822435828,
      "tag": "0008_smart_lyja",
      "breakpoints": true
    },
    {
      "idx": 9,
      "version": "7",
      "when": 1745280000000,
      "tag": "0009_federation_grant_pending",
      "breakpoints": true
    },
    {
      "idx": 10,
      "version": "7",
      "when": 1745366400000,
      "tag": "0010_federation_enrollment_tokens",
      "breakpoints": true
    }
  ]
 }
--- a/packages/db/package.json
+++ b/packages/db/package.json
@@ -42,6 +42,7 @@
    "access": "public"
  },
  "files": [
-    "dist"
+    "dist",
    "drizzle"
  ]
 }
--- a/packages/db/src/client-pglite.ts
+++ b/packages/db/src/client-pglite.ts
@@ -1,10 +1,12 @@
 import { PGlite } from '@electric-sql/pglite';
 import { vector } from '@electric-sql/pglite/vector';
 import { drizzle } from 'drizzle-orm/pglite';
 import * as schema from './schema.js';
 import type { DbHandle } from './client.js';
 export function createPgliteDb(dataDir: string): DbHandle {
-  const client = new PGlite(dataDir);
+  // pgvector extension is required by migration 0001 (insights.embedding column).
  const client = new PGlite(dataDir, { extensions: { vector } });
  const db = drizzle(client, { schema });
  return {
    db: db as unknown as DbHandle['db'],
--- a/packages/db/src/federation.integration.test.ts
+++ b/packages/db/src/federation.integration.test.ts
@@ -0,0 +1,424 @@
 /**
 * FED-M2-01 — Integration test: federation DB schema (peers / grants / audit_log).
 *
 * Prereq: docker compose -f docker-compose.federated.yml --profile federated up -d
 *         (or any postgres with the mosaic schema already applied)
 * Run:    FEDERATED_INTEGRATION=1 pnpm --filter @mosaicstack/db test src/federation.integration.test.ts
 *
 * Skipped when FEDERATED_INTEGRATION !== '1'.
 *
 * Strategy:
 *  - Applies the federation migration SQL directly (idempotent: CREATE TYPE/TABLE
 *    with IF NOT EXISTS guards applied via inline SQL before the migration DDL).
 *  - Assumes the base schema (users table etc.) already exists in the target DB.
 *  - All test rows use the `fed-m2-01-` prefix; cleanup in afterAll.
 *
 * Coverage:
 *  1. Federation tables + enums apply cleanly against the existing schema (beforeAll).
 *  2. Insert a sample user + peer + grant + audit row; verify round-trip.
 *  3. FK cascade: deleting the user cascades to federation_grants.
 *  4. FK set-null: deleting the peer sets federation_audit_log.peer_id to NULL.
 *  5. Enum constraint: inserting an invalid status/state value throws a DB error.
 *  6. Unique constraint: duplicate cert_serial throws a DB error.
 *  7. FK cascade: deleting the peer cascades to federation_grants.
 */
 import postgres from 'postgres';
 import { afterAll, beforeAll, describe, expect, it } from 'vitest';
 // Opt-in switch: the suite only runs when FEDERATED_INTEGRATION is exactly '1'.
 const run = process.env['FEDERATED_INTEGRATION'] === '1';
 // Target database; falls back to a local instance on port 5433.
 const PG_URL = process.env['DATABASE_URL'] ?? 'postgresql://mosaic:mosaic@localhost:5433/mosaic';
 /** Recognisable test-row prefix for safe cleanup without full-table truncation. */
 const T = 'fed-m2-01';
 // Deterministic IDs (UUID format required for uuid PK columns: 8-4-4-4-12 hex digits).
 const PEER1_ID = `f2000001-0000-4000-8000-000000000001`;
 const PEER2_ID = `f2000002-0000-4000-8000-000000000002`;
 const USER1_ID = `${T}-user-1`;
 // Shared connection; stays undefined when the suite is skipped.
 let sql: ReturnType<typeof postgres> | undefined;
 // Suite setup: opens the single shared connection and creates the federation
 // enums, tables and indexes if they are missing. Does nothing when the suite
 // is skipped.
 beforeAll(async () => {
   if (!run) return;
   sql = postgres(PG_URL, { max: 1, connect_timeout: 10, idle_timeout: 10 });
   // Apply the federation enums and tables idempotently.
   // This mirrors the migration file but uses IF NOT EXISTS guards so it can run
   // against a DB that may not have had drizzle migrations tracked.
   // CREATE TYPE has no IF NOT EXISTS form, hence the DO blocks that swallow
   // duplicate_object.
   await sql`
     DO $$ BEGIN
       CREATE TYPE peer_state AS ENUM ('pending', 'active', 'suspended', 'revoked');
     EXCEPTION WHEN duplicate_object THEN NULL;
     END $$
   `;
   // NOTE(review): this enum and the 'active' default on federation_grants.status
   // below match migration 0008 only; 0009 adds 'pending' and makes it the
   // default. The tests always pass an explicit status, so they do not depend
   // on the difference — confirm whether the mirror should be kept in sync.
   await sql`
     DO $$ BEGIN
       CREATE TYPE grant_status AS ENUM ('active', 'revoked', 'expired');
     EXCEPTION WHEN duplicate_object THEN NULL;
     END $$
   `;
   // Peers first: grants and the audit log both reference federation_peers.
   await sql`
     CREATE TABLE IF NOT EXISTS federation_peers (
       id              uuid PRIMARY KEY DEFAULT gen_random_uuid(),
       common_name     text NOT NULL,
       display_name    text NOT NULL,
       cert_pem        text NOT NULL,
       cert_serial     text NOT NULL,
       cert_not_after  timestamp with time zone NOT NULL,
       client_key_pem  text,
       state           peer_state NOT NULL DEFAULT 'pending',
       endpoint_url    text,
       last_seen_at    timestamp with time zone,
       created_at      timestamp with time zone NOT NULL DEFAULT now(),
       revoked_at      timestamp with time zone,
       CONSTRAINT federation_peers_common_name_unique UNIQUE (common_name),
       CONSTRAINT federation_peers_cert_serial_unique UNIQUE (cert_serial)
     )
   `;
   await sql`
     CREATE INDEX IF NOT EXISTS federation_peers_cert_serial_idx ON federation_peers (cert_serial)
   `;
   await sql`
     CREATE INDEX IF NOT EXISTS federation_peers_state_idx ON federation_peers (state)
   `;
   // Grants reference users and peers with ON DELETE CASCADE (exercised by the
   // cascade tests).
   await sql`
     CREATE TABLE IF NOT EXISTS federation_grants (
       id               uuid PRIMARY KEY DEFAULT gen_random_uuid(),
       subject_user_id  text NOT NULL REFERENCES users(id) ON DELETE CASCADE,
       peer_id          uuid NOT NULL REFERENCES federation_peers(id) ON DELETE CASCADE,
       scope            jsonb NOT NULL,
       status           grant_status NOT NULL DEFAULT 'active',
       expires_at       timestamp with time zone,
       created_at       timestamp with time zone NOT NULL DEFAULT now(),
       revoked_at       timestamp with time zone,
       revoked_reason   text
     )
   `;
   await sql`
     CREATE INDEX IF NOT EXISTS federation_grants_subject_status_idx ON federation_grants (subject_user_id, status)
   `;
   await sql`
     CREATE INDEX IF NOT EXISTS federation_grants_peer_status_idx ON federation_grants (peer_id, status)
   `;
   // Audit rows survive deletion of the peer, user or grant they reference
   // (ON DELETE SET NULL on all three foreign keys).
   await sql`
     CREATE TABLE IF NOT EXISTS federation_audit_log (
       id               uuid PRIMARY KEY DEFAULT gen_random_uuid(),
       request_id       text NOT NULL,
       peer_id          uuid REFERENCES federation_peers(id) ON DELETE SET NULL,
       subject_user_id  text REFERENCES users(id) ON DELETE SET NULL,
       grant_id         uuid REFERENCES federation_grants(id) ON DELETE SET NULL,
       verb             text NOT NULL,
       resource         text NOT NULL,
       status_code      integer NOT NULL,
       result_count     integer,
       denied_reason    text,
       latency_ms       integer,
       created_at       timestamp with time zone NOT NULL DEFAULT now(),
       query_hash       text,
       outcome          text,
       bytes_out        integer
     )
   `;
   await sql`
     CREATE INDEX IF NOT EXISTS federation_audit_log_peer_created_at_idx
       ON federation_audit_log (peer_id, created_at DESC NULLS LAST)
   `;
   await sql`
     CREATE INDEX IF NOT EXISTS federation_audit_log_subject_created_at_idx
       ON federation_audit_log (subject_user_id, created_at DESC NULLS LAST)
   `;
   await sql`
     CREATE INDEX IF NOT EXISTS federation_audit_log_created_at_idx
       ON federation_audit_log (created_at DESC NULLS LAST)
   `;
 });
 // Best-effort teardown: removes every row carrying the test prefix, then
 // closes the connection. Failures are ignored so cleanup never fails the run.
 afterAll(async () => {
   if (!sql) return;
   const prefixPattern = T + '%';
   const ignore = (): void => {};
   // Cleanup in FK-safe order (children before parents).
   await sql`DELETE FROM federation_audit_log WHERE request_id LIKE ${prefixPattern}`.catch(ignore);
   await sql`
     DELETE FROM federation_grants
     WHERE subject_user_id LIKE ${prefixPattern}
        OR revoked_reason LIKE ${prefixPattern}
   `.catch(ignore);
   await sql`DELETE FROM federation_peers WHERE common_name LIKE ${prefixPattern}`.catch(ignore);
   await sql`DELETE FROM users WHERE id LIKE ${prefixPattern}`.catch(ignore);
   await sql.end({ timeout: 3 }).catch(ignore);
 });
 // Integration suite for the federation tables. Tests share rows: the user and
 // peer inserted by the first test are reused by later ones, so they must run
 // in file order. Negative tests assert the PostgreSQL SQLSTATE (exposed by
 // postgres.js as `error.code`) so they cannot pass on an unrelated failure.
 describe.skipIf(!run)('federation schema — integration', () => {
   // ── 1. Insert sample rows ──────────────────────────────────────────────────
   it('inserts a user, peer, grant, and audit row without constraint violation', async () => {
     const certPem = '-----BEGIN CERTIFICATE-----\nMIItest\n-----END CERTIFICATE-----';
     // User — BetterAuth users.id is text (any string, not uuid).
     await sql!`
       INSERT INTO users (id, name, email, email_verified, created_at, updated_at)
       VALUES (${USER1_ID}, ${'M2-01 Test User'}, ${USER1_ID + '@example.com'}, false, now(), now())
       ON CONFLICT (id) DO NOTHING
     `;
     // Peer
     await sql!`
       INSERT INTO federation_peers
         (id, common_name, display_name, cert_pem, cert_serial, cert_not_after, state, created_at)
       VALUES (
         ${PEER1_ID},
         ${T + '-gateway-example-com'},
         ${'Test Peer'},
         ${certPem},
         ${T + '-serial-001'},
         now() + interval '1 year',
         ${'active'},
         now()
       )
       ON CONFLICT (id) DO NOTHING
     `;
     // Grant — scope is jsonb; pass as JSON string and cast server-side.
     const scopeJson = JSON.stringify({
       resources: ['tasks', 'notes'],
       operations: ['list', 'get'],
     });
     const grants = await sql!`
       INSERT INTO federation_grants
         (subject_user_id, peer_id, scope, status, created_at)
       VALUES (
         ${USER1_ID},
         ${PEER1_ID},
         ${scopeJson}::jsonb,
         ${'active'},
         now()
       )
       RETURNING id
     `;
     expect(grants).toHaveLength(1);
     const grantId = grants[0]!['id'] as string;
     // Audit log row
     await sql!`
       INSERT INTO federation_audit_log
         (request_id, peer_id, subject_user_id, grant_id, verb, resource, status_code, created_at)
       VALUES (
         ${T + '-req-001'},
         ${PEER1_ID},
         ${USER1_ID},
         ${grantId},
         ${'list'},
         ${'tasks'},
         ${200},
         now()
       )
     `;
     // Verify the audit row is present and has correct data.
     const auditRows = await sql!`
       SELECT * FROM federation_audit_log WHERE request_id = ${T + '-req-001'}
     `;
     expect(auditRows).toHaveLength(1);
     expect(auditRows[0]!['status_code']).toBe(200);
     expect(auditRows[0]!['verb']).toBe('list');
     expect(auditRows[0]!['resource']).toBe('tasks');
   }, 30_000);
   // ── 2. FK cascade: user delete cascades grants ─────────────────────────────
   it('cascade-deletes federation_grants when the subject user is deleted', async () => {
     const cascadeUserId = `${T}-cascade-user`;
     await sql!`
       INSERT INTO users (id, name, email, email_verified, created_at, updated_at)
       VALUES (${cascadeUserId}, ${'Cascade User'}, ${cascadeUserId + '@example.com'}, false, now(), now())
       ON CONFLICT (id) DO NOTHING
     `;
     const scopeJson = JSON.stringify({ resources: ['tasks'] });
     await sql!`
       INSERT INTO federation_grants
         (subject_user_id, peer_id, scope, status, revoked_reason, created_at)
       VALUES (
         ${cascadeUserId},
         ${PEER1_ID},
         ${scopeJson}::jsonb,
         ${'active'},
         ${T + '-cascade-test'},
         now()
       )
     `;
     const before = await sql!`
       SELECT count(*)::int AS cnt FROM federation_grants WHERE subject_user_id = ${cascadeUserId}
     `;
     expect(before[0]!['cnt']).toBe(1);
     // Delete user → grants should cascade-delete.
     await sql!`DELETE FROM users WHERE id = ${cascadeUserId}`;
     const after = await sql!`
       SELECT count(*)::int AS cnt FROM federation_grants WHERE subject_user_id = ${cascadeUserId}
     `;
     expect(after[0]!['cnt']).toBe(0);
   }, 15_000);
   // ── 3. FK set-null: peer delete sets audit_log.peer_id to NULL ────────────
   it('sets federation_audit_log.peer_id to NULL when the peer is deleted', async () => {
     // Insert a throwaway peer for this specific cascade test.
     await sql!`
       INSERT INTO federation_peers
         (id, common_name, display_name, cert_pem, cert_serial, cert_not_after, state, created_at)
       VALUES (
         ${PEER2_ID},
         ${T + '-gateway-throwaway-com'},
         ${'Throwaway Peer'},
         ${'cert-pem-placeholder'},
         ${T + '-serial-002'},
         now() + interval '1 year',
         ${'active'},
         now()
       )
       ON CONFLICT (id) DO NOTHING
     `;
     const reqId = `${T}-req-setnull`;
     await sql!`
       INSERT INTO federation_audit_log
         (request_id, peer_id, subject_user_id, verb, resource, status_code, created_at)
       VALUES (
         ${reqId},
         ${PEER2_ID},
         ${USER1_ID},
         ${'get'},
         ${'tasks'},
         ${200},
         now()
       )
     `;
     await sql!`DELETE FROM federation_peers WHERE id = ${PEER2_ID}`;
     const rows = await sql!`
       SELECT peer_id FROM federation_audit_log WHERE request_id = ${reqId}
     `;
     expect(rows).toHaveLength(1);
     expect(rows[0]!['peer_id']).toBeNull();
   }, 15_000);
   // ── 4. Enum constraint: invalid grant_status rejected ─────────────────────
   it('rejects an invalid grant_status value with a DB error', async () => {
     const scopeJson = JSON.stringify({ resources: ['tasks'] });
     // 22P02 = invalid_text_representation, raised for an unknown enum label.
     await expect(
       sql!`
         INSERT INTO federation_grants
           (subject_user_id, peer_id, scope, status, created_at)
         VALUES (
           ${USER1_ID},
           ${PEER1_ID},
           ${scopeJson}::jsonb,
           ${'invalid_status'},
           now()
         )
       `,
     ).rejects.toMatchObject({ code: '22P02' });
   }, 10_000);
   // ── 5. Enum constraint: invalid peer_state rejected ───────────────────────
   it('rejects an invalid peer_state value with a DB error', async () => {
     // Identifiers carry the test prefix so that, should this insert ever
     // succeed, afterAll removes the row instead of leaving it behind to make
     // later runs fail with a unique violation and pass for the wrong reason.
     await expect(
       sql!`
         INSERT INTO federation_peers
           (common_name, display_name, cert_pem, cert_serial, cert_not_after, state, created_at)
         VALUES (
           ${T + '-bad-state-peer'},
           ${'Bad State'},
           ${'pem'},
           ${T + '-bad-serial-999'},
           now() + interval '1 year',
           ${'invalid_state'},
           now()
         )
       `,
     ).rejects.toMatchObject({ code: '22P02' });
   }, 10_000);
   // ── 6. Unique constraint: duplicate cert_serial rejected ──────────────────
   it('rejects a duplicate cert_serial with a unique constraint violation', async () => {
     // Reuses the serial inserted by test 1. 23505 = unique_violation; the
     // constraint name pins the failure to cert_serial rather than common_name.
     await expect(
       sql!`
         INSERT INTO federation_peers
           (common_name, display_name, cert_pem, cert_serial, cert_not_after, state, created_at)
         VALUES (
           ${T + '-dup-cn'},
           ${'Dup Peer'},
           ${'pem'},
           ${T + '-serial-001'},
           now() + interval '1 year',
           ${'pending'},
           now()
         )
       `,
     ).rejects.toMatchObject({
       code: '23505',
       constraint_name: 'federation_peers_cert_serial_unique',
     });
   }, 10_000);
   // ── 7. FK cascade: peer delete cascades to federation_grants ─────────────
   it('cascade-deletes federation_grants when the owning peer is deleted', async () => {
     const PEER3_ID = `f2000003-0000-4000-8000-000000000003`;
     const cascadeGrantUserId = `${T}-cascade-grant-user`;
     // Insert a dedicated user and peer for this test.
     await sql!`
       INSERT INTO users (id, name, email, email_verified, created_at, updated_at)
       VALUES (${cascadeGrantUserId}, ${'Cascade Grant User'}, ${cascadeGrantUserId + '@example.com'}, false, now(), now())
       ON CONFLICT (id) DO NOTHING
     `;
     await sql!`
       INSERT INTO federation_peers
         (id, common_name, display_name, cert_pem, cert_serial, cert_not_after, state, created_at)
       VALUES (
         ${PEER3_ID},
         ${T + '-gateway-cascade-peer'},
         ${'Cascade Peer'},
         ${'cert-pem-cascade'},
         ${T + '-serial-003'},
         now() + interval '1 year',
         ${'active'},
         now()
       )
       ON CONFLICT (id) DO NOTHING
     `;
     const scopeJson = JSON.stringify({ resources: ['tasks'] });
     await sql!`
       INSERT INTO federation_grants
         (subject_user_id, peer_id, scope, status, created_at)
       VALUES (
         ${cascadeGrantUserId},
         ${PEER3_ID},
         ${scopeJson}::jsonb,
         ${'active'},
         now()
       )
     `;
     const before = await sql!`
       SELECT count(*)::int AS cnt FROM federation_grants WHERE peer_id = ${PEER3_ID}
     `;
     expect(before[0]!['cnt']).toBe(1);
     // Delete peer → grants should cascade-delete.
     await sql!`DELETE FROM federation_peers WHERE id = ${PEER3_ID}`;
     const after = await sql!`
       SELECT count(*)::int AS cnt FROM federation_grants WHERE peer_id = ${PEER3_ID}
     `;
     expect(after[0]!['cnt']).toBe(0);
     // Cleanup
     await sql!`DELETE FROM users WHERE id = ${cascadeGrantUserId}`.catch(() => {});
   }, 15_000);
 });
--- a/packages/db/src/federation.ts
+++ b/packages/db/src/federation.ts
@@ -0,0 +1,21 @@
 /**
 * Federation schema re-exports.
 *
 * The actual table and enum definitions live in schema.ts (alongside all other
 * Drizzle tables) to avoid CJS/ESM cross-import issues when drizzle-kit loads
 * schema files via esbuild-register.  Application code that wants named imports
 * for federation symbols should import from this file.
 *
 * M2-01: DB tables and enums only. No business logic.
 * M2-03 will add JSON schema validation for the `scope` column.
 * M4 will write rows to federation_audit_log.
 */
 export {
  peerStateEnum,
  grantStatusEnum,
  federationPeers,
  federationGrants,
  federationAuditLog,
  federationEnrollmentTokens,
 } from './schema.js';
--- a/packages/db/src/index.ts
+++ b/packages/db/src/index.ts
@@ -1,7 +1,8 @@
 export { createDb, type Db, type DbHandle } from './client.js';
 export { createPgliteDb } from './client-pglite.js';
-export { runMigrations } from './migrate.js';
+export { runMigrations, runPgliteMigrations } from './migrate.js';
 export * from './schema.js';
 export * from './federation.js';
 export {
  eq,
  and,
--- a/packages/db/src/migrate.test.ts
+++ b/packages/db/src/migrate.test.ts
@@ -0,0 +1,70 @@
 import { mkdtempSync, rmSync } from 'node:fs';
 import { tmpdir } from 'node:os';
 import { join } from 'node:path';
 import { sql } from 'drizzle-orm';
 import { afterEach, beforeEach, describe, expect, it } from 'vitest';
 import { createPgliteDb } from './client-pglite.js';
 import { runPgliteMigrations } from './migrate.js';
 import type { DbHandle } from './client.js';
 // Minimal view of the client behind `handle.db.$client`: just enough to run a
 // raw SQL string when a test needs to set up state before migrating.
 interface PgliteExec {
   exec(query: string): Promise<unknown>;
 }
 // Each test gets its own database in a fresh temp directory, removed afterwards.
 describe('runPgliteMigrations', () => {
   let scratchDir: string;
   let pg: DbHandle;
   beforeEach(() => {
     scratchDir = mkdtempSync(join(tmpdir(), 'mosaic-db-migrate-test-'));
     pg = createPgliteDb(scratchDir);
   });
   afterEach(async () => {
     await pg.close();
     rmSync(scratchDir, { recursive: true, force: true });
   });
   it('creates the BetterAuth tables required by the gateway', async () => {
     await runPgliteMigrations(pg);
     const listing = (await pg.db.execute(sql`
       SELECT table_name FROM information_schema.tables
       WHERE table_schema = 'public'
       ORDER BY table_name
     `)) as unknown as { rows: Array<{ table_name: string }> };
     const tableNames = listing.rows.map((row) => row.table_name);
     // Auth tables (required for sign-in / bootstrap) followed by the admin
     // token table consumed by mosaic gateway config.
     const requiredTables = ['users', 'sessions', 'accounts', 'verifications', 'admin_tokens'];
     for (const required of requiredTables) {
       expect(tableNames).toContain(required);
     }
   });
   it('is idempotent — running twice does not error', async () => {
     await runPgliteMigrations(pg);
     const secondRun = runPgliteMigrations(pg);
     await expect(secondRun).resolves.toBeUndefined();
   });
   it('surfaces statement-level error context on failure and leaves no ledger row', async () => {
     // A pre-existing `users` table collides with migration 0000's CREATE TABLE
     // (which has no IF NOT EXISTS), forcing the first migration to fail.
     const { $client: rawClient } = pg.db as unknown as { $client: PgliteExec };
     await rawClient.exec('CREATE TABLE users (sentinel text)');
     const failurePattern = /migration hash=[a-f0-9]+ statement #\d+ failed/;
     await expect(runPgliteMigrations(pg)).rejects.toThrow(failurePattern);
     // Partial application must not be recorded as complete: ledger stays empty.
     const ledger = (await pg.db.execute(
       sql`SELECT count(*)::int AS count FROM drizzle.__drizzle_migrations`,
     )) as unknown as { rows: Array<{ count: number }> };
     const [firstRow] = ledger.rows;
     expect(firstRow?.count).toBe(0);
   });
 });
--- a/packages/db/src/migrate.ts
+++ b/packages/db/src/migrate.ts
@@ -1,18 +1,109 @@
 import { dirname, resolve } from 'node:path';
 import { fileURLToPath } from 'node:url';
-import { drizzle } from 'drizzle-orm/postgres-js';
+import { sql } from 'drizzle-orm';
-import { migrate } from 'drizzle-orm/postgres-js/migrator';
+import { drizzle as drizzlePostgres } from 'drizzle-orm/postgres-js';
 import { migrate as migratePostgres } from 'drizzle-orm/postgres-js/migrator';
 import { readMigrationFiles } from 'drizzle-orm/migrator';
 import postgres from 'postgres';
 import { DEFAULT_DATABASE_URL } from './defaults.js';
 import type { DbHandle } from './client.js';
 /** Minimal slice of the PGlite client that the migration runner relies on. */
 interface PgliteExecutor {
  /** Run a raw SQL string; the result is not inspected by this module. */
  exec(query: string): Promise<unknown>;
 }
 /** Result shape this module casts `db.execute()` responses to before reading rows. */
 interface ExecuteRows<T> {
  rows: Array<T>;
 }
 /** Absolute path of the `drizzle` folder that sits one level above this module's directory. */
 function migrationsFolder(): string {
  const modulePath = fileURLToPath(import.meta.url);
  const moduleDir = dirname(modulePath);
  return resolve(moduleDir, '../drizzle');
 }
 /**
 * Apply Drizzle migrations to a postgres-backed database.
 *
 * The connection string is taken from `url`, then the `DATABASE_URL`
 * environment variable, then `DEFAULT_DATABASE_URL`. A client created with
 * `max: 1` is used for the run and is always ended in the `finally` block,
 * whether or not the migration call succeeds.
 */
 export async function runMigrations(url?: string): Promise<void> {
  const connectionString = url ?? process.env['DATABASE_URL'] ?? DEFAULT_DATABASE_URL;
-  const sql = postgres(connectionString, { max: 1 });
+  const sqlClient = postgres(connectionString, { max: 1 });
-  const db = drizzle(sql);
+  const db = drizzlePostgres(sqlClient);
  // NOTE(review): `__dirname` was only read by the old `migrate(...)` call; the new
  // call goes through migrationsFolder(), so this binding looks unused — confirm.
  const __dirname = dirname(fileURLToPath(import.meta.url));
  try {
-    await migrate(db, { migrationsFolder: resolve(__dirname, '../drizzle') });
+    // TODO: postgres-tier first-install also fails because (a) Drizzle wraps every
    // migration in one transaction (breaks 0009's ALTER TYPE ADD VALUE → SET DEFAULT
    // sequence) and (b) drizzle/meta/_journal.json has 0009 ordered before 0008,
    // which the postgres-js migrator skips by `created_at < folderMillis`. The
    // PGlite path below sidesteps both. A follow-up should either share the
    // per-statement loop (see runPgliteMigrations) or fix the journal ordering.
    await migratePostgres(db, { migrationsFolder: migrationsFolder() });
  } finally {
-    await sql.end();
+    await sqlClient.end();
  }
 }
 // Apply Drizzle migrations against an embedded PGlite database.
 //
 // We don't reuse drizzle's pglite migrator because it wraps ALL migrations in
 // one outer transaction, which breaks Postgres' `check_safe_enum_use` rule —
 // e.g. migration 0009 does `ALTER TYPE ADD VALUE 'pending'` then references
 // `'pending'` as a default in the same tx. PGlite's `exec()` runs each
 // statement under the Simple Query protocol, autocommitting between them.
 //
 // We still write to the standard `drizzle.__drizzle_migrations` ledger so the
 // result is interoperable with `runMigrations()` on a postgres-backed deploy
 // (modulo the journal-ordering bug noted above).
 //
 // We skip-by-hash rather than skip-by-folderMillis (which is what Drizzle's
 // postgres-js migrator does). That's deliberate — out-of-order timestamps in
 // `_journal.json` won't silently drop migrations.
 //
 // Failure model: each statement autocommits, and the ledger row is written
 // only after all statements in a migration succeed. A crash mid-migration
 // leaves the prefix applied with no ledger entry, so the next boot will
 // replay those statements and fail loudly on "already exists". Recovery:
 // drop the partially-applied objects, or insert the migration's hash into
 // `drizzle.__drizzle_migrations` manually. The error log identifies which
 // statement of which migration was the culprit.
 export async function runPgliteMigrations(handle: DbHandle): Promise<void> {
  const pglite = (handle.db as unknown as { $client?: PgliteExecutor }).$client;
  if (!(pglite && typeof pglite.exec === 'function')) {
    throw new Error('runPgliteMigrations: handle.db is not backed by a PGlite client');
  }

  // Make sure the Drizzle ledger schema and table exist before reading from them.
  await pglite.exec('CREATE SCHEMA IF NOT EXISTS drizzle');
  await pglite.exec(`
    CREATE TABLE IF NOT EXISTS drizzle.__drizzle_migrations (
      id SERIAL PRIMARY KEY,
      hash text NOT NULL,
      created_at bigint
    )
  `);

  // Snapshot the hashes already recorded; migrations are skipped by hash.
  const ledger = (await handle.db.execute(
    sql`SELECT hash FROM drizzle.__drizzle_migrations`,
  )) as unknown as ExecuteRows<{ hash: string }>;
  const recorded = new Set<string>();
  for (const row of ledger.rows) recorded.add(row.hash);

  const pending = readMigrationFiles({ migrationsFolder: migrationsFolder() }).filter(
    (entry) => !recorded.has(entry.hash),
  );

  for (const { hash, folderMillis, sql: statements } of pending) {
    // One exec() call per statement-breakpoint chunk, issued strictly in order.
    for (let position = 0; position < statements.length; position += 1) {
      const statement = (statements[position] ?? '').trim();
      if (statement === '') continue;
      try {
        await pglite.exec(statement);
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        const ellipsis = statement.length > 200 ? '…' : '';
        const preview = `${statement.slice(0, 200)}${ellipsis}`;
        throw new Error(
          `runPgliteMigrations: migration hash=${hash} statement #${position} failed: ${reason}\n` +
            `Statement: ${preview}`,
          { cause: err },
        );
      }
    }
    // Write the ledger row only after every statement of this migration succeeded.
    await handle.db.execute(
      sql`INSERT INTO drizzle.__drizzle_migrations (hash, created_at) VALUES (${hash}, ${folderMillis})`,
    );
  }
 }
--- a/packages/db/src/schema.ts
+++ b/packages/db/src/schema.ts
@@ -5,6 +5,7 @@
 import {
  pgTable,
  pgEnum,
  text,
  timestamp,
  boolean,
@@ -585,3 +586,226 @@ export const summarizationJobs = pgTable(
  },
  (t) => [index('summarization_jobs_status_idx').on(t.status)],
 );
 // ─── Federation ──────────────────────────────────────────────────────────────
 // Enums declared before tables that reference them.
 // All federation definitions live in this file (avoids CJS/ESM cross-import
 // issues when drizzle-kit loads schema files via esbuild-register).
 // Application code imports from `federation.ts` which re-exports from here.
 /**
 * Lifecycle state of a federation peer.
 * - pending:   registered but not yet approved / TLS handshake not confirmed
 * - active:    fully operational; mTLS verified
 * - suspended: temporarily blocked; cert still valid
 * - revoked:   cert revoked; no traffic allowed
 */
 export const peerStateEnum = pgEnum('peer_state', [
  'pending',
  'active',
  'suspended',
  'revoked',
 ]);
 /**
 * Lifecycle state of a federation grant.
 * - pending: created but not yet activated (awaiting cert enrollment, M2-07)
 * - active:  grant is in effect
 * - revoked: manually revoked before expiry
 * - expired: natural expiry (expires_at passed)
 */
 export const grantStatusEnum = pgEnum('grant_status', [
  'pending',
  'active',
  'revoked',
  'expired',
 ]);
 /**
 * A registered peer gateway identified by its Step-CA certificate CN.
 * Represents both inbound peers (other gateways querying us) and outbound
 * peers (gateways we query — identified by client_key_pem being set).
 */
 export const federationPeers = pgTable(
  'federation_peers',
  {
    id: uuid('id').primaryKey().defaultRandom(),
    /** Certificate CN, e.g. "gateway-uscllc-com". Unique — one row per peer identity. */
    commonName: text('common_name').notNull().unique(),
    /** Human-friendly label shown in admin UI. */
    displayName: text('display_name').notNull(),
    /** Pinned PEM certificate used for mTLS verification. */
    certPem: text('cert_pem').notNull(),
    /** Certificate serial number — used for CRL / revocation lookup. */
    certSerial: text('cert_serial').notNull().unique(),
    /** Certificate expiry — used by the renewal scheduler (FED-M6). */
    certNotAfter: timestamp('cert_not_after', { withTimezone: true }).notNull(),
    /**
     * Sealed (encrypted) private key for outbound connections TO this peer.
     * NULL for inbound-only peer rows (we serve them; we don't call them).
     */
    clientKeyPem: text('client_key_pem'),
    /** Current peer lifecycle state. New rows default to 'pending'. */
    state: peerStateEnum('state').notNull().default('pending'),
    /** Base URL for outbound queries, e.g. "https://woltje.com:443". NULL for inbound-only peers. */
    endpointUrl: text('endpoint_url'),
    /** Timestamp of the most recent successful inbound or outbound request. */
    lastSeenAt: timestamp('last_seen_at', { withTimezone: true }),
    /** Row creation time; defaults to now() on insert. */
    createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
    /** Populated when the cert is revoked; NULL while the peer is active. */
    revokedAt: timestamp('revoked_at', { withTimezone: true }),
  },
  (t) => [
    // CRL / revocation lookups by serial.
    // NOTE(review): `certSerial` is already declared `.unique()` above, and a UNIQUE
    // constraint is normally backed by its own index in Postgres, so this extra
    // index looks redundant — confirm before dropping (removal needs a migration).
    index('federation_peers_cert_serial_idx').on(t.certSerial),
    // Filter peers by state (e.g. find all active peers for outbound routing).
    index('federation_peers_state_idx').on(t.state),
  ],
 );
 /**
 * A grant lets a specific peer cert query a specific local user's data within
 * a defined scope. Scopes are validated by JSON Schema in M2-03; this table
 * stores them as raw jsonb.
 */
 export const federationGrants = pgTable(
  'federation_grants',
  {
    id: uuid('id').primaryKey().defaultRandom(),
    /**
     * The local user whose data this grant exposes.
     * Cascade delete: if the user account is deleted, the user's grant rows are
     * deleted with it (the rows are removed, not marked as revoked).
     */
    subjectUserId: text('subject_user_id')
      .notNull()
      .references(() => users.id, { onDelete: 'cascade' }),
    /**
     * The peer gateway holding the grant.
     * Cascade delete: if the peer record is removed, the grant is moot.
     */
    peerId: uuid('peer_id')
      .notNull()
      .references(() => federationPeers.id, { onDelete: 'cascade' }),
    /**
     * Scope object — validated by JSON Schema (M2-03).
     * Example: { "resources": ["tasks", "notes"], "operations": ["list", "get"] }
     */
    scope: jsonb('scope').notNull(),
    /** Current grant lifecycle state. New rows default to 'pending'. */
    status: grantStatusEnum('status').notNull().default('pending'),
    /** Optional hard expiry. NULL means the grant does not expire automatically. */
    expiresAt: timestamp('expires_at', { withTimezone: true }),
    /** Row creation time; defaults to now() on insert. */
    createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
    /** Populated when the grant is explicitly revoked. */
    revokedAt: timestamp('revoked_at', { withTimezone: true }),
    /** Human-readable reason for revocation (audit trail). */
    revokedReason: text('revoked_reason'),
  },
  (t) => [
    // Hot path: look up active grants for a subject user (auth middleware).
    index('federation_grants_subject_status_idx').on(t.subjectUserId, t.status),
    // Hot path: look up active grants held by a peer (inbound request check).
    index('federation_grants_peer_status_idx').on(t.peerId, t.status),
  ],
 );
 /**
 * Append-only audit log of all federation requests.
 * M4 writes rows here. M2 only creates the table.
 *
 * All FKs use SET NULL so audit rows survive peer/user/grant deletion.
 */
 export const federationAuditLog = pgTable(
  'federation_audit_log',
  {
    id: uuid('id').primaryKey().defaultRandom(),
    /** UUIDv7 from the X-Request-ID header — correlates with OTEL traces. */
    requestId: text('request_id').notNull(),
    /** Peer that made the request. SET NULL if the peer is later deleted. */
    peerId: uuid('peer_id').references(() => federationPeers.id, { onDelete: 'set null' }),
    /** Subject user whose data was queried. SET NULL if the user is deleted. */
    subjectUserId: text('subject_user_id').references(() => users.id, { onDelete: 'set null' }),
    /** Grant under which the request was authorised. SET NULL if the grant is deleted. */
    grantId: uuid('grant_id').references(() => federationGrants.id, { onDelete: 'set null' }),
    /** Request verb: "list" | "get" | "search". */
    verb: text('verb').notNull(),
    /** Resource type: "tasks" | "notes" | "memory" | etc. */
    resource: text('resource').notNull(),
    /** HTTP status code returned to the peer. */
    statusCode: integer('status_code').notNull(),
    /** Number of items returned (NULL for non-list requests or errors). */
    resultCount: integer('result_count'),
    /** Why the request was denied (NULL when allowed). */
    deniedReason: text('denied_reason'),
    /** End-to-end latency in milliseconds. */
    latencyMs: integer('latency_ms'),
    /** Row insert time; defaults to now(). Every index on this table sorts on it, descending. */
    createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
    // Reserved for M4 — see PRD 7.3
    // The three columns below are nullable (no .notNull()).
    /** SHA-256 of the normalised GraphQL/REST query string; written by M4 search. */
    queryHash: text('query_hash'),
    /** Request outcome: "allowed" | "denied" | "partial"; written by M4. */
    outcome: text('outcome'),
    /** Response payload size in bytes; written by M4. */
    bytesOut: integer('bytes_out'),
  },
  (t) => [
    // Per-peer request history in reverse chronological order.
    index('federation_audit_log_peer_created_at_idx').on(t.peerId, t.createdAt.desc()),
    // Per-user access log in reverse chronological order.
    index('federation_audit_log_subject_created_at_idx').on(t.subjectUserId, t.createdAt.desc()),
    // Global time-range scans (dashboards, rate-limit windows).
    index('federation_audit_log_created_at_idx').on(t.createdAt.desc()),
  ],
 );
 /**
 * Single-use enrollment tokens — M2-07.
 *
 * An admin creates a token (with a TTL) and hands it out-of-band to the
 * remote peer operator.  The peer redeems it exactly once by posting its
 * CSR to POST /api/federation/enrollment/:token.  The token is atomically
 * marked as used to prevent replay attacks.
 */
 export const federationEnrollmentTokens = pgTable('federation_enrollment_tokens', {
  /** 32-byte hex token — crypto.randomBytes(32).toString('hex') */
  token: text('token').primaryKey(),
  /** The federation grant this enrollment activates. Cascade: the token row is deleted with the grant. */
  grantId: uuid('grant_id')
    .notNull()
    .references(() => federationGrants.id, { onDelete: 'cascade' }),
  /** The peer record that will be updated on successful enrollment. Cascade: the token row is deleted with the peer. */
  peerId: uuid('peer_id')
    .notNull()
    .references(() => federationPeers.id, { onDelete: 'cascade' }),
  /** Hard expiry — token rejected after this time even if not used. */
  expiresAt: timestamp('expires_at', { withTimezone: true }).notNull(),
  /** NULL until the token is redeemed.  Set atomically to prevent replay. */
  usedAt: timestamp('used_at', { withTimezone: true }),
  /** Row creation time; defaults to now() on insert. */
  createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
 });
--- a/packages/mosaic/framework/defaults/AGENTS.md
+++ b/packages/mosaic/framework/defaults/AGENTS.md
@@ -1,28 +1,24 @@
 # Mosaic Global Agent Contract
-Canonical file: `~/.config/mosaic/AGENTS.md`
+Canonical file: `~/.config/mosaic/AGENTS.md`. Mandatory behavior for all Mosaic agent runtimes.
-This file defines the mandatory behavior for all Mosaic agent runtimes.
+This is the THIN CORE — the launcher injects it (plus USER.md, the TOOLS index, and the runtime
 contract) into every session. It carries only what must be resident to avoid violating a gate.
 Depth lives in guides, read on demand (see Conditional Guide Loading).
-## MANDATORY Load Order (No Exceptions)
+## Session Start — Load Order
-Before responding to any user message, you MUST read these files in order:
+The core contract is ALREADY in your context (injected by `mosaic` launch). Do not re-read it.
 At session start, additionally:
-1. `~/.config/mosaic/SOUL.md`
+1. Read `~/.config/mosaic/SOUL.md` (agent identity — small, once).
-2. `~/.config/mosaic/USER.md`
+2. Read project-local `AGENTS.md` / `CLAUDE.md` if present.
-3. `~/.config/mosaic/STANDARDS.md`
+3. Read guides ONLY as triggered by the Conditional Guide Loading table below. Do NOT pre-load
-4. `~/.config/mosaic/AGENTS.md`
+   guides you do not need — role-relevant detail is pulled on demand, not up front.
-5. `~/.config/mosaic/TOOLS.md`
+4. When you begin implementation work, read `~/.config/mosaic/guides/E2E-DELIVERY.md` (the full
-6. `~/.config/mosaic/guides/E2E-DELIVERY.md`
+   delivery procedure: PRD/tracking gates, execution cycle, testing, review, completion).
-7. `~/.config/mosaic/guides/MEMORY.md`
+5. `~/.config/mosaic/STANDARDS.md` is available for reference; load it only if the task requires
-8. Project-local `AGENTS.md` (if present)
+   standards validation (do NOT halt if missing).
 9. Runtime-specific reference:
   - Pi: `~/.config/mosaic/runtime/pi/RUNTIME.md`
   - Claude: `~/.config/mosaic/runtime/claude/RUNTIME.md`
   - Codex: `~/.config/mosaic/runtime/codex/RUNTIME.md`
   - OpenCode: `~/.config/mosaic/runtime/opencode/RUNTIME.md`
 If any required file is missing, you MUST stop and report the missing file.
 ## CRITICAL HARD GATES (Read First)
@@ -37,56 +33,40 @@ If any required file is missing, you MUST stop and report the missing file.
 9. Do NOT stop at "PR created". Do NOT ask "should I merge?" Do NOT ask "should I close the issue?".
 10. Manual `docker build` / `docker push` for deployment is FORBIDDEN when CI/CD pipelines exist in the repository. CI is the ONLY canonical build path for container images.
 11. Before ANY build or deployment action, you MUST check for existing CI/CD pipeline configuration (`.woodpecker/`, `.woodpecker.yml`, `.github/workflows/`, etc.). If pipelines exist, use them — do not build locally.
-12. The mandatory load order and intake procedure are NOT conditional on perceived task complexity. A "simple" commit-push-deploy task has the same procedural requirements as a multi-file feature. Skipping intake because a task "seems simple" is the most common framework violation.
+12. The mandatory intake procedure is NOT conditional on perceived task complexity. A "simple" commit-push-deploy task has the same procedural requirements as a multi-file feature. Skipping intake because a task "seems simple" is the most common framework violation.
 13. **Merge authority (coordinated work):** when a coordinator/orchestrator session is active for the work, the post-review MERGE GO-AHEAD is the coordinator's to give — once code has passed the required review gates, request the coordinator's go-ahead and merge on their confirmation; do NOT wait on the human owner personally. Solo (uncoordinated) delivery keeps the default: merge without routine confirmation per gates 2 and 9. A "No self-merge" note on a PR means no UNREVIEWED self-merge — it does not suspend coordinator-authorized merges. (Policy: Jason, 2026-06-11.)
-## Non-Negotiable Operating Rules
+## Non-Negotiable Operating Rules (condensed — full detail in `guides/E2E-DELIVERY.md`)
-1. You MUST create and maintain a task-specific scratchpad for every non-trivial task.
+- **Source of requirements:** `docs/PRD.md`/`docs/PRD.json` MUST exist before coding. In steered autonomy, make best-guess PRD decisions, mark each `ASSUMPTION:` with rationale, continue. (`guides/PRD.md`)
-2. You MUST follow the end-to-end procedure in `E2E-DELIVERY.md`.
+- **Tracking:** create/maintain a scratchpad and `docs/TASKS.md` for every non-trivial task; keep current through completion.
-3. You MUST execute this cycle for implementation work: `plan -> code -> test -> review -> remediate -> review -> commit -> push -> greenfield situational test -> repeat`.
+- **Execution cycle:** `plan → code → test → review → remediate → review → commit → push → greenfield situational test → repeat`. On failure, remediate and re-run from the failed step.
-4. Before coding begins, `docs/PRD.md` or `docs/PRD.json` MUST exist and be treated as the source of requirements.
+- **Testing:** run baseline tests before any completion claim. Situational testing is the PRIMARY gate. Risk-based TDD is REQUIRED for bug fixes, security/auth/permission logic, and critical data mutations. (`guides/QA-TESTING.md`)
-5. The main agent MUST prepare or update the PRD using user objectives, constraints, and available project context before implementation starts.
+- **Review:** if you modify source code, an independent code review MUST pass before completion. (`guides/CODE-REVIEW.md`)
-6. In steered autonomy mode, the agent MUST make best-guess PRD decisions when needed, mark each with `ASSUMPTION:` and rationale, and continue without waiting for routine user approval.
+- **Evidence:** provide explicit verification evidence before any completion claim. Never use workarounds that bypass quality gates.
-7. You MUST run baseline tests before claiming completion.
+- **Secrets & deps:** never hardcode secrets (`guides/VAULT-SECRETS.md`); never use deprecated/unsupported dependencies.
-8. Situational testing is the PRIMARY validation gate. You MUST run situational tests based on the change surface.
+- **Git strategy:** trunk-based — branch from `main`, merge to `main` via PR only (squash merge), never push directly to `main`.
-9. TDD is risk-based and REQUIRED for bug fixes, security/auth/permission logic, and critical business logic/data mutations (see `~/.config/mosaic/guides/QA-TESTING.md`).
+- **Provider work:** detect platform first, then use `~/.config/mosaic/tools/git/*.sh` wrappers before any raw `gh`/`tea`/`glab`. Create/link issue(s) in `docs/TASKS.md` before coding; if no provider, use `TASKS:<id>` refs.
-10. If you modify source code, you MUST run an independent code review before completion.
+- **Deployment:** own it when in scope and access is configured. Use immutable image tags (`sha-*`, `vX.Y.Z-rc.N`) with digest-first promotion; `latest` is forbidden as a deployment reference. (`guides/INFRASTRUCTURE.md`)
-11. You MUST update required documentation for code/API/auth/infra changes per `~/.config/mosaic/guides/DOCUMENTATION.md`.
+- **Release:** on milestone completion, create + push a release tag and publish a repository release.
-12. You MUST provide verification evidence before completion claims.
+- **Documentation:** update required docs for code/API/auth/infra changes; keep `docs/` root clean (scoped folders). (`guides/DOCUMENTATION.md`)
-13. You MUST NOT use workarounds that bypass quality gates.
+- **TypeScript:** DTO files (`*.dto.ts`) REQUIRED for module/API boundaries. (`guides/TYPESCRIPT.md`)
-14. You MUST NOT hardcode secrets.
+- **Ownership:** own execution end-to-end (plan→deploy). Human intervention is escalation-only — do not ask the human to do routine coding, review, or repo work.
-15. You MUST NOT use deprecated or unsupported dependencies.
+- **Budget:** honor user plan/token budgets; adjust execution strategy to stay within limits.
 16. When a milestone is completed, you MUST create and push a release tag and publish a repository release.
 17. For every non-trivial implementation task, you MUST create or update `docs/TASKS.md` before coding and keep it current through completion.
 18. You MUST keep `docs/` root clean and place reports/artifacts in scoped folders per `~/.config/mosaic/guides/DOCUMENTATION.md`.
 19. For TypeScript codebases, DTO files are REQUIRED for module/API boundaries (`*.dto.ts`).
 20. You MUST honor user plan/token budgets: monitor estimated vs used tokens and adjust execution strategy to stay within limits.
 21. You MUST use trunk merge strategy: branch from `main`, merge to `main` via PR only, never push directly to `main`, and use squash merge only.
 22. You MUST own project execution end-to-end: planning, coding, testing, review, remediation, PR/repo operations, release/tag, and deployment when in scope.
 23. Human intervention is escalation-only; do not ask the human to perform routine coding, review, or repository management work.
 24. Deployment ownership is REQUIRED when deployment is in scope and target access is configured.
 25. For container deployments, you MUST use immutable image tags (`sha-*`, `vX.Y.Z-rc.N`) with digest-first promotion; `latest` is forbidden as a deployment reference.
 26. If an external git provider is available (Gitea/GitHub/GitLab), you MUST create or update issue(s) and link them in `docs/TASKS.md` before coding; if unavailable, use `TASKS:<id>` internal refs in `docs/TASKS.md`.
 27. For provider operations (issue/PR/milestone), you MUST detect platform first and use `~/.config/mosaic/tools/git/*.sh` wrappers before any raw provider CLI/API calls.
 28. Direct `gh`/`tea`/`glab` commands are forbidden as first choice when a Mosaic wrapper exists; use raw commands only as documented fallback.
 29. If the mission is orchestration-oriented (contains "orchestrate", issue/milestone coordination, or multi-task execution), you MUST load and follow `~/.config/mosaic/guides/ORCHESTRATOR.md` before taking action.
 30. At session start, you MUST declare the operating mode in your first response before any tool calls or implementation steps.
 31. For orchestration-oriented missions, the first line MUST be exactly: `Now initiating Orchestrator mode...`
 32. For non-orchestrator implementation missions, the first line MUST be exactly: `Now initiating Delivery mode...`
 33. For explicit review-only missions, the first line MUST be exactly: `Now initiating Review mode...`
 34. For source-code delivery through PR workflow, completion is forbidden until the PR is merged to `main`, CI/pipeline status is terminal green, and linked issue/internal task is closed.
 35. If merge/CI/issue-closure operations fail, you MUST report a blocker with the exact failed wrapper command and stop instead of declaring completion.
 36. Before push or PR merge, you MUST run CI queue guard and wait if the project has running/queued pipelines: `~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push|merge`.
 37. When an active mission is detected at session start (MISSION-MANIFEST.md, TASKS.md, or scratchpads/ present), you MUST load `~/.config/mosaic/guides/ORCHESTRATOR-PROTOCOL.md` and follow the Session Resume Protocol before taking any action.
 ## Mode Declaration Protocol (Hard Rule)
-At session start, declare one mode before any actions:
+At session start, declare exactly one mode as the first line, before any tool call or step:
 1. Orchestration mission: `Now initiating Orchestrator mode...`
 2. Implementation mission: `Now initiating Delivery mode...`
 3. Review-only mission: `Now initiating Review mode...`
 Orchestration-oriented = contains "orchestrate", issue/milestone coordination, or multi-task
 execution → also load `guides/ORCHESTRATOR.md` before acting. If an active mission is detected at
 session start (MISSION-MANIFEST.md, TASKS.md, or scratchpads/ present) → load
 `guides/ORCHESTRATOR-PROTOCOL.md` and follow the Session Resume Protocol before any action.
 ## Steered Autonomy Escalation Triggers
 Only interrupt the human when one of these is true:
@@ -97,136 +77,69 @@ Only interrupt the human when one of these is true:
 4. Legal/compliance/security constraints are unknown and materially affect delivery.
 5. Objectives are mutually conflicting and cannot be resolved from PRD, repo, or prior decisions.
-## Conditional Guide Loading
+## Conditional Guide Loading (role/task-driven — load only what the task needs)
-Load additional guides when the task requires them.
+| Task                                               | Guide                              |
 | -------------------------------------------------- | ---------------------------------- |
 | Project bootstrap                                  | `guides/BOOTSTRAP.md`              |
 | PRD creation / requirements                        | `guides/PRD.md`                    |
 | Orchestration flow                                 | `guides/ORCHESTRATOR.md`           |
 | Mission lifecycle / multi-session orchestration    | `guides/ORCHESTRATOR-PROTOCOL.md`  |
 | Orchestrator estimation heuristics                 | `guides/ORCHESTRATOR-LEARNINGS.md` |
 | Frontend changes                                   | `guides/FRONTEND.md`               |
 | Backend/API changes                                | `guides/BACKEND.md`                |
 | Auth/authorization                                 | `guides/AUTHENTICATION.md`         |
 | CI/CD changes                                      | `guides/CI-CD-PIPELINES.md`        |
 | Infrastructure/DevOps/deployment                   | `guides/INFRASTRUCTURE.md`         |
 | Code review work                                   | `guides/CODE-REVIEW.md`            |
 | TypeScript strict typing                           | `guides/TYPESCRIPT.md`             |
 | QA / test strategy                                 | `guides/QA-TESTING.md`             |
 | Documentation (any code/API/auth/infra change)     | `guides/DOCUMENTATION.md`          |
 | Secrets / vault usage                              | `guides/VAULT-SECRETS.md`          |
 | Tool/credential reference (service CLIs, wrappers) | `guides/TOOLS-REFERENCE.md`        |
 | Memory protocol (OpenBrain capture/recall)         | `guides/MEMORY.md`                 |
-| Task                                                    | Required Guide                                      |
+## Subagent Model Selection (Cost — Hard Rule)
 | ------------------------------------------------------- | --------------------------------------------------- |
 | Project bootstrap                                       | `~/.config/mosaic/guides/BOOTSTRAP.md`              |
 | PRD creation and requirements definition                | `~/.config/mosaic/guides/PRD.md`                    |
 | Orchestration flow                                      | `~/.config/mosaic/guides/ORCHESTRATOR.md`           |
 | Frontend changes                                        | `~/.config/mosaic/guides/FRONTEND.md`               |
 | Backend/API changes                                     | `~/.config/mosaic/guides/BACKEND.md`                |
 | Documentation changes or any code/API/auth/infra change | `~/.config/mosaic/guides/DOCUMENTATION.md`          |
 | Authentication/authorization                            | `~/.config/mosaic/guides/AUTHENTICATION.md`         |
 | CI/CD changes                                           | `~/.config/mosaic/guides/CI-CD-PIPELINES.md`        |
 | Infrastructure/DevOps                                   | `~/.config/mosaic/guides/INFRASTRUCTURE.md`         |
 | Code review work                                        | `~/.config/mosaic/guides/CODE-REVIEW.md`            |
 | TypeScript strict typing                                | `~/.config/mosaic/guides/TYPESCRIPT.md`             |
 | QA and test strategy                                    | `~/.config/mosaic/guides/QA-TESTING.md`             |
 | Secrets and vault usage                                 | `~/.config/mosaic/guides/VAULT-SECRETS.md`          |
 | Orchestrator estimation heuristics                      | `~/.config/mosaic/guides/ORCHESTRATOR-LEARNINGS.md` |
 | Mission lifecycle / multi-session orchestration         | `~/.config/mosaic/guides/ORCHESTRATOR-PROTOCOL.md`  |
-## Embedded Delivery Cycle (Hard Rule)
+Select the cheapest model capable of the task; do NOT default to the most expensive. If the tier is
 omitted, the subagent inherits the parent's model (usually opus), which wastes budget.
- Implementation work MUST follow the embedded execution cycle:
+- **haiku** — search/grep/glob, codebase exploration, status/health checks, one-line mechanical fixes.
-  - `plan -> code -> test -> review -> remediate -> review -> commit -> push -> greenfield situational test -> repeat`
+- **sonnet** — code review, lint, test writing/fixing, standard feature implementation.
- If a step fails, you MUST remediate and re-run from the relevant step before proceeding.
+- **opus** — complex architecture / multi-file refactors, security/auth logic, ambiguous design decisions.
-## Sequential-Thinking MCP (Hard Requirement)
+Start cheapest; escalate only when the task genuinely needs deeper reasoning. Runtime syntax for
-
+specifying tier is in the runtime contract.
 - `sequential-thinking` MCP server is REQUIRED for Mosaic operation.
 - Installation and configuration are managed by Mosaic bootstrap and runtime linking.
 - If sequential-thinking is unavailable, you MUST report the failure and stop planning-intensive execution.
 ## Subagent Model Selection (Cost Optimization — Hard Rule)
 When delegating work to subagents, you MUST select the cheapest model capable of completing the task. Do NOT default to the most expensive model for every delegation.
 | Task Type                                     | Model Tier | Rationale                                               |
 | --------------------------------------------- | ---------- | ------------------------------------------------------- |
 | File search, grep, glob, codebase exploration | **haiku**  | Read-only, pattern matching, no reasoning depth needed  |
 | Status checks, health monitoring, heartbeat   | **haiku**  | Structured API calls, pass/fail output                  |
 | Simple code fixes (typos, rename, one-liner)  | **haiku**  | Minimal reasoning, mechanical changes                   |
 | Code review, lint, style checks               | **sonnet** | Needs judgment but not deep architectural reasoning     |
 | Test writing, test fixes                      | **sonnet** | Pattern-based, moderate complexity                      |
 | Standard feature implementation               | **sonnet** | Good balance of capability and cost for most coding     |
 | Complex architecture, multi-file refactors    | **opus**   | Requires deep reasoning, large context, design judgment |
 | Security review, auth logic                   | **opus**   | High-stakes reasoning where mistakes are costly         |
 | Ambiguous requirements, design decisions      | **opus**   | Needs nuanced judgment and tradeoff analysis            |
 **Decision rule**: Start with the cheapest viable tier. Only escalate if the task genuinely requires deeper reasoning — not as a safety default. Most coding tasks are sonnet-tier. Reserve opus for work where wrong answers are expensive.
 **Runtime-specific syntax**: See the runtime reference for how to specify model tier when spawning subagents (e.g., Claude Code Task tool `model` parameter).
 ## Superpowers Enforcement (Hard Rule)
-Mosaic provides capabilities beyond basic code editing: **skills**, **hooks**, **MCP tools**, and **plugins**. These are not optional extras — they are force multipliers that agents MUST actively use when applicable. Under-utilization of superpowers is a framework violation.
+Skills, hooks, MCP tools, and plugins are force multipliers you MUST use when applicable;
 under-utilization is a framework violation.
-### Skills
+- **Skills:** before implementation, scan `~/.config/mosaic/skills/` and load any matching the task
  domain (e.g. `nestjs-best-practices` for NestJS). Include skill loading in worker kickstarts. Do
  not load unrelated skills.
 - **Hooks:** never bypass or suppress hook output; treat hook failures like failing tests and fix
  them. If a hook is wrong, report it as a framework issue — do not work around it.
 - **MCP:** sequential-thinking is REQUIRED for planning/architecture/multi-step reasoning. OpenBrain
  (`capture`/`search`/`recent`) is the cross-agent memory layer — search at session start, capture
  what you learn. Use web/browser/research MCP tools instead of asking the user to look things up.
 - **Plugins:** use code-review / pr-review / architecture plugins proactively after significant
  changes and before opening a PR — do not wait to be asked.
 - **Self-evolution:** capture recurring patterns (`framework-improvement`), missing tooling
  (`tooling-gap`), and value-less friction (`framework-friction`) to OpenBrain.
-Skills are domain-specific instruction sets in `~/.config/mosaic/skills/` that encode best practices, patterns, and guardrails. They are loaded into agents via the runtime's skill mechanism (e.g., Claude Code slash commands, Pi `--skill` flag).
+## Other Hard Rules
-**Rules:**
+- **Sequential-thinking MCP** is REQUIRED. If unavailable, report the failure and stop planning-intensive execution.
 - **Missing core file:** if `AGENTS.md`, `SOUL.md`, or the runtime contract is missing, stop and report it.
-1. Before starting implementation, scan available skills (`ls ~/.config/mosaic/skills/`) and load any that match the task domain.
+## Session Closure
 2. When a skill exists for the technology being used (e.g., `nestjs-best-practices` for NestJS work), you MUST load it.
 3. When spawning workers, include skill loading in the kickstart prompt.
 4. If you complete a task without loading a relevant available skill, that is a quality gap.
-### Hooks
+Before closing an implementation task, confirm: required + situational tests passed (primary gate);
-
+aligned to `docs/PRD.md`; acceptance criteria mapped to evidence; independent code review passed (if
-Hooks provide automated quality gates (lint, format, typecheck) that fire on file edits. They are configured in the runtime settings and run automatically.
+code changed); required docs updated; scratchpad updated with decisions/results/risks; explicit
-
+completion evidence provided. For PR-workflow delivery: confirm merged PR number + merge commit on
-**Rules:**
+`main`, terminal-green CI, and linked issue closed (or `docs/TASKS.md` equivalent). If any of those
-
+are blocked by access/tooling failure, return `blocked` with the exact failed wrapper command — do
-1. Do NOT bypass or suppress hook output. If a hook reports errors, fix them before proceeding.
+not claim completion. Full checklist: `guides/E2E-DELIVERY.md`.
 2. Hook failures are immediate feedback — treat them like failing tests.
 3. If a hook is consistently failing on valid code, report it as a framework issue rather than working around it.
 ### MCP Tools
 MCP servers extend agent capabilities with external integrations (sequential-thinking, web search, memory, browser automation, etc.). Available MCP tools are listed at session start.
 **Rules:**
 1. **sequential-thinking** is REQUIRED for planning, architecture, and multi-step reasoning. Use it — do not skip structured thinking for complex decisions.
 2. **OpenBrain** (`capture`, `search`, `recent`) is the cross-agent memory layer. Capture discoveries and search for prior context at session start.
 3. When a task involves web research, browser testing, or external data, use the available MCP tools (web-search, chrome-devtools, web-reader) rather than asking the user to look things up.
 4. Check available MCP tools at session start and use them proactively throughout the session.
 ### Plugins (Runtime-Specific)
 Runtime plugins (e.g., Claude Code's `feature-dev`, `pr-review-toolkit`, `code-review`) provide specialized agent capabilities like code review, architecture analysis, and test coverage analysis.
 **Rules:**
 1. After completing a significant code change, use code review plugins proactively — do not wait for the user to ask.
 2. Before creating a PR, use PR review plugins to catch issues early.
 3. When designing architecture, use planning/architecture plugins for structured analysis.
 ### Self-Evolution
 The Mosaic framework should improve over time based on usage patterns:
 1. When you discover a recurring pattern that should be codified, capture it to OpenBrain with `type: "framework-improvement"`.
 2. When a hook, skill, or tool is missing for a common task, capture the gap to OpenBrain with `type: "tooling-gap"`.
 3. When a framework rule causes friction without adding value, capture the observation to OpenBrain with `type: "framework-friction"`.
 These captures feed the framework's continuous improvement cycle.
 ## Skills Policy
 - Load skills that match the active task domain before starting implementation.
 - Do not load unrelated skills.
 - Follow skill trigger rules from the active runtime instruction layer.
 - Actively check `~/.config/mosaic/skills/` for applicable skills rather than passively waiting for them to be mentioned.
 ## Session Closure Requirement
 Before closing any implementation task:
 1. Confirm required tests passed.
 2. Confirm situational tests passed (primary gate).
 3. Confirm implementation is aligned to the active `docs/PRD.md` or `docs/PRD.json`.
 4. Confirm acceptance criteria are mapped to verification evidence.
 5. If source code changed, confirm independent code review passed.
 6. Confirm required documentation updates were completed and reviewed.
 7. Update scratchpad with decisions, results, and open risks.
 8. Provide explicit completion evidence.
 9. If source code changed and external provider is available, confirm merged PR number and merge commit on `main`.
 10. Confirm CI/pipeline status is terminal green for the merged change (or merged PR head when equivalent).
 11. Confirm linked issue is closed (or internal `docs/TASKS.md` equivalent is closed when no provider exists).
 12. If any of items 9-11 are blocked by access/tooling failure, return `blocked` status with exact failed wrapper command and do not claim completion.
--- a/packages/mosaic/framework/defaults/STANDARDS.md
+++ b/packages/mosaic/framework/defaults/STANDARDS.md
@@ -27,6 +27,16 @@ Master/slave model:
 - Do not perform destructive git/file actions without explicit instruction.
 - Browser automation (Playwright, Cypress, Puppeteer) MUST run in headless mode. Never launch a visible browser — it collides with the user's display and active session.
 ### Secrets handling (HARD RULE)
 - Vault is the canonical source-of-truth for every secret in every environment. No exceptions.
 - For k8s workloads, the default read path is **External Secrets Operator → k8s Secret → env var** (`secretKeyRef`). The app reads standard env vars; no Vault client in app code.
 - Direct-Vault clients in application code are **opt-in only**, justified per-app by a documented dynamic-secrets requirement (e.g., DB rotation, AWS STS). Default to ESO. Document the justification in the project's README under "Secrets architecture".
 - `${VAR:-default}` fallback syntax in any deployment configuration (compose, k8s manifests, Helm values, env files committed to git) is **forbidden** for required values. Use `${VAR:?VAR is required}` to fast-fail. Defaults are allowed only for true conveniences (e.g. `${PORT:-3000}`) and MUST be tagged `# safe-default: <reason>` so a reviewer can confirm the intent.
 - `.env` files in production deployment paths are **forbidden**. `.env.example` and `.env` in local-dev paths are fine.
 - App startup MUST validate required secrets against a schema (zod / pydantic / equivalent) and exit non-zero on missing required values. Never run with defaulted weak fallbacks.
 - New apps: bootstrap checklist (see `~/.config/mosaic/guides/BOOTSTRAP.md`) MUST include Vault path provisioning + `ExternalSecret` manifest + README declaring the Vault path and required keys.
 ## Session Lifecycle Contract
 - Start: `scripts/agent/session-start.sh`
--- a/packages/mosaic/framework/defaults/TOOLS.md
+++ b/packages/mosaic/framework/defaults/TOOLS.md
@@ -1,257 +1,58 @@
-# Machine-Level Tool Reference
+# Machine Tools — Index
-Centralized reference for tools, credentials, and CLI patterns available across all projects.
+Tool suites live at `~/.config/mosaic/tools/<suite>/`. This is the index only.
 **Full CLI signatures, flags, and examples: `~/.config/mosaic/guides/TOOLS-REFERENCE.md`** —
 read it (or the relevant service guide) when your task actually touches that service.
 Project-specific tooling belongs in the project's `AGENTS.md`, not here.
-All tool suites are located at `~/.config/mosaic/tools/`.
+## Suites (use wrappers first)
-
+
-## Tool Suites
+| Suite      | Path                                             | Purpose                                                                  |
-
+| ---------- | ------------------------------------------------ | ------------------------------------------------------------------------ |
-### Git Wrappers (Use First)
+| git        | `tools/git/*.sh`                                 | issues, PRs, milestones, CI queue guard (platform-auto-detected)         |
-
+| woodpecker | `tools/woodpecker/*.sh`                          | CI pipelines (`-a mosaic` or `-a usc`; match git remote host)            |
-Mosaic wrappers at `~/.config/mosaic/tools/git/*.sh` handle platform detection and edge cases. Always use these before raw CLI commands.
+| portainer  | `tools/portainer/*.sh`                           | Docker Swarm stacks (status/redeploy/list)                               |
-
+| coolify    | `tools/coolify/*.sh`                             | **DEPRECATED** — superseded by Portainer; do not use for new deployments |
-```bash
+| authentik  | `tools/authentik/*.sh`                           | identity (users/groups/apps/flows)                                       |
-# Issues
+| cloudflare | `tools/cloudflare/*.sh`                          | DNS (zones/records; `-a` instance)                                       |
-~/.config/mosaic/tools/git/issue-create.sh
+| glpi       | `tools/glpi/*.sh`                                | IT tickets/computers/users                                               |
-~/.config/mosaic/tools/git/issue-close.sh
+| health     | `tools/health/stack-health.sh`                   | service health checks                                                    |
-
+| codex      | `tools/codex/*.sh`                               | code/security review (`--uncommitted`)                                   |
-# PRs
+| openbrain  | `tools/openbrain/*`, `tools/openbrain_client.py` | semantic memory (see below)                                              |
-~/.config/mosaic/tools/git/pr-create.sh
+| excalidraw | MCP `mcp__excalidraw__*`                         | diagram export/generation                                                |
-~/.config/mosaic/tools/git/pr-merge.sh
+
-
+Always use the git wrappers before raw CLI commands for issue/PR/milestone ops (see AGENTS.md hard gates 6–8).
-# Milestones
+Queue guard before push/merge: `tools/git/ci-queue-wait.sh --purpose push|merge`.
 ~/.config/mosaic/tools/git/milestone-create.sh
 # CI queue guard (required before push/merge)
 ~/.config/mosaic/tools/git/ci-queue-wait.sh --purpose push|merge
 ```
 ### Code Review (Codex)
 ```bash
 ~/.config/mosaic/tools/codex/codex-code-review.sh --uncommitted
 ~/.config/mosaic/tools/codex/codex-security-review.sh --uncommitted
 ```
 ### Infrastructure — Portainer
 ```bash
 ~/.config/mosaic/tools/portainer/stack-status.sh -n <stack-name>
 ~/.config/mosaic/tools/portainer/stack-redeploy.sh -n <stack-name>
 ~/.config/mosaic/tools/portainer/stack-list.sh
 ~/.config/mosaic/tools/portainer/endpoint-list.sh
 ```
 ### Infrastructure — Coolify (DEPRECATED)
 > Coolify has been superseded by Portainer Docker Swarm in this stack.
 > Tools remain for reference but should not be used for new deployments.
 ```bash
 # DEPRECATED — do not use for new deployments
 ~/.config/mosaic/tools/coolify/project-list.sh
 ~/.config/mosaic/tools/coolify/service-list.sh
 ~/.config/mosaic/tools/coolify/service-status.sh -u <uuid>
 ~/.config/mosaic/tools/coolify/deploy.sh -u <uuid>
 ~/.config/mosaic/tools/coolify/env-set.sh -u <uuid> -k KEY -v VALUE
 ```
 ### Identity — Authentik
 ```bash
 ~/.config/mosaic/tools/authentik/user-list.sh
 ~/.config/mosaic/tools/authentik/user-create.sh -u <username> -n <name> -e <email>
 ~/.config/mosaic/tools/authentik/group-list.sh
 ~/.config/mosaic/tools/authentik/app-list.sh
 ~/.config/mosaic/tools/authentik/flow-list.sh
 ~/.config/mosaic/tools/authentik/admin-status.sh
 ```
 ### CI/CD — Woodpecker
 Multi-instance support: `-a <instance>` selects a named instance. Omit `-a` to use the default from `woodpecker.default` in credentials.json.
 | Instance           | URL                | Serves                             |
 | ------------------ | ------------------ | ---------------------------------- |
 | `mosaic` (default) | ci.mosaicstack.dev | Mosaic repos (git.mosaicstack.dev) |
 | `usc`              | ci.uscllc.com      | USC repos (git.uscllc.com)         |
 ```bash
 # List recent pipelines
 ~/.config/mosaic/tools/woodpecker/pipeline-list.sh [-r owner/repo] [-a instance]
 # Check latest or specific pipeline status
 ~/.config/mosaic/tools/woodpecker/pipeline-status.sh [-r owner/repo] [-n number] [-a instance]
 # Trigger a build
 ~/.config/mosaic/tools/woodpecker/pipeline-trigger.sh [-r owner/repo] [-b branch] [-a instance]
 ```
 Instance selection rule: match `-a` to the git remote host of the target repo. If the repo is on `git.uscllc.com`, use `-a usc`. If on `git.mosaicstack.dev`, use `-a mosaic` (or omit, since it's the default).
 ### DNS — Cloudflare
 Multi-instance support: `-a <instance>` selects a named instance (e.g. `personal`, `work`). Omit `-a` to use the default from `cloudflare.default` in credentials.json.
 ```bash
 # List zones (domains)
 ~/.config/mosaic/tools/cloudflare/zone-list.sh [-a instance]
 # List DNS records (zone by name or ID)
 ~/.config/mosaic/tools/cloudflare/record-list.sh -z <zone> [-a instance] [-t type] [-n name]
 # Create DNS record
 ~/.config/mosaic/tools/cloudflare/record-create.sh -z <zone> -t <type> -n <name> -c <content> [-a instance] [-p] [-l ttl] [-P priority]
 # Update DNS record
 ~/.config/mosaic/tools/cloudflare/record-update.sh -z <zone> -r <record-id> -t <type> -n <name> -c <content> [-a instance] [-p] [-l ttl]
 # Delete DNS record
 ~/.config/mosaic/tools/cloudflare/record-delete.sh -z <zone> -r <record-id> [-a instance]
 ```
 ### IT Service — GLPI
 ```bash
 ~/.config/mosaic/tools/glpi/ticket-list.sh
 ~/.config/mosaic/tools/glpi/ticket-create.sh -t <title> -c <content>
 ~/.config/mosaic/tools/glpi/computer-list.sh
 ~/.config/mosaic/tools/glpi/user-list.sh
 ```
 ### Health Check
 ```bash
 # Check all configured services
 ~/.config/mosaic/tools/health/stack-health.sh
 # Check a specific service
 ~/.config/mosaic/tools/health/stack-health.sh -s portainer
 # JSON output for automation
 ~/.config/mosaic/tools/health/stack-health.sh -f json
 ```
 ### Shared Credential Loader
 ```bash
 # Source in any script to load service credentials
 source ~/.config/mosaic/tools/_lib/credentials.sh
 load_credentials <service-name>
 # Supported: portainer, coolify, authentik, glpi, github, gitea-mosaicstack, gitea-usc, woodpecker, cloudflare, turbo-cache, openbrain
 ```
 ### OpenBrain — Semantic Memory (PRIMARY)
 Self-hosted semantic brain backed by pgvector. Primary shared memory layer for all agents across all sessions and harnesses. Stores and retrieves decisions, context, and observations via semantic search.
 **MANDATORY jarvis-brain rule:** When working in `~/src/jarvis-brain`, NEVER capture project data, meeting notes, status updates, timeline decisions, or task completions to OpenBrain. The flat files (`data/projects/*.json`, `data/tasks/*.json`) are the SSOT — use `tools/brain.py` and direct JSON edits. OpenBrain is for agent meta-observations ONLY (tooling gotchas, framework learnings, cross-project patterns). Violating this creates duplicate, divergent data.
 **Credentials:** `load_credentials openbrain` → exports `OPENBRAIN_URL`, `OPENBRAIN_TOKEN`
 Configure in your credentials.json:
 ```json
 "openbrain": {
  "url": "https://<your-openbrain-host>",
  "api_key": "<your-api-key>"
 }
 ```
 **REST API** (any language, any harness):
 ```bash
 source ~/.config/mosaic/tools/_lib/credentials.sh && load_credentials openbrain
 # Search by meaning
 curl -s -X POST -H "Authorization: Bearer $OPENBRAIN_TOKEN" -H "Content-Type: application/json" \
  -d '{"query": "your search", "limit": 5}' "$OPENBRAIN_URL/v1/search"
 # Capture a thought
 curl -s -X POST -H "Authorization: Bearer $OPENBRAIN_TOKEN" -H "Content-Type: application/json" \
  -d '{"content": "...", "source": "agent-name", "metadata": {}}' "$OPENBRAIN_URL/v1/thoughts"
 # Recent activity
 curl -s -H "Authorization: Bearer $OPENBRAIN_TOKEN" "$OPENBRAIN_URL/v1/thoughts/recent?limit=5"
 # Stats
 curl -s -H "Authorization: Bearer $OPENBRAIN_TOKEN" "$OPENBRAIN_URL/v1/stats"
 ```
 **Python client** (if jarvis-brain is available on PYTHONPATH):
 ```bash
 python tools/openbrain_client.py search "topic"
 python tools/openbrain_client.py capture "decision or observation" --source agent-name
 python tools/openbrain_client.py recent --limit 5
 python tools/openbrain_client.py stats
 ```
 **MCP (Claude Code sessions):** When connected, `mcp__openbrain__capture/search/recent/stats` tools are available natively — prefer those over CLI when in a Claude session.
 **Rule: capture when you LEARN something. Never when you DO something.**
 | Trigger                                   | Action                                    | Retention             |
 | ----------------------------------------- | ----------------------------------------- | --------------------- |
 | Session start                             | `search` + `recent` to load prior context | —                     |
 | Architectural or tooling decision made    | Capture with rationale                    | `long` or `permanent` |
 | Gotcha or non-obvious behavior discovered | Capture immediately                       | `medium`              |
 | User preference stated or confirmed       | Capture                                   | `permanent`           |
 | Cross-project pattern identified          | Capture                                   | `permanent`           |
 | Prior decision superseded                 | UPDATE existing thought                   | (keep tier)           |
 **Never capture:** task started, commit pushed, PR opened, test results, file edits, CI status.
 Full protocol and cleanup tools: `~/.config/mosaic/guides/MEMORY.md`
 Smart capture wrapper (enforces schema + dedup): `~/.config/mosaic/tools/openbrain/capture.sh`
 ### Excalidraw — Diagram Export (MCP)
 Headless `.excalidraw` → SVG export via `@excalidraw/excalidraw`. Available as MCP tools in Claude Code sessions.
 **MCP tools (when connected):**
 | Tool                                      | Input                                         | Output                                               |
 | ----------------------------------------- | --------------------------------------------- | ---------------------------------------------------- |
 | `mcp__excalidraw__excalidraw_to_svg`      | `elements` JSON string + optional `app_state` | SVG string                                           |
 | `mcp__excalidraw__excalidraw_file_to_svg` | `file_path` to `.excalidraw`                  | SVG string + writes `.svg` alongside                 |
 | `mcp__excalidraw__list_diagrams`          | (none)                                        | Available templates (requires `EXCALIDRAW_GEN_PATH`) |
 | `mcp__excalidraw__generate_diagram`       | `name`, optional `output_path`                | Path to generated `.excalidraw`                      |
 | `mcp__excalidraw__generate_and_export`    | `name`, optional `output_path`                | Paths to `.excalidraw` and `.svg`                    |
 **Diagram generation** (`list_diagrams`, `generate_diagram`, `generate_and_export`) requires `EXCALIDRAW_GEN_PATH` env var pointing to `excalidraw_gen.py`. Set in environment or shell profile:
 ```bash
 export EXCALIDRAW_GEN_PATH="$HOME/src/jarvis-brain/tools/excalidraw_export/excalidraw_gen.py"
 ```
 **Manual registration:**
 ```bash
 mosaic-ensure-excalidraw           # install deps + register with Claude
 mosaic-ensure-excalidraw --check   # verify registration
 ```
 ## Git Providers
 | Instance                      | URL | CLI | Purpose |
 | ----------------------------- | --- | --- | ------- |
 | (add your git providers here) |     |     |         |
 ## Credentials
-**Location:** (configure your credential file path)
+`source ~/.config/mosaic/tools/_lib/credentials.sh && load_credentials <service>`
-**Loader:** `source ~/.config/mosaic/tools/_lib/credentials.sh && load_credentials <service>`
+Supported: portainer, coolify (deprecated), authentik, glpi, github, gitea-mosaicstack,
 gitea-usc, woodpecker, cloudflare, turbo-cache, openbrain. Never expose or commit values.
-**Never expose actual values. Never commit credential files.**
+## OpenBrain — Semantic Memory (PRIMARY) — capture when you LEARN, never when you DO
-## CLI Gotchas
+Primary cross-agent memory (pgvector). Capture decisions/gotchas/preferences/patterns; never task
 starts, commits, PRs, test results, or file edits. At session start, `search` + `recent` to load
 prior context. MCP (`mcp__openbrain__capture/search/recent/stats`) preferred when connected; else
 REST/`tools/openbrain_client.py`. Full protocol: `guides/MEMORY.md`.
-(Add platform-specific CLI gotchas as you discover them.)
+**MANDATORY jarvis-brain rule:** when working in `~/src/jarvis-brain`, NEVER capture project data,
 meeting notes, status, timelines, or task completions to OpenBrain — the flat files
 (`data/projects/*.json`, `data/tasks/*.json`) are the SSOT (use `tools/brain.py` + direct JSON
 edits). In that repo, OpenBrain is for agent meta-observations ONLY (tooling gotchas, framework learnings,
 cross-project patterns). Violating this creates duplicate, divergent data.
 ## Git Providers
 | Host                | Instance         | CI                               |
 | ------------------- | ---------------- | -------------------------------- |
 | git.mosaicstack.dev | mosaic (default) | ci.mosaicstack.dev (`-a mosaic`) |
 | git.uscllc.com      | usc              | ci.uscllc.com (`-a usc`)         |
 Match Woodpecker `-a` and credential instance to the target repo's git remote host.
 ## Safety Defaults
- Prefer `trash` over `rm` when available — recoverable beats gone forever
+- Prefer `trash` over `rm` when available — recoverable beats gone forever.
- Never run destructive commands without explicit instruction
+- Never run destructive commands without explicit instruction.
 - Write it down — "mental notes" don't survive session restarts; files do.
--- a/packages/mosaic/framework/guides/BOOTSTRAP.md
+++ b/packages/mosaic/framework/guides/BOOTSTRAP.md
@@ -453,6 +453,26 @@ Initialize standard labels and the first pre-MVP milestone:
 ---
 ## Secrets Bootstrap (Required for Every New App)
 Every new application MUST complete the following secrets bootstrap before deploying to any non-local environment. This is a hard gate — deployment without a completed secrets bootstrap is forbidden.
 ### Secrets bootstrap checklist
 - [ ] Vault path created: `vault kv put secret/k3s/<app>/ ...` with all required secret fields
 - [ ] Required secrets listed in project README under a "Secrets architecture" section, including:
  - Vault path(s) used
  - All required secret keys and their purpose
  - Whether the app uses ESO bridge (default) or Direct-Vault (opt-in, with justification)
 - [ ] `external-secret.yaml` manifest committed to repo's `deploy/` or `k8s/` directory
 - [ ] Deployment YAML references the synced k8s Secret via `secretKeyRef` (not raw env vars or `.env` files)
 - [ ] App startup has schema-based validation for all required env vars (zod / pydantic / envconfig equivalent) that exits non-zero on missing required values
 - [ ] Direct-Vault opt-in (if applicable): justification documented in README + AppRole provisioned + bootstrap credentials stored in Vault and synced via a separate `ExternalSecret`
 See `~/.config/mosaic/guides/VAULT-SECRETS.md` for full worked examples of the ESO bridge pattern, the Direct-Vault opt-in pattern, and the forbidden antipatterns.
 ---
 ## Checklist
 After bootstrapping, verify:
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Hermes Agent	01b05614ff	docs(framework): canonize merge-authority policy (hard gate 13 + E2E gate note) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/pr/ci Pipeline was successful Details Coordinated work: post-review merge go-ahead belongs to the active coordinator/orchestrator session; solo delivery merges without routine confirmation as before. 'No self-merge' means no UNREVIEWED self-merge. Previously this policy existed only as per-host local patches to the preserved ~/.config/mosaic/AGENTS.md (web1 + sb-it-mgr-0-lt rule 38) and was lost from E2E-DELIVERY.md on every framework sync. Shipping it in defaults/AGENTS.md + guides/E2E-DELIVERY.md makes it permanent for fresh installs and upgrades. Policy: Jason, 2026-06-11. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-11 18:41:59 -05:00
jason.woltje	aa221bf92e	release(mosaic): bump @mosaicstack/mosaic 0.0.30 -> 0.0.31 (#534 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details ci/woodpecker/tag/publish Pipeline was successful Details	2026-06-11 19:55:43 +00:00
jason.woltje	799df40f4e	feat(appservice): room provisioning (M4c) (#535 ) Some checks failed ci/woodpecker/push/publish Pipeline was canceled Details ci/woodpecker/push/ci Pipeline was canceled Details	2026-06-11 19:50:55 +00:00
jason.woltje	b79e9f32c6	chore(framework): canonize Vault-as-SSOT + ESO-default secrets policy (#519 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-06-11 19:07:00 +00:00
jason.woltje	89d69eb23b	docs: add mission control and coordination resilience docs (#511 ) Some checks failed ci/woodpecker/push/ci Pipeline was canceled Details ci/woodpecker/push/publish Pipeline was canceled Details	2026-06-11 19:06:35 +00:00
jason.woltje	59b611ba8a	refactor(framework): thin-core prompt diet — cut injected contract ~53% (#529 ) Some checks failed ci/woodpecker/push/ci Pipeline was canceled Details ci/woodpecker/push/publish Pipeline was successful Details ci/woodpecker/pr/ci Pipeline was successful Details	2026-06-11 18:10:42 +00:00
jason.woltje	dfa0be42f6	feat(framework/tools): inter-agent tmux comms — agent-send.sh + addressing standard (#533 ) Some checks failed ci/woodpecker/push/ci Pipeline was canceled Details ci/woodpecker/push/publish Pipeline was canceled Details	2026-06-11 18:01:44 +00:00
jason.woltje	bb96a3f23e	ci: publish mosaic-as appservice image (#532 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-06-10 23:00:38 +00:00
jason.woltje	48b2f28e45	feat(appservice): mosaic-as daemon host + container (M4a) (#531 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-06-10 22:16:28 +00:00
jason.woltje	8f09c910a9	feat(appservice): Matrix Application Service core library (M4a) (#530 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-06-10 21:23:25 +00:00
jason.woltje	dde95a59b3	fix(pi): reduce startup skill-token overhead (#527 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-06-05 18:36:42 +00:00
jason.woltje	821e19dcbb	fix(mosaic-tools): roll up Gitea and Woodpecker wrapper fixes (#524 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-05-26 20:56:09 +00:00
jason.woltje	755df9079e	Merge pull request 'fix(db): bootstrap migrations on local-tier gateway startup' (#510 ) from fix/db-bootstrap-migrations into main	2026-05-04 22:13:14 +00:00
jason.woltje	ac5650d9f9	fix(db): bootstrap migrations on local-tier gateway startup Fresh `mosaic gateway install` (npm) left the gateway DB schema empty — sign-in 500'd with `relation "users" does not exist`, and every entry point (auth, bootstrap setup) failed because they all query the users table first. Five stacked bugs on the local (PGlite) tier: 1. `packages/db/package.json` `files: ["dist"]` excluded the `drizzle/` SQL migrations from the published tarball. 2. `runMigrations()` only supports postgres-js — unusable for embedded PGlite. 3. `apps/gateway/src/database/database.module.ts` never invoked migrations at startup. 4. `createPgliteDb` didn't load pgvector, so migration 0001's `CREATE EXTENSION vector` failed. 5. Drizzle's PG migrator wraps every migration in one outer transaction, which trips Postgres' `check_safe_enum_use` on migration 0009 (`ALTER TYPE ADD VALUE 'pending'` → `SET DEFAULT 'pending'` in the same tx). Changes: - Ship `drizzle/` in the published tarball. - `createPgliteDb` loads `@electric-sql/pglite/vector`. - New `runPgliteMigrations(handle)` walks the Drizzle journal and runs each statement-breakpoint chunk through PGlite's `client.exec()` (autocommit per statement). Records into `drizzle.__drizzle_migrations` for interop with the postgres-js path. Per-statement try/catch surfaces which statement of which migration failed. - `DatabaseModule` runs migrations in `OnModuleInit` before `app.listen()`. Local tier: explicit `runPgliteMigrations` then `storageAdapter.migrate()`. Postgres tier: just `storageAdapter.migrate()`, which already calls `runMigrations(url)` internally — no double-call. - Removed `packages/storage/src/test-utils/pglite-with-vector.ts`. The "intentionally not exported" rationale is moot now that migration 0001 forces pgvector load anyway. The integration test uses `createPgliteDb` + `runPgliteMigrations` from `@mosaicstack/db`. 
Tests: BetterAuth tables exist after migrate; idempotent (re-runs 0009); partial-failure surfaces statement-level context and leaves no ledger row. QA on a fresh PGlite install: - `Applying PGlite schema migrations...` then `Initializing storage adapter (pglite)...` in startup log. - `GET /api/bootstrap/status` → `{"needsSetup":true}` HTTP 200 (was 500). - `POST /api/bootstrap/setup` reaches Zod validator (was 500). Scope: this PR fixes the local (PGlite) tier. Postgres-tier first install still has the outer-transaction problem and a journal ordering bug (0009's `when` < 0008's). Documented inline as TODO and in the scratchpad — needs a separate change with real-Postgres validation. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>	2026-05-04 17:06:50 -05:00
jason.woltje	bd83f86740	Merge pull request 'feat(federation): mTLS AuthGuard with OID-based grant resolution (FED-M3-03)' (#509 ) from feat/federation-m3-auth-guard into main All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-25 13:27:20 +00:00
Jarvis	0af3e218a1	fix(federation/auth-guard): remediate CRIT-1/CRIT-2 + HIGH-1..4 review findings All checks were successful ci/woodpecker/pr/ci Pipeline was successful Details ci/woodpecker/push/ci Pipeline was successful Details - CRIT-1: Validate cert subjectUserId against grant.subjectUserId from DB; use authoritative DB value in FederationContext - CRIT-2: Add @Inject(GrantsService) decorator (tsx/esbuild requirement) - HIGH-1: Validate UTF8String TLV tag, length, and bounds in OID parser - HIGH-2: Collapse all 403 wire messages to a generic string to prevent grant enumeration; keep internal logger detail - HIGH-3: Assert federation wire envelope shape in all guard tests - HIGH-4: Regression test for subjectUserId cert/DB mismatch Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-25 06:33:37 -05:00
Jarvis	b01c9b3bb0	feat(federation): mTLS AuthGuard with OID-based grant resolution (FED-M3-03) Adds FederationAuthGuard that validates inbound mTLS client certs on federation API routes. Extracts custom OIDs (grantId, subjectUserId), loads the grant+peer from DB in one query, asserts active status, and validates cert serial as defense-in-depth. Attaches FederationContext to requests on success and uses federation wire-format error envelopes (not raw NestJS exceptions) for 401/403 responses. New files: - apps/gateway/src/federation/oid.util.ts — shared OID extraction (no dupe ASN.1 logic) - apps/gateway/src/federation/server/federation-auth.guard.ts — guard impl - apps/gateway/src/federation/server/federation-context.ts — FederationContext type + module augment - apps/gateway/src/federation/server/index.ts — barrel export - apps/gateway/src/federation/server/__tests__/federation-auth.guard.spec.ts — 11 unit tests Modified: - apps/gateway/src/federation/grants.service.ts — adds getGrantWithPeer() with join - apps/gateway/src/federation/federation.module.ts — registers FederationAuthGuard as provider Closes #462 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-25 06:33:37 -05:00
jason.woltje	b67f2c9f08	Merge pull request 'feat(federation): outbound mTLS FederationClient (FED-M3-08)' (#508 ) from feat/federation-m3-client into main All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-24 04:30:29 +00:00
Jarvis	37675ae3f2	fix(federation/client): serialize cache fills, destroy evicted Agent, cover env-var guard All checks were successful ci/woodpecker/pr/ci Pipeline was successful Details ci/woodpecker/push/ci Pipeline was successful Details - HIGH-A: resolveEntry now uses promise-cache pattern so concurrent callers serialize on a single in-flight build, eliminating duplicate key material in heap and duplicate DB round-trips - HIGH-B: flushPeer destroys the evicted undici Agent so stale TLS connections close on cert rotation - MED-C: add regression test for PEER_MISCONFIGURED when STEP_CA_ROOT_CERT_PATH is unset Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-23 22:56:57 -05:00
Jarvis	a4a6769a6d	fix(federation/client): pin Step-CA root, fix lockfile, harden cache test All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/pr/ci Pipeline was successful Details CRIT-1: regenerate pnpm-lock.yaml so apps/gateway resolves undici@7.24.6 (prior PR pushed package.json without lockfile update; CI failed with ERR_PNPM_OUTDATED_LOCKFILE). Incidentally cleans 57 lines of stale peer-dep entries. CRIT-2: cache-hit test no longer swallows resolveEntry errors. Calls the private method directly twice and asserts identity equality plus a single DB select, removing the silent-failure path the prior assertion allowed. HIGH-1: mTLS Agent now pins Step-CA root via STEP_CA_ROOT_CERT_PATH. Without the env var resolveEntry throws PEER_MISCONFIGURED, refusing to dial peers against the public trust store. PEM is read once and cached on the service instance. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-04-23 22:30:09 -05:00
Jarvis	21650fb194	feat(federation): outbound mTLS FederationClient (FED-M3-08) Some checks failed ci/woodpecker/push/ci Pipeline failed Details ci/woodpecker/pr/ci Pipeline failed Details Implements FederationClientService — a NestJS injectable that dials peer gateways over mTLS (undici Agent with cert+sealed-key from federation_peers), invokes list/get/capabilities verbs, validates responses via Zod, and surfaces all failure modes as typed FederationClientError with a coherent error code taxonomy (PEER_NOT_FOUND, PEER_INACTIVE, PEER_MISCONFIGURED, NETWORK, FORBIDDEN, HTTP_{status}, INVALID_RESPONSE). Per-peer Agent instances are cached in a Map for the service lifetime; flushPeer(peerId) invalidates the cache for M5/M6 cert rotation and revocation events. Wired into FederationModule providers + exports so QuerySourceService (M3-09) can inject it. 13 unit tests covering all required scenarios via undici MockAgent + real sealClientKey/unsealClientKey round-trip. Closes #462 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-23 22:16:52 -05:00
jason.woltje	89c733e0b9	feat(federation): two-gateway test harness scaffold (FED-M3-02) (#505 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-24 03:01:25 +00:00
jason.woltje	ee3f2defd9	feat(types): federation v1 DTOs (FED-M3-01) (#506 ) Some checks failed ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline failed Details	2026-04-24 02:54:40 +00:00
jason.woltje	7342c1290d	fix(federation): use real PEM certs in enrollment + ca service tests (#507 ) Some checks failed ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline failed Details	2026-04-24 02:43:42 +00:00
jason.woltje	e64ddd2c1c	docs(federation): M3 mission planning — 14-task decomposition (#504 ) Some checks failed ci/woodpecker/push/ci Pipeline failed Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-24 01:13:40 +00:00
jason.woltje	4ece6dc643	chore(federation): M2 milestone close (FED-M2-13) (#503 ) Some checks failed ci/woodpecker/push/ci Pipeline failed Details ci/woodpecker/tag/publish Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-22 06:09:54 +00:00
jason.woltje	194c3b603e	docs(federation): M2 Step-CA setup guide + admin CLI reference (FED-M2-12) (#502 ) Some checks failed ci/woodpecker/push/publish Pipeline failed Details ci/woodpecker/push/ci Pipeline failed Details	2026-04-22 06:06:45 +00:00
jason.woltje	fc1600b738	fix(federation): security hardening — OID verification, atomic activation, audit on failure (#501 ) Some checks failed ci/woodpecker/push/ci Pipeline failed Details ci/woodpecker/push/publish Pipeline failed Details	2026-04-22 06:02:52 +00:00
jason.woltje	0ee5b14c68	test(federation): M2 E2E peer-add enrollment flow (FED-M2-10) (#500 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-22 05:37:06 +00:00
jason.woltje	3eee176cc3	test(federation): M2 integration tests (FED-M2-09) (#499 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-22 05:08:24 +00:00
jason.woltje	74fe60d8d6	feat(federation): admin controller + CLI federation commands (FED-M2-08) (#498 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-22 04:39:46 +00:00
jason.woltje	0bfaa56e9e	feat(federation): enrollment controller + single-use token flow (FED-M2-07) (#497 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-22 04:23:19 +00:00
jason.woltje	01dd6b9fa1	feat(federation): grants service CRUD + status transitions (FED-M2-06) (#496 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-22 03:57:12 +00:00
jason.woltje	1038ae76e1	feat(federation): Step-CA client service for grant certs (FED-M2-04) (#494 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-22 03:34:37 +00:00
jason.woltje	bf082d95a0	feat(federation): seal federation peer client keys at rest (FED-M2-05) (#495 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-22 03:10:20 +00:00
jason.woltje	bb24292cf7	fix(federation): healthcheck + restart policy for federated-test stacks (#492 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-22 02:56:40 +00:00
jason.woltje	f2cda52e1a	fix(deploy): bump gateway image digest to sha-9f1a081 [DEPLOY-IMG-FIX] (#491 ) All checks were successful ci/woodpecker/push/publish Pipeline was successful Details ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/pr/ci Pipeline was successful Details	2026-04-22 02:35:19 +00:00
jason.woltje	7d7cf012f0	feat(federation): scope schema validator [FED-M2-03] (#489 ) Some checks failed ci/woodpecker/push/ci Pipeline failed Details ci/woodpecker/push/publish Pipeline failed Details	2026-04-22 02:31:13 +00:00
jason.woltje	c56dda74aa	feat(federation): Step-CA sidecar in federated compose [FED-M2-02] (#490 ) Some checks failed ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline failed Details	2026-04-22 02:21:49 +00:00
jason.woltje	9f1a08185e	docs(federation): S21 tracking — DEPLOY-01/02 done, IMG-FIX in flight, M2-01 in remediation (#487 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-22 02:02:36 +00:00
jason.woltje	d2e408656b	fix(docker): pnpm deploy for self-contained gateway runtime image (#488 ) Some checks failed ci/woodpecker/push/publish Pipeline failed Details ci/woodpecker/push/ci Pipeline failed Details	2026-04-22 02:02:29 +00:00
jason.woltje	54c278b871	feat(db): federation schema — grants/peers/audit_log [FED-M2-01] (#486 ) Some checks failed ci/woodpecker/push/publish Pipeline failed Details ci/woodpecker/push/ci Pipeline failed Details	2026-04-22 02:02:21 +00:00
jason.woltje	4dbd429203	feat(deploy): portainer stack template for federation test instances [DEPLOY-02] (#485 ) All checks were successful ci/woodpecker/push/publish Pipeline was successful Details ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/pr/ci Pipeline was successful Details	2026-04-22 01:34:44 +00:00
		`@@ -0,0 +1 @@`
							`dev-only-step-ca-password-do-not-use-in-production`
		`@@ -0,0 +1,2 @@`
							`ALTER TYPE "public"."grant_status" ADD VALUE 'pending' BEFORE 'active';--> statement-breakpoint`
							`ALTER TABLE "federation_grants" ALTER COLUMN "status" SET DEFAULT 'pending';`