fix(federation): harness round-2 — email validation + host-side URL rewrite

- Bug-1: replace whitespace in admin email local-part (was breaking @IsEmail)
- Bug-2: rewrite enrollment URL to use host-accessible base in seed.ts (in-cluster URL not resolvable from host)
- Bug-3: correct README Known Limitations section
- eslint.config.mjs: add tools/federation-harness/*.ts to allowDefaultProject so pre-commit hook can lint harness scripts

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
fix(federation): harness CRIT bugs — admin bootstrap auth + peer FK + boot deadline (review remediation)
2026-04-23 21:54:46 -05:00 · 2026-04-23 21:54:46 -05:00 · 2026-04-23 21:54:46 -05:00 · 2026-04-24 02:54:40 +00:00 · 2026-04-24 02:43:42 +00:00 · 2026-04-24 01:13:40 +00:00
21 changed files with 2674 additions and 71 deletions
--- a/apps/gateway/src/federation/__tests__/enrollment.service.spec.ts
+++ b/apps/gateway/src/federation/__tests__/enrollment.service.spec.ts
@@ -24,10 +24,11 @@
 */
 import 'reflect-metadata';
-import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { describe, it, expect, vi, beforeEach, beforeAll } from 'vitest';
 import { GoneException, NotFoundException } from '@nestjs/common';
 import type { Db } from '@mosaicstack/db';
 import { EnrollmentService } from '../enrollment.service.js';
 import { makeSelfSignedCert } from './helpers/test-cert.js';
 // ---------------------------------------------------------------------------
 // Test constants
@@ -38,10 +39,18 @@ const PEER_ID = 'p2222222-2222-2222-2222-222222222222';
 const USER_ID = 'u3333333-3333-3333-3333-333333333333';
 const TOKEN = 'a'.repeat(64); // 64-char hex
-const MOCK_CERT_PEM = '-----BEGIN CERTIFICATE-----\nMOCK\n-----END CERTIFICATE-----\n';
+// Real self-signed EC P-256 cert — populated once in beforeAll.
-const MOCK_CHAIN_PEM = MOCK_CERT_PEM + MOCK_CERT_PEM;
+// Required because EnrollmentService.extractCertNotAfter calls new X509Certificate(certPem)
 // with strict parsing (PR #501 HIGH-2: no silent fallback).
 let REAL_CERT_PEM: string;
 const MOCK_CHAIN_PEM = () => REAL_CERT_PEM + REAL_CERT_PEM;
 const MOCK_SERIAL = 'ABCD1234';
 beforeAll(async () => {
  REAL_CERT_PEM = await makeSelfSignedCert();
 });
 // ---------------------------------------------------------------------------
 // Factory helpers
 // ---------------------------------------------------------------------------
@@ -103,11 +112,27 @@ function makeDb({
  const claimUpdateMock = vi.fn().mockReturnValue({ set: setClaimMock });
  // transaction(cb) — cb receives txMock; txMock has update + insert
-  const txInsertValues = vi.fn().mockResolvedValue(undefined);
+  //
-  const txInsertMock = vi.fn().mockReturnValue({ values: txInsertValues });
+  // The tx mock must support two tx.update() call patterns (CRIT-2, PR #501):
-  const txWhereUpdate = vi.fn().mockResolvedValue(undefined);
+  //   1. Grant activation:  .update().set().where().returning() → resolves to [{ id }]
  //   2. Peer update:       .update().set().where()             → resolves to undefined
  //
  // We achieve this by making txWhereUpdate return an object with BOTH a thenable
  // interface (so `await tx.update().set().where()` works) AND a .returning() method.
  const txGrantActivatedRow = { id: GRANT_ID };
  const txReturningMock = vi.fn().mockResolvedValue([txGrantActivatedRow]);
  const txWhereUpdate = vi.fn().mockReturnValue({
    // .returning() for grant activation (first tx.update call)
    returning: txReturningMock,
    // thenables so `await tx.update().set().where()` also works for peer update
    then: (resolve: (v: undefined) => void) => resolve(undefined),
    catch: () => undefined,
    finally: () => undefined,
  });
  const txSetMock = vi.fn().mockReturnValue({ where: txWhereUpdate });
  const txUpdateMock = vi.fn().mockReturnValue({ set: txSetMock });
  const txInsertValues = vi.fn().mockResolvedValue(undefined);
  const txInsertMock = vi.fn().mockReturnValue({ values: txInsertValues });
  const txMock = { update: txUpdateMock, insert: txInsertMock };
  const transactionMock = vi
    .fn()
@@ -132,6 +157,7 @@ function makeDb({
      txInsertValues,
      txInsertMock,
      txWhereUpdate,
      txReturningMock,
      txSetMock,
      txUpdateMock,
      txMock,
@@ -146,11 +172,13 @@ function makeDb({
 function makeCaService() {
  return {
-    issueCert: vi.fn().mockResolvedValue({
+    // REAL_CERT_PEM is populated by beforeAll — safe to reference via closure here
-      certPem: MOCK_CERT_PEM,
+    // because makeCaService() is only called after the suite's beforeAll runs.
-      certChainPem: MOCK_CHAIN_PEM,
+    issueCert: vi.fn().mockImplementation(async () => ({
      certPem: REAL_CERT_PEM,
      certChainPem: MOCK_CHAIN_PEM(),
      serialNumber: MOCK_SERIAL,
-    }),
+    })),
  };
 }
@@ -301,29 +329,29 @@ describe('EnrollmentService.redeem — success path', () => {
    });
    caService.issueCert.mockImplementation(async () => {
      callOrder.push('issueCert');
-      return { certPem: MOCK_CERT_PEM, certChainPem: MOCK_CHAIN_PEM, serialNumber: MOCK_SERIAL };
+      return { certPem: REAL_CERT_PEM, certChainPem: MOCK_CHAIN_PEM(), serialNumber: MOCK_SERIAL };
    });
-    await service.redeem(TOKEN, MOCK_CERT_PEM);
+    await service.redeem(TOKEN, '---CSR---');
    expect(callOrder).toEqual(['claim', 'issueCert']);
  });
  it('calls CaService.issueCert with grantId, subjectUserId, csrPem, ttlSeconds=300', async () => {
-    await service.redeem(TOKEN, MOCK_CERT_PEM);
+    await service.redeem(TOKEN, '---CSR---');
    expect(caService.issueCert).toHaveBeenCalledWith(
      expect.objectContaining({
        grantId: GRANT_ID,
        subjectUserId: USER_ID,
-        csrPem: MOCK_CERT_PEM,
+        csrPem: '---CSR---',
        ttlSeconds: 300,
      }),
    );
  });
  it('runs activate grant + peer update + audit inside a transaction', async () => {
-    await service.redeem(TOKEN, MOCK_CERT_PEM);
+    await service.redeem(TOKEN, '---CSR---');
    expect(db._mocks.transactionMock).toHaveBeenCalledOnce();
    // tx.update called twice: activate grant + update peer
@@ -333,17 +361,17 @@ describe('EnrollmentService.redeem — success path', () => {
  });
  it('activates grant (sets status=active) inside the transaction', async () => {
-    await service.redeem(TOKEN, MOCK_CERT_PEM);
+    await service.redeem(TOKEN, '---CSR---');
    expect(db._mocks.txSetMock).toHaveBeenCalledWith(expect.objectContaining({ status: 'active' }));
  });
  it('updates the federationPeers row with certPem, certSerial, state=active inside the transaction', async () => {
-    await service.redeem(TOKEN, MOCK_CERT_PEM);
+    await service.redeem(TOKEN, '---CSR---');
    expect(db._mocks.txSetMock).toHaveBeenCalledWith(
      expect.objectContaining({
-        certPem: MOCK_CERT_PEM,
+        certPem: REAL_CERT_PEM,
        certSerial: MOCK_SERIAL,
        state: 'active',
      }),
@@ -351,7 +379,7 @@ describe('EnrollmentService.redeem — success path', () => {
  });
  it('inserts an audit log row inside the transaction', async () => {
-    await service.redeem(TOKEN, MOCK_CERT_PEM);
+    await service.redeem(TOKEN, '---CSR---');
    expect(db._mocks.txInsertValues).toHaveBeenCalledWith(
      expect.objectContaining({
@@ -363,11 +391,11 @@ describe('EnrollmentService.redeem — success path', () => {
  });
  it('returns { certPem, certChainPem } from CaService', async () => {
-    const result = await service.redeem(TOKEN, MOCK_CERT_PEM);
+    const result = await service.redeem(TOKEN, '---CSR---');
    expect(result).toEqual({
-      certPem: MOCK_CERT_PEM,
+      certPem: REAL_CERT_PEM,
-      certChainPem: MOCK_CHAIN_PEM,
+      certChainPem: MOCK_CHAIN_PEM(),
    });
  });
 });
--- a/apps/gateway/src/federation/__tests__/helpers/test-cert.ts
+++ b/apps/gateway/src/federation/__tests__/helpers/test-cert.ts
@@ -0,0 +1,138 @@
 /**
 * Test helpers for generating real X.509 PEM certificates in unit tests.
 *
 * PR #501 (FED-M2-11) introduced strict `new X509Certificate(certPem)` parsing
 * in both EnrollmentService.extractCertNotAfter and CaService.issueCert — dummy
 * cert strings now throw `error:0680007B:asn1 encoding routines::header too long`.
 *
 * These helpers produce minimal but cryptographically valid self-signed EC P-256
 * certificates via @peculiar/x509 + Node.js webcrypto, suitable for test mocks.
 *
 * Two variants:
 *  - makeSelfSignedCert()          Plain cert — satisfies node:crypto X509Certificate parse.
 *  - makeMosaicIssuedCert(opts)    Cert with custom Mosaic OID extensions — satisfies the
 *                                  CRIT-1 OID presence + value checks in CaService.issueCert.
 */
 import { webcrypto } from 'node:crypto';
 import {
  X509CertificateGenerator,
  Extension,
  KeyUsagesExtension,
  KeyUsageFlags,
  BasicConstraintsExtension,
  cryptoProvider,
 } from '@peculiar/x509';
 // ---------------------------------------------------------------------------
 // Internal helpers
 // ---------------------------------------------------------------------------
 /**
 * Build the TLV bytes of an ASN.1 UTF8String for `value`:
 *   tag 0x0C, one short-form length byte, then the UTF-8 encoding of `value`.
 *
 * Only the single-byte (short-form) length is supported, so the UTF-8 payload
 * may be at most 127 bytes long.
 *
 * The layout is deliberate: per the notes in this helper file, CaService.issueCert
 * recovers the value with `decoder.decode(grantIdExt.value.slice(2))`, i.e. it drops
 * the tag byte and the length byte and decodes the rest as UTF-8. The bytes returned
 * here are therefore used verbatim as the extension's OCTET STRING content.
 *
 * @param value - Text to wrap; measured in UTF-8 bytes, not in characters.
 * @returns A fresh `Uint8Array` of `2 + byteLength` bytes.
 * @throws Error when the UTF-8 encoding of `value` exceeds 127 bytes.
 */
 function encodeUtf8String(value: string): Uint8Array {
  const payload = new TextEncoder().encode(value);
  // Lengths above 127 would need the long-form encoding, which the `.slice(2)`
  // reader described above cannot cope with — refuse instead of emitting it.
  if (payload.length > 127) {
    throw new Error('encodeUtf8String: value too long for single-byte length encoding');
  }
  // 0x0c = ASN.1 UTF8String tag, followed by the length byte and the payload.
  return Uint8Array.of(0x0c, payload.length, ...payload);
 }
 // ---------------------------------------------------------------------------
 // Mosaic OID constants (must match production CaService)
 // ---------------------------------------------------------------------------
 const OID_MOSAIC_GRANT_ID = '1.3.6.1.4.1.99999.1';
 const OID_MOSAIC_SUBJECT_USER_ID = '1.3.6.1.4.1.99999.2';
 // ---------------------------------------------------------------------------
 // Public API
 // ---------------------------------------------------------------------------
 /** Lifetime of every generated test certificate, in milliseconds (1 day). */
 const TEST_CERT_LIFETIME_MS = 86_400_000;
 /**
 * Shared generator behind the exported helpers: creates a self-signed EC P-256
 * certificate (serial `01`, valid from now for one day) and returns it as PEM.
 *
 * Every certificate carries BasicConstraints(CA=false) and
 * KeyUsage(digitalSignature); `extraExtensions` are appended after those two.
 *
 * @param subjectName - Distinguished name used for the certificate, e.g. `CN=harness-test`.
 * @param extraExtensions - Additional extensions to embed; defaults to none.
 * @returns The PEM-encoded certificate.
 */
 async function createTestCert(
  subjectName: string,
  extraExtensions: readonly Extension[] = [],
 ): Promise<string> {
  // Ensure @peculiar/x509 uses Node.js webcrypto (available as globalThis.crypto in Node 19+,
  // but we set it explicitly here to be safe on all Node 18+ versions).
  cryptoProvider.set(webcrypto as unknown as Parameters<typeof cryptoProvider.set>[0]);
  const alg = { name: 'ECDSA', namedCurve: 'P-256', hash: 'SHA-256' } as const;
  // A fresh, non-extractable key pair per certificate; the private key is only
  // needed for the self-signature below.
  const keys = await webcrypto.subtle.generateKey(alg, false, ['sign', 'verify']);
  const notBefore = new Date();
  const notAfter = new Date(notBefore.getTime() + TEST_CERT_LIFETIME_MS);
  const cert = await X509CertificateGenerator.createSelfSigned({
    serialNumber: '01',
    name: subjectName,
    notBefore,
    notAfter,
    signingAlgorithm: alg,
    keys,
    extensions: [
      new BasicConstraintsExtension(false),
      new KeyUsagesExtension(KeyUsageFlags.digitalSignature),
      ...extraExtensions,
    ],
  });
  return cert.toString('pem');
 }
 /**
 * Generate a minimal self-signed EC P-256 certificate valid for 1 day.
 * CN=harness-test, no custom extensions.
 *
 * Suitable for:
 *  - EnrollmentService.extractCertNotAfter (just needs parseable PEM)
 *  - Any mock that returns certPem / certChainPem without OID checks
 *
 * @returns The PEM-encoded certificate.
 */
 export async function makeSelfSignedCert(): Promise<string> {
  return createTestCert('CN=harness-test');
 }
 /**
 * Generate a self-signed EC P-256 certificate that contains the two custom
 * Mosaic OID extensions required by CaService.issueCert's CRIT-1 check:
 *   OID 1.3.6.1.4.1.99999.1  → mosaic_grant_id   (value = grantId)
 *   OID 1.3.6.1.4.1.99999.2  → mosaic_subject_user_id (value = subjectUserId)
 *
 * The extension value encoding matches the production parser's `.slice(2)` assumption:
 * each extension value is an OCTET STRING wrapping an ASN.1 UTF8String TLV.
 *
 * @param opts - `grantId` and `subjectUserId` to embed; each must encode to at most
 *               127 UTF-8 bytes (limit enforced by `encodeUtf8String`).
 * @returns The PEM-encoded certificate (CN=mosaic-issued-test, valid for 1 day).
 */
 export async function makeMosaicIssuedCert(opts: {
  grantId: string;
  subjectUserId: string;
 }): Promise<string> {
  return createTestCert('CN=mosaic-issued-test', [
    // mosaic_grant_id — OID 1.3.6.1.4.1.99999.1
    new Extension(OID_MOSAIC_GRANT_ID, false, encodeUtf8String(opts.grantId)),
    // mosaic_subject_user_id — OID 1.3.6.1.4.1.99999.2
    new Extension(OID_MOSAIC_SUBJECT_USER_ID, false, encodeUtf8String(opts.subjectUserId)),
  ]);
 }
--- a/apps/gateway/src/federation/ca.service.spec.ts
+++ b/apps/gateway/src/federation/ca.service.spec.ts
@@ -20,9 +20,10 @@
 */
 import 'reflect-metadata';
-import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest';
+import { describe, it, expect, vi, beforeEach, beforeAll, type Mock } from 'vitest';
 import { jwtVerify, exportJWK, generateKeyPair } from 'jose';
 import { Pkcs10CertificateRequestGenerator } from '@peculiar/x509';
 import { makeMosaicIssuedCert } from './__tests__/helpers/test-cert.js';
 // ---------------------------------------------------------------------------
 // Mock node:https BEFORE importing CaService so the mock is in place when
@@ -74,6 +75,11 @@ const FAKE_CA_PEM = FAKE_CERT_PEM;
 const GRANT_ID = 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11';
 const SUBJECT_USER_ID = 'b1ffcd00-0d1c-5f09-cc7e-7cc0ce491b22';
 // Real self-signed cert containing both Mosaic OID extensions — populated in beforeAll.
 // Required because CaService.issueCert performs CRIT-1 OID presence/value checks on the
 // response cert (PR #501 — strict parsing, no silent fallback).
 let realIssuedCertPem: string;
 // ---------------------------------------------------------------------------
 // Generate a real EC P-256 key pair and CSR for integration-style tests
 // ---------------------------------------------------------------------------
@@ -194,6 +200,15 @@ function makeHttpsMock(statusCode: number, body: unknown, errorMsg?: string): vo
 describe('CaService', () => {
  let service: CaService;
  beforeAll(async () => {
    // Generate a cert with the two Mosaic OIDs so that CaService.issueCert's
    // CRIT-1 OID checks pass when mock step-ca returns it as `crt`.
    realIssuedCertPem = await makeMosaicIssuedCert({
      grantId: GRANT_ID,
      subjectUserId: SUBJECT_USER_ID,
    });
  });
  beforeEach(() => {
    vi.clearAllMocks();
    service = new CaService();
@@ -226,9 +241,9 @@ describe('CaService', () => {
    // Now test that the service's validateCsr accepts it.
    // We call it indirectly via issueCert with a successful mock.
-    makeHttpsMock(200, { crt: FAKE_CERT_PEM, certChain: [FAKE_CERT_PEM, FAKE_CA_PEM] });
+    makeHttpsMock(200, { crt: realIssuedCertPem, certChain: [realIssuedCertPem, FAKE_CA_PEM] });
    const result = await service.issueCert(makeReq({ csrPem: realCsrPem }));
-    expect(result.certPem).toBe(FAKE_CERT_PEM);
+    expect(result.certPem).toBe(realIssuedCertPem);
  });
  it('throws INVALID_CSR for a malformed PEM-shaped CSR', async () => {
@@ -251,14 +266,14 @@ describe('CaService', () => {
  it('returns IssuedCertDto on success (certChain present)', async () => {
    if (!realCsrPem) realCsrPem = await generateRealCsr();
    makeHttpsMock(200, {
-      crt: FAKE_CERT_PEM,
+      crt: realIssuedCertPem,
-      certChain: [FAKE_CERT_PEM, FAKE_CA_PEM],
+      certChain: [realIssuedCertPem, FAKE_CA_PEM],
    });
    const result = await service.issueCert(makeReq());
-    expect(result.certPem).toBe(FAKE_CERT_PEM);
+    expect(result.certPem).toBe(realIssuedCertPem);
-    expect(result.certChainPem).toContain(FAKE_CERT_PEM);
+    expect(result.certChainPem).toContain(realIssuedCertPem);
    expect(result.certChainPem).toContain(FAKE_CA_PEM);
    expect(typeof result.serialNumber).toBe('string');
  });
@@ -270,14 +285,14 @@ describe('CaService', () => {
  it('builds certChainPem from crt+ca when certChain is absent', async () => {
    if (!realCsrPem) realCsrPem = await generateRealCsr();
    makeHttpsMock(200, {
-      crt: FAKE_CERT_PEM,
+      crt: realIssuedCertPem,
      ca: FAKE_CA_PEM,
    });
    const result = await service.issueCert(makeReq());
-    expect(result.certPem).toBe(FAKE_CERT_PEM);
+    expect(result.certPem).toBe(realIssuedCertPem);
-    expect(result.certChainPem).toContain(FAKE_CERT_PEM);
+    expect(result.certChainPem).toContain(realIssuedCertPem);
    expect(result.certChainPem).toContain(FAKE_CA_PEM);
  });
@@ -287,12 +302,12 @@ describe('CaService', () => {
  it('falls back to certPem alone when certChain and ca are absent', async () => {
    if (!realCsrPem) realCsrPem = await generateRealCsr();
-    makeHttpsMock(200, { crt: FAKE_CERT_PEM });
+    makeHttpsMock(200, { crt: realIssuedCertPem });
    const result = await service.issueCert(makeReq());
-    expect(result.certPem).toBe(FAKE_CERT_PEM);
+    expect(result.certPem).toBe(realIssuedCertPem);
-    expect(result.certChainPem).toBe(FAKE_CERT_PEM);
+    expect(result.certChainPem).toBe(realIssuedCertPem);
  });
  // -------------------------------------------------------------------------
@@ -398,7 +413,7 @@ describe('CaService', () => {
          statusCode: 200,
          on: (event: string, cb: (chunk?: Buffer) => void) => {
            if (event === 'data') {
-              cb(Buffer.from(JSON.stringify({ crt: FAKE_CERT_PEM })));
+              cb(Buffer.from(JSON.stringify({ crt: realIssuedCertPem })));
            }
            if (event === 'end') {
              cb();
@@ -555,7 +570,7 @@ describe('CaService', () => {
          statusCode: 200,
          on: (event: string, cb: (chunk?: Buffer) => void) => {
            if (event === 'data') {
-              cb(Buffer.from(JSON.stringify({ crt: FAKE_CERT_PEM })));
+              cb(Buffer.from(JSON.stringify({ crt: realIssuedCertPem })));
            }
            if (event === 'end') {
              cb();
--- a/docs/federation/MISSION-MANIFEST.md
+++ b/docs/federation/MISSION-MANIFEST.md
@@ -7,11 +7,11 @@
 **ID:** federation-v1-20260419
 **Statement:** Jarvis operates across 3–4 workstations in two physical locations (home, USC). The user currently reaches back to a single jarvis-brain checkout from every session; a prior OpenBrain attempt caused cache, latency, and opacity pain. This mission builds asymmetric federation between Mosaic Stack gateways so that a session on a user's home gateway can query their work gateway in real time without data ever persisting across the boundary, with full multi-tenant isolation and standard-PKI (X.509 / Step-CA) trust management.
-**Phase:** M2 active — Step-CA + grant schema + admin CLI; parallel test-deploy workstream stood up
+**Phase:** M3 active — mTLS handshake + list/get/capabilities verbs + scope enforcement
-**Current Milestone:** FED-M2
+**Current Milestone:** FED-M3
-**Progress:** 1 / 7 milestones
+**Progress:** 2 / 7 milestones
 **Status:** active
-**Last Updated:** 2026-04-21 (M2 decomposed; mos-test-1/-2 designated as federation E2E test hosts)
+**Last Updated:** 2026-04-21 (M2 closed via PR #503, tag `fed-v0.2.0-m2`, issue #461 closed; M3 decomposed into 14 tasks)
 **Parent Mission:** None — new mission
 ## Test Infrastructure
@@ -63,8 +63,8 @@ Key design references:
 | #   | ID     | Name                                          | Status      | Branch             | Issue | Started    | Completed  |
 | --- | ------ | --------------------------------------------- | ----------- | ------------------ | ----- | ---------- | ---------- |
 | 1   | FED-M1 | Federated tier infrastructure                 | done        | (12 PRs #470-#481) | #460  | 2026-04-19 | 2026-04-19 |
-| 2   | FED-M2 | Step-CA + grant schema + admin CLI            | in-progress | (decomposition)    | #461  | 2026-04-21 | —          |
+| 2   | FED-M2 | Step-CA + grant schema + admin CLI            | done        | (PRs #483-#503)    | #461  | 2026-04-21 | 2026-04-21 |
-| 3   | FED-M3 | mTLS handshake + list/get + scope enforcement | not-started | —                  | #462  | —          | —          |
+| 3   | FED-M3 | mTLS handshake + list/get + scope enforcement | in-progress | (decomposition)    | #462  | 2026-04-21 | —          |
 | 4   | FED-M4 | search verb + audit log + rate limit          | not-started | —                  | #463  | —          | —          |
 | 5   | FED-M5 | Cache + offline degradation + OTEL            | not-started | —                  | #464  | —          | —          |
 | 6   | FED-M6 | Revocation + auto-renewal + CRL               | not-started | —                  | #465  | —          | —          |
@@ -86,16 +86,23 @@ Key design references:
 ## Session History
 | Session | Date                    | Runtime | Outcome                                                                                                                               |
-| ------- | ---------- | ------- | --------------------------------------------------------------------- |
+| ------- | ----------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------- |
 | S1      | 2026-04-19              | claude  | PRD authored, MILESTONES decomposed, 7 issues filed                                                                                   |
 | S2-S4   | 2026-04-19              | claude  | FED-M1 complete: 12 tasks (PRs #470-#481) merged; tag `fed-v0.1.0-m1`                                                                 |
 | S5-S22  | 2026-04-19 → 2026-04-21 | claude  | FED-M2 complete: 13 tasks (PRs #483-#503) merged; tag `fed-v0.2.0-m2`; issue #461 closed. Step-CA + grant schema + admin CLI shipped. |
 | S23     | 2026-04-21              | claude  | M3 decomposed into 14 tasks in `docs/federation/TASKS.md`. Manifest M3 row → in-progress. Next: kickoff M3-01.                        |
 ## Next Step
-FED-M2 active. Decomposition landed in `docs/federation/TASKS.md` (M2-01..M2-13 code workstream + DEPLOY-01..DEPLOY-05 parallel test-deploy workstream, ~88K total). Tracking issue #482.
+FED-M3 active. Decomposition landed in `docs/federation/TASKS.md` (M3-01..M3-14, ~100K estimate). Tracking issue #462.
-Parallel execution plan:
+Execution plan (parallel where possible):
- **CODE workstream**: M2-01 (DB migration) starts immediately — sonnet subagent on `feat/federation-m2-schema`. Then M2-02 → M2-09 sequentially with M2-04/M2-05/M2-06/M2-07 having interleaved CA/storage/grant dependencies.
+- **Foundation**: M3-01 (DTOs in `packages/types/src/federation/`) starts immediately — sonnet subagent on `feat/federation-m3-types`. Blocks all server + client work.
- **DEPLOY workstream**: DEPLOY-01 (image verify) → DEPLOY-02 (stack template) → DEPLOY-03/04 (mos-test-1/-2 deploy) → DEPLOY-05 (TEST-INFRA.md). Gated on Portainer wrapper PR (`PORTAINER_INSECURE` flag) merging first.
+- **Server stream** (after M3-01): M3-03 (AuthGuard) + M3-04 (ScopeService) in series, then M3-05 / M3-06 / M3-07 (verbs) in parallel.
- **Re-converge** at M2-10 (E2E test) once both workstreams ready.
+- **Client stream** (after M3-01, parallel with server): M3-08 (FederationClient) → M3-09 (QuerySourceService).
 - **Harness** (parallel with everything): M3-02 (`tools/federation-harness/`) — needed for M3-11.
 - **Test gates**: M3-10 (Integration) → M3-11 (E2E with harness) → M3-12 (Independent security review, two rounds budgeted).
 - **Close**: M3-13 (Docs) → M3-14 (release tag `fed-v0.3.0-m3`, close #462).
 **Test-bed fallback:** `mos-test-1/-2` deploy is still blocked on `FED-M2-DEPLOY-IMG-FIX`. The harness in M3-02 ships a local two-gateway docker-compose so M3-11 is not blocked. Production-host validation is M7's responsibility (PRD AC-12).
--- a/docs/federation/TASKS.md
+++ b/docs/federation/TASKS.md
@@ -64,20 +64,20 @@ Goal: Two federated-tier gateways stood up on Portainer at `mos-test-1.woltje.co
 Goal: An admin can create a federation grant; counterparty enrolls; cert is signed by Step-CA with SAN OIDs for `grantId` + `subjectUserId`. No runtime federation traffic flows yet (that's M3).
 | id        | status | description                                                                                                                                                                                      | issue | agent  | branch                             | depends_on       | estimate | notes                                                                                                                                                        |
-| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ---------------------------------- | ---------------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| --------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ---------------------------------- | ---------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| FED-M2-01 | needs-qa    | DB migration: `federation_grants`, `federation_peers`, `federation_audit_log` tables + enum types (`grant_status`, `peer_state`). Drizzle schema + migration generation; migration tests.        | #461  | sonnet | feat/federation-m2-schema          | —                | 5K       | PR #486 open. First review NEEDS CHANGES (missing DESC indexes + reserved cols). Remediation subagent `a673dd9355dc26f82` in flight in worktree `agent-a4404ac1`.                                                                                                                                                                                                                                     |
+| FED-M2-01 | done   | DB migration: `federation_grants`, `federation_peers`, `federation_audit_log` tables + enum types (`grant_status`, `peer_state`). Drizzle schema + migration generation; migration tests.        | #461  | sonnet | feat/federation-m2-schema          | —                | 5K       | Shipped in PR #486. DESC indexes + reserved cols added after first review; migration tests green.                                                            |
-| FED-M2-02 | not-started | Add Step-CA sidecar to `docker-compose.federated.yml`: official `smallstep/step-ca` image, persistent CA volume, JWK provisioner config baked into init script.                                  | #461  | sonnet | feat/federation-m2-stepca          | DEPLOY-02        | 4K       | Profile-gated under `federated`. CA password from secret; dev compose uses dev-only password file.                                                                                                                                                                                                                                                                                                    |
+| FED-M2-02 | done   | Add Step-CA sidecar to `docker-compose.federated.yml`: official `smallstep/step-ca` image, persistent CA volume, JWK provisioner config baked into init script.                                  | #461  | sonnet | feat/federation-m2-stepca          | DEPLOY-02        | 4K       | Shipped in PR #494. Profile-gated under `federated`; CA password from secret; dev compose uses dev-only password file.                                       |
-| FED-M2-03 | not-started | Scope JSON schema + validator: `resources` allowlist, `excluded_resources`, `include_teams`, `include_personal`, `max_rows_per_query`. Vitest unit tests for valid + invalid scopes.             | #461  | sonnet | feat/federation-m2-scope-schema    | —                | 4K       | Validator independent of CA — reusable from grant CRUD + (later) M3 scope enforcement.                                                                                                                                                                                                                                                                                                                |
+| FED-M2-03 | done   | Scope JSON schema + validator: `resources` allowlist, `excluded_resources`, `include_teams`, `include_personal`, `max_rows_per_query`. Vitest unit tests for valid + invalid scopes.             | #461  | sonnet | feat/federation-m2-scope-schema    | —                | 4K       | Shipped in PR #496 (bundled with grants service). Validator independent of CA; reusable from grant CRUD + M3 scope enforcement.                              |
-| FED-M2-04 | not-started | `apps/gateway/src/federation/ca.service.ts`: Step-CA client (CSR submission, OID-bearing cert retrieval). Mocked + integration tests against real Step-CA container.                             | #461  | sonnet | feat/federation-m2-ca-service      | M2-02            | 6K       | SAN OIDs: `grantId` (custom OID 1.3.6.1.4.1.99999.1) + `subjectUserId` (1.3.6.1.4.1.99999.2). Document OID assignments in PRD/SETUP. **Acceptance**: must (a) wire `federation.tpl` template into `mosaic-fed` provisioner config and (b) include a unit/integration test asserting issued certs contain BOTH OIDs — fails-loud guard against silent OID stripping (carry-forward from M2-02 review). |
+| FED-M2-04 | done   | `apps/gateway/src/federation/ca.service.ts`: Step-CA client (CSR submission, OID-bearing cert retrieval). Mocked + integration tests against real Step-CA container.                             | #461  | sonnet | feat/federation-m2-ca-service      | M2-02            | 6K       | Shipped in PR #494. SAN OIDs 1.3.6.1.4.1.99999.1 (grantId) + 1.3.6.1.4.1.99999.2 (subjectUserId); integration test asserts both OIDs present in issued cert. |
-| FED-M2-05 | not-started | Sealed storage for `client_key_pem` reusing existing `provider_credentials` sealing key. Tests prove DB-at-rest is ciphertext, not PEM. Key rotation path documented (deferred impl).            | #461  | sonnet | feat/federation-m2-key-sealing     | M2-01            | 5K       | Separate from M2-06 to keep crypto seam isolated; reviewer focus is sealing only.                                                                                                                                                                                                                                                                                                                     |
+| FED-M2-05 | done   | Sealed storage for `client_key_pem` reusing existing `provider_credentials` sealing key. Tests prove DB-at-rest is ciphertext, not PEM. Key rotation path documented (deferred impl).            | #461  | sonnet | feat/federation-m2-key-sealing     | M2-01            | 5K       | Shipped in PR #495. Crypto seam isolated; tests confirm ciphertext-at-rest; key rotation deferred to M6.                                                     |
-| FED-M2-06 | not-started | `grants.service.ts`: CRUD + status transitions (`pending` → `active` → `revoked`); integrates M2-03 (scope) + M2-05 (sealing). Unit tests cover all transitions including invalid ones.          | #461  | sonnet | feat/federation-m2-grants-service  | M2-03, M2-05     | 6K       | Business logic only — CSR + cert work delegated to M2-04. Revocation handler is M6.                                                                                                                                                                                                                                                                                                                   |
+| FED-M2-06 | done   | `grants.service.ts`: CRUD + status transitions (`pending` → `active` → `revoked`); integrates M2-03 (scope) + M2-05 (sealing). Unit tests cover all transitions including invalid ones.          | #461  | sonnet | feat/federation-m2-grants-service  | M2-03, M2-05     | 6K       | Shipped in PR #496. All status transitions covered; invalid transition tests green; revocation handler deferred to M6.                                       |
-| FED-M2-07 | not-started | `enrollment.controller.ts`: short-lived single-use token endpoint; CSR signing; updates grant `pending` → `active`; emits enrollment audit (table-only write, M4 tightens).                      | #461  | sonnet | feat/federation-m2-enrollment      | M2-04, M2-06     | 6K       | Tokens single-use with 410 on replay; tokens TTL'd at 15min; rate-limited at request layer (M4 introduces guard, M2 uses simple lock).                                                                                                                                                                                                                                                                |
+| FED-M2-07 | done   | `enrollment.controller.ts`: short-lived single-use token endpoint; CSR signing; updates grant `pending` → `active`; emits enrollment audit (table-only write, M4 tightens).                      | #461  | sonnet | feat/federation-m2-enrollment      | M2-04, M2-06     | 6K       | Shipped in PR #497. Tokens single-use with 410 on replay; TTL 15min; rate-limited at request layer.                                                          |
-| FED-M2-08 | not-started | Admin CLI: `mosaic federation grant create/list/show` + `peer add/list`. Integration with grants.service (no API duplication). Help output + machine-readable JSON option.                       | #461  | sonnet | feat/federation-m2-cli             | M2-06, M2-07     | 7K       | `peer add <enrollment-url>` is the client-side flow; resolves enrollment URL → CSR → store sealed key + cert.                                                                                                                                                                                                                                                                                         |
+| FED-M2-08 | done   | Admin CLI: `mosaic federation grant create/list/show` + `peer add/list`. Integration with grants.service (no API duplication). Help output + machine-readable JSON option.                       | #461  | sonnet | feat/federation-m2-cli             | M2-06, M2-07     | 7K       | Shipped in PR #498. `peer add <enrollment-url>` client-side flow; JSON output flag; admin REST controller co-shipped.                                        |
-| FED-M2-09 | not-started | Integration tests covering MILESTONES.md M2 acceptance tests #1, #2, #3, #5, #7, #8 (single-gateway suite). Real Step-CA container; vitest profile gated by `FEDERATED_INTEGRATION=1`.           | #461  | sonnet | feat/federation-m2-integration     | M2-08            | 8K       | Tests #4 (cert OID match) + #6 (two-gateway peer-add) handled separately by M2-10 (E2E).                                                                                                                                                                                                                                                                                                              |
+| FED-M2-09 | done   | Integration tests covering MILESTONES.md M2 acceptance tests #1, #2, #3, #5, #7, #8 (single-gateway suite). Real Step-CA container; vitest profile gated by `FEDERATED_INTEGRATION=1`.           | #461  | sonnet | feat/federation-m2-integration     | M2-08            | 8K       | Shipped in PR #499. All 6 acceptance tests green; gated by FEDERATED_INTEGRATION=1.                                                                          |
-| FED-M2-10 | not-started | E2E test against deployed mos-test-1 + mos-test-2 (or local two-gateway docker-compose if Portainer not ready): MILESTONES test #6 `peer add` yields `active` peer record with valid cert + key. | #461  | sonnet | feat/federation-m2-e2e             | M2-08, DEPLOY-04 | 6K       | Falls back to local docker-compose-two-gateways if remote test hosts not yet available. Documents both paths.                                                                                                                                                                                                                                                                                         |
+| FED-M2-10 | done   | E2E test against deployed mos-test-1 + mos-test-2 (or local two-gateway docker-compose if Portainer not ready): MILESTONES test #6 `peer add` yields `active` peer record with valid cert + key. | #461  | sonnet | feat/federation-m2-e2e             | M2-08, DEPLOY-04 | 6K       | Shipped in PR #500. Local two-gateway docker-compose path used; `peer add` yields active peer with valid cert + sealed key.                                  |
-| FED-M2-11 | not-started | Independent security review (sonnet, not author of M2-04/05/06/07): focus on single-use token replay, sealing leak surfaces, OID match enforcement, scope schema bypass paths.                   | #461  | sonnet | feat/federation-m2-security-review | M2-10            | 8K       | Apply M1 two-round pattern. Reviewer should explicitly attempt enrollment-token replay, OID-spoofing CSR, and key leak in error messages.                                                                                                                                                                                                                                                             |
+| FED-M2-11 | done   | Independent security review (sonnet, not author of M2-04/05/06/07): focus on single-use token replay, sealing leak surfaces, OID match enforcement, scope schema bypass paths.                   | #461  | sonnet | feat/federation-m2-security-review | M2-10            | 8K       | Shipped in PR #501. Two-round review; enrollment-token replay, OID-spoofing CSR, and key leak in error messages all verified and hardened.                   |
-| FED-M2-12 | not-started | Docs update: `docs/federation/SETUP.md` Step-CA section; new `docs/federation/ADMIN-CLI.md` with grant/peer commands; scope schema reference; OID registration note. Runbook still M7-deferred.  | #461  | haiku  | feat/federation-m2-docs            | M2-11            | 4K       | Adds CA bootstrap section to SETUP.md with `docker compose --profile federated up step-ca` example.                                                                                                                                                                                                                                                                                                   |
+| FED-M2-12 | done   | Docs update: `docs/federation/SETUP.md` Step-CA section; new `docs/federation/ADMIN-CLI.md` with grant/peer commands; scope schema reference; OID registration note. Runbook still M7-deferred.  | #461  | haiku  | feat/federation-m2-docs            | M2-11            | 4K       | Shipped in PR #502. SETUP.md CA bootstrap section added; ADMIN-CLI.md created; scope schema reference and OID note included.                                 |
-| FED-M2-13 | not-started | PR aggregate close, CI green, merge to main, close #461. Release tag `fed-v0.2.0-m2`. Mark deploy stream complete. Update mission manifest M2 row.                                               | #461  | sonnet | feat/federation-m2-close           | M2-12            | 3K       | Same close pattern as M1-12; queue-guard before merge; tea release-create with notes including deploy-stream PRs.                                                                                                                                                                                                                                                                                     |
+| FED-M2-13 | done   | PR aggregate close, CI green, merge to main, close #461. Release tag `fed-v0.2.0-m2`. Mark deploy stream complete. Update mission manifest M2 row.                                               | #461  | sonnet | chore/federation-m2-close          | M2-12            | 3K       | Release tag `fed-v0.2.0-m2` created; issue #461 closed; all M2 PRs #494–#502 merged to main.                                                                 |
 **M2 code workstream estimate:** ~72K tokens (vs MILESTONES.md 30K — same over-budget pattern as M1, where per-task breakdown including tests/review/docs catches the real cost).
@@ -85,7 +85,38 @@ Goal: An admin can create a federation grant; counterparty enrolls; cert is sign
 ## Milestone 3 — mTLS handshake + list/get + scope enforcement (FED-M3)
-_Deferred. Issue #462._
+Goal: Two federated gateways exchange real data over mTLS. Inbound requests pass through cert validation → grant lookup → scope enforcement → native RBAC → response. `list`, `get`, and `capabilities` verbs land. The federation E2E harness (`tools/federation-harness/`) is the new permanent test bed for M3+ and is a required gate for every milestone going forward.
 > **Critical trust boundary.** Every 401/403 path needs a test. Code review is non-negotiable; M3-12 budgets two review rounds.
 >
 > **Tracking issue:** #462.
 | id        | status      | description                                                                                                                                                                                                                                                                                            | issue | agent  | branch                               | depends_on       | estimate | notes                                                                                                                                                    |
 | --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ------------------------------------ | ---------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | FED-M3-01 | not-started | `packages/types/src/federation/` — request/response DTOs for `list`, `get`, `capabilities` verbs. Wire-format zod schemas + inferred TS types. Includes `FederationRequest`, `FederationListResponse<T>`, `FederationGetResponse<T>`, `FederationCapabilitiesResponse`, error envelope, `_source` tag. | #462  | sonnet | feat/federation-m3-types             | —                | 4K       | Reusable from gateway server + client + harness. Pure types — no I/O, no NestJS.                                                                         |
 | FED-M3-02 | not-started | `tools/federation-harness/` scaffold: `docker-compose.two-gateways.yml` (Server A + Server B + step-CA), `seed.ts` (provisions grants, peers, sample tasks/notes/credentials per scope variant), `harness.ts` helper (boots stack, returns typed clients). README documents harness use.               | #462  | sonnet | feat/federation-m3-harness           | DEPLOY-04 (soft) | 8K       | Falls back to local docker-compose if `mos-test-1/-2` not yet redeployed (DEPLOY chain blocked on IMG-FIX). Permanent test infra used by M3+.            |
 | FED-M3-03 | not-started | `apps/gateway/src/federation/server/federation-auth.guard.ts` (NestJS guard). Validates inbound client cert from Fastify TLS context, extracts `grantId` + `subjectUserId` from custom OIDs, loads grant from DB, asserts `status='active'`, attaches `FederationContext` to request.                  | #462  | sonnet | feat/federation-m3-auth-guard        | M3-01            | 8K       | Reuses OID parsing logic mirrored from `ca.service.ts` post-issuance verification. 401 on malformed/missing OIDs; 403 on revoked/expired/missing grant.  |
 | FED-M3-04 | not-started | `apps/gateway/src/federation/server/scope.service.ts`. Pipeline: (1) resource allowlist + excluded check, (2) native RBAC eval as `subjectUserId`, (3) scope filter intersection (`include_teams`, `include_personal`), (4) `max_rows_per_query` cap. Pure service — DB calls injected.                | #462  | sonnet | feat/federation-m3-scope-service     | M3-01            | 10K      | Hardest correctness target in M3. Reuses `parseFederationScope` (M2-03). Returns either `{ allowed: true, filter }` or structured deny reason for audit. |
 | FED-M3-05 | not-started | `apps/gateway/src/federation/server/verbs/list.controller.ts`. Wires AuthGuard → ScopeService → tasks/notes/memory query layer; applies row cap; tags rows with `_source`. Resource selector via path param.                                                                                           | #462  | sonnet | feat/federation-m3-verb-list         | M3-03, M3-04     | 6K       | Routes: `POST /api/federation/v1/list/:resource`. No body persistence. Audit write deferred to M4.                                                       |
 | FED-M3-06 | not-started | `apps/gateway/src/federation/server/verbs/get.controller.ts`. Single-resource fetch by id; same pipeline as list. 404 on not-found, 403 on RBAC/scope deny — both audited the same way.                                                                                                                | #462  | sonnet | feat/federation-m3-verb-get          | M3-03, M3-04     | 6K       | `POST /api/federation/v1/get/:resource/:id`. Mirrors list controller patterns.                                                                           |
 | FED-M3-07 | not-started | `apps/gateway/src/federation/server/verbs/capabilities.controller.ts`. Read-only enumeration: returns `{ resources, excluded_resources, max_rows_per_query, supported_verbs }` derived from grant scope. Always allowed for an active grant — no RBAC eval.                                            | #462  | sonnet | feat/federation-m3-verb-capabilities | M3-03            | 4K       | `GET /api/federation/v1/capabilities`. Smallest verb; useful sanity check that mTLS + auth guard work end-to-end.                                        |
 | FED-M3-08 | not-started | `apps/gateway/src/federation/client/federation-client.service.ts`. Outbound mTLS dialer: picks `(certPem, sealed clientKey)` from `federation_peers`, unwraps key, builds undici Agent with mTLS, calls peer verb, parses typed response, wraps non-2xx into `FederationClientError`.                  | #462  | sonnet | feat/federation-m3-client            | M3-01            | 8K       | Independent of server stream — can land in parallel with M3-03/04. Cert/key cached per-peer; flushed by future M5/M6 logic.                              |
 | FED-M3-09 | not-started | `apps/gateway/src/federation/client/query-source.service.ts`. Accepts `source: "local" \| "federated:<host>" \| "all"` from gateway query layer; for `"all"` fans out to local + each peer in parallel; merges results; tags every row with `_source`.                                                 | #462  | sonnet | feat/federation-m3-query-source      | M3-08            | 8K       | Per-peer failure surfaces as `_partial: true` in response, not hard failure (sets up M5 offline UX). M5 adds caching + circuit breaker on top.           |
 | FED-M3-10 | not-started | Integration tests for MILESTONES.md M3 acceptance #6 (malformed OIDs → 401; valid cert + revoked grant → 403) and #7 (`max_rows_per_query` cap). Real PG, mocked TLS context (Fastify req shim).                                                                                                       | #462  | sonnet | feat/federation-m3-integration       | M3-05, M3-06     | 8K       | Vitest profile gated by `FEDERATED_INTEGRATION=1`. Single-gateway suite; no harness required.                                                            |
 | FED-M3-11 | not-started | E2E tests for MILESTONES.md M3 acceptance #1, #2, #3, #4, #5, #8, #9, #10 (8 cases). Uses harness from M3-02; two real gateways, real Step-CA, real mTLS. Each test asserts both happy-path response and audit/no-persist invariants.                                                                  | #462  | sonnet | feat/federation-m3-e2e               | M3-02, M3-09     | 12K      | Largest single task. Each acceptance gets its own `it(...)` for clear failure attribution.                                                               |
 | FED-M3-12 | not-started | Independent security review (sonnet, not author of M3-03/04/05/06/07/08/09): focus on cert-SAN spoofing, OID extraction edge cases, scope-bypass via filter manipulation, RBAC-bypass via subjectUser swap, response leakage on scope denial.                                                          | #462  | sonnet | feat/federation-m3-security-review   | M3-11            | 10K      | Two review rounds budgeted. PRD requires explicit test for every 401/403 path — review verifies coverage.                                                |
 | FED-M3-13 | not-started | Docs update: `docs/federation/SETUP.md` mTLS handshake section, new `docs/federation/HARNESS.md` for federation-harness usage, OID reference table in SETUP.md, scope enforcement pipeline diagram. Runbook still M7-deferred.                                                                         | #462  | haiku  | feat/federation-m3-docs              | M3-12            | 5K       | One ASCII diagram for the auth-guard → scope → RBAC pipeline; helps future reviewers reason about denial paths.                                          |
 | FED-M3-14 | not-started | PR aggregate close, CI green, merge to main, close #462. Release tag `fed-v0.3.0-m3`. Update mission manifest M3 row → done; M4 row → in-progress when work begins.                                                                                                                                    | #462  | sonnet | chore/federation-m3-close            | M3-13            | 3K       | Same close pattern as M1-12 / M2-13.                                                                                                                     |
 **M3 estimate:** ~100K tokens (vs MILESTONES.md 40K — same per-task breakdown pattern as M1/M2: tests, review, and docs split out from implementation cost). Largest milestone in the federation mission.
 **Parallelization opportunities:**
 - M3-08 (client) can land in parallel with M3-03/M3-04 (server pipeline) — they only share DTOs from M3-01.
 - M3-02 (harness) can land in parallel with everything except M3-11.
 - M3-05/M3-06/M3-07 (verbs) are independent of each other once M3-03/M3-04 land.
 **Test bed fallback:** If `mos-test-1.woltje.com` / `mos-test-2.woltje.com` are still blocked on `FED-M2-DEPLOY-IMG-FIX` when M3-11 is ready to run, the harness's local `docker-compose.two-gateways.yml` is a sufficient stand-in. Production-host validation moves to M7 acceptance suite (PRD AC-12).
 ## Milestone 4 — search + audit + rate limit (FED-M4)
--- a/docs/scratchpads/mvp-20260312.md
+++ b/docs/scratchpads/mvp-20260312.md
@@ -612,3 +612,44 @@ Independent security review surfaced three high-impact and four medium findings;
 7. DEPLOY-03/04 acceptance probes (`mosaic gateway doctor --json`, pgvector `vector(3)` round-trip)
 8. DEPLOY-05: author `docs/federation/TEST-INFRA.md`
 9. M2-02 (Step-CA sidecar) kicks off after image health is green
 ### Session 23 — 2026-04-21 — M2 close + M3 decomposition
 **Closed at compaction boundary:** all 13 M2 tasks done, PRs #494–#503 merged to `main`, tag `fed-v0.2.0-m2` published, Gitea release notes posted, issue #461 closed. Main at `4ece6dc6`.
 **M2 hardening landed in PR #501** (security review remediation):
 - CRIT-1: post-issuance OID verification in `ca.service.ts` (rejects cert if `mosaic_grant_id` / `mosaic_subject_user_id` extensions missing or mismatched)
 - CRIT-2: atomic activation guard `WHERE status='pending'` on grant + `WHERE state='pending'` on peer; throws `ConflictException` if lost race
 - HIGH-2: removed try/catch fallback in `extractCertNotAfter` — parse failures propagate as 500 (no silent 90-day default)
 - HIGH-4: token slice for logging (`${token.slice(0, 8)}...`) — no full token in stdout
 - HIGH-5: `redeem()` wrapped in try/catch with best-effort failure audit; uses `null` (not `'unknown'`) for nullable UUID FK fallback
 - MED-3: `createToken` validates `grant.peerId === dto.peerId`; `BadRequestException` on mismatch
 **Remaining M2 security findings (deferred to M3+ unless marked wontfix):**
 - HIGH-1: peerId/subjectUserId tenancy validation on `createGrant` (M3 ScopeService work surfaces this)
 - HIGH-3: Step-CA cert SHA-256 fingerprint pinning (M5 cert handling)
 - MED-1: token entropy already 32 bytes — wontfix
 - MED-2: per-route rate limit on enrollment endpoint (M4 rate limit work)
 - MED-4: CSR CN binding to peer's commonName (M3 AuthGuard work)
 **M3 decomposition landed in this session:**
 - 14 tasks (M3-01..M3-14), ~100K estimate
 - Structure mirrors M1/M2 pattern: foundation → server stream + client stream + harness in parallel → integration → E2E → security review → docs → close
 - M3-02 ships local two-gateway docker-compose (`tools/federation-harness/`) so M3-11 E2E is not blocked on the Portainer test bed (which is still blocked on `FED-M2-DEPLOY-IMG-FIX`)
 **Subagent doctrine retained from M2:**
 - All worker subagents use `isolation: "worktree"` to prevent branch-race incidents
 - Code review is independent (different subagent, no overlap with author of work)
 - `tea pr create --repo mosaicstack/stack --login mosaicstack` is the working PR-create path; `pr-create.sh` has shell-quoting bugs (followup #45 if not already filed)
 - Cost tier: foundational implementation = sonnet, docs = haiku, complex multi-file architecture (security review, scope service) = sonnet with two review rounds
 **Next concrete step:**
 1. PR for the M3 planning artifact (this commit) — branch `docs/federation-m3-planning`
 2. After merge, kickoff M3-01 (DTOs) on `feat/federation-m3-types` with sonnet subagent in worktree
 3. Once M3-01 lands, fan out: M3-02 (harness) || M3-03 (AuthGuard) → M3-04 (ScopeService) || M3-08 (FederationClient)
 4. Re-converge at M3-10 (Integration) → M3-11 (E2E)
--- a/eslint.config.mjs
+++ b/eslint.config.mjs
@@ -30,6 +30,7 @@ export default tseslint.config(
            'apps/gateway/vitest.config.ts',
            'packages/storage/vitest.config.ts',
            'packages/mosaic/__tests__/*.ts',
            'tools/federation-harness/*.ts',
          ],
        },
      },
--- a/packages/types/package.json
+++ b/packages/types/package.json
@@ -26,7 +26,8 @@
  },
  "dependencies": {
    "class-transformer": "^0.5.1",
-    "class-validator": "^0.15.1"
+    "class-validator": "^0.15.1",
    "zod": "^4.3.6"
  },
  "publishConfig": {
    "registry": "https://git.mosaicstack.dev/api/packages/mosaicstack/npm/",
--- a/packages/types/src/federation/tests/federation.spec.ts
+++ b/packages/types/src/federation/tests/federation.spec.ts
@@ -0,0 +1,435 @@
 /**
 * Unit tests for federation wire-format DTOs.
 *
 * Coverage:
 *  - FederationRequestSchema  (valid + invalid)
 *  - FederationListResponseSchema factory
 *  - FederationGetResponseSchema factory
 *  - FederationCapabilitiesResponseSchema
 *  - FederationErrorEnvelopeSchema + error code exhaustiveness
 *  - FederationError exception hierarchy
 *  - tagWithSource helper round-trip
 *  - SourceTagSchema
 */
 import { describe, expect, it } from 'vitest';
 import { z } from 'zod';
 import {
  FEDERATION_ERROR_CODES,
  FEDERATION_VERBS,
  FederationCapabilitiesResponseSchema,
  FederationError,
  FederationErrorEnvelopeSchema,
  FederationForbiddenError,
  FederationInternalError,
  FederationInvalidRequestError,
  FederationNotFoundError,
  FederationRateLimitedError,
  FederationRequestSchema,
  FederationScopeViolationError,
  FederationUnauthorizedError,
  FederationGetResponseSchema,
  FederationListResponseSchema,
  SOURCE_LOCAL,
  SourceTagSchema,
  parseFederationErrorEnvelope,
  tagWithSource,
 } from '../index.js';
 // ---------------------------------------------------------------------------
 // Verbs
 // ---------------------------------------------------------------------------
 describe('FEDERATION_VERBS', () => {
  it('contains exactly list, get, capabilities', () => {
    // Order matters: toEqual compares the tuple element by element.
    const expectedVerbs = ['list', 'get', 'capabilities'];
    expect(FEDERATION_VERBS).toEqual(expectedVerbs);
  });
 });
 // ---------------------------------------------------------------------------
 // FederationRequestSchema
 // ---------------------------------------------------------------------------
 describe('FederationRequestSchema', () => {
  // Funnel every case through one helper so the schema under test is named once.
  const parseRequest = (candidate: unknown) => FederationRequestSchema.safeParse(candidate);
  it('accepts a minimal valid list request', () => {
    const outcome = parseRequest({ verb: 'list', resource: 'tasks' });
    expect(outcome.success).toBe(true);
  });
  it('accepts a get request with cursor and params', () => {
    const outcome = parseRequest({
      verb: 'get',
      resource: 'notes',
      cursor: 'abc123',
      params: { filter: 'mine' },
    });
    expect(outcome.success).toBe(true);
    // Guard narrows the result type before the payload is inspected.
    if (!outcome.success) return;
    expect(outcome.data.cursor).toBe('abc123');
    expect(outcome.data.params?.['filter']).toBe('mine');
  });
  it('accepts a capabilities request', () => {
    const outcome = parseRequest({ verb: 'capabilities', resource: 'tasks' });
    expect(outcome.success).toBe(true);
  });
  it('rejects an unknown verb', () => {
    const outcome = parseRequest({ verb: 'search', resource: 'tasks' });
    expect(outcome.success).toBe(false);
  });
  it('rejects an empty resource string', () => {
    const outcome = parseRequest({ verb: 'list', resource: '' });
    expect(outcome.success).toBe(false);
  });
  it('rejects a missing verb', () => {
    const outcome = parseRequest({ resource: 'tasks' });
    expect(outcome.success).toBe(false);
  });
 });
 // ---------------------------------------------------------------------------
 // FederationListResponseSchema factory
 // ---------------------------------------------------------------------------
 describe('FederationListResponseSchema', () => {
  // Concrete list schema built once from a small item schema and shared by all cases.
  const TaskLikeSchema = z.object({ id: z.string(), name: z.string() });
  const TaskLikeListSchema = FederationListResponseSchema(TaskLikeSchema);
  const parseList = (candidate: unknown) => TaskLikeListSchema.safeParse(candidate);
  it('accepts a valid list envelope', () => {
    const outcome = parseList({
      items: [{ id: '1', name: 'Task A' }],
      nextCursor: 'page2',
      _partial: false,
      _truncated: false,
    });
    expect(outcome.success).toBe(true);
    if (!outcome.success) return;
    expect(outcome.data.items).toHaveLength(1);
    expect(outcome.data.nextCursor).toBe('page2');
  });
  it('accepts a minimal envelope with empty items', () => {
    expect(parseList({ items: [] }).success).toBe(true);
  });
  it('rejects when items is missing', () => {
    expect(parseList({ nextCursor: 'x' }).success).toBe(false);
  });
  it('rejects when an item fails validation', () => {
    // `id` is a number here, but the item schema requires a string.
    expect(parseList({ items: [{ id: 1, name: 'bad' }] }).success).toBe(false);
  });
 });
 // ---------------------------------------------------------------------------
 // FederationGetResponseSchema factory
 // ---------------------------------------------------------------------------
 describe('FederationGetResponseSchema', () => {
  // Single-item envelope schema built once and reused by every case.
  const IdOnlySchema = z.object({ id: z.string() });
  const IdOnlyGetSchema = FederationGetResponseSchema(IdOnlySchema);
  const parseGet = (candidate: unknown) => IdOnlyGetSchema.safeParse(candidate);
  it('accepts a found item', () => {
    const outcome = parseGet({ item: { id: 'abc' } });
    expect(outcome.success).toBe(true);
    if (!outcome.success) return;
    expect(outcome.data.item).toEqual({ id: 'abc' });
  });
  it('accepts null item (not found)', () => {
    const outcome = parseGet({ item: null });
    expect(outcome.success).toBe(true);
    if (!outcome.success) return;
    expect(outcome.data.item).toBeNull();
  });
  it('rejects when item is missing', () => {
    expect(parseGet({}).success).toBe(false);
  });
 });
 // ---------------------------------------------------------------------------
 // FederationCapabilitiesResponseSchema
 // ---------------------------------------------------------------------------
 describe('FederationCapabilitiesResponseSchema', () => {
  // Baseline payload; each case overrides only the fields it is probing.
  const basePayload = {
    resources: ['tasks'],
    excluded_resources: [],
    max_rows_per_query: 100,
    supported_verbs: ['list'],
  };
  const parseWith = (overrides: Record<string, unknown> = {}) =>
    FederationCapabilitiesResponseSchema.safeParse({ ...basePayload, ...overrides });
  it('accepts a valid capabilities response', () => {
    const outcome = parseWith({
      resources: ['tasks', 'notes'],
      excluded_resources: ['credentials'],
      max_rows_per_query: 500,
      supported_verbs: ['list', 'get', 'capabilities'],
    });
    expect(outcome.success).toBe(true);
    if (!outcome.success) return;
    expect(outcome.data.max_rows_per_query).toBe(500);
  });
  it('accepts a response with filters field', () => {
    const outcome = parseWith({
      resources: ['tasks', 'notes'],
      filters: {
        tasks: { include_teams: ['team-a'], include_personal: true },
        notes: { include_personal: false },
      },
    });
    expect(outcome.success).toBe(true);
    if (!outcome.success) return;
    expect(outcome.data.filters?.['tasks']?.include_teams).toEqual(['team-a']);
  });
  it('accepts a response with partial filters (only include_teams)', () => {
    const outcome = parseWith({
      max_rows_per_query: 50,
      filters: { tasks: { include_teams: ['eng'] } },
    });
    expect(outcome.success).toBe(true);
  });
  it('accepts a response with rate_limit (M4 full shape)', () => {
    const outcome = parseWith({
      rate_limit: { limit_per_minute: 60, remaining: 55, reset_at: '2026-04-23T12:00:00Z' },
    });
    expect(outcome.success).toBe(true);
    if (!outcome.success) return;
    expect(outcome.data.rate_limit?.limit_per_minute).toBe(60);
    expect(outcome.data.rate_limit?.remaining).toBe(55);
  });
  it('accepts a response with rate_limit (M3 minimal — limit_per_minute only)', () => {
    const outcome = parseWith({ rate_limit: { limit_per_minute: 120 } });
    expect(outcome.success).toBe(true);
  });
  it('accepts a response without rate_limit (field is optional)', () => {
    const outcome = parseWith();
    expect(outcome.success).toBe(true);
    if (!outcome.success) return;
    expect(outcome.data.rate_limit).toBeUndefined();
  });
  it('rejects rate_limit with non-positive limit_per_minute', () => {
    const outcome = parseWith({ rate_limit: { limit_per_minute: 0 } });
    expect(outcome.success).toBe(false);
  });
  it('rejects rate_limit with invalid reset_at datetime', () => {
    const outcome = parseWith({
      rate_limit: { limit_per_minute: 60, reset_at: 'not-a-datetime' },
    });
    expect(outcome.success).toBe(false);
  });
  it('rejects supported_verbs with an invalid verb (MED-3 enum guard)', () => {
    const outcome = parseWith({ supported_verbs: ['invalid_verb'] });
    expect(outcome.success).toBe(false);
  });
  it('rejects empty resources array', () => {
    const outcome = parseWith({ resources: [] });
    expect(outcome.success).toBe(false);
  });
  it('rejects non-integer max_rows_per_query', () => {
    const outcome = parseWith({ max_rows_per_query: 1.5 });
    expect(outcome.success).toBe(false);
  });
 });
 // ---------------------------------------------------------------------------
 // FederationErrorEnvelopeSchema + error code exhaustiveness
 // ---------------------------------------------------------------------------
 describe('FederationErrorEnvelopeSchema', () => {
  const parseEnvelope = (candidate: unknown) => FederationErrorEnvelopeSchema.safeParse(candidate);
  it('accepts each valid error code', () => {
    // Walk the declared tuple so a newly added code is exercised automatically.
    FEDERATION_ERROR_CODES.forEach((code) => {
      const outcome = parseEnvelope({ error: { code, message: 'test' } });
      expect(outcome.success, `code ${code} should be valid`).toBe(true);
    });
  });
  it('rejects an unknown error code', () => {
    const outcome = parseEnvelope({ error: { code: 'unknown_code', message: 'test' } });
    expect(outcome.success).toBe(false);
  });
  it('accepts optional details field', () => {
    const outcome = parseEnvelope({
      error: { code: 'forbidden', message: 'nope', details: { grantId: 'xyz' } },
    });
    expect(outcome.success).toBe(true);
  });
  it('rejects when message is missing', () => {
    const outcome = parseEnvelope({ error: { code: 'not_found' } });
    expect(outcome.success).toBe(false);
  });
 });
 describe('parseFederationErrorEnvelope', () => {
  it('returns a typed envelope for valid input', () => {
    const wirePayload = { error: { code: 'not_found', message: 'gone' } };
    const { error } = parseFederationErrorEnvelope(wirePayload);
    expect(error.code).toBe('not_found');
  });
  it('throws for invalid input', () => {
    const malformed = { bad: 'shape' };
    expect(() => parseFederationErrorEnvelope(malformed)).toThrow();
  });
 });
 // ---------------------------------------------------------------------------
 // FederationError exception hierarchy
 // ---------------------------------------------------------------------------
 describe('FederationError hierarchy', () => {
  // One [expected wire code, default-constructed instance] pair per concrete subclass.
  const cases: Array<[string, FederationError]> = [
    ['unauthorized', new FederationUnauthorizedError()],
    ['forbidden', new FederationForbiddenError()],
    ['not_found', new FederationNotFoundError()],
    ['rate_limited', new FederationRateLimitedError()],
    ['scope_violation', new FederationScopeViolationError()],
    ['invalid_request', new FederationInvalidRequestError()],
    ['internal_error', new FederationInternalError()],
  ];
  it.each(cases)('code %s is an instance of FederationError', (_code, err) => {
    expect(err).toBeInstanceOf(FederationError);
    expect(err).toBeInstanceOf(Error);
  });
  it.each(cases)('code %s has correct code property', (code, err) => {
    expect(err.code).toBe(code);
  });
  it('toEnvelope serialises to wire format', () => {
    const err = new FederationForbiddenError('Access denied', { grantId: 'g1' });
    const env = err.toEnvelope();
    expect(env.error.code).toBe('forbidden');
    expect(env.error.message).toBe('Access denied');
    expect(env.error.details).toEqual({ grantId: 'g1' });
  });
  it('toEnvelope omits details when not provided', () => {
    const err = new FederationNotFoundError();
    const env = err.toEnvelope();
    // The key must be absent, not merely undefined, so it never reaches the wire.
    expect(Object.prototype.hasOwnProperty.call(env.error, 'details')).toBe(false);
  });
  it('error codes tuple covers all subclasses (exhaustiveness check)', () => {
    // Runtime check (nothing here is enforced by the compiler): the codes listed
    // in `cases` must match FEDERATION_ERROR_CODES exactly. Comparing the sorted
    // lists, rather than membership plus length, means a duplicated case cannot
    // hide a code that has no subclass.
    const coveredCodes = cases.map(([code]) => code).sort();
    const declaredCodes = [...FEDERATION_ERROR_CODES].sort();
    expect(coveredCodes).toEqual(declaredCodes);
  });
 });
 // ---------------------------------------------------------------------------
 // Source tag + tagWithSource
 // ---------------------------------------------------------------------------
 describe('SourceTagSchema', () => {
  const isValidTag = (candidate: unknown) => SourceTagSchema.safeParse(candidate).success;
  it('accepts a non-empty _source string', () => {
    // Covers both the local marker and a peer hostname.
    for (const source of ['local', 'mosaic.uscllc.com']) {
      expect(isValidTag({ _source: source })).toBe(true);
    }
  });
  it('rejects empty _source string', () => {
    expect(isValidTag({ _source: '' })).toBe(false);
  });
 });
 describe('tagWithSource', () => {
  it('stamps each item with the given source', () => {
    const stamped = tagWithSource([{ id: '1' }, { id: '2' }], SOURCE_LOCAL);
    const expected = ['1', '2'].map((id) => ({ id, _source: 'local' }));
    expect(stamped).toEqual(expected);
  });
  it('preserves original item fields', () => {
    const original = { id: 'x', name: 'Task', done: false };
    const [first] = tagWithSource([original], 'mosaic.uscllc.com');
    expect(first).toMatchObject({ id: 'x', name: 'Task', done: false });
    expect(first?._source).toBe('mosaic.uscllc.com');
  });
  it('returns empty array for empty input', () => {
    const stamped = tagWithSource([], 'local');
    expect(stamped).toEqual([]);
  });
  it('round-trip: tagWithSource output passes SourceTagSchema', () => {
    const [first] = tagWithSource([{ id: '1' }], 'local');
    const verdict = SourceTagSchema.safeParse(first);
    expect(verdict.success).toBe(true);
  });
 });
--- a/packages/types/src/federation/error.ts
+++ b/packages/types/src/federation/error.ts
@@ -0,0 +1,164 @@
 /**
 * Federation wire-format error envelope and exception hierarchy.
 *
 * Source of truth: docs/federation/PRD.md §6, §8.
 *
 * DESIGN: Typed error classes rather than discriminated union values
 * ──────────────────────────────────────────────────────────────────
 * We expose:
 *   1. `FEDERATION_ERROR_CODES` — closed string-enum tuple (exhaustiveness-checkable).
 *   2. `FederationErrorCode` — union type inferred from the tuple.
 *   3. `FederationErrorEnvelopeSchema` — Zod schema for the wire format.
 *   4. `FederationError` — base Error subclass with a typed `code` property.
 *      One concrete subclass per code (e.g. `FederationUnauthorizedError`),
 *      which enables `instanceof` dispatch in handlers without a switch.
 *
 * Rationale: subclasses give gateway handlers and the client a clean dispatch
 * point (catch + instanceof) without re-parsing or switch tables.  All classes
 * carry `code` so a generic logger can act on any FederationError uniformly.
 *
 * Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
 */
 import { z } from 'zod';
 // ---------------------------------------------------------------------------
 // Error code enum (closed)
 // ---------------------------------------------------------------------------
 /**
 * Closed list of error codes that may appear in a federation error envelope.
 * Declared `as const` so that `FederationErrorCode` below is the union of
 * exactly these string literals.
 */
 export const FEDERATION_ERROR_CODES = [
  'unauthorized',
  'forbidden',
  'not_found',
  'rate_limited',
  'scope_violation',
  'invalid_request',
  'internal_error',
 ] as const;
 /** Union of the string literals listed in `FEDERATION_ERROR_CODES`. */
 export type FederationErrorCode = (typeof FEDERATION_ERROR_CODES)[number];
 // ---------------------------------------------------------------------------
 // Wire-format schema
 // ---------------------------------------------------------------------------
 /**
 * Zod schema for the wire-format error envelope: a single `error` object
 * holding a code, a message and optional details.
 */
 export const FederationErrorEnvelopeSchema = z.object({
  error: z.object({
    // Must be one of the literals in FEDERATION_ERROR_CODES.
    code: z.enum(FEDERATION_ERROR_CODES),
    // Required; any string is accepted.
    message: z.string(),
    // Free-form payload; the schema places no constraint on its shape.
    details: z.unknown().optional(),
  }),
 });
 /** Envelope type inferred from `FederationErrorEnvelopeSchema`. */
 export type FederationErrorEnvelope = z.infer<typeof FederationErrorEnvelopeSchema>;
 // ---------------------------------------------------------------------------
 // Exception class hierarchy
 // ---------------------------------------------------------------------------
 /**
 * Base class for all federation errors.
 * Carries a typed `code` so handlers can act uniformly on any FederationError.
 *
 * Subclasses pass their fixed code to `super(...)` and then overwrite `name`.
 */
 export class FederationError extends Error {
  /** Machine-readable error code placed in the wire envelope. */
  readonly code: FederationErrorCode;
  /** Optional structured context; left out of the envelope when `undefined`. */
  readonly details?: unknown;
  /**
   * @param code - Error code for this failure.
   * @param message - Human-readable description, stored as `Error.message`.
   * @param details - Optional extra context to ship alongside the message.
   */
  constructor(code: FederationErrorCode, message: string, details?: unknown) {
    super(message);
    // Restore the prototype chain. When this module is down-levelled to ES5,
    // calling `super(...)` on the built-in Error yields a plain Error object and
    // `instanceof` checks against subclasses fail; on ES2015+ this is a no-op.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = 'FederationError';
    this.code = code;
    this.details = details;
  }
  /**
   * Serialise to the wire-format error envelope.
   *
   * @returns An envelope with `code` and `message`; `details` is included only
   *          when it is not `undefined`.
   */
  toEnvelope(): FederationErrorEnvelope {
    return {
      error: {
        code: this.code,
        message: this.message,
        // Conditional spread keeps the key absent instead of `details: undefined`.
        ...(this.details !== undefined ? { details: this.details } : {}),
      },
    };
  }
 }
 /**
 * Client cert is missing, invalid, or signed by an untrusted CA.
 * Always carries the `unauthorized` code.
 */
 export class FederationUnauthorizedError extends FederationError {
  /**
   * @param message - Human-readable description; defaults to `'Unauthorized'`.
   * @param details - Optional context passed through unchanged to the base class.
   */
  constructor(message = 'Unauthorized', details?: unknown) {
    super('unauthorized', message, details);
    this.name = 'FederationUnauthorizedError';
  }
 }
 /**
 * Grant is inactive, revoked, or the subject user lacks access to the resource.
 * Always carries the `forbidden` code.
 */
 export class FederationForbiddenError extends FederationError {
  /**
   * @param message - Human-readable description; defaults to `'Forbidden'`.
   * @param details - Optional context passed through unchanged to the base class.
   */
  constructor(message = 'Forbidden', details?: unknown) {
    super('forbidden', message, details);
    this.name = 'FederationForbiddenError';
  }
 }
 /**
 * Requested resource does not exist.
 * Always carries the `not_found` code.
 */
 export class FederationNotFoundError extends FederationError {
  /**
   * @param message - Human-readable description; defaults to `'Not found'`.
   * @param details - Optional context passed through unchanged to the base class.
   */
  constructor(message = 'Not found', details?: unknown) {
    super('not_found', message, details);
    this.name = 'FederationNotFoundError';
  }
 }
 /**
 * Grant has exceeded its rate limit; Retry-After should accompany this.
 * Always carries the `rate_limited` code. This class does not itself set any
 * Retry-After value — that is left to whoever sends the response.
 */
 export class FederationRateLimitedError extends FederationError {
  /**
   * @param message - Human-readable description; defaults to `'Rate limit exceeded'`.
   * @param details - Optional context passed through unchanged to the base class.
   */
  constructor(message = 'Rate limit exceeded', details?: unknown) {
    super('rate_limited', message, details);
    this.name = 'FederationRateLimitedError';
  }
 }
 /**
 * The request targets a resource or performs an action that the grant's
 * scope explicitly disallows (distinct from generic 403 — scope_violation
 * means the scope configuration itself blocked the request).
 * Always carries the `scope_violation` code.
 */
 export class FederationScopeViolationError extends FederationError {
  /**
   * @param message - Human-readable description; defaults to `'Scope violation'`.
   * @param details - Optional context passed through unchanged to the base class.
   */
  constructor(message = 'Scope violation', details?: unknown) {
    super('scope_violation', message, details);
    this.name = 'FederationScopeViolationError';
  }
 }
 /**
 * Malformed request — missing fields, invalid cursor, unknown verb, etc.
 * Always carries the `invalid_request` code.
 */
 export class FederationInvalidRequestError extends FederationError {
  /**
   * @param message - Human-readable description; defaults to `'Invalid request'`.
   * @param details - Optional context passed through unchanged to the base class.
   */
  constructor(message = 'Invalid request', details?: unknown) {
    super('invalid_request', message, details);
    this.name = 'FederationInvalidRequestError';
  }
 }
 /**
 * Unexpected server-side failure.
 * Always carries the `internal_error` code.
 */
 export class FederationInternalError extends FederationError {
  /**
   * @param message - Human-readable description; defaults to `'Internal error'`.
   * @param details - Optional context passed through unchanged to the base class.
   */
  constructor(message = 'Internal error', details?: unknown) {
    super('internal_error', message, details);
    this.name = 'FederationInternalError';
  }
 }
 // ---------------------------------------------------------------------------
 // Typed parser
 // ---------------------------------------------------------------------------
 /**
 * Validate an unknown value against the federation error envelope schema.
 *
 * @param input - Candidate payload, e.g. a decoded response body.
 * @returns The validated envelope when `input` matches the schema.
 * @throws Error (a plain Error, not a FederationError) listing every schema
 *         issue, one per line, when `input` is not a valid envelope.
 */
 export function parseFederationErrorEnvelope(input: unknown): FederationErrorEnvelope {
  const parsed = FederationErrorEnvelopeSchema.safeParse(input);
  if (parsed.success) {
    return parsed.data;
  }
  const bulletLines: string[] = [];
  for (const issue of parsed.error.issues) {
    // An empty path means the issue is on the top-level value itself.
    const location = issue.path.join('.') || 'root';
    bulletLines.push(`  - [${location}] ${issue.message}`);
  }
  throw new Error(`Invalid federation error envelope:\n${bulletLines.join('\n')}`);
 }
--- a/packages/types/src/federation/index.ts
+++ b/packages/types/src/federation/index.ts
@@ -0,0 +1,16 @@
 /**
 * Federation wire-format DTOs — public barrel.
 *
 * Exports everything downstream M3 tasks need:
 *   verbs.ts      — FEDERATION_VERBS constant + FederationVerb type
 *   request.ts    — FederationRequestSchema + FederationRequest
 *   response.ts   — list/get/capabilities schema factories + types
 *   source-tag.ts — SourceTagSchema, tagWithSource helper
 *   error.ts      — error envelope schema + typed exception hierarchy
 */
 export * from './verbs.js';
 export * from './request.js';
 export * from './response.js';
 export * from './source-tag.js';
 export * from './error.js';
--- a/packages/types/src/federation/request.ts
+++ b/packages/types/src/federation/request.ts
@@ -0,0 +1,47 @@
 /**
 * Federation wire-format request schema.
 *
 * Source of truth: docs/federation/PRD.md §9 (query model).
 *
 * Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
 */
 import { z } from 'zod';
 import { FEDERATION_VERBS } from './verbs.js';
 // ---------------------------------------------------------------------------
 // Query params — free-form key/value pairs passed alongside the request
 // ---------------------------------------------------------------------------
 /** Optional string-to-string map; when present, every key and value must be a string. */
 const QueryParamsSchema = z.record(z.string(), z.string()).optional();
 // ---------------------------------------------------------------------------
 // Top-level request schema
 // ---------------------------------------------------------------------------
 /**
 * Zod schema for a single federation request as sent on the wire.
 * `verb` and `resource` are required; `params` and `cursor` are optional.
 */
 export const FederationRequestSchema = z.object({
  /**
   * Verb being invoked. One of the M3 federation verbs.
   */
  verb: z.enum(FEDERATION_VERBS),
  /**
   * Resource path being queried, e.g. "tasks", "notes", "memory".
   * Forward-slash-separated for sub-resources (e.g. "teams/abc/tasks").
   * NOTE: only non-emptiness is enforced here; the path format itself is
   * not validated by this schema.
   */
  resource: z.string().min(1, { message: 'resource must not be empty' }),
  /**
   * Optional free-form query params (filters, sort, etc.).
   * Values are always strings; consumers parse as needed.
   */
  params: QueryParamsSchema,
  /**
   * Opaque pagination cursor returned by a previous list response.
   * Absent on first page.
   */
  cursor: z.string().optional(),
 });
 /** Request type inferred from `FederationRequestSchema`. */
 export type FederationRequest = z.infer<typeof FederationRequestSchema>;
--- a/packages/types/src/federation/response.ts
+++ b/packages/types/src/federation/response.ts
@@ -0,0 +1,162 @@
 /**
 * Federation wire-format response schemas.
 *
 * Source of truth: docs/federation/PRD.md §9 and MILESTONES.md §M3.
 *
 * DESIGN: Generic factory functions rather than z.lazy
 * ─────────────────────────────────────────────────────
 * Zod generic schemas cannot be expressed as a single re-usable `z.ZodType`
 * value because TypeScript's type system erases the generic at the call site.
 * The idiomatic Zod v4 pattern is factory functions that take an item schema
 * and return a fully-typed schema.
 *
 *   const MyListSchema = FederationListResponseSchema(z.string());
 *   type MyList = z.infer<typeof MyListSchema>;
 *   // => { items: string[]; nextCursor?: string; _partial?: boolean; _truncated?: boolean }
 *
 * Downstream consumers (M3-03..M3-07, M3-08, M3-09) should call these
 * factories once per resource type and cache the result.
 *
 * Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
 */
 import { z } from 'zod';
 import { FEDERATION_VERBS } from './verbs.js';
 // ---------------------------------------------------------------------------
 // Shared envelope flags
 // ---------------------------------------------------------------------------
 /**
 * `_partial`: true when the response is a subset of available data (e.g. due
 * to scope intersection reducing the result set).
 * Optional on the wire: the key may be omitted entirely.
 */
 const PartialFlag = z.boolean().optional();
 /**
 * `_truncated`: true when the response was capped by max_rows_per_query and
 * additional pages exist beyond the current cursor.
 * Optional on the wire: the key may be omitted entirely.
 */
 const TruncatedFlag = z.boolean().optional();
 // ---------------------------------------------------------------------------
 // FederationListResponseSchema<T> factory
 // ---------------------------------------------------------------------------
 /**
 * Builds the Zod schema for a paginated federation list envelope.
 *
 * The envelope holds an array of `itemSchema` values under `items` plus three
 * optional fields: `nextCursor`, `_partial` and `_truncated`.
 *
 * @param itemSchema - Zod schema applied to every element of `items`.
 * @returns An object schema describing the list envelope.
 *
 * @example
 * ```ts
 * const TaskListSchema = FederationListResponseSchema(TaskSchema);
 * type TaskList = z.infer<typeof TaskListSchema>;
 * ```
 */
 export function FederationListResponseSchema<T extends z.ZodTypeAny>(itemSchema: T) {
  const envelopeShape = {
    items: z.array(itemSchema),
    nextCursor: z.string().optional(),
    _partial: PartialFlag,
    _truncated: TruncatedFlag,
  };
  return z.object(envelopeShape);
 }
 /** Plain TypeScript shape of a list envelope whose items are of type `T`. */
 export type FederationListResponse<T> = {
  items: T[];
  nextCursor?: string;
  _partial?: boolean;
  _truncated?: boolean;
 };
 // ---------------------------------------------------------------------------
 // FederationGetResponseSchema<T> factory
 // ---------------------------------------------------------------------------
 /**
 * Builds the Zod schema for a single-item federation get envelope.
 *
 * `item` is null when the resource was not found (404 equivalent on the wire);
 * `_partial` is optional.
 *
 * @param itemSchema - Zod schema for the item; this factory makes it nullable.
 * @returns An object schema describing the get envelope.
 *
 * @example
 * ```ts
 * const TaskGetSchema = FederationGetResponseSchema(TaskSchema);
 * type TaskGet = z.infer<typeof TaskGetSchema>;
 * ```
 */
 export function FederationGetResponseSchema<T extends z.ZodTypeAny>(itemSchema: T) {
  const nullableItem = itemSchema.nullable();
  return z.object({ item: nullableItem, _partial: PartialFlag });
 }
 /** Plain TypeScript shape of a get envelope whose item is of type `T` or null. */
 export type FederationGetResponse<T> = {
  item: T | null;
  _partial?: boolean;
 };
 // ---------------------------------------------------------------------------
 // FederationCapabilitiesResponseSchema (fixed shape)
 // ---------------------------------------------------------------------------
 /**
 * Shape mirrors FederationScope (apps/gateway/src/federation/scope-schema.ts)
 * but is kept separate to avoid coupling packages/types to the gateway module.
 * The serving side populates this from the resolved grant scope at request time.
 *
 * Required: `resources`, `excluded_resources`, `max_rows_per_query`,
 * `supported_verbs`. Optional: `filters`, `rate_limit`.
 */
 export const FederationCapabilitiesResponseSchema = z.object({
  /**
   * Resources this grant is allowed to query. Must contain at least one entry.
   */
  resources: z.array(z.string()).nonempty(),
  /**
   * Resources explicitly blocked for this grant even if they exist.
   * May be empty.
   */
  excluded_resources: z.array(z.string()),
  /**
   * Per-resource filters (mirrors FederationScope.filters from PRD §8.1).
   * Keys are resource names; values control team/personal visibility.
   */
  filters: z
    .record(
      z.string(),
      z.object({
        include_teams: z.array(z.string()).optional(),
        include_personal: z.boolean().optional(),
      }),
    )
    .optional(),
  /**
   * Hard cap on rows returned per query for this grant.
   * Must be a positive integer.
   */
  max_rows_per_query: z.number().int().positive(),
  /**
   * Verbs currently available. Will expand in M4+ (search).
   * Closed enum — only values from FEDERATION_VERBS are accepted.
   * Must contain at least one entry.
   */
  supported_verbs: z.array(z.enum(FEDERATION_VERBS)).nonempty(),
  /**
   * Rate-limit state for this grant (PRD §9.1).
   * M4 populates `remaining` and `reset_at`; M3 servers may return only
   * `limit_per_minute` or omit the field entirely.
   */
  rate_limit: z
    .object({
      limit_per_minute: z.number().int().positive(),
      remaining: z.number().int().nonnegative().optional(),
      // Zod 4 deprecates the `z.string().datetime()` method form in favour of
      // the top-level ISO validator; the accepted strings are unchanged.
      reset_at: z.iso.datetime().optional(),
    })
    .optional(),
 });
 /** Capabilities response type inferred from `FederationCapabilitiesResponseSchema`. */
 export type FederationCapabilitiesResponse = z.infer<typeof FederationCapabilitiesResponseSchema>;
--- a/packages/types/src/federation/source-tag.ts
+++ b/packages/types/src/federation/source-tag.ts
@@ -0,0 +1,61 @@
 /**
 * _source tag for federation fan-out results.
 *
 * Source of truth: docs/federation/PRD.md §9.3 and MILESTONES.md §M3 acceptance test #8.
 *
 * When source: "all" is requested, the gateway fans out to local + all active
 * federated peers, merges results, and tags each item with _source so the
 * caller knows the provenance.
 *
 * Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
 */
 import { z } from 'zod';
 // ---------------------------------------------------------------------------
 // Source tag schema
 // ---------------------------------------------------------------------------
 /**
 * `_source` is either:
 * - `"local"` — the item came from this gateway's own storage.
 * - a peer common name (e.g. `"mosaic.uscllc.com"`) — the item came from
 *   that federated peer.
 *
 * The schema itself only requires a non-empty string; it does not check that
 * the value is `"local"` or a well-formed hostname.
 */
 export const SourceTagSchema = z.object({
  _source: z.string().min(1, { message: '_source must not be empty' }),
 });
 /** Tag type inferred from `SourceTagSchema`. */
 export type SourceTag = z.infer<typeof SourceTagSchema>;
 /**
 * Literal constant for the well-known local source value (`'local'`).
 * Peers are identified by hostname strings, so there is no closed enum.
 */
 export const SOURCE_LOCAL = 'local' as const;
 // ---------------------------------------------------------------------------
 // Helper: tagWithSource
 // ---------------------------------------------------------------------------
 /**
 * Stamps each item in `items` with `{ _source: source }`.
 *
 * Returns a new array of shallow copies; neither `items` nor its elements are
 * mutated, so read-only and frozen arrays are accepted. If an item already has
 * a `_source` key, the copy carries the new `source` value instead.
 *
 * The return type merges the item type with SourceTag so callers get full
 * type-safety on both the original fields and `_source`.
 *
 * @param items  - Array of items to tag (may be read-only or empty).
 * @param source - Either `"local"` or a peer hostname (common name from the
 *                 client cert's CN or O field).
 * @returns A new array with one tagged copy per input item, in input order.
 *
 * @example
 * ```ts
 * const local = tagWithSource([{ id: '1', title: 'Task' }], 'local');
 * // => [{ id: '1', title: 'Task', _source: 'local' }]
 *
 * const remote = tagWithSource(peerItems, 'mosaic.uscllc.com');
 * ```
 */
 export function tagWithSource<T extends object>(
  items: readonly T[],
  source: string,
 ): Array<T & SourceTag> {
  // `_source` is spread last so it always wins over a same-named input key.
  return items.map((item) => ({ ...item, _source: source }));
 }
--- a/packages/types/src/federation/verbs.ts
+++ b/packages/types/src/federation/verbs.ts
@@ -0,0 +1,11 @@
 /**
 * Federation verb constants and types.
 *
 * Source of truth: docs/federation/PRD.md §9.1
 *
 * M3 ships list, get, capabilities. search lives in M4.
 */
 /** Closed tuple of the verbs shipped in M3; `as const` keeps the literal types. */
 export const FEDERATION_VERBS = ['list', 'get', 'capabilities'] as const;
 /** Union of the string literals listed in `FEDERATION_VERBS`. */
 export type FederationVerb = (typeof FEDERATION_VERBS)[number];
--- a/packages/types/src/index.ts
+++ b/packages/types/src/index.ts
@@ -5,3 +5,4 @@ export * from './agent/index.js';
 export * from './provider/index.js';
 export * from './routing/index.js';
 export * from './commands/index.js';
 export * from './federation/index.js';
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -679,6 +679,9 @@ importers:
      class-validator:
        specifier: ^0.15.1
        version: 0.15.1
      zod:
        specifier: ^4.3.6
        version: 4.3.6
    devDependencies:
      typescript:
        specifier: ^5.8.0
@@ -710,10 +713,10 @@ importers:
    dependencies:
      '@mariozechner/pi-agent-core':
        specifier: ^0.63.1
-        version: 0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)
+        version: 0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@3.25.76)
      '@mariozechner/pi-ai':
        specifier: ^0.63.1
-        version: 0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)
+        version: 0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@3.25.76)
      '@sinclair/typebox':
        specifier: ^0.34.41
        version: 0.34.48
@@ -7326,6 +7329,12 @@ snapshots:
      '@jridgewell/gen-mapping': 0.3.13
      '@jridgewell/trace-mapping': 0.3.31
  '@anthropic-ai/sdk@0.73.0(zod@3.25.76)':
    dependencies:
      json-schema-to-ts: 3.1.1
    optionalDependencies:
      zod: 3.25.76
  '@anthropic-ai/sdk@0.73.0(zod@4.3.6)':
    dependencies:
      json-schema-to-ts: 3.1.1
@@ -8667,6 +8676,18 @@ snapshots:
      - ws
      - zod
  '@mariozechner/pi-agent-core@0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@3.25.76)':
    dependencies:
      '@mariozechner/pi-ai': 0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@3.25.76)
    transitivePeerDependencies:
      - '@modelcontextprotocol/sdk'
      - aws-crt
      - bufferutil
      - supports-color
      - utf-8-validate
      - ws
      - zod
  '@mariozechner/pi-agent-core@0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)':
    dependencies:
      '@mariozechner/pi-ai': 0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)
@@ -8715,6 +8736,30 @@ snapshots:
      - ws
      - zod
  '@mariozechner/pi-ai@0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@3.25.76)':
    dependencies:
      '@anthropic-ai/sdk': 0.73.0(zod@3.25.76)
      '@aws-sdk/client-bedrock-runtime': 3.1008.0
      '@google/genai': 1.45.0(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))
      '@mistralai/mistralai': 1.14.1
      '@sinclair/typebox': 0.34.48
      ajv: 8.18.0
      ajv-formats: 3.0.1(ajv@8.18.0)
      chalk: 5.6.2
      openai: 6.26.0(ws@8.20.0)(zod@3.25.76)
      partial-json: 0.1.7
      proxy-agent: 6.5.0
      undici: 7.24.3
      zod-to-json-schema: 3.25.1(zod@3.25.76)
    transitivePeerDependencies:
      - '@modelcontextprotocol/sdk'
      - aws-crt
      - bufferutil
      - supports-color
      - utf-8-validate
      - ws
      - zod
  '@mariozechner/pi-ai@0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)':
    dependencies:
      '@anthropic-ai/sdk': 0.73.0(zod@4.3.6)
@@ -13307,6 +13352,11 @@ snapshots:
    dependencies:
      mimic-function: 5.0.1
  openai@6.26.0(ws@8.20.0)(zod@3.25.76):
    optionalDependencies:
      ws: 8.20.0
      zod: 3.25.76
  openai@6.26.0(ws@8.20.0)(zod@4.3.6):
    optionalDependencies:
      ws: 8.20.0
--- a/tools/federation-harness/README.md
+++ b/tools/federation-harness/README.md
@@ -0,0 +1,254 @@
 # Federation Test Harness
 Local two-gateway federation test infrastructure for Mosaic Stack M3+.
 This harness boots two real gateway instances (`gateway-a`, `gateway-b`) on a
 shared Docker bridge network, each backed by its own Postgres (pgvector) +
 Valkey, sharing a single Step-CA. It is the test bed for all M3+ federation
 E2E tests.
 ## Prerequisites
 - Docker with Compose v2 (`docker compose version` ≥ 2.20)
 - pnpm (for running via repo scripts)
 - `infra/step-ca/dev-password` must exist (copy from `infra/step-ca/dev-password.example`)
 ## Network Topology
 ```
 Host machine
 ├── localhost:14001  →  gateway-a   (Server A — home / requesting)
 ├── localhost:14002  →  gateway-b   (Server B — work / serving)
 ├── localhost:15432  →  postgres-a
 ├── localhost:15433  →  postgres-b
 ├── localhost:16379  →  valkey-a
 ├── localhost:16380  →  valkey-b
 └── localhost:19000  →  step-ca     (shared CA)
 Docker network: fed-test-net (bridge)
  gateway-a ←──── mTLS ────→ gateway-b
             ↘             ↗
               step-ca
 ```
 Ports are chosen to avoid collision with the base dev stack (5433, 6380, 14242, 9000).
 ## Starting the Harness
 ```bash
 # From repo root
 docker compose -f tools/federation-harness/docker-compose.two-gateways.yml up -d
 # Wait for all services to be healthy (~60-90s on first boot due to NestJS cold start)
 docker compose -f tools/federation-harness/docker-compose.two-gateways.yml ps
 ```
 ## Seeding Test Data
 The seed script provisions three grant scope variants (A, B, C) and walks the
 full enrollment flow so Server A ends up with active peers pointing at Server B.
 ```bash
 # Assumes stack is already running
 pnpm tsx tools/federation-harness/seed.ts
 # Or boot + seed in one step
 pnpm tsx tools/federation-harness/seed.ts --boot
 ```
 ### Scope Variants
 | Variant | Resources          | Filters                            | Excluded    | Purpose                         |
 | ------- | ------------------ | ---------------------------------- | ----------- | ------------------------------- |
 | A       | tasks, notes       | include_personal: true             | (none)      | Personal data federation        |
 | B       | tasks              | include_teams: ['T1'], no personal | (none)      | Team-scoped, no personal        |
 | C       | tasks, credentials | include_personal: true             | credentials | Sanity: excluded wins over list |
 ## Using from Vitest
 ```ts
 import {
  bootHarness,
  tearDownHarness,
  serverA,
  serverB,
  seed,
 } from '../../tools/federation-harness/harness.js';
 import type { HarnessHandle } from '../../tools/federation-harness/harness.js';
 let handle: HarnessHandle;
 beforeAll(async () => {
  handle = await bootHarness();
 }, 180_000); // allow 3 min for Docker pull + NestJS cold start
 afterAll(async () => {
  await tearDownHarness(handle);
 });
 test('variant A: list tasks returns personal tasks', async () => {
  // NOTE: Only 'all' is supported for now — per-variant narrowing is M3-11.
  const seedResult = await seed(handle, 'all');
  const a = serverA(handle);
  const res = await fetch(`${a.baseUrl}/api/federation/tasks`, {
    headers: { 'x-federation-grant': seedResult.grants.variantA.id },
  });
  expect(res.status).toBe(200);
 });
 ```
 > **Note:** `seed()` bootstraps a fresh admin user on each gateway via
 > `POST /api/bootstrap/setup`. Both gateways must have zero users (pristine DB).
 > If either gateway already has users, `seed()` throws with a clear error.
 > Reset state with `docker compose down -v`.
 The `bootHarness()` function is **idempotent**: if both gateways are already
 healthy, it reuses the running stack and returns `ownedStack: false`. In that
 case `tearDownHarness(handle)` logs a message and does nothing, so calling it
 from `afterAll` is always safe. Pass `{ force: true }` as the second argument
 only if you explicitly want to shut down a shared stack.
 ## Vitest Config (pnpm test:federation)
 Create a dedicated config at the repo root, named `vitest.federation.config.ts` (the script below references it by that name):
 ```ts
 // vitest.federation.config.ts
 import { defineConfig } from 'vitest/config';
 export default defineConfig({
  test: {
    include: ['**/*.federation.test.ts'],
    testTimeout: 60_000,
    hookTimeout: 180_000,
    reporters: ['verbose'],
  },
 });
 ```
 Then add to root `package.json`:
 ```json
 "test:federation": "vitest run --config vitest.federation.config.ts"
 ```
 ## Nuking State
 ```bash
 # Remove containers AND volumes (ephemeral state — CA keys, DBs, everything)
 docker compose -f tools/federation-harness/docker-compose.two-gateways.yml down -v
 ```
 On next `up`, Step-CA re-initialises from scratch and generates new CA keys.
 ## Step-CA Root Certificate
 The CA root lives in the `fed-harness-step-ca` Docker volume at
 `/home/step/certs/root_ca.crt`. To extract it to the host:
 ```bash
 docker run --rm \
  -v fed-harness-step-ca:/home/step \
  alpine cat /home/step/certs/root_ca.crt > /tmp/fed-harness-root-ca.crt
 ```
 ## Troubleshooting
 ### Port conflicts
 Default host ports: 14001, 14002, 15432, 15433, 16379, 16380, 19000.
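 Override via environment variables before `docker compose up`. If you move the gateway ports, also set `GATEWAY_A_URL` / `GATEWAY_B_URL` (read by `harness.ts` to locate the gateways from the host) to match: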
 ```bash
 GATEWAY_A_HOST_PORT=14101 GATEWAY_B_HOST_PORT=14102 \
  docker compose -f tools/federation-harness/docker-compose.two-gateways.yml up -d
 ```
 ### Image pull failures
 The gateway image is digest-pinned to:
 ```
 git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
 ```
 (sha-9f1a081, post-#491 IMG-FIX)
 If the registry is unreachable, Docker will use the locally cached image if
 present. If no local image exists, the compose up will fail with a pull error.
 In that case:
 1. Ensure you can reach `git.mosaicstack.dev` (VPN, DNS, etc.).
 2. Log in: `docker login git.mosaicstack.dev`
 3. Pull manually: `docker pull git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02`
 ### NestJS cold start
 Gateway containers take 40–60 seconds to become healthy on first boot (Node.js
 module resolution + NestJS DI bootstrap). The `start_period: 60s` in the
 compose healthcheck covers this. `bootHarness()` polls for up to 3 minutes.
 ### Step-CA startup
 Step-CA initialises on first boot (generates CA keys). This takes ~5-10s.
 The `start_period: 30s` in the healthcheck covers it. Both gateways wait for
 Step-CA to be healthy before starting (`depends_on: step-ca: condition: service_healthy`).
 ### dev-password missing
 The Step-CA container requires `infra/step-ca/dev-password` to be mounted.
 Copy the example and set a local password:
 ```bash
 cp infra/step-ca/dev-password.example infra/step-ca/dev-password
 # Edit the file to set your preferred dev CA password
 ```
 The file is `.gitignore`d — do not commit it.
 ## Image Digest Note
 The gateway image is pinned to `sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02`
 (sha-9f1a081). This is the digest promoted by PR #491 (IMG-FIX). The `latest`
 tag is forbidden per Mosaic image policy. When a new gateway build is promoted,
 update the digest in `docker-compose.two-gateways.yml` and in this file.
 ## Known Limitations
 ### BETTER_AUTH_URL enrollment URL bug (upstream production code — not yet fixed)
 `apps/gateway/src/federation/federation.controller.ts:145` constructs the
 enrollment URL using `process.env['BETTER_AUTH_URL'] ?? 'http://localhost:14242'`.
 This is an upstream bug: `BETTER_AUTH_URL` is the Better Auth origin (typically
 the web app), not the gateway's own base URL. In non-harness deployments this
 produces an enrollment URL pointing to the wrong host or port.
 **How the harness handles this:**
 1. **In-cluster calls (container-to-container):** The compose file sets
   `BETTER_AUTH_URL: 'http://gateway-b:3000'` so the enrollment URL returned by
   the gateway uses the Docker internal hostname. This lets other containers in the
   `fed-test-net` network resolve and reach Server B's enrollment endpoint.
 2. **Host-side URL rewrite (seed script):** The `seed.ts` script runs on the host
   machine where `gateway-b` is not a resolvable hostname. Before calling
   `fetch(enrollmentUrl, ...)`, the seed script rewrites the URL: it extracts only
   the token path segment from `enrollmentUrl` and reassembles the URL using the
   host-accessible `serverBUrl` (default: `http://localhost:14002`). This lets the
   seed script redeem enrollment tokens from the host without being affected by the
   in-cluster hostname in the returned URL.
 **TODO:** Fix `federation.controller.ts` to derive the enrollment URL from its own
 listening address (e.g. `GATEWAY_BASE_URL` env var or a dedicated
 `FEDERATION_ENROLLMENT_BASE_URL` env var) rather than reusing `BETTER_AUTH_URL`.
 Tracked as a follow-up to PR #505 — do not bundle with harness changes.
 ## Permanent Infrastructure
 This harness is designed to outlive M3 and be reused by M4+ milestone tests.
 It is not a throwaway scaffold — treat it as production test infrastructure:
 - Keep it idempotent.
 - Do not hardcode test assumptions in the harness layer (put them in tests).
 - Update the seed script when new scope variants are needed.
 - The README and harness should be kept in sync as the federation API evolves.
--- a/tools/federation-harness/docker-compose.two-gateways.yml
+++ b/tools/federation-harness/docker-compose.two-gateways.yml
@@ -0,0 +1,247 @@
 # tools/federation-harness/docker-compose.two-gateways.yml
 #
 # Two-gateway federation test harness — local-only, no Portainer/Swarm needed.
 #
 # USAGE (manual):
 #   docker compose -f tools/federation-harness/docker-compose.two-gateways.yml up -d
 #   docker compose -f tools/federation-harness/docker-compose.two-gateways.yml down -v
 #
 # USAGE (from harness.ts):
 #   const handle = await bootHarness();
 #   ...
 #   await tearDownHarness(handle);
 #
 # TOPOLOGY:
 #   gateway-a  — "home" instance (Server A, the requesting side)
 #     └── postgres-a  (pgvector/pg17, port 15432)
 #     └── valkey-a    (port 16379)
 #   gateway-b  — "work" instance (Server B, the serving side)
 #     └── postgres-b  (pgvector/pg17, port 15433)
 #     └── valkey-b    (port 16380)
 #   step-ca    — shared CA for both gateways (port 19000)
 #
 # All services share the `fed-test-net` bridge network.
 # Host port ranges (15432-15433, 16379-16380, 14001-14002, 19000) are chosen
 # to avoid collision with the base dev stack (5433, 6380, 14242, 9000).
 #
 # IMAGE:
 #   Pinned to the immutable digest sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
 #   (sha-9f1a081, post-#491 IMG-FIX, smoke-tested locally).
 #   Update this digest only after a new CI build is promoted to the registry.
 #
 # STEP-CA:
 #   Single shared Step-CA instance. Both gateways connect to it.
 #   CA volume is ephemeral per `docker compose down -v`; regenerated on next up.
 #   The harness seed script provisions the CA roots cross-trust after first boot.
 services:
  # ─── Shared Certificate Authority ────────────────────────────────────────────
  step-ca:
    image: smallstep/step-ca:0.27.4
    container_name: fed-harness-step-ca
    restart: unless-stopped
    ports:
      - '${STEP_CA_HOST_PORT:-19000}:9000'
    volumes:
      - step_ca_data:/home/step
      - ../../infra/step-ca/init.sh:/usr/local/bin/mosaic-step-ca-init.sh:ro
      - ../../infra/step-ca/templates:/etc/step-ca-templates:ro
      - ../../infra/step-ca/dev-password:/run/secrets/ca_password:ro
    entrypoint: ['/bin/sh', '/usr/local/bin/mosaic-step-ca-init.sh']
    networks:
      - fed-test-net
    healthcheck:
      test:
        [
          'CMD',
          'step',
          'ca',
          'health',
          '--ca-url',
          'https://localhost:9000',
          '--root',
          '/home/step/certs/root_ca.crt',
        ]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s
  # ─── Server A — Home / Requesting Gateway ────────────────────────────────────
  postgres-a:
    image: pgvector/pgvector:pg17
    container_name: fed-harness-postgres-a
    restart: unless-stopped
    ports:
      - '${PG_A_HOST_PORT:-15432}:5432'
    environment:
      POSTGRES_USER: mosaic
      POSTGRES_PASSWORD: mosaic
      POSTGRES_DB: mosaic
    volumes:
      - pg_a_data:/var/lib/postgresql/data
      - ../../infra/pg-init:/docker-entrypoint-initdb.d:ro
    networks:
      - fed-test-net
    healthcheck:
      test: ['CMD-SHELL', 'pg_isready -U mosaic']
      interval: 5s
      timeout: 3s
      retries: 5
  valkey-a:
    image: valkey/valkey:8-alpine
    container_name: fed-harness-valkey-a
    restart: unless-stopped
    ports:
      - '${VALKEY_A_HOST_PORT:-16379}:6379'
    volumes:
      - valkey_a_data:/data
    networks:
      - fed-test-net
    healthcheck:
      test: ['CMD', 'valkey-cli', 'ping']
      interval: 5s
      timeout: 3s
      retries: 5
  gateway-a:
    image: git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
    # Tag for human reference: sha-9f1a081 (post-#491 IMG-FIX; smoke-tested locally)
    container_name: fed-harness-gateway-a
    restart: unless-stopped
    ports:
      - '${GATEWAY_A_HOST_PORT:-14001}:3000'
    environment:
      MOSAIC_TIER: federated
      DATABASE_URL: postgres://mosaic:mosaic@postgres-a:5432/mosaic
      VALKEY_URL: redis://valkey-a:6379
      GATEWAY_PORT: '3000'
      GATEWAY_CORS_ORIGIN: 'http://localhost:14001'
      BETTER_AUTH_SECRET: harness-secret-server-a-do-not-use-in-prod
      BETTER_AUTH_URL: 'http://gateway-a:3000'
      STEP_CA_URL: 'https://step-ca:9000'
      FEDERATION_PEER_HOSTNAME: gateway-a
      # Bootstrap password for POST /api/bootstrap/setup — used by seed.ts to create
      # the first admin user. Only valid on a pristine (zero-user) database.
      # Not the same as ADMIN_API_KEY — there is no static API key in the gateway.
      ADMIN_BOOTSTRAP_PASSWORD: harness-admin-password-a
    depends_on:
      postgres-a:
        condition: service_healthy
      valkey-a:
        condition: service_healthy
      step-ca:
        condition: service_healthy
    networks:
      - fed-test-net
    healthcheck:
      test:
        [
          'CMD',
          'node',
          '-e',
          "require('http').get('http://127.0.0.1:3000/api/health', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))",
        ]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 60s
  # ─── Server B — Work / Serving Gateway ──────────────────────────────────────
  postgres-b:
    image: pgvector/pgvector:pg17
    container_name: fed-harness-postgres-b
    restart: unless-stopped
    ports:
      - '${PG_B_HOST_PORT:-15433}:5432'
    environment:
      POSTGRES_USER: mosaic
      POSTGRES_PASSWORD: mosaic
      POSTGRES_DB: mosaic
    volumes:
      - pg_b_data:/var/lib/postgresql/data
      - ../../infra/pg-init:/docker-entrypoint-initdb.d:ro
    networks:
      - fed-test-net
    healthcheck:
      test: ['CMD-SHELL', 'pg_isready -U mosaic']
      interval: 5s
      timeout: 3s
      retries: 5
  valkey-b:
    image: valkey/valkey:8-alpine
    container_name: fed-harness-valkey-b
    restart: unless-stopped
    ports:
      - '${VALKEY_B_HOST_PORT:-16380}:6379'
    volumes:
      - valkey_b_data:/data
    networks:
      - fed-test-net
    healthcheck:
      test: ['CMD', 'valkey-cli', 'ping']
      interval: 5s
      timeout: 3s
      retries: 5
  gateway-b:
    image: git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
    # Tag for human reference: sha-9f1a081 (post-#491 IMG-FIX; smoke-tested locally)
    container_name: fed-harness-gateway-b
    restart: unless-stopped
    ports:
      - '${GATEWAY_B_HOST_PORT:-14002}:3000'
    environment:
      MOSAIC_TIER: federated
      DATABASE_URL: postgres://mosaic:mosaic@postgres-b:5432/mosaic
      VALKEY_URL: redis://valkey-b:6379
      GATEWAY_PORT: '3000'
      GATEWAY_CORS_ORIGIN: 'http://localhost:14002'
      BETTER_AUTH_SECRET: harness-secret-server-b-do-not-use-in-prod
      BETTER_AUTH_URL: 'http://gateway-b:3000'
      STEP_CA_URL: 'https://step-ca:9000'
      FEDERATION_PEER_HOSTNAME: gateway-b
      # Bootstrap password for POST /api/bootstrap/setup — used by seed.ts to create
      # the first admin user. Only valid on a pristine (zero-user) database.
      # Not the same as ADMIN_API_KEY — there is no static API key in the gateway.
      ADMIN_BOOTSTRAP_PASSWORD: harness-admin-password-b
    depends_on:
      postgres-b:
        condition: service_healthy
      valkey-b:
        condition: service_healthy
      step-ca:
        condition: service_healthy
    networks:
      - fed-test-net
    healthcheck:
      test:
        [
          'CMD',
          'node',
          '-e',
          "require('http').get('http://127.0.0.1:3000/api/health', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))",
        ]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 60s
 networks:
  fed-test-net:
    name: fed-test-net
    driver: bridge
 volumes:
  step_ca_data:
    name: fed-harness-step-ca
  pg_a_data:
    name: fed-harness-pg-a
  valkey_a_data:
    name: fed-harness-valkey-a
  pg_b_data:
    name: fed-harness-pg-b
  valkey_b_data:
    name: fed-harness-valkey-b
--- a/tools/federation-harness/harness.ts
+++ b/tools/federation-harness/harness.ts
@@ -0,0 +1,290 @@
 /**
 * tools/federation-harness/harness.ts
 *
 * Vitest-consumable helpers for the two-gateway federation harness.
 *
 * USAGE (in a vitest test file):
 *
 *   import { bootHarness, tearDownHarness, serverA, serverB, seed } from
 *     '../../tools/federation-harness/harness.js';
 *
 *   let handle: HarnessHandle;
 *
 *   beforeAll(async () => {
 *     handle = await bootHarness();
 *   }, 180_000);
 *
 *   afterAll(async () => {
 *     await tearDownHarness(handle);
 *   });
 *
 *   test('variant A — list tasks', async () => {
 *     const seedResult = await seed(handle, 'all');
 *     const a = serverA(handle);
 *     const res = await fetch(`${a.baseUrl}/api/federation/list/tasks`, {
 *       headers: { Authorization: `Bearer ${seedResult.adminTokenA}` },
 *     });
 *     expect(res.status).toBe(200);
 *   });
 *
 * NOTE: The `seed()` helper currently only supports scenario='all'. Passing any
 * other value throws immediately. Per-variant narrowing is deferred to M3-11.
 *
 * ESM / NodeNext: all imports use .js extensions.
 */
 import { execSync, execFileSync } from 'node:child_process';
 import { resolve, dirname } from 'node:path';
 import { fileURLToPath } from 'node:url';
 import { runSeed, type SeedResult } from './seed.js';
 // ─── Types ───────────────────────────────────────────────────────────────────
 /**
 * Connection details for a single gateway in the harness.
 *
 * bootHarness() creates one accessor per gateway and exposes them on the
 * returned HarnessHandle as `a` and `b`.
 */
 export interface GatewayAccessor {
  /** Base URL reachable from the host machine, e.g. http://localhost:14001 */
  baseUrl: string;
  /** Bootstrap password used for POST /api/bootstrap/setup on a pristine gateway */
  bootstrapPassword: string;
  /** Internal Docker network hostname (for container-to-container calls) */
  internalHostname: string;
 }
 /**
 * Handle describing a booted (or reused) harness stack.
 *
 * Returned by bootHarness() and passed to tearDownHarness(), serverA(),
 * serverB() and seed().
 */
 export interface HarnessHandle {
  /** Server A accessor */
  a: GatewayAccessor;
  /** Server B accessor */
  b: GatewayAccessor;
  /** Absolute path to the docker-compose file */
  composeFile: string;
  /**
   * Whether this instance booted the stack (vs. reusing an existing one).
   * tearDownHarness() skips teardown when this is false unless forced.
   */
  ownedStack: boolean;
  /** Optional seed result; assigned by seed() after a successful run */
  seedResult?: SeedResult;
 }
 /**
 * Scenario to seed.
 *
 * Only 'all' is implemented today. The per-variant members are declared so the
 * type is already in place for per-variant narrowing (tracked as M3-11), but
 * seed() rejects them immediately with a clear error rather than silently
 * over-seeding.
 */
 export type SeedScenario = 'variantA' | 'variantB' | 'variantC' | 'all';
 // ─── Constants ────────────────────────────────────────────────────────────────
 // ESM modules have no built-in __dirname; derive this file's directory from import.meta.url.
 const __dirname = dirname(fileURLToPath(import.meta.url));
 // Path to the compose file that lives next to this module.
 const COMPOSE_FILE = resolve(__dirname, 'docker-compose.two-gateways.yml');
 // Host-reachable gateway base URLs; override via GATEWAY_A_URL / GATEWAY_B_URL.
 const GATEWAY_A_URL = process.env['GATEWAY_A_URL'] ?? 'http://localhost:14001';
 const GATEWAY_B_URL = process.env['GATEWAY_B_URL'] ?? 'http://localhost:14002';
 // Bootstrap passwords handed to the seed step; override via the env vars of the same name.
 const ADMIN_BOOTSTRAP_PASSWORD_A =
  process.env['ADMIN_BOOTSTRAP_PASSWORD_A'] ?? 'harness-admin-password-a';
 const ADMIN_BOOTSTRAP_PASSWORD_B =
  process.env['ADMIN_BOOTSTRAP_PASSWORD_B'] ?? 'harness-admin-password-b';
 // Readiness polling budget per gateway (3 minutes) and the delay between probes.
 const READINESS_TIMEOUT_MS = 180_000;
 const READINESS_POLL_MS = 3_000;
 // ─── Internal helpers ─────────────────────────────────────────────────────────
 /**
 * Probe a gateway's health endpoint once.
 *
 * @param baseUrl Gateway base URL; `/api/health` is appended to it.
 * @returns `true` when the probe answers with an OK status, `false` for a
 *          non-OK status or any failure (including the 5 second abort).
 */
 async function isGatewayHealthy(baseUrl: string): Promise<boolean> {
  let healthy = false;
  try {
    const probeUrl = `${baseUrl}/api/health`;
    const response = await fetch(probeUrl, { signal: AbortSignal.timeout(5_000) });
    healthy = response.ok;
  } catch {
    // Any failure to get a response counts as "not healthy".
  }
  return healthy;
 }
 /**
 * Poll both gateways in parallel until each one is healthy or its own
 * deadline expires.
 *
 * Each gateway gets an independent READINESS_TIMEOUT_MS budget. Polling in
 * parallel (rather than sequentially) avoids the bug where a slow gateway-a
 * consumes all of the readiness budget before gateway-b is checked.
 *
 * Progress is logged as whole lines prefixed with the gateway label, because
 * the two pollers run concurrently and partial writes would interleave.
 *
 * @param handle Harness handle whose `a.baseUrl` / `b.baseUrl` are probed.
 * @throws Error naming the first gateway that fails to become healthy in time.
 */
 async function waitForStack(handle: HarnessHandle): Promise<void> {
  const gateways: Array<{ label: string; url: string }> = [
    { label: 'gateway-a', url: handle.a.baseUrl },
    { label: 'gateway-b', url: handle.b.baseUrl },
  ];
  await Promise.all(
    gateways.map(async ({ label, url }) => {
      // Each gateway gets its own independent deadline.
      const deadline = Date.now() + READINESS_TIMEOUT_MS;
      const timedOut = (): Error =>
        new Error(
          `[harness] ${label} did not become healthy within ${READINESS_TIMEOUT_MS.toString()}ms`,
        );
      console.log(`[harness] Waiting for ${label}...`);
      while (Date.now() < deadline) {
        if (await isGatewayHealthy(url)) {
          console.log(`[harness] ${label} ready`);
          return;
        }
        // Fail now if the next poll would start after the deadline anyway.
        if (Date.now() + READINESS_POLL_MS > deadline) {
          throw timedOut();
        }
        await new Promise((r) => setTimeout(r, READINESS_POLL_MS));
      }
      // Reached when the final sleep or probe itself ran past the deadline.
      throw timedOut();
    }),
  );
 }
 /**
 * Parse the output of `docker compose ps --format json` into a flat list.
 *
 * Accepts either a single JSON document (an array of entries or one object)
 * or one JSON object per line, so both output layouts are handled.
 * Lines that are not valid JSON are ignored.
 */
 function parseComposePsOutput(output: string): unknown[] {
  const text = output.trim();
  if (!text) return [];
  try {
    const whole: unknown = JSON.parse(text);
    return Array.isArray(whole) ? whole : [whole];
  } catch {
    // Not a single JSON document; fall through to line-by-line parsing.
  }
  const entries: unknown[] = [];
  for (const line of text.split('\n')) {
    if (!line.trim()) continue;
    try {
      const parsed: unknown = JSON.parse(line);
      if (Array.isArray(parsed)) {
        entries.push(...parsed);
      } else {
        entries.push(parsed);
      }
    } catch {
      // Skip non-JSON noise instead of failing the whole check.
    }
  }
  return entries;
 }
 /**
 * Decide from `docker compose ps --format json` output whether both the
 * `gateway-a` and `gateway-b` services are in the `running` state.
 */
 function areGatewayServicesRunning(psOutput: string): boolean {
  const running = new Set<string>();
  for (const entry of parseComposePsOutput(psOutput)) {
    if (typeof entry !== 'object' || entry === null) continue;
    const { Service, State } = entry as { Service?: unknown; State?: unknown };
    if (State === 'running' && typeof Service === 'string') {
      running.add(Service);
    }
  }
  return ['gateway-a', 'gateway-b'].every((service) => running.has(service));
 }
 /**
 * Report whether the compose stack's two gateway services are running.
 *
 * Runs `docker compose ps --format json` against COMPOSE_FILE. Only the
 * gateway services are considered: two running backing services (e.g. a
 * Postgres and a Valkey) do not count as a running stack.
 *
 * @returns `true` when both gateways are running; `false` otherwise,
 *          including when the docker command itself fails.
 */
 function isStackRunning(): boolean {
  try {
    const output = execFileSync(
      'docker',
      ['compose', '-f', COMPOSE_FILE, 'ps', '--format', 'json'],
      { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] },
    );
    return areGatewayServicesRunning(output);
  } catch {
    return false;
  }
 }
 // ─── Public API ───────────────────────────────────────────────────────────────
 /**
 * Boot the harness stack.
 *
 * Idempotent: if both gateways already answer their /api/health probe, this
 * function reuses the existing stack and returns a handle with
 * `ownedStack: false`. Callers that receive `ownedStack: false` should NOT
 * force a teardown unless they explicitly want to shut down a pre-existing
 * stack.
 *
 * Otherwise it runs `docker compose up -d`, marks the handle as owned, and
 * waits for both gateways to pass their /api/health probe.
 *
 * @returns A handle with accessors for both gateways and the compose file path.
 * @throws If `docker compose up` fails or a gateway does not become healthy
 *         in time. The stack is left running in the latter case.
 */
 export async function bootHarness(): Promise<HarnessHandle> {
  const handle: HarnessHandle = {
    a: {
      baseUrl: GATEWAY_A_URL,
      bootstrapPassword: ADMIN_BOOTSTRAP_PASSWORD_A,
      internalHostname: 'gateway-a',
    },
    b: {
      baseUrl: GATEWAY_B_URL,
      bootstrapPassword: ADMIN_BOOTSTRAP_PASSWORD_B,
      internalHostname: 'gateway-b',
    },
    composeFile: COMPOSE_FILE,
    ownedStack: false,
  };
  // Check if both gateways are already healthy
  const [aHealthy, bHealthy] = await Promise.all([
    isGatewayHealthy(handle.a.baseUrl),
    isGatewayHealthy(handle.b.baseUrl),
  ]);
  if (aHealthy && bHealthy) {
    console.log('[harness] Stack already running — reusing existing stack.');
    return handle;
  }
  console.log('[harness] Starting federation harness stack...');
  // Argument array instead of a shell string: the compose path is passed
  // verbatim, so quotes, `$` or backticks in the checkout path cannot break
  // (or inject into) the command.
  execFileSync('docker', ['compose', '-f', handle.composeFile, 'up', '-d'], {
    stdio: 'inherit',
  });
  handle.ownedStack = true;
  await waitForStack(handle);
  console.log('[harness] Stack is ready.');
  return handle;
 }
 /**
 * Tear down the harness stack.
 *
 * Runs `docker compose down -v` to remove containers AND volumes (ephemeral
 * state). Does nothing except log a message when `handle.ownedStack` is false,
 * unless `opts.force` is set.
 *
 * @param handle Handle returned by bootHarness(); its `composeFile` selects the stack.
 * @param opts   Pass `{ force: true }` to tear down a stack this handle does not own.
 * @throws If the `docker compose down` command fails.
 */
 export async function tearDownHarness(
  handle: HarnessHandle,
  opts?: { force?: boolean },
 ): Promise<void> {
  if (!handle.ownedStack && !opts?.force) {
    console.log(
      '[harness] Stack not owned by this handle — skipping teardown (pass force: true to override).',
    );
    return;
  }
  console.log('[harness] Tearing down federation harness stack...');
  // Argument array instead of a shell string so the compose path is passed
  // verbatim regardless of which characters it contains.
  execFileSync('docker', ['compose', '-f', handle.composeFile, 'down', '-v'], {
    stdio: 'inherit',
  });
  console.log('[harness] Stack torn down.');
 }
 /**
 * Readability helper for tests: picks the Server A accessor off a harness
 * handle.
 */
 export function serverA(handle: HarnessHandle): GatewayAccessor {
  const { a: accessorA } = handle;
  return accessorA;
 }
 /**
 * Readability helper for tests: picks the Server B accessor off a harness
 * handle.
 */
 export function serverB(handle: HarnessHandle): GatewayAccessor {
  const { b: accessorB } = handle;
  return accessorB;
 }
 /**
 * Provision test data on the harness gateways.
 *
 * Delegates to runSeed() with the host-reachable base URLs and bootstrap
 * passwords taken from `handle`, stores the outcome on `handle.seedResult`,
 * and returns it.
 *
 * @param handle   The harness handle returned by bootHarness().
 * @param scenario Which scope variants to provision. Only 'all' is supported
 *                 today; any other value is rejected before anything is
 *                 seeded. Per-variant narrowing is tracked as M3-11.
 * @returns The SeedResult produced by runSeed(), for test assertions to reference.
 * @throws Error when `scenario` is anything other than 'all'.
 *
 * IMPORTANT: The harness assumes a pristine database on both gateways. The seed
 * bootstraps an admin user on each gateway via POST /api/bootstrap/setup. If
 * either gateway already has users, seed() throws with a clear error message.
 * Run 'docker compose down -v' to reset state.
 */
 export async function seed(
  handle: HarnessHandle,
  scenario: SeedScenario = 'all',
 ): Promise<SeedResult> {
  if (scenario === 'all') {
    const { a: gatewayA, b: gatewayB } = handle;
    const seeded = await runSeed({
      serverAUrl: gatewayA.baseUrl,
      serverBUrl: gatewayB.baseUrl,
      adminBootstrapPasswordA: gatewayA.bootstrapPassword,
      adminBootstrapPasswordB: gatewayB.bootstrapPassword,
    });
    handle.seedResult = seeded;
    return seeded;
  }
  const reason = 'seed: scenario narrowing not yet implemented; pass "all" for now. ';
  const detail = `Got: "${scenario}". Per-variant narrowing is tracked as M3-11.`;
  throw new Error(reason + detail);
 }
--- a/tools/federation-harness/seed.ts
+++ b/tools/federation-harness/seed.ts
@@ -0,0 +1,603 @@
 #!/usr/bin/env tsx
 /**
 * tools/federation-harness/seed.ts
 *
 * Provisions test data for the two-gateway federation harness.
 * Run via: tsx tools/federation-harness/seed.ts
 *
 * What this script does:
 *  1. (Optional) Boots the compose stack if --boot flag is passed.
 *  2. Waits for both gateways to be healthy.
 *  3. Bootstraps an admin user + token on each gateway via POST /api/bootstrap/setup.
 *  4. Creates three grants on Server B matching the M3 acceptance test scenarios:
 *     - Scope variant A: tasks + notes, include_personal: true
 *     - Scope variant B: tasks only, include_teams: ['T1'], exclude T2
 *     - Scope variant C: tasks + credentials in resources, credentials excluded (sanity)
 *  5. For each grant, walks the full enrollment flow:
 *       a. Server B creates a peer keypair (represents the requesting side).
 *       b. Server B creates the grant referencing that peer.
 *       c. Server B issues an enrollment token.
 *       d. Server A creates its own peer keypair (represents its view of B).
 *       e. Server A redeems the enrollment token at Server B's enrollment endpoint,
 *          submitting A's CSR → receives signed cert back.
 *       f. Server A stores the cert on its peer record → peer becomes active.
 *  6. Inserts representative test tasks and notes on Server B (skipped with a
 *     warning when the admin endpoints are not found).
 *
 * IMPORTANT: This script uses the real admin REST API — no direct DB writes.
 * It exercises the full enrollment flow as M3 acceptance tests will.
 *
 * ESM / NodeNext: all imports use .js extensions.
 */
 import { execSync } from 'node:child_process';
 import { resolve, dirname } from 'node:path';
 import { fileURLToPath } from 'node:url';
 // ─── Constants ───────────────────────────────────────────────────────────────
 // ES modules have no built-in __dirname; derive it from this module's file URL.
 const __dirname = dirname(fileURLToPath(import.meta.url));
 // Compose file path, resolved relative to this script's directory. Passed to
 // `docker compose -f` when the CLI is run with --boot.
 const COMPOSE_FILE = resolve(__dirname, 'docker-compose.two-gateways.yml');
 /**
 * Read an environment variable, substituting `fallback` only when it is unset.
 * An empty-string value counts as set and is returned as-is.
 */
 function envOrDefault(name: string, fallback: string): string {
  return process.env[name] ?? fallback;
 }
 /** Gateway base URLs as reachable from the host machine (mapped host ports). */
 const SERVER_A_URL = envOrDefault('GATEWAY_A_URL', 'http://localhost:14001');
 const SERVER_B_URL = envOrDefault('GATEWAY_B_URL', 'http://localhost:14002');
 /**
 * Passwords submitted to POST /api/bootstrap/setup on each gateway. A gateway
 * starts with zero users and needs this one-time setup call before any
 * admin-guarded endpoint can be used.
 */
 const ADMIN_BOOTSTRAP_PASSWORD_A = envOrDefault(
  'ADMIN_BOOTSTRAP_PASSWORD_A',
  'harness-admin-password-a',
 );
 const ADMIN_BOOTSTRAP_PASSWORD_B = envOrDefault(
  'ADMIN_BOOTSTRAP_PASSWORD_B',
  'harness-admin-password-b',
 );
 /** Total time budget for one gateway to report healthy (2 minutes). */
 const READINESS_TIMEOUT_MS = 2 * 60 * 1_000;
 /** Pause between consecutive health probes (3 seconds). */
 const READINESS_POLL_MS = 3 * 1_000;
 // ─── Scope variant definitions (for M3 acceptance tests) ─────────────────────
 /**
 * Scope variant A — tasks + notes, personal data included.
 *
 * Sent verbatim as the `scope` of the first grant created by runSeed().
 */
 export const SCOPE_VARIANT_A = {
  // Resource kinds listed in the grant.
  resources: ['tasks', 'notes'],
  // Per-resource filters: both resources opt in to personal items.
  filters: {
    tasks: { include_personal: true },
    notes: { include_personal: true },
  },
  // Nothing excluded; the cast keeps the inferred type string[] instead of never[].
  excluded_resources: [] as string[],
  // NOTE(review): presumably a per-query row cap applied by the gateway — confirm.
  max_rows_per_query: 500,
 };
 /**
 * Scope variant B — tasks only, team T1 only, no personal.
 *
 * Sent verbatim as the `scope` of the second grant created by runSeed().
 */
 export const SCOPE_VARIANT_B = {
  // Only tasks are listed; notes are not part of this grant.
  resources: ['tasks'],
  filters: {
    // Restrict to team 'T1' and explicitly opt out of personal items.
    tasks: { include_teams: ['T1'], include_personal: false },
  },
  // Nothing excluded; the cast keeps the inferred type string[] instead of never[].
  excluded_resources: [] as string[],
  // NOTE(review): presumably a per-query row cap applied by the gateway — confirm.
  max_rows_per_query: 500,
 };
 /**
 * Scope variant C — tasks + credentials in resources list, but credentials
 * explicitly in excluded_resources. Sanity test: credentials must still be
 * inaccessible even though they appear in resources.
 *
 * Sent verbatim as the `scope` of the third grant created by runSeed().
 */
 export const SCOPE_VARIANT_C = {
  // 'credentials' is listed here on purpose; see excluded_resources below.
  resources: ['tasks', 'credentials'],
  filters: {
    // Only tasks carry a filter; no filter entry is defined for credentials.
    tasks: { include_personal: true },
  },
  // The deliberate contradiction under test: the resource is both listed and excluded.
  excluded_resources: ['credentials'],
  // NOTE(review): presumably a per-query row cap applied by the gateway — confirm.
  max_rows_per_query: 500,
 };
 // ─── Inline types (no import from packages/types — M3-01 branch not yet merged) ─
 /** Per-request options accepted by adminFetch(). */
 interface AdminFetchOptions {
  /** HTTP method; adminFetch() falls back to 'GET' when omitted. */
  method?: string;
  /** Request payload; JSON-stringified by adminFetch() unless it is `undefined`. */
  body?: unknown;
  /** Admin token, sent as `Authorization: Bearer <adminToken>`. */
  adminToken: string;
 }
 /**
 * The fields this script reads from the response of
 * POST /api/admin/federation/peers/keypair.
 */
 interface PeerRecord {
  /** Peer identifier; used as the grant's `peerId` and in the peer cert PATCH path. */
  peerId: string;
  /** CSR in PEM form; Server A's CSR is submitted when redeeming the enrollment token. */
  csrPem: string;
 }
 /**
 * The fields this script reads from grant responses
 * (POST /api/admin/federation/grants and GET /api/admin/federation/grants/:id).
 */
 interface GrantRecord {
  /** Grant identifier; used to build the token and grant-lookup paths. */
  id: string;
  /** Grant status as reported by the gateway; only logged by this script. */
  status: string;
  /** Scope payload as returned by the gateway; not inspected by this script. */
  scope: unknown;
 }
 /** Response shape of POST /api/admin/federation/grants/:id/tokens as used by this script. */
 interface EnrollmentTokenResult {
  /** One-time enrollment token; only its first 8 characters are logged. */
  token: string;
  /** Token expiry as returned by the gateway; not read by this script. */
  expiresAt: string;
  /**
   * Redemption URL as built by the gateway. enrollGrant() keeps only its token
   * path segment and rebuilds the URL on the host-accessible base.
   */
  enrollmentUrl: string;
 }
 /** Response shape of the enrollment redemption POST as used by this script. */
 interface EnrollmentRedeemResult {
  /** Issued certificate (PEM); stored on Server A's peer record via PATCH .../cert. */
  certPem: string;
  /** Certificate chain (PEM); not read by this script. */
  certChainPem: string;
 }
 /** What bootstrapAdmin() returns for one gateway. */
 interface BootstrapResult {
  /** `user.id` from the POST /api/bootstrap/setup response. */
  adminUserId: string;
  /** `token.plaintext` from the same response; used as the Bearer token for admin calls. */
  adminToken: string;
 }
 /** Everything runSeed() provisions, returned so tests can reference it. */
 export interface SeedResult {
  /** Base URL used for Server A during the seed run. */
  serverAUrl: string;
  /** Base URL used for Server B during the seed run. */
  serverBUrl: string;
  /** Admin token obtained by bootstrapping Server A. */
  adminTokenA: string;
  /** Admin token obtained by bootstrapping Server B. */
  adminTokenB: string;
  /** ID of the admin user created on Server A. */
  adminUserIdA: string;
  /** ID of the admin user created on Server B. */
  adminUserIdB: string;
  /** Grants as re-read from Server B after enrollment, one per scope variant. */
  grants: {
    variantA: GrantRecord;
    variantB: GrantRecord;
    variantC: GrantRecord;
  };
  /** Server A's peer record for each variant, tagged with the matching grant ID from Server B. */
  peers: {
    variantA: PeerRecord & { grantId: string };
    variantB: PeerRecord & { grantId: string };
    variantC: PeerRecord & { grantId: string };
  };
 }
 // ─── HTTP helpers ─────────────────────────────────────────────────────────────
 /**
 * Authenticated admin fetch. Sends `Authorization: Bearer <adminToken>` which
 * is the only path supported by AdminGuard (DB-backed sha256 token lookup).
 * No `x-admin-key` header path exists in the gateway.
 *
 * @param baseUrl Gateway base URL (no trailing slash expected; `path` is appended as-is).
 * @param path    Request path beginning with '/'.
 * @param opts    Method (default 'GET'), optional JSON body, and the admin token.
 * @returns The parsed JSON response body, or `undefined` when a successful
 *          response carries no body (e.g. 204 No Content).
 * @throws Error `"<METHOD> <url> → <status>: <body>"` on any non-2xx response.
 *         seedTestData() inspects this message, so keep the format stable.
 * @throws SyntaxError when a non-empty success body is not valid JSON.
 */
 async function adminFetch<T>(baseUrl: string, path: string, opts: AdminFetchOptions): Promise<T> {
  const method = opts.method ?? 'GET';
  const url = `${baseUrl}${path}`;
  const res = await fetch(url, {
    method,
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${opts.adminToken}`,
    },
    body: opts.body !== undefined ? JSON.stringify(opts.body) : undefined,
  });
  if (!res.ok) {
    const text = await res.text().catch(() => '(no body)');
    throw new Error(`${method} ${url} → ${res.status}: ${text}`);
  }
  // Read as text first: res.json() throws on an empty body, which would turn a
  // successful body-less response into a seed failure.
  const raw = await res.text();
  if (raw.trim() === '') {
    return undefined as T;
  }
  return JSON.parse(raw) as T;
 }
 // ─── Admin bootstrap ──────────────────────────────────────────────────────────
 /**
 * Create the first admin user on a gateway whose database is still empty.
 *
 * Flow:
 *  1. GET /api/bootstrap/status and require `needsSetup === true`.
 *  2. POST /api/bootstrap/setup with { name, email, password }; the response
 *     carries `user.id` and `token.plaintext`.
 *
 * The email local-part is derived from `label`, lower-cased with every run of
 * whitespace replaced by '-'.
 *
 * @param baseUrl  Gateway base URL.
 * @param label    Human-readable gateway name used in logs, the admin name and email.
 * @param password Password for the new admin user.
 * @returns The new admin's user ID and plaintext token.
 * @throws Error when the status call fails, when the gateway already has users
 *         (needsSetup is false), or when the setup call fails.
 */
 async function bootstrapAdmin(
  baseUrl: string,
  label: string,
  password: string,
 ): Promise<BootstrapResult> {
  const statusUrl = `${baseUrl}/api/bootstrap/status`;
  const setupUrl = `${baseUrl}/api/bootstrap/setup`;
  console.log(`[seed] Bootstrapping admin on ${label} (${baseUrl})...`);
  // Step 1 — the gateway must still be waiting for its first user.
  const statusResponse = await fetch(statusUrl);
  if (!statusResponse.ok) {
    throw new Error(`[seed] GET ${statusUrl} → ${statusResponse.status.toString()}`);
  }
  const { needsSetup } = (await statusResponse.json()) as { needsSetup: boolean };
  if (!needsSetup) {
    throw new Error(
      `[seed] ${label} at ${baseUrl} already has users (needsSetup=false). ` +
        `The harness requires a pristine database. Run 'docker compose down -v' to reset.`,
    );
  }
  // Step 2 — create the admin. Whitespace in the label is replaced so the
  // address stays a valid email.
  const emailSlug = label.toLowerCase().replace(/\s+/g, '-');
  const setupPayload = {
    name: `Harness Admin (${label})`,
    email: `harness-admin-${emailSlug}@example.invalid`,
    password,
  };
  const setupResponse = await fetch(setupUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(setupPayload),
  });
  if (!setupResponse.ok) {
    const failureBody = await setupResponse.text().catch(() => '(no body)');
    throw new Error(
      `[seed] POST ${setupUrl} → ${setupResponse.status.toString()}: ${failureBody}`,
    );
  }
  const created = (await setupResponse.json()) as {
    user: { id: string };
    token: { plaintext: string };
  };
  const adminUserId = created.user.id;
  const adminToken = created.token.plaintext;
  console.log(`[seed]   ${label} admin user: ${adminUserId}`);
  // Only a short prefix of the token is logged.
  console.log(`[seed]   ${label} admin token: ${adminToken.slice(0, 8)}...`);
  return { adminUserId, adminToken };
 }
 // ─── Readiness probe ──────────────────────────────────────────────────────────
 /**
 * Poll a gateway's /api/health endpoint until it answers with a 2xx status.
 *
 * Each probe is aborted after 5 s. After a failed probe the function sleeps
 * READINESS_POLL_MS, and it gives up once READINESS_TIMEOUT_MS has elapsed.
 *
 * @param baseUrl Gateway base URL.
 * @param label   Human-readable gateway name used in log and error messages.
 * @throws Error when the gateway is not healthy before the deadline; the
 *         message includes the last observed failure.
 */
 async function waitForGateway(baseUrl: string, label: string): Promise<void> {
  const healthUrl = `${baseUrl}/api/health`;
  const giveUpAt = Date.now() + READINESS_TIMEOUT_MS;
  // One probe: resolves to undefined when healthy, otherwise a short failure description.
  const probe = async (): Promise<string | undefined> => {
    try {
      const res = await fetch(healthUrl, { signal: AbortSignal.timeout(5_000) });
      return res.ok ? undefined : `HTTP ${res.status.toString()}`;
    } catch (err) {
      return err instanceof Error ? err.message : String(err);
    }
  };
  let lastError = '';
  while (Date.now() < giveUpAt) {
    const failure = await probe();
    if (failure === undefined) {
      console.log(`[seed] ${label} is ready (${baseUrl})`);
      return;
    }
    lastError = failure;
    await new Promise((wake) => setTimeout(wake, READINESS_POLL_MS));
  }
  throw new Error(
    `[seed] ${label} did not become ready within ${READINESS_TIMEOUT_MS.toString()}ms — last error: ${lastError}`,
  );
 }
 // ─── Enrollment flow ──────────────────────────────────────────────────────────
 /**
 * Build the host-reachable redemption URL for an enrollment URL issued by Server B.
 *
 * The gateway builds enrollmentUrl from BETTER_AUTH_URL, which resolves to the
 * in-cluster Docker hostname (gateway-b:3000) and is not reachable from the
 * host. Only the final non-empty path segment (the token) is kept and
 * re-attached to `serverBUrl`.
 *
 * @throws Error when the URL path contains no segment to use as the token.
 */
 function toHostRedeemUrl(enrollmentUrl: string, serverBUrl: string): string {
  // Resolving against serverBUrl also accepts a relative enrollment URL; the
  // filter ignores empty segments produced by a trailing slash.
  const segments = new URL(enrollmentUrl, serverBUrl).pathname
    .split('/')
    .filter((segment) => segment !== '');
  const tokenSegment = segments.pop();
  if (tokenSegment === undefined) {
    throw new Error(`[seed] Enrollment URL has no token path segment: ${enrollmentUrl}`);
  }
  return `${serverBUrl}/api/federation/enrollment/${tokenSegment}`;
 }
 /**
 * Walk the full enrollment flow for one grant.
 *
 * The correct two-sided flow (matching the data model's FK semantics):
 *
 *  1. On Server B: POST /api/admin/federation/peers/keypair
 *       → peerId_B (Server B's peer record representing the requesting side)
 *  2. On Server B: POST /api/admin/federation/grants with peerId: peerId_B
 *       → grant (FK to Server B's own federation_peers table — no violation)
 *  3. On Server B: POST /api/admin/federation/grants/:id/tokens
 *       → enrollmentUrl pointing back to Server B
 *  4. On Server A: POST /api/admin/federation/peers/keypair
 *       → peerId_A + csrPem_A (Server A's local record of Server B)
 *  5. Server A → Server B: POST enrollmentUrl with { csrPem: csrPem_A }
 *       → certPem signed by Server B's CA
 *  6. On Server A: PATCH /api/admin/federation/peers/:peerId_A/cert with certPem
 *       → Server A's peer record transitions to active
 *
 * Afterwards the grant is re-read from Server B; a warning is logged if it is
 * not reported as active.
 *
 * @param opts.label          Variant label ('A', 'B', 'C'); used in logs and peer names.
 * @param opts.subjectUserId  User on Server B the grant is issued for.
 * @param opts.scope          Scope payload sent verbatim when creating the grant.
 * @param opts.adminTokenA    Admin Bearer token for Server A.
 * @param opts.adminTokenB    Admin Bearer token for Server B.
 * @param opts.serverAUrl     Host-reachable base URL of Server A.
 * @param opts.serverBUrl     Host-reachable base URL of Server B.
 * @returns The grant as re-read from Server B and Server A's peer record
 *          tagged with the grant ID.
 * @throws Error when any admin call or the token redemption fails, or when the
 *         enrollment URL carries no token segment.
 */
 async function enrollGrant(opts: {
  label: string;
  subjectUserId: string;
  scope: unknown;
  adminTokenA: string;
  adminTokenB: string;
  serverAUrl: string;
  serverBUrl: string;
 }): Promise<{ grant: GrantRecord; peer: PeerRecord & { grantId: string } }> {
  const { label, subjectUserId, scope, adminTokenA, adminTokenB, serverAUrl, serverBUrl } = opts;
  console.log(`\n[seed] Enrolling grant for scope variant ${label}...`);
  // 1. Create peer keypair on Server B (represents the requesting peer from B's perspective)
  const peerB = await adminFetch<PeerRecord>(serverBUrl, '/api/admin/federation/peers/keypair', {
    method: 'POST',
    adminToken: adminTokenB,
    body: {
      commonName: `harness-peer-${label.toLowerCase()}-from-b`,
      displayName: `Harness Peer ${label} (Server A as seen from B)`,
      endpointUrl: serverAUrl,
    },
  });
  console.log(`[seed]   Created peer on B: ${peerB.peerId}`);
  // 2. Create grant on Server B referencing B's own peer record
  const grant = await adminFetch<GrantRecord>(serverBUrl, '/api/admin/federation/grants', {
    method: 'POST',
    adminToken: adminTokenB,
    body: {
      peerId: peerB.peerId,
      subjectUserId,
      scope,
    },
  });
  console.log(`[seed]   Created grant on B: ${grant.id} (status: ${grant.status})`);
  // 3. Generate enrollment token on Server B
  const tokenResult = await adminFetch<EnrollmentTokenResult>(
    serverBUrl,
    `/api/admin/federation/grants/${grant.id}/tokens`,
    { method: 'POST', adminToken: adminTokenB, body: { ttlSeconds: 900 } },
  );
  console.log(`[seed]   Enrollment token: ${tokenResult.token.slice(0, 8)}...`);
  console.log(`[seed]   Enrollment URL: ${tokenResult.enrollmentUrl}`);
  // 4. Create peer keypair on Server A (Server A's local record of Server B)
  const peerA = await adminFetch<PeerRecord>(serverAUrl, '/api/admin/federation/peers/keypair', {
    method: 'POST',
    adminToken: adminTokenA,
    body: {
      commonName: `harness-peer-${label.toLowerCase()}-from-a`,
      displayName: `Harness Peer ${label} (Server B as seen from A)`,
      endpointUrl: serverBUrl,
    },
  });
  console.log(`[seed]   Created peer on A: ${peerA.peerId}`);
  // 5. Redeem token at Server B's enrollment endpoint with A's CSR.
  //    The enrollment endpoint is not admin-guarded — the one-time token IS the credential.
  //    The gateway-issued URL is rewritten onto the host-accessible base first.
  const redeemUrl = toHostRedeemUrl(tokenResult.enrollmentUrl, serverBUrl);
  console.log(`[seed]   Rewritten redeem URL (host-accessible): ${redeemUrl}`);
  const redeemRes = await fetch(redeemUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ csrPem: peerA.csrPem }),
  });
  if (!redeemRes.ok) {
    const body = await redeemRes.text().catch(() => '(no body)');
    throw new Error(`Enrollment redemption failed: ${redeemRes.status.toString()} — ${body}`);
  }
  const redeemResult = (await redeemRes.json()) as EnrollmentRedeemResult;
  console.log(`[seed]   Cert issued (${redeemResult.certPem.length.toString()} bytes)`);
  // 6. Store cert on Server A's peer record → transitions to active
  await adminFetch<unknown>(serverAUrl, `/api/admin/federation/peers/${peerA.peerId}/cert`, {
    method: 'PATCH',
    adminToken: adminTokenA,
    body: { certPem: redeemResult.certPem },
  });
  console.log(`[seed]   Cert stored on A — peer ${peerA.peerId} is now active`);
  // Verify grant flipped to active on B
  const activeGrant = await adminFetch<GrantRecord>(
    serverBUrl,
    `/api/admin/federation/grants/${grant.id}`,
    { adminToken: adminTokenB },
  );
  console.log(`[seed]   Grant status on B: ${activeGrant.status}`);
  // Surface a grant that did not activate instead of only logging its status.
  // NOTE(review): 'active' is the status literal implied by this file's comments — confirm.
  if (String(activeGrant.status).toLowerCase() !== 'active') {
    console.warn(
      `[seed]   WARN: grant ${grant.id} on B is "${String(activeGrant.status)}" — expected "active" after enrollment`,
    );
  }
  return { grant: activeGrant, peer: { ...peerA, grantId: grant.id } };
 }
 // ─── Test data insertion ──────────────────────────────────────────────────────
 /**
 * True when `err` means "the endpoint does not exist": either the error
 * adminFetch() raises for an HTTP 404 (`"<METHOD> <url> → 404: <body>"`) or a
 * message containing the framework's "Cannot POST" text.
 *
 * The 404 check is anchored to the status position of the message so that a
 * "404" appearing elsewhere (port number, echoed ID, response body of another
 * status) is not mistaken for a missing endpoint.
 */
 function isMissingEndpointError(err: unknown): boolean {
  const msg = err instanceof Error ? err.message : String(err);
  return /^\S+ \S+ → 404: /.test(msg) || msg.includes('Cannot POST');
 }
 /**
 * POST each row to `path` on Server B. If the endpoint is missing, warn once
 * and skip the remaining rows of this kind; any other failure is rethrown.
 */
 async function insertSeedRows(
  kind: 'task' | 'note',
  path: string,
  rows: ReadonlyArray<{ title: string }>,
  serverBUrl: string,
  adminTokenB: string,
 ): Promise<void> {
  for (const row of rows) {
    try {
      await adminFetch<unknown>(serverBUrl, path, {
        method: 'POST',
        adminToken: adminTokenB,
        body: row,
      });
      console.log(`[seed]   Inserted ${kind}: "${row.title}"`);
    } catch (err) {
      if (!isMissingEndpointError(err)) {
        throw err;
      }
      console.warn(
        `[seed]   WARN: ${path} not found — skipping ${kind} insertion (expected until M3-11)`,
      );
      return;
    }
  }
 }
 /**
 * Insert representative test tasks and notes on Server B via its admin APIs.
 *
 * NOTE: The gateway's task/note/credential APIs require an authenticated user
 * session. For the harness, we seed via admin-level endpoints if available,
 * or document the gap here for M3-11 to fill in with proper user session seeding.
 *
 * ASSUMPTION: Server B exposes POST /api/admin/tasks and POST /api/admin/notes.
 * If an endpoint does not yet exist (404), this function logs a warning and
 * skips that kind without failing — M3-11 will add the session-based seeding
 * path. No credentials are inserted here.
 *
 * @param subjectUserId Owner (`userId`) stamped on every inserted row.
 * @param scopeLabel    Variant label used in row titles and log lines.
 * @param serverBUrl    Host-reachable base URL of Server B.
 * @param adminTokenB   Admin Bearer token for Server B.
 * @throws Error for any insertion failure other than a missing endpoint.
 */
 async function seedTestData(
  subjectUserId: string,
  scopeLabel: string,
  serverBUrl: string,
  adminTokenB: string,
 ): Promise<void> {
  console.log(`\n[seed] Seeding test data on Server B for ${scopeLabel}...`);
  const testTasks = [
    {
      title: `${scopeLabel} Task 1`,
      description: 'Federation harness test task',
      userId: subjectUserId,
    },
    {
      title: `${scopeLabel} Task 2`,
      description: 'Team-scoped test task',
      userId: subjectUserId,
      teamId: 'T1',
    },
  ];
  const testNotes = [
    {
      title: `${scopeLabel} Note 1`,
      content: 'Personal note for federation test',
      userId: subjectUserId,
    },
  ];
  // Tasks first, then notes — each kind tolerates a missing endpoint independently.
  await insertSeedRows('task', '/api/admin/tasks', testTasks, serverBUrl, adminTokenB);
  await insertSeedRows('note', '/api/admin/notes', testNotes, serverBUrl, adminTokenB);
  console.log(`[seed] Test data seeding for ${scopeLabel} complete.`);
 }
 // ─── Main entrypoint ──────────────────────────────────────────────────────────
 /**
 * Provision the complete harness data set across both gateways.
 *
 * Sequence: wait for both gateways → bootstrap an admin on each → enroll
 * scope variants A, B and C one after another → seed test data on Server B →
 * print a summary.
 *
 * @param opts Optional overrides. URLs and bootstrap passwords fall back to
 *             the module-level defaults. `subjectUserIds` falls back to the
 *             admin user bootstrapped on Server B for all three variants.
 * @returns URLs, admin tokens/IDs, and the grant and peer record per variant.
 * @throws Error when a gateway never becomes ready, a database is not
 *         pristine, or any enrollment/seeding call fails.
 */
 export async function runSeed(opts?: {
  serverAUrl?: string;
  serverBUrl?: string;
  adminBootstrapPasswordA?: string;
  adminBootstrapPasswordB?: string;
  subjectUserIds?: { variantA: string; variantB: string; variantC: string };
 }): Promise<SeedResult> {
  const urlA = opts?.serverAUrl ?? SERVER_A_URL;
  const urlB = opts?.serverBUrl ?? SERVER_B_URL;
  const bootstrapPasswordA = opts?.adminBootstrapPasswordA ?? ADMIN_BOOTSTRAP_PASSWORD_A;
  const bootstrapPasswordB = opts?.adminBootstrapPasswordB ?? ADMIN_BOOTSTRAP_PASSWORD_B;
  // Caller-supplied subjects, if any; otherwise resolved after bootstrap below.
  const requestedSubjects = opts?.subjectUserIds;
  console.log('[seed] Waiting for gateways to be ready...');
  await Promise.all([waitForGateway(urlA, 'Server A'), waitForGateway(urlB, 'Server B')]);
  // Both databases must be pristine for the bootstrap calls to succeed.
  console.log('\n[seed] Bootstrapping admin accounts...');
  const [adminA, adminB] = await Promise.all([
    bootstrapAdmin(urlA, 'Server A', bootstrapPasswordA),
    bootstrapAdmin(urlB, 'Server B', bootstrapPasswordB),
  ]);
  // Without explicit subjects, use Server B's admin user (guaranteed to exist).
  const subjects = requestedSubjects ?? {
    variantA: adminB.adminUserId,
    variantB: adminB.adminUserId,
    variantC: adminB.adminUserId,
  };
  // Arguments shared by every enrollment.
  const enroll = (label: string, subjectUserId: string, scope: unknown) =>
    enrollGrant({
      label,
      subjectUserId,
      scope,
      adminTokenA: adminA.adminToken,
      adminTokenB: adminB.adminToken,
      serverAUrl: urlA,
      serverBUrl: urlB,
    });
  // Variants are enrolled strictly one after another: this keeps the log
  // linear and avoids contention on the step-ca signing queue.
  console.log('\n[seed] Enrolling scope variants...');
  const enrolledA = await enroll('A', subjects.variantA, SCOPE_VARIANT_A);
  const enrolledB = await enroll('B', subjects.variantB, SCOPE_VARIANT_B);
  const enrolledC = await enroll('C', subjects.variantC, SCOPE_VARIANT_C);
  // Test data for the three variants is seeded concurrently on Server B.
  const seedTargets = [
    ['A', subjects.variantA],
    ['B', subjects.variantB],
    ['C', subjects.variantC],
  ] as const;
  await Promise.all(
    seedTargets.map(([variantLabel, userId]) =>
      seedTestData(userId, variantLabel, urlB, adminB.adminToken),
    ),
  );
  const result: SeedResult = {
    serverAUrl: urlA,
    serverBUrl: urlB,
    adminTokenA: adminA.adminToken,
    adminTokenB: adminB.adminToken,
    adminUserIdA: adminA.adminUserId,
    adminUserIdB: adminB.adminUserId,
    grants: {
      variantA: enrolledA.grant,
      variantB: enrolledB.grant,
      variantC: enrolledC.grant,
    },
    peers: {
      variantA: enrolledA.peer,
      variantB: enrolledB.peer,
      variantC: enrolledC.peer,
    },
  };
  console.log('\n[seed] Seed complete.');
  console.log('[seed] Summary:');
  const summaryRows = [
    ['A', result.grants.variantA],
    ['B', result.grants.variantB],
    ['C', result.grants.variantC],
  ] as const;
  for (const [variantLabel, grant] of summaryRows) {
    console.log(`  Variant ${variantLabel} grant: ${grant.id} (${grant.status})`);
  }
  return result;
 }
 // ─── CLI entry ────────────────────────────────────────────────────────────────
 // Treat this module as the CLI entry point when the invoked script's file name
 // (the text after the last '/' of argv[1]) is a suffix of this module's own path.
 const invokedScript = process.argv[1];
 const isCli =
  invokedScript != null &&
  fileURLToPath(import.meta.url).endsWith(
    invokedScript.slice(invokedScript.lastIndexOf('/') + 1),
  );
 if (isCli) {
  // --boot: bring the compose stack up (detached) before seeding.
  if (process.argv.includes('--boot')) {
    console.log('[seed] --boot flag detected — starting compose stack...');
    execSync(`docker compose -f "${COMPOSE_FILE}" up -d`, { stdio: 'inherit' });
  }
  // Exit explicitly: 0 when the seed completes, 1 on any failure.
  runSeed()
    .then(() => process.exit(0))
    .catch((err) => {
      console.error('[seed] Fatal:', err);
      process.exit(1);
    });
 }
Author	SHA1	Message	Date
Jarvis	4cf9362e75	fix(federation): harness round-2 — email validation + host-side URL rewrite All checks were successful ci/woodpecker/pr/ci Pipeline was successful Details ci/woodpecker/push/ci Pipeline was successful Details - Bug-1: replace whitespace in admin email local-part (was breaking @IsEmail) - Bug-2: rewrite enrollment URL to use host-accessible base in seed.ts (in-cluster URL not resolvable from host) - Bug-3: correct README Known Limitations section - eslint.config.mjs: add tools/federation-harness/*.ts to allowDefaultProject so pre-commit hook can lint harness scripts Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-23 21:54:46 -05:00
Jarvis	cb118a53d9	fix(federation): harness CRIT bugs — admin bootstrap auth + peer FK + boot deadline (review remediation) CRIT-1: Replace nonexistent x-admin-key header with Authorization: Bearer <token>; add bootstrapAdmin() to call POST /api/bootstrap/setup on each pristine gateway before any admin-guarded endpoint is used. CRIT-2: Fix cross-gateway peer FK violation — peer keypair is now created on Server B first (so the grant FK resolves against B's own federation_peers table), then Server A creates its own keypair and redeems the enrollment token at B. HIGH-3: waitForStack() now polls both gateways in parallel via Promise.all, each with an independent deadline, so a slow gateway-a cannot starve gateway-b's budget. MED-4: seed() throws immediately with a clear error if scenario !== 'all'; per-variant narrowing deferred to M3-11 with explicit JSDoc note. Also: remove ADMIN_API_KEY (no such path in AdminGuard) from compose, replace with ADMIN_BOOTSTRAP_PASSWORD; add BETTER_AUTH_URL production-code limitation as a TODO in the README. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-23 21:54:46 -05:00
Jarvis	b445033c69	feat(federation): two-gateway test harness scaffold (FED-M3-02) Adds tools/federation-harness/ — the permanent test bed for M3+ federation E2E tests. Boots two gateways (Server A + Server B) on a shared Docker bridge network with per-gateway Postgres/pgvector + Valkey and a shared Step-CA. - docker-compose.two-gateways.yml: gateway-a/b, postgres-a/b, valkey-a/b, step-ca; image digest-pinned to sha256:1069117740e... (sha-9f1a081, #491) - seed.ts: provisions scope variants A/B/C via real admin REST API; walks full enrollment flow (peer keypair → grant → token → redeem → cert store) - harness.ts: bootHarness/tearDownHarness/serverA/serverB/seed helpers for vitest; idempotent boot (reuses running stack when both gateways healthy) - README.md: prereqs, topology, seed usage, vitest integration, port override, troubleshooting, image digest note No production code modified. Quality gates: typecheck ✓ lint ✓ format ✓ Closes #462 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-23 21:54:46 -05:00
jason.woltje	ee3f2defd9	feat(types): federation v1 DTOs (FED-M3-01) (#506 ) Some checks failed ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline failed Details	2026-04-24 02:54:40 +00:00
jason.woltje	7342c1290d	fix(federation): use real PEM certs in enrollment + ca service tests (#507 ) Some checks failed ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline failed Details	2026-04-24 02:43:42 +00:00
jason.woltje	e64ddd2c1c	docs(federation): M3 mission planning — 14-task decomposition (#504 ) Some checks failed ci/woodpecker/push/ci Pipeline failed Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-24 01:13:40 +00:00
jason.woltje	4ece6dc643	chore(federation): M2 milestone close (FED-M2-13) (#503 ) Some checks failed ci/woodpecker/push/ci Pipeline failed Details ci/woodpecker/tag/publish Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-22 06:09:54 +00:00
jason.woltje	194c3b603e	docs(federation): M2 Step-CA setup guide + admin CLI reference (FED-M2-12) (#502 ) Some checks failed ci/woodpecker/push/publish Pipeline failed Details ci/woodpecker/push/ci Pipeline failed Details	2026-04-22 06:06:45 +00:00