Merge pull request 'feat(federation): outbound mTLS FederationClient (FED-M3-08)' (#508 ) from feat/federation-m3-client into main

fix(federation/client): serialize cache fills, destroy evicted Agent, cover env-var guard
- HIGH-A: resolveEntry now uses promise-cache pattern so concurrent callers serialize on a single in-flight build, eliminating duplicate key material in heap and duplicate DB round-trips - HIGH-B: flushPeer destroys the evicted undici Agent so stale TLS connections close on cert rotation - MED-C: add regression test for PEER_MISCONFIGURED when STEP_CA_ROOT_CERT_PATH is unset Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-24 04:30:29 +00:00 · 2026-04-23 22:56:57 -05:00 · 2026-04-23 22:30:09 -05:00 · 2026-04-23 22:16:52 -05:00 · 2026-04-24 03:01:25 +00:00 · 2026-04-24 02:54:40 +00:00
23 changed files with 3590 additions and 48 deletions
--- a/apps/gateway/package.json
+++ b/apps/gateway/package.json
@@ -73,6 +73,7 @@
    "rxjs": "^7.8.0",
    "socket.io": "^4.8.0",
    "uuid": "^11.0.0",
+    "undici": "^7.24.6",
    "zod": "^4.3.6"
  },
  "devDependencies": {
--- a/apps/gateway/src/federation/tests/enrollment.service.spec.ts
+++ b/apps/gateway/src/federation/tests/enrollment.service.spec.ts
@@ -24,10 +24,11 @@
 */

 import 'reflect-metadata';
-import { describe, it, expect, vi, beforeEach } from 'vitest';
+import { describe, it, expect, vi, beforeEach, beforeAll } from 'vitest';
 import { GoneException, NotFoundException } from '@nestjs/common';
 import type { Db } from '@mosaicstack/db';
 import { EnrollmentService } from '../enrollment.service.js';
+import { makeSelfSignedCert } from './helpers/test-cert.js';

 // ---------------------------------------------------------------------------
 // Test constants
@@ -38,10 +39,18 @@ const PEER_ID = 'p2222222-2222-2222-2222-222222222222';
 const USER_ID = 'u3333333-3333-3333-3333-333333333333';
 const TOKEN = 'a'.repeat(64); // 64-char hex

-const MOCK_CERT_PEM = '-----BEGIN CERTIFICATE-----\nMOCK\n-----END CERTIFICATE-----\n';
-const MOCK_CHAIN_PEM = MOCK_CERT_PEM + MOCK_CERT_PEM;
+// Real self-signed EC P-256 cert — populated once in beforeAll.
+// Required because EnrollmentService.extractCertNotAfter calls new X509Certificate(certPem)
+// with strict parsing (PR #501 HIGH-2: no silent fallback).
+let REAL_CERT_PEM: string;
+
+const MOCK_CHAIN_PEM = () => REAL_CERT_PEM + REAL_CERT_PEM;
 const MOCK_SERIAL = 'ABCD1234';

+beforeAll(async () => {
+  REAL_CERT_PEM = await makeSelfSignedCert();
+});
+
 // ---------------------------------------------------------------------------
 // Factory helpers
 // ---------------------------------------------------------------------------
@@ -103,11 +112,27 @@ function makeDb({
  const claimUpdateMock = vi.fn().mockReturnValue({ set: setClaimMock });

  // transaction(cb) — cb receives txMock; txMock has update + insert
-  const txInsertValues = vi.fn().mockResolvedValue(undefined);
-  const txInsertMock = vi.fn().mockReturnValue({ values: txInsertValues });
-  const txWhereUpdate = vi.fn().mockResolvedValue(undefined);
+  //
+  // The tx mock must support two tx.update() call patterns (CRIT-2, PR #501):
+  //   1. Grant activation:  .update().set().where().returning() → resolves to [{ id }]
+  //   2. Peer update:       .update().set().where()             → resolves to undefined
+  //
+  // We achieve this by making txWhereUpdate return an object with BOTH a thenable
+  // interface (so `await tx.update().set().where()` works) AND a .returning() method.
+  const txGrantActivatedRow = { id: GRANT_ID };
+  const txReturningMock = vi.fn().mockResolvedValue([txGrantActivatedRow]);
+  const txWhereUpdate = vi.fn().mockReturnValue({
+    // .returning() for grant activation (first tx.update call)
+    returning: txReturningMock,
+    // thenables so `await tx.update().set().where()` also works for peer update
+    then: (resolve: (v: undefined) => void) => resolve(undefined),
+    catch: () => undefined,
+    finally: () => undefined,
+  });
  const txSetMock = vi.fn().mockReturnValue({ where: txWhereUpdate });
  const txUpdateMock = vi.fn().mockReturnValue({ set: txSetMock });
+  const txInsertValues = vi.fn().mockResolvedValue(undefined);
+  const txInsertMock = vi.fn().mockReturnValue({ values: txInsertValues });
  const txMock = { update: txUpdateMock, insert: txInsertMock };
  const transactionMock = vi
    .fn()
@@ -132,6 +157,7 @@ function makeDb({
      txInsertValues,
      txInsertMock,
      txWhereUpdate,
+      txReturningMock,
      txSetMock,
      txUpdateMock,
      txMock,
@@ -146,11 +172,13 @@ function makeDb({

 function makeCaService() {
  return {
-    issueCert: vi.fn().mockResolvedValue({
-      certPem: MOCK_CERT_PEM,
-      certChainPem: MOCK_CHAIN_PEM,
+    // REAL_CERT_PEM is populated by beforeAll — safe to reference via closure here
+    // because makeCaService() is only called after the suite's beforeAll runs.
+    issueCert: vi.fn().mockImplementation(async () => ({
+      certPem: REAL_CERT_PEM,
+      certChainPem: MOCK_CHAIN_PEM(),
      serialNumber: MOCK_SERIAL,
-    }),
+    })),
  };
 }

@@ -301,29 +329,29 @@ describe('EnrollmentService.redeem — success path', () => {
    });
    caService.issueCert.mockImplementation(async () => {
      callOrder.push('issueCert');
-      return { certPem: MOCK_CERT_PEM, certChainPem: MOCK_CHAIN_PEM, serialNumber: MOCK_SERIAL };
+      return { certPem: REAL_CERT_PEM, certChainPem: MOCK_CHAIN_PEM(), serialNumber: MOCK_SERIAL };
    });

-    await service.redeem(TOKEN, MOCK_CERT_PEM);
+    await service.redeem(TOKEN, '---CSR---');

    expect(callOrder).toEqual(['claim', 'issueCert']);
  });

  it('calls CaService.issueCert with grantId, subjectUserId, csrPem, ttlSeconds=300', async () => {
-    await service.redeem(TOKEN, MOCK_CERT_PEM);
+    await service.redeem(TOKEN, '---CSR---');

    expect(caService.issueCert).toHaveBeenCalledWith(
      expect.objectContaining({
        grantId: GRANT_ID,
        subjectUserId: USER_ID,
-        csrPem: MOCK_CERT_PEM,
+        csrPem: '---CSR---',
        ttlSeconds: 300,
      }),
    );
  });

  it('runs activate grant + peer update + audit inside a transaction', async () => {
-    await service.redeem(TOKEN, MOCK_CERT_PEM);
+    await service.redeem(TOKEN, '---CSR---');

    expect(db._mocks.transactionMock).toHaveBeenCalledOnce();
    // tx.update called twice: activate grant + update peer
@@ -333,17 +361,17 @@ describe('EnrollmentService.redeem — success path', () => {
  });

  it('activates grant (sets status=active) inside the transaction', async () => {
-    await service.redeem(TOKEN, MOCK_CERT_PEM);
+    await service.redeem(TOKEN, '---CSR---');

    expect(db._mocks.txSetMock).toHaveBeenCalledWith(expect.objectContaining({ status: 'active' }));
  });

  it('updates the federationPeers row with certPem, certSerial, state=active inside the transaction', async () => {
-    await service.redeem(TOKEN, MOCK_CERT_PEM);
+    await service.redeem(TOKEN, '---CSR---');

    expect(db._mocks.txSetMock).toHaveBeenCalledWith(
      expect.objectContaining({
-        certPem: MOCK_CERT_PEM,
+        certPem: REAL_CERT_PEM,
        certSerial: MOCK_SERIAL,
        state: 'active',
      }),
@@ -351,7 +379,7 @@ describe('EnrollmentService.redeem — success path', () => {
  });

  it('inserts an audit log row inside the transaction', async () => {
-    await service.redeem(TOKEN, MOCK_CERT_PEM);
+    await service.redeem(TOKEN, '---CSR---');

    expect(db._mocks.txInsertValues).toHaveBeenCalledWith(
      expect.objectContaining({
@@ -363,11 +391,11 @@ describe('EnrollmentService.redeem — success path', () => {
  });

  it('returns { certPem, certChainPem } from CaService', async () => {
-    const result = await service.redeem(TOKEN, MOCK_CERT_PEM);
+    const result = await service.redeem(TOKEN, '---CSR---');

    expect(result).toEqual({
-      certPem: MOCK_CERT_PEM,
-      certChainPem: MOCK_CHAIN_PEM,
+      certPem: REAL_CERT_PEM,
+      certChainPem: MOCK_CHAIN_PEM(),
    });
  });
 });
--- a/apps/gateway/src/federation/tests/helpers/test-cert.ts
+++ b/apps/gateway/src/federation/tests/helpers/test-cert.ts
@@ -0,0 +1,138 @@
+/**
+ * Test helpers for generating real X.509 PEM certificates in unit tests.
+ *
+ * PR #501 (FED-M2-11) introduced strict `new X509Certificate(certPem)` parsing
+ * in both EnrollmentService.extractCertNotAfter and CaService.issueCert — dummy
+ * cert strings now throw `error:0680007B:asn1 encoding routines::header too long`.
+ *
+ * These helpers produce minimal but cryptographically valid self-signed EC P-256
+ * certificates via @peculiar/x509 + Node.js webcrypto, suitable for test mocks.
+ *
+ * Two variants:
+ *  - makeSelfSignedCert()          Plain cert — satisfies node:crypto X509Certificate parse.
+ *  - makeMosaicIssuedCert(opts)    Cert with custom Mosaic OID extensions — satisfies the
+ *                                  CRIT-1 OID presence + value checks in CaService.issueCert.
+ */
+
+import { webcrypto } from 'node:crypto';
+import {
+  X509CertificateGenerator,
+  Extension,
+  KeyUsagesExtension,
+  KeyUsageFlags,
+  BasicConstraintsExtension,
+  cryptoProvider,
+} from '@peculiar/x509';
+
+// ---------------------------------------------------------------------------
+// Internal helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Encode a string as an ASN.1 UTF8String TLV:
+ *   0x0C (tag) + 1-byte length (for strings ≤ 127 bytes) + UTF-8 bytes.
+ *
+ * CaService.issueCert reads the extension value as:
+ *   decoder.decode(grantIdExt.value.slice(2))
+ * i.e. it skips the tag + length byte and decodes the remainder as UTF-8.
+ * So we must produce exactly this encoding as the OCTET STRING content.
+ */
+function encodeUtf8String(value: string): Uint8Array {
+  const utf8 = new TextEncoder().encode(value);
+  if (utf8.length > 127) {
+    throw new Error('encodeUtf8String: value too long for single-byte length encoding');
+  }
+  const buf = new Uint8Array(2 + utf8.length);
+  buf[0] = 0x0c; // ASN.1 UTF8String tag
+  buf[1] = utf8.length;
+  buf.set(utf8, 2);
+  return buf;
+}
+
+// ---------------------------------------------------------------------------
+// Mosaic OID constants (must match production CaService)
+// ---------------------------------------------------------------------------
+
+const OID_MOSAIC_GRANT_ID = '1.3.6.1.4.1.99999.1';
+const OID_MOSAIC_SUBJECT_USER_ID = '1.3.6.1.4.1.99999.2';
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/**
+ * Generate a minimal self-signed EC P-256 certificate valid for 1 day.
+ * CN=harness-test, no custom extensions.
+ *
+ * Suitable for:
+ *  - EnrollmentService.extractCertNotAfter (just needs parseable PEM)
+ *  - Any mock that returns certPem / certChainPem without OID checks
+ */
+export async function makeSelfSignedCert(): Promise<string> {
+  // Ensure @peculiar/x509 uses Node.js webcrypto (available as globalThis.crypto in Node 19+,
+  // but we set it explicitly here to be safe on all Node 18+ versions).
+  cryptoProvider.set(webcrypto as unknown as Parameters<typeof cryptoProvider.set>[0]);
+
+  const alg = { name: 'ECDSA', namedCurve: 'P-256', hash: 'SHA-256' } as const;
+  const keys = await webcrypto.subtle.generateKey(alg, false, ['sign', 'verify']);
+
+  const now = new Date();
+  const tomorrow = new Date(now.getTime() + 86_400_000);
+
+  const cert = await X509CertificateGenerator.createSelfSigned({
+    serialNumber: '01',
+    name: 'CN=harness-test',
+    notBefore: now,
+    notAfter: tomorrow,
+    signingAlgorithm: alg,
+    keys,
+    extensions: [
+      new BasicConstraintsExtension(false),
+      new KeyUsagesExtension(KeyUsageFlags.digitalSignature),
+    ],
+  });
+
+  return cert.toString('pem');
+}
+
+/**
+ * Generate a self-signed EC P-256 certificate that contains the two custom
+ * Mosaic OID extensions required by CaService.issueCert's CRIT-1 check:
+ *   OID 1.3.6.1.4.1.99999.1  → mosaic_grant_id   (value = grantId)
+ *   OID 1.3.6.1.4.1.99999.2  → mosaic_subject_user_id (value = subjectUserId)
+ *
+ * The extension value encoding matches the production parser's `.slice(2)` assumption:
+ * each extension value is an OCTET STRING wrapping an ASN.1 UTF8String TLV.
+ */
+export async function makeMosaicIssuedCert(opts: {
+  grantId: string;
+  subjectUserId: string;
+}): Promise<string> {
+  // Ensure @peculiar/x509 uses Node.js webcrypto.
+  cryptoProvider.set(webcrypto as unknown as Parameters<typeof cryptoProvider.set>[0]);
+
+  const alg = { name: 'ECDSA', namedCurve: 'P-256', hash: 'SHA-256' } as const;
+  const keys = await webcrypto.subtle.generateKey(alg, false, ['sign', 'verify']);
+
+  const now = new Date();
+  const tomorrow = new Date(now.getTime() + 86_400_000);
+
+  const cert = await X509CertificateGenerator.createSelfSigned({
+    serialNumber: '01',
+    name: 'CN=mosaic-issued-test',
+    notBefore: now,
+    notAfter: tomorrow,
+    signingAlgorithm: alg,
+    keys,
+    extensions: [
+      new BasicConstraintsExtension(false),
+      new KeyUsagesExtension(KeyUsageFlags.digitalSignature),
+      // mosaic_grant_id — OID 1.3.6.1.4.1.99999.1
+      new Extension(OID_MOSAIC_GRANT_ID, false, encodeUtf8String(opts.grantId)),
+      // mosaic_subject_user_id — OID 1.3.6.1.4.1.99999.2
+      new Extension(OID_MOSAIC_SUBJECT_USER_ID, false, encodeUtf8String(opts.subjectUserId)),
+    ],
+  });
+
+  return cert.toString('pem');
+}
--- a/apps/gateway/src/federation/ca.service.spec.ts
+++ b/apps/gateway/src/federation/ca.service.spec.ts
@@ -20,9 +20,10 @@
 */

 import 'reflect-metadata';
-import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest';
+import { describe, it, expect, vi, beforeEach, beforeAll, type Mock } from 'vitest';
 import { jwtVerify, exportJWK, generateKeyPair } from 'jose';
 import { Pkcs10CertificateRequestGenerator } from '@peculiar/x509';
+import { makeMosaicIssuedCert } from './__tests__/helpers/test-cert.js';

 // ---------------------------------------------------------------------------
 // Mock node:https BEFORE importing CaService so the mock is in place when
@@ -74,6 +75,11 @@ const FAKE_CA_PEM = FAKE_CERT_PEM;
 const GRANT_ID = 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11';
 const SUBJECT_USER_ID = 'b1ffcd00-0d1c-5f09-cc7e-7cc0ce491b22';

+// Real self-signed cert containing both Mosaic OID extensions — populated in beforeAll.
+// Required because CaService.issueCert performs CRIT-1 OID presence/value checks on the
+// response cert (PR #501 — strict parsing, no silent fallback).
+let realIssuedCertPem: string;
+
 // ---------------------------------------------------------------------------
 // Generate a real EC P-256 key pair and CSR for integration-style tests
 // ---------------------------------------------------------------------------
@@ -194,6 +200,15 @@ function makeHttpsMock(statusCode: number, body: unknown, errorMsg?: string): vo
 describe('CaService', () => {
  let service: CaService;

+  beforeAll(async () => {
+    // Generate a cert with the two Mosaic OIDs so that CaService.issueCert's
+    // CRIT-1 OID checks pass when mock step-ca returns it as `crt`.
+    realIssuedCertPem = await makeMosaicIssuedCert({
+      grantId: GRANT_ID,
+      subjectUserId: SUBJECT_USER_ID,
+    });
+  });
+
  beforeEach(() => {
    vi.clearAllMocks();
    service = new CaService();
@@ -226,9 +241,9 @@ describe('CaService', () => {

    // Now test that the service's validateCsr accepts it.
    // We call it indirectly via issueCert with a successful mock.
-    makeHttpsMock(200, { crt: FAKE_CERT_PEM, certChain: [FAKE_CERT_PEM, FAKE_CA_PEM] });
+    makeHttpsMock(200, { crt: realIssuedCertPem, certChain: [realIssuedCertPem, FAKE_CA_PEM] });
    const result = await service.issueCert(makeReq({ csrPem: realCsrPem }));
-    expect(result.certPem).toBe(FAKE_CERT_PEM);
+    expect(result.certPem).toBe(realIssuedCertPem);
  });

  it('throws INVALID_CSR for a malformed PEM-shaped CSR', async () => {
@@ -251,14 +266,14 @@ describe('CaService', () => {
  it('returns IssuedCertDto on success (certChain present)', async () => {
    if (!realCsrPem) realCsrPem = await generateRealCsr();
    makeHttpsMock(200, {
-      crt: FAKE_CERT_PEM,
-      certChain: [FAKE_CERT_PEM, FAKE_CA_PEM],
+      crt: realIssuedCertPem,
+      certChain: [realIssuedCertPem, FAKE_CA_PEM],
    });

    const result = await service.issueCert(makeReq());

-    expect(result.certPem).toBe(FAKE_CERT_PEM);
-    expect(result.certChainPem).toContain(FAKE_CERT_PEM);
+    expect(result.certPem).toBe(realIssuedCertPem);
+    expect(result.certChainPem).toContain(realIssuedCertPem);
    expect(result.certChainPem).toContain(FAKE_CA_PEM);
    expect(typeof result.serialNumber).toBe('string');
  });
@@ -270,14 +285,14 @@ describe('CaService', () => {
  it('builds certChainPem from crt+ca when certChain is absent', async () => {
    if (!realCsrPem) realCsrPem = await generateRealCsr();
    makeHttpsMock(200, {
-      crt: FAKE_CERT_PEM,
+      crt: realIssuedCertPem,
      ca: FAKE_CA_PEM,
    });

    const result = await service.issueCert(makeReq());

-    expect(result.certPem).toBe(FAKE_CERT_PEM);
-    expect(result.certChainPem).toContain(FAKE_CERT_PEM);
+    expect(result.certPem).toBe(realIssuedCertPem);
+    expect(result.certChainPem).toContain(realIssuedCertPem);
    expect(result.certChainPem).toContain(FAKE_CA_PEM);
  });

@@ -287,12 +302,12 @@ describe('CaService', () => {

  it('falls back to certPem alone when certChain and ca are absent', async () => {
    if (!realCsrPem) realCsrPem = await generateRealCsr();
-    makeHttpsMock(200, { crt: FAKE_CERT_PEM });
+    makeHttpsMock(200, { crt: realIssuedCertPem });

    const result = await service.issueCert(makeReq());

-    expect(result.certPem).toBe(FAKE_CERT_PEM);
-    expect(result.certChainPem).toBe(FAKE_CERT_PEM);
+    expect(result.certPem).toBe(realIssuedCertPem);
+    expect(result.certChainPem).toBe(realIssuedCertPem);
  });

  // -------------------------------------------------------------------------
@@ -398,7 +413,7 @@ describe('CaService', () => {
          statusCode: 200,
          on: (event: string, cb: (chunk?: Buffer) => void) => {
            if (event === 'data') {
-              cb(Buffer.from(JSON.stringify({ crt: FAKE_CERT_PEM })));
+              cb(Buffer.from(JSON.stringify({ crt: realIssuedCertPem })));
            }
            if (event === 'end') {
              cb();
@@ -555,7 +570,7 @@ describe('CaService', () => {
          statusCode: 200,
          on: (event: string, cb: (chunk?: Buffer) => void) => {
            if (event === 'data') {
-              cb(Buffer.from(JSON.stringify({ crt: FAKE_CERT_PEM })));
+              cb(Buffer.from(JSON.stringify({ crt: realIssuedCertPem })));
            }
            if (event === 'end') {
              cb();
--- a/apps/gateway/src/federation/client/tests/federation-client.service.spec.ts
+++ b/apps/gateway/src/federation/client/tests/federation-client.service.spec.ts
@@ -0,0 +1,553 @@
+/**
+ * Unit tests for FederationClientService (FED-M3-08).
+ *
+ * HTTP mocking strategy:
+ *   undici MockAgent is used to intercept outbound HTTP requests.  The service
+ *   uses `undici.fetch` with a `dispatcher` option, so MockAgent is set as the
+ *   global dispatcher and all requests flow through it.
+ *
+ *   Because the service builds one `undici.Agent` per peer and passes it as
+ *   the dispatcher on every fetch call, we cannot intercept at the Agent level
+ *   in unit tests without significant refactoring.  Instead, we set the global
+ *   dispatcher to a MockAgent and override the service's `doRequest` indirection
+ *   by spying on the internal fetch call.
+ *
+ *   For the cert/key wiring, we use the real `sealClientKey` function from
+ *   peer-key.util.ts with a test secret — no stubs.
+ *
+ * Sealed-key setup:
+ *   Each test (or beforeAll) calls `sealClientKey(TEST_PRIVATE_KEY_PEM)` with
+ *   BETTER_AUTH_SECRET set to a deterministic test value so that
+ *   `unsealClientKey` in the service recovers the original PEM.
+ */
+
+import 'reflect-metadata';
+import { describe, it, expect, vi, beforeEach, afterEach, beforeAll, afterAll } from 'vitest';
+import { MockAgent, setGlobalDispatcher, getGlobalDispatcher } from 'undici';
+import type { Dispatcher } from 'undici';
+import { writeFileSync, unlinkSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import type { Db } from '@mosaicstack/db';
+import { FederationClientService, FederationClientError } from '../federation-client.service.js';
+import { sealClientKey } from '../../peer-key.util.js';
+
+// ---------------------------------------------------------------------------
+// Test constants
+// ---------------------------------------------------------------------------
+
+const TEST_SECRET = 'test-secret-for-federation-client-spec-only';
+const PEER_ID = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa';
+const ENDPOINT = 'https://peer.example.com';
+
+// Minimal valid RSA/EC private key PEM — does NOT need to be a real key for
+// unit tests because we only verify it round-trips through seal/unseal, not
+// that it actually negotiates TLS (MockAgent handles that).
+const TEST_PRIVATE_KEY_PEM = `-----BEGIN PRIVATE KEY-----
+MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDummyKeyForTests
+-----END PRIVATE KEY-----`;
+
+// Minimal self-signed cert PEM (dummy — only used for mTLS Agent construction)
+const TEST_CERT_PEM = `-----BEGIN CERTIFICATE-----
+MIIBdummyCertForFederationClientTests==
+-----END CERTIFICATE-----`;
+
+const TEST_CERT_SERIAL = 'ABCDEF1234567890';
+
+// ---------------------------------------------------------------------------
+// Sealed key (computed once in beforeAll)
+// ---------------------------------------------------------------------------
+
+let SEALED_KEY: string;
+
+// Path to a stub Step-CA root cert file written in beforeAll. The cert is never
+// actually used to negotiate TLS in unit tests (MockAgent + spy on resolveEntry
+// short-circuit the network), but loadStepCaRoot() requires the file to exist.
+const STUB_CA_PEM_PATH = join(tmpdir(), 'federation-client-spec-ca.pem');
+const STUB_CA_PEM = `-----BEGIN CERTIFICATE-----
+MIIBdummyCAforFederationClientSpecOnly==
+-----END CERTIFICATE-----
+`;
+
+// ---------------------------------------------------------------------------
+// Peer row factory
+// ---------------------------------------------------------------------------
+
+function makePeerRow(overrides: Partial<Record<string, unknown>> = {}) {
+  return {
+    id: PEER_ID,
+    commonName: 'peer-example-com',
+    displayName: 'Test Peer',
+    certPem: TEST_CERT_PEM,
+    certSerial: TEST_CERT_SERIAL,
+    certNotAfter: new Date('2030-01-01T00:00:00Z'),
+    clientKeyPem: SEALED_KEY,
+    state: 'active' as const,
+    endpointUrl: ENDPOINT,
+    lastSeenAt: null,
+    createdAt: new Date('2026-01-01T00:00:00Z'),
+    revokedAt: null,
+    ...overrides,
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Mock DB builder
+// ---------------------------------------------------------------------------
+
+function makeDb(selectRows: unknown[] = [makePeerRow()]): Db {
+  const limitSelect = vi.fn().mockResolvedValue(selectRows);
+  const whereSelect = vi.fn().mockReturnValue({ limit: limitSelect });
+  const fromSelect = vi.fn().mockReturnValue({ where: whereSelect });
+  const selectMock = vi.fn().mockReturnValue({ from: fromSelect });
+
+  return {
+    select: selectMock,
+    insert: vi.fn(),
+    update: vi.fn(),
+    delete: vi.fn(),
+    transaction: vi.fn(),
+  } as unknown as Db;
+}
+
+// ---------------------------------------------------------------------------
+// Helpers for MockAgent HTTP interception
+// ---------------------------------------------------------------------------
+
+/**
+ * Create a MockAgent + MockPool for the peer endpoint, set it as the global
+ * dispatcher, and return both for per-test configuration.
+ */
+function makeMockAgent() {
+  const mockAgent = new MockAgent({ connections: 1 });
+  mockAgent.disableNetConnect();
+  setGlobalDispatcher(mockAgent);
+  const pool = mockAgent.get(ENDPOINT);
+  return { mockAgent, pool };
+}
+
+/**
+ * Build a FederationClientService with a mock DB and a spy on the internal
+ * fetch so we can intercept at the HTTP layer via MockAgent.
+ *
+ * The service calls `fetch(url, { dispatcher: agent })` where `agent` is the
+ * mTLS undici.Agent built from the peer's cert+key.  To make MockAgent work,
+ * we need the fetch dispatcher to be the MockAgent, not the per-peer Agent.
+ *
+ * Strategy: we replace the private `resolveEntry` result's `agent` field with
+ * the MockAgent's pool, so fetch uses our interceptor.  We do this by spying
+ * on `resolveEntry` and returning a controlled entry.
+ */
+function makeService(db: Db, mockPool: Dispatcher): FederationClientService {
+  const svc = new FederationClientService(db);
+
+  // Override resolveEntry to inject MockAgent pool as the dispatcher
+  vi.spyOn(
+    svc as unknown as { resolveEntry: (peerId: string) => Promise<unknown> },
+    'resolveEntry',
+  ).mockImplementation(async (_peerId: string) => {
+    // Still call DB (via the real logic) to exercise peer validation,
+    // but return mock pool as the agent.
+    // For simplicity in unit tests, directly return a controlled entry.
+    return {
+      agent: mockPool,
+      endpointUrl: ENDPOINT,
+      certPem: TEST_CERT_PEM,
+      certSerial: TEST_CERT_SERIAL,
+    };
+  });
+
+  return svc;
+}
+
+// ---------------------------------------------------------------------------
+// Test setup
+// ---------------------------------------------------------------------------
+
+let originalDispatcher: Dispatcher;
+
+beforeAll(() => {
+  // Seal the test key once — requires BETTER_AUTH_SECRET
+  const saved = process.env['BETTER_AUTH_SECRET'];
+  process.env['BETTER_AUTH_SECRET'] = TEST_SECRET;
+  try {
+    SEALED_KEY = sealClientKey(TEST_PRIVATE_KEY_PEM);
+  } finally {
+    if (saved === undefined) {
+      delete process.env['BETTER_AUTH_SECRET'];
+    } else {
+      process.env['BETTER_AUTH_SECRET'] = saved;
+    }
+  }
+  writeFileSync(STUB_CA_PEM_PATH, STUB_CA_PEM, 'utf8');
+});
+
+afterAll(() => {
+  try {
+    unlinkSync(STUB_CA_PEM_PATH);
+  } catch {
+    // best-effort cleanup
+  }
+});
+
+beforeEach(() => {
+  originalDispatcher = getGlobalDispatcher();
+  process.env['BETTER_AUTH_SECRET'] = TEST_SECRET;
+  process.env['STEP_CA_ROOT_CERT_PATH'] = STUB_CA_PEM_PATH;
+});
+
+afterEach(() => {
+  setGlobalDispatcher(originalDispatcher);
+  vi.restoreAllMocks();
+  delete process.env['BETTER_AUTH_SECRET'];
+  delete process.env['STEP_CA_ROOT_CERT_PATH'];
+});
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/** Successful list response body */
+const LIST_BODY = {
+  items: [{ id: '1', title: 'Task One' }],
+  nextCursor: undefined,
+  _partial: false,
+};
+
+/** Successful get response body */
+const GET_BODY = {
+  item: { id: '1', title: 'Task One' },
+  _partial: false,
+};
+
+/** Successful capabilities response body */
+const CAP_BODY = {
+  resources: ['tasks'],
+  excluded_resources: [],
+  max_rows_per_query: 100,
+  supported_verbs: ['list', 'get', 'capabilities'] as const,
+};
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+describe('FederationClientService', () => {
+  // ─── Successful verb calls ─────────────────────────────────────────────────
+
+  describe('list()', () => {
+    it('returns parsed typed response on success', async () => {
+      const db = makeDb();
+      const { mockAgent, pool } = makeMockAgent();
+      const svc = makeService(db, pool);
+
+      pool
+        .intercept({
+          path: '/api/federation/v1/list/tasks',
+          method: 'POST',
+        })
+        .reply(200, LIST_BODY, { headers: { 'content-type': 'application/json' } });
+
+      const result = await svc.list(PEER_ID, 'tasks', {});
+
+      expect(result.items).toHaveLength(1);
+      expect(result.items[0]).toMatchObject({ id: '1', title: 'Task One' });
+
+      await mockAgent.close();
+    });
+  });
+
+  describe('get()', () => {
+    it('returns parsed typed response on success', async () => {
+      const db = makeDb();
+      const { mockAgent, pool } = makeMockAgent();
+      const svc = makeService(db, pool);
+
+      pool
+        .intercept({
+          path: '/api/federation/v1/get/tasks/1',
+          method: 'POST',
+        })
+        .reply(200, GET_BODY, { headers: { 'content-type': 'application/json' } });
+
+      const result = await svc.get(PEER_ID, 'tasks', '1', {});
+
+      expect(result.item).toMatchObject({ id: '1', title: 'Task One' });
+
+      await mockAgent.close();
+    });
+  });
+
+  describe('capabilities()', () => {
+    it('returns parsed capabilities response on success', async () => {
+      const db = makeDb();
+      const { mockAgent, pool } = makeMockAgent();
+      const svc = makeService(db, pool);
+
+      pool
+        .intercept({
+          path: '/api/federation/v1/capabilities',
+          method: 'GET',
+        })
+        .reply(200, CAP_BODY, { headers: { 'content-type': 'application/json' } });
+
+      const result = await svc.capabilities(PEER_ID);
+
+      expect(result.resources).toContain('tasks');
+      expect(result.max_rows_per_query).toBe(100);
+
+      await mockAgent.close();
+    });
+  });
+
+  // ─── HTTP error surfaces ──────────────────────────────────────────────────
+
+  describe('non-2xx responses', () => {
+    it('surfaces 403 as FederationClientError({ status: 403, code: "FORBIDDEN" })', async () => {
+      const db = makeDb();
+      const { mockAgent, pool } = makeMockAgent();
+      const svc = makeService(db, pool);
+
+      pool.intercept({ path: '/api/federation/v1/list/tasks', method: 'POST' }).reply(
+        403,
+        { error: { code: 'forbidden', message: 'Access denied' } },
+        {
+          headers: { 'content-type': 'application/json' },
+        },
+      );
+
+      await expect(svc.list(PEER_ID, 'tasks', {})).rejects.toMatchObject({
+        status: 403,
+        code: 'FORBIDDEN',
+        peerId: PEER_ID,
+      });
+
+      await mockAgent.close();
+    });
+
+    it('surfaces 404 as FederationClientError({ status: 404, code: "HTTP_404" })', async () => {
+      const db = makeDb();
+      const { mockAgent, pool } = makeMockAgent();
+      const svc = makeService(db, pool);
+
+      pool.intercept({ path: '/api/federation/v1/get/tasks/999', method: 'POST' }).reply(
+        404,
+        { error: { code: 'not_found', message: 'Not found' } },
+        {
+          headers: { 'content-type': 'application/json' },
+        },
+      );
+
+      await expect(svc.get(PEER_ID, 'tasks', '999', {})).rejects.toMatchObject({
+        status: 404,
+        code: 'HTTP_404',
+        peerId: PEER_ID,
+      });
+
+      await mockAgent.close();
+    });
+  });
+
+  // ─── Network error ─────────────────────────────────────────────────────────
+
+  describe('network errors', () => {
+    it('surfaces network error as FederationClientError({ code: "NETWORK" })', async () => {
+      const db = makeDb();
+      const { mockAgent, pool } = makeMockAgent();
+      const svc = makeService(db, pool);
+
+      pool
+        .intercept({ path: '/api/federation/v1/capabilities', method: 'GET' })
+        .replyWithError(new Error('ECONNREFUSED'));
+
+      await expect(svc.capabilities(PEER_ID)).rejects.toMatchObject({
+        code: 'NETWORK',
+        peerId: PEER_ID,
+      });
+
+      await mockAgent.close();
+    });
+  });
+
+  // ─── Invalid response body ─────────────────────────────────────────────────
+
+  describe('invalid response body', () => {
+    it('surfaces as FederationClientError({ code: "INVALID_RESPONSE" }) when body shape is wrong', async () => {
+      const db = makeDb();
+      const { mockAgent, pool } = makeMockAgent();
+      const svc = makeService(db, pool);
+
+      // capabilities returns wrong shape (missing required fields)
+      pool
+        .intercept({ path: '/api/federation/v1/capabilities', method: 'GET' })
+        .reply(200, { totally: 'wrong' }, { headers: { 'content-type': 'application/json' } });
+
+      await expect(svc.capabilities(PEER_ID)).rejects.toMatchObject({
+        code: 'INVALID_RESPONSE',
+        peerId: PEER_ID,
+      });
+
+      await mockAgent.close();
+    });
+  });
+
+  // ─── Peer DB validation ────────────────────────────────────────────────────
+
+  describe('peer validation (without resolveEntry spy)', () => {
+    /**
+     * These tests exercise the real `resolveEntry` path — no spy on resolveEntry.
+     */
+
+    it('throws PEER_NOT_FOUND when peer is not in DB', async () => {
+      // DB returns empty array (peer not found)
+      const db = makeDb([]);
+      const svc = new FederationClientService(db);
+
+      await expect(svc.capabilities(PEER_ID)).rejects.toMatchObject({
+        code: 'PEER_NOT_FOUND',
+        peerId: PEER_ID,
+      });
+    });
+
+    it('throws PEER_INACTIVE when peer state is not "active"', async () => {
+      const db = makeDb([makePeerRow({ state: 'suspended' })]);
+      const svc = new FederationClientService(db);
+
+      await expect(svc.capabilities(PEER_ID)).rejects.toMatchObject({
+        code: 'PEER_INACTIVE',
+        peerId: PEER_ID,
+      });
+    });
+  });
+
+  // ─── Cache behaviour ───────────────────────────────────────────────────────
+
+  describe('cache behaviour', () => {
+    it('hits cache on second call — only one DB lookup happens', async () => {
+      // Verify cache by calling the private resolveEntry directly twice and
+      // asserting the DB was queried only once. This avoids the HTTP layer,
+      // which would require either a real network or per-peer Agent rewiring
+      // that the cache invariant doesn't depend on.
+      const db = makeDb();
+      const selectSpy = vi.spyOn(db, 'select');
+      const svc = new FederationClientService(db);
+      const resolveEntry = (
+        svc as unknown as { resolveEntry: (peerId: string) => Promise<unknown> }
+      ).resolveEntry.bind(svc);
+
+      const first = await resolveEntry(PEER_ID);
+      const second = await resolveEntry(PEER_ID);
+
+      expect(first).toBe(second);
+      expect(selectSpy).toHaveBeenCalledTimes(1);
+    });
+
+    it('serializes concurrent resolveEntry calls — only one DB lookup', async () => {
+      const db = makeDb();
+      const selectSpy = vi.spyOn(db, 'select');
+      const svc = new FederationClientService(db);
+      const resolveEntry = (
+        svc as unknown as {
+          resolveEntry: (peerId: string) => Promise<unknown>;
+        }
+      ).resolveEntry.bind(svc);
+
+      const [a, b] = await Promise.all([resolveEntry(PEER_ID), resolveEntry(PEER_ID)]);
+      expect(a).toBe(b);
+      expect(selectSpy).toHaveBeenCalledTimes(1);
+    });
+
+    it('flushPeer destroys the evicted Agent so old TLS connections close', async () => {
+      const db = makeDb();
+      const svc = new FederationClientService(db);
+      const resolveEntry = (
+        svc as unknown as {
+          resolveEntry: (peerId: string) => Promise<{ agent: { destroy: () => Promise<void> } }>;
+        }
+      ).resolveEntry.bind(svc);
+
+      const entry = await resolveEntry(PEER_ID);
+      const destroySpy = vi.spyOn(entry.agent, 'destroy').mockResolvedValue();
+
+      svc.flushPeer(PEER_ID);
+      expect(destroySpy).toHaveBeenCalledTimes(1);
+    });
+
+    it('flushPeer() invalidates cache — next call re-reads DB', async () => {
+      const db = makeDb();
+      const { mockAgent, pool } = makeMockAgent();
+      const svc = makeService(db, pool);
+
+      pool
+        .intercept({ path: '/api/federation/v1/capabilities', method: 'GET' })
+        .reply(200, CAP_BODY, { headers: { 'content-type': 'application/json' } })
+        .times(2);
+
+      // First call — populates cache (via mock resolveEntry)
+      await svc.capabilities(PEER_ID);
+
+      // Flush the cache
+      svc.flushPeer(PEER_ID);
+
+      // The spy on resolveEntry is still active — check it's called again after flush
+      const resolveEntrySpy = vi.spyOn(
+        svc as unknown as { resolveEntry: (peerId: string) => Promise<unknown> },
+        'resolveEntry',
+      );
+
+      // Second call after flush — should call resolveEntry again
+      await svc.capabilities(PEER_ID);
+
+      // resolveEntry should have been called once after we started spying (post-flush)
+      expect(resolveEntrySpy).toHaveBeenCalledTimes(1);
+
+      await mockAgent.close();
+    });
+  });
+
+  // ─── loadStepCaRoot env-var guard ─────────────────────────────────────────
+
+  describe('loadStepCaRoot() env-var guard', () => {
+    it('throws PEER_MISCONFIGURED when STEP_CA_ROOT_CERT_PATH is not set', async () => {
+      delete process.env['STEP_CA_ROOT_CERT_PATH'];
+      const db = makeDb();
+      const svc = new FederationClientService(db);
+      const resolveEntry = (
+        svc as unknown as {
+          resolveEntry: (peerId: string) => Promise<unknown>;
+        }
+      ).resolveEntry.bind(svc);
+
+      await expect(resolveEntry(PEER_ID)).rejects.toMatchObject({
+        code: 'PEER_MISCONFIGURED',
+      });
+    });
+  });
+
+  // ─── FederationClientError class ──────────────────────────────────────────
+
+  describe('FederationClientError', () => {
+    it('is instanceof Error and FederationClientError', () => {
+      const err = new FederationClientError({
+        code: 'PEER_NOT_FOUND',
+        message: 'test',
+        peerId: PEER_ID,
+      });
+      expect(err).toBeInstanceOf(Error);
+      expect(err).toBeInstanceOf(FederationClientError);
+      expect(err.name).toBe('FederationClientError');
+    });
+
+    it('carries status, code, and peerId', () => {
+      const err = new FederationClientError({
+        status: 403,
+        code: 'FORBIDDEN',
+        message: 'forbidden',
+        peerId: PEER_ID,
+      });
+      expect(err.status).toBe(403);
+      expect(err.code).toBe('FORBIDDEN');
+      expect(err.peerId).toBe(PEER_ID);
+    });
+  });
+});
--- a/apps/gateway/src/federation/client/federation-client.service.ts
+++ b/apps/gateway/src/federation/client/federation-client.service.ts
@@ -0,0 +1,500 @@
+/**
+ * FederationClientService — outbound mTLS client for federation requests (FED-M3-08).
+ *
+ * Dials peer gateways over mTLS using the cert+sealed-key stored in `federation_peers`,
+ * invokes federation verbs (list / get / capabilities), and surfaces all failure modes
+ * as typed `FederationClientError` instances.
+ *
+ * ## Error code taxonomy
+ *
+ * | Code               | When                                                          |
+ * | ------------------ | ------------------------------------------------------------- |
+ * | PEER_NOT_FOUND     | No row in federation_peers for the given peerId               |
+ * | PEER_INACTIVE      | Peer row exists but state !== 'active'                        |
+ * | PEER_MISCONFIGURED | Peer row is active but missing endpointUrl or clientKeyPem    |
+ * | NETWORK            | undici threw a connection / TLS / timeout error               |
+ * | HTTP_{status}      | Peer returned a non-2xx response (e.g. HTTP_403, HTTP_404)    |
+ * | FORBIDDEN          | Peer returned 403 (convenience alias alongside HTTP_403)      |
+ * | INVALID_RESPONSE   | Response body failed Zod schema validation                    |
+ *
+ * ## Cache strategy
+ *
+ * Per-peer `undici.Agent` instances are cached in a `Map<peerId, AgentCacheEntry>` for
+ * the lifetime of the service instance.  The cache is keyed on peerId (UUID).
+ *
+ * Cache invalidation:
+ *  - `flushPeer(peerId)` — removes the entry immediately.  M5/M6 MUST call this on
+ *    cert rotation or peer revocation events so the next request re-reads the DB and
+ *    builds a fresh TLS Agent with the new cert material.
+ *  - On cache miss: re-reads the DB, checks state === 'active', rebuilds Agent.
+ *
+ * Cache does NOT auto-expire.  The service is expected to be a singleton scoped to the
+ * NestJS application lifecycle; flushing on revocation/rotation is the only invalidation
+ * path by design (avoids redundant DB round-trips on the hot path).
+ */
+
+import { Injectable, Inject, Logger } from '@nestjs/common';
+import { readFileSync } from 'node:fs';
+import { Agent, fetch as undiciFetch } from 'undici';
+import type { Dispatcher } from 'undici';
+import { z } from 'zod';
+import { type Db, eq, federationPeers } from '@mosaicstack/db';
+import {
+  FederationListResponseSchema,
+  FederationGetResponseSchema,
+  FederationCapabilitiesResponseSchema,
+  FederationErrorEnvelopeSchema,
+  type FederationListResponse,
+  type FederationGetResponse,
+  type FederationCapabilitiesResponse,
+} from '@mosaicstack/types';
+import { DB } from '../../database/database.module.js';
+import { unsealClientKey } from '../peer-key.util.js';
+
+// ---------------------------------------------------------------------------
+// Error taxonomy
+// ---------------------------------------------------------------------------
+
+/**
+ * Client-side error code set.  Distinct from the server-side `FederationErrorCode`
+ * (which lives in `@mosaicstack/types`) because the client has additional failure
+ * modes (PEER_NOT_FOUND, PEER_INACTIVE, PEER_MISCONFIGURED, NETWORK) that the
+ * server never emits.
+ */
+export type FederationClientErrorCode =
+  | 'PEER_NOT_FOUND'
+  | 'PEER_INACTIVE'
+  | 'PEER_MISCONFIGURED'
+  | 'NETWORK'
+  | 'FORBIDDEN'
+  | 'INVALID_RESPONSE'
+  | `HTTP_${number}`;
+
+export interface FederationClientErrorOptions {
+  status?: number;
+  code: FederationClientErrorCode;
+  message: string;
+  peerId: string;
+  cause?: unknown;
+}
+
+/**
+ * Thrown by FederationClientService on every failure path.
+ * Callers can dispatch on `error.code` for programmatic handling.
+ */
+export class FederationClientError extends Error {
+  readonly status?: number;
+  readonly code: FederationClientErrorCode;
+  readonly peerId: string;
+  readonly cause?: unknown;
+
+  constructor(opts: FederationClientErrorOptions) {
+    super(opts.message);
+    this.name = 'FederationClientError';
+    this.status = opts.status;
+    this.code = opts.code;
+    this.peerId = opts.peerId;
+    this.cause = opts.cause;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Internal cache types
+// ---------------------------------------------------------------------------
+
+interface AgentCacheEntry {
+  agent: Agent;
+  endpointUrl: string;
+  certPem: string;
+  certSerial: string;
+}
+
+// ---------------------------------------------------------------------------
+// Service
+// ---------------------------------------------------------------------------
+
+@Injectable()
+export class FederationClientService {
+  private readonly logger = new Logger(FederationClientService.name);
+
+  /**
+   * Per-peer undici Agent cache.
+   * Key = peerId (UUID string).
+   *
+   * Values are either a resolved `AgentCacheEntry` or an in-flight
+   * `Promise<AgentCacheEntry>` (promise-cache pattern).  Storing the promise
+   * prevents duplicate DB lookups and duplicate key-unseal operations when two
+   * requests for the same peer arrive before the first build completes.
+   *
+   * Flush via `flushPeer(peerId)` on cert rotation / peer revocation (M5/M6).
+   */
+  private readonly cache = new Map<string, AgentCacheEntry | Promise<AgentCacheEntry>>();
+
+  /**
+   * Step-CA root cert PEM, loaded once from `STEP_CA_ROOT_CERT_PATH`.
+   * Used as the trust anchor for peer server certificates so federation TLS is
+   * pinned to our PKI, not the public trust store. Lazily loaded on first use
+   * so unit tests that don't exercise the agent path can run without the env var.
+   */
+  private cachedCaPem: string | null = null;
+
+  constructor(@Inject(DB) private readonly db: Db) {}
+
+  // -------------------------------------------------------------------------
+  // Public verb API
+  // -------------------------------------------------------------------------
+
+  /**
+   * Invoke the `list` verb on a remote peer.
+   *
+   * @param peerId   UUID of the peer row in `federation_peers`.
+   * @param resource Resource path, e.g. "tasks".
+   * @param request  Free-form body sent as JSON in the POST body.
+   * @returns Parsed `FederationListResponse<T>`.
+   */
+  async list<T>(
+    peerId: string,
+    resource: string,
+    request: Record<string, unknown>,
+  ): Promise<FederationListResponse<T>> {
+    const { endpointUrl, agent } = await this.resolveEntry(peerId);
+    const url = `${endpointUrl}/api/federation/v1/list/${encodeURIComponent(resource)}`;
+    const body = await this.doPost(peerId, url, agent, request);
+    return this.parseWith<FederationListResponse<T>>(
+      peerId,
+      body,
+      FederationListResponseSchema(z.unknown()),
+    );
+  }
+
+  /**
+   * Invoke the `get` verb on a remote peer.
+   *
+   * @param peerId   UUID of the peer row in `federation_peers`.
+   * @param resource Resource path, e.g. "tasks".
+   * @param id       Resource identifier.
+   * @param request  Free-form body sent as JSON in the POST body.
+   * @returns Parsed `FederationGetResponse<T>`.
+   */
+  async get<T>(
+    peerId: string,
+    resource: string,
+    id: string,
+    request: Record<string, unknown>,
+  ): Promise<FederationGetResponse<T>> {
+    const { endpointUrl, agent } = await this.resolveEntry(peerId);
+    const url = `${endpointUrl}/api/federation/v1/get/${encodeURIComponent(resource)}/${encodeURIComponent(id)}`;
+    const body = await this.doPost(peerId, url, agent, request);
+    return this.parseWith<FederationGetResponse<T>>(
+      peerId,
+      body,
+      FederationGetResponseSchema(z.unknown()),
+    );
+  }
+
+  /**
+   * Invoke the `capabilities` verb on a remote peer.
+   *
+   * @param peerId UUID of the peer row in `federation_peers`.
+   * @returns Parsed `FederationCapabilitiesResponse`.
+   */
+  async capabilities(peerId: string): Promise<FederationCapabilitiesResponse> {
+    const { endpointUrl, agent } = await this.resolveEntry(peerId);
+    const url = `${endpointUrl}/api/federation/v1/capabilities`;
+    const body = await this.doGet(peerId, url, agent);
+    return this.parseWith<FederationCapabilitiesResponse>(
+      peerId,
+      body,
+      FederationCapabilitiesResponseSchema,
+    );
+  }
+
+  // -------------------------------------------------------------------------
+  // Cache management
+  // -------------------------------------------------------------------------
+
+  /**
+   * Flush the cached Agent for a specific peer.
+   *
+   * M5/M6 MUST call this on:
+   *  - cert rotation events (so new cert material is picked up)
+   *  - peer revocation events (so future requests fail at PEER_INACTIVE)
+   *
+   * After flushing, the next call to `list`, `get`, or `capabilities` for
+   * this peer will re-read the DB and rebuild the Agent.
+   */
+  flushPeer(peerId: string): void {
+    const entry = this.cache.get(peerId);
+    if (entry === undefined) {
+      return;
+    }
+    this.cache.delete(peerId);
+    if (!(entry instanceof Promise)) {
+      // best-effort destroy; promise-cached entries skip destroy because
+      // the in-flight build owns its own Agent which will be GC'd when the
+      // owning request handles the rejection from the cache miss
+      entry.agent.destroy().catch(() => {
+        // intentionally ignored — destroy errors are not actionable
+      });
+    }
+    this.logger.log(`Cache flushed for peer ${peerId}`);
+  }
+
+  // -------------------------------------------------------------------------
+  // Internal helpers
+  // -------------------------------------------------------------------------
+
+  /**
+   * Load and cache the Step-CA root cert PEM from `STEP_CA_ROOT_CERT_PATH`.
+   * Throws `FederationClientError` if the env var is unset or the file cannot
+   * be read — mTLS to a peer without a pinned trust anchor would silently
+   * fall back to the public trust store.
+   */
+  private loadStepCaRoot(): string {
+    if (this.cachedCaPem !== null) {
+      return this.cachedCaPem;
+    }
+    const path = process.env['STEP_CA_ROOT_CERT_PATH'];
+    if (!path) {
+      throw new FederationClientError({
+        code: 'PEER_MISCONFIGURED',
+        message: 'STEP_CA_ROOT_CERT_PATH is not set; refusing to dial peer without pinned CA trust',
+        peerId: '',
+      });
+    }
+    try {
+      const pem = readFileSync(path, 'utf8');
+      this.cachedCaPem = pem;
+      return pem;
+    } catch (err) {
+      throw new FederationClientError({
+        code: 'PEER_MISCONFIGURED',
+        message: `Failed to read STEP_CA_ROOT_CERT_PATH (${path})`,
+        peerId: '',
+        cause: err,
+      });
+    }
+  }
+
+  /**
+   * Resolve the cache entry for a peer, reading DB on miss.
+   *
+   * Uses a promise-cache pattern: concurrent callers for the same uncached
+   * `peerId` all `await` the same in-flight `Promise<AgentCacheEntry>` so
+   * only one DB lookup and one key-unseal ever runs per peer per cache miss.
+   * The promise is replaced with the concrete entry on success, or deleted on
+   * rejection so a transient error does not poison the cache permanently.
+   *
+   * Throws `FederationClientError` with appropriate code if the peer is not
+   * found, is inactive, or is missing required fields.
+   */
+  private async resolveEntry(peerId: string): Promise<AgentCacheEntry> {
+    const cached = this.cache.get(peerId);
+    if (cached) {
+      return cached; // Promise or concrete entry — both are awaitable
+    }
+
+    const inflight = this.buildEntry(peerId).then(
+      (entry) => {
+        this.cache.set(peerId, entry); // replace promise with concrete value
+        return entry;
+      },
+      (err: unknown) => {
+        this.cache.delete(peerId); // don't poison the cache with a rejected promise
+        throw err;
+      },
+    );
+
+    this.cache.set(peerId, inflight);
+    return inflight;
+  }
+
+  /**
+   * Build the `AgentCacheEntry` for a peer by reading the DB, validating the
+   * peer's state, unsealing the private key, and constructing the mTLS Agent.
+   *
+   * Throws `FederationClientError` with appropriate code if the peer is not
+   * found, is inactive, or is missing required fields.
+   */
+  private async buildEntry(peerId: string): Promise<AgentCacheEntry> {
+    // DB lookup
+    const [peer] = await this.db
+      .select()
+      .from(federationPeers)
+      .where(eq(federationPeers.id, peerId))
+      .limit(1);
+
+    if (!peer) {
+      throw new FederationClientError({
+        code: 'PEER_NOT_FOUND',
+        message: `Federation peer ${peerId} not found`,
+        peerId,
+      });
+    }
+
+    if (peer.state !== 'active') {
+      throw new FederationClientError({
+        code: 'PEER_INACTIVE',
+        message: `Federation peer ${peerId} is not active (state: ${peer.state})`,
+        peerId,
+      });
+    }
+
+    if (!peer.endpointUrl || !peer.clientKeyPem) {
+      throw new FederationClientError({
+        code: 'PEER_MISCONFIGURED',
+        message: `Federation peer ${peerId} is missing endpointUrl or clientKeyPem`,
+        peerId,
+      });
+    }
+
+    // Unseal the private key
+    let privateKeyPem: string;
+    try {
+      privateKeyPem = unsealClientKey(peer.clientKeyPem);
+    } catch (err) {
+      throw new FederationClientError({
+        code: 'PEER_MISCONFIGURED',
+        message: `Failed to unseal client key for peer ${peerId}`,
+        peerId,
+        cause: err,
+      });
+    }
+
+    // Build mTLS agent — pin trust to Step-CA root so we never accept
+    // a peer cert signed by a public CA (defense against MITM with a
+    // publicly-trusted DV cert for the peer's hostname).
+    const agent = new Agent({
+      connect: {
+        cert: peer.certPem,
+        key: privateKeyPem,
+        ca: this.loadStepCaRoot(),
+        // rejectUnauthorized: true is the undici default for HTTPS
+      },
+    });
+
+    const entry: AgentCacheEntry = {
+      agent,
+      endpointUrl: peer.endpointUrl,
+      certPem: peer.certPem,
+      certSerial: peer.certSerial,
+    };
+
+    this.logger.log(`Agent cached for peer ${peerId} (serial: ${peer.certSerial})`);
+
+    return entry;
+  }
+
+  /**
+   * Execute a POST request with a JSON body.
+   * Returns the parsed response body as an unknown value.
+   * Throws `FederationClientError` on network errors and non-2xx responses.
+   */
+  private async doPost(
+    peerId: string,
+    url: string,
+    agent: Dispatcher,
+    body: Record<string, unknown>,
+  ): Promise<unknown> {
+    return this.doRequest(peerId, url, agent, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify(body),
+    });
+  }
+
+  /**
+   * Execute a GET request.
+   * Returns the parsed response body as an unknown value.
+   * Throws `FederationClientError` on network errors and non-2xx responses.
+   */
+  private async doGet(peerId: string, url: string, agent: Dispatcher): Promise<unknown> {
+    return this.doRequest(peerId, url, agent, { method: 'GET' });
+  }
+
+  private async doRequest(
+    peerId: string,
+    url: string,
+    agent: Dispatcher,
+    init: { method: string; headers?: Record<string, string>; body?: string },
+  ): Promise<unknown> {
+    let response: Awaited<ReturnType<typeof undiciFetch>>;
+
+    try {
+      response = await undiciFetch(url, {
+        ...init,
+        dispatcher: agent,
+      });
+    } catch (err) {
+      throw new FederationClientError({
+        code: 'NETWORK',
+        message: `Network error calling peer ${peerId} at ${url}: ${err instanceof Error ? err.message : String(err)}`,
+        peerId,
+        cause: err,
+      });
+    }
+
+    const rawBody = await response.text().catch(() => '');
+
+    if (!response.ok) {
+      const status = response.status;
+
+      // Attempt to parse as federation error envelope
+      let serverMessage = `HTTP ${status}`;
+      try {
+        const json: unknown = JSON.parse(rawBody);
+        const result = FederationErrorEnvelopeSchema.safeParse(json);
+        if (result.success) {
+          serverMessage = result.data.error.message;
+        }
+      } catch {
+        // Not valid JSON or not a federation envelope — use generic message
+      }
+
+      // Specific code for 403 (most actionable for callers); generic HTTP_{n} for others
+      const code: FederationClientErrorCode = status === 403 ? 'FORBIDDEN' : `HTTP_${status}`;
+
+      throw new FederationClientError({
+        status,
+        code,
+        message: `Peer ${peerId} returned ${status}: ${serverMessage}`,
+        peerId,
+      });
+    }
+
+    try {
+      return JSON.parse(rawBody) as unknown;
+    } catch (err) {
+      throw new FederationClientError({
+        code: 'INVALID_RESPONSE',
+        message: `Peer ${peerId} returned non-JSON body`,
+        peerId,
+        cause: err,
+      });
+    }
+  }
+
+  /**
+   * Parse and validate a response body against a Zod schema.
+   *
+   * For list/get, callers pass the result of `FederationListResponseSchema(z.unknown())`
+   * so that the envelope structure is validated without requiring a concrete item schema
+   * at the client level.  The generic `T` provides compile-time typing.
+   *
+   * Throws `FederationClientError({ code: 'INVALID_RESPONSE' })` on parse failure.
+   */
+  private parseWith<T>(peerId: string, body: unknown, schema: z.ZodTypeAny): T {
+    const result = schema.safeParse(body);
+    if (!result.success) {
+      const issues = result.error.issues
+        .map((e: z.ZodIssue) => `[${e.path.join('.') || 'root'}] ${e.message}`)
+        .join('; ');
+      throw new FederationClientError({
+        code: 'INVALID_RESPONSE',
+        message: `Peer ${peerId} returned invalid response shape: ${issues}`,
+        peerId,
+      });
+    }
+    return result.data as T;
+  }
+}
--- a/apps/gateway/src/federation/client/index.ts
+++ b/apps/gateway/src/federation/client/index.ts
@@ -0,0 +1,13 @@
+/**
+ * Federation client barrel — re-exports for FederationModule consumers.
+ *
+ * M3-09 (QuerySourceService) and future milestones should import from here,
+ * not directly from the implementation file.
+ */
+
+export {
+  FederationClientService,
+  FederationClientError,
+  type FederationClientErrorCode,
+  type FederationClientErrorOptions,
+} from './federation-client.service.js';
--- a/apps/gateway/src/federation/federation.module.ts
+++ b/apps/gateway/src/federation/federation.module.ts
@@ -5,10 +5,11 @@ import { EnrollmentController } from './enrollment.controller.js';
 import { EnrollmentService } from './enrollment.service.js';
 import { FederationController } from './federation.controller.js';
 import { GrantsService } from './grants.service.js';
+import { FederationClientService } from './client/index.js';

@Module({
  controllers: [EnrollmentController, FederationController],
-  providers: [AdminGuard, CaService, EnrollmentService, GrantsService],
-  exports: [CaService, EnrollmentService, GrantsService],
+  providers: [AdminGuard, CaService, EnrollmentService, GrantsService, FederationClientService],
+  exports: [CaService, EnrollmentService, GrantsService, FederationClientService],
 })
 export class FederationModule {}
--- a/eslint.config.mjs
+++ b/eslint.config.mjs
@@ -30,6 +30,7 @@ export default tseslint.config(
            'apps/gateway/vitest.config.ts',
            'packages/storage/vitest.config.ts',
            'packages/mosaic/__tests__/*.ts',
+            'tools/federation-harness/*.ts',
          ],
        },
      },
--- a/packages/types/package.json
+++ b/packages/types/package.json
@@ -26,7 +26,8 @@
  },
  "dependencies": {
    "class-transformer": "^0.5.1",
-    "class-validator": "^0.15.1"
+    "class-validator": "^0.15.1",
+    "zod": "^4.3.6"
  },
  "publishConfig": {
    "registry": "https://git.mosaicstack.dev/api/packages/mosaicstack/npm/",
--- a/packages/types/src/federation/tests/federation.spec.ts
+++ b/packages/types/src/federation/tests/federation.spec.ts
@@ -0,0 +1,435 @@
+/**
+ * Unit tests for federation wire-format DTOs.
+ *
+ * Coverage:
+ *  - FederationRequestSchema  (valid + invalid)
+ *  - FederationListResponseSchema factory
+ *  - FederationGetResponseSchema factory
+ *  - FederationCapabilitiesResponseSchema
+ *  - FederationErrorEnvelopeSchema + error code exhaustiveness
+ *  - FederationError exception hierarchy
+ *  - tagWithSource helper round-trip
+ *  - SourceTagSchema
+ */
+
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod';
+
+import {
+  FEDERATION_ERROR_CODES,
+  FEDERATION_VERBS,
+  FederationCapabilitiesResponseSchema,
+  FederationError,
+  FederationErrorEnvelopeSchema,
+  FederationForbiddenError,
+  FederationInternalError,
+  FederationInvalidRequestError,
+  FederationNotFoundError,
+  FederationRateLimitedError,
+  FederationRequestSchema,
+  FederationScopeViolationError,
+  FederationUnauthorizedError,
+  FederationGetResponseSchema,
+  FederationListResponseSchema,
+  SOURCE_LOCAL,
+  SourceTagSchema,
+  parseFederationErrorEnvelope,
+  tagWithSource,
+} from '../index.js';
+
+// ---------------------------------------------------------------------------
+// Verbs
+// ---------------------------------------------------------------------------
+
+describe('FEDERATION_VERBS', () => {
+  it('contains exactly list, get, capabilities', () => {
+    expect(FEDERATION_VERBS).toEqual(['list', 'get', 'capabilities']);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// FederationRequestSchema
+// ---------------------------------------------------------------------------
+
+describe('FederationRequestSchema', () => {
+  it('accepts a minimal valid list request', () => {
+    const result = FederationRequestSchema.safeParse({ verb: 'list', resource: 'tasks' });
+    expect(result.success).toBe(true);
+  });
+
+  it('accepts a get request with cursor and params', () => {
+    const result = FederationRequestSchema.safeParse({
+      verb: 'get',
+      resource: 'notes',
+      cursor: 'abc123',
+      params: { filter: 'mine' },
+    });
+    expect(result.success).toBe(true);
+    if (result.success) {
+      expect(result.data.cursor).toBe('abc123');
+      expect(result.data.params?.['filter']).toBe('mine');
+    }
+  });
+
+  it('accepts a capabilities request', () => {
+    const result = FederationRequestSchema.safeParse({ verb: 'capabilities', resource: 'tasks' });
+    expect(result.success).toBe(true);
+  });
+
+  it('rejects an unknown verb', () => {
+    const result = FederationRequestSchema.safeParse({ verb: 'search', resource: 'tasks' });
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects an empty resource string', () => {
+    const result = FederationRequestSchema.safeParse({ verb: 'list', resource: '' });
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects a missing verb', () => {
+    const result = FederationRequestSchema.safeParse({ resource: 'tasks' });
+    expect(result.success).toBe(false);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// FederationListResponseSchema factory
+// ---------------------------------------------------------------------------
+
+describe('FederationListResponseSchema', () => {
+  const ItemSchema = z.object({ id: z.string(), name: z.string() });
+  const ListSchema = FederationListResponseSchema(ItemSchema);
+
+  it('accepts a valid list envelope', () => {
+    const result = ListSchema.safeParse({
+      items: [{ id: '1', name: 'Task A' }],
+      nextCursor: 'page2',
+      _partial: false,
+      _truncated: false,
+    });
+    expect(result.success).toBe(true);
+    if (result.success) {
+      expect(result.data.items).toHaveLength(1);
+      expect(result.data.nextCursor).toBe('page2');
+    }
+  });
+
+  it('accepts a minimal envelope with empty items', () => {
+    const result = ListSchema.safeParse({ items: [] });
+    expect(result.success).toBe(true);
+  });
+
+  it('rejects when items is missing', () => {
+    const result = ListSchema.safeParse({ nextCursor: 'x' });
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects when an item fails validation', () => {
+    const result = ListSchema.safeParse({ items: [{ id: 1, name: 'bad' }] });
+    expect(result.success).toBe(false);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// FederationGetResponseSchema factory
+// ---------------------------------------------------------------------------
+
+describe('FederationGetResponseSchema', () => {
+  const ItemSchema = z.object({ id: z.string() });
+  const GetSchema = FederationGetResponseSchema(ItemSchema);
+
+  it('accepts a found item', () => {
+    const result = GetSchema.safeParse({ item: { id: 'abc' } });
+    expect(result.success).toBe(true);
+    if (result.success) {
+      expect(result.data.item).toEqual({ id: 'abc' });
+    }
+  });
+
+  it('accepts null item (not found)', () => {
+    const result = GetSchema.safeParse({ item: null });
+    expect(result.success).toBe(true);
+    if (result.success) {
+      expect(result.data.item).toBeNull();
+    }
+  });
+
+  it('rejects when item is missing', () => {
+    const result = GetSchema.safeParse({});
+    expect(result.success).toBe(false);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// FederationCapabilitiesResponseSchema
+// ---------------------------------------------------------------------------
+
+describe('FederationCapabilitiesResponseSchema', () => {
+  it('accepts a valid capabilities response', () => {
+    const result = FederationCapabilitiesResponseSchema.safeParse({
+      resources: ['tasks', 'notes'],
+      excluded_resources: ['credentials'],
+      max_rows_per_query: 500,
+      supported_verbs: ['list', 'get', 'capabilities'],
+    });
+    expect(result.success).toBe(true);
+    if (result.success) {
+      expect(result.data.max_rows_per_query).toBe(500);
+    }
+  });
+
+  it('accepts a response with filters field', () => {
+    const result = FederationCapabilitiesResponseSchema.safeParse({
+      resources: ['tasks', 'notes'],
+      excluded_resources: [],
+      max_rows_per_query: 100,
+      supported_verbs: ['list'],
+      filters: {
+        tasks: { include_teams: ['team-a'], include_personal: true },
+        notes: { include_personal: false },
+      },
+    });
+    expect(result.success).toBe(true);
+    if (result.success) {
+      expect(result.data.filters?.['tasks']?.include_teams).toEqual(['team-a']);
+    }
+  });
+
+  it('accepts a response with partial filters (only include_teams)', () => {
+    const result = FederationCapabilitiesResponseSchema.safeParse({
+      resources: ['tasks'],
+      excluded_resources: [],
+      max_rows_per_query: 50,
+      supported_verbs: ['list'],
+      filters: { tasks: { include_teams: ['eng'] } },
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it('accepts a response with rate_limit (M4 full shape)', () => {
+    const result = FederationCapabilitiesResponseSchema.safeParse({
+      resources: ['tasks'],
+      excluded_resources: [],
+      max_rows_per_query: 100,
+      supported_verbs: ['list'],
+      rate_limit: { limit_per_minute: 60, remaining: 55, reset_at: '2026-04-23T12:00:00Z' },
+    });
+    expect(result.success).toBe(true);
+    if (result.success) {
+      expect(result.data.rate_limit?.limit_per_minute).toBe(60);
+      expect(result.data.rate_limit?.remaining).toBe(55);
+    }
+  });
+
+  it('accepts a response with rate_limit (M3 minimal — limit_per_minute only)', () => {
+    const result = FederationCapabilitiesResponseSchema.safeParse({
+      resources: ['tasks'],
+      excluded_resources: [],
+      max_rows_per_query: 100,
+      supported_verbs: ['list'],
+      rate_limit: { limit_per_minute: 120 },
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it('accepts a response without rate_limit (field is optional)', () => {
+    const result = FederationCapabilitiesResponseSchema.safeParse({
+      resources: ['tasks'],
+      excluded_resources: [],
+      max_rows_per_query: 100,
+      supported_verbs: ['list'],
+    });
+    expect(result.success).toBe(true);
+    if (result.success) {
+      expect(result.data.rate_limit).toBeUndefined();
+    }
+  });
+
+  it('rejects rate_limit with non-positive limit_per_minute', () => {
+    const result = FederationCapabilitiesResponseSchema.safeParse({
+      resources: ['tasks'],
+      excluded_resources: [],
+      max_rows_per_query: 100,
+      supported_verbs: ['list'],
+      rate_limit: { limit_per_minute: 0 },
+    });
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects rate_limit with invalid reset_at datetime', () => {
+    const result = FederationCapabilitiesResponseSchema.safeParse({
+      resources: ['tasks'],
+      excluded_resources: [],
+      max_rows_per_query: 100,
+      supported_verbs: ['list'],
+      rate_limit: { limit_per_minute: 60, reset_at: 'not-a-datetime' },
+    });
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects supported_verbs with an invalid verb (MED-3 enum guard)', () => {
+    const result = FederationCapabilitiesResponseSchema.safeParse({
+      resources: ['tasks'],
+      excluded_resources: [],
+      max_rows_per_query: 100,
+      supported_verbs: ['invalid_verb'],
+    });
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects empty resources array', () => {
+    const result = FederationCapabilitiesResponseSchema.safeParse({
+      resources: [],
+      excluded_resources: [],
+      max_rows_per_query: 100,
+      supported_verbs: ['list'],
+    });
+    expect(result.success).toBe(false);
+  });
+
+  it('rejects non-integer max_rows_per_query', () => {
+    const result = FederationCapabilitiesResponseSchema.safeParse({
+      resources: ['tasks'],
+      excluded_resources: [],
+      max_rows_per_query: 1.5,
+      supported_verbs: ['list'],
+    });
+    expect(result.success).toBe(false);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// FederationErrorEnvelopeSchema + error code exhaustiveness
+// ---------------------------------------------------------------------------
+
+describe('FederationErrorEnvelopeSchema', () => {
+  it('accepts each valid error code', () => {
+    for (const code of FEDERATION_ERROR_CODES) {
+      const result = FederationErrorEnvelopeSchema.safeParse({
+        error: { code, message: 'test' },
+      });
+      expect(result.success, `code ${code} should be valid`).toBe(true);
+    }
+  });
+
+  it('rejects an unknown error code', () => {
+    const result = FederationErrorEnvelopeSchema.safeParse({
+      error: { code: 'unknown_code', message: 'test' },
+    });
+    expect(result.success).toBe(false);
+  });
+
+  it('accepts optional details field', () => {
+    const result = FederationErrorEnvelopeSchema.safeParse({
+      error: { code: 'forbidden', message: 'nope', details: { grantId: 'xyz' } },
+    });
+    expect(result.success).toBe(true);
+  });
+
+  it('rejects when message is missing', () => {
+    const result = FederationErrorEnvelopeSchema.safeParse({ error: { code: 'not_found' } });
+    expect(result.success).toBe(false);
+  });
+});
+
+describe('parseFederationErrorEnvelope', () => {
+  it('returns a typed envelope for valid input', () => {
+    const env = parseFederationErrorEnvelope({ error: { code: 'not_found', message: 'gone' } });
+    expect(env.error.code).toBe('not_found');
+  });
+
+  it('throws for invalid input', () => {
+    expect(() => parseFederationErrorEnvelope({ bad: 'shape' })).toThrow();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// FederationError exception hierarchy
+// ---------------------------------------------------------------------------
+
+describe('FederationError hierarchy', () => {
+  const cases: Array<[string, FederationError]> = [
+    ['unauthorized', new FederationUnauthorizedError()],
+    ['forbidden', new FederationForbiddenError()],
+    ['not_found', new FederationNotFoundError()],
+    ['rate_limited', new FederationRateLimitedError()],
+    ['scope_violation', new FederationScopeViolationError()],
+    ['invalid_request', new FederationInvalidRequestError()],
+    ['internal_error', new FederationInternalError()],
+  ];
+
+  it.each(cases)('code %s is an instance of FederationError', (_code, err) => {
+    expect(err).toBeInstanceOf(FederationError);
+    expect(err).toBeInstanceOf(Error);
+  });
+
+  it.each(cases)('code %s has correct code property', (code, err) => {
+    expect(err.code).toBe(code);
+  });
+
+  it('toEnvelope serialises to wire format', () => {
+    const err = new FederationForbiddenError('Access denied', { grantId: 'g1' });
+    const env = err.toEnvelope();
+    expect(env.error.code).toBe('forbidden');
+    expect(env.error.message).toBe('Access denied');
+    expect(env.error.details).toEqual({ grantId: 'g1' });
+  });
+
+  it('toEnvelope omits details when not provided', () => {
+    const err = new FederationNotFoundError();
+    const env = err.toEnvelope();
+    expect(Object.prototype.hasOwnProperty.call(env.error, 'details')).toBe(false);
+  });
+
+  it('error codes tuple covers all subclasses (exhaustiveness check)', () => {
+    // If a new subclass is added without a code, this test fails at compile time.
+    const allCodes = new Set(FEDERATION_ERROR_CODES);
+    for (const [code] of cases) {
+      expect(allCodes.has(code as (typeof FEDERATION_ERROR_CODES)[number])).toBe(true);
+    }
+    // All codes are covered by at least one case
+    expect(cases).toHaveLength(FEDERATION_ERROR_CODES.length);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Source tag + tagWithSource
+// ---------------------------------------------------------------------------
+
+describe('SourceTagSchema', () => {
+  it('accepts a non-empty _source string', () => {
+    expect(SourceTagSchema.safeParse({ _source: 'local' }).success).toBe(true);
+    expect(SourceTagSchema.safeParse({ _source: 'mosaic.uscllc.com' }).success).toBe(true);
+  });
+
+  it('rejects empty _source string', () => {
+    expect(SourceTagSchema.safeParse({ _source: '' }).success).toBe(false);
+  });
+});
+
+describe('tagWithSource', () => {
+  it('stamps each item with the given source', () => {
+    const items = [{ id: '1' }, { id: '2' }];
+    const tagged = tagWithSource(items, SOURCE_LOCAL);
+    expect(tagged).toEqual([
+      { id: '1', _source: 'local' },
+      { id: '2', _source: 'local' },
+    ]);
+  });
+
+  it('preserves original item fields', () => {
+    const items = [{ id: 'x', name: 'Task', done: false }];
+    const tagged = tagWithSource(items, 'mosaic.uscllc.com');
+    expect(tagged[0]).toMatchObject({ id: 'x', name: 'Task', done: false });
+    expect(tagged[0]?._source).toBe('mosaic.uscllc.com');
+  });
+
+  it('returns empty array for empty input', () => {
+    expect(tagWithSource([], 'local')).toEqual([]);
+  });
+
+  it('round-trip: tagWithSource output passes SourceTagSchema', () => {
+    const tagged = tagWithSource([{ id: '1' }], 'local');
+    expect(SourceTagSchema.safeParse(tagged[0]).success).toBe(true);
+  });
+});
--- a/packages/types/src/federation/error.ts
+++ b/packages/types/src/federation/error.ts
@@ -0,0 +1,164 @@
+/**
+ * Federation wire-format error envelope and exception hierarchy.
+ *
+ * Source of truth: docs/federation/PRD.md §6, §8.
+ *
+ * DESIGN: Typed error classes rather than discriminated union values
+ * ──────────────────────────────────────────────────────────────────
+ * We expose:
+ *   1. `FEDERATION_ERROR_CODES` — closed string-enum tuple (exhaustiveness-checkable).
+ *   2. `FederationErrorCode` — union type inferred from the tuple.
+ *   3. `FederationErrorEnvelopeSchema` — Zod schema for the wire format.
+ *   4. `FederationError` — base Error subclass with a typed `code` property.
+ *      One concrete subclass per code (e.g. `FederationUnauthorizedError`),
+ *      which enables `instanceof` dispatch in handlers without a switch.
+ *
+ * Rationale: subclasses give gateway handlers and the client a clean dispatch
+ * point (catch + instanceof) without re-parsing or switch tables.  All classes
+ * carry `code` so a generic logger can act on any FederationError uniformly.
+ *
+ * Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
+ */
+
+import { z } from 'zod';
+
+// ---------------------------------------------------------------------------
+// Error code enum (closed)
+// ---------------------------------------------------------------------------
+
+export const FEDERATION_ERROR_CODES = [
+  'unauthorized',
+  'forbidden',
+  'not_found',
+  'rate_limited',
+  'scope_violation',
+  'invalid_request',
+  'internal_error',
+] as const;
+
+export type FederationErrorCode = (typeof FEDERATION_ERROR_CODES)[number];
+
+// ---------------------------------------------------------------------------
+// Wire-format schema
+// ---------------------------------------------------------------------------
+
+export const FederationErrorEnvelopeSchema = z.object({
+  error: z.object({
+    code: z.enum(FEDERATION_ERROR_CODES),
+    message: z.string(),
+    details: z.unknown().optional(),
+  }),
+});
+
+export type FederationErrorEnvelope = z.infer<typeof FederationErrorEnvelopeSchema>;
+
+// ---------------------------------------------------------------------------
+// Exception class hierarchy
+// ---------------------------------------------------------------------------
+
+/**
+ * Base class for all federation errors.
+ * Carries a typed `code` so handlers can act uniformly on any FederationError.
+ */
+export class FederationError extends Error {
+  readonly code: FederationErrorCode;
+  readonly details?: unknown;
+
+  constructor(code: FederationErrorCode, message: string, details?: unknown) {
+    super(message);
+    this.name = 'FederationError';
+    this.code = code;
+    this.details = details;
+  }
+
+  /** Serialise to the wire-format error envelope. */
+  toEnvelope(): FederationErrorEnvelope {
+    return {
+      error: {
+        code: this.code,
+        message: this.message,
+        ...(this.details !== undefined ? { details: this.details } : {}),
+      },
+    };
+  }
+}
+
+/** Client cert is missing, invalid, or signed by an untrusted CA. */
+export class FederationUnauthorizedError extends FederationError {
+  constructor(message = 'Unauthorized', details?: unknown) {
+    super('unauthorized', message, details);
+    this.name = 'FederationUnauthorizedError';
+  }
+}
+
+/** Grant is inactive, revoked, or the subject user lacks access to the resource. */
+export class FederationForbiddenError extends FederationError {
+  constructor(message = 'Forbidden', details?: unknown) {
+    super('forbidden', message, details);
+    this.name = 'FederationForbiddenError';
+  }
+}
+
+/** Requested resource does not exist. */
+export class FederationNotFoundError extends FederationError {
+  constructor(message = 'Not found', details?: unknown) {
+    super('not_found', message, details);
+    this.name = 'FederationNotFoundError';
+  }
+}
+
+/** Grant has exceeded its rate limit; Retry-After should accompany this. */
+export class FederationRateLimitedError extends FederationError {
+  constructor(message = 'Rate limit exceeded', details?: unknown) {
+    super('rate_limited', message, details);
+    this.name = 'FederationRateLimitedError';
+  }
+}
+
+/**
+ * The request targets a resource or performs an action that the grant's
+ * scope explicitly disallows (distinct from generic 403 — scope_violation
+ * means the scope configuration itself blocked the request).
+ */
+export class FederationScopeViolationError extends FederationError {
+  constructor(message = 'Scope violation', details?: unknown) {
+    super('scope_violation', message, details);
+    this.name = 'FederationScopeViolationError';
+  }
+}
+
+/** Malformed request — missing fields, invalid cursor, unknown verb, etc. */
+export class FederationInvalidRequestError extends FederationError {
+  constructor(message = 'Invalid request', details?: unknown) {
+    super('invalid_request', message, details);
+    this.name = 'FederationInvalidRequestError';
+  }
+}
+
+/** Unexpected server-side failure. */
+export class FederationInternalError extends FederationError {
+  constructor(message = 'Internal error', details?: unknown) {
+    super('internal_error', message, details);
+    this.name = 'FederationInternalError';
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Typed parser
+// ---------------------------------------------------------------------------
+
+/**
+ * Parse an unknown value as a FederationErrorEnvelope.
+ * Throws a plain Error (not FederationError) when parsing fails — this means
+ * the payload wasn't even a valid error envelope.
+ */
+export function parseFederationErrorEnvelope(input: unknown): FederationErrorEnvelope {
+  const result = FederationErrorEnvelopeSchema.safeParse(input);
+  if (!result.success) {
+    const issues = result.error.issues
+      .map((e) => `  - [${e.path.join('.') || 'root'}] ${e.message}`)
+      .join('\n');
+    throw new Error(`Invalid federation error envelope:\n${issues}`);
+  }
+  return result.data;
+}
--- a/packages/types/src/federation/index.ts
+++ b/packages/types/src/federation/index.ts
@@ -0,0 +1,16 @@
+/**
+ * Federation wire-format DTOs — public barrel.
+ *
+ * Exports everything downstream M3 tasks need:
+ *   verbs.ts      — FEDERATION_VERBS constant + FederationVerb type
+ *   request.ts    — FederationRequestSchema + FederationRequest
+ *   response.ts   — list/get/capabilities schema factories + types
+ *   source-tag.ts — SourceTagSchema, tagWithSource helper
+ *   error.ts      — error envelope schema + typed exception hierarchy
+ */
+
+export * from './verbs.js';
+export * from './request.js';
+export * from './response.js';
+export * from './source-tag.js';
+export * from './error.js';
--- a/packages/types/src/federation/request.ts
+++ b/packages/types/src/federation/request.ts
@@ -0,0 +1,47 @@
+/**
+ * Federation wire-format request schema.
+ *
+ * Source of truth: docs/federation/PRD.md §9 (query model).
+ *
+ * Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
+ */
+
+import { z } from 'zod';
+import { FEDERATION_VERBS } from './verbs.js';
+
+// ---------------------------------------------------------------------------
+// Query params — free-form key/value pairs passed alongside the request
+// ---------------------------------------------------------------------------
+
+const QueryParamsSchema = z.record(z.string(), z.string()).optional();
+
+// ---------------------------------------------------------------------------
+// Top-level request schema
+// ---------------------------------------------------------------------------
+
+export const FederationRequestSchema = z.object({
+  /**
+   * Verb being invoked. One of the M3 federation verbs.
+   */
+  verb: z.enum(FEDERATION_VERBS),
+
+  /**
+   * Resource path being queried, e.g. "tasks", "notes", "memory".
+   * Forward-slash-separated for sub-resources (e.g. "teams/abc/tasks").
+   */
+  resource: z.string().min(1, { message: 'resource must not be empty' }),
+
+  /**
+   * Optional free-form query params (filters, sort, etc.).
+   * Values are always strings; consumers parse as needed.
+   */
+  params: QueryParamsSchema,
+
+  /**
+   * Opaque pagination cursor returned by a previous list response.
+   * Absent on first page.
+   */
+  cursor: z.string().optional(),
+});
+
+export type FederationRequest = z.infer<typeof FederationRequestSchema>;
--- a/packages/types/src/federation/response.ts
+++ b/packages/types/src/federation/response.ts
@@ -0,0 +1,162 @@
+/**
+ * Federation wire-format response schemas.
+ *
+ * Source of truth: docs/federation/PRD.md §9 and MILESTONES.md §M3.
+ *
+ * DESIGN: Generic factory functions rather than z.lazy
+ * ─────────────────────────────────────────────────────
+ * Zod generic schemas cannot be expressed as a single re-usable `z.ZodType`
+ * value because TypeScript's type system erases the generic at the call site.
+ * The idiomatic Zod v4 pattern is factory functions that take an item schema
+ * and return a fully-typed schema.
+ *
+ *   const MyListSchema = FederationListResponseSchema(z.string());
+ *   type MyList = z.infer<typeof MyListSchema>;
+ *   // => { items: string[]; nextCursor?: string; _partial?: boolean; _truncated?: boolean }
+ *
+ * Downstream consumers (M3-03..M3-07, M3-08, M3-09) should call these
+ * factories once per resource type and cache the result.
+ *
+ * Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
+ */
+
+import { z } from 'zod';
+
+import { FEDERATION_VERBS } from './verbs.js';
+
+// ---------------------------------------------------------------------------
+// Shared envelope flags
+// ---------------------------------------------------------------------------
+
+/**
+ * `_partial`: true when the response is a subset of available data (e.g. due
+ * to scope intersection reducing the result set).
+ */
+const PartialFlag = z.boolean().optional();
+
+/**
+ * `_truncated`: true when the response was capped by max_rows_per_query and
+ * additional pages exist beyond the current cursor.
+ */
+const TruncatedFlag = z.boolean().optional();
+
+// ---------------------------------------------------------------------------
+// FederationListResponseSchema<T> factory
+// ---------------------------------------------------------------------------
+
+/**
+ * Returns a Zod schema for a paginated federation list envelope.
+ *
+ * @param itemSchema - Zod schema for a single item in the list.
+ *
+ * @example
+ * ```ts
+ * const TaskListSchema = FederationListResponseSchema(TaskSchema);
+ * type TaskList = z.infer<typeof TaskListSchema>;
+ * ```
+ */
+export function FederationListResponseSchema<T extends z.ZodTypeAny>(itemSchema: T) {
+  return z.object({
+    items: z.array(itemSchema),
+    nextCursor: z.string().optional(),
+    _partial: PartialFlag,
+    _truncated: TruncatedFlag,
+  });
+}
+
+export type FederationListResponse<T> = {
+  items: T[];
+  nextCursor?: string;
+  _partial?: boolean;
+  _truncated?: boolean;
+};
+
+// ---------------------------------------------------------------------------
+// FederationGetResponseSchema<T> factory
+// ---------------------------------------------------------------------------
+
+/**
+ * Returns a Zod schema for a single-item federation get envelope.
+ *
+ * `item` is null when the resource was not found (404 equivalent on the wire).
+ *
+ * @param itemSchema - Zod schema for the item (nullable is applied internally).
+ *
+ * @example
+ * ```ts
+ * const TaskGetSchema = FederationGetResponseSchema(TaskSchema);
+ * type TaskGet = z.infer<typeof TaskGetSchema>;
+ * ```
+ */
+export function FederationGetResponseSchema<T extends z.ZodTypeAny>(itemSchema: T) {
+  return z.object({
+    item: itemSchema.nullable(),
+    _partial: PartialFlag,
+  });
+}
+
+export type FederationGetResponse<T> = {
+  item: T | null;
+  _partial?: boolean;
+};
+
+// ---------------------------------------------------------------------------
+// FederationCapabilitiesResponseSchema (fixed shape)
+// ---------------------------------------------------------------------------
+
+/**
+ * Shape mirrors FederationScope (apps/gateway/src/federation/scope-schema.ts)
+ * but is kept separate to avoid coupling packages/types to the gateway module.
+ * The serving side populates this from the resolved grant scope at request time.
+ */
+export const FederationCapabilitiesResponseSchema = z.object({
+  /**
+   * Resources this grant is allowed to query.
+   */
+  resources: z.array(z.string()).nonempty(),
+
+  /**
+   * Resources explicitly blocked for this grant even if they exist.
+   */
+  excluded_resources: z.array(z.string()),
+
+  /**
+   * Per-resource filters (mirrors FederationScope.filters from PRD §8.1).
+   * Keys are resource names; values control team/personal visibility.
+   */
+  filters: z
+    .record(
+      z.string(),
+      z.object({
+        include_teams: z.array(z.string()).optional(),
+        include_personal: z.boolean().optional(),
+      }),
+    )
+    .optional(),
+
+  /**
+   * Hard cap on rows returned per query for this grant.
+   */
+  max_rows_per_query: z.number().int().positive(),
+
+  /**
+   * Verbs currently available. Will expand in M4+ (search).
+   * Closed enum — only values from FEDERATION_VERBS are accepted.
+   */
+  supported_verbs: z.array(z.enum(FEDERATION_VERBS)).nonempty(),
+
+  /**
+   * Rate-limit state for this grant (PRD §9.1).
+   * M4 populates `remaining` and `reset_at`; M3 servers may return only
+   * `limit_per_minute` or omit the field entirely.
+   */
+  rate_limit: z
+    .object({
+      limit_per_minute: z.number().int().positive(),
+      remaining: z.number().int().nonnegative().optional(),
+      reset_at: z.string().datetime().optional(),
+    })
+    .optional(),
+});
+
+export type FederationCapabilitiesResponse = z.infer<typeof FederationCapabilitiesResponseSchema>;
--- a/packages/types/src/federation/source-tag.ts
+++ b/packages/types/src/federation/source-tag.ts
@@ -0,0 +1,61 @@
+/**
+ * _source tag for federation fan-out results.
+ *
+ * Source of truth: docs/federation/PRD.md §9.3 and MILESTONES.md §M3 acceptance test #8.
+ *
+ * When source: "all" is requested, the gateway fans out to local + all active
+ * federated peers, merges results, and tags each item with _source so the
+ * caller knows the provenance.
+ *
+ * Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
+ */
+
+import { z } from 'zod';
+
+// ---------------------------------------------------------------------------
+// Source tag schema
+// ---------------------------------------------------------------------------
+
+/**
+ * `_source` is either:
+ * - `"local"` — the item came from this gateway's own storage.
+ * - a peer common name (e.g. `"mosaic.uscllc.com"`) — the item came from
+ *   that federated peer.
+ */
+export const SourceTagSchema = z.object({
+  _source: z.string().min(1, { message: '_source must not be empty' }),
+});
+
+export type SourceTag = z.infer<typeof SourceTagSchema>;
+
+/**
+ * Literal union for the well-known local source value.
+ * Peers are identified by hostname strings, so there is no closed enum.
+ */
+export const SOURCE_LOCAL = 'local' as const;
+
+// ---------------------------------------------------------------------------
+// Helper: tagWithSource
+// ---------------------------------------------------------------------------
+
+/**
+ * Stamps each item in `items` with `{ _source: source }`.
+ *
+ * The return type merges the item type with SourceTag so callers get full
+ * type-safety on both the original fields and `_source`.
+ *
+ * @param items  - Array of items to tag.
+ * @param source - Either `"local"` or a peer hostname (common name from the
+ *                 client cert's CN or O field).
+ *
+ * @example
+ * ```ts
+ * const local = tagWithSource([{ id: '1', title: 'Task' }], 'local');
+ * // => [{ id: '1', title: 'Task', _source: 'local' }]
+ *
+ * const remote = tagWithSource(peerItems, 'mosaic.uscllc.com');
+ * ```
+ */
+export function tagWithSource<T extends object>(items: T[], source: string): Array<T & SourceTag> {
+  return items.map((item) => ({ ...item, _source: source }));
+}
--- a/packages/types/src/federation/verbs.ts
+++ b/packages/types/src/federation/verbs.ts
@@ -0,0 +1,11 @@
+/**
+ * Federation verb constants and types.
+ *
+ * Source of truth: docs/federation/PRD.md §9.1
+ *
+ * M3 ships list, get, capabilities. search lives in M4.
+ */
+
+export const FEDERATION_VERBS = ['list', 'get', 'capabilities'] as const;
+
+export type FederationVerb = (typeof FEDERATION_VERBS)[number];
--- a/packages/types/src/index.ts
+++ b/packages/types/src/index.ts
@@ -5,3 +5,4 @@ export * from './agent/index.js';
 export * from './provider/index.js';
 export * from './routing/index.js';
 export * from './commands/index.js';
+export * from './federation/index.js';
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -179,6 +179,9 @@ importers:
      socket.io:
        specifier: ^4.8.0
        version: 4.8.3
+      undici:
+        specifier: ^7.24.6
+        version: 7.24.6
      uuid:
        specifier: ^11.0.0
        version: 11.1.0
@@ -679,6 +682,9 @@ importers:
      class-validator:
        specifier: ^0.15.1
        version: 0.15.1
+      zod:
+        specifier: ^4.3.6
+        version: 4.3.6
    devDependencies:
      typescript:
        specifier: ^5.8.0
@@ -6990,10 +6996,6 @@ packages:
    resolution: {integrity: sha512-gBLkYIlEnSp8pFbT64yFgGE6UIB9tAkhukC23PmMDCe5Nd+cRqKxSjw5y54MK2AZMgZfJWMaNE4nYUHgi1XEOw==}
    engines: {node: '>=18.17'}

-  undici@7.24.3:
-    resolution: {integrity: sha512-eJdUmK/Wrx2d+mnWWmwwLRyA7OQCkLap60sk3dOK4ViZR7DKwwptwuIvFBg2HaiP9ESaEdhtpSymQPvytpmkCA==}
-    engines: {node: '>=20.18.1'}
-
  undici@7.24.6:
    resolution: {integrity: sha512-Xi4agocCbRzt0yYMZGMA6ApD7gvtUFaxm4ZmeacWI4cZxaF6C+8I8QfofC20NAePiB/IcvZmzkJ7XPa471AEtA==}
    engines: {node: '>=20.18.1'}
@@ -8728,7 +8730,7 @@ snapshots:
      openai: 6.26.0(ws@8.20.0)(zod@4.3.6)
      partial-json: 0.1.7
      proxy-agent: 6.5.0
-      undici: 7.24.3
+      undici: 7.24.6
      zod-to-json-schema: 3.25.1(zod@4.3.6)
    transitivePeerDependencies:
      - '@modelcontextprotocol/sdk'
@@ -12587,7 +12589,7 @@ snapshots:
      saxes: 6.0.0
      symbol-tree: 3.2.4
      tough-cookie: 6.0.1
-      undici: 7.24.3
+      undici: 7.24.6
      w3c-xmlserializer: 5.0.0
      webidl-conversions: 8.0.1
      whatwg-mimetype: 5.0.0
@@ -14438,8 +14440,6 @@ snapshots:

  undici@6.21.3: {}

-  undici@7.24.3: {}
-
  undici@7.24.6: {}

  unhomoglyph@1.0.6: {}
--- a/tools/federation-harness/README.md
+++ b/tools/federation-harness/README.md
@@ -0,0 +1,254 @@
+# Federation Test Harness
+
+Local two-gateway federation test infrastructure for Mosaic Stack M3+.
+
+This harness boots two real gateway instances (`gateway-a`, `gateway-b`) on a
+shared Docker bridge network, each backed by its own Postgres (pgvector) +
+Valkey, sharing a single Step-CA. It is the test bed for all M3+ federation
+E2E tests.
+
+## Prerequisites
+
+- Docker with Compose v2 (`docker compose version` ≥ 2.20)
+- pnpm (for running via repo scripts)
+- `infra/step-ca/dev-password` must exist (copy from `infra/step-ca/dev-password.example`)
+
+## Network Topology
+
+```
+Host machine
+├── localhost:14001  →  gateway-a   (Server A — home / requesting)
+├── localhost:14002  →  gateway-b   (Server B — work / serving)
+├── localhost:15432  →  postgres-a
+├── localhost:15433  →  postgres-b
+├── localhost:16379  →  valkey-a
+├── localhost:16380  →  valkey-b
+└── localhost:19000  →  step-ca     (shared CA)
+
+Docker network: fed-test-net (bridge)
+  gateway-a ←──── mTLS ────→ gateway-b
+             ↘             ↗
+               step-ca
+```
+
+Ports are chosen to avoid collision with the base dev stack (5433, 6380, 14242, 9000).
+
+## Starting the Harness
+
+```bash
+# From repo root
+docker compose -f tools/federation-harness/docker-compose.two-gateways.yml up -d
+
+# Wait for all services to be healthy (~60-90s on first boot due to NestJS cold start)
+docker compose -f tools/federation-harness/docker-compose.two-gateways.yml ps
+```
+
+## Seeding Test Data
+
+The seed script provisions three grant scope variants (A, B, C) and walks the
+full enrollment flow so Server A ends up with active peers pointing at Server B.
+
+```bash
+# Assumes stack is already running
+pnpm tsx tools/federation-harness/seed.ts
+
+# Or boot + seed in one step
+pnpm tsx tools/federation-harness/seed.ts --boot
+```
+
+### Scope Variants
+
+| Variant | Resources          | Filters                            | Excluded    | Purpose                         |
+| ------- | ------------------ | ---------------------------------- | ----------- | ------------------------------- |
+| A       | tasks, notes       | include_personal: true             | (none)      | Personal data federation        |
+| B       | tasks              | include_teams: ['T1'], no personal | (none)      | Team-scoped, no personal        |
+| C       | tasks, credentials | include_personal: true             | credentials | Sanity: excluded wins over list |
+
+## Using from Vitest
+
+```ts
+import {
+  bootHarness,
+  tearDownHarness,
+  serverA,
+  serverB,
+  seed,
+} from '../../tools/federation-harness/harness.js';
+import type { HarnessHandle } from '../../tools/federation-harness/harness.js';
+
+let handle: HarnessHandle;
+
+beforeAll(async () => {
+  handle = await bootHarness();
+}, 180_000); // allow 3 min for Docker pull + NestJS cold start
+
+afterAll(async () => {
+  await tearDownHarness(handle);
+});
+
+test('variant A: list tasks returns personal tasks', async () => {
+  // NOTE: Only 'all' is supported for now — per-variant narrowing is M3-11.
+  const seedResult = await seed(handle, 'all');
+  const a = serverA(handle);
+
+  const res = await fetch(`${a.baseUrl}/api/federation/tasks`, {
+    headers: { 'x-federation-grant': seedResult.grants.variantA.id },
+  });
+  expect(res.status).toBe(200);
+});
+```
+
+> **Note:** `seed()` bootstraps a fresh admin user on each gateway via
+> `POST /api/bootstrap/setup`. Both gateways must have zero users (pristine DB).
+> If either gateway already has users, `seed()` throws with a clear error.
+> Reset state with `docker compose down -v`.
+
+The `bootHarness()` function is **idempotent**: if both gateways are already
+healthy, it reuses the running stack and returns `ownedStack: false`. Tests
+should not call `tearDownHarness` when `ownedStack` is false unless they
+explicitly want to shut down a shared stack.
+
+## Vitest Config (pnpm test:federation)
+
+Add to `vitest.config.ts` at repo root (or a dedicated config):
+
+```ts
+// vitest.federation.config.ts
+import { defineConfig } from 'vitest/config';
+
+export default defineConfig({
+  test: {
+    include: ['**/*.federation.test.ts'],
+    testTimeout: 60_000,
+    hookTimeout: 180_000,
+    reporters: ['verbose'],
+  },
+});
+```
+
+Then add to root `package.json`:
+
+```json
+"test:federation": "vitest run --config vitest.federation.config.ts"
+```
+
+## Nuking State
+
+```bash
+# Remove containers AND volumes (ephemeral state — CA keys, DBs, everything)
+docker compose -f tools/federation-harness/docker-compose.two-gateways.yml down -v
+```
+
+On next `up`, Step-CA re-initialises from scratch and generates new CA keys.
+
+## Step-CA Root Certificate
+
+The CA root lives in the `fed-harness-step-ca` Docker volume at
+`/home/step/certs/root_ca.crt`. To extract it to the host:
+
+```bash
+docker run --rm \
+  -v fed-harness-step-ca:/home/step \
+  alpine cat /home/step/certs/root_ca.crt > /tmp/fed-harness-root-ca.crt
+```
+
+## Troubleshooting
+
+### Port conflicts
+
+Default host ports: 14001, 14002, 15432, 15433, 16379, 16380, 19000.
+Override via environment variables before `docker compose up`:
+
+```bash
+GATEWAY_A_HOST_PORT=14101 GATEWAY_B_HOST_PORT=14102 \
+  docker compose -f tools/federation-harness/docker-compose.two-gateways.yml up -d
+```
+
+### Image pull failures
+
+The gateway image is digest-pinned to:
+
+```
+git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
+```
+
+(sha-9f1a081, post-#491 IMG-FIX)
+
+If the registry is unreachable, Docker will use the locally cached image if
+present. If no local image exists, the compose up will fail with a pull error.
+In that case:
+
+1. Ensure you can reach `git.mosaicstack.dev` (VPN, DNS, etc.).
+2. Log in: `docker login git.mosaicstack.dev`
+3. Pull manually: `docker pull git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02`
+
+### NestJS cold start
+
+Gateway containers take 40–60 seconds to become healthy on first boot (Node.js
+module resolution + NestJS DI bootstrap). The `start_period: 60s` in the
+compose healthcheck covers this. `bootHarness()` polls for up to 3 minutes.
+
+### Step-CA startup
+
+Step-CA initialises on first boot (generates CA keys). This takes ~5-10s.
+The `start_period: 30s` in the healthcheck covers it. Both gateways wait for
+Step-CA to be healthy before starting (`depends_on: step-ca: condition: service_healthy`).
+
+### dev-password missing
+
+The Step-CA container requires `infra/step-ca/dev-password` to be mounted.
+Copy the example and set a local password:
+
+```bash
+cp infra/step-ca/dev-password.example infra/step-ca/dev-password
+# Edit the file to set your preferred dev CA password
+```
+
+The file is `.gitignore`d — do not commit it.
+
+## Image Digest Note
+
+The gateway image is pinned to `sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02`
+(sha-9f1a081). This is the digest promoted by PR #491 (IMG-FIX). The `latest`
+tag is forbidden per Mosaic image policy. When a new gateway build is promoted,
+update the digest in `docker-compose.two-gateways.yml` and in this file.
+
+## Known Limitations
+
+### BETTER_AUTH_URL enrollment URL bug (upstream production code — not yet fixed)
+
+`apps/gateway/src/federation/federation.controller.ts:145` constructs the
+enrollment URL using `process.env['BETTER_AUTH_URL'] ?? 'http://localhost:14242'`.
+This is an upstream bug: `BETTER_AUTH_URL` is the Better Auth origin (typically
+the web app), not the gateway's own base URL. In non-harness deployments this
+produces an enrollment URL pointing to the wrong host or port.
+
+**How the harness handles this:**
+
+1. **In-cluster calls (container-to-container):** The compose file sets
+   `BETTER_AUTH_URL: 'http://gateway-b:3000'` so the enrollment URL returned by
+   the gateway uses the Docker internal hostname. This lets other containers in the
+   `fed-test-net` network resolve and reach Server B's enrollment endpoint.
+
+2. **Host-side URL rewrite (seed script):** The `seed.ts` script runs on the host
+   machine where `gateway-b` is not a resolvable hostname. Before calling
+   `fetch(enrollmentUrl, ...)`, the seed script rewrites the URL: it extracts only
+   the token path segment from `enrollmentUrl` and reassembles the URL using the
+   host-accessible `serverBUrl` (default: `http://localhost:14002`). This lets the
+   seed script redeem enrollment tokens from the host without being affected by the
+   in-cluster hostname in the returned URL.
+
+**TODO:** Fix `federation.controller.ts` to derive the enrollment URL from its own
+listening address (e.g. `GATEWAY_BASE_URL` env var or a dedicated
+`FEDERATION_ENROLLMENT_BASE_URL` env var) rather than reusing `BETTER_AUTH_URL`.
+Tracked as a follow-up to PR #505 — do not bundle with harness changes.
+
+## Permanent Infrastructure
+
+This harness is designed to outlive M3 and be reused by M4+ milestone tests.
+It is not a throwaway scaffold — treat it as production test infrastructure:
+
+- Keep it idempotent.
+- Do not hardcode test assumptions in the harness layer (put them in tests).
+- Update the seed script when new scope variants are needed.
+- The README and harness should be kept in sync as the federation API evolves.
--- a/tools/federation-harness/docker-compose.two-gateways.yml
+++ b/tools/federation-harness/docker-compose.two-gateways.yml
@@ -0,0 +1,247 @@
+# tools/federation-harness/docker-compose.two-gateways.yml
+#
+# Two-gateway federation test harness — local-only, no Portainer/Swarm needed.
+#
+# USAGE (manual):
+#   docker compose -f tools/federation-harness/docker-compose.two-gateways.yml up -d
+#   docker compose -f tools/federation-harness/docker-compose.two-gateways.yml down -v
+#
+# USAGE (from harness.ts):
+#   const handle = await bootHarness();
+#   ...
+#   await tearDownHarness(handle);
+#
+# TOPOLOGY:
+#   gateway-a  — "home" instance (Server A, the requesting side)
+#     └── postgres-a  (pgvector/pg17, port 15432)
+#     └── valkey-a    (port 16379)
+#   gateway-b  — "work" instance (Server B, the serving side)
+#     └── postgres-b  (pgvector/pg17, port 15433)
+#     └── valkey-b    (port 16380)
+#   step-ca    — shared CA for both gateways (port 19000)
+#
+# All services share the `fed-test-net` bridge network.
+# Host port ranges (15432-15433, 16379-16380, 14001-14002, 19000) are chosen
+# to avoid collision with the base dev stack (5433, 6380, 14242, 9000).
+#
+# IMAGE:
+#   Pinned to the immutable digest sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
+#   (sha-9f1a081, post-#491 IMG-FIX, smoke-tested locally).
+#   Update this digest only after a new CI build is promoted to the registry.
+#
+# STEP-CA:
+#   Single shared Step-CA instance. Both gateways connect to it.
+#   CA volume is ephemeral per `docker compose down -v`; regenerated on next up.
+#   The harness seed script provisions the CA roots cross-trust after first boot.
+
+services:
+  # ─── Shared Certificate Authority ────────────────────────────────────────────
+  step-ca:
+    image: smallstep/step-ca:0.27.4
+    container_name: fed-harness-step-ca
+    restart: unless-stopped
+    ports:
+      - '${STEP_CA_HOST_PORT:-19000}:9000'
+    volumes:
+      - step_ca_data:/home/step
+      - ../../infra/step-ca/init.sh:/usr/local/bin/mosaic-step-ca-init.sh:ro
+      - ../../infra/step-ca/templates:/etc/step-ca-templates:ro
+      - ../../infra/step-ca/dev-password:/run/secrets/ca_password:ro
+    entrypoint: ['/bin/sh', '/usr/local/bin/mosaic-step-ca-init.sh']
+    networks:
+      - fed-test-net
+    healthcheck:
+      test:
+        [
+          'CMD',
+          'step',
+          'ca',
+          'health',
+          '--ca-url',
+          'https://localhost:9000',
+          '--root',
+          '/home/step/certs/root_ca.crt',
+        ]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 30s
+
+  # ─── Server A — Home / Requesting Gateway ────────────────────────────────────
+  postgres-a:
+    image: pgvector/pgvector:pg17
+    container_name: fed-harness-postgres-a
+    restart: unless-stopped
+    ports:
+      - '${PG_A_HOST_PORT:-15432}:5432'
+    environment:
+      POSTGRES_USER: mosaic
+      POSTGRES_PASSWORD: mosaic
+      POSTGRES_DB: mosaic
+    volumes:
+      - pg_a_data:/var/lib/postgresql/data
+      - ../../infra/pg-init:/docker-entrypoint-initdb.d:ro
+    networks:
+      - fed-test-net
+    healthcheck:
+      test: ['CMD-SHELL', 'pg_isready -U mosaic']
+      interval: 5s
+      timeout: 3s
+      retries: 5
+
+  valkey-a:
+    image: valkey/valkey:8-alpine
+    container_name: fed-harness-valkey-a
+    restart: unless-stopped
+    ports:
+      - '${VALKEY_A_HOST_PORT:-16379}:6379'
+    volumes:
+      - valkey_a_data:/data
+    networks:
+      - fed-test-net
+    healthcheck:
+      test: ['CMD', 'valkey-cli', 'ping']
+      interval: 5s
+      timeout: 3s
+      retries: 5
+
+  gateway-a:
+    image: git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
+    # Tag for human reference: sha-9f1a081 (post-#491 IMG-FIX; smoke-tested locally)
+    container_name: fed-harness-gateway-a
+    restart: unless-stopped
+    ports:
+      - '${GATEWAY_A_HOST_PORT:-14001}:3000'
+    environment:
+      MOSAIC_TIER: federated
+      DATABASE_URL: postgres://mosaic:mosaic@postgres-a:5432/mosaic
+      VALKEY_URL: redis://valkey-a:6379
+      GATEWAY_PORT: '3000'
+      GATEWAY_CORS_ORIGIN: 'http://localhost:14001'
+      BETTER_AUTH_SECRET: harness-secret-server-a-do-not-use-in-prod
+      BETTER_AUTH_URL: 'http://gateway-a:3000'
+      STEP_CA_URL: 'https://step-ca:9000'
+      FEDERATION_PEER_HOSTNAME: gateway-a
+      # Bootstrap password for POST /api/bootstrap/setup — used by seed.ts to create
+      # the first admin user. Only valid on a pristine (zero-user) database.
+      # Not the same as ADMIN_API_KEY — there is no static API key in the gateway.
+      ADMIN_BOOTSTRAP_PASSWORD: harness-admin-password-a
+    depends_on:
+      postgres-a:
+        condition: service_healthy
+      valkey-a:
+        condition: service_healthy
+      step-ca:
+        condition: service_healthy
+    networks:
+      - fed-test-net
+    healthcheck:
+      test:
+        [
+          'CMD',
+          'node',
+          '-e',
+          "require('http').get('http://127.0.0.1:3000/api/health', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))",
+        ]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 60s
+
+  # ─── Server B — Work / Serving Gateway ──────────────────────────────────────
+  postgres-b:
+    image: pgvector/pgvector:pg17
+    container_name: fed-harness-postgres-b
+    restart: unless-stopped
+    ports:
+      - '${PG_B_HOST_PORT:-15433}:5432'
+    environment:
+      POSTGRES_USER: mosaic
+      POSTGRES_PASSWORD: mosaic
+      POSTGRES_DB: mosaic
+    volumes:
+      - pg_b_data:/var/lib/postgresql/data
+      - ../../infra/pg-init:/docker-entrypoint-initdb.d:ro
+    networks:
+      - fed-test-net
+    healthcheck:
+      test: ['CMD-SHELL', 'pg_isready -U mosaic']
+      interval: 5s
+      timeout: 3s
+      retries: 5
+
+  valkey-b:
+    image: valkey/valkey:8-alpine
+    container_name: fed-harness-valkey-b
+    restart: unless-stopped
+    ports:
+      - '${VALKEY_B_HOST_PORT:-16380}:6379'
+    volumes:
+      - valkey_b_data:/data
+    networks:
+      - fed-test-net
+    healthcheck:
+      test: ['CMD', 'valkey-cli', 'ping']
+      interval: 5s
+      timeout: 3s
+      retries: 5
+
+  gateway-b:
+    image: git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
+    # Tag for human reference: sha-9f1a081 (post-#491 IMG-FIX; smoke-tested locally)
+    container_name: fed-harness-gateway-b
+    restart: unless-stopped
+    ports:
+      - '${GATEWAY_B_HOST_PORT:-14002}:3000'
+    environment:
+      MOSAIC_TIER: federated
+      DATABASE_URL: postgres://mosaic:mosaic@postgres-b:5432/mosaic
+      VALKEY_URL: redis://valkey-b:6379
+      GATEWAY_PORT: '3000'
+      GATEWAY_CORS_ORIGIN: 'http://localhost:14002'
+      BETTER_AUTH_SECRET: harness-secret-server-b-do-not-use-in-prod
+      BETTER_AUTH_URL: 'http://gateway-b:3000'
+      STEP_CA_URL: 'https://step-ca:9000'
+      FEDERATION_PEER_HOSTNAME: gateway-b
+      # Bootstrap password for POST /api/bootstrap/setup — used by seed.ts to create
+      # the first admin user. Only valid on a pristine (zero-user) database.
+      # Not the same as ADMIN_API_KEY — there is no static API key in the gateway.
+      ADMIN_BOOTSTRAP_PASSWORD: harness-admin-password-b
+    depends_on:
+      postgres-b:
+        condition: service_healthy
+      valkey-b:
+        condition: service_healthy
+      step-ca:
+        condition: service_healthy
+    networks:
+      - fed-test-net
+    healthcheck:
+      test:
+        [
+          'CMD',
+          'node',
+          '-e',
+          "require('http').get('http://127.0.0.1:3000/api/health', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))",
+        ]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 60s
+
+networks:
+  fed-test-net:
+    name: fed-test-net
+    driver: bridge
+
+volumes:
+  step_ca_data:
+    name: fed-harness-step-ca
+  pg_a_data:
+    name: fed-harness-pg-a
+  valkey_a_data:
+    name: fed-harness-valkey-a
+  pg_b_data:
+    name: fed-harness-pg-b
+  valkey_b_data:
+    name: fed-harness-valkey-b
--- a/tools/federation-harness/harness.ts
+++ b/tools/federation-harness/harness.ts
@@ -0,0 +1,290 @@
+/**
+ * tools/federation-harness/harness.ts
+ *
+ * Vitest-consumable helpers for the two-gateway federation harness.
+ *
+ * USAGE (in a vitest test file):
+ *
+ *   import { bootHarness, tearDownHarness, serverA, serverB, seed } from
+ *     '../../tools/federation-harness/harness.js';
+ *
+ *   let handle: HarnessHandle;
+ *
+ *   beforeAll(async () => {
+ *     handle = await bootHarness();
+ *   }, 180_000);
+ *
+ *   afterAll(async () => {
+ *     await tearDownHarness(handle);
+ *   });
+ *
+ *   test('variant A — list tasks', async () => {
+ *     const seedResult = await seed(handle, 'all');
+ *     const a = serverA(handle);
+ *     const res = await fetch(`${a.baseUrl}/api/federation/list/tasks`, {
+ *       headers: { Authorization: `Bearer ${seedResult.adminTokenA}` },
+ *     });
+ *     expect(res.status).toBe(200);
+ *   });
+ *
+ * NOTE: The `seed()` helper currently only supports scenario='all'. Passing any
+ * other value throws immediately. Per-variant narrowing is deferred to M3-11.
+ *
+ * ESM / NodeNext: all imports use .js extensions.
+ */
+
+import { execSync, execFileSync } from 'node:child_process';
+import { resolve, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { runSeed, type SeedResult } from './seed.js';
+
+// ─── Types ───────────────────────────────────────────────────────────────────
+
+export interface GatewayAccessor {
+  /** Base URL reachable from the host machine, e.g. http://localhost:14001 */
+  baseUrl: string;
+  /** Bootstrap password used for POST /api/bootstrap/setup on a pristine gateway */
+  bootstrapPassword: string;
+  /** Internal Docker network hostname (for container-to-container calls) */
+  internalHostname: string;
+}
+
+export interface HarnessHandle {
+  /** Server A accessor */
+  a: GatewayAccessor;
+  /** Server B accessor */
+  b: GatewayAccessor;
+  /** Absolute path to the docker-compose file */
+  composeFile: string;
+  /** Whether this instance booted the stack (vs. reusing an existing one) */
+  ownedStack: boolean;
+  /** Optional seed result if seed() was called */
+  seedResult?: SeedResult;
+}
+
+/**
+ * Scenario to seed. Currently only 'all' is implemented; per-variant narrowing
+ * is tracked as M3-11. Passing any other value throws immediately with a clear
+ * error rather than silently over-seeding.
+ */
+export type SeedScenario = 'variantA' | 'variantB' | 'variantC' | 'all';
+
+// ─── Constants ────────────────────────────────────────────────────────────────
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const COMPOSE_FILE = resolve(__dirname, 'docker-compose.two-gateways.yml');
+
+const GATEWAY_A_URL = process.env['GATEWAY_A_URL'] ?? 'http://localhost:14001';
+const GATEWAY_B_URL = process.env['GATEWAY_B_URL'] ?? 'http://localhost:14002';
+const ADMIN_BOOTSTRAP_PASSWORD_A =
+  process.env['ADMIN_BOOTSTRAP_PASSWORD_A'] ?? 'harness-admin-password-a';
+const ADMIN_BOOTSTRAP_PASSWORD_B =
+  process.env['ADMIN_BOOTSTRAP_PASSWORD_B'] ?? 'harness-admin-password-b';
+
+const READINESS_TIMEOUT_MS = 180_000;
+const READINESS_POLL_MS = 3_000;
+
+// ─── Internal helpers ─────────────────────────────────────────────────────────
+
+async function isGatewayHealthy(baseUrl: string): Promise<boolean> {
+  try {
+    const res = await fetch(`${baseUrl}/api/health`, { signal: AbortSignal.timeout(5_000) });
+    return res.ok;
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Poll both gateways in parallel until both are healthy or the shared deadline
+ * expires. Polling in parallel (rather than sequentially) avoids the bug where
+ * a slow gateway-a consumes all of the readiness budget before gateway-b is
+ * checked.
+ */
+async function waitForStack(handle: HarnessHandle): Promise<void> {
+  const gateways: Array<{ label: string; url: string }> = [
+    { label: 'gateway-a', url: handle.a.baseUrl },
+    { label: 'gateway-b', url: handle.b.baseUrl },
+  ];
+
+  await Promise.all(
+    gateways.map(async (gw) => {
+      // Each gateway gets its own independent deadline.
+      const deadline = Date.now() + READINESS_TIMEOUT_MS;
+      process.stdout.write(`[harness] Waiting for ${gw.label}...`);
+
+      while (Date.now() < deadline) {
+        if (await isGatewayHealthy(gw.url)) {
+          process.stdout.write(` ready\n`);
+          return;
+        }
+        if (Date.now() + READINESS_POLL_MS > deadline) {
+          throw new Error(
+            `[harness] ${gw.label} did not become healthy within ${READINESS_TIMEOUT_MS.toString()}ms`,
+          );
+        }
+        await new Promise((r) => setTimeout(r, READINESS_POLL_MS));
+        process.stdout.write('.');
+      }
+
+      throw new Error(
+        `[harness] ${gw.label} did not become healthy within ${READINESS_TIMEOUT_MS.toString()}ms`,
+      );
+    }),
+  );
+}
+
+function isStackRunning(): boolean {
+  try {
+    const output = execFileSync(
+      'docker',
+      ['compose', '-f', COMPOSE_FILE, 'ps', '--format', 'json'],
+      { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] },
+    );
+
+    if (!output.trim()) return false;
+
+    // Parse JSON lines — each running service emits a JSON object per line
+    const lines = output.trim().split('\n').filter(Boolean);
+    const runningServices = lines.filter((line) => {
+      try {
+        const obj = JSON.parse(line) as { State?: string };
+        return obj.State === 'running';
+      } catch {
+        return false;
+      }
+    });
+
+    // Expect at least gateway-a and gateway-b running
+    return runningServices.length >= 2;
+  } catch {
+    return false;
+  }
+}
+
+// ─── Public API ───────────────────────────────────────────────────────────────
+
+/**
+ * Boot the harness stack.
+ *
+ * Idempotent: if the stack is already running and both gateways are healthy,
+ * this function reuses the existing stack and returns a handle with
+ * `ownedStack: false`. Callers that set `ownedStack: false` should NOT call
+ * `tearDownHarness` unless they explicitly want to tear down a pre-existing stack.
+ *
+ * If the stack is not running, it starts it with `docker compose up -d` and
+ * waits for both gateways to pass their /api/health probe.
+ */
+export async function bootHarness(): Promise<HarnessHandle> {
+  const handle: HarnessHandle = {
+    a: {
+      baseUrl: GATEWAY_A_URL,
+      bootstrapPassword: ADMIN_BOOTSTRAP_PASSWORD_A,
+      internalHostname: 'gateway-a',
+    },
+    b: {
+      baseUrl: GATEWAY_B_URL,
+      bootstrapPassword: ADMIN_BOOTSTRAP_PASSWORD_B,
+      internalHostname: 'gateway-b',
+    },
+    composeFile: COMPOSE_FILE,
+    ownedStack: false,
+  };
+
+  // Check if both gateways are already healthy
+  const [aHealthy, bHealthy] = await Promise.all([
+    isGatewayHealthy(handle.a.baseUrl),
+    isGatewayHealthy(handle.b.baseUrl),
+  ]);
+
+  if (aHealthy && bHealthy) {
+    console.log('[harness] Stack already running — reusing existing stack.');
+    handle.ownedStack = false;
+    return handle;
+  }
+
+  console.log('[harness] Starting federation harness stack...');
+  execSync(`docker compose -f "${COMPOSE_FILE}" up -d`, { stdio: 'inherit' });
+  handle.ownedStack = true;
+
+  await waitForStack(handle);
+  console.log('[harness] Stack is ready.');
+
+  return handle;
+}
+
+/**
+ * Tear down the harness stack.
+ *
+ * Runs `docker compose down -v` to remove containers AND volumes (ephemeral state).
+ * Only tears down if `handle.ownedStack` is true unless `force` is set.
+ */
+export async function tearDownHarness(
+  handle: HarnessHandle,
+  opts?: { force?: boolean },
+): Promise<void> {
+  if (!handle.ownedStack && !opts?.force) {
+    console.log(
+      '[harness] Stack not owned by this handle — skipping teardown (pass force: true to override).',
+    );
+    return;
+  }
+
+  console.log('[harness] Tearing down federation harness stack...');
+  execSync(`docker compose -f "${handle.composeFile}" down -v`, { stdio: 'inherit' });
+  console.log('[harness] Stack torn down.');
+}
+
+/**
+ * Return the Server A accessor from a harness handle.
+ * Convenience wrapper for test readability.
+ */
+export function serverA(handle: HarnessHandle): GatewayAccessor {
+  return handle.a;
+}
+
+/**
+ * Return the Server B accessor from a harness handle.
+ * Convenience wrapper for test readability.
+ */
+export function serverB(handle: HarnessHandle): GatewayAccessor {
+  return handle.b;
+}
+
+/**
+ * Seed the harness with test data for one or more scenarios.
+ *
+ * @param handle   The harness handle returned by bootHarness().
+ * @param scenario Which scope variants to provision. Currently only 'all' is
+ *                 supported — passing any other value throws immediately with a
+ *                 clear error. Per-variant narrowing is tracked as M3-11.
+ *
+ * Returns a SeedResult with grant IDs, peer IDs, and admin tokens for each
+ * gateway, which test assertions can reference.
+ *
+ * IMPORTANT: The harness assumes a pristine database on both gateways. The seed
+ * bootstraps an admin user on each gateway via POST /api/bootstrap/setup. If
+ * either gateway already has users, seed() throws with a clear error message.
+ * Run 'docker compose down -v' to reset state.
+ */
+export async function seed(
+  handle: HarnessHandle,
+  scenario: SeedScenario = 'all',
+): Promise<SeedResult> {
+  if (scenario !== 'all') {
+    throw new Error(
+      `seed: scenario narrowing not yet implemented; pass "all" for now. ` +
+        `Got: "${scenario}". Per-variant narrowing is tracked as M3-11.`,
+    );
+  }
+
+  const result = await runSeed({
+    serverAUrl: handle.a.baseUrl,
+    serverBUrl: handle.b.baseUrl,
+    adminBootstrapPasswordA: handle.a.bootstrapPassword,
+    adminBootstrapPasswordB: handle.b.bootstrapPassword,
+  });
+
+  handle.seedResult = result;
+  return result;
+}
--- a/tools/federation-harness/seed.ts
+++ b/tools/federation-harness/seed.ts
@@ -0,0 +1,603 @@
+#!/usr/bin/env tsx
+/**
+ * tools/federation-harness/seed.ts
+ *
+ * Provisions test data for the two-gateway federation harness.
+ * Run via: tsx tools/federation-harness/seed.ts
+ *
+ * What this script does:
+ *  1. (Optional) Boots the compose stack if --boot flag is passed.
+ *  2. Waits for both gateways to be healthy.
+ *  3. Bootstraps an admin user + token on each gateway via POST /api/bootstrap/setup.
+ *  4. Creates three grants on Server B matching the M3 acceptance test scenarios:
+ *     - Scope variant A: tasks + notes, include_personal: true
+ *     - Scope variant B: tasks only, include_teams: ['T1'], exclude T2
+ *     - Scope variant C: tasks + credentials in resources, credentials excluded (sanity)
+ *  5. For each grant, walks the full enrollment flow:
+ *       a. Server B creates a peer keypair (represents the requesting side).
+ *       b. Server B creates the grant referencing that peer.
+ *       c. Server B issues an enrollment token.
+ *       d. Server A creates its own peer keypair (represents its view of B).
+ *       e. Server A redeems the enrollment token at Server B's enrollment endpoint,
+ *          submitting A's CSR → receives signed cert back.
+ *       f. Server A stores the cert on its peer record → peer becomes active.
+ *  6. Inserts representative test tasks/notes/credentials on Server B.
+ *
+ * IMPORTANT: This script uses the real admin REST API — no direct DB writes.
+ * It exercises the full enrollment flow as M3 acceptance tests will.
+ *
+ * ESM / NodeNext: all imports use .js extensions.
+ */
+
+import { execSync } from 'node:child_process';
+import { resolve, dirname } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+// ─── Constants ───────────────────────────────────────────────────────────────
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const COMPOSE_FILE = resolve(__dirname, 'docker-compose.two-gateways.yml');
+
+/** Base URLs as seen from the host machine (mapped host ports). */
+const SERVER_A_URL = process.env['GATEWAY_A_URL'] ?? 'http://localhost:14001';
+const SERVER_B_URL = process.env['GATEWAY_B_URL'] ?? 'http://localhost:14002';
+
+/**
+ * Bootstrap passwords used when calling POST /api/bootstrap/setup on each
+ * gateway. Each gateway starts with zero users and requires a one-time setup
+ * call before any admin-guarded endpoints can be used.
+ */
+const ADMIN_BOOTSTRAP_PASSWORD_A =
+  process.env['ADMIN_BOOTSTRAP_PASSWORD_A'] ?? 'harness-admin-password-a';
+const ADMIN_BOOTSTRAP_PASSWORD_B =
+  process.env['ADMIN_BOOTSTRAP_PASSWORD_B'] ?? 'harness-admin-password-b';
+
+const READINESS_TIMEOUT_MS = 120_000;
+const READINESS_POLL_MS = 3_000;
+
+// ─── Scope variant definitions (for M3 acceptance tests) ─────────────────────
+
+/** Scope variant A — tasks + notes, personal data included. */
+export const SCOPE_VARIANT_A = {
+  resources: ['tasks', 'notes'],
+  filters: {
+    tasks: { include_personal: true },
+    notes: { include_personal: true },
+  },
+  excluded_resources: [] as string[],
+  max_rows_per_query: 500,
+};
+
+/** Scope variant B — tasks only, team T1 only, no personal. */
+export const SCOPE_VARIANT_B = {
+  resources: ['tasks'],
+  filters: {
+    tasks: { include_teams: ['T1'], include_personal: false },
+  },
+  excluded_resources: [] as string[],
+  max_rows_per_query: 500,
+};
+
+/**
+ * Scope variant C — tasks + credentials in resources list, but credentials
+ * explicitly in excluded_resources. Sanity test: credentials must still be
+ * inaccessible even though they appear in resources.
+ */
+export const SCOPE_VARIANT_C = {
+  resources: ['tasks', 'credentials'],
+  filters: {
+    tasks: { include_personal: true },
+  },
+  excluded_resources: ['credentials'],
+  max_rows_per_query: 500,
+};
+
+// ─── Inline types (no import from packages/types — M3-01 branch not yet merged) ─
+
+interface AdminFetchOptions {
+  method?: string;
+  body?: unknown;
+  adminToken: string;
+}
+
+interface PeerRecord {
+  peerId: string;
+  csrPem: string;
+}
+
+interface GrantRecord {
+  id: string;
+  status: string;
+  scope: unknown;
+}
+
+interface EnrollmentTokenResult {
+  token: string;
+  expiresAt: string;
+  enrollmentUrl: string;
+}
+
+interface EnrollmentRedeemResult {
+  certPem: string;
+  certChainPem: string;
+}
+
+interface BootstrapResult {
+  adminUserId: string;
+  adminToken: string;
+}
+
+export interface SeedResult {
+  serverAUrl: string;
+  serverBUrl: string;
+  adminTokenA: string;
+  adminTokenB: string;
+  adminUserIdA: string;
+  adminUserIdB: string;
+  grants: {
+    variantA: GrantRecord;
+    variantB: GrantRecord;
+    variantC: GrantRecord;
+  };
+  peers: {
+    variantA: PeerRecord & { grantId: string };
+    variantB: PeerRecord & { grantId: string };
+    variantC: PeerRecord & { grantId: string };
+  };
+}
+
+// ─── HTTP helpers ─────────────────────────────────────────────────────────────
+
+/**
+ * Authenticated admin fetch. Sends `Authorization: Bearer <adminToken>` which
+ * is the only path supported by AdminGuard (DB-backed sha256 token lookup).
+ * No `x-admin-key` header path exists in the gateway.
+ */
+async function adminFetch<T>(baseUrl: string, path: string, opts: AdminFetchOptions): Promise<T> {
+  const url = `${baseUrl}${path}`;
+  const res = await fetch(url, {
+    method: opts.method ?? 'GET',
+    headers: {
+      'Content-Type': 'application/json',
+      Authorization: `Bearer ${opts.adminToken}`,
+    },
+    body: opts.body !== undefined ? JSON.stringify(opts.body) : undefined,
+  });
+
+  if (!res.ok) {
+    const text = await res.text().catch(() => '(no body)');
+    throw new Error(`${opts.method ?? 'GET'} ${url} → ${res.status}: ${text}`);
+  }
+
+  return res.json() as Promise<T>;
+}
+
+// ─── Admin bootstrap ──────────────────────────────────────────────────────────
+
+/**
+ * Bootstrap an admin user on a pristine gateway.
+ *
+ * Steps:
+ *  1. GET /api/bootstrap/status — confirms needsSetup === true.
+ *  2. POST /api/bootstrap/setup with { name, email, password } — returns
+ *     { user, token: { plaintext } }.
+ *
+ * The harness assumes a fresh DB. If needsSetup is false the harness fails
+ * fast with a clear error rather than proceeding with an unknown token.
+ */
+async function bootstrapAdmin(
+  baseUrl: string,
+  label: string,
+  password: string,
+): Promise<BootstrapResult> {
+  console.log(`[seed] Bootstrapping admin on ${label} (${baseUrl})...`);
+
+  // 1. Check status
+  const statusRes = await fetch(`${baseUrl}/api/bootstrap/status`);
+  if (!statusRes.ok) {
+    throw new Error(`[seed] GET ${baseUrl}/api/bootstrap/status → ${statusRes.status.toString()}`);
+  }
+  const status = (await statusRes.json()) as { needsSetup: boolean };
+
+  if (!status.needsSetup) {
+    throw new Error(
+      `[seed] ${label} at ${baseUrl} already has users (needsSetup=false). ` +
+        `The harness requires a pristine database. Run 'docker compose down -v' to reset.`,
+    );
+  }
+
+  // 2. Bootstrap
+  const setupRes = await fetch(`${baseUrl}/api/bootstrap/setup`, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({
+      name: `Harness Admin (${label})`,
+      email: `harness-admin-${label.toLowerCase().replace(/\s+/g, '-')}@example.invalid`,
+      password,
+    }),
+  });
+
+  if (!setupRes.ok) {
+    const body = await setupRes.text().catch(() => '(no body)');
+    throw new Error(
+      `[seed] POST ${baseUrl}/api/bootstrap/setup → ${setupRes.status.toString()}: ${body}`,
+    );
+  }
+
+  const result = (await setupRes.json()) as {
+    user: { id: string };
+    token: { plaintext: string };
+  };
+
+  console.log(`[seed]   ${label} admin user: ${result.user.id}`);
+  console.log(`[seed]   ${label} admin token: ${result.token.plaintext.slice(0, 8)}...`);
+
+  return {
+    adminUserId: result.user.id,
+    adminToken: result.token.plaintext,
+  };
+}
+
+// ─── Readiness probe ──────────────────────────────────────────────────────────
+
+async function waitForGateway(baseUrl: string, label: string): Promise<void> {
+  const deadline = Date.now() + READINESS_TIMEOUT_MS;
+  let lastError: string = '';
+
+  while (Date.now() < deadline) {
+    try {
+      const res = await fetch(`${baseUrl}/api/health`, { signal: AbortSignal.timeout(5_000) });
+      if (res.ok) {
+        console.log(`[seed] ${label} is ready (${baseUrl})`);
+        return;
+      }
+      lastError = `HTTP ${res.status.toString()}`;
+    } catch (err) {
+      lastError = err instanceof Error ? err.message : String(err);
+    }
+    await new Promise((r) => setTimeout(r, READINESS_POLL_MS));
+  }
+
+  throw new Error(
+    `[seed] ${label} did not become ready within ${READINESS_TIMEOUT_MS.toString()}ms — last error: ${lastError}`,
+  );
+}
+
+// ─── Enrollment flow ──────────────────────────────────────────────────────────
+
+/**
+ * Walk the full enrollment flow for one grant.
+ *
+ * The correct two-sided flow (matching the data model's FK semantics):
+ *
+ *  1. On Server B: POST /api/admin/federation/peers/keypair
+ *       → peerId_B (Server B's peer record representing the requesting side)
+ *  2. On Server B: POST /api/admin/federation/grants with peerId: peerId_B
+ *       → grant (FK to Server B's own federation_peers table — no violation)
+ *  3. On Server B: POST /api/admin/federation/grants/:id/tokens
+ *       → enrollmentUrl pointing back to Server B
+ *  4. On Server A: POST /api/admin/federation/peers/keypair
+ *       → peerId_A + csrPem_A (Server A's local record of Server B)
+ *  5. Server A → Server B: POST enrollmentUrl with { csrPem: csrPem_A }
+ *       → certPem signed by Server B's CA
+ *  6. On Server A: PATCH /api/admin/federation/peers/:peerId_A/cert with certPem
+ *       → Server A's peer record transitions to active
+ *
+ * Returns the activated grant (from Server B) and Server A's peer record.
+ */
+async function enrollGrant(opts: {
+  label: string;
+  subjectUserId: string;
+  scope: unknown;
+  adminTokenA: string;
+  adminTokenB: string;
+  serverAUrl: string;
+  serverBUrl: string;
+}): Promise<{ grant: GrantRecord; peer: PeerRecord & { grantId: string } }> {
+  const { label, subjectUserId, scope, adminTokenA, adminTokenB, serverAUrl, serverBUrl } = opts;
+  console.log(`\n[seed] Enrolling grant for scope variant ${label}...`);
+
+  // 1. Create peer keypair on Server B (represents the requesting peer from B's perspective)
+  const peerB = await adminFetch<PeerRecord>(serverBUrl, '/api/admin/federation/peers/keypair', {
+    method: 'POST',
+    adminToken: adminTokenB,
+    body: {
+      commonName: `harness-peer-${label.toLowerCase()}-from-b`,
+      displayName: `Harness Peer ${label} (Server A as seen from B)`,
+      endpointUrl: serverAUrl,
+    },
+  });
+  console.log(`[seed]   Created peer on B: ${peerB.peerId}`);
+
+  // 2. Create grant on Server B referencing B's own peer record
+  const grant = await adminFetch<GrantRecord>(serverBUrl, '/api/admin/federation/grants', {
+    method: 'POST',
+    adminToken: adminTokenB,
+    body: {
+      peerId: peerB.peerId,
+      subjectUserId,
+      scope,
+    },
+  });
+  console.log(`[seed]   Created grant on B: ${grant.id} (status: ${grant.status})`);
+
+  // 3. Generate enrollment token on Server B
+  const tokenResult = await adminFetch<EnrollmentTokenResult>(
+    serverBUrl,
+    `/api/admin/federation/grants/${grant.id}/tokens`,
+    { method: 'POST', adminToken: adminTokenB, body: { ttlSeconds: 900 } },
+  );
+  console.log(`[seed]   Enrollment token: ${tokenResult.token.slice(0, 8)}...`);
+  console.log(`[seed]   Enrollment URL: ${tokenResult.enrollmentUrl}`);
+
+  // 4. Create peer keypair on Server A (Server A's local record of Server B)
+  const peerA = await adminFetch<PeerRecord>(serverAUrl, '/api/admin/federation/peers/keypair', {
+    method: 'POST',
+    adminToken: adminTokenA,
+    body: {
+      commonName: `harness-peer-${label.toLowerCase()}-from-a`,
+      displayName: `Harness Peer ${label} (Server B as seen from A)`,
+      endpointUrl: serverBUrl,
+    },
+  });
+  console.log(`[seed]   Created peer on A: ${peerA.peerId}`);
+
+  // 5. Redeem token at Server B's enrollment endpoint with A's CSR.
+  //    The enrollment endpoint is not admin-guarded — the one-time token IS the credential.
+  //
+  //    The enrollmentUrl returned by the gateway is built using BETTER_AUTH_URL which
+  //    resolves to the in-cluster Docker hostname (gateway-b:3000). That URL is only
+  //    reachable from other containers, not from the host machine running this script.
+  //    We rewrite the host portion to use the host-accessible serverBUrl so the
+  //    seed script can reach the endpoint from the host.
+  const parsedEnrollment = new URL(tokenResult.enrollmentUrl);
+  const tokenSegment = parsedEnrollment.pathname.split('/').pop()!;
+  const redeemUrl = `${serverBUrl}/api/federation/enrollment/${tokenSegment}`;
+  console.log(`[seed]   Rewritten redeem URL (host-accessible): ${redeemUrl}`);
+  const redeemRes = await fetch(redeemUrl, {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({ csrPem: peerA.csrPem }),
+  });
+
+  if (!redeemRes.ok) {
+    const body = await redeemRes.text().catch(() => '(no body)');
+    throw new Error(`Enrollment redemption failed: ${redeemRes.status.toString()} — ${body}`);
+  }
+
+  const redeemResult = (await redeemRes.json()) as EnrollmentRedeemResult;
+  console.log(`[seed]   Cert issued (${redeemResult.certPem.length.toString()} bytes)`);
+
+  // 6. Store cert on Server A's peer record → transitions to active
+  await adminFetch<unknown>(serverAUrl, `/api/admin/federation/peers/${peerA.peerId}/cert`, {
+    method: 'PATCH',
+    adminToken: adminTokenA,
+    body: { certPem: redeemResult.certPem },
+  });
+  console.log(`[seed]   Cert stored on A — peer ${peerA.peerId} is now active`);
+
+  // Verify grant flipped to active on B
+  const activeGrant = await adminFetch<GrantRecord>(
+    serverBUrl,
+    `/api/admin/federation/grants/${grant.id}`,
+    { adminToken: adminTokenB },
+  );
+  console.log(`[seed]   Grant status on B: ${activeGrant.status}`);
+
+  return { grant: activeGrant, peer: { ...peerA, grantId: grant.id } };
+}
+
+// ─── Test data insertion ──────────────────────────────────────────────────────
+
+/**
+ * Insert representative test data on Server B via its admin APIs.
+ *
+ * NOTE: The gateway's task/note/credential APIs require an authenticated user
+ * session. For the harness, we seed via admin-level endpoints if available,
+ * or document the gap here for M3-11 to fill in with proper user session seeding.
+ *
+ * ASSUMPTION: Server B exposes POST /api/admin/tasks (or similar) for test data.
+ * If that endpoint does not yet exist, this function logs a warning and skips
+ * without failing — M3-11 will add the session-based seeding path.
+ */
+async function seedTestData(
+  subjectUserId: string,
+  scopeLabel: string,
+  serverBUrl: string,
+  adminTokenB: string,
+): Promise<void> {
+  console.log(`\n[seed] Seeding test data on Server B for ${scopeLabel}...`);
+
+  const testTasks = [
+    {
+      title: `${scopeLabel} Task 1`,
+      description: 'Federation harness test task',
+      userId: subjectUserId,
+    },
+    {
+      title: `${scopeLabel} Task 2`,
+      description: 'Team-scoped test task',
+      userId: subjectUserId,
+      teamId: 'T1',
+    },
+  ];
+
+  const testNotes = [
+    {
+      title: `${scopeLabel} Note 1`,
+      content: 'Personal note for federation test',
+      userId: subjectUserId,
+    },
+  ];
+
+  // Attempt to insert — tolerate 404 (endpoint not yet implemented)
+  for (const task of testTasks) {
+    try {
+      await adminFetch<unknown>(serverBUrl, '/api/admin/tasks', {
+        method: 'POST',
+        adminToken: adminTokenB,
+        body: task,
+      });
+      console.log(`[seed]   Inserted task: "${task.title}"`);
+    } catch (err) {
+      const msg = err instanceof Error ? err.message : String(err);
+      if (msg.includes('404') || msg.includes('Cannot POST')) {
+        console.warn(
+          `[seed]   WARN: /api/admin/tasks not found — skipping task insertion (expected until M3-11)`,
+        );
+        break;
+      }
+      throw err;
+    }
+  }
+
+  for (const note of testNotes) {
+    try {
+      await adminFetch<unknown>(serverBUrl, '/api/admin/notes', {
+        method: 'POST',
+        adminToken: adminTokenB,
+        body: note,
+      });
+      console.log(`[seed]   Inserted note: "${note.title}"`);
+    } catch (err) {
+      const msg = err instanceof Error ? err.message : String(err);
+      if (msg.includes('404') || msg.includes('Cannot POST')) {
+        console.warn(
+          `[seed]   WARN: /api/admin/notes not found — skipping note insertion (expected until M3-11)`,
+        );
+        break;
+      }
+      throw err;
+    }
+  }
+
+  console.log(`[seed] Test data seeding for ${scopeLabel} complete.`);
+}
+
+// ─── Main entrypoint ──────────────────────────────────────────────────────────
+
+export async function runSeed(opts?: {
+  serverAUrl?: string;
+  serverBUrl?: string;
+  adminBootstrapPasswordA?: string;
+  adminBootstrapPasswordB?: string;
+  subjectUserIds?: { variantA: string; variantB: string; variantC: string };
+}): Promise<SeedResult> {
+  const aUrl = opts?.serverAUrl ?? SERVER_A_URL;
+  const bUrl = opts?.serverBUrl ?? SERVER_B_URL;
+  const passwordA = opts?.adminBootstrapPasswordA ?? ADMIN_BOOTSTRAP_PASSWORD_A;
+  const passwordB = opts?.adminBootstrapPasswordB ?? ADMIN_BOOTSTRAP_PASSWORD_B;
+
+  // Use provided or default subject user IDs.
+  // In a real run these would be real user UUIDs from Server B's DB.
+  // For the harness, the admin bootstrap user on Server B is used as the subject.
+  // These are overridden after bootstrap if opts.subjectUserIds is not provided.
+  const subjectIds = opts?.subjectUserIds;
+
+  console.log('[seed] Waiting for gateways to be ready...');
+  await Promise.all([waitForGateway(aUrl, 'Server A'), waitForGateway(bUrl, 'Server B')]);
+
+  // Bootstrap admin users on both gateways (requires pristine DBs).
+  console.log('\n[seed] Bootstrapping admin accounts...');
+  const [bootstrapA, bootstrapB] = await Promise.all([
+    bootstrapAdmin(aUrl, 'Server A', passwordA),
+    bootstrapAdmin(bUrl, 'Server B', passwordB),
+  ]);
+
+  // Default subject user IDs to the admin user on Server B (guaranteed to exist).
+  const resolvedSubjectIds = subjectIds ?? {
+    variantA: bootstrapB.adminUserId,
+    variantB: bootstrapB.adminUserId,
+    variantC: bootstrapB.adminUserId,
+  };
+
+  // Enroll all three scope variants sequentially to avoid race conditions on
+  // the step-ca signing queue. Parallel enrollment would work too but
+  // sequential is easier to debug when something goes wrong.
+  console.log('\n[seed] Enrolling scope variants...');
+  const resultA = await enrollGrant({
+    label: 'A',
+    subjectUserId: resolvedSubjectIds.variantA,
+    scope: SCOPE_VARIANT_A,
+    adminTokenA: bootstrapA.adminToken,
+    adminTokenB: bootstrapB.adminToken,
+    serverAUrl: aUrl,
+    serverBUrl: bUrl,
+  });
+  const resultB = await enrollGrant({
+    label: 'B',
+    subjectUserId: resolvedSubjectIds.variantB,
+    scope: SCOPE_VARIANT_B,
+    adminTokenA: bootstrapA.adminToken,
+    adminTokenB: bootstrapB.adminToken,
+    serverAUrl: aUrl,
+    serverBUrl: bUrl,
+  });
+  const resultC = await enrollGrant({
+    label: 'C',
+    subjectUserId: resolvedSubjectIds.variantC,
+    scope: SCOPE_VARIANT_C,
+    adminTokenA: bootstrapA.adminToken,
+    adminTokenB: bootstrapB.adminToken,
+    serverAUrl: aUrl,
+    serverBUrl: bUrl,
+  });
+
+  // Seed test data on Server B for each scope variant
+  await Promise.all([
+    seedTestData(resolvedSubjectIds.variantA, 'A', bUrl, bootstrapB.adminToken),
+    seedTestData(resolvedSubjectIds.variantB, 'B', bUrl, bootstrapB.adminToken),
+    seedTestData(resolvedSubjectIds.variantC, 'C', bUrl, bootstrapB.adminToken),
+  ]);
+
+  const result: SeedResult = {
+    serverAUrl: aUrl,
+    serverBUrl: bUrl,
+    adminTokenA: bootstrapA.adminToken,
+    adminTokenB: bootstrapB.adminToken,
+    adminUserIdA: bootstrapA.adminUserId,
+    adminUserIdB: bootstrapB.adminUserId,
+    grants: {
+      variantA: resultA.grant,
+      variantB: resultB.grant,
+      variantC: resultC.grant,
+    },
+    peers: {
+      variantA: resultA.peer,
+      variantB: resultB.peer,
+      variantC: resultC.peer,
+    },
+  };
+
+  console.log('\n[seed] Seed complete.');
+  console.log('[seed] Summary:');
+  console.log(`  Variant A grant: ${result.grants.variantA.id} (${result.grants.variantA.status})`);
+  console.log(`  Variant B grant: ${result.grants.variantB.id} (${result.grants.variantB.status})`);
+  console.log(`  Variant C grant: ${result.grants.variantC.id} (${result.grants.variantC.status})`);
+
+  return result;
+}
+
+// ─── CLI entry ────────────────────────────────────────────────────────────────
+
+const isCli =
+  process.argv[1] != null &&
+  fileURLToPath(import.meta.url).endsWith(process.argv[1]!.split('/').pop()!);
+
+if (isCli) {
+  const shouldBoot = process.argv.includes('--boot');
+
+  if (shouldBoot) {
+    console.log('[seed] --boot flag detected — starting compose stack...');
+    execSync(`docker compose -f "${COMPOSE_FILE}" up -d`, { stdio: 'inherit' });
+  }
+
+  runSeed()
+    .then(() => {
+      process.exit(0);
+    })
+    .catch((err) => {
+      console.error('[seed] Fatal:', err);
+      process.exit(1);
+    });
+}
Author	SHA1	Message	Date
jason.woltje	b67f2c9f08	Merge pull request 'feat(federation): outbound mTLS FederationClient (FED-M3-08)' (#508 ) from feat/federation-m3-client into main All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-24 04:30:29 +00:00
Jarvis	37675ae3f2	fix(federation/client): serialize cache fills, destroy evicted Agent, cover env-var guard All checks were successful ci/woodpecker/pr/ci Pipeline was successful Details ci/woodpecker/push/ci Pipeline was successful Details - HIGH-A: resolveEntry now uses promise-cache pattern so concurrent callers serialize on a single in-flight build, eliminating duplicate key material in heap and duplicate DB round-trips - HIGH-B: flushPeer destroys the evicted undici Agent so stale TLS connections close on cert rotation - MED-C: add regression test for PEER_MISCONFIGURED when STEP_CA_ROOT_CERT_PATH is unset Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-23 22:56:57 -05:00
Jarvis	a4a6769a6d	fix(federation/client): pin Step-CA root, fix lockfile, harden cache test All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/pr/ci Pipeline was successful Details CRIT-1: regenerate pnpm-lock.yaml so apps/gateway resolves undici@7.24.6 (prior PR pushed package.json without lockfile update; CI failed with ERR_PNPM_OUTDATED_LOCKFILE). Incidentally cleans 57 lines of stale peer-dep entries. CRIT-2: cache-hit test no longer swallows resolveEntry errors. Calls the private method directly twice and asserts identity equality plus a single DB select, removing the silent-failure path the prior assertion allowed. HIGH-1: mTLS Agent now pins Step-CA root via STEP_CA_ROOT_CERT_PATH. Without the env var resolveEntry throws PEER_MISCONFIGURED, refusing to dial peers against the public trust store. PEM is read once and cached on the service instance. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-04-23 22:30:09 -05:00
Jarvis	21650fb194	feat(federation): outbound mTLS FederationClient (FED-M3-08) Some checks failed ci/woodpecker/push/ci Pipeline failed Details ci/woodpecker/pr/ci Pipeline failed Details Implements FederationClientService — a NestJS injectable that dials peer gateways over mTLS (undici Agent with cert+sealed-key from federation_peers), invokes list/get/capabilities verbs, validates responses via Zod, and surfaces all failure modes as typed FederationClientError with a coherent error code taxonomy (PEER_NOT_FOUND, PEER_INACTIVE, PEER_MISCONFIGURED, NETWORK, FORBIDDEN, HTTP_{status}, INVALID_RESPONSE). Per-peer Agent instances are cached in a Map for the service lifetime; flushPeer(peerId) invalidates the cache for M5/M6 cert rotation and revocation events. Wired into FederationModule providers + exports so QuerySourceService (M3-09) can inject it. 13 unit tests covering all required scenarios via undici MockAgent + real sealClientKey/unsealClientKey round-trip. Closes #462 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-04-23 22:16:52 -05:00
jason.woltje	89c733e0b9	feat(federation): two-gateway test harness scaffold (FED-M3-02) (#505 ) All checks were successful ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline was successful Details	2026-04-24 03:01:25 +00:00
jason.woltje	ee3f2defd9	feat(types): federation v1 DTOs (FED-M3-01) (#506 ) Some checks failed ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline failed Details	2026-04-24 02:54:40 +00:00
jason.woltje	7342c1290d	fix(federation): use real PEM certs in enrollment + ca service tests (#507 ) Some checks failed ci/woodpecker/push/ci Pipeline was successful Details ci/woodpecker/push/publish Pipeline failed Details	2026-04-24 02:43:42 +00:00