Compare commits

...

7 Commits

Author SHA1 Message Date
b67f2c9f08 Merge pull request 'feat(federation): outbound mTLS FederationClient (FED-M3-08)' (#508) from feat/federation-m3-client into main
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
2026-04-24 04:30:29 +00:00
Jarvis
37675ae3f2 fix(federation/client): serialize cache fills, destroy evicted Agent, cover env-var guard
All checks were successful
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
- HIGH-A: resolveEntry now uses promise-cache pattern so concurrent
  callers serialize on a single in-flight build, eliminating duplicate
  key material in heap and duplicate DB round-trips
- HIGH-B: flushPeer destroys the evicted undici Agent so stale TLS
  connections close on cert rotation
- MED-C: add regression test for PEER_MISCONFIGURED when
  STEP_CA_ROOT_CERT_PATH is unset

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-23 22:56:57 -05:00
Jarvis
a4a6769a6d fix(federation/client): pin Step-CA root, fix lockfile, harden cache test
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
CRIT-1: regenerate pnpm-lock.yaml so apps/gateway resolves undici@7.24.6
(prior PR pushed package.json without lockfile update; CI failed with
ERR_PNPM_OUTDATED_LOCKFILE). Incidentally cleans 57 lines of stale
peer-dep entries.

CRIT-2: cache-hit test no longer swallows resolveEntry errors. Calls the
private method directly twice and asserts identity equality plus a
single DB select, removing the silent-failure path the prior assertion
allowed.

HIGH-1: mTLS Agent now pins Step-CA root via STEP_CA_ROOT_CERT_PATH.
Without the env var resolveEntry throws PEER_MISCONFIGURED, refusing to
dial peers against the public trust store. PEM is read once and cached
on the service instance.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-23 22:30:09 -05:00
Jarvis
21650fb194 feat(federation): outbound mTLS FederationClient (FED-M3-08)
Some checks failed
ci/woodpecker/push/ci Pipeline failed
ci/woodpecker/pr/ci Pipeline failed
Implements FederationClientService — a NestJS injectable that dials peer
gateways over mTLS (undici Agent with cert+sealed-key from federation_peers),
invokes list/get/capabilities verbs, validates responses via Zod, and surfaces
all failure modes as typed FederationClientError with a coherent error code
taxonomy (PEER_NOT_FOUND, PEER_INACTIVE, PEER_MISCONFIGURED, NETWORK,
FORBIDDEN, HTTP_{status}, INVALID_RESPONSE).

Per-peer Agent instances are cached in a Map for the service lifetime;
flushPeer(peerId) invalidates the cache for M5/M6 cert rotation and
revocation events.

Wired into FederationModule providers + exports so QuerySourceService
(M3-09) can inject it.

13 unit tests covering all required scenarios via undici MockAgent +
real sealClientKey/unsealClientKey round-trip.

Closes #462

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-23 22:16:52 -05:00
89c733e0b9 feat(federation): two-gateway test harness scaffold (FED-M3-02) (#505)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
2026-04-24 03:01:25 +00:00
ee3f2defd9 feat(types): federation v1 DTOs (FED-M3-01) (#506)
Some checks failed
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline failed
2026-04-24 02:54:40 +00:00
7342c1290d fix(federation): use real PEM certs in enrollment + ca service tests (#507)
Some checks failed
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline failed
2026-04-24 02:43:42 +00:00
23 changed files with 3590 additions and 48 deletions

View File

@@ -73,6 +73,7 @@
"rxjs": "^7.8.0",
"socket.io": "^4.8.0",
"uuid": "^11.0.0",
"undici": "^7.24.6",
"zod": "^4.3.6"
},
"devDependencies": {

View File

@@ -24,10 +24,11 @@
*/
import 'reflect-metadata';
import { describe, it, expect, vi, beforeEach } from 'vitest';
import { describe, it, expect, vi, beforeEach, beforeAll } from 'vitest';
import { GoneException, NotFoundException } from '@nestjs/common';
import type { Db } from '@mosaicstack/db';
import { EnrollmentService } from '../enrollment.service.js';
import { makeSelfSignedCert } from './helpers/test-cert.js';
// ---------------------------------------------------------------------------
// Test constants
@@ -38,10 +39,18 @@ const PEER_ID = 'p2222222-2222-2222-2222-222222222222';
const USER_ID = 'u3333333-3333-3333-3333-333333333333';
const TOKEN = 'a'.repeat(64); // 64-char hex
const MOCK_CERT_PEM = '-----BEGIN CERTIFICATE-----\nMOCK\n-----END CERTIFICATE-----\n';
const MOCK_CHAIN_PEM = MOCK_CERT_PEM + MOCK_CERT_PEM;
// Real self-signed EC P-256 cert — populated once in beforeAll.
// Required because EnrollmentService.extractCertNotAfter calls new X509Certificate(certPem)
// with strict parsing (PR #501 HIGH-2: no silent fallback).
let REAL_CERT_PEM: string;
const MOCK_CHAIN_PEM = () => REAL_CERT_PEM + REAL_CERT_PEM;
const MOCK_SERIAL = 'ABCD1234';
beforeAll(async () => {
REAL_CERT_PEM = await makeSelfSignedCert();
});
// ---------------------------------------------------------------------------
// Factory helpers
// ---------------------------------------------------------------------------
@@ -103,11 +112,27 @@ function makeDb({
const claimUpdateMock = vi.fn().mockReturnValue({ set: setClaimMock });
// transaction(cb) — cb receives txMock; txMock has update + insert
const txInsertValues = vi.fn().mockResolvedValue(undefined);
const txInsertMock = vi.fn().mockReturnValue({ values: txInsertValues });
const txWhereUpdate = vi.fn().mockResolvedValue(undefined);
//
// The tx mock must support two tx.update() call patterns (CRIT-2, PR #501):
// 1. Grant activation: .update().set().where().returning() → resolves to [{ id }]
// 2. Peer update: .update().set().where() → resolves to undefined
//
// We achieve this by making txWhereUpdate return an object with BOTH a thenable
// interface (so `await tx.update().set().where()` works) AND a .returning() method.
const txGrantActivatedRow = { id: GRANT_ID };
const txReturningMock = vi.fn().mockResolvedValue([txGrantActivatedRow]);
const txWhereUpdate = vi.fn().mockReturnValue({
// .returning() for grant activation (first tx.update call)
returning: txReturningMock,
// thenables so `await tx.update().set().where()` also works for peer update
then: (resolve: (v: undefined) => void) => resolve(undefined),
catch: () => undefined,
finally: () => undefined,
});
const txSetMock = vi.fn().mockReturnValue({ where: txWhereUpdate });
const txUpdateMock = vi.fn().mockReturnValue({ set: txSetMock });
const txInsertValues = vi.fn().mockResolvedValue(undefined);
const txInsertMock = vi.fn().mockReturnValue({ values: txInsertValues });
const txMock = { update: txUpdateMock, insert: txInsertMock };
const transactionMock = vi
.fn()
@@ -132,6 +157,7 @@ function makeDb({
txInsertValues,
txInsertMock,
txWhereUpdate,
txReturningMock,
txSetMock,
txUpdateMock,
txMock,
@@ -146,11 +172,13 @@ function makeDb({
function makeCaService() {
return {
issueCert: vi.fn().mockResolvedValue({
certPem: MOCK_CERT_PEM,
certChainPem: MOCK_CHAIN_PEM,
// REAL_CERT_PEM is populated by beforeAll — safe to reference via closure here
// because makeCaService() is only called after the suite's beforeAll runs.
issueCert: vi.fn().mockImplementation(async () => ({
certPem: REAL_CERT_PEM,
certChainPem: MOCK_CHAIN_PEM(),
serialNumber: MOCK_SERIAL,
}),
})),
};
}
@@ -301,29 +329,29 @@ describe('EnrollmentService.redeem — success path', () => {
});
caService.issueCert.mockImplementation(async () => {
callOrder.push('issueCert');
return { certPem: MOCK_CERT_PEM, certChainPem: MOCK_CHAIN_PEM, serialNumber: MOCK_SERIAL };
return { certPem: REAL_CERT_PEM, certChainPem: MOCK_CHAIN_PEM(), serialNumber: MOCK_SERIAL };
});
await service.redeem(TOKEN, MOCK_CERT_PEM);
await service.redeem(TOKEN, '---CSR---');
expect(callOrder).toEqual(['claim', 'issueCert']);
});
it('calls CaService.issueCert with grantId, subjectUserId, csrPem, ttlSeconds=300', async () => {
await service.redeem(TOKEN, MOCK_CERT_PEM);
await service.redeem(TOKEN, '---CSR---');
expect(caService.issueCert).toHaveBeenCalledWith(
expect.objectContaining({
grantId: GRANT_ID,
subjectUserId: USER_ID,
csrPem: MOCK_CERT_PEM,
csrPem: '---CSR---',
ttlSeconds: 300,
}),
);
});
it('runs activate grant + peer update + audit inside a transaction', async () => {
await service.redeem(TOKEN, MOCK_CERT_PEM);
await service.redeem(TOKEN, '---CSR---');
expect(db._mocks.transactionMock).toHaveBeenCalledOnce();
// tx.update called twice: activate grant + update peer
@@ -333,17 +361,17 @@ describe('EnrollmentService.redeem — success path', () => {
});
it('activates grant (sets status=active) inside the transaction', async () => {
await service.redeem(TOKEN, MOCK_CERT_PEM);
await service.redeem(TOKEN, '---CSR---');
expect(db._mocks.txSetMock).toHaveBeenCalledWith(expect.objectContaining({ status: 'active' }));
});
it('updates the federationPeers row with certPem, certSerial, state=active inside the transaction', async () => {
await service.redeem(TOKEN, MOCK_CERT_PEM);
await service.redeem(TOKEN, '---CSR---');
expect(db._mocks.txSetMock).toHaveBeenCalledWith(
expect.objectContaining({
certPem: MOCK_CERT_PEM,
certPem: REAL_CERT_PEM,
certSerial: MOCK_SERIAL,
state: 'active',
}),
@@ -351,7 +379,7 @@ describe('EnrollmentService.redeem — success path', () => {
});
it('inserts an audit log row inside the transaction', async () => {
await service.redeem(TOKEN, MOCK_CERT_PEM);
await service.redeem(TOKEN, '---CSR---');
expect(db._mocks.txInsertValues).toHaveBeenCalledWith(
expect.objectContaining({
@@ -363,11 +391,11 @@ describe('EnrollmentService.redeem — success path', () => {
});
it('returns { certPem, certChainPem } from CaService', async () => {
const result = await service.redeem(TOKEN, MOCK_CERT_PEM);
const result = await service.redeem(TOKEN, '---CSR---');
expect(result).toEqual({
certPem: MOCK_CERT_PEM,
certChainPem: MOCK_CHAIN_PEM,
certPem: REAL_CERT_PEM,
certChainPem: MOCK_CHAIN_PEM(),
});
});
});

View File

@@ -0,0 +1,138 @@
/**
* Test helpers for generating real X.509 PEM certificates in unit tests.
*
* PR #501 (FED-M2-11) introduced strict `new X509Certificate(certPem)` parsing
* in both EnrollmentService.extractCertNotAfter and CaService.issueCert — dummy
* cert strings now throw `error:0680007B:asn1 encoding routines::header too long`.
*
* These helpers produce minimal but cryptographically valid self-signed EC P-256
* certificates via @peculiar/x509 + Node.js webcrypto, suitable for test mocks.
*
* Two variants:
* - makeSelfSignedCert() Plain cert — satisfies node:crypto X509Certificate parse.
* - makeMosaicIssuedCert(opts) Cert with custom Mosaic OID extensions — satisfies the
* CRIT-1 OID presence + value checks in CaService.issueCert.
*/
import { webcrypto } from 'node:crypto';
import {
X509CertificateGenerator,
Extension,
KeyUsagesExtension,
KeyUsageFlags,
BasicConstraintsExtension,
cryptoProvider,
} from '@peculiar/x509';
// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------
/**
 * Build the DER bytes of an ASN.1 UTF8String using the short (single-byte)
 * length form: tag 0x0C, one length byte, then the UTF-8 payload.
 *
 * The layout is dictated by the consumer: CaService.issueCert decodes the
 * extension value with `decoder.decode(grantIdExt.value.slice(2))`, i.e. it
 * drops exactly two header bytes and treats the rest as UTF-8. The bytes
 * returned here are therefore used verbatim as the OCTET STRING content.
 *
 * @param value - Text to encode; its UTF-8 form must be at most 127 bytes.
 * @returns A new array of `2 + byteLength` bytes.
 * @throws Error when the UTF-8 payload exceeds 127 bytes (the short-form limit).
 */
function encodeUtf8String(value: string): Uint8Array {
  const TAG_UTF8_STRING = 0x0c;
  const MAX_SHORT_FORM_LENGTH = 127;

  const payload = new TextEncoder().encode(value);
  const payloadLength = payload.length;
  if (payloadLength > MAX_SHORT_FORM_LENGTH) {
    throw new Error('encodeUtf8String: value too long for single-byte length encoding');
  }

  // tag + length + payload, in that order
  return Uint8Array.of(TAG_UTF8_STRING, payloadLength, ...payload);
}
// ---------------------------------------------------------------------------
// Mosaic OID constants (must match production CaService)
// ---------------------------------------------------------------------------
// Extension OID for mosaic_grant_id; makeMosaicIssuedCert stores opts.grantId under it.
const OID_MOSAIC_GRANT_ID = '1.3.6.1.4.1.99999.1';
// Extension OID for mosaic_subject_user_id; makeMosaicIssuedCert stores opts.subjectUserId under it.
const OID_MOSAIC_SUBJECT_USER_ID = '1.3.6.1.4.1.99999.2';
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/**
 * Create a throwaway self-signed EC P-256 certificate (CN=harness-test) whose
 * validity window starts now and ends 24 hours later. It carries only
 * BasicConstraints (CA=false) and KeyUsage (digitalSignature) — no Mosaic
 * extensions.
 *
 * Intended for:
 * - EnrollmentService.extractCertNotAfter (just needs parseable PEM)
 * - Any mock that returns certPem / certChainPem without OID checks
 *
 * @returns The certificate encoded as PEM.
 */
export async function makeSelfSignedCert(): Promise<string> {
  // Point @peculiar/x509 at Node's webcrypto explicitly; globalThis.crypto is
  // only guaranteed on Node 19+, and this keeps the helper safe on Node 18+.
  cryptoProvider.set(webcrypto as unknown as Parameters<typeof cryptoProvider.set>[0]);

  const signingAlgorithm = { name: 'ECDSA', namedCurve: 'P-256', hash: 'SHA-256' } as const;
  const keyPair = await webcrypto.subtle.generateKey(signingAlgorithm, false, ['sign', 'verify']);

  const ONE_DAY_MS = 86_400_000;
  const notBefore = new Date();
  const notAfter = new Date(notBefore.getTime() + ONE_DAY_MS);

  const certificate = await X509CertificateGenerator.createSelfSigned({
    serialNumber: '01',
    name: 'CN=harness-test',
    notBefore,
    notAfter,
    signingAlgorithm,
    keys: keyPair,
    extensions: [
      new BasicConstraintsExtension(false),
      new KeyUsagesExtension(KeyUsageFlags.digitalSignature),
    ],
  });

  return certificate.toString('pem');
}
/**
 * Create a throwaway self-signed EC P-256 certificate (CN=mosaic-issued-test,
 * valid for 24 hours from now) that additionally carries the two custom Mosaic
 * extensions checked by CaService.issueCert's CRIT-1 validation:
 *   OID 1.3.6.1.4.1.99999.1 → mosaic_grant_id        (value = grantId)
 *   OID 1.3.6.1.4.1.99999.2 → mosaic_subject_user_id (value = subjectUserId)
 *
 * Both values are encoded with encodeUtf8String so that the production
 * parser's `.slice(2)` assumption holds: each extension value is an OCTET
 * STRING wrapping an ASN.1 UTF8String TLV. Both extensions are non-critical.
 *
 * @param opts.grantId - Value stored under the mosaic_grant_id extension.
 * @param opts.subjectUserId - Value stored under the mosaic_subject_user_id extension.
 * @returns The certificate encoded as PEM.
 * @throws Error (from encodeUtf8String) when either value exceeds 127 UTF-8 bytes.
 */
export async function makeMosaicIssuedCert(opts: {
  grantId: string;
  subjectUserId: string;
}): Promise<string> {
  // Point @peculiar/x509 at Node's webcrypto.
  cryptoProvider.set(webcrypto as unknown as Parameters<typeof cryptoProvider.set>[0]);

  const signingAlgorithm = { name: 'ECDSA', namedCurve: 'P-256', hash: 'SHA-256' } as const;
  const keyPair = await webcrypto.subtle.generateKey(signingAlgorithm, false, ['sign', 'verify']);

  const ONE_DAY_MS = 86_400_000;
  const notBefore = new Date();
  const notAfter = new Date(notBefore.getTime() + ONE_DAY_MS);

  const certificate = await X509CertificateGenerator.createSelfSigned({
    serialNumber: '01',
    name: 'CN=mosaic-issued-test',
    notBefore,
    notAfter,
    signingAlgorithm,
    keys: keyPair,
    extensions: [
      new BasicConstraintsExtension(false),
      new KeyUsagesExtension(KeyUsageFlags.digitalSignature),
      // mosaic_grant_id — OID 1.3.6.1.4.1.99999.1
      new Extension(OID_MOSAIC_GRANT_ID, false, encodeUtf8String(opts.grantId)),
      // mosaic_subject_user_id — OID 1.3.6.1.4.1.99999.2
      new Extension(OID_MOSAIC_SUBJECT_USER_ID, false, encodeUtf8String(opts.subjectUserId)),
    ],
  });

  return certificate.toString('pem');
}

View File

@@ -20,9 +20,10 @@
*/
import 'reflect-metadata';
import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest';
import { describe, it, expect, vi, beforeEach, beforeAll, type Mock } from 'vitest';
import { jwtVerify, exportJWK, generateKeyPair } from 'jose';
import { Pkcs10CertificateRequestGenerator } from '@peculiar/x509';
import { makeMosaicIssuedCert } from './__tests__/helpers/test-cert.js';
// ---------------------------------------------------------------------------
// Mock node:https BEFORE importing CaService so the mock is in place when
@@ -74,6 +75,11 @@ const FAKE_CA_PEM = FAKE_CERT_PEM;
const GRANT_ID = 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11';
const SUBJECT_USER_ID = 'b1ffcd00-0d1c-5f09-cc7e-7cc0ce491b22';
// Real self-signed cert containing both Mosaic OID extensions — populated in beforeAll.
// Required because CaService.issueCert performs CRIT-1 OID presence/value checks on the
// response cert (PR #501 — strict parsing, no silent fallback).
let realIssuedCertPem: string;
// ---------------------------------------------------------------------------
// Generate a real EC P-256 key pair and CSR for integration-style tests
// ---------------------------------------------------------------------------
@@ -194,6 +200,15 @@ function makeHttpsMock(statusCode: number, body: unknown, errorMsg?: string): vo
describe('CaService', () => {
let service: CaService;
beforeAll(async () => {
// Generate a cert with the two Mosaic OIDs so that CaService.issueCert's
// CRIT-1 OID checks pass when mock step-ca returns it as `crt`.
realIssuedCertPem = await makeMosaicIssuedCert({
grantId: GRANT_ID,
subjectUserId: SUBJECT_USER_ID,
});
});
beforeEach(() => {
vi.clearAllMocks();
service = new CaService();
@@ -226,9 +241,9 @@ describe('CaService', () => {
// Now test that the service's validateCsr accepts it.
// We call it indirectly via issueCert with a successful mock.
makeHttpsMock(200, { crt: FAKE_CERT_PEM, certChain: [FAKE_CERT_PEM, FAKE_CA_PEM] });
makeHttpsMock(200, { crt: realIssuedCertPem, certChain: [realIssuedCertPem, FAKE_CA_PEM] });
const result = await service.issueCert(makeReq({ csrPem: realCsrPem }));
expect(result.certPem).toBe(FAKE_CERT_PEM);
expect(result.certPem).toBe(realIssuedCertPem);
});
it('throws INVALID_CSR for a malformed PEM-shaped CSR', async () => {
@@ -251,14 +266,14 @@ describe('CaService', () => {
it('returns IssuedCertDto on success (certChain present)', async () => {
if (!realCsrPem) realCsrPem = await generateRealCsr();
makeHttpsMock(200, {
crt: FAKE_CERT_PEM,
certChain: [FAKE_CERT_PEM, FAKE_CA_PEM],
crt: realIssuedCertPem,
certChain: [realIssuedCertPem, FAKE_CA_PEM],
});
const result = await service.issueCert(makeReq());
expect(result.certPem).toBe(FAKE_CERT_PEM);
expect(result.certChainPem).toContain(FAKE_CERT_PEM);
expect(result.certPem).toBe(realIssuedCertPem);
expect(result.certChainPem).toContain(realIssuedCertPem);
expect(result.certChainPem).toContain(FAKE_CA_PEM);
expect(typeof result.serialNumber).toBe('string');
});
@@ -270,14 +285,14 @@ describe('CaService', () => {
it('builds certChainPem from crt+ca when certChain is absent', async () => {
if (!realCsrPem) realCsrPem = await generateRealCsr();
makeHttpsMock(200, {
crt: FAKE_CERT_PEM,
crt: realIssuedCertPem,
ca: FAKE_CA_PEM,
});
const result = await service.issueCert(makeReq());
expect(result.certPem).toBe(FAKE_CERT_PEM);
expect(result.certChainPem).toContain(FAKE_CERT_PEM);
expect(result.certPem).toBe(realIssuedCertPem);
expect(result.certChainPem).toContain(realIssuedCertPem);
expect(result.certChainPem).toContain(FAKE_CA_PEM);
});
@@ -287,12 +302,12 @@ describe('CaService', () => {
it('falls back to certPem alone when certChain and ca are absent', async () => {
if (!realCsrPem) realCsrPem = await generateRealCsr();
makeHttpsMock(200, { crt: FAKE_CERT_PEM });
makeHttpsMock(200, { crt: realIssuedCertPem });
const result = await service.issueCert(makeReq());
expect(result.certPem).toBe(FAKE_CERT_PEM);
expect(result.certChainPem).toBe(FAKE_CERT_PEM);
expect(result.certPem).toBe(realIssuedCertPem);
expect(result.certChainPem).toBe(realIssuedCertPem);
});
// -------------------------------------------------------------------------
@@ -398,7 +413,7 @@ describe('CaService', () => {
statusCode: 200,
on: (event: string, cb: (chunk?: Buffer) => void) => {
if (event === 'data') {
cb(Buffer.from(JSON.stringify({ crt: FAKE_CERT_PEM })));
cb(Buffer.from(JSON.stringify({ crt: realIssuedCertPem })));
}
if (event === 'end') {
cb();
@@ -555,7 +570,7 @@ describe('CaService', () => {
statusCode: 200,
on: (event: string, cb: (chunk?: Buffer) => void) => {
if (event === 'data') {
cb(Buffer.from(JSON.stringify({ crt: FAKE_CERT_PEM })));
cb(Buffer.from(JSON.stringify({ crt: realIssuedCertPem })));
}
if (event === 'end') {
cb();

View File

@@ -0,0 +1,553 @@
/**
* Unit tests for FederationClientService (FED-M3-08).
*
* HTTP mocking strategy:
* undici MockAgent is used to intercept outbound HTTP requests. The service
* uses `undici.fetch` with a per-peer `dispatcher` option, so installing
* MockAgent as the global dispatcher is not sufficient on its own: the
* per-peer `undici.Agent` passed on every fetch call would bypass it.
*
* Because we cannot intercept at the per-peer Agent level in unit tests
* without significant refactoring, tests that exercise the HTTP layer spy on
* the private `resolveEntry` method (see makeService) and return an entry
* whose `agent` is the MockAgent pool for the peer endpoint. Every fetch the
* service issues then flows through the interceptors registered on that pool.
*
* For the cert/key wiring, we use the real `sealClientKey` function from
* peer-key.util.ts with a test secret — no stubs.
*
* Sealed-key setup:
* Each test (or beforeAll) calls `sealClientKey(TEST_PRIVATE_KEY_PEM)` with
* BETTER_AUTH_SECRET set to a deterministic test value so that
* `unsealClientKey` in the service recovers the original PEM.
*/
import 'reflect-metadata';
import { describe, it, expect, vi, beforeEach, afterEach, beforeAll, afterAll } from 'vitest';
import { MockAgent, setGlobalDispatcher, getGlobalDispatcher } from 'undici';
import type { Dispatcher } from 'undici';
import { writeFileSync, unlinkSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { Db } from '@mosaicstack/db';
import { FederationClientService, FederationClientError } from '../federation-client.service.js';
import { sealClientKey } from '../../peer-key.util.js';
// ---------------------------------------------------------------------------
// Test constants
// ---------------------------------------------------------------------------
// Deterministic value assigned to BETTER_AUTH_SECRET while sealing the test key
// and while each test runs.
const TEST_SECRET = 'test-secret-for-federation-client-spec-only';
// Identifier of the peer row used throughout this spec.
const PEER_ID = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa';
// Peer gateway origin; the MockAgent pool is created for this URL.
const ENDPOINT = 'https://peer.example.com';
// Placeholder private key: PEM-framed, but the body is NOT real key material.
// That is sufficient for unit tests because we only verify that the value
// round-trips through seal/unseal, not that it actually negotiates TLS
// (MockAgent handles that).
const TEST_PRIVATE_KEY_PEM = `-----BEGIN PRIVATE KEY-----
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDummyKeyForTests
-----END PRIVATE KEY-----`;
// Placeholder certificate PEM (dummy body — only used for mTLS Agent construction)
const TEST_CERT_PEM = `-----BEGIN CERTIFICATE-----
MIIBdummyCertForFederationClientTests==
-----END CERTIFICATE-----`;
// Serial number reported alongside TEST_CERT_PEM on peer rows and controlled entries.
const TEST_CERT_SERIAL = 'ABCDEF1234567890';
// ---------------------------------------------------------------------------
// Sealed key (computed once in beforeAll)
// ---------------------------------------------------------------------------
// Sealed form of TEST_PRIVATE_KEY_PEM; assigned in beforeAll via sealClientKey().
let SEALED_KEY: string;
// Path to a stub Step-CA root cert file written in beforeAll and removed in
// afterAll. The cert is never actually used to negotiate TLS in unit tests
// (MockAgent + spy on resolveEntry short-circuit the network), but
// loadStepCaRoot() requires the file to exist.
const STUB_CA_PEM_PATH = join(tmpdir(), 'federation-client-spec-ca.pem');
// Contents written to STUB_CA_PEM_PATH: PEM-framed placeholder, not a real certificate.
const STUB_CA_PEM = `-----BEGIN CERTIFICATE-----
MIIBdummyCAforFederationClientSpecOnly==
-----END CERTIFICATE-----
`;
// ---------------------------------------------------------------------------
// Peer row factory
// ---------------------------------------------------------------------------
/**
 * Build a peer row for the mocked DB select.
 *
 * The defaults describe an active peer at ENDPOINT holding the dummy cert and
 * the sealed test key. SEALED_KEY is read at call time, so this must only be
 * called after beforeAll has populated it.
 *
 * @param overrides - Fields to replace on the default row; applied last, so
 *   any key supplied here wins.
 * @returns A fresh row object on every call.
 */
function makePeerRow(overrides: Partial<Record<string, unknown>> = {}) {
  const defaults = {
    id: PEER_ID,
    commonName: 'peer-example-com',
    displayName: 'Test Peer',
    certPem: TEST_CERT_PEM,
    certSerial: TEST_CERT_SERIAL,
    certNotAfter: new Date('2030-01-01T00:00:00Z'),
    clientKeyPem: SEALED_KEY,
    state: 'active' as const,
    endpointUrl: ENDPOINT,
    lastSeenAt: null,
    createdAt: new Date('2026-01-01T00:00:00Z'),
    revokedAt: null,
  };
  return { ...defaults, ...overrides };
}
// ---------------------------------------------------------------------------
// Mock DB builder
// ---------------------------------------------------------------------------
/**
 * Build a minimal Db stub whose `select().from().where().limit()` chain
 * resolves to `selectRows`. The remaining top-level methods (`insert`,
 * `update`, `delete`, `transaction`) are bare `vi.fn()` placeholders.
 *
 * @param selectRows - Rows the select chain resolves with; defaults to a
 *   single default peer row. Pass `[]` to simulate a missing peer.
 * @returns The stub cast to `Db`.
 */
function makeDb(selectRows: unknown[] = [makePeerRow()]): Db {
  // Assemble the chain from the innermost call outwards.
  const limit = vi.fn().mockResolvedValue(selectRows);
  const where = vi.fn().mockReturnValue({ limit });
  const from = vi.fn().mockReturnValue({ where });
  const select = vi.fn().mockReturnValue({ from });

  const stub = {
    select,
    insert: vi.fn(),
    update: vi.fn(),
    delete: vi.fn(),
    transaction: vi.fn(),
  };
  return stub as unknown as Db;
}
// ---------------------------------------------------------------------------
// Helpers for MockAgent HTTP interception
// ---------------------------------------------------------------------------
/**
 * Create a MockAgent with real network connections disabled, install it as
 * the global undici dispatcher, and obtain its pool for the peer ENDPOINT.
 *
 * @returns `mockAgent` (close it when the test is done) and `pool` (register
 *   per-test interceptors on it).
 */
function makeMockAgent() {
  const mockAgent = new MockAgent({ connections: 1 });
  mockAgent.disableNetConnect();
  setGlobalDispatcher(mockAgent);

  return { mockAgent, pool: mockAgent.get(ENDPOINT) };
}
/**
 * Build a FederationClientService whose outbound requests are routed through
 * a MockAgent pool instead of a real per-peer mTLS Agent.
 *
 * The service calls `fetch(url, { dispatcher: agent })` where `agent` comes
 * from the entry returned by its private `resolveEntry` method. This helper
 * replaces `resolveEntry` with a spy that returns a controlled entry whose
 * `agent` is `mockPool`, so fetch uses our interceptors.
 *
 * Note that the stubbed `resolveEntry` does not run the real implementation
 * or touch `db`; `db` is only handed to the constructor. Tests that need the
 * real peer validation must construct the service directly.
 *
 * @param db - Db stub passed to the service constructor.
 * @param mockPool - Dispatcher returned as the entry's `agent`.
 * @returns The service with `resolveEntry` stubbed.
 */
function makeService(db: Db, mockPool: Dispatcher): FederationClientService {
  type WithResolveEntry = { resolveEntry: (peerId: string) => Promise<unknown> };

  const service = new FederationClientService(db);
  const resolveEntrySpy = vi.spyOn(service as unknown as WithResolveEntry, 'resolveEntry');

  // A fresh entry object is produced on every call, regardless of the peer id.
  resolveEntrySpy.mockImplementation(async (_peerId: string) => ({
    agent: mockPool,
    endpointUrl: ENDPOINT,
    certPem: TEST_CERT_PEM,
    certSerial: TEST_CERT_SERIAL,
  }));

  return service;
}
// ---------------------------------------------------------------------------
// Test setup
// ---------------------------------------------------------------------------
/** Global undici dispatcher captured before each test so it can be reinstated afterwards. */
let originalDispatcher: Dispatcher;

/**
 * One-time suite setup:
 *  1. Seal the test private key. sealClientKey requires BETTER_AUTH_SECRET,
 *     so the variable is set to TEST_SECRET only for the duration of the call
 *     and then put back to whatever it was (or removed if it was unset).
 *  2. Write the stub Step-CA root cert file to disk.
 */
beforeAll(() => {
  const previousSecret = process.env['BETTER_AUTH_SECRET'];
  process.env['BETTER_AUTH_SECRET'] = TEST_SECRET;
  try {
    SEALED_KEY = sealClientKey(TEST_PRIVATE_KEY_PEM);
  } finally {
    if (previousSecret !== undefined) {
      process.env['BETTER_AUTH_SECRET'] = previousSecret;
    } else {
      delete process.env['BETTER_AUTH_SECRET'];
    }
  }

  writeFileSync(STUB_CA_PEM_PATH, STUB_CA_PEM, 'utf8');
});

/** Remove the stub CA file; any failure is deliberately ignored. */
afterAll(() => {
  try {
    unlinkSync(STUB_CA_PEM_PATH);
  } catch {
    // best-effort cleanup
  }
});
/**
 * Values the overridden environment variables held before the current test,
 * keyed by variable name (`undefined` = the variable was not set).
 */
const envBeforeTest = new Map<string, string | undefined>();

/**
 * Per-test setup: remember the current global dispatcher and point the two
 * environment variables the service reads at their test values, recording
 * what they held beforehand.
 */
beforeEach(() => {
  originalDispatcher = getGlobalDispatcher();

  const overrides: ReadonlyArray<readonly [string, string]> = [
    ['BETTER_AUTH_SECRET', TEST_SECRET],
    ['STEP_CA_ROOT_CERT_PATH', STUB_CA_PEM_PATH],
  ];
  for (const [name, value] of overrides) {
    envBeforeTest.set(name, process.env[name]);
    process.env[name] = value;
  }
});

/**
 * Per-test teardown: reinstate the dispatcher, restore all spies, and put the
 * environment back exactly as it was. Variables that were unset before the
 * test are deleted; variables that had a value get that value back rather
 * than being deleted, so a pre-existing environment is not clobbered.
 */
afterEach(() => {
  setGlobalDispatcher(originalDispatcher);
  vi.restoreAllMocks();

  for (const [name, previous] of envBeforeTest) {
    if (previous === undefined) {
      delete process.env[name];
    } else {
      process.env[name] = previous;
    }
  }
  envBeforeTest.clear();
});
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/** Successful list response body: a single item, no next cursor, not partial. */
const LIST_BODY = {
items: [{ id: '1', title: 'Task One' }],
nextCursor: undefined,
_partial: false,
};
/** Successful get response body: the same single item, not partial. */
const GET_BODY = {
item: { id: '1', title: 'Task One' },
_partial: false,
};
/** Successful capabilities response body advertising the `tasks` resource and a 100-row limit. */
const CAP_BODY = {
resources: ['tasks'],
excluded_resources: [],
max_rows_per_query: 100,
// `as const` keeps the verbs as a readonly tuple of string literals.
supported_verbs: ['list', 'get', 'capabilities'] as const,
};
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
describe('FederationClientService', () => {
// ─── Successful verb calls ─────────────────────────────────────────────────
describe('list()', () => {
it('returns parsed typed response on success', async () => {
const db = makeDb();
const { mockAgent, pool } = makeMockAgent();
const svc = makeService(db, pool);
pool
.intercept({
path: '/api/federation/v1/list/tasks',
method: 'POST',
})
.reply(200, LIST_BODY, { headers: { 'content-type': 'application/json' } });
const result = await svc.list(PEER_ID, 'tasks', {});
expect(result.items).toHaveLength(1);
expect(result.items[0]).toMatchObject({ id: '1', title: 'Task One' });
await mockAgent.close();
});
});
describe('get()', () => {
it('returns parsed typed response on success', async () => {
const db = makeDb();
const { mockAgent, pool } = makeMockAgent();
const svc = makeService(db, pool);
pool
.intercept({
path: '/api/federation/v1/get/tasks/1',
method: 'POST',
})
.reply(200, GET_BODY, { headers: { 'content-type': 'application/json' } });
const result = await svc.get(PEER_ID, 'tasks', '1', {});
expect(result.item).toMatchObject({ id: '1', title: 'Task One' });
await mockAgent.close();
});
});
describe('capabilities()', () => {
it('returns parsed capabilities response on success', async () => {
const db = makeDb();
const { mockAgent, pool } = makeMockAgent();
const svc = makeService(db, pool);
pool
.intercept({
path: '/api/federation/v1/capabilities',
method: 'GET',
})
.reply(200, CAP_BODY, { headers: { 'content-type': 'application/json' } });
const result = await svc.capabilities(PEER_ID);
expect(result.resources).toContain('tasks');
expect(result.max_rows_per_query).toBe(100);
await mockAgent.close();
});
});
// ─── HTTP error surfaces ──────────────────────────────────────────────────
describe('non-2xx responses', () => {
it('surfaces 403 as FederationClientError({ status: 403, code: "FORBIDDEN" })', async () => {
const db = makeDb();
const { mockAgent, pool } = makeMockAgent();
const svc = makeService(db, pool);
pool.intercept({ path: '/api/federation/v1/list/tasks', method: 'POST' }).reply(
403,
{ error: { code: 'forbidden', message: 'Access denied' } },
{
headers: { 'content-type': 'application/json' },
},
);
await expect(svc.list(PEER_ID, 'tasks', {})).rejects.toMatchObject({
status: 403,
code: 'FORBIDDEN',
peerId: PEER_ID,
});
await mockAgent.close();
});
it('surfaces 404 as FederationClientError({ status: 404, code: "HTTP_404" })', async () => {
const db = makeDb();
const { mockAgent, pool } = makeMockAgent();
const svc = makeService(db, pool);
pool.intercept({ path: '/api/federation/v1/get/tasks/999', method: 'POST' }).reply(
404,
{ error: { code: 'not_found', message: 'Not found' } },
{
headers: { 'content-type': 'application/json' },
},
);
await expect(svc.get(PEER_ID, 'tasks', '999', {})).rejects.toMatchObject({
status: 404,
code: 'HTTP_404',
peerId: PEER_ID,
});
await mockAgent.close();
});
});
// ─── Network error ─────────────────────────────────────────────────────────
describe('network errors', () => {
it('surfaces network error as FederationClientError({ code: "NETWORK" })', async () => {
const db = makeDb();
const { mockAgent, pool } = makeMockAgent();
const svc = makeService(db, pool);
pool
.intercept({ path: '/api/federation/v1/capabilities', method: 'GET' })
.replyWithError(new Error('ECONNREFUSED'));
await expect(svc.capabilities(PEER_ID)).rejects.toMatchObject({
code: 'NETWORK',
peerId: PEER_ID,
});
await mockAgent.close();
});
});
// ─── Invalid response body ─────────────────────────────────────────────────
describe('invalid response body', () => {
  it('surfaces as FederationClientError({ code: "INVALID_RESPONSE" }) when body shape is wrong', async () => {
    const db = makeDb();
    const { mockAgent, pool } = makeMockAgent();
    const svc = makeService(db, pool);

    // A 200 reply whose JSON body lacks every field the capabilities schema requires.
    const malformedBody = { totally: 'wrong' };
    const replyOptions = { headers: { 'content-type': 'application/json' } };
    pool
      .intercept({ path: '/api/federation/v1/capabilities', method: 'GET' })
      .reply(200, malformedBody, replyOptions);

    const capabilitiesCall = svc.capabilities(PEER_ID);
    await expect(capabilitiesCall).rejects.toMatchObject({
      code: 'INVALID_RESPONSE',
      peerId: PEER_ID,
    });

    await mockAgent.close();
  });
});
// ─── Peer DB validation ────────────────────────────────────────────────────
describe('peer validation (without resolveEntry spy)', () => {
  /**
   * The service is constructed directly here, so the real `resolveEntry`
   * path (DB lookup + state validation) runs — nothing is spied or stubbed
   * on the service itself.
   */
  it('throws PEER_NOT_FOUND when peer is not in DB', async () => {
    // An empty result set stands in for "no such peer row".
    const emptyDb = makeDb([]);
    const svc = new FederationClientService(emptyDb);

    const capabilitiesCall = svc.capabilities(PEER_ID);
    await expect(capabilitiesCall).rejects.toMatchObject({
      code: 'PEER_NOT_FOUND',
      peerId: PEER_ID,
    });
  });

  it('throws PEER_INACTIVE when peer state is not "active"', async () => {
    const suspendedPeer = makePeerRow({ state: 'suspended' });
    const svc = new FederationClientService(makeDb([suspendedPeer]));

    const capabilitiesCall = svc.capabilities(PEER_ID);
    await expect(capabilitiesCall).rejects.toMatchObject({
      code: 'PEER_INACTIVE',
      peerId: PEER_ID,
    });
  });
});
// ─── Cache behaviour ───────────────────────────────────────────────────────
describe('cache behaviour', () => {
  /** Minimal view of a cache entry: only the Agent's `destroy` is observed by these tests. */
  type CacheEntryView = { agent: { destroy: () => Promise<void> } };

  /**
   * Expose the private `resolveEntry` method, bound to `svc`.
   *
   * The cache tests drive `resolveEntry` directly so they avoid the HTTP
   * layer, which would require either a real network or per-peer Agent
   * rewiring that the cache invariants do not depend on.
   */
  const bindResolveEntry = (
    svc: FederationClientService,
  ): ((peerId: string) => Promise<CacheEntryView>) =>
    (
      svc as unknown as { resolveEntry: (peerId: string) => Promise<CacheEntryView> }
    ).resolveEntry.bind(svc);

  it('hits cache on second call — only one DB lookup happens', async () => {
    const db = makeDb();
    const selectSpy = vi.spyOn(db, 'select');
    const svc = new FederationClientService(db);
    const resolveEntry = bindResolveEntry(svc);

    const first = await resolveEntry(PEER_ID);
    const second = await resolveEntry(PEER_ID);

    // Same object identity proves the second call was served from the cache.
    expect(first).toBe(second);
    expect(selectSpy).toHaveBeenCalledTimes(1);
  });

  it('serializes concurrent resolveEntry calls — only one DB lookup', async () => {
    const db = makeDb();
    const selectSpy = vi.spyOn(db, 'select');
    const svc = new FederationClientService(db);
    const resolveEntry = bindResolveEntry(svc);

    // Both calls start before the first build settles.
    const [a, b] = await Promise.all([resolveEntry(PEER_ID), resolveEntry(PEER_ID)]);

    expect(a).toBe(b);
    expect(selectSpy).toHaveBeenCalledTimes(1);
  });

  it('flushPeer destroys the evicted Agent so old TLS connections close', async () => {
    const db = makeDb();
    const svc = new FederationClientService(db);
    const resolveEntry = bindResolveEntry(svc);

    const entry = await resolveEntry(PEER_ID);
    const destroySpy = vi.spyOn(entry.agent, 'destroy').mockResolvedValue();

    svc.flushPeer(PEER_ID);

    expect(destroySpy).toHaveBeenCalledTimes(1);
  });

  it('flushPeer() invalidates cache — next call re-reads DB', async () => {
    // Exercise the real resolveEntry (no makeService mock): with resolveEntry
    // mocked the cache is never populated, so a flush could not be observed
    // and the test would pass even if flushPeer did nothing.
    const db = makeDb();
    const selectSpy = vi.spyOn(db, 'select');
    const svc = new FederationClientService(db);
    const resolveEntry = bindResolveEntry(svc);

    // First call — populates the cache with one DB lookup.
    const beforeFlush = await resolveEntry(PEER_ID);
    expect(selectSpy).toHaveBeenCalledTimes(1);

    // Flush the cache
    svc.flushPeer(PEER_ID);

    // Second call after flush — must miss the cache, re-read the DB and
    // hand back a freshly built entry rather than the evicted one.
    const afterFlush = await resolveEntry(PEER_ID);
    expect(afterFlush).not.toBe(beforeFlush);
    expect(selectSpy).toHaveBeenCalledTimes(2);
  });
});
// ─── loadStepCaRoot env-var guard ─────────────────────────────────────────
describe('loadStepCaRoot() env-var guard', () => {
  it('throws PEER_MISCONFIGURED when STEP_CA_ROOT_CERT_PATH is not set', async () => {
    // Remember the current value so the mutation below cannot leak into
    // other tests that run later in the same worker process.
    const previousCaPath = process.env['STEP_CA_ROOT_CERT_PATH'];
    delete process.env['STEP_CA_ROOT_CERT_PATH'];
    try {
      const db = makeDb();
      const svc = new FederationClientService(db);
      const resolveEntry = (
        svc as unknown as {
          resolveEntry: (peerId: string) => Promise<unknown>;
        }
      ).resolveEntry.bind(svc);

      await expect(resolveEntry(PEER_ID)).rejects.toMatchObject({
        code: 'PEER_MISCONFIGURED',
      });
    } finally {
      // Restore the environment exactly as it was, including "unset".
      if (previousCaPath === undefined) {
        delete process.env['STEP_CA_ROOT_CERT_PATH'];
      } else {
        process.env['STEP_CA_ROOT_CERT_PATH'] = previousCaPath;
      }
    }
  });
});
// ─── FederationClientError class ──────────────────────────────────────────
describe('FederationClientError', () => {
  it('is instanceof Error and FederationClientError', () => {
    const thrown = new FederationClientError({
      code: 'PEER_NOT_FOUND',
      message: 'test',
      peerId: PEER_ID,
    });

    // Both the built-in base class and the custom subclass must be recognised.
    for (const ctor of [Error, FederationClientError]) {
      expect(thrown).toBeInstanceOf(ctor);
    }
    expect(thrown.name).toBe('FederationClientError');
  });

  it('carries status, code, and peerId', () => {
    const { status, code, peerId } = new FederationClientError({
      status: 403,
      code: 'FORBIDDEN',
      message: 'forbidden',
      peerId: PEER_ID,
    });

    expect(status).toBe(403);
    expect(code).toBe('FORBIDDEN');
    expect(peerId).toBe(PEER_ID);
  });
});
});

View File

@@ -0,0 +1,500 @@
/**
* FederationClientService — outbound mTLS client for federation requests (FED-M3-08).
*
* Dials peer gateways over mTLS using the cert+sealed-key stored in `federation_peers`,
* invokes federation verbs (list / get / capabilities), and surfaces all failure modes
* as typed `FederationClientError` instances.
*
* ## Error code taxonomy
*
* | Code | When |
* | ------------------ | ------------------------------------------------------------- |
* | PEER_NOT_FOUND | No row in federation_peers for the given peerId |
* | PEER_INACTIVE | Peer row exists but state !== 'active' |
* | PEER_MISCONFIGURED | Peer row is active but missing endpointUrl or clientKeyPem, the client key cannot be unsealed, or the Step-CA root cert (STEP_CA_ROOT_CERT_PATH) is unset or unreadable |
* | NETWORK | undici threw a connection / TLS / timeout error |
* | HTTP_{status} | Peer returned a non-2xx response other than 403 (e.g. HTTP_404) |
* | FORBIDDEN | Peer returned 403 (emitted instead of HTTP_403) |
* | INVALID_RESPONSE | Response body failed Zod schema validation |
*
* ## Cache strategy
*
* Per-peer `undici.Agent` instances are cached in a `Map<peerId, AgentCacheEntry>` for
* the lifetime of the service instance. The cache is keyed on peerId (UUID).
*
* Cache invalidation:
* - `flushPeer(peerId)` — removes the entry immediately. M5/M6 MUST call this on
* cert rotation or peer revocation events so the next request re-reads the DB and
* builds a fresh TLS Agent with the new cert material.
* - On cache miss: re-reads the DB, checks state === 'active', rebuilds Agent.
*
* Cache does NOT auto-expire. The service is expected to be a singleton scoped to the
* NestJS application lifecycle; flushing on revocation/rotation is the only invalidation
* path by design (avoids redundant DB round-trips on the hot path).
*/
import { Injectable, Inject, Logger } from '@nestjs/common';
import { readFileSync } from 'node:fs';
import { Agent, fetch as undiciFetch } from 'undici';
import type { Dispatcher } from 'undici';
import { z } from 'zod';
import { type Db, eq, federationPeers } from '@mosaicstack/db';
import {
FederationListResponseSchema,
FederationGetResponseSchema,
FederationCapabilitiesResponseSchema,
FederationErrorEnvelopeSchema,
type FederationListResponse,
type FederationGetResponse,
type FederationCapabilitiesResponse,
} from '@mosaicstack/types';
import { DB } from '../../database/database.module.js';
import { unsealClientKey } from '../peer-key.util.js';
// ---------------------------------------------------------------------------
// Error taxonomy
// ---------------------------------------------------------------------------
/**
 * Error codes produced on the client side of a federation call.
 *
 * Kept separate from the server-side `FederationErrorCode` in
 * `@mosaicstack/types`: the client has failure modes of its own
 * (PEER_NOT_FOUND, PEER_INACTIVE, PEER_MISCONFIGURED, NETWORK) that the
 * server never emits.
 */
export type FederationClientErrorCode =
  // Peer lookup / configuration problems, detected before dialing.
  | 'PEER_NOT_FOUND'
  | 'PEER_INACTIVE'
  | 'PEER_MISCONFIGURED'
  // Transport-level failure.
  | 'NETWORK'
  // Non-2xx answers from the peer.
  | 'FORBIDDEN'
  | `HTTP_${number}`
  // Body that could not be parsed or validated.
  | 'INVALID_RESPONSE';
/**
 * Constructor payload for `FederationClientError`.
 */
export interface FederationClientErrorOptions {
  /** Machine-readable failure code. */
  code: FederationClientErrorCode;
  /** Human-readable description; becomes the `Error` message. */
  message: string;
  /** Identifier of the peer the failed call was addressed to. */
  peerId: string;
  /** HTTP status returned by the peer, when the failure came from a response. */
  status?: number;
  /** Underlying error or value that triggered this failure, if any. */
  cause?: unknown;
}
/**
 * Error type thrown by FederationClientService on every failure path.
 *
 * Callers can branch on `code` for programmatic handling; `status` is set
 * only when the constructor options carry one, and `cause` holds whatever
 * underlying value was supplied.
 */
export class FederationClientError extends Error {
  readonly status?: number;
  readonly code: FederationClientErrorCode;
  readonly peerId: string;
  readonly cause?: unknown;

  /**
   * @param opts Failure details; `opts.message` becomes the `Error` message.
   */
  constructor(opts: FederationClientErrorOptions) {
    super(opts.message);
    const { status, code, peerId, cause } = opts;
    // Explicit name so logs and `err.name` checks identify this class.
    this.name = 'FederationClientError';
    this.status = status;
    this.code = code;
    this.peerId = peerId;
    this.cause = cause;
  }
}
// ---------------------------------------------------------------------------
// Internal cache types
// ---------------------------------------------------------------------------
/**
 * Cached per-peer connection state: the dispatcher used to dial the peer
 * together with the peer-row fields it was built from.
 */
interface AgentCacheEntry {
  /** Base URL that verb paths are appended to. */
  endpointUrl: string;
  /** undici Agent used as the dispatcher for requests to this peer. */
  agent: Agent;
  /** Certificate PEM copied from the peer row. */
  certPem: string;
  /** Certificate serial copied from the peer row. */
  certSerial: string;
}
// ---------------------------------------------------------------------------
// Service
// ---------------------------------------------------------------------------
@Injectable()
export class FederationClientService {
  private readonly logger = new Logger(FederationClientService.name);

  /**
   * Per-peer undici Agent cache.
   * Key = peerId (UUID string).
   *
   * Values are either a resolved `AgentCacheEntry` or an in-flight
   * `Promise<AgentCacheEntry>` (promise-cache pattern). Storing the promise
   * prevents duplicate DB lookups and duplicate key-unseal operations when two
   * requests for the same peer arrive before the first build completes.
   *
   * Flush via `flushPeer(peerId)` on cert rotation / peer revocation (M5/M6).
   */
  private readonly cache = new Map<string, AgentCacheEntry | Promise<AgentCacheEntry>>();

  /**
   * Step-CA root cert PEM, loaded once from `STEP_CA_ROOT_CERT_PATH`.
   * Used as the trust anchor for peer server certificates so federation TLS is
   * pinned to our PKI, not the public trust store. Lazily loaded on first use
   * so unit tests that don't exercise the agent path can run without the env var.
   */
  private cachedCaPem: string | null = null;

  constructor(@Inject(DB) private readonly db: Db) {}

  // -------------------------------------------------------------------------
  // Public verb API
  // -------------------------------------------------------------------------

  /**
   * Invoke the `list` verb on a remote peer.
   *
   * @param peerId   UUID of the peer row in `federation_peers`.
   * @param resource Resource path, e.g. "tasks" (URL-encoded before use).
   * @param request  Free-form body sent as JSON in the POST body.
   * @returns Parsed `FederationListResponse<T>`.
   * @throws FederationClientError on every failure path.
   */
  async list<T>(
    peerId: string,
    resource: string,
    request: Record<string, unknown>,
  ): Promise<FederationListResponse<T>> {
    const { endpointUrl, agent } = await this.resolveEntry(peerId);
    const url = `${endpointUrl}/api/federation/v1/list/${encodeURIComponent(resource)}`;
    const body = await this.doPost(peerId, url, agent, request);
    return this.parseWith<FederationListResponse<T>>(
      peerId,
      body,
      FederationListResponseSchema(z.unknown()),
    );
  }

  /**
   * Invoke the `get` verb on a remote peer.
   *
   * @param peerId   UUID of the peer row in `federation_peers`.
   * @param resource Resource path, e.g. "tasks" (URL-encoded before use).
   * @param id       Resource identifier (URL-encoded before use).
   * @param request  Free-form body sent as JSON in the POST body.
   * @returns Parsed `FederationGetResponse<T>`.
   * @throws FederationClientError on every failure path.
   */
  async get<T>(
    peerId: string,
    resource: string,
    id: string,
    request: Record<string, unknown>,
  ): Promise<FederationGetResponse<T>> {
    const { endpointUrl, agent } = await this.resolveEntry(peerId);
    const url = `${endpointUrl}/api/federation/v1/get/${encodeURIComponent(resource)}/${encodeURIComponent(id)}`;
    const body = await this.doPost(peerId, url, agent, request);
    return this.parseWith<FederationGetResponse<T>>(
      peerId,
      body,
      FederationGetResponseSchema(z.unknown()),
    );
  }

  /**
   * Invoke the `capabilities` verb on a remote peer.
   *
   * @param peerId UUID of the peer row in `federation_peers`.
   * @returns Parsed `FederationCapabilitiesResponse`.
   * @throws FederationClientError on every failure path.
   */
  async capabilities(peerId: string): Promise<FederationCapabilitiesResponse> {
    const { endpointUrl, agent } = await this.resolveEntry(peerId);
    const url = `${endpointUrl}/api/federation/v1/capabilities`;
    const body = await this.doGet(peerId, url, agent);
    return this.parseWith<FederationCapabilitiesResponse>(
      peerId,
      body,
      FederationCapabilitiesResponseSchema,
    );
  }

  // -------------------------------------------------------------------------
  // Cache management
  // -------------------------------------------------------------------------

  /**
   * Flush the cached Agent for a specific peer.
   *
   * M5/M6 MUST call this on:
   *   - cert rotation events (so new cert material is picked up)
   *   - peer revocation events (so future requests fail at PEER_INACTIVE)
   *
   * After flushing, the next call to `list`, `get`, or `capabilities` for
   * this peer will re-read the DB and rebuild the Agent. A build that is
   * still in flight when the flush happens is not re-inserted into the cache
   * once it settles (see `resolveEntry`).
   *
   * @param peerId UUID of the peer whose cache entry should be dropped.
   */
  flushPeer(peerId: string): void {
    const entry = this.cache.get(peerId);
    if (entry === undefined) {
      return;
    }
    this.cache.delete(peerId);
    if (!(entry instanceof Promise)) {
      // Best-effort destroy of the evicted Agent. In-flight (Promise) entries
      // have no Agent to destroy yet; removing the promise from the map is
      // enough, because `resolveEntry` only commits a build whose promise is
      // still the current cache value.
      entry.agent.destroy().catch(() => {
        // intentionally ignored — destroy errors are not actionable
      });
    }
    this.logger.log(`Cache flushed for peer ${peerId}`);
  }

  // -------------------------------------------------------------------------
  // Internal helpers
  // -------------------------------------------------------------------------

  /**
   * Load and cache the Step-CA root cert PEM from `STEP_CA_ROOT_CERT_PATH`.
   * Throws `FederationClientError` if the env var is unset or the file cannot
   * be read — mTLS to a peer without a pinned trust anchor would silently
   * fall back to the public trust store.
   *
   * @param peerId Peer on whose behalf the root is being loaded; attached to
   *               any thrown error. Defaults to `''` for callers without a
   *               peer context.
   * @returns The root certificate PEM.
   * @throws FederationClientError with code `PEER_MISCONFIGURED`.
   */
  private loadStepCaRoot(peerId = ''): string {
    if (this.cachedCaPem !== null) {
      return this.cachedCaPem;
    }
    const path = process.env['STEP_CA_ROOT_CERT_PATH'];
    if (!path) {
      throw new FederationClientError({
        code: 'PEER_MISCONFIGURED',
        message: 'STEP_CA_ROOT_CERT_PATH is not set; refusing to dial peer without pinned CA trust',
        peerId,
      });
    }
    try {
      const pem = readFileSync(path, 'utf8');
      this.cachedCaPem = pem;
      return pem;
    } catch (err) {
      throw new FederationClientError({
        code: 'PEER_MISCONFIGURED',
        message: `Failed to read STEP_CA_ROOT_CERT_PATH (${path})`,
        peerId,
        cause: err,
      });
    }
  }

  /**
   * Resolve the cache entry for a peer, reading DB on miss.
   *
   * Uses a promise-cache pattern: concurrent callers for the same uncached
   * `peerId` all `await` the same in-flight `Promise<AgentCacheEntry>` so
   * only one DB lookup and one key-unseal runs per peer per cache miss.
   * The promise is replaced with the concrete entry on success, or deleted on
   * rejection so a transient error does not poison the cache permanently.
   *
   * Both settle handlers touch the cache only while it still holds this
   * build's promise. If `flushPeer` ran (or a newer build started) in the
   * meantime, the result is handed to the callers already waiting on it but
   * is not cached, so a flush can never be undone by a build that began
   * before it.
   *
   * Throws `FederationClientError` with appropriate code if the peer is not
   * found, is inactive, or is missing required fields.
   */
  private async resolveEntry(peerId: string): Promise<AgentCacheEntry> {
    const cached = this.cache.get(peerId);
    if (cached) {
      return cached; // Promise or concrete entry — both are awaitable
    }
    // The handlers below run asynchronously, after `inflight` is assigned,
    // so referring to it from inside them is safe.
    const inflight: Promise<AgentCacheEntry> = this.buildEntry(peerId).then(
      (entry) => {
        if (this.cache.get(peerId) === inflight) {
          this.cache.set(peerId, entry); // replace promise with concrete value
          this.logger.log(`Agent cached for peer ${peerId} (serial: ${entry.certSerial})`);
        } else {
          // Flushed while building: the entry may predate a rotation or
          // revocation, so serve the waiting callers but do not cache it.
          this.logger.warn(`Peer ${peerId} was flushed during Agent build; entry not cached`);
        }
        return entry;
      },
      (err: unknown) => {
        // Don't poison the cache with a rejected promise — but also don't
        // evict a newer entry that replaced this build in the meantime.
        if (this.cache.get(peerId) === inflight) {
          this.cache.delete(peerId);
        }
        throw err;
      },
    );
    this.cache.set(peerId, inflight);
    return inflight;
  }

  /**
   * Build the `AgentCacheEntry` for a peer by reading the DB, validating the
   * peer's state, unsealing the private key, and constructing the mTLS Agent.
   *
   * Throws `FederationClientError` with appropriate code if the peer is not
   * found, is inactive, or is missing required fields.
   */
  private async buildEntry(peerId: string): Promise<AgentCacheEntry> {
    // DB lookup
    const [peer] = await this.db
      .select()
      .from(federationPeers)
      .where(eq(federationPeers.id, peerId))
      .limit(1);
    if (!peer) {
      throw new FederationClientError({
        code: 'PEER_NOT_FOUND',
        message: `Federation peer ${peerId} not found`,
        peerId,
      });
    }
    if (peer.state !== 'active') {
      throw new FederationClientError({
        code: 'PEER_INACTIVE',
        message: `Federation peer ${peerId} is not active (state: ${peer.state})`,
        peerId,
      });
    }
    // NOTE(review): the scheme of endpointUrl is not checked here; a
    // non-HTTPS endpoint would not use the TLS settings below — confirm it is
    // validated where peers are enrolled.
    if (!peer.endpointUrl || !peer.clientKeyPem) {
      throw new FederationClientError({
        code: 'PEER_MISCONFIGURED',
        message: `Federation peer ${peerId} is missing endpointUrl or clientKeyPem`,
        peerId,
      });
    }

    // Unseal the private key
    let privateKeyPem: string;
    try {
      privateKeyPem = unsealClientKey(peer.clientKeyPem);
    } catch (err) {
      throw new FederationClientError({
        code: 'PEER_MISCONFIGURED',
        message: `Failed to unseal client key for peer ${peerId}`,
        peerId,
        cause: err,
      });
    }

    // Build mTLS agent — pin trust to Step-CA root so we never accept
    // a peer cert signed by a public CA (defense against MITM with a
    // publicly-trusted DV cert for the peer's hostname).
    const caPem = this.loadStepCaRoot(peerId);
    const agent = new Agent({
      connect: {
        cert: peer.certPem,
        key: privateKeyPem,
        ca: caPem,
        // rejectUnauthorized: true is the undici default for HTTPS
      },
    });

    return {
      agent,
      endpointUrl: peer.endpointUrl,
      certPem: peer.certPem,
      certSerial: peer.certSerial,
    };
  }

  /**
   * Execute a POST request with a JSON body.
   * Returns the parsed response body as an unknown value.
   * Throws `FederationClientError` on network errors and non-2xx responses.
   */
  private async doPost(
    peerId: string,
    url: string,
    agent: Dispatcher,
    body: Record<string, unknown>,
  ): Promise<unknown> {
    return this.doRequest(peerId, url, agent, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(body),
    });
  }

  /**
   * Execute a GET request.
   * Returns the parsed response body as an unknown value.
   * Throws `FederationClientError` on network errors and non-2xx responses.
   */
  private async doGet(peerId: string, url: string, agent: Dispatcher): Promise<unknown> {
    return this.doRequest(peerId, url, agent, { method: 'GET' });
  }

  /**
   * Shared request path for `doGet` / `doPost`.
   *
   * @param peerId Peer being called; attached to every thrown error.
   * @param url    Fully-built request URL.
   * @param agent  Dispatcher the request is sent through.
   * @param init   Method plus optional headers and serialized body.
   * @returns The JSON-parsed response body (not yet schema-validated).
   * @throws FederationClientError `NETWORK` when the request or the read of a
   *         2xx body fails, `FORBIDDEN` / `HTTP_{status}` on non-2xx, and
   *         `INVALID_RESPONSE` when a 2xx body is not JSON.
   */
  private async doRequest(
    peerId: string,
    url: string,
    agent: Dispatcher,
    init: { method: string; headers?: Record<string, string>; body?: string },
  ): Promise<unknown> {
    let response: Awaited<ReturnType<typeof undiciFetch>>;
    try {
      response = await undiciFetch(url, {
        ...init,
        dispatcher: agent,
      });
    } catch (err) {
      throw new FederationClientError({
        code: 'NETWORK',
        message: `Network error calling peer ${peerId} at ${url}: ${err instanceof Error ? err.message : String(err)}`,
        peerId,
        cause: err,
      });
    }

    let rawBody: string;
    try {
      rawBody = await response.text();
    } catch (err) {
      if (response.ok) {
        // A 2xx whose body cannot be read is a transport failure; report it
        // as such (with its cause) rather than as a "non-JSON body".
        throw new FederationClientError({
          code: 'NETWORK',
          message: `Network error reading response from peer ${peerId} at ${url}: ${err instanceof Error ? err.message : String(err)}`,
          peerId,
          cause: err,
        });
      }
      // For non-2xx the body only enriches the message; the status is enough.
      rawBody = '';
    }

    if (!response.ok) {
      const status = response.status;
      // Attempt to parse as federation error envelope
      let serverMessage = `HTTP ${status}`;
      try {
        const json: unknown = JSON.parse(rawBody);
        const result = FederationErrorEnvelopeSchema.safeParse(json);
        if (result.success) {
          serverMessage = result.data.error.message;
        }
      } catch {
        // Not valid JSON or not a federation envelope — use generic message
      }
      // Specific code for 403 (most actionable for callers); generic HTTP_{n} for others
      const code: FederationClientErrorCode = status === 403 ? 'FORBIDDEN' : `HTTP_${status}`;
      throw new FederationClientError({
        status,
        code,
        message: `Peer ${peerId} returned ${status}: ${serverMessage}`,
        peerId,
      });
    }

    try {
      return JSON.parse(rawBody) as unknown;
    } catch (err) {
      throw new FederationClientError({
        code: 'INVALID_RESPONSE',
        message: `Peer ${peerId} returned non-JSON body`,
        peerId,
        cause: err,
      });
    }
  }

  /**
   * Parse and validate a response body against a Zod schema.
   *
   * For list/get, callers pass the result of `FederationListResponseSchema(z.unknown())`
   * so that the envelope structure is validated without requiring a concrete item schema
   * at the client level. The generic `T` provides compile-time typing only.
   *
   * @param peerId Peer the body came from; attached to the thrown error.
   * @param body   JSON-parsed response body.
   * @param schema Zod schema the body must satisfy.
   * @returns The schema's parsed output, typed as `T`.
   * @throws FederationClientError `INVALID_RESPONSE` on parse failure, with
   *         every Zod issue listed in the message.
   */
  private parseWith<T>(peerId: string, body: unknown, schema: z.ZodTypeAny): T {
    const result = schema.safeParse(body);
    if (!result.success) {
      const issues = result.error.issues
        .map((e: z.ZodIssue) => `[${e.path.join('.') || 'root'}] ${e.message}`)
        .join('; ');
      throw new FederationClientError({
        code: 'INVALID_RESPONSE',
        message: `Peer ${peerId} returned invalid response shape: ${issues}`,
        peerId,
      });
    }
    return result.data as T;
  }
}

View File

@@ -0,0 +1,13 @@
/**
* Federation client barrel — re-exports for FederationModule consumers.
*
* M3-09 (QuerySourceService) and future milestones should import from here,
* not directly from the implementation file.
*/
export {
FederationClientService,
FederationClientError,
type FederationClientErrorCode,
type FederationClientErrorOptions,
} from './federation-client.service.js';

View File

@@ -5,10 +5,11 @@ import { EnrollmentController } from './enrollment.controller.js';
import { EnrollmentService } from './enrollment.service.js';
import { FederationController } from './federation.controller.js';
import { GrantsService } from './grants.service.js';
import { FederationClientService } from './client/index.js';
@Module({
controllers: [EnrollmentController, FederationController],
providers: [AdminGuard, CaService, EnrollmentService, GrantsService],
exports: [CaService, EnrollmentService, GrantsService],
providers: [AdminGuard, CaService, EnrollmentService, GrantsService, FederationClientService],
exports: [CaService, EnrollmentService, GrantsService, FederationClientService],
})
export class FederationModule {}

View File

@@ -30,6 +30,7 @@ export default tseslint.config(
'apps/gateway/vitest.config.ts',
'packages/storage/vitest.config.ts',
'packages/mosaic/__tests__/*.ts',
'tools/federation-harness/*.ts',
],
},
},

View File

@@ -26,7 +26,8 @@
},
"dependencies": {
"class-transformer": "^0.5.1",
"class-validator": "^0.15.1"
"class-validator": "^0.15.1",
"zod": "^4.3.6"
},
"publishConfig": {
"registry": "https://git.mosaicstack.dev/api/packages/mosaicstack/npm/",

View File

@@ -0,0 +1,435 @@
/**
* Unit tests for federation wire-format DTOs.
*
* Coverage:
* - FederationRequestSchema (valid + invalid)
* - FederationListResponseSchema factory
* - FederationGetResponseSchema factory
* - FederationCapabilitiesResponseSchema
* - FederationErrorEnvelopeSchema + error code exhaustiveness
* - FederationError exception hierarchy
* - tagWithSource helper round-trip
* - SourceTagSchema
*/
import { describe, expect, it } from 'vitest';
import { z } from 'zod';
import {
FEDERATION_ERROR_CODES,
FEDERATION_VERBS,
FederationCapabilitiesResponseSchema,
FederationError,
FederationErrorEnvelopeSchema,
FederationForbiddenError,
FederationInternalError,
FederationInvalidRequestError,
FederationNotFoundError,
FederationRateLimitedError,
FederationRequestSchema,
FederationScopeViolationError,
FederationUnauthorizedError,
FederationGetResponseSchema,
FederationListResponseSchema,
SOURCE_LOCAL,
SourceTagSchema,
parseFederationErrorEnvelope,
tagWithSource,
} from '../index.js';
// ---------------------------------------------------------------------------
// Verbs
// ---------------------------------------------------------------------------
describe('FEDERATION_VERBS', () => {
  it('contains exactly list, get, capabilities', () => {
    // Order matters: toEqual compares the tuple element by element.
    const expectedVerbs = ['list', 'get', 'capabilities'];
    expect(FEDERATION_VERBS).toEqual(expectedVerbs);
  });
});
// ---------------------------------------------------------------------------
// FederationRequestSchema
// ---------------------------------------------------------------------------
describe('FederationRequestSchema', () => {
  // Thin wrapper so each case reads as "input → verdict".
  const parseRequest = (input: unknown) => FederationRequestSchema.safeParse(input);

  it('accepts a minimal valid list request', () => {
    const outcome = parseRequest({ verb: 'list', resource: 'tasks' });
    expect(outcome.success).toBe(true);
  });

  it('accepts a get request with cursor and params', () => {
    const outcome = parseRequest({
      verb: 'get',
      resource: 'notes',
      cursor: 'abc123',
      params: { filter: 'mine' },
    });
    expect(outcome.success).toBe(true);
    // The guard narrows the result type so `data` can be inspected.
    if (outcome.success) {
      const { cursor, params } = outcome.data;
      expect(cursor).toBe('abc123');
      expect(params?.['filter']).toBe('mine');
    }
  });

  it('accepts a capabilities request', () => {
    const outcome = parseRequest({ verb: 'capabilities', resource: 'tasks' });
    expect(outcome.success).toBe(true);
  });

  it('rejects an unknown verb', () => {
    const outcome = parseRequest({ verb: 'search', resource: 'tasks' });
    expect(outcome.success).toBe(false);
  });

  it('rejects an empty resource string', () => {
    const outcome = parseRequest({ verb: 'list', resource: '' });
    expect(outcome.success).toBe(false);
  });

  it('rejects a missing verb', () => {
    const outcome = parseRequest({ resource: 'tasks' });
    expect(outcome.success).toBe(false);
  });
});
// ---------------------------------------------------------------------------
// FederationListResponseSchema factory
// ---------------------------------------------------------------------------
describe('FederationListResponseSchema', () => {
  // The factory is exercised with a concrete item schema so that item-level
  // validation failures can be observed as well as envelope-level ones.
  const listSchema = FederationListResponseSchema(
    z.object({ id: z.string(), name: z.string() }),
  );
  const parseList = (input: unknown) => listSchema.safeParse(input);

  it('accepts a valid list envelope', () => {
    const outcome = parseList({
      items: [{ id: '1', name: 'Task A' }],
      nextCursor: 'page2',
      _partial: false,
      _truncated: false,
    });
    expect(outcome.success).toBe(true);
    if (outcome.success) {
      const { items, nextCursor } = outcome.data;
      expect(items).toHaveLength(1);
      expect(nextCursor).toBe('page2');
    }
  });

  it('accepts a minimal envelope with empty items', () => {
    expect(parseList({ items: [] }).success).toBe(true);
  });

  it('rejects when items is missing', () => {
    expect(parseList({ nextCursor: 'x' }).success).toBe(false);
  });

  it('rejects when an item fails validation', () => {
    // `id` is a number here, but the item schema requires a string.
    const outcome = parseList({ items: [{ id: 1, name: 'bad' }] });
    expect(outcome.success).toBe(false);
  });
});
// ---------------------------------------------------------------------------
// FederationGetResponseSchema factory
// ---------------------------------------------------------------------------
describe('FederationGetResponseSchema', () => {
  const getSchema = FederationGetResponseSchema(z.object({ id: z.string() }));
  const parseGet = (input: unknown) => getSchema.safeParse(input);

  it('accepts a found item', () => {
    const outcome = parseGet({ item: { id: 'abc' } });
    expect(outcome.success).toBe(true);
    if (outcome.success) {
      expect(outcome.data.item).toEqual({ id: 'abc' });
    }
  });

  it('accepts null item (not found)', () => {
    const outcome = parseGet({ item: null });
    expect(outcome.success).toBe(true);
    if (outcome.success) {
      expect(outcome.data.item).toBeNull();
    }
  });

  it('rejects when item is missing', () => {
    // An absent `item` key is distinct from an explicit `item: null`.
    expect(parseGet({}).success).toBe(false);
  });
});
// ---------------------------------------------------------------------------
// FederationCapabilitiesResponseSchema
// ---------------------------------------------------------------------------
describe('FederationCapabilitiesResponseSchema', () => {
  /**
   * Build a fresh capabilities payload for each case: a baseline set of the
   * four fields every case supplies, with `overrides` replacing or adding
   * fields for the scenario under test.
   */
  const capabilitiesPayload = (overrides: Record<string, unknown> = {}) => ({
    resources: ['tasks'],
    excluded_resources: [],
    max_rows_per_query: 100,
    supported_verbs: ['list'],
    ...overrides,
  });
  const parseCapabilities = (input: unknown) =>
    FederationCapabilitiesResponseSchema.safeParse(input);

  it('accepts a valid capabilities response', () => {
    const outcome = parseCapabilities(
      capabilitiesPayload({
        resources: ['tasks', 'notes'],
        excluded_resources: ['credentials'],
        max_rows_per_query: 500,
        supported_verbs: ['list', 'get', 'capabilities'],
      }),
    );
    expect(outcome.success).toBe(true);
    if (outcome.success) {
      expect(outcome.data.max_rows_per_query).toBe(500);
    }
  });

  it('accepts a response with filters field', () => {
    const outcome = parseCapabilities(
      capabilitiesPayload({
        resources: ['tasks', 'notes'],
        filters: {
          tasks: { include_teams: ['team-a'], include_personal: true },
          notes: { include_personal: false },
        },
      }),
    );
    expect(outcome.success).toBe(true);
    if (outcome.success) {
      expect(outcome.data.filters?.['tasks']?.include_teams).toEqual(['team-a']);
    }
  });

  it('accepts a response with partial filters (only include_teams)', () => {
    const outcome = parseCapabilities(
      capabilitiesPayload({
        max_rows_per_query: 50,
        filters: { tasks: { include_teams: ['eng'] } },
      }),
    );
    expect(outcome.success).toBe(true);
  });

  it('accepts a response with rate_limit (M4 full shape)', () => {
    const outcome = parseCapabilities(
      capabilitiesPayload({
        rate_limit: { limit_per_minute: 60, remaining: 55, reset_at: '2026-04-23T12:00:00Z' },
      }),
    );
    expect(outcome.success).toBe(true);
    if (outcome.success) {
      const rateLimit = outcome.data.rate_limit;
      expect(rateLimit?.limit_per_minute).toBe(60);
      expect(rateLimit?.remaining).toBe(55);
    }
  });

  it('accepts a response with rate_limit (M3 minimal — limit_per_minute only)', () => {
    const outcome = parseCapabilities(
      capabilitiesPayload({ rate_limit: { limit_per_minute: 120 } }),
    );
    expect(outcome.success).toBe(true);
  });

  it('accepts a response without rate_limit (field is optional)', () => {
    const outcome = parseCapabilities(capabilitiesPayload());
    expect(outcome.success).toBe(true);
    if (outcome.success) {
      expect(outcome.data.rate_limit).toBeUndefined();
    }
  });

  it('rejects rate_limit with non-positive limit_per_minute', () => {
    const outcome = parseCapabilities(
      capabilitiesPayload({ rate_limit: { limit_per_minute: 0 } }),
    );
    expect(outcome.success).toBe(false);
  });

  it('rejects rate_limit with invalid reset_at datetime', () => {
    const outcome = parseCapabilities(
      capabilitiesPayload({ rate_limit: { limit_per_minute: 60, reset_at: 'not-a-datetime' } }),
    );
    expect(outcome.success).toBe(false);
  });

  it('rejects supported_verbs with an invalid verb (MED-3 enum guard)', () => {
    const outcome = parseCapabilities(capabilitiesPayload({ supported_verbs: ['invalid_verb'] }));
    expect(outcome.success).toBe(false);
  });

  it('rejects empty resources array', () => {
    const outcome = parseCapabilities(capabilitiesPayload({ resources: [] }));
    expect(outcome.success).toBe(false);
  });

  it('rejects non-integer max_rows_per_query', () => {
    const outcome = parseCapabilities(capabilitiesPayload({ max_rows_per_query: 1.5 }));
    expect(outcome.success).toBe(false);
  });
});
// ---------------------------------------------------------------------------
// FederationErrorEnvelopeSchema + error code exhaustiveness
// ---------------------------------------------------------------------------
describe('FederationErrorEnvelopeSchema', () => {
  const parseEnvelope = (input: unknown) => FederationErrorEnvelopeSchema.safeParse(input);

  it('accepts each valid error code', () => {
    // Iterating the exported tuple keeps this case in step with the code list.
    FEDERATION_ERROR_CODES.forEach((code) => {
      const outcome = parseEnvelope({ error: { code, message: 'test' } });
      expect(outcome.success, `code ${code} should be valid`).toBe(true);
    });
  });

  it('rejects an unknown error code', () => {
    const outcome = parseEnvelope({ error: { code: 'unknown_code', message: 'test' } });
    expect(outcome.success).toBe(false);
  });

  it('accepts optional details field', () => {
    const outcome = parseEnvelope({
      error: { code: 'forbidden', message: 'nope', details: { grantId: 'xyz' } },
    });
    expect(outcome.success).toBe(true);
  });

  it('rejects when message is missing', () => {
    const outcome = parseEnvelope({ error: { code: 'not_found' } });
    expect(outcome.success).toBe(false);
  });
});
describe('parseFederationErrorEnvelope', () => {
  it('returns a typed envelope for valid input', () => {
    const raw = { error: { code: 'not_found', message: 'gone' } };
    const { error } = parseFederationErrorEnvelope(raw);
    expect(error.code).toBe('not_found');
  });

  it('throws for invalid input', () => {
    // A payload without an `error` key is not an envelope at all.
    const attempt = () => parseFederationErrorEnvelope({ bad: 'shape' });
    expect(attempt).toThrow();
  });
});
// ---------------------------------------------------------------------------
// FederationError exception hierarchy
// ---------------------------------------------------------------------------
describe('FederationError hierarchy', () => {
  // One [expected wire code, default-constructed instance] pair per concrete subclass.
  const cases: Array<[string, FederationError]> = [
    ['unauthorized', new FederationUnauthorizedError()],
    ['forbidden', new FederationForbiddenError()],
    ['not_found', new FederationNotFoundError()],
    ['rate_limited', new FederationRateLimitedError()],
    ['scope_violation', new FederationScopeViolationError()],
    ['invalid_request', new FederationInvalidRequestError()],
    ['internal_error', new FederationInternalError()],
  ];

  it.each(cases)('code %s is an instance of FederationError', (_code, err) => {
    expect(err).toBeInstanceOf(FederationError);
    expect(err).toBeInstanceOf(Error);
  });

  it.each(cases)('code %s has correct code property', (code, err) => {
    expect(err.code).toBe(code);
  });

  it('toEnvelope serialises to wire format', () => {
    const err = new FederationForbiddenError('Access denied', { grantId: 'g1' });
    const env = err.toEnvelope();
    expect(env.error.code).toBe('forbidden');
    expect(env.error.message).toBe('Access denied');
    expect(env.error.details).toEqual({ grantId: 'g1' });
  });

  it('toEnvelope omits details when not provided', () => {
    const err = new FederationNotFoundError();
    const env = err.toEnvelope();
    expect(Object.prototype.hasOwnProperty.call(env.error, 'details')).toBe(false);
  });

  it('error codes tuple covers all subclasses (exhaustiveness check)', () => {
    // Runtime (not compile-time) guard: it fails when a code is added to
    // FEDERATION_ERROR_CODES without a matching entry in `cases`, or when a
    // case uses a code that is not in the tuple.
    const declaredCodes = new Set<string>(FEDERATION_ERROR_CODES);
    const coveredCodes = new Set(cases.map(([code]) => code));
    // Set equality checks both directions, so a duplicated case cannot mask a
    // missing code the way a bare length comparison could.
    expect(coveredCodes).toEqual(declaredCodes);
    // Each code is exercised exactly once.
    expect(cases).toHaveLength(FEDERATION_ERROR_CODES.length);
  });
});
// ---------------------------------------------------------------------------
// Source tag + tagWithSource
// ---------------------------------------------------------------------------
describe('SourceTagSchema', () => {
it('accepts a non-empty _source string', () => {
expect(SourceTagSchema.safeParse({ _source: 'local' }).success).toBe(true);
expect(SourceTagSchema.safeParse({ _source: 'mosaic.uscllc.com' }).success).toBe(true);
});
it('rejects empty _source string', () => {
expect(SourceTagSchema.safeParse({ _source: '' }).success).toBe(false);
});
});
describe('tagWithSource', () => {
it('stamps each item with the given source', () => {
const items = [{ id: '1' }, { id: '2' }];
const tagged = tagWithSource(items, SOURCE_LOCAL);
expect(tagged).toEqual([
{ id: '1', _source: 'local' },
{ id: '2', _source: 'local' },
]);
});
it('preserves original item fields', () => {
const items = [{ id: 'x', name: 'Task', done: false }];
const tagged = tagWithSource(items, 'mosaic.uscllc.com');
expect(tagged[0]).toMatchObject({ id: 'x', name: 'Task', done: false });
expect(tagged[0]?._source).toBe('mosaic.uscllc.com');
});
it('returns empty array for empty input', () => {
expect(tagWithSource([], 'local')).toEqual([]);
});
it('round-trip: tagWithSource output passes SourceTagSchema', () => {
const tagged = tagWithSource([{ id: '1' }], 'local');
expect(SourceTagSchema.safeParse(tagged[0]).success).toBe(true);
});
});

View File

@@ -0,0 +1,164 @@
/**
* Federation wire-format error envelope and exception hierarchy.
*
* Source of truth: docs/federation/PRD.md §6, §8.
*
* DESIGN: Typed error classes rather than discriminated union values
* ──────────────────────────────────────────────────────────────────
* We expose:
* 1. `FEDERATION_ERROR_CODES` — closed string-enum tuple (exhaustiveness-checkable).
* 2. `FederationErrorCode` — union type inferred from the tuple.
* 3. `FederationErrorEnvelopeSchema` — Zod schema for the wire format.
* 4. `FederationError` — base Error subclass with a typed `code` property.
* One concrete subclass per code (e.g. `FederationUnauthorizedError`),
* which enables `instanceof` dispatch in handlers without a switch.
*
* Rationale: subclasses give gateway handlers and the client a clean dispatch
* point (catch + instanceof) without re-parsing or switch tables. All classes
* carry `code` so a generic logger can act on any FederationError uniformly.
*
* Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
*/
import { z } from 'zod';
// ---------------------------------------------------------------------------
// Error code enum (closed)
// ---------------------------------------------------------------------------
/**
 * Closed list of error codes allowed in a federation error envelope.
 *
 * Declared `as const` so the literal members are preserved in the type; the
 * `FederationErrorCode` union below is derived from this tuple.
 */
export const FEDERATION_ERROR_CODES = [
'unauthorized',
'forbidden',
'not_found',
'rate_limited',
'scope_violation',
'invalid_request',
'internal_error',
] as const;
/** Union of the string literals listed in `FEDERATION_ERROR_CODES`. */
export type FederationErrorCode = (typeof FEDERATION_ERROR_CODES)[number];
// ---------------------------------------------------------------------------
// Wire-format schema
// ---------------------------------------------------------------------------
/**
 * Zod schema for the wire-format error envelope: a single `error` object with
 * a code from `FEDERATION_ERROR_CODES`, a message, and optional details.
 */
export const FederationErrorEnvelopeSchema = z.object({
error: z.object({
// Must be one of the literals in FEDERATION_ERROR_CODES.
code: z.enum(FEDERATION_ERROR_CODES),
// Required string.
message: z.string(),
// Optional payload of any shape.
details: z.unknown().optional(),
}),
});
/** Static type inferred from `FederationErrorEnvelopeSchema`. */
export type FederationErrorEnvelope = z.infer<typeof FederationErrorEnvelopeSchema>;
// ---------------------------------------------------------------------------
// Exception class hierarchy
// ---------------------------------------------------------------------------
/**
 * Root of the federation exception hierarchy.
 *
 * Every instance exposes a typed `code` (and optional `details`), so generic
 * handlers can treat any FederationError the same way regardless of subclass.
 */
export class FederationError extends Error {
  readonly code: FederationErrorCode;
  readonly details?: unknown;

  /**
   * @param code - Wire-format error code for this failure.
   * @param message - Human-readable description, passed to `Error`.
   * @param details - Optional extra context; stored as given.
   */
  constructor(code: FederationErrorCode, message: string, details?: unknown) {
    super(message);
    this.name = 'FederationError';
    this.code = code;
    this.details = details;
  }

  /**
   * Build the wire-format error envelope for this error.
   *
   * @returns `{ error: { code, message } }`, plus `details` only when it is
   *   not `undefined`.
   */
  toEnvelope(): FederationErrorEnvelope {
    const { code, message, details } = this;
    const error: FederationErrorEnvelope['error'] = { code, message };
    if (details !== undefined) {
      error.details = details;
    }
    return { error };
  }
}
/**
 * Client cert is missing, invalid, or signed by an untrusted CA.
 *
 * Always carries code `'unauthorized'`; `name` is set to the class name.
 *
 * @param message - Human-readable description; defaults to `'Unauthorized'`.
 * @param details - Optional extra context, forwarded to `FederationError`.
 */
export class FederationUnauthorizedError extends FederationError {
constructor(message = 'Unauthorized', details?: unknown) {
super('unauthorized', message, details);
this.name = 'FederationUnauthorizedError';
}
}
/**
 * Grant is inactive, revoked, or the subject user lacks access to the resource.
 *
 * Always carries code `'forbidden'`; `name` is set to the class name.
 *
 * @param message - Human-readable description; defaults to `'Forbidden'`.
 * @param details - Optional extra context, forwarded to `FederationError`.
 */
export class FederationForbiddenError extends FederationError {
constructor(message = 'Forbidden', details?: unknown) {
super('forbidden', message, details);
this.name = 'FederationForbiddenError';
}
}
/**
 * Requested resource does not exist.
 *
 * Always carries code `'not_found'`; `name` is set to the class name.
 *
 * @param message - Human-readable description; defaults to `'Not found'`.
 * @param details - Optional extra context, forwarded to `FederationError`.
 */
export class FederationNotFoundError extends FederationError {
constructor(message = 'Not found', details?: unknown) {
super('not_found', message, details);
this.name = 'FederationNotFoundError';
}
}
/**
 * Grant has exceeded its rate limit; Retry-After should accompany this.
 *
 * Always carries code `'rate_limited'`; `name` is set to the class name.
 * This class does not itself attach any retry information.
 *
 * @param message - Human-readable description; defaults to `'Rate limit exceeded'`.
 * @param details - Optional extra context, forwarded to `FederationError`.
 */
export class FederationRateLimitedError extends FederationError {
constructor(message = 'Rate limit exceeded', details?: unknown) {
super('rate_limited', message, details);
this.name = 'FederationRateLimitedError';
}
}
/**
 * The request targets a resource or performs an action that the grant's
 * scope explicitly disallows (distinct from generic 403 — scope_violation
 * means the scope configuration itself blocked the request).
 *
 * Always carries code `'scope_violation'`; `name` is set to the class name.
 *
 * @param message - Human-readable description; defaults to `'Scope violation'`.
 * @param details - Optional extra context, forwarded to `FederationError`.
 */
export class FederationScopeViolationError extends FederationError {
constructor(message = 'Scope violation', details?: unknown) {
super('scope_violation', message, details);
this.name = 'FederationScopeViolationError';
}
}
/**
 * Malformed request — missing fields, invalid cursor, unknown verb, etc.
 *
 * Always carries code `'invalid_request'`; `name` is set to the class name.
 *
 * @param message - Human-readable description; defaults to `'Invalid request'`.
 * @param details - Optional extra context, forwarded to `FederationError`.
 */
export class FederationInvalidRequestError extends FederationError {
constructor(message = 'Invalid request', details?: unknown) {
super('invalid_request', message, details);
this.name = 'FederationInvalidRequestError';
}
}
/**
 * Unexpected server-side failure.
 *
 * Always carries code `'internal_error'`; `name` is set to the class name.
 *
 * @param message - Human-readable description; defaults to `'Internal error'`.
 * @param details - Optional extra context, forwarded to `FederationError`.
 */
export class FederationInternalError extends FederationError {
constructor(message = 'Internal error', details?: unknown) {
super('internal_error', message, details);
this.name = 'FederationInternalError';
}
}
// ---------------------------------------------------------------------------
// Typed parser
// ---------------------------------------------------------------------------
/**
 * Validate an arbitrary value against `FederationErrorEnvelopeSchema`.
 *
 * @param input - Value of unknown shape to validate.
 * @returns The validated envelope.
 * @throws Error - a plain `Error`, deliberately not a `FederationError`,
 *   because a payload that fails here was never a valid error envelope. The
 *   message lists one line per schema issue with its path (or `root`).
 */
export function parseFederationErrorEnvelope(input: unknown): FederationErrorEnvelope {
  const parsed = FederationErrorEnvelopeSchema.safeParse(input);
  if (parsed.success) {
    return parsed.data;
  }

  const lines: string[] = [];
  for (const issue of parsed.error.issues) {
    // Issues raised on the top-level value have an empty path.
    const location = issue.path.join('.') || 'root';
    lines.push(` - [${location}] ${issue.message}`);
  }
  throw new Error(`Invalid federation error envelope:\n${lines.join('\n')}`);
}

View File

@@ -0,0 +1,16 @@
/**
* Federation wire-format DTOs — public barrel.
*
* Exports everything downstream M3 tasks need:
* verbs.ts — FEDERATION_VERBS constant + FederationVerb type
* request.ts — FederationRequestSchema + FederationRequest
* response.ts — list/get/capabilities schema factories + types
* source-tag.ts — SourceTagSchema, tagWithSource helper
* error.ts — error envelope schema + typed exception hierarchy
*/
export * from './verbs.js';
export * from './request.js';
export * from './response.js';
export * from './source-tag.js';
export * from './error.js';

View File

@@ -0,0 +1,47 @@
/**
* Federation wire-format request schema.
*
* Source of truth: docs/federation/PRD.md §9 (query model).
*
* Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
*/
import { z } from 'zod';
import { FEDERATION_VERBS } from './verbs.js';
// ---------------------------------------------------------------------------
// Query params — free-form key/value pairs passed alongside the request
// ---------------------------------------------------------------------------
/**
 * Optional string-to-string map. Optionality is part of this schema, so a
 * field declared with it may be omitted entirely.
 */
const QueryParamsSchema = z.record(z.string(), z.string()).optional();
// ---------------------------------------------------------------------------
// Top-level request schema
// ---------------------------------------------------------------------------
/**
 * Zod schema for a federation request: a required `verb` and `resource`, plus
 * optional `params` and `cursor`.
 */
export const FederationRequestSchema = z.object({
/**
 * Verb being invoked. Must be one of the values in FEDERATION_VERBS.
 */
verb: z.enum(FEDERATION_VERBS),
/**
 * Resource path being queried, e.g. "tasks", "notes", "memory".
 * Forward-slash-separated for sub-resources (e.g. "teams/abc/tasks").
 * Only non-emptiness is validated here; the path format itself is not checked.
 */
resource: z.string().min(1, { message: 'resource must not be empty' }),
/**
 * Optional free-form query params (filters, sort, etc.).
 * Values are always strings; consumers parse as needed.
 */
params: QueryParamsSchema,
/**
 * Opaque pagination cursor returned by a previous list response.
 * Absent on first page.
 */
cursor: z.string().optional(),
});
/** Static type inferred from `FederationRequestSchema`. */
export type FederationRequest = z.infer<typeof FederationRequestSchema>;

View File

@@ -0,0 +1,162 @@
/**
* Federation wire-format response schemas.
*
* Source of truth: docs/federation/PRD.md §9 and MILESTONES.md §M3.
*
* DESIGN: Generic factory functions rather than z.lazy
* ─────────────────────────────────────────────────────
* Zod generic schemas cannot be expressed as a single re-usable `z.ZodType`
* value because TypeScript's type system erases the generic at the call site.
* The idiomatic Zod v4 pattern is factory functions that take an item schema
* and return a fully-typed schema.
*
* const MyListSchema = FederationListResponseSchema(z.string());
* type MyList = z.infer<typeof MyListSchema>;
* // => { items: string[]; nextCursor?: string; _partial?: boolean; _truncated?: boolean }
*
* Downstream consumers (M3-03..M3-07, M3-08, M3-09) should call these
* factories once per resource type and cache the result.
*
* Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
*/
import { z } from 'zod';
import { FEDERATION_VERBS } from './verbs.js';
// ---------------------------------------------------------------------------
// Shared envelope flags
// ---------------------------------------------------------------------------
/**
 * `_partial`: true when the response is a subset of available data (e.g. due
 * to scope intersection reducing the result set).
 *
 * Optional boolean — the schema accepts the key being absent.
 */
const PartialFlag = z.boolean().optional();
/**
 * `_truncated`: true when the response was capped by max_rows_per_query and
 * additional pages exist beyond the current cursor.
 *
 * Optional boolean — the schema accepts the key being absent.
 */
const TruncatedFlag = z.boolean().optional();
// ---------------------------------------------------------------------------
// FederationListResponseSchema<T> factory
// ---------------------------------------------------------------------------
/**
 * Build the Zod schema for a paginated federation list envelope.
 *
 * The envelope has a required `items` array validated by `itemSchema`, an
 * optional string `nextCursor`, and the optional `_partial` / `_truncated`
 * flags.
 *
 * @param itemSchema - Zod schema for a single item in the list.
 * @returns An object schema whose inferred type carries the item type.
 *
 * @example
 * ```ts
 * const TaskListSchema = FederationListResponseSchema(TaskSchema);
 * type TaskList = z.infer<typeof TaskListSchema>;
 * ```
 */
export function FederationListResponseSchema<T extends z.ZodTypeAny>(itemSchema: T) {
  const items = z.array(itemSchema);
  const nextCursor = z.string().optional();
  return z.object({
    items,
    nextCursor,
    _partial: PartialFlag,
    _truncated: TruncatedFlag,
  });
}
/**
 * Hand-written TypeScript shape of a federation list envelope for item type `T`.
 * Field names and optionality match the object built by
 * `FederationListResponseSchema`.
 */
export type FederationListResponse<T> = {
// Items on this page.
items: T[];
// Cursor string for a following page, when present.
nextCursor?: string;
_partial?: boolean;
_truncated?: boolean;
};
// ---------------------------------------------------------------------------
// FederationGetResponseSchema<T> factory
// ---------------------------------------------------------------------------
/**
 * Build the Zod schema for a single-item federation get envelope.
 *
 * `item` is null when the resource was not found (404 equivalent on the wire).
 *
 * @param itemSchema - Zod schema for the item; `.nullable()` is applied here,
 *   so callers pass the non-null item schema.
 * @returns An object schema with a nullable `item` and the optional
 *   `_partial` flag.
 *
 * @example
 * ```ts
 * const TaskGetSchema = FederationGetResponseSchema(TaskSchema);
 * type TaskGet = z.infer<typeof TaskGetSchema>;
 * ```
 */
export function FederationGetResponseSchema<T extends z.ZodTypeAny>(itemSchema: T) {
  const item = itemSchema.nullable();
  return z.object({ item, _partial: PartialFlag });
}
/**
 * Hand-written TypeScript shape of a federation get envelope for item type `T`.
 * Field names and optionality match the object built by
 * `FederationGetResponseSchema`.
 */
export type FederationGetResponse<T> = {
// The item, or null.
item: T | null;
_partial?: boolean;
};
// ---------------------------------------------------------------------------
// FederationCapabilitiesResponseSchema (fixed shape)
// ---------------------------------------------------------------------------
/**
 * Filter entry for one resource (mirrors FederationScope.filters from
 * PRD §8.1): controls team/personal visibility. Both fields are optional.
 */
const CapabilityResourceFilterSchema = z.object({
  include_teams: z.array(z.string()).optional(),
  include_personal: z.boolean().optional(),
});

/**
 * Rate-limit state for a grant (PRD §9.1). Only `limit_per_minute` is
 * required; `remaining` and `reset_at` are optional.
 */
const CapabilityRateLimitSchema = z.object({
  limit_per_minute: z.number().int().positive(),
  remaining: z.number().int().nonnegative().optional(),
  reset_at: z.string().datetime().optional(),
});

/**
 * Fixed-shape schema for the capabilities response.
 *
 * Shape mirrors FederationScope (apps/gateway/src/federation/scope-schema.ts)
 * but is kept separate to avoid coupling packages/types to the gateway module.
 * The serving side populates this from the resolved grant scope at request time.
 */
export const FederationCapabilitiesResponseSchema = z.object({
  /** Resources this grant is allowed to query; at least one is required. */
  resources: z.array(z.string()).nonempty(),
  /** Resources explicitly blocked for this grant even if they exist. */
  excluded_resources: z.array(z.string()),
  /** Optional per-resource filters, keyed by resource name. */
  filters: z.record(z.string(), CapabilityResourceFilterSchema).optional(),
  /** Hard cap on rows returned per query for this grant; positive integer. */
  max_rows_per_query: z.number().int().positive(),
  /**
   * Verbs currently available. Will expand in M4+ (search).
   * Closed enum — only values from FEDERATION_VERBS are accepted, and the
   * list must not be empty.
   */
  supported_verbs: z.array(z.enum(FEDERATION_VERBS)).nonempty(),
  /**
   * Rate-limit state for this grant (PRD §9.1).
   * M4 populates `remaining` and `reset_at`; M3 servers may return only
   * `limit_per_minute` or omit the field entirely.
   */
  rate_limit: CapabilityRateLimitSchema.optional(),
});
/** Static type inferred from `FederationCapabilitiesResponseSchema`. */
export type FederationCapabilitiesResponse = z.infer<typeof FederationCapabilitiesResponseSchema>;

View File

@@ -0,0 +1,61 @@
/**
* _source tag for federation fan-out results.
*
* Source of truth: docs/federation/PRD.md §9.3 and MILESTONES.md §M3 acceptance test #8.
*
* When source: "all" is requested, the gateway fans out to local + all active
* federated peers, merges results, and tags each item with _source so the
* caller knows the provenance.
*
* Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
*/
import { z } from 'zod';
// ---------------------------------------------------------------------------
// Source tag schema
// ---------------------------------------------------------------------------
/**
 * `_source` is either:
 * - `"local"` — the item came from this gateway's own storage.
 * - a peer common name (e.g. `"mosaic.uscllc.com"`) — the item came from
 * that federated peer.
 *
 * The schema itself only enforces that `_source` is a non-empty string; it
 * does not distinguish between the two cases.
 */
export const SourceTagSchema = z.object({
_source: z.string().min(1, { message: '_source must not be empty' }),
});
/** Static type inferred from `SourceTagSchema`. */
export type SourceTag = z.infer<typeof SourceTagSchema>;
/**
 * Literal constant for the well-known local source value (`'local'`).
 * Peers are identified by hostname strings, so there is no closed enum.
 */
export const SOURCE_LOCAL = 'local' as const;
// ---------------------------------------------------------------------------
// Helper: tagWithSource
// ---------------------------------------------------------------------------
/**
 * Stamps each item in `items` with `{ _source: source }`.
 *
 * The return type merges the item type with SourceTag so callers get full
 * type-safety on both the original fields and `_source`.
 *
 * The input is never mutated: a new array of shallow copies is returned, which
 * is why `items` is accepted as a readonly array. If an item already has a
 * `_source` property, it is overwritten in the copy.
 *
 * @param items - Array of items to tag (mutable or readonly).
 * @param source - Either `"local"` or a peer hostname (common name from the
 * client cert's CN or O field).
 * @returns A new array with one tagged copy per input item, in input order.
 *
 * @example
 * ```ts
 * const local = tagWithSource([{ id: '1', title: 'Task' }], 'local');
 * // => [{ id: '1', title: 'Task', _source: 'local' }]
 *
 * const remote = tagWithSource(peerItems, 'mosaic.uscllc.com');
 * ```
 */
export function tagWithSource<T extends object>(
  items: readonly T[],
  source: string,
): Array<T & SourceTag> {
  return items.map((item) => ({ ...item, _source: source }));
}

View File

@@ -0,0 +1,11 @@
/**
* Federation verb constants and types.
*
* Source of truth: docs/federation/PRD.md §9.1
*
* M3 ships list, get, capabilities. search lives in M4.
*/
/**
 * Closed tuple of federation verbs. Declared `as const` so the literal
 * members are preserved and the `FederationVerb` union can be derived from it.
 */
export const FEDERATION_VERBS = ['list', 'get', 'capabilities'] as const;
/** Union of the string literals listed in `FEDERATION_VERBS`. */
export type FederationVerb = (typeof FEDERATION_VERBS)[number];

View File

@@ -5,3 +5,4 @@ export * from './agent/index.js';
export * from './provider/index.js';
export * from './routing/index.js';
export * from './commands/index.js';
export * from './federation/index.js';

16
pnpm-lock.yaml generated
View File

@@ -179,6 +179,9 @@ importers:
socket.io:
specifier: ^4.8.0
version: 4.8.3
undici:
specifier: ^7.24.6
version: 7.24.6
uuid:
specifier: ^11.0.0
version: 11.1.0
@@ -679,6 +682,9 @@ importers:
class-validator:
specifier: ^0.15.1
version: 0.15.1
zod:
specifier: ^4.3.6
version: 4.3.6
devDependencies:
typescript:
specifier: ^5.8.0
@@ -6990,10 +6996,6 @@ packages:
resolution: {integrity: sha512-gBLkYIlEnSp8pFbT64yFgGE6UIB9tAkhukC23PmMDCe5Nd+cRqKxSjw5y54MK2AZMgZfJWMaNE4nYUHgi1XEOw==}
engines: {node: '>=18.17'}
undici@7.24.3:
resolution: {integrity: sha512-eJdUmK/Wrx2d+mnWWmwwLRyA7OQCkLap60sk3dOK4ViZR7DKwwptwuIvFBg2HaiP9ESaEdhtpSymQPvytpmkCA==}
engines: {node: '>=20.18.1'}
undici@7.24.6:
resolution: {integrity: sha512-Xi4agocCbRzt0yYMZGMA6ApD7gvtUFaxm4ZmeacWI4cZxaF6C+8I8QfofC20NAePiB/IcvZmzkJ7XPa471AEtA==}
engines: {node: '>=20.18.1'}
@@ -8728,7 +8730,7 @@ snapshots:
openai: 6.26.0(ws@8.20.0)(zod@4.3.6)
partial-json: 0.1.7
proxy-agent: 6.5.0
undici: 7.24.3
undici: 7.24.6
zod-to-json-schema: 3.25.1(zod@4.3.6)
transitivePeerDependencies:
- '@modelcontextprotocol/sdk'
@@ -12587,7 +12589,7 @@ snapshots:
saxes: 6.0.0
symbol-tree: 3.2.4
tough-cookie: 6.0.1
undici: 7.24.3
undici: 7.24.6
w3c-xmlserializer: 5.0.0
webidl-conversions: 8.0.1
whatwg-mimetype: 5.0.0
@@ -14438,8 +14440,6 @@ snapshots:
undici@6.21.3: {}
undici@7.24.3: {}
undici@7.24.6: {}
unhomoglyph@1.0.6: {}

View File

@@ -0,0 +1,254 @@
# Federation Test Harness
Local two-gateway federation test infrastructure for Mosaic Stack M3+.
This harness boots two real gateway instances (`gateway-a`, `gateway-b`) on a
shared Docker bridge network, each backed by its own Postgres (pgvector) +
Valkey, sharing a single Step-CA. It is the test bed for all M3+ federation
E2E tests.
## Prerequisites
- Docker with Compose v2 (`docker compose version` ≥ 2.20)
- pnpm (for running via repo scripts)
- `infra/step-ca/dev-password` must exist (copy from `infra/step-ca/dev-password.example`)
## Network Topology
```
Host machine
├── localhost:14001 → gateway-a (Server A — home / requesting)
├── localhost:14002 → gateway-b (Server B — work / serving)
├── localhost:15432 → postgres-a
├── localhost:15433 → postgres-b
├── localhost:16379 → valkey-a
├── localhost:16380 → valkey-b
└── localhost:19000 → step-ca (shared CA)
Docker network: fed-test-net (bridge)
gateway-a ←──── mTLS ────→ gateway-b
↘ ↗
step-ca
```
Ports are chosen to avoid collision with the base dev stack (5433, 6380, 14242, 9000).
## Starting the Harness
```bash
# From repo root
docker compose -f tools/federation-harness/docker-compose.two-gateways.yml up -d
# Wait for all services to be healthy (~60-90s on first boot due to NestJS cold start)
docker compose -f tools/federation-harness/docker-compose.two-gateways.yml ps
```
## Seeding Test Data
The seed script provisions three grant scope variants (A, B, C) and walks the
full enrollment flow so Server A ends up with active peers pointing at Server B.
```bash
# Assumes stack is already running
pnpm tsx tools/federation-harness/seed.ts
# Or boot + seed in one step
pnpm tsx tools/federation-harness/seed.ts --boot
```
### Scope Variants
| Variant | Resources | Filters | Excluded | Purpose |
| ------- | ------------------ | ---------------------------------- | ----------- | ------------------------------- |
| A | tasks, notes | include_personal: true | (none) | Personal data federation |
| B | tasks | include_teams: ['T1'], no personal | (none) | Team-scoped, no personal |
| C | tasks, credentials | include_personal: true | credentials | Sanity: excluded wins over list |
## Using from Vitest
```ts
import {
bootHarness,
tearDownHarness,
serverA,
serverB,
seed,
} from '../../tools/federation-harness/harness.js';
import type { HarnessHandle } from '../../tools/federation-harness/harness.js';
let handle: HarnessHandle;
beforeAll(async () => {
handle = await bootHarness();
}, 180_000); // allow 3 min for Docker pull + NestJS cold start
afterAll(async () => {
await tearDownHarness(handle);
});
test('variant A: list tasks returns personal tasks', async () => {
// NOTE: Only 'all' is supported for now — per-variant narrowing is M3-11.
const seedResult = await seed(handle, 'all');
const a = serverA(handle);
const res = await fetch(`${a.baseUrl}/api/federation/tasks`, {
headers: { 'x-federation-grant': seedResult.grants.variantA.id },
});
expect(res.status).toBe(200);
});
```
> **Note:** `seed()` bootstraps a fresh admin user on each gateway via
> `POST /api/bootstrap/setup`. Both gateways must have zero users (pristine DB).
> If either gateway already has users, `seed()` throws with a clear error.
> Reset state with `docker compose down -v`.
The `bootHarness()` function is **idempotent**: if both gateways are already
healthy, it reuses the running stack and returns `ownedStack: false`. Tests
should not call `tearDownHarness` when `ownedStack` is false unless they
explicitly want to shut down a shared stack.
## Vitest Config (pnpm test:federation)
Create a dedicated config at the repo root, `vitest.federation.config.ts` (or merge these settings into the existing `vitest.config.ts`):
```ts
// vitest.federation.config.ts
import { defineConfig } from 'vitest/config';
export default defineConfig({
test: {
include: ['**/*.federation.test.ts'],
testTimeout: 60_000,
hookTimeout: 180_000,
reporters: ['verbose'],
},
});
```
Then add to root `package.json`:
```json
"test:federation": "vitest run --config vitest.federation.config.ts"
```
## Nuking State
```bash
# Remove containers AND volumes (ephemeral state — CA keys, DBs, everything)
docker compose -f tools/federation-harness/docker-compose.two-gateways.yml down -v
```
On next `up`, Step-CA re-initialises from scratch and generates new CA keys.
## Step-CA Root Certificate
The CA root lives in the `fed-harness-step-ca` Docker volume at
`/home/step/certs/root_ca.crt`. To extract it to the host:
```bash
docker run --rm \
-v fed-harness-step-ca:/home/step \
alpine cat /home/step/certs/root_ca.crt > /tmp/fed-harness-root-ca.crt
```
## Troubleshooting
### Port conflicts
Default host ports: 14001, 14002, 15432, 15433, 16379, 16380, 19000.
Override via environment variables before `docker compose up`:
```bash
GATEWAY_A_HOST_PORT=14101 GATEWAY_B_HOST_PORT=14102 \
docker compose -f tools/federation-harness/docker-compose.two-gateways.yml up -d
```
### Image pull failures
The gateway image is digest-pinned to:
```
git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
```
(sha-9f1a081, post-#491 IMG-FIX)
If the registry is unreachable, Docker will use the locally cached image if
present. If no local image exists, the compose up will fail with a pull error.
In that case:
1. Ensure you can reach `git.mosaicstack.dev` (VPN, DNS, etc.).
2. Log in: `docker login git.mosaicstack.dev`
3. Pull manually: `docker pull git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02`
### NestJS cold start
Gateway containers take 40–60 seconds to become healthy on first boot (Node.js
module resolution + NestJS DI bootstrap). The `start_period: 60s` in the
compose healthcheck covers this. `bootHarness()` polls for up to 3 minutes.
### Step-CA startup
Step-CA initialises on first boot (generates CA keys). This takes ~5-10s.
The `start_period: 30s` in the healthcheck covers it. Both gateways wait for
Step-CA to be healthy before starting (`depends_on: step-ca: condition: service_healthy`).
### dev-password missing
The Step-CA container requires `infra/step-ca/dev-password` to be mounted.
Copy the example and set a local password:
```bash
cp infra/step-ca/dev-password.example infra/step-ca/dev-password
# Edit the file to set your preferred dev CA password
```
The file is `.gitignore`d — do not commit it.
## Image Digest Note
The gateway image is pinned to `sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02`
(sha-9f1a081). This is the digest promoted by PR #491 (IMG-FIX). The `latest`
tag is forbidden per Mosaic image policy. When a new gateway build is promoted,
update the digest in `docker-compose.two-gateways.yml` and in this file.
## Known Limitations
### BETTER_AUTH_URL enrollment URL bug (upstream production code — not yet fixed)
`apps/gateway/src/federation/federation.controller.ts:145` constructs the
enrollment URL using `process.env['BETTER_AUTH_URL'] ?? 'http://localhost:14242'`.
This is an upstream bug: `BETTER_AUTH_URL` is the Better Auth origin (typically
the web app), not the gateway's own base URL. In non-harness deployments this
produces an enrollment URL pointing to the wrong host or port.
**How the harness handles this:**
1. **In-cluster calls (container-to-container):** The compose file sets
`BETTER_AUTH_URL: 'http://gateway-b:3000'` so the enrollment URL returned by
the gateway uses the Docker internal hostname. This lets other containers in the
`fed-test-net` network resolve and reach Server B's enrollment endpoint.
2. **Host-side URL rewrite (seed script):** The `seed.ts` script runs on the host
machine where `gateway-b` is not a resolvable hostname. Before calling
`fetch(enrollmentUrl, ...)`, the seed script rewrites the URL: it extracts only
the token path segment from `enrollmentUrl` and reassembles the URL using the
host-accessible `serverBUrl` (default: `http://localhost:14002`). This lets the
seed script redeem enrollment tokens from the host without being affected by the
in-cluster hostname in the returned URL.
**TODO:** Fix `federation.controller.ts` to derive the enrollment URL from its own
listening address (e.g. `GATEWAY_BASE_URL` env var or a dedicated
`FEDERATION_ENROLLMENT_BASE_URL` env var) rather than reusing `BETTER_AUTH_URL`.
Tracked as a follow-up to PR #505 — do not bundle with harness changes.
## Permanent Infrastructure
This harness is designed to outlive M3 and be reused by M4+ milestone tests.
It is not a throwaway scaffold — treat it as production test infrastructure:
- Keep it idempotent.
- Do not hardcode test assumptions in the harness layer (put them in tests).
- Update the seed script when new scope variants are needed.
- The README and harness should be kept in sync as the federation API evolves.

View File

@@ -0,0 +1,247 @@
# tools/federation-harness/docker-compose.two-gateways.yml
#
# Two-gateway federation test harness — local-only, no Portainer/Swarm needed.
#
# USAGE (manual):
# docker compose -f tools/federation-harness/docker-compose.two-gateways.yml up -d
# docker compose -f tools/federation-harness/docker-compose.two-gateways.yml down -v
#
# USAGE (from harness.ts):
# const handle = await bootHarness();
# ...
# await tearDownHarness(handle);
#
# TOPOLOGY:
# gateway-a — "home" instance (Server A, the requesting side)
# └── postgres-a (pgvector/pg17, port 15432)
# └── valkey-a (port 16379)
# gateway-b — "work" instance (Server B, the serving side)
# └── postgres-b (pgvector/pg17, port 15433)
# └── valkey-b (port 16380)
# step-ca — shared CA for both gateways (port 19000)
#
# All services share the `fed-test-net` bridge network.
# Host port ranges (15432-15433, 16379-16380, 14001-14002, 19000) are chosen
# to avoid collision with the base dev stack (5433, 6380, 14242, 9000).
#
# IMAGE:
# Pinned to the immutable digest sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
# (sha-9f1a081, post-#491 IMG-FIX, smoke-tested locally).
# Update this digest only after a new CI build is promoted to the registry.
#
# STEP-CA:
# Single shared Step-CA instance. Both gateways connect to it.
# CA volume is ephemeral per `docker compose down -v`; regenerated on next up.
# The harness seed script provisions the CA roots cross-trust after first boot.
services:
# ─── Shared Certificate Authority ────────────────────────────────────────────
# Single CA instance used by both gateways. The image's entrypoint is replaced
# with the mounted init script; the CA password is read from the mounted
# dev-password file.
step-ca:
image: smallstep/step-ca:0.27.4
container_name: fed-harness-step-ca
restart: unless-stopped
ports:
# Host port is overridable via STEP_CA_HOST_PORT (default 19000).
- '${STEP_CA_HOST_PORT:-19000}:9000'
volumes:
- step_ca_data:/home/step
- ../../infra/step-ca/init.sh:/usr/local/bin/mosaic-step-ca-init.sh:ro
- ../../infra/step-ca/templates:/etc/step-ca-templates:ro
- ../../infra/step-ca/dev-password:/run/secrets/ca_password:ro
entrypoint: ['/bin/sh', '/usr/local/bin/mosaic-step-ca-init.sh']
networks:
- fed-test-net
healthcheck:
# Runs `step ca health` against the in-container listener, trusting the root
# certificate stored in the data volume.
test:
[
'CMD',
'step',
'ca',
'health',
'--ca-url',
'https://localhost:9000',
'--root',
'/home/step/certs/root_ca.crt',
]
interval: 10s
timeout: 5s
retries: 5
start_period: 30s
# ─── Server A — Home / Requesting Gateway ────────────────────────────────────
# Database for gateway-a. Init scripts from infra/pg-init are mounted read-only
# into the image's initdb directory.
postgres-a:
image: pgvector/pgvector:pg17
container_name: fed-harness-postgres-a
restart: unless-stopped
ports:
# Host port is overridable via PG_A_HOST_PORT (default 15432).
- '${PG_A_HOST_PORT:-15432}:5432'
environment:
POSTGRES_USER: mosaic
POSTGRES_PASSWORD: mosaic
POSTGRES_DB: mosaic
volumes:
- pg_a_data:/var/lib/postgresql/data
- ../../infra/pg-init:/docker-entrypoint-initdb.d:ro
networks:
- fed-test-net
healthcheck:
# gateway-a waits on this check (depends_on: service_healthy).
test: ['CMD-SHELL', 'pg_isready -U mosaic']
interval: 5s
timeout: 3s
retries: 5
# Valkey instance for gateway-a (reached by the gateway via VALKEY_URL).
valkey-a:
image: valkey/valkey:8-alpine
container_name: fed-harness-valkey-a
restart: unless-stopped
ports:
# Host port is overridable via VALKEY_A_HOST_PORT (default 16379).
- '${VALKEY_A_HOST_PORT:-16379}:6379'
volumes:
- valkey_a_data:/data
networks:
- fed-test-net
healthcheck:
# gateway-a waits on this check (depends_on: service_healthy).
test: ['CMD', 'valkey-cli', 'ping']
interval: 5s
timeout: 3s
retries: 5
# Server A gateway. Starts only after its Postgres, its Valkey and the shared
# Step-CA all report healthy.
gateway-a:
image: git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
# Tag for human reference: sha-9f1a081 (post-#491 IMG-FIX; smoke-tested locally)
container_name: fed-harness-gateway-a
restart: unless-stopped
ports:
# Host port is overridable via GATEWAY_A_HOST_PORT (default 14001).
- '${GATEWAY_A_HOST_PORT:-14001}:3000'
environment:
MOSAIC_TIER: federated
DATABASE_URL: postgres://mosaic:mosaic@postgres-a:5432/mosaic
VALKEY_URL: redis://valkey-a:6379
GATEWAY_PORT: '3000'
GATEWAY_CORS_ORIGIN: 'http://localhost:14001'
BETTER_AUTH_SECRET: harness-secret-server-a-do-not-use-in-prod
# Set to the in-network hostname rather than a host-mapped URL.
BETTER_AUTH_URL: 'http://gateway-a:3000'
STEP_CA_URL: 'https://step-ca:9000'
FEDERATION_PEER_HOSTNAME: gateway-a
# Bootstrap password for POST /api/bootstrap/setup — used by seed.ts to create
# the first admin user. Only valid on a pristine (zero-user) database.
# Not the same as ADMIN_API_KEY — there is no static API key in the gateway.
ADMIN_BOOTSTRAP_PASSWORD: harness-admin-password-a
depends_on:
postgres-a:
condition: service_healthy
valkey-a:
condition: service_healthy
step-ca:
condition: service_healthy
networks:
- fed-test-net
healthcheck:
# Inline Node probe: exits 0 only when GET /api/health returns 200, and exits 1
# on any other status or on a connection error.
test:
[
'CMD',
'node',
'-e',
"require('http').get('http://127.0.0.1:3000/api/health', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))",
]
interval: 10s
timeout: 5s
retries: 5
start_period: 60s
# ─── Server B — Work / Serving Gateway ──────────────────────────────────────
# Database for gateway-b. Init scripts from infra/pg-init are mounted read-only
# into the image's initdb directory.
postgres-b:
image: pgvector/pgvector:pg17
container_name: fed-harness-postgres-b
restart: unless-stopped
ports:
# Host port is overridable via PG_B_HOST_PORT (default 15433).
- '${PG_B_HOST_PORT:-15433}:5432'
environment:
POSTGRES_USER: mosaic
POSTGRES_PASSWORD: mosaic
POSTGRES_DB: mosaic
volumes:
- pg_b_data:/var/lib/postgresql/data
- ../../infra/pg-init:/docker-entrypoint-initdb.d:ro
networks:
- fed-test-net
healthcheck:
# gateway-b waits on this check (depends_on: service_healthy).
test: ['CMD-SHELL', 'pg_isready -U mosaic']
interval: 5s
timeout: 3s
retries: 5
# Valkey instance for gateway-b (reached by the gateway via VALKEY_URL).
valkey-b:
image: valkey/valkey:8-alpine
container_name: fed-harness-valkey-b
restart: unless-stopped
ports:
# Host port is overridable via VALKEY_B_HOST_PORT (default 16380).
- '${VALKEY_B_HOST_PORT:-16380}:6379'
volumes:
- valkey_b_data:/data
networks:
- fed-test-net
healthcheck:
# gateway-b waits on this check (depends_on: service_healthy).
test: ['CMD', 'valkey-cli', 'ping']
interval: 5s
timeout: 3s
retries: 5
# Server B gateway. Starts only after its Postgres, its Valkey and the shared
# Step-CA all report healthy.
gateway-b:
image: git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
# Tag for human reference: sha-9f1a081 (post-#491 IMG-FIX; smoke-tested locally)
container_name: fed-harness-gateway-b
restart: unless-stopped
ports:
# Host port is overridable via GATEWAY_B_HOST_PORT (default 14002).
- '${GATEWAY_B_HOST_PORT:-14002}:3000'
environment:
MOSAIC_TIER: federated
DATABASE_URL: postgres://mosaic:mosaic@postgres-b:5432/mosaic
VALKEY_URL: redis://valkey-b:6379
GATEWAY_PORT: '3000'
GATEWAY_CORS_ORIGIN: 'http://localhost:14002'
BETTER_AUTH_SECRET: harness-secret-server-b-do-not-use-in-prod
# Enrollment URLs returned by this gateway carry this in-network hostname;
# seed.ts rewrites them to the host-mapped URL before redeeming from the host.
BETTER_AUTH_URL: 'http://gateway-b:3000'
STEP_CA_URL: 'https://step-ca:9000'
FEDERATION_PEER_HOSTNAME: gateway-b
# Bootstrap password for POST /api/bootstrap/setup — used by seed.ts to create
# the first admin user. Only valid on a pristine (zero-user) database.
# Not the same as ADMIN_API_KEY — there is no static API key in the gateway.
ADMIN_BOOTSTRAP_PASSWORD: harness-admin-password-b
depends_on:
postgres-b:
condition: service_healthy
valkey-b:
condition: service_healthy
step-ca:
condition: service_healthy
networks:
- fed-test-net
healthcheck:
# Inline Node probe: exits 0 only when GET /api/health returns 200, and exits 1
# on any other status or on a connection error.
test:
[
'CMD',
'node',
'-e',
"require('http').get('http://127.0.0.1:3000/api/health', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))",
]
interval: 10s
timeout: 5s
retries: 5
start_period: 60s
# One bridge network joined by every service; the explicit name keeps it from
# being prefixed with the compose project name.
networks:
fed-test-net:
name: fed-test-net
driver: bridge
# Named volumes with explicit names. `docker compose down -v` removes them,
# which resets the CA and both databases.
volumes:
step_ca_data:
name: fed-harness-step-ca
pg_a_data:
name: fed-harness-pg-a
valkey_a_data:
name: fed-harness-valkey-a
pg_b_data:
name: fed-harness-pg-b
valkey_b_data:
name: fed-harness-valkey-b

View File

@@ -0,0 +1,290 @@
/**
* tools/federation-harness/harness.ts
*
* Vitest-consumable helpers for the two-gateway federation harness.
*
* USAGE (in a vitest test file):
*
* import { bootHarness, tearDownHarness, serverA, serverB, seed } from
* '../../tools/federation-harness/harness.js';
*
* let handle: HarnessHandle;
*
* beforeAll(async () => {
* handle = await bootHarness();
* }, 180_000);
*
* afterAll(async () => {
* await tearDownHarness(handle);
* });
*
* test('variant A — list tasks', async () => {
* const seedResult = await seed(handle, 'all');
* const a = serverA(handle);
* const res = await fetch(`${a.baseUrl}/api/federation/list/tasks`, {
* headers: { Authorization: `Bearer ${seedResult.adminTokenA}` },
* });
* expect(res.status).toBe(200);
* });
*
* NOTE: The `seed()` helper currently only supports scenario='all'. Passing any
* other value throws immediately. Per-variant narrowing is deferred to M3-11.
*
* ESM / NodeNext: all imports use .js extensions.
*/
import { execSync, execFileSync } from 'node:child_process';
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
import { runSeed, type SeedResult } from './seed.js';
// ─── Types ───────────────────────────────────────────────────────────────────
/** Connection details for one gateway of the two-gateway harness. */
export interface GatewayAccessor {
/** Base URL reachable from the host machine, e.g. http://localhost:14001 */
baseUrl: string;
/** Bootstrap password used for POST /api/bootstrap/setup on a pristine gateway */
bootstrapPassword: string;
/** Internal Docker network hostname (for container-to-container calls) */
internalHostname: string;
}
/**
 * State returned by bootHarness() and passed to the other harness helpers.
 */
export interface HarnessHandle {
/** Server A accessor */
a: GatewayAccessor;
/** Server B accessor */
b: GatewayAccessor;
/** Absolute path to the docker-compose file */
composeFile: string;
/**
 * Whether this instance booted the stack (vs. reusing an existing one).
 * tearDownHarness() skips teardown when this is false unless forced.
 */
ownedStack: boolean;
/** Optional seed result; assigned by seed() after a successful run */
seedResult?: SeedResult;
}
/**
 * Scenario to seed. Currently only 'all' is implemented; per-variant narrowing
 * is tracked as M3-11. Passing any other value to seed() throws immediately
 * with a clear error rather than silently over-seeding. The variant names are
 * already part of the type so call sites can be written ahead of M3-11.
 */
export type SeedScenario = 'variantA' | 'variantB' | 'variantC' | 'all';
// ─── Constants ────────────────────────────────────────────────────────────────
// ESM modules have no built-in __dirname; derive it from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// The compose file is resolved relative to this module, not the process cwd.
const COMPOSE_FILE = resolve(__dirname, 'docker-compose.two-gateways.yml');
// Host-reachable gateway URLs; the defaults match the compose file's default host ports.
const GATEWAY_A_URL = process.env['GATEWAY_A_URL'] ?? 'http://localhost:14001';
const GATEWAY_B_URL = process.env['GATEWAY_B_URL'] ?? 'http://localhost:14002';
// Defaults match ADMIN_BOOTSTRAP_PASSWORD in the compose file for each gateway.
const ADMIN_BOOTSTRAP_PASSWORD_A =
process.env['ADMIN_BOOTSTRAP_PASSWORD_A'] ?? 'harness-admin-password-a';
const ADMIN_BOOTSTRAP_PASSWORD_B =
process.env['ADMIN_BOOTSTRAP_PASSWORD_B'] ?? 'harness-admin-password-b';
// Readiness budget and poll interval used by waitForStack().
const READINESS_TIMEOUT_MS = 180_000;
const READINESS_POLL_MS = 3_000;
// ─── Internal helpers ─────────────────────────────────────────────────────────
/**
 * Probe a gateway's /api/health endpoint once.
 *
 * @param baseUrl Host-reachable base URL of the gateway.
 * @returns true when the endpoint answers with a 2xx status; false on any
 *          other status, on a network error, or when the 5 s timeout fires.
 */
async function isGatewayHealthy(baseUrl: string): Promise<boolean> {
  let response: Response;
  try {
    response = await fetch(`${baseUrl}/api/health`, { signal: AbortSignal.timeout(5_000) });
  } catch {
    // Connection refused, DNS failure, abort: all mean "not healthy yet".
    return false;
  }
  return response.ok;
}
/**
 * Poll both gateways in parallel until both are healthy or the shared deadline
 * expires. Polling in parallel (rather than sequentially) avoids the bug where
 * a slow gateway-a consumes all of the readiness budget before gateway-b is
 * checked.
 *
 * Progress is logged as complete, labelled lines: the two pollers run
 * concurrently, so partial writes to one stdout line would interleave and
 * could not be attributed to a gateway.
 *
 * @param handle Harness handle whose `a`/`b` base URLs are probed.
 * @throws Error naming the first gateway that cannot become healthy before the
 *         deadline (READINESS_TIMEOUT_MS from the start of the wait).
 */
async function waitForStack(handle: HarnessHandle): Promise<void> {
  const gateways: Array<{ label: string; url: string }> = [
    { label: 'gateway-a', url: handle.a.baseUrl },
    { label: 'gateway-b', url: handle.b.baseUrl },
  ];
  // One deadline for the whole stack, fixed before either poller starts.
  const deadline = Date.now() + READINESS_TIMEOUT_MS;

  await Promise.all(
    gateways.map(async (gw) => {
      console.log(`[harness] Waiting for ${gw.label}...`);
      for (;;) {
        if (await isGatewayHealthy(gw.url)) {
          console.log(`[harness] ${gw.label} ready`);
          return;
        }
        // Give up as soon as another poll interval would overshoot the
        // deadline instead of sleeping through it first.
        if (Date.now() + READINESS_POLL_MS > deadline) {
          throw new Error(
            `[harness] ${gw.label} did not become healthy within ${READINESS_TIMEOUT_MS.toString()}ms`,
          );
        }
        await new Promise((r) => setTimeout(r, READINESS_POLL_MS));
      }
    }),
  );
}
/**
 * Extract the names of running services from `docker compose ps --format json`
 * output.
 *
 * Two output shapes are accepted: a single JSON document (array or object) and
 * one JSON object per line. Lines that are not valid JSON are ignored.
 *
 * @param psOutput Raw stdout of the `ps` command.
 * @returns Set of `Service` names whose `State` is exactly 'running'.
 */
function runningComposeServices(psOutput: string): Set<string> {
  const running = new Set<string>();
  const text = psOutput.trim();
  if (!text) return running;

  const toEntries = (raw: string): unknown[] => {
    const parsed: unknown = JSON.parse(raw);
    return Array.isArray(parsed) ? parsed : [parsed];
  };

  let entries: unknown[];
  try {
    // Whole output is one JSON document (array form, or a single object).
    entries = toEntries(text);
  } catch {
    // Otherwise treat it as JSON lines and skip anything unparseable.
    entries = text.split('\n').flatMap((line) => {
      try {
        return toEntries(line);
      } catch {
        return [];
      }
    });
  }

  for (const entry of entries) {
    if (typeof entry !== 'object' || entry === null) continue;
    const { Service, State } = entry as { Service?: unknown; State?: unknown };
    if (State === 'running' && typeof Service === 'string') running.add(Service);
  }
  return running;
}

/**
 * Report whether the harness compose stack has both gateways running.
 *
 * @returns true only when `docker compose ps` lists both `gateway-a` and
 *          `gateway-b` in state 'running'; false otherwise, including when the
 *          docker command itself fails.
 */
function isStackRunning(): boolean {
  try {
    const output = execFileSync(
      'docker',
      ['compose', '-f', COMPOSE_FILE, 'ps', '--format', 'json'],
      { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] },
    );
    const running = runningComposeServices(output);
    // Require the two gateways specifically; counting any two services would
    // treat e.g. postgres-a + valkey-a as "stack running".
    return running.has('gateway-a') && running.has('gateway-b');
  } catch {
    // docker missing, daemon down, compose error: treat as "not running".
    return false;
  }
}
// ─── Public API ───────────────────────────────────────────────────────────────
/**
 * Boot the harness stack.
 *
 * Idempotent: when both gateways already answer their health probe, the
 * existing stack is reused and the returned handle has `ownedStack: false`.
 * Callers that receive `ownedStack: false` should NOT call `tearDownHarness`
 * unless they explicitly want to tear down a pre-existing stack.
 *
 * Otherwise the stack is started with `docker compose up -d`, the handle is
 * marked as owned, and the function waits for both gateways to pass their
 * /api/health probe.
 *
 * @returns Handle describing both gateways and whether this call started them.
 */
export async function bootHarness(): Promise<HarnessHandle> {
  const serverAAccessor: GatewayAccessor = {
    baseUrl: GATEWAY_A_URL,
    bootstrapPassword: ADMIN_BOOTSTRAP_PASSWORD_A,
    internalHostname: 'gateway-a',
  };
  const serverBAccessor: GatewayAccessor = {
    baseUrl: GATEWAY_B_URL,
    bootstrapPassword: ADMIN_BOOTSTRAP_PASSWORD_B,
    internalHostname: 'gateway-b',
  };
  const handle: HarnessHandle = {
    a: serverAAccessor,
    b: serverBAccessor,
    composeFile: COMPOSE_FILE,
    ownedStack: false,
  };

  // Probe both gateways at once; reuse the stack only if both respond.
  const probes = await Promise.all(
    [handle.a.baseUrl, handle.b.baseUrl].map((url) => isGatewayHealthy(url)),
  );
  const alreadyUp = probes.every(Boolean);

  if (!alreadyUp) {
    console.log('[harness] Starting federation harness stack...');
    execSync(`docker compose -f "${COMPOSE_FILE}" up -d`, { stdio: 'inherit' });
    handle.ownedStack = true;
    await waitForStack(handle);
    console.log('[harness] Stack is ready.');
    return handle;
  }

  console.log('[harness] Stack already running — reusing existing stack.');
  return handle;
}
/**
 * Tear down the harness stack.
 *
 * Runs `docker compose down -v` to remove containers AND volumes (ephemeral state).
 * Only tears down if `handle.ownedStack` is true unless `force` is set.
 *
 * @param handle Handle returned by bootHarness(); its `composeFile` selects the stack.
 * @param opts   `force: true` tears down even a stack this handle did not start.
 */
export async function tearDownHarness(
  handle: HarnessHandle,
  opts?: { force?: boolean },
): Promise<void> {
  if (!handle.ownedStack && !opts?.force) {
    console.log(
      '[harness] Stack not owned by this handle — skipping teardown (pass force: true to override).',
    );
    return;
  }
  console.log('[harness] Tearing down federation harness stack...');
  // Pass the path as an argv element rather than interpolating it into a shell
  // string: composeFile is caller-supplied and may contain quotes, `$`, etc.
  execFileSync('docker', ['compose', '-f', handle.composeFile, 'down', '-v'], {
    stdio: 'inherit',
  });
  console.log('[harness] Stack torn down.');
}
/**
 * Accessor for Server A taken from a harness handle.
 * Exists purely so tests read naturally.
 */
export function serverA(handle: HarnessHandle): GatewayAccessor {
  const { a: accessor } = handle;
  return accessor;
}
/**
 * Accessor for Server B taken from a harness handle.
 * Exists purely so tests read naturally.
 */
export function serverB(handle: HarnessHandle): GatewayAccessor {
  const { b: accessor } = handle;
  return accessor;
}
/**
 * Provision federation test data on the harness gateways.
 *
 * @param handle   The harness handle returned by bootHarness(). On success its
 *                 `seedResult` field is set to the returned value.
 * @param scenario Which scope variants to provision. Only 'all' is supported
 *                 today; anything else throws before any network call is made.
 *                 Per-variant narrowing is tracked as M3-11.
 * @returns The SeedResult produced by runSeed() (grants, peers, admin tokens).
 * @throws Error when `scenario` is not 'all'.
 *
 * IMPORTANT: both gateways must have pristine databases. The seed bootstraps an
 * admin user on each gateway via POST /api/bootstrap/setup and fails with a
 * clear error if either gateway already has users. Run
 * 'docker compose down -v' to reset state.
 */
export async function seed(
  handle: HarnessHandle,
  scenario: SeedScenario = 'all',
): Promise<SeedResult> {
  if (scenario === 'all') {
    const { a: gatewayA, b: gatewayB } = handle;
    const seeded = await runSeed({
      serverAUrl: gatewayA.baseUrl,
      serverBUrl: gatewayB.baseUrl,
      adminBootstrapPasswordA: gatewayA.bootstrapPassword,
      adminBootstrapPasswordB: gatewayB.bootstrapPassword,
    });
    handle.seedResult = seeded;
    return seeded;
  }

  const explanation = `seed: scenario narrowing not yet implemented; pass "all" for now. `;
  const detail = `Got: "${scenario}". Per-variant narrowing is tracked as M3-11.`;
  throw new Error(explanation + detail);
}

View File

@@ -0,0 +1,603 @@
#!/usr/bin/env tsx
/**
* tools/federation-harness/seed.ts
*
* Provisions test data for the two-gateway federation harness.
* Run via: tsx tools/federation-harness/seed.ts
*
* What this script does:
* 1. (Optional) Boots the compose stack if --boot flag is passed.
* 2. Waits for both gateways to be healthy.
* 3. Bootstraps an admin user + token on each gateway via POST /api/bootstrap/setup.
* 4. Creates three grants on Server B matching the M3 acceptance test scenarios:
* - Scope variant A: tasks + notes, include_personal: true
* - Scope variant B: tasks only, include_teams: ['T1'], exclude T2
* - Scope variant C: tasks + credentials in resources, credentials excluded (sanity)
* 5. For each grant, walks the full enrollment flow:
* a. Server B creates a peer keypair (represents the requesting side).
* b. Server B creates the grant referencing that peer.
* c. Server B issues an enrollment token.
* d. Server A creates its own peer keypair (represents its view of B).
* e. Server A redeems the enrollment token at Server B's enrollment endpoint,
* submitting A's CSR → receives signed cert back.
* f. Server A stores the cert on its peer record → peer becomes active.
* 6. Inserts representative test tasks/notes/credentials on Server B.
*
* IMPORTANT: This script uses the real admin REST API — no direct DB writes.
* It exercises the full enrollment flow as M3 acceptance tests will.
*
* ESM / NodeNext: all imports use .js extensions.
*/
import { execSync } from 'node:child_process';
import { resolve, dirname } from 'node:path';
import { fileURLToPath } from 'node:url';
// ─── Constants ───────────────────────────────────────────────────────────────
// ESM modules have no built-in __dirname; derive it from import.meta.url.
const __dirname = dirname(fileURLToPath(import.meta.url));
// The compose file is resolved relative to this module, not the process cwd.
const COMPOSE_FILE = resolve(__dirname, 'docker-compose.two-gateways.yml');
/** Base URLs as seen from the host machine (mapped host ports). */
const SERVER_A_URL = process.env['GATEWAY_A_URL'] ?? 'http://localhost:14001';
const SERVER_B_URL = process.env['GATEWAY_B_URL'] ?? 'http://localhost:14002';
/**
 * Bootstrap passwords used when calling POST /api/bootstrap/setup on each
 * gateway. Each gateway starts with zero users and requires a one-time setup
 * call before any admin-guarded endpoints can be used.
 */
const ADMIN_BOOTSTRAP_PASSWORD_A =
process.env['ADMIN_BOOTSTRAP_PASSWORD_A'] ?? 'harness-admin-password-a';
const ADMIN_BOOTSTRAP_PASSWORD_B =
process.env['ADMIN_BOOTSTRAP_PASSWORD_B'] ?? 'harness-admin-password-b';
// Readiness budget and poll interval used by waitForGateway().
// NOTE(review): harness.ts uses 180_000 for its own readiness wait; the two
// budgets differ — confirm that is intentional.
const READINESS_TIMEOUT_MS = 120_000;
const READINESS_POLL_MS = 3_000;
// ─── Scope variant definitions (for M3 acceptance tests) ─────────────────────
/**
 * Scope variant A — tasks + notes, personal data included.
 * No resources are excluded; the per-query row cap is 500.
 */
export const SCOPE_VARIANT_A = {
resources: ['tasks', 'notes'],
filters: {
tasks: { include_personal: true },
notes: { include_personal: true },
},
// Typed as string[] so the empty literal is not inferred as never[].
excluded_resources: [] as string[],
max_rows_per_query: 500,
};
/**
 * Scope variant B — tasks only, team T1 only, no personal.
 * No resources are excluded; the per-query row cap is 500.
 */
export const SCOPE_VARIANT_B = {
resources: ['tasks'],
filters: {
tasks: { include_teams: ['T1'], include_personal: false },
},
// Typed as string[] so the empty literal is not inferred as never[].
excluded_resources: [] as string[],
max_rows_per_query: 500,
};
/**
 * Scope variant C — tasks + credentials in resources list, but credentials
 * explicitly in excluded_resources. Sanity test: credentials must still be
 * inaccessible even though they appear in resources.
 */
export const SCOPE_VARIANT_C = {
// 'credentials' is listed here on purpose; the exclusion below must win.
resources: ['tasks', 'credentials'],
filters: {
tasks: { include_personal: true },
},
excluded_resources: ['credentials'],
max_rows_per_query: 500,
};
// ─── Inline types (no import from packages/types — M3-01 branch not yet merged) ─
/** Options accepted by adminFetch(). */
interface AdminFetchOptions {
/** HTTP method; adminFetch() falls back to 'GET' when omitted. */
method?: string;
/** Request payload; JSON-serialized by adminFetch() when not undefined. */
body?: unknown;
/** Admin token sent as `Authorization: Bearer <adminToken>`. */
adminToken: string;
}
/** Fields read from the response of POST /api/admin/federation/peers/keypair. */
interface PeerRecord {
/** Identifier of the created peer record. */
peerId: string;
/** CSR in PEM form; submitted as `csrPem` when redeeming an enrollment token. */
csrPem: string;
}
/** Fields read from grant responses of the admin federation API. */
interface GrantRecord {
/** Grant identifier; used to build the token and grant-detail URLs. */
id: string;
/** Grant status as reported by the gateway (only logged by this script). */
status: string;
/** Scope payload; left untyped here. */
scope: unknown;
}
/** Response of POST /api/admin/federation/grants/:id/tokens. */
interface EnrollmentTokenResult {
/** One-time enrollment token. */
token: string;
/** Expiry of the token — presumably an ISO timestamp; not read by this script. */
expiresAt: string;
/** Redemption URL as built by the gateway; may carry an in-cluster hostname. */
enrollmentUrl: string;
}
/** Response of redeeming an enrollment token with a CSR. */
interface EnrollmentRedeemResult {
/** Issued certificate in PEM form; stored on the redeeming side's peer record. */
certPem: string;
/** Certificate chain in PEM form; not read by this script. */
certChainPem: string;
}
/** Result of bootstrapAdmin(): the created admin user and its API token. */
interface BootstrapResult {
/** `user.id` from the bootstrap setup response. */
adminUserId: string;
/** `token.plaintext` from the bootstrap setup response. */
adminToken: string;
}
/** Everything runSeed() provisions, for test assertions to reference. */
export interface SeedResult {
/** Host-reachable base URL of Server A used for the run. */
serverAUrl: string;
/** Host-reachable base URL of Server B used for the run. */
serverBUrl: string;
/** Admin bearer token bootstrapped on Server A. */
adminTokenA: string;
/** Admin bearer token bootstrapped on Server B. */
adminTokenB: string;
/** Admin user id bootstrapped on Server A. */
adminUserIdA: string;
/** Admin user id bootstrapped on Server B. */
adminUserIdB: string;
/** Grants created on Server B, one per scope variant. */
grants: {
variantA: GrantRecord;
variantB: GrantRecord;
variantC: GrantRecord;
};
/** Server A's peer records, each tagged with the grant it was enrolled for. */
peers: {
variantA: PeerRecord & { grantId: string };
variantB: PeerRecord & { grantId: string };
variantC: PeerRecord & { grantId: string };
};
}
// ─── HTTP helpers ─────────────────────────────────────────────────────────────
/**
 * Authenticated admin fetch. Sends `Authorization: Bearer <adminToken>` which
 * is the only path supported by AdminGuard (DB-backed sha256 token lookup).
 * No `x-admin-key` header path exists in the gateway.
 *
 * @param baseUrl Gateway base URL, without a trailing slash.
 * @param path    Request path beginning with '/'.
 * @param opts    Method (default 'GET'), optional JSON body, and admin token.
 * @returns The response body parsed as JSON, typed as T (not validated).
 * @throws Error of the form `<METHOD> <url> → <status>: <body>` on any non-2xx
 *         response. seedTestData() matches on this message text to detect
 *         missing endpoints, so the status code must stay in the message.
 */
async function adminFetch<T>(baseUrl: string, path: string, opts: AdminFetchOptions): Promise<T> {
  const url = `${baseUrl}${path}`;
  const method = opts.method ?? 'GET';
  const res = await fetch(url, {
    method,
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${opts.adminToken}`,
    },
    body: opts.body !== undefined ? JSON.stringify(opts.body) : undefined,
  });
  if (!res.ok) {
    const text = await res.text().catch(() => '(no body)');
    // Separate URL and status explicitly; previously they were concatenated
    // with nothing between them, producing e.g. ".../grants500: ...".
    throw new Error(`${method} ${url} → ${res.status.toString()}: ${text}`);
  }
  return res.json() as Promise<T>;
}
// ─── Admin bootstrap ──────────────────────────────────────────────────────────
/**
 * Create the first admin user on a gateway that has no users yet.
 *
 * Flow:
 * 1. GET /api/bootstrap/status must report `needsSetup === true`.
 * 2. POST /api/bootstrap/setup with { name, email, password } returns
 *    { user, token: { plaintext } }.
 *
 * A gateway that already has users is treated as a hard error: the harness
 * would otherwise continue without a usable admin token.
 *
 * @param baseUrl  Host-reachable gateway base URL.
 * @param label    Human-readable gateway name; also used to derive the email.
 * @param password Password sent in the setup request.
 * @returns The new admin's user id and plaintext API token.
 * @throws Error when either request is non-2xx or when needsSetup is false.
 */
async function bootstrapAdmin(
  baseUrl: string,
  label: string,
  password: string,
): Promise<BootstrapResult> {
  const statusUrl = `${baseUrl}/api/bootstrap/status`;
  const setupUrl = `${baseUrl}/api/bootstrap/setup`;
  console.log(`[seed] Bootstrapping admin on ${label} (${baseUrl})...`);

  // Step 1: the database must be pristine.
  const statusResponse = await fetch(statusUrl);
  if (!statusResponse.ok) {
    throw new Error(`[seed] GET ${statusUrl} → ${statusResponse.status.toString()}`);
  }
  const { needsSetup } = (await statusResponse.json()) as { needsSetup: boolean };
  if (!needsSetup) {
    throw new Error(
      `[seed] ${label} at ${baseUrl} already has users (needsSetup=false). ` +
        `The harness requires a pristine database. Run 'docker compose down -v' to reset.`,
    );
  }

  // Step 2: create the admin; the email is derived from the label
  // ("Server A" -> harness-admin-server-a@example.invalid).
  const emailSlug = label.toLowerCase().replace(/\s+/g, '-');
  const payload = {
    name: `Harness Admin (${label})`,
    email: `harness-admin-${emailSlug}@example.invalid`,
    password,
  };
  const setupResponse = await fetch(setupUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  });
  if (!setupResponse.ok) {
    const detail = await setupResponse.text().catch(() => '(no body)');
    throw new Error(`[seed] POST ${setupUrl} → ${setupResponse.status.toString()}: ${detail}`);
  }

  const created = (await setupResponse.json()) as {
    user: { id: string };
    token: { plaintext: string };
  };
  const adminUserId = created.user.id;
  const adminToken = created.token.plaintext;
  console.log(`[seed] ${label} admin user: ${adminUserId}`);
  // Only a short prefix of the token is logged.
  console.log(`[seed] ${label} admin token: ${adminToken.slice(0, 8)}...`);
  return { adminUserId, adminToken };
}
// ─── Readiness probe ──────────────────────────────────────────────────────────
/**
 * Block until a gateway's /api/health endpoint answers with a 2xx status.
 *
 * Polls every READINESS_POLL_MS for up to READINESS_TIMEOUT_MS; each probe is
 * itself limited to 5 s.
 *
 * @param baseUrl Host-reachable gateway base URL.
 * @param label   Name used in log and error messages.
 * @throws Error including the last observed failure when the deadline passes.
 */
async function waitForGateway(baseUrl: string, label: string): Promise<void> {
  const giveUpAt = Date.now() + READINESS_TIMEOUT_MS;

  // One probe: null means healthy, otherwise a description of the failure.
  const probe = async (): Promise<string | null> => {
    try {
      const res = await fetch(`${baseUrl}/api/health`, { signal: AbortSignal.timeout(5_000) });
      return res.ok ? null : `HTTP ${res.status.toString()}`;
    } catch (err) {
      return err instanceof Error ? err.message : String(err);
    }
  };

  let lastError: string = '';
  while (Date.now() < giveUpAt) {
    const failure = await probe();
    if (failure === null) {
      console.log(`[seed] ${label} is ready (${baseUrl})`);
      return;
    }
    lastError = failure;
    await new Promise((wake) => setTimeout(wake, READINESS_POLL_MS));
  }

  throw new Error(
    `[seed] ${label} did not become ready within ${READINESS_TIMEOUT_MS.toString()}ms — last error: ${lastError}`,
  );
}
// ─── Enrollment flow ──────────────────────────────────────────────────────────
/**
 * Walk the full enrollment flow for one grant.
 *
 * The correct two-sided flow (matching the data model's FK semantics):
 *
 * 1. On Server B: POST /api/admin/federation/peers/keypair
 *    → peerId_B (Server B's peer record representing the requesting side)
 * 2. On Server B: POST /api/admin/federation/grants with peerId: peerId_B
 *    → grant (FK to Server B's own federation_peers table — no violation)
 * 3. On Server B: POST /api/admin/federation/grants/:id/tokens
 *    → enrollmentUrl pointing back to Server B
 * 4. On Server A: POST /api/admin/federation/peers/keypair
 *    → peerId_A + csrPem_A (Server A's local record of Server B)
 * 5. Server A → Server B: POST enrollmentUrl with { csrPem: csrPem_A }
 *    → certPem signed by Server B's CA
 * 6. On Server A: PATCH /api/admin/federation/peers/:peerId_A/cert with certPem
 *    → Server A's peer record transitions to active
 *
 * @param opts Scope-variant label, subject user, scope payload, both admin
 *             tokens and both host-reachable base URLs.
 * @returns The grant as re-read from Server B after enrollment, and Server A's
 *          peer record tagged with the grant id.
 * @throws Error when any admin call fails, when the enrollment URL carries no
 *         token path segment, or when token redemption is rejected.
 */
async function enrollGrant(opts: {
  label: string;
  subjectUserId: string;
  scope: unknown;
  adminTokenA: string;
  adminTokenB: string;
  serverAUrl: string;
  serverBUrl: string;
}): Promise<{ grant: GrantRecord; peer: PeerRecord & { grantId: string } }> {
  const { label, subjectUserId, scope, adminTokenA, adminTokenB, serverAUrl, serverBUrl } = opts;
  console.log(`\n[seed] Enrolling grant for scope variant ${label}...`);

  // 1. Create peer keypair on Server B (represents the requesting peer from B's perspective)
  const peerB = await adminFetch<PeerRecord>(serverBUrl, '/api/admin/federation/peers/keypair', {
    method: 'POST',
    adminToken: adminTokenB,
    body: {
      commonName: `harness-peer-${label.toLowerCase()}-from-b`,
      displayName: `Harness Peer ${label} (Server A as seen from B)`,
      endpointUrl: serverAUrl,
    },
  });
  console.log(`[seed] Created peer on B: ${peerB.peerId}`);

  // 2. Create grant on Server B referencing B's own peer record
  const grant = await adminFetch<GrantRecord>(serverBUrl, '/api/admin/federation/grants', {
    method: 'POST',
    adminToken: adminTokenB,
    body: {
      peerId: peerB.peerId,
      subjectUserId,
      scope,
    },
  });
  console.log(`[seed] Created grant on B: ${grant.id} (status: ${grant.status})`);

  // 3. Generate enrollment token on Server B
  const tokenResult = await adminFetch<EnrollmentTokenResult>(
    serverBUrl,
    `/api/admin/federation/grants/${grant.id}/tokens`,
    { method: 'POST', adminToken: adminTokenB, body: { ttlSeconds: 900 } },
  );
  console.log(`[seed] Enrollment token: ${tokenResult.token.slice(0, 8)}...`);
  console.log(`[seed] Enrollment URL: ${tokenResult.enrollmentUrl}`);

  // 4. Create peer keypair on Server A (Server A's local record of Server B)
  const peerA = await adminFetch<PeerRecord>(serverAUrl, '/api/admin/federation/peers/keypair', {
    method: 'POST',
    adminToken: adminTokenA,
    body: {
      commonName: `harness-peer-${label.toLowerCase()}-from-a`,
      displayName: `Harness Peer ${label} (Server B as seen from A)`,
      endpointUrl: serverBUrl,
    },
  });
  console.log(`[seed] Created peer on A: ${peerA.peerId}`);

  // 5. Redeem token at Server B's enrollment endpoint with A's CSR.
  //    The enrollment endpoint is not admin-guarded — the one-time token IS the credential.
  //
  //    The enrollmentUrl returned by the gateway is built using BETTER_AUTH_URL which
  //    resolves to the in-cluster Docker hostname (gateway-b:3000). That URL is only
  //    reachable from other containers, not from the host machine running this script.
  //    We keep only the final path segment (the token) and rebuild the URL on the
  //    host-accessible serverBUrl so the seed script can reach the endpoint.
  const parsedEnrollment = new URL(tokenResult.enrollmentUrl);
  const tokenSegment = parsedEnrollment.pathname.split('/').pop();
  if (!tokenSegment) {
    // An empty last segment (bare host or trailing slash) would otherwise
    // produce a request to ".../enrollment/" and an opaque server error.
    throw new Error(
      `[seed] Enrollment URL has no token path segment: ${tokenResult.enrollmentUrl}`,
    );
  }
  const redeemUrl = `${serverBUrl}/api/federation/enrollment/${tokenSegment}`;
  console.log(`[seed] Rewritten redeem URL (host-accessible): ${redeemUrl}`);
  const redeemRes = await fetch(redeemUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ csrPem: peerA.csrPem }),
  });
  if (!redeemRes.ok) {
    const body = await redeemRes.text().catch(() => '(no body)');
    // Keep status and body visibly separate in the message.
    throw new Error(`Enrollment redemption failed: ${redeemRes.status.toString()}: ${body}`);
  }
  const redeemResult = (await redeemRes.json()) as EnrollmentRedeemResult;
  console.log(`[seed] Cert issued (${redeemResult.certPem.length.toString()} bytes)`);

  // 6. Store cert on Server A's peer record → transitions to active
  await adminFetch<unknown>(serverAUrl, `/api/admin/federation/peers/${peerA.peerId}/cert`, {
    method: 'PATCH',
    adminToken: adminTokenA,
    body: { certPem: redeemResult.certPem },
  });
  console.log(`[seed] Cert stored on A — peer ${peerA.peerId} is now active`);

  // Re-read the grant from B so the caller gets its post-enrollment state.
  // The status is logged for inspection only; it is not asserted here.
  const activeGrant = await adminFetch<GrantRecord>(
    serverBUrl,
    `/api/admin/federation/grants/${grant.id}`,
    { adminToken: adminTokenB },
  );
  console.log(`[seed] Grant status on B: ${activeGrant.status}`);
  return { grant: activeGrant, peer: { ...peerA, grantId: grant.id } };
}
// ─── Test data insertion ──────────────────────────────────────────────────────
/**
 * Best-effort insertion of sample tasks and notes on Server B.
 *
 * Two tasks (one of them tagged with team 'T1') and one note are POSTed to
 * /api/admin/tasks and /api/admin/notes, all owned by `subjectUserId`.
 *
 * ASSUMPTION: those admin endpoints may not exist yet. When a request fails
 * with a message containing '404' or 'Cannot POST', a warning is logged and the
 * remaining records of that kind are skipped; M3-11 is expected to add proper
 * session-based seeding. Any other failure is rethrown.
 *
 * @param subjectUserId Owner recorded on every inserted record.
 * @param scopeLabel    Scope-variant label used in titles and log lines.
 * @param serverBUrl    Host-reachable base URL of Server B.
 * @param adminTokenB   Admin bearer token for Server B.
 */
async function seedTestData(
  subjectUserId: string,
  scopeLabel: string,
  serverBUrl: string,
  adminTokenB: string,
): Promise<void> {
  console.log(`\n[seed] Seeding test data on Server B for ${scopeLabel}...`);

  // Insert records of one kind in order, stopping quietly at the first
  // "endpoint missing" failure and rethrowing anything else.
  const insertAll = async (
    kind: 'task' | 'note',
    records: ReadonlyArray<{ title: string }>,
  ): Promise<void> => {
    const endpoint = `/api/admin/${kind}s`;
    for (const record of records) {
      try {
        await adminFetch<unknown>(serverBUrl, endpoint, {
          method: 'POST',
          adminToken: adminTokenB,
          body: record,
        });
        console.log(`[seed] Inserted ${kind}: "${record.title}"`);
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        const endpointMissing = reason.includes('404') || reason.includes('Cannot POST');
        if (!endpointMissing) {
          throw err;
        }
        console.warn(
          `[seed] WARN: ${endpoint} not found — skipping ${kind} insertion (expected until M3-11)`,
        );
        return;
      }
    }
  };

  await insertAll('task', [
    {
      title: `${scopeLabel} Task 1`,
      description: 'Federation harness test task',
      userId: subjectUserId,
    },
    {
      title: `${scopeLabel} Task 2`,
      description: 'Team-scoped test task',
      userId: subjectUserId,
      teamId: 'T1',
    },
  ]);

  await insertAll('note', [
    {
      title: `${scopeLabel} Note 1`,
      content: 'Personal note for federation test',
      userId: subjectUserId,
    },
  ]);

  console.log(`[seed] Test data seeding for ${scopeLabel} complete.`);
}
// ─── Main entrypoint ──────────────────────────────────────────────────────────
/**
 * Runs the full seed flow against both gateways and returns what was created.
 *
 * Steps, in order: wait for both gateways, bootstrap an admin on each,
 * enroll the three scope variants (A, B, C) one after another, then seed
 * test data on Server B for each variant.
 *
 * @param opts - Optional overrides. Any URL or bootstrap password left out
 *   falls back to the corresponding module-level default. When
 *   `subjectUserIds` is omitted, Server B's bootstrapped admin user ID is
 *   used as the subject for all three variants.
 * @returns URLs, admin tokens and user IDs for both servers, plus the grant
 *   and peer produced by each variant's enrollment.
 * @throws Rejects with whatever error any awaited step rejects with; nothing
 *   is caught here.
 */
export async function runSeed(opts?: {
  serverAUrl?: string;
  serverBUrl?: string;
  adminBootstrapPasswordA?: string;
  adminBootstrapPasswordB?: string;
  subjectUserIds?: { variantA: string; variantB: string; variantC: string };
}): Promise<SeedResult> {
  const urlA = opts?.serverAUrl ?? SERVER_A_URL;
  const urlB = opts?.serverBUrl ?? SERVER_B_URL;
  const bootstrapPasswordA = opts?.adminBootstrapPasswordA ?? ADMIN_BOOTSTRAP_PASSWORD_A;
  const bootstrapPasswordB = opts?.adminBootstrapPasswordB ?? ADMIN_BOOTSTRAP_PASSWORD_B;

  // Caller-supplied subject IDs, if any. The fallback (Server B's admin user)
  // can only be computed once bootstrap has returned, so it is resolved below.
  const requestedSubjects = opts?.subjectUserIds;

  console.log('[seed] Waiting for gateways to be ready...');
  await Promise.all([waitForGateway(urlA, 'Server A'), waitForGateway(urlB, 'Server B')]);

  // Admin bootstrap on both gateways (requires pristine DBs).
  console.log('\n[seed] Bootstrapping admin accounts...');
  const [adminA, adminB] = await Promise.all([
    bootstrapAdmin(urlA, 'Server A', bootstrapPasswordA),
    bootstrapAdmin(urlB, 'Server B', bootstrapPasswordB),
  ]);

  // Without explicit subjects, every variant targets Server B's admin user,
  // which is guaranteed to exist at this point.
  const subjects = requestedSubjects ?? {
    variantA: adminB.adminUserId,
    variantB: adminB.adminUserId,
    variantC: adminB.adminUserId,
  };

  // Arguments that are identical for every enrollment call.
  const sharedEnrollArgs = {
    adminTokenA: adminA.adminToken,
    adminTokenB: adminB.adminToken,
    serverAUrl: urlA,
    serverBUrl: urlB,
  };

  // The variants are enrolled strictly one at a time to avoid race conditions
  // on the step-ca signing queue. Parallel enrollment would work too, but a
  // sequential run is easier to debug when something goes wrong.
  console.log('\n[seed] Enrolling scope variants...');
  const enrolledA = await enrollGrant({
    label: 'A',
    subjectUserId: subjects.variantA,
    scope: SCOPE_VARIANT_A,
    ...sharedEnrollArgs,
  });
  const enrolledB = await enrollGrant({
    label: 'B',
    subjectUserId: subjects.variantB,
    scope: SCOPE_VARIANT_B,
    ...sharedEnrollArgs,
  });
  const enrolledC = await enrollGrant({
    label: 'C',
    subjectUserId: subjects.variantC,
    scope: SCOPE_VARIANT_C,
    ...sharedEnrollArgs,
  });

  // Test data for all three variants is seeded on Server B concurrently.
  const tokenB = adminB.adminToken;
  await Promise.all([
    seedTestData(subjects.variantA, 'A', urlB, tokenB),
    seedTestData(subjects.variantB, 'B', urlB, tokenB),
    seedTestData(subjects.variantC, 'C', urlB, tokenB),
  ]);

  const result: SeedResult = {
    serverAUrl: urlA,
    serverBUrl: urlB,
    adminTokenA: adminA.adminToken,
    adminTokenB: adminB.adminToken,
    adminUserIdA: adminA.adminUserId,
    adminUserIdB: adminB.adminUserId,
    grants: {
      variantA: enrolledA.grant,
      variantB: enrolledB.grant,
      variantC: enrolledC.grant,
    },
    peers: {
      variantA: enrolledA.peer,
      variantB: enrolledB.peer,
      variantC: enrolledC.peer,
    },
  };

  const { variantA, variantB, variantC } = result.grants;
  console.log('\n[seed] Seed complete.');
  console.log('[seed] Summary:');
  console.log(` Variant A grant: ${variantA.id} (${variantA.status})`);
  console.log(` Variant B grant: ${variantB.id} (${variantB.status})`);
  console.log(` Variant C grant: ${variantC.id} (${variantC.status})`);

  return result;
}
// ─── CLI entry ────────────────────────────────────────────────────────────────
// True when this module is the script Node was launched with: the path of
// this file must end with the final '/'-separated segment of process.argv[1].
const isCli = ((): boolean => {
  const invokedPath = process.argv[1];
  if (invokedPath == null) {
    return false;
  }
  // Everything after the last '/' (the whole string when there is none).
  const invokedName = invokedPath.slice(invokedPath.lastIndexOf('/') + 1);
  return fileURLToPath(import.meta.url).endsWith(invokedName);
})();

if (isCli) {
  // With --boot, bring the compose stack up (synchronously) before seeding.
  if (process.argv.includes('--boot')) {
    console.log('[seed] --boot flag detected — starting compose stack...');
    execSync(`docker compose -f "${COMPOSE_FILE}" up -d`, { stdio: 'inherit' });
  }

  // Run the seed and turn its outcome into the process exit code:
  // 0 on success, 1 (after logging the error) on failure.
  void (async () => {
    try {
      await runSeed();
      process.exit(0);
    } catch (err) {
      console.error('[seed] Fatal:', err);
      process.exit(1);
    }
  })();
}