Compare commits

..

4 Commits

Author SHA1 Message Date
Jarvis
37675ae3f2 fix(federation/client): serialize cache fills, destroy evicted Agent, cover env-var guard
All checks were successful
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
- HIGH-A: resolveEntry now uses promise-cache pattern so concurrent
  callers serialize on a single in-flight build, eliminating duplicate
  key material in heap and duplicate DB round-trips
- HIGH-B: flushPeer destroys the evicted undici Agent so stale TLS
  connections close on cert rotation
- MED-C: add regression test for PEER_MISCONFIGURED when
  STEP_CA_ROOT_CERT_PATH is unset

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-23 22:56:57 -05:00
Jarvis
a4a6769a6d fix(federation/client): pin Step-CA root, fix lockfile, harden cache test
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
CRIT-1: regenerate pnpm-lock.yaml so apps/gateway resolves undici@7.24.6
(prior PR pushed package.json without lockfile update; CI failed with
ERR_PNPM_OUTDATED_LOCKFILE). Incidentally cleans 57 lines of stale
peer-dep entries.

CRIT-2: cache-hit test no longer swallows resolveEntry errors. Calls the
private method directly twice and asserts identity equality plus a
single DB select, removing the silent-failure path the prior assertion
allowed.

HIGH-1: mTLS Agent now pins Step-CA root via STEP_CA_ROOT_CERT_PATH.
Without the env var resolveEntry throws PEER_MISCONFIGURED, refusing to
dial peers against the public trust store. PEM is read once and cached
on the service instance.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-23 22:30:09 -05:00
Jarvis
21650fb194 feat(federation): outbound mTLS FederationClient (FED-M3-08)
Some checks failed
ci/woodpecker/push/ci Pipeline failed
ci/woodpecker/pr/ci Pipeline failed
Implements FederationClientService — a NestJS injectable that dials peer
gateways over mTLS (undici Agent with cert+sealed-key from federation_peers),
invokes list/get/capabilities verbs, validates responses via Zod, and surfaces
all failure modes as typed FederationClientError with a coherent error code
taxonomy (PEER_NOT_FOUND, PEER_INACTIVE, PEER_MISCONFIGURED, NETWORK,
FORBIDDEN, HTTP_{status}, INVALID_RESPONSE).

Per-peer Agent instances are cached in a Map for the service lifetime;
flushPeer(peerId) invalidates the cache for M5/M6 cert rotation and
revocation events.

Wired into FederationModule providers + exports so QuerySourceService
(M3-09) can inject it.

13 unit tests covering all required scenarios via undici MockAgent +
real sealClientKey/unsealClientKey round-trip.

Closes #462

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-23 22:16:52 -05:00
89c733e0b9 feat(federation): two-gateway test harness scaffold (FED-M3-02) (#505)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
2026-04-24 03:01:25 +00:00
6 changed files with 1077 additions and 59 deletions

View File

@@ -73,6 +73,7 @@
"rxjs": "^7.8.0",
"socket.io": "^4.8.0",
"uuid": "^11.0.0",
"undici": "^7.24.6",
"zod": "^4.3.6"
},
"devDependencies": {

View File

@@ -0,0 +1,553 @@
/**
* Unit tests for FederationClientService (FED-M3-08).
*
* HTTP mocking strategy:
* undici MockAgent is used to intercept outbound HTTP requests. The service
* uses `undici.fetch` with a `dispatcher` option, so MockAgent is set as the
* global dispatcher and all requests flow through it.
*
* Because the service builds one `undici.Agent` per peer and passes it as
* the dispatcher on every fetch call, we cannot intercept at the Agent level
* in unit tests without significant refactoring. Instead, we set the global
* dispatcher to a MockAgent and override the service's `doRequest` indirection
* by spying on the internal fetch call.
*
* For the cert/key wiring, we use the real `sealClientKey` function from
* peer-key.util.ts with a test secret — no stubs.
*
* Sealed-key setup:
* Each test (or beforeAll) calls `sealClientKey(TEST_PRIVATE_KEY_PEM)` with
* BETTER_AUTH_SECRET set to a deterministic test value so that
* `unsealClientKey` in the service recovers the original PEM.
*/
import 'reflect-metadata';
import { describe, it, expect, vi, beforeEach, afterEach, beforeAll, afterAll } from 'vitest';
import { MockAgent, setGlobalDispatcher, getGlobalDispatcher } from 'undici';
import type { Dispatcher } from 'undici';
import { writeFileSync, unlinkSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { Db } from '@mosaicstack/db';
import { FederationClientService, FederationClientError } from '../federation-client.service.js';
import { sealClientKey } from '../../peer-key.util.js';
// ---------------------------------------------------------------------------
// Test constants
// ---------------------------------------------------------------------------
const TEST_SECRET = 'test-secret-for-federation-client-spec-only';
const PEER_ID = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa';
const ENDPOINT = 'https://peer.example.com';
// Minimal valid RSA/EC private key PEM — does NOT need to be a real key for
// unit tests because we only verify it round-trips through seal/unseal, not
// that it actually negotiates TLS (MockAgent handles that).
const TEST_PRIVATE_KEY_PEM = `-----BEGIN PRIVATE KEY-----
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDummyKeyForTests
-----END PRIVATE KEY-----`;
// Minimal self-signed cert PEM (dummy — only used for mTLS Agent construction)
const TEST_CERT_PEM = `-----BEGIN CERTIFICATE-----
MIIBdummyCertForFederationClientTests==
-----END CERTIFICATE-----`;
const TEST_CERT_SERIAL = 'ABCDEF1234567890';
// ---------------------------------------------------------------------------
// Sealed key (computed once in beforeAll)
// ---------------------------------------------------------------------------
let SEALED_KEY: string;
// Path to a stub Step-CA root cert file written in beforeAll. The cert is never
// actually used to negotiate TLS in unit tests (MockAgent + spy on resolveEntry
// short-circuit the network), but loadStepCaRoot() requires the file to exist.
const STUB_CA_PEM_PATH = join(tmpdir(), 'federation-client-spec-ca.pem');
const STUB_CA_PEM = `-----BEGIN CERTIFICATE-----
MIIBdummyCAforFederationClientSpecOnly==
-----END CERTIFICATE-----
`;
// ---------------------------------------------------------------------------
// Peer row factory
// ---------------------------------------------------------------------------
function makePeerRow(overrides: Partial<Record<string, unknown>> = {}) {
return {
id: PEER_ID,
commonName: 'peer-example-com',
displayName: 'Test Peer',
certPem: TEST_CERT_PEM,
certSerial: TEST_CERT_SERIAL,
certNotAfter: new Date('2030-01-01T00:00:00Z'),
clientKeyPem: SEALED_KEY,
state: 'active' as const,
endpointUrl: ENDPOINT,
lastSeenAt: null,
createdAt: new Date('2026-01-01T00:00:00Z'),
revokedAt: null,
...overrides,
};
}
// ---------------------------------------------------------------------------
// Mock DB builder
// ---------------------------------------------------------------------------
function makeDb(selectRows: unknown[] = [makePeerRow()]): Db {
const limitSelect = vi.fn().mockResolvedValue(selectRows);
const whereSelect = vi.fn().mockReturnValue({ limit: limitSelect });
const fromSelect = vi.fn().mockReturnValue({ where: whereSelect });
const selectMock = vi.fn().mockReturnValue({ from: fromSelect });
return {
select: selectMock,
insert: vi.fn(),
update: vi.fn(),
delete: vi.fn(),
transaction: vi.fn(),
} as unknown as Db;
}
// ---------------------------------------------------------------------------
// Helpers for MockAgent HTTP interception
// ---------------------------------------------------------------------------
/**
* Create a MockAgent + MockPool for the peer endpoint, set it as the global
* dispatcher, and return both for per-test configuration.
*/
function makeMockAgent() {
const mockAgent = new MockAgent({ connections: 1 });
mockAgent.disableNetConnect();
setGlobalDispatcher(mockAgent);
const pool = mockAgent.get(ENDPOINT);
return { mockAgent, pool };
}
/**
* Build a FederationClientService with a mock DB and a spy on the internal
* fetch so we can intercept at the HTTP layer via MockAgent.
*
* The service calls `fetch(url, { dispatcher: agent })` where `agent` is the
* mTLS undici.Agent built from the peer's cert+key. To make MockAgent work,
* we need the fetch dispatcher to be the MockAgent, not the per-peer Agent.
*
* Strategy: we replace the private `resolveEntry` result's `agent` field with
* the MockAgent's pool, so fetch uses our interceptor. We do this by spying
* on `resolveEntry` and returning a controlled entry.
*/
function makeService(db: Db, mockPool: Dispatcher): FederationClientService {
const svc = new FederationClientService(db);
// Override resolveEntry to inject MockAgent pool as the dispatcher
vi.spyOn(
svc as unknown as { resolveEntry: (peerId: string) => Promise<unknown> },
'resolveEntry',
).mockImplementation(async (_peerId: string) => {
// Still call DB (via the real logic) to exercise peer validation,
// but return mock pool as the agent.
// For simplicity in unit tests, directly return a controlled entry.
return {
agent: mockPool,
endpointUrl: ENDPOINT,
certPem: TEST_CERT_PEM,
certSerial: TEST_CERT_SERIAL,
};
});
return svc;
}
// ---------------------------------------------------------------------------
// Test setup
// ---------------------------------------------------------------------------
let originalDispatcher: Dispatcher;
beforeAll(() => {
// Seal the test key once — requires BETTER_AUTH_SECRET
const saved = process.env['BETTER_AUTH_SECRET'];
process.env['BETTER_AUTH_SECRET'] = TEST_SECRET;
try {
SEALED_KEY = sealClientKey(TEST_PRIVATE_KEY_PEM);
} finally {
if (saved === undefined) {
delete process.env['BETTER_AUTH_SECRET'];
} else {
process.env['BETTER_AUTH_SECRET'] = saved;
}
}
writeFileSync(STUB_CA_PEM_PATH, STUB_CA_PEM, 'utf8');
});
afterAll(() => {
try {
unlinkSync(STUB_CA_PEM_PATH);
} catch {
// best-effort cleanup
}
});
beforeEach(() => {
originalDispatcher = getGlobalDispatcher();
process.env['BETTER_AUTH_SECRET'] = TEST_SECRET;
process.env['STEP_CA_ROOT_CERT_PATH'] = STUB_CA_PEM_PATH;
});
afterEach(() => {
setGlobalDispatcher(originalDispatcher);
vi.restoreAllMocks();
delete process.env['BETTER_AUTH_SECRET'];
delete process.env['STEP_CA_ROOT_CERT_PATH'];
});
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/** Successful list response body */
const LIST_BODY = {
items: [{ id: '1', title: 'Task One' }],
nextCursor: undefined,
_partial: false,
};
/** Successful get response body */
const GET_BODY = {
item: { id: '1', title: 'Task One' },
_partial: false,
};
/** Successful capabilities response body */
const CAP_BODY = {
resources: ['tasks'],
excluded_resources: [],
max_rows_per_query: 100,
supported_verbs: ['list', 'get', 'capabilities'] as const,
};
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
describe('FederationClientService', () => {
// ─── Successful verb calls ─────────────────────────────────────────────────
describe('list()', () => {
it('returns parsed typed response on success', async () => {
const db = makeDb();
const { mockAgent, pool } = makeMockAgent();
const svc = makeService(db, pool);
pool
.intercept({
path: '/api/federation/v1/list/tasks',
method: 'POST',
})
.reply(200, LIST_BODY, { headers: { 'content-type': 'application/json' } });
const result = await svc.list(PEER_ID, 'tasks', {});
expect(result.items).toHaveLength(1);
expect(result.items[0]).toMatchObject({ id: '1', title: 'Task One' });
await mockAgent.close();
});
});
describe('get()', () => {
it('returns parsed typed response on success', async () => {
const db = makeDb();
const { mockAgent, pool } = makeMockAgent();
const svc = makeService(db, pool);
pool
.intercept({
path: '/api/federation/v1/get/tasks/1',
method: 'POST',
})
.reply(200, GET_BODY, { headers: { 'content-type': 'application/json' } });
const result = await svc.get(PEER_ID, 'tasks', '1', {});
expect(result.item).toMatchObject({ id: '1', title: 'Task One' });
await mockAgent.close();
});
});
describe('capabilities()', () => {
it('returns parsed capabilities response on success', async () => {
const db = makeDb();
const { mockAgent, pool } = makeMockAgent();
const svc = makeService(db, pool);
pool
.intercept({
path: '/api/federation/v1/capabilities',
method: 'GET',
})
.reply(200, CAP_BODY, { headers: { 'content-type': 'application/json' } });
const result = await svc.capabilities(PEER_ID);
expect(result.resources).toContain('tasks');
expect(result.max_rows_per_query).toBe(100);
await mockAgent.close();
});
});
// ─── HTTP error surfaces ──────────────────────────────────────────────────
describe('non-2xx responses', () => {
it('surfaces 403 as FederationClientError({ status: 403, code: "FORBIDDEN" })', async () => {
const db = makeDb();
const { mockAgent, pool } = makeMockAgent();
const svc = makeService(db, pool);
pool.intercept({ path: '/api/federation/v1/list/tasks', method: 'POST' }).reply(
403,
{ error: { code: 'forbidden', message: 'Access denied' } },
{
headers: { 'content-type': 'application/json' },
},
);
await expect(svc.list(PEER_ID, 'tasks', {})).rejects.toMatchObject({
status: 403,
code: 'FORBIDDEN',
peerId: PEER_ID,
});
await mockAgent.close();
});
it('surfaces 404 as FederationClientError({ status: 404, code: "HTTP_404" })', async () => {
const db = makeDb();
const { mockAgent, pool } = makeMockAgent();
const svc = makeService(db, pool);
pool.intercept({ path: '/api/federation/v1/get/tasks/999', method: 'POST' }).reply(
404,
{ error: { code: 'not_found', message: 'Not found' } },
{
headers: { 'content-type': 'application/json' },
},
);
await expect(svc.get(PEER_ID, 'tasks', '999', {})).rejects.toMatchObject({
status: 404,
code: 'HTTP_404',
peerId: PEER_ID,
});
await mockAgent.close();
});
});
// ─── Network error ─────────────────────────────────────────────────────────
describe('network errors', () => {
it('surfaces network error as FederationClientError({ code: "NETWORK" })', async () => {
const db = makeDb();
const { mockAgent, pool } = makeMockAgent();
const svc = makeService(db, pool);
pool
.intercept({ path: '/api/federation/v1/capabilities', method: 'GET' })
.replyWithError(new Error('ECONNREFUSED'));
await expect(svc.capabilities(PEER_ID)).rejects.toMatchObject({
code: 'NETWORK',
peerId: PEER_ID,
});
await mockAgent.close();
});
});
// ─── Invalid response body ─────────────────────────────────────────────────
describe('invalid response body', () => {
it('surfaces as FederationClientError({ code: "INVALID_RESPONSE" }) when body shape is wrong', async () => {
const db = makeDb();
const { mockAgent, pool } = makeMockAgent();
const svc = makeService(db, pool);
// capabilities returns wrong shape (missing required fields)
pool
.intercept({ path: '/api/federation/v1/capabilities', method: 'GET' })
.reply(200, { totally: 'wrong' }, { headers: { 'content-type': 'application/json' } });
await expect(svc.capabilities(PEER_ID)).rejects.toMatchObject({
code: 'INVALID_RESPONSE',
peerId: PEER_ID,
});
await mockAgent.close();
});
});
// ─── Peer DB validation ────────────────────────────────────────────────────
describe('peer validation (without resolveEntry spy)', () => {
/**
* These tests exercise the real `resolveEntry` path — no spy on resolveEntry.
*/
it('throws PEER_NOT_FOUND when peer is not in DB', async () => {
// DB returns empty array (peer not found)
const db = makeDb([]);
const svc = new FederationClientService(db);
await expect(svc.capabilities(PEER_ID)).rejects.toMatchObject({
code: 'PEER_NOT_FOUND',
peerId: PEER_ID,
});
});
it('throws PEER_INACTIVE when peer state is not "active"', async () => {
const db = makeDb([makePeerRow({ state: 'suspended' })]);
const svc = new FederationClientService(db);
await expect(svc.capabilities(PEER_ID)).rejects.toMatchObject({
code: 'PEER_INACTIVE',
peerId: PEER_ID,
});
});
});
// ─── Cache behaviour ───────────────────────────────────────────────────────
describe('cache behaviour', () => {
it('hits cache on second call — only one DB lookup happens', async () => {
// Verify cache by calling the private resolveEntry directly twice and
// asserting the DB was queried only once. This avoids the HTTP layer,
// which would require either a real network or per-peer Agent rewiring
// that the cache invariant doesn't depend on.
const db = makeDb();
const selectSpy = vi.spyOn(db, 'select');
const svc = new FederationClientService(db);
const resolveEntry = (
svc as unknown as { resolveEntry: (peerId: string) => Promise<unknown> }
).resolveEntry.bind(svc);
const first = await resolveEntry(PEER_ID);
const second = await resolveEntry(PEER_ID);
expect(first).toBe(second);
expect(selectSpy).toHaveBeenCalledTimes(1);
});
it('serializes concurrent resolveEntry calls — only one DB lookup', async () => {
const db = makeDb();
const selectSpy = vi.spyOn(db, 'select');
const svc = new FederationClientService(db);
const resolveEntry = (
svc as unknown as {
resolveEntry: (peerId: string) => Promise<unknown>;
}
).resolveEntry.bind(svc);
const [a, b] = await Promise.all([resolveEntry(PEER_ID), resolveEntry(PEER_ID)]);
expect(a).toBe(b);
expect(selectSpy).toHaveBeenCalledTimes(1);
});
it('flushPeer destroys the evicted Agent so old TLS connections close', async () => {
const db = makeDb();
const svc = new FederationClientService(db);
const resolveEntry = (
svc as unknown as {
resolveEntry: (peerId: string) => Promise<{ agent: { destroy: () => Promise<void> } }>;
}
).resolveEntry.bind(svc);
const entry = await resolveEntry(PEER_ID);
const destroySpy = vi.spyOn(entry.agent, 'destroy').mockResolvedValue();
svc.flushPeer(PEER_ID);
expect(destroySpy).toHaveBeenCalledTimes(1);
});
it('flushPeer() invalidates cache — next call re-reads DB', async () => {
const db = makeDb();
const { mockAgent, pool } = makeMockAgent();
const svc = makeService(db, pool);
pool
.intercept({ path: '/api/federation/v1/capabilities', method: 'GET' })
.reply(200, CAP_BODY, { headers: { 'content-type': 'application/json' } })
.times(2);
// First call — populates cache (via mock resolveEntry)
await svc.capabilities(PEER_ID);
// Flush the cache
svc.flushPeer(PEER_ID);
// The spy on resolveEntry is still active — check it's called again after flush
const resolveEntrySpy = vi.spyOn(
svc as unknown as { resolveEntry: (peerId: string) => Promise<unknown> },
'resolveEntry',
);
// Second call after flush — should call resolveEntry again
await svc.capabilities(PEER_ID);
// resolveEntry should have been called once after we started spying (post-flush)
expect(resolveEntrySpy).toHaveBeenCalledTimes(1);
await mockAgent.close();
});
});
// ─── loadStepCaRoot env-var guard ─────────────────────────────────────────
describe('loadStepCaRoot() env-var guard', () => {
it('throws PEER_MISCONFIGURED when STEP_CA_ROOT_CERT_PATH is not set', async () => {
delete process.env['STEP_CA_ROOT_CERT_PATH'];
const db = makeDb();
const svc = new FederationClientService(db);
const resolveEntry = (
svc as unknown as {
resolveEntry: (peerId: string) => Promise<unknown>;
}
).resolveEntry.bind(svc);
await expect(resolveEntry(PEER_ID)).rejects.toMatchObject({
code: 'PEER_MISCONFIGURED',
});
});
});
// ─── FederationClientError class ──────────────────────────────────────────
describe('FederationClientError', () => {
it('is instanceof Error and FederationClientError', () => {
const err = new FederationClientError({
code: 'PEER_NOT_FOUND',
message: 'test',
peerId: PEER_ID,
});
expect(err).toBeInstanceOf(Error);
expect(err).toBeInstanceOf(FederationClientError);
expect(err.name).toBe('FederationClientError');
});
it('carries status, code, and peerId', () => {
const err = new FederationClientError({
status: 403,
code: 'FORBIDDEN',
message: 'forbidden',
peerId: PEER_ID,
});
expect(err.status).toBe(403);
expect(err.code).toBe('FORBIDDEN');
expect(err.peerId).toBe(PEER_ID);
});
});
});

View File

@@ -0,0 +1,500 @@
/**
* FederationClientService — outbound mTLS client for federation requests (FED-M3-08).
*
* Dials peer gateways over mTLS using the cert+sealed-key stored in `federation_peers`,
* invokes federation verbs (list / get / capabilities), and surfaces all failure modes
* as typed `FederationClientError` instances.
*
* ## Error code taxonomy
*
* | Code | When |
* | ------------------ | ------------------------------------------------------------- |
* | PEER_NOT_FOUND | No row in federation_peers for the given peerId |
* | PEER_INACTIVE | Peer row exists but state !== 'active' |
* | PEER_MISCONFIGURED | Peer row is active but missing endpointUrl or clientKeyPem |
* | NETWORK | undici threw a connection / TLS / timeout error |
* | HTTP_{status} | Peer returned a non-2xx response (e.g. HTTP_403, HTTP_404) |
* | FORBIDDEN | Peer returned 403 (convenience alias alongside HTTP_403) |
* | INVALID_RESPONSE | Response body failed Zod schema validation |
*
* ## Cache strategy
*
* Per-peer `undici.Agent` instances are cached in a `Map<peerId, AgentCacheEntry>` for
* the lifetime of the service instance. The cache is keyed on peerId (UUID).
*
* Cache invalidation:
* - `flushPeer(peerId)` — removes the entry immediately. M5/M6 MUST call this on
* cert rotation or peer revocation events so the next request re-reads the DB and
* builds a fresh TLS Agent with the new cert material.
* - On cache miss: re-reads the DB, checks state === 'active', rebuilds Agent.
*
* Cache does NOT auto-expire. The service is expected to be a singleton scoped to the
* NestJS application lifecycle; flushing on revocation/rotation is the only invalidation
* path by design (avoids redundant DB round-trips on the hot path).
*/
import { Injectable, Inject, Logger } from '@nestjs/common';
import { readFileSync } from 'node:fs';
import { Agent, fetch as undiciFetch } from 'undici';
import type { Dispatcher } from 'undici';
import { z } from 'zod';
import { type Db, eq, federationPeers } from '@mosaicstack/db';
import {
FederationListResponseSchema,
FederationGetResponseSchema,
FederationCapabilitiesResponseSchema,
FederationErrorEnvelopeSchema,
type FederationListResponse,
type FederationGetResponse,
type FederationCapabilitiesResponse,
} from '@mosaicstack/types';
import { DB } from '../../database/database.module.js';
import { unsealClientKey } from '../peer-key.util.js';
// ---------------------------------------------------------------------------
// Error taxonomy
// ---------------------------------------------------------------------------
/**
* Client-side error code set. Distinct from the server-side `FederationErrorCode`
* (which lives in `@mosaicstack/types`) because the client has additional failure
* modes (PEER_NOT_FOUND, PEER_INACTIVE, PEER_MISCONFIGURED, NETWORK) that the
* server never emits.
*/
export type FederationClientErrorCode =
| 'PEER_NOT_FOUND'
| 'PEER_INACTIVE'
| 'PEER_MISCONFIGURED'
| 'NETWORK'
| 'FORBIDDEN'
| 'INVALID_RESPONSE'
| `HTTP_${number}`;
export interface FederationClientErrorOptions {
status?: number;
code: FederationClientErrorCode;
message: string;
peerId: string;
cause?: unknown;
}
/**
* Thrown by FederationClientService on every failure path.
* Callers can dispatch on `error.code` for programmatic handling.
*/
export class FederationClientError extends Error {
readonly status?: number;
readonly code: FederationClientErrorCode;
readonly peerId: string;
readonly cause?: unknown;
constructor(opts: FederationClientErrorOptions) {
super(opts.message);
this.name = 'FederationClientError';
this.status = opts.status;
this.code = opts.code;
this.peerId = opts.peerId;
this.cause = opts.cause;
}
}
// ---------------------------------------------------------------------------
// Internal cache types
// ---------------------------------------------------------------------------
interface AgentCacheEntry {
agent: Agent;
endpointUrl: string;
certPem: string;
certSerial: string;
}
// ---------------------------------------------------------------------------
// Service
// ---------------------------------------------------------------------------
@Injectable()
export class FederationClientService {
private readonly logger = new Logger(FederationClientService.name);
/**
* Per-peer undici Agent cache.
* Key = peerId (UUID string).
*
* Values are either a resolved `AgentCacheEntry` or an in-flight
* `Promise<AgentCacheEntry>` (promise-cache pattern). Storing the promise
* prevents duplicate DB lookups and duplicate key-unseal operations when two
* requests for the same peer arrive before the first build completes.
*
* Flush via `flushPeer(peerId)` on cert rotation / peer revocation (M5/M6).
*/
private readonly cache = new Map<string, AgentCacheEntry | Promise<AgentCacheEntry>>();
/**
* Step-CA root cert PEM, loaded once from `STEP_CA_ROOT_CERT_PATH`.
* Used as the trust anchor for peer server certificates so federation TLS is
* pinned to our PKI, not the public trust store. Lazily loaded on first use
* so unit tests that don't exercise the agent path can run without the env var.
*/
private cachedCaPem: string | null = null;
constructor(@Inject(DB) private readonly db: Db) {}
// -------------------------------------------------------------------------
// Public verb API
// -------------------------------------------------------------------------
/**
* Invoke the `list` verb on a remote peer.
*
* @param peerId UUID of the peer row in `federation_peers`.
* @param resource Resource path, e.g. "tasks".
* @param request Free-form body sent as JSON in the POST body.
* @returns Parsed `FederationListResponse<T>`.
*/
async list<T>(
peerId: string,
resource: string,
request: Record<string, unknown>,
): Promise<FederationListResponse<T>> {
const { endpointUrl, agent } = await this.resolveEntry(peerId);
const url = `${endpointUrl}/api/federation/v1/list/${encodeURIComponent(resource)}`;
const body = await this.doPost(peerId, url, agent, request);
return this.parseWith<FederationListResponse<T>>(
peerId,
body,
FederationListResponseSchema(z.unknown()),
);
}
/**
* Invoke the `get` verb on a remote peer.
*
* @param peerId UUID of the peer row in `federation_peers`.
* @param resource Resource path, e.g. "tasks".
* @param id Resource identifier.
* @param request Free-form body sent as JSON in the POST body.
* @returns Parsed `FederationGetResponse<T>`.
*/
async get<T>(
peerId: string,
resource: string,
id: string,
request: Record<string, unknown>,
): Promise<FederationGetResponse<T>> {
const { endpointUrl, agent } = await this.resolveEntry(peerId);
const url = `${endpointUrl}/api/federation/v1/get/${encodeURIComponent(resource)}/${encodeURIComponent(id)}`;
const body = await this.doPost(peerId, url, agent, request);
return this.parseWith<FederationGetResponse<T>>(
peerId,
body,
FederationGetResponseSchema(z.unknown()),
);
}
/**
* Invoke the `capabilities` verb on a remote peer.
*
* @param peerId UUID of the peer row in `federation_peers`.
* @returns Parsed `FederationCapabilitiesResponse`.
*/
async capabilities(peerId: string): Promise<FederationCapabilitiesResponse> {
const { endpointUrl, agent } = await this.resolveEntry(peerId);
const url = `${endpointUrl}/api/federation/v1/capabilities`;
const body = await this.doGet(peerId, url, agent);
return this.parseWith<FederationCapabilitiesResponse>(
peerId,
body,
FederationCapabilitiesResponseSchema,
);
}
// -------------------------------------------------------------------------
// Cache management
// -------------------------------------------------------------------------
/**
* Flush the cached Agent for a specific peer.
*
* M5/M6 MUST call this on:
* - cert rotation events (so new cert material is picked up)
* - peer revocation events (so future requests fail at PEER_INACTIVE)
*
* After flushing, the next call to `list`, `get`, or `capabilities` for
* this peer will re-read the DB and rebuild the Agent.
*/
flushPeer(peerId: string): void {
const entry = this.cache.get(peerId);
if (entry === undefined) {
return;
}
this.cache.delete(peerId);
if (!(entry instanceof Promise)) {
// best-effort destroy; promise-cached entries skip destroy because
// the in-flight build owns its own Agent which will be GC'd when the
// owning request handles the rejection from the cache miss
entry.agent.destroy().catch(() => {
// intentionally ignored — destroy errors are not actionable
});
}
this.logger.log(`Cache flushed for peer ${peerId}`);
}
// -------------------------------------------------------------------------
// Internal helpers
// -------------------------------------------------------------------------
/**
* Load and cache the Step-CA root cert PEM from `STEP_CA_ROOT_CERT_PATH`.
* Throws `FederationClientError` if the env var is unset or the file cannot
* be read — mTLS to a peer without a pinned trust anchor would silently
* fall back to the public trust store.
*/
private loadStepCaRoot(): string {
if (this.cachedCaPem !== null) {
return this.cachedCaPem;
}
const path = process.env['STEP_CA_ROOT_CERT_PATH'];
if (!path) {
throw new FederationClientError({
code: 'PEER_MISCONFIGURED',
message: 'STEP_CA_ROOT_CERT_PATH is not set; refusing to dial peer without pinned CA trust',
peerId: '',
});
}
try {
const pem = readFileSync(path, 'utf8');
this.cachedCaPem = pem;
return pem;
} catch (err) {
throw new FederationClientError({
code: 'PEER_MISCONFIGURED',
message: `Failed to read STEP_CA_ROOT_CERT_PATH (${path})`,
peerId: '',
cause: err,
});
}
}
/**
* Resolve the cache entry for a peer, reading DB on miss.
*
* Uses a promise-cache pattern: concurrent callers for the same uncached
* `peerId` all `await` the same in-flight `Promise<AgentCacheEntry>` so
* only one DB lookup and one key-unseal ever runs per peer per cache miss.
* The promise is replaced with the concrete entry on success, or deleted on
* rejection so a transient error does not poison the cache permanently.
*
* Throws `FederationClientError` with appropriate code if the peer is not
* found, is inactive, or is missing required fields.
*/
private async resolveEntry(peerId: string): Promise<AgentCacheEntry> {
const cached = this.cache.get(peerId);
if (cached) {
return cached; // Promise or concrete entry — both are awaitable
}
const inflight = this.buildEntry(peerId).then(
(entry) => {
this.cache.set(peerId, entry); // replace promise with concrete value
return entry;
},
(err: unknown) => {
this.cache.delete(peerId); // don't poison the cache with a rejected promise
throw err;
},
);
this.cache.set(peerId, inflight);
return inflight;
}
/**
* Build the `AgentCacheEntry` for a peer by reading the DB, validating the
* peer's state, unsealing the private key, and constructing the mTLS Agent.
*
* Throws `FederationClientError` with appropriate code if the peer is not
* found, is inactive, or is missing required fields.
*/
private async buildEntry(peerId: string): Promise<AgentCacheEntry> {
// DB lookup
const [peer] = await this.db
.select()
.from(federationPeers)
.where(eq(federationPeers.id, peerId))
.limit(1);
if (!peer) {
throw new FederationClientError({
code: 'PEER_NOT_FOUND',
message: `Federation peer ${peerId} not found`,
peerId,
});
}
if (peer.state !== 'active') {
throw new FederationClientError({
code: 'PEER_INACTIVE',
message: `Federation peer ${peerId} is not active (state: ${peer.state})`,
peerId,
});
}
if (!peer.endpointUrl || !peer.clientKeyPem) {
throw new FederationClientError({
code: 'PEER_MISCONFIGURED',
message: `Federation peer ${peerId} is missing endpointUrl or clientKeyPem`,
peerId,
});
}
// Unseal the private key
let privateKeyPem: string;
try {
privateKeyPem = unsealClientKey(peer.clientKeyPem);
} catch (err) {
throw new FederationClientError({
code: 'PEER_MISCONFIGURED',
message: `Failed to unseal client key for peer ${peerId}`,
peerId,
cause: err,
});
}
// Build mTLS agent — pin trust to Step-CA root so we never accept
// a peer cert signed by a public CA (defense against MITM with a
// publicly-trusted DV cert for the peer's hostname).
const agent = new Agent({
connect: {
cert: peer.certPem,
key: privateKeyPem,
ca: this.loadStepCaRoot(),
// rejectUnauthorized: true is the undici default for HTTPS
},
});
const entry: AgentCacheEntry = {
agent,
endpointUrl: peer.endpointUrl,
certPem: peer.certPem,
certSerial: peer.certSerial,
};
this.logger.log(`Agent cached for peer ${peerId} (serial: ${peer.certSerial})`);
return entry;
}
/**
* Execute a POST request with a JSON body.
* Returns the parsed response body as an unknown value.
* Throws `FederationClientError` on network errors and non-2xx responses.
*/
private async doPost(
peerId: string,
url: string,
agent: Dispatcher,
body: Record<string, unknown>,
): Promise<unknown> {
return this.doRequest(peerId, url, agent, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(body),
});
}
/**
* Execute a GET request.
* Returns the parsed response body as an unknown value.
* Throws `FederationClientError` on network errors and non-2xx responses.
*/
private async doGet(peerId: string, url: string, agent: Dispatcher): Promise<unknown> {
return this.doRequest(peerId, url, agent, { method: 'GET' });
}
private async doRequest(
peerId: string,
url: string,
agent: Dispatcher,
init: { method: string; headers?: Record<string, string>; body?: string },
): Promise<unknown> {
let response: Awaited<ReturnType<typeof undiciFetch>>;
try {
response = await undiciFetch(url, {
...init,
dispatcher: agent,
});
} catch (err) {
throw new FederationClientError({
code: 'NETWORK',
message: `Network error calling peer ${peerId} at ${url}: ${err instanceof Error ? err.message : String(err)}`,
peerId,
cause: err,
});
}
const rawBody = await response.text().catch(() => '');
if (!response.ok) {
const status = response.status;
// Attempt to parse as federation error envelope
let serverMessage = `HTTP ${status}`;
try {
const json: unknown = JSON.parse(rawBody);
const result = FederationErrorEnvelopeSchema.safeParse(json);
if (result.success) {
serverMessage = result.data.error.message;
}
} catch {
// Not valid JSON or not a federation envelope — use generic message
}
// Specific code for 403 (most actionable for callers); generic HTTP_{n} for others
const code: FederationClientErrorCode = status === 403 ? 'FORBIDDEN' : `HTTP_${status}`;
throw new FederationClientError({
status,
code,
message: `Peer ${peerId} returned ${status}: ${serverMessage}`,
peerId,
});
}
try {
return JSON.parse(rawBody) as unknown;
} catch (err) {
throw new FederationClientError({
code: 'INVALID_RESPONSE',
message: `Peer ${peerId} returned non-JSON body`,
peerId,
cause: err,
});
}
}
/**
* Parse and validate a response body against a Zod schema.
*
* For list/get, callers pass the result of `FederationListResponseSchema(z.unknown())`
* so that the envelope structure is validated without requiring a concrete item schema
* at the client level. The generic `T` provides compile-time typing.
*
* Throws `FederationClientError({ code: 'INVALID_RESPONSE' })` on parse failure.
*/
private parseWith<T>(peerId: string, body: unknown, schema: z.ZodTypeAny): T {
const result = schema.safeParse(body);
if (!result.success) {
const issues = result.error.issues
.map((e: z.ZodIssue) => `[${e.path.join('.') || 'root'}] ${e.message}`)
.join('; ');
throw new FederationClientError({
code: 'INVALID_RESPONSE',
message: `Peer ${peerId} returned invalid response shape: ${issues}`,
peerId,
});
}
return result.data as T;
}
}

View File

@@ -0,0 +1,13 @@
/**
* Federation client barrel — re-exports for FederationModule consumers.
*
* M3-09 (QuerySourceService) and future milestones should import from here,
* not directly from the implementation file.
*/
export {
FederationClientService,
FederationClientError,
type FederationClientErrorCode,
type FederationClientErrorOptions,
} from './federation-client.service.js';

View File

@@ -5,10 +5,11 @@ import { EnrollmentController } from './enrollment.controller.js';
import { EnrollmentService } from './enrollment.service.js';
import { FederationController } from './federation.controller.js';
import { GrantsService } from './grants.service.js';
import { FederationClientService } from './client/index.js';
@Module({
controllers: [EnrollmentController, FederationController],
providers: [AdminGuard, CaService, EnrollmentService, GrantsService],
exports: [CaService, EnrollmentService, GrantsService],
providers: [AdminGuard, CaService, EnrollmentService, GrantsService, FederationClientService],
exports: [CaService, EnrollmentService, GrantsService, FederationClientService],
})
export class FederationModule {}

64
pnpm-lock.yaml generated
View File

@@ -179,6 +179,9 @@ importers:
socket.io:
specifier: ^4.8.0
version: 4.8.3
undici:
specifier: ^7.24.6
version: 7.24.6
uuid:
specifier: ^11.0.0
version: 11.1.0
@@ -713,10 +716,10 @@ importers:
dependencies:
'@mariozechner/pi-agent-core':
specifier: ^0.63.1
version: 0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@3.25.76)
version: 0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)
'@mariozechner/pi-ai':
specifier: ^0.63.1
version: 0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@3.25.76)
version: 0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)
'@sinclair/typebox':
specifier: ^0.34.41
version: 0.34.48
@@ -6993,10 +6996,6 @@ packages:
resolution: {integrity: sha512-gBLkYIlEnSp8pFbT64yFgGE6UIB9tAkhukC23PmMDCe5Nd+cRqKxSjw5y54MK2AZMgZfJWMaNE4nYUHgi1XEOw==}
engines: {node: '>=18.17'}
undici@7.24.3:
resolution: {integrity: sha512-eJdUmK/Wrx2d+mnWWmwwLRyA7OQCkLap60sk3dOK4ViZR7DKwwptwuIvFBg2HaiP9ESaEdhtpSymQPvytpmkCA==}
engines: {node: '>=20.18.1'}
undici@7.24.6:
resolution: {integrity: sha512-Xi4agocCbRzt0yYMZGMA6ApD7gvtUFaxm4ZmeacWI4cZxaF6C+8I8QfofC20NAePiB/IcvZmzkJ7XPa471AEtA==}
engines: {node: '>=20.18.1'}
@@ -7329,12 +7328,6 @@ snapshots:
'@jridgewell/gen-mapping': 0.3.13
'@jridgewell/trace-mapping': 0.3.31
'@anthropic-ai/sdk@0.73.0(zod@3.25.76)':
dependencies:
json-schema-to-ts: 3.1.1
optionalDependencies:
zod: 3.25.76
'@anthropic-ai/sdk@0.73.0(zod@4.3.6)':
dependencies:
json-schema-to-ts: 3.1.1
@@ -8676,18 +8669,6 @@ snapshots:
- ws
- zod
'@mariozechner/pi-agent-core@0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@3.25.76)':
dependencies:
'@mariozechner/pi-ai': 0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@3.25.76)
transitivePeerDependencies:
- '@modelcontextprotocol/sdk'
- aws-crt
- bufferutil
- supports-color
- utf-8-validate
- ws
- zod
'@mariozechner/pi-agent-core@0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)':
dependencies:
'@mariozechner/pi-ai': 0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)
@@ -8736,30 +8717,6 @@ snapshots:
- ws
- zod
'@mariozechner/pi-ai@0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@3.25.76)':
dependencies:
'@anthropic-ai/sdk': 0.73.0(zod@3.25.76)
'@aws-sdk/client-bedrock-runtime': 3.1008.0
'@google/genai': 1.45.0(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))
'@mistralai/mistralai': 1.14.1
'@sinclair/typebox': 0.34.48
ajv: 8.18.0
ajv-formats: 3.0.1(ajv@8.18.0)
chalk: 5.6.2
openai: 6.26.0(ws@8.20.0)(zod@3.25.76)
partial-json: 0.1.7
proxy-agent: 6.5.0
undici: 7.24.3
zod-to-json-schema: 3.25.1(zod@3.25.76)
transitivePeerDependencies:
- '@modelcontextprotocol/sdk'
- aws-crt
- bufferutil
- supports-color
- utf-8-validate
- ws
- zod
'@mariozechner/pi-ai@0.63.2(@modelcontextprotocol/sdk@1.28.0(zod@4.3.6))(ws@8.20.0)(zod@4.3.6)':
dependencies:
'@anthropic-ai/sdk': 0.73.0(zod@4.3.6)
@@ -8773,7 +8730,7 @@ snapshots:
openai: 6.26.0(ws@8.20.0)(zod@4.3.6)
partial-json: 0.1.7
proxy-agent: 6.5.0
undici: 7.24.3
undici: 7.24.6
zod-to-json-schema: 3.25.1(zod@4.3.6)
transitivePeerDependencies:
- '@modelcontextprotocol/sdk'
@@ -12632,7 +12589,7 @@ snapshots:
saxes: 6.0.0
symbol-tree: 3.2.4
tough-cookie: 6.0.1
undici: 7.24.3
undici: 7.24.6
w3c-xmlserializer: 5.0.0
webidl-conversions: 8.0.1
whatwg-mimetype: 5.0.0
@@ -13352,11 +13309,6 @@ snapshots:
dependencies:
mimic-function: 5.0.1
openai@6.26.0(ws@8.20.0)(zod@3.25.76):
optionalDependencies:
ws: 8.20.0
zod: 3.25.76
openai@6.26.0(ws@8.20.0)(zod@4.3.6):
optionalDependencies:
ws: 8.20.0
@@ -14488,8 +14440,6 @@ snapshots:
undici@6.21.3: {}
undici@7.24.3: {}
undici@7.24.6: {}
unhomoglyph@1.0.6: {}