feat(federation): outbound mTLS FederationClient (FED-M3-08)
Implements FederationClientService — a NestJS injectable that dials peer
gateways over mTLS (undici Agent with cert+sealed-key from federation_peers),
invokes list/get/capabilities verbs, validates responses via Zod, and surfaces
all failure modes as typed FederationClientError with a coherent error code
taxonomy (PEER_NOT_FOUND, PEER_INACTIVE, PEER_MISCONFIGURED, NETWORK,
FORBIDDEN, HTTP_{status}, INVALID_RESPONSE).
Per-peer Agent instances are cached in a Map for the service lifetime;
flushPeer(peerId) invalidates the cache for M5/M6 cert rotation and
revocation events.
Wired into FederationModule providers + exports so QuerySourceService
(M3-09) can inject it.
13 unit tests covering all required scenarios via undici MockAgent +
real sealClientKey/unsealClientKey round-trip.
Closes #462
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
412
apps/gateway/src/federation/client/federation-client.service.ts
Normal file
412
apps/gateway/src/federation/client/federation-client.service.ts
Normal file
@@ -0,0 +1,412 @@
|
||||
/**
|
||||
* FederationClientService — outbound mTLS client for federation requests (FED-M3-08).
|
||||
*
|
||||
* Dials peer gateways over mTLS using the cert+sealed-key stored in `federation_peers`,
|
||||
* invokes federation verbs (list / get / capabilities), and surfaces all failure modes
|
||||
* as typed `FederationClientError` instances.
|
||||
*
|
||||
* ## Error code taxonomy
|
||||
*
|
||||
* | Code | When |
|
||||
* | ------------------ | ------------------------------------------------------------- |
|
||||
* | PEER_NOT_FOUND | No row in federation_peers for the given peerId |
|
||||
* | PEER_INACTIVE | Peer row exists but state !== 'active' |
|
||||
* | PEER_MISCONFIGURED | Peer row is active but missing endpointUrl or clientKeyPem |
|
||||
* | NETWORK | undici threw a connection / TLS / timeout error |
|
||||
* | HTTP_{status} | Peer returned a non-2xx response (e.g. HTTP_403, HTTP_404) |
|
||||
* | FORBIDDEN | Peer returned 403 (convenience alias alongside HTTP_403) |
|
||||
* | INVALID_RESPONSE | Response body failed Zod schema validation |
|
||||
*
|
||||
* ## Cache strategy
|
||||
*
|
||||
* Per-peer `undici.Agent` instances are cached in a `Map<peerId, AgentCacheEntry>` for
|
||||
* the lifetime of the service instance. The cache is keyed on peerId (UUID).
|
||||
*
|
||||
* Cache invalidation:
|
||||
* - `flushPeer(peerId)` — removes the entry immediately. M5/M6 MUST call this on
|
||||
* cert rotation or peer revocation events so the next request re-reads the DB and
|
||||
* builds a fresh TLS Agent with the new cert material.
|
||||
* - On cache miss: re-reads the DB, checks state === 'active', rebuilds Agent.
|
||||
*
|
||||
* Cache does NOT auto-expire. The service is expected to be a singleton scoped to the
|
||||
* NestJS application lifecycle; flushing on revocation/rotation is the only invalidation
|
||||
* path by design (avoids redundant DB round-trips on the hot path).
|
||||
*/
|
||||
|
||||
import { Injectable, Inject, Logger } from '@nestjs/common';
|
||||
import { Agent, fetch as undiciFetch } from 'undici';
|
||||
import type { Dispatcher } from 'undici';
|
||||
import { z } from 'zod';
|
||||
import { type Db, eq, federationPeers } from '@mosaicstack/db';
|
||||
import {
|
||||
FederationListResponseSchema,
|
||||
FederationGetResponseSchema,
|
||||
FederationCapabilitiesResponseSchema,
|
||||
FederationErrorEnvelopeSchema,
|
||||
type FederationListResponse,
|
||||
type FederationGetResponse,
|
||||
type FederationCapabilitiesResponse,
|
||||
} from '@mosaicstack/types';
|
||||
import { DB } from '../../database/database.module.js';
|
||||
import { unsealClientKey } from '../peer-key.util.js';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Error taxonomy
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Client-side error code set. Distinct from the server-side `FederationErrorCode`
|
||||
* (which lives in `@mosaicstack/types`) because the client has additional failure
|
||||
* modes (PEER_NOT_FOUND, PEER_INACTIVE, PEER_MISCONFIGURED, NETWORK) that the
|
||||
* server never emits.
|
||||
*/
|
||||
export type FederationClientErrorCode =
|
||||
| 'PEER_NOT_FOUND'
|
||||
| 'PEER_INACTIVE'
|
||||
| 'PEER_MISCONFIGURED'
|
||||
| 'NETWORK'
|
||||
| 'FORBIDDEN'
|
||||
| 'INVALID_RESPONSE'
|
||||
| `HTTP_${number}`;
|
||||
|
||||
export interface FederationClientErrorOptions {
|
||||
status?: number;
|
||||
code: FederationClientErrorCode;
|
||||
message: string;
|
||||
peerId: string;
|
||||
cause?: unknown;
|
||||
}
|
||||
|
||||
/**
|
||||
* Thrown by FederationClientService on every failure path.
|
||||
* Callers can dispatch on `error.code` for programmatic handling.
|
||||
*/
|
||||
export class FederationClientError extends Error {
|
||||
readonly status?: number;
|
||||
readonly code: FederationClientErrorCode;
|
||||
readonly peerId: string;
|
||||
readonly cause?: unknown;
|
||||
|
||||
constructor(opts: FederationClientErrorOptions) {
|
||||
super(opts.message);
|
||||
this.name = 'FederationClientError';
|
||||
this.status = opts.status;
|
||||
this.code = opts.code;
|
||||
this.peerId = opts.peerId;
|
||||
this.cause = opts.cause;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Internal cache types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
interface AgentCacheEntry {
|
||||
agent: Agent;
|
||||
endpointUrl: string;
|
||||
certPem: string;
|
||||
certSerial: string;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Service
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@Injectable()
|
||||
export class FederationClientService {
|
||||
private readonly logger = new Logger(FederationClientService.name);
|
||||
|
||||
/**
|
||||
* Per-peer undici Agent cache.
|
||||
* Key = peerId (UUID string).
|
||||
* Flush via `flushPeer(peerId)` on cert rotation / peer revocation (M5/M6).
|
||||
*/
|
||||
private readonly cache = new Map<string, AgentCacheEntry>();
|
||||
|
||||
constructor(@Inject(DB) private readonly db: Db) {}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Public verb API
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Invoke the `list` verb on a remote peer.
|
||||
*
|
||||
* @param peerId UUID of the peer row in `federation_peers`.
|
||||
* @param resource Resource path, e.g. "tasks".
|
||||
* @param request Free-form body sent as JSON in the POST body.
|
||||
* @returns Parsed `FederationListResponse<T>`.
|
||||
*/
|
||||
async list<T>(
|
||||
peerId: string,
|
||||
resource: string,
|
||||
request: Record<string, unknown>,
|
||||
): Promise<FederationListResponse<T>> {
|
||||
const { endpointUrl, agent } = await this.resolveEntry(peerId);
|
||||
const url = `${endpointUrl}/api/federation/v1/list/${encodeURIComponent(resource)}`;
|
||||
const body = await this.doPost(peerId, url, agent, request);
|
||||
return this.parseWith<FederationListResponse<T>>(
|
||||
peerId,
|
||||
body,
|
||||
FederationListResponseSchema(z.unknown()),
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Invoke the `get` verb on a remote peer.
|
||||
*
|
||||
* @param peerId UUID of the peer row in `federation_peers`.
|
||||
* @param resource Resource path, e.g. "tasks".
|
||||
* @param id Resource identifier.
|
||||
* @param request Free-form body sent as JSON in the POST body.
|
||||
* @returns Parsed `FederationGetResponse<T>`.
|
||||
*/
|
||||
async get<T>(
|
||||
peerId: string,
|
||||
resource: string,
|
||||
id: string,
|
||||
request: Record<string, unknown>,
|
||||
): Promise<FederationGetResponse<T>> {
|
||||
const { endpointUrl, agent } = await this.resolveEntry(peerId);
|
||||
const url = `${endpointUrl}/api/federation/v1/get/${encodeURIComponent(resource)}/${encodeURIComponent(id)}`;
|
||||
const body = await this.doPost(peerId, url, agent, request);
|
||||
return this.parseWith<FederationGetResponse<T>>(
|
||||
peerId,
|
||||
body,
|
||||
FederationGetResponseSchema(z.unknown()),
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Invoke the `capabilities` verb on a remote peer.
|
||||
*
|
||||
* @param peerId UUID of the peer row in `federation_peers`.
|
||||
* @returns Parsed `FederationCapabilitiesResponse`.
|
||||
*/
|
||||
async capabilities(peerId: string): Promise<FederationCapabilitiesResponse> {
|
||||
const { endpointUrl, agent } = await this.resolveEntry(peerId);
|
||||
const url = `${endpointUrl}/api/federation/v1/capabilities`;
|
||||
const body = await this.doGet(peerId, url, agent);
|
||||
return this.parseWith<FederationCapabilitiesResponse>(
|
||||
peerId,
|
||||
body,
|
||||
FederationCapabilitiesResponseSchema,
|
||||
);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Cache management
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Flush the cached Agent for a specific peer.
|
||||
*
|
||||
* M5/M6 MUST call this on:
|
||||
* - cert rotation events (so new cert material is picked up)
|
||||
* - peer revocation events (so future requests fail at PEER_INACTIVE)
|
||||
*
|
||||
* After flushing, the next call to `list`, `get`, or `capabilities` for
|
||||
* this peer will re-read the DB and rebuild the Agent.
|
||||
*/
|
||||
flushPeer(peerId: string): void {
|
||||
if (this.cache.has(peerId)) {
|
||||
this.cache.delete(peerId);
|
||||
this.logger.log(`Cache flushed for peer ${peerId}`);
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Internal helpers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Resolve the cache entry for a peer, reading DB on miss.
|
||||
*
|
||||
* Throws `FederationClientError` with appropriate code if the peer is not
|
||||
* found, is inactive, or is missing required fields.
|
||||
*/
|
||||
private async resolveEntry(peerId: string): Promise<AgentCacheEntry> {
|
||||
const cached = this.cache.get(peerId);
|
||||
if (cached) {
|
||||
return cached;
|
||||
}
|
||||
|
||||
// DB lookup
|
||||
const [peer] = await this.db
|
||||
.select()
|
||||
.from(federationPeers)
|
||||
.where(eq(federationPeers.id, peerId))
|
||||
.limit(1);
|
||||
|
||||
if (!peer) {
|
||||
throw new FederationClientError({
|
||||
code: 'PEER_NOT_FOUND',
|
||||
message: `Federation peer ${peerId} not found`,
|
||||
peerId,
|
||||
});
|
||||
}
|
||||
|
||||
if (peer.state !== 'active') {
|
||||
throw new FederationClientError({
|
||||
code: 'PEER_INACTIVE',
|
||||
message: `Federation peer ${peerId} is not active (state: ${peer.state})`,
|
||||
peerId,
|
||||
});
|
||||
}
|
||||
|
||||
if (!peer.endpointUrl || !peer.clientKeyPem) {
|
||||
throw new FederationClientError({
|
||||
code: 'PEER_MISCONFIGURED',
|
||||
message: `Federation peer ${peerId} is missing endpointUrl or clientKeyPem`,
|
||||
peerId,
|
||||
});
|
||||
}
|
||||
|
||||
// Unseal the private key
|
||||
let privateKeyPem: string;
|
||||
try {
|
||||
privateKeyPem = unsealClientKey(peer.clientKeyPem);
|
||||
} catch (err) {
|
||||
throw new FederationClientError({
|
||||
code: 'PEER_MISCONFIGURED',
|
||||
message: `Failed to unseal client key for peer ${peerId}`,
|
||||
peerId,
|
||||
cause: err,
|
||||
});
|
||||
}
|
||||
|
||||
// Build mTLS agent
|
||||
const agent = new Agent({
|
||||
connect: {
|
||||
cert: peer.certPem,
|
||||
key: privateKeyPem,
|
||||
// rejectUnauthorized: true is the undici default for HTTPS
|
||||
},
|
||||
});
|
||||
|
||||
const entry: AgentCacheEntry = {
|
||||
agent,
|
||||
endpointUrl: peer.endpointUrl,
|
||||
certPem: peer.certPem,
|
||||
certSerial: peer.certSerial,
|
||||
};
|
||||
|
||||
this.cache.set(peerId, entry);
|
||||
this.logger.log(`Agent cached for peer ${peerId} (serial: ${peer.certSerial})`);
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a POST request with a JSON body.
|
||||
* Returns the parsed response body as an unknown value.
|
||||
* Throws `FederationClientError` on network errors and non-2xx responses.
|
||||
*/
|
||||
private async doPost(
|
||||
peerId: string,
|
||||
url: string,
|
||||
agent: Dispatcher,
|
||||
body: Record<string, unknown>,
|
||||
): Promise<unknown> {
|
||||
return this.doRequest(peerId, url, agent, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(body),
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute a GET request.
|
||||
* Returns the parsed response body as an unknown value.
|
||||
* Throws `FederationClientError` on network errors and non-2xx responses.
|
||||
*/
|
||||
private async doGet(peerId: string, url: string, agent: Dispatcher): Promise<unknown> {
|
||||
return this.doRequest(peerId, url, agent, { method: 'GET' });
|
||||
}
|
||||
|
||||
private async doRequest(
|
||||
peerId: string,
|
||||
url: string,
|
||||
agent: Dispatcher,
|
||||
init: { method: string; headers?: Record<string, string>; body?: string },
|
||||
): Promise<unknown> {
|
||||
let response: Awaited<ReturnType<typeof undiciFetch>>;
|
||||
|
||||
try {
|
||||
response = await undiciFetch(url, {
|
||||
...init,
|
||||
dispatcher: agent,
|
||||
});
|
||||
} catch (err) {
|
||||
throw new FederationClientError({
|
||||
code: 'NETWORK',
|
||||
message: `Network error calling peer ${peerId} at ${url}: ${err instanceof Error ? err.message : String(err)}`,
|
||||
peerId,
|
||||
cause: err,
|
||||
});
|
||||
}
|
||||
|
||||
const rawBody = await response.text().catch(() => '');
|
||||
|
||||
if (!response.ok) {
|
||||
const status = response.status;
|
||||
|
||||
// Attempt to parse as federation error envelope
|
||||
let serverMessage = `HTTP ${status}`;
|
||||
try {
|
||||
const json: unknown = JSON.parse(rawBody);
|
||||
const result = FederationErrorEnvelopeSchema.safeParse(json);
|
||||
if (result.success) {
|
||||
serverMessage = result.data.error.message;
|
||||
}
|
||||
} catch {
|
||||
// Not valid JSON or not a federation envelope — use generic message
|
||||
}
|
||||
|
||||
// Specific code for 403 (most actionable for callers); generic HTTP_{n} for others
|
||||
const code: FederationClientErrorCode = status === 403 ? 'FORBIDDEN' : `HTTP_${status}`;
|
||||
|
||||
throw new FederationClientError({
|
||||
status,
|
||||
code,
|
||||
message: `Peer ${peerId} returned ${status}: ${serverMessage}`,
|
||||
peerId,
|
||||
});
|
||||
}
|
||||
|
||||
try {
|
||||
return JSON.parse(rawBody) as unknown;
|
||||
} catch (err) {
|
||||
throw new FederationClientError({
|
||||
code: 'INVALID_RESPONSE',
|
||||
message: `Peer ${peerId} returned non-JSON body`,
|
||||
peerId,
|
||||
cause: err,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse and validate a response body against a Zod schema.
|
||||
*
|
||||
* For list/get, callers pass the result of `FederationListResponseSchema(z.unknown())`
|
||||
* so that the envelope structure is validated without requiring a concrete item schema
|
||||
* at the client level. The generic `T` provides compile-time typing.
|
||||
*
|
||||
* Throws `FederationClientError({ code: 'INVALID_RESPONSE' })` on parse failure.
|
||||
*/
|
||||
private parseWith<T>(peerId: string, body: unknown, schema: z.ZodTypeAny): T {
|
||||
const result = schema.safeParse(body);
|
||||
if (!result.success) {
|
||||
const issues = result.error.issues
|
||||
.map((e: z.ZodIssue) => `[${e.path.join('.') || 'root'}] ${e.message}`)
|
||||
.join('; ');
|
||||
throw new FederationClientError({
|
||||
code: 'INVALID_RESPONSE',
|
||||
message: `Peer ${peerId} returned invalid response shape: ${issues}`,
|
||||
peerId,
|
||||
});
|
||||
}
|
||||
return result.data as T;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user