Compare commits
15 Commits
fed-v0.2.0
...
docs/missi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5cde3a3b6d | ||
|
|
079c5597ff | ||
| 755df9079e | |||
| ac5650d9f9 | |||
| bd83f86740 | |||
|
|
0af3e218a1 | ||
|
|
b01c9b3bb0 | ||
| b67f2c9f08 | |||
|
|
37675ae3f2 | ||
|
|
a4a6769a6d | ||
|
|
21650fb194 | ||
| 89c733e0b9 | |||
| ee3f2defd9 | |||
| 7342c1290d | |||
| e64ddd2c1c |
@@ -73,6 +73,7 @@
|
|||||||
"rxjs": "^7.8.0",
|
"rxjs": "^7.8.0",
|
||||||
"socket.io": "^4.8.0",
|
"socket.io": "^4.8.0",
|
||||||
"uuid": "^11.0.0",
|
"uuid": "^11.0.0",
|
||||||
|
"undici": "^7.24.6",
|
||||||
"zod": "^4.3.6"
|
"zod": "^4.3.6"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
|
|||||||
@@ -1,8 +1,21 @@
|
|||||||
import { mkdirSync } from 'node:fs';
|
import { mkdirSync } from 'node:fs';
|
||||||
import { homedir } from 'node:os';
|
import { homedir } from 'node:os';
|
||||||
import { join } from 'node:path';
|
import { join } from 'node:path';
|
||||||
import { Global, Inject, Module, type OnApplicationShutdown } from '@nestjs/common';
|
import {
|
||||||
import { createDb, createPgliteDb, type Db, type DbHandle } from '@mosaicstack/db';
|
Global,
|
||||||
|
Inject,
|
||||||
|
Logger,
|
||||||
|
Module,
|
||||||
|
type OnApplicationShutdown,
|
||||||
|
type OnModuleInit,
|
||||||
|
} from '@nestjs/common';
|
||||||
|
import {
|
||||||
|
createDb,
|
||||||
|
createPgliteDb,
|
||||||
|
runPgliteMigrations,
|
||||||
|
type Db,
|
||||||
|
type DbHandle,
|
||||||
|
} from '@mosaicstack/db';
|
||||||
import { createStorageAdapter, type StorageAdapter } from '@mosaicstack/storage';
|
import { createStorageAdapter, type StorageAdapter } from '@mosaicstack/storage';
|
||||||
import type { MosaicConfig } from '@mosaicstack/config';
|
import type { MosaicConfig } from '@mosaicstack/config';
|
||||||
import { MOSAIC_CONFIG } from '../config/config.module.js';
|
import { MOSAIC_CONFIG } from '../config/config.module.js';
|
||||||
@@ -39,12 +52,37 @@ export const STORAGE_ADAPTER = 'STORAGE_ADAPTER';
|
|||||||
],
|
],
|
||||||
exports: [DB, STORAGE_ADAPTER],
|
exports: [DB, STORAGE_ADAPTER],
|
||||||
})
|
})
|
||||||
export class DatabaseModule implements OnApplicationShutdown {
|
export class DatabaseModule implements OnApplicationShutdown, OnModuleInit {
|
||||||
|
private readonly logger = new Logger(DatabaseModule.name);
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
@Inject(DB_HANDLE) private readonly handle: DbHandle,
|
@Inject(DB_HANDLE) private readonly handle: DbHandle,
|
||||||
@Inject(STORAGE_ADAPTER) private readonly storageAdapter: StorageAdapter,
|
@Inject(STORAGE_ADAPTER) private readonly storageAdapter: StorageAdapter,
|
||||||
|
@Inject(MOSAIC_CONFIG) private readonly config: MosaicConfig,
|
||||||
) {}
|
) {}
|
||||||
|
|
||||||
|
// Migrations must complete before any module that injects DB starts serving
// requests. NestJS awaits onModuleInit before app.listen(), and modules that
// inject DB are initialized after this one — so all DB-dependent code sees a
// populated schema before the first HTTP request lands.
//
// Local (PGlite) tier: we run gateway-DB migrations explicitly here. The
// storage adapter writes to a separate PGlite directory and only manages its
// own KV tables, so we still call its migrate() afterwards.
//
// Postgres tier: PostgresAdapter.migrate() already calls runMigrations() on
// the same DATABASE_URL, so a single call covers both the gateway DB and
// the storage tables. We deliberately do NOT call runMigrations() here to
// avoid opening a second short-lived connection and doubling startup cost.
async onModuleInit(): Promise<void> {
  // Only the local tier needs the explicit gateway-DB migration step.
  if (this.config.tier === 'local') {
    this.logger.log('Applying PGlite schema migrations...');
    await runPgliteMigrations(this.handle);
  }

  // Runs for every tier; a rejection here propagates out of onModuleInit.
  this.logger.log(`Initializing storage adapter (${this.storageAdapter.name})...`);
  await this.storageAdapter.migrate();
}
|
||||||
|
|
||||||
async onApplicationShutdown(): Promise<void> {
|
async onApplicationShutdown(): Promise<void> {
|
||||||
await Promise.all([this.handle.close(), this.storageAdapter.close()]);
|
await Promise.all([this.handle.close(), this.storageAdapter.close()]);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,10 +24,11 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import 'reflect-metadata';
|
import 'reflect-metadata';
|
||||||
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
import { describe, it, expect, vi, beforeEach, beforeAll } from 'vitest';
|
||||||
import { GoneException, NotFoundException } from '@nestjs/common';
|
import { GoneException, NotFoundException } from '@nestjs/common';
|
||||||
import type { Db } from '@mosaicstack/db';
|
import type { Db } from '@mosaicstack/db';
|
||||||
import { EnrollmentService } from '../enrollment.service.js';
|
import { EnrollmentService } from '../enrollment.service.js';
|
||||||
|
import { makeSelfSignedCert } from './helpers/test-cert.js';
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Test constants
|
// Test constants
|
||||||
@@ -38,10 +39,18 @@ const PEER_ID = 'p2222222-2222-2222-2222-222222222222';
|
|||||||
const USER_ID = 'u3333333-3333-3333-3333-333333333333';
|
const USER_ID = 'u3333333-3333-3333-3333-333333333333';
|
||||||
const TOKEN = 'a'.repeat(64); // 64-char hex
|
const TOKEN = 'a'.repeat(64); // 64-char hex
|
||||||
|
|
||||||
const MOCK_CERT_PEM = '-----BEGIN CERTIFICATE-----\nMOCK\n-----END CERTIFICATE-----\n';
|
// Real self-signed EC P-256 cert — populated once in beforeAll.
|
||||||
const MOCK_CHAIN_PEM = MOCK_CERT_PEM + MOCK_CERT_PEM;
|
// Required because EnrollmentService.extractCertNotAfter calls new X509Certificate(certPem)
|
||||||
|
// with strict parsing (PR #501 HIGH-2: no silent fallback).
|
||||||
|
let REAL_CERT_PEM: string;
|
||||||
|
|
||||||
|
const MOCK_CHAIN_PEM = () => REAL_CERT_PEM + REAL_CERT_PEM;
|
||||||
const MOCK_SERIAL = 'ABCD1234';
|
const MOCK_SERIAL = 'ABCD1234';
|
||||||
|
|
||||||
|
beforeAll(async () => {
|
||||||
|
REAL_CERT_PEM = await makeSelfSignedCert();
|
||||||
|
});
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Factory helpers
|
// Factory helpers
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@@ -103,11 +112,27 @@ function makeDb({
|
|||||||
const claimUpdateMock = vi.fn().mockReturnValue({ set: setClaimMock });
|
const claimUpdateMock = vi.fn().mockReturnValue({ set: setClaimMock });
|
||||||
|
|
||||||
// transaction(cb) — cb receives txMock; txMock has update + insert
|
// transaction(cb) — cb receives txMock; txMock has update + insert
|
||||||
const txInsertValues = vi.fn().mockResolvedValue(undefined);
|
//
|
||||||
const txInsertMock = vi.fn().mockReturnValue({ values: txInsertValues });
|
// The tx mock must support two tx.update() call patterns (CRIT-2, PR #501):
|
||||||
const txWhereUpdate = vi.fn().mockResolvedValue(undefined);
|
// 1. Grant activation: .update().set().where().returning() → resolves to [{ id }]
|
||||||
|
// 2. Peer update: .update().set().where() → resolves to undefined
|
||||||
|
//
|
||||||
|
// We achieve this by making txWhereUpdate return an object with BOTH a thenable
|
||||||
|
// interface (so `await tx.update().set().where()` works) AND a .returning() method.
|
||||||
|
const txGrantActivatedRow = { id: GRANT_ID };
|
||||||
|
const txReturningMock = vi.fn().mockResolvedValue([txGrantActivatedRow]);
|
||||||
|
const txWhereUpdate = vi.fn().mockReturnValue({
|
||||||
|
// .returning() for grant activation (first tx.update call)
|
||||||
|
returning: txReturningMock,
|
||||||
|
// thenables so `await tx.update().set().where()` also works for peer update
|
||||||
|
then: (resolve: (v: undefined) => void) => resolve(undefined),
|
||||||
|
catch: () => undefined,
|
||||||
|
finally: () => undefined,
|
||||||
|
});
|
||||||
const txSetMock = vi.fn().mockReturnValue({ where: txWhereUpdate });
|
const txSetMock = vi.fn().mockReturnValue({ where: txWhereUpdate });
|
||||||
const txUpdateMock = vi.fn().mockReturnValue({ set: txSetMock });
|
const txUpdateMock = vi.fn().mockReturnValue({ set: txSetMock });
|
||||||
|
const txInsertValues = vi.fn().mockResolvedValue(undefined);
|
||||||
|
const txInsertMock = vi.fn().mockReturnValue({ values: txInsertValues });
|
||||||
const txMock = { update: txUpdateMock, insert: txInsertMock };
|
const txMock = { update: txUpdateMock, insert: txInsertMock };
|
||||||
const transactionMock = vi
|
const transactionMock = vi
|
||||||
.fn()
|
.fn()
|
||||||
@@ -132,6 +157,7 @@ function makeDb({
|
|||||||
txInsertValues,
|
txInsertValues,
|
||||||
txInsertMock,
|
txInsertMock,
|
||||||
txWhereUpdate,
|
txWhereUpdate,
|
||||||
|
txReturningMock,
|
||||||
txSetMock,
|
txSetMock,
|
||||||
txUpdateMock,
|
txUpdateMock,
|
||||||
txMock,
|
txMock,
|
||||||
@@ -146,11 +172,13 @@ function makeDb({
|
|||||||
|
|
||||||
function makeCaService() {
|
function makeCaService() {
|
||||||
return {
|
return {
|
||||||
issueCert: vi.fn().mockResolvedValue({
|
// REAL_CERT_PEM is populated by beforeAll — safe to reference via closure here
|
||||||
certPem: MOCK_CERT_PEM,
|
// because makeCaService() is only called after the suite's beforeAll runs.
|
||||||
certChainPem: MOCK_CHAIN_PEM,
|
issueCert: vi.fn().mockImplementation(async () => ({
|
||||||
|
certPem: REAL_CERT_PEM,
|
||||||
|
certChainPem: MOCK_CHAIN_PEM(),
|
||||||
serialNumber: MOCK_SERIAL,
|
serialNumber: MOCK_SERIAL,
|
||||||
}),
|
})),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -301,29 +329,29 @@ describe('EnrollmentService.redeem — success path', () => {
|
|||||||
});
|
});
|
||||||
caService.issueCert.mockImplementation(async () => {
|
caService.issueCert.mockImplementation(async () => {
|
||||||
callOrder.push('issueCert');
|
callOrder.push('issueCert');
|
||||||
return { certPem: MOCK_CERT_PEM, certChainPem: MOCK_CHAIN_PEM, serialNumber: MOCK_SERIAL };
|
return { certPem: REAL_CERT_PEM, certChainPem: MOCK_CHAIN_PEM(), serialNumber: MOCK_SERIAL };
|
||||||
});
|
});
|
||||||
|
|
||||||
await service.redeem(TOKEN, MOCK_CERT_PEM);
|
await service.redeem(TOKEN, '---CSR---');
|
||||||
|
|
||||||
expect(callOrder).toEqual(['claim', 'issueCert']);
|
expect(callOrder).toEqual(['claim', 'issueCert']);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('calls CaService.issueCert with grantId, subjectUserId, csrPem, ttlSeconds=300', async () => {
|
it('calls CaService.issueCert with grantId, subjectUserId, csrPem, ttlSeconds=300', async () => {
|
||||||
await service.redeem(TOKEN, MOCK_CERT_PEM);
|
await service.redeem(TOKEN, '---CSR---');
|
||||||
|
|
||||||
expect(caService.issueCert).toHaveBeenCalledWith(
|
expect(caService.issueCert).toHaveBeenCalledWith(
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
grantId: GRANT_ID,
|
grantId: GRANT_ID,
|
||||||
subjectUserId: USER_ID,
|
subjectUserId: USER_ID,
|
||||||
csrPem: MOCK_CERT_PEM,
|
csrPem: '---CSR---',
|
||||||
ttlSeconds: 300,
|
ttlSeconds: 300,
|
||||||
}),
|
}),
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('runs activate grant + peer update + audit inside a transaction', async () => {
|
it('runs activate grant + peer update + audit inside a transaction', async () => {
|
||||||
await service.redeem(TOKEN, MOCK_CERT_PEM);
|
await service.redeem(TOKEN, '---CSR---');
|
||||||
|
|
||||||
expect(db._mocks.transactionMock).toHaveBeenCalledOnce();
|
expect(db._mocks.transactionMock).toHaveBeenCalledOnce();
|
||||||
// tx.update called twice: activate grant + update peer
|
// tx.update called twice: activate grant + update peer
|
||||||
@@ -333,17 +361,17 @@ describe('EnrollmentService.redeem — success path', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
it('activates grant (sets status=active) inside the transaction', async () => {
|
it('activates grant (sets status=active) inside the transaction', async () => {
|
||||||
await service.redeem(TOKEN, MOCK_CERT_PEM);
|
await service.redeem(TOKEN, '---CSR---');
|
||||||
|
|
||||||
expect(db._mocks.txSetMock).toHaveBeenCalledWith(expect.objectContaining({ status: 'active' }));
|
expect(db._mocks.txSetMock).toHaveBeenCalledWith(expect.objectContaining({ status: 'active' }));
|
||||||
});
|
});
|
||||||
|
|
||||||
it('updates the federationPeers row with certPem, certSerial, state=active inside the transaction', async () => {
|
it('updates the federationPeers row with certPem, certSerial, state=active inside the transaction', async () => {
|
||||||
await service.redeem(TOKEN, MOCK_CERT_PEM);
|
await service.redeem(TOKEN, '---CSR---');
|
||||||
|
|
||||||
expect(db._mocks.txSetMock).toHaveBeenCalledWith(
|
expect(db._mocks.txSetMock).toHaveBeenCalledWith(
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
certPem: MOCK_CERT_PEM,
|
certPem: REAL_CERT_PEM,
|
||||||
certSerial: MOCK_SERIAL,
|
certSerial: MOCK_SERIAL,
|
||||||
state: 'active',
|
state: 'active',
|
||||||
}),
|
}),
|
||||||
@@ -351,7 +379,7 @@ describe('EnrollmentService.redeem — success path', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
it('inserts an audit log row inside the transaction', async () => {
|
it('inserts an audit log row inside the transaction', async () => {
|
||||||
await service.redeem(TOKEN, MOCK_CERT_PEM);
|
await service.redeem(TOKEN, '---CSR---');
|
||||||
|
|
||||||
expect(db._mocks.txInsertValues).toHaveBeenCalledWith(
|
expect(db._mocks.txInsertValues).toHaveBeenCalledWith(
|
||||||
expect.objectContaining({
|
expect.objectContaining({
|
||||||
@@ -363,11 +391,11 @@ describe('EnrollmentService.redeem — success path', () => {
|
|||||||
});
|
});
|
||||||
|
|
||||||
it('returns { certPem, certChainPem } from CaService', async () => {
|
it('returns { certPem, certChainPem } from CaService', async () => {
|
||||||
const result = await service.redeem(TOKEN, MOCK_CERT_PEM);
|
const result = await service.redeem(TOKEN, '---CSR---');
|
||||||
|
|
||||||
expect(result).toEqual({
|
expect(result).toEqual({
|
||||||
certPem: MOCK_CERT_PEM,
|
certPem: REAL_CERT_PEM,
|
||||||
certChainPem: MOCK_CHAIN_PEM,
|
certChainPem: MOCK_CHAIN_PEM(),
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
138
apps/gateway/src/federation/__tests__/helpers/test-cert.ts
Normal file
138
apps/gateway/src/federation/__tests__/helpers/test-cert.ts
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
/**
|
||||||
|
* Test helpers for generating real X.509 PEM certificates in unit tests.
|
||||||
|
*
|
||||||
|
* PR #501 (FED-M2-11) introduced strict `new X509Certificate(certPem)` parsing
|
||||||
|
* in both EnrollmentService.extractCertNotAfter and CaService.issueCert — dummy
|
||||||
|
* cert strings now throw `error:0680007B:asn1 encoding routines::header too long`.
|
||||||
|
*
|
||||||
|
* These helpers produce minimal but cryptographically valid self-signed EC P-256
|
||||||
|
* certificates via @peculiar/x509 + Node.js webcrypto, suitable for test mocks.
|
||||||
|
*
|
||||||
|
* Two variants:
|
||||||
|
* - makeSelfSignedCert() Plain cert — satisfies node:crypto X509Certificate parse.
|
||||||
|
* - makeMosaicIssuedCert(opts) Cert with custom Mosaic OID extensions — satisfies the
|
||||||
|
* CRIT-1 OID presence + value checks in CaService.issueCert.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { webcrypto } from 'node:crypto';
|
||||||
|
import {
|
||||||
|
X509CertificateGenerator,
|
||||||
|
Extension,
|
||||||
|
KeyUsagesExtension,
|
||||||
|
KeyUsageFlags,
|
||||||
|
BasicConstraintsExtension,
|
||||||
|
cryptoProvider,
|
||||||
|
} from '@peculiar/x509';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Internal helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Encode a string as an ASN.1 UTF8String TLV:
 *   0x0C (tag) + 1-byte length (for strings ≤ 127 bytes) + UTF-8 bytes.
 *
 * CaService.issueCert reads the extension value as:
 *   decoder.decode(grantIdExt.value.slice(2))
 * i.e. it skips the tag + length byte and decodes the remainder as UTF-8.
 * So we must produce exactly this encoding as the OCTET STRING content.
 *
 * @param value - Text to encode; its UTF-8 form must be at most 127 bytes.
 * @returns A new array holding the tag byte, the length byte, then the UTF-8 payload.
 * @throws Error when the UTF-8 form is longer than 127 bytes, because only the
 *   single-byte length form is produced.
 */
function encodeUtf8String(value: string): Uint8Array {
  const utf8 = new TextEncoder().encode(value);
  // The limit is on encoded bytes, not characters: multi-byte code points count fully.
  if (utf8.length > 127) {
    throw new Error('encodeUtf8String: value too long for single-byte length encoding');
  }
  const buf = new Uint8Array(2 + utf8.length);
  buf[0] = 0x0c; // ASN.1 UTF8String tag
  buf[1] = utf8.length; // single length byte (guarded above to be ≤ 127)
  buf.set(utf8, 2);
  return buf;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Mosaic OID constants (must match production CaService)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
const OID_MOSAIC_GRANT_ID = '1.3.6.1.4.1.99999.1';
|
||||||
|
const OID_MOSAIC_SUBJECT_USER_ID = '1.3.6.1.4.1.99999.2';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Public API
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Generate a minimal self-signed EC P-256 certificate valid for 1 day.
 * CN=harness-test, no custom extensions.
 *
 * Suitable for:
 *   - EnrollmentService.extractCertNotAfter (just needs parseable PEM)
 *   - Any mock that returns certPem / certChainPem without OID checks
 *
 * @returns The generated certificate serialized as a PEM string.
 */
export async function makeSelfSignedCert(): Promise<string> {
  // Ensure @peculiar/x509 uses Node.js webcrypto (available as globalThis.crypto in Node 19+,
  // but we set it explicitly here to be safe on all Node 18+ versions).
  cryptoProvider.set(webcrypto as unknown as Parameters<typeof cryptoProvider.set>[0]);

  const alg = { name: 'ECDSA', namedCurve: 'P-256', hash: 'SHA-256' } as const;
  // A fresh, non-extractable key pair is generated on every call.
  const keys = await webcrypto.subtle.generateKey(alg, false, ['sign', 'verify']);

  const now = new Date();
  const tomorrow = new Date(now.getTime() + 86_400_000); // +24h in milliseconds

  const cert = await X509CertificateGenerator.createSelfSigned({
    serialNumber: '01',
    name: 'CN=harness-test',
    notBefore: now,
    notAfter: tomorrow,
    signingAlgorithm: alg,
    keys,
    extensions: [
      new BasicConstraintsExtension(false),
      new KeyUsagesExtension(KeyUsageFlags.digitalSignature),
    ],
  });

  return cert.toString('pem');
}
|
||||||
|
|
||||||
|
/**
 * Generate a self-signed EC P-256 certificate that contains the two custom
 * Mosaic OID extensions required by CaService.issueCert's CRIT-1 check:
 *   OID 1.3.6.1.4.1.99999.1 → mosaic_grant_id        (value = grantId)
 *   OID 1.3.6.1.4.1.99999.2 → mosaic_subject_user_id (value = subjectUserId)
 *
 * The extension value encoding matches the production parser's `.slice(2)` assumption:
 * each extension value is an OCTET STRING wrapping an ASN.1 UTF8String TLV.
 *
 * @param opts - Values to embed in the two extensions.
 * @param opts.grantId - Written under OID 1.3.6.1.4.1.99999.1.
 * @param opts.subjectUserId - Written under OID 1.3.6.1.4.1.99999.2.
 * @returns The generated certificate serialized as a PEM string.
 * @throws Error (from encodeUtf8String) when either value exceeds 127 UTF-8 bytes.
 */
export async function makeMosaicIssuedCert(opts: {
  grantId: string;
  subjectUserId: string;
}): Promise<string> {
  // Ensure @peculiar/x509 uses Node.js webcrypto.
  cryptoProvider.set(webcrypto as unknown as Parameters<typeof cryptoProvider.set>[0]);

  const alg = { name: 'ECDSA', namedCurve: 'P-256', hash: 'SHA-256' } as const;
  // A fresh, non-extractable key pair is generated on every call.
  const keys = await webcrypto.subtle.generateKey(alg, false, ['sign', 'verify']);

  const now = new Date();
  const tomorrow = new Date(now.getTime() + 86_400_000); // +24h in milliseconds

  const cert = await X509CertificateGenerator.createSelfSigned({
    serialNumber: '01',
    name: 'CN=mosaic-issued-test',
    notBefore: now,
    notAfter: tomorrow,
    signingAlgorithm: alg,
    keys,
    extensions: [
      new BasicConstraintsExtension(false),
      new KeyUsagesExtension(KeyUsageFlags.digitalSignature),
      // mosaic_grant_id — OID 1.3.6.1.4.1.99999.1
      new Extension(OID_MOSAIC_GRANT_ID, false, encodeUtf8String(opts.grantId)),
      // mosaic_subject_user_id — OID 1.3.6.1.4.1.99999.2
      new Extension(OID_MOSAIC_SUBJECT_USER_ID, false, encodeUtf8String(opts.subjectUserId)),
    ],
  });

  return cert.toString('pem');
}
|
||||||
@@ -20,9 +20,10 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import 'reflect-metadata';
|
import 'reflect-metadata';
|
||||||
import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest';
|
import { describe, it, expect, vi, beforeEach, beforeAll, type Mock } from 'vitest';
|
||||||
import { jwtVerify, exportJWK, generateKeyPair } from 'jose';
|
import { jwtVerify, exportJWK, generateKeyPair } from 'jose';
|
||||||
import { Pkcs10CertificateRequestGenerator } from '@peculiar/x509';
|
import { Pkcs10CertificateRequestGenerator } from '@peculiar/x509';
|
||||||
|
import { makeMosaicIssuedCert } from './__tests__/helpers/test-cert.js';
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Mock node:https BEFORE importing CaService so the mock is in place when
|
// Mock node:https BEFORE importing CaService so the mock is in place when
|
||||||
@@ -74,6 +75,11 @@ const FAKE_CA_PEM = FAKE_CERT_PEM;
|
|||||||
const GRANT_ID = 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11';
|
const GRANT_ID = 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11';
|
||||||
const SUBJECT_USER_ID = 'b1ffcd00-0d1c-5f09-cc7e-7cc0ce491b22';
|
const SUBJECT_USER_ID = 'b1ffcd00-0d1c-5f09-cc7e-7cc0ce491b22';
|
||||||
|
|
||||||
|
// Real self-signed cert containing both Mosaic OID extensions — populated in beforeAll.
|
||||||
|
// Required because CaService.issueCert performs CRIT-1 OID presence/value checks on the
|
||||||
|
// response cert (PR #501 — strict parsing, no silent fallback).
|
||||||
|
let realIssuedCertPem: string;
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Generate a real EC P-256 key pair and CSR for integration-style tests
|
// Generate a real EC P-256 key pair and CSR for integration-style tests
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@@ -194,6 +200,15 @@ function makeHttpsMock(statusCode: number, body: unknown, errorMsg?: string): vo
|
|||||||
describe('CaService', () => {
|
describe('CaService', () => {
|
||||||
let service: CaService;
|
let service: CaService;
|
||||||
|
|
||||||
|
beforeAll(async () => {
|
||||||
|
// Generate a cert with the two Mosaic OIDs so that CaService.issueCert's
|
||||||
|
// CRIT-1 OID checks pass when mock step-ca returns it as `crt`.
|
||||||
|
realIssuedCertPem = await makeMosaicIssuedCert({
|
||||||
|
grantId: GRANT_ID,
|
||||||
|
subjectUserId: SUBJECT_USER_ID,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
beforeEach(() => {
|
beforeEach(() => {
|
||||||
vi.clearAllMocks();
|
vi.clearAllMocks();
|
||||||
service = new CaService();
|
service = new CaService();
|
||||||
@@ -226,9 +241,9 @@ describe('CaService', () => {
|
|||||||
|
|
||||||
// Now test that the service's validateCsr accepts it.
|
// Now test that the service's validateCsr accepts it.
|
||||||
// We call it indirectly via issueCert with a successful mock.
|
// We call it indirectly via issueCert with a successful mock.
|
||||||
makeHttpsMock(200, { crt: FAKE_CERT_PEM, certChain: [FAKE_CERT_PEM, FAKE_CA_PEM] });
|
makeHttpsMock(200, { crt: realIssuedCertPem, certChain: [realIssuedCertPem, FAKE_CA_PEM] });
|
||||||
const result = await service.issueCert(makeReq({ csrPem: realCsrPem }));
|
const result = await service.issueCert(makeReq({ csrPem: realCsrPem }));
|
||||||
expect(result.certPem).toBe(FAKE_CERT_PEM);
|
expect(result.certPem).toBe(realIssuedCertPem);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('throws INVALID_CSR for a malformed PEM-shaped CSR', async () => {
|
it('throws INVALID_CSR for a malformed PEM-shaped CSR', async () => {
|
||||||
@@ -251,14 +266,14 @@ describe('CaService', () => {
|
|||||||
it('returns IssuedCertDto on success (certChain present)', async () => {
|
it('returns IssuedCertDto on success (certChain present)', async () => {
|
||||||
if (!realCsrPem) realCsrPem = await generateRealCsr();
|
if (!realCsrPem) realCsrPem = await generateRealCsr();
|
||||||
makeHttpsMock(200, {
|
makeHttpsMock(200, {
|
||||||
crt: FAKE_CERT_PEM,
|
crt: realIssuedCertPem,
|
||||||
certChain: [FAKE_CERT_PEM, FAKE_CA_PEM],
|
certChain: [realIssuedCertPem, FAKE_CA_PEM],
|
||||||
});
|
});
|
||||||
|
|
||||||
const result = await service.issueCert(makeReq());
|
const result = await service.issueCert(makeReq());
|
||||||
|
|
||||||
expect(result.certPem).toBe(FAKE_CERT_PEM);
|
expect(result.certPem).toBe(realIssuedCertPem);
|
||||||
expect(result.certChainPem).toContain(FAKE_CERT_PEM);
|
expect(result.certChainPem).toContain(realIssuedCertPem);
|
||||||
expect(result.certChainPem).toContain(FAKE_CA_PEM);
|
expect(result.certChainPem).toContain(FAKE_CA_PEM);
|
||||||
expect(typeof result.serialNumber).toBe('string');
|
expect(typeof result.serialNumber).toBe('string');
|
||||||
});
|
});
|
||||||
@@ -270,14 +285,14 @@ describe('CaService', () => {
|
|||||||
it('builds certChainPem from crt+ca when certChain is absent', async () => {
|
it('builds certChainPem from crt+ca when certChain is absent', async () => {
|
||||||
if (!realCsrPem) realCsrPem = await generateRealCsr();
|
if (!realCsrPem) realCsrPem = await generateRealCsr();
|
||||||
makeHttpsMock(200, {
|
makeHttpsMock(200, {
|
||||||
crt: FAKE_CERT_PEM,
|
crt: realIssuedCertPem,
|
||||||
ca: FAKE_CA_PEM,
|
ca: FAKE_CA_PEM,
|
||||||
});
|
});
|
||||||
|
|
||||||
const result = await service.issueCert(makeReq());
|
const result = await service.issueCert(makeReq());
|
||||||
|
|
||||||
expect(result.certPem).toBe(FAKE_CERT_PEM);
|
expect(result.certPem).toBe(realIssuedCertPem);
|
||||||
expect(result.certChainPem).toContain(FAKE_CERT_PEM);
|
expect(result.certChainPem).toContain(realIssuedCertPem);
|
||||||
expect(result.certChainPem).toContain(FAKE_CA_PEM);
|
expect(result.certChainPem).toContain(FAKE_CA_PEM);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -287,12 +302,12 @@ describe('CaService', () => {
|
|||||||
|
|
||||||
it('falls back to certPem alone when certChain and ca are absent', async () => {
|
it('falls back to certPem alone when certChain and ca are absent', async () => {
|
||||||
if (!realCsrPem) realCsrPem = await generateRealCsr();
|
if (!realCsrPem) realCsrPem = await generateRealCsr();
|
||||||
makeHttpsMock(200, { crt: FAKE_CERT_PEM });
|
makeHttpsMock(200, { crt: realIssuedCertPem });
|
||||||
|
|
||||||
const result = await service.issueCert(makeReq());
|
const result = await service.issueCert(makeReq());
|
||||||
|
|
||||||
expect(result.certPem).toBe(FAKE_CERT_PEM);
|
expect(result.certPem).toBe(realIssuedCertPem);
|
||||||
expect(result.certChainPem).toBe(FAKE_CERT_PEM);
|
expect(result.certChainPem).toBe(realIssuedCertPem);
|
||||||
});
|
});
|
||||||
|
|
||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
@@ -398,7 +413,7 @@ describe('CaService', () => {
|
|||||||
statusCode: 200,
|
statusCode: 200,
|
||||||
on: (event: string, cb: (chunk?: Buffer) => void) => {
|
on: (event: string, cb: (chunk?: Buffer) => void) => {
|
||||||
if (event === 'data') {
|
if (event === 'data') {
|
||||||
cb(Buffer.from(JSON.stringify({ crt: FAKE_CERT_PEM })));
|
cb(Buffer.from(JSON.stringify({ crt: realIssuedCertPem })));
|
||||||
}
|
}
|
||||||
if (event === 'end') {
|
if (event === 'end') {
|
||||||
cb();
|
cb();
|
||||||
@@ -555,7 +570,7 @@ describe('CaService', () => {
|
|||||||
statusCode: 200,
|
statusCode: 200,
|
||||||
on: (event: string, cb: (chunk?: Buffer) => void) => {
|
on: (event: string, cb: (chunk?: Buffer) => void) => {
|
||||||
if (event === 'data') {
|
if (event === 'data') {
|
||||||
cb(Buffer.from(JSON.stringify({ crt: FAKE_CERT_PEM })));
|
cb(Buffer.from(JSON.stringify({ crt: realIssuedCertPem })));
|
||||||
}
|
}
|
||||||
if (event === 'end') {
|
if (event === 'end') {
|
||||||
cb();
|
cb();
|
||||||
|
|||||||
@@ -0,0 +1,553 @@
|
|||||||
|
/**
|
||||||
|
* Unit tests for FederationClientService (FED-M3-08).
|
||||||
|
*
|
||||||
|
* HTTP mocking strategy:
|
||||||
|
* undici MockAgent is used to intercept outbound HTTP requests. The service
|
||||||
|
* uses `undici.fetch` with a `dispatcher` option, so MockAgent is set as the
|
||||||
|
* global dispatcher and all requests flow through it.
|
||||||
|
*
|
||||||
|
* Because the service builds one `undici.Agent` per peer and passes it as
|
||||||
|
* the dispatcher on every fetch call, we cannot intercept at the Agent level
|
||||||
|
* in unit tests without significant refactoring. Instead, we set the global
|
||||||
|
* dispatcher to a MockAgent and override the service's `doRequest` indirection
|
||||||
|
* by spying on the internal fetch call.
|
||||||
|
*
|
||||||
|
* For the cert/key wiring, we use the real `sealClientKey` function from
|
||||||
|
* peer-key.util.ts with a test secret — no stubs.
|
||||||
|
*
|
||||||
|
* Sealed-key setup:
|
||||||
|
* Each test (or beforeAll) calls `sealClientKey(TEST_PRIVATE_KEY_PEM)` with
|
||||||
|
* BETTER_AUTH_SECRET set to a deterministic test value so that
|
||||||
|
* `unsealClientKey` in the service recovers the original PEM.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import 'reflect-metadata';
|
||||||
|
import { describe, it, expect, vi, beforeEach, afterEach, beforeAll, afterAll } from 'vitest';
|
||||||
|
import { MockAgent, setGlobalDispatcher, getGlobalDispatcher } from 'undici';
|
||||||
|
import type { Dispatcher } from 'undici';
|
||||||
|
import { writeFileSync, unlinkSync } from 'node:fs';
|
||||||
|
import { tmpdir } from 'node:os';
|
||||||
|
import { join } from 'node:path';
|
||||||
|
import type { Db } from '@mosaicstack/db';
|
||||||
|
import { FederationClientService, FederationClientError } from '../federation-client.service.js';
|
||||||
|
import { sealClientKey } from '../../peer-key.util.js';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Test constants
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Value installed as BETTER_AUTH_SECRET whenever the spec seals or unseals a key.
const TEST_SECRET = 'test-secret-for-federation-client-spec-only';
// Identity and base URL of the single fake peer used by every test.
const PEER_ID = 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa';
const ENDPOINT = 'https://peer.example.com';

// Placeholder private-key PEM. It is not a real key: the tests only need it to
// round-trip through seal/unseal; no TLS handshake is ever attempted with it
// (MockAgent handles the HTTP layer).
const TEST_PRIVATE_KEY_PEM = [
  '-----BEGIN PRIVATE KEY-----',
  'MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDummyKeyForTests',
  '-----END PRIVATE KEY-----',
].join('\n');

// Placeholder certificate PEM (dummy — only used for mTLS Agent construction).
const TEST_CERT_PEM = [
  '-----BEGIN CERTIFICATE-----',
  'MIIBdummyCertForFederationClientTests==',
  '-----END CERTIFICATE-----',
].join('\n');

const TEST_CERT_SERIAL = 'ABCDEF1234567890';

// ---------------------------------------------------------------------------
// Sealed key (computed once in beforeAll)
// ---------------------------------------------------------------------------

let SEALED_KEY: string;

// Location and contents of a stub Step-CA root cert written in beforeAll.
// The cert never negotiates TLS in these tests, but loadStepCaRoot() needs
// the file to exist. Note the trailing newline at the end of the PEM.
const STUB_CA_PEM_PATH = join(tmpdir(), 'federation-client-spec-ca.pem');
const STUB_CA_PEM = [
  '-----BEGIN CERTIFICATE-----',
  'MIIBdummyCAforFederationClientSpecOnly==',
  '-----END CERTIFICATE-----',
  '',
].join('\n');
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Peer row factory
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Build a peer row object for the mock DB to return.
 *
 * Defaults describe an active peer at ENDPOINT whose `clientKeyPem` is the
 * sealed test key; any field can be replaced through `overrides`.
 * SEALED_KEY is read at call time, so this must run after `beforeAll`.
 */
function makePeerRow(overrides: Partial<Record<string, unknown>> = {}) {
  const defaults = {
    id: PEER_ID,
    commonName: 'peer-example-com',
    displayName: 'Test Peer',
    certPem: TEST_CERT_PEM,
    certSerial: TEST_CERT_SERIAL,
    certNotAfter: new Date('2030-01-01T00:00:00Z'),
    clientKeyPem: SEALED_KEY,
    state: 'active' as const,
    endpointUrl: ENDPOINT,
    lastSeenAt: null,
    createdAt: new Date('2026-01-01T00:00:00Z'),
    revokedAt: null,
  };

  // Later spread wins, so caller-supplied fields replace the defaults.
  return { ...defaults, ...overrides };
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Mock DB builder
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Build a stub `Db` whose `select().from().where().limit()` chain resolves to
 * `selectRows` (one default peer row unless told otherwise).
 *
 * `insert`, `update`, `delete` and `transaction` are bare `vi.fn()` stubs.
 */
function makeDb(selectRows: unknown[] = [makePeerRow()]): Db {
  // Assemble the chain from the outside in; only the terminal `limit` is async.
  const selectChain = {
    from: vi.fn().mockReturnValue({
      where: vi.fn().mockReturnValue({
        limit: vi.fn().mockResolvedValue(selectRows),
      }),
    }),
  };

  const dbStub = {
    select: vi.fn().mockReturnValue(selectChain),
    insert: vi.fn(),
    update: vi.fn(),
    delete: vi.fn(),
    transaction: vi.fn(),
  };

  return dbStub as unknown as Db;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helpers for MockAgent HTTP interception
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Install a fresh undici MockAgent as the global dispatcher (with real network
 * connections disabled) and return it together with the mock pool for the
 * peer ENDPOINT, so each test can register its own interceptors.
 */
function makeMockAgent() {
  const mockAgent = new MockAgent({ connections: 1 });
  mockAgent.disableNetConnect();
  setGlobalDispatcher(mockAgent);

  return { mockAgent, pool: mockAgent.get(ENDPOINT) };
}
|
||||||
|
|
||||||
|
/**
 * Build a FederationClientService whose private `resolveEntry` is stubbed so
 * that requests are dispatched through the given MockAgent pool.
 *
 * The service hands the `agent` from its resolved entry to the HTTP layer.
 * By returning an entry whose `agent` is `mockPool`, the interceptors
 * registered on that pool answer the request instead of a per-peer mTLS Agent.
 *
 * Note: the stub bypasses the real peer lookup entirely — `db` is only passed
 * to the constructor and is not queried through this path.
 */
function makeService(db: Db, mockPool: Dispatcher): FederationClientService {
  const service = new FederationClientService(db);

  // `resolveEntry` is private, so widen the type just enough to spy on it.
  const spyTarget = service as unknown as {
    resolveEntry: (peerId: string) => Promise<unknown>;
  };

  // Each call yields a fresh, fully controlled entry pointing at the mock pool.
  const stubbedEntry = async (_peerId: string) => ({
    agent: mockPool,
    endpointUrl: ENDPOINT,
    certPem: TEST_CERT_PEM,
    certSerial: TEST_CERT_SERIAL,
  });

  vi.spyOn(spyTarget, 'resolveEntry').mockImplementation(stubbedEntry);

  return service;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Test setup
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** Global undici dispatcher captured before each test so it can be reinstated. */
let originalDispatcher: Dispatcher;

/** Environment variables this spec overrides for the duration of each test. */
const MANAGED_ENV_VARS = ['BETTER_AUTH_SECRET', 'STEP_CA_ROOT_CERT_PATH'] as const;

/** Values the managed env vars had before the current test started. */
let savedEnv: Record<string, string | undefined> = {};

/** Put `name` back to `value`, deleting the variable when it was previously unset. */
function restoreEnvVar(name: string, value: string | undefined): void {
  if (value === undefined) {
    delete process.env[name];
  } else {
    process.env[name] = value;
  }
}

beforeAll(() => {
  // Seal the test key once — sealClientKey requires BETTER_AUTH_SECRET, so set
  // it temporarily and put the previous value back even if sealing throws.
  const previousSecret = process.env['BETTER_AUTH_SECRET'];
  process.env['BETTER_AUTH_SECRET'] = TEST_SECRET;
  try {
    SEALED_KEY = sealClientKey(TEST_PRIVATE_KEY_PEM);
  } finally {
    restoreEnvVar('BETTER_AUTH_SECRET', previousSecret);
  }

  // loadStepCaRoot() reads this file, so it has to exist on disk.
  writeFileSync(STUB_CA_PEM_PATH, STUB_CA_PEM, 'utf8');
});

afterAll(() => {
  try {
    unlinkSync(STUB_CA_PEM_PATH);
  } catch {
    // best-effort cleanup
  }
});

beforeEach(() => {
  originalDispatcher = getGlobalDispatcher();

  // Snapshot whatever the environment held before overriding it, so afterEach
  // can restore it instead of wiping a value that was set outside this spec.
  savedEnv = {};
  for (const name of MANAGED_ENV_VARS) {
    savedEnv[name] = process.env[name];
  }

  process.env['BETTER_AUTH_SECRET'] = TEST_SECRET;
  process.env['STEP_CA_ROOT_CERT_PATH'] = STUB_CA_PEM_PATH;
});

afterEach(() => {
  setGlobalDispatcher(originalDispatcher);
  vi.restoreAllMocks();

  // Restore (not merely delete) the env vars; tests may also have deleted them.
  for (const name of MANAGED_ENV_VARS) {
    restoreEnvVar(name, savedEnv[name]);
  }
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** The single task record echoed by the list and get fixtures. */
const TASK_ONE = { id: '1', title: 'Task One' };

/** Body replied by the mocked peer for a successful `list` call. */
const LIST_BODY = {
  items: [TASK_ONE],
  nextCursor: undefined,
  _partial: false,
};

/** Body replied by the mocked peer for a successful `get` call. */
const GET_BODY = {
  item: TASK_ONE,
  _partial: false,
};

/** Body replied by the mocked peer for a successful `capabilities` call. */
const CAP_BODY = {
  resources: ['tasks'],
  excluded_resources: [],
  max_rows_per_query: 100,
  supported_verbs: ['list', 'get', 'capabilities'] as const,
};
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Tests
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('FederationClientService', () => {
|
||||||
|
// ─── Successful verb calls ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
describe('list()', () => {
  it('returns parsed typed response on success', async () => {
    // Requests are answered by the MockAgent pool injected via makeService.
    const { mockAgent, pool } = makeMockAgent();
    const svc = makeService(makeDb(), pool);
    const jsonReplyOptions = { headers: { 'content-type': 'application/json' } };

    pool
      .intercept({ path: '/api/federation/v1/list/tasks', method: 'POST' })
      .reply(200, LIST_BODY, jsonReplyOptions);

    const { items } = await svc.list(PEER_ID, 'tasks', {});

    expect(items).toHaveLength(1);
    expect(items[0]).toMatchObject({ id: '1', title: 'Task One' });

    await mockAgent.close();
  });
});
|
||||||
|
|
||||||
|
describe('get()', () => {
  it('returns parsed typed response on success', async () => {
    // Requests are answered by the MockAgent pool injected via makeService.
    const { mockAgent, pool } = makeMockAgent();
    const svc = makeService(makeDb(), pool);
    const jsonReplyOptions = { headers: { 'content-type': 'application/json' } };

    pool
      .intercept({ path: '/api/federation/v1/get/tasks/1', method: 'POST' })
      .reply(200, GET_BODY, jsonReplyOptions);

    const { item } = await svc.get(PEER_ID, 'tasks', '1', {});

    expect(item).toMatchObject({ id: '1', title: 'Task One' });

    await mockAgent.close();
  });
});
|
||||||
|
|
||||||
|
describe('capabilities()', () => {
  it('returns parsed capabilities response on success', async () => {
    // Requests are answered by the MockAgent pool injected via makeService.
    const { mockAgent, pool } = makeMockAgent();
    const svc = makeService(makeDb(), pool);
    const jsonReplyOptions = { headers: { 'content-type': 'application/json' } };

    pool
      .intercept({ path: '/api/federation/v1/capabilities', method: 'GET' })
      .reply(200, CAP_BODY, jsonReplyOptions);

    const caps = await svc.capabilities(PEER_ID);

    expect(caps.resources).toContain('tasks');
    expect(caps.max_rows_per_query).toBe(100);

    await mockAgent.close();
  });
});
|
||||||
|
|
||||||
|
// ─── HTTP error surfaces ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
describe('non-2xx responses', () => {
  // Reply options shared by both error-envelope responses below.
  const jsonReplyOptions = { headers: { 'content-type': 'application/json' } };

  it('surfaces 403 as FederationClientError({ status: 403, code: "FORBIDDEN" })', async () => {
    const { mockAgent, pool } = makeMockAgent();
    const svc = makeService(makeDb(), pool);
    const forbiddenEnvelope = { error: { code: 'forbidden', message: 'Access denied' } };

    pool
      .intercept({ path: '/api/federation/v1/list/tasks', method: 'POST' })
      .reply(403, forbiddenEnvelope, jsonReplyOptions);

    const pendingList = svc.list(PEER_ID, 'tasks', {});
    await expect(pendingList).rejects.toMatchObject({
      status: 403,
      code: 'FORBIDDEN',
      peerId: PEER_ID,
    });

    await mockAgent.close();
  });

  it('surfaces 404 as FederationClientError({ status: 404, code: "HTTP_404" })', async () => {
    const { mockAgent, pool } = makeMockAgent();
    const svc = makeService(makeDb(), pool);
    const notFoundEnvelope = { error: { code: 'not_found', message: 'Not found' } };

    pool
      .intercept({ path: '/api/federation/v1/get/tasks/999', method: 'POST' })
      .reply(404, notFoundEnvelope, jsonReplyOptions);

    const pendingGet = svc.get(PEER_ID, 'tasks', '999', {});
    await expect(pendingGet).rejects.toMatchObject({
      status: 404,
      code: 'HTTP_404',
      peerId: PEER_ID,
    });

    await mockAgent.close();
  });
});
|
||||||
|
|
||||||
|
// ─── Network error ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
describe('network errors', () => {
  it('surfaces network error as FederationClientError({ code: "NETWORK" })', async () => {
    const { mockAgent, pool } = makeMockAgent();
    const svc = makeService(makeDb(), pool);

    // Make the mocked transport fail instead of producing an HTTP response.
    const transportFailure = new Error('ECONNREFUSED');
    pool
      .intercept({ path: '/api/federation/v1/capabilities', method: 'GET' })
      .replyWithError(transportFailure);

    const pendingCall = svc.capabilities(PEER_ID);
    await expect(pendingCall).rejects.toMatchObject({
      code: 'NETWORK',
      peerId: PEER_ID,
    });

    await mockAgent.close();
  });
});
|
||||||
|
|
||||||
|
// ─── Invalid response body ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
describe('invalid response body', () => {
  it('surfaces as FederationClientError({ code: "INVALID_RESPONSE" }) when body shape is wrong', async () => {
    const { mockAgent, pool } = makeMockAgent();
    const svc = makeService(makeDb(), pool);

    // A 200 reply whose JSON lacks every field a capabilities response needs.
    const malformedBody = { totally: 'wrong' };
    pool
      .intercept({ path: '/api/federation/v1/capabilities', method: 'GET' })
      .reply(200, malformedBody, { headers: { 'content-type': 'application/json' } });

    const pendingCall = svc.capabilities(PEER_ID);
    await expect(pendingCall).rejects.toMatchObject({
      code: 'INVALID_RESPONSE',
      peerId: PEER_ID,
    });

    await mockAgent.close();
  });
});
|
||||||
|
|
||||||
|
// ─── Peer DB validation ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
describe('peer validation (without resolveEntry spy)', () => {
  // Both tests construct the service directly, so the real `resolveEntry`
  // runs against the mock DB — nothing is stubbed on the service.

  it('throws PEER_NOT_FOUND when peer is not in DB', async () => {
    // An empty result set stands in for "no such peer".
    const svc = new FederationClientService(makeDb([]));

    const pendingCall = svc.capabilities(PEER_ID);

    await expect(pendingCall).rejects.toMatchObject({
      code: 'PEER_NOT_FOUND',
      peerId: PEER_ID,
    });
  });

  it('throws PEER_INACTIVE when peer state is not "active"', async () => {
    const suspendedPeer = makePeerRow({ state: 'suspended' });
    const svc = new FederationClientService(makeDb([suspendedPeer]));

    const pendingCall = svc.capabilities(PEER_ID);

    await expect(pendingCall).rejects.toMatchObject({
      code: 'PEER_INACTIVE',
      peerId: PEER_ID,
    });
  });
});
|
||||||
|
|
||||||
|
// ─── Cache behaviour ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
describe('cache behaviour', () => {
  // All tests here call the service's private `resolveEntry` directly on an
  // unstubbed service, so the real cache and the mock DB are what is observed.
  // This avoids the HTTP layer, which the cache invariants do not depend on.
  type ResolveEntryFn = (peerId: string) => Promise<{ agent: { destroy: () => Promise<void> } }>;

  /** Reach the private `resolveEntry` of `svc`, bound so `this` stays intact. */
  const privateResolveEntry = (svc: FederationClientService): ResolveEntryFn =>
    (svc as unknown as { resolveEntry: ResolveEntryFn }).resolveEntry.bind(svc);

  it('hits cache on second call — only one DB lookup happens', async () => {
    const db = makeDb();
    const selectSpy = vi.spyOn(db, 'select');
    const resolveEntry = privateResolveEntry(new FederationClientService(db));

    const first = await resolveEntry(PEER_ID);
    const second = await resolveEntry(PEER_ID);

    expect(first).toBe(second);
    expect(selectSpy).toHaveBeenCalledTimes(1);
  });

  it('serializes concurrent resolveEntry calls — only one DB lookup', async () => {
    const db = makeDb();
    const selectSpy = vi.spyOn(db, 'select');
    const resolveEntry = privateResolveEntry(new FederationClientService(db));

    // Both calls start before either build has finished.
    const [a, b] = await Promise.all([resolveEntry(PEER_ID), resolveEntry(PEER_ID)]);

    expect(a).toBe(b);
    expect(selectSpy).toHaveBeenCalledTimes(1);
  });

  it('flushPeer destroys the evicted Agent so old TLS connections close', async () => {
    const svc = new FederationClientService(makeDb());
    const entry = await privateResolveEntry(svc)(PEER_ID);
    const destroySpy = vi.spyOn(entry.agent, 'destroy').mockResolvedValue();

    svc.flushPeer(PEER_ID);

    expect(destroySpy).toHaveBeenCalledTimes(1);
  });

  it('flushPeer() invalidates cache — next call re-reads DB', async () => {
    // Exercise the real resolveEntry: with a stubbed resolveEntry the cache is
    // never populated, so a flush could not be observed at all.
    const db = makeDb();
    const selectSpy = vi.spyOn(db, 'select');
    const svc = new FederationClientService(db);
    const resolveEntry = privateResolveEntry(svc);

    // First call — populates the cache with one DB lookup.
    const beforeFlush = await resolveEntry(PEER_ID);
    expect(selectSpy).toHaveBeenCalledTimes(1);

    svc.flushPeer(PEER_ID);

    // Second call after the flush — must miss the cache and query the DB again,
    // producing a newly built entry rather than the evicted one.
    const afterFlush = await resolveEntry(PEER_ID);

    expect(selectSpy).toHaveBeenCalledTimes(2);
    expect(afterFlush).not.toBe(beforeFlush);
  });
});
|
||||||
|
|
||||||
|
// ─── loadStepCaRoot env-var guard ─────────────────────────────────────────
|
||||||
|
|
||||||
|
describe('loadStepCaRoot() env-var guard', () => {
  it('throws PEER_MISCONFIGURED when STEP_CA_ROOT_CERT_PATH is not set', async () => {
    // beforeEach sets the variable; remove it to simulate a missing trust anchor.
    delete process.env['STEP_CA_ROOT_CERT_PATH'];

    const svc = new FederationClientService(makeDb());
    // `resolveEntry` is private, hence the cast; bind keeps `this` pointing at svc.
    const privateApi = svc as unknown as {
      resolveEntry: (peerId: string) => Promise<unknown>;
    };
    const resolveEntry = privateApi.resolveEntry.bind(svc);

    const pendingEntry = resolveEntry(PEER_ID);

    await expect(pendingEntry).rejects.toMatchObject({
      code: 'PEER_MISCONFIGURED',
    });
  });
});
|
||||||
|
|
||||||
|
// ─── FederationClientError class ──────────────────────────────────────────
|
||||||
|
|
||||||
|
describe('FederationClientError', () => {
  it('is instanceof Error and FederationClientError', () => {
    const failure = new FederationClientError({
      code: 'PEER_NOT_FOUND',
      message: 'test',
      peerId: PEER_ID,
    });

    // The subclass must remain a genuine Error and expose its own name.
    expect(failure).toBeInstanceOf(Error);
    expect(failure).toBeInstanceOf(FederationClientError);
    expect(failure.name).toBe('FederationClientError');
  });

  it('carries status, code, and peerId', () => {
    const failure = new FederationClientError({
      status: 403,
      code: 'FORBIDDEN',
      message: 'forbidden',
      peerId: PEER_ID,
    });
    const { status, code, peerId } = failure;

    expect(status).toBe(403);
    expect(code).toBe('FORBIDDEN');
    expect(peerId).toBe(PEER_ID);
  });
});
|
||||||
|
});
|
||||||
500
apps/gateway/src/federation/client/federation-client.service.ts
Normal file
500
apps/gateway/src/federation/client/federation-client.service.ts
Normal file
@@ -0,0 +1,500 @@
|
|||||||
|
/**
|
||||||
|
* FederationClientService — outbound mTLS client for federation requests (FED-M3-08).
|
||||||
|
*
|
||||||
|
* Dials peer gateways over mTLS using the cert+sealed-key stored in `federation_peers`,
|
||||||
|
* invokes federation verbs (list / get / capabilities), and surfaces all failure modes
|
||||||
|
* as typed `FederationClientError` instances.
|
||||||
|
*
|
||||||
|
* ## Error code taxonomy
|
||||||
|
*
|
||||||
|
* | Code | When |
|
||||||
|
* | ------------------ | ------------------------------------------------------------- |
|
||||||
|
* | PEER_NOT_FOUND | No row in federation_peers for the given peerId |
|
||||||
|
* | PEER_INACTIVE | Peer row exists but state !== 'active' |
|
||||||
|
* | PEER_MISCONFIGURED | Peer row is active but missing endpointUrl or clientKeyPem, or STEP_CA_ROOT_CERT_PATH is unset / unreadable |
* | NETWORK | undici threw a connection / TLS / timeout error |
* | HTTP_{status} | Peer returned a non-2xx response other than 403 (e.g. HTTP_404) |
* | FORBIDDEN | Peer returned 403 (reported instead of HTTP_403) |
|
||||||
|
* | INVALID_RESPONSE | Response body failed Zod schema validation |
|
||||||
|
*
|
||||||
|
* ## Cache strategy
|
||||||
|
*
|
||||||
|
* Per-peer `undici.Agent` instances are cached in a `Map<peerId, AgentCacheEntry>` for
|
||||||
|
* the lifetime of the service instance. The cache is keyed on peerId (UUID).
|
||||||
|
*
|
||||||
|
* Cache invalidation:
|
||||||
|
* - `flushPeer(peerId)` — removes the entry immediately. M5/M6 MUST call this on
|
||||||
|
* cert rotation or peer revocation events so the next request re-reads the DB and
|
||||||
|
* builds a fresh TLS Agent with the new cert material.
|
||||||
|
* - On cache miss: re-reads the DB, checks state === 'active', rebuilds Agent.
|
||||||
|
*
|
||||||
|
* Cache does NOT auto-expire. The service is expected to be a singleton scoped to the
|
||||||
|
* NestJS application lifecycle; flushing on revocation/rotation is the only invalidation
|
||||||
|
* path by design (avoids redundant DB round-trips on the hot path).
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { Injectable, Inject, Logger } from '@nestjs/common';
|
||||||
|
import { readFileSync } from 'node:fs';
|
||||||
|
import { Agent, fetch as undiciFetch } from 'undici';
|
||||||
|
import type { Dispatcher } from 'undici';
|
||||||
|
import { z } from 'zod';
|
||||||
|
import { type Db, eq, federationPeers } from '@mosaicstack/db';
|
||||||
|
import {
|
||||||
|
FederationListResponseSchema,
|
||||||
|
FederationGetResponseSchema,
|
||||||
|
FederationCapabilitiesResponseSchema,
|
||||||
|
FederationErrorEnvelopeSchema,
|
||||||
|
type FederationListResponse,
|
||||||
|
type FederationGetResponse,
|
||||||
|
type FederationCapabilitiesResponse,
|
||||||
|
} from '@mosaicstack/types';
|
||||||
|
import { DB } from '../../database/database.module.js';
|
||||||
|
import { unsealClientKey } from '../peer-key.util.js';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Error taxonomy
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Error codes produced on the client side of federation.
 *
 * Kept separate from the server-side `FederationErrorCode` in
 * `@mosaicstack/types`: the client can fail in ways the server never reports
 * (PEER_NOT_FOUND, PEER_INACTIVE, PEER_MISCONFIGURED, NETWORK).
 * `HTTP_${number}` covers status-derived codes such as `HTTP_404`.
 */
export type FederationClientErrorCode =
  | 'PEER_NOT_FOUND'
  | 'PEER_INACTIVE'
  | 'PEER_MISCONFIGURED'
  | 'NETWORK'
  | 'FORBIDDEN'
  | 'INVALID_RESPONSE'
  | `HTTP_${number}`;

/** Constructor input for {@link FederationClientError}. */
export interface FederationClientErrorOptions {
  /** HTTP status of the peer response, when the failure came from one. */
  status?: number;
  /** Machine-readable failure category. */
  code: FederationClientErrorCode;
  /** Human-readable description; becomes `Error.message`. */
  message: string;
  /** Peer the failed operation was addressed to. */
  peerId: string;
  /** Underlying error or value that triggered this failure, if any. */
  cause?: unknown;
}

/**
 * The error type FederationClientService throws on every failure path.
 * Callers are expected to branch on `error.code` rather than on the message.
 */
export class FederationClientError extends Error {
  readonly status?: number;
  readonly code: FederationClientErrorCode;
  readonly peerId: string;
  readonly cause?: unknown;

  constructor(opts: FederationClientErrorOptions) {
    super(opts.message);

    // Copy the structured details verbatim; absent optionals stay undefined.
    const { status, code, peerId, cause } = opts;
    this.name = 'FederationClientError';
    this.status = status;
    this.code = code;
    this.peerId = peerId;
    this.cause = cause;
  }
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Internal cache types
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** What the service keeps per peer once its Agent has been built. */
interface AgentCacheEntry {
  /** undici Agent the verb methods hand to the HTTP helpers for this peer. */
  agent: Agent;
  /** Base URL that verb paths (`/api/federation/v1/...`) are appended to. */
  endpointUrl: string;
  /** Certificate PEM kept with the entry — presumably the peer row's cert; confirm in resolveEntry. */
  certPem: string;
  /** Certificate serial kept with the entry — presumably the peer row's serial; confirm in resolveEntry. */
  certSerial: string;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Service
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Injectable()
|
||||||
|
export class FederationClientService {
|
||||||
|
private readonly logger = new Logger(FederationClientService.name);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Per-peer undici Agent cache.
|
||||||
|
* Key = peerId (UUID string).
|
||||||
|
*
|
||||||
|
* Values are either a resolved `AgentCacheEntry` or an in-flight
|
||||||
|
* `Promise<AgentCacheEntry>` (promise-cache pattern). Storing the promise
|
||||||
|
* prevents duplicate DB lookups and duplicate key-unseal operations when two
|
||||||
|
* requests for the same peer arrive before the first build completes.
|
||||||
|
*
|
||||||
|
* Flush via `flushPeer(peerId)` on cert rotation / peer revocation (M5/M6).
|
||||||
|
*/
|
||||||
|
private readonly cache = new Map<string, AgentCacheEntry | Promise<AgentCacheEntry>>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Step-CA root cert PEM, loaded once from `STEP_CA_ROOT_CERT_PATH`.
|
||||||
|
* Used as the trust anchor for peer server certificates so federation TLS is
|
||||||
|
* pinned to our PKI, not the public trust store. Lazily loaded on first use
|
||||||
|
* so unit tests that don't exercise the agent path can run without the env var.
|
||||||
|
*/
|
||||||
|
private cachedCaPem: string | null = null;
|
||||||
|
|
||||||
|
constructor(@Inject(DB) private readonly db: Db) {}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Public verb API
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Call the `list` verb on a remote peer.
 *
 * The response is validated against the list schema with items typed as
 * `unknown`; `T` is therefore a caller-side assertion about the item shape,
 * not something checked here.
 *
 * @param peerId   UUID of the peer row in `federation_peers`.
 * @param resource Resource path, e.g. "tasks"; URI-encoded into the URL.
 * @param request  Free-form body sent as JSON in the POST body.
 * @returns Parsed `FederationListResponse<T>`.
 */
async list<T>(
  peerId: string,
  resource: string,
  request: Record<string, unknown>,
): Promise<FederationListResponse<T>> {
  const entry = await this.resolveEntry(peerId);
  const encodedResource = encodeURIComponent(resource);
  const targetUrl = `${entry.endpointUrl}/api/federation/v1/list/${encodedResource}`;

  const rawBody = await this.doPost(peerId, targetUrl, entry.agent, request);

  const listSchema = FederationListResponseSchema(z.unknown());
  return this.parseWith<FederationListResponse<T>>(peerId, rawBody, listSchema);
}
|
||||||
|
|
||||||
|
/**
 * Call the `get` verb on a remote peer.
 *
 * The response is validated against the get schema with the item typed as
 * `unknown`; `T` is therefore a caller-side assertion about the item shape,
 * not something checked here.
 *
 * @param peerId   UUID of the peer row in `federation_peers`.
 * @param resource Resource path, e.g. "tasks"; URI-encoded into the URL.
 * @param id       Resource identifier; URI-encoded into the URL.
 * @param request  Free-form body sent as JSON in the POST body.
 * @returns Parsed `FederationGetResponse<T>`.
 */
async get<T>(
  peerId: string,
  resource: string,
  id: string,
  request: Record<string, unknown>,
): Promise<FederationGetResponse<T>> {
  const entry = await this.resolveEntry(peerId);
  const encodedResource = encodeURIComponent(resource);
  const encodedId = encodeURIComponent(id);
  const targetUrl = `${entry.endpointUrl}/api/federation/v1/get/${encodedResource}/${encodedId}`;

  const rawBody = await this.doPost(peerId, targetUrl, entry.agent, request);

  const getSchema = FederationGetResponseSchema(z.unknown());
  return this.parseWith<FederationGetResponse<T>>(peerId, rawBody, getSchema);
}
|
||||||
|
|
||||||
|
/**
 * Call the `capabilities` verb on a remote peer (a GET with no request body).
 *
 * @param peerId UUID of the peer row in `federation_peers`.
 * @returns Parsed `FederationCapabilitiesResponse`.
 */
async capabilities(peerId: string): Promise<FederationCapabilitiesResponse> {
  const entry = await this.resolveEntry(peerId);
  const targetUrl = `${entry.endpointUrl}/api/federation/v1/capabilities`;

  const rawBody = await this.doGet(peerId, targetUrl, entry.agent);

  return this.parseWith<FederationCapabilitiesResponse>(
    peerId,
    rawBody,
    FederationCapabilitiesResponseSchema,
  );
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Cache management
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Evict whatever is cached for `peerId`.
 *
 * M5/M6 MUST call this on:
 *   - cert rotation events (so new cert material is picked up)
 *   - peer revocation events (so future requests fail at PEER_INACTIVE)
 *
 * With the entry gone, the next `list`, `get`, or `capabilities` call for the
 * peer goes back through the cache-miss path. Does nothing (and logs nothing)
 * when the peer has no cache entry.
 */
flushPeer(peerId: string): void {
  const cached = this.cache.get(peerId);
  if (cached === undefined) {
    return;
  }

  this.cache.delete(peerId);

  if (cached instanceof Promise) {
    // In-flight build: only the map entry is dropped. The Agent that build
    // produces is not destroyed from here.
  } else {
    // Resolved entry: close the old Agent on a best-effort basis.
    cached.agent.destroy().catch(() => {
      // intentionally ignored — destroy errors are not actionable
    });
  }

  this.logger.log(`Cache flushed for peer ${peerId}`);
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Internal helpers
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load and cache the Step-CA root cert PEM from `STEP_CA_ROOT_CERT_PATH`.
|
||||||
|
* Throws `FederationClientError` if the env var is unset or the file cannot
|
||||||
|
* be read — mTLS to a peer without a pinned trust anchor would silently
|
||||||
|
* fall back to the public trust store.
|
||||||
|
*/
|
||||||
|
private loadStepCaRoot(): string {
  // Fast path: the PEM is read from disk at most once per service instance.
  if (this.cachedCaPem !== null) {
    return this.cachedCaPem;
  }

  const path = process.env['STEP_CA_ROOT_CERT_PATH'];
  if (!path) {
    throw new FederationClientError({
      code: 'PEER_MISCONFIGURED',
      message: 'STEP_CA_ROOT_CERT_PATH is not set; refusing to dial peer without pinned CA trust',
      peerId: '',
    });
  }

  let pem: string;
  try {
    pem = readFileSync(path, 'utf8');
  } catch (err) {
    throw new FederationClientError({
      code: 'PEER_MISCONFIGURED',
      message: `Failed to read STEP_CA_ROOT_CERT_PATH (${path})`,
      peerId: '',
      cause: err,
    });
  }

  // An empty file must be rejected explicitly: an empty string is falsy, and
  // Node's TLS layer treats a falsy `ca` option as "not provided" and falls
  // back to the default (public) trust store — exactly the silent downgrade
  // this method exists to prevent. The bad value is never cached, so a fixed
  // file is picked up on the next call.
  if (pem.trim().length === 0) {
    throw new FederationClientError({
      code: 'PEER_MISCONFIGURED',
      message: `STEP_CA_ROOT_CERT_PATH (${path}) is empty; refusing to dial peer without pinned CA trust`,
      peerId: '',
    });
  }

  this.cachedCaPem = pem;
  return pem;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Resolve the cache entry for a peer, reading DB on miss.
|
||||||
|
*
|
||||||
|
* Uses a promise-cache pattern: concurrent callers for the same uncached
|
||||||
|
* `peerId` all `await` the same in-flight `Promise<AgentCacheEntry>` so
|
||||||
|
* only one DB lookup and one key-unseal ever runs per peer per cache miss.
|
||||||
|
* The promise is replaced with the concrete entry on success, or deleted on
|
||||||
|
* rejection so a transient error does not poison the cache permanently.
|
||||||
|
*
|
||||||
|
* Throws `FederationClientError` with appropriate code if the peer is not
|
||||||
|
* found, is inactive, or is missing required fields.
|
||||||
|
*/
|
||||||
|
private async resolveEntry(peerId: string): Promise<AgentCacheEntry> {
  const cached = this.cache.get(peerId);
  if (cached !== undefined) {
    return cached; // Promise or concrete entry — both are awaitable
  }

  // Explicit annotation is required because the handlers below refer to
  // `inflight` itself (they only run after the assignment has completed).
  const inflight: Promise<AgentCacheEntry> = this.buildEntry(peerId).then(
    (entry) => {
      // Only promote the result if the cache slot still holds THIS build.
      // If flushPeer() ran while the build was in flight (e.g. revocation or
      // cert rotation), the slot is empty or owned by a newer build, and
      // writing here would resurrect stale cert material and undo the flush.
      // Callers already awaiting this promise still receive the entry; it is
      // simply not cached.
      if (this.cache.get(peerId) === inflight) {
        this.cache.set(peerId, entry);
      }
      return entry;
    },
    (err: unknown) => {
      // Don't poison the cache with a rejected promise — but equally, don't
      // evict a newer entry that replaced this build after a flush.
      if (this.cache.get(peerId) === inflight) {
        this.cache.delete(peerId);
      }
      throw err;
    },
  );

  // Publish the in-flight promise synchronously so concurrent callers for the
  // same peer share a single DB lookup / key unseal.
  this.cache.set(peerId, inflight);
  return inflight;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build the `AgentCacheEntry` for a peer by reading the DB, validating the
|
||||||
|
* peer's state, unsealing the private key, and constructing the mTLS Agent.
|
||||||
|
*
|
||||||
|
* Throws `FederationClientError` with appropriate code if the peer is not
|
||||||
|
* found, is inactive, or is missing required fields.
|
||||||
|
*/
|
||||||
|
private async buildEntry(peerId: string): Promise<AgentCacheEntry> {
  // DB lookup
  const [peer] = await this.db
    .select()
    .from(federationPeers)
    .where(eq(federationPeers.id, peerId))
    .limit(1);

  if (!peer) {
    throw new FederationClientError({
      code: 'PEER_NOT_FOUND',
      message: `Federation peer ${peerId} not found`,
      peerId,
    });
  }

  if (peer.state !== 'active') {
    throw new FederationClientError({
      code: 'PEER_INACTIVE',
      message: `Federation peer ${peerId} is not active (state: ${peer.state})`,
      peerId,
    });
  }

  if (!peer.endpointUrl || !peer.clientKeyPem) {
    throw new FederationClientError({
      code: 'PEER_MISCONFIGURED',
      message: `Federation peer ${peerId} is missing endpointUrl or clientKeyPem`,
      peerId,
    });
  }

  // Without a client certificate the Agent would connect with no client
  // identity at all; fail early with a clear error instead.
  if (!peer.certPem) {
    throw new FederationClientError({
      code: 'PEER_MISCONFIGURED',
      message: `Federation peer ${peerId} is missing certPem`,
      peerId,
    });
  }

  // Unseal the private key
  let privateKeyPem: string;
  try {
    privateKeyPem = unsealClientKey(peer.clientKeyPem);
  } catch (err) {
    throw new FederationClientError({
      code: 'PEER_MISCONFIGURED',
      message: `Failed to unseal client key for peer ${peerId}`,
      peerId,
      cause: err,
    });
  }

  // Build mTLS agent — pin trust to Step-CA root so we never accept
  // a peer cert signed by a public CA (defense against MITM with a
  // publicly-trusted DV cert for the peer's hostname).
  // NOTE(review): the URL scheme is not checked here; an `http://` endpoint
  // would bypass TLS entirely — confirm enrollment only stores https URLs.
  const agent = new Agent({
    connect: {
      cert: peer.certPem,
      key: privateKeyPem,
      ca: this.loadStepCaRoot(),
      // rejectUnauthorized: true is the undici default for HTTPS
    },
  });

  const entry: AgentCacheEntry = {
    agent,
    // Strip trailing slashes so `${endpointUrl}/api/...` never yields `//api/...`.
    endpointUrl: peer.endpointUrl.replace(/\/+$/, ''),
    certPem: peer.certPem,
    certSerial: peer.certSerial,
  };

  this.logger.log(`Agent cached for peer ${peerId} (serial: ${peer.certSerial})`);

  return entry;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Execute a POST request with a JSON body.
|
||||||
|
* Returns the parsed response body as an unknown value.
|
||||||
|
* Throws `FederationClientError` on network errors and non-2xx responses.
|
||||||
|
*/
|
||||||
|
private async doPost(
  peerId: string,
  url: string,
  agent: Dispatcher,
  body: Record<string, unknown>,
): Promise<unknown> {
  // Serialize once, then hand off to the shared request pipeline, which maps
  // transport and HTTP failures to FederationClientError.
  const serialized = JSON.stringify(body);
  const init = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: serialized,
  };
  return this.doRequest(peerId, url, agent, init);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Execute a GET request.
|
||||||
|
* Returns the parsed response body as an unknown value.
|
||||||
|
* Throws `FederationClientError` on network errors and non-2xx responses.
|
||||||
|
*/
|
||||||
|
private async doGet(peerId: string, url: string, agent: Dispatcher): Promise<unknown> {
  // GET carries no headers or body; everything else is handled by doRequest.
  const init = { method: 'GET' };
  return this.doRequest(peerId, url, agent, init);
}
|
||||||
|
|
||||||
|
private async doRequest(
  peerId: string,
  url: string,
  agent: Dispatcher,
  init: { method: string; headers?: Record<string, string>; body?: string },
): Promise<unknown> {
  // Shared request pipeline for doGet/doPost:
  //   1. dispatch through the peer's Agent (transport failure → NETWORK)
  //   2. read the body as text
  //   3. non-2xx → FORBIDDEN / HTTP_<status>, using the peer's error envelope
  //      message when the body parses as one
  //   4. 2xx → JSON.parse the body (failure → INVALID_RESPONSE)
  let response: Awaited<ReturnType<typeof undiciFetch>>;

  try {
    response = await undiciFetch(url, {
      ...init,
      dispatcher: agent,
    });
  } catch (err) {
    throw new FederationClientError({
      code: 'NETWORK',
      message: `Network error calling peer ${peerId} at ${url}: ${err instanceof Error ? err.message : String(err)}`,
      peerId,
      cause: err,
    });
  }

  let rawBody = '';
  try {
    rawBody = await response.text();
  } catch (err) {
    // A body that cannot be read on a 2xx response is a transport failure,
    // not a malformed payload: report it as NETWORK and keep the cause rather
    // than letting it surface later as a misleading "non-JSON body" error.
    if (response.ok) {
      throw new FederationClientError({
        code: 'NETWORK',
        message: `Network error reading response from peer ${peerId} at ${url}: ${err instanceof Error ? err.message : String(err)}`,
        peerId,
        cause: err,
      });
    }
    // Non-2xx: the body is only used best-effort to enrich the error message,
    // so continue with an empty body and report the HTTP status below.
  }

  if (!response.ok) {
    const status = response.status;

    // Attempt to parse as federation error envelope
    let serverMessage = `HTTP ${status}`;
    try {
      const json: unknown = JSON.parse(rawBody);
      const result = FederationErrorEnvelopeSchema.safeParse(json);
      if (result.success) {
        serverMessage = result.data.error.message;
      }
    } catch {
      // Not valid JSON or not a federation envelope — use generic message
    }

    // Specific code for 403 (most actionable for callers); generic HTTP_{n} for others
    const code: FederationClientErrorCode = status === 403 ? 'FORBIDDEN' : `HTTP_${status}`;

    throw new FederationClientError({
      status,
      code,
      message: `Peer ${peerId} returned ${status}: ${serverMessage}`,
      peerId,
    });
  }

  try {
    return JSON.parse(rawBody) as unknown;
  } catch (err) {
    throw new FederationClientError({
      code: 'INVALID_RESPONSE',
      message: `Peer ${peerId} returned non-JSON body`,
      peerId,
      cause: err,
    });
  }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse and validate a response body against a Zod schema.
|
||||||
|
*
|
||||||
|
* For list/get, callers pass the result of `FederationListResponseSchema(z.unknown())`
|
||||||
|
* so that the envelope structure is validated without requiring a concrete item schema
|
||||||
|
* at the client level. The generic `T` provides compile-time typing.
|
||||||
|
*
|
||||||
|
* Throws `FederationClientError({ code: 'INVALID_RESPONSE' })` on parse failure.
|
||||||
|
*/
|
||||||
|
private parseWith<T>(peerId: string, body: unknown, schema: z.ZodTypeAny): T {
  const parsed = schema.safeParse(body);

  // Happy path first: the schema accepted the body.
  if (parsed.success) {
    return parsed.data as T;
  }

  // Flatten every issue into "[path] message"; an empty path is shown as "root".
  const details: string[] = [];
  for (const issue of parsed.error.issues) {
    const where = issue.path.join('.') || 'root';
    details.push(`[${where}] ${issue.message}`);
  }

  throw new FederationClientError({
    code: 'INVALID_RESPONSE',
    message: `Peer ${peerId} returned invalid response shape: ${details.join('; ')}`,
    peerId,
  });
}
|
||||||
|
}
|
||||||
13
apps/gateway/src/federation/client/index.ts
Normal file
13
apps/gateway/src/federation/client/index.ts
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
/**
|
||||||
|
* Federation client barrel — re-exports for FederationModule consumers.
|
||||||
|
*
|
||||||
|
* M3-09 (QuerySourceService) and future milestones should import from here,
|
||||||
|
* not directly from the implementation file.
|
||||||
|
*/
|
||||||
|
|
||||||
|
export {
|
||||||
|
FederationClientService,
|
||||||
|
FederationClientError,
|
||||||
|
type FederationClientErrorCode,
|
||||||
|
type FederationClientErrorOptions,
|
||||||
|
} from './federation-client.service.js';
|
||||||
@@ -5,10 +5,25 @@ import { EnrollmentController } from './enrollment.controller.js';
|
|||||||
import { EnrollmentService } from './enrollment.service.js';
|
import { EnrollmentService } from './enrollment.service.js';
|
||||||
import { FederationController } from './federation.controller.js';
|
import { FederationController } from './federation.controller.js';
|
||||||
import { GrantsService } from './grants.service.js';
|
import { GrantsService } from './grants.service.js';
|
||||||
|
import { FederationClientService } from './client/index.js';
|
||||||
|
import { FederationAuthGuard } from './server/index.js';
|
||||||
|
|
||||||
@Module({
|
@Module({
|
||||||
controllers: [EnrollmentController, FederationController],
|
controllers: [EnrollmentController, FederationController],
|
||||||
providers: [AdminGuard, CaService, EnrollmentService, GrantsService],
|
providers: [
|
||||||
exports: [CaService, EnrollmentService, GrantsService],
|
AdminGuard,
|
||||||
|
CaService,
|
||||||
|
EnrollmentService,
|
||||||
|
GrantsService,
|
||||||
|
FederationClientService,
|
||||||
|
FederationAuthGuard,
|
||||||
|
],
|
||||||
|
exports: [
|
||||||
|
CaService,
|
||||||
|
EnrollmentService,
|
||||||
|
GrantsService,
|
||||||
|
FederationClientService,
|
||||||
|
FederationAuthGuard,
|
||||||
|
],
|
||||||
})
|
})
|
||||||
export class FederationModule {}
|
export class FederationModule {}
|
||||||
|
|||||||
@@ -10,12 +10,14 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import { ConflictException, Inject, Injectable, NotFoundException } from '@nestjs/common';
|
import { ConflictException, Inject, Injectable, NotFoundException } from '@nestjs/common';
|
||||||
import { type Db, and, eq, federationGrants } from '@mosaicstack/db';
|
import { type Db, and, eq, federationGrants, federationPeers } from '@mosaicstack/db';
|
||||||
import { DB } from '../database/database.module.js';
|
import { DB } from '../database/database.module.js';
|
||||||
import { parseFederationScope } from './scope-schema.js';
|
import { parseFederationScope } from './scope-schema.js';
|
||||||
import type { CreateGrantDto, ListGrantsDto } from './grants.dto.js';
|
import type { CreateGrantDto, ListGrantsDto } from './grants.dto.js';
|
||||||
|
|
||||||
export type Grant = typeof federationGrants.$inferSelect;
|
export type Grant = typeof federationGrants.$inferSelect;
|
||||||
|
export type Peer = typeof federationPeers.$inferSelect;
|
||||||
|
export type GrantWithPeer = Grant & { peer: Peer };
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class GrantsService {
|
export class GrantsService {
|
||||||
@@ -60,6 +62,33 @@ export class GrantsService {
|
|||||||
return grant;
|
return grant;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fetch a single grant by ID, joined with its associated peer row.
|
||||||
|
* Used by FederationAuthGuard to perform grant status + cert serial checks
|
||||||
|
* in a single DB round-trip.
|
||||||
|
*
|
||||||
|
* Throws NotFoundException if the grant does not exist.
|
||||||
|
* Because the query uses an inner join, a grant whose peer row is missing (data integrity issue) also yields no row and is reported as the same NotFoundException ("Grant … not found").
|
||||||
|
*/
|
||||||
|
async getGrantWithPeer(id: string): Promise<GrantWithPeer> {
  // Single round-trip: grant row inner-joined with its peer row.
  const [match] = await this.db
    .select()
    .from(federationGrants)
    .innerJoin(federationPeers, eq(federationGrants.peerId, federationPeers.id))
    .where(eq(federationGrants.id, id))
    .limit(1);

  // No row means the grant does not exist, or the join found no peer for it.
  if (match === undefined) {
    throw new NotFoundException(`Grant ${id} not found`);
  }

  // Flatten the joined row into the grant shape with the peer nested under `peer`.
  const { federation_grants: grant, federation_peers: peer } = match;
  return { ...grant, peer };
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List grants with optional filters for peerId, subjectUserId, and status.
|
* List grants with optional filters for peerId, subjectUserId, and status.
|
||||||
*/
|
*/
|
||||||
|
|||||||
146
apps/gateway/src/federation/oid.util.ts
Normal file
146
apps/gateway/src/federation/oid.util.ts
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
/**
|
||||||
|
* Shared OID extraction helpers for Mosaic federation certificates.
|
||||||
|
*
|
||||||
|
* Custom OID registry (PRD §6, docs/federation/SETUP.md):
|
||||||
|
* 1.3.6.1.4.1.99999.1 — mosaic_grant_id
|
||||||
|
* 1.3.6.1.4.1.99999.2 — mosaic_subject_user_id
|
||||||
|
*
|
||||||
|
* The encoding convention: each extension value is an OCTET STRING wrapping
|
||||||
|
* an ASN.1 UTF8String TLV:
|
||||||
|
* 0x0C (tag) + 1-byte length + UTF-8 bytes
|
||||||
|
*
|
||||||
|
* CaService encodes values this way via encodeUtf8String(), and this module
|
||||||
|
* decodes them with the corresponding `.slice(2)` to skip tag + length byte.
|
||||||
|
*
|
||||||
|
* This module is intentionally pure — no NestJS, no DB, no network I/O.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { X509Certificate } from '@peculiar/x509';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// OID constants
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** Custom X.509 extension OID for `mosaic_grant_id`. */
export const OID_MOSAIC_GRANT_ID = '1.3.6.1.4.1.99999.1';
|
||||||
|
/** Custom X.509 extension OID for `mosaic_subject_user_id`. */
export const OID_MOSAIC_SUBJECT_USER_ID = '1.3.6.1.4.1.99999.2';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Extraction result types
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** Decoded values of the two Mosaic custom certificate extensions. */
export interface MosaicOids {
  /** Decoded value of the `mosaic_grant_id` extension. */
  grantId: string;
  /** Decoded value of the `mosaic_subject_user_id` extension. */
  subjectUserId: string;
}
|
||||||
|
|
||||||
|
/**
 * Outcome of an OID extraction, discriminated on `ok`:
 * success carries the decoded values, failure carries an error code and an
 * optional human-readable detail.
 */
export type OidExtractionResult =
  | {
      ok: true;
      value: MosaicOids;
    }
  | {
      ok: false;
      error: 'MISSING_GRANT_ID' | 'MISSING_SUBJECT_USER_ID' | 'PARSE_ERROR';
      detail?: string;
    };
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Strict decoder: with `fatal: true`, decode() throws on invalid UTF-8 instead
// of silently substituting U+FFFD replacement characters.
const decoder = new TextDecoder('utf-8', { fatal: true });

/**
 * Decode an extension value encoded as ASN.1 UTF8String TLV
 * (tag 0x0C + 1-byte length + UTF-8 bytes).
 *
 * Validates the tag, the length byte, the buffer bounds, and that the payload
 * is well-formed UTF-8 before returning.
 *
 * @param value Raw extension value bytes.
 * @returns The decoded string payload.
 * @throws Error with a descriptive message on any malformed input; callers
 *   are expected to wrap the call in try/catch.
 */
function decodeUtf8StringTlv(value: ArrayBuffer): string {
  const bytes = new Uint8Array(value);
  const tag = bytes[0];
  const declaredLength = bytes[1];

  // Need at least tag + length bytes
  if (tag === undefined || declaredLength === undefined) {
    throw new Error(`UTF8String TLV too short: expected at least 2 bytes, got ${bytes.length}`);
  }

  // Tag byte must be 0x0C (ASN.1 UTF8String)
  if (tag !== 0x0c) {
    throw new Error(
      `UTF8String TLV tag mismatch: expected 0x0C, got 0x${tag.toString(16).toUpperCase()}`,
    );
  }

  // Only the single-byte (short-form) length is supported, i.e. values 0–127.
  if (declaredLength > 127) {
    throw new Error(
      `UTF8String TLV uses long-form length (0x${declaredLength.toString(16).toUpperCase()}), which is not supported`,
    );
  }

  // Payload is everything after tag (1 byte) + length (1 byte); subarray is a
  // view, so no copy is made.
  const payload = bytes.subarray(2);

  // Declared length must match actual remaining bytes
  if (declaredLength !== payload.length) {
    throw new Error(
      `UTF8String TLV length mismatch: declared ${declaredLength}, actual ${payload.length}`,
    );
  }

  try {
    return decoder.decode(payload);
  } catch {
    // The fatal decoder rejects ill-formed byte sequences; surface that with
    // the same descriptive style as the structural checks above.
    throw new Error('UTF8String TLV payload is not valid UTF-8');
  }
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Public API
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract Mosaic custom OIDs (grantId, subjectUserId) from an X.509 certificate
|
||||||
|
* already parsed via @peculiar/x509.
|
||||||
|
*
|
||||||
|
* Returns `{ ok: true, value: MosaicOids }` on success, or
|
||||||
|
* `{ ok: false, error: <code>, detail? }` on any failure — never throws.
|
||||||
|
*/
|
||||||
|
export function extractMosaicOids(cert: X509Certificate): OidExtractionResult {
  try {
    // Both extensions must be present; report which one is missing first.
    const grantExt = cert.getExtension(OID_MOSAIC_GRANT_ID);
    if (!grantExt) {
      return { ok: false, error: 'MISSING_GRANT_ID' };
    }

    const subjectExt = cert.getExtension(OID_MOSAIC_SUBJECT_USER_ID);
    if (!subjectExt) {
      return { ok: false, error: 'MISSING_SUBJECT_USER_ID' };
    }

    // Decode grantId first, then subjectUserId; either decode may throw.
    const decoded: MosaicOids = {
      grantId: decodeUtf8StringTlv(grantExt.value),
      subjectUserId: decodeUtf8StringTlv(subjectExt.value),
    };
    return { ok: true, value: decoded };
  } catch (err) {
    // Anything thrown above (lookup or decode) is folded into PARSE_ERROR so
    // this function never throws.
    const detail = err instanceof Error ? err.message : String(err);
    return { ok: false, error: 'PARSE_ERROR', detail };
  }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse a PEM-encoded certificate and extract Mosaic OIDs.
|
||||||
|
* Returns an OidExtractionResult — never throws.
|
||||||
|
*/
|
||||||
|
export function extractMosaicOidsFromPem(certPem: string): OidExtractionResult {
  // Step 1: parse the PEM; a constructor failure becomes a PARSE_ERROR result.
  let parsed: X509Certificate;
  try {
    parsed = new X509Certificate(certPem);
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    return { ok: false, error: 'PARSE_ERROR', detail };
  }

  // Step 2: delegate to the certificate-level extractor.
  return extractMosaicOids(parsed);
}
|
||||||
@@ -0,0 +1,521 @@
|
|||||||
|
/**
|
||||||
|
* Unit tests for FederationAuthGuard (FED-M3-03).
|
||||||
|
*
|
||||||
|
* Coverage:
|
||||||
|
* - Missing cert (no TLS socket / no getPeerCertificate) → 401
|
||||||
|
* - Cert parse failure (corrupt DER raw bytes) → 401
|
||||||
|
* - Missing grantId OID → 401
|
||||||
|
* - Missing subjectUserId OID → 401
|
||||||
|
* - Grant not found (GrantsService throws NotFoundException) → 403
|
||||||
|
* - Grant in `pending` status → 403
|
||||||
|
* - Grant in `revoked` status → 403
|
||||||
|
* - Grant in `expired` status → 403
|
||||||
|
* - Cert serial mismatch → 403
|
||||||
|
* - Happy path: active grant + matching cert serial → context attached, returns true
|
||||||
|
*/
|
||||||
|
|
||||||
|
import 'reflect-metadata';
|
||||||
|
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||||
|
import type { ExecutionContext } from '@nestjs/common';
|
||||||
|
import { NotFoundException } from '@nestjs/common';
|
||||||
|
import { FederationAuthGuard } from '../federation-auth.guard.js';
|
||||||
|
import { makeMosaicIssuedCert } from '../../__tests__/helpers/test-cert.js';
|
||||||
|
import type { GrantsService, GrantWithPeer } from '../../grants.service.js';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Test constants
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
const GRANT_ID = 'a1111111-1111-1111-1111-111111111111';
|
||||||
|
const USER_ID = 'b2222222-2222-2222-2222-222222222222';
|
||||||
|
const PEER_ID = 'c3333333-3333-3333-3333-333333333333';
|
||||||
|
|
||||||
|
// Node.js TLS serialNumber is uppercase hex (no colons)
|
||||||
|
const CERT_SERIAL_HEX = '01';
|
||||||
|
|
||||||
|
const VALID_SCOPE = { resources: ['tasks'], max_rows_per_query: 100 };
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Mock builders
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build a minimal GrantWithPeer-shaped mock.
|
||||||
|
*/
|
||||||
|
function makeGrantWithPeer(overrides: Partial<GrantWithPeer> = {}): GrantWithPeer {
  // Fresh Date per field so no two fields share one mutable instance.
  const fixedCreatedAt = (): Date => new Date('2026-01-01T00:00:00Z');

  // Default peer row: active, serial matches CERT_SERIAL_HEX, cert valid for 24h.
  const peer: GrantWithPeer['peer'] = {
    id: PEER_ID,
    commonName: 'test-peer',
    displayName: 'Test Peer',
    certPem: '',
    certSerial: CERT_SERIAL_HEX,
    certNotAfter: new Date(Date.now() + 86_400_000),
    clientKeyPem: null,
    state: 'active',
    endpointUrl: null,
    lastSeenAt: null,
    createdAt: fixedCreatedAt(),
    revokedAt: null,
  };

  // Default grant: active, never expires; caller overrides win (shallow merge).
  const base: GrantWithPeer = {
    id: GRANT_ID,
    peerId: PEER_ID,
    subjectUserId: USER_ID,
    scope: VALID_SCOPE,
    status: 'active',
    expiresAt: null,
    createdAt: fixedCreatedAt(),
    revokedAt: null,
    revokedReason: null,
    peer,
  };

  return { ...base, ...overrides };
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build a mock ExecutionContext with a pre-built TLS peer certificate.
|
||||||
|
*
|
||||||
|
* `certPem` — PEM string to present as the raw DER cert (converted to Buffer).
|
||||||
|
* Pass null to simulate "no cert presented".
|
||||||
|
* `certSerialHex` — serialNumber string returned by the TLS socket.
|
||||||
|
* Node.js returns uppercase hex.
|
||||||
|
* `hasTlsSocket` — if false, raw.socket has no getPeerCertificate (plain HTTP).
|
||||||
|
*/
|
||||||
|
function makeContext(opts: {
  certPem: string | null;
  certSerialHex?: string;
  hasTlsSocket?: boolean;
}): {
  ctx: ExecutionContext;
  statusMock: ReturnType<typeof vi.fn>;
  sendMock: ReturnType<typeof vi.fn>;
} {
  const { certPem, certSerialHex = CERT_SERIAL_HEX, hasTlsSocket = true } = opts;

  // PEM → DER: drop the armor lines and all whitespace, then base64-decode.
  const pemToDer = (pem: string): Buffer =>
    Buffer.from(
      pem
        .replace(/-----BEGIN CERTIFICATE-----/, '')
        .replace(/-----END CERTIFICATE-----/, '')
        .replace(/\s+/g, ''),
      'base64',
    );

  // Object returned by the mocked socket.getPeerCertificate(); a null certPem
  // simulates "no certificate presented".
  const peerCert: Record<string, unknown> =
    certPem === null
      ? { raw: null, serialNumber: '' }
      : { raw: pemToDer(certPem), serialNumber: certSerialHex };

  const getPeerCertificate = vi.fn().mockReturnValue(peerCert);

  // Fastify-style reply chain: reply.status(...).header(...).send(...)
  const sendMock = vi.fn().mockReturnValue(undefined);
  const headerMock = vi.fn().mockReturnValue({ send: sendMock });
  const statusMock = vi.fn().mockReturnValue({ header: headerMock });
  const reply = { status: statusMock };

  // A socket without getPeerCertificate stands in for a plain (non-TLS) connection.
  const request = {
    raw: {
      socket: hasTlsSocket ? { getPeerCertificate } : {},
    },
  };

  const http = {
    getRequest: () => request,
    getResponse: () => reply,
  };
  const ctx = { switchToHttp: () => http } as unknown as ExecutionContext;

  return { ctx, statusMock, sendMock };
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build a mock GrantsService.
|
||||||
|
*/
|
||||||
|
function makeGrantsService(
  overrides: Partial<Pick<GrantsService, 'getGrantWithPeer'>> = {},
): GrantsService {
  // Default behaviour: resolve with the default active grant + peer fixture.
  const defaults = {
    getGrantWithPeer: vi.fn().mockResolvedValue(makeGrantWithPeer()),
  };

  // Caller-supplied mocks replace the defaults.
  const service = { ...defaults, ...overrides };
  return service as unknown as GrantsService;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Test suite
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('FederationAuthGuard', () => {
|
||||||
|
let certPem: string;
|
||||||
|
|
||||||
|
beforeEach(async () => {
|
||||||
|
// Generate a real Mosaic-issued cert with the standard OIDs
|
||||||
|
certPem = await makeMosaicIssuedCert({ grantId: GRANT_ID, subjectUserId: USER_ID });
|
||||||
|
});
|
||||||
|
|
||||||
|
// ── 401: No TLS socket ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
it('returns 401 when there is no TLS socket (plain HTTP connection)', async () => {
|
||||||
|
const { ctx, statusMock, sendMock } = makeContext({
|
||||||
|
certPem: certPem,
|
||||||
|
hasTlsSocket: false,
|
||||||
|
});
|
||||||
|
|
||||||
|
const guard = new FederationAuthGuard(makeGrantsService());
|
||||||
|
const result = await guard.canActivate(ctx);
|
||||||
|
|
||||||
|
expect(result).toBe(false);
|
||||||
|
expect(statusMock).toHaveBeenCalledWith(401);
|
||||||
|
expect(sendMock).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({
|
||||||
|
error: expect.objectContaining({ code: 'unauthorized', message: expect.any(String) }),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
// ── 401: Cert not presented ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
it('returns 401 when the peer did not present a certificate', async () => {
|
||||||
|
const { ctx, statusMock, sendMock } = makeContext({ certPem: null });
|
||||||
|
|
||||||
|
const guard = new FederationAuthGuard(makeGrantsService());
|
||||||
|
const result = await guard.canActivate(ctx);
|
||||||
|
|
||||||
|
expect(result).toBe(false);
|
||||||
|
expect(statusMock).toHaveBeenCalledWith(401);
|
||||||
|
expect(sendMock).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({
|
||||||
|
error: expect.objectContaining({ code: 'unauthorized', message: expect.any(String) }),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
// ── 401: Cert parse failure ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
it('returns 401 when the certificate DER bytes are corrupt', async () => {
|
||||||
|
// Build context with a cert that has garbage DER bytes
|
||||||
|
const corruptPem = '-----BEGIN CERTIFICATE-----\naW52YWxpZA==\n-----END CERTIFICATE-----';
|
||||||
|
const { ctx, statusMock, sendMock } = makeContext({ certPem: corruptPem });
|
||||||
|
|
||||||
|
const guard = new FederationAuthGuard(makeGrantsService());
|
||||||
|
const result = await guard.canActivate(ctx);
|
||||||
|
|
||||||
|
expect(result).toBe(false);
|
||||||
|
expect(statusMock).toHaveBeenCalledWith(401);
|
||||||
|
expect(sendMock).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({
|
||||||
|
error: expect.objectContaining({ code: 'unauthorized', message: expect.any(String) }),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
// ── 401: Missing grantId OID ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
it('returns 401 when the cert is missing the grantId OID', async () => {
|
||||||
|
// makeSelfSignedCert produces a cert without any Mosaic OIDs
|
||||||
|
const { makeSelfSignedCert } = await import('../../__tests__/helpers/test-cert.js');
|
||||||
|
const plainCert = await makeSelfSignedCert();
|
||||||
|
const { ctx, statusMock, sendMock } = makeContext({ certPem: plainCert });
|
||||||
|
|
||||||
|
const guard = new FederationAuthGuard(makeGrantsService());
|
||||||
|
const result = await guard.canActivate(ctx);
|
||||||
|
|
||||||
|
expect(result).toBe(false);
|
||||||
|
expect(statusMock).toHaveBeenCalledWith(401);
|
||||||
|
expect(sendMock).toHaveBeenCalledWith(
|
||||||
|
expect.objectContaining({
|
||||||
|
error: expect.objectContaining({ code: 'unauthorized', message: expect.any(String) }),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
// ── 401: Missing subjectUserId OID ───────────────────────────────────────
|
||||||
|
|
||||||
|
it('returns 401 when the cert has grantId OID but is missing subjectUserId OID', async () => {
  // Hand-roll a self-signed cert that carries the grantId extension and
  // deliberately omits the subjectUserId extension.
  const nodeCrypto = await import('node:crypto');
  const x509 = await import('@peculiar/x509');
  const subtleProvider = nodeCrypto.webcrypto;

  x509.cryptoProvider.set(
    subtleProvider as unknown as Parameters<typeof x509.cryptoProvider.set>[0],
  );

  const signingAlg = { name: 'ECDSA', namedCurve: 'P-256', hash: 'SHA-256' } as const;
  const keyPair = await subtleProvider.subtle.generateKey(signingAlg, false, ['sign', 'verify']);

  const ONE_DAY_MS = 86_400_000;
  const issuedAt = new Date();
  const expiresAt = new Date(issuedAt.getTime() + ONE_DAY_MS);

  // Extension payload: tag byte 0x0c, a single length byte, then the UTF-8
  // bytes of GRANT_ID.
  const grantIdBytes = new TextEncoder().encode(GRANT_ID);
  const grantIdExtensionValue = Uint8Array.from([0x0c, grantIdBytes.length, ...grantIdBytes]);

  const partialCert = await x509.X509CertificateGenerator.createSelfSigned({
    serialNumber: '01',
    name: 'CN=partial-oid-test',
    notBefore: issuedAt,
    notAfter: expiresAt,
    signingAlgorithm: signingAlg,
    keys: keyPair,
    extensions: [
      new x509.BasicConstraintsExtension(false),
      new x509.KeyUsagesExtension(x509.KeyUsageFlags.digitalSignature),
      // grantId only — no subjectUserId extension is added
      new x509.Extension('1.3.6.1.4.1.99999.1', false, grantIdExtensionValue),
    ],
  });

  const guard = new FederationAuthGuard(makeGrantsService());
  const { ctx, statusMock, sendMock } = makeContext({ certPem: partialCert.toString('pem') });

  const allowed = await guard.canActivate(ctx);

  expect(allowed).toBe(false);
  expect(statusMock).toHaveBeenCalledWith(401);

  const expectedError = expect.objectContaining({
    code: 'unauthorized',
    message: expect.any(String),
  });
  expect(sendMock).toHaveBeenCalledWith(expect.objectContaining({ error: expectedError }));
});
|
||||||
|
|
||||||
|
// ── 403: Grant not found ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
it('returns 403 when the grantId from the cert does not exist in DB', async () => {
  // The grant lookup rejects, which is how the service reports a missing grant.
  const lookupFailure = new NotFoundException(`Grant ${GRANT_ID} not found`);
  const missingGrantService = makeGrantsService({
    getGrantWithPeer: vi.fn().mockRejectedValue(lookupFailure),
  });

  const guard = new FederationAuthGuard(missingGrantService);
  const { ctx, statusMock, sendMock } = makeContext({ certPem });

  const allowed = await guard.canActivate(ctx);

  expect(allowed).toBe(false);
  expect(statusMock).toHaveBeenCalledWith(403);

  const expectedError = expect.objectContaining({
    code: 'forbidden',
    message: 'Federation access denied',
  });
  expect(sendMock).toHaveBeenCalledWith(expect.objectContaining({ error: expectedError }));
});
|
||||||
|
|
||||||
|
// ── 403: Grant in `pending` status ───────────────────────────────────────
|
||||||
|
|
||||||
|
it('returns 403 when the grant is in pending status', async () => {
  // The grant exists, but its status is not 'active'.
  const pendingGrant = makeGrantWithPeer({ status: 'pending' });
  const pendingGrantService = makeGrantsService({
    getGrantWithPeer: vi.fn().mockResolvedValue(pendingGrant),
  });

  const guard = new FederationAuthGuard(pendingGrantService);
  const { ctx, statusMock, sendMock } = makeContext({ certPem });

  const allowed = await guard.canActivate(ctx);

  expect(allowed).toBe(false);
  expect(statusMock).toHaveBeenCalledWith(403);

  const expectedError = expect.objectContaining({
    code: 'forbidden',
    message: 'Federation access denied',
  });
  expect(sendMock).toHaveBeenCalledWith(expect.objectContaining({ error: expectedError }));
});
|
||||||
|
|
||||||
|
// ── 403: Grant in `revoked` status ───────────────────────────────────────
|
||||||
|
|
||||||
|
it('returns 403 when the grant is in revoked status', async () => {
  // A revoked grant carries both the 'revoked' status and a revocation timestamp.
  const revokedGrant = makeGrantWithPeer({ status: 'revoked', revokedAt: new Date() });
  const revokedGrantService = makeGrantsService({
    getGrantWithPeer: vi.fn().mockResolvedValue(revokedGrant),
  });

  const guard = new FederationAuthGuard(revokedGrantService);
  const { ctx, statusMock, sendMock } = makeContext({ certPem });

  const allowed = await guard.canActivate(ctx);

  expect(allowed).toBe(false);
  expect(statusMock).toHaveBeenCalledWith(403);

  const expectedError = expect.objectContaining({
    code: 'forbidden',
    message: 'Federation access denied',
  });
  expect(sendMock).toHaveBeenCalledWith(expect.objectContaining({ error: expectedError }));
});
|
||||||
|
|
||||||
|
// ── 403: Grant in `expired` status ───────────────────────────────────────
|
||||||
|
|
||||||
|
it('returns 403 when the grant is in expired status', async () => {
  // The grant exists, but its status is 'expired' rather than 'active'.
  const expiredGrant = makeGrantWithPeer({ status: 'expired' });
  const expiredGrantService = makeGrantsService({
    getGrantWithPeer: vi.fn().mockResolvedValue(expiredGrant),
  });

  const guard = new FederationAuthGuard(expiredGrantService);
  const { ctx, statusMock, sendMock } = makeContext({ certPem });

  const allowed = await guard.canActivate(ctx);

  expect(allowed).toBe(false);
  expect(statusMock).toHaveBeenCalledWith(403);

  const expectedError = expect.objectContaining({
    code: 'forbidden',
    message: 'Federation access denied',
  });
  expect(sendMock).toHaveBeenCalledWith(expect.objectContaining({ error: expectedError }));
});
|
||||||
|
|
||||||
|
// ── 403: Cert serial mismatch ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
it('returns 403 when the cert serial does not match the registered peer cert serial', async () => {
  // Peer row as stored in the DB: its serial is 'DEADBEEF', whereas the
  // request below presents serial '01'.
  const peerWithOtherSerial = {
    id: PEER_ID,
    commonName: 'test-peer',
    displayName: 'Test Peer',
    certPem: '',
    certSerial: 'DEADBEEF',
    certNotAfter: new Date(Date.now() + 86_400_000),
    clientKeyPem: null,
    state: 'active',
    endpointUrl: null,
    lastSeenAt: null,
    createdAt: new Date('2026-01-01T00:00:00Z'),
    revokedAt: null,
  };

  const mismatchedGrant = makeGrantWithPeer({ peer: peerWithOtherSerial });
  const grants = makeGrantsService({
    getGrantWithPeer: vi.fn().mockResolvedValue(mismatchedGrant),
  });

  const guard = new FederationAuthGuard(grants);

  // Inbound TLS layer reports serial '01' for the presented certificate.
  const { ctx, statusMock, sendMock } = makeContext({ certPem, certSerialHex: '01' });

  const allowed = await guard.canActivate(ctx);

  expect(allowed).toBe(false);
  expect(statusMock).toHaveBeenCalledWith(403);

  const expectedError = expect.objectContaining({
    code: 'forbidden',
    message: 'Federation access denied',
  });
  expect(sendMock).toHaveBeenCalledWith(expect.objectContaining({ error: expectedError }));
});
|
||||||
|
|
||||||
|
// ── 403: subjectUserId cert/DB mismatch (CRIT-1 regression test) ─────────
|
||||||
|
|
||||||
|
it('returns 403 when the cert subjectUserId does not match the DB grant subjectUserId', async () => {
  // The presented certificate names a subject user that differs from the
  // one recorded on the grant row.
  const attackerSubjectUserId = 'attacker-user-id';
  const forgedCertPem = await makeMosaicIssuedCert({
    grantId: GRANT_ID,
    subjectUserId: attackerSubjectUserId,
  });

  // The DB side of the comparison: the grant belongs to USER_ID.
  const legitimateGrant = makeGrantWithPeer({ subjectUserId: USER_ID });
  const grants = makeGrantsService({
    getGrantWithPeer: vi.fn().mockResolvedValue(legitimateGrant),
  });

  const guard = new FederationAuthGuard(grants);

  // Serial matches, so only the subjectUserId discrepancy can cause rejection.
  const { ctx, statusMock, sendMock } = makeContext({
    certPem: forgedCertPem,
    certSerialHex: CERT_SERIAL_HEX,
  });

  const allowed = await guard.canActivate(ctx);

  expect(allowed).toBe(false);
  expect(statusMock).toHaveBeenCalledWith(403);

  const expectedError = expect.objectContaining({
    code: 'forbidden',
    message: 'Federation access denied',
  });
  expect(sendMock).toHaveBeenCalledWith(expect.objectContaining({ error: expectedError }));
});
|
||||||
|
|
||||||
|
// ── Happy path ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
it('returns true and attaches federationContext on happy path', async () => {
  // Active grant whose peer serial equals the serial the TLS layer reports.
  const registeredPeer = {
    id: PEER_ID,
    commonName: 'test-peer',
    displayName: 'Test Peer',
    certPem: '',
    certSerial: CERT_SERIAL_HEX,
    certNotAfter: new Date(Date.now() + 86_400_000),
    clientKeyPem: null,
    state: 'active',
    endpointUrl: null,
    lastSeenAt: null,
    createdAt: new Date('2026-01-01T00:00:00Z'),
    revokedAt: null,
  };
  const activeGrant = makeGrantWithPeer({ status: 'active', peer: registeredPeer });

  const grants = makeGrantsService({
    getGrantWithPeer: vi.fn().mockResolvedValue(activeGrant),
  });

  // The execution context is assembled by hand (instead of via makeContext)
  // so the test keeps a reference to the request object and can inspect what
  // the guard attaches to it.
  const withoutHeader = certPem.replace(/-----BEGIN CERTIFICATE-----/, '');
  const withoutFooter = withoutHeader.replace(/-----END CERTIFICATE-----/, '');
  const base64Body = withoutFooter.replace(/\s+/g, '');
  const derBytes = Buffer.from(base64Body, 'base64');
  const presentedCert = { raw: derBytes, serialNumber: CERT_SERIAL_HEX };

  // reply.status(...).header(...).send(...) chain
  const sendSpy = vi.fn().mockReturnValue(undefined);
  const headerSpy = vi.fn().mockReturnValue({ send: sendSpy });
  const statusSpy = vi.fn().mockReturnValue({ header: headerSpy });
  const fakeReply = { status: statusSpy };

  const fakeRequest: Record<string, unknown> = {
    raw: {
      socket: { getPeerCertificate: vi.fn().mockReturnValue(presentedCert) },
    },
  };

  const executionContext = {
    switchToHttp: () => ({
      getRequest: () => fakeRequest,
      getResponse: () => fakeReply,
    }),
  } as unknown as ExecutionContext;

  const guard = new FederationAuthGuard(grants);
  const allowed = await guard.canActivate(executionContext);

  expect(allowed).toBe(true);
  expect(statusSpy).not.toHaveBeenCalled();

  // The guard must have attached the federation context to the request.
  expect(fakeRequest['federationContext']).toEqual({
    grantId: GRANT_ID,
    subjectUserId: USER_ID,
    peerId: PEER_ID,
    scope: VALID_SCOPE,
  });
});
|
||||||
|
});
|
||||||
212
apps/gateway/src/federation/server/federation-auth.guard.ts
Normal file
212
apps/gateway/src/federation/server/federation-auth.guard.ts
Normal file
@@ -0,0 +1,212 @@
|
|||||||
|
/**
|
||||||
|
* FederationAuthGuard — NestJS CanActivate guard for inbound federation requests.
|
||||||
|
*
|
||||||
|
* Validates the mTLS client certificate presented by a peer gateway, extracts
|
||||||
|
* custom OIDs to identify the grant + subject user, loads the grant from DB,
|
||||||
|
* asserts it is active, and verifies the cert serial against the registered peer
|
||||||
|
* cert serial as a defense-in-depth measure.
|
||||||
|
*
|
||||||
|
* On success, attaches `request.federationContext` for downstream verb controllers.
|
||||||
|
* On failure, responds with the federation wire-format error envelope (not raw
|
||||||
|
* NestJS exception JSON) to match the federation protocol contract.
|
||||||
|
*
|
||||||
|
* ## Cert-serial check decision
|
||||||
|
* The guard validates that the inbound client cert's serial number matches the
|
||||||
|
* `certSerial` stored on the associated `federation_peers` row. This is a
|
||||||
|
* defense-in-depth measure: even if the mTLS handshake is compromised at the
|
||||||
|
* transport layer (e.g. misconfigured TLS terminator that forwards arbitrary
|
||||||
|
* client certs), an attacker cannot replay a cert with a different serial than
|
||||||
|
* what was registered during enrollment. This check is NOT loosened because:
|
||||||
|
* 1. It is O(1) — no additional DB round-trip (peerId is on the grant row,
|
||||||
|
* so we join to federationPeers in the same query).
|
||||||
|
* 2. Cert renewal MUST update the stored serial — enforced by M6 scheduler.
|
||||||
|
* 3. The OID-only path (without serial check) would allow any cert from the
|
||||||
|
* same CA bearing the same grantId OID to succeed after cert compromise.
|
||||||
|
*
|
||||||
|
* ## FastifyRequest typing path
|
||||||
|
* NestJS + Fastify wraps the raw Node.js IncomingMessage in a FastifyRequest.
|
||||||
|
* The underlying TLS socket is accessed via `request.raw.socket`, which is a
|
||||||
|
* `tls.TLSSocket` when the server is listening on HTTPS. In development/test
|
||||||
|
* the gateway may run over plain HTTP, in which case `getPeerCertificate` is
|
||||||
|
* not available. The guard safely handles both cases by checking for the
|
||||||
|
* method's existence before calling it.
|
||||||
|
*
|
||||||
|
* Note: The guard reads the peer certificate from the *already-completed*
|
||||||
|
* TLS handshake via `socket.getPeerCertificate(detailed=true)`. This relies
|
||||||
|
* on the server being configured with `requestCert: true` at the TLS level
|
||||||
|
* so Fastify/Node.js requests the client cert during the handshake.
|
||||||
|
* The guard does NOT verify the cert chain itself — that is handled by the
|
||||||
|
* TLS layer (Node.js `rejectUnauthorized: true` with the CA cert pinned).
|
||||||
|
*/
|
||||||
|
|
||||||
|
import {
|
||||||
|
type CanActivate,
|
||||||
|
type ExecutionContext,
|
||||||
|
Inject,
|
||||||
|
Injectable,
|
||||||
|
Logger,
|
||||||
|
} from '@nestjs/common';
|
||||||
|
import type { FastifyReply, FastifyRequest } from 'fastify';
|
||||||
|
import * as tls from 'node:tls';
|
||||||
|
import { X509Certificate } from '@peculiar/x509';
|
||||||
|
import { FederationForbiddenError, FederationUnauthorizedError } from '@mosaicstack/types';
|
||||||
|
import { extractMosaicOids } from '../oid.util.js';
|
||||||
|
import { GrantsService } from '../grants.service.js';
|
||||||
|
import type { FederationContext } from './federation-context.js';
|
||||||
|
import './federation-context.js'; // side-effect import: applies FastifyRequest module augmentation
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Internal helpers
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Write a federation wire-format error envelope to the Fastify reply.
 *
 * The HTTP status is derived from the error's `code`: `'unauthorized'` maps
 * to 401 and every other code maps to 403. The body is whatever
 * `error.toEnvelope()` returns, sent with a JSON content type.
 *
 * @param reply - Fastify reply to respond on.
 * @param error - Federation error describing the rejection.
 * @returns Always `false`, so a guard can `return sendFederationError(...)`
 *   straight out of `canActivate`.
 */
function sendFederationError(
  reply: FastifyReply,
  error: FederationUnauthorizedError | FederationForbiddenError,
): boolean {
  let httpStatus = 403;
  if (error.code === 'unauthorized') {
    httpStatus = 401;
  }

  const jsonReply = reply.status(httpStatus).header('content-type', 'application/json');
  // `void`: the result of send() is intentionally not awaited or inspected.
  void jsonReply.send(error.toEnvelope());

  return false;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Guard
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Injectable()
export class FederationAuthGuard implements CanActivate {
  private readonly logger = new Logger(FederationAuthGuard.name);

  constructor(@Inject(GrantsService) private readonly grantsService: GrantsService) {}

  /**
   * Authenticate and authorize an inbound federation request.
   *
   * Steps, in order (the first failure short-circuits):
   *  1. Read the peer certificate from the TLS socket (401 if absent).
   *  2. Parse it with `@peculiar/x509` (401 if unparseable).
   *  3. Extract the Mosaic grantId / subjectUserId OIDs (401 if missing).
   *  4. Load the grant (with its peer) via `GrantsService` (403 on any failure).
   *  5. Require `status === 'active'` and a cert subjectUserId equal to the
   *     grant's subjectUserId (403 otherwise).
   *  6. Require the TLS-reported cert serial to equal the peer's stored
   *     `certSerial`, compared case-insensitively (403 otherwise).
   *  7. Attach `request.federationContext` and allow the request.
   *
   * Rejections are written directly to the reply through
   * `sendFederationError`; this method does not throw for them.
   *
   * @param context - Nest execution context for the current HTTP request.
   * @returns `true` when the request is allowed; `false` after an error
   *   response has been sent.
   */
  async canActivate(context: ExecutionContext): Promise<boolean> {
    const http = context.switchToHttp();
    const request = http.getRequest<FastifyRequest>();
    const reply = http.getResponse<FastifyReply>();

    // ── Step 1: Extract peer certificate from TLS socket ────────────────────
    const rawSocket = request.raw.socket;

    // getPeerCertificate only exists on TLS sockets; a plain-HTTP socket
    // (or no socket at all) cannot carry a client certificate.
    if (
      !rawSocket ||
      typeof (rawSocket as Partial<tls.TLSSocket>).getPeerCertificate !== 'function'
    ) {
      this.logger.warn('No TLS socket — client cert unavailable (non-mTLS connection)');
      return sendFederationError(
        reply,
        new FederationUnauthorizedError('Client certificate required'),
      );
    }

    const tlsSocket = rawSocket as tls.TLSSocket;
    const peerCert = tlsSocket.getPeerCertificate(true);

    // Per the Node.js tls docs, an empty object is returned when the peer
    // did not provide a certificate — in that case `raw` is absent.
    if (!peerCert || !peerCert.raw) {
      this.logger.warn('Peer certificate not presented (mTLS handshake did not supply cert)');
      return sendFederationError(
        reply,
        new FederationUnauthorizedError('Client certificate required'),
      );
    }

    // ── Step 2: Parse the DER-encoded certificate via @peculiar/x509 ────────
    let cert: X509Certificate;
    try {
      // peerCert.raw is a Buffer containing the DER-encoded cert
      cert = new X509Certificate(peerCert.raw);
    } catch (err) {
      this.logger.warn(
        `Failed to parse peer certificate: ${err instanceof Error ? err.message : String(err)}`,
      );
      return sendFederationError(
        reply,
        new FederationUnauthorizedError('Client certificate could not be parsed'),
      );
    }

    // ── Step 3: Extract Mosaic custom OIDs ──────────────────────────────────
    const oidResult = extractMosaicOids(cert);

    if (!oidResult.ok) {
      const message =
        oidResult.error === 'MISSING_GRANT_ID'
          ? 'Client certificate is missing required OID: mosaic_grant_id (1.3.6.1.4.1.99999.1)'
          : oidResult.error === 'MISSING_SUBJECT_USER_ID'
            ? 'Client certificate is missing required OID: mosaic_subject_user_id (1.3.6.1.4.1.99999.2)'
            : `Client certificate OID extraction failed: ${oidResult.detail ?? 'unknown error'}`;
      this.logger.warn(`OID extraction failure [${oidResult.error}]: ${message}`);
      return sendFederationError(reply, new FederationUnauthorizedError(message));
    }

    const { grantId, subjectUserId } = oidResult.value;

    // ── Step 4: Load grant from DB ───────────────────────────────────────────
    let grant: Awaited<ReturnType<GrantsService['getGrantWithPeer']>>;
    try {
      grant = await this.grantsService.getGrantWithPeer(grantId);
    } catch (err) {
      // A missing grant is an expected outcome; anything else (DB outage,
      // driver error, …) is an infrastructure fault and must not be logged
      // as "not found". Either way the request fails closed with the same
      // opaque 403 so the peer learns nothing about the cause.
      // NOTE(review): NotFoundException is matched by `name` because it is
      // not imported in this file — confirm `name` survives the build.
      const isNotFound = err instanceof Error && err.name === 'NotFoundException';
      if (isNotFound) {
        this.logger.warn(`Grant not found: ${grantId}`);
      } else {
        this.logger.error(
          `Grant lookup failed for ${grantId}: ${err instanceof Error ? err.message : String(err)}`,
        );
      }
      return sendFederationError(reply, new FederationForbiddenError('Federation access denied'));
    }

    // ── Step 5: Assert grant is active ──────────────────────────────────────
    if (grant.status !== 'active') {
      this.logger.warn(`Grant ${grantId} is not active — status=${grant.status}`);
      return sendFederationError(reply, new FederationForbiddenError('Federation access denied'));
    }

    // ── Step 5b: Validate cert-extracted subjectUserId against DB (CRIT-1) ──
    // The cert claim is untrusted input; the DB row is authoritative.
    if (subjectUserId !== grant.subjectUserId) {
      this.logger.warn(`subjectUserId mismatch for grant ${grantId}`);
      return sendFederationError(reply, new FederationForbiddenError('Federation access denied'));
    }

    // ── Step 6: Defense-in-depth — cert serial must match registered peer ───
    // The serial is taken from the certificate object returned by the TLS
    // socket rather than from the @peculiar/x509 parse, so the value compared
    // here is the one the TLS layer reported for this connection.
    const inboundSerial: string = peerCert.serialNumber ?? '';

    if (!grant.peer.certSerial) {
      // Peer row exists but has no stored serial — something is wrong with enrollment
      this.logger.error(`Peer ${grant.peerId} has no stored certSerial — enrollment incomplete`);
      return sendFederationError(reply, new FederationForbiddenError('Federation access denied'));
    }

    // Compare case-insensitively so differences in hex casing between the
    // inbound value and the stored value cannot cause a false mismatch.
    if (inboundSerial.toUpperCase() !== grant.peer.certSerial.toUpperCase()) {
      this.logger.warn(
        `Cert serial mismatch for grant ${grantId}: ` +
          `inbound=${inboundSerial} registered=${grant.peer.certSerial}`,
      );
      return sendFederationError(reply, new FederationForbiddenError('Federation access denied'));
    }

    // ── Step 7: Attach FederationContext to request ──────────────────────────
    // Use grant.subjectUserId from DB (authoritative) — not the cert-extracted value.
    const federationContext: FederationContext = {
      grantId,
      subjectUserId: grant.subjectUserId,
      peerId: grant.peerId,
      scope: grant.scope as Record<string, unknown>,
    };

    request.federationContext = federationContext;

    this.logger.debug(
      `Federation auth OK — grantId=${grantId} peerId=${grant.peerId} subjectUserId=${grant.subjectUserId}`,
    );

    return true;
  }
}
|
||||||
39
apps/gateway/src/federation/server/federation-context.ts
Normal file
39
apps/gateway/src/federation/server/federation-context.ts
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
/**
|
||||||
|
* FederationContext — attached to inbound federation requests after successful
|
||||||
|
* mTLS + grant validation by FederationAuthGuard.
|
||||||
|
*
|
||||||
|
* Downstream verb controllers access this via `request.federationContext`.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Module augmentation for Fastify: declares the optional
 * `federationContext` property on `FastifyRequest` so that code reading or
 * writing `request.federationContext` type-checks.
 *
 * The property is optional because it is only present once something has
 * assigned it to the request.
 */
declare module 'fastify' {
  interface FastifyRequest {
    federationContext?: FederationContext;
  }
}
|
||||||
|
|
||||||
|
/**
 * Shape of the per-request federation context.
 *
 * Bundles the identifiers and the scope that federation verb handlers need
 * in order to serve a request made under a federation grant.
 */
export interface FederationContext {
  /** Federation grant ID (carried in the client cert under OID 1.3.6.1.4.1.99999.1). */
  grantId: string;

  /** Local subject user whose data may be accessed under this grant. */
  subjectUserId: string;

  /** ID of the peer gateway, i.e. the grant's `peerId` foreign key. */
  peerId: string;

  /**
   * Scope of the grant — governs which resources the peer may query.
   *
   * Deliberately loose (`Record<string, unknown>`): the full scope schema
   * lives in scope-schema.ts, and handlers are expected to narrow this value
   * with parseFederationScope before relying on its contents.
   */
  scope: Record<string, unknown>;
}
|
||||||
13
apps/gateway/src/federation/server/index.ts
Normal file
13
apps/gateway/src/federation/server/index.ts
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
/**
|
||||||
|
* Federation server-side barrel — inbound request handling.
|
||||||
|
*
|
||||||
|
* Exports the mTLS auth guard and the FederationContext interface
|
||||||
|
* for use by verb controllers (M3-05/06/07).
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* import { FederationAuthGuard } from './server/index.js';
|
||||||
|
* @UseGuards(FederationAuthGuard)
|
||||||
|
*/
|
||||||
|
|
||||||
|
export { FederationAuthGuard } from './federation-auth.guard.js';
|
||||||
|
export type { FederationContext } from './federation-context.js';
|
||||||
@@ -7,11 +7,11 @@
|
|||||||
|
|
||||||
**ID:** federation-v1-20260419
|
**ID:** federation-v1-20260419
|
||||||
**Statement:** Jarvis operates across 3–4 workstations in two physical locations (home, USC). The user currently reaches back to a single jarvis-brain checkout from every session; a prior OpenBrain attempt caused cache, latency, and opacity pain. This mission builds asymmetric federation between Mosaic Stack gateways so that a session on a user's home gateway can query their work gateway in real time without data ever persisting across the boundary, with full multi-tenant isolation and standard-PKI (X.509 / Step-CA) trust management.
|
**Statement:** Jarvis operates across 3–4 workstations in two physical locations (home, USC). The user currently reaches back to a single jarvis-brain checkout from every session; a prior OpenBrain attempt caused cache, latency, and opacity pain. This mission builds asymmetric federation between Mosaic Stack gateways so that a session on a user's home gateway can query their work gateway in real time without data ever persisting across the boundary, with full multi-tenant isolation and standard-PKI (X.509 / Step-CA) trust management.
|
||||||
**Phase:** M2 active — Step-CA + grant schema + admin CLI; parallel test-deploy workstream stood up
|
**Phase:** M3 active — mTLS handshake + list/get/capabilities verbs + scope enforcement
|
||||||
**Current Milestone:** FED-M2
|
**Current Milestone:** FED-M3
|
||||||
**Progress:** 1 / 7 milestones
|
**Progress:** 2 / 7 milestones
|
||||||
**Status:** active
|
**Status:** active
|
||||||
**Last Updated:** 2026-04-21 (M2 decomposed; mos-test-1/-2 designated as federation E2E test hosts)
|
**Last Updated:** 2026-04-21 (M2 closed via PR #503, tag `fed-v0.2.0-m2`, issue #461 closed; M3 decomposed into 14 tasks)
|
||||||
**Parent Mission:** None — new mission
|
**Parent Mission:** None — new mission
|
||||||
|
|
||||||
## Test Infrastructure
|
## Test Infrastructure
|
||||||
@@ -63,8 +63,8 @@ Key design references:
|
|||||||
| # | ID | Name | Status | Branch | Issue | Started | Completed |
|
| # | ID | Name | Status | Branch | Issue | Started | Completed |
|
||||||
| --- | ------ | --------------------------------------------- | ----------- | ------------------ | ----- | ---------- | ---------- |
|
| --- | ------ | --------------------------------------------- | ----------- | ------------------ | ----- | ---------- | ---------- |
|
||||||
| 1 | FED-M1 | Federated tier infrastructure | done | (12 PRs #470-#481) | #460 | 2026-04-19 | 2026-04-19 |
|
| 1 | FED-M1 | Federated tier infrastructure | done | (12 PRs #470-#481) | #460 | 2026-04-19 | 2026-04-19 |
|
||||||
| 2 | FED-M2 | Step-CA + grant schema + admin CLI | in-progress | (decomposition) | #461 | 2026-04-21 | — |
|
| 2 | FED-M2 | Step-CA + grant schema + admin CLI | done | (PRs #483-#503) | #461 | 2026-04-21 | 2026-04-21 |
|
||||||
| 3 | FED-M3 | mTLS handshake + list/get + scope enforcement | not-started | — | #462 | — | — |
|
| 3 | FED-M3 | mTLS handshake + list/get + scope enforcement | in-progress | (decomposition) | #462 | 2026-04-21 | — |
|
||||||
| 4 | FED-M4 | search verb + audit log + rate limit | not-started | — | #463 | — | — |
|
| 4 | FED-M4 | search verb + audit log + rate limit | not-started | — | #463 | — | — |
|
||||||
| 5 | FED-M5 | Cache + offline degradation + OTEL | not-started | — | #464 | — | — |
|
| 5 | FED-M5 | Cache + offline degradation + OTEL | not-started | — | #464 | — | — |
|
||||||
| 6 | FED-M6 | Revocation + auto-renewal + CRL | not-started | — | #465 | — | — |
|
| 6 | FED-M6 | Revocation + auto-renewal + CRL | not-started | — | #465 | — | — |
|
||||||
@@ -85,17 +85,24 @@ Key design references:
|
|||||||
|
|
||||||
## Session History
|
## Session History
|
||||||
|
|
||||||
| Session | Date | Runtime | Outcome |
|
| Session | Date | Runtime | Outcome |
|
||||||
| ------- | ---------- | ------- | --------------------------------------------------------------------- |
|
| ------- | ----------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| S1 | 2026-04-19 | claude | PRD authored, MILESTONES decomposed, 7 issues filed |
|
| S1 | 2026-04-19 | claude | PRD authored, MILESTONES decomposed, 7 issues filed |
|
||||||
| S2-S4 | 2026-04-19 | claude | FED-M1 complete: 12 tasks (PRs #470-#481) merged; tag `fed-v0.1.0-m1` |
|
| S2-S4 | 2026-04-19 | claude | FED-M1 complete: 12 tasks (PRs #470-#481) merged; tag `fed-v0.1.0-m1` |
|
||||||
|
| S5-S22 | 2026-04-19 → 2026-04-21 | claude | FED-M2 complete: 13 tasks (PRs #483-#503) merged; tag `fed-v0.2.0-m2`; issue #461 closed. Step-CA + grant schema + admin CLI shipped. |
|
||||||
|
| S23 | 2026-04-21 | claude | M3 decomposed into 14 tasks in `docs/federation/TASKS.md`. Manifest M3 row → in-progress. Next: kickoff M3-01. |
|
||||||
|
|
||||||
## Next Step
|
## Next Step
|
||||||
|
|
||||||
FED-M2 active. Decomposition landed in `docs/federation/TASKS.md` (M2-01..M2-13 code workstream + DEPLOY-01..DEPLOY-05 parallel test-deploy workstream, ~88K total). Tracking issue #482.
|
FED-M3 active. Decomposition landed in `docs/federation/TASKS.md` (M3-01..M3-14, ~100K estimate). Tracking issue #462.
|
||||||
|
|
||||||
Parallel execution plan:
|
Execution plan (parallel where possible):
|
||||||
|
|
||||||
- **CODE workstream**: M2-01 (DB migration) starts immediately — sonnet subagent on `feat/federation-m2-schema`. Then M2-02 → M2-09 sequentially with M2-04/M2-05/M2-06/M2-07 having interleaved CA/storage/grant dependencies.
|
- **Foundation**: M3-01 (DTOs in `packages/types/src/federation/`) starts immediately — sonnet subagent on `feat/federation-m3-types`. Blocks all server + client work.
|
||||||
- **DEPLOY workstream**: DEPLOY-01 (image verify) → DEPLOY-02 (stack template) → DEPLOY-03/04 (mos-test-1/-2 deploy) → DEPLOY-05 (TEST-INFRA.md). Gated on Portainer wrapper PR (`PORTAINER_INSECURE` flag) merging first.
|
- **Server stream** (after M3-01): M3-03 (AuthGuard) + M3-04 (ScopeService) in series, then M3-05 / M3-06 / M3-07 (verbs) in parallel.
|
||||||
- **Re-converge** at M2-10 (E2E test) once both workstreams ready.
|
- **Client stream** (after M3-01, parallel with server): M3-08 (FederationClient) → M3-09 (QuerySourceService).
|
||||||
|
- **Harness** (parallel with everything): M3-02 (`tools/federation-harness/`) — needed for M3-11.
|
||||||
|
- **Test gates**: M3-10 (Integration) → M3-11 (E2E with harness) → M3-12 (Independent security review, two rounds budgeted).
|
||||||
|
- **Close**: M3-13 (Docs) → M3-14 (release tag `fed-v0.3.0-m3`, close #462).
|
||||||
|
|
||||||
|
**Test-bed fallback:** `mos-test-1/-2` deploy is still blocked on `FED-M2-DEPLOY-IMG-FIX`. The harness in M3-02 ships a local two-gateway docker-compose so M3-11 is not blocked. Production-host validation is M7's responsibility (PRD AC-12).
|
||||||
|
|||||||
@@ -85,7 +85,38 @@ Goal: An admin can create a federation grant; counterparty enrolls; cert is sign
|
|||||||
|
|
||||||
## Milestone 3 — mTLS handshake + list/get + scope enforcement (FED-M3)
|
## Milestone 3 — mTLS handshake + list/get + scope enforcement (FED-M3)
|
||||||
|
|
||||||
_Deferred. Issue #462._
|
Goal: Two federated gateways exchange real data over mTLS. Inbound requests pass through cert validation → grant lookup → scope enforcement → native RBAC → response. `list`, `get`, and `capabilities` verbs land. The federation E2E harness (`tools/federation-harness/`) is the new permanent test bed for M3+ and is gated on every milestone going forward.
|
||||||
|
|
||||||
|
> **Critical trust boundary.** Every 401/403 path needs a test. Code review is non-negotiable; M3-12 budgets two review rounds.
|
||||||
|
>
|
||||||
|
> **Tracking issue:** #462.
|
||||||
|
|
||||||
|
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
|
||||||
|
| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ------------------------------------ | ---------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
|
| FED-M3-01 | not-started | `packages/types/src/federation/` — request/response DTOs for `list`, `get`, `capabilities` verbs. Wire-format zod schemas + inferred TS types. Includes `FederationRequest`, `FederationListResponse<T>`, `FederationGetResponse<T>`, `FederationCapabilitiesResponse`, error envelope, `_source` tag. | #462 | sonnet | feat/federation-m3-types | — | 4K | Reusable from gateway server + client + harness. Pure types — no I/O, no NestJS. |
|
||||||
|
| FED-M3-02 | not-started | `tools/federation-harness/` scaffold: `docker-compose.two-gateways.yml` (Server A + Server B + step-CA), `seed.ts` (provisions grants, peers, sample tasks/notes/credentials per scope variant), `harness.ts` helper (boots stack, returns typed clients). README documents harness use. | #462 | sonnet | feat/federation-m3-harness | DEPLOY-04 (soft) | 8K | Falls back to local docker-compose if `mos-test-1/-2` not yet redeployed (DEPLOY chain blocked on IMG-FIX). Permanent test infra used by M3+. |
|
||||||
|
| FED-M3-03 | not-started | `apps/gateway/src/federation/server/federation-auth.guard.ts` (NestJS guard). Validates inbound client cert from Fastify TLS context, extracts `grantId` + `subjectUserId` from custom OIDs, loads grant from DB, asserts `status='active'`, attaches `FederationContext` to request. | #462 | sonnet | feat/federation-m3-auth-guard | M3-01 | 8K | Reuses OID parsing logic mirrored from `ca.service.ts` post-issuance verification. 401 on malformed/missing OIDs; 403 on revoked/expired/missing grant. |
|
||||||
|
| FED-M3-04 | not-started | `apps/gateway/src/federation/server/scope.service.ts`. Pipeline: (1) resource allowlist + excluded check, (2) native RBAC eval as `subjectUserId`, (3) scope filter intersection (`include_teams`, `include_personal`), (4) `max_rows_per_query` cap. Pure service — DB calls injected. | #462 | sonnet | feat/federation-m3-scope-service | M3-01 | 10K | Hardest correctness target in M3. Reuses `parseFederationScope` (M2-03). Returns either `{ allowed: true, filter }` or structured deny reason for audit. |
|
||||||
|
| FED-M3-05 | not-started | `apps/gateway/src/federation/server/verbs/list.controller.ts`. Wires AuthGuard → ScopeService → tasks/notes/memory query layer; applies row cap; tags rows with `_source`. Resource selector via path param. | #462 | sonnet | feat/federation-m3-verb-list | M3-03, M3-04 | 6K | Routes: `POST /api/federation/v1/list/:resource`. No body persistence. Audit write deferred to M4. |
|
||||||
|
| FED-M3-06 | not-started | `apps/gateway/src/federation/server/verbs/get.controller.ts`. Single-resource fetch by id; same pipeline as list. 404 on not-found, 403 on RBAC/scope deny — both audited the same way. | #462 | sonnet | feat/federation-m3-verb-get | M3-03, M3-04 | 6K | `POST /api/federation/v1/get/:resource/:id`. Mirrors list controller patterns. |
|
||||||
|
| FED-M3-07 | not-started | `apps/gateway/src/federation/server/verbs/capabilities.controller.ts`. Read-only enumeration: returns `{ resources, excluded_resources, max_rows_per_query, supported_verbs }` derived from grant scope. Always allowed for an active grant — no RBAC eval. | #462 | sonnet | feat/federation-m3-verb-capabilities | M3-03 | 4K | `GET /api/federation/v1/capabilities`. Smallest verb; useful sanity check that mTLS + auth guard work end-to-end. |
|
||||||
|
| FED-M3-08 | not-started | `apps/gateway/src/federation/client/federation-client.service.ts`. Outbound mTLS dialer: picks `(certPem, sealed clientKey)` from `federation_peers`, unwraps key, builds undici Agent with mTLS, calls peer verb, parses typed response, wraps non-2xx into `FederationClientError`. | #462 | sonnet | feat/federation-m3-client | M3-01 | 8K | Independent of server stream — can land in parallel with M3-03/04. Cert/key cached per-peer; flushed by future M5/M6 logic. |
|
||||||
|
| FED-M3-09 | not-started | `apps/gateway/src/federation/client/query-source.service.ts`. Accepts `source: "local" \| "federated:<host>" \| "all"` from gateway query layer; for `"all"` fans out to local + each peer in parallel; merges results; tags every row with `_source`. | #462 | sonnet | feat/federation-m3-query-source | M3-08 | 8K | Per-peer failure surfaces as `_partial: true` in response, not hard failure (sets up M5 offline UX). M5 adds caching + circuit breaker on top. |
|
||||||
|
| FED-M3-10 | not-started | Integration tests for MILESTONES.md M3 acceptance #6 (malformed OIDs → 401; valid cert + revoked grant → 403) and #7 (`max_rows_per_query` cap). Real PG, mocked TLS context (Fastify req shim). | #462 | sonnet | feat/federation-m3-integration | M3-05, M3-06 | 8K | Vitest profile gated by `FEDERATED_INTEGRATION=1`. Single-gateway suite; no harness required. |
|
||||||
|
| FED-M3-11 | not-started | E2E tests for MILESTONES.md M3 acceptance #1, #2, #3, #4, #5, #8, #9, #10 (8 cases). Uses harness from M3-02; two real gateways, real Step-CA, real mTLS. Each test asserts both happy-path response and audit/no-persist invariants. | #462 | sonnet | feat/federation-m3-e2e | M3-02, M3-09 | 12K | Largest single task. Each acceptance gets its own `it(...)` for clear failure attribution. |
|
||||||
|
| FED-M3-12 | not-started | Independent security review (sonnet, not author of M3-03/04/05/06/07/08/09): focus on cert-SAN spoofing, OID extraction edge cases, scope-bypass via filter manipulation, RBAC-bypass via subjectUser swap, response leakage when scope deny. | #462 | sonnet | feat/federation-m3-security-review | M3-11 | 10K | Two review rounds budgeted. PRD requires explicit test for every 401/403 path — review verifies coverage. |
|
||||||
|
| FED-M3-13 | not-started | Docs update: `docs/federation/SETUP.md` mTLS handshake section, new `docs/federation/HARNESS.md` for federation-harness usage, OID reference table in SETUP.md, scope enforcement pipeline diagram. Runbook still M7-deferred. | #462 | haiku | feat/federation-m3-docs | M3-12 | 5K | One ASCII diagram for the auth-guard → scope → RBAC pipeline; helps future reviewers reason about denial paths. |
|
||||||
|
| FED-M3-14 | not-started | PR aggregate close, CI green, merge to main, close #462. Release tag `fed-v0.3.0-m3`. Update mission manifest M3 row → done; M4 row → in-progress when work begins. | #462 | sonnet | chore/federation-m3-close | M3-13 | 3K | Same close pattern as M1-12 / M2-13. |
|
||||||
|
|
||||||
|
**M3 estimate:** ~100K tokens (vs. the 40K figure in MILESTONES.md — as with M1/M2, the per-task breakdown costs tests, review, and docs separately from implementation). Largest milestone in the federation mission.
|
||||||
|
|
||||||
|
**Parallelization opportunities:**
|
||||||
|
|
||||||
|
- M3-08 (client) can land in parallel with M3-03/M3-04 (server pipeline) — they only share DTOs from M3-01.
|
||||||
|
- M3-02 (harness) can land in parallel with everything except M3-11.
|
||||||
|
- M3-05/M3-06/M3-07 (verbs) are independent of each other once M3-03/M3-04 land.
|
||||||
|
|
||||||
|
**Test bed fallback:** If `mos-test-1.woltje.com` / `mos-test-2.woltje.com` are still blocked on `FED-M2-DEPLOY-IMG-FIX` when M3-11 is ready to run, the harness's local `docker-compose.two-gateways.yml` is a sufficient stand-in. Production-host validation moves to M7 acceptance suite (PRD AC-12).
|
||||||
|
|
||||||
## Milestone 4 — search + audit + rate limit (FED-M4)
|
## Milestone 4 — search + audit + rate limit (FED-M4)
|
||||||
|
|
||||||
|
|||||||
99
docs/mission-control/BOARD.md
Normal file
99
docs/mission-control/BOARD.md
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
# Mission Control Plane — Feature Board
|
||||||
|
|
||||||
|
> Discussion board for the combined PRD / mission / Kanban workflow.
|
||||||
|
> Use this to decide scope before implementation.
|
||||||
|
|
||||||
|
## Board Legend
|
||||||
|
|
||||||
|
- **Must-have** — required for the first usable version
|
||||||
|
- **Should-have** — strongly preferred, but can ship after the core path
|
||||||
|
- **Could-have** — valuable later if time permits
|
||||||
|
- **Won't-have** — explicitly deferred
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Feature Board
|
||||||
|
|
||||||
|
| Feature Card | Need | Priority | Decision / Notes |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| Canonical mission manifest | One durable root object for goal, PRD, board, session | Must-have | Mission manifest becomes the anchor for all downstream state |
|
||||||
|
| PRD generator integration | PRD should be generated from a feature idea and saved in docs | Must-have | Use Mosaic PRDy format and keep the file human-reviewable |
|
||||||
|
| Board atomization | Break PRD into assignable tasks with dependencies | Must-have | Each user story should map to one or more tasks |
|
||||||
|
| Short-cycle detector | Detect compaction churn and repeated tool loops | Must-have | Coordinator should track churn score per session |
|
||||||
|
| Handoff packet | Preserve actionable context across rotations | Must-have | Use a compact structured summary, not a raw transcript |
|
||||||
|
| Auto-resume workers | Let new sessions read mission + board on start | Should-have | Makes overnight autonomy realistic |
|
||||||
|
| Mission status view | Show current phase, blockers, and active session | Should-have | Expose through CLI first, dashboard later |
|
||||||
|
| Worktree root convention | Keep worktrees off `/tmp` and on the larger persistent drive | Should-have | Prefer `/src/<repo>-worktrees` for repo worktrees and long-lived agent work |
|
||||||
|
| Review gate | Prevent autonomous work from shipping unreviewed | Should-have | Use reviewer tasks before mission close |
|
||||||
|
| Rotation policy config | Configure thresholds per mission/profile | Could-have | Keep v1 simple, add tuning later |
|
||||||
|
| Goal decomposition suggestions | Suggest sub-goals from the PRD | Could-have | Good for planning, not necessary for core path |
|
||||||
|
| Cross-channel continuity | Continue a mission across CLI/gateway/remote channels | Could-have | Important later, not required for MVP |
|
||||||
|
| Automatic board sync | Mirror git docs into DB and back | Could-have | Nice-to-have after the file-first flow stabilizes |
|
||||||
|
| Fully autonomous closeout | Let mission finish without human intervention | Won't-have | Keep an operator-visible review step |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Needs Discussion
|
||||||
|
|
||||||
|
### 1) Canonical source of truth
|
||||||
|
|
||||||
|
**Question:** Should the PRD, mission manifest, and board all live in git, or should one be the database source of truth?
|
||||||
|
|
||||||
|
**Proposed answer:** Keep the human-readable artifacts in git and sync the mission runtime state to the database.
|
||||||
|
|
||||||
|
### 2) Scope of automation
|
||||||
|
|
||||||
|
**Question:** Should the first version auto-create the board from the PRD, or require a human/orchestrator to approve the split?
|
||||||
|
|
||||||
|
**Proposed answer:** Auto-create a draft board, then let the orchestrator approve or adjust it.
|
||||||
|
|
||||||
|
### 3) Rotation triggers
|
||||||
|
|
||||||
|
**Question:** What should trigger a forced session rotation?
|
||||||
|
|
||||||
|
**Candidate signals:**
|
||||||
|
- repeated compaction
|
||||||
|
- repeated prompts for permission
|
||||||
|
- identical tool loops
|
||||||
|
- no new file/task state after several turns
|
||||||
|
- task blocked on a missing prerequisite
|
||||||
|
|
||||||
|
**Proposed answer:** Use a weighted churn score with a small hard cap on repeated compactions.
|
||||||
|
|
||||||
|
### 4) Handoff format
|
||||||
|
|
||||||
|
**Question:** What should the next session receive?
|
||||||
|
|
||||||
|
**Proposed answer:**
|
||||||
|
- Mission ID
|
||||||
|
- PRD path
|
||||||
|
- Active board task
|
||||||
|
- Completed work
|
||||||
|
- Blockers
|
||||||
|
- Next 3 actions
|
||||||
|
- Non-negotiable constraints
|
||||||
|
|
||||||
|
### 5) Operator control
|
||||||
|
|
||||||
|
**Question:** Should the operator be able to force a rotation or pause the mission?
|
||||||
|
|
||||||
|
**Proposed answer:** Yes. Human override should win.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Draft Decisions
|
||||||
|
|
||||||
|
1. File-first artifacts, DB-backed runtime state.
|
||||||
|
2. PRD-first planning, board-second execution.
|
||||||
|
3. Auto-rotation on churn, but human override remains available.
|
||||||
|
4. Structured handoff packets required on every rotation.
|
||||||
|
5. Mission close requires a reviewer task.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Open Questions
|
||||||
|
|
||||||
|
- What exact data fields belong in the mission manifest?
|
||||||
|
- Should rotation thresholds vary by agent profile?
|
||||||
|
- What is the minimum viable status surface for v1?
|
||||||
|
- Should the board support milestones in addition to tasks?
|
||||||
95
docs/mission-control/MISSION-MANIFEST.md
Normal file
95
docs/mission-control/MISSION-MANIFEST.md
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
# Mission Manifest — Mosaic Mission Control Plane
|
||||||
|
|
||||||
|
> Persistent document tracking scope, status, and handoff history for the combined PRD / mission / Kanban workflow.
|
||||||
|
|
||||||
|
## Mission
|
||||||
|
|
||||||
|
**ID:** mission-control-plane-20260506
|
||||||
|
|
||||||
|
**Statement:** Combine Mosaic PRDy, coord, and Kanban into one durable workflow so an agent can move from feature idea to PRD to mission to task board and keep working across session rotation, compaction, and restarts with minimal context loss.
|
||||||
|
|
||||||
|
**Phase:** planning — MC-01 complete, MC-02 next
|
||||||
|
|
||||||
|
**Current Milestone:** MC-02
|
||||||
|
|
||||||
|
**Progress:** 1 / 6 milestones
|
||||||
|
|
||||||
|
**Status:** active
|
||||||
|
|
||||||
|
**Last Updated:** 2026-05-06
|
||||||
|
|
||||||
|
**Parent Mission:** None — new mission
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Context
|
||||||
|
|
||||||
|
This mission exists because overnight autonomy breaks when the working session short-cycles. The system needs durable artifacts and a mechanical coordinator that can:
|
||||||
|
|
||||||
|
1. keep a canonical PRD,
|
||||||
|
2. atomize the PRD into board tasks,
|
||||||
|
3. track mission state separately from the chat session,
|
||||||
|
4. detect churn or compaction pressure,
|
||||||
|
5. rotate to a fresh session, and
|
||||||
|
6. re-enter from a structured handoff.
|
||||||
|
|
||||||
|
Operational convention: repo worktrees and long-lived working directories should use `/src/<repo>-worktrees` instead of `/tmp`.
|
||||||
|
|
||||||
|
Design references:
|
||||||
|
|
||||||
|
- `docs/mission-control/PRD.md` — product requirements
|
||||||
|
- `docs/mission-control/BOARD.md` — feature discussion board
|
||||||
|
- `docs/mission-control/TASKS.md` — atomized execution plan
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Success Criteria
|
||||||
|
|
||||||
|
- [ ] AC-1: A feature idea can be converted into a PRD, mission, and task board.
|
||||||
|
- [ ] AC-2: The coordinator can load a mission and its board from durable storage.
|
||||||
|
- [ ] AC-3: The coordinator can detect short-cycling and rotate sessions automatically.
|
||||||
|
- [ ] AC-4: A rotated session can resume from a handoff packet without manual re-prompting.
|
||||||
|
- [ ] AC-5: The board remains traceable back to the PRD user stories.
|
||||||
|
- [ ] AC-6: Operators can inspect mission state, task state, and latest handoff from one place.
|
||||||
|
- [ ] AC-7: The system can run overnight without losing the mission goal.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Milestones
|
||||||
|
|
||||||
|
| # | ID | Name | Status | Branch | Started | Completed |
|
||||||
|
| --- | --- | --- | --- | --- | --- | --- |
|
||||||
|
| 1 | MC-01 | PRD + mission schema foundation | done | docs/mission-control-* | 2026-05-06 | 2026-05-06 |
|
||||||
|
| 2 | MC-02 | Mission runtime model | not-started | — | — | — |
|
||||||
|
| 3 | MC-03 | Board atomization and task linkage | not-started | — | — | — |
|
||||||
|
| 4 | MC-04 | Short-cycle detector and rotation engine | not-started | — | — | — |
|
||||||
|
| 5 | MC-05 | Handoff generation and re-entry | not-started | — | — | — |
|
||||||
|
| 6 | MC-06 | Operator surface and E2E validation | not-started | — | — | — |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Budget
|
||||||
|
|
||||||
|
| Milestone | Est. tokens | Parallelizable? |
|
||||||
|
| --- | --- | --- |
|
||||||
|
| MC-01 | 16K | No |
|
||||||
|
| MC-02 | 20K | No |
|
||||||
|
| MC-03 | 24K | Mostly after MC-01 |
|
||||||
|
| MC-04 | 20K | After MC-02 |
|
||||||
|
| MC-05 | 18K | After MC-04 |
|
||||||
|
| MC-06 | 26K | After MC-04/05 |
|
||||||
|
| **Total** | **~124K** | |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Session History
|
||||||
|
|
||||||
|
| Session | Date | Runtime | Outcome |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| S1 | 2026-05-06 | hermes | PRD, board, task plan, mission manifest, and worktree convention drafted |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Next Step
|
||||||
|
|
||||||
|
Kick off MC-02: implement the durable mission runtime model and wire the mission state into the coordinator.
|
||||||
200
docs/mission-control/PRD.md
Normal file
200
docs/mission-control/PRD.md
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
# PRD: Mosaic Mission Control Plane
|
||||||
|
|
||||||
|
## Metadata
|
||||||
|
|
||||||
|
- **Owner:** Jason Woltje
|
||||||
|
- **Date:** 2026-05-06
|
||||||
|
- **Status:** draft
|
||||||
|
- **Framework:** Mosaic PRDy + coord + Kanban
|
||||||
|
- **Target Repo:** `git.mosaicstack.dev/mosaic/mosaic-stack`
|
||||||
|
- **Primary Modules:** `packages/prdy`, `packages/coord`, `packages/queue`, `apps/gateway`, `packages/brain`, `packages/cli`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Problem Statement
|
||||||
|
|
||||||
|
Mosaic already has the ingredients for durable agent work: PRD generation (`prdy`), mission coordination (`coord`), and task execution boards (`Kanban` / `TASKS.md`). Today those systems can still drift apart:
|
||||||
|
|
||||||
|
- A PRD can exist without a mission record.
|
||||||
|
- A mission can exist without a machine-readable execution board.
|
||||||
|
- Agents can short-cycle or compact repeatedly without a durable handoff.
|
||||||
|
- The next session may know the goal, but not the exact next step.
|
||||||
|
|
||||||
|
The result is brittle overnight autonomy: work continues only as long as a single session remains healthy.
|
||||||
|
|
||||||
|
This feature unifies those layers into one durable workflow so a mission can survive session rotation, compaction, and restarts with minimal state loss.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Goals
|
||||||
|
|
||||||
|
1. Create one canonical pipeline from idea → PRD → mission → board → execution.
|
||||||
|
2. Let `prdy` generate a PRD that is immediately usable as a mission input.
|
||||||
|
3. Let `coord` own mission state, handoffs, and session rotation.
|
||||||
|
4. Let the board hold atomized tasks with dependencies and assignees.
|
||||||
|
5. Let agents read the mission and board to learn the next action without extra prompting.
|
||||||
|
6. Detect short-cycling and rotate sessions before quality degrades.
|
||||||
|
7. Preserve useful context across handoffs with a structured summary packet.
|
||||||
|
8. Give operators a single place to see mission status, task state, and the current session.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Non-Goals
|
||||||
|
|
||||||
|
1. Replacing the Mosaic agent runtime or gateway architecture.
|
||||||
|
2. Rewriting `prdy` or `coord` from scratch.
|
||||||
|
3. Turning the board into a general project-management system.
|
||||||
|
4. Building a full Gantt/charting product.
|
||||||
|
5. Removing human review or approval gates.
|
||||||
|
6. Allowing agents to create arbitrary mission state without schema.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## User Stories
|
||||||
|
|
||||||
|
### US-001: Create a mission from a feature idea
|
||||||
|
|
||||||
|
**Description:** As an orchestrator, I want to turn a feature idea into a PRD and mission so that agents can work from a durable spec instead of a chat transcript.
|
||||||
|
|
||||||
|
**Acceptance Criteria:**
|
||||||
|
- [ ] `prdy` can emit a PRD with goals, non-goals, and requirements.
|
||||||
|
- [ ] The PRD is linked to a mission ID.
|
||||||
|
- [ ] The mission manifest references the PRD path.
|
||||||
|
- [ ] The mission is readable by downstream agent sessions.
|
||||||
|
|
||||||
|
### US-002: Atomize work into a board
|
||||||
|
|
||||||
|
**Description:** As an orchestrator, I want to split a PRD into board tasks so that work can be assigned to specialists.
|
||||||
|
|
||||||
|
**Acceptance Criteria:**
|
||||||
|
- [ ] Each user story can become one or more tasks.
|
||||||
|
- [ ] Tasks have assignees, dependencies, and estimates.
|
||||||
|
- [ ] Tasks are machine-readable and durable.
|
||||||
|
- [ ] The board can be regenerated from the PRD without ambiguity.
|
||||||
|
|
||||||
|
### US-003: Rotate sessions without losing the mission
|
||||||
|
|
||||||
|
**Description:** As a coordinator, I want to restart or rotate a session when it short-cycles so that the mission continues with minimal loss.
|
||||||
|
|
||||||
|
**Acceptance Criteria:**
|
||||||
|
- [ ] The coordinator detects compaction pressure or repeated loops.
|
||||||
|
- [ ] The coordinator writes a handoff summary before rotation.
|
||||||
|
- [ ] A new session can resume from the handoff packet.
|
||||||
|
- [ ] The mission state remains intact across the rotation.
|
||||||
|
|
||||||
|
### US-004: Let workers read the next step automatically
|
||||||
|
|
||||||
|
**Description:** As a worker agent, I want to read the mission and board at startup so I can do the next useful thing without waiting for a human prompt.
|
||||||
|
|
||||||
|
**Acceptance Criteria:**
|
||||||
|
- [ ] Startup loads the active mission manifest.
|
||||||
|
- [ ] Startup loads the current board/task row.
|
||||||
|
- [ ] Startup exposes the next action clearly in the prompt.
|
||||||
|
- [ ] The agent can continue after compaction using the same mission context.
|
||||||
|
|
||||||
|
### US-005: Observe mission health from one place
|
||||||
|
|
||||||
|
**Description:** As an operator, I want a single view of mission health so that I can see progress, blocked tasks, and session churn.
|
||||||
|
|
||||||
|
**Acceptance Criteria:**
|
||||||
|
- [ ] Mission state shows current phase and progress.
|
||||||
|
- [ ] Board state shows task status by assignee.
|
||||||
|
- [ ] Short-cycle/rotation events are visible.
|
||||||
|
- [ ] Handoffs are inspectable.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Functional Requirements
|
||||||
|
|
||||||
|
FR-1. The system must represent a mission as a durable object with an ID, goal, current phase, PRD path, board path, and active session ID.
|
||||||
|
|
||||||
|
FR-2. The system must represent a PRD as a markdown document with goals, user stories, functional requirements, non-goals, technical considerations, and success metrics.
|
||||||
|
|
||||||
|
FR-3. The system must represent execution work as a board of atomized tasks with status, assignee, dependency, and estimate fields.
|
||||||
|
|
||||||
|
FR-4. The coordinator must be able to derive a task board from a PRD.
|
||||||
|
|
||||||
|
FR-5. The coordinator must be able to write a handoff packet that includes goal, current state, completed work, blocked work, next steps, and constraints.
|
||||||
|
|
||||||
|
FR-6. The coordinator must detect short-cycling signals such as repeated compactions, repeated tool loops, repeated approval prompts, or no progress across several turns.
|
||||||
|
|
||||||
|
FR-7. The coordinator must rotate the session when the short-cycle threshold is exceeded.
|
||||||
|
|
||||||
|
FR-8. The coordinator must preserve mission continuity across session rotation.
|
||||||
|
|
||||||
|
FR-9. The worker session must read the mission state and board state at startup.
|
||||||
|
|
||||||
|
FR-10. The worker session must be able to resume from the last handoff summary without the operator rewriting the goal manually.
|
||||||
|
|
||||||
|
FR-11. The operator must be able to inspect the mission state, PRD, board, and latest handoff from one place.
|
||||||
|
|
||||||
|
FR-12. The mission system must keep a traceable link between PRD requirements and board tasks.
|
||||||
|
|
||||||
|
FR-13. The system must not allow a task to become active without a valid mission context.
|
||||||
|
|
||||||
|
FR-14. The system must keep durable history for rotation and handoff events.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Board Discussion: Features and Needs
|
||||||
|
|
||||||
|
This is the feature discussion board that should drive the mission design.
|
||||||
|
|
||||||
|
| Card | Need | Why it matters | Proposed decision |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| Canonical mission record | One source of truth for goal/state | Prevents drift between chat, docs, and queue | Make mission manifest the durable root object |
|
||||||
|
| PRD → board derivation | Break feature ideas into executable work | Lets the plan be assigned and tracked | Keep PRD as the spec, generate board tasks from user stories |
|
||||||
|
| Session watchdog | Detect churn/short-cycling | Keeps overnight runs productive | Add short-cycle scoring and forced rotation |
|
||||||
|
| Structured handoff | Preserve context across session changes | Minimizes restart loss | Use a compact JSON/MD handoff packet |
|
||||||
|
| Worker auto-read | Let agents resume without human re-prompting | Reduces operator overhead | Load mission + board on session start |
|
||||||
|
| Status surface | Show progress and blockers clearly | Operators need confidence | Expose mission state via CLI and dashboard |
|
||||||
|
| Review gate | Keep quality high on autonomous work | Prevents silent regressions | Require review tasks before close |
|
||||||
|
| Recoverability | Resume after failure or restart | Mission should outlive a process | Persist session and handoff history |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Design Considerations
|
||||||
|
|
||||||
|
1. The PRD should stay human-readable markdown, because the board and mission references need to be reviewable in git.
|
||||||
|
2. The board should be machine-readable enough for automation but still readable by humans.
|
||||||
|
3. The mission manifest should point to the PRD and board, not duplicate them.
|
||||||
|
4. Handoff packets should be compact and structured so they can be injected into a new session with minimal token cost.
|
||||||
|
5. The coordinator should prefer rotation over forced context growth once the session is near the compaction threshold.
|
||||||
|
6. Existing Mosaic commands should be extended, not replaced, wherever possible.
|
||||||
|
7. The same mission should be resumable across CLI, gateway, and remote channels.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Technical Considerations
|
||||||
|
|
||||||
|
- Likely storage split:
|
||||||
|
- PRD/board/manifest in git-backed docs
|
||||||
|
- mission/session state in the Mosaic data layer
|
||||||
|
- runtime health in queue/session state
|
||||||
|
- Worktrees and long-lived agent working directories should live under `/src/<repo>-worktrees` rather than `/tmp` so they sit on the larger persistent drive and survive longer-running missions.
|
||||||
|
- The coordinator needs a stable session identity, even if the active session changes.
|
||||||
|
- Task dependencies must be enforced so workers do not start early.
|
||||||
|
- The handoff packet should include the top 3 immediate actions and the strongest constraints.
|
||||||
|
- Rotation triggers should be configurable per profile or per mission.
|
||||||
|
- The initial version can be file-first, with dashboard sync added later.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Success Metrics
|
||||||
|
|
||||||
|
- A mission can rotate sessions without losing the active goal.
|
||||||
|
- A new session can resume from the latest handoff within a single turn.
|
||||||
|
- Board tasks remain aligned to PRD user stories.
|
||||||
|
- Short-cycling sessions are replaced before repeated compaction harms quality.
|
||||||
|
- Operators can find mission state without spelunking across multiple chat logs.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Open Questions
|
||||||
|
|
||||||
|
1. What should the canonical mission ID format be?
|
||||||
|
2. Should the board live only in git, or also in the database?
|
||||||
|
3. Should rotation be automatic by default, or opt-in per mission?
|
||||||
|
4. What should the short-cycle threshold be initially?
|
||||||
|
5. Should handoffs be pure text, structured JSON, or both?
|
||||||
|
6. Which CLI command should be the primary mission entrypoint: `mosaic mission`, `mosaic coord`, or `mosaic prdy`?
|
||||||
113
docs/mission-control/TASKS.md
Normal file
113
docs/mission-control/TASKS.md
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
# Tasks — Mosaic Mission Control Plane
|
||||||
|
|
||||||
|
> Single-writer: orchestrator only. Workers read but never modify.
|
||||||
|
>
|
||||||
|
> **Mission:** mission-control-plane-20260506
|
||||||
|
> **Schema:** `| id | status | description | issue | agent | branch | depends_on | estimate | notes |`
|
||||||
|
> **Status values:** `not-started` | `in-progress` | `done` | `blocked` | `failed` | `needs-qa`
|
||||||
|
> **Agent values:** `codex` | `glm-5.1` | `haiku` | `sonnet` | `opus` | `—` (auto)
|
||||||
|
>
|
||||||
|
> Scope: this file decomposes the combined PRD / mission / board workflow into atomized tasks.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Milestone 1 — PRD + mission schema foundation
|
||||||
|
|
||||||
|
Goal: create the durable doc structure and the minimal mission metadata needed to keep PRD, board, and mission aligned.
|
||||||
|
|
||||||
|
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
|
||||||
|
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
||||||
|
| MC-01-01 | not-started | Write `docs/mission-control/PRD.md` with goals, non-goals, functional requirements, and success metrics. | — | sonnet | docs/mission-control-prd | — | 5K | Human-readable PRD becomes the spec anchor. |
|
||||||
|
| MC-01-02 | not-started | Write `docs/mission-control/BOARD.md` as a decision board for scope, priority, and open questions. | — | haiku | docs/mission-control-board | MC-01-01 | 3K | Keeps discussion separate from the spec. |
|
||||||
|
| MC-01-03 | not-started | Write `docs/mission-control/MISSION-MANIFEST.md` linking PRD, board, tasks, and mission identity. | — | sonnet | docs/mission-control-manifest | MC-01-01, MC-01-02 | 4K | Durable mission root object. |
|
||||||
|
| MC-01-04 | not-started | Write `docs/mission-control/TASKS.md` with the atomized execution plan and dependency graph. | — | sonnet | docs/mission-control-tasks | MC-01-03 | 4K | Board-backed execution plan. |
|
||||||
|
|
||||||
|
**Milestone 1 estimate:** ~16K tokens
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Milestone 2 — Mission runtime model
|
||||||
|
|
||||||
|
Goal: make missions first-class runtime objects that can survive session restarts and compaction.
|
||||||
|
|
||||||
|
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
|
||||||
|
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
||||||
|
| MC-02-01 | not-started | Define mission schema in the data layer: mission ID, goal, phase, PRD path, board path, active session ID, last handoff, and churn score. | — | codex | feat/mission-control-schema | MC-01-03 | 6K | This is the durable root state. |
|
||||||
|
| MC-02-02 | not-started | Add mission read/write services to `packages/coord` so the coordinator can load and persist mission state. | — | codex | feat/mission-control-coord-store | MC-02-01 | 6K | Keep storage simple and explicit. |
|
||||||
|
| MC-02-03 | not-started | Add mission status reporting to `mosaic mission` and `mosaic coord status`. | — | codex | feat/mission-control-status-cli | MC-02-02 | 4K | Operators need one obvious status command. |
|
||||||
|
| MC-02-04 | not-started | Add tests for mission persistence and recovery after restart. | — | haiku | feat/mission-control-persistence-tests | MC-02-02 | 4K | Verify mission survives process churn. |
|
||||||
|
| MC-02-05 | done | Add a worktree-root convention to the mission runtime notes and startup guidance so agents prefer `/src/<repo>-worktrees` over `/tmp`. | — | haiku | docs/mission-control-worktree-root | MC-01-03 | 3K | Keep long-lived work on the larger persistent drive. |
|
||||||
|
|
||||||
|
**Milestone 2 estimate:** ~23K tokens
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Milestone 3 — Board atomization and task linkage
|
||||||
|
|
||||||
|
Goal: derive assignable tasks from the PRD and keep them linked to mission state.
|
||||||
|
|
||||||
|
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
|
||||||
|
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
||||||
|
| MC-03-01 | not-started | Add a PRD-to-task decomposition rule set: every user story maps to one or more board tasks. | — | sonnet | feat/mission-control-decompose | MC-01-01 | 5K | Start simple and deterministic. |
|
||||||
|
| MC-03-02 | not-started | Implement board generation from the PRD in a machine-readable format. | — | codex | feat/mission-control-board-gen | MC-03-01 | 6K | Output should be usable by the coordinator. |
|
||||||
|
| MC-03-03 | not-started | Add dependency validation so tasks cannot start before parent tasks complete. | — | codex | feat/mission-control-deps | MC-03-02 | 5K | Enforces ordering. |
|
||||||
|
| MC-03-04 | not-started | Add review-task support so a mission cannot close without a reviewer step. | — | sonnet | feat/mission-control-review-gate | MC-03-03 | 4K | Preserves quality. |
|
||||||
|
| MC-03-05 | not-started | Add tests proving the board stays traceable back to the PRD user stories. | — | haiku | feat/mission-control-trace-tests | MC-03-02, MC-03-03 | 4K | Traceability is the point. |
|
||||||
|
|
||||||
|
**Milestone 3 estimate:** ~24K tokens
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Milestone 4 — Short-cycle detector and rotation engine
|
||||||
|
|
||||||
|
Goal: detect when a session is stuck and rotate to a fresh session before quality falls off.
|
||||||
|
|
||||||
|
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
|
||||||
|
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
||||||
|
| MC-04-01 | not-started | Define churn signals: repeated compaction, identical tool loops, repeated permission prompts, and no progress across several turns. | — | sonnet | feat/mission-control-churn-signals | MC-02-01 | 4K | Keep the rules explicit. |
|
||||||
|
| MC-04-02 | not-started | Implement churn scoring in the coordinator with configurable thresholds. | — | codex | feat/mission-control-churn-score | MC-04-01 | 6K | Weighted score makes tuning easier. |
|
||||||
|
| MC-04-03 | not-started | Implement automatic session rotation when churn crosses the threshold. | — | codex | feat/mission-control-rotate-session | MC-04-02 | 6K | The session is disposable; the mission is not. |
|
||||||
|
| MC-04-04 | not-started | Add tests for rotation triggers and for avoiding premature rotation. | — | haiku | feat/mission-control-rotation-tests | MC-04-03 | 4K | Prevent flapping. |
|
||||||
|
|
||||||
|
**Milestone 4 estimate:** ~20K tokens
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Milestone 5 — Handoff generation and re-entry
|
||||||
|
|
||||||
|
Goal: preserve the best context from the old session and inject it into the new session cleanly.
|
||||||
|
|
||||||
|
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
|
||||||
|
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
||||||
|
| MC-05-01 | not-started | Define the handoff packet schema: mission ID, session ID, completed work, blockers, next 3 actions, and constraints. | — | sonnet | feat/mission-control-handoff-schema | MC-02-01 | 4K | Keep it compact and structured. |
|
||||||
|
| MC-05-02 | not-started | Implement handoff packet writing during rotation. | — | codex | feat/mission-control-handoff-write | MC-05-01, MC-04-03 | 5K | Persist before the old session exits. |
|
||||||
|
| MC-05-03 | not-started | Implement handoff packet loading at session startup. | — | codex | feat/mission-control-handoff-load | MC-05-01, MC-04-03 | 5K | New session should know the next action. |
|
||||||
|
| MC-05-04 | not-started | Add tests proving a rotated session can continue the mission without manual re-prompting. | — | haiku | feat/mission-control-handoff-tests | MC-05-02, MC-05-03 | 4K | Resume quality is the key metric. |
|
||||||
|
|
||||||
|
**Milestone 5 estimate:** ~18K tokens
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Milestone 6 — Operator surface and E2E validation
|
||||||
|
|
||||||
|
Goal: expose the whole workflow through commands and verify it end-to-end.
|
||||||
|
|
||||||
|
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
|
||||||
|
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
|
||||||
|
| MC-06-01 | not-started | Add a CLI command to inspect the active mission, PRD path, board path, task statuses, and latest handoff. | — | codex | feat/mission-control-inspect-cli | MC-02-03, MC-05-03 | 5K | One place to inspect the whole stack. |
|
||||||
|
| MC-06-02 | not-started | Add a compact dashboard or TUI summary view for mission health. | — | codex | feat/mission-control-summary-ui | MC-06-01 | 6K | Nice to have, but not before the core works. |
|
||||||
|
| MC-06-03 | not-started | Build an E2E harness that simulates compaction / rotation and verifies the mission can continue. | — | sonnet | feat/mission-control-e2e-harness | MC-04-03, MC-05-03 | 8K | This is the proof that the design works. |
|
||||||
|
| MC-06-04 | not-started | Add final docs for operators explaining how PRD, mission, and board fit together. | — | haiku | feat/mission-control-ops-docs | MC-06-03 | 4K | Make it usable by humans. |
|
||||||
|
| MC-06-05 | not-started | Consolidate review findings and close the mission with a release note. | — | sonnet | chore/mission-control-close | MC-06-04 | 3K | Only after the E2E passes. |
|
||||||
|
|
||||||
|
**Milestone 6 estimate:** ~26K tokens
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Execution Notes
|
||||||
|
|
||||||
|
- `sonnet` is best for planning, decomposition, and the review-gate tasks.
|
||||||
|
- `codex` is best for schema, coordinator, and CLI implementation.
|
||||||
|
- `haiku` is best for validation, traceability checks, and docs.
|
||||||
|
- The first implementation pass should stay file-first and keep the runtime state thin.
|
||||||
|
- The mission should not close until the PRD, board, mission manifest, and E2E harness all agree.
|
||||||
238
docs/plans/2026-05-06-hermes-mosaic-alignment.md
Normal file
238
docs/plans/2026-05-06-hermes-mosaic-alignment.md
Normal file
@@ -0,0 +1,238 @@
|
|||||||
|
# Hermes-Mosaic Alignment Plan
|
||||||
|
|
||||||
|
> **For Hermes:** Use subagent-driven-development skill to implement this plan task-by-task.
|
||||||
|
|
||||||
|
**Goal:** Package Mosaic's mechanical coordination primitives as a native Hermes toolset so any Hermes profile gets mission management, task decomposition, handoff, and session continuity without depending on the Mosaic gateway or OpenClaw runtime.
|
||||||
|
|
||||||
|
**Architecture:** Extract the coordination logic from Mosaic's `packages/coord` (TypeScript, file-first) into a Hermes Python toolset that wraps the same file conventions. The Mosaic Stack repo remains the canonical upstream for the file formats (TASKS.md schema, mission.json schema, handoff packet schema). Hermes implements native Python tools that read/write those same files, plus tool-calls for churn detection and handoff generation that have no Mosaic equivalent today.
|
||||||
|
|
||||||
|
**Tech Stack:** Python (Hermes toolset), SQLite (Hermes Kanban), JSON + Markdown (Mosaic file conventions)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Alignment Map
|
||||||
|
|
||||||
|
### What Mosaic has that Hermes needs
|
||||||
|
|
||||||
|
| Mosaic Component | What it does | Natural Hermes home | Why |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `packages/coord` (mission.ts) | Mission CRUD, session tracking, milestone state | **Hermes toolset: `mission`** | Mission state is session-scoped, not gateway-scoped. Hermes sessions already have identity, process tracking, and context windows. |
|
||||||
|
| `packages/coord` (tasks-file.ts) | Parse/write TASKS.md tables | **Hermes toolset: `mission`** (same) | Hermes already reads/writes files. The TASKS.md parser is ~300 lines of pure string manipulation — trivial Python port. |
|
||||||
|
| `packages/coord` (runner.ts) | Spawn claude/codex workers with continuation prompts | **Already covered by `delegate_task`** | Hermes delegate_task already does isolated subagent spawning with restricted toolsets. The runner's "find next task and build continuation prompt" logic moves into a tool-call. |
|
||||||
|
| `packages/coord` (status.ts) | Mission health, task progress, next task | **Hermes toolset: `mission`** (same) | Status readout fits naturally as a tool-call. No gateway needed. |
|
||||||
|
| `packages/prdy` | PRD generation wizard | **Hermes skill: `prdy`** | PRD generation is a prompt + template problem, not infrastructure. A Hermes skill with templates is the right fit. |
|
||||||
|
| `plugins/mosaic-framework` | before_agent_start + subagent_spawning hooks | **Hermes system prompt injection** | Hermes already injects system context via skills and config. The framework preamble and worktree rules become standard Hermes skills loaded by the orchestrator profile. |
|
||||||
|
| `plugins/macp` | OpenClaw ACP bridge (spawn codex/claude) | **Already covered by `delegate_task` + ACP** | Hermes already has ACP support and delegate_task. The MACP bridge is redundant when running natively in Hermes. |
|
||||||
|
| Churn detection (planned) | Detect compaction loops, repeated tool calls, no progress | **Hermes middleware** | This needs to live inside Hermes's turn loop where it can observe tool-call patterns. Mosaic can't see this from outside. |
|
||||||
|
| Handoff packet (planned) | Structured context summary for session rotation | **Hermes toolset: `mission`** | Handoff is a serialization of mission + session state. Hermes owns the session, so it should own the handoff. |
|
||||||
|
|
||||||
|
### What Hermes already has that replaces Mosaic infrastructure
|
||||||
|
|
||||||
|
| Mosaic concept | Hermes equivalent | Notes |
|
||||||
|
|---|---|---|
|
||||||
|
| Gateway (NestJS) | Hermes gateway | Hermes already has a gateway with WebSocket, Discord, Telegram, CLI. No need for a second one. |
|
||||||
|
| Pi SDK agent runtime | Hermes agent loop | Hermes IS the agent runtime. OpenClaw's Pi SDK is a different runtime that Mosaic targets. |
|
||||||
|
| MACP ACP bridge | `delegate_task` + ACP tools | Same capability, already native. |
|
||||||
|
| Session identity | Hermes session IDs + process_registry | Hermes already tracks session identity, PIDs, and background processes. |
|
||||||
|
| Task execution board | Hermes Kanban | Fully functional SQLite-backed Kanban with dispatcher, triage, events, comments. |
|
||||||
|
| Worker spawning | Hermes dispatcher + cron | Kanban dispatcher + cron already handle this. |
|
||||||
|
| Context injection | Hermes skills + system prompt | Skills are loaded at session start and injected into context. Exactly what mosaic-framework plugin does. |
|
||||||
|
| File checkpoints | Hermes checkpoint_manager | Already tracks file mutations with shadow git. |
|
||||||
|
|
||||||
|
### What Mosaic keeps as its own entity
|
||||||
|
|
||||||
|
| Component | Why it stays in Mosaic |
|
||||||
|
|---|---|
|
||||||
|
| `apps/gateway` | NestJS API surface — Mosaic's web platform offering |
|
||||||
|
| `apps/web` | Next.js dashboard — Mosaic's UI offering |
|
||||||
|
| `packages/types` | Shared TS contracts for Mosaic gateway plugins |
|
||||||
|
| `packages/db` | Drizzle ORM + PG — Mosaic's data layer |
|
||||||
|
| `packages/auth` | BetterAuth — Mosaic's auth system |
|
||||||
|
| `packages/brain` | PG-backed data layer for Mosaic web app |
|
||||||
|
| `packages/queue` | Valkey task queue for Mosaic gateway |
|
||||||
|
| `plugins/discord` | OpenClaw Discord plugin |
|
||||||
|
| `plugins/telegram` | OpenClaw Telegram plugin |
|
||||||
|
| `packages/mosaic` CLI | The `mosaic` CLI — Mosaic's own command surface |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architecture: `mission` Toolset for Hermes
|
||||||
|
|
||||||
|
### New files under `/opt/hermes/tools/`
|
||||||
|
|
||||||
|
```
|
||||||
|
mission_tools.py — Tool-call surface (mission_create, mission_status,
|
||||||
|
mission_next_task, mission_update_task, mission_handoff,
|
||||||
|
mission_resume)
|
||||||
|
mission_state.py — State management (read/write mission.json, parse TASKS.md,
|
||||||
|
parse MISSION-MANIFEST.md)
|
||||||
|
mission_churn.py — Churn detection (tool-loop counter, compaction counter,
|
||||||
|
progress scorer)
|
||||||
|
mission_handoff.py — Handoff packet generation and loading
|
||||||
|
```
|
||||||
|
|
||||||
|
### Tool-calls exposed to the agent
|
||||||
|
|
||||||
|
| Tool | What it does | When the agent calls it |
|
||||||
|
|---|---|---|
|
||||||
|
| `mission_create` | Initialize mission.json + TASKS.md + MISSION-MANIFEST.md in a project dir | When starting a new mission |
|
||||||
|
| `mission_status` | Read current mission state, milestone progress, next task, active session | At session start, or when checking progress |
|
||||||
|
| `mission_next_task` | Find the next `not-started` task whose dependencies are met, return its full spec | When the agent needs work to do |
|
||||||
|
| `mission_update_task` | Update a task row status in TASKS.md | When completing or blocking a task |
|
||||||
|
| `mission_handoff` | Generate a handoff packet from current session context + mission state | Before session rotation or at session end |
|
||||||
|
| `mission_resume` | Load a handoff packet and inject it as context for the new session | At session start after rotation |
|
||||||
|
|
||||||
|
### Toolset registration
|
||||||
|
|
||||||
|
The `mission` toolset follows the same pattern as `kanban`:
|
||||||
|
|
||||||
|
1. **Gating**: Tools are available when:
|
||||||
|
- The profile has `mission` in its toolsets config, OR
|
||||||
|
- A `HERMES_MISSION_DIR` env var is set (cron/dispatcher spawned workers)
|
||||||
|
|
||||||
|
2. **File conventions**: The toolset reads/writes the same file formats as Mosaic `packages/coord`:
|
||||||
|
- `.mosaic/orchestrator/mission.json` — mission state
|
||||||
|
- `docs/TASKS.md` — task table
|
||||||
|
- `docs/MISSION-MANIFEST.md` — mission manifest
|
||||||
|
- `docs/scratchpads/<id>.md` — session scratchpad
|
||||||
|
|
||||||
|
3. **Kanban bridge**: Optional sync between mission TASKS.md rows and Kanban task cards, so the dashboard sees mission tasks. The sync is one-way (TASKS.md → Kanban) initially; bidirectional sync is deferred to a later version.
|
||||||
|
|
||||||
|
### Churn detection (middleware)
|
||||||
|
|
||||||
|
Churn detection lives in Hermes's turn loop, NOT as a tool-call. It observes:
|
||||||
|
|
||||||
|
- Repeated compaction events (context window pressure)
|
||||||
|
- Identical tool-call sequences (loop detection)
|
||||||
|
- No file state changes across N turns
|
||||||
|
- Repeated permission denials
|
||||||
|
|
||||||
|
When churn score exceeds threshold:
|
||||||
|
1. `mission_handoff` is called automatically
|
||||||
|
2. Session is rotated (fresh context window)
|
||||||
|
3. `mission_resume` is called in the new session
|
||||||
|
|
||||||
|
This is new infrastructure that only Hermes can provide (Mosaic runs outside the agent loop).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Implementation Tasks
|
||||||
|
|
||||||
|
### Phase 1: Core state management (Python port of coord)
|
||||||
|
|
||||||
|
| Task | Files | Estimate |
|
||||||
|
|---|---|---|
|
||||||
|
| 1.1 Port mission.json read/write to Python | `mission_state.py` | 2h |
|
||||||
|
| 1.2 Port TASKS.md parser to Python | `mission_state.py` | 2h |
|
||||||
|
| 1.3 Port MISSION-MANIFEST.md reader to Python | `mission_state.py` | 1h |
|
||||||
|
| 1.4 Implement `mission_create` tool-call | `mission_tools.py` | 1h |
|
||||||
|
| 1.5 Implement `mission_status` tool-call | `mission_tools.py` | 1h |
|
||||||
|
| 1.6 Implement `mission_next_task` tool-call | `mission_tools.py` | 1h |
|
||||||
|
| 1.7 Implement `mission_update_task` tool-call | `mission_tools.py` | 1h |
|
||||||
|
| 1.8 Register `mission` toolset in Hermes registry | `tools/registry.py` | 30m |
|
||||||
|
| 1.9 Add `mission` to orchestrator profile toolsets | `config.yaml` | 10m |
|
||||||
|
| 1.10 Write unit tests for mission_state | `tests/test_mission_state.py` | 2h |
|
||||||
|
| 1.11 Write unit tests for TASKS.md parser | `tests/test_tasks_parser.py` | 1h |
|
||||||
|
|
||||||
|
**Phase 1 estimate:** ~13h
|
||||||
|
|
||||||
|
### Phase 2: Handoff and session continuity
|
||||||
|
|
||||||
|
| Task | Files | Estimate |
|
||||||
|
|---|---|---|
|
||||||
|
| 2.1 Define handoff packet schema (JSON) | `mission_handoff.py` | 1h |
|
||||||
|
| 2.2 Implement `mission_handoff` tool-call | `mission_handoff.py`, `mission_tools.py` | 2h |
|
||||||
|
| 2.3 Implement `mission_resume` tool-call | `mission_handoff.py`, `mission_tools.py` | 2h |
|
||||||
|
| 2.4 Wire handoff into session start (auto-resume) | agent loop hook | 2h |
|
||||||
|
| 2.5 Write tests for handoff round-trip | `tests/test_mission_handoff.py` | 1h |
|
||||||
|
|
||||||
|
**Phase 2 estimate:** ~8h
|
||||||
|
|
||||||
|
### Phase 3: Churn detection
|
||||||
|
|
||||||
|
| Task | Files | Estimate |
|
||||||
|
|---|---|---|
|
||||||
|
| 3.1 Define churn signal weights and thresholds | `mission_churn.py` | 1h |
|
||||||
|
| 3.2 Implement tool-loop detector (consecutive identical calls) | `mission_churn.py` | 2h |
|
||||||
|
| 3.3 Implement compaction pressure detector | `mission_churn.py` | 1h |
|
||||||
|
| 3.4 Implement progress scorer (file state delta) | `mission_churn.py` | 2h |
|
||||||
|
| 3.5 Wire churn scoring into agent turn loop | agent loop middleware | 2h |
|
||||||
|
| 3.6 Implement auto-rotation trigger | agent loop + handoff | 2h |
|
||||||
|
| 3.7 Write tests for churn scoring | `tests/test_mission_churn.py` | 1h |
|
||||||
|
|
||||||
|
**Phase 3 estimate:** ~11h
|
||||||
|
|
||||||
|
### Phase 4: Kanban bridge + CLI surface
|
||||||
|
|
||||||
|
| Task | Files | Estimate |
|
||||||
|
|---|---|---|
|
||||||
|
| 4.1 Implement TASKS.md → Kanban sync (one-way first) | `mission_kanban_sync.py` | 2h |
|
||||||
|
| 4.2 Add `hermes mission` CLI subcommand | `mission_cli.py` | 2h |
|
||||||
|
| 4.3 Add `hermes mission status` command | `mission_cli.py` | 1h |
|
||||||
|
| 4.4 Add `hermes mission init` command | `mission_cli.py` | 1h |
|
||||||
|
| 4.5 Add `hermes mission handoff` command | `mission_cli.py` | 1h |
|
||||||
|
| 4.6 Add `hermes mission resume` command | `mission_cli.py` | 1h |
|
||||||
|
|
||||||
|
**Phase 4 estimate:** ~8h
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## File Format Compatibility
|
||||||
|
|
||||||
|
The Python implementation MUST read and write the exact same file formats as Mosaic's TypeScript `packages/coord`. This means:
|
||||||
|
|
||||||
|
1. **mission.json** schema is identical to `Mission` type in `packages/coord/src/types.ts`
|
||||||
|
2. **TASKS.md** table format is identical to what `packages/coord/src/tasks-file.ts` parses
|
||||||
|
3. **MISSION-MANIFEST.md** is free-form markdown (no parser needed — just read the file)
|
||||||
|
4. **Handoff packets** are a new JSON format defined in this toolset (Mosaic doesn't have them yet)
|
||||||
|
|
||||||
|
This way a project can use Hermes mission tools OR Mosaic `mosaic coord` commands interchangeably. The files are the contract.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Relationship Diagram
|
||||||
|
|
||||||
|
```
|
||||||
|
Mosaic Stack (TypeScript) Hermes Agent (Python)
|
||||||
|
┌─────────────────────────┐ ┌─────────────────────────┐
|
||||||
|
│ packages/coord │ │ tools/mission_tools.py │
|
||||||
|
│ ├─ mission.ts │◄──────►│ ├─ mission_state.py │
|
||||||
|
│ ├─ tasks-file.ts │ same │ ├─ mission_handoff.py │
|
||||||
|
│ ├─ status.ts │ files │ ├─ mission_churn.py │
|
||||||
|
│ └─ runner.ts │ │ └─ mission_tools.py │
|
||||||
|
│ │ │ │
|
||||||
|
│ packages/prdy │ │ skills/prdy/ │
|
||||||
|
│ └─ templates, wizard │◄──────►│ └─ SKILL.md + templates │
|
||||||
|
│ │ │ │
|
||||||
|
│ plugins/mosaic-framework│ │ skills/ (existing) │
|
||||||
|
│ └─ context injection │◄──────►│ └─ kanban-orchestrator │
|
||||||
|
│ │ │ + mosaic-coding-* │
|
||||||
|
│ plugins/macp │ │ tools/delegate_task.py │
|
||||||
|
│ └─ ACP bridge │◄──────►│ └─ already covers this │
|
||||||
|
│ │ │ │
|
||||||
|
│ (stays in Mosaic) │ │ tools/kanban_tools.py │
|
||||||
|
│ apps/gateway │ │ └─ Hermes Kanban DB │
|
||||||
|
│ apps/web │ │ │
|
||||||
|
│ packages/db │ │ tools/cronjob_tools.py │
|
||||||
|
│ packages/queue │ │ └─ already covers cron │
|
||||||
|
└─────────────────────────┘ └─────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Open Questions
|
||||||
|
|
||||||
|
1. **Should the `mission` toolset ship with Hermes core, or as a plugin?**
|
||||||
|
- Recommendation: ship as a **built-in toolset** (like `kanban`) since mission coordination is a core agent capability, not an optional integration. The file formats are stable and the code is small.
|
||||||
|
|
||||||
|
2. **Should churn detection be per-profile configurable?**
|
||||||
|
- Recommendation: yes. Add `mission.churn_threshold` and `mission.churn_weights` to profile config.yaml. Default threshold = 5 consecutive no-progress turns.
|
||||||
|
|
||||||
|
3. **Should handoff packets live in the project dir or in Hermes home?**
|
||||||
|
- Recommendation: **project dir** (`.mosaic/handoffs/<session-id>.json`). This keeps them version-controlled and accessible regardless of which agent runtime picks up the project.
|
||||||
|
|
||||||
|
4. **Bidirectional Kanban sync?**
|
||||||
|
- Recommendation: **one-way first** (TASKS.md → Kanban). Bidirectional adds conflict resolution complexity. Ship one-way, add reverse sync in v2 if needed.
|
||||||
|
|
||||||
|
5. **PRD generation — skill or tool-call?**
|
||||||
|
- Recommendation: **skill** (`prdy`). PRD generation is a prompt engineering problem with templates. Skills already handle this pattern perfectly.
|
||||||
234
docs/plans/2026-05-07-coordination-resilience.md
Normal file
234
docs/plans/2026-05-07-coordination-resilience.md
Normal file
@@ -0,0 +1,234 @@
|
|||||||
|
# Mosaic Stack ↔ Hermes Coordination Resilience
|
||||||
|
|
||||||
|
> Purpose: document the self-healing coordination patterns that emerged while implementing the Hermes mission toolset, distress-card protocol, and auto-heal watchers, so the same mechanics can be reimplemented in Mosaic Stack or any similar agent platform.
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
The coordination layer should be treated as a system of mechanical recovery loops rather than a single interactive agent session.
|
||||||
|
|
||||||
|
## SIBKISS operational summary
|
||||||
|
|
||||||
|
- mission on
|
||||||
|
- heartbeat always
|
||||||
|
- resume from packet
|
||||||
|
- block with `[BLOCKED]`
|
||||||
|
- reassign
|
||||||
|
- keep tasks tiny
|
||||||
|
- auto-heal dead workers
|
||||||
|
|
||||||
|
The design has four parts:
|
||||||
|
|
||||||
|
1. Atomic task decomposition — workers operate only within a small, explicit scope.
|
||||||
|
2. Distress signaling — workers create a standardized `[BLOCKED]` card when they encounter a blocker outside their scope.
|
||||||
|
3. Mechanical fallback — if the worker cannot phone home because of rate limits or dead context, a cron-style watcher synthesizes the distress card for them.
|
||||||
|
4. Auto-heal / reassignment — stale workers are reaped, crash-loops are reset, and rate-limited work is reassigned to a different profile/provider.
|
||||||
|
|
||||||
|
## Why this exists
|
||||||
|
|
||||||
|
Observed failure modes:
|
||||||
|
|
||||||
|
- Scope creep: a worker completes the target fix, then spends the rest of its budget chasing downstream cascade work.
|
||||||
|
- Silent failure / dead worker: the worker PID is gone, but the task remains running or blocked.
|
||||||
|
- Rate-limited worker: the worker is too constrained to create a help card itself, so it spins or fails without a clean handoff.
|
||||||
|
|
||||||
|
The answer is not to raise iteration caps or ask the worker to keep trying longer. The answer is to make the coordination layer self-healing and the work items atomic.
|
||||||
|
|
||||||
|
## Core workflow
|
||||||
|
|
||||||
|
### 1) Atomic task boundaries
|
||||||
|
|
||||||
|
Every task should have:
|
||||||
|
|
||||||
|
- one concern
|
||||||
|
- explicit files/packages in scope
|
||||||
|
- explicit files/packages out of scope
|
||||||
|
- a maximum file count if possible
|
||||||
|
- a stated expected iteration budget
|
||||||
|
|
||||||
|
When a worker discovers work outside scope, it must stop fixing it and hand off.
|
||||||
|
|
||||||
|
### 2) Worker-authored distress card
|
||||||
|
|
||||||
|
If the worker can still report status, it creates a card like:
|
||||||
|
|
||||||
|
- Title: `[BLOCKED] t_<source_id> <blocker_type>`
|
||||||
|
- Assignee: `tuesday` / orchestrator role
|
||||||
|
- Status: `ready`
|
||||||
|
- Body: standardized distress template with source task, blocker type, completed work, cannot-touch scope, and needed action
|
||||||
|
|
||||||
|
The orchestrator receives the card, acts on it, and closes the loop.
|
||||||
|
|
||||||
|
## Routing rules
|
||||||
|
|
||||||
|
### Distress card routing
|
||||||
|
|
||||||
|
- Title: `[BLOCKED] t_<source_id> <blocker_type>`
|
||||||
|
- Assignee: `tuesday` / orchestrator role
|
||||||
|
- Status: `ready`
|
||||||
|
- Body: standardized distress template with source task, blocker type, completed work, cannot-touch scope, and needed action
|
||||||
|
- Source task stays linked to the distress card so the recovery trail is auditable
|
||||||
|
|
||||||
|
The orchestrator receives the card, acts on it, and closes the loop.
|
||||||
|
|
||||||
|
### 3) Mechanical fallback for rate-limited workers
|
||||||
|
|
||||||
|
If the worker is too rate-limited or unstable to create the distress card itself, a no-agent watcher must synthesize the card from the task row and failure metadata.
|
||||||
|
|
||||||
|
That watcher should:
|
||||||
|
|
||||||
|
- inspect running / blocked tasks
|
||||||
|
- detect repeated 429 / 503 / overload errors
|
||||||
|
- create the same standardized `[BLOCKED]` card on behalf of the worker
|
||||||
|
- link the distress card to the source task
|
||||||
|
- add a comment to the source task
|
||||||
|
- allow the dispatcher to pick up the new card immediately
|
||||||
|
|
||||||
|
This is the key fix for the logic issue: the worker does not need to be able to phone home if the watcher can do it mechanically.
|
||||||
|
|
||||||
|
### 4) Auto-heal for dead workers
|
||||||
|
|
||||||
|
A separate no-agent watcher should:
|
||||||
|
|
||||||
|
- reap dead PIDs stuck in `running`
|
||||||
|
- reset crash-loops whose failures are infrastructure-related
|
||||||
|
- escalate tasks that have been reset too many times
|
||||||
|
|
||||||
|
This watcher prevents stale tasks from clogging the board and keeps the dispatch queue moving.
|
||||||
|
|
||||||
|
## Distress card contract
|
||||||
|
|
||||||
|
### Canonical title
|
||||||
|
|
||||||
|
```text
|
||||||
|
[BLOCKED] t_<source_task_id> <blocker_type>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Canonical blocker types
|
||||||
|
|
||||||
|
- `scope_boundary`
|
||||||
|
- `env_blocker`
|
||||||
|
- `credential_failure`
|
||||||
|
- `dependency`
|
||||||
|
- `iteration_budget`
|
||||||
|
- `rate_limited`
|
||||||
|
|
||||||
|
### Canonical body
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
## Distress Signal
|
||||||
|
- Blocked task: t_xxx
|
||||||
|
- Worker: <profile_name>
|
||||||
|
- Branch: <git_branch_name>
|
||||||
|
- Workspace: <path>
|
||||||
|
- Blocker type: <type>
|
||||||
|
- Completed: <what was done>
|
||||||
|
- Cannot touch: <out-of-scope packages/files>
|
||||||
|
- Needs: <what the orchestrator should do>
|
||||||
|
- State: committed | uncommitted | stashed(<stash_name>)
|
||||||
|
|
||||||
|
## Scope Guard
|
||||||
|
DO NOT touch: anything outside diagnosing and remediating the blocker described above
|
||||||
|
Only fix: assign, split, reassign, or unblock the source task
|
||||||
|
```
|
||||||
|
|
||||||
|
## Routing rules
|
||||||
|
|
||||||
|
### Distress card routing
|
||||||
|
|
||||||
|
- `[BLOCKED]` title prefix should bypass normal triage.
|
||||||
|
- The card should go directly to the orchestration profile.
|
||||||
|
- The orchestrator should start from a clean session each time.
|
||||||
|
|
||||||
|
### Rate-limit fallback
|
||||||
|
|
||||||
|
When the source task is rate-limited:
|
||||||
|
|
||||||
|
- do not keep retrying in the worker
|
||||||
|
- let the watcher synthesize the distress card
|
||||||
|
- have the orchestrator reassign the source task to a different profile/provider combo
|
||||||
|
|
||||||
|
### Provider fallback principle
|
||||||
|
|
||||||
|
Never reassign rate-limited work back to the same provider if the failure was provider pressure. Use a different provider when possible.
|
||||||
|
|
||||||
|
### Suggested fallback order
|
||||||
|
|
||||||
|
1. Keep the current task body and scope guards intact.
|
||||||
|
2. Reassign to a different profile on a different provider.
|
||||||
|
3. If no other provider is available, reassign to a different profile on the same provider — but only when the blocker is not a rate limit.
|
||||||
|
4. If repeated failures continue, split the task into a narrower atomic card.
|
||||||
|
|
||||||
|
## Related recovery docs
|
||||||
|
|
||||||
|
- Mission packet recovery contract: `/opt/hermes/docs/mission-toolset-heartbeat.md`
|
||||||
|
- Hermes mission implementation plan: `/opt/hermes/docs/plans/mission-toolset-implementation.md`
|
||||||
|
- The same packet-first resume rule applies: inspect the latest packet before re-reading mission files.
|
||||||
|
- New-session trigger: when a profile config changes, start a fresh session or run `/reset` so the updated toolset is actually loaded.
|
||||||
|
|
||||||
|
## Watchers to implement
|
||||||
|
|
||||||
|
### Auto-heal watcher
|
||||||
|
|
||||||
|
Responsibilities:
|
||||||
|
|
||||||
|
- reap stale workers
|
||||||
|
- reset dead-PID crash loops
|
||||||
|
- track reset counts
|
||||||
|
- escalate after repeated resets
|
||||||
|
|
||||||
|
### Distress synthesizer watcher
|
||||||
|
|
||||||
|
Responsibilities:
|
||||||
|
|
||||||
|
- detect rate-limited / stuck workers
|
||||||
|
- create `[BLOCKED]` cards mechanically
|
||||||
|
- link the card to the source task
|
||||||
|
- leave a comment for traceability
|
||||||
|
|
||||||
|
### Iteration-budget watcher
|
||||||
|
|
||||||
|
Responsibilities:
|
||||||
|
|
||||||
|
- detect long-running tasks and repeated failure patterns
|
||||||
|
- recommend splits when a task is clearly over-scoped
|
||||||
|
- report tasks that need human review after multiple resets
|
||||||
|
|
||||||
|
## Operational principle
|
||||||
|
|
||||||
|
If a task cannot cleanly finish within its atomic scope, the right response is to surface a smaller coordination problem, not to keep burning context.
|
||||||
|
|
||||||
|
This is what makes the system robust across compaction, rate limits, and dead workers.
|
||||||
|
|
||||||
|
## Suggested implementation order
|
||||||
|
|
||||||
|
1. Atomic task metadata in task bodies
|
||||||
|
2. Worker-authored distress card protocol
|
||||||
|
3. Mechanical distress synthesizer watcher
|
||||||
|
4. Auto-heal watcher for dead workers
|
||||||
|
5. Orchestrator routing rules for `[BLOCKED]`
|
||||||
|
6. Rate-limit fallback / model reassignment table
|
||||||
|
|
||||||
|
## Where this fits in Hermes
|
||||||
|
|
||||||
|
- Kanban = durable work graph and status engine
|
||||||
|
- Watchers = mechanical healing and distress synthesis
|
||||||
|
- Orchestrator = split / reassign / unblock decision-maker
|
||||||
|
- Workers = execution inside atomic task boundaries
|
||||||
|
|
||||||
|
## Where this fits in Mosaic Stack
|
||||||
|
|
||||||
|
- PRD / coordination infra should encode the same patterns
|
||||||
|
- Mosaic can use the same distress-card contract and watcher logic
|
||||||
|
- The coordination model should be runtime-agnostic: any agent system can use it if it can write a task card and react to a ready queue
|
||||||
|
|
||||||
|
## Cross-project takeaway
|
||||||
|
|
||||||
|
The important pattern is not the specific tool names. It is the mechanical feedback loop:
|
||||||
|
|
||||||
|
- detect failure without requiring the failing worker to succeed
|
||||||
|
- create a standardized help artifact
|
||||||
|
- route that artifact to a fresh orchestrator context
|
||||||
|
- repair the assignment graph
|
||||||
|
- continue the mission
|
||||||
|
|
||||||
|
That pattern is reusable anywhere.
|
||||||
@@ -612,3 +612,44 @@ Independent security review surfaced three high-impact and four medium findings;
|
|||||||
7. DEPLOY-03/04 acceptance probes (`mosaic gateway doctor --json`, pgvector `vector(3)` round-trip)
|
7. DEPLOY-03/04 acceptance probes (`mosaic gateway doctor --json`, pgvector `vector(3)` round-trip)
|
||||||
8. DEPLOY-05: author `docs/federation/TEST-INFRA.md`
|
8. DEPLOY-05: author `docs/federation/TEST-INFRA.md`
|
||||||
9. M2-02 (Step-CA sidecar) kicks off after image health is green
|
9. M2-02 (Step-CA sidecar) kicks off after image health is green
|
||||||
|
|
||||||
|
### Session 23 — 2026-04-21 — M2 close + M3 decomposition
|
||||||
|
|
||||||
|
**Closed at compaction boundary:** all 13 M2 tasks done, PRs #494–#503 merged to `main`, tag `fed-v0.2.0-m2` published, Gitea release notes posted, issue #461 closed. Main at `4ece6dc6`.
|
||||||
|
|
||||||
|
**M2 hardening landed in PR #501** (security review remediation):
|
||||||
|
|
||||||
|
- CRIT-1: post-issuance OID verification in `ca.service.ts` (rejects cert if `mosaic_grant_id` / `mosaic_subject_user_id` extensions missing or mismatched)
|
||||||
|
- CRIT-2: atomic activation guard `WHERE status='pending'` on grant + `WHERE state='pending'` on peer; throws `ConflictException` if lost race
|
||||||
|
- HIGH-2: removed try/catch fallback in `extractCertNotAfter` — parse failures propagate as 500 (no silent 90-day default)
|
||||||
|
- HIGH-4: token slice for logging (`${token.slice(0, 8)}...`) — no full token in stdout
|
||||||
|
- HIGH-5: `redeem()` wrapped in try/catch with best-effort failure audit; uses `null` (not `'unknown'`) for nullable UUID FK fallback
|
||||||
|
- MED-3: `createToken` validates `grant.peerId === dto.peerId`; `BadRequestException` on mismatch
|
||||||
|
|
||||||
|
**Remaining M2 security findings deferred to M3+:**
|
||||||
|
|
||||||
|
- HIGH-1: peerId/subjectUserId tenancy validation on `createGrant` (M3 ScopeService work surfaces this)
|
||||||
|
- HIGH-3: Step-CA cert SHA-256 fingerprint pinning (M5 cert handling)
|
||||||
|
- MED-1: token entropy already 32 bytes — wontfix
|
||||||
|
- MED-2: per-route rate limit on enrollment endpoint (M4 rate limit work)
|
||||||
|
- MED-4: CSR CN binding to peer's commonName (M3 AuthGuard work)
|
||||||
|
|
||||||
|
**M3 decomposition landed in this session:**
|
||||||
|
|
||||||
|
- 14 tasks (M3-01..M3-14), ~100K estimate
|
||||||
|
- Structure mirrors M1/M2 pattern: foundation → server stream + client stream + harness in parallel → integration → E2E → security review → docs → close
|
||||||
|
- M3-02 ships local two-gateway docker-compose (`tools/federation-harness/`) so M3-11 E2E is not blocked on the Portainer test bed (which is still blocked on `FED-M2-DEPLOY-IMG-FIX`)
|
||||||
|
|
||||||
|
**Subagent doctrine retained from M2:**
|
||||||
|
|
||||||
|
- All worker subagents use `isolation: "worktree"` to prevent branch-race incidents
|
||||||
|
- Code review is independent (different subagent, no overlap with author of work)
|
||||||
|
- `tea pr create --repo mosaicstack/stack --login mosaicstack` is the working PR-create path; `pr-create.sh` has shell-quoting bugs (followup #45 if not already filed)
|
||||||
|
- Cost tier: foundational implementation = sonnet, docs = haiku, complex multi-file architecture (security review, scope service) = sonnet with two review rounds
|
||||||
|
|
||||||
|
**Next concrete step:**
|
||||||
|
|
||||||
|
1. PR for the M3 planning artifact (this commit) — branch `docs/federation-m3-planning`
|
||||||
|
2. After merge, kick off M3-01 (DTOs) on `feat/federation-m3-types` with a sonnet subagent in a worktree
|
||||||
|
3. Once M3-01 lands, fan out: M3-02 (harness) || M3-03 (AuthGuard) → M3-04 (ScopeService) || M3-08 (FederationClient)
|
||||||
|
4. Re-converge at M3-10 (Integration) → M3-11 (E2E)
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ export default tseslint.config(
|
|||||||
'apps/gateway/vitest.config.ts',
|
'apps/gateway/vitest.config.ts',
|
||||||
'packages/storage/vitest.config.ts',
|
'packages/storage/vitest.config.ts',
|
||||||
'packages/mosaic/__tests__/*.ts',
|
'packages/mosaic/__tests__/*.ts',
|
||||||
|
'tools/federation-harness/*.ts',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -42,6 +42,7 @@
|
|||||||
"access": "public"
|
"access": "public"
|
||||||
},
|
},
|
||||||
"files": [
|
"files": [
|
||||||
"dist"
|
"dist",
|
||||||
|
"drizzle"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,10 +1,12 @@
|
|||||||
import { PGlite } from '@electric-sql/pglite';
|
import { PGlite } from '@electric-sql/pglite';
|
||||||
|
import { vector } from '@electric-sql/pglite/vector';
|
||||||
import { drizzle } from 'drizzle-orm/pglite';
|
import { drizzle } from 'drizzle-orm/pglite';
|
||||||
import * as schema from './schema.js';
|
import * as schema from './schema.js';
|
||||||
import type { DbHandle } from './client.js';
|
import type { DbHandle } from './client.js';
|
||||||
|
|
||||||
export function createPgliteDb(dataDir: string): DbHandle {
|
export function createPgliteDb(dataDir: string): DbHandle {
|
||||||
const client = new PGlite(dataDir);
|
// pgvector extension is required by migration 0001 (insights.embedding column).
|
||||||
|
const client = new PGlite(dataDir, { extensions: { vector } });
|
||||||
const db = drizzle(client, { schema });
|
const db = drizzle(client, { schema });
|
||||||
return {
|
return {
|
||||||
db: db as unknown as DbHandle['db'],
|
db: db as unknown as DbHandle['db'],
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
export { createDb, type Db, type DbHandle } from './client.js';
|
export { createDb, type Db, type DbHandle } from './client.js';
|
||||||
export { createPgliteDb } from './client-pglite.js';
|
export { createPgliteDb } from './client-pglite.js';
|
||||||
export { runMigrations } from './migrate.js';
|
export { runMigrations, runPgliteMigrations } from './migrate.js';
|
||||||
export * from './schema.js';
|
export * from './schema.js';
|
||||||
export * from './federation.js';
|
export * from './federation.js';
|
||||||
export {
|
export {
|
||||||
|
|||||||
70
packages/db/src/migrate.test.ts
Normal file
70
packages/db/src/migrate.test.ts
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
import { mkdtempSync, rmSync } from 'node:fs';
|
||||||
|
import { tmpdir } from 'node:os';
|
||||||
|
import { join } from 'node:path';
|
||||||
|
import { sql } from 'drizzle-orm';
|
||||||
|
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
|
||||||
|
import { createPgliteDb } from './client-pglite.js';
|
||||||
|
import { runPgliteMigrations } from './migrate.js';
|
||||||
|
import type { DbHandle } from './client.js';
|
||||||
|
|
||||||
|
interface PgliteExec {
|
||||||
|
exec(query: string): Promise<unknown>;
|
||||||
|
}
|
||||||
|
|
||||||
|
describe('runPgliteMigrations', () => {
|
||||||
|
let dataDir: string;
|
||||||
|
let handle: DbHandle;
|
||||||
|
|
||||||
|
beforeEach(() => {
|
||||||
|
dataDir = mkdtempSync(join(tmpdir(), 'mosaic-db-migrate-test-'));
|
||||||
|
handle = createPgliteDb(dataDir);
|
||||||
|
});
|
||||||
|
|
||||||
|
afterEach(async () => {
|
||||||
|
await handle.close();
|
||||||
|
rmSync(dataDir, { recursive: true, force: true });
|
||||||
|
});
|
||||||
|
|
||||||
|
it('creates the BetterAuth tables required by the gateway', async () => {
|
||||||
|
await runPgliteMigrations(handle);
|
||||||
|
|
||||||
|
const result = (await handle.db.execute(sql`
|
||||||
|
SELECT table_name FROM information_schema.tables
|
||||||
|
WHERE table_schema = 'public'
|
||||||
|
ORDER BY table_name
|
||||||
|
`)) as unknown as { rows: Array<{ table_name: string }> };
|
||||||
|
|
||||||
|
const tables = result.rows.map((r) => r.table_name);
|
||||||
|
|
||||||
|
// Auth tables — required for sign-in / bootstrap to function.
|
||||||
|
expect(tables).toContain('users');
|
||||||
|
expect(tables).toContain('sessions');
|
||||||
|
expect(tables).toContain('accounts');
|
||||||
|
expect(tables).toContain('verifications');
|
||||||
|
|
||||||
|
// Schema sanity check — admin token table consumed by mosaic gateway config.
|
||||||
|
expect(tables).toContain('admin_tokens');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('is idempotent — running twice does not error', async () => {
|
||||||
|
await runPgliteMigrations(handle);
|
||||||
|
await expect(runPgliteMigrations(handle)).resolves.toBeUndefined();
|
||||||
|
});
|
||||||
|
|
||||||
|
it('surfaces statement-level error context on failure and leaves no ledger row', async () => {
|
||||||
|
// Pre-create a `users` table so that migration 0000's CREATE TABLE (which has
|
||||||
|
// no IF NOT EXISTS guard) collides with it and fails.
|
||||||
|
const client = (handle.db as unknown as { $client: PgliteExec }).$client;
|
||||||
|
await client.exec('CREATE TABLE users (sentinel text)');
|
||||||
|
|
||||||
|
await expect(runPgliteMigrations(handle)).rejects.toThrow(
|
||||||
|
/migration hash=[a-f0-9]+ statement #\d+ failed/,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Ledger should be empty — partial application must not pretend to be complete.
|
||||||
|
const ledger = (await handle.db.execute(
|
||||||
|
sql`SELECT count(*)::int AS count FROM drizzle.__drizzle_migrations`,
|
||||||
|
)) as unknown as { rows: Array<{ count: number }> };
|
||||||
|
expect(ledger.rows[0]?.count).toBe(0);
|
||||||
|
});
|
||||||
|
});
|
||||||
@@ -1,18 +1,109 @@
|
|||||||
import { dirname, resolve } from 'node:path';
|
import { dirname, resolve } from 'node:path';
|
||||||
import { fileURLToPath } from 'node:url';
|
import { fileURLToPath } from 'node:url';
|
||||||
import { drizzle } from 'drizzle-orm/postgres-js';
|
import { sql } from 'drizzle-orm';
|
||||||
import { migrate } from 'drizzle-orm/postgres-js/migrator';
|
import { drizzle as drizzlePostgres } from 'drizzle-orm/postgres-js';
|
||||||
|
import { migrate as migratePostgres } from 'drizzle-orm/postgres-js/migrator';
|
||||||
|
import { readMigrationFiles } from 'drizzle-orm/migrator';
|
||||||
import postgres from 'postgres';
|
import postgres from 'postgres';
|
||||||
import { DEFAULT_DATABASE_URL } from './defaults.js';
|
import { DEFAULT_DATABASE_URL } from './defaults.js';
|
||||||
|
import type { DbHandle } from './client.js';
|
||||||
|
|
||||||
|
interface PgliteExecutor {
|
||||||
|
exec(query: string): Promise<unknown>;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface ExecuteRows<T> {
|
||||||
|
rows: T[];
|
||||||
|
}
|
||||||
|
|
||||||
|
function migrationsFolder(): string {
|
||||||
|
const here = dirname(fileURLToPath(import.meta.url));
|
||||||
|
return resolve(here, '../drizzle');
|
||||||
|
}
|
||||||
|
|
||||||
export async function runMigrations(url?: string): Promise<void> {
|
export async function runMigrations(url?: string): Promise<void> {
|
||||||
const connectionString = url ?? process.env['DATABASE_URL'] ?? DEFAULT_DATABASE_URL;
|
const connectionString = url ?? process.env['DATABASE_URL'] ?? DEFAULT_DATABASE_URL;
|
||||||
const sql = postgres(connectionString, { max: 1 });
|
const sqlClient = postgres(connectionString, { max: 1 });
|
||||||
const db = drizzle(sql);
|
const db = drizzlePostgres(sqlClient);
|
||||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
||||||
try {
|
try {
|
||||||
await migrate(db, { migrationsFolder: resolve(__dirname, '../drizzle') });
|
// TODO: postgres-tier first-install also fails because (a) Drizzle wraps every
|
||||||
|
// migration in one transaction (breaks 0009's ALTER TYPE ADD VALUE → SET DEFAULT
|
||||||
|
// sequence) and (b) drizzle/meta/_journal.json has 0009 ordered before 0008,
|
||||||
|
// which the postgres-js migrator skips by `created_at < folderMillis`. The
|
||||||
|
// PGlite path below sidesteps both. A follow-up should either share the
|
||||||
|
// per-statement loop (see runPgliteMigrations) or fix the journal ordering.
|
||||||
|
await migratePostgres(db, { migrationsFolder: migrationsFolder() });
|
||||||
} finally {
|
} finally {
|
||||||
await sql.end();
|
await sqlClient.end();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply Drizzle migrations against an embedded PGlite database.
|
||||||
|
//
|
||||||
|
// We don't reuse drizzle's pglite migrator because it wraps ALL migrations in
|
||||||
|
// one outer transaction, which breaks Postgres' `check_safe_enum_use` rule —
|
||||||
|
// e.g. migration 0009 does `ALTER TYPE ADD VALUE 'pending'` then references
|
||||||
|
// `'pending'` as a default in the same tx. PGlite's `exec()` runs each
|
||||||
|
// statement under the Simple Query protocol, autocommitting between them.
|
||||||
|
//
|
||||||
|
// We still write to the standard `drizzle.__drizzle_migrations` ledger so the
|
||||||
|
// result is interoperable with `runMigrations()` on a postgres-backed deploy
|
||||||
|
// (modulo the journal-ordering bug noted above).
|
||||||
|
//
|
||||||
|
// We skip-by-hash rather than skip-by-folderMillis (which is what Drizzle's
|
||||||
|
// postgres-js migrator does). That's deliberate — out-of-order timestamps in
|
||||||
|
// `_journal.json` won't silently drop migrations.
|
||||||
|
//
|
||||||
|
// Failure model: each statement autocommits, and the ledger row is written
|
||||||
|
// only after all statements in a migration succeed. A crash mid-migration
|
||||||
|
// leaves the prefix applied with no ledger entry, so the next boot will
|
||||||
|
// replay those statements and fail loudly on "already exists". Recovery:
|
||||||
|
// drop the partially-applied objects, or insert the migration's hash into
|
||||||
|
// `drizzle.__drizzle_migrations` manually. The error log identifies which
|
||||||
|
// statement of which migration was the culprit.
|
||||||
|
export async function runPgliteMigrations(handle: DbHandle): Promise<void> {
|
||||||
|
const client = (handle.db as unknown as { $client?: PgliteExecutor }).$client;
|
||||||
|
if (!client || typeof client.exec !== 'function') {
|
||||||
|
throw new Error('runPgliteMigrations: handle.db is not backed by a PGlite client');
|
||||||
|
}
|
||||||
|
|
||||||
|
await client.exec('CREATE SCHEMA IF NOT EXISTS drizzle');
|
||||||
|
await client.exec(`
|
||||||
|
CREATE TABLE IF NOT EXISTS drizzle.__drizzle_migrations (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
hash text NOT NULL,
|
||||||
|
created_at bigint
|
||||||
|
)
|
||||||
|
`);
|
||||||
|
|
||||||
|
const appliedRows = (await handle.db.execute(
|
||||||
|
sql`SELECT hash FROM drizzle.__drizzle_migrations`,
|
||||||
|
)) as unknown as ExecuteRows<{ hash: string }>;
|
||||||
|
const applied = new Set(appliedRows.rows.map((r) => r.hash));
|
||||||
|
|
||||||
|
const migrations = readMigrationFiles({ migrationsFolder: migrationsFolder() });
|
||||||
|
for (const migration of migrations) {
|
||||||
|
if (applied.has(migration.hash)) continue;
|
||||||
|
|
||||||
|
// Run each statement-breakpoint chunk in its own exec() call so PGlite
|
||||||
|
// commits between statements — this is what lets `ALTER TYPE ADD VALUE`
|
||||||
|
// become visible before a subsequent statement references the new value.
|
||||||
|
for (const [stmtIdx, stmt] of migration.sql.entries()) {
|
||||||
|
const trimmed = stmt.trim();
|
||||||
|
if (!trimmed) continue;
|
||||||
|
try {
|
||||||
|
await client.exec(trimmed);
|
||||||
|
} catch (err) {
|
||||||
|
const cause = err instanceof Error ? err.message : String(err);
|
||||||
|
throw new Error(
|
||||||
|
`runPgliteMigrations: migration hash=${migration.hash} statement #${stmtIdx} failed: ${cause}\n` +
|
||||||
|
`Statement: ${trimmed.slice(0, 200)}${trimmed.length > 200 ? '…' : ''}`,
|
||||||
|
{ cause: err },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
await handle.db.execute(
|
||||||
|
sql`INSERT INTO drizzle.__drizzle_migrations (hash, created_at) VALUES (${migration.hash}, ${migration.folderMillis})`,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,8 +16,15 @@ import fs from 'node:fs/promises';
|
|||||||
import os from 'node:os';
|
import os from 'node:os';
|
||||||
import path from 'node:path';
|
import path from 'node:path';
|
||||||
|
|
||||||
import { users, teams, teamMembers, conversations, messages } from '@mosaicstack/db';
|
import {
|
||||||
import { createPgliteDbWithVector, runPgliteMigrations } from './test-utils/pglite-with-vector.js';
|
users,
|
||||||
|
teams,
|
||||||
|
teamMembers,
|
||||||
|
conversations,
|
||||||
|
messages,
|
||||||
|
createPgliteDb,
|
||||||
|
runPgliteMigrations,
|
||||||
|
} from '@mosaicstack/db';
|
||||||
|
|
||||||
import postgres from 'postgres';
|
import postgres from 'postgres';
|
||||||
import { afterAll, describe, expect, it } from 'vitest';
|
import { afterAll, describe, expect, it } from 'vitest';
|
||||||
@@ -102,11 +109,8 @@ describe.skipIf(!run)('migrate-tier — PGlite → federated PG', () => {
|
|||||||
/* ---- 1. Create a temp PGlite db ---------------------------------- */
|
/* ---- 1. Create a temp PGlite db ---------------------------------- */
|
||||||
|
|
||||||
pgliteDataDir = await fs.mkdtemp(path.join(os.tmpdir(), 'fed-m1-08-'));
|
pgliteDataDir = await fs.mkdtemp(path.join(os.tmpdir(), 'fed-m1-08-'));
|
||||||
const handle = createPgliteDbWithVector(pgliteDataDir);
|
const handle = createPgliteDb(pgliteDataDir);
|
||||||
|
await runPgliteMigrations(handle);
|
||||||
// Run Drizzle migrations against PGlite.
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
||||||
await runPgliteMigrations(handle.db as any);
|
|
||||||
|
|
||||||
/* ---- 2. Seed representative data --------------------------------- */
|
/* ---- 2. Seed representative data --------------------------------- */
|
||||||
|
|
||||||
|
|||||||
@@ -1,52 +0,0 @@
|
|||||||
/**
|
|
||||||
* Test-only helpers for creating a PGlite database with the pgvector extension
|
|
||||||
* and running Drizzle migrations against it.
|
|
||||||
*
|
|
||||||
* These are intentionally NOT exported from @mosaicstack/db to avoid pulling
|
|
||||||
* the WASM vector bundle into the public API surface.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import { createRequire } from 'node:module';
|
|
||||||
import { dirname, resolve } from 'node:path';
|
|
||||||
|
|
||||||
import { PGlite } from '@electric-sql/pglite';
|
|
||||||
import { vector } from '@electric-sql/pglite/vector';
|
|
||||||
import { drizzle } from 'drizzle-orm/pglite';
|
|
||||||
import { migrate as migratePglite } from 'drizzle-orm/pglite/migrator';
|
|
||||||
import type { PgliteDatabase } from 'drizzle-orm/pglite';
|
|
||||||
import * as schema from '@mosaicstack/db';
|
|
||||||
import type { DbHandle } from '@mosaicstack/db';
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Create a PGlite DB handle with the pgvector extension loaded.
|
|
||||||
* Required for running Drizzle migrations that include `CREATE EXTENSION vector`.
|
|
||||||
*/
|
|
||||||
export function createPgliteDbWithVector(dataDir: string): DbHandle {
|
|
||||||
const client = new PGlite(dataDir, { extensions: { vector } });
|
|
||||||
const db = drizzle(client, { schema });
|
|
||||||
return {
|
|
||||||
db: db as unknown as DbHandle['db'],
|
|
||||||
close: async () => {
|
|
||||||
await client.close();
|
|
||||||
},
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Run Drizzle migrations against an already-open PGlite database handle.
|
|
||||||
* Resolves the migrations folder from @mosaicstack/db's installed location.
|
|
||||||
*
|
|
||||||
* @param db A PgliteDatabase instance (from drizzle-orm/pglite).
|
|
||||||
*/
|
|
||||||
export async function runPgliteMigrations(
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
||||||
db: PgliteDatabase<any>,
|
|
||||||
): Promise<void> {
|
|
||||||
// Resolve @mosaicstack/db package root to locate its drizzle migrations folder.
|
|
||||||
const _require = createRequire(import.meta.url);
|
|
||||||
const dbPkgMain = _require.resolve('@mosaicstack/db');
|
|
||||||
// dbPkgMain → …/packages/db/dist/index.js → dirname = dist/
|
|
||||||
// go up one level from dist/ to find the sibling drizzle/ folder
|
|
||||||
const migrationsFolder = resolve(dirname(dbPkgMain), '../drizzle');
|
|
||||||
await migratePglite(db, { migrationsFolder });
|
|
||||||
}
|
|
||||||
@@ -26,7 +26,8 @@
|
|||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"class-transformer": "^0.5.1",
|
"class-transformer": "^0.5.1",
|
||||||
"class-validator": "^0.15.1"
|
"class-validator": "^0.15.1",
|
||||||
|
"zod": "^4.3.6"
|
||||||
},
|
},
|
||||||
"publishConfig": {
|
"publishConfig": {
|
||||||
"registry": "https://git.mosaicstack.dev/api/packages/mosaicstack/npm/",
|
"registry": "https://git.mosaicstack.dev/api/packages/mosaicstack/npm/",
|
||||||
|
|||||||
435
packages/types/src/federation/__tests__/federation.spec.ts
Normal file
435
packages/types/src/federation/__tests__/federation.spec.ts
Normal file
@@ -0,0 +1,435 @@
|
|||||||
|
/**
|
||||||
|
* Unit tests for federation wire-format DTOs.
|
||||||
|
*
|
||||||
|
* Coverage:
|
||||||
|
* - FederationRequestSchema (valid + invalid)
|
||||||
|
* - FederationListResponseSchema factory
|
||||||
|
* - FederationGetResponseSchema factory
|
||||||
|
* - FederationCapabilitiesResponseSchema
|
||||||
|
* - FederationErrorEnvelopeSchema + error code exhaustiveness
|
||||||
|
* - FederationError exception hierarchy
|
||||||
|
* - tagWithSource helper round-trip
|
||||||
|
* - SourceTagSchema
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import { z } from 'zod';
|
||||||
|
|
||||||
|
import {
|
||||||
|
FEDERATION_ERROR_CODES,
|
||||||
|
FEDERATION_VERBS,
|
||||||
|
FederationCapabilitiesResponseSchema,
|
||||||
|
FederationError,
|
||||||
|
FederationErrorEnvelopeSchema,
|
||||||
|
FederationForbiddenError,
|
||||||
|
FederationInternalError,
|
||||||
|
FederationInvalidRequestError,
|
||||||
|
FederationNotFoundError,
|
||||||
|
FederationRateLimitedError,
|
||||||
|
FederationRequestSchema,
|
||||||
|
FederationScopeViolationError,
|
||||||
|
FederationUnauthorizedError,
|
||||||
|
FederationGetResponseSchema,
|
||||||
|
FederationListResponseSchema,
|
||||||
|
SOURCE_LOCAL,
|
||||||
|
SourceTagSchema,
|
||||||
|
parseFederationErrorEnvelope,
|
||||||
|
tagWithSource,
|
||||||
|
} from '../index.js';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Verbs
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('FEDERATION_VERBS', () => {
|
||||||
|
it('contains exactly list, get, capabilities', () => {
|
||||||
|
expect(FEDERATION_VERBS).toEqual(['list', 'get', 'capabilities']);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// FederationRequestSchema
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('FederationRequestSchema', () => {
|
||||||
|
it('accepts a minimal valid list request', () => {
|
||||||
|
const result = FederationRequestSchema.safeParse({ verb: 'list', resource: 'tasks' });
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('accepts a get request with cursor and params', () => {
|
||||||
|
const result = FederationRequestSchema.safeParse({
|
||||||
|
verb: 'get',
|
||||||
|
resource: 'notes',
|
||||||
|
cursor: 'abc123',
|
||||||
|
params: { filter: 'mine' },
|
||||||
|
});
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
if (result.success) {
|
||||||
|
expect(result.data.cursor).toBe('abc123');
|
||||||
|
expect(result.data.params?.['filter']).toBe('mine');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('accepts a capabilities request', () => {
|
||||||
|
const result = FederationRequestSchema.safeParse({ verb: 'capabilities', resource: 'tasks' });
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects an unknown verb', () => {
|
||||||
|
const result = FederationRequestSchema.safeParse({ verb: 'search', resource: 'tasks' });
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects an empty resource string', () => {
|
||||||
|
const result = FederationRequestSchema.safeParse({ verb: 'list', resource: '' });
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects a missing verb', () => {
|
||||||
|
const result = FederationRequestSchema.safeParse({ resource: 'tasks' });
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// FederationListResponseSchema factory
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('FederationListResponseSchema', () => {
|
||||||
|
const ItemSchema = z.object({ id: z.string(), name: z.string() });
|
||||||
|
const ListSchema = FederationListResponseSchema(ItemSchema);
|
||||||
|
|
||||||
|
it('accepts a valid list envelope', () => {
|
||||||
|
const result = ListSchema.safeParse({
|
||||||
|
items: [{ id: '1', name: 'Task A' }],
|
||||||
|
nextCursor: 'page2',
|
||||||
|
_partial: false,
|
||||||
|
_truncated: false,
|
||||||
|
});
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
if (result.success) {
|
||||||
|
expect(result.data.items).toHaveLength(1);
|
||||||
|
expect(result.data.nextCursor).toBe('page2');
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
it('accepts a minimal envelope with empty items', () => {
|
||||||
|
const result = ListSchema.safeParse({ items: [] });
|
||||||
|
expect(result.success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects when items is missing', () => {
|
||||||
|
const result = ListSchema.safeParse({ nextCursor: 'x' });
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects when an item fails validation', () => {
|
||||||
|
const result = ListSchema.safeParse({ items: [{ id: 1, name: 'bad' }] });
|
||||||
|
expect(result.success).toBe(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// FederationGetResponseSchema factory
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('FederationGetResponseSchema', () => {
  // Envelope schema under test, built around a minimal `{ id }` item shape.
  const GetSchema = FederationGetResponseSchema(z.object({ id: z.string() }));

  it('accepts a found item', () => {
    const parsed = GetSchema.safeParse({ item: { id: 'abc' } });
    expect(parsed.success).toBe(true);
    if (!parsed.success) return;
    expect(parsed.data.item).toEqual({ id: 'abc' });
  });

  it('accepts null item (not found)', () => {
    const parsed = GetSchema.safeParse({ item: null });
    expect(parsed.success).toBe(true);
    if (!parsed.success) return;
    expect(parsed.data.item).toBeNull();
  });

  it('rejects when item is missing', () => {
    expect(GetSchema.safeParse({}).success).toBe(false);
  });
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// FederationCapabilitiesResponseSchema
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('FederationCapabilitiesResponseSchema', () => {
  /** Smallest payload the schema accepts; each test overrides or adds fields. */
  const minimal = {
    resources: ['tasks'],
    excluded_resources: [],
    max_rows_per_query: 100,
    supported_verbs: ['list'],
  };

  /** Parses `minimal` with the given fields replaced or added. */
  const parseWith = (overrides: Record<string, unknown> = {}) =>
    FederationCapabilitiesResponseSchema.safeParse({ ...minimal, ...overrides });

  it('accepts a valid capabilities response', () => {
    const parsed = parseWith({
      resources: ['tasks', 'notes'],
      excluded_resources: ['credentials'],
      max_rows_per_query: 500,
      supported_verbs: ['list', 'get', 'capabilities'],
    });
    expect(parsed.success).toBe(true);
    if (!parsed.success) return;
    expect(parsed.data.max_rows_per_query).toBe(500);
  });

  it('accepts a response with filters field', () => {
    const parsed = parseWith({
      resources: ['tasks', 'notes'],
      filters: {
        tasks: { include_teams: ['team-a'], include_personal: true },
        notes: { include_personal: false },
      },
    });
    expect(parsed.success).toBe(true);
    if (!parsed.success) return;
    expect(parsed.data.filters?.['tasks']?.include_teams).toEqual(['team-a']);
  });

  it('accepts a response with partial filters (only include_teams)', () => {
    const parsed = parseWith({
      max_rows_per_query: 50,
      filters: { tasks: { include_teams: ['eng'] } },
    });
    expect(parsed.success).toBe(true);
  });

  it('accepts a response with rate_limit (M4 full shape)', () => {
    const parsed = parseWith({
      rate_limit: { limit_per_minute: 60, remaining: 55, reset_at: '2026-04-23T12:00:00Z' },
    });
    expect(parsed.success).toBe(true);
    if (!parsed.success) return;
    expect(parsed.data.rate_limit?.limit_per_minute).toBe(60);
    expect(parsed.data.rate_limit?.remaining).toBe(55);
  });

  it('accepts a response with rate_limit (M3 minimal — limit_per_minute only)', () => {
    const parsed = parseWith({ rate_limit: { limit_per_minute: 120 } });
    expect(parsed.success).toBe(true);
  });

  it('accepts a response without rate_limit (field is optional)', () => {
    const parsed = parseWith();
    expect(parsed.success).toBe(true);
    if (!parsed.success) return;
    expect(parsed.data.rate_limit).toBeUndefined();
  });

  it('rejects rate_limit with non-positive limit_per_minute', () => {
    const parsed = parseWith({ rate_limit: { limit_per_minute: 0 } });
    expect(parsed.success).toBe(false);
  });

  it('rejects rate_limit with invalid reset_at datetime', () => {
    const parsed = parseWith({
      rate_limit: { limit_per_minute: 60, reset_at: 'not-a-datetime' },
    });
    expect(parsed.success).toBe(false);
  });

  it('rejects supported_verbs with an invalid verb (MED-3 enum guard)', () => {
    const parsed = parseWith({ supported_verbs: ['invalid_verb'] });
    expect(parsed.success).toBe(false);
  });

  it('rejects empty resources array', () => {
    const parsed = parseWith({ resources: [] });
    expect(parsed.success).toBe(false);
  });

  it('rejects non-integer max_rows_per_query', () => {
    const parsed = parseWith({ max_rows_per_query: 1.5 });
    expect(parsed.success).toBe(false);
  });
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// FederationErrorEnvelopeSchema + error code exhaustiveness
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('FederationErrorEnvelopeSchema', () => {
  /** Wraps the given body in `{ error }` and runs it through the schema. */
  const parseError = (error: Record<string, unknown>) =>
    FederationErrorEnvelopeSchema.safeParse({ error });

  it('accepts each valid error code', () => {
    FEDERATION_ERROR_CODES.forEach((code) => {
      const outcome = parseError({ code, message: 'test' });
      expect(outcome.success, `code ${code} should be valid`).toBe(true);
    });
  });

  it('rejects an unknown error code', () => {
    const outcome = parseError({ code: 'unknown_code', message: 'test' });
    expect(outcome.success).toBe(false);
  });

  it('accepts optional details field', () => {
    const outcome = parseError({
      code: 'forbidden',
      message: 'nope',
      details: { grantId: 'xyz' },
    });
    expect(outcome.success).toBe(true);
  });

  it('rejects when message is missing', () => {
    const outcome = parseError({ code: 'not_found' });
    expect(outcome.success).toBe(false);
  });
});
|
||||||
|
|
||||||
|
describe('parseFederationErrorEnvelope', () => {
  it('returns a typed envelope for valid input', () => {
    const wire = { error: { code: 'not_found', message: 'gone' } };
    expect(parseFederationErrorEnvelope(wire).error.code).toBe('not_found');
  });

  it('throws for invalid input', () => {
    const malformed = { bad: 'shape' };
    expect(() => parseFederationErrorEnvelope(malformed)).toThrow();
  });
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// FederationError exception hierarchy
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('FederationError hierarchy', () => {
  // One [wire code, instance] pair per concrete FederationError subclass.
  const cases: Array<[string, FederationError]> = [
    ['unauthorized', new FederationUnauthorizedError()],
    ['forbidden', new FederationForbiddenError()],
    ['not_found', new FederationNotFoundError()],
    ['rate_limited', new FederationRateLimitedError()],
    ['scope_violation', new FederationScopeViolationError()],
    ['invalid_request', new FederationInvalidRequestError()],
    ['internal_error', new FederationInternalError()],
  ];

  it.each(cases)('code %s is an instance of FederationError', (_code, err) => {
    expect(err).toBeInstanceOf(FederationError);
    expect(err).toBeInstanceOf(Error);
  });

  it.each(cases)('code %s has correct code property', (code, err) => {
    expect(err.code).toBe(code);
  });

  it('toEnvelope serialises to wire format', () => {
    const err = new FederationForbiddenError('Access denied', { grantId: 'g1' });
    const env = err.toEnvelope();
    expect(env.error.code).toBe('forbidden');
    expect(env.error.message).toBe('Access denied');
    expect(env.error.details).toEqual({ grantId: 'g1' });
  });

  it('toEnvelope omits details when not provided', () => {
    const err = new FederationNotFoundError();
    const env = err.toEnvelope();
    expect(Object.prototype.hasOwnProperty.call(env.error, 'details')).toBe(false);
  });

  it('error codes tuple covers all subclasses (exhaustiveness check)', () => {
    // Runtime check (nothing here is enforced by the compiler): the codes in
    // `cases` must match FEDERATION_ERROR_CODES exactly. If a code is added to
    // the tuple without a matching entry in `cases`, or vice versa, this fails.
    const coveredCodes = cases.map(([code]) => code);

    // A duplicated entry must not be able to mask a missing code, which a
    // length-only comparison would allow.
    expect(new Set(coveredCodes).size).toBe(coveredCodes.length);

    // Same members regardless of declaration order.
    expect([...coveredCodes].sort()).toEqual([...FEDERATION_ERROR_CODES].sort());
  });
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Source tag + tagWithSource
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('SourceTagSchema', () => {
|
||||||
|
it('accepts a non-empty _source string', () => {
|
||||||
|
expect(SourceTagSchema.safeParse({ _source: 'local' }).success).toBe(true);
|
||||||
|
expect(SourceTagSchema.safeParse({ _source: 'mosaic.uscllc.com' }).success).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('rejects empty _source string', () => {
|
||||||
|
expect(SourceTagSchema.safeParse({ _source: '' }).success).toBe(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe('tagWithSource', () => {
|
||||||
|
it('stamps each item with the given source', () => {
|
||||||
|
const items = [{ id: '1' }, { id: '2' }];
|
||||||
|
const tagged = tagWithSource(items, SOURCE_LOCAL);
|
||||||
|
expect(tagged).toEqual([
|
||||||
|
{ id: '1', _source: 'local' },
|
||||||
|
{ id: '2', _source: 'local' },
|
||||||
|
]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('preserves original item fields', () => {
|
||||||
|
const items = [{ id: 'x', name: 'Task', done: false }];
|
||||||
|
const tagged = tagWithSource(items, 'mosaic.uscllc.com');
|
||||||
|
expect(tagged[0]).toMatchObject({ id: 'x', name: 'Task', done: false });
|
||||||
|
expect(tagged[0]?._source).toBe('mosaic.uscllc.com');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns empty array for empty input', () => {
|
||||||
|
expect(tagWithSource([], 'local')).toEqual([]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('round-trip: tagWithSource output passes SourceTagSchema', () => {
|
||||||
|
const tagged = tagWithSource([{ id: '1' }], 'local');
|
||||||
|
expect(SourceTagSchema.safeParse(tagged[0]).success).toBe(true);
|
||||||
|
});
|
||||||
|
});
|
||||||
164
packages/types/src/federation/error.ts
Normal file
164
packages/types/src/federation/error.ts
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
/**
|
||||||
|
* Federation wire-format error envelope and exception hierarchy.
|
||||||
|
*
|
||||||
|
* Source of truth: docs/federation/PRD.md §6, §8.
|
||||||
|
*
|
||||||
|
* DESIGN: Typed error classes rather than discriminated union values
|
||||||
|
* ──────────────────────────────────────────────────────────────────
|
||||||
|
* We expose:
|
||||||
|
* 1. `FEDERATION_ERROR_CODES` — closed string-enum tuple (exhaustiveness-checkable).
|
||||||
|
* 2. `FederationErrorCode` — union type inferred from the tuple.
|
||||||
|
* 3. `FederationErrorEnvelopeSchema` — Zod schema for the wire format.
|
||||||
|
* 4. `FederationError` — base Error subclass with a typed `code` property.
|
||||||
|
* One concrete subclass per code (e.g. `FederationUnauthorizedError`),
|
||||||
|
* which enables `instanceof` dispatch in handlers without a switch.
|
||||||
|
*
|
||||||
|
* Rationale: subclasses give gateway handlers and the client a clean dispatch
|
||||||
|
* point (catch + instanceof) without re-parsing or switch tables. All classes
|
||||||
|
* carry `code` so a generic logger can act on any FederationError uniformly.
|
||||||
|
*
|
||||||
|
* Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { z } from 'zod';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Error code enum (closed)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Closed list of error codes accepted in a federation error envelope.
 * Declared `as const` so the literal members can be reused as a type.
 */
export const FEDERATION_ERROR_CODES = [
  'unauthorized',
  'forbidden',
  'not_found',
  'rate_limited',
  'scope_violation',
  'invalid_request',
  'internal_error',
] as const;

/** Union of the string literals listed in `FEDERATION_ERROR_CODES`. */
export type FederationErrorCode = (typeof FEDERATION_ERROR_CODES)[number];
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Wire-format schema
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** Body of the `error` key: a known code, a message, and optional free-form details. */
const errorBodySchema = z.object({
  code: z.enum(FEDERATION_ERROR_CODES),
  message: z.string(),
  details: z.unknown().optional(),
});

/** Zod schema for the wire-format error envelope `{ error: { code, message, details? } }`. */
export const FederationErrorEnvelopeSchema = z.object({ error: errorBodySchema });

/** Static type of a parsed error envelope. */
export type FederationErrorEnvelope = z.infer<typeof FederationErrorEnvelopeSchema>;
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Exception class hierarchy
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Base class for all federation errors.
 *
 * Carries a typed `code` (and optional `details`) so handlers can act
 * uniformly on any FederationError, and can serialise itself to the wire
 * envelope via {@link FederationError.toEnvelope}.
 */
export class FederationError extends Error {
  readonly code: FederationErrorCode;
  readonly details?: unknown;

  /**
   * @param code - Wire error code for this failure.
   * @param message - Human-readable description.
   * @param details - Optional extra payload; omitted from the envelope when undefined.
   */
  constructor(code: FederationErrorCode, message: string, details?: unknown) {
    super(message);
    // When compiled to an ES5 target, `super(...)` on a built-in returns a
    // plain Error and drops the subclass prototype, which would break the
    // `instanceof` dispatch this hierarchy exists for. Restoring the
    // prototype here is a no-op on ES2015+ targets.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = 'FederationError';
    this.code = code;
    this.details = details;
  }

  /** Serialise to the wire-format error envelope. */
  toEnvelope(): FederationErrorEnvelope {
    return {
      error: {
        code: this.code,
        message: this.message,
        // Leave the key out entirely rather than emitting `details: undefined`.
        ...(this.details !== undefined ? { details: this.details } : {}),
      },
    };
  }
}

/** Client cert is missing, invalid, or signed by an untrusted CA. */
export class FederationUnauthorizedError extends FederationError {
  constructor(message = 'Unauthorized', details?: unknown) {
    super('unauthorized', message, details);
    this.name = 'FederationUnauthorizedError';
  }
}

/** Grant is inactive, revoked, or the subject user lacks access to the resource. */
export class FederationForbiddenError extends FederationError {
  constructor(message = 'Forbidden', details?: unknown) {
    super('forbidden', message, details);
    this.name = 'FederationForbiddenError';
  }
}

/** Requested resource does not exist. */
export class FederationNotFoundError extends FederationError {
  constructor(message = 'Not found', details?: unknown) {
    super('not_found', message, details);
    this.name = 'FederationNotFoundError';
  }
}

/** Grant has exceeded its rate limit; Retry-After should accompany this. */
export class FederationRateLimitedError extends FederationError {
  constructor(message = 'Rate limit exceeded', details?: unknown) {
    super('rate_limited', message, details);
    this.name = 'FederationRateLimitedError';
  }
}

/**
 * The request targets a resource or performs an action that the grant's
 * scope explicitly disallows (distinct from generic 403 — scope_violation
 * means the scope configuration itself blocked the request).
 */
export class FederationScopeViolationError extends FederationError {
  constructor(message = 'Scope violation', details?: unknown) {
    super('scope_violation', message, details);
    this.name = 'FederationScopeViolationError';
  }
}

/** Malformed request — missing fields, invalid cursor, unknown verb, etc. */
export class FederationInvalidRequestError extends FederationError {
  constructor(message = 'Invalid request', details?: unknown) {
    super('invalid_request', message, details);
    this.name = 'FederationInvalidRequestError';
  }
}

/** Unexpected server-side failure. */
export class FederationInternalError extends FederationError {
  constructor(message = 'Internal error', details?: unknown) {
    super('internal_error', message, details);
    this.name = 'FederationInternalError';
  }
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Typed parser
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Validate an unknown value against the federation error envelope schema.
 *
 * @param input - Value to validate (e.g. a decoded response body).
 * @returns The parsed, typed envelope.
 * @throws Error (a plain Error, not a FederationError) listing every schema
 *   issue when the value is not a valid error envelope.
 */
export function parseFederationErrorEnvelope(input: unknown): FederationErrorEnvelope {
  const parsed = FederationErrorEnvelopeSchema.safeParse(input);
  if (parsed.success) {
    return parsed.data;
  }

  // One bullet per issue; an empty path means the problem is at the top level.
  const bullets: string[] = [];
  for (const issue of parsed.error.issues) {
    const location = issue.path.join('.') || 'root';
    bullets.push(` - [${location}] ${issue.message}`);
  }
  throw new Error(`Invalid federation error envelope:\n${bullets.join('\n')}`);
}
|
||||||
16
packages/types/src/federation/index.ts
Normal file
16
packages/types/src/federation/index.ts
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
/**
|
||||||
|
* Federation wire-format DTOs — public barrel.
|
||||||
|
*
|
||||||
|
* Exports everything downstream M3 tasks need:
|
||||||
|
* verbs.ts — FEDERATION_VERBS constant + FederationVerb type
|
||||||
|
* request.ts — FederationRequestSchema + FederationRequest
|
||||||
|
* response.ts — list/get/capabilities schema factories + types
|
||||||
|
* source-tag.ts — SourceTagSchema, tagWithSource helper
|
||||||
|
* error.ts — error envelope schema + typed exception hierarchy
|
||||||
|
*/
|
||||||
|
|
||||||
|
export * from './verbs.js';
|
||||||
|
export * from './request.js';
|
||||||
|
export * from './response.js';
|
||||||
|
export * from './source-tag.js';
|
||||||
|
export * from './error.js';
|
||||||
47
packages/types/src/federation/request.ts
Normal file
47
packages/types/src/federation/request.ts
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
/**
|
||||||
|
* Federation wire-format request schema.
|
||||||
|
*
|
||||||
|
* Source of truth: docs/federation/PRD.md §9 (query model).
|
||||||
|
*
|
||||||
|
* Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { z } from 'zod';
|
||||||
|
import { FEDERATION_VERBS } from './verbs.js';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
// Federation request envelope
// ---------------------------------------------------------------------------

/** Zod schema for a single federation request. */
export const FederationRequestSchema = z.object({
  /** Verb being invoked; must be one of `FEDERATION_VERBS`. */
  verb: z.enum(FEDERATION_VERBS),

  /**
   * Non-empty resource path, e.g. "tasks", "notes", "memory".
   * Sub-resources are separated by forward slashes (e.g. "teams/abc/tasks").
   */
  resource: z.string().min(1, { message: 'resource must not be empty' }),

  /**
   * Optional free-form query params (filters, sort, etc.) sent alongside
   * the request. Keys and values are both strings; consumers parse values
   * as needed.
   */
  params: z.record(z.string(), z.string()).optional(),

  /**
   * Opaque pagination cursor taken from a previous list response.
   * Omitted when requesting the first page.
   */
  cursor: z.string().optional(),
});

/** Static type of a parsed federation request. */
export type FederationRequest = z.infer<typeof FederationRequestSchema>;
|
||||||
162
packages/types/src/federation/response.ts
Normal file
162
packages/types/src/federation/response.ts
Normal file
@@ -0,0 +1,162 @@
|
|||||||
|
/**
|
||||||
|
* Federation wire-format response schemas.
|
||||||
|
*
|
||||||
|
* Source of truth: docs/federation/PRD.md §9 and MILESTONES.md §M3.
|
||||||
|
*
|
||||||
|
* DESIGN: Generic factory functions rather than z.lazy
|
||||||
|
* ─────────────────────────────────────────────────────
|
||||||
|
* Zod generic schemas cannot be expressed as a single re-usable `z.ZodType`
|
||||||
|
* value because TypeScript's type system erases the generic at the call site.
|
||||||
|
* The idiomatic Zod v4 pattern is factory functions that take an item schema
|
||||||
|
* and return a fully-typed schema.
|
||||||
|
*
|
||||||
|
* const MyListSchema = FederationListResponseSchema(z.string());
|
||||||
|
* type MyList = z.infer<typeof MyListSchema>;
|
||||||
|
* // => { items: string[]; nextCursor?: string; _partial?: boolean; _truncated?: boolean }
|
||||||
|
*
|
||||||
|
* Downstream consumers (M3-03..M3-07, M3-08, M3-09) should call these
|
||||||
|
* factories once per resource type and cache the result.
|
||||||
|
*
|
||||||
|
* Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { z } from 'zod';
|
||||||
|
|
||||||
|
import { FEDERATION_VERBS } from './verbs.js';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Shared envelope flags
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* `_partial`: true when the response is a subset of available data (e.g. due
|
||||||
|
* to scope intersection reducing the result set).
|
||||||
|
*/
|
||||||
|
const PartialFlag = z.boolean().optional();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* `_truncated`: true when the response was capped by max_rows_per_query and
|
||||||
|
* additional pages exist beyond the current cursor.
|
||||||
|
*/
|
||||||
|
const TruncatedFlag = z.boolean().optional();
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// FederationListResponseSchema<T> factory
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Builds the Zod schema for a paginated federation list envelope around the
 * given item schema.
 *
 * @param itemSchema - Zod schema that every entry of `items` must satisfy.
 * @returns An object schema with a required `items` array and optional
 *   `nextCursor`, `_partial` and `_truncated` fields.
 *
 * @example
 * ```ts
 * const TaskListSchema = FederationListResponseSchema(TaskSchema);
 * type TaskList = z.infer<typeof TaskListSchema>;
 * ```
 */
export function FederationListResponseSchema<T extends z.ZodTypeAny>(itemSchema: T) {
  const items = z.array(itemSchema);
  const nextCursor = z.string().optional();
  return z.object({
    items,
    nextCursor,
    _partial: PartialFlag,
    _truncated: TruncatedFlag,
  });
}

/** Plain TypeScript shape of a list envelope carrying items of type `T`. */
export type FederationListResponse<T> = {
  items: T[];
  nextCursor?: string;
  _partial?: boolean;
  _truncated?: boolean;
};
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// FederationGetResponseSchema<T> factory
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Builds the Zod schema for a single-item federation get envelope.
 *
 * The `item` key is required but may be `null`, which signals that the
 * resource was not found (404 equivalent on the wire).
 *
 * @param itemSchema - Zod schema for the item; this factory makes it nullable.
 * @returns An object schema with a nullable `item` and an optional `_partial` flag.
 *
 * @example
 * ```ts
 * const TaskGetSchema = FederationGetResponseSchema(TaskSchema);
 * type TaskGet = z.infer<typeof TaskGetSchema>;
 * ```
 */
export function FederationGetResponseSchema<T extends z.ZodTypeAny>(itemSchema: T) {
  const item = itemSchema.nullable();
  return z.object({ item, _partial: PartialFlag });
}

/** Plain TypeScript shape of a get envelope carrying an item of type `T`. */
export type FederationGetResponse<T> = {
  item: T | null;
  _partial?: boolean;
};
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// FederationCapabilitiesResponseSchema (fixed shape)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Zod schema for the fixed-shape `capabilities` response.
 *
 * Shape mirrors FederationScope (apps/gateway/src/federation/scope-schema.ts)
 * but is kept separate to avoid coupling packages/types to the gateway module.
 * The serving side populates this from the resolved grant scope at request time.
 */
export const FederationCapabilitiesResponseSchema = z.object({
  /** Resources this grant is allowed to query. Must contain at least one entry. */
  resources: z.array(z.string()).nonempty(),

  /** Resources explicitly blocked for this grant even if they exist. May be empty. */
  excluded_resources: z.array(z.string()),

  /**
   * Per-resource filters (mirrors FederationScope.filters from PRD §8.1).
   * Keys are resource names; values control team/personal visibility.
   * Both inner fields are optional.
   */
  filters: z
    .record(
      z.string(),
      z.object({
        include_teams: z.array(z.string()).optional(),
        include_personal: z.boolean().optional(),
      }),
    )
    .optional(),

  /** Hard cap on rows returned per query for this grant; a positive integer. */
  max_rows_per_query: z.number().int().positive(),

  /**
   * Verbs currently available. Will expand in M4+ (search).
   * Closed enum — only values from FEDERATION_VERBS are accepted, and at
   * least one verb must be listed.
   */
  supported_verbs: z.array(z.enum(FEDERATION_VERBS)).nonempty(),

  /**
   * Rate-limit state for this grant (PRD §9.1).
   * M4 populates `remaining` and `reset_at`; M3 servers may return only
   * `limit_per_minute` or omit the field entirely.
   */
  rate_limit: z
    .object({
      limit_per_minute: z.number().int().positive(),
      remaining: z.number().int().nonnegative().optional(),
      // Zod 4 deprecates the `z.string().datetime()` method form in favour of
      // the top-level ISO validators; the accepted inputs are the same.
      reset_at: z.iso.datetime().optional(),
    })
    .optional(),
});

/** Static type of a parsed capabilities response. */
export type FederationCapabilitiesResponse = z.infer<typeof FederationCapabilitiesResponseSchema>;
|
||||||
61
packages/types/src/federation/source-tag.ts
Normal file
61
packages/types/src/federation/source-tag.ts
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
/**
|
||||||
|
* _source tag for federation fan-out results.
|
||||||
|
*
|
||||||
|
* Source of truth: docs/federation/PRD.md §9.3 and MILESTONES.md §M3 acceptance test #8.
|
||||||
|
*
|
||||||
|
* When source: "all" is requested, the gateway fans out to local + all active
|
||||||
|
* federated peers, merges results, and tags each item with _source so the
|
||||||
|
* caller knows the provenance.
|
||||||
|
*
|
||||||
|
* Pure — no NestJS, no DB, no Node-only APIs. Safe for browser/edge.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { z } from 'zod';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Source tag schema
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* `_source` is either:
|
||||||
|
* - `"local"` — the item came from this gateway's own storage.
|
||||||
|
* - a peer common name (e.g. `"mosaic.uscllc.com"`) — the item came from
|
||||||
|
* that federated peer.
|
||||||
|
*/
|
||||||
|
export const SourceTagSchema = z.object({
|
||||||
|
_source: z.string().min(1, { message: '_source must not be empty' }),
|
||||||
|
});
|
||||||
|
|
||||||
|
export type SourceTag = z.infer<typeof SourceTagSchema>;
|
||||||
|
|
||||||
|
/**
|
||||||
|
 * Well-known `_source` literal marking items that came from this gateway's own storage.
|
||||||
|
* Peers are identified by hostname strings, so there is no closed enum.
|
||||||
|
*/
|
||||||
|
export const SOURCE_LOCAL = 'local' as const;
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helper: tagWithSource
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Returns a new array in which every item is shallow-copied and stamped with
 * `{ _source: source }`.
 *
 * Neither the input array nor its items are mutated, so readonly arrays are
 * accepted. If an item already has a `_source` property it is overwritten in
 * the copy.
 *
 * The return type merges the item type with SourceTag so callers get full
 * type-safety on both the original fields and `_source`.
 *
 * @param items - Items to tag; may be empty or readonly.
 * @param source - Provenance label: `"local"` or a peer name such as
 *   `"mosaic.uscllc.com"`. Not validated here.
 * @returns A fresh array of tagged copies, in the same order as `items`.
 *
 * @example
 * ```ts
 * const local = tagWithSource([{ id: '1', title: 'Task' }], 'local');
 * // => [{ id: '1', title: 'Task', _source: 'local' }]
 *
 * const remote = tagWithSource(peerItems, 'mosaic.uscllc.com');
 * ```
 */
export function tagWithSource<T extends object>(
  items: readonly T[],
  source: string,
): Array<T & SourceTag> {
  return items.map((item) => ({ ...item, _source: source }));
}
|
||||||
11
packages/types/src/federation/verbs.ts
Normal file
11
packages/types/src/federation/verbs.ts
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
/**
|
||||||
|
* Federation verb constants and types.
|
||||||
|
*
|
||||||
|
* Source of truth: docs/federation/PRD.md §9.1
|
||||||
|
*
|
||||||
|
* M3 ships list, get, capabilities. search lives in M4.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Federation verbs currently defined; `as const` keeps each entry typed as its string literal. */
export const FEDERATION_VERBS = ['list', 'get', 'capabilities'] as const;
|
||||||
|
|
||||||
|
/** Union of the string literals contained in `FEDERATION_VERBS`. */
export type FederationVerb = (typeof FEDERATION_VERBS)[number];
|
||||||
@@ -5,3 +5,4 @@ export * from './agent/index.js';
|
|||||||
export * from './provider/index.js';
|
export * from './provider/index.js';
|
||||||
export * from './routing/index.js';
|
export * from './routing/index.js';
|
||||||
export * from './commands/index.js';
|
export * from './commands/index.js';
|
||||||
|
export * from './federation/index.js';
|
||||||
|
|||||||
16
pnpm-lock.yaml
generated
16
pnpm-lock.yaml
generated
@@ -179,6 +179,9 @@ importers:
|
|||||||
socket.io:
|
socket.io:
|
||||||
specifier: ^4.8.0
|
specifier: ^4.8.0
|
||||||
version: 4.8.3
|
version: 4.8.3
|
||||||
|
undici:
|
||||||
|
specifier: ^7.24.6
|
||||||
|
version: 7.24.6
|
||||||
uuid:
|
uuid:
|
||||||
specifier: ^11.0.0
|
specifier: ^11.0.0
|
||||||
version: 11.1.0
|
version: 11.1.0
|
||||||
@@ -679,6 +682,9 @@ importers:
|
|||||||
class-validator:
|
class-validator:
|
||||||
specifier: ^0.15.1
|
specifier: ^0.15.1
|
||||||
version: 0.15.1
|
version: 0.15.1
|
||||||
|
zod:
|
||||||
|
specifier: ^4.3.6
|
||||||
|
version: 4.3.6
|
||||||
devDependencies:
|
devDependencies:
|
||||||
typescript:
|
typescript:
|
||||||
specifier: ^5.8.0
|
specifier: ^5.8.0
|
||||||
@@ -6990,10 +6996,6 @@ packages:
|
|||||||
resolution: {integrity: sha512-gBLkYIlEnSp8pFbT64yFgGE6UIB9tAkhukC23PmMDCe5Nd+cRqKxSjw5y54MK2AZMgZfJWMaNE4nYUHgi1XEOw==}
|
resolution: {integrity: sha512-gBLkYIlEnSp8pFbT64yFgGE6UIB9tAkhukC23PmMDCe5Nd+cRqKxSjw5y54MK2AZMgZfJWMaNE4nYUHgi1XEOw==}
|
||||||
engines: {node: '>=18.17'}
|
engines: {node: '>=18.17'}
|
||||||
|
|
||||||
undici@7.24.3:
|
|
||||||
resolution: {integrity: sha512-eJdUmK/Wrx2d+mnWWmwwLRyA7OQCkLap60sk3dOK4ViZR7DKwwptwuIvFBg2HaiP9ESaEdhtpSymQPvytpmkCA==}
|
|
||||||
engines: {node: '>=20.18.1'}
|
|
||||||
|
|
||||||
undici@7.24.6:
|
undici@7.24.6:
|
||||||
resolution: {integrity: sha512-Xi4agocCbRzt0yYMZGMA6ApD7gvtUFaxm4ZmeacWI4cZxaF6C+8I8QfofC20NAePiB/IcvZmzkJ7XPa471AEtA==}
|
resolution: {integrity: sha512-Xi4agocCbRzt0yYMZGMA6ApD7gvtUFaxm4ZmeacWI4cZxaF6C+8I8QfofC20NAePiB/IcvZmzkJ7XPa471AEtA==}
|
||||||
engines: {node: '>=20.18.1'}
|
engines: {node: '>=20.18.1'}
|
||||||
@@ -8728,7 +8730,7 @@ snapshots:
|
|||||||
openai: 6.26.0(ws@8.20.0)(zod@4.3.6)
|
openai: 6.26.0(ws@8.20.0)(zod@4.3.6)
|
||||||
partial-json: 0.1.7
|
partial-json: 0.1.7
|
||||||
proxy-agent: 6.5.0
|
proxy-agent: 6.5.0
|
||||||
undici: 7.24.3
|
undici: 7.24.6
|
||||||
zod-to-json-schema: 3.25.1(zod@4.3.6)
|
zod-to-json-schema: 3.25.1(zod@4.3.6)
|
||||||
transitivePeerDependencies:
|
transitivePeerDependencies:
|
||||||
- '@modelcontextprotocol/sdk'
|
- '@modelcontextprotocol/sdk'
|
||||||
@@ -12587,7 +12589,7 @@ snapshots:
|
|||||||
saxes: 6.0.0
|
saxes: 6.0.0
|
||||||
symbol-tree: 3.2.4
|
symbol-tree: 3.2.4
|
||||||
tough-cookie: 6.0.1
|
tough-cookie: 6.0.1
|
||||||
undici: 7.24.3
|
undici: 7.24.6
|
||||||
w3c-xmlserializer: 5.0.0
|
w3c-xmlserializer: 5.0.0
|
||||||
webidl-conversions: 8.0.1
|
webidl-conversions: 8.0.1
|
||||||
whatwg-mimetype: 5.0.0
|
whatwg-mimetype: 5.0.0
|
||||||
@@ -14438,8 +14440,6 @@ snapshots:
|
|||||||
|
|
||||||
undici@6.21.3: {}
|
undici@6.21.3: {}
|
||||||
|
|
||||||
undici@7.24.3: {}
|
|
||||||
|
|
||||||
undici@7.24.6: {}
|
undici@7.24.6: {}
|
||||||
|
|
||||||
unhomoglyph@1.0.6: {}
|
unhomoglyph@1.0.6: {}
|
||||||
|
|||||||
125
scratchpads/fix-db-bootstrap-migrations.md
Normal file
125
scratchpads/fix-db-bootstrap-migrations.md
Normal file
@@ -0,0 +1,125 @@
|
|||||||
|
# fix(db): bootstrap migrations on local-tier gateway startup
|
||||||
|
|
||||||
|
## Problem
|
||||||
|
|
||||||
|
Fresh `mosaic gateway install` (npm-installed) leaves the gateway DB schema empty:
|
||||||
|
|
||||||
|
```
|
||||||
|
relation "users" does not exist
|
||||||
|
```
|
||||||
|
|
||||||
|
Sign-in 500s, `auth users create` says "Not signed in", `admin/bootstrap setup`
|
||||||
|
also fails — every entry point queries `users` before doing anything else.
|
||||||
|
|
||||||
|
## Scope
|
||||||
|
|
||||||
|
This PR fixes the **local (PGlite) tier** end-to-end. The postgres-tier path
|
||||||
|
has additional pre-existing bugs (see "Known issues, out of scope" below) and
|
||||||
|
needs a separate change with real Postgres validation.
|
||||||
|
|
||||||
|
## Root causes addressed (5 stacked bugs on the local-tier path)
|
||||||
|
|
||||||
|
1. **`packages/db/package.json` `files: ["dist"]`** — the `drizzle/` SQL
|
||||||
|
migrations folder is excluded from the published tarball. Even if a
|
||||||
|
migrate runner existed, it would have nothing to apply.
|
||||||
|
|
||||||
|
2. **`packages/db/src/migrate.ts`** only supports `postgres-js`. Local-tier
|
||||||
|
gateways use embedded PGlite, which can't be reached over a postgres wire
|
||||||
|
protocol — so `runMigrations()` is unusable for the local tier.
|
||||||
|
|
||||||
|
3. **`apps/gateway/src/database/database.module.ts`** never invokes
|
||||||
|
migrations at startup. The module creates the DB handle and storage
|
||||||
|
adapter, but no consumer calls `.migrate()` on either. `mosaic storage
|
||||||
|
migrate` CLI even claims "pglite runs schema setup automatically on first
|
||||||
|
connection via `adapter.migrate()`" — but `adapter.migrate()` is only
|
||||||
|
called by tests, never at runtime.
|
||||||
|
|
||||||
|
4. **`createPgliteDb` does not load the pgvector extension.** Migration 0001
|
||||||
|
declares `CREATE EXTENSION IF NOT EXISTS vector;` for the
|
||||||
|
`insights.embedding` column. Bare PGlite has no pgvector — the migration
|
||||||
|
fails on extension control file lookup.
|
||||||
|
|
||||||
|
5. **Drizzle's PG migrator wraps all migrations in a single outer transaction.**
|
||||||
|
Migration 0009 does `ALTER TYPE grant_status ADD VALUE 'pending'` and then
|
||||||
|
`ALTER TABLE federation_grants ALTER COLUMN status SET DEFAULT 'pending'`.
|
||||||
|
Postgres' `check_safe_enum_use` rejects the second statement because the
|
||||||
|
new enum value isn't committed yet. Splitting the migration into two
|
||||||
|
files doesn't help — drizzle batches all migrations into one outer tx.
|
||||||
|
|
||||||
|
## Fix
|
||||||
|
|
||||||
|
- `packages/db/package.json` — ship `drizzle/` in `files`.
|
||||||
|
- `packages/db/src/client-pglite.ts` — load `@electric-sql/pglite/vector`.
|
||||||
|
- `packages/db/src/migrate.ts` — add `runPgliteMigrations(handle)`. Walks the
|
||||||
|
Drizzle journal and runs each statement-breakpoint chunk through PGlite's
|
||||||
|
`client.exec()` (Simple Query protocol → autocommit per statement). Writes
|
||||||
|
to the standard `drizzle.__drizzle_migrations` ledger so the result is
|
||||||
|
interoperable with `runMigrations()` on a postgres-backed deployment.
|
||||||
|
Per-statement try/catch surfaces which statement of which migration failed
|
||||||
|
and the ledger row is only written on full success.
|
||||||
|
- `packages/db/src/index.ts` — re-export.
|
||||||
|
- `apps/gateway/src/database/database.module.ts` — implement `OnModuleInit`:
|
||||||
|
- Local tier → `runPgliteMigrations(handle)`, then `storageAdapter.migrate()`
|
||||||
|
(the local storage adapter has its own kv tables in a separate PGlite dir).
|
||||||
|
- Postgres tier → `storageAdapter.migrate()` only, since
|
||||||
|
`PostgresAdapter.migrate()` already calls `runMigrations(url)` against
|
||||||
|
the same DATABASE_URL — we deliberately don't double-call.
|
||||||
|
|
||||||
|
NestJS awaits `onModuleInit` before `app.listen()`, so DB-dependent modules
|
||||||
|
see a populated schema before any HTTP traffic is accepted.
|
||||||
|
|
||||||
|
- `packages/storage/src/test-utils/pglite-with-vector.ts` — **deleted**.
|
||||||
|
The "intentionally not exported" rationale is moot now that migration 0001
|
||||||
|
forces pgvector load anyway. `migrate-tier.integration.test.ts` switched
|
||||||
|
to `createPgliteDb` + `runPgliteMigrations` from `@mosaicstack/db`.
|
||||||
|
|
||||||
|
## Tests
|
||||||
|
|
||||||
|
`packages/db/src/migrate.test.ts`:
|
||||||
|
|
||||||
|
- Verifies `runPgliteMigrations` creates the BetterAuth tables (the original
|
||||||
|
failure mode).
|
||||||
|
- Idempotence (transitively re-runs migration 0009).
|
||||||
|
- Partial-failure: pre-creates a conflicting `users` table, asserts the
|
||||||
|
thrown error includes statement context (`hash=… statement #N failed`)
|
||||||
|
and that no ledger row was written.
|
||||||
|
|
||||||
|
## QA evidence
|
||||||
|
|
||||||
|
End-to-end on a fresh PGlite install:
|
||||||
|
|
||||||
|
- `[DatabaseModule] Applying PGlite schema migrations...` then
|
||||||
|
`Initializing storage adapter (pglite)...` in startup log.
|
||||||
|
- `GET /api/bootstrap/status` → `{"needsSetup":true}` HTTP 200 (was 500
|
||||||
|
with `relation "users" does not exist`).
|
||||||
|
- `POST /api/bootstrap/setup` with empty body → HTTP 400 with Zod
|
||||||
|
validation error (was 500), confirming the request reached the
|
||||||
|
validator past the table-existence check.
|
||||||
|
|
||||||
|
## Known issues, out of scope (file separately)
|
||||||
|
|
||||||
|
- **Postgres-tier first install is still broken.** `runMigrations()` uses
|
||||||
|
Drizzle's `migratePostgres`, which has the same outer-transaction problem
|
||||||
|
as PGlite's migrator. A fresh standalone-tier install would also fail at
|
||||||
|
migration 0009. Inline TODO in `migrate.ts:31-35` flags this. Fixing it
|
||||||
|
needs either (a) a shared per-statement loop reused for both drivers, or
|
||||||
|
(b) splitting migration 0009.
|
||||||
|
- **`drizzle/meta/_journal.json` has 0009 ordered before 0008** (`when`
|
||||||
|
values `1745280000000` < `1776822435828`). `migratePostgres` skips by
|
||||||
|
`created_at < folderMillis`, so on a postgres deployment that already
|
||||||
|
applied 0008, 0009 would be skipped forever. Our hash-based skip in the
|
||||||
|
PGlite path sidesteps this.
|
||||||
|
- **No advisory lock around the migration loop.** Two gateway processes
|
||||||
|
pointed at the same DATABASE_URL would race. PGlite is single-process by
|
||||||
|
file lock so the local tier is fine; postgres-tier deployments should add
|
||||||
|
`pg_advisory_lock(<deterministic-id>)` around the loop in a follow-up.
|
||||||
|
- **`mosaic storage migrate` CLI message is misleading** — it claims
|
||||||
|
"automatic on first connection via adapter.migrate()" but the adapter
|
||||||
|
doesn't self-migrate. With this PR the gateway invokes it explicitly, but
|
||||||
|
the CLI message could still be tightened.
|
||||||
|
- **Crash mid-migration leaves a partial-state PGlite DB without a ledger
|
||||||
|
row.** Detected loudly on next boot (the replay errors on "already
|
||||||
|
exists"), but recovery is manual (drop the partially-applied objects or
|
||||||
|
insert the migration hash into `drizzle.__drizzle_migrations`). A robust
|
||||||
|
fix would add a "started_at" column to a sidecar table to detect
|
||||||
|
half-applied state and refuse to start with actionable guidance.
|
||||||
254
tools/federation-harness/README.md
Normal file
254
tools/federation-harness/README.md
Normal file
@@ -0,0 +1,254 @@
|
|||||||
|
# Federation Test Harness
|
||||||
|
|
||||||
|
Local two-gateway federation test infrastructure for Mosaic Stack M3+.
|
||||||
|
|
||||||
|
This harness boots two real gateway instances (`gateway-a`, `gateway-b`) on a
|
||||||
|
shared Docker bridge network, each backed by its own Postgres (pgvector) +
|
||||||
|
Valkey, sharing a single Step-CA. It is the test bed for all M3+ federation
|
||||||
|
E2E tests.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- Docker with Compose v2 (`docker compose version` ≥ 2.20)
|
||||||
|
- pnpm (for running via repo scripts)
|
||||||
|
- `infra/step-ca/dev-password` must exist (copy from `infra/step-ca/dev-password.example`)
|
||||||
|
|
||||||
|
## Network Topology
|
||||||
|
|
||||||
|
```
|
||||||
|
Host machine
|
||||||
|
├── localhost:14001 → gateway-a (Server A — home / requesting)
|
||||||
|
├── localhost:14002 → gateway-b (Server B — work / serving)
|
||||||
|
├── localhost:15432 → postgres-a
|
||||||
|
├── localhost:15433 → postgres-b
|
||||||
|
├── localhost:16379 → valkey-a
|
||||||
|
├── localhost:16380 → valkey-b
|
||||||
|
└── localhost:19000 → step-ca (shared CA)
|
||||||
|
|
||||||
|
Docker network: fed-test-net (bridge)
|
||||||
|
gateway-a ←──── mTLS ────→ gateway-b
|
||||||
|
↘ ↗
|
||||||
|
step-ca
|
||||||
|
```
|
||||||
|
|
||||||
|
Ports are chosen to avoid collision with the base dev stack (5433, 6380, 14242, 9000).
|
||||||
|
|
||||||
|
## Starting the Harness
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# From repo root
|
||||||
|
docker compose -f tools/federation-harness/docker-compose.two-gateways.yml up -d
|
||||||
|
|
||||||
|
# Wait for all services to be healthy (~60-90s on first boot due to NestJS cold start)
|
||||||
|
docker compose -f tools/federation-harness/docker-compose.two-gateways.yml ps
|
||||||
|
```
|
||||||
|
|
||||||
|
## Seeding Test Data
|
||||||
|
|
||||||
|
The seed script provisions three grant scope variants (A, B, C) and walks the
|
||||||
|
full enrollment flow so Server A ends up with active peers pointing at Server B.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Assumes stack is already running
|
||||||
|
pnpm tsx tools/federation-harness/seed.ts
|
||||||
|
|
||||||
|
# Or boot + seed in one step
|
||||||
|
pnpm tsx tools/federation-harness/seed.ts --boot
|
||||||
|
```
|
||||||
|
|
||||||
|
### Scope Variants
|
||||||
|
|
||||||
|
| Variant | Resources | Filters | Excluded | Purpose |
|
||||||
|
| ------- | ------------------ | ---------------------------------- | ----------- | ------------------------------- |
|
||||||
|
| A | tasks, notes | include_personal: true | (none) | Personal data federation |
|
||||||
|
| B | tasks | include_teams: ['T1'], no personal | (none) | Team-scoped, no personal |
|
||||||
|
| C | tasks, credentials | include_personal: true | credentials | Sanity: excluded wins over list |
|
||||||
|
|
||||||
|
## Using from Vitest
|
||||||
|
|
||||||
|
```ts
|
||||||
|
import {
|
||||||
|
bootHarness,
|
||||||
|
tearDownHarness,
|
||||||
|
serverA,
|
||||||
|
serverB,
|
||||||
|
seed,
|
||||||
|
} from '../../tools/federation-harness/harness.js';
|
||||||
|
import type { HarnessHandle } from '../../tools/federation-harness/harness.js';
|
||||||
|
|
||||||
|
let handle: HarnessHandle;
|
||||||
|
|
||||||
|
beforeAll(async () => {
|
||||||
|
handle = await bootHarness();
|
||||||
|
}, 180_000); // allow 3 min for Docker pull + NestJS cold start
|
||||||
|
|
||||||
|
afterAll(async () => {
|
||||||
|
await tearDownHarness(handle);
|
||||||
|
});
|
||||||
|
|
||||||
|
test('variant A: list tasks returns personal tasks', async () => {
|
||||||
|
// NOTE: Only 'all' is supported for now — per-variant narrowing is M3-11.
|
||||||
|
const seedResult = await seed(handle, 'all');
|
||||||
|
const a = serverA(handle);
|
||||||
|
|
||||||
|
const res = await fetch(`${a.baseUrl}/api/federation/tasks`, {
|
||||||
|
headers: { 'x-federation-grant': seedResult.grants.variantA.id },
|
||||||
|
});
|
||||||
|
expect(res.status).toBe(200);
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
> **Note:** `seed()` bootstraps a fresh admin user on each gateway via
|
||||||
|
> `POST /api/bootstrap/setup`. Both gateways must have zero users (pristine DB).
|
||||||
|
> If either gateway already has users, `seed()` throws with a clear error.
|
||||||
|
> Reset state with `docker compose down -v`.
|
||||||
|
|
||||||
|
The `bootHarness()` function is **idempotent**: if both gateways are already
|
||||||
|
healthy, it reuses the running stack and returns `ownedStack: false`. Tests
|
||||||
|
should not call `tearDownHarness` when `ownedStack` is false unless they
|
||||||
|
explicitly want to shut down a shared stack.
|
||||||
|
|
||||||
|
## Vitest Config (pnpm test:federation)
|
||||||
|
|
||||||
|
Add the following to `vitest.config.ts` at the repo root, or — as shown here and assumed by the script below — to a dedicated `vitest.federation.config.ts`:
|
||||||
|
|
||||||
|
```ts
|
||||||
|
// vitest.federation.config.ts
|
||||||
|
import { defineConfig } from 'vitest/config';
|
||||||
|
|
||||||
|
export default defineConfig({
|
||||||
|
test: {
|
||||||
|
include: ['**/*.federation.test.ts'],
|
||||||
|
testTimeout: 60_000,
|
||||||
|
hookTimeout: 180_000,
|
||||||
|
reporters: ['verbose'],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
Then add to root `package.json`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"test:federation": "vitest run --config vitest.federation.config.ts"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Nuking State
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Remove containers AND volumes (ephemeral state — CA keys, DBs, everything)
|
||||||
|
docker compose -f tools/federation-harness/docker-compose.two-gateways.yml down -v
|
||||||
|
```
|
||||||
|
|
||||||
|
On next `up`, Step-CA re-initialises from scratch and generates new CA keys.
|
||||||
|
|
||||||
|
## Step-CA Root Certificate
|
||||||
|
|
||||||
|
The CA root lives in the `fed-harness-step-ca` Docker volume at
|
||||||
|
`/home/step/certs/root_ca.crt`. To extract it to the host:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run --rm \
|
||||||
|
-v fed-harness-step-ca:/home/step \
|
||||||
|
alpine cat /home/step/certs/root_ca.crt > /tmp/fed-harness-root-ca.crt
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Port conflicts
|
||||||
|
|
||||||
|
Default host ports: 14001, 14002, 15432, 15433, 16379, 16380, 19000.
|
||||||
|
Override via environment variables before `docker compose up`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
GATEWAY_A_HOST_PORT=14101 GATEWAY_B_HOST_PORT=14102 \
|
||||||
|
docker compose -f tools/federation-harness/docker-compose.two-gateways.yml up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
### Image pull failures
|
||||||
|
|
||||||
|
The gateway image is digest-pinned to:
|
||||||
|
|
||||||
|
```
|
||||||
|
git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
|
||||||
|
```
|
||||||
|
|
||||||
|
(sha-9f1a081, post-#491 IMG-FIX)
|
||||||
|
|
||||||
|
If the registry is unreachable, Docker will use the locally cached image if
|
||||||
|
present. If no local image exists, the compose up will fail with a pull error.
|
||||||
|
In that case:
|
||||||
|
|
||||||
|
1. Ensure you can reach `git.mosaicstack.dev` (VPN, DNS, etc.).
|
||||||
|
2. Log in: `docker login git.mosaicstack.dev`
|
||||||
|
3. Pull manually: `docker pull git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02`
|
||||||
|
|
||||||
|
### NestJS cold start
|
||||||
|
|
||||||
|
Gateway containers take 40–60 seconds to become healthy on first boot (Node.js
|
||||||
|
module resolution + NestJS DI bootstrap). The `start_period: 60s` in the
|
||||||
|
compose healthcheck covers this. `bootHarness()` polls for up to 3 minutes.
|
||||||
|
|
||||||
|
### Step-CA startup
|
||||||
|
|
||||||
|
Step-CA initialises on first boot (generates CA keys). This takes ~5-10s.
|
||||||
|
The `start_period: 30s` in the healthcheck covers it. Both gateways wait for
|
||||||
|
Step-CA to be healthy before starting (`depends_on: step-ca: condition: service_healthy`).
|
||||||
|
|
||||||
|
### dev-password missing
|
||||||
|
|
||||||
|
The Step-CA container requires `infra/step-ca/dev-password` to be mounted.
|
||||||
|
Copy the example and set a local password:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp infra/step-ca/dev-password.example infra/step-ca/dev-password
|
||||||
|
# Edit the file to set your preferred dev CA password
|
||||||
|
```
|
||||||
|
|
||||||
|
The file is `.gitignore`d — do not commit it.
|
||||||
|
|
||||||
|
## Image Digest Note
|
||||||
|
|
||||||
|
The gateway image is pinned to `sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02`
|
||||||
|
(sha-9f1a081). This is the digest promoted by PR #491 (IMG-FIX). The `latest`
|
||||||
|
tag is forbidden per Mosaic image policy. When a new gateway build is promoted,
|
||||||
|
update the digest in `docker-compose.two-gateways.yml` and in this file.
|
||||||
|
|
||||||
|
## Known Limitations
|
||||||
|
|
||||||
|
### BETTER_AUTH_URL enrollment URL bug (upstream production code — not yet fixed)
|
||||||
|
|
||||||
|
`apps/gateway/src/federation/federation.controller.ts:145` constructs the
|
||||||
|
enrollment URL using `process.env['BETTER_AUTH_URL'] ?? 'http://localhost:14242'`.
|
||||||
|
This is an upstream bug: `BETTER_AUTH_URL` is the Better Auth origin (typically
|
||||||
|
the web app), not the gateway's own base URL. In non-harness deployments this
|
||||||
|
produces an enrollment URL pointing to the wrong host or port.
|
||||||
|
|
||||||
|
**How the harness handles this:**
|
||||||
|
|
||||||
|
1. **In-cluster calls (container-to-container):** The compose file sets
|
||||||
|
`BETTER_AUTH_URL: 'http://gateway-b:3000'` so the enrollment URL returned by
|
||||||
|
the gateway uses the Docker internal hostname. This lets other containers in the
|
||||||
|
`fed-test-net` network resolve and reach Server B's enrollment endpoint.
|
||||||
|
|
||||||
|
2. **Host-side URL rewrite (seed script):** The `seed.ts` script runs on the host
|
||||||
|
machine where `gateway-b` is not a resolvable hostname. Before calling
|
||||||
|
`fetch(enrollmentUrl, ...)`, the seed script rewrites the URL: it extracts only
|
||||||
|
the token path segment from `enrollmentUrl` and reassembles the URL using the
|
||||||
|
host-accessible `serverBUrl` (default: `http://localhost:14002`). This lets the
|
||||||
|
seed script redeem enrollment tokens from the host without being affected by the
|
||||||
|
in-cluster hostname in the returned URL.
|
||||||
|
|
||||||
|
**TODO:** Fix `federation.controller.ts` to derive the enrollment URL from its own
|
||||||
|
listening address (e.g. `GATEWAY_BASE_URL` env var or a dedicated
|
||||||
|
`FEDERATION_ENROLLMENT_BASE_URL` env var) rather than reusing `BETTER_AUTH_URL`.
|
||||||
|
Tracked as a follow-up to PR #505 — do not bundle with harness changes.
|
||||||
|
|
||||||
|
## Permanent Infrastructure
|
||||||
|
|
||||||
|
This harness is designed to outlive M3 and be reused by M4+ milestone tests.
|
||||||
|
It is not a throwaway scaffold — treat it as production test infrastructure:
|
||||||
|
|
||||||
|
- Keep it idempotent.
|
||||||
|
- Do not hardcode test assumptions in the harness layer (put them in tests).
|
||||||
|
- Update the seed script when new scope variants are needed.
|
||||||
|
- The README and harness should be kept in sync as the federation API evolves.
|
||||||
247
tools/federation-harness/docker-compose.two-gateways.yml
Normal file
247
tools/federation-harness/docker-compose.two-gateways.yml
Normal file
@@ -0,0 +1,247 @@
|
|||||||
|
# tools/federation-harness/docker-compose.two-gateways.yml
|
||||||
|
#
|
||||||
|
# Two-gateway federation test harness — local-only, no Portainer/Swarm needed.
|
||||||
|
#
|
||||||
|
# USAGE (manual):
|
||||||
|
# docker compose -f tools/federation-harness/docker-compose.two-gateways.yml up -d
|
||||||
|
# docker compose -f tools/federation-harness/docker-compose.two-gateways.yml down -v
|
||||||
|
#
|
||||||
|
# USAGE (from harness.ts):
|
||||||
|
# const handle = await bootHarness();
|
||||||
|
# ...
|
||||||
|
# await tearDownHarness(handle);
|
||||||
|
#
|
||||||
|
# TOPOLOGY:
|
||||||
|
# gateway-a — "home" instance (Server A, the requesting side)
|
||||||
|
# └── postgres-a (pgvector/pg17, port 15432)
|
||||||
|
# └── valkey-a (port 16379)
|
||||||
|
# gateway-b — "work" instance (Server B, the serving side)
|
||||||
|
# └── postgres-b (pgvector/pg17, port 15433)
|
||||||
|
# └── valkey-b (port 16380)
|
||||||
|
# step-ca — shared CA for both gateways (port 19000)
|
||||||
|
#
|
||||||
|
# All services share the `fed-test-net` bridge network.
|
||||||
|
# Host port ranges (15432-15433, 16379-16380, 14001-14002, 19000) are chosen
|
||||||
|
# to avoid collision with the base dev stack (5433, 6380, 14242, 9000).
|
||||||
|
#
|
||||||
|
# IMAGE:
|
||||||
|
# Pinned to the immutable digest sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
|
||||||
|
# (sha-9f1a081, post-#491 IMG-FIX, smoke-tested locally).
|
||||||
|
# Update this digest only after a new CI build is promoted to the registry.
|
||||||
|
#
|
||||||
|
# STEP-CA:
|
||||||
|
# Single shared Step-CA instance. Both gateways connect to it.
|
||||||
|
# CA volume is ephemeral per `docker compose down -v`; regenerated on next up.
|
||||||
|
# The harness seed script provisions cross-trust of the CA roots after first boot.
|
||||||
|
|
||||||
|
services:
|
||||||
|
# ─── Shared Certificate Authority ────────────────────────────────────────────
|
||||||
|
step-ca:
|
||||||
|
image: smallstep/step-ca:0.27.4
|
||||||
|
container_name: fed-harness-step-ca
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- '${STEP_CA_HOST_PORT:-19000}:9000'
|
||||||
|
volumes:
|
||||||
|
- step_ca_data:/home/step
|
||||||
|
- ../../infra/step-ca/init.sh:/usr/local/bin/mosaic-step-ca-init.sh:ro
|
||||||
|
- ../../infra/step-ca/templates:/etc/step-ca-templates:ro
|
||||||
|
- ../../infra/step-ca/dev-password:/run/secrets/ca_password:ro
|
||||||
|
entrypoint: ['/bin/sh', '/usr/local/bin/mosaic-step-ca-init.sh']
|
||||||
|
networks:
|
||||||
|
- fed-test-net
|
||||||
|
healthcheck:
|
||||||
|
test:
|
||||||
|
[
|
||||||
|
'CMD',
|
||||||
|
'step',
|
||||||
|
'ca',
|
||||||
|
'health',
|
||||||
|
'--ca-url',
|
||||||
|
'https://localhost:9000',
|
||||||
|
'--root',
|
||||||
|
'/home/step/certs/root_ca.crt',
|
||||||
|
]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
start_period: 30s
|
||||||
|
|
||||||
|
# ─── Server A — Home / Requesting Gateway ────────────────────────────────────
|
||||||
|
postgres-a:
|
||||||
|
image: pgvector/pgvector:pg17
|
||||||
|
container_name: fed-harness-postgres-a
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- '${PG_A_HOST_PORT:-15432}:5432'
|
||||||
|
environment:
|
||||||
|
POSTGRES_USER: mosaic
|
||||||
|
POSTGRES_PASSWORD: mosaic
|
||||||
|
POSTGRES_DB: mosaic
|
||||||
|
volumes:
|
||||||
|
- pg_a_data:/var/lib/postgresql/data
|
||||||
|
- ../../infra/pg-init:/docker-entrypoint-initdb.d:ro
|
||||||
|
networks:
|
||||||
|
- fed-test-net
|
||||||
|
healthcheck:
|
||||||
|
test: ['CMD-SHELL', 'pg_isready -U mosaic']
|
||||||
|
interval: 5s
|
||||||
|
timeout: 3s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
valkey-a:
|
||||||
|
image: valkey/valkey:8-alpine
|
||||||
|
container_name: fed-harness-valkey-a
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- '${VALKEY_A_HOST_PORT:-16379}:6379'
|
||||||
|
volumes:
|
||||||
|
- valkey_a_data:/data
|
||||||
|
networks:
|
||||||
|
- fed-test-net
|
||||||
|
healthcheck:
|
||||||
|
test: ['CMD', 'valkey-cli', 'ping']
|
||||||
|
interval: 5s
|
||||||
|
timeout: 3s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
gateway-a:
|
||||||
|
image: git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
|
||||||
|
# Tag for human reference: sha-9f1a081 (post-#491 IMG-FIX; smoke-tested locally)
|
||||||
|
container_name: fed-harness-gateway-a
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- '${GATEWAY_A_HOST_PORT:-14001}:3000'
|
||||||
|
environment:
|
||||||
|
MOSAIC_TIER: federated
|
||||||
|
DATABASE_URL: postgres://mosaic:mosaic@postgres-a:5432/mosaic
|
||||||
|
VALKEY_URL: redis://valkey-a:6379
|
||||||
|
GATEWAY_PORT: '3000'
|
||||||
|
GATEWAY_CORS_ORIGIN: 'http://localhost:14001'
|
||||||
|
BETTER_AUTH_SECRET: harness-secret-server-a-do-not-use-in-prod
|
||||||
|
BETTER_AUTH_URL: 'http://gateway-a:3000'
|
||||||
|
STEP_CA_URL: 'https://step-ca:9000'
|
||||||
|
FEDERATION_PEER_HOSTNAME: gateway-a
|
||||||
|
# Bootstrap password for POST /api/bootstrap/setup — used by seed.ts to create
|
||||||
|
# the first admin user. Only valid on a pristine (zero-user) database.
|
||||||
|
# Not the same as ADMIN_API_KEY — there is no static API key in the gateway.
|
||||||
|
ADMIN_BOOTSTRAP_PASSWORD: harness-admin-password-a
|
||||||
|
depends_on:
|
||||||
|
postgres-a:
|
||||||
|
condition: service_healthy
|
||||||
|
valkey-a:
|
||||||
|
condition: service_healthy
|
||||||
|
step-ca:
|
||||||
|
condition: service_healthy
|
||||||
|
networks:
|
||||||
|
- fed-test-net
|
||||||
|
healthcheck:
|
||||||
|
test:
|
||||||
|
[
|
||||||
|
'CMD',
|
||||||
|
'node',
|
||||||
|
'-e',
|
||||||
|
"require('http').get('http://127.0.0.1:3000/api/health', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))",
|
||||||
|
]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
start_period: 60s
|
||||||
|
|
||||||
|
# ─── Server B — Work / Serving Gateway ──────────────────────────────────────
|
||||||
|
postgres-b:
|
||||||
|
image: pgvector/pgvector:pg17
|
||||||
|
container_name: fed-harness-postgres-b
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- '${PG_B_HOST_PORT:-15433}:5432'
|
||||||
|
environment:
|
||||||
|
POSTGRES_USER: mosaic
|
||||||
|
POSTGRES_PASSWORD: mosaic
|
||||||
|
POSTGRES_DB: mosaic
|
||||||
|
volumes:
|
||||||
|
- pg_b_data:/var/lib/postgresql/data
|
||||||
|
- ../../infra/pg-init:/docker-entrypoint-initdb.d:ro
|
||||||
|
networks:
|
||||||
|
- fed-test-net
|
||||||
|
healthcheck:
|
||||||
|
test: ['CMD-SHELL', 'pg_isready -U mosaic']
|
||||||
|
interval: 5s
|
||||||
|
timeout: 3s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
valkey-b:
|
||||||
|
image: valkey/valkey:8-alpine
|
||||||
|
container_name: fed-harness-valkey-b
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- '${VALKEY_B_HOST_PORT:-16380}:6379'
|
||||||
|
volumes:
|
||||||
|
- valkey_b_data:/data
|
||||||
|
networks:
|
||||||
|
- fed-test-net
|
||||||
|
healthcheck:
|
||||||
|
test: ['CMD', 'valkey-cli', 'ping']
|
||||||
|
interval: 5s
|
||||||
|
timeout: 3s
|
||||||
|
retries: 5
|
||||||
|
|
||||||
|
gateway-b:
|
||||||
|
image: git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
|
||||||
|
# Tag for human reference: sha-9f1a081 (post-#491 IMG-FIX; smoke-tested locally)
|
||||||
|
container_name: fed-harness-gateway-b
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- '${GATEWAY_B_HOST_PORT:-14002}:3000'
|
||||||
|
environment:
|
||||||
|
MOSAIC_TIER: federated
|
||||||
|
DATABASE_URL: postgres://mosaic:mosaic@postgres-b:5432/mosaic
|
||||||
|
VALKEY_URL: redis://valkey-b:6379
|
||||||
|
GATEWAY_PORT: '3000'
|
||||||
|
GATEWAY_CORS_ORIGIN: 'http://localhost:14002'
|
||||||
|
BETTER_AUTH_SECRET: harness-secret-server-b-do-not-use-in-prod
|
||||||
|
BETTER_AUTH_URL: 'http://gateway-b:3000'
|
||||||
|
STEP_CA_URL: 'https://step-ca:9000'
|
||||||
|
FEDERATION_PEER_HOSTNAME: gateway-b
|
||||||
|
# Bootstrap password for POST /api/bootstrap/setup — used by seed.ts to create
|
||||||
|
# the first admin user. Only valid on a pristine (zero-user) database.
|
||||||
|
# Not the same as ADMIN_API_KEY — there is no static API key in the gateway.
|
||||||
|
ADMIN_BOOTSTRAP_PASSWORD: harness-admin-password-b
|
||||||
|
depends_on:
|
||||||
|
postgres-b:
|
||||||
|
condition: service_healthy
|
||||||
|
valkey-b:
|
||||||
|
condition: service_healthy
|
||||||
|
step-ca:
|
||||||
|
condition: service_healthy
|
||||||
|
networks:
|
||||||
|
- fed-test-net
|
||||||
|
healthcheck:
|
||||||
|
test:
|
||||||
|
[
|
||||||
|
'CMD',
|
||||||
|
'node',
|
||||||
|
'-e',
|
||||||
|
"require('http').get('http://127.0.0.1:3000/api/health', r => process.exit(r.statusCode === 200 ? 0 : 1)).on('error', () => process.exit(1))",
|
||||||
|
]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 5
|
||||||
|
start_period: 60s
|
||||||
|
|
||||||
|
networks:
|
||||||
|
fed-test-net:
|
||||||
|
name: fed-test-net
|
||||||
|
driver: bridge
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
step_ca_data:
|
||||||
|
name: fed-harness-step-ca
|
||||||
|
pg_a_data:
|
||||||
|
name: fed-harness-pg-a
|
||||||
|
valkey_a_data:
|
||||||
|
name: fed-harness-valkey-a
|
||||||
|
pg_b_data:
|
||||||
|
name: fed-harness-pg-b
|
||||||
|
valkey_b_data:
|
||||||
|
name: fed-harness-valkey-b
|
||||||
290
tools/federation-harness/harness.ts
Normal file
290
tools/federation-harness/harness.ts
Normal file
@@ -0,0 +1,290 @@
|
|||||||
|
/**
|
||||||
|
* tools/federation-harness/harness.ts
|
||||||
|
*
|
||||||
|
* Vitest-consumable helpers for the two-gateway federation harness.
|
||||||
|
*
|
||||||
|
* USAGE (in a vitest test file):
|
||||||
|
*
|
||||||
|
* import { bootHarness, tearDownHarness, serverA, serverB, seed } from
|
||||||
|
* '../../tools/federation-harness/harness.js';
|
||||||
|
*
|
||||||
|
* let handle: HarnessHandle;
|
||||||
|
*
|
||||||
|
* beforeAll(async () => {
|
||||||
|
* handle = await bootHarness();
|
||||||
|
* }, 180_000);
|
||||||
|
*
|
||||||
|
* afterAll(async () => {
|
||||||
|
* await tearDownHarness(handle);
|
||||||
|
* });
|
||||||
|
*
|
||||||
|
* test('variant A — list tasks', async () => {
|
||||||
|
* const seedResult = await seed(handle, 'all');
|
||||||
|
* const a = serverA(handle);
|
||||||
|
* const res = await fetch(`${a.baseUrl}/api/federation/list/tasks`, {
|
||||||
|
* headers: { Authorization: `Bearer ${seedResult.adminTokenA}` },
|
||||||
|
* });
|
||||||
|
* expect(res.status).toBe(200);
|
||||||
|
* });
|
||||||
|
*
|
||||||
|
* NOTE: The `seed()` helper currently only supports scenario='all'. Passing any
|
||||||
|
* other value throws immediately. Per-variant narrowing is deferred to M3-11.
|
||||||
|
*
|
||||||
|
* ESM / NodeNext: all imports use .js extensions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { execSync, execFileSync } from 'node:child_process';
|
||||||
|
import { resolve, dirname } from 'node:path';
|
||||||
|
import { fileURLToPath } from 'node:url';
|
||||||
|
import { runSeed, type SeedResult } from './seed.js';
|
||||||
|
|
||||||
|
// ─── Types ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/** Connection details for one gateway of the harness. */
export interface GatewayAccessor {
  /** Host-reachable base URL, for example http://localhost:14001. */
  baseUrl: string;

  /** Password sent to POST /api/bootstrap/setup while the gateway is still pristine. */
  bootstrapPassword: string;

  /** Hostname on the internal Docker network, for container-to-container calls. */
  internalHostname: string;
}
|
||||||
|
|
||||||
|
/** Everything a test needs to talk to, seed, and tear down the harness stack. */
export interface HarnessHandle {
  /** Accessor for Server A. */
  a: GatewayAccessor;

  /** Accessor for Server B. */
  b: GatewayAccessor;

  /** Absolute path of the docker-compose file describing the stack. */
  composeFile: string;

  /** True when this handle started the stack; false when an existing stack was reused. */
  ownedStack: boolean;

  /** Populated once seed() has been called with this handle. */
  seedResult?: SeedResult;
}
|
||||||
|
|
||||||
|
/**
 * Which scenario seed() should provision.
 *
 * Only 'all' is implemented today. The per-variant members exist for the
 * narrowing work tracked as M3-11; until then seed() rejects them with an
 * explicit error instead of silently seeding more than was asked for.
 */
export type SeedScenario =
  | 'variantA'
  | 'variantB'
  | 'variantC'
  | 'all';
|
||||||
|
|
||||||
|
// ─── Constants ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/** Read an environment variable, using `fallback` when it is unset. */
const envOr = (name: string, fallback: string): string => process.env[name] ?? fallback;

// ESM modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// The compose file lives next to this module.
const COMPOSE_FILE = resolve(__dirname, 'docker-compose.two-gateways.yml');

// Host-side gateway URLs; overridable through the environment.
const GATEWAY_A_URL = envOr('GATEWAY_A_URL', 'http://localhost:14001');
const GATEWAY_B_URL = envOr('GATEWAY_B_URL', 'http://localhost:14002');

// Bootstrap passwords; overridable through the environment.
const ADMIN_BOOTSTRAP_PASSWORD_A = envOr('ADMIN_BOOTSTRAP_PASSWORD_A', 'harness-admin-password-a');
const ADMIN_BOOTSTRAP_PASSWORD_B = envOr('ADMIN_BOOTSTRAP_PASSWORD_B', 'harness-admin-password-b');

// Readiness polling: total budget and pause between probes, in milliseconds.
const READINESS_TIMEOUT_MS = 180_000;
const READINESS_POLL_MS = 3_000;
|
||||||
|
|
||||||
|
// ─── Internal helpers ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Probe a gateway's health endpoint once.
 *
 * @param baseUrl Gateway base URL; `/api/health` is appended to it.
 * @returns true when the response status is in the 2xx range; false for any
 *          other status or when the request fails or exceeds five seconds.
 */
async function isGatewayHealthy(baseUrl: string): Promise<boolean> {
  const probeUrl = `${baseUrl}/api/health`;
  const abortAfter = AbortSignal.timeout(5_000);

  return fetch(probeUrl, { signal: abortAfter }).then(
    (response) => response.ok,
    // Network errors, invalid URLs and timeouts all count as "not healthy".
    () => false,
  );
}
|
||||||
|
|
||||||
|
/**
 * Poll both gateways in parallel until each is healthy or one shared deadline
 * expires. Polling in parallel (rather than sequentially) avoids the bug where
 * a slow gateway-a consumes all of the readiness budget before gateway-b is
 * checked.
 *
 * @param handle Harness handle whose `a` and `b` base URLs are probed.
 * @throws Error naming the gateway that was still unhealthy when another poll
 *         interval would no longer fit before the deadline.
 */
async function waitForStack(handle: HarnessHandle): Promise<void> {
  const gateways: Array<{ label: string; url: string }> = [
    { label: 'gateway-a', url: handle.a.baseUrl },
    { label: 'gateway-b', url: handle.b.baseUrl },
  ];

  // One deadline for the whole stack, fixed before any probe starts.
  const deadline = Date.now() + READINESS_TIMEOUT_MS;

  await Promise.all(
    gateways.map(async (gw) => {
      // Whole-line messages: the two pollers run concurrently, so partial
      // writes (progress dots) would interleave unreadably.
      console.log(`[harness] Waiting for ${gw.label}...`);

      for (;;) {
        if (await isGatewayHealthy(gw.url)) {
          console.log(`[harness] ${gw.label} ready`);
          return;
        }

        // Stop once sleeping another interval would overshoot the deadline.
        if (Date.now() + READINESS_POLL_MS > deadline) {
          throw new Error(
            `[harness] ${gw.label} did not become healthy within ${READINESS_TIMEOUT_MS.toString()}ms`,
          );
        }

        await new Promise((r) => setTimeout(r, READINESS_POLL_MS));
      }
    }),
  );
}
|
||||||
|
|
||||||
|
/**
 * Report whether both gateway services of the compose stack are running.
 *
 * Runs `docker compose ps --format json` and inspects the `Service` and
 * `State` fields of each entry. Both output shapes are accepted: one JSON
 * object per line, or a single JSON array.
 *
 * NOTE(review): assumes the compose service names are `gateway-a` and
 * `gateway-b` (matching the hostnames used elsewhere in the harness) — confirm
 * against the compose file.
 *
 * @returns true only when both gateway services report `State === 'running'`;
 *          false on any docker failure, empty output, or unparsable output.
 */
function isStackRunning(): boolean {
  type PsEntry = { Service?: string; State?: string } | null;

  let output: string;
  try {
    output = execFileSync(
      'docker',
      ['compose', '-f', COMPOSE_FILE, 'ps', '--format', 'json'],
      { encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'] },
    ).trim();
  } catch {
    // docker missing, daemon down, or compose file unreadable.
    return false;
  }

  if (!output) return false;

  // Parse one JSON document into a list of entries; bad input yields none.
  const parseEntries = (text: string): PsEntry[] => {
    try {
      const parsed: unknown = JSON.parse(text);
      return (Array.isArray(parsed) ? parsed : [parsed]) as PsEntry[];
    } catch {
      return [];
    }
  };

  const entries = output.startsWith('[')
    ? parseEntries(output)
    : output.split('\n').filter(Boolean).flatMap(parseEntries);

  const runningServices = new Set(
    entries.filter((entry) => entry?.State === 'running').map((entry) => entry?.Service),
  );

  // Databases and caches being up does not mean the gateways are.
  return runningServices.has('gateway-a') && runningServices.has('gateway-b');
}
|
||||||
|
|
||||||
|
// ─── Public API ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Boot the harness stack.
 *
 * Idempotent: if both gateways already answer their /api/health probe, the
 * existing stack is reused and the returned handle has `ownedStack: false`.
 * Callers that receive `ownedStack: false` should NOT call `tearDownHarness`
 * unless they explicitly want to tear down a pre-existing stack.
 *
 * Otherwise the stack is started with `docker compose up -d` and the function
 * waits for both gateways to pass their /api/health probe.
 *
 * @returns A handle describing both gateways and whether this call started the stack.
 * @throws If `docker compose up` fails or a gateway does not become healthy in time.
 */
export async function bootHarness(): Promise<HarnessHandle> {
  const handle: HarnessHandle = {
    a: {
      baseUrl: GATEWAY_A_URL,
      bootstrapPassword: ADMIN_BOOTSTRAP_PASSWORD_A,
      internalHostname: 'gateway-a',
    },
    b: {
      baseUrl: GATEWAY_B_URL,
      bootstrapPassword: ADMIN_BOOTSTRAP_PASSWORD_B,
      internalHostname: 'gateway-b',
    },
    composeFile: COMPOSE_FILE,
    ownedStack: false,
  };

  // Check if both gateways are already healthy
  const [aHealthy, bHealthy] = await Promise.all([
    isGatewayHealthy(handle.a.baseUrl),
    isGatewayHealthy(handle.b.baseUrl),
  ]);

  if (aHealthy && bHealthy) {
    console.log('[harness] Stack already running — reusing existing stack.');
    return handle;
  }

  console.log('[harness] Starting federation harness stack...');
  // Argument-vector form: no shell is involved, so a checkout path containing
  // quotes, `$` or backticks cannot break or alter the command.
  execFileSync('docker', ['compose', '-f', COMPOSE_FILE, 'up', '-d'], { stdio: 'inherit' });
  handle.ownedStack = true;

  await waitForStack(handle);
  console.log('[harness] Stack is ready.');

  return handle;
}
|
||||||
|
|
||||||
|
/**
 * Tear down the harness stack.
 *
 * Runs `docker compose down -v` to remove containers AND volumes (ephemeral state).
 * Only tears down if `handle.ownedStack` is true unless `force` is set.
 *
 * @param handle Handle returned by bootHarness(); its `composeFile` selects the stack.
 * @param opts   Pass `{ force: true }` to tear down a stack this handle did not start.
 * @throws If the `docker compose down` command fails.
 */
export async function tearDownHarness(
  handle: HarnessHandle,
  opts?: { force?: boolean },
): Promise<void> {
  if (!handle.ownedStack && !opts?.force) {
    console.log(
      '[harness] Stack not owned by this handle — skipping teardown (pass force: true to override).',
    );
    return;
  }

  console.log('[harness] Tearing down federation harness stack...');
  // composeFile comes from the caller; pass it as an argument rather than
  // splicing it into a shell string, so no shell quoting or expansion applies.
  execFileSync('docker', ['compose', '-f', handle.composeFile, 'down', '-v'], {
    stdio: 'inherit',
  });
  console.log('[harness] Stack torn down.');
}
|
||||||
|
|
||||||
|
/**
 * Readability helper for tests: pick the Server A accessor out of a handle.
 *
 * @param handle Harness handle.
 * @returns The same object stored at `handle.a`.
 */
export function serverA(handle: HarnessHandle): GatewayAccessor {
  const { a: gatewayA } = handle;
  return gatewayA;
}
|
||||||
|
|
||||||
|
/**
 * Readability helper for tests: pick the Server B accessor out of a handle.
 *
 * @param handle Harness handle.
 * @returns The same object stored at `handle.b`.
 */
export function serverB(handle: HarnessHandle): GatewayAccessor {
  const { b: gatewayB } = handle;
  return gatewayB;
}
|
||||||
|
|
||||||
|
/**
 * Provision test data on the harness gateways.
 *
 * @param handle   Handle returned by bootHarness(). Its `seedResult` field is
 *                 set to the value this function returns.
 * @param scenario Scope variants to provision. Only 'all' is accepted today;
 *                 any other value is rejected immediately (narrowing is
 *                 tracked as M3-11).
 * @returns The SeedResult produced by runSeed(): grant IDs, peer IDs, and
 *          admin tokens that test assertions can reference.
 *
 * IMPORTANT: both gateways must have a pristine database. Seeding bootstraps
 * an admin user on each gateway via POST /api/bootstrap/setup and fails with a
 * clear error if either gateway already has users. Run
 * 'docker compose down -v' to reset state.
 */
export async function seed(
  handle: HarnessHandle,
  scenario: SeedScenario = 'all',
): Promise<SeedResult> {
  const isSupported = scenario === 'all';
  if (!isSupported) {
    const reason = [
      `seed: scenario narrowing not yet implemented; pass "all" for now. `,
      `Got: "${scenario}". Per-variant narrowing is tracked as M3-11.`,
    ].join('');
    throw new Error(reason);
  }

  const { a: gatewayA, b: gatewayB } = handle;
  const seeded = await runSeed({
    serverAUrl: gatewayA.baseUrl,
    serverBUrl: gatewayB.baseUrl,
    adminBootstrapPasswordA: gatewayA.bootstrapPassword,
    adminBootstrapPasswordB: gatewayB.bootstrapPassword,
  });

  // Keep the result on the handle so later helpers can reach it.
  handle.seedResult = seeded;
  return seeded;
}
|
||||||
603
tools/federation-harness/seed.ts
Normal file
603
tools/federation-harness/seed.ts
Normal file
@@ -0,0 +1,603 @@
|
|||||||
|
#!/usr/bin/env tsx
|
||||||
|
/**
|
||||||
|
* tools/federation-harness/seed.ts
|
||||||
|
*
|
||||||
|
* Provisions test data for the two-gateway federation harness.
|
||||||
|
* Run via: tsx tools/federation-harness/seed.ts
|
||||||
|
*
|
||||||
|
* What this script does:
|
||||||
|
* 1. (Optional) Boots the compose stack if --boot flag is passed.
|
||||||
|
* 2. Waits for both gateways to be healthy.
|
||||||
|
* 3. Bootstraps an admin user + token on each gateway via POST /api/bootstrap/setup.
|
||||||
|
* 4. Creates three grants on Server B matching the M3 acceptance test scenarios:
|
||||||
|
* - Scope variant A: tasks + notes, include_personal: true
|
||||||
|
* - Scope variant B: tasks only, include_teams: ['T1'], exclude T2
|
||||||
|
* - Scope variant C: tasks + credentials in resources, credentials excluded (sanity)
|
||||||
|
* 5. For each grant, walks the full enrollment flow:
|
||||||
|
* a. Server B creates a peer keypair (represents the requesting side).
|
||||||
|
* b. Server B creates the grant referencing that peer.
|
||||||
|
* c. Server B issues an enrollment token.
|
||||||
|
* d. Server A creates its own peer keypair (represents its view of B).
|
||||||
|
* e. Server A redeems the enrollment token at Server B's enrollment endpoint,
|
||||||
|
* submitting A's CSR → receives signed cert back.
|
||||||
|
* f. Server A stores the cert on its peer record → peer becomes active.
|
||||||
|
* 6. Inserts representative test tasks/notes/credentials on Server B.
|
||||||
|
*
|
||||||
|
* IMPORTANT: This script uses the real admin REST API — no direct DB writes.
|
||||||
|
* It exercises the full enrollment flow as M3 acceptance tests will.
|
||||||
|
*
|
||||||
|
* ESM / NodeNext: all imports use .js extensions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { execSync } from 'node:child_process';
|
||||||
|
import { resolve, dirname } from 'node:path';
|
||||||
|
import { fileURLToPath } from 'node:url';
|
||||||
|
|
||||||
|
// ─── Constants ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/** Read an environment variable, using `fallback` when it is unset. */
const envOr = (name: string, fallback: string): string => process.env[name] ?? fallback;

// ESM modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

// The compose file lives next to this script.
const COMPOSE_FILE = resolve(__dirname, 'docker-compose.two-gateways.yml');

/** Gateway base URLs as reachable from the host machine (mapped host ports). */
const SERVER_A_URL = envOr('GATEWAY_A_URL', 'http://localhost:14001');
const SERVER_B_URL = envOr('GATEWAY_B_URL', 'http://localhost:14002');

/**
 * Passwords submitted with POST /api/bootstrap/setup. Every gateway starts
 * without users and needs this one-time setup call before admin-guarded
 * endpoints become usable.
 */
const ADMIN_BOOTSTRAP_PASSWORD_A = envOr('ADMIN_BOOTSTRAP_PASSWORD_A', 'harness-admin-password-a');
const ADMIN_BOOTSTRAP_PASSWORD_B = envOr('ADMIN_BOOTSTRAP_PASSWORD_B', 'harness-admin-password-b');

// Readiness polling: total budget and pause between probes, in milliseconds.
const READINESS_TIMEOUT_MS = 120_000;
const READINESS_POLL_MS = 3_000;
|
||||||
|
|
||||||
|
// ─── Scope variant definitions (for M3 acceptance tests) ─────────────────────
|
||||||
|
|
||||||
|
/** Variant A: tasks and notes, both including personal data. */
export const SCOPE_VARIANT_A = {
  resources: ['tasks', 'notes'],
  filters: {
    tasks: { include_personal: true },
    notes: { include_personal: true },
  },
  excluded_resources: new Array<string>(),
  max_rows_per_query: 500,
};

/** Variant B: tasks only, restricted to team T1, personal data excluded. */
export const SCOPE_VARIANT_B = {
  resources: ['tasks'],
  filters: {
    tasks: { include_teams: ['T1'], include_personal: false },
  },
  excluded_resources: new Array<string>(),
  max_rows_per_query: 500,
};

/**
 * Variant C: sanity case. `credentials` is listed in `resources` and also in
 * `excluded_resources`; the exclusion must win, so credentials stay
 * inaccessible even though they appear in `resources`.
 */
export const SCOPE_VARIANT_C = {
  resources: ['tasks', 'credentials'],
  filters: {
    tasks: { include_personal: true },
  },
  excluded_resources: ['credentials'],
  max_rows_per_query: 500,
};
|
||||||
|
|
||||||
|
// ─── Inline types (no import from packages/types — M3-01 branch not yet merged) ─
|
||||||
|
|
||||||
|
/** Options accepted by adminFetch(). */
interface AdminFetchOptions {
  /** HTTP method; adminFetch() uses GET when this is omitted. */
  method?: string;
  /** Request payload; JSON-serialized by adminFetch() when not undefined. */
  body?: unknown;
  /** Admin token sent as `Authorization: Bearer <adminToken>`. */
  adminToken: string;
}

/** Fields this script reads from a keypair-creation response. */
interface PeerRecord {
  peerId: string;
  /** Certificate signing request, submitted when redeeming an enrollment token. */
  csrPem: string;
}

/** Fields this script reads from a grant response. */
interface GrantRecord {
  id: string;
  status: string;
  scope: unknown;
}

/** Fields this script reads from an enrollment-token response. */
interface EnrollmentTokenResult {
  token: string;
  expiresAt: string;
  enrollmentUrl: string;
}

/** Fields this script reads from an enrollment-redemption response. */
interface EnrollmentRedeemResult {
  certPem: string;
  certChainPem: string;
}

/** What bootstrapAdmin() hands back for one gateway. */
interface BootstrapResult {
  adminUserId: string;
  /** Plaintext admin token returned by the bootstrap setup call. */
  adminToken: string;
}
|
||||||
|
|
||||||
|
/** Result of a seed run: gateway URLs, admin credentials, and per-variant records. */
export interface SeedResult {
  serverAUrl: string;
  serverBUrl: string;

  adminTokenA: string;
  adminTokenB: string;

  adminUserIdA: string;
  adminUserIdB: string;

  /** One grant per scope variant. */
  grants: Record<'variantA' | 'variantB' | 'variantC', GrantRecord>;

  /** One peer per scope variant, tagged with the ID of the grant it was enrolled for. */
  peers: Record<'variantA' | 'variantB' | 'variantC', PeerRecord & { grantId: string }>;
}
|
||||||
|
|
||||||
|
// ─── HTTP helpers ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Call a gateway endpoint as an admin and decode the JSON response.
 *
 * Authentication is `Authorization: Bearer <adminToken>`, the only path
 * AdminGuard supports (DB-backed sha256 token lookup). The gateway has no
 * `x-admin-key` header path.
 *
 * @param baseUrl Gateway base URL.
 * @param path    Path appended verbatim to `baseUrl`.
 * @param opts    Method (GET when omitted), optional JSON body, and admin token.
 * @returns The parsed JSON response body, typed as `T` without validation.
 * @throws Error `<METHOD> <url> → <status>: <body>` for any non-2xx response.
 */
async function adminFetch<T>(baseUrl: string, path: string, opts: AdminFetchOptions): Promise<T> {
  const verb = opts.method ?? 'GET';
  const target = `${baseUrl}${path}`;
  const payload = opts.body === undefined ? undefined : JSON.stringify(opts.body);

  const response = await fetch(target, {
    method: verb,
    headers: {
      'Content-Type': 'application/json',
      Authorization: `Bearer ${opts.adminToken}`,
    },
    body: payload,
  });

  if (response.ok) {
    return response.json() as Promise<T>;
  }

  // Include the response body in the error; fall back if it cannot be read.
  const detail = await response.text().catch(() => '(no body)');
  throw new Error(`${verb} ${target} → ${response.status}: ${detail}`);
}
|
||||||
|
|
||||||
|
// ─── Admin bootstrap ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Create the first admin user on a pristine gateway.
 *
 * Flow:
 *   1. GET /api/bootstrap/status and require `needsSetup === true`.
 *   2. POST /api/bootstrap/setup with { name, email, password }; the response
 *      is read as { user, token: { plaintext } }.
 *
 * A fresh database is assumed. When `needsSetup` is false the function fails
 * fast with a clear error rather than continuing with an unknown token.
 *
 * @param baseUrl  Gateway base URL.
 * @param label    Human-readable gateway name; used in logs and to derive the admin email.
 * @param password Password for the new admin user.
 * @returns The new admin's user ID and plaintext token.
 * @throws Error when either request is not OK or the gateway already has users.
 */
async function bootstrapAdmin(
  baseUrl: string,
  label: string,
  password: string,
): Promise<BootstrapResult> {
  const statusUrl = `${baseUrl}/api/bootstrap/status`;
  const setupUrl = `${baseUrl}/api/bootstrap/setup`;

  console.log(`[seed] Bootstrapping admin on ${label} (${baseUrl})...`);

  // Step 1: the gateway must still be waiting for its first user.
  const statusResponse = await fetch(statusUrl);
  if (!statusResponse.ok) {
    throw new Error(`[seed] GET ${statusUrl} → ${statusResponse.status.toString()}`);
  }
  const { needsSetup } = (await statusResponse.json()) as { needsSetup: boolean };

  if (!needsSetup) {
    throw new Error(
      `[seed] ${label} at ${baseUrl} already has users (needsSetup=false). ` +
        `The harness requires a pristine database. Run 'docker compose down -v' to reset.`,
    );
  }

  // Step 2: create the admin. The email slug is the lower-cased label with
  // whitespace runs collapsed to single dashes.
  const emailSlug = label.toLowerCase().replace(/\s+/g, '-');
  const setupResponse = await fetch(setupUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      name: `Harness Admin (${label})`,
      email: `harness-admin-${emailSlug}@example.invalid`,
      password,
    }),
  });

  if (!setupResponse.ok) {
    const detail = await setupResponse.text().catch(() => '(no body)');
    throw new Error(`[seed] POST ${setupUrl} → ${setupResponse.status.toString()}: ${detail}`);
  }

  const created = (await setupResponse.json()) as {
    user: { id: string };
    token: { plaintext: string };
  };
  const adminUserId = created.user.id;
  const adminToken = created.token.plaintext;

  console.log(`[seed] ${label} admin user: ${adminUserId}`);
  // Only a short prefix of the token is logged.
  console.log(`[seed] ${label} admin token: ${adminToken.slice(0, 8)}...`);

  return { adminUserId, adminToken };
}
|
||||||
|
|
||||||
|
// ─── Readiness probe ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Block until a gateway's /api/health endpoint returns an OK response.
 *
 * Probes are capped at five seconds each and separated by READINESS_POLL_MS.
 *
 * @param baseUrl Gateway base URL.
 * @param label   Human-readable gateway name used in log and error messages.
 * @throws Error carrying the last observed failure once READINESS_TIMEOUT_MS has elapsed.
 */
async function waitForGateway(baseUrl: string, label: string): Promise<void> {
  const healthUrl = `${baseUrl}/api/health`;
  const giveUpAt = Date.now() + READINESS_TIMEOUT_MS;
  let lastError: string = '';

  // Single probe: resolves true when healthy, otherwise records why not.
  const probe = async (): Promise<boolean> => {
    try {
      const response = await fetch(healthUrl, { signal: AbortSignal.timeout(5_000) });
      if (response.ok) {
        return true;
      }
      lastError = `HTTP ${response.status.toString()}`;
    } catch (cause) {
      lastError = cause instanceof Error ? cause.message : String(cause);
    }
    return false;
  };

  while (Date.now() < giveUpAt) {
    if (await probe()) {
      console.log(`[seed] ${label} is ready (${baseUrl})`);
      return;
    }
    await new Promise((wake) => setTimeout(wake, READINESS_POLL_MS));
  }

  throw new Error(
    `[seed] ${label} did not become ready within ${READINESS_TIMEOUT_MS.toString()}ms — last error: ${lastError}`,
  );
}
|
||||||
|
|
||||||
|
// ─── Enrollment flow ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Walk the full enrollment flow for one grant.
 *
 * The two-sided flow (matching the data model's FK semantics):
 *
 *   1. On Server B: POST /api/admin/federation/peers/keypair
 *      → peerId_B (Server B's peer record representing the requesting side)
 *   2. On Server B: POST /api/admin/federation/grants with peerId: peerId_B
 *      → grant (FK to Server B's own federation_peers table — no violation)
 *   3. On Server B: POST /api/admin/federation/grants/:id/tokens
 *      → enrollmentUrl pointing back to Server B
 *   4. On Server A: POST /api/admin/federation/peers/keypair
 *      → peerId_A + csrPem_A (Server A's local record of Server B)
 *   5. Server A → Server B: POST enrollment endpoint with { csrPem: csrPem_A }
 *      → certPem signed by Server B's CA
 *   6. On Server A: PATCH /api/admin/federation/peers/:peerId_A/cert with certPem
 *      → Server A's peer record transitions to active
 *
 * @param opts Variant label, grant subject and scope, plus the admin token and
 *             host-reachable URL of each server.
 * @returns The grant as re-fetched from Server B after enrollment, and Server
 *          A's peer record tagged with the grant ID.
 * @throws Error when any admin call or the token redemption is not OK, or when
 *         the enrollment URL carries no token segment.
 */
async function enrollGrant(opts: {
  label: string;
  subjectUserId: string;
  scope: unknown;
  adminTokenA: string;
  adminTokenB: string;
  serverAUrl: string;
  serverBUrl: string;
}): Promise<{ grant: GrantRecord; peer: PeerRecord & { grantId: string } }> {
  const { label, subjectUserId, scope, adminTokenA, adminTokenB, serverAUrl, serverBUrl } = opts;
  console.log(`\n[seed] Enrolling grant for scope variant ${label}...`);

  // 1. Create peer keypair on Server B (represents the requesting peer from B's perspective)
  const peerB = await adminFetch<PeerRecord>(serverBUrl, '/api/admin/federation/peers/keypair', {
    method: 'POST',
    adminToken: adminTokenB,
    body: {
      commonName: `harness-peer-${label.toLowerCase()}-from-b`,
      displayName: `Harness Peer ${label} (Server A as seen from B)`,
      endpointUrl: serverAUrl,
    },
  });
  console.log(`[seed] Created peer on B: ${peerB.peerId}`);

  // 2. Create grant on Server B referencing B's own peer record
  const grant = await adminFetch<GrantRecord>(serverBUrl, '/api/admin/federation/grants', {
    method: 'POST',
    adminToken: adminTokenB,
    body: {
      peerId: peerB.peerId,
      subjectUserId,
      scope,
    },
  });
  console.log(`[seed] Created grant on B: ${grant.id} (status: ${grant.status})`);

  // 3. Generate enrollment token on Server B
  const tokenResult = await adminFetch<EnrollmentTokenResult>(
    serverBUrl,
    `/api/admin/federation/grants/${grant.id}/tokens`,
    { method: 'POST', adminToken: adminTokenB, body: { ttlSeconds: 900 } },
  );
  console.log(`[seed] Enrollment token: ${tokenResult.token.slice(0, 8)}...`);
  console.log(`[seed] Enrollment URL: ${tokenResult.enrollmentUrl}`);

  // 4. Create peer keypair on Server A (Server A's local record of Server B)
  const peerA = await adminFetch<PeerRecord>(serverAUrl, '/api/admin/federation/peers/keypair', {
    method: 'POST',
    adminToken: adminTokenA,
    body: {
      commonName: `harness-peer-${label.toLowerCase()}-from-a`,
      displayName: `Harness Peer ${label} (Server B as seen from A)`,
      endpointUrl: serverBUrl,
    },
  });
  console.log(`[seed] Created peer on A: ${peerA.peerId}`);

  // 5. Redeem token at Server B's enrollment endpoint with A's CSR.
  //    The enrollment endpoint is not admin-guarded — the one-time token IS the credential.
  //
  //    The enrollmentUrl returned by the gateway is built using BETTER_AUTH_URL which
  //    resolves to the in-cluster Docker hostname (gateway-b:3000). That URL is only
  //    reachable from other containers, not from the host machine running this script.
  //    We keep only its final path segment (the token) and rebuild the URL on the
  //    host-accessible serverBUrl.
  const tokenSegment = new URL(tokenResult.enrollmentUrl).pathname
    .split('/')
    // Drop empty segments so a trailing slash does not yield an empty token.
    .filter((segment) => segment.length > 0)
    .pop();
  if (tokenSegment === undefined) {
    throw new Error(
      `[seed] Enrollment URL has no token path segment: ${tokenResult.enrollmentUrl}`,
    );
  }
  const redeemUrl = `${serverBUrl}/api/federation/enrollment/${tokenSegment}`;
  console.log(`[seed] Rewritten redeem URL (host-accessible): ${redeemUrl}`);
  const redeemRes = await fetch(redeemUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ csrPem: peerA.csrPem }),
  });

  if (!redeemRes.ok) {
    const body = await redeemRes.text().catch(() => '(no body)');
    throw new Error(`Enrollment redemption failed: ${redeemRes.status.toString()} — ${body}`);
  }

  const redeemResult = (await redeemRes.json()) as EnrollmentRedeemResult;
  console.log(`[seed] Cert issued (${redeemResult.certPem.length.toString()} bytes)`);

  // 6. Store cert on Server A's peer record → transitions to active
  await adminFetch<unknown>(serverAUrl, `/api/admin/federation/peers/${peerA.peerId}/cert`, {
    method: 'PATCH',
    adminToken: adminTokenA,
    body: { certPem: redeemResult.certPem },
  });
  console.log(`[seed] Cert stored on A — peer ${peerA.peerId} is now active`);

  // Re-fetch the grant from B and report its post-enrollment status. The
  // status is logged only; it is not asserted here.
  const activeGrant = await adminFetch<GrantRecord>(
    serverBUrl,
    `/api/admin/federation/grants/${grant.id}`,
    { adminToken: adminTokenB },
  );
  console.log(`[seed] Grant status on B: ${activeGrant.status}`);

  return { grant: activeGrant, peer: { ...peerA, grantId: grant.id } };
}
|
||||||
|
|
||||||
|
// ─── Test data insertion ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Insert representative test data on Server B via its admin APIs.
 *
 * NOTE: The gateway's task/note/credential APIs require an authenticated user
 * session. For the harness, we seed via admin-level endpoints if available,
 * or document the gap here for M3-11 to fill in with proper user session seeding.
 *
 * ASSUMPTION: Server B exposes POST /api/admin/tasks and POST /api/admin/notes
 * for test data. If an endpoint does not yet exist (HTTP 404), this function
 * logs a warning and skips the remaining rows of that kind without failing —
 * M3-11 will add the session-based seeding path.
 *
 * @param subjectUserId Owner assigned to every inserted row.
 * @param scopeLabel    Variant label used in row titles and log lines.
 * @param serverBUrl    Host-reachable base URL of Server B.
 * @param adminTokenB   Admin token for Server B.
 * @throws Any insertion error other than "endpoint not found".
 */
async function seedTestData(
  subjectUserId: string,
  scopeLabel: string,
  serverBUrl: string,
  adminTokenB: string,
): Promise<void> {
  console.log(`\n[seed] Seeding test data on Server B for ${scopeLabel}...`);

  const testTasks = [
    {
      title: `${scopeLabel} Task 1`,
      description: 'Federation harness test task',
      userId: subjectUserId,
    },
    {
      title: `${scopeLabel} Task 2`,
      description: 'Team-scoped test task',
      userId: subjectUserId,
      teamId: 'T1',
    },
  ];

  const testNotes = [
    {
      title: `${scopeLabel} Note 1`,
      content: 'Personal note for federation test',
      userId: subjectUserId,
    },
  ];

  // adminFetch() errors read "<METHOD> <url> → <status>: <body>". Match the
  // status position rather than any "404" substring, which would also hit a
  // URL such as http://localhost:4040 or a response body that mentions 404.
  const isMissingEndpoint = (err: unknown): boolean => {
    const msg = err instanceof Error ? err.message : String(err);
    return msg.includes(' → 404:') || msg.includes('Cannot POST');
  };

  // Insert rows one by one; stop quietly at the first "endpoint not found".
  const insertAll = async (
    kind: 'task' | 'note',
    path: string,
    rows: ReadonlyArray<{ title: string }>,
  ): Promise<void> => {
    for (const row of rows) {
      try {
        await adminFetch<unknown>(serverBUrl, path, {
          method: 'POST',
          adminToken: adminTokenB,
          body: row,
        });
        console.log(`[seed]   Inserted ${kind}: "${row.title}"`);
      } catch (err) {
        if (!isMissingEndpoint(err)) {
          throw err;
        }
        console.warn(
          `[seed]   WARN: ${path} not found — skipping ${kind} insertion (expected until M3-11)`,
        );
        return;
      }
    }
  };

  await insertAll('task', '/api/admin/tasks', testTasks);
  await insertAll('note', '/api/admin/notes', testNotes);

  console.log(`[seed] Test data seeding for ${scopeLabel} complete.`);
}
|
||||||
|
|
||||||
|
// ─── Main entrypoint ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Runs the whole seeding flow against two gateways and returns what it created.
 *
 * Order of operations: wait for both gateways, bootstrap an admin on each (in
 * parallel), enroll grants for scope variants A, B and C one after another,
 * then seed test data on Server B for the three variants (in parallel).
 *
 * @param opts - Optional overrides. An omitted URL or password falls back to
 *   the matching module-level constant. When `subjectUserIds` is omitted, the
 *   admin user id bootstrapped on Server B is used for all three variants.
 * @returns The URLs used, both admin tokens and user ids, and the grant and
 *   peer produced for each variant.
 */
export async function runSeed(opts?: {
  serverAUrl?: string;
  serverBUrl?: string;
  adminBootstrapPasswordA?: string;
  adminBootstrapPasswordB?: string;
  subjectUserIds?: { variantA: string; variantB: string; variantC: string };
}): Promise<SeedResult> {
  const aUrl = opts?.serverAUrl ?? SERVER_A_URL;
  const bUrl = opts?.serverBUrl ?? SERVER_B_URL;
  const passwordA = opts?.adminBootstrapPasswordA ?? ADMIN_BOOTSTRAP_PASSWORD_A;
  const passwordB = opts?.adminBootstrapPasswordB ?? ADMIN_BOOTSTRAP_PASSWORD_B;

  // Caller-supplied subject ids, if any; the fallback is resolved after
  // bootstrap because it needs Server B's admin user id.
  const suppliedSubjects = opts?.subjectUserIds;

  console.log('[seed] Waiting for gateways to be ready...');
  await Promise.all([waitForGateway(aUrl, 'Server A'), waitForGateway(bUrl, 'Server B')]);

  // Bootstrap admin users on both gateways (requires pristine DBs).
  console.log('\n[seed] Bootstrapping admin accounts...');
  const [adminA, adminB] = await Promise.all([
    bootstrapAdmin(aUrl, 'Server A', passwordA),
    bootstrapAdmin(bUrl, 'Server B', passwordB),
  ]);

  // Without explicit subjects, every variant uses Server B's admin user.
  const subjects = suppliedSubjects ?? {
    variantA: adminB.adminUserId,
    variantB: adminB.adminUserId,
    variantC: adminB.adminUserId,
  };

  // The three enrollments differ only in label, subject and scope.
  type EnrollArgs = Parameters<typeof enrollGrant>[0];
  const enroll = (
    label: EnrollArgs['label'],
    subjectUserId: EnrollArgs['subjectUserId'],
    scope: EnrollArgs['scope'],
  ) =>
    enrollGrant({
      label,
      subjectUserId,
      scope,
      adminTokenA: adminA.adminToken,
      adminTokenB: adminB.adminToken,
      serverAUrl: aUrl,
      serverBUrl: bUrl,
    });

  // Enrollments are awaited one at a time, in A → B → C order.
  console.log('\n[seed] Enrolling scope variants...');
  const enrolledA = await enroll('A', subjects.variantA, SCOPE_VARIANT_A);
  const enrolledB = await enroll('B', subjects.variantB, SCOPE_VARIANT_B);
  const enrolledC = await enroll('C', subjects.variantC, SCOPE_VARIANT_C);

  // Seed test data on Server B for each scope variant.
  const seedTargets: Array<[subjectUserId: string, scopeLabel: string]> = [
    [subjects.variantA, 'A'],
    [subjects.variantB, 'B'],
    [subjects.variantC, 'C'],
  ];
  await Promise.all(
    seedTargets.map(([subjectUserId, scopeLabel]) =>
      seedTestData(subjectUserId, scopeLabel, bUrl, adminB.adminToken),
    ),
  );

  const result: SeedResult = {
    serverAUrl: aUrl,
    serverBUrl: bUrl,
    adminTokenA: adminA.adminToken,
    adminTokenB: adminB.adminToken,
    adminUserIdA: adminA.adminUserId,
    adminUserIdB: adminB.adminUserId,
    grants: {
      variantA: enrolledA.grant,
      variantB: enrolledB.grant,
      variantC: enrolledC.grant,
    },
    peers: {
      variantA: enrolledA.peer,
      variantB: enrolledB.peer,
      variantC: enrolledC.peer,
    },
  };

  const { variantA, variantB, variantC } = result.grants;
  console.log('\n[seed] Seed complete.');
  console.log('[seed] Summary:');
  console.log(` Variant A grant: ${variantA.id} (${variantA.status})`);
  console.log(` Variant B grant: ${variantB.id} (${variantB.status})`);
  console.log(` Variant C grant: ${variantC.id} (${variantC.status})`);

  return result;
}
|
||||||
|
|
||||||
|
// ─── CLI entry ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
// The module counts as the CLI entry when argv[1] is present and this file's
// path ends with the last '/'-separated segment of argv[1].
const isCli = ((): boolean => {
  const entryScript = process.argv[1];
  if (entryScript == null) {
    return false;
  }
  const entryFileName = entryScript.split('/').pop()!;
  return fileURLToPath(import.meta.url).endsWith(entryFileName);
})();

if (isCli) {
  // With --boot, bring the compose stack up (detached) before seeding.
  if (process.argv.includes('--boot')) {
    console.log('[seed] --boot flag detected — starting compose stack...');
    execSync(`docker compose -f "${COMPOSE_FILE}" up -d`, { stdio: 'inherit' });
  }

  // Exit 0 once seeding finishes; log the failure and exit 1 otherwise.
  void (async () => {
    try {
      await runSeed();
      process.exit(0);
    } catch (err) {
      console.error('[seed] Fatal:', err);
      process.exit(1);
    }
  })();
}
|
||||||
Reference in New Issue
Block a user