Compare commits

..

2 Commits

Author SHA1 Message Date
Jarvis
0e0ad9defe fixup: federation_audit_log DESC indexes + reserved M4 columns
All checks were successful
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
- Add .desc() to all three federation_audit_log created_at indexes for
  reverse-chronological scans (PRD section 7.3)
- Add reserved nullable columns query_hash, outcome, bytes_out per
  TASKS.md M2-01 spec (written by M4, columns reserved now to avoid
  retroactive migration)
- Regenerate migration 0008 in-place (replaces 0008_careless_lake.sql
  with 0008_smart_lyja.sql containing DESC indexes + new columns)
- Update integration test: add reserved columns to CREATE TABLE in
  beforeAll; add 7th test for peer→grant cascade delete

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 20:52:00 -05:00
Jarvis
a1ab4386fe feat(db): federation schema (grants/peers/audit_log) [FED-M2-01]
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
Adds Drizzle ORM schema and migration for federation v1 milestone 2:

- New enums: peer_state ('pending'|'active'|'suspended'|'revoked'),
  grant_status ('active'|'revoked'|'expired')
- New tables: federation_peers, federation_grants, federation_audit_log
- FK cascades: user delete cascades grants; peer delete set-nulls audit_log
- Migration: 0008_careless_lake.sql
- Integration tests (FEDERATED_INTEGRATION=1): 6/6 pass

No business logic, no services, no DTOs — schema only.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 20:38:25 -05:00
27 changed files with 19 additions and 1927 deletions

3
.gitignore vendored
View File

@@ -9,6 +9,3 @@ coverage
*.tsbuildinfo *.tsbuildinfo
.pnpm-store .pnpm-store
docs/reports/ docs/reports/
# Step-CA dev password — real file is gitignored; commit only the .example
infra/step-ca/dev-password

View File

@@ -24,7 +24,6 @@ import { GCModule } from './gc/gc.module.js';
import { ReloadModule } from './reload/reload.module.js'; import { ReloadModule } from './reload/reload.module.js';
import { WorkspaceModule } from './workspace/workspace.module.js'; import { WorkspaceModule } from './workspace/workspace.module.js';
import { QueueModule } from './queue/queue.module.js'; import { QueueModule } from './queue/queue.module.js';
import { FederationModule } from './federation/federation.module.js';
import { ThrottlerGuard, ThrottlerModule } from '@nestjs/throttler'; import { ThrottlerGuard, ThrottlerModule } from '@nestjs/throttler';
@Module({ @Module({
@@ -53,7 +52,6 @@ import { ThrottlerGuard, ThrottlerModule } from '@nestjs/throttler';
QueueModule, QueueModule,
ReloadModule, ReloadModule,
WorkspaceModule, WorkspaceModule,
FederationModule,
], ],
controllers: [HealthController], controllers: [HealthController],
providers: [ providers: [

View File

@@ -1,56 +0,0 @@
/**
* DTOs for the Step-CA client service (FED-M2-04).
*
* IssueCertRequestDto — input to CaService.issueCert()
* IssuedCertDto — output from CaService.issueCert()
*/
import { IsInt, IsNotEmpty, IsString, IsUUID, Max, Min } from 'class-validator';
export class IssueCertRequestDto {
/**
* PEM-encoded PKCS#10 Certificate Signing Request.
* The CSR must already include the desired SANs.
*/
@IsString()
@IsNotEmpty()
csrPem!: string;
/**
* UUID of the federation_grants row this certificate is being issued for.
* Embedded as the `mosaic_grant_id` custom OID extension.
*/
@IsUUID()
grantId!: string;
/**
* UUID of the local user on whose behalf the cert is being issued.
* Embedded as the `mosaic_subject_user_id` custom OID extension.
*/
@IsUUID()
subjectUserId!: string;
/**
* Requested certificate validity in seconds.
* Capped at the step-ca provisioner policy ceiling.
* Defaults to 86 400 s (24 h) when omitted by callers.
*/
@IsInt()
@Min(60)
@Max(365 * 24 * 3600)
ttlSeconds!: number;
}
export class IssuedCertDto {
/** PEM-encoded leaf certificate returned by step-ca. */
certPem!: string;
/**
* PEM-encoded full certificate chain (leaf + intermediates + root).
* Falls back to `certPem` when step-ca returns no `certChain` field.
*/
certChainPem!: string;
/** Decimal serial number string of the issued certificate. */
serialNumber!: string;
}

View File

@@ -1,360 +0,0 @@
/**
* Unit tests for CaService — Step-CA client (FED-M2-04).
*
* Coverage:
* - Happy path: returns IssuedCertDto with certPem, certChainPem, serialNumber
* - certChainPem fallback: falls back to certPem when certChain absent
* - certChainPem from ca field: uses crt+ca when certChain absent but ca present
* - HTTP 401: throws CaServiceError with cause + remediation
* - HTTP non-401 error: throws CaServiceError
* - Malformed CSR: throws before HTTP call
* - Non-JSON response: throws CaServiceError
* - HTTPS connection error: throws CaServiceError
* - JWT custom claims: mosaic_grant_id and mosaic_subject_user_id present in OTT payload
* - CaServiceError: has cause + remediation properties
* - Missing crt in response: throws CaServiceError
*/
import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest';
// ---------------------------------------------------------------------------
// Mock node:https BEFORE importing CaService so the mock is in place when
// the module is loaded. Vitest/ESM require vi.mock at the top level.
// ---------------------------------------------------------------------------
vi.mock('node:https', () => {
const mockRequest = vi.fn();
const mockAgent = vi.fn().mockImplementation(() => ({}));
return {
default: { request: mockRequest, Agent: mockAgent },
request: mockRequest,
Agent: mockAgent,
};
});
vi.mock('node:fs', () => {
const mockReadFileSync = vi
.fn()
.mockReturnValue('-----BEGIN CERTIFICATE-----\nFAKEROOT\n-----END CERTIFICATE-----\n');
return {
default: { readFileSync: mockReadFileSync },
readFileSync: mockReadFileSync,
};
});
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
// Minimal self-signed certificate PEM produced by openssl for testing.
// Serial 01, RSA 512 bit (invalid for production, fine for unit tests).
const FAKE_CERT_PEM = `-----BEGIN CERTIFICATE-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0000000000000000AAAA
-----END CERTIFICATE-----\n`;
const FAKE_CSR_PEM = `-----BEGIN CERTIFICATE REQUEST-----
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA0000000000000000AAAA
-----END CERTIFICATE REQUEST-----\n`;
const FAKE_CA_PEM = `-----BEGIN CERTIFICATE-----
CAROOT000000000000000000000000000000000000000000000000AAAA
-----END CERTIFICATE-----\n`;
const GRANT_ID = 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11';
const SUBJECT_USER_ID = 'b1ffcd00-0d1c-5fg9-cc7e-7cc0ce491b22';
// ---------------------------------------------------------------------------
// Setup env before importing service
// ---------------------------------------------------------------------------
const JWK_KEY = JSON.stringify({
kty: 'oct',
kid: 'test-kid',
k: 'dGVzdC1zZWNyZXQ=', // base64url("test-secret")
});
process.env['STEP_CA_URL'] = 'https://step-ca:9000';
process.env['STEP_CA_PROVISIONER_PASSWORD'] = 'test-password';
process.env['STEP_CA_PROVISIONER_KEY_JSON'] = JWK_KEY;
process.env['STEP_CA_ROOT_CERT_PATH'] = '/fake/root.pem';
// Import AFTER env is set and mocks are registered
import * as httpsModule from 'node:https';
import { CaService, CaServiceError } from './ca.service.js';
import type { IssueCertRequestDto } from './ca.dto.js';
// ---------------------------------------------------------------------------
// Helper to build a mock https.request that simulates step-ca
// ---------------------------------------------------------------------------
function makeHttpsMock(statusCode: number, body: unknown, errorMsg?: string): void {
const mockReq = {
write: vi.fn(),
end: vi.fn(),
on: vi.fn(),
};
(httpsModule.request as unknown as Mock).mockImplementation(
(
_options: unknown,
callback: (res: {
statusCode: number;
on: (event: string, cb: (chunk?: Buffer) => void) => void;
}) => void,
) => {
const mockRes = {
statusCode,
on: (event: string, cb: (chunk?: Buffer) => void) => {
if (event === 'data') {
if (body !== undefined) {
cb(Buffer.from(typeof body === 'string' ? body : JSON.stringify(body)));
}
}
if (event === 'end') {
cb();
}
},
};
if (errorMsg) {
// Simulate a connection error via the req.on('error') handler
mockReq.on.mockImplementation((event: string, cb: (err: Error) => void) => {
if (event === 'error') {
setImmediate(() => cb(new Error(errorMsg)));
}
});
} else {
// Normal flow: call the response callback
setImmediate(() => callback(mockRes));
}
return mockReq;
},
);
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
describe('CaService', () => {
let service: CaService;
beforeEach(() => {
vi.clearAllMocks();
service = new CaService();
});
function makeReq(overrides: Partial<IssueCertRequestDto> = {}): IssueCertRequestDto {
return {
csrPem: FAKE_CSR_PEM,
grantId: GRANT_ID,
subjectUserId: SUBJECT_USER_ID,
ttlSeconds: 86400,
...overrides,
};
}
// -------------------------------------------------------------------------
// Happy path
// -------------------------------------------------------------------------
it('returns IssuedCertDto on success (certChain present)', async () => {
makeHttpsMock(200, {
crt: FAKE_CERT_PEM,
certChain: [FAKE_CERT_PEM, FAKE_CA_PEM],
});
const result = await service.issueCert(makeReq());
expect(result.certPem).toBe(FAKE_CERT_PEM);
expect(result.certChainPem).toContain(FAKE_CERT_PEM);
expect(result.certChainPem).toContain(FAKE_CA_PEM);
expect(typeof result.serialNumber).toBe('string');
});
// -------------------------------------------------------------------------
// certChainPem fallback — certChain absent, ca field present
// -------------------------------------------------------------------------
it('builds certChainPem from crt+ca when certChain is absent', async () => {
makeHttpsMock(200, {
crt: FAKE_CERT_PEM,
ca: FAKE_CA_PEM,
});
const result = await service.issueCert(makeReq());
expect(result.certPem).toBe(FAKE_CERT_PEM);
expect(result.certChainPem).toContain(FAKE_CERT_PEM);
expect(result.certChainPem).toContain(FAKE_CA_PEM);
});
// -------------------------------------------------------------------------
// certChainPem fallback — no certChain, no ca field
// -------------------------------------------------------------------------
it('falls back to certPem alone when certChain and ca are absent', async () => {
makeHttpsMock(200, { crt: FAKE_CERT_PEM });
const result = await service.issueCert(makeReq());
expect(result.certPem).toBe(FAKE_CERT_PEM);
expect(result.certChainPem).toBe(FAKE_CERT_PEM);
});
// -------------------------------------------------------------------------
// HTTP 401
// -------------------------------------------------------------------------
it('throws CaServiceError on HTTP 401', async () => {
makeHttpsMock(401, { message: 'Unauthorized' });
await expect(service.issueCert(makeReq())).rejects.toSatisfy((err: unknown) => {
if (!(err instanceof CaServiceError)) return false;
expect(err.message).toMatch(/401/);
expect(err.remediation).toBeTruthy();
return true;
});
});
// -------------------------------------------------------------------------
// HTTP non-401 error (e.g. 422)
// -------------------------------------------------------------------------
it('throws CaServiceError on HTTP 422', async () => {
makeHttpsMock(422, { message: 'Unprocessable Entity' });
await expect(service.issueCert(makeReq())).rejects.toBeInstanceOf(CaServiceError);
});
// -------------------------------------------------------------------------
// Malformed CSR — throws before HTTP call
// -------------------------------------------------------------------------
it('throws CaServiceError for malformed CSR without making HTTP call', async () => {
const requestSpy = vi.spyOn(httpsModule, 'request');
await expect(service.issueCert(makeReq({ csrPem: 'not-a-valid-csr' }))).rejects.toBeInstanceOf(
CaServiceError,
);
expect(requestSpy).not.toHaveBeenCalled();
});
// -------------------------------------------------------------------------
// Non-JSON response
// -------------------------------------------------------------------------
it('throws CaServiceError when step-ca returns non-JSON', async () => {
makeHttpsMock(200, 'this is not json');
await expect(service.issueCert(makeReq())).rejects.toSatisfy((err: unknown) => {
if (!(err instanceof CaServiceError)) return false;
expect(err.message).toMatch(/non-JSON/);
return true;
});
});
// -------------------------------------------------------------------------
// HTTPS connection error
// -------------------------------------------------------------------------
it('throws CaServiceError on HTTPS connection error', async () => {
makeHttpsMock(0, undefined, 'connect ECONNREFUSED 127.0.0.1:9000');
await expect(service.issueCert(makeReq())).rejects.toSatisfy((err: unknown) => {
if (!(err instanceof CaServiceError)) return false;
expect(err.message).toMatch(/HTTPS connection/);
expect(err.cause).toBeInstanceOf(Error);
return true;
});
});
// -------------------------------------------------------------------------
// JWT custom claims: mosaic_grant_id and mosaic_subject_user_id
// -------------------------------------------------------------------------
it('includes mosaic_grant_id and mosaic_subject_user_id in the OTT payload', async () => {
let capturedBody: Record<string, unknown> | undefined;
// Override the mock to capture the request body
const mockReq = {
write: vi.fn((data: string) => {
capturedBody = JSON.parse(data) as Record<string, unknown>;
}),
end: vi.fn(),
on: vi.fn(),
};
(httpsModule.request as unknown as Mock).mockImplementation(
(
_options: unknown,
callback: (res: {
statusCode: number;
on: (event: string, cb: (chunk?: Buffer) => void) => void;
}) => void,
) => {
const mockRes = {
statusCode: 200,
on: (event: string, cb: (chunk?: Buffer) => void) => {
if (event === 'data') {
cb(Buffer.from(JSON.stringify({ crt: FAKE_CERT_PEM })));
}
if (event === 'end') {
cb();
}
},
};
setImmediate(() => callback(mockRes));
return mockReq;
},
);
await service.issueCert(makeReq());
expect(capturedBody).toBeDefined();
const ott = capturedBody!['ott'] as string;
expect(typeof ott).toBe('string');
// Decode JWT payload (second segment)
const parts = ott.split('.');
expect(parts).toHaveLength(3);
const payloadJson = Buffer.from(parts[1]!, 'base64url').toString('utf8');
const payload = JSON.parse(payloadJson) as Record<string, unknown>;
expect(payload['mosaic_grant_id']).toBe(GRANT_ID);
expect(payload['mosaic_subject_user_id']).toBe(SUBJECT_USER_ID);
});
// -------------------------------------------------------------------------
// CaServiceError has cause + remediation
// -------------------------------------------------------------------------
it('CaServiceError carries cause and remediation', () => {
const cause = new Error('original error');
const err = new CaServiceError('something went wrong', 'fix it like this', cause);
expect(err).toBeInstanceOf(Error);
expect(err).toBeInstanceOf(CaServiceError);
expect(err.message).toBe('something went wrong');
expect(err.remediation).toBe('fix it like this');
expect(err.cause).toBe(cause);
expect(err.name).toBe('CaServiceError');
});
// -------------------------------------------------------------------------
// Missing crt in response
// -------------------------------------------------------------------------
it('throws CaServiceError when response is missing the crt field', async () => {
makeHttpsMock(200, { ca: FAKE_CA_PEM });
await expect(service.issueCert(makeReq())).rejects.toSatisfy((err: unknown) => {
if (!(err instanceof CaServiceError)) return false;
expect(err.message).toMatch(/missing the "crt" field/);
return true;
});
});
});

View File

@@ -1,439 +0,0 @@
/**
* CaService — Step-CA client for federation grant certificate issuance.
*
* Responsibilities:
* 1. Build a JWK-provisioner One-Time Token (OTT) signed with HS256
* carrying Mosaic-specific claims (`mosaic_grant_id`,
* `mosaic_subject_user_id`, `step.sha`) per the step-ca JWK provisioner
* protocol.
* 2. POST the CSR + OTT to the step-ca `/1.0/sign` endpoint over HTTPS,
* pinning the trust to the CA root cert supplied via env.
* 3. Return an IssuedCertDto containing the leaf cert, full chain, and
* serial number.
*
* Environment variables (all required at runtime — validated in constructor):
* STEP_CA_URL https://step-ca:9000
* STEP_CA_PROVISIONER_PASSWORD JWK provisioner password (raw string)
* STEP_CA_PROVISIONER_KEY_JSON JWK provisioner public+private key (JSON)
* STEP_CA_ROOT_CERT_PATH Absolute path to the CA root PEM
*
* Custom OID registry (PRD §6, docs/federation/SETUP.md):
* 1.3.6.1.4.1.99999.1 — mosaic_grant_id
* 1.3.6.1.4.1.99999.2 — mosaic_subject_user_id
*
* Fail-loud contract:
* Every error path throws CaServiceError with a human-readable `remediation`
* field. Silent OID-stripping is NEVER allowed — if the sign response does
* not include the cert, we throw rather than return a cert that may be
* missing the custom extensions.
*/
import { Injectable, Logger } from '@nestjs/common';
import * as crypto from 'node:crypto';
import * as fs from 'node:fs';
import * as https from 'node:https';
import type { IssueCertRequestDto } from './ca.dto.js';
import { IssuedCertDto } from './ca.dto.js';
// ---------------------------------------------------------------------------
// Custom error class
// ---------------------------------------------------------------------------
export class CaServiceError extends Error {
readonly cause: unknown;
readonly remediation: string;
constructor(message: string, remediation: string, cause?: unknown) {
super(message);
this.name = 'CaServiceError';
this.cause = cause;
this.remediation = remediation;
}
}
// ---------------------------------------------------------------------------
// Internal types
// ---------------------------------------------------------------------------
interface StepSignResponse {
crt: string;
ca?: string;
certChain?: string[];
}
interface JwkKey {
kty: string;
kid?: string;
use?: string;
alg?: string;
k?: string; // symmetric
n?: string; // RSA
e?: string;
d?: string;
x?: string; // EC
y?: string;
crv?: string;
[key: string]: unknown;
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
* Base64url-encode a Buffer or string (no padding).
*/
function b64url(input: Buffer | string): string {
const buf = typeof input === 'string' ? Buffer.from(input, 'utf8') : input;
return buf.toString('base64url');
}
/**
* Compute SHA-256 fingerprint of the DER-encoded CSR body.
* step-ca uses this as the `step.sha` claim to bind the OTT to a specific CSR.
*/
function csrFingerprint(csrPem: string): string {
// Strip PEM headers and decode base64 body
const b64 = csrPem
.replace(/-----BEGIN CERTIFICATE REQUEST-----/, '')
.replace(/-----END CERTIFICATE REQUEST-----/, '')
.replace(/\s+/g, '');
let derBuf: Buffer;
try {
derBuf = Buffer.from(b64, 'base64');
} catch (err) {
throw new CaServiceError(
'Failed to base64-decode the CSR PEM body',
'Verify that csrPem is a valid PKCS#10 PEM-encoded certificate request.',
err,
);
}
if (derBuf.length === 0) {
throw new CaServiceError(
'CSR PEM decoded to empty buffer — malformed input',
'Provide a valid non-empty PKCS#10 PEM-encoded certificate request.',
);
}
return crypto.createHash('sha256').update(derBuf).digest('hex');
}
/**
* Derive a signing key from the JWK provisioner password using PBKDF2
* then sign with HMAC-SHA256 to produce an HS256 JWT.
*
* step-ca JWK provisioner tokens:
* - alg: HS256
* - header.kid: provisioner key ID
* - The key is the raw password bytes (step-ca uses the password directly
* as the HMAC key when the JWK provisioner type is "JWK" with symmetric
* key, or the password-derived key when encrypting the JWK).
*
* Per step-ca source (jose/jwk.go), for a JWK provisioner the OTT is a
* JWT signed with the provisioner's decrypted private key. For HS256 the
* key material is the `k` field of the JWK (symmetric secret), which itself
* was encrypted with the provisioner password. Since we already have the
* raw provisioner password we use it directly as the HMAC key — this mirrors
* what `step ca token` does for symmetric JWK provisioners.
*/
function buildOtt(params: {
caUrl: string;
provisionerPassword: string;
provisionerKeyJson: string;
csrPem: string;
grantId: string;
subjectUserId: string;
ttlSeconds: number;
}): string {
const {
caUrl,
provisionerPassword,
provisionerKeyJson,
csrPem,
grantId,
subjectUserId,
ttlSeconds,
} = params;
let jwk: JwkKey;
try {
jwk = JSON.parse(provisionerKeyJson) as JwkKey;
} catch (err) {
throw new CaServiceError(
'STEP_CA_PROVISIONER_KEY_JSON is not valid JSON',
'Set STEP_CA_PROVISIONER_KEY_JSON to the JSON-serialised JWK object for the mosaic-fed provisioner.',
err,
);
}
const sha = csrFingerprint(csrPem);
const now = Math.floor(Date.now() / 1000);
const kid = jwk.kid ?? 'mosaic-fed';
const header = {
alg: 'HS256',
typ: 'JWT',
kid,
};
const payload = {
iss: kid,
sub: `${caUrl}/1.0/sign`,
aud: [`${caUrl}/1.0/sign`],
iat: now,
nbf: now - 30, // 30 s clock-skew tolerance
exp: now + Math.min(ttlSeconds, 3600), // OTT validity ≤ 1 h
sha,
// Mosaic custom claims consumed by federation.tpl
mosaic_grant_id: grantId,
mosaic_subject_user_id: subjectUserId,
// step.sha is the canonical field name used in the template
step: { sha },
};
const headerB64 = b64url(JSON.stringify(header));
const payloadB64 = b64url(JSON.stringify(payload));
const signingInput = `${headerB64}.${payloadB64}`;
// Use the provisioner password as the raw HMAC-SHA256 key.
const hmac = crypto.createHmac('sha256', Buffer.from(provisionerPassword, 'utf8'));
hmac.update(signingInput);
const signature = hmac.digest();
return `${signingInput}.${b64url(signature)}`;
}
/**
* Send a JSON POST to the step-ca sign endpoint.
* Returns the parsed response body or throws CaServiceError.
*/
function httpsPost(url: string, body: unknown, agent: https.Agent): Promise<StepSignResponse> {
return new Promise((resolve, reject) => {
const bodyStr = JSON.stringify(body);
const parsed = new URL(url);
const options: https.RequestOptions = {
hostname: parsed.hostname,
port: parsed.port ? parseInt(parsed.port, 10) : 443,
path: parsed.pathname,
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Content-Length': Buffer.byteLength(bodyStr),
},
agent,
};
const req = https.request(options, (res) => {
const chunks: Buffer[] = [];
res.on('data', (chunk: Buffer) => chunks.push(chunk));
res.on('end', () => {
const raw = Buffer.concat(chunks).toString('utf8');
if (res.statusCode === 401) {
reject(
new CaServiceError(
`step-ca returned HTTP 401 — invalid or expired OTT`,
'Check STEP_CA_PROVISIONER_PASSWORD and STEP_CA_PROVISIONER_KEY_JSON. Ensure the mosaic-fed provisioner is configured in the CA.',
),
);
return;
}
if (res.statusCode && res.statusCode >= 400) {
reject(
new CaServiceError(
`step-ca returned HTTP ${res.statusCode}: ${raw.slice(0, 256)}`,
`Review the step-ca logs. Status ${res.statusCode} may indicate a CSR policy violation or misconfigured provisioner.`,
),
);
return;
}
let parsed: unknown;
try {
parsed = JSON.parse(raw) as unknown;
} catch (err) {
reject(
new CaServiceError(
'step-ca returned a non-JSON response',
'Verify STEP_CA_URL points to a running step-ca instance and that TLS is properly configured.',
err,
),
);
return;
}
resolve(parsed as StepSignResponse);
});
});
req.on('error', (err: Error) => {
reject(
new CaServiceError(
`HTTPS connection to step-ca failed: ${err.message}`,
'Ensure STEP_CA_URL is reachable and STEP_CA_ROOT_CERT_PATH points to the correct CA root certificate.',
err,
),
);
});
req.write(bodyStr);
req.end();
});
}
/**
* Extract a decimal serial number from a PEM certificate.
* Returns the hex serial if conversion is not possible.
*/
function extractSerial(certPem: string): string {
try {
const cert = new crypto.X509Certificate(certPem);
return cert.serialNumber;
} catch {
return 'unknown';
}
}
// ---------------------------------------------------------------------------
// Service
// ---------------------------------------------------------------------------
@Injectable()
export class CaService {
private readonly logger = new Logger(CaService.name);
private readonly caUrl: string;
private readonly provisionerPassword: string;
private readonly provisionerKeyJson: string;
private readonly rootCertPath: string;
private readonly httpsAgent: https.Agent;
constructor() {
const caUrl = process.env['STEP_CA_URL'];
const provisionerPassword = process.env['STEP_CA_PROVISIONER_PASSWORD'];
const provisionerKeyJson = process.env['STEP_CA_PROVISIONER_KEY_JSON'];
const rootCertPath = process.env['STEP_CA_ROOT_CERT_PATH'];
if (!caUrl) {
throw new CaServiceError(
'STEP_CA_URL is not set',
'Set STEP_CA_URL to the base URL of the step-ca instance, e.g. https://step-ca:9000',
);
}
if (!provisionerPassword) {
throw new CaServiceError(
'STEP_CA_PROVISIONER_PASSWORD is not set',
'Set STEP_CA_PROVISIONER_PASSWORD to the JWK provisioner password for the mosaic-fed provisioner.',
);
}
if (!provisionerKeyJson) {
throw new CaServiceError(
'STEP_CA_PROVISIONER_KEY_JSON is not set',
'Set STEP_CA_PROVISIONER_KEY_JSON to the JSON-encoded JWK for the mosaic-fed provisioner.',
);
}
if (!rootCertPath) {
throw new CaServiceError(
'STEP_CA_ROOT_CERT_PATH is not set',
'Set STEP_CA_ROOT_CERT_PATH to the absolute path of the step-ca root CA certificate PEM file.',
);
}
this.caUrl = caUrl;
this.provisionerPassword = provisionerPassword;
this.provisionerKeyJson = provisionerKeyJson;
this.rootCertPath = rootCertPath;
// Read the root cert and pin it for all HTTPS connections.
let rootCert: string;
try {
rootCert = fs.readFileSync(this.rootCertPath, 'utf8');
} catch (err) {
throw new CaServiceError(
`Cannot read STEP_CA_ROOT_CERT_PATH: ${rootCertPath}`,
'Ensure the file exists and is readable by the gateway process.',
err,
);
}
this.httpsAgent = new https.Agent({
ca: rootCert,
rejectUnauthorized: true,
});
this.logger.log(`CaService initialised — CA URL: ${this.caUrl}`);
}
/**
* Submit a CSR to step-ca and return the issued certificate.
*
* Throws `CaServiceError` on any failure (network, auth, malformed input).
* Never silently swallows errors — fail-loud is a hard contract per M2-02 review.
*/
async issueCert(req: IssueCertRequestDto): Promise<IssuedCertDto> {
this.logger.debug(
`issueCert — grantId=${req.grantId} subjectUserId=${req.subjectUserId} ttl=${req.ttlSeconds}s`,
);
// Validate CSR before making network calls
if (!req.csrPem || !req.csrPem.includes('CERTIFICATE REQUEST')) {
throw new CaServiceError(
'csrPem does not appear to be a valid PKCS#10 PEM',
'Provide a PEM-encoded CSR starting with -----BEGIN CERTIFICATE REQUEST-----.',
);
}
const ott = buildOtt({
caUrl: this.caUrl,
provisionerPassword: this.provisionerPassword,
provisionerKeyJson: this.provisionerKeyJson,
csrPem: req.csrPem,
grantId: req.grantId,
subjectUserId: req.subjectUserId,
ttlSeconds: req.ttlSeconds,
});
const signUrl = `${this.caUrl}/1.0/sign`;
const requestBody = {
csr: req.csrPem,
ott,
validity: {
duration: `${req.ttlSeconds}s`,
},
};
this.logger.debug(`Posting CSR to ${signUrl}`);
const response = await httpsPost(signUrl, requestBody, this.httpsAgent);
if (!response.crt) {
throw new CaServiceError(
'step-ca sign response missing the "crt" field',
'This is unexpected — the step-ca instance may be misconfigured or running an incompatible version.',
);
}
// Build certChainPem: prefer certChain array, fall back to ca field, fall back to crt alone.
let certChainPem: string;
if (response.certChain && response.certChain.length > 0) {
certChainPem = response.certChain.join('\n');
} else if (response.ca) {
certChainPem = response.crt + '\n' + response.ca;
} else {
certChainPem = response.crt;
}
const serialNumber = extractSerial(response.crt);
this.logger.log(`Certificate issued — serial=${serialNumber} grantId=${req.grantId}`);
const result = new IssuedCertDto();
result.certPem = response.crt;
result.certChainPem = certChainPem;
result.serialNumber = serialNumber;
return result;
}
}

View File

@@ -1,8 +0,0 @@
import { Module } from '@nestjs/common';
import { CaService } from './ca.service.js';
@Module({
providers: [CaService],
exports: [CaService],
})
export class FederationModule {}

View File

@@ -1,187 +0,0 @@
/**
* Unit tests for FederationScopeSchema and parseFederationScope.
*
* Coverage:
* - Valid: minimal scope
* - Valid: full PRD §8.1 example
* - Valid: resources + excluded_resources (no overlap)
* - Invalid: empty resources
* - Invalid: unknown resource value
* - Invalid: resources / excluded_resources intersection
* - Invalid: filter key not in resources
* - Invalid: max_rows_per_query = 0
* - Invalid: max_rows_per_query = 10001
* - Invalid: not an object / null
* - Defaults: include_personal defaults to true; excluded_resources defaults to []
* - Sentinel: console.warn fires for sensitive resources
*/
import { describe, it, expect, vi, afterEach } from 'vitest';
import {
parseFederationScope,
FederationScopeError,
FederationScopeSchema,
} from './scope-schema.js';
afterEach(() => {
vi.restoreAllMocks();
});
describe('parseFederationScope — valid inputs', () => {
it('accepts a minimal scope (resources + max_rows_per_query only)', () => {
const scope = parseFederationScope({
resources: ['tasks'],
max_rows_per_query: 100,
});
expect(scope.resources).toEqual(['tasks']);
expect(scope.max_rows_per_query).toBe(100);
expect(scope.excluded_resources).toEqual([]);
expect(scope.filters).toBeUndefined();
});
it('accepts the full PRD §8.1 example', () => {
const scope = parseFederationScope({
resources: ['tasks', 'notes', 'memory'],
filters: {
tasks: { include_teams: ['team_uuid_1', 'team_uuid_2'], include_personal: true },
notes: { include_personal: true, include_teams: [] },
memory: { include_personal: true },
},
excluded_resources: ['credentials', 'api_keys'],
max_rows_per_query: 500,
});
expect(scope.resources).toEqual(['tasks', 'notes', 'memory']);
expect(scope.excluded_resources).toEqual(['credentials', 'api_keys']);
expect(scope.filters?.tasks?.include_teams).toEqual(['team_uuid_1', 'team_uuid_2']);
expect(scope.max_rows_per_query).toBe(500);
});
it('accepts a scope with excluded_resources and no filter overlap', () => {
const scope = parseFederationScope({
resources: ['tasks', 'notes'],
excluded_resources: ['memory'],
max_rows_per_query: 250,
});
expect(scope.resources).toEqual(['tasks', 'notes']);
expect(scope.excluded_resources).toEqual(['memory']);
});
});
describe('parseFederationScope — defaults', () => {
it('defaults excluded_resources to []', () => {
const scope = parseFederationScope({ resources: ['tasks'], max_rows_per_query: 1 });
expect(scope.excluded_resources).toEqual([]);
});
it('defaults include_personal to true when filter is provided without it', () => {
const scope = parseFederationScope({
resources: ['tasks'],
filters: { tasks: { include_teams: ['t1'] } },
max_rows_per_query: 10,
});
expect(scope.filters?.tasks?.include_personal).toBe(true);
});
});
describe('parseFederationScope — invalid inputs', () => {
it('throws FederationScopeError for empty resources array', () => {
expect(() => parseFederationScope({ resources: [], max_rows_per_query: 100 })).toThrow(
FederationScopeError,
);
});
it('throws for unknown resource value in resources', () => {
expect(() =>
parseFederationScope({ resources: ['unknown_resource'], max_rows_per_query: 100 }),
).toThrow(FederationScopeError);
});
it('throws when resources and excluded_resources intersect', () => {
expect(() =>
parseFederationScope({
resources: ['tasks', 'memory'],
excluded_resources: ['memory'],
max_rows_per_query: 100,
}),
).toThrow(FederationScopeError);
});
it('throws when filters references a resource not in resources', () => {
expect(() =>
parseFederationScope({
resources: ['tasks'],
filters: { notes: { include_personal: true } },
max_rows_per_query: 100,
}),
).toThrow(FederationScopeError);
});
it('throws for max_rows_per_query = 0', () => {
expect(() => parseFederationScope({ resources: ['tasks'], max_rows_per_query: 0 })).toThrow(
FederationScopeError,
);
});
it('throws for max_rows_per_query = 10001', () => {
expect(() => parseFederationScope({ resources: ['tasks'], max_rows_per_query: 10001 })).toThrow(
FederationScopeError,
);
});
it('throws for null input', () => {
expect(() => parseFederationScope(null)).toThrow(FederationScopeError);
});
it('throws for non-object input (string)', () => {
expect(() => parseFederationScope('not-an-object')).toThrow(FederationScopeError);
});
});
describe('parseFederationScope — sentinel warning', () => {
it('emits console.warn when resources includes "credentials"', () => {
const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
parseFederationScope({
resources: ['tasks', 'credentials'],
max_rows_per_query: 100,
});
expect(warnSpy).toHaveBeenCalledWith(
expect.stringContaining(
'[FederationScope] WARNING: scope grants sensitive resource "credentials"',
),
);
});
it('emits console.warn when resources includes "api_keys"', () => {
const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
parseFederationScope({
resources: ['tasks', 'api_keys'],
max_rows_per_query: 100,
});
expect(warnSpy).toHaveBeenCalledWith(
expect.stringContaining(
'[FederationScope] WARNING: scope grants sensitive resource "api_keys"',
),
);
});
it('does NOT emit console.warn for non-sensitive resources', () => {
const warnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
parseFederationScope({ resources: ['tasks', 'notes', 'memory'], max_rows_per_query: 100 });
expect(warnSpy).not.toHaveBeenCalled();
});
});
describe('FederationScopeSchema — boundary values', () => {
it('accepts max_rows_per_query = 1 (lower bound)', () => {
const result = FederationScopeSchema.safeParse({ resources: ['tasks'], max_rows_per_query: 1 });
expect(result.success).toBe(true);
});
it('accepts max_rows_per_query = 10000 (upper bound)', () => {
const result = FederationScopeSchema.safeParse({
resources: ['tasks'],
max_rows_per_query: 10000,
});
expect(result.success).toBe(true);
});
});

View File

@@ -1,147 +0,0 @@
/**
* Federation grant scope schema and validator.
*
* Source of truth: docs/federation/PRD.md §8.1
*
* This module is intentionally pure — no DB, no NestJS, no CA wiring.
* It is reusable from grant CRUD (M2-06) and scope enforcement (M3+).
*/
import { z } from 'zod';
// ---------------------------------------------------------------------------
// Allowlist of federation resources (canonical — M3+ will extend this list)
// ---------------------------------------------------------------------------
export const FEDERATION_RESOURCE_VALUES = [
'tasks',
'notes',
'memory',
'credentials',
'api_keys',
] as const;
export type FederationResource = (typeof FEDERATION_RESOURCE_VALUES)[number];
/**
* Sensitive resources require explicit admin approval (PRD §8.4).
* The parser warns when these appear in `resources`; M2-06 grant CRUD
* will add a hard gate on top of this warning.
*/
const SENSITIVE_RESOURCES: ReadonlySet<FederationResource> = new Set(['credentials', 'api_keys']);
// ---------------------------------------------------------------------------
// Sub-schemas
// ---------------------------------------------------------------------------
const ResourceArraySchema = z
.array(z.enum(FEDERATION_RESOURCE_VALUES))
.nonempty({ message: 'resources must contain at least one value' })
.refine((arr) => new Set(arr).size === arr.length, {
message: 'resources must not contain duplicate values',
});
const ResourceFilterSchema = z.object({
include_teams: z.array(z.string()).optional(),
include_personal: z.boolean().default(true),
});
// ---------------------------------------------------------------------------
// Top-level schema
// ---------------------------------------------------------------------------
export const FederationScopeSchema = z
.object({
resources: ResourceArraySchema,
excluded_resources: z
.array(z.enum(FEDERATION_RESOURCE_VALUES))
.default([])
.refine((arr) => new Set(arr).size === arr.length, {
message: 'excluded_resources must not contain duplicate values',
}),
filters: z.record(z.string(), ResourceFilterSchema).optional(),
max_rows_per_query: z
.number()
.int({ message: 'max_rows_per_query must be an integer' })
.min(1, { message: 'max_rows_per_query must be at least 1' })
.max(10000, { message: 'max_rows_per_query must be at most 10000' }),
})
.superRefine((data, ctx) => {
const resourceSet = new Set(data.resources);
// Intersection guard: a resource cannot be both granted and excluded
for (const r of data.excluded_resources) {
if (resourceSet.has(r)) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: `Resource "${r}" appears in both resources and excluded_resources`,
path: ['excluded_resources'],
});
}
}
// Filter keys must be a subset of resources
if (data.filters) {
for (const key of Object.keys(data.filters)) {
if (!resourceSet.has(key as FederationResource)) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
message: `filters key "${key}" references a resource not present in resources`,
path: ['filters', key],
});
}
}
}
});
export type FederationScope = z.infer<typeof FederationScopeSchema>;
// ---------------------------------------------------------------------------
// Error class
// ---------------------------------------------------------------------------
export class FederationScopeError extends Error {
constructor(message: string) {
super(message);
this.name = 'FederationScopeError';
}
}
// ---------------------------------------------------------------------------
// Typed parser
// ---------------------------------------------------------------------------
/**
* Parse and validate an unknown value as a FederationScope.
*
* Throws `FederationScopeError` with aggregated Zod issues on failure.
*
* Emits `console.warn` when sensitive resources (`credentials`, `api_keys`)
* are present in `resources` — per PRD §8.4, these require explicit admin
* approval. M2-06 grant CRUD will add a hard gate on top of this warning.
*/
export function parseFederationScope(input: unknown): FederationScope {
const result = FederationScopeSchema.safeParse(input);
if (!result.success) {
const issues = result.error.issues
.map((e) => ` - [${e.path.join('.') || 'root'}] ${e.message}`)
.join('\n');
throw new FederationScopeError(`Invalid federation scope:\n${issues}`);
}
const scope = result.data;
// Sentinel warning for sensitive resources (PRD §8.4)
for (const resource of scope.resources) {
if (SENSITIVE_RESOURCES.has(resource)) {
console.warn(
`[FederationScope] WARNING: scope grants sensitive resource "${resource}". Per PRD §8.4 this requires explicit admin approval and is logged.`,
);
}
}
return scope;
}

View File

@@ -1,70 +0,0 @@
# deploy/portainer/
Portainer stack templates for Mosaic Stack deployments.
## Files
| File | Purpose |
| -------------------------- | -------------------------------------------------------------------------------------------------------------- |
| `federated-test.stack.yml` | Docker Swarm stack for federation end-to-end test instances (`mos-test-1.woltje.com`, `mos-test-2.woltje.com`) |
---
## federated-test.stack.yml
A self-contained Swarm stack that boots a federated-tier Mosaic gateway with co-located Postgres 17 (pgvector) and Valkey 8. This is a **test template** — production deployments will use a separate template with stricter resource limits and Docker secrets.
### Deploy via Portainer UI
1. Log into Portainer.
2. Navigate to **Stacks → Add stack**.
3. Set a stack name matching `STACK_NAME` below (e.g. `mos-test-1`).
4. Choose **Web editor** and paste the contents of `federated-test.stack.yml`.
5. Scroll to **Environment variables** and add each variable listed below.
6. Click **Deploy the stack**.
### Required environment variables
| Variable | Example | Notes |
| -------------------- | --------------------------------------- | -------------------------------------------------------- |
| `STACK_NAME` | `mos-test-1` | Unique per stack — used in Traefik router/service names. |
| `HOST_FQDN` | `mos-test-1.woltje.com` | Fully-qualified hostname served by this stack. |
| `POSTGRES_PASSWORD` | _(generate randomly)_ | Database password. Do **not** reuse between stacks. |
| `BETTER_AUTH_SECRET` | _(generate: `openssl rand -base64 32`)_ | BetterAuth session signing key. |
| `BETTER_AUTH_URL` | `https://mos-test-1.woltje.com` | Public base URL of the gateway. |
Optional variables (uncomment in the YAML or set in Portainer):
| Variable | Notes |
| ----------------------------- | ---------------------------------------------------------- |
| `ANTHROPIC_API_KEY` | Enable Claude models. |
| `OPENAI_API_KEY` | Enable OpenAI models. |
| `OTEL_EXPORTER_OTLP_ENDPOINT` | Forward traces to a collector (e.g. `http://jaeger:4318`). |
### Required external resources
Before deploying, ensure the following exist on the Swarm:
1. **`traefik-public` overlay network** — shared network Traefik uses to route traffic to stacks.
```bash
docker network create --driver overlay --attachable traefik-public
```
2. **`letsencrypt` cert resolver** — configured in the Traefik Swarm stack. The stack template references `tls.certresolver=letsencrypt`; the name must match your Traefik config.
3. **DNS A record** — `${HOST_FQDN}` must resolve to the Swarm ingress IP (or a Cloudflare-proxied address pointing there).
### Deployed instances
| Stack name | HOST_FQDN | Purpose |
| ------------ | ----------------------- | ---------------------------------- |
| `mos-test-1` | `mos-test-1.woltje.com` | DEPLOY-03 — first federation peer |
| `mos-test-2` | `mos-test-2.woltje.com` | DEPLOY-04 — second federation peer |
### Image
The gateway image is pinned by digest to `fed-v0.1.0-m1` (verified in DEPLOY-01). Update the digest in the YAML when promoting a new build — never use `:latest` or a mutable tag in Swarm.
### Notes
- This template boots a **vanilla M1-baseline gateway** in federated tier. Federation grants (Step-CA, mTLS) are M2+ scope and not included here.
- Each stack gets its own Postgres volume (`postgres-data`) and Valkey volume (`valkey-data`) scoped to the stack name by Swarm.
- `depends_on` is honoured by Compose but ignored by Swarm — healthchecks on Postgres and Valkey ensure the gateway retries until they are ready.

View File

@@ -1,160 +0,0 @@
# deploy/portainer/federated-test.stack.yml
#
# Portainer / Docker Swarm stack template — federated-tier test instance
#
# PURPOSE
# Deploys a single federated-tier Mosaic gateway with co-located Postgres
# (pgvector) and Valkey for end-to-end federation testing. Intended for
# mos-test-1.woltje.com and mos-test-2.woltje.com (DEPLOY-03/04).
#
# REQUIRED ENV VARS (set per-stack in Portainer → Stacks → Environment variables)
# STACK_NAME Unique name for Traefik router/service labels.
# Examples: mos-test-1, mos-test-2
# HOST_FQDN Fully-qualified domain name served by this stack.
# Examples: mos-test-1.woltje.com, mos-test-2.woltje.com
# POSTGRES_PASSWORD Database password — set per stack; do NOT commit a default.
# BETTER_AUTH_SECRET Random 32-char string for BetterAuth session signing.
# Generate: openssl rand -base64 32
# BETTER_AUTH_URL Public gateway base URL, e.g. https://mos-test-1.woltje.com
#
# OPTIONAL ENV VARS (uncomment and set in Portainer to enable features)
# ANTHROPIC_API_KEY sk-ant-...
# OPENAI_API_KEY sk-...
# OTEL_EXPORTER_OTLP_ENDPOINT http://<collector>:4318
# OTEL_SERVICE_NAME (default: mosaic-gateway)
#
# REQUIRED EXTERNAL RESOURCES
# traefik-public Docker overlay network — must exist before deploying.
# Create: docker network create --driver overlay --attachable traefik-public
# letsencrypt Traefik cert resolver configured on the Swarm manager.
# DNS A record ${HOST_FQDN} → Swarm ingress IP (or Cloudflare proxy).
#
# IMAGE
# Pinned to sha-9f1a081 (main HEAD post-#488 Dockerfile fix). The previous
# pin (fed-v0.1.0-m1, sha256:9b72e2...) had a broken pnpm copy and could
# not resolve @mosaicstack/storage at runtime. The new digest was smoke-
# tested locally — gateway boots, imports resolve, tier-detector runs.
# Update digest here when promoting a new build.
#
# HEALTHCHECK NOTE (2026-04-21)
# Switched from busybox wget to node http.get on 127.0.0.1 (not localhost) to
# avoid IPv6 resolution issues on Alpine. Retries increased to 5 and
# start_period to 60s to cover the NestJS/GC cold-start window (~40-50s).
# restart_policy set to `any` so SIGTERM/clean-exit also triggers restart.
#
# NOTE: This is a TEST template — production deployments use a separate
# parameterised template with stricter resource limits and secrets.
version: '3.9'
services:
gateway:
image: git.mosaicstack.dev/mosaicstack/stack/gateway@sha256:1069117740e00ccfeba357cae38c43f3729fe5ae702740ce474f6512414d7c02
# Tag for human reference: sha-9f1a081 (post-#488 Dockerfile fix; smoke-tested locally)
environment:
# ── Tier ───────────────────────────────────────────────────────────────
MOSAIC_TIER: federated
# ── Database ───────────────────────────────────────────────────────────
DATABASE_URL: postgres://gateway:${POSTGRES_PASSWORD}@postgres:5432/mosaic
# ── Queue ──────────────────────────────────────────────────────────────
VALKEY_URL: redis://valkey:6379
# ── Gateway ────────────────────────────────────────────────────────────
GATEWAY_PORT: '3000'
GATEWAY_CORS_ORIGIN: https://${HOST_FQDN}
# ── Auth ───────────────────────────────────────────────────────────────
BETTER_AUTH_SECRET: ${BETTER_AUTH_SECRET}
BETTER_AUTH_URL: https://${HOST_FQDN}
# ── Observability ──────────────────────────────────────────────────────
OTEL_SERVICE_NAME: ${STACK_NAME:-mosaic-gateway}
# OTEL_EXPORTER_OTLP_ENDPOINT: http://<collector>:4318
# ── AI Providers (uncomment to enable) ─────────────────────────────────
# ANTHROPIC_API_KEY: ${ANTHROPIC_API_KEY}
# OPENAI_API_KEY: ${OPENAI_API_KEY}
networks:
- federated-test
- traefik-public
deploy:
replicas: 1
restart_policy:
condition: any
delay: 5s
max_attempts: 3
labels:
- 'traefik.enable=true'
- 'traefik.docker.network=traefik-public'
- 'traefik.http.routers.${STACK_NAME}.rule=Host(`${HOST_FQDN}`)'
- 'traefik.http.routers.${STACK_NAME}.entrypoints=websecure'
- 'traefik.http.routers.${STACK_NAME}.tls=true'
- 'traefik.http.routers.${STACK_NAME}.tls.certresolver=letsencrypt'
- 'traefik.http.services.${STACK_NAME}.loadbalancer.server.port=3000'
healthcheck:
test:
- 'CMD'
- 'node'
- '-e'
- "require('http').get('http://127.0.0.1:3000/health',r=>process.exit(r.statusCode===200?0:1)).on('error',()=>process.exit(1))"
interval: 30s
timeout: 5s
retries: 5
start_period: 60s
depends_on:
- postgres
- valkey
postgres:
image: pgvector/pgvector:pg17
environment:
POSTGRES_USER: gateway
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
POSTGRES_DB: mosaic
volumes:
- postgres-data:/var/lib/postgresql/data
networks:
- federated-test
deploy:
replicas: 1
restart_policy:
condition: on-failure
delay: 5s
max_attempts: 3
healthcheck:
test: ['CMD-SHELL', 'pg_isready -U gateway']
interval: 10s
timeout: 5s
retries: 5
start_period: 10s
valkey:
image: valkey/valkey:8-alpine
volumes:
- valkey-data:/data
networks:
- federated-test
deploy:
replicas: 1
restart_policy:
condition: on-failure
delay: 5s
max_attempts: 3
healthcheck:
test: ['CMD', 'valkey-cli', 'ping']
interval: 10s
timeout: 3s
retries: 5
start_period: 5s
volumes:
postgres-data:
valkey-data:
networks:
federated-test:
driver: overlay
traefik-public:
external: true

View File

@@ -27,7 +27,6 @@ services:
postgres-federated: postgres-federated:
image: pgvector/pgvector:pg17 image: pgvector/pgvector:pg17
profiles: [federated] profiles: [federated]
restart: unless-stopped
ports: ports:
- '${PG_FEDERATED_HOST_PORT:-5433}:5432' - '${PG_FEDERATED_HOST_PORT:-5433}:5432'
environment: environment:
@@ -46,7 +45,6 @@ services:
valkey-federated: valkey-federated:
image: valkey/valkey:8-alpine image: valkey/valkey:8-alpine
profiles: [federated] profiles: [federated]
restart: unless-stopped
ports: ports:
- '${VALKEY_FEDERATED_HOST_PORT:-6380}:6379' - '${VALKEY_FEDERATED_HOST_PORT:-6380}:6379'
volumes: volumes:
@@ -57,64 +55,6 @@ services:
timeout: 3s timeout: 3s
retries: 5 retries: 5
# ---------------------------------------------------------------------------
# Step-CA — Mosaic Federation internal certificate authority
#
# Image: pinned to 0.27.4 (latest stable as of late 2025).
# `latest` is forbidden per Mosaic image policy (immutable tag required for
# reproducible deployments and digest-first promotion in CI).
#
# Profile: `federated` — this service must not start in non-federated dev.
#
# Password:
# Dev: bind-mount ./infra/step-ca/dev-password (gitignored; copy from
# ./infra/step-ca/dev-password.example and customise locally).
# Prod: replace the bind-mount with a Docker secret:
# secrets:
# ca_password:
# external: true
# and reference it as `/run/secrets/ca_password` (same path the
# init script already uses).
#
# Provisioner: "mosaic-fed" (consumed by apps/gateway/src/federation/ca.service.ts)
# ---------------------------------------------------------------------------
step-ca:
image: smallstep/step-ca:0.27.4
profiles: [federated]
restart: unless-stopped
ports:
- '${STEP_CA_HOST_PORT:-9000}:9000'
volumes:
- step_ca_data:/home/step
# init script — executed as the container entrypoint
- ./infra/step-ca/init.sh:/usr/local/bin/mosaic-step-ca-init.sh:ro
# X.509 template skeleton (wired in M2-04)
- ./infra/step-ca/templates:/etc/step-ca-templates:ro
# Dev password file — GITIGNORED; copy from dev-password.example
# In production, replace this with a Docker secret (see comment above).
- ./infra/step-ca/dev-password:/run/secrets/ca_password:ro
entrypoint: ['/bin/sh', '/usr/local/bin/mosaic-step-ca-init.sh']
healthcheck:
# The healthcheck requires the root cert to exist, which is only true
# after init.sh has completed on first boot. start_period gives init
# time to finish before Docker starts counting retries.
test:
[
'CMD',
'step',
'ca',
'health',
'--ca-url',
'https://localhost:9000',
'--root',
'/home/step/certs/root_ca.crt',
]
interval: 10s
timeout: 5s
retries: 5
start_period: 30s
volumes: volumes:
pg_federated_data: pg_federated_data:
valkey_federated_data: valkey_federated_data:
step_ca_data:

View File

@@ -5,27 +5,18 @@ RUN corepack enable
FROM base AS builder FROM base AS builder
WORKDIR /app WORKDIR /app
# Copy workspace manifests first for layer-cached install
COPY pnpm-workspace.yaml pnpm-lock.yaml package.json ./ COPY pnpm-workspace.yaml pnpm-lock.yaml package.json ./
COPY apps/gateway/package.json ./apps/gateway/ COPY apps/gateway/package.json ./apps/gateway/
COPY packages/ ./packages/ COPY packages/ ./packages/
COPY plugins/ ./plugins/
RUN pnpm install --frozen-lockfile RUN pnpm install --frozen-lockfile
COPY . . COPY . .
# Build gateway and all of its workspace dependencies via turbo dependency graph RUN pnpm --filter @mosaic/gateway build
RUN pnpm turbo run build --filter @mosaicstack/gateway...
# Produce a self-contained deploy artifact: flat node_modules, no pnpm symlinks
# --legacy is required for pnpm v10 when inject-workspace-packages is not set
RUN pnpm --filter @mosaicstack/gateway --prod deploy --legacy /deploy
FROM base AS runner FROM base AS runner
WORKDIR /app WORKDIR /app
ENV NODE_ENV=production ENV NODE_ENV=production
# Use the pnpm deploy output — resolves all deps into a flat, self-contained node_modules
COPY --from=builder /deploy/node_modules ./node_modules
COPY --from=builder /deploy/package.json ./package.json
# dist is declared in package.json "files" so pnpm deploy copies it into /deploy;
# copy from builder explicitly as belt-and-suspenders
COPY --from=builder /app/apps/gateway/dist ./dist COPY --from=builder /app/apps/gateway/dist ./dist
COPY --from=builder /app/apps/gateway/package.json ./package.json
COPY --from=builder /app/node_modules ./node_modules
EXPOSE 4000 EXPOSE 4000
CMD ["node", "dist/main.js"] CMD ["node", "dist/main.js"]

View File

@@ -7,22 +7,13 @@
**ID:** federation-v1-20260419 **ID:** federation-v1-20260419
**Statement:** Jarvis operates across 34 workstations in two physical locations (home, USC). The user currently reaches back to a single jarvis-brain checkout from every session; a prior OpenBrain attempt caused cache, latency, and opacity pain. This mission builds asymmetric federation between Mosaic Stack gateways so that a session on a user's home gateway can query their work gateway in real time without data ever persisting across the boundary, with full multi-tenant isolation and standard-PKI (X.509 / Step-CA) trust management. **Statement:** Jarvis operates across 34 workstations in two physical locations (home, USC). The user currently reaches back to a single jarvis-brain checkout from every session; a prior OpenBrain attempt caused cache, latency, and opacity pain. This mission builds asymmetric federation between Mosaic Stack gateways so that a session on a user's home gateway can query their work gateway in real time without data ever persisting across the boundary, with full multi-tenant isolation and standard-PKI (X.509 / Step-CA) trust management.
**Phase:** M2 active — Step-CA + grant schema + admin CLI; parallel test-deploy workstream stood up **Phase:** M1 complete — federated tier infrastructure ready for testing
**Current Milestone:** FED-M2 **Current Milestone:** FED-M2 (next; deferred to mission planning)
**Progress:** 1 / 7 milestones **Progress:** 1 / 7 milestones
**Status:** active **Status:** active
**Last Updated:** 2026-04-21 (M2 decomposed; mos-test-1/-2 designated as federation E2E test hosts) **Last Updated:** 2026-04-19 (M1 complete; tag `fed-v0.1.0-m1`)
**Parent Mission:** None — new mission **Parent Mission:** None — new mission
## Test Infrastructure
| Host | Role | Image | Tier |
| ----------------------- | ----------------------------------- | ------------------------------------- | --------- |
| `mos-test-1.woltje.com` | Federation Server A (querying side) | `gateway:fed-v0.1.0-m1` (M1 baseline) | federated |
| `mos-test-2.woltje.com` | Federation Server B (serving side) | `gateway:fed-v0.1.0-m1` (M1 baseline) | federated |
These are TEST hosts for federation E2E (M3+). Distinct from PRD AC-12 production targets (`woltje.com``uscllc.com`). Deployment workstream tracked in `docs/federation/TASKS.md` under FED-M2-DEPLOY-\*.
## Context ## Context
Federation is the solution to what originally drove OpenBrain. The prior attempt coupled every agent session to a remote service, introduced cache/latency/opacity pain, and created a hard dependency that punished offline use. This redesign: Federation is the solution to what originally drove OpenBrain. The prior attempt coupled every agent session to a remote service, introduced cache/latency/opacity pain, and created a hard dependency that punished offline use. This redesign:
@@ -63,7 +54,7 @@ Key design references:
| # | ID | Name | Status | Branch | Issue | Started | Completed | | # | ID | Name | Status | Branch | Issue | Started | Completed |
| --- | ------ | --------------------------------------------- | ----------- | ------------------ | ----- | ---------- | ---------- | | --- | ------ | --------------------------------------------- | ----------- | ------------------ | ----- | ---------- | ---------- |
| 1 | FED-M1 | Federated tier infrastructure | done | (12 PRs #470-#481) | #460 | 2026-04-19 | 2026-04-19 | | 1 | FED-M1 | Federated tier infrastructure | done | (12 PRs #470-#481) | #460 | 2026-04-19 | 2026-04-19 |
| 2 | FED-M2 | Step-CA + grant schema + admin CLI | in-progress | (decomposition) | #461 | 2026-04-21 | — | | 2 | FED-M2 | Step-CA + grant schema + admin CLI | not-started | — | #461 | | — |
| 3 | FED-M3 | mTLS handshake + list/get + scope enforcement | not-started | — | #462 | — | — | | 3 | FED-M3 | mTLS handshake + list/get + scope enforcement | not-started | — | #462 | — | — |
| 4 | FED-M4 | search verb + audit log + rate limit | not-started | — | #463 | — | — | | 4 | FED-M4 | search verb + audit log + rate limit | not-started | — | #463 | — | — |
| 5 | FED-M5 | Cache + offline degradation + OTEL | not-started | — | #464 | — | — | | 5 | FED-M5 | Cache + offline degradation + OTEL | not-started | — | #464 | — | — |
@@ -92,10 +83,6 @@ Key design references:
## Next Step ## Next Step
FED-M2 active. Decomposition landed in `docs/federation/TASKS.md` (M2-01..M2-13 code workstream + DEPLOY-01..DEPLOY-05 parallel test-deploy workstream, ~88K total). Tracking issue #482. FED-M1 complete (12 PRs #470-#481, tag `fed-v0.1.0-m1`). Federated tier infrastructure is testable end-to-end: see `docs/federation/SETUP.md` and `docs/guides/migrate-tier.md`.
Parallel execution plan: Begin FED-M2 (Step-CA + grant schema + admin CLI) when planning is greenlit. Issue #461 tracks scope; orchestrator decomposes M2 into per-task rows in `docs/federation/TASKS.md` at the start of M2.
- **CODE workstream**: M2-01 (DB migration) starts immediately — sonnet subagent on `feat/federation-m2-schema`. Then M2-02 → M2-09 sequentially with M2-04/M2-05/M2-06/M2-07 having interleaved CA/storage/grant dependencies.
- **DEPLOY workstream**: DEPLOY-01 (image verify) → DEPLOY-02 (stack template) → DEPLOY-03/04 (mos-test-1/-2 deploy) → DEPLOY-05 (TEST-INFRA.md). Gated on Portainer wrapper PR (`PORTAINER_INSECURE` flag) merging first.
- **Re-converge** at M2-10 (E2E test) once both workstreams ready.

View File

@@ -117,68 +117,3 @@ docker compose -f docker-compose.federated.yml logs valkey-federated
``` ```
If Valkey is running, verify your firewall allows 6380. On macOS, Docker Desktop may require binding to `host.docker.internal` instead of `localhost`. If Valkey is running, verify your firewall allows 6380. On macOS, Docker Desktop may require binding to `host.docker.internal` instead of `localhost`.
## OID Assignments — Mosaic Internal OID Arc
Mosaic uses the private enterprise arc `1.3.6.1.4.1.99999` for custom X.509
certificate extensions in federation grant certificates.
**IMPORTANT:** This is a development/internal OID arc. Before deploying to a
production environment accessible by external parties, register a proper IANA
Private Enterprise Number (PEN) at <https://pen.iana.org/pen/PenApplication.page>
and update these assignments accordingly.
### Assigned OIDs
| OID | Symbolic name | Description |
| --------------------- | --------------------------------- | --------------------------------------------------------- |
| `1.3.6.1.4.1.99999.1` | `mosaic.federation.grantId` | UUID of the `federation_grants` row authorising this cert |
| `1.3.6.1.4.1.99999.2` | `mosaic.federation.subjectUserId` | UUID of the local user on whose behalf the cert is issued |
### Encoding
Each extension value is DER-encoded as an ASN.1 **UTF8String**:
```
Tag 0x0C (UTF8String)
Length 0x24 (36 decimal — fixed length of a UUID string)
Value <36 ASCII bytes of the UUID>
```
The step-ca X.509 template at `infra/step-ca/templates/federation.tpl`
produces this encoding via the Go template expression:
```
{{ printf "\x0c\x24%s" .Token.mosaic_grant_id | b64enc }}
```
The resulting base64 value is passed as the `value` field of the extension
object in the template JSON.
### CA Environment Variables
The `CaService` (`apps/gateway/src/federation/ca.service.ts`) requires the
following environment variables at gateway startup:
| Variable | Required | Description |
| ------------------------------ | -------- | -------------------------------------------------------------------- |
| `STEP_CA_URL` | Yes | Base URL of the step-ca instance, e.g. `https://step-ca:9000` |
| `STEP_CA_PROVISIONER_PASSWORD` | Yes | JWK provisioner password for the `mosaic-fed` provisioner |
| `STEP_CA_PROVISIONER_KEY_JSON` | Yes | JSON-encoded JWK (public + private) for the `mosaic-fed` provisioner |
| `STEP_CA_ROOT_CERT_PATH` | Yes | Absolute path to the step-ca root CA certificate PEM file |
Set these variables in your environment or secret manager before starting
the gateway. In the federated Docker Compose stack they are expected to be
injected via Docker secrets and environment variable overrides.
### Fail-loud contract
The CA service (and the X.509 template) are designed to fail loudly if the
custom OIDs cannot be embedded:
- The template produces a malformed extension value (zero-length UTF8String
body) when the JWT claims `mosaic_grant_id` or `mosaic_subject_user_id` are
absent. step-ca rejects the CSR rather than issuing a cert without the OIDs.
- `CaService.issueCert()` throws a `CaServiceError` on every error path with
a human-readable `remediation` string. It never silently returns a cert that
may be missing the required extensions.

View File

@@ -36,52 +36,9 @@ Goal: Gateway runs in `federated` tier with containerized PG+pgvector+Valkey. No
--- ---
## Pre-M2 — Test deployment infrastructure (FED-M2-DEPLOY)
Goal: Two federated-tier gateways stood up on Portainer at `mos-test-1.woltje.com` and `mos-test-2.woltje.com` running the M1 release (`gateway:fed-v0.1.0-m1`). This is the test bed for M2 enrollment work and the M3 federation E2E harness. No federation logic exercised yet — pure infrastructure validation.
> **Why now:** M2 enrollment requires a real second gateway to test peer-add flows; standing the test hosts up before M2 code lands gives both code and deployment streams a fast feedback loop.
> **Parallelizable:** This workstream runs in parallel with the M2 code workstream (M2-01 → M2-13). They re-converge at M2-10 (E2E test).
> **Tracking issue:** #482.
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
| --------------------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----- | ------ | ------------------------------------- | ------------ | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------- |
| FED-M2-DEPLOY-01 | done | Verify `gateway:fed-v0.1.0-m1` image was published by `.woodpecker/publish.yml` on tag push; if not, investigate and remediate. Document image URI in deployment artifact. | #482 | sonnet | (verified inline, no PR) | — | 2K | Tag exists; digest `sha256:9b72e202a9eecc27d31920b87b475b9e96e483c0323acc57856be4b1355db1ec` captured for digest-pinned deploys. |
| FED-M2-DEPLOY-02 | done | Author Portainer git-stack compose file `deploy/portainer/federated-test.stack.yml` (gateway + PG-pgvector + Valkey, env-driven). Use immutable tag, not `latest`. | #482 | sonnet | feat/federation-deploy-stack-template | DEPLOY-01 | 5K | Shipped in PR #485. Digest-pinned. Env: STACK_NAME, HOST_FQDN, POSTGRES_PASSWORD, BETTER_AUTH_SECRET, BETTER_AUTH_URL. |
| FED-M2-DEPLOY-IMG-FIX | in-progress | Gateway image runtime broken (ERR_MODULE_NOT_FOUND for `dotenv`); Dockerfile copies `.pnpm/` store but not `apps/gateway/node_modules` symlinks. Switch to `pnpm deploy` for self-contained runtime. | #482 | sonnet | (subagent in flight) | DEPLOY-02 | 4K | Subagent `a78a9ab0ddae91fbc` in flight. Triggers Kaniko rebuild on merge; capture new digest; bump stack template in follow-up PR before redeploy. |
| FED-M2-DEPLOY-03 | blocked | Deploy stack to mos-test-1.woltje.com via `~/.config/mosaic/tools/portainer/`. Verify M1 acceptance: federated-tier boot succeeds; `mosaic gateway doctor --json` returns green; pgvector `vector(3)` round-trip works. | #482 | sonnet | feat/federation-deploy-test-1 | IMG-FIX | 3K | Stack created on Portainer endpoint 3 (Swarm `local`), but blocked on image fix. Container fails on boot until IMG-FIX merges + redeploy. |
| FED-M2-DEPLOY-04 | blocked | Deploy stack to mos-test-2.woltje.com via Portainer wrapper. Same M1 acceptance probes as DEPLOY-03. | #482 | sonnet | feat/federation-deploy-test-2 | IMG-FIX | 3K | Same status as DEPLOY-03. Stack created; blocked on image fix. |
| FED-M2-DEPLOY-05 | not-started | Document deployment in `docs/federation/TEST-INFRA.md`: hosts, image tags, secrets sourcing, redeploy procedure, teardown. Update MISSION-MANIFEST with deployment status. | #482 | haiku | feat/federation-deploy-docs | DEPLOY-03,04 | 3K | Operator-facing doc; mentions but does not duplicate `tools/portainer/README.md`. |
**Deploy workstream estimate:** ~16K tokens
---
## Milestone 2 — Step-CA + grant schema + admin CLI (FED-M2) ## Milestone 2 — Step-CA + grant schema + admin CLI (FED-M2)
Goal: An admin can create a federation grant; counterparty enrolls; cert is signed by Step-CA with SAN OIDs for `grantId` + `subjectUserId`. No runtime federation traffic flows yet (that's M3). _Deferred to mission planning when M1 is complete. Issue #461 tracks scope._
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ---------------------------------- | ---------------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| FED-M2-01 | needs-qa | DB migration: `federation_grants`, `federation_peers`, `federation_audit_log` tables + enum types (`grant_status`, `peer_state`). Drizzle schema + migration generation; migration tests. | #461 | sonnet | feat/federation-m2-schema | — | 5K | PR #486 open. First review NEEDS CHANGES (missing DESC indexes + reserved cols). Remediation subagent `a673dd9355dc26f82` in flight in worktree `agent-a4404ac1`. |
| FED-M2-02 | not-started | Add Step-CA sidecar to `docker-compose.federated.yml`: official `smallstep/step-ca` image, persistent CA volume, JWK provisioner config baked into init script. | #461 | sonnet | feat/federation-m2-stepca | DEPLOY-02 | 4K | Profile-gated under `federated`. CA password from secret; dev compose uses dev-only password file. |
| FED-M2-03 | not-started | Scope JSON schema + validator: `resources` allowlist, `excluded_resources`, `include_teams`, `include_personal`, `max_rows_per_query`. Vitest unit tests for valid + invalid scopes. | #461 | sonnet | feat/federation-m2-scope-schema | — | 4K | Validator independent of CA — reusable from grant CRUD + (later) M3 scope enforcement. |
| FED-M2-04 | not-started | `apps/gateway/src/federation/ca.service.ts`: Step-CA client (CSR submission, OID-bearing cert retrieval). Mocked + integration tests against real Step-CA container. | #461 | sonnet | feat/federation-m2-ca-service | M2-02 | 6K | SAN OIDs: `grantId` (custom OID 1.3.6.1.4.1.99999.1) + `subjectUserId` (1.3.6.1.4.1.99999.2). Document OID assignments in PRD/SETUP. **Acceptance**: must (a) wire `federation.tpl` template into `mosaic-fed` provisioner config and (b) include a unit/integration test asserting issued certs contain BOTH OIDs — fails-loud guard against silent OID stripping (carry-forward from M2-02 review). |
| FED-M2-05 | not-started | Sealed storage for `client_key_pem` reusing existing `provider_credentials` sealing key. Tests prove DB-at-rest is ciphertext, not PEM. Key rotation path documented (deferred impl). | #461 | sonnet | feat/federation-m2-key-sealing | M2-01 | 5K | Separate from M2-06 to keep crypto seam isolated; reviewer focus is sealing only. |
| FED-M2-06 | not-started | `grants.service.ts`: CRUD + status transitions (`pending``active``revoked`); integrates M2-03 (scope) + M2-05 (sealing). Unit tests cover all transitions including invalid ones. | #461 | sonnet | feat/federation-m2-grants-service | M2-03, M2-05 | 6K | Business logic only — CSR + cert work delegated to M2-04. Revocation handler is M6. |
| FED-M2-07 | not-started | `enrollment.controller.ts`: short-lived single-use token endpoint; CSR signing; updates grant `pending``active`; emits enrollment audit (table-only write, M4 tightens). | #461 | sonnet | feat/federation-m2-enrollment | M2-04, M2-06 | 6K | Tokens single-use with 410 on replay; tokens TTL'd at 15min; rate-limited at request layer (M4 introduces guard, M2 uses simple lock). |
| FED-M2-08 | not-started | Admin CLI: `mosaic federation grant create/list/show` + `peer add/list`. Integration with grants.service (no API duplication). Help output + machine-readable JSON option. | #461 | sonnet | feat/federation-m2-cli | M2-06, M2-07 | 7K | `peer add <enrollment-url>` is the client-side flow; resolves enrollment URL → CSR → store sealed key + cert. |
| FED-M2-09 | not-started | Integration tests covering MILESTONES.md M2 acceptance tests #1, #2, #3, #5, #7, #8 (single-gateway suite). Real Step-CA container; vitest profile gated by `FEDERATED_INTEGRATION=1`. | #461 | sonnet | feat/federation-m2-integration | M2-08 | 8K | Tests #4 (cert OID match) + #6 (two-gateway peer-add) handled separately by M2-10 (E2E). |
| FED-M2-10 | not-started | E2E test against deployed mos-test-1 + mos-test-2 (or local two-gateway docker-compose if Portainer not ready): MILESTONES test #6 `peer add` yields `active` peer record with valid cert + key. | #461 | sonnet | feat/federation-m2-e2e | M2-08, DEPLOY-04 | 6K | Falls back to local docker-compose-two-gateways if remote test hosts not yet available. Documents both paths. |
| FED-M2-11 | not-started | Independent security review (sonnet, not author of M2-04/05/06/07): focus on single-use token replay, sealing leak surfaces, OID match enforcement, scope schema bypass paths. | #461 | sonnet | feat/federation-m2-security-review | M2-10 | 8K | Apply M1 two-round pattern. Reviewer should explicitly attempt enrollment-token replay, OID-spoofing CSR, and key leak in error messages. |
| FED-M2-12 | not-started | Docs update: `docs/federation/SETUP.md` Step-CA section; new `docs/federation/ADMIN-CLI.md` with grant/peer commands; scope schema reference; OID registration note. Runbook still M7-deferred. | #461 | haiku | feat/federation-m2-docs | M2-11 | 4K | Adds CA bootstrap section to SETUP.md with `docker compose --profile federated up step-ca` example. |
| FED-M2-13 | not-started | PR aggregate close, CI green, merge to main, close #461. Release tag `fed-v0.2.0-m2`. Mark deploy stream complete. Update mission manifest M2 row. | #461 | sonnet | feat/federation-m2-close | M2-12 | 3K | Same close pattern as M1-12; queue-guard before merge; tea release-create with notes including deploy-stream PRs. |
**M2 code workstream estimate:** ~72K tokens (vs MILESTONES.md 30K — same over-budget pattern as M1, where per-task breakdown including tests/review/docs catches the real cost).
**Deploy + code combined:** ~88K tokens.
## Milestone 3 — mTLS handshake + list/get + scope enforcement (FED-M3) ## Milestone 3 — mTLS handshake + list/get + scope enforcement (FED-M3)

View File

@@ -523,92 +523,3 @@ Independent security review surfaced three high-impact and four medium findings;
- #8: confirm `packages/config/dist` not git-tracked - #8: confirm `packages/config/dist` not git-tracked
**Next mission step:** FED-M2 (Step-CA + grant schema + admin CLI). Per TASKS.md scope rule, M2 will be decomposed when it enters active planning. Issue #461 tracks scope. **Next mission step:** FED-M2 (Step-CA + grant schema + admin CLI). Per TASKS.md scope rule, M2 will be decomposed when it enters active planning. Issue #461 tracks scope.
## Session 20 — 2026-04-21 — FED-M2 kickoff
### Decisions
- **Workstream split**: parallel CODE (M2-01..M2-13, ~72K) + DEPLOY (DEPLOY-01..DEPLOY-05, ~16K) tracks; re-converge at M2-10 E2E.
- **Test hosts**: `mos-test-1.woltje.com` (querying side / Server A), `mos-test-2.woltje.com` (serving side / Server B). Wildcard `*.woltje.com` A→174.137.97.162 already exists; Traefik wildcard cert covers both subdomains. No DNS or cert work needed pre-deploy.
- **Portainer access**: requires `PORTAINER_INSECURE=1` flag added to mosaic wrappers (self-signed cert at `https://10.1.1.43:9443`). PR pending on `feat/mosaic-portainer-tls-flag`.
- **Image policy**: deploy by digest (immutable) per Mosaic policy. `gateway:fed-v0.1.0-m1` digest = `sha256:9b72e202a9eecc27d31920b87b475b9e96e483c0323acc57856be4b1355db1ec`.
### DEPLOY-01 — image manifest verified
- Tag `fed-v0.1.0-m1` exists at `git.mosaicstack.dev/mosaicstack/stack/gateway`
- Digest: `sha256:9b72e202a9eecc27d31920b87b475b9e96e483c0323acc57856be4b1355db1ec`
- 9 layers, ~530MB total
- Use this digest in DEPLOY-02 stack template (do NOT reference `:fed-v0.1.0-m1` tag in stack — pin to digest)
### Registry auth note
- Gitea container registry uses Bearer token flow (`/v2/token?service=container_registry&scope=repository:<repo>:pull`)
- Username: `jarvis` (NOT `mosaicstack`); password: `gitea.mosaicstack.token` from credentials.json
- Direct `Authorization: Bearer <pat>` does NOT work — must exchange PAT for registry token first
### Active PRs
- #483 — docs: M2 mission planning (TASKS decomposition + manifest update) — CI running
- (pending) `feat/mosaic-portainer-tls-flag` — wrapper PORTAINER_INSECURE flag (sonnet subagent in progress)
- (pending) `feat/federation-m2-schema` — FED-M2-01 DB schema migration (sonnet subagent in progress)
### MISSION-MANIFEST layout fix
- Initial M2 commit had Test Infrastructure block inserted by lint-staged prettier between "Last Updated" and "Parent Mission" — split mission frontmatter
- Fixed in 3d001fdb: moved Parent Mission back to frontmatter, kept Test Infrastructure as standalone H2 between Mission and Context
## Session 21 — 2026-04-21/22 — DEPLOY-02 merged, gateway image bug discovered, M2-01 in remediation
### PRs merged
- **#483** — docs(federation): M2 mission planning (TASKS decomposition + manifest update)
- **#484** — feat(mosaic-portainer): PORTAINER_INSECURE flag for self-signed TLS (wrapper sync to `~/.config/mosaic/tools/portainer/` done manually due to broken `mosaic upgrade` `set -o pipefail` on dash)
- **#485** — feat(deploy): portainer stack template `deploy/portainer/federated-test.stack.yml` for federation test instances [DEPLOY-02]
### Stack deployed (mos-test-1, mos-test-2)
- Both stacks created on Portainer endpoint 3 (`local` Swarm @ 10.1.1.43, the only endpoint with traefik-public + woltje.com wildcard cert)
- Swarm ID `l7z67tfpd4bvj4979ufpkyi50`
- Image pinned to digest `sha256:9b72e202a9eecc27d31920b87b475b9e96e483c0323acc57856be4b1355db1ec`
- Traefik labels target `${HOST_FQDN}` per env
### CRITICAL FINDING — gateway image runtime-broken
- `docker run` against `gateway:fed-v0.1.0-m1` fails immediately:
`Error [ERR_MODULE_NOT_FOUND]: Cannot find package 'dotenv' imported from /app/dist/main.js`
- Root cause: `docker/gateway.Dockerfile` copies `/app/node_modules` from builder — but pnpm puts deps in the content-addressed `.pnpm/` store with symlinks at `apps/gateway/node_modules/*`. The runner stage misses the symlinks → Node can't resolve workspace deps.
- M1 release was never runtime-tested as a stripped container; CI passed because tests run in dev tree where pnpm symlinks are intact.
- **Fix in flight** (subagent `a78a9ab0ddae91fbc`): switch builder to `pnpm --filter @mosaic/gateway --prod deploy /deploy`, then runner copies `/deploy/node_modules` + `/deploy/dist` + `/deploy/package.json`.
### M2-01 schema review verdict — NEEDS CHANGES
- PR #486 (`feat/federation-m2-schema`) — independent reviewer (sonnet) found 2 real issues:
1. `federation_audit_log` time-range indexes missing `.desc()` on `created_at` (3 places)
2. Reserved columns missing per TASKS.md M2-01 spec: `query_hash`, `outcome`, `bytes_out` (M4 will write; spec said reserve now)
- Also notes (advisory): subject_user_id correctly `text` (matches BetterAuth users.id; spec defect, not code defect); peer→grant cascade test not present (would be trivial to add)
- **Remediation in flight** (subagent `a673dd9355dc26f82` in worktree `agent-a4404ac1`): apply DESC + reserved cols, regenerate migration in place (preferred) or stack 0009 (fallback), force-push, post PR comment.
### Process notes
- Branch race incident: schema subagent + wrapper subagent both ran in main checkout → schema files appeared on wrapper branch. Recovered by TaskStop, `git checkout --` to clean, respawned schema subagent with `isolation: "worktree"`. **Rule going forward:** any subagent doing code edits gets `isolation: "worktree"` unless work is single-file and the orchestrator confirms no other branch will touch overlapping files.
- `pr-create.sh` shell-quotes backticks badly → use `tea pr create --repo mosaicstack/stack` directly (matches CLI-skill behavior). Will leave a followup to harden pr-create.sh.
- Gitea registry auth: bearer-token exchange flow (`/v2/token?service=container_registry&scope=repository:<repo>:pull`) — direct `Authorization: Bearer <pat>` returns 401.
- Portainer Swarm stack create endpoint: `POST /api/stacks/create/swarm/string?endpointId=<id>` (NOT `/api/stacks?type=1` — deprecated and rejected with 400).
### In-flight at compaction boundary
- Subagent `a78a9ab0ddae91fbc` — Dockerfile pnpm-deploy fix → PR (not yet opened at handoff)
- Subagent `a673dd9355dc26f82` — M2-01 schema remediation (DESC + reserved cols) → force-push to PR #486
- Both will trigger CI; orchestrator must independently re-review fixes (especially the security-adjacent schema work) per "always verify subagent claims" rule.
### Next after subagents return
1. Independent re-review of schema remediation (different subagent, fresh context)
2. Merge #486 if green
3. Merge Dockerfile fix PR if green → triggers Kaniko CI rebuild → capture new digest
4. Update `deploy/portainer/federated-test.stack.yml` to new digest in a small PR
5. Redeploy mos-test-1 + mos-test-2 (Portainer stack update via API)
6. Verify HTTPS reachability + `/health` endpoint at both hosts
7. DEPLOY-03/04 acceptance probes (`mosaic gateway doctor --json`, pgvector `vector(3)` round-trip)
8. DEPLOY-05: author `docs/federation/TEST-INFRA.md`
9. M2-02 (Step-CA sidecar) kicks off after image health is green

View File

@@ -1 +0,0 @@
dev-only-step-ca-password-do-not-use-in-production

View File

@@ -1,90 +0,0 @@
#!/bin/sh
# infra/step-ca/init.sh
#
# Idempotent first-boot initialiser for the Mosaic Federation CA.
#
# On the first run (no /home/step/config/ca.json present) this script:
# 1. Initialises Step-CA with a JWK provisioner named "mosaic-fed".
# 2. Writes the CA configuration to the persistent volume at /home/step.
# 3. Copies the federation X.509 template into the CA config directory.
# 4. Patches the mosaic-fed provisioner entry in ca.json to reference the
# template via options.x509.templateFile (using jq — must be installed
# in the container image).
#
# On subsequent runs (config already exists) this script skips init and
# starts the CA directly.
#
# The provisioner name "mosaic-fed" is consumed by:
# apps/gateway/src/federation/ca.service.ts (added in M2-04)
#
# Password source:
# Dev: mounted from ./infra/step-ca/dev-password via bind mount.
# Prod: mounted from a Docker secret at /run/secrets/ca_password.
#
# OID template:
# infra/step-ca/templates/federation.tpl emits custom OID extensions:
# 1.3.6.1.4.1.99999.1 — mosaic_grant_id
# 1.3.6.1.4.1.99999.2 — mosaic_subject_user_id
set -e
CA_CONFIG="/home/step/config/ca.json"
PASSWORD_FILE="/run/secrets/ca_password"
TEMPLATE_SRC="/etc/step-ca-templates/federation.tpl"
TEMPLATE_DEST="/home/step/templates/federation.tpl"
if [ ! -f "${CA_CONFIG}" ]; then
echo "[step-ca init] First boot detected — initialising Mosaic Federation CA..."
step ca init \
--name "Mosaic Federation CA" \
--dns "localhost" \
--dns "step-ca" \
--address ":9000" \
--provisioner "mosaic-fed" \
--password-file "${PASSWORD_FILE}" \
--provisioner-password-file "${PASSWORD_FILE}" \
--no-db
echo "[step-ca init] CA initialised."
# Copy the X.509 template into the Step-CA config directory.
if [ -f "${TEMPLATE_SRC}" ]; then
mkdir -p /home/step/templates
cp "${TEMPLATE_SRC}" "${TEMPLATE_DEST}"
echo "[step-ca init] Federation X.509 template copied to ${TEMPLATE_DEST}."
else
echo "[step-ca init] WARNING: Template source ${TEMPLATE_SRC} not found — skipping copy."
fi
# Wire the template into the mosaic-fed provisioner via jq.
# This is idempotent: the block only runs once (first boot).
#
# jq filter: find the provisioner entry with name "mosaic-fed" and set
# .options.x509.templateFile to the absolute path of the template.
# All other provisioners and config keys are left unchanged.
if [ -f "${TEMPLATE_DEST}" ] && command -v jq > /dev/null 2>&1; then
echo "[step-ca init] Patching mosaic-fed provisioner with X.509 template..."
TEMP_CONFIG="${CA_CONFIG}.tmp"
jq --arg tpl "${TEMPLATE_DEST}" '
.authority.provisioners |= map(
if .name == "mosaic-fed" then
.options.x509.templateFile = $tpl
else
.
end
)
' "${CA_CONFIG}" > "${TEMP_CONFIG}" && mv "${TEMP_CONFIG}" "${CA_CONFIG}"
echo "[step-ca init] Provisioner patched."
elif ! command -v jq > /dev/null 2>&1; then
echo "[step-ca init] WARNING: jq not found — skipping provisioner template patch."
echo "[step-ca init] Install jq in the step-ca image to enable automatic template wiring."
fi
echo "[step-ca init] Startup complete."
else
echo "[step-ca init] Config already exists — skipping init."
fi
echo "[step-ca init] Starting Step-CA on :9000..."
exec step-ca /home/step/config/ca.json --password-file "${PASSWORD_FILE}"

View File

@@ -1,56 +0,0 @@
{
"subject": {{ toJson .Subject }},
"sans": {{ toJson .SANs }},
{{- /*
Mosaic Federation X.509 Certificate Template
============================================
Provisioner: mosaic-fed (JWK)
Implemented: FED-M2-04
This template emits two custom OID extensions carrying Mosaic federation
identifiers. The OTT token (built by CaService.buildOtt) MUST include the
claims `mosaic_grant_id` and `mosaic_subject_user_id` as top-level JWT
claims. step-ca exposes them under `.Token.<claim>` in this template.
OID Registry (Mosaic Internal Arc 1.3.6.1.4.1.99999):
1.3.6.1.4.1.99999.1 mosaic_grant_id (UUID, 36 ASCII chars)
1.3.6.1.4.1.99999.2 mosaic_subject_user_id (UUID, 36 ASCII chars)
DER encoding for each extension value (ASN.1 UTF8String):
Tag = 0x0C (UTF8String)
Length = 0x24 (decimal 36 the fixed length of a UUID string)
Value = 36 ASCII bytes of the UUID
The `printf` below builds the raw TLV bytes then base64-encodes them.
step-ca expects the `value` field to be base64-encoded raw DER bytes.
Fail-loud contract:
If either claim is missing from the token the printf will produce a
zero-length UUID field, making the extension malformed. step-ca will
reject the certificate rather than issuing one without the required OIDs.
Silent OID stripping is NEVER tolerated.
Step-CA template reference:
https://smallstep.com/docs/step-ca/templates
*/ -}}
"extensions": [
{
"id": "1.3.6.1.4.1.99999.1",
"critical": false,
"value": "{{ printf "\x0c\x24%s" .Token.mosaic_grant_id | b64enc }}"
},
{
"id": "1.3.6.1.4.1.99999.2",
"critical": false,
"value": "{{ printf "\x0c\x24%s" .Token.mosaic_subject_user_id | b64enc }}"
}
],
"keyUsage": ["digitalSignature"],
"extKeyUsage": ["clientAuth"],
"basicConstraints": {
"isCA": false
}
}

View File

@@ -13,14 +13,6 @@ export PORTAINER_URL="https://portainer.example.com:9443"
export PORTAINER_API_KEY="your-api-key-here" export PORTAINER_API_KEY="your-api-key-here"
``` ```
If your Portainer instance uses a self-signed TLS certificate (e.g. internal LAN), set:
```bash
export PORTAINER_INSECURE=1
```
This passes `-k` to all curl calls, bypassing certificate verification. Do not set this against public/production instances.
You can add these to your shell profile (`~/.bashrc`, `~/.zshrc`) or use a `.env` file. You can add these to your shell profile (`~/.bashrc`, `~/.zshrc`) or use a `.env` file.
### Creating an API Key ### Creating an API Key

View File

@@ -46,14 +46,8 @@ fi
# Remove trailing slash from URL # Remove trailing slash from URL
PORTAINER_URL="${PORTAINER_URL%/}" PORTAINER_URL="${PORTAINER_URL%/}"
# TLS options
CURL_OPTS=()
if [ "${PORTAINER_INSECURE:-0}" = "1" ]; then
CURL_OPTS+=(-k)
fi
# Fetch endpoints # Fetch endpoints
response=$(curl -s "${CURL_OPTS[@]}" -w "\n%{http_code}" \ response=$(curl -s -w "\n%{http_code}" \
-H "X-API-Key: ${PORTAINER_API_KEY}" \ -H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}/api/endpoints") "${PORTAINER_URL}/api/endpoints")

View File

@@ -52,14 +52,8 @@ fi
# Remove trailing slash from URL # Remove trailing slash from URL
PORTAINER_URL="${PORTAINER_URL%/}" PORTAINER_URL="${PORTAINER_URL%/}"
# TLS options
CURL_OPTS=()
if [ "${PORTAINER_INSECURE:-0}" = "1" ]; then
CURL_OPTS+=(-k)
fi
# Fetch stacks # Fetch stacks
response=$(curl -s "${CURL_OPTS[@]}" -w "\n%{http_code}" \ response=$(curl -s -w "\n%{http_code}" \
-H "X-API-Key: ${PORTAINER_API_KEY}" \ -H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}/api/stacks") "${PORTAINER_URL}/api/stacks")

View File

@@ -64,18 +64,12 @@ fi
# Remove trailing slash from URL # Remove trailing slash from URL
PORTAINER_URL="${PORTAINER_URL%/}" PORTAINER_URL="${PORTAINER_URL%/}"
# TLS options
CURL_OPTS=()
if [ "${PORTAINER_INSECURE:-0}" = "1" ]; then
CURL_OPTS+=(-k)
fi
# Function to make API requests # Function to make API requests
api_request() { api_request() {
local method="$1" local method="$1"
local endpoint="$2" local endpoint="$2"
curl -s "${CURL_OPTS[@]}" -w "\n%{http_code}" -X "$method" \ curl -s -w "\n%{http_code}" -X "$method" \
-H "X-API-Key: ${PORTAINER_API_KEY}" \ -H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}${endpoint}" "${PORTAINER_URL}${endpoint}"
} }
@@ -171,7 +165,7 @@ fi
# Note: Docker API returns raw log stream, not JSON # Note: Docker API returns raw log stream, not JSON
if [[ "$FOLLOW" == "true" ]]; then if [[ "$FOLLOW" == "true" ]]; then
# Stream logs # Stream logs
curl -s "${CURL_OPTS[@]}" -N \ curl -s -N \
-H "X-API-Key: ${PORTAINER_API_KEY}" \ -H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}/api/endpoints/${ENDPOINT_ID}/docker/containers/${CONTAINER_ID}/logs?${params}" | \ "${PORTAINER_URL}/api/endpoints/${ENDPOINT_ID}/docker/containers/${CONTAINER_ID}/logs?${params}" | \
# Docker log format has 8-byte header per line, strip it # Docker log format has 8-byte header per line, strip it
@@ -181,7 +175,7 @@ if [[ "$FOLLOW" == "true" ]]; then
done done
else else
# Get logs (non-streaming) # Get logs (non-streaming)
curl -s "${CURL_OPTS[@]}" \ curl -s \
-H "X-API-Key: ${PORTAINER_API_KEY}" \ -H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}/api/endpoints/${ENDPOINT_ID}/docker/containers/${CONTAINER_ID}/logs?${params}" | \ "${PORTAINER_URL}/api/endpoints/${ENDPOINT_ID}/docker/containers/${CONTAINER_ID}/logs?${params}" | \
# Docker log format has 8-byte header per line, attempt to strip it # Docker log format has 8-byte header per line, attempt to strip it

View File

@@ -63,19 +63,13 @@ fi
# Remove trailing slash from URL # Remove trailing slash from URL
PORTAINER_URL="${PORTAINER_URL%/}" PORTAINER_URL="${PORTAINER_URL%/}"
# TLS options
CURL_OPTS=()
if [ "${PORTAINER_INSECURE:-0}" = "1" ]; then
CURL_OPTS+=(-k)
fi
# Function to make API requests # Function to make API requests
api_request() { api_request() {
local method="$1" local method="$1"
local endpoint="$2" local endpoint="$2"
local data="${3:-}" local data="${3:-}"
local args=(-s "${CURL_OPTS[@]}" -w "\n%{http_code}" -X "$method" -H "X-API-Key: ${PORTAINER_API_KEY}") local args=(-s -w "\n%{http_code}" -X "$method" -H "X-API-Key: ${PORTAINER_API_KEY}")
if [[ -n "$data" ]]; then if [[ -n "$data" ]]; then
args+=(-H "Content-Type: application/json" -d "$data") args+=(-H "Content-Type: application/json" -d "$data")

View File

@@ -54,18 +54,12 @@ fi
# Remove trailing slash from URL # Remove trailing slash from URL
PORTAINER_URL="${PORTAINER_URL%/}" PORTAINER_URL="${PORTAINER_URL%/}"
# TLS options
CURL_OPTS=()
if [ "${PORTAINER_INSECURE:-0}" = "1" ]; then
CURL_OPTS+=(-k)
fi
# Function to make API requests # Function to make API requests
api_request() { api_request() {
local method="$1" local method="$1"
local endpoint="$2" local endpoint="$2"
curl -s "${CURL_OPTS[@]}" -w "\n%{http_code}" -X "$method" \ curl -s -w "\n%{http_code}" -X "$method" \
-H "X-API-Key: ${PORTAINER_API_KEY}" \ -H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}${endpoint}" "${PORTAINER_URL}${endpoint}"
} }

View File

@@ -57,18 +57,12 @@ fi
# Remove trailing slash from URL # Remove trailing slash from URL
PORTAINER_URL="${PORTAINER_URL%/}" PORTAINER_URL="${PORTAINER_URL%/}"
# TLS options
CURL_OPTS=()
if [ "${PORTAINER_INSECURE:-0}" = "1" ]; then
CURL_OPTS+=(-k)
fi
# Function to make API requests # Function to make API requests
api_request() { api_request() {
local method="$1" local method="$1"
local endpoint="$2" local endpoint="$2"
curl -s "${CURL_OPTS[@]}" -w "\n%{http_code}" -X "$method" \ curl -s -w "\n%{http_code}" -X "$method" \
-H "X-API-Key: ${PORTAINER_API_KEY}" \ -H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}${endpoint}" "${PORTAINER_URL}${endpoint}"
} }

View File

@@ -54,18 +54,12 @@ fi
# Remove trailing slash from URL # Remove trailing slash from URL
PORTAINER_URL="${PORTAINER_URL%/}" PORTAINER_URL="${PORTAINER_URL%/}"
# TLS options
CURL_OPTS=()
if [ "${PORTAINER_INSECURE:-0}" = "1" ]; then
CURL_OPTS+=(-k)
fi
# Function to make API requests # Function to make API requests
api_request() { api_request() {
local method="$1" local method="$1"
local endpoint="$2" local endpoint="$2"
curl -s "${CURL_OPTS[@]}" -w "\n%{http_code}" -X "$method" \ curl -s -w "\n%{http_code}" -X "$method" \
-H "X-API-Key: ${PORTAINER_API_KEY}" \ -H "X-API-Key: ${PORTAINER_API_KEY}" \
"${PORTAINER_URL}${endpoint}" "${PORTAINER_URL}${endpoint}"
} }