Compare commits

...

10 Commits

Author SHA1 Message Date
Jarvis
0e0ad9defe fixup: federation_audit_log DESC indexes + reserved M4 columns
All checks were successful
ci/woodpecker/pr/ci Pipeline was successful
ci/woodpecker/push/ci Pipeline was successful
- Add .desc() to all three federation_audit_log created_at indexes for
  reverse-chronological scans (PRD section 7.3)
- Add reserved nullable columns query_hash, outcome, bytes_out per
  TASKS.md M2-01 spec (written by M4, columns reserved now to avoid
  retroactive migration)
- Regenerate migration 0008 in-place (replaces 0008_careless_lake.sql
  with 0008_smart_lyja.sql containing DESC indexes + new columns)
- Update integration test: add reserved columns to CREATE TABLE in
  beforeAll; add 7th test for peer→grant cascade delete

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 20:52:00 -05:00
Jarvis
a1ab4386fe feat(db): federation schema (grants/peers/audit_log) [FED-M2-01]
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
Adds Drizzle ORM schema and migration for federation v1 milestone 2:

- New enums: peer_state ('pending'|'active'|'suspended'|'revoked'),
  grant_status ('active'|'revoked'|'expired')
- New tables: federation_peers, federation_grants, federation_audit_log
- FK cascades: user delete cascades grants; peer delete set-nulls audit_log
- Migration: 0008_careless_lake.sql
- Integration tests (FEDERATED_INTEGRATION=1): 6/6 pass

No business logic, no services, no DTOs — schema only.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 20:38:25 -05:00
54c422ab06 Merge pull request 'docs(federation): close FED-M1 milestone' (#481) from feat/federation-m1-close into main
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/tag/publish Pipeline was successful
2026-04-20 02:20:43 +00:00
Jarvis
b9fb8aab57 docs(federation): close FED-M1 milestone
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
- TASKS.md: mark FED-M1-12 done with PR/issue/tag references
- MISSION-MANIFEST.md: phase=M1 complete, progress 1/7, M1 row done with PR range #470-#481, session log appended
- scratchpad: Session 19 entry covering M1-09 → M1-12 with PR ledger and M1 retrospective learnings

Refs #460
2026-04-19 21:12:52 -05:00
78841f228a docs(federation): operator setup + migration guides (FED-M1-11) (#480)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
2026-04-20 02:07:15 +00:00
dc4afee848 fix(storage): redact credentials in driver errors + advisory lock (FED-M1-10) (#479)
Some checks failed
ci/woodpecker/push/ci Pipeline failed
ci/woodpecker/push/publish Pipeline failed
2026-04-20 02:02:57 +00:00
1e2b8ac8de test(federation): standalone regression canary — no breakage from M1 (FED-M1-09) (#478)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
2026-04-20 01:46:35 +00:00
15d849c166 test(storage): integration test for migrate-tier (FED-M1-08) + camelCase column fix (#477)
Some checks failed
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline failed
2026-04-20 01:40:02 +00:00
78251d4af8 test(federation): integration tests for federated tier gateway boot (FED-M1-07) (#476)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
2026-04-20 01:13:10 +00:00
1a4b1ebbf1 feat(gateway,storage): mosaic gateway doctor with tier health JSON (FED-M1-06) (#475)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
2026-04-20 01:00:39 +00:00
32 changed files with 6572 additions and 326 deletions

View File

@@ -80,6 +80,8 @@ If you already have a gateway account but no token, use `mosaic gateway config r
### Configuration ### Configuration
Mosaic supports three storage tiers: `local` (PGlite, single-host), `standalone` (PostgreSQL, single-host), and `federated` (PostgreSQL + pgvector + Valkey, multi-host). See [Federated Tier Setup](docs/federation/SETUP.md) for multi-user and production deployments, or [Migrating to Federated](docs/guides/migrate-tier.md) to upgrade from existing tiers.
```bash ```bash
mosaic config show # Print full config as JSON mosaic config show # Print full config as JSON
mosaic config get <key> # Read a specific key mosaic config get <key> # Read a specific key

View File

@@ -0,0 +1,64 @@
/**
* Test B — Gateway boot refuses (fail-fast) when PG is unreachable.
*
* Prereq: docker compose -f docker-compose.federated.yml --profile federated up -d
* (Valkey must be running; only PG is intentionally misconfigured.)
* Run: FEDERATED_INTEGRATION=1 pnpm --filter @mosaicstack/gateway test src/__tests__/integration/federated-boot.pg-unreachable.integration.test.ts
*
* Skipped when FEDERATED_INTEGRATION !== '1'.
*/
import net from 'node:net';
import { beforeAll, describe, expect, it } from 'vitest';
import { TierDetectionError, detectAndAssertTier } from '@mosaicstack/storage';
const run = process.env['FEDERATED_INTEGRATION'] === '1';
const VALKEY_URL = 'redis://localhost:6380';
/**
 * Obtains a port that is currently closed by asking the OS for an ephemeral
 * port (listen on port 0) and releasing it right away. Freshly released
 * ephemeral ports are very unlikely to be handed out again before this test
 * finishes, though that is best-effort rather than a hard OS guarantee.
 */
async function reserveClosedPort(): Promise<number> {
  return new Promise((resolve, reject) => {
    const probe = net.createServer();
    probe.on('error', reject);
    probe.listen(0, '127.0.0.1', () => {
      const bound = probe.address();
      if (bound === null || typeof bound === 'string') {
        reject(new Error('no addr'));
        return;
      }
      probe.close(() => resolve(bound.port));
    });
  });
}
describe.skipIf(!run)('federated boot — PG unreachable', () => {
  let badPgUrl: string;

  beforeAll(async () => {
    // Point the PG URL at a port we know nothing is listening on.
    const deadPort = await reserveClosedPort();
    badPgUrl = `postgresql://mosaic:mosaic@localhost:${deadPort}/mosaic`;
  });

  it('detectAndAssertTier throws TierDetectionError with service: postgres when PG is down', async () => {
    const config = {
      tier: 'federated' as const,
      queue: {
        type: 'bullmq',
        url: VALKEY_URL,
      },
      storage: {
        type: 'postgres' as const,
        url: badPgUrl,
        enableVector: true,
      },
    };
    // The rejection must be the structured error, attributed to postgres —
    // not valkey and not a raw driver error.
    const isPgFailure = (err: unknown): boolean =>
      err instanceof TierDetectionError && err.service === 'postgres';
    await expect(detectAndAssertTier(config)).rejects.toSatisfy(isPgFailure);
  }, 10_000);
});

View File

@@ -0,0 +1,50 @@
/**
* Test A — Gateway boot succeeds when federated services are up.
*
* Prereq: docker compose -f docker-compose.federated.yml --profile federated up -d
* Run: FEDERATED_INTEGRATION=1 pnpm --filter @mosaicstack/gateway test src/__tests__/integration/federated-boot.success.integration.test.ts
*
* Skipped when FEDERATED_INTEGRATION !== '1'.
*/
import postgres from 'postgres';
import { afterAll, describe, expect, it } from 'vitest';
import { detectAndAssertTier } from '@mosaicstack/storage';
const run = process.env['FEDERATED_INTEGRATION'] === '1';
const PG_URL = 'postgresql://mosaic:mosaic@localhost:5433/mosaic';
const VALKEY_URL = 'redis://localhost:6380';
const federatedConfig = {
  tier: 'federated' as const,
  storage: {
    type: 'postgres' as const,
    url: PG_URL,
    enableVector: true,
  },
  queue: {
    type: 'bullmq',
    url: VALKEY_URL,
  },
};

describe.skipIf(!run)('federated boot — success path', () => {
  let sql: ReturnType<typeof postgres> | undefined;

  afterAll(async () => {
    // Best-effort teardown of the shared connection; ignore close errors.
    await sql?.end({ timeout: 2 }).catch(() => {});
  });

  it('detectAndAssertTier resolves without throwing when federated services are up', async () => {
    await expect(detectAndAssertTier(federatedConfig)).resolves.toBeUndefined();
  }, 10_000);

  it('pgvector extension is registered (pg_extension row exists)', async () => {
    sql = postgres(PG_URL, { max: 1, connect_timeout: 5, idle_timeout: 5 });
    const extensions = await sql`SELECT * FROM pg_extension WHERE extname = 'vector'`;
    expect(extensions).toHaveLength(1);
  }, 10_000);
});

View File

@@ -0,0 +1,43 @@
/**
* Test C — pgvector extension is functional end-to-end.
*
* Creates a temp table with a vector(3) column, inserts a row, and queries it
* back — confirming the extension is not just registered but operational.
*
* Prereq: docker compose -f docker-compose.federated.yml --profile federated up -d
* Run: FEDERATED_INTEGRATION=1 pnpm --filter @mosaicstack/gateway test src/__tests__/integration/federated-pgvector.integration.test.ts
*
* Skipped when FEDERATED_INTEGRATION !== '1'.
*/
import postgres from 'postgres';
import { afterAll, describe, expect, it } from 'vitest';
const run = process.env['FEDERATED_INTEGRATION'] === '1';
const PG_URL = 'postgresql://mosaic:mosaic@localhost:5433/mosaic';
let sql: ReturnType<typeof postgres> | undefined;

afterAll(async () => {
  // Best-effort teardown; a close failure must not fail the suite.
  await sql?.end({ timeout: 2 }).catch(() => {});
});

describe.skipIf(!run)('federated pgvector — functional end-to-end', () => {
  it('vector ops round-trip: INSERT [1,2,3] and SELECT returns [1,2,3]', async () => {
    sql = postgres(PG_URL, { max: 1, connect_timeout: 5, idle_timeout: 5 });
    await sql`CREATE TEMP TABLE t (id int, embedding vector(3))`;
    await sql`INSERT INTO t VALUES (1, '[1,2,3]')`;
    const rows = await sql`SELECT embedding FROM t`;
    expect(rows).toHaveLength(1);
    // The driver surfaces vector columns in their text form, e.g. '[1,2,3]',
    // which happens to be valid JSON — parse it rather than string-compare.
    const embedding = JSON.parse(rows[0]?.['embedding'] as string) as number[];
    expect(embedding).toEqual([1, 2, 3]);
  }, 10_000);
});

View File

@@ -1,220 +0,0 @@
/**
* Tier Detector — pre-flight service reachability assertions.
*
* Runs BEFORE NestFactory.create() to surface actionable errors immediately
* rather than crashing mid-boot with an opaque stack trace.
*
* Library choices:
* - Postgres: `postgres` npm package (already a dep via @mosaicstack/db / drizzle-orm).
* The spec mentions `pg`, but only `postgres` is installed in this monorepo.
* - Valkey: `ioredis` (already a dep via @mosaicstack/queue → bullmq; same URL
* convention used by the bullmq adapter).
*/
import postgres from 'postgres';
import { Redis } from 'ioredis';
import type { MosaicConfig } from '@mosaicstack/config';
/* ------------------------------------------------------------------ */
/* Structured error type */
/* ------------------------------------------------------------------ */
/**
 * Structured, actionable error raised by the tier probes.
 *
 * Carries the failing service, the host:port that was probed, and a
 * remediation hint so operators can fix the problem without reading source.
 * The original `cause` is preserved via the Error `cause` option.
 */
export class TierDetectionError extends Error {
  public readonly service: 'postgres' | 'valkey' | 'pgvector' | 'config';
  public readonly host: string;
  public readonly port: number;
  public readonly remediation: string;
  constructor(opts: {
    service: 'postgres' | 'valkey' | 'pgvector' | 'config';
    host: string;
    port: number;
    remediation: string;
    cause?: unknown;
  }) {
    // Fix: delimit host:port from the remediation hint. Previously they were
    // concatenated directly, yielding e.g. "localhost:5432Start Postgres: …".
    const message =
      `[tier-detector] ${opts.service} unreachable or unusable at ` +
      `${opts.host}:${opts.port}. ${opts.remediation}`;
    super(message, { cause: opts.cause });
    this.name = 'TierDetectionError';
    this.service = opts.service;
    this.host = opts.host;
    this.port = opts.port;
    this.remediation = opts.remediation;
  }
}
/* ------------------------------------------------------------------ */
/* URL helpers */
/* ------------------------------------------------------------------ */
/**
 * Parse the host and port out of `url`. Never throws: an unparseable URL
 * yields `{ host: 'unknown', port: defaultPort }`, since the result is only
 * used to build error messages.
 */
function parseHostPort(url: string, defaultPort: number): { host: string; port: number } {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return { host: 'unknown', port: defaultPort };
  }
  return {
    host: parsed.hostname === '' ? 'unknown' : parsed.hostname,
    port: parsed.port === '' ? defaultPort : Number.parseInt(parsed.port, 10),
  };
}
/* ------------------------------------------------------------------ */
/* Postgres probe */
/* ------------------------------------------------------------------ */
/**
 * Probe Postgres reachability: open a single connection and run `SELECT 1`.
 * Any failure (including connection-pool construction) is wrapped in a
 * TierDetectionError carrying a docker-compose remediation hint.
 */
async function probePostgres(url: string): Promise<void> {
  const { host, port } = parseHostPort(url, 5432);
  let sql: ReturnType<typeof postgres> | undefined;
  try {
    sql = postgres(url, { max: 1, connect_timeout: 5, idle_timeout: 5 });
    // A trivial round-trip is enough to prove connectivity.
    await sql`SELECT 1`;
  } catch (cause) {
    throw new TierDetectionError({
      service: 'postgres',
      host,
      port,
      remediation:
        'Start Postgres: `docker compose -f docker-compose.federated.yml --profile federated up -d postgres-federated`',
      cause,
    });
  } finally {
    // Close best-effort; a teardown failure must not mask the probe result.
    await sql?.end({ timeout: 2 }).catch(() => {});
  }
}
/* ------------------------------------------------------------------ */
/* pgvector probe */
/* ------------------------------------------------------------------ */
/**
 * Probe that the pgvector extension is usable. `CREATE EXTENSION IF NOT
 * EXISTS` is idempotent, so it succeeds whether the extension is already
 * installed or freshly created; it fails only when the shared library is
 * missing from the Postgres image, or the role lacks CREATE privilege —
 * the two cases get different remediation hints.
 */
async function probePgvector(url: string): Promise<void> {
  const { host, port } = parseHostPort(url, 5432);
  let sql: ReturnType<typeof postgres> | undefined;
  try {
    sql = postgres(url, { max: 1, connect_timeout: 5, idle_timeout: 5 });
    await sql`CREATE EXTENSION IF NOT EXISTS vector`;
  } catch (cause) {
    const lowered = cause instanceof Error ? cause.message.toLowerCase() : '';
    const libraryMissing = lowered.includes('extension "vector" is not available');
    throw new TierDetectionError({
      service: 'pgvector',
      host,
      port,
      remediation: libraryMissing
        ? 'Use the `pgvector/pgvector:pg17` image, not the stock `postgres:17` image. See `docker-compose.federated.yml`.'
        : 'The database role lacks permission to CREATE EXTENSION. Grant `CREATE` on the database, or run as a superuser.',
      cause,
    });
  } finally {
    // Best-effort cleanup; ignore close errors.
    await sql?.end({ timeout: 2 }).catch(() => {});
  }
}
/* ------------------------------------------------------------------ */
/* Valkey probe */
/* ------------------------------------------------------------------ */
const DEFAULT_VALKEY_URL = 'redis://localhost:6380';
/**
 * Probe Valkey reachability with a fail-fast PING: no retries, explicit
 * connect, 5-second hard cap. Any failure is wrapped in a TierDetectionError
 * with a docker-compose remediation hint.
 */
async function probeValkey(url: string): Promise<void> {
  const { host, port } = parseHostPort(url, 6380);
  const client = new Redis(url, {
    lazyConnect: true, // we connect explicitly below
    connectTimeout: 5000, // hard 5-second cap on the attempt
    enableReadyCheck: false,
    maxRetriesPerRequest: 0,
    retryStrategy: () => null, // never retry — surface the failure immediately
  });
  try {
    await client.connect();
    const reply = await client.ping();
    if (reply !== 'PONG') {
      throw new Error(`Unexpected PING response: ${reply}`);
    }
  } catch (cause) {
    throw new TierDetectionError({
      service: 'valkey',
      host,
      port,
      remediation:
        'Start Valkey: `docker compose -f docker-compose.federated.yml --profile federated up -d valkey-federated`',
      cause,
    });
  } finally {
    client.disconnect();
  }
}
/* ------------------------------------------------------------------ */
/* Public entry point */
/* ------------------------------------------------------------------ */
/**
 * Assert that every service the configured tier depends on is reachable.
 *
 * - `local` — no-op (PGlite runs in-process; nothing external to probe).
 * - `standalone` — Postgres, plus Valkey when queue.type === 'bullmq'.
 * - `federated` — Postgres + Valkey + pgvector installability; a non-bullmq
 *   queue is rejected upfront as a configuration error.
 *
 * Throws `TierDetectionError` on the first failure, carrying host:port and a
 * remediation hint.
 */
export async function detectAndAssertTier(config: MosaicConfig): Promise<void> {
  if (config.tier === 'local') return;

  const pgUrl =
    config.storage.type === 'postgres'
      ? config.storage.url
      : 'postgresql://localhost:5432/mosaic';

  if (config.tier === 'standalone') {
    await probePostgres(pgUrl);
    if (config.queue.type === 'bullmq') {
      await probeValkey(config.queue.url ?? DEFAULT_VALKEY_URL);
    }
    return;
  }

  // tier === 'federated' — bullmq (Valkey) is mandatory here.
  if (config.queue.type !== 'bullmq') {
    throw new TierDetectionError({
      service: 'config',
      host: 'localhost',
      port: 0,
      remediation:
        "Federated tier requires queue.type === 'bullmq'. " +
        "Set queue: { type: 'bullmq', url: 'redis://...' } in your mosaic.config.json.",
    });
  }

  await probePostgres(pgUrl);
  await probeValkey(config.queue.url ?? DEFAULT_VALKEY_URL);
  await probePgvector(pgUrl);
}

View File

@@ -25,7 +25,7 @@ import { AppModule } from './app.module.js';
import { mountAuthHandler } from './auth/auth.controller.js'; import { mountAuthHandler } from './auth/auth.controller.js';
import { mountMcpHandler } from './mcp/mcp.controller.js'; import { mountMcpHandler } from './mcp/mcp.controller.js';
import { McpService } from './mcp/mcp.service.js'; import { McpService } from './mcp/mcp.service.js';
import { detectAndAssertTier, TierDetectionError } from './bootstrap/tier-detector.js'; import { detectAndAssertTier, TierDetectionError } from '@mosaicstack/storage';
async function bootstrap(): Promise<void> { async function bootstrap(): Promise<void> {
const logger = new Logger('Bootstrap'); const logger = new Logger('Bootstrap');

View File

@@ -7,11 +7,11 @@
**ID:** federation-v1-20260419 **ID:** federation-v1-20260419
**Statement:** Jarvis operates across 34 workstations in two physical locations (home, USC). The user currently reaches back to a single jarvis-brain checkout from every session; a prior OpenBrain attempt caused cache, latency, and opacity pain. This mission builds asymmetric federation between Mosaic Stack gateways so that a session on a user's home gateway can query their work gateway in real time without data ever persisting across the boundary, with full multi-tenant isolation and standard-PKI (X.509 / Step-CA) trust management. **Statement:** Jarvis operates across 34 workstations in two physical locations (home, USC). The user currently reaches back to a single jarvis-brain checkout from every session; a prior OpenBrain attempt caused cache, latency, and opacity pain. This mission builds asymmetric federation between Mosaic Stack gateways so that a session on a user's home gateway can query their work gateway in real time without data ever persisting across the boundary, with full multi-tenant isolation and standard-PKI (X.509 / Step-CA) trust management.
**Phase:** Planning complete — M1 implementation not started **Phase:** M1 complete — federated tier infrastructure ready for testing
**Current Milestone:** FED-M1 **Current Milestone:** FED-M2 (next; deferred to mission planning)
**Progress:** 0 / 7 milestones **Progress:** 1 / 7 milestones
**Status:** active **Status:** active
**Last Updated:** 2026-04-19 (PRD + MILESTONES + tracking issues filed) **Last Updated:** 2026-04-19 (M1 complete; tag `fed-v0.1.0-m1`)
**Parent Mission:** None — new mission **Parent Mission:** None — new mission
## Context ## Context
@@ -52,8 +52,8 @@ Key design references:
## Milestones ## Milestones
| # | ID | Name | Status | Branch | Issue | Started | Completed | | # | ID | Name | Status | Branch | Issue | Started | Completed |
| --- | ------ | --------------------------------------------- | ----------- | ------ | ----- | ------- | --------- | | --- | ------ | --------------------------------------------- | ----------- | ------------------ | ----- | ---------- | ---------- |
| 1 | FED-M1 | Federated tier infrastructure | not-started | — | #460 | — | — | | 1 | FED-M1 | Federated tier infrastructure | done | (12 PRs #470-#481) | #460 | 2026-04-19 | 2026-04-19 |
| 2 | FED-M2 | Step-CA + grant schema + admin CLI | not-started | — | #461 | — | — | | 2 | FED-M2 | Step-CA + grant schema + admin CLI | not-started | — | #461 | — | — |
| 3 | FED-M3 | mTLS handshake + list/get + scope enforcement | not-started | — | #462 | — | — | | 3 | FED-M3 | mTLS handshake + list/get + scope enforcement | not-started | — | #462 | — | — |
| 4 | FED-M4 | search verb + audit log + rate limit | not-started | — | #463 | — | — | | 4 | FED-M4 | search verb + audit log + rate limit | not-started | — | #463 | — | — |
@@ -77,9 +77,12 @@ Key design references:
## Session History ## Session History
| Session | Date | Runtime | Outcome | | Session | Date | Runtime | Outcome |
| ------- | ---------- | ------- | --------------------------------------------------- | | ------- | ---------- | ------- | --------------------------------------------------------------------- |
| S1 | 2026-04-19 | claude | PRD authored, MILESTONES decomposed, 7 issues filed | | S1 | 2026-04-19 | claude | PRD authored, MILESTONES decomposed, 7 issues filed |
| S2-S4 | 2026-04-19 | claude | FED-M1 complete: 12 tasks (PRs #470-#481) merged; tag `fed-v0.1.0-m1` |
## Next Step ## Next Step
Begin FED-M1 implementation: federated tier infrastructure. Breakdown in `docs/federation/TASKS.md`. FED-M1 complete (12 PRs #470-#481, tag `fed-v0.1.0-m1`). Federated tier infrastructure is testable end-to-end: see `docs/federation/SETUP.md` and `docs/guides/migrate-tier.md`.
Begin FED-M2 (Step-CA + grant schema + admin CLI) when planning is greenlit. Issue #461 tracks scope; orchestrator decomposes M2 into per-task rows in `docs/federation/TASKS.md` at the start of M2.

119
docs/federation/SETUP.md Normal file
View File

@@ -0,0 +1,119 @@
# Federated Tier Setup Guide
## What is the federated tier?
The federated tier is designed for multi-user and multi-host deployments. It consists of PostgreSQL 17 with pgvector extension (for embeddings and RAG), Valkey for distributed task queueing and caching, and a shared configuration across multiple Mosaic gateway instances. Use this tier when running Mosaic in production or when scaling beyond a single-host deployment.
## Prerequisites
- Docker and Docker Compose installed
- Ports 5433 (PostgreSQL) and 6380 (Valkey) available on your host (or adjust environment variables)
- At least 2 GB free disk space for data volumes
## Start the federated stack
Run the federated overlay:
```bash
docker compose -f docker-compose.federated.yml --profile federated up -d
```
This starts PostgreSQL 17 with pgvector and Valkey 8. The pgvector extension is created automatically on first boot.
Verify the services are running:
```bash
docker compose -f docker-compose.federated.yml ps
```
Expected output shows `postgres-federated` and `valkey-federated` both healthy.
## Configure mosaic for federated tier
Create or update your `mosaic.config.json`:
```json
{
"tier": "federated",
"database": "postgresql://mosaic:mosaic@localhost:5433/mosaic",
"queue": "redis://localhost:6380"
}
```
If you're using environment variables instead:
```bash
export DATABASE_URL="postgresql://mosaic:mosaic@localhost:5433/mosaic"
export REDIS_URL="redis://localhost:6380"
```
## Verify health
Run the health check:
```bash
mosaic gateway doctor
```
Expected output (green):
```
Tier: federated Config: mosaic.config.json
✓ postgres localhost:5433 (42ms)
✓ valkey localhost:6380 (8ms)
✓ pgvector (embedded) (15ms)
```
For JSON output (useful in CI/automation):
```bash
mosaic gateway doctor --json
```
## Troubleshooting
### Port conflicts
**Symptom:** `bind: address already in use`
**Fix:** Stop the base dev stack first:
```bash
docker compose down
docker compose -f docker-compose.federated.yml --profile federated up -d
```
Or change the host port with an environment variable:
```bash
PG_FEDERATED_HOST_PORT=5434 VALKEY_FEDERATED_HOST_PORT=6381 \
docker compose -f docker-compose.federated.yml --profile federated up -d
```
### pgvector extension error
**Symptom:** `ERROR: could not open extension control file`
**Fix:** pgvector is created at first boot. Check logs:
```bash
docker compose -f docker-compose.federated.yml logs postgres-federated | grep -i vector
```
If missing, exec into the container and create it manually:
```bash
docker exec <postgres-federated-id> psql -U mosaic -d mosaic -c "CREATE EXTENSION vector;"
```
### Valkey connection refused
**Symptom:** `Error: connect ECONNREFUSED 127.0.0.1:6380`
**Fix:** Check service health:
```bash
docker compose -f docker-compose.federated.yml logs valkey-federated
```
If Valkey is running, verify your firewall allows 6380. On macOS, Docker Desktop may require binding to `host.docker.internal` instead of `localhost`.

View File

@@ -16,19 +16,19 @@
Goal: Gateway runs in `federated` tier with containerized PG+pgvector+Valkey. No federation logic yet. Existing standalone behavior does not regress. Goal: Gateway runs in `federated` tier with containerized PG+pgvector+Valkey. No federation logic yet. Existing standalone behavior does not regress.
| id | status | description | issue | agent | branch | depends_on | estimate | notes | | id | status | description | issue | agent | branch | depends_on | estimate | notes |
| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ------------------------------- | ---------- | -------- | ---------------------------------------------------------------------------------------------------------------------------------- | | --------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ---------------------------------- | ---------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------ |
| FED-M1-01 | done | Extend `mosaic.config.json` schema: add `"federated"` to `tier` enum in validator + TS types. Keep `local` and `standalone` working. Update schema docs/README where referenced. | #460 | sonnet | feat/federation-m1-tier-config | — | 4K | Shipped in PR #470. Renamed `team``standalone`; added `team` deprecation alias; added `DEFAULT_FEDERATED_CONFIG`. | | FED-M1-01 | done | Extend `mosaic.config.json` schema: add `"federated"` to `tier` enum in validator + TS types. Keep `local` and `standalone` working. Update schema docs/README where referenced. | #460 | sonnet | feat/federation-m1-tier-config | — | 4K | Shipped in PR #470. Renamed `team``standalone`; added `team` deprecation alias; added `DEFAULT_FEDERATED_CONFIG`. |
| FED-M1-02 | done | Author `docker-compose.federated.yml` as an overlay profile: Postgres 17 + pgvector extension (port 5433), Valkey (6380), named volumes, healthchecks. Compose-up should boot cleanly on a clean machine. | #460 | sonnet | feat/federation-m1-compose | FED-M1-01 | 5K | Shipped in PR #471. Overlay defines `postgres-federated`/`valkey-federated`, profile-gated, with pg-init for pgvector extension. | | FED-M1-02 | done | Author `docker-compose.federated.yml` as an overlay profile: Postgres 17 + pgvector extension (port 5433), Valkey (6380), named volumes, healthchecks. Compose-up should boot cleanly on a clean machine. | #460 | sonnet | feat/federation-m1-compose | FED-M1-01 | 5K | Shipped in PR #471. Overlay defines `postgres-federated`/`valkey-federated`, profile-gated, with pg-init for pgvector extension. |
| FED-M1-03 | done | Add pgvector support to `packages/storage/src/adapters/postgres.ts`: create extension on init (idempotent), expose vector column type in schema helpers. No adapter changes for non-federated tiers. | #460 | sonnet | feat/federation-m1-pgvector | FED-M1-02 | 8K | Shipped in PR #472. `enableVector` flag on postgres StorageConfig; idempotent CREATE EXTENSION before migrations. | | FED-M1-03 | done | Add pgvector support to `packages/storage/src/adapters/postgres.ts`: create extension on init (idempotent), expose vector column type in schema helpers. No adapter changes for non-federated tiers. | #460 | sonnet | feat/federation-m1-pgvector | FED-M1-02 | 8K | Shipped in PR #472. `enableVector` flag on postgres StorageConfig; idempotent CREATE EXTENSION before migrations. |
| FED-M1-04 | done | Implement `apps/gateway/src/bootstrap/tier-detector.ts`: reads config, asserts PG/Valkey/pgvector reachable for `federated`, fail-fast with actionable error message on failure. Unit tests for each failure mode. | #460 | sonnet | feat/federation-m1-detector | FED-M1-03 | 8K | Shipped in PR #473. 12 tests; 5s timeouts on probes; pgvector library/permission discrimination; rejects non-bullmq for federated. | | FED-M1-04 | done | Implement `apps/gateway/src/bootstrap/tier-detector.ts`: reads config, asserts PG/Valkey/pgvector reachable for `federated`, fail-fast with actionable error message on failure. Unit tests for each failure mode. | #460 | sonnet | feat/federation-m1-detector | FED-M1-03 | 8K | Shipped in PR #473. 12 tests; 5s timeouts on probes; pgvector library/permission discrimination; rejects non-bullmq for federated. |
| FED-M1-05 | in-progress | Write `scripts/migrate-to-federated.ts`: one-way migration from `local` (PGlite) / `standalone` (PG without pgvector) → `federated`. Dumps, transforms, loads; dry-run + confirm UX. Idempotent on re-run. | #460 | sonnet | feat/federation-m1-migrate | FED-M1-04 | 10K | Do NOT run automatically. CLI subcommand `mosaic migrate tier --to federated --dry-run`. Safety rails. | | FED-M1-05 | done | Write `scripts/migrate-to-federated.ts`: one-way migration from `local` (PGlite) / `standalone` (PG without pgvector) → `federated`. Dumps, transforms, loads; dry-run + confirm UX. Idempotent on re-run. | #460 | sonnet | feat/federation-m1-migrate | FED-M1-04 | 10K | Shipped in PR #474. `mosaic storage migrate-tier`; DrizzleMigrationSource (corrects P0 found in review); 32 tests; idempotent. |
| FED-M1-06 | not-started | Update `mosaic doctor`: report current tier, required services, actual health per service, pgvector presence, overall green/yellow/red. Machine-readable JSON output flag for CI use. | #460 | sonnet | feat/federation-m1-doctor | FED-M1-04 | 6K | Existing doctor output evolves; add `--json` flag. Green/yellow/red + remediation suggestions per issue. | | FED-M1-06 | done | Update `mosaic doctor`: report current tier, required services, actual health per service, pgvector presence, overall green/yellow/red. Machine-readable JSON output flag for CI use. | #460 | sonnet | feat/federation-m1-doctor | FED-M1-04 | 6K | Shipped in PR #475 as `mosaic gateway doctor`. Probes lifted to @mosaicstack/storage; structural TierConfig breaks dep cycle. |
| FED-M1-07 | not-started | Integration test: gateway boots in `federated` tier with docker-compose `federated` profile; refuses to boot when PG unreachable (asserts fail-fast); pgvector extension query succeeds. | #460 | sonnet | feat/federation-m1-integration | FED-M1-04 | 8K | Vitest + docker-compose test profile. One test file per assertion; real services, no mocks. | | FED-M1-07 | done | Integration test: gateway boots in `federated` tier with docker-compose `federated` profile; refuses to boot when PG unreachable (asserts fail-fast); pgvector extension query succeeds. | #460 | sonnet | feat/federation-m1-integration | FED-M1-04 | 8K | Shipped in PR #476. 3 test files, 4 tests, gated by FEDERATED_INTEGRATION=1; reserved-port helper avoids host collisions. |
| FED-M1-08 | not-started | Integration test for migration script: seed a local PGlite with representative data (tasks, notes, users, teams), run migration, assert row counts + key samples equal on federated PG. | #460 | sonnet | feat/federation-m1-migrate-test | FED-M1-05 | 6K | Runs against docker-compose federated profile; uses temp PGlite file; deterministic seed. | | FED-M1-08 | done | Integration test for migration script: seed a local PGlite with representative data (tasks, notes, users, teams), run migration, assert row counts + key samples equal on federated PG. | #460 | sonnet | feat/federation-m1-migrate-test | FED-M1-05 | 6K | Shipped in PR #477. Caught P0 in M1-05 (camelCase→snake_case) missed by mocked unit tests; fix in same PR. |
| FED-M1-09 | not-started | Standalone regression: full agent-session E2E on existing `standalone` tier with a gateway built from this branch. Must pass without referencing any federation module. | #460 | haiku | feat/federation-m1-regression | FED-M1-07 | 4K | Reuse existing e2e harness; just re-point at the federation branch build. Canary that we didn't break it. | | FED-M1-09 | done | Standalone regression: full agent-session E2E on existing `standalone` tier with a gateway built from this branch. Must pass without referencing any federation module. | #460 | sonnet | feat/federation-m1-regression | FED-M1-07 | 4K | Clean canary. 351 gateway tests + 85 storage unit tests + full pnpm test all green; only FEDERATED_INTEGRATION-gated tests skip. |
| FED-M1-10 | not-started | Code review pass: security-focused on the migration script (data-at-rest during migration) + tier detector (error-message sensitivity leakage). Independent reviewer, not authors of tasks 01-09. | #460 | sonnet | — | FED-M1-09 | 8K | Use `feature-dev:code-reviewer` agent. Specifically: no secrets in error messages; no partial-migration footguns. | | FED-M1-10 | done | Code review pass: security-focused on the migration script (data-at-rest during migration) + tier detector (error-message sensitivity leakage). Independent reviewer, not authors of tasks 01-09. | #460 | sonnet | feat/federation-m1-security-review | FED-M1-09 | 8K | 2 review rounds caught 7 issues: credential leak in pg/valkey/pgvector errors + redact-error util; missing advisory lock; SKIP_TABLES rationale. |
| FED-M1-11 | not-started | Docs update: `docs/federation/` operator notes for tier setup; README blurb on federated tier; `docs/guides/` entry for migration. Do NOT touch runbook yet (deferred to FED-M7). | #460 | haiku | feat/federation-m1-docs | FED-M1-10 | 4K | Short, actionable. Link from MISSION-MANIFEST. No decisions captured here — those belong in PRD. | | FED-M1-11 | done | Docs update: `docs/federation/` operator notes for tier setup; README blurb on federated tier; `docs/guides/` entry for migration. Do NOT touch runbook yet (deferred to FED-M7). | #460 | haiku | feat/federation-m1-docs | FED-M1-10 | 4K | Shipped: `docs/federation/SETUP.md` (119 lines), `docs/guides/migrate-tier.md` (147 lines), README Configuration blurb. |
| FED-M1-12 | not-started | PR, CI green, merge to main, close #460. | #460 | — | (aggregate) | FED-M1-11 | 3K | Queue-guard before push; wait for green; merge squashed; tea `issue-close` #460. | | FED-M1-12 | done | PR, CI green, merge to main, close #460. | #460 | sonnet | feat/federation-m1-close | FED-M1-11 | 3K | M1 closed. PRs #470-#480 merged across 11 tasks. Issue #460 closed; release tag `fed-v0.1.0-m1` published. |
**M1 total estimate:** ~74K tokens (over-budget vs 20K PRD estimate — explanation below) **M1 total estimate:** ~74K tokens (over-budget vs 20K PRD estimate — explanation below)

147
docs/guides/migrate-tier.md Normal file
View File

@@ -0,0 +1,147 @@
# Migrating to the Federated Tier
Step-by-step guide to migrate from `local` (PGlite) or `standalone` (PostgreSQL without pgvector) to `federated` (PostgreSQL 17 + pgvector + Valkey).
## When to migrate
Migrate to federated tier when:
- Scaling from single-user to multi-user deployments
- Adding vector embeddings or RAG features
- Running Mosaic across multiple hosts
- Requiring distributed task queueing and caching
- Moving to production with high availability
## Prerequisites
- Federated stack running and healthy (see [Federated Tier Setup](../federation/SETUP.md))
- Source database accessible, and an empty target database at the federated URL
- Backup of source database (recommended before any migration)
## Dry-run first
Always run a dry-run to validate the migration:
```bash
mosaic storage migrate-tier --to federated \
--target-url postgresql://mosaic:mosaic@localhost:5433/mosaic \
--dry-run
```
Expected output (partial example):
```
[migrate-tier] Analyzing source tier: pglite
[migrate-tier] Analyzing target tier: federated
[migrate-tier] Precondition: target is empty ✓
users: 5 rows
teams: 2 rows
conversations: 12 rows
messages: 187 rows
... (all tables listed)
[migrate-tier] NOTE: Source tier has no pgvector support. insights.embedding will be NULL on all migrated rows.
[migrate-tier] DRY-RUN COMPLETE (no data written). 206 total rows would be migrated.
```
Review the output. If it shows an error (e.g., target not empty), address it before proceeding.
## Run the migration
When ready, run without `--dry-run`:
```bash
mosaic storage migrate-tier --to federated \
--target-url postgresql://mosaic:mosaic@localhost:5433/mosaic \
--yes
```
The `--yes` flag skips the confirmation prompt (required in non-TTY environments like CI).
The command will:
1. Acquire an advisory lock (blocks concurrent invocations)
2. Copy data from source to target in dependency order
3. Report rows migrated per table
4. Display any warnings (e.g., null vector embeddings)
## What gets migrated
All persistent, user-bound data is migrated in dependency order:
- **users, teams, team_members** — user and team ownership
- **accounts** — OAuth provider tokens (durable credentials)
- **projects, agents, missions, tasks** — all project and agent definitions
- **conversations, messages** — all chat history
- **preferences, insights, agent_logs** — preferences and observability
- **provider_credentials** — stored API keys and secrets
- **tickets, events, skills, routing_rules, appreciations** — auxiliary records
Full order is defined in code (`MIGRATION_ORDER` in `packages/storage/src/migrate-tier.ts`).
## What gets skipped and why
Three tables are intentionally not migrated:
| Table | Reason |
| ----------------- | ----------------------------------------------------------------------------------------------- |
| **sessions** | TTL'd auth sessions from the old environment; they will fail JWT verification on the new target |
| **verifications** | One-time tokens (email verify, password reset) that have either expired or been consumed |
| **admin_tokens** | Hashed tokens bound to the old environment's secret keys; must be re-issued |
**Note on accounts and provider_credentials:** These durable credentials ARE migrated because they are user-bound and required for resuming agent work on the target environment. After migration to a multi-tenant federated deployment, operators may want to audit or wipe these if users are untrusted or credentials should not be shared.
## Idempotency and concurrency
The migration is **idempotent**:
- Re-running is safe (uses `ON CONFLICT DO UPDATE` internally)
- Ideal for retries on transient failures
- Concurrent invocations are blocked by a Postgres advisory lock; the second caller will wait
If a previous run is stuck, check for advisory locks:
```sql
SELECT * FROM pg_locks WHERE locktype='advisory';
```
If you need to force-unlock (dangerous):
```sql
SELECT pg_advisory_unlock(<lock_id>);
```
## Verify the migration
After migration completes, spot-check the target:
```bash
# Count rows on a few critical tables
psql postgresql://mosaic:mosaic@localhost:5433/mosaic -c \
"SELECT 'users' as table, COUNT(*) FROM users UNION ALL
SELECT 'conversations' as table, COUNT(*) FROM conversations UNION ALL
SELECT 'messages' as table, COUNT(*) FROM messages;"
```
Verify a known user or project exists by ID:
```bash
psql postgresql://mosaic:mosaic@localhost:5433/mosaic -c \
"SELECT id, email FROM users WHERE email='<your-email>';"
```
Ensure vector embeddings are NULL (if source was PGlite) or populated (if source was postgres + pgvector):
```bash
psql postgresql://mosaic:mosaic@localhost:5433/mosaic -c \
"SELECT embedding IS NOT NULL as has_vector FROM insights LIMIT 5;"
```
## Rollback
There is no in-place rollback. If the migration fails:
1. Restore the target database from a pre-migration backup
2. Investigate the failure logs
3. Rerun the migration
Always test migrations in a staging environment first.

View File

@@ -379,3 +379,147 @@ Initial verifier (haiku) on the first delivery returned "OK to ship" but missed
- #8: confirm `packages/config/dist` not git-tracked. - #8: confirm `packages/config/dist` not git-tracked.
**Next:** PR for FED-M1-04 → CI wait → merge. Then FED-M1-05 (migration script, codex/sonnet, 10K). **Next:** PR for FED-M1-04 → CI wait → merge. Then FED-M1-05 (migration script, codex/sonnet, 10K).
---
## Session 18 — 2026-04-19 — FED-M1-07 + FED-M1-08
**Branches landed this run:** `feat/federation-m1-integration` (PR #476, FED-M1-07), `feat/federation-m1-migrate-test` (PR #477, FED-M1-08)
**Branch active at end:** none — both PRs merged to main, branches deleted
**M1 progress:** 8 of 12 tasks done. Remaining: M1-09 (regression e2e, haiku), M1-10 (security review, sonnet), M1-11 (docs, haiku), M1-12 (close + release, orchestrator).
### FED-M1-07 — Integration tests for federated tier gateway boot
Three test files under `apps/gateway/src/__tests__/integration/` gated by `FEDERATED_INTEGRATION=1`:
- `federated-boot.success.integration.test.ts``detectAndAssertTier` resolves; `pg_extension` row for `vector` exists
- `federated-boot.pg-unreachable.integration.test.ts` — throws `TierDetectionError` with `service: 'postgres'` when PG port is closed
- `federated-pgvector.integration.test.ts` — TEMP table with `vector(3)` column round-trips data
Independent code review (sonnet) returned VERDICT: B with two IMPORTANT items, both fixed in the same PR:
- Port 5499 collision risk → replaced with `net.createServer().listen(0)` reserved-port helper
- `afterAll` and `sql` scoped outside `describe` → moved both inside `describe.skipIf` block
Independent surface verifier (haiku) confirmed all claims. 4/4 tests pass live; 4/4 skip cleanly without env var.
### FED-M1-08 — Migration integration test (caught real P0 bug)
`packages/storage/src/migrate-tier.integration.test.ts` seeds temp PGlite with cross-table data (users, teams, team_members, conversations, messages), runs `runMigrateTier`, asserts row counts + spot-checks. Gated by `FEDERATED_INTEGRATION=1`.
**P0 bug surfaced and fixed in same PR:** `DrizzleMigrationSource.readTable()` returns Drizzle's camelCase keys (`emailVerified`, `userId`); `PostgresMigrationTarget.upsertBatch()` was using them verbatim as SQL identifiers, producing `column "emailVerified" does not exist` against real federated PG. The 32 unit tests in M1-05 missed this because both source and target were mocked. Fix: `normaliseSourceRow` now applies `toSnakeCase` (`/[A-Z]/g``_<lowercase>`), idempotent on already-snake_case keys.
Code review (sonnet) returned VERDICT: B with one IMPORTANT and one MINOR, both fixed:
- `createPgliteDbWithVector` and `runPgliteMigrations` were initially added to `@mosaicstack/db` public exports → moved to `packages/storage/src/test-utils/pglite-with-vector.ts` (avoids polluting prod consumers with WASM bundle)
- `afterAll` did not call `cleanTarget` → added before connection close, ensuring orphan rows cleaned even on test panic
Side change: `packages/storage/package.json` gained `"type": "module"` (codebase convention; required for `import.meta.url` in test-utils). All other workspace packages already declared this.
### Process notes for this session
- Review-then-verify pipeline now battle-tested: M1-08 reviewer caught the P0 bug + the public-API leak that the worker would have shipped. Without review, both would have gone to main.
- Integration tests are paying for themselves immediately: M1-08 caught a real P0 in M1-05 that 32 mocked unit tests missed. Going forward, **at least one real-services integration test per code-mutating PR** should become a soft norm where feasible.
- TASKS.md status updates continue to ride on the matching feature branch (avoids direct-to-main commits).
**Followup tasks tracked but still deferred (no change):**
- #7: `tier=local` hardcoded in gateway-config resume branches (~262, ~317)
- #8: confirm `packages/config/dist` not git-tracked
**Next:** FED-M1-09 — standalone regression e2e (haiku canary, ~4K). Verifies that the existing `standalone` tier behavior still works end-to-end on the federation-touched build, since M1 changes touched shared paths (storage, config, gateway boot).
---
## Session 19 — 2026-04-19 — FED-M1-09 → FED-M1-12 (M1 close)
**Branches landed this run:** `feat/federation-m1-regression` (PR #478, M1-09), `feat/federation-m1-security-review` (PR #479, M1-10), `feat/federation-m1-docs` (PR #480, M1-11), `feat/federation-m1-close` (PR #481, M1-12)
**Branch active at end:** none — M1 closed, all branches deleted, issue #460 closed, release tag `fed-v0.1.0-m1` published
**M1 progress:** 12 of 12 tasks done. **Milestone complete.**
### FED-M1-09 — Standalone regression canary
Verification-only milestone. Re-ran the existing standalone/local test suites against current `main` (with M1-01 → M1-08 merged):
- 4 target gateway test files: 148/148 pass (conversation-persistence, cross-user-isolation, resource-ownership, session-hardening)
- Full gateway suite: 351 pass, 4 skipped (FEDERATED_INTEGRATION-gated only)
- Storage unit tests: 85 pass, 1 skipped (integration-gated)
- Top-level `pnpm test`: all green; only env-gated skips
No regression in standalone or local tier. Federation M1 changes are non-disruptive.
### FED-M1-10 — Security review (two rounds, 7 findings)
Independent security review surfaced three high-impact and four medium findings; all fixed in same PR.
**Round 1 (4 findings):**
- MEDIUM: Credential leak via `postgres`/`ioredis` driver error messages (DSN strings) re-thrown by `migrate-tier.ts` → caller; `cli.ts:402` outer catch
- MEDIUM: Same leak in `tier-detection.ts` `probePostgresMeasured` / `probePgvectorMeasured` → emitted as JSON by `mosaic gateway doctor --json`
- LOW-MEDIUM: No advisory lock on `migrate-tier`; two concurrent invocations could both pass `checkTargetPreconditions` (non-atomic) and race
- ADVISORY: `SKIP_TABLES` lacked rationale comment
**Fixes:**
- New internal helper `packages/storage/src/redact-error.ts` — regex `(postgres(?:ql)?|rediss?):\/\/[^@\s]*@``<scheme>://***@`. NOT exported from package public surface. 10 unit tests covering all schemes, multi-URL, no-creds, case-insensitive.
- `redactErrMsg` applied at all 5 leak sites
- `PostgresMigrationTarget.tryAcquireAdvisoryLock()` / `releaseAdvisoryLock()` using session-scoped `pg_try_advisory_lock(hashtext('mosaic-migrate-tier'))`. Acquired before preflight, released in `finally`. Dry-run skips. Non-blocking.
- `SKIP_TABLES` comment expanded with rationale for skipped tables (TTL'd / one-time / env-bound) AND why `accounts` (OAuth) and `provider_credentials` (AI keys) are intentionally migrated (durable user-bound, not deployment-bound).
**Round 2 (3 findings missed by first round):**
- HIGH: Round 1 regex only covered `postgres` scheme, not `redis`/`rediss` — extended to `(postgres(?:ql)?|rediss?)`
- HIGH: `probeValkeyMeasured` was missed in Round 1 → applied `redactErrMsg`
- MEDIUM: `cli.ts:402` migrate-tier outer catch was missed in Round 1 → applied `redactErrMsg`
**Process validation:** the two-round review pattern proved load-bearing for security work. A single review-then-fix cycle would have shipped the Valkey credential leak.
### FED-M1-11 — Docs (haiku)
- `docs/federation/SETUP.md` (119 lines): federated tier setup — what it is, prerequisites, docker compose start, mosaic.config.json snippet, doctor health check, troubleshooting
- `docs/guides/migrate-tier.md` (147 lines): when to migrate, dry-run first, what migrates/skips with rationale, idempotency + advisory-lock semantics, no in-place rollback
- `README.md` Configuration blurb linking to both
- Runbook deferred to FED-M7 per TASKS.md scope rule
### FED-M1-12 — Aggregate close (this PR)
- Marked M1-12 done in TASKS.md
- MISSION-MANIFEST.md: phase → "M1 complete", progress 1/7, M1 row done with PR range #470-#481, session log appended
- This Session 19 entry added
- Issue #460 closed via `~/.config/mosaic/tools/git/issue-close.sh -i 460`
- Release tag `fed-v0.1.0-m1` created and pushed to gitea
### M1 PR ledger
| PR | Task | Branch |
| ---- | ----------------------------------------- | ---------------------------------- |
| #470 | M1-01 (tier config schema) | feat/federation-m1-tier-config |
| #471 | M1-02 (compose overlay) | feat/federation-m1-compose |
| #472 | M1-03 (pgvector adapter) | feat/federation-m1-pgvector |
| #473 | M1-04 (tier-detector) | feat/federation-m1-detector |
| #474 | M1-05 (migrate-tier script) | feat/federation-m1-migrate |
| #475 | M1-06 (gateway doctor) | feat/federation-m1-doctor |
| #476 | M1-07 (boot integration tests) | feat/federation-m1-integration |
| #477 | M1-08 (migrate integration test + P0 fix) | feat/federation-m1-migrate-test |
| #478 | M1-09 (standalone regression) | feat/federation-m1-regression |
| #479 | M1-10 (security review fixes) | feat/federation-m1-security-review |
| #480 | M1-11 (docs) | feat/federation-m1-docs |
| #481 | M1-12 (aggregate close) | feat/federation-m1-close |
### Process learnings (M1 retrospective)
1. **Two-round security review is non-negotiable for security work.** First round caught postgres credential leaks; second round caught equivalent valkey leaks the worker missed when extending the regex. Single-round would have shipped HIGH severity issues.
2. **Real-services integration tests catch what mocked unit tests cannot.** M1-08 caught a P0 in M1-05 (camelCase column names) that 32 mocked unit tests missed because both source and target were mocked. Going forward: at least one real-services test per code-mutating PR where feasible.
3. **Test-utils for live services co-locate with consumer, not in shared library.** M1-08 reviewer caught `createPgliteDbWithVector` initially being added to `@mosaicstack/db` public exports — would have polluted prod consumers with WASM bundle. Moved to `packages/storage/src/test-utils/`.
4. **Per-task budgets including tests/review/docs more accurate than PRD's implementation-only estimates.** M1 PRD estimated 20K; actual ~74K. Future milestones should budget the full delivery cycle.
5. **TASKS.md status updates ride feature branches, never direct-to-main.** Caught one violation early in M1; pattern held for all 12 tasks.
6. **Subagent tier matters.** Code review needs sonnet-level reasoning (haiku missed deep issues in M1-04); claim verification (line counts, file existence) is fine on haiku.
**Followup tasks still deferred (carry forward to M2):**
- #7: `tier=local` hardcoded in gateway-config resume branches (~262, ~317)
- #8: confirm `packages/config/dist` not git-tracked
**Next mission step:** FED-M2 (Step-CA + grant schema + admin CLI). Per TASKS.md scope rule, M2 will be decomposed when it enters active planning. Issue #461 tracks scope.

View File

@@ -0,0 +1,75 @@
-- 0008_smart_lyja — FED-M2-01 federation schema (peers / grants / audit_log).
-- Generated by drizzle-kit; admin_tokens is also emitted into this migration.
CREATE TYPE "public"."grant_status" AS ENUM('active', 'revoked', 'expired');--> statement-breakpoint
CREATE TYPE "public"."peer_state" AS ENUM('pending', 'active', 'suspended', 'revoked');--> statement-breakpoint
CREATE TABLE "admin_tokens" (
	"id" text PRIMARY KEY NOT NULL,
	"user_id" text NOT NULL,
	"token_hash" text NOT NULL,
	"label" text NOT NULL,
	"scope" text DEFAULT 'admin' NOT NULL,
	"expires_at" timestamp with time zone,
	"last_used_at" timestamp with time zone,
	"created_at" timestamp with time zone DEFAULT now() NOT NULL
);
--> statement-breakpoint
-- Per-request audit trail. peer_id/subject_user_id/grant_id are nullable so
-- history survives deletion of the referenced rows (FKs below are SET NULL).
-- query_hash / outcome / bytes_out are reserved, nullable columns — written by
-- a later milestone (M4); reserved now to avoid a retroactive migration.
CREATE TABLE "federation_audit_log" (
	"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
	"request_id" text NOT NULL,
	"peer_id" uuid,
	"subject_user_id" text,
	"grant_id" uuid,
	"verb" text NOT NULL,
	"resource" text NOT NULL,
	"status_code" integer NOT NULL,
	"result_count" integer,
	"denied_reason" text,
	"latency_ms" integer,
	"created_at" timestamp with time zone DEFAULT now() NOT NULL,
	"query_hash" text,
	"outcome" text,
	"bytes_out" integer
);
--> statement-breakpoint
-- A grant ties a local user (subject) to a remote peer with a jsonb scope.
CREATE TABLE "federation_grants" (
	"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
	"subject_user_id" text NOT NULL,
	"peer_id" uuid NOT NULL,
	"scope" jsonb NOT NULL,
	"status" "grant_status" DEFAULT 'active' NOT NULL,
	"expires_at" timestamp with time zone,
	"created_at" timestamp with time zone DEFAULT now() NOT NULL,
	"revoked_at" timestamp with time zone,
	"revoked_reason" text
);
--> statement-breakpoint
-- One row per federated peer gateway; identity is the client certificate
-- (common_name and cert_serial are both unique).
CREATE TABLE "federation_peers" (
	"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
	"common_name" text NOT NULL,
	"display_name" text NOT NULL,
	"cert_pem" text NOT NULL,
	"cert_serial" text NOT NULL,
	"cert_not_after" timestamp with time zone NOT NULL,
	"client_key_pem" text,
	"state" "peer_state" DEFAULT 'pending' NOT NULL,
	"endpoint_url" text,
	"last_seen_at" timestamp with time zone,
	"created_at" timestamp with time zone DEFAULT now() NOT NULL,
	"revoked_at" timestamp with time zone,
	CONSTRAINT "federation_peers_common_name_unique" UNIQUE("common_name"),
	CONSTRAINT "federation_peers_cert_serial_unique" UNIQUE("cert_serial")
);
--> statement-breakpoint
-- FK semantics: user delete cascades grants; peer delete cascades grants;
-- audit_log references are SET NULL so audit history is never destroyed.
ALTER TABLE "admin_tokens" ADD CONSTRAINT "admin_tokens_user_id_users_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."users"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "federation_audit_log" ADD CONSTRAINT "federation_audit_log_peer_id_federation_peers_id_fk" FOREIGN KEY ("peer_id") REFERENCES "public"."federation_peers"("id") ON DELETE set null ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "federation_audit_log" ADD CONSTRAINT "federation_audit_log_subject_user_id_users_id_fk" FOREIGN KEY ("subject_user_id") REFERENCES "public"."users"("id") ON DELETE set null ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "federation_audit_log" ADD CONSTRAINT "federation_audit_log_grant_id_federation_grants_id_fk" FOREIGN KEY ("grant_id") REFERENCES "public"."federation_grants"("id") ON DELETE set null ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "federation_grants" ADD CONSTRAINT "federation_grants_subject_user_id_users_id_fk" FOREIGN KEY ("subject_user_id") REFERENCES "public"."users"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "federation_grants" ADD CONSTRAINT "federation_grants_peer_id_federation_peers_id_fk" FOREIGN KEY ("peer_id") REFERENCES "public"."federation_peers"("id") ON DELETE cascade ON UPDATE no action;--> statement-breakpoint
CREATE INDEX "admin_tokens_user_id_idx" ON "admin_tokens" USING btree ("user_id");--> statement-breakpoint
CREATE UNIQUE INDEX "admin_tokens_hash_idx" ON "admin_tokens" USING btree ("token_hash");--> statement-breakpoint
-- Audit-log indexes are DESC NULLS LAST for reverse-chronological scans.
CREATE INDEX "federation_audit_log_peer_created_at_idx" ON "federation_audit_log" USING btree ("peer_id","created_at" DESC NULLS LAST);--> statement-breakpoint
CREATE INDEX "federation_audit_log_subject_created_at_idx" ON "federation_audit_log" USING btree ("subject_user_id","created_at" DESC NULLS LAST);--> statement-breakpoint
CREATE INDEX "federation_audit_log_created_at_idx" ON "federation_audit_log" USING btree ("created_at" DESC NULLS LAST);--> statement-breakpoint
CREATE INDEX "federation_grants_subject_status_idx" ON "federation_grants" USING btree ("subject_user_id","status");--> statement-breakpoint
CREATE INDEX "federation_grants_peer_status_idx" ON "federation_grants" USING btree ("peer_id","status");--> statement-breakpoint
CREATE INDEX "federation_peers_cert_serial_idx" ON "federation_peers" USING btree ("cert_serial");--> statement-breakpoint
CREATE INDEX "federation_peers_state_idx" ON "federation_peers" USING btree ("state");

File diff suppressed because it is too large Load Diff

View File

@@ -57,6 +57,13 @@
"when": 1774227064500, "when": 1774227064500,
"tag": "0006_swift_shen", "tag": "0006_swift_shen",
"breakpoints": true "breakpoints": true
},
{
"idx": 8,
"version": "7",
"when": 1776822435828,
"tag": "0008_smart_lyja",
"breakpoints": true
} }
] ]
} }

View File

@@ -0,0 +1,424 @@
/**
* FED-M2-01 — Integration test: federation DB schema (peers / grants / audit_log).
*
* Prereq: docker compose -f docker-compose.federated.yml --profile federated up -d
* (or any postgres with the mosaic schema already applied)
* Run: FEDERATED_INTEGRATION=1 pnpm --filter @mosaicstack/db test src/federation.integration.test.ts
*
* Skipped when FEDERATED_INTEGRATION !== '1'.
*
* Strategy:
* - Applies the federation migration SQL directly (idempotent: CREATE TYPE/TABLE
* with IF NOT EXISTS guards applied via inline SQL before the migration DDL).
* - Assumes the base schema (users table etc.) already exists in the target DB.
* - All test rows use the `fed-m2-01-` prefix; cleanup in afterAll.
*
 * Coverage:
 * 1. Federation tables + enums apply cleanly against the existing schema.
 * 2. Insert a sample user + peer + grant + audit row; verify round-trip.
 * 3. FK cascade: deleting the user cascades to federation_grants.
 * 4. FK set-null: deleting the peer sets federation_audit_log.peer_id to NULL.
 * 5. Enum constraint: inserting an invalid status/state value throws a DB error.
 * 6. Unique constraint: duplicate cert_serial throws a DB error.
 * 7. FK cascade: deleting a peer cascades to federation_grants.
*/
import postgres from 'postgres';
import { afterAll, beforeAll, describe, expect, it } from 'vitest';
// Integration tests only run when explicitly enabled (they need a live Postgres).
const run = process.env['FEDERATED_INTEGRATION'] === '1';
// Target DB; default matches the docker-compose federated stack (port 5433).
const PG_URL = process.env['DATABASE_URL'] ?? 'postgresql://mosaic:mosaic@localhost:5433/mosaic';
/** Recognisable test-row prefix for safe cleanup without full-table truncation. */
const T = 'fed-m2-01';
// Deterministic IDs (UUID format required for uuid PK columns: 8-4-4-4-12 hex digits).
// Plain string literals — no interpolation is needed for these fixed UUIDs.
const PEER1_ID = 'f2000001-0000-4000-8000-000000000001';
const PEER2_ID = 'f2000002-0000-4000-8000-000000000002';
const USER1_ID = `${T}-user-1`;
// Shared connection; undefined until beforeAll runs (and when tests are skipped).
let sql: ReturnType<typeof postgres> | undefined;
beforeAll(async () => {
  if (!run) return;
  // Single-connection pool keeps all DDL/DML on one session, deterministically.
  sql = postgres(PG_URL, { max: 1, connect_timeout: 10, idle_timeout: 10 });
  // Apply the federation enums and tables idempotently.
  // This mirrors the migration file but uses IF NOT EXISTS guards so it can run
  // against a DB that may not have had drizzle migrations tracked.
  // CREATE TYPE has no IF NOT EXISTS, so swallow duplicate_object instead.
  await sql`
    DO $$ BEGIN
      CREATE TYPE peer_state AS ENUM ('pending', 'active', 'suspended', 'revoked');
    EXCEPTION WHEN duplicate_object THEN NULL;
    END $$
  `;
  await sql`
    DO $$ BEGIN
      CREATE TYPE grant_status AS ENUM ('active', 'revoked', 'expired');
    EXCEPTION WHEN duplicate_object THEN NULL;
    END $$
  `;
  // Peers table: cert identity (common_name, cert_serial) is unique.
  await sql`
    CREATE TABLE IF NOT EXISTS federation_peers (
      id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
      common_name text NOT NULL,
      display_name text NOT NULL,
      cert_pem text NOT NULL,
      cert_serial text NOT NULL,
      cert_not_after timestamp with time zone NOT NULL,
      client_key_pem text,
      state peer_state NOT NULL DEFAULT 'pending',
      endpoint_url text,
      last_seen_at timestamp with time zone,
      created_at timestamp with time zone NOT NULL DEFAULT now(),
      revoked_at timestamp with time zone,
      CONSTRAINT federation_peers_common_name_unique UNIQUE (common_name),
      CONSTRAINT federation_peers_cert_serial_unique UNIQUE (cert_serial)
    )
  `;
  await sql`
    CREATE INDEX IF NOT EXISTS federation_peers_cert_serial_idx ON federation_peers (cert_serial)
  `;
  await sql`
    CREATE INDEX IF NOT EXISTS federation_peers_state_idx ON federation_peers (state)
  `;
  // Grants: user delete and peer delete both cascade (exercised by tests 2 and 7).
  // Assumes the base schema (users table) already exists in the target DB.
  await sql`
    CREATE TABLE IF NOT EXISTS federation_grants (
      id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
      subject_user_id text NOT NULL REFERENCES users(id) ON DELETE CASCADE,
      peer_id uuid NOT NULL REFERENCES federation_peers(id) ON DELETE CASCADE,
      scope jsonb NOT NULL,
      status grant_status NOT NULL DEFAULT 'active',
      expires_at timestamp with time zone,
      created_at timestamp with time zone NOT NULL DEFAULT now(),
      revoked_at timestamp with time zone,
      revoked_reason text
    )
  `;
  await sql`
    CREATE INDEX IF NOT EXISTS federation_grants_subject_status_idx ON federation_grants (subject_user_id, status)
  `;
  await sql`
    CREATE INDEX IF NOT EXISTS federation_grants_peer_status_idx ON federation_grants (peer_id, status)
  `;
  // Audit log: FK references are SET NULL so audit history survives deletes.
  // query_hash / outcome / bytes_out are reserved nullable columns (written by M4).
  await sql`
    CREATE TABLE IF NOT EXISTS federation_audit_log (
      id uuid PRIMARY KEY DEFAULT gen_random_uuid(),
      request_id text NOT NULL,
      peer_id uuid REFERENCES federation_peers(id) ON DELETE SET NULL,
      subject_user_id text REFERENCES users(id) ON DELETE SET NULL,
      grant_id uuid REFERENCES federation_grants(id) ON DELETE SET NULL,
      verb text NOT NULL,
      resource text NOT NULL,
      status_code integer NOT NULL,
      result_count integer,
      denied_reason text,
      latency_ms integer,
      created_at timestamp with time zone NOT NULL DEFAULT now(),
      query_hash text,
      outcome text,
      bytes_out integer
    )
  `;
  // DESC NULLS LAST matches the migration's reverse-chronological indexes.
  await sql`
    CREATE INDEX IF NOT EXISTS federation_audit_log_peer_created_at_idx
      ON federation_audit_log (peer_id, created_at DESC NULLS LAST)
  `;
  await sql`
    CREATE INDEX IF NOT EXISTS federation_audit_log_subject_created_at_idx
      ON federation_audit_log (subject_user_id, created_at DESC NULLS LAST)
  `;
  await sql`
    CREATE INDEX IF NOT EXISTS federation_audit_log_created_at_idx
      ON federation_audit_log (created_at DESC NULLS LAST)
  `;
});
afterAll(async () => {
  if (!sql) return;
  // Single LIKE pattern shared by every cleanup statement.
  const pattern = `${T}%`;
  // Delete children before parents so FK constraints cannot block cleanup;
  // each statement is best-effort so one failure does not strand the rest.
  await sql`DELETE FROM federation_audit_log WHERE request_id LIKE ${pattern}`.catch(() => {});
  await sql`
    DELETE FROM federation_grants
    WHERE subject_user_id LIKE ${pattern}
       OR revoked_reason LIKE ${pattern}
  `.catch(() => {});
  await sql`DELETE FROM federation_peers WHERE common_name LIKE ${pattern}`.catch(() => {});
  await sql`DELETE FROM users WHERE id LIKE ${pattern}`.catch(() => {});
  // Close the connection last; bounded timeout so a hung socket cannot wedge vitest.
  await sql.end({ timeout: 3 }).catch(() => {});
});
describe.skipIf(!run)('federation schema — integration', () => {
  // ── 1. Insert sample rows ──────────────────────────────────────────────────
  // NOTE: later tests depend on rows created here (test 3 reuses USER1_ID,
  // test 6 reuses the '-serial-001' cert_serial) — test order matters.
  it('inserts a user, peer, grant, and audit row without constraint violation', async () => {
    const certPem = '-----BEGIN CERTIFICATE-----\nMIItest\n-----END CERTIFICATE-----';
    // User — BetterAuth users.id is text (any string, not uuid).
    await sql!`
      INSERT INTO users (id, name, email, email_verified, created_at, updated_at)
      VALUES (${USER1_ID}, ${'M2-01 Test User'}, ${USER1_ID + '@example.com'}, false, now(), now())
      ON CONFLICT (id) DO NOTHING
    `;
    // Peer — ON CONFLICT makes re-runs against a dirty DB idempotent.
    await sql!`
      INSERT INTO federation_peers
        (id, common_name, display_name, cert_pem, cert_serial, cert_not_after, state, created_at)
      VALUES (
        ${PEER1_ID},
        ${T + '-gateway-example-com'},
        ${'Test Peer'},
        ${certPem},
        ${T + '-serial-001'},
        now() + interval '1 year',
        ${'active'},
        now()
      )
      ON CONFLICT (id) DO NOTHING
    `;
    // Grant — scope is jsonb; pass as JSON string and cast server-side.
    const scopeJson = JSON.stringify({
      resources: ['tasks', 'notes'],
      operations: ['list', 'get'],
    });
    const grants = await sql!`
      INSERT INTO federation_grants
        (subject_user_id, peer_id, scope, status, created_at)
      VALUES (
        ${USER1_ID},
        ${PEER1_ID},
        ${scopeJson}::jsonb,
        ${'active'},
        now()
      )
      RETURNING id
    `;
    expect(grants).toHaveLength(1);
    const grantId = grants[0]!['id'] as string;
    // Audit log row referencing all three parents (peer, user, grant).
    await sql!`
      INSERT INTO federation_audit_log
        (request_id, peer_id, subject_user_id, grant_id, verb, resource, status_code, created_at)
      VALUES (
        ${T + '-req-001'},
        ${PEER1_ID},
        ${USER1_ID},
        ${grantId},
        ${'list'},
        ${'tasks'},
        ${200},
        now()
      )
    `;
    // Verify the audit row is present and has correct data.
    const auditRows = await sql!`
      SELECT * FROM federation_audit_log WHERE request_id = ${T + '-req-001'}
    `;
    expect(auditRows).toHaveLength(1);
    expect(auditRows[0]!['status_code']).toBe(200);
    expect(auditRows[0]!['verb']).toBe('list');
    expect(auditRows[0]!['resource']).toBe('tasks');
  }, 30_000);
  // ── 2. FK cascade: user delete cascades grants ─────────────────────────────
  it('cascade-deletes federation_grants when the subject user is deleted', async () => {
    const cascadeUserId = `${T}-cascade-user`;
    await sql!`
      INSERT INTO users (id, name, email, email_verified, created_at, updated_at)
      VALUES (${cascadeUserId}, ${'Cascade User'}, ${cascadeUserId + '@example.com'}, false, now(), now())
      ON CONFLICT (id) DO NOTHING
    `;
    const scopeJson = JSON.stringify({ resources: ['tasks'] });
    // revoked_reason carries the test prefix so afterAll can find this grant
    // even though subject_user_id is also prefixed.
    await sql!`
      INSERT INTO federation_grants
        (subject_user_id, peer_id, scope, status, revoked_reason, created_at)
      VALUES (
        ${cascadeUserId},
        ${PEER1_ID},
        ${scopeJson}::jsonb,
        ${'active'},
        ${T + '-cascade-test'},
        now()
      )
    `;
    const before = await sql!`
      SELECT count(*)::int AS cnt FROM federation_grants WHERE subject_user_id = ${cascadeUserId}
    `;
    expect(before[0]!['cnt']).toBe(1);
    // Delete user → grants should cascade-delete.
    await sql!`DELETE FROM users WHERE id = ${cascadeUserId}`;
    const after = await sql!`
      SELECT count(*)::int AS cnt FROM federation_grants WHERE subject_user_id = ${cascadeUserId}
    `;
    expect(after[0]!['cnt']).toBe(0);
  }, 15_000);
  // ── 3. FK set-null: peer delete sets audit_log.peer_id to NULL ────────────
  it('sets federation_audit_log.peer_id to NULL when the peer is deleted', async () => {
    // Insert a throwaway peer for this specific cascade test.
    await sql!`
      INSERT INTO federation_peers
        (id, common_name, display_name, cert_pem, cert_serial, cert_not_after, state, created_at)
      VALUES (
        ${PEER2_ID},
        ${T + '-gateway-throwaway-com'},
        ${'Throwaway Peer'},
        ${'cert-pem-placeholder'},
        ${T + '-serial-002'},
        now() + interval '1 year',
        ${'active'},
        now()
      )
      ON CONFLICT (id) DO NOTHING
    `;
    const reqId = `${T}-req-setnull`;
    // subject_user_id references the user created in test 1 (ordering dependency).
    await sql!`
      INSERT INTO federation_audit_log
        (request_id, peer_id, subject_user_id, verb, resource, status_code, created_at)
      VALUES (
        ${reqId},
        ${PEER2_ID},
        ${USER1_ID},
        ${'get'},
        ${'tasks'},
        ${200},
        now()
      )
    `;
    await sql!`DELETE FROM federation_peers WHERE id = ${PEER2_ID}`;
    const rows = await sql!`
      SELECT peer_id FROM federation_audit_log WHERE request_id = ${reqId}
    `;
    expect(rows).toHaveLength(1);
    expect(rows[0]!['peer_id']).toBeNull();
  }, 15_000);
  // ── 4. Enum constraint: invalid grant_status rejected ─────────────────────
  it('rejects an invalid grant_status value with a DB error', async () => {
    const scopeJson = JSON.stringify({ resources: ['tasks'] });
    await expect(
      sql!`
        INSERT INTO federation_grants
          (subject_user_id, peer_id, scope, status, created_at)
        VALUES (
          ${USER1_ID},
          ${PEER1_ID},
          ${scopeJson}::jsonb,
          ${'invalid_status'},
          now()
        )
      `,
    ).rejects.toThrow();
  }, 10_000);
  // ── 5. Enum constraint: invalid peer_state rejected ───────────────────────
  // NOTE(review): these identifiers are not T-prefixed, so afterAll would not
  // clean them up — safe only because the insert is required to fail. Confirm.
  it('rejects an invalid peer_state value with a DB error', async () => {
    await expect(
      sql!`
        INSERT INTO federation_peers
          (common_name, display_name, cert_pem, cert_serial, cert_not_after, state, created_at)
        VALUES (
          ${'bad-state-peer'},
          ${'Bad State'},
          ${'pem'},
          ${'bad-serial-999'},
          now() + interval '1 year',
          ${'invalid_state'},
          now()
        )
      `,
    ).rejects.toThrow();
  }, 10_000);
  // ── 6. Unique constraint: duplicate cert_serial rejected ──────────────────
  // Reuses '-serial-001' inserted by test 1 (ordering dependency).
  it('rejects a duplicate cert_serial with a unique constraint violation', async () => {
    await expect(
      sql!`
        INSERT INTO federation_peers
          (common_name, display_name, cert_pem, cert_serial, cert_not_after, state, created_at)
        VALUES (
          ${T + '-dup-cn'},
          ${'Dup Peer'},
          ${'pem'},
          ${T + '-serial-001'},
          now() + interval '1 year',
          ${'pending'},
          now()
        )
      `,
    ).rejects.toThrow();
  }, 10_000);
  // ── 7. FK cascade: peer delete cascades to federation_grants ─────────────
  it('cascade-deletes federation_grants when the owning peer is deleted', async () => {
    const PEER3_ID = `f2000003-0000-4000-8000-000000000003`;
    const cascadeGrantUserId = `${T}-cascade-grant-user`;
    // Insert a dedicated user and peer for this test.
    await sql!`
      INSERT INTO users (id, name, email, email_verified, created_at, updated_at)
      VALUES (${cascadeGrantUserId}, ${'Cascade Grant User'}, ${cascadeGrantUserId + '@example.com'}, false, now(), now())
      ON CONFLICT (id) DO NOTHING
    `;
    await sql!`
      INSERT INTO federation_peers
        (id, common_name, display_name, cert_pem, cert_serial, cert_not_after, state, created_at)
      VALUES (
        ${PEER3_ID},
        ${T + '-gateway-cascade-peer'},
        ${'Cascade Peer'},
        ${'cert-pem-cascade'},
        ${T + '-serial-003'},
        now() + interval '1 year',
        ${'active'},
        now()
      )
      ON CONFLICT (id) DO NOTHING
    `;
    const scopeJson = JSON.stringify({ resources: ['tasks'] });
    await sql!`
      INSERT INTO federation_grants
        (subject_user_id, peer_id, scope, status, created_at)
      VALUES (
        ${cascadeGrantUserId},
        ${PEER3_ID},
        ${scopeJson}::jsonb,
        ${'active'},
        now()
      )
    `;
    const before = await sql!`
      SELECT count(*)::int AS cnt FROM federation_grants WHERE peer_id = ${PEER3_ID}
    `;
    expect(before[0]!['cnt']).toBe(1);
    // Delete peer → grants should cascade-delete.
    await sql!`DELETE FROM federation_peers WHERE id = ${PEER3_ID}`;
    const after = await sql!`
      SELECT count(*)::int AS cnt FROM federation_grants WHERE peer_id = ${PEER3_ID}
    `;
    expect(after[0]!['cnt']).toBe(0);
    // Cleanup — best-effort; afterAll would also catch this prefixed user.
    await sql!`DELETE FROM users WHERE id = ${cascadeGrantUserId}`.catch(() => {});
  }, 15_000);
});

View File

@@ -0,0 +1,20 @@
/**
 * Federation schema re-exports.
 *
 * The actual table and enum definitions live in schema.ts (alongside all other
 * Drizzle tables) to avoid CJS/ESM cross-import issues when drizzle-kit loads
 * schema files via esbuild-register. Application code that wants named imports
 * for federation symbols should import from this file.
 *
 * This module contains re-exports only — no runtime logic of its own.
 *
 * M2-01: DB tables and enums only. No business logic.
 * M2-03 will add JSON schema validation for the `scope` column.
 * M4 will write rows to federation_audit_log.
 */
export {
  peerStateEnum,      // Postgres enum: peer lifecycle states
  grantStatusEnum,    // Postgres enum: grant lifecycle states
  federationPeers,    // registered peer gateways (mTLS identities)
  federationGrants,   // per-user, per-peer access grants
  federationAuditLog, // append-only request audit trail (written by M4)
} from './schema.js';

View File

@@ -2,6 +2,7 @@ export { createDb, type Db, type DbHandle } from './client.js';
export { createPgliteDb } from './client-pglite.js'; export { createPgliteDb } from './client-pglite.js';
export { runMigrations } from './migrate.js'; export { runMigrations } from './migrate.js';
export * from './schema.js'; export * from './schema.js';
export * from './federation.js';
export { export {
eq, eq,
and, and,

View File

@@ -5,6 +5,7 @@
import { import {
pgTable, pgTable,
pgEnum,
text, text,
timestamp, timestamp,
boolean, boolean,
@@ -585,3 +586,194 @@ export const summarizationJobs = pgTable(
}, },
(t) => [index('summarization_jobs_status_idx').on(t.status)], (t) => [index('summarization_jobs_status_idx').on(t.status)],
); );
// ─── Federation ──────────────────────────────────────────────────────────────
// Enums declared before tables that reference them.
// All federation definitions live in this file (avoids CJS/ESM cross-import
// issues when drizzle-kit loads schema files via esbuild-register).
// Application code imports from `federation.ts` which re-exports from here.

/**
 * Lifecycle state of a federation peer. Backed by the Postgres enum type
 * `peer_state` (see migration 0008).
 * - pending: registered but not yet approved / TLS handshake not confirmed
 * - active: fully operational; mTLS verified
 * - suspended: temporarily blocked; cert still valid
 * - revoked: cert revoked; no traffic allowed
 */
export const peerStateEnum = pgEnum('peer_state', ['pending', 'active', 'suspended', 'revoked']);
/**
 * Lifecycle state of a federation grant. Backed by the Postgres enum type
 * `grant_status`.
 * - active: grant is in effect
 * - revoked: manually revoked before expiry
 * - expired: natural expiry (expires_at passed)
 */
export const grantStatusEnum = pgEnum('grant_status', ['active', 'revoked', 'expired']);
/**
 * A registered peer gateway identified by its Step-CA certificate CN.
 * Represents both inbound peers (other gateways querying us) and outbound
 * peers (gateways we query — identified by client_key_pem being set).
 */
export const federationPeers = pgTable(
  'federation_peers',
  {
    // Random UUID PK; referenced by federation_grants and federation_audit_log.
    id: uuid('id').primaryKey().defaultRandom(),
    /** Certificate CN, e.g. "gateway-uscllc-com". Unique — one row per peer identity. */
    commonName: text('common_name').notNull().unique(),
    /** Human-friendly label shown in admin UI. */
    displayName: text('display_name').notNull(),
    /** Pinned PEM certificate used for mTLS verification. */
    certPem: text('cert_pem').notNull(),
    /** Certificate serial number — used for CRL / revocation lookup. Unique per cert. */
    certSerial: text('cert_serial').notNull().unique(),
    /** Certificate expiry — used by the renewal scheduler (FED-M6). */
    certNotAfter: timestamp('cert_not_after', { withTimezone: true }).notNull(),
    /**
     * Sealed (encrypted) private key for outbound connections TO this peer.
     * NULL for inbound-only peer rows (we serve them; we don't call them).
     */
    clientKeyPem: text('client_key_pem'),
    /** Current peer lifecycle state. Defaults to 'pending' until approved. */
    state: peerStateEnum('state').notNull().default('pending'),
    /** Base URL for outbound queries, e.g. "https://woltje.com:443". NULL for inbound-only peers. */
    endpointUrl: text('endpoint_url'),
    /** Timestamp of the most recent successful inbound or outbound request. */
    lastSeenAt: timestamp('last_seen_at', { withTimezone: true }),
    createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
    /** Populated when the cert is revoked; NULL while the peer is active. */
    revokedAt: timestamp('revoked_at', { withTimezone: true }),
  },
  (t) => [
    // CRL / revocation lookups by serial.
    // NOTE(review): `.unique()` on cert_serial already creates a unique index,
    // so this secondary index is likely redundant — confirm before removing
    // (removal would require a new migration).
    index('federation_peers_cert_serial_idx').on(t.certSerial),
    // Filter peers by state (e.g. find all active peers for outbound routing).
    index('federation_peers_state_idx').on(t.state),
  ],
);
/**
 * A grant lets a specific peer cert query a specific local user's data within
 * a defined scope. Scopes are validated by JSON Schema in M2-03; this table
 * stores them as raw jsonb.
 */
export const federationGrants = pgTable(
  'federation_grants',
  {
    id: uuid('id').primaryKey().defaultRandom(),
    /**
     * The local user whose data this grant exposes.
     * Cascade delete: if the user account is deleted, revoke all their grants.
     */
    subjectUserId: text('subject_user_id')
      .notNull()
      .references(() => users.id, { onDelete: 'cascade' }),
    /**
     * The peer gateway holding the grant.
     * Cascade delete: if the peer record is removed, the grant is moot.
     */
    peerId: uuid('peer_id')
      .notNull()
      .references(() => federationPeers.id, { onDelete: 'cascade' }),
    /**
     * Scope object — validated by JSON Schema (M2-03); stored unvalidated for now.
     * Example: { "resources": ["tasks", "notes"], "operations": ["list", "get"] }
     */
    scope: jsonb('scope').notNull(),
    /** Current grant lifecycle state. Defaults to 'active' on creation. */
    status: grantStatusEnum('status').notNull().default('active'),
    /** Optional hard expiry. NULL means the grant does not expire automatically. */
    expiresAt: timestamp('expires_at', { withTimezone: true }),
    createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
    /** Populated when the grant is explicitly revoked. */
    revokedAt: timestamp('revoked_at', { withTimezone: true }),
    /** Human-readable reason for revocation (audit trail). */
    revokedReason: text('revoked_reason'),
  },
  (t) => [
    // Hot path: look up active grants for a subject user (auth middleware).
    index('federation_grants_subject_status_idx').on(t.subjectUserId, t.status),
    // Hot path: look up active grants held by a peer (inbound request check).
    index('federation_grants_peer_status_idx').on(t.peerId, t.status),
  ],
);
/**
 * Append-only audit log of all federation requests.
 * M4 writes rows here. M2 only creates the table.
 *
 * NOTE: "append-only" is by convention — nothing at the DB level (no trigger
 * or privilege restriction is defined here) prevents updates or deletes.
 *
 * All FKs use SET NULL so audit rows survive peer/user/grant deletion.
 */
export const federationAuditLog = pgTable(
  'federation_audit_log',
  {
    id: uuid('id').primaryKey().defaultRandom(),
    /** UUIDv7 from the X-Request-ID header — correlates with OTEL traces. */
    requestId: text('request_id').notNull(),
    /** Peer that made the request. SET NULL if the peer is later deleted. */
    peerId: uuid('peer_id').references(() => federationPeers.id, { onDelete: 'set null' }),
    /** Subject user whose data was queried. SET NULL if the user is deleted. */
    subjectUserId: text('subject_user_id').references(() => users.id, { onDelete: 'set null' }),
    /** Grant under which the request was authorised. SET NULL if the grant is deleted. */
    grantId: uuid('grant_id').references(() => federationGrants.id, { onDelete: 'set null' }),
    /** Request verb: "list" | "get" | "search". Free text, not enum-constrained. */
    verb: text('verb').notNull(),
    /** Resource type: "tasks" | "notes" | "memory" | etc. Free text, not enum-constrained. */
    resource: text('resource').notNull(),
    /** HTTP status code returned to the peer. */
    statusCode: integer('status_code').notNull(),
    /** Number of items returned (NULL for non-list requests or errors). */
    resultCount: integer('result_count'),
    /** Why the request was denied (NULL when allowed). */
    deniedReason: text('denied_reason'),
    /** End-to-end latency in milliseconds. */
    latencyMs: integer('latency_ms'),
    createdAt: timestamp('created_at', { withTimezone: true }).notNull().defaultNow(),
    // Reserved for M4 — see PRD 7.3. Nullable now so M4 needs no retroactive migration.
    /** SHA-256 of the normalised GraphQL/REST query string; written by M4 search. */
    queryHash: text('query_hash'),
    /** Request outcome: "allowed" | "denied" | "partial"; written by M4. */
    outcome: text('outcome'),
    /** Response payload size in bytes; written by M4. */
    bytesOut: integer('bytes_out'),
  },
  (t) => [
    // Per-peer request history in reverse chronological order.
    index('federation_audit_log_peer_created_at_idx').on(t.peerId, t.createdAt.desc()),
    // Per-user access log in reverse chronological order.
    index('federation_audit_log_subject_created_at_idx').on(t.subjectUserId, t.createdAt.desc()),
    // Global time-range scans (dashboards, rate-limit windows).
    index('federation_audit_log_created_at_idx').on(t.createdAt.desc()),
  ],
);

View File

@@ -0,0 +1,294 @@
/**
* Unit tests for gateway-doctor.ts (mosaic gateway doctor).
*
* All external I/O is mocked — no live services required.
*/
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import type { TierHealthReport } from '@mosaicstack/storage';
/* ------------------------------------------------------------------ */
/* Shared mock state */
/* ------------------------------------------------------------------ */
// Mock functions created inside vi.hoisted so the hoisted vi.mock factories
// below can close over them before the module imports are evaluated.
const mocks = vi.hoisted(() => ({
  mockLoadConfig: vi.fn(),
  mockProbeServiceHealth: vi.fn(),
  mockExistsSync: vi.fn(),
}));
/* ------------------------------------------------------------------ */
/* Module mocks */
/* ------------------------------------------------------------------ */
// Route the SUT's module imports to the shared mock functions. These factories
// close over `mocks` (created with vi.hoisted above) because vitest hoists
// vi.mock calls to the top of the file, ahead of regular imports.
vi.mock('@mosaicstack/config', () => ({
  loadConfig: mocks.mockLoadConfig,
}));
vi.mock('@mosaicstack/storage', () => ({
  probeServiceHealth: mocks.mockProbeServiceHealth,
}));
vi.mock('node:fs', () => ({
  existsSync: mocks.mockExistsSync,
}));
/* ------------------------------------------------------------------ */
/* Import SUT */
/* ------------------------------------------------------------------ */
import { runGatewayDoctor } from './gateway-doctor.js';
import type { MosaicConfig } from '@mosaicstack/config';
/* ------------------------------------------------------------------ */
/* Fixtures */
/* ------------------------------------------------------------------ */
/** Minimal standalone-tier config returned by the mocked loadConfig by default. */
const STANDALONE_CONFIG: MosaicConfig = {
  tier: 'standalone',
  storage: { type: 'postgres', url: 'postgresql://mosaic:mosaic@localhost:5432/mosaic' },
  queue: { type: 'bullmq', url: 'redis://localhost:6380' },
  memory: { type: 'keyword' },
};
/** All-healthy probe result; pgvector is skipped in this standalone fixture. */
const GREEN_REPORT: TierHealthReport = {
  tier: 'standalone',
  configPath: '/some/mosaic.config.json',
  overall: 'green',
  services: [
    { name: 'postgres', status: 'ok', host: 'localhost', port: 5432, durationMs: 42 },
    { name: 'valkey', status: 'ok', host: 'localhost', port: 6380, durationMs: 10 },
    { name: 'pgvector', status: 'skipped', durationMs: 0 },
  ],
};
/** Postgres failing — carries both an error message and a remediation hint. */
const RED_REPORT: TierHealthReport = {
  tier: 'standalone',
  configPath: '/some/mosaic.config.json',
  overall: 'red',
  services: [
    {
      name: 'postgres',
      status: 'fail',
      host: 'localhost',
      port: 5432,
      durationMs: 5001,
      error: {
        message: 'connection refused',
        remediation: 'Start Postgres: `docker compose ...`',
      },
    },
    { name: 'valkey', status: 'ok', host: 'localhost', port: 6380, durationMs: 8 },
    { name: 'pgvector', status: 'skipped', durationMs: 0 },
  ],
};
/** Federated tier with all three services (including pgvector) reporting ok. */
const FEDERATED_GREEN_REPORT: TierHealthReport = {
  tier: 'federated',
  configPath: '/some/mosaic.config.json',
  overall: 'green',
  services: [
    { name: 'postgres', status: 'ok', host: 'localhost', port: 5433, durationMs: 30 },
    { name: 'valkey', status: 'ok', host: 'localhost', port: 6380, durationMs: 5 },
    { name: 'pgvector', status: 'ok', host: 'localhost', port: 5433, durationMs: 25 },
  ],
};
/* ------------------------------------------------------------------ */
/* Process helpers */
/* ------------------------------------------------------------------ */
// Text accumulated from stdout/console.log spies, and the code recorded by
// the process.exit spy, for assertions in the tests below.
let stdoutCapture = '';
let exitCode: number | undefined;

/**
 * Install spies on process.stdout/stderr, console.log and process.exit so
 * each test can inspect what the doctor printed and whether it tried to
 * exit. The process.exit spy throws, mirroring the real call's "never
 * returns" behavior so execution stops at the exit point.
 */
function captureOutput(): void {
  stdoutCapture = '';
  exitCode = undefined;
  vi.spyOn(process.stdout, 'write').mockImplementation((data) => {
    stdoutCapture += typeof data === 'string' ? data : data.toString();
    return true;
  });
  vi.spyOn(process.stderr, 'write').mockImplementation(() => true);
  vi.spyOn(console, 'log').mockImplementation((...parts: unknown[]) => {
    stdoutCapture += parts.join(' ') + '\n';
  });
  vi.spyOn(process, 'exit').mockImplementation((code?: string | number | null) => {
    if (typeof code === 'number') {
      exitCode = code;
    } else {
      exitCode = code != null ? Number(code) : undefined;
    }
    throw new Error(`process.exit(${String(code)})`);
  });
}
/* ------------------------------------------------------------------ */
/* Tests */
/* ------------------------------------------------------------------ */
/**
 * Behavioral suite for runGatewayDoctor. All collaborators (loadConfig,
 * probeServiceHealth, fs.existsSync) are mocked; stdout/console.log and
 * process.exit are spied via captureOutput(), so assertions read the
 * module-level stdoutCapture / exitCode.
 */
describe('runGatewayDoctor', () => {
  beforeEach(() => {
    vi.clearAllMocks();
    captureOutput();
    // By default: no config file on disk (env-detection path)
    mocks.mockExistsSync.mockReturnValue(false);
    mocks.mockLoadConfig.mockReturnValue(STANDALONE_CONFIG);
    mocks.mockProbeServiceHealth.mockResolvedValue(GREEN_REPORT);
  });
  afterEach(() => {
    vi.restoreAllMocks();
  });
  /* ---------------------------------------------------------------- */
  /* 1. JSON mode: parseable JSON matching the schema */
  /* ---------------------------------------------------------------- */
  it('JSON mode emits parseable JSON matching TierHealthReport schema', async () => {
    mocks.mockProbeServiceHealth.mockResolvedValue(GREEN_REPORT);
    await runGatewayDoctor({ json: true });
    const parsed = JSON.parse(stdoutCapture) as TierHealthReport;
    expect(parsed.tier).toBe('standalone');
    expect(parsed.overall).toBe('green');
    expect(Array.isArray(parsed.services)).toBe(true);
    expect(parsed.services).toHaveLength(3);
    // Validate shape of each service check
    for (const svc of parsed.services) {
      expect(['postgres', 'valkey', 'pgvector']).toContain(svc.name);
      expect(['ok', 'fail', 'skipped']).toContain(svc.status);
      expect(typeof svc.durationMs).toBe('number');
    }
    // JSON mode must be silent on console.log — output goes to process.stdout only.
    expect(console.log).not.toHaveBeenCalled();
  });
  it('JSON mode for federated with 3 ok services', async () => {
    mocks.mockProbeServiceHealth.mockResolvedValue(FEDERATED_GREEN_REPORT);
    await runGatewayDoctor({ json: true });
    const parsed = JSON.parse(stdoutCapture) as TierHealthReport;
    expect(parsed.tier).toBe('federated');
    expect(parsed.overall).toBe('green');
    expect(parsed.services.every((s) => s.status === 'ok')).toBe(true);
    // JSON mode must be silent on console.log — output goes to process.stdout only.
    expect(console.log).not.toHaveBeenCalled();
  });
  /* ---------------------------------------------------------------- */
  /* 2. Plain text mode: service lines and overall verdict */
  /* ---------------------------------------------------------------- */
  it('plain text mode includes service lines for each service', async () => {
    mocks.mockProbeServiceHealth.mockResolvedValue(GREEN_REPORT);
    await runGatewayDoctor({});
    expect(stdoutCapture).toContain('postgres');
    expect(stdoutCapture).toContain('valkey');
    expect(stdoutCapture).toContain('pgvector');
  });
  it('plain text mode includes Overall verdict', async () => {
    mocks.mockProbeServiceHealth.mockResolvedValue(GREEN_REPORT);
    await runGatewayDoctor({});
    expect(stdoutCapture).toContain('Overall: GREEN');
  });
  it('plain text mode shows tier and config path in header', async () => {
    mocks.mockProbeServiceHealth.mockResolvedValue(GREEN_REPORT);
    await runGatewayDoctor({});
    expect(stdoutCapture).toContain('Tier: standalone');
  });
  it('plain text mode shows remediation for failed services', async () => {
    mocks.mockProbeServiceHealth.mockResolvedValue(RED_REPORT);
    try {
      await runGatewayDoctor({});
    } catch {
      // process.exit throws in test
    }
    expect(stdoutCapture).toContain('Remediations:');
    expect(stdoutCapture).toContain('Start Postgres');
  });
  /* ---------------------------------------------------------------- */
  /* 3. Exit codes */
  /* ---------------------------------------------------------------- */
  it('exits with code 1 when overall is red', async () => {
    mocks.mockProbeServiceHealth.mockResolvedValue(RED_REPORT);
    await expect(runGatewayDoctor({})).rejects.toThrow('process.exit(1)');
    expect(exitCode).toBe(1);
  });
  it('exits with code 0 (no exit call) when overall is green', async () => {
    mocks.mockProbeServiceHealth.mockResolvedValue(GREEN_REPORT);
    await runGatewayDoctor({});
    // process.exit should NOT have been called for green.
    expect(exitCode).toBeUndefined();
  });
  it('JSON mode exits with code 1 when overall is red', async () => {
    mocks.mockProbeServiceHealth.mockResolvedValue(RED_REPORT);
    await expect(runGatewayDoctor({ json: true })).rejects.toThrow('process.exit(1)');
    expect(exitCode).toBe(1);
  });
  /* ---------------------------------------------------------------- */
  /* 4. --config path override is honored */
  /* ---------------------------------------------------------------- */
  it('passes --config path to loadConfig when provided', async () => {
    const customPath = '/custom/path/mosaic.config.json';
    await runGatewayDoctor({ config: customPath });
    // loadConfig should have been called with the resolved custom path.
    expect(mocks.mockLoadConfig).toHaveBeenCalledWith(
      expect.stringContaining('mosaic.config.json'),
    );
    // The exact call should include the custom path (resolved).
    const [calledPath] = mocks.mockLoadConfig.mock.calls[0] as [string | undefined];
    expect(calledPath).toContain('custom/path/mosaic.config.json');
  });
  it('calls loadConfig without path when no --config and no file on disk', async () => {
    mocks.mockExistsSync.mockReturnValue(false);
    await runGatewayDoctor({});
    const [calledPath] = mocks.mockLoadConfig.mock.calls[0] as [string | undefined];
    // When no file found, resolveConfigPath returns undefined, so loadConfig is called with undefined
    expect(calledPath).toBeUndefined();
  });
  it('finds config from cwd when mosaic.config.json exists there', async () => {
    // First candidate (cwd/mosaic.config.json) exists.
    mocks.mockExistsSync.mockImplementation((p: unknown) => {
      return typeof p === 'string' && p.endsWith('mosaic.config.json');
    });
    await runGatewayDoctor({});
    const [calledPath] = mocks.mockLoadConfig.mock.calls[0] as [string | undefined];
    expect(calledPath).toBeDefined();
    expect(typeof calledPath).toBe('string');
    expect(calledPath!.endsWith('mosaic.config.json')).toBe(true);
  });
});

View File

@@ -0,0 +1,143 @@
/**
* gateway-doctor.ts — `mosaic gateway doctor` implementation.
*
* Reports current tier and per-service health (PG, Valkey, pgvector) for the
* Mosaic gateway. Supports machine-readable JSON output for CI.
*
* Exit codes:
* 0 — overall green or yellow
* 1 — overall red (at least one required service failed)
*/
import { existsSync } from 'node:fs';
import { resolve, join } from 'node:path';
import { homedir } from 'node:os';
import { loadConfig } from '@mosaicstack/config';
import { probeServiceHealth } from '@mosaicstack/storage';
import type { TierHealthReport, ServiceCheck } from '@mosaicstack/storage';
/* ------------------------------------------------------------------ */
/* Config resolution */
/* ------------------------------------------------------------------ */
// Search order for an on-disk config file: project-local first, then the
// per-user dotfile location.
const CONFIG_CANDIDATES = [
  resolve(process.cwd(), 'mosaic.config.json'),
  join(homedir(), '.mosaic', 'mosaic.config.json'),
];

/**
 * Resolve the config path to report in output.
 *
 * Priority:
 *   1. Explicit `--config <path>` flag
 *   2. `./mosaic.config.json` (cwd)
 *   3. `~/.mosaic/mosaic.config.json`
 *   4. undefined — `loadConfig()` falls back to env-var detection
 *
 * `loadConfig()` itself already handles priority 1-3 when passed an explicit
 * path, and falls back to env-detection when none exists. We resolve here
 * only so we can surface the path in the health report.
 */
function resolveConfigPath(explicit?: string): string | undefined {
  if (explicit) {
    return resolve(explicit);
  }
  // Array.prototype.find yields undefined when no candidate exists on disk,
  // which is exactly the "fall back to env detection" signal.
  return CONFIG_CANDIDATES.find((candidate) => existsSync(candidate));
}
/* ------------------------------------------------------------------ */
/* Output helpers */
/* ------------------------------------------------------------------ */
const TICK = '\u2713'; // ✓
const CROSS = '\u2717'; // ✗
const SKIP = '-';

/**
 * Right-pad `s` with spaces to width `n`; strings already >= n chars are
 * returned unchanged. Delegates to the stdlib String.prototype.padEnd,
 * which implements exactly this contract (pads with spaces by default).
 */
function padRight(s: string, n: number): string {
  return s.padEnd(n);
}

/**
 * Render one aligned report line for a service check:
 * status glyph, padded name, host:port (when known), duration, and — for
 * failures — the error message after a "→" arrow.
 */
function serviceLabel(svc: ServiceCheck): string {
  const hostPort =
    svc.host !== undefined && svc.port !== undefined ? `${svc.host}:${svc.port.toString()}` : '';
  const duration = `(${svc.durationMs.toString()}ms)`;
  switch (svc.status) {
    case 'ok':
      return ` ${TICK} ${padRight(svc.name, 10)} ${padRight(hostPort, 22)} ${duration}`;
    case 'fail': {
      const errMsg = svc.error?.message ?? 'unknown error';
      return ` ${CROSS} ${padRight(svc.name, 10)} ${padRight(hostPort, 22)} ${duration} \u2192 ${errMsg}`;
    }
    case 'skipped':
      return ` ${SKIP} ${padRight(svc.name, 10)} (skipped)`;
  }
}
/**
 * Print the human-readable health report: header with tier and config path,
 * one line per service, remediation hints for any failures, and the overall
 * verdict. All output goes through console.log (spied in tests).
 */
function printReport(report: TierHealthReport): void {
  const configDisplay = report.configPath ?? '(auto-detected)';
  console.log(`Tier: ${report.tier} Config: ${configDisplay}`);
  console.log('');
  report.services.forEach((svc) => {
    console.log(serviceLabel(svc));
  });
  console.log('');
  // Surface remediation hints for every failed service that carries one.
  const failures = report.services.filter((s) => s.status === 'fail' && s.error);
  if (failures.length > 0) {
    console.log('Remediations:');
    for (const failure of failures) {
      if (failure.error) {
        console.log(` ${failure.name}: ${failure.error.remediation}`);
      }
    }
    console.log('');
  }
  console.log(`Overall: ${report.overall.toUpperCase()}`);
}
/* ------------------------------------------------------------------ */
/* Main runner */
/* ------------------------------------------------------------------ */
/** Options accepted by `mosaic gateway doctor` (parsed from CLI flags). */
export interface GatewayDoctorOptions {
  /** Emit the TierHealthReport as JSON on stdout instead of the human report. */
  json?: boolean;
  /** Explicit path to mosaic.config.json; overrides cwd/home auto-detection. */
  config?: string;
}
/**
 * Entry point for `mosaic gateway doctor`.
 *
 * Loads the Mosaic config (explicit path, auto-detected file, or env vars),
 * probes per-service health, and prints either JSON or a human report.
 * Exits with code 1 when the config cannot be loaded or the overall verdict
 * is red; otherwise returns normally (exit code 0).
 */
export async function runGatewayDoctor(opts: GatewayDoctorOptions): Promise<void> {
  const configPath = resolveConfigPath(opts.config);

  let mosaicConfig;
  try {
    mosaicConfig = loadConfig(configPath);
  } catch (cause) {
    const reason = cause instanceof Error ? cause.message : String(cause);
    if (opts.json) {
      // Keep stdout machine-readable even on failure.
      const payload = JSON.stringify({ error: `Failed to load config: ${reason}` }, null, 2);
      process.stdout.write(payload + '\n');
    } else {
      process.stderr.write(`Error: Failed to load config: ${reason}\n`);
    }
    process.exit(1);
  }

  const report = await probeServiceHealth(mosaicConfig, configPath);

  if (opts.json) {
    process.stdout.write(JSON.stringify(report, null, 2) + '\n');
  } else {
    printReport(report);
  }

  // Exit 1 if overall is red.
  if (report.overall === 'red') {
    process.exit(1);
  }
}

View File

@@ -206,4 +206,15 @@ export function registerGatewayCommand(program: Command): void {
const { runUninstall } = await import('./gateway/uninstall.js'); const { runUninstall } = await import('./gateway/uninstall.js');
await runUninstall(); await runUninstall();
}); });
// ─── doctor ─────────────────────────────────────────────────────────────────
gw.command('doctor')
.description('Check gateway tier and per-service health (PG, Valkey, pgvector)')
.option('--json', 'Emit TierHealthReport as JSON to stdout (suppresses all other output)')
.option('--config <path>', 'Path to mosaic.config.json (defaults to cwd or ~/.mosaic/)')
.action(async (cmdOpts: { json?: boolean; config?: string }) => {
const { runGatewayDoctor } = await import('./gateway-doctor.js');
await runGatewayDoctor({ json: cmdOpts.json, config: cmdOpts.config });
});
} }

View File

@@ -6,6 +6,7 @@
"url": "https://git.mosaicstack.dev/mosaicstack/stack.git", "url": "https://git.mosaicstack.dev/mosaicstack/stack.git",
"directory": "packages/storage" "directory": "packages/storage"
}, },
"type": "module",
"main": "dist/index.js", "main": "dist/index.js",
"types": "dist/index.d.ts", "types": "dist/index.d.ts",
"exports": { "exports": {
@@ -25,9 +26,11 @@
"@mosaicstack/db": "workspace:^", "@mosaicstack/db": "workspace:^",
"@mosaicstack/types": "workspace:*", "@mosaicstack/types": "workspace:*",
"commander": "^13.0.0", "commander": "^13.0.0",
"ioredis": "^5.10.0",
"postgres": "^3.4.8" "postgres": "^3.4.8"
}, },
"devDependencies": { "devDependencies": {
"drizzle-orm": "^0.45.1",
"typescript": "^5.8.0", "typescript": "^5.8.0",
"vitest": "^2.0.0" "vitest": "^2.0.0"
}, },

View File

@@ -1,5 +1,6 @@
import type { Command } from 'commander'; import type { Command } from 'commander';
import type { MigrationSource } from './migrate-tier.js'; import type { MigrationSource } from './migrate-tier.js';
import { redactErrMsg } from './redact-error.js';
/** /**
* Reads the DATABASE_URL environment variable and redacts the password portion. * Reads the DATABASE_URL environment variable and redacts the password portion.
@@ -73,7 +74,7 @@ export function registerStorageCommand(parent: Command): void {
console.log('[storage] reachable: yes'); console.log('[storage] reachable: yes');
} catch (err) { } catch (err) {
console.log( console.log(
`[storage] reachable: no (${err instanceof Error ? err.message : String(err)})`, `[storage] reachable: no (${redactErrMsg(err instanceof Error ? err.message : String(err))})`,
); );
} }
} else { } else {
@@ -398,7 +399,7 @@ export function registerStorageCommand(parent: Command): void {
} }
} catch (err) { } catch (err) {
console.error( console.error(
`[migrate-tier] ERROR: ${err instanceof Error ? err.message : String(err)}`, `[migrate-tier] ERROR: ${redactErrMsg(err instanceof Error ? err.message : String(err))}`,
); );
process.exitCode = 1; process.exitCode = 1;
} finally { } finally {

View File

@@ -1,4 +1,6 @@
export type { StorageAdapter, StorageConfig } from './types.js'; export type { StorageAdapter, StorageConfig } from './types.js';
export { TierDetectionError, detectAndAssertTier, probeServiceHealth } from './tier-detection.js';
export type { ServiceCheck, TierHealthReport } from './tier-detection.js';
export { createStorageAdapter, registerStorageAdapter } from './factory.js'; export { createStorageAdapter, registerStorageAdapter } from './factory.js';
export { PostgresAdapter } from './adapters/postgres.js'; export { PostgresAdapter } from './adapters/postgres.js';
export { PgliteAdapter } from './adapters/pglite.js'; export { PgliteAdapter } from './adapters/pglite.js';

View File

@@ -0,0 +1,324 @@
/**
* FED-M1-08 — Integration test: PGlite → federated Postgres+pgvector migration.
*
* Prereq: docker compose -f docker-compose.federated.yml --profile federated up -d
* Run: FEDERATED_INTEGRATION=1 pnpm --filter @mosaicstack/storage test src/migrate-tier.integration.test.ts
*
* Skipped when FEDERATED_INTEGRATION !== '1'.
*
* Strategy: users.id (TEXT PK) uses the recognisable prefix `fed-m1-08-` for
* easy cleanup. UUID-PKed tables (teams, conversations, messages, team_members)
* use deterministic valid UUIDs in the `f0000xxx-…` namespace. Cleanup is
* explicit DELETE by id — no full-table truncation.
*/
import fs from 'node:fs/promises';
import os from 'node:os';
import path from 'node:path';
import { users, teams, teamMembers, conversations, messages } from '@mosaicstack/db';
import { createPgliteDbWithVector, runPgliteMigrations } from './test-utils/pglite-with-vector.js';
import postgres from 'postgres';
import { afterAll, describe, expect, it } from 'vitest';
import { DrizzleMigrationSource, PostgresMigrationTarget, runMigrateTier } from './migrate-tier.js';
/* ------------------------------------------------------------------ */
/* Constants */
/* ------------------------------------------------------------------ */
// Gate: the whole suite is skipped unless FEDERATED_INTEGRATION=1 is set.
const run = process.env['FEDERATED_INTEGRATION'] === '1';
// Target DSN for the federated docker-compose stack (Postgres on port 5433).
const FEDERATED_PG_URL = 'postgresql://mosaic:mosaic@localhost:5433/mosaic';
/**
 * Deterministic IDs for the test's seed data.
 *
 * users.id is TEXT (any string) — we use a recognisable prefix for easy cleanup.
 * All other tables use UUID primary keys — must be valid UUID v4 format.
 * The 4th segment starts with '4' (version 4) and 5th starts with '8' (variant).
 */
const IDS = {
  // text PK — can be any string
  user1: 'fed-m1-08-user-1',
  user2: 'fed-m1-08-user-2',
  // UUID PKs — must be valid UUID format; segment prefixes encode the table
  // (f0000001 teams, f0000002 team_members, f0000003 conversations, f0000004 messages)
  team1: 'f0000001-0000-4000-8000-000000000001',
  teamMember1: 'f0000002-0000-4000-8000-000000000001',
  teamMember2: 'f0000002-0000-4000-8000-000000000002',
  conv1: 'f0000003-0000-4000-8000-000000000001',
  conv2: 'f0000003-0000-4000-8000-000000000002',
  msg1: 'f0000004-0000-4000-8000-000000000001',
  msg2: 'f0000004-0000-4000-8000-000000000002',
  msg3: 'f0000004-0000-4000-8000-000000000003',
  msg4: 'f0000004-0000-4000-8000-000000000004',
  msg5: 'f0000004-0000-4000-8000-000000000005',
} as const;
/* ------------------------------------------------------------------ */
/* Shared handles for afterAll cleanup */
/* ------------------------------------------------------------------ */
// Handles assigned inside the test and shared with afterAll so cleanup runs
// even when the test fails partway through.
let targetSql: ReturnType<typeof postgres> | undefined;
let pgliteDataDir: string | undefined;
afterAll(async () => {
  // Best-effort teardown: delete seeded rows, close the connection pool,
  // remove the temp PGlite directory. Each step swallows its own errors so
  // one failure does not mask the others.
  if (targetSql) {
    await cleanTarget(targetSql).catch(() => {});
    await targetSql.end({ timeout: 5 }).catch(() => {});
  }
  if (pgliteDataDir) {
    await fs.rm(pgliteDataDir, { recursive: true, force: true }).catch(() => {});
  }
});
/* ------------------------------------------------------------------ */
/* Helpers */
/* ------------------------------------------------------------------ */
/**
 * Delete all test-owned rows from target in safe FK order.
 *
 * Children are deleted before parents so no FK constraint blocks a delete.
 * Only the fixed IDs from `IDS` are touched — no full-table truncation.
 * NOTE: the `as never[]` casts silence postgres.js parameter typing for the
 * array-valued `$1` bindings (`= ANY($1)`); the runtime values are plain arrays.
 */
async function cleanTarget(sql: ReturnType<typeof postgres>): Promise<void> {
  // Reverse FK order: messages → conversations → team_members → teams → users
  await sql.unsafe(`DELETE FROM messages WHERE id = ANY($1)`, [
    [IDS.msg1, IDS.msg2, IDS.msg3, IDS.msg4, IDS.msg5],
  ] as never[]);
  await sql.unsafe(`DELETE FROM conversations WHERE id = ANY($1)`, [
    [IDS.conv1, IDS.conv2],
  ] as never[]);
  await sql.unsafe(`DELETE FROM team_members WHERE id = ANY($1)`, [
    [IDS.teamMember1, IDS.teamMember2],
  ] as never[]);
  await sql.unsafe(`DELETE FROM teams WHERE id = $1`, [IDS.team1] as never[]);
  await sql.unsafe(`DELETE FROM users WHERE id = ANY($1)`, [[IDS.user1, IDS.user2]] as never[]);
}
/* ------------------------------------------------------------------ */
/* Test suite */
/* ------------------------------------------------------------------ */
/**
 * FED-M1-08 end-to-end test: seed a throw-away PGlite database, run the
 * migrate-tier pipeline against a live federated Postgres (FEDERATED_PG_URL),
 * and assert both row counts and sample field values on the target.
 * Skipped unless `run` is truthy (integration gate set earlier in this file;
 * presumably driven by FEDERATED_INTEGRATION=1 — confirm against file header).
 */
describe.skipIf(!run)('migrate-tier — PGlite → federated PG', () => {
  it('seeds PGlite, runs migrate-tier, asserts row counts and sample rows on target', async () => {
    /* ---- 1. Create a temp PGlite db ---------------------------------- */
    // Stored in the module-scoped variable so afterAll can rm -rf it.
    pgliteDataDir = await fs.mkdtemp(path.join(os.tmpdir(), 'fed-m1-08-'));
    const handle = createPgliteDbWithVector(pgliteDataDir);
    // Run Drizzle migrations against PGlite.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await runPgliteMigrations(handle.db as any);
    /* ---- 2. Seed representative data --------------------------------- */
    const now = new Date();
    const db = handle.db;
    // users (2 rows)
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await (db as any).insert(users).values([
      {
        id: IDS.user1,
        name: 'Fed Test User One',
        email: 'fed-m1-08-user1@test.invalid',
        emailVerified: false,
        role: 'member',
        createdAt: now,
        updatedAt: now,
      },
      {
        id: IDS.user2,
        name: 'Fed Test User Two',
        email: 'fed-m1-08-user2@test.invalid',
        emailVerified: false,
        role: 'member',
        createdAt: now,
        updatedAt: now,
      },
    ]);
    // teams (1 row)
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await (db as any).insert(teams).values([
      {
        id: IDS.team1,
        name: 'Fed M1-08 Team',
        slug: 'fed-m1-08-team',
        ownerId: IDS.user1,
        managerId: IDS.user1,
        createdAt: now,
        updatedAt: now,
      },
    ]);
    // team_members (2 rows linking both users to the team)
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await (db as any).insert(teamMembers).values([
      {
        id: IDS.teamMember1,
        teamId: IDS.team1,
        userId: IDS.user1,
        role: 'manager',
        joinedAt: now,
      },
      {
        id: IDS.teamMember2,
        teamId: IDS.team1,
        userId: IDS.user2,
        role: 'member',
        joinedAt: now,
      },
    ]);
    // conversations (2 rows)
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await (db as any).insert(conversations).values([
      {
        id: IDS.conv1,
        title: 'Fed M1-08 Conversation Alpha',
        userId: IDS.user1,
        archived: false,
        createdAt: now,
        updatedAt: now,
      },
      {
        id: IDS.conv2,
        title: 'Fed M1-08 Conversation Beta',
        userId: IDS.user2,
        archived: false,
        createdAt: now,
        updatedAt: now,
      },
    ]);
    // messages (5 rows across both conversations)
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await (db as any).insert(messages).values([
      {
        id: IDS.msg1,
        conversationId: IDS.conv1,
        role: 'user',
        content: 'Hello from conv1 msg1',
        createdAt: now,
      },
      {
        id: IDS.msg2,
        conversationId: IDS.conv1,
        role: 'assistant',
        content: 'Reply in conv1 msg2',
        createdAt: now,
      },
      {
        id: IDS.msg3,
        conversationId: IDS.conv1,
        role: 'user',
        content: 'Follow-up in conv1 msg3',
        createdAt: now,
      },
      {
        id: IDS.msg4,
        conversationId: IDS.conv2,
        role: 'user',
        content: 'Hello from conv2 msg4',
        createdAt: now,
      },
      {
        id: IDS.msg5,
        conversationId: IDS.conv2,
        role: 'assistant',
        content: 'Reply in conv2 msg5',
        createdAt: now,
      },
    ]);
    /* ---- 3. Pre-clean the target so the test is repeatable ----------- */
    targetSql = postgres(FEDERATED_PG_URL, {
      max: 3,
      connect_timeout: 10,
      idle_timeout: 30,
    });
    await cleanTarget(targetSql);
    /* ---- 4. Build source / target adapters and run migration --------- */
    const source = new DrizzleMigrationSource(db, /* sourceHasVector= */ false);
    const target = new PostgresMigrationTarget(FEDERATED_PG_URL);
    try {
      await runMigrateTier(
        source,
        target,
        {
          targetUrl: FEDERATED_PG_URL,
          dryRun: false,
          allowNonEmpty: true,
          batchSize: 500,
          onProgress: (_msg) => {
            // Uncomment for debugging: console.log(_msg);
          },
        },
        /* sourceHasVector= */ false,
      );
    } finally {
      // Always close the target adapter, even if the migration threw.
      await target.close();
    }
    /* ---- 5. Assert: row counts in target match seed ------------------ */
    // COUNT(*) is cast to ::text and re-parsed via Number() so the assertion
    // does not depend on how the driver maps Postgres bigint.
    const countUsers = await targetSql.unsafe<Array<{ n: string }>>(
      `SELECT COUNT(*)::text AS n FROM users WHERE id = ANY($1)`,
      [[IDS.user1, IDS.user2]] as never[],
    );
    expect(Number(countUsers[0]?.n)).toBe(2);
    const countTeams = await targetSql.unsafe<Array<{ n: string }>>(
      `SELECT COUNT(*)::text AS n FROM teams WHERE id = $1`,
      [IDS.team1] as never[],
    );
    expect(Number(countTeams[0]?.n)).toBe(1);
    const countTeamMembers = await targetSql.unsafe<Array<{ n: string }>>(
      `SELECT COUNT(*)::text AS n FROM team_members WHERE id = ANY($1)`,
      [[IDS.teamMember1, IDS.teamMember2]] as never[],
    );
    expect(Number(countTeamMembers[0]?.n)).toBe(2);
    const countConvs = await targetSql.unsafe<Array<{ n: string }>>(
      `SELECT COUNT(*)::text AS n FROM conversations WHERE id = ANY($1)`,
      [[IDS.conv1, IDS.conv2]] as never[],
    );
    expect(Number(countConvs[0]?.n)).toBe(2);
    const countMsgs = await targetSql.unsafe<Array<{ n: string }>>(
      `SELECT COUNT(*)::text AS n FROM messages WHERE id = ANY($1)`,
      [[IDS.msg1, IDS.msg2, IDS.msg3, IDS.msg4, IDS.msg5]] as never[],
    );
    expect(Number(countMsgs[0]?.n)).toBe(5);
    /* ---- 6. Assert: sample row field values --------------------------- */
    // Spot-check actual column values (not just counts) to catch silent
    // column-mapping bugs in the migration path.
    // User 1: check email and name
    const userRows = await targetSql.unsafe<Array<{ id: string; email: string; name: string }>>(
      `SELECT id, email, name FROM users WHERE id = $1`,
      [IDS.user1] as never[],
    );
    expect(userRows[0]?.email).toBe('fed-m1-08-user1@test.invalid');
    expect(userRows[0]?.name).toBe('Fed Test User One');
    // Conversation 1: check title and user_id (snake_case on the raw target)
    const convRows = await targetSql.unsafe<Array<{ id: string; title: string; user_id: string }>>(
      `SELECT id, title, user_id FROM conversations WHERE id = $1`,
      [IDS.conv1] as never[],
    );
    expect(convRows[0]?.title).toBe('Fed M1-08 Conversation Alpha');
    expect(convRows[0]?.user_id).toBe(IDS.user1);
    /* ---- 7. Cleanup: delete test rows from target -------------------- */
    await cleanTarget(targetSql);
    // Close PGlite
    await handle.close();
  }, 60_000); // generous timeout: live network I/O against the federated PG
});

View File

@@ -29,6 +29,7 @@
import postgres from 'postgres'; import postgres from 'postgres';
import * as schema from '@mosaicstack/db'; import * as schema from '@mosaicstack/db';
import { sql as drizzleSql } from '@mosaicstack/db'; import { sql as drizzleSql } from '@mosaicstack/db';
import { redactErrMsg } from './redact-error.js';
/* ------------------------------------------------------------------ */ /* ------------------------------------------------------------------ */
/* Types */ /* Types */
@@ -72,6 +73,20 @@ export interface MigrationTarget {
/** Close the target connection. */ /** Close the target connection. */
close(): Promise<void>; close(): Promise<void>;
/**
* Attempt to acquire a session-level Postgres advisory lock for migrate-tier.
* Returns true if the lock was acquired, false if another process holds it.
* Targets that do not support advisory locks (e.g. test mocks) may omit this
* by not implementing the method — the caller skips locking gracefully.
*/
tryAcquireAdvisoryLock?(): Promise<boolean>;
/**
* Release the session-level advisory lock acquired by tryAcquireAdvisoryLock.
* Must be called in a finally block.
*/
releaseAdvisoryLock?(): Promise<void>;
} }
export interface MigrateTierOptions { export interface MigrateTierOptions {
@@ -107,9 +122,28 @@ export interface MigrateTierResult {
/** /**
* SKIP_TABLES: ephemeral or environment-specific tables not worth migrating. * SKIP_TABLES: ephemeral or environment-specific tables not worth migrating.
* *
* - sessions: TTL'd auth sessions — invalid in new environment. * WHY these tables are skipped:
* - verifications: one-time tokens (email verify, etc.) — already expired. * - sessions: TTL'd auth sessions — they are invalid in the new environment
* - admin_tokens: hashed tokens bound to old environment keys — re-issue. * and would immediately expire or fail JWT verification anyway.
* - verifications: one-time tokens (email verify, password-reset links, etc.)
* — they have already expired or been consumed; re-sending is
* the correct action on the new environment.
* - admin_tokens: hashed tokens bound to the old environment's secret keys —
* the hash is environment-specific and must be re-issued on
* the target.
*
* WHY these tables are NOT skipped (intentionally migrated):
* - accounts (OAuth tokens): durable credentials bound to the user's identity,
* not to the deployment environment. OAuth tokens survive environment changes
* and should follow the user to the federated tier.
* - provider_credentials (AI provider keys): durable, user-owned API keys for
* AI providers (e.g. OpenAI, Anthropic). These are bound to the user, not
* the server, and must be preserved so AI features work immediately after
* migration.
*
* OPERATOR NOTE: If migrating to a shared or multi-tenant federated tier, review
* whether `accounts` and `provider_credentials` should be wiped post-migration
* to prevent unintended cross-tenant credential exposure.
*/ */
export const SKIP_TABLES = new Set(['sessions', 'verifications', 'admin_tokens']); export const SKIP_TABLES = new Set(['sessions', 'verifications', 'admin_tokens']);
@@ -482,6 +516,33 @@ export class PostgresMigrationTarget implements MigrationTarget {
return rows.length > 0; return rows.length > 0;
} }
/**
* Attempt to acquire a non-blocking session-level Postgres advisory lock
* keyed by hashtext('mosaic-migrate-tier'). Returns true if acquired,
* false if another session already holds the lock.
*
* The lock is session-scoped: it is automatically released when the
* connection closes, and also explicitly released via releaseAdvisoryLock().
*/
async tryAcquireAdvisoryLock(): Promise<boolean> {
const rows = await this.sql`
SELECT pg_try_advisory_lock(hashtext('mosaic-migrate-tier')) AS acquired
`;
const row = rows[0] as { acquired: boolean } | undefined;
return row?.acquired ?? false;
}
/**
* Release the session-level advisory lock previously acquired by
* tryAcquireAdvisoryLock(). Safe to call even if the lock was not held
* (pg_advisory_unlock returns false but does not throw).
*/
async releaseAdvisoryLock(): Promise<void> {
await this.sql`
SELECT pg_advisory_unlock(hashtext('mosaic-migrate-tier'))
`;
}
async close(): Promise<void> { async close(): Promise<void> {
await this.sql.end(); await this.sql.end();
} }
@@ -491,11 +552,24 @@ export class PostgresMigrationTarget implements MigrationTarget {
/* Source-row normalisation */ /* Source-row normalisation */
/* ------------------------------------------------------------------ */ /* ------------------------------------------------------------------ */
/**
* Convert a camelCase key to snake_case.
* e.g. "userId" → "user_id", "emailVerified" → "email_verified".
* Keys that are already snake_case (no uppercase letters) are returned as-is.
*/
function toSnakeCase(key: string): string {
return key.replace(/[A-Z]/g, (c) => `_${c.toLowerCase()}`);
}
/** /**
* Drizzle returns rows as camelCase TypeScript objects (e.g. `userId`, not * Drizzle returns rows as camelCase TypeScript objects (e.g. `userId`, not
* `user_id`). The PostgresMigrationTarget upserts via raw SQL and uses the * `user_id`). The PostgresMigrationTarget upserts via raw SQL and uses the
* column names as given — the `insights` no-vector path uses snake_case column * column names as given. We must convert camelCase keys → snake_case before
* aliases in the SELECT, so those rows already arrive as snake_case. * building the INSERT statement so column names match the PG schema.
*
* Exception: the `insights` no-vector path already returns snake_case keys
* from its raw SQL projection — toSnakeCase() is idempotent for already-
* snake_case keys so this conversion is safe in all paths.
* *
* For vector tables (insights), if `embedding` is absent from the source row * For vector tables (insights), if `embedding` is absent from the source row
* (because DrizzleMigrationSource omitted it in the no-vector projection), we * (because DrizzleMigrationSource omitted it in the no-vector projection), we
@@ -509,7 +583,11 @@ export function normaliseSourceRow(
row: Record<string, unknown>, row: Record<string, unknown>,
sourceHasVector: boolean, sourceHasVector: boolean,
): Record<string, unknown> { ): Record<string, unknown> {
const out = { ...row }; // Convert all camelCase keys to snake_case for raw-SQL target compatibility.
const out: Record<string, unknown> = {};
for (const [k, v] of Object.entries(row)) {
out[toSnakeCase(k)] = v;
}
if (VECTOR_TABLES.has(tableName) && !sourceHasVector) { if (VECTOR_TABLES.has(tableName) && !sourceHasVector) {
// Source cannot have embeddings — explicitly null them so ON CONFLICT // Source cannot have embeddings — explicitly null them so ON CONFLICT
@@ -630,6 +708,24 @@ export async function runMigrateTier(
return { tables, totalRows: 0, dryRun: true }; return { tables, totalRows: 0, dryRun: true };
} }
// Acquire a Postgres advisory lock on the target BEFORE checking preconditions
// so that two concurrent invocations cannot both pass the non-empty guard and
// race each other. Use non-blocking pg_try_advisory_lock so we fail fast
// instead of deadlocking.
//
// Targets that don't implement tryAcquireAdvisoryLock (e.g. test mocks) skip
// this step — the optional chaining guard handles that case.
const lockAcquired = target.tryAcquireAdvisoryLock ? await target.tryAcquireAdvisoryLock() : true; // mocks / test doubles — no locking needed
if (!lockAcquired) {
throw new Error(
'Another migrate-tier process is already running against this target. ' +
'Wait for it to complete or check for stuck locks via ' +
"SELECT * FROM pg_locks WHERE locktype='advisory'.",
);
}
try {
// Check preconditions before writing. // Check preconditions before writing.
await checkTargetPreconditions(target, allowNonEmpty, tablesToMigrate); await checkTargetPreconditions(target, allowNonEmpty, tablesToMigrate);
@@ -669,7 +765,7 @@ export async function runMigrateTier(
); );
} }
} catch (err) { } catch (err) {
const errMsg = err instanceof Error ? err.message : String(err); const errMsg = redactErrMsg(err instanceof Error ? err.message : String(err));
throw new Error( throw new Error(
`[migrate-tier] Failed on table "${table}" after ${tableTotal.toString()} rows ` + `[migrate-tier] Failed on table "${table}" after ${tableTotal.toString()} rows ` +
`(last id: ${lastSuccessfulId ?? 'none'}). Error: ${errMsg}\n` + `(last id: ${lastSuccessfulId ?? 'none'}). Error: ${errMsg}\n` +
@@ -694,4 +790,10 @@ export async function runMigrateTier(
onProgress(`[migrate-tier] Complete. ${totalRows.toString()} total rows migrated.`); onProgress(`[migrate-tier] Complete. ${totalRows.toString()} total rows migrated.`);
return { tables: results, totalRows, dryRun: false }; return { tables: results, totalRows, dryRun: false };
} finally {
// Release the advisory lock regardless of success or failure.
if (target.releaseAdvisoryLock) {
await target.releaseAdvisoryLock();
}
}
} }

View File

@@ -0,0 +1,62 @@
import { describe, it, expect } from 'vitest';
import { redactErrMsg } from './redact-error.js';
/**
 * Unit tests for redactErrMsg — the credential-scrubbing helper that strips
 * the userinfo (user:password) component of postgres/redis connection URLs
 * out of error messages before they are logged or rethrown. Pure string
 * in/out; no mocks or I/O needed.
 */
describe('redactErrMsg', () => {
  it('redacts user:password from a postgres:// URL embedded in an error message', () => {
    const msg = 'connect ECONNREFUSED postgres://admin:s3cr3t@db.example.com:5432/mosaic';
    expect(redactErrMsg(msg)).toBe(
      'connect ECONNREFUSED postgres://***@db.example.com:5432/mosaic',
    );
  });
  it('redacts user:password from a postgresql:// URL', () => {
    const msg = 'connection failed: postgresql://myuser:mypass@localhost:5432/testdb';
    expect(redactErrMsg(msg)).toBe('connection failed: postgresql://***@localhost:5432/testdb');
  });
  it('handles URLs with no password (user only) — still redacts userinfo', () => {
    const msg = 'error postgres://justuser@host:5432/db';
    expect(redactErrMsg(msg)).toBe('error postgres://***@host:5432/db');
  });
  it('returns the original message unchanged when no connection URL is present', () => {
    const msg = 'connection timed out after 5 seconds';
    expect(redactErrMsg(msg)).toBe('connection timed out after 5 seconds');
  });
  it('is case-insensitive for the scheme (scheme is normalized to lowercase in output)', () => {
    // The regex replacement uses a lowercase literal, so the matched scheme is
    // replaced with the lowercase form regardless of the original casing.
    const msg = 'POSTGRES://admin:pass@host:5432/db';
    expect(redactErrMsg(msg)).toBe('postgres://***@host:5432/db');
  });
  it('redacts multiple URLs in a single message', () => {
    // The /g flag on the pattern makes replace() process every occurrence.
    const msg = 'src postgres://u:p@host1/db1 dst postgresql://v:q@host2/db2';
    expect(redactErrMsg(msg)).toBe('src postgres://***@host1/db1 dst postgresql://***@host2/db2');
  });
  it('does not alter a message with a postgres URL that has no userinfo', () => {
    // No userinfo component — pattern does not match, message unchanged.
    const msg = 'error at postgres://host:5432/db';
    expect(redactErrMsg(msg)).toBe('error at postgres://host:5432/db');
  });
  it('redacts user:password from a redis:// URL', () => {
    const msg = 'connect ECONNREFUSED redis://user:pass@host:6379';
    expect(redactErrMsg(msg)).toBe('connect ECONNREFUSED redis://***@host:6379');
  });
  it('redacts user:password from a rediss:// URL (TLS)', () => {
    const msg = 'connect ECONNREFUSED rediss://user:pass@host:6379';
    expect(redactErrMsg(msg)).toBe('connect ECONNREFUSED rediss://***@host:6379');
  });
  it('redacts both a postgres URL and a redis URL in the same message', () => {
    const msg =
      'primary postgres://admin:s3cr3t@db:5432/mosaic cache redis://cacheuser:cachepass@cache:6379';
    expect(redactErrMsg(msg)).toBe(
      'primary postgres://***@db:5432/mosaic cache redis://***@cache:6379',
    );
  });
});

View File

@@ -0,0 +1,39 @@
/**
* redact-error.ts — Internal credential-scrubbing helper.
*
* The `postgres` npm package can embed the full DSN (including the password)
* in connection-failure error messages. This module provides a single helper
* that strips the user:password portion from any such message before it is
* re-thrown, logged, or surfaced in a structured health report.
*
* This file is intentionally NOT re-exported from the package index — it is
* an internal utility for use within packages/storage/src only.
*/
/**
 * Redacts credentials from error messages that may include connection URLs.
 * The `postgres` npm package can embed the full DSN in connection-failure
 * messages, and ioredis can embed `redis://` / `rediss://` URLs similarly.
 * This helper strips the user:password portion before display.
 *
 * Handles `postgres://`, `postgresql://`, `redis://`, and `rediss://`
 * schemes (case-insensitive; the scheme is lowercased in the output).
 * Everything between `://` and the LAST `@` in the contiguous run is replaced
 * with `***` so that the host, port, and database name remain visible for
 * diagnostics while the secret is never written to logs or CI output.
 *
 * Redacting through the last `@` (via the repeated `(?:[^@\s]*@)+` group)
 * covers passwords that contain a raw, un-percent-encoded `@` — e.g.
 * `postgres://u:p@ss@host` redacts to `postgres://***@host` rather than
 * leaking the password tail as `postgres://***@ss@host`. When redaction is
 * ambiguous we deliberately over-redact rather than risk leaking a secret.
 * URLs with no userinfo (no `@` before the next whitespace) are untouched.
 *
 * @example
 * redactErrMsg('connect ECONNREFUSED postgres://admin:s3cr3t@db:5432/mosaic')
 * // → 'connect ECONNREFUSED postgres://***@db:5432/mosaic'
 *
 * redactErrMsg('connect ECONNREFUSED redis://user:pass@cache:6379')
 * // → 'connect ECONNREFUSED redis://***@cache:6379'
 */
const CREDENTIAL_URL_RE = /(postgres(?:ql)?|rediss?):\/\/(?:[^@\s]*@)+/gi;
export function redactErrMsg(msg: string): string {
  // String.replace with a /g regex processes every URL in the message; the
  // replacer receives the captured scheme as its second argument.
  return msg.replace(
    CREDENTIAL_URL_RE,
    (_match, scheme: string) => `${scheme.toLowerCase()}://***@`,
  );
}

View File

@@ -0,0 +1,52 @@
/**
* Test-only helpers for creating a PGlite database with the pgvector extension
* and running Drizzle migrations against it.
*
* These are intentionally NOT exported from @mosaicstack/db to avoid pulling
* the WASM vector bundle into the public API surface.
*/
import { createRequire } from 'node:module';
import { dirname, resolve } from 'node:path';
import { PGlite } from '@electric-sql/pglite';
import { vector } from '@electric-sql/pglite/vector';
import { drizzle } from 'drizzle-orm/pglite';
import { migrate as migratePglite } from 'drizzle-orm/pglite/migrator';
import type { PgliteDatabase } from 'drizzle-orm/pglite';
import * as schema from '@mosaicstack/db';
import type { DbHandle } from '@mosaicstack/db';
/**
* Create a PGlite DB handle with the pgvector extension loaded.
* Required for running Drizzle migrations that include `CREATE EXTENSION vector`.
*/
export function createPgliteDbWithVector(dataDir: string): DbHandle {
const client = new PGlite(dataDir, { extensions: { vector } });
const db = drizzle(client, { schema });
return {
db: db as unknown as DbHandle['db'],
close: async () => {
await client.close();
},
};
}
/**
 * Apply the Drizzle migration set to an already-open PGlite database.
 * The migrations folder is located relative to the installed
 * @mosaicstack/db package (its drizzle/ directory sits beside dist/).
 *
 * @param db A PgliteDatabase instance (from drizzle-orm/pglite).
 */
export async function runPgliteMigrations(
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  db: PgliteDatabase<any>,
): Promise<void> {
  const req = createRequire(import.meta.url);
  // resolves to …/packages/db/dist/index.js
  const entryPoint = req.resolve('@mosaicstack/db');
  // dist/ and drizzle/ are siblings under the package root.
  const migrationsDir = resolve(dirname(entryPoint), '../drizzle');
  await migratePglite(db, { migrationsFolder: migrationsDir });
}

View File

@@ -1,5 +1,5 @@
/** /**
* Unit tests for tier-detector.ts. * Unit tests for tier-detection.ts.
* *
* All external I/O (postgres, ioredis) is mocked no live services required. * All external I/O (postgres, ioredis) is mocked no live services required.
* *
@@ -59,28 +59,26 @@ vi.mock('ioredis', () => ({
/* Import SUT after mocks are registered */ /* Import SUT after mocks are registered */
/* ------------------------------------------------------------------ */ /* ------------------------------------------------------------------ */
import { detectAndAssertTier, TierDetectionError } from './tier-detector.js'; import { detectAndAssertTier, probeServiceHealth, TierDetectionError } from './tier-detection.js';
import type { MosaicConfig } from '@mosaicstack/config'; import type { TierConfig } from './tier-detection.js';
/* ------------------------------------------------------------------ */ /* ------------------------------------------------------------------ */
/* Config fixtures */ /* Config fixtures */
/* ------------------------------------------------------------------ */ /* ------------------------------------------------------------------ */
const LOCAL_CONFIG: MosaicConfig = { const LOCAL_CONFIG: TierConfig = {
tier: 'local', tier: 'local',
storage: { type: 'pglite', dataDir: '.mosaic/pglite' }, storage: { type: 'pglite', dataDir: '.mosaic/pglite' },
queue: { type: 'local', dataDir: '.mosaic/queue' }, queue: { type: 'local' },
memory: { type: 'keyword' },
}; };
const STANDALONE_CONFIG: MosaicConfig = { const STANDALONE_CONFIG: TierConfig = {
tier: 'standalone', tier: 'standalone',
storage: { type: 'postgres', url: 'postgresql://mosaic:mosaic@db-host:5432/mosaic' }, storage: { type: 'postgres', url: 'postgresql://mosaic:mosaic@db-host:5432/mosaic' },
queue: { type: 'bullmq', url: 'redis://valkey-host:6380' }, queue: { type: 'bullmq', url: 'redis://valkey-host:6380' },
memory: { type: 'keyword' },
}; };
const FEDERATED_CONFIG: MosaicConfig = { const FEDERATED_CONFIG: TierConfig = {
tier: 'federated', tier: 'federated',
storage: { storage: {
type: 'postgres', type: 'postgres',
@@ -88,7 +86,6 @@ const FEDERATED_CONFIG: MosaicConfig = {
enableVector: true, enableVector: true,
}, },
queue: { type: 'bullmq', url: 'redis://valkey-host:6380' }, queue: { type: 'bullmq', url: 'redis://valkey-host:6380' },
memory: { type: 'pgvector' },
}; };
/* ------------------------------------------------------------------ */ /* ------------------------------------------------------------------ */
@@ -232,15 +229,17 @@ describe('detectAndAssertTier', () => {
}); });
/* ---------------------------------------------------------------- */ /* ---------------------------------------------------------------- */
/* 7. probeValkey honors connectTimeout */ /* 7. probeValkey honors connectTimeout and lazyConnect */
/* ---------------------------------------------------------------- */ /* ---------------------------------------------------------------- */
it('constructs the ioredis Redis client with connectTimeout: 5000', async () => { it('constructs the ioredis Redis client with connectTimeout: 5000', async () => {
await detectAndAssertTier(STANDALONE_CONFIG); await detectAndAssertTier(STANDALONE_CONFIG);
expect(mocks.MockRedis).toHaveBeenCalledOnce(); expect(mocks.MockRedis).toHaveBeenCalledOnce();
const [, options] = mocks.MockRedis.mock.calls[0] as [string, Record<string, unknown>]; expect(mocks.MockRedis).toHaveBeenCalledWith(
expect(options).toMatchObject({ connectTimeout: 5000 }); expect.any(String),
expect.objectContaining({ connectTimeout: 5000, lazyConnect: true }),
);
}); });
/* ---------------------------------------------------------------- */ /* ---------------------------------------------------------------- */
@@ -269,7 +268,7 @@ describe('detectAndAssertTier', () => {
/* ---------------------------------------------------------------- */ /* ---------------------------------------------------------------- */
it('mentions CREATE permission or superuser in remediation for a generic pgvector error', async () => { it('mentions CREATE permission or superuser in remediation for a generic pgvector error', async () => {
// SELECT 1 succeeds; CREATE EXTENSION fails with a permission error (not the library-missing message). // SELECT 1 succeeds; CREATE EXTENSION fails with a permission error.
mocks.mockSqlFn mocks.mockSqlFn
.mockResolvedValueOnce([]) // SELECT 1 (probePostgres) .mockResolvedValueOnce([]) // SELECT 1 (probePostgres)
.mockRejectedValueOnce(new Error('permission denied to create extension')); .mockRejectedValueOnce(new Error('permission denied to create extension'));
@@ -293,15 +292,14 @@ describe('detectAndAssertTier', () => {
/* ---------------------------------------------------------------- */ /* ---------------------------------------------------------------- */
it('throws TierDetectionError with service=config for federated tier with queue.type !== bullmq', async () => { it('throws TierDetectionError with service=config for federated tier with queue.type !== bullmq', async () => {
const badConfig: MosaicConfig = { const badConfig: TierConfig = {
tier: 'federated', tier: 'federated',
storage: { storage: {
type: 'postgres', type: 'postgres',
url: 'postgresql://mosaic:mosaic@db-host:5433/mosaic', url: 'postgresql://mosaic:mosaic@db-host:5433/mosaic',
enableVector: true, enableVector: true,
}, },
queue: { type: 'local', dataDir: '.mosaic/queue' }, queue: { type: 'local' },
memory: { type: 'pgvector' },
}; };
try { try {
@@ -342,3 +340,207 @@ describe('detectAndAssertTier', () => {
expect(caught!.message).toContain('db-host:5432'); expect(caught!.message).toContain('db-host:5432');
}); });
}); });
/* ------------------------------------------------------------------ */
/* probeServiceHealth tests */
/* ------------------------------------------------------------------ */
describe('probeServiceHealth', () => {
beforeEach(() => {
vi.clearAllMocks();
mocks.mockSqlFn.mockResolvedValue([]);
mocks.mockEnd.mockResolvedValue(undefined);
mocks.mockRedisConnect.mockResolvedValue(undefined);
mocks.mockRedisPing.mockResolvedValue('PONG');
mocks.mockPostgresConstructor.mockImplementation(() => {
const sql = mocks.mockSqlFn as ReturnType<typeof mocks.mockSqlFn>;
(sql as unknown as Record<string, unknown>)['end'] = mocks.mockEnd;
return sql;
});
mocks.MockRedis.mockImplementation(() => ({
connect: mocks.mockRedisConnect,
ping: mocks.mockRedisPing,
disconnect: mocks.mockRedisDisconnect,
}));
});
/* ---------------------------------------------------------------- */
/* 12. local tier — all skipped, green */
/* ---------------------------------------------------------------- */
it('returns all services as skipped and overall green for local tier', async () => {
const report = await probeServiceHealth(LOCAL_CONFIG);
expect(report.tier).toBe('local');
expect(report.overall).toBe('green');
expect(report.services).toHaveLength(3);
for (const svc of report.services) {
expect(svc.status).toBe('skipped');
}
expect(mocks.mockPostgresConstructor).not.toHaveBeenCalled();
expect(mocks.MockRedis).not.toHaveBeenCalled();
});
/* ---------------------------------------------------------------- */
/* 13. postgres fails, valkey ok → red */
/* ---------------------------------------------------------------- */
it('returns red overall with postgres fail and valkey ok for standalone when postgres fails', async () => {
mocks.mockSqlFn.mockRejectedValue(new Error('connection refused'));
const report = await probeServiceHealth(STANDALONE_CONFIG);
expect(report.overall).toBe('red');
const pgCheck = report.services.find((s) => s.name === 'postgres');
expect(pgCheck?.status).toBe('fail');
expect(pgCheck?.error).toBeDefined();
expect(pgCheck?.error?.remediation).toContain('docker compose');
const vkCheck = report.services.find((s) => s.name === 'valkey');
expect(vkCheck?.status).toBe('ok');
});
/* ---------------------------------------------------------------- */
/* 14. federated all green → 3 services ok */
/* ---------------------------------------------------------------- */
it('returns green overall with all 3 services ok for federated when all pass', async () => {
mocks.mockSqlFn.mockResolvedValue([]);
const report = await probeServiceHealth(FEDERATED_CONFIG);
expect(report.tier).toBe('federated');
expect(report.overall).toBe('green');
expect(report.services).toHaveLength(3);
for (const svc of report.services) {
expect(svc.status).toBe('ok');
}
});
/* ---------------------------------------------------------------- */
/* 15. durationMs is a non-negative number for every service check */
/* ---------------------------------------------------------------- */
it('sets durationMs as a non-negative number for every service check', async () => {
mocks.mockSqlFn.mockResolvedValue([]);
const report = await probeServiceHealth(FEDERATED_CONFIG);
for (const svc of report.services) {
expect(typeof svc.durationMs).toBe('number');
expect(svc.durationMs).toBeGreaterThanOrEqual(0);
}
});
it('sets durationMs >= 0 even for skipped services (local tier)', async () => {
const report = await probeServiceHealth(LOCAL_CONFIG);
for (const svc of report.services) {
expect(typeof svc.durationMs).toBe('number');
expect(svc.durationMs).toBeGreaterThanOrEqual(0);
}
});
/* ---------------------------------------------------------------- */
/* 16. configPath is passed through to the report */
/* ---------------------------------------------------------------- */
it('includes configPath in the report when provided', async () => {
const report = await probeServiceHealth(LOCAL_CONFIG, '/etc/mosaic/mosaic.config.json');
expect(report.configPath).toBe('/etc/mosaic/mosaic.config.json');
});
/* ---------------------------------------------------------------- */
/* 17. standalone — valkey fails, postgres ok → red */
/* ---------------------------------------------------------------- */
it('returns red with valkey fail and postgres ok for standalone when valkey fails', async () => {
mocks.mockSqlFn.mockResolvedValue([]);
mocks.mockRedisConnect.mockRejectedValue(new Error('ECONNREFUSED'));
const report = await probeServiceHealth(STANDALONE_CONFIG);
expect(report.overall).toBe('red');
const pgCheck = report.services.find((s) => s.name === 'postgres');
expect(pgCheck?.status).toBe('ok');
const vkCheck = report.services.find((s) => s.name === 'valkey');
expect(vkCheck?.status).toBe('fail');
expect(vkCheck?.error).toBeDefined();
});
/* ---------------------------------------------------------------- */
/* 18. federated — pgvector fails → red with remediation */
/* ---------------------------------------------------------------- */
it('returns red with pgvector fail for federated when pgvector probe fails', async () => {
mocks.mockSqlFn
.mockResolvedValueOnce([]) // postgres SELECT 1
.mockRejectedValueOnce(new Error('extension "vector" is not available')); // pgvector
const report = await probeServiceHealth(FEDERATED_CONFIG);
expect(report.overall).toBe('red');
const pvCheck = report.services.find((s) => s.name === 'pgvector');
expect(pvCheck?.status).toBe('fail');
expect(pvCheck?.error?.remediation).toContain('pgvector/pgvector:pg17');
});
/* ---------------------------------------------------------------- */
/* 19. federated — non-bullmq queue → red config error, no network */
/* ---------------------------------------------------------------- */
it('returns red overall with config error when federated tier has non-bullmq queue (no network call)', async () => {
  const badQueueConfig: TierConfig = {
    tier: 'federated',
    storage: {
      type: 'postgres',
      url: 'postgresql://mosaic:mosaic@db-host:5433/mosaic',
      enableVector: true,
    },
    queue: { type: 'local' },
  };
  const { overall, services } = await probeServiceHealth(badQueueConfig);
  expect(overall).toBe('red');
  const valkeyCheck = services.find((s) => s.name === 'valkey');
  expect(valkeyCheck?.status).toBe('fail');
  expect(valkeyCheck?.error?.remediation).toMatch(/bullmq/i);
  // The misconfiguration must be rejected before any connection attempt:
  // the Redis client constructor must never have been invoked.
  expect(mocks.MockRedis).not.toHaveBeenCalled();
});
/* ---------------------------------------------------------------- */
/* 20. durationMs actually measures real elapsed time */
/* ---------------------------------------------------------------- */
it('measures real elapsed time for service probes', async () => {
  const DELAY_MS = 25;
  // Give the postgres mock a genuine wall-clock delay.
  mocks.mockSqlFn.mockImplementation(async () => {
    await new Promise((resolve) => setTimeout(resolve, DELAY_MS));
    return [];
  });
  const report = await probeServiceHealth(STANDALONE_CONFIG);
  const pg = report.services.find((s) => s.name === 'postgres');
  expect(pg).toBeDefined();
  // >= 20ms leaves slack for timer jitter; a stubbed clock would report 0.
  expect(pg!.durationMs).toBeGreaterThanOrEqual(20);
});
});

View File

@@ -0,0 +1,559 @@
/**
* Tier Detection — pre-flight service reachability probes.
*
* Lifted from apps/gateway/src/bootstrap/tier-detector.ts so both the gateway
* and the mosaic CLI can share the same probe logic without duplicating code or
* creating circular workspace dependencies.
*
* Library choices:
* - Postgres: `postgres` npm package (already a dep via @mosaicstack/db / drizzle-orm).
* - Valkey: `ioredis` (compatible with Valkey; same URL convention used by bullmq).
*/
import postgres from 'postgres';
import { Redis } from 'ioredis';
import { redactErrMsg } from './redact-error.js';
/* ------------------------------------------------------------------ */
/* Local structural type — avoids circular dependency */
/* ------------------------------------------------------------------ */
/**
* Minimal structural shape required for tier detection.
* Mirrors the relevant fields of MosaicConfig (from @mosaicstack/config) without
* creating a dependency cycle (config depends on storage for StorageConfig).
* Any object that satisfies MosaicConfig also satisfies this type.
*/
export interface TierConfig {
// Deployment tier — determines which external services must be probed.
tier: 'local' | 'standalone' | 'federated';
// Storage backend; only the 'postgres' variant carries a connection URL.
storage:
| { type: 'pglite'; dataDir?: string }
| { type: 'postgres'; url: string; enableVector?: boolean }
| { type: 'files'; dataDir: string; format?: 'json' | 'md' };
// Queue backend; only 'bullmq' implies a Valkey/Redis URL to probe.
queue: { type: string; url?: string };
}
/* ------------------------------------------------------------------ */
/* Public types */
/* ------------------------------------------------------------------ */
// Result of probing a single external service.
export interface ServiceCheck {
name: 'postgres' | 'valkey' | 'pgvector';
// 'skipped' means the service is not required for the active tier.
status: 'ok' | 'fail' | 'skipped';
// Host/port are omitted for skipped checks.
host?: string;
port?: number;
// Wall-clock probe time in milliseconds (0 for skipped checks).
durationMs: number;
// Present only on failure: redacted message plus a human-actionable hint.
error?: { message: string; remediation: string };
}
// Aggregate report returned by probeServiceHealth (used by `mosaic gateway doctor`).
export interface TierHealthReport {
tier: 'local' | 'standalone' | 'federated';
// Path of the config file that produced this report, when known.
configPath?: string;
overall: 'green' | 'yellow' | 'red';
services: ServiceCheck[];
}
/* ------------------------------------------------------------------ */
/* Structured error type */
/* ------------------------------------------------------------------ */
/**
 * Structured failure raised when a required service is unreachable or
 * misconfigured. Carries the probed host/port and a human-actionable
 * remediation hint, and preserves the underlying error via `cause`.
 */
export class TierDetectionError extends Error {
  public readonly service: 'postgres' | 'valkey' | 'pgvector' | 'config';
  public readonly host: string;
  public readonly port: number;
  public readonly remediation: string;
  constructor(opts: {
    service: 'postgres' | 'valkey' | 'pgvector' | 'config';
    host: string;
    port: number;
    remediation: string;
    cause?: unknown;
  }) {
    // Fix: separate the address from the remediation hint. Previously the
    // hint was concatenated directly after the port, yielding messages like
    // "…unusable at localhost:5432Start Postgres: …".
    const message =
      `[tier-detector] ${opts.service} unreachable or unusable at ` +
      `${opts.host}:${opts.port}. ${opts.remediation}`;
    super(message, { cause: opts.cause });
    this.name = 'TierDetectionError';
    this.service = opts.service;
    this.host = opts.host;
    this.port = opts.port;
    this.remediation = opts.remediation;
  }
}
/* ------------------------------------------------------------------ */
/* URL helpers */
/* ------------------------------------------------------------------ */
/**
 * Pull hostname and port out of a URL string.
 * Falls back to host 'unknown' / the supplied default port when the string
 * is not a parseable URL or omits a component.
 */
function parseHostPort(url: string, defaultPort: number): { host: string; port: number } {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return { host: 'unknown', port: defaultPort };
  }
  return {
    host: parsed.hostname === '' ? 'unknown' : parsed.hostname,
    port: parsed.port === '' ? defaultPort : Number.parseInt(parsed.port, 10),
  };
}
/* ------------------------------------------------------------------ */
/* Internal probe results */
/* ------------------------------------------------------------------ */
// Outcome of a single measured probe: always carries host/port/duration;
// `error` is present only when the probe failed.
interface ProbeResult {
host: string;
port: number;
// Wall-clock time of the probe in milliseconds.
durationMs: number;
// Redacted failure message plus a remediation hint, set only on failure.
error?: { message: string; remediation: string };
}
/* ------------------------------------------------------------------ */
/* Postgres probe */
/* ------------------------------------------------------------------ */
/**
 * Assert Postgres is reachable at `url` by running `SELECT 1`.
 * Throws TierDetectionError with a docker-compose remediation hint on failure.
 * The single-use connection is always closed, even on error.
 */
async function probePostgres(url: string): Promise<void> {
  const { host, port } = parseHostPort(url, 5432);
  let client: ReturnType<typeof postgres> | undefined;
  try {
    client = postgres(url, {
      max: 1,
      connect_timeout: 5,
      idle_timeout: 5,
    });
    // A trivial round-trip proves connectivity and authentication.
    await client`SELECT 1`;
  } catch (cause) {
    throw new TierDetectionError({
      service: 'postgres',
      host,
      port,
      remediation:
        'Start Postgres: `docker compose -f docker-compose.federated.yml --profile federated up -d postgres-federated`',
      cause,
    });
  } finally {
    // Best-effort cleanup; the probe result is already decided.
    await client?.end({ timeout: 2 }).catch(() => {});
  }
}
/**
 * Non-throwing, timed variant of the Postgres probe.
 * Returns a ProbeResult whose `error` field is set (with a redacted message)
 * instead of throwing, so the doctor report can continue probing.
 */
async function probePostgresMeasured(url: string): Promise<ProbeResult> {
  const { host, port } = parseHostPort(url, 5432);
  const startedAt = Date.now();
  let client: ReturnType<typeof postgres> | undefined;
  try {
    client = postgres(url, {
      max: 1,
      connect_timeout: 5,
      idle_timeout: 5,
    });
    await client`SELECT 1`;
    return { host, port, durationMs: Date.now() - startedAt };
  } catch (cause) {
    const raw = cause instanceof Error ? cause.message : String(cause);
    return {
      host,
      port,
      durationMs: Date.now() - startedAt,
      error: {
        message: redactErrMsg(raw),
        remediation:
          'Start Postgres: `docker compose -f docker-compose.federated.yml --profile federated up -d postgres-federated`',
      },
    };
  } finally {
    await client?.end({ timeout: 2 }).catch(() => {});
  }
}
/* ------------------------------------------------------------------ */
/* pgvector probe */
/* ------------------------------------------------------------------ */
/**
 * Assert the pgvector extension can be installed on the target database.
 * `CREATE EXTENSION IF NOT EXISTS` succeeds whether or not the extension is
 * already present; it fails only when the shared library is missing from the
 * server image or the role lacks CREATE permission — the remediation hint
 * distinguishes the two cases.
 */
async function probePgvector(url: string): Promise<void> {
  const { host, port } = parseHostPort(url, 5432);
  let client: ReturnType<typeof postgres> | undefined;
  try {
    client = postgres(url, {
      max: 1,
      connect_timeout: 5,
      idle_timeout: 5,
    });
    await client`CREATE EXTENSION IF NOT EXISTS vector`;
  } catch (cause) {
    const lowered = cause instanceof Error ? cause.message.toLowerCase() : '';
    const libraryMissing = lowered.includes('extension "vector" is not available');
    throw new TierDetectionError({
      service: 'pgvector',
      host,
      port,
      remediation: libraryMissing
        ? 'Use the `pgvector/pgvector:pg17` image, not the stock `postgres:17` image. See `docker-compose.federated.yml`.'
        : 'The database role lacks permission to CREATE EXTENSION. Grant `CREATE` on the database, or run as a superuser.',
      cause,
    });
  } finally {
    await client?.end({ timeout: 2 }).catch(() => {});
  }
}
/**
 * Non-throwing, timed variant of the pgvector probe.
 * Mirrors probePgvector's two-case remediation (missing shared library vs.
 * missing CREATE permission) but returns the failure in the result.
 */
async function probePgvectorMeasured(url: string): Promise<ProbeResult> {
  const { host, port } = parseHostPort(url, 5432);
  const startedAt = Date.now();
  let client: ReturnType<typeof postgres> | undefined;
  try {
    client = postgres(url, {
      max: 1,
      connect_timeout: 5,
      idle_timeout: 5,
    });
    await client`CREATE EXTENSION IF NOT EXISTS vector`;
    return { host, port, durationMs: Date.now() - startedAt };
  } catch (cause) {
    const lowered = cause instanceof Error ? cause.message.toLowerCase() : '';
    const libraryMissing = lowered.includes('extension "vector" is not available');
    const raw = cause instanceof Error ? cause.message : String(cause);
    return {
      host,
      port,
      durationMs: Date.now() - startedAt,
      error: {
        message: redactErrMsg(raw),
        remediation: libraryMissing
          ? 'Use the `pgvector/pgvector:pg17` image, not the stock `postgres:17` image. See `docker-compose.federated.yml`.'
          : 'The database role lacks permission to CREATE EXTENSION. Grant `CREATE` on the database, or run as a superuser.',
      },
    };
  } finally {
    await client?.end({ timeout: 2 }).catch(() => {});
  }
}
/* ------------------------------------------------------------------ */
/* Valkey probe */
/* ------------------------------------------------------------------ */
// Fallback Valkey URL used when queue.url is not configured.
// Note the non-default port 6380 (Redis convention is 6379).
const DEFAULT_VALKEY_URL = 'redis://localhost:6380';
/**
 * Assert Valkey is reachable at `url` via a PING round-trip.
 * The client is configured fail-fast: no retries, lazy connect, and a
 * 5-second hard cap on the connection attempt. Always disconnects.
 */
async function probeValkey(url: string): Promise<void> {
  const { host, port } = parseHostPort(url, 6380);
  const client = new Redis(url, {
    enableReadyCheck: false,
    maxRetriesPerRequest: 0,
    retryStrategy: () => null, // no retries — fail fast
    lazyConnect: true,
    connectTimeout: 5000,
  });
  try {
    await client.connect();
    const reply = await client.ping();
    if (reply !== 'PONG') {
      throw new Error(`Unexpected PING response: ${reply}`);
    }
  } catch (cause) {
    throw new TierDetectionError({
      service: 'valkey',
      host,
      port,
      remediation:
        'Start Valkey: `docker compose -f docker-compose.federated.yml --profile federated up -d valkey-federated`',
      cause,
    });
  } finally {
    client.disconnect();
  }
}
/**
 * Non-throwing, timed variant of the Valkey probe.
 * Same fail-fast client settings as probeValkey; failures are returned
 * (message redacted) rather than thrown.
 */
async function probeValkeyMeasured(url: string): Promise<ProbeResult> {
  const { host, port } = parseHostPort(url, 6380);
  const startedAt = Date.now();
  const client = new Redis(url, {
    enableReadyCheck: false,
    maxRetriesPerRequest: 0,
    retryStrategy: () => null,
    lazyConnect: true,
    connectTimeout: 5000,
  });
  try {
    await client.connect();
    const reply = await client.ping();
    if (reply !== 'PONG') {
      throw new Error(`Unexpected PING response: ${reply}`);
    }
    return { host, port, durationMs: Date.now() - startedAt };
  } catch (cause) {
    const raw = cause instanceof Error ? cause.message : String(cause);
    return {
      host,
      port,
      durationMs: Date.now() - startedAt,
      error: {
        message: redactErrMsg(raw),
        remediation:
          'Start Valkey: `docker compose -f docker-compose.federated.yml --profile federated up -d valkey-federated`',
      },
    };
  } finally {
    client.disconnect();
  }
}
/* ------------------------------------------------------------------ */
/* Public entry points */
/* ------------------------------------------------------------------ */
/**
 * Assert that all services required by `config.tier` are reachable.
 *
 * - `local` — no-op (PGlite is in-process; no external services).
 * - `standalone` — probes Postgres, plus Valkey when queue.type === 'bullmq'.
 * - `federated` — requires bullmq; probes Postgres, Valkey, then pgvector.
 *
 * Throws `TierDetectionError` on the first failure, carrying host:port and
 * a remediation hint.
 */
export async function detectAndAssertTier(config: TierConfig): Promise<void> {
  // PGlite runs in-process — the local tier has nothing to probe.
  if (config.tier === 'local') return;

  const pgUrl =
    config.storage.type === 'postgres' ? config.storage.url : 'postgresql://localhost:5432/mosaic';

  if (config.tier === 'standalone') {
    await probePostgres(pgUrl);
    if (config.queue.type === 'bullmq') {
      await probeValkey(config.queue.url ?? DEFAULT_VALKEY_URL);
    }
    return;
  }

  // tier === 'federated'
  // bullmq is mandatory here — reject bad config before any network I/O.
  if (config.queue.type !== 'bullmq') {
    throw new TierDetectionError({
      service: 'config',
      host: 'localhost',
      port: 0,
      remediation:
        "Federated tier requires queue.type === 'bullmq'. " +
        "Set queue: { type: 'bullmq', url: 'redis://...' } in your mosaic.config.json.",
    });
  }
  await probePostgres(pgUrl);
  await probeValkey(config.queue.url ?? DEFAULT_VALKEY_URL);
  await probePgvector(pgUrl);
}
/**
* Non-throwing variant for `mosaic gateway doctor`.
*
* Probes ALL required services even if some fail, returning a structured report.
* Services not required for the current tier are reported as `skipped`.
*
* Overall status:
* - `green` — all required services OK
* - `yellow` — all required services OK, but a non-critical check failed
* (currently unused — reserved for future optional probes)
* - `red` — at least one required service failed
*/
export async function probeServiceHealth(
config: TierConfig,
configPath?: string,
): Promise<TierHealthReport> {
const tier = config.tier;
// local tier: PGlite is in-process, no external services needed.
if (tier === 'local') {
return {
tier,
configPath,
overall: 'green',
services: [
{ name: 'postgres', status: 'skipped', durationMs: 0 },
{ name: 'valkey', status: 'skipped', durationMs: 0 },
{ name: 'pgvector', status: 'skipped', durationMs: 0 },
],
};
}
const pgUrl =
config.storage.type === 'postgres' ? config.storage.url : 'postgresql://localhost:5432/mosaic';
const valkeyUrl =
config.queue.type === 'bullmq' ? (config.queue.url ?? DEFAULT_VALKEY_URL) : null;
const services: ServiceCheck[] = [];
let hasFailure = false;
if (tier === 'standalone') {
// Postgres — required
const pgResult = await probePostgresMeasured(pgUrl);
if (pgResult.error) {
hasFailure = true;
services.push({
name: 'postgres',
status: 'fail',
host: pgResult.host,
port: pgResult.port,
durationMs: pgResult.durationMs,
error: pgResult.error,
});
} else {
services.push({
name: 'postgres',
status: 'ok',
host: pgResult.host,
port: pgResult.port,
durationMs: pgResult.durationMs,
});
}
// Valkey — required if bullmq
if (valkeyUrl) {
const vkResult = await probeValkeyMeasured(valkeyUrl);
if (vkResult.error) {
hasFailure = true;
services.push({
name: 'valkey',
status: 'fail',
host: vkResult.host,
port: vkResult.port,
durationMs: vkResult.durationMs,
error: vkResult.error,
});
} else {
services.push({
name: 'valkey',
status: 'ok',
host: vkResult.host,
port: vkResult.port,
durationMs: vkResult.durationMs,
});
}
} else {
services.push({ name: 'valkey', status: 'skipped', durationMs: 0 });
}
// pgvector — not required for standalone
services.push({ name: 'pgvector', status: 'skipped', durationMs: 0 });
return {
tier,
configPath,
overall: hasFailure ? 'red' : 'green',
services,
};
}
// tier === 'federated'
// Postgres — required
const pgResult = await probePostgresMeasured(pgUrl);
if (pgResult.error) {
hasFailure = true;
services.push({
name: 'postgres',
status: 'fail',
host: pgResult.host,
port: pgResult.port,
durationMs: pgResult.durationMs,
error: pgResult.error,
});
} else {
services.push({
name: 'postgres',
status: 'ok',
host: pgResult.host,
port: pgResult.port,
durationMs: pgResult.durationMs,
});
}
// Valkey — required for federated (queue.type must be bullmq)
if (config.queue.type !== 'bullmq') {
hasFailure = true;
services.push({
name: 'valkey',
status: 'fail',
host: 'localhost',
port: 0,
durationMs: 0,
error: {
message: "Federated tier requires queue.type === 'bullmq'",
remediation:
"Set queue: { type: 'bullmq', url: 'redis://...' } in your mosaic.config.json.",
},
});
} else {
const federatedValkeyUrl = config.queue.url ?? DEFAULT_VALKEY_URL;
const vkResult = await probeValkeyMeasured(federatedValkeyUrl);
if (vkResult.error) {
hasFailure = true;
services.push({
name: 'valkey',
status: 'fail',
host: vkResult.host,
port: vkResult.port,
durationMs: vkResult.durationMs,
error: vkResult.error,
});
} else {
services.push({
name: 'valkey',
status: 'ok',
host: vkResult.host,
port: vkResult.port,
durationMs: vkResult.durationMs,
});
}
}
// pgvector — required for federated
const pvResult = await probePgvectorMeasured(pgUrl);
if (pvResult.error) {
hasFailure = true;
services.push({
name: 'pgvector',
status: 'fail',
host: pvResult.host,
port: pvResult.port,
durationMs: pvResult.durationMs,
error: pvResult.error,
});
} else {
services.push({
name: 'pgvector',
status: 'ok',
host: pvResult.host,
port: pvResult.port,
durationMs: pvResult.durationMs,
});
}
return {
tier,
configPath,
overall: hasFailure ? 'red' : 'green',
services,
};
}

6
pnpm-lock.yaml generated
View File

@@ -648,10 +648,16 @@ importers:
commander: commander:
specifier: ^13.0.0 specifier: ^13.0.0
version: 13.1.0 version: 13.1.0
ioredis:
specifier: ^5.10.0
version: 5.10.0
postgres: postgres:
specifier: ^3.4.8 specifier: ^3.4.8
version: 3.4.8 version: 3.4.8
devDependencies: devDependencies:
drizzle-orm:
specifier: ^0.45.1
version: 0.45.1(@electric-sql/pglite@0.2.17)(@opentelemetry/api@1.9.0)(@types/better-sqlite3@7.6.13)(@types/pg@8.15.6)(better-sqlite3@12.8.0)(kysely@0.28.11)(postgres@3.4.8)
typescript: typescript:
specifier: ^5.8.0 specifier: ^5.8.0
version: 5.9.3 version: 5.9.3