Compare commits

...

8 Commits

Author SHA1 Message Date
54c422ab06 Merge pull request 'docs(federation): close FED-M1 milestone' (#481) from feat/federation-m1-close into main
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
ci/woodpecker/tag/publish Pipeline was successful
2026-04-20 02:20:43 +00:00
Jarvis
b9fb8aab57 docs(federation): close FED-M1 milestone
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/pr/ci Pipeline was successful
- TASKS.md: mark FED-M1-12 done with PR/issue/tag references
- MISSION-MANIFEST.md: phase=M1 complete, progress 1/7, M1 row done with PR range #470-#481, session log appended
- scratchpad: Session 19 entry covering M1-09 → M1-12 with PR ledger and M1 retrospective learnings

Refs #460
2026-04-19 21:12:52 -05:00
78841f228a docs(federation): operator setup + migration guides (FED-M1-11) (#480)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
2026-04-20 02:07:15 +00:00
dc4afee848 fix(storage): redact credentials in driver errors + advisory lock (FED-M1-10) (#479)
Some checks failed
ci/woodpecker/push/ci Pipeline failed
ci/woodpecker/push/publish Pipeline failed
2026-04-20 02:02:57 +00:00
1e2b8ac8de test(federation): standalone regression canary — no breakage from M1 (FED-M1-09) (#478)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
2026-04-20 01:46:35 +00:00
15d849c166 test(storage): integration test for migrate-tier (FED-M1-08) + camelCase column fix (#477)
Some checks failed
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline failed
2026-04-20 01:40:02 +00:00
78251d4af8 test(federation): integration tests for federated tier gateway boot (FED-M1-07) (#476)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
2026-04-20 01:13:10 +00:00
1a4b1ebbf1 feat(gateway,storage): mosaic gateway doctor with tier health JSON (FED-M1-06) (#475)
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
2026-04-20 01:00:39 +00:00
25 changed files with 2477 additions and 325 deletions

View File

@@ -80,6 +80,8 @@ If you already have a gateway account but no token, use `mosaic gateway config r
### Configuration
Mosaic supports three storage tiers: `local` (PGlite, single-host), `standalone` (PostgreSQL, single-host), and `federated` (PostgreSQL + pgvector + Valkey, multi-host). See [Federated Tier Setup](docs/federation/SETUP.md) for multi-user and production deployments, or [Migrating to Federated](docs/guides/migrate-tier.md) to upgrade from existing tiers.
```bash
mosaic config show # Print full config as JSON
mosaic config get <key> # Read a specific key

View File

@@ -0,0 +1,64 @@
/**
* Test B — Gateway boot refuses (fail-fast) when PG is unreachable.
*
* Prereq: docker compose -f docker-compose.federated.yml --profile federated up -d
* (Valkey must be running; only PG is intentionally misconfigured.)
* Run: FEDERATED_INTEGRATION=1 pnpm --filter @mosaicstack/gateway test src/__tests__/integration/federated-boot.pg-unreachable.integration.test.ts
*
* Skipped when FEDERATED_INTEGRATION !== '1'.
*/
import net from 'node:net';
import { beforeAll, describe, expect, it } from 'vitest';
import { TierDetectionError, detectAndAssertTier } from '@mosaicstack/storage';
const run = process.env['FEDERATED_INTEGRATION'] === '1';
const VALKEY_URL = 'redis://localhost:6380';
/**
* Reserves a guaranteed-closed port at runtime by binding to an ephemeral OS
* port (port 0) and immediately releasing it. The OS will not reassign the
* port during the TIME_WAIT window, so it remains closed for the duration of
* this test.
*/
async function reserveClosedPort(): Promise<number> {
return new Promise((resolve, reject) => {
const server = net.createServer();
server.listen(0, '127.0.0.1', () => {
const addr = server.address();
if (typeof addr !== 'object' || !addr) return reject(new Error('no addr'));
const port = addr.port;
server.close(() => resolve(port));
});
server.on('error', reject);
});
}
describe.skipIf(!run)('federated boot — PG unreachable', () => {
let badPgUrl: string;
beforeAll(async () => {
const closedPort = await reserveClosedPort();
badPgUrl = `postgresql://mosaic:mosaic@localhost:${closedPort}/mosaic`;
});
it('detectAndAssertTier throws TierDetectionError with service: postgres when PG is down', async () => {
const brokenConfig = {
tier: 'federated' as const,
storage: {
type: 'postgres' as const,
url: badPgUrl,
enableVector: true,
},
queue: {
type: 'bullmq',
url: VALKEY_URL,
},
};
await expect(detectAndAssertTier(brokenConfig)).rejects.toSatisfy(
(err: unknown) => err instanceof TierDetectionError && err.service === 'postgres',
);
}, 10_000);
});

View File

@@ -0,0 +1,50 @@
/**
* Test A — Gateway boot succeeds when federated services are up.
*
* Prereq: docker compose -f docker-compose.federated.yml --profile federated up -d
* Run: FEDERATED_INTEGRATION=1 pnpm --filter @mosaicstack/gateway test src/__tests__/integration/federated-boot.success.integration.test.ts
*
* Skipped when FEDERATED_INTEGRATION !== '1'.
*/
import postgres from 'postgres';
import { afterAll, describe, expect, it } from 'vitest';
import { detectAndAssertTier } from '@mosaicstack/storage';
const run = process.env['FEDERATED_INTEGRATION'] === '1';
const PG_URL = 'postgresql://mosaic:mosaic@localhost:5433/mosaic';
const VALKEY_URL = 'redis://localhost:6380';
const federatedConfig = {
tier: 'federated' as const,
storage: {
type: 'postgres' as const,
url: PG_URL,
enableVector: true,
},
queue: {
type: 'bullmq',
url: VALKEY_URL,
},
};
describe.skipIf(!run)('federated boot — success path', () => {
let sql: ReturnType<typeof postgres> | undefined;
afterAll(async () => {
if (sql) {
await sql.end({ timeout: 2 }).catch(() => {});
}
});
it('detectAndAssertTier resolves without throwing when federated services are up', async () => {
await expect(detectAndAssertTier(federatedConfig)).resolves.toBeUndefined();
}, 10_000);
it('pgvector extension is registered (pg_extension row exists)', async () => {
sql = postgres(PG_URL, { max: 1, connect_timeout: 5, idle_timeout: 5 });
const rows = await sql`SELECT * FROM pg_extension WHERE extname = 'vector'`;
expect(rows).toHaveLength(1);
}, 10_000);
});

View File

@@ -0,0 +1,43 @@
/**
* Test C — pgvector extension is functional end-to-end.
*
* Creates a temp table with a vector(3) column, inserts a row, and queries it
* back — confirming the extension is not just registered but operational.
*
* Prereq: docker compose -f docker-compose.federated.yml --profile federated up -d
* Run: FEDERATED_INTEGRATION=1 pnpm --filter @mosaicstack/gateway test src/__tests__/integration/federated-pgvector.integration.test.ts
*
* Skipped when FEDERATED_INTEGRATION !== '1'.
*/
import postgres from 'postgres';
import { afterAll, describe, expect, it } from 'vitest';
const run = process.env['FEDERATED_INTEGRATION'] === '1';
const PG_URL = 'postgresql://mosaic:mosaic@localhost:5433/mosaic';
let sql: ReturnType<typeof postgres> | undefined;
afterAll(async () => {
if (sql) {
await sql.end({ timeout: 2 }).catch(() => {});
}
});
describe.skipIf(!run)('federated pgvector — functional end-to-end', () => {
it('vector ops round-trip: INSERT [1,2,3] and SELECT returns [1,2,3]', async () => {
sql = postgres(PG_URL, { max: 1, connect_timeout: 5, idle_timeout: 5 });
await sql`CREATE TEMP TABLE t (id int, embedding vector(3))`;
await sql`INSERT INTO t VALUES (1, '[1,2,3]')`;
const rows = await sql`SELECT embedding FROM t`;
expect(rows).toHaveLength(1);
// The postgres driver returns vector columns as strings like '[1,2,3]'.
// Normalise by parsing the string representation.
const raw = rows[0]?.['embedding'] as string;
const parsed = JSON.parse(raw) as number[];
expect(parsed).toEqual([1, 2, 3]);
}, 10_000);
});

View File

@@ -1,220 +0,0 @@
/**
* Tier Detector — pre-flight service reachability assertions.
*
* Runs BEFORE NestFactory.create() to surface actionable errors immediately
* rather than crashing mid-boot with an opaque stack trace.
*
* Library choices:
* - Postgres: `postgres` npm package (already a dep via @mosaicstack/db / drizzle-orm).
* The spec mentions `pg`, but only `postgres` is installed in this monorepo.
* - Valkey: `ioredis` (already a dep via @mosaicstack/queue → bullmq; same URL
* convention used by the bullmq adapter).
*/
import postgres from 'postgres';
import { Redis } from 'ioredis';
import type { MosaicConfig } from '@mosaicstack/config';
/* ------------------------------------------------------------------ */
/* Structured error type */
/* ------------------------------------------------------------------ */
export class TierDetectionError extends Error {
public readonly service: 'postgres' | 'valkey' | 'pgvector' | 'config';
public readonly host: string;
public readonly port: number;
public readonly remediation: string;
constructor(opts: {
service: 'postgres' | 'valkey' | 'pgvector' | 'config';
host: string;
port: number;
remediation: string;
cause?: unknown;
}) {
const message =
`[tier-detector] ${opts.service} unreachable or unusable at ` +
`${opts.host}:${opts.port}${opts.remediation}`;
super(message, { cause: opts.cause });
this.name = 'TierDetectionError';
this.service = opts.service;
this.host = opts.host;
this.port = opts.port;
this.remediation = opts.remediation;
}
}
/* ------------------------------------------------------------------ */
/* URL helpers */
/* ------------------------------------------------------------------ */
/** Extract host and port from a URL string, returning safe fallbacks on parse failure. */
function parseHostPort(url: string, defaultPort: number): { host: string; port: number } {
try {
const parsed = new URL(url);
const host = parsed.hostname || 'unknown';
const port = parsed.port ? parseInt(parsed.port, 10) : defaultPort;
return { host, port };
} catch {
return { host: 'unknown', port: defaultPort };
}
}
/* ------------------------------------------------------------------ */
/* Postgres probe */
/* ------------------------------------------------------------------ */
async function probePostgres(url: string): Promise<void> {
const { host, port } = parseHostPort(url, 5432);
let sql: ReturnType<typeof postgres> | undefined;
try {
sql = postgres(url, {
max: 1,
connect_timeout: 5,
idle_timeout: 5,
});
// Run a trivial query to confirm connectivity.
await sql`SELECT 1`;
} catch (cause) {
throw new TierDetectionError({
service: 'postgres',
host,
port,
remediation:
'Start Postgres: `docker compose -f docker-compose.federated.yml --profile federated up -d postgres-federated`',
cause,
});
} finally {
if (sql) {
await sql.end({ timeout: 2 }).catch(() => {
// Ignore cleanup errors — we already have what we need.
});
}
}
}
/* ------------------------------------------------------------------ */
/* pgvector probe */
/* ------------------------------------------------------------------ */
async function probePgvector(url: string): Promise<void> {
const { host, port } = parseHostPort(url, 5432);
let sql: ReturnType<typeof postgres> | undefined;
try {
sql = postgres(url, {
max: 1,
connect_timeout: 5,
idle_timeout: 5,
});
// This succeeds whether the extension is already installed or freshly created.
// It errors only if the pgvector shared library is missing from the Postgres binary.
await sql`CREATE EXTENSION IF NOT EXISTS vector`;
} catch (cause) {
const causeMsg = cause instanceof Error ? cause.message.toLowerCase() : '';
const isLibraryMissing = causeMsg.includes('extension "vector" is not available');
const remediation = isLibraryMissing
? 'Use the `pgvector/pgvector:pg17` image, not the stock `postgres:17` image. See `docker-compose.federated.yml`.'
: 'The database role lacks permission to CREATE EXTENSION. Grant `CREATE` on the database, or run as a superuser.';
throw new TierDetectionError({
service: 'pgvector',
host,
port,
remediation,
cause,
});
} finally {
if (sql) {
await sql.end({ timeout: 2 }).catch(() => {
// Ignore cleanup errors.
});
}
}
}
/* ------------------------------------------------------------------ */
/* Valkey probe */
/* ------------------------------------------------------------------ */
const DEFAULT_VALKEY_URL = 'redis://localhost:6380';
async function probeValkey(url: string): Promise<void> {
const { host, port } = parseHostPort(url, 6380);
const client = new Redis(url, {
enableReadyCheck: false,
maxRetriesPerRequest: 0,
retryStrategy: () => null, // no retries — fail fast
lazyConnect: true,
connectTimeout: 5000, // fail-fast: 5-second hard cap on connection attempt
});
try {
await client.connect();
const pong = await client.ping();
if (pong !== 'PONG') {
throw new Error(`Unexpected PING response: ${pong}`);
}
} catch (cause) {
throw new TierDetectionError({
service: 'valkey',
host,
port,
remediation:
'Start Valkey: `docker compose -f docker-compose.federated.yml --profile federated up -d valkey-federated`',
cause,
});
} finally {
client.disconnect();
}
}
/* ------------------------------------------------------------------ */
/* Public entry point */
/* ------------------------------------------------------------------ */
/**
* Assert that all services required by `config.tier` are reachable.
*
* - `local` — no-op (PGlite is in-process; no external services).
* - `standalone` — assert Postgres + Valkey (if queue.type === 'bullmq').
* - `federated` — assert Postgres + Valkey + pgvector installability.
*
* Throws `TierDetectionError` on the first failure with host:port and
* a remediation hint.
*/
export async function detectAndAssertTier(config: MosaicConfig): Promise<void> {
if (config.tier === 'local') {
// PGlite runs in-process — nothing to probe.
return;
}
const pgUrl =
config.storage.type === 'postgres' ? config.storage.url : 'postgresql://localhost:5432/mosaic';
const valkeyUrl =
config.queue.type === 'bullmq' ? (config.queue.url ?? DEFAULT_VALKEY_URL) : null;
if (config.tier === 'standalone') {
await probePostgres(pgUrl);
if (valkeyUrl) {
await probeValkey(valkeyUrl);
}
return;
}
// tier === 'federated'
// Reject misconfigured queue upfront — federated requires bullmq + a Valkey URL.
if (config.queue.type !== 'bullmq') {
throw new TierDetectionError({
service: 'config',
host: 'localhost',
port: 0,
remediation:
"Federated tier requires queue.type === 'bullmq'. " +
"Set queue: { type: 'bullmq', url: 'redis://...' } in your mosaic.config.json.",
});
}
const federatedValkeyUrl = config.queue.url ?? DEFAULT_VALKEY_URL;
await probePostgres(pgUrl);
await probeValkey(federatedValkeyUrl);
await probePgvector(pgUrl);
}

View File

@@ -25,7 +25,7 @@ import { AppModule } from './app.module.js';
import { mountAuthHandler } from './auth/auth.controller.js';
import { mountMcpHandler } from './mcp/mcp.controller.js';
import { McpService } from './mcp/mcp.service.js';
import { detectAndAssertTier, TierDetectionError } from './bootstrap/tier-detector.js';
import { detectAndAssertTier, TierDetectionError } from '@mosaicstack/storage';
async function bootstrap(): Promise<void> {
const logger = new Logger('Bootstrap');

View File

@@ -7,11 +7,11 @@
**ID:** federation-v1-20260419
**Statement:** Jarvis operates across 34 workstations in two physical locations (home, USC). The user currently reaches back to a single jarvis-brain checkout from every session; a prior OpenBrain attempt caused cache, latency, and opacity pain. This mission builds asymmetric federation between Mosaic Stack gateways so that a session on a user's home gateway can query their work gateway in real time without data ever persisting across the boundary, with full multi-tenant isolation and standard-PKI (X.509 / Step-CA) trust management.
**Phase:** Planning complete — M1 implementation not started
**Current Milestone:** FED-M1
**Progress:** 0 / 7 milestones
**Phase:** M1 complete — federated tier infrastructure ready for testing
**Current Milestone:** FED-M2 (next; deferred to mission planning)
**Progress:** 1 / 7 milestones
**Status:** active
**Last Updated:** 2026-04-19 (PRD + MILESTONES + tracking issues filed)
**Last Updated:** 2026-04-19 (M1 complete; tag `fed-v0.1.0-m1`)
**Parent Mission:** None — new mission
## Context
@@ -51,15 +51,15 @@ Key design references:
## Milestones
| # | ID | Name | Status | Branch | Issue | Started | Completed |
| --- | ------ | --------------------------------------------- | ----------- | ------ | ----- | ------- | --------- |
| 1 | FED-M1 | Federated tier infrastructure | not-started | — | #460 | — | — |
| 2 | FED-M2 | Step-CA + grant schema + admin CLI | not-started | — | #461 | — | — |
| 3 | FED-M3 | mTLS handshake + list/get + scope enforcement | not-started | — | #462 | — | — |
| 4 | FED-M4 | search verb + audit log + rate limit | not-started | — | #463 | — | — |
| 5 | FED-M5 | Cache + offline degradation + OTEL | not-started | — | #464 | — | — |
| 6 | FED-M6 | Revocation + auto-renewal + CRL | not-started | — | #465 | — | — |
| 7 | FED-M7 | Multi-user RBAC hardening + acceptance suite | not-started | — | #466 | — | — |
| # | ID | Name | Status | Branch | Issue | Started | Completed |
| --- | ------ | --------------------------------------------- | ----------- | ------------------ | ----- | ---------- | ---------- |
| 1 | FED-M1 | Federated tier infrastructure | done | (12 PRs #470-#481) | #460 | 2026-04-19 | 2026-04-19 |
| 2 | FED-M2 | Step-CA + grant schema + admin CLI | not-started | — | #461 | — | — |
| 3 | FED-M3 | mTLS handshake + list/get + scope enforcement | not-started | — | #462 | — | — |
| 4 | FED-M4 | search verb + audit log + rate limit | not-started | — | #463 | — | — |
| 5 | FED-M5 | Cache + offline degradation + OTEL | not-started | — | #464 | — | — |
| 6 | FED-M6 | Revocation + auto-renewal + CRL | not-started | — | #465 | — | — |
| 7 | FED-M7 | Multi-user RBAC hardening + acceptance suite | not-started | — | #466 | — | — |
## Budget
@@ -76,10 +76,13 @@ Key design references:
## Session History
| Session | Date | Runtime | Outcome |
| ------- | ---------- | ------- | --------------------------------------------------- |
| S1 | 2026-04-19 | claude | PRD authored, MILESTONES decomposed, 7 issues filed |
| Session | Date | Runtime | Outcome |
| ------- | ---------- | ------- | --------------------------------------------------------------------- |
| S1 | 2026-04-19 | claude | PRD authored, MILESTONES decomposed, 7 issues filed |
| S2-S4 | 2026-04-19 | claude | FED-M1 complete: 12 tasks (PRs #470-#481) merged; tag `fed-v0.1.0-m1` |
## Next Step
Begin FED-M1 implementation: federated tier infrastructure. Breakdown in `docs/federation/TASKS.md`.
FED-M1 complete (12 PRs #470-#481, tag `fed-v0.1.0-m1`). Federated tier infrastructure is testable end-to-end: see `docs/federation/SETUP.md` and `docs/guides/migrate-tier.md`.
Begin FED-M2 (Step-CA + grant schema + admin CLI) when planning is greenlit. Issue #461 tracks scope; orchestrator decomposes M2 into per-task rows in `docs/federation/TASKS.md` at the start of M2.

119
docs/federation/SETUP.md Normal file
View File

@@ -0,0 +1,119 @@
# Federated Tier Setup Guide
## What is the federated tier?
The federated tier is designed for multi-user and multi-host deployments. It consists of PostgreSQL 17 with pgvector extension (for embeddings and RAG), Valkey for distributed task queueing and caching, and a shared configuration across multiple Mosaic gateway instances. Use this tier when running Mosaic in production or when scaling beyond a single-host deployment.
## Prerequisites
- Docker and Docker Compose installed
- Ports 5433 (PostgreSQL) and 6380 (Valkey) available on your host (or adjust environment variables)
- At least 2 GB free disk space for data volumes
## Start the federated stack
Run the federated overlay:
```bash
docker compose -f docker-compose.federated.yml --profile federated up -d
```
This starts PostgreSQL 17 with pgvector and Valkey 8. The pgvector extension is created automatically on first boot.
Verify the services are running:
```bash
docker compose -f docker-compose.federated.yml ps
```
Expected output shows `postgres-federated` and `valkey-federated` both healthy.
## Configure mosaic for federated tier
Create or update your `mosaic.config.json`:
```json
{
"tier": "federated",
"database": "postgresql://mosaic:mosaic@localhost:5433/mosaic",
"queue": "redis://localhost:6380"
}
```
If you're using environment variables instead:
```bash
export DATABASE_URL="postgresql://mosaic:mosaic@localhost:5433/mosaic"
export REDIS_URL="redis://localhost:6380"
```
## Verify health
Run the health check:
```bash
mosaic gateway doctor
```
Expected output (green):
```
Tier: federated Config: mosaic.config.json
✓ postgres localhost:5433 (42ms)
✓ valkey localhost:6380 (8ms)
✓ pgvector (embedded) (15ms)
```
For JSON output (useful in CI/automation):
```bash
mosaic gateway doctor --json
```
## Troubleshooting
### Port conflicts
**Symptom:** `bind: address already in use`
**Fix:** Stop the base dev stack first:
```bash
docker compose down
docker compose -f docker-compose.federated.yml --profile federated up -d
```
Or change the host port with an environment variable:
```bash
PG_FEDERATED_HOST_PORT=5434 VALKEY_FEDERATED_HOST_PORT=6381 \
docker compose -f docker-compose.federated.yml --profile federated up -d
```
### pgvector extension error
**Symptom:** `ERROR: could not open extension control file`
**Fix:** pgvector is created at first boot. Check logs:
```bash
docker compose -f docker-compose.federated.yml logs postgres-federated | grep -i vector
```
If missing, exec into the container and create it manually:
```bash
docker exec <postgres-federated-id> psql -U mosaic -d mosaic -c "CREATE EXTENSION vector;"
```
### Valkey connection refused
**Symptom:** `Error: connect ECONNREFUSED 127.0.0.1:6380`
**Fix:** Check service health:
```bash
docker compose -f docker-compose.federated.yml logs valkey-federated
```
If Valkey is running, verify your firewall allows 6380. On macOS, Docker Desktop may require binding to `host.docker.internal` instead of `localhost`.

View File

@@ -15,20 +15,20 @@
Goal: Gateway runs in `federated` tier with containerized PG+pgvector+Valkey. No federation logic yet. Existing standalone behavior does not regress.
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ------------------------------- | ---------- | -------- | ---------------------------------------------------------------------------------------------------------------------------------- |
| FED-M1-01 | done | Extend `mosaic.config.json` schema: add `"federated"` to `tier` enum in validator + TS types. Keep `local` and `standalone` working. Update schema docs/README where referenced. | #460 | sonnet | feat/federation-m1-tier-config | — | 4K | Shipped in PR #470. Renamed `team``standalone`; added `team` deprecation alias; added `DEFAULT_FEDERATED_CONFIG`. |
| FED-M1-02 | done | Author `docker-compose.federated.yml` as an overlay profile: Postgres 17 + pgvector extension (port 5433), Valkey (6380), named volumes, healthchecks. Compose-up should boot cleanly on a clean machine. | #460 | sonnet | feat/federation-m1-compose | FED-M1-01 | 5K | Shipped in PR #471. Overlay defines `postgres-federated`/`valkey-federated`, profile-gated, with pg-init for pgvector extension. |
| FED-M1-03 | done | Add pgvector support to `packages/storage/src/adapters/postgres.ts`: create extension on init (idempotent), expose vector column type in schema helpers. No adapter changes for non-federated tiers. | #460 | sonnet | feat/federation-m1-pgvector | FED-M1-02 | 8K | Shipped in PR #472. `enableVector` flag on postgres StorageConfig; idempotent CREATE EXTENSION before migrations. |
| FED-M1-04 | done | Implement `apps/gateway/src/bootstrap/tier-detector.ts`: reads config, asserts PG/Valkey/pgvector reachable for `federated`, fail-fast with actionable error message on failure. Unit tests for each failure mode. | #460 | sonnet | feat/federation-m1-detector | FED-M1-03 | 8K | Shipped in PR #473. 12 tests; 5s timeouts on probes; pgvector library/permission discrimination; rejects non-bullmq for federated. |
| FED-M1-05 | in-progress | Write `scripts/migrate-to-federated.ts`: one-way migration from `local` (PGlite) / `standalone` (PG without pgvector) → `federated`. Dumps, transforms, loads; dry-run + confirm UX. Idempotent on re-run. | #460 | sonnet | feat/federation-m1-migrate | FED-M1-04 | 10K | Do NOT run automatically. CLI subcommand `mosaic migrate tier --to federated --dry-run`. Safety rails. |
| FED-M1-06 | not-started | Update `mosaic doctor`: report current tier, required services, actual health per service, pgvector presence, overall green/yellow/red. Machine-readable JSON output flag for CI use. | #460 | sonnet | feat/federation-m1-doctor | FED-M1-04 | 6K | Existing doctor output evolves; add `--json` flag. Green/yellow/red + remediation suggestions per issue. |
| FED-M1-07 | not-started | Integration test: gateway boots in `federated` tier with docker-compose `federated` profile; refuses to boot when PG unreachable (asserts fail-fast); pgvector extension query succeeds. | #460 | sonnet | feat/federation-m1-integration | FED-M1-04 | 8K | Vitest + docker-compose test profile. One test file per assertion; real services, no mocks. |
| FED-M1-08 | not-started | Integration test for migration script: seed a local PGlite with representative data (tasks, notes, users, teams), run migration, assert row counts + key samples equal on federated PG. | #460 | sonnet | feat/federation-m1-migrate-test | FED-M1-05 | 6K | Runs against docker-compose federated profile; uses temp PGlite file; deterministic seed. |
| FED-M1-09 | not-started | Standalone regression: full agent-session E2E on existing `standalone` tier with a gateway built from this branch. Must pass without referencing any federation module. | #460 | haiku | feat/federation-m1-regression | FED-M1-07 | 4K | Reuse existing e2e harness; just re-point at the federation branch build. Canary that we didn't break it. |
| FED-M1-10 | not-started | Code review pass: security-focused on the migration script (data-at-rest during migration) + tier detector (error-message sensitivity leakage). Independent reviewer, not authors of tasks 01-09. | #460 | sonnet | — | FED-M1-09 | 8K | Use `feature-dev:code-reviewer` agent. Specifically: no secrets in error messages; no partial-migration footguns. |
| FED-M1-11 | not-started | Docs update: `docs/federation/` operator notes for tier setup; README blurb on federated tier; `docs/guides/` entry for migration. Do NOT touch runbook yet (deferred to FED-M7). | #460 | haiku | feat/federation-m1-docs | FED-M1-10 | 4K | Short, actionable. Link from MISSION-MANIFEST. No decisions captured here — those belong in PRD. |
| FED-M1-12 | not-started | PR, CI green, merge to main, close #460. | #460 | — | (aggregate) | FED-M1-11 | 3K | Queue-guard before push; wait for green; merge squashed; tea `issue-close` #460. |
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
| --------- | ------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ---------------------------------- | ---------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------ |
| FED-M1-01 | done | Extend `mosaic.config.json` schema: add `"federated"` to `tier` enum in validator + TS types. Keep `local` and `standalone` working. Update schema docs/README where referenced. | #460 | sonnet | feat/federation-m1-tier-config | — | 4K | Shipped in PR #470. Renamed `team``standalone`; added `team` deprecation alias; added `DEFAULT_FEDERATED_CONFIG`. |
| FED-M1-02 | done | Author `docker-compose.federated.yml` as an overlay profile: Postgres 17 + pgvector extension (port 5433), Valkey (6380), named volumes, healthchecks. Compose-up should boot cleanly on a clean machine. | #460 | sonnet | feat/federation-m1-compose | FED-M1-01 | 5K | Shipped in PR #471. Overlay defines `postgres-federated`/`valkey-federated`, profile-gated, with pg-init for pgvector extension. |
| FED-M1-03 | done | Add pgvector support to `packages/storage/src/adapters/postgres.ts`: create extension on init (idempotent), expose vector column type in schema helpers. No adapter changes for non-federated tiers. | #460 | sonnet | feat/federation-m1-pgvector | FED-M1-02 | 8K | Shipped in PR #472. `enableVector` flag on postgres StorageConfig; idempotent CREATE EXTENSION before migrations. |
| FED-M1-04 | done | Implement `apps/gateway/src/bootstrap/tier-detector.ts`: reads config, asserts PG/Valkey/pgvector reachable for `federated`, fail-fast with actionable error message on failure. Unit tests for each failure mode. | #460 | sonnet | feat/federation-m1-detector | FED-M1-03 | 8K | Shipped in PR #473. 12 tests; 5s timeouts on probes; pgvector library/permission discrimination; rejects non-bullmq for federated. |
| FED-M1-05 | done | Write `scripts/migrate-to-federated.ts`: one-way migration from `local` (PGlite) / `standalone` (PG without pgvector) → `federated`. Dumps, transforms, loads; dry-run + confirm UX. Idempotent on re-run. | #460 | sonnet | feat/federation-m1-migrate | FED-M1-04 | 10K | Shipped in PR #474. `mosaic storage migrate-tier`; DrizzleMigrationSource (corrects P0 found in review); 32 tests; idempotent. |
| FED-M1-06 | done | Update `mosaic doctor`: report current tier, required services, actual health per service, pgvector presence, overall green/yellow/red. Machine-readable JSON output flag for CI use. | #460 | sonnet | feat/federation-m1-doctor | FED-M1-04 | 6K | Shipped in PR #475 as `mosaic gateway doctor`. Probes lifted to @mosaicstack/storage; structural TierConfig breaks dep cycle. |
| FED-M1-07 | done | Integration test: gateway boots in `federated` tier with docker-compose `federated` profile; refuses to boot when PG unreachable (asserts fail-fast); pgvector extension query succeeds. | #460 | sonnet | feat/federation-m1-integration | FED-M1-04 | 8K | Shipped in PR #476. 3 test files, 4 tests, gated by FEDERATED_INTEGRATION=1; reserved-port helper avoids host collisions. |
| FED-M1-08 | done | Integration test for migration script: seed a local PGlite with representative data (tasks, notes, users, teams), run migration, assert row counts + key samples equal on federated PG. | #460 | sonnet | feat/federation-m1-migrate-test | FED-M1-05 | 6K | Shipped in PR #477. Caught P0 in M1-05 (camelCase→snake_case) missed by mocked unit tests; fix in same PR. |
| FED-M1-09 | done | Standalone regression: full agent-session E2E on existing `standalone` tier with a gateway built from this branch. Must pass without referencing any federation module. | #460 | sonnet | feat/federation-m1-regression | FED-M1-07 | 4K | Clean canary. 351 gateway tests + 85 storage unit tests + full pnpm test all green; only FEDERATED_INTEGRATION-gated tests skip. |
| FED-M1-10 | done | Code review pass: security-focused on the migration script (data-at-rest during migration) + tier detector (error-message sensitivity leakage). Independent reviewer, not authors of tasks 01-09. | #460 | sonnet | feat/federation-m1-security-review | FED-M1-09 | 8K | 2 review rounds caught 7 issues: credential leak in pg/valkey/pgvector errors + redact-error util; missing advisory lock; SKIP_TABLES rationale. |
| FED-M1-11 | done | Docs update: `docs/federation/` operator notes for tier setup; README blurb on federated tier; `docs/guides/` entry for migration. Do NOT touch runbook yet (deferred to FED-M7). | #460 | haiku | feat/federation-m1-docs | FED-M1-10 | 4K | Shipped: `docs/federation/SETUP.md` (119 lines), `docs/guides/migrate-tier.md` (147 lines), README Configuration blurb. |
| FED-M1-12 | done | PR, CI green, merge to main, close #460. | #460 | sonnet | feat/federation-m1-close | FED-M1-11 | 3K | M1 closed. PRs #470-#480 merged across 11 tasks. Issue #460 closed; release tag `fed-v0.1.0-m1` published. |
**M1 total estimate:** ~74K tokens (over-budget vs 20K PRD estimate — explanation below)

147
docs/guides/migrate-tier.md Normal file
View File

@@ -0,0 +1,147 @@
# Migrating to the Federated Tier
Step-by-step guide to migrate from `local` (PGlite) or `standalone` (PostgreSQL without pgvector) to `federated` (PostgreSQL 17 + pgvector + Valkey).
## When to migrate
Migrate to federated tier when:
- Scaling from single-user to multi-user deployments
- Adding vector embeddings or RAG features
- Running Mosaic across multiple hosts
- Requires distributed task queueing and caching
- Moving to production with high availability
## Prerequisites
- Federated stack running and healthy (see [Federated Tier Setup](../federation/SETUP.md))
- Source database accessible and empty target database at the federated URL
- Backup of source database (recommended before any migration)
## Dry-run first
Always run a dry-run to validate the migration:
```bash
mosaic storage migrate-tier --to federated \
--target-url postgresql://mosaic:mosaic@localhost:5433/mosaic \
--dry-run
```
Expected output (partial example):
```
[migrate-tier] Analyzing source tier: pglite
[migrate-tier] Analyzing target tier: federated
[migrate-tier] Precondition: target is empty ✓
users: 5 rows
teams: 2 rows
conversations: 12 rows
messages: 187 rows
... (all tables listed)
[migrate-tier] NOTE: Source tier has no pgvector support. insights.embedding will be NULL on all migrated rows.
[migrate-tier] DRY-RUN COMPLETE (no data written). 206 total rows would be migrated.
```
Review the output. If it shows an error (e.g., target not empty), address it before proceeding.
## Run the migration
When ready, run without `--dry-run`:
```bash
mosaic storage migrate-tier --to federated \
--target-url postgresql://mosaic:mosaic@localhost:5433/mosaic \
--yes
```
The `--yes` flag skips the confirmation prompt (required in non-TTY environments like CI).
The command will:
1. Acquire an advisory lock (blocks concurrent invocations)
2. Copy data from source to target in dependency order
3. Report rows migrated per table
4. Display any warnings (e.g., null vector embeddings)
## What gets migrated
All persistent, user-bound data is migrated in dependency order:
- **users, teams, team_members** — user and team ownership
- **accounts** — OAuth provider tokens (durable credentials)
- **projects, agents, missions, tasks** — all project and agent definitions
- **conversations, messages** — all chat history
- **preferences, insights, agent_logs** — preferences and observability
- **provider_credentials** — stored API keys and secrets
- **tickets, events, skills, routing_rules, appreciations** — auxiliary records
Full order is defined in code (`MIGRATION_ORDER` in `packages/storage/src/migrate-tier.ts`).
## What gets skipped and why
Three tables are intentionally not migrated:
| Table | Reason |
| ----------------- | ----------------------------------------------------------------------------------------------- |
| **sessions** | TTL'd auth sessions from the old environment; they will fail JWT verification on the new target |
| **verifications** | One-time tokens (email verify, password reset) that have either expired or been consumed |
| **admin_tokens** | Hashed tokens bound to the old environment's secret keys; must be re-issued |
**Note on accounts and provider_credentials:** These durable credentials ARE migrated because they are user-bound and required for resuming agent work on the target environment. After migration to a multi-tenant federated deployment, operators may want to audit or wipe these if users are untrusted or credentials should not be shared.
## Idempotency and concurrency
The migration is **idempotent**:
- Re-running is safe (uses `ON CONFLICT DO UPDATE` internally)
- Ideal for retries on transient failures
- Concurrent invocations are blocked by a Postgres advisory lock; the second caller will wait
If a previous run is stuck, check for advisory locks:
```sql
SELECT * FROM pg_locks WHERE locktype='advisory';
```
If you need to force-unlock (dangerous):
```sql
SELECT pg_advisory_unlock(<lock_id>);
```
## Verify the migration
After migration completes, spot-check the target:
```bash
# Count rows on a few critical tables
psql postgresql://mosaic:mosaic@localhost:5433/mosaic -c \
"SELECT 'users' as table, COUNT(*) FROM users UNION ALL
SELECT 'conversations' as table, COUNT(*) FROM conversations UNION ALL
SELECT 'messages' as table, COUNT(*) FROM messages;"
```
Verify a known user or project exists by ID:
```bash
psql postgresql://mosaic:mosaic@localhost:5433/mosaic -c \
"SELECT id, email FROM users WHERE email='<your-email>';"
```
Ensure vector embeddings are NULL (if source was PGlite) or populated (if source was postgres + pgvector):
```bash
psql postgresql://mosaic:mosaic@localhost:5433/mosaic -c \
"SELECT embedding IS NOT NULL as has_vector FROM insights LIMIT 5;"
```
## Rollback
There is no in-place rollback. If the migration fails:
1. Restore the target database from a pre-migration backup
2. Investigate the failure logs
3. Rerun the migration
Always test migrations in a staging environment first.

View File

@@ -379,3 +379,147 @@ Initial verifier (haiku) on the first delivery returned "OK to ship" but missed
- #8: confirm `packages/config/dist` not git-tracked.
**Next:** PR for FED-M1-04 → CI wait → merge. Then FED-M1-05 (migration script, codex/sonnet, 10K).
---
## Session 18 — 2026-04-19 — FED-M1-07 + FED-M1-08
**Branches landed this run:** `feat/federation-m1-integration` (PR #476, FED-M1-07), `feat/federation-m1-migrate-test` (PR #477, FED-M1-08)
**Branch active at end:** none — both PRs merged to main, branches deleted
**M1 progress:** 8 of 12 tasks done. Remaining: M1-09 (regression e2e, haiku), M1-10 (security review, sonnet), M1-11 (docs, haiku), M1-12 (close + release, orchestrator).
### FED-M1-07 — Integration tests for federated tier gateway boot
Three test files under `apps/gateway/src/__tests__/integration/` gated by `FEDERATED_INTEGRATION=1`:
- `federated-boot.success.integration.test.ts``detectAndAssertTier` resolves; `pg_extension` row for `vector` exists
- `federated-boot.pg-unreachable.integration.test.ts` — throws `TierDetectionError` with `service: 'postgres'` when PG port is closed
- `federated-pgvector.integration.test.ts` — TEMP table with `vector(3)` column round-trips data
Independent code review (sonnet) returned VERDICT: B with two IMPORTANT items, both fixed in the same PR:
- Port 5499 collision risk → replaced with `net.createServer().listen(0)` reserved-port helper
- `afterAll` and `sql` scoped outside `describe` → moved both inside `describe.skipIf` block
Independent surface verifier (haiku) confirmed all claims. 4/4 tests pass live; 4/4 skip cleanly without env var.
### FED-M1-08 — Migration integration test (caught real P0 bug)
`packages/storage/src/migrate-tier.integration.test.ts` seeds temp PGlite with cross-table data (users, teams, team_members, conversations, messages), runs `runMigrateTier`, asserts row counts + spot-checks. Gated by `FEDERATED_INTEGRATION=1`.
**P0 bug surfaced and fixed in same PR:** `DrizzleMigrationSource.readTable()` returns Drizzle's camelCase keys (`emailVerified`, `userId`); `PostgresMigrationTarget.upsertBatch()` was using them verbatim as SQL identifiers, producing `column "emailVerified" does not exist` against real federated PG. The 32 unit tests in M1-05 missed this because both source and target were mocked. Fix: `normaliseSourceRow` now applies `toSnakeCase` (`/[A-Z]/g``_<lowercase>`), idempotent on already-snake_case keys.
Code review (sonnet) returned VERDICT: B with one IMPORTANT and one MINOR, both fixed:
- `createPgliteDbWithVector` and `runPgliteMigrations` were initially added to `@mosaicstack/db` public exports → moved to `packages/storage/src/test-utils/pglite-with-vector.ts` (avoids polluting prod consumers with WASM bundle)
- `afterAll` did not call `cleanTarget` → added before connection close, ensuring orphan rows cleaned even on test panic
Side change: `packages/storage/package.json` gained `"type": "module"` (codebase convention; required for `import.meta.url` in test-utils). All other workspace packages already declared this.
### Process notes for this session
- Review-then-verify pipeline now battle-tested: M1-08 reviewer caught the P0 bug + the public-API leak that the worker would have shipped. Without review, both would have gone to main.
- Integration tests are paying for themselves immediately: M1-08 caught a real P0 in M1-05 that 32 mocked unit tests missed. Going forward, **at least one real-services integration test per code-mutating PR** should become a soft norm where feasible.
- TASKS.md status updates continue to ride on the matching feature branch (avoids direct-to-main commits).
**Followup tasks tracked but still deferred (no change):**
- #7: `tier=local` hardcoded in gateway-config resume branches (~262, ~317)
- #8: confirm `packages/config/dist` not git-tracked
**Next:** FED-M1-09 — standalone regression e2e (haiku canary, ~4K). Verifies that the existing `standalone` tier behavior still works end-to-end on the federation-touched build, since M1 changes touched shared paths (storage, config, gateway boot).
---
## Session 19 — 2026-04-19 — FED-M1-09 → FED-M1-12 (M1 close)
**Branches landed this run:** `feat/federation-m1-regression` (PR #478, M1-09), `feat/federation-m1-security-review` (PR #479, M1-10), `feat/federation-m1-docs` (PR #480, M1-11), `feat/federation-m1-close` (PR #481, M1-12)
**Branch active at end:** none — M1 closed, all branches deleted, issue #460 closed, release tag `fed-v0.1.0-m1` published
**M1 progress:** 12 of 12 tasks done. **Milestone complete.**
### FED-M1-09 — Standalone regression canary
Verification-only milestone. Re-ran the existing standalone/local test suites against current `main` (with M1-01 → M1-08 merged):
- 4 target gateway test files: 148/148 pass (conversation-persistence, cross-user-isolation, resource-ownership, session-hardening)
- Full gateway suite: 351 pass, 4 skipped (FEDERATED_INTEGRATION-gated only)
- Storage unit tests: 85 pass, 1 skipped (integration-gated)
- Top-level `pnpm test`: all green; only env-gated skips
No regression in standalone or local tier. Federation M1 changes are non-disruptive.
### FED-M1-10 — Security review (two rounds, 7 findings)
Independent security review surfaced three high-impact and four medium findings; all fixed in same PR.
**Round 1 (4 findings):**
- MEDIUM: Credential leak via `postgres`/`ioredis` driver error messages (DSN strings) re-thrown by `migrate-tier.ts` → caller; `cli.ts:402` outer catch
- MEDIUM: Same leak in `tier-detection.ts` `probePostgresMeasured` / `probePgvectorMeasured` → emitted as JSON by `mosaic gateway doctor --json`
- LOW-MEDIUM: No advisory lock on `migrate-tier`; two concurrent invocations could both pass `checkTargetPreconditions` (non-atomic) and race
- ADVISORY: `SKIP_TABLES` lacked rationale comment
**Fixes:**
- New internal helper `packages/storage/src/redact-error.ts` — regex `(postgres(?:ql)?|rediss?):\/\/[^@\s]*@``<scheme>://***@`. NOT exported from package public surface. 10 unit tests covering all schemes, multi-URL, no-creds, case-insensitive.
- `redactErrMsg` applied at all 5 leak sites
- `PostgresMigrationTarget.tryAcquireAdvisoryLock()` / `releaseAdvisoryLock()` using session-scoped `pg_try_advisory_lock(hashtext('mosaic-migrate-tier'))`. Acquired before preflight, released in `finally`. Dry-run skips. Non-blocking.
- `SKIP_TABLES` comment expanded with rationale for skipped tables (TTL'd / one-time / env-bound) AND why `accounts` (OAuth) and `provider_credentials` (AI keys) are intentionally migrated (durable user-bound, not deployment-bound).
**Round 2 (3 findings missed by first round):**
- HIGH: Round 1 regex only covered `postgres` scheme, not `redis`/`rediss` — extended to `(postgres(?:ql)?|rediss?)`
- HIGH: `probeValkeyMeasured` was missed in Round 1 → applied `redactErrMsg`
- MEDIUM: `cli.ts:402` migrate-tier outer catch was missed in Round 1 → applied `redactErrMsg`
**Process validation:** the two-round review pattern proved load-bearing for security work. A single review-then-fix cycle would have shipped the Valkey credential leak.
### FED-M1-11 — Docs (haiku)
- `docs/federation/SETUP.md` (119 lines): federated tier setup — what it is, prerequisites, docker compose start, mosaic.config.json snippet, doctor health check, troubleshooting
- `docs/guides/migrate-tier.md` (147 lines): when to migrate, dry-run first, what migrates/skips with rationale, idempotency + advisory-lock semantics, no in-place rollback
- `README.md` Configuration blurb linking to both
- Runbook deferred to FED-M7 per TASKS.md scope rule
### FED-M1-12 — Aggregate close (this PR)
- Marked M1-12 done in TASKS.md
- MISSION-MANIFEST.md: phase → "M1 complete", progress 1/7, M1 row done with PR range #470-#481, session log appended
- This Session 19 entry added
- Issue #460 closed via `~/.config/mosaic/tools/git/issue-close.sh -i 460`
- Release tag `fed-v0.1.0-m1` created and pushed to gitea
### M1 PR ledger
| PR | Task | Branch |
| ---- | ----------------------------------------- | ---------------------------------- |
| #470 | M1-01 (tier config schema) | feat/federation-m1-tier-config |
| #471 | M1-02 (compose overlay) | feat/federation-m1-compose |
| #472 | M1-03 (pgvector adapter) | feat/federation-m1-pgvector |
| #473 | M1-04 (tier-detector) | feat/federation-m1-detector |
| #474 | M1-05 (migrate-tier script) | feat/federation-m1-migrate |
| #475 | M1-06 (gateway doctor) | feat/federation-m1-doctor |
| #476 | M1-07 (boot integration tests) | feat/federation-m1-integration |
| #477 | M1-08 (migrate integration test + P0 fix) | feat/federation-m1-migrate-test |
| #478 | M1-09 (standalone regression) | feat/federation-m1-regression |
| #479 | M1-10 (security review fixes) | feat/federation-m1-security-review |
| #480 | M1-11 (docs) | feat/federation-m1-docs |
| #481 | M1-12 (aggregate close) | feat/federation-m1-close |
### Process learnings (M1 retrospective)
1. **Two-round security review is non-negotiable for security work.** First round caught postgres credential leaks; second round caught equivalent valkey leaks the worker missed when extending the regex. Single-round would have shipped HIGH severity issues.
2. **Real-services integration tests catch what mocked unit tests cannot.** M1-08 caught a P0 in M1-05 (camelCase column names) that 32 mocked unit tests missed because both source and target were mocked. Going forward: at least one real-services test per code-mutating PR where feasible.
3. **Test-utils for live services co-locate with consumer, not in shared library.** M1-08 reviewer caught `createPgliteDbWithVector` initially being added to `@mosaicstack/db` public exports — would have polluted prod consumers with WASM bundle. Moved to `packages/storage/src/test-utils/`.
4. **Per-task budgets including tests/review/docs more accurate than PRD's implementation-only estimates.** M1 PRD estimated 20K; actual ~74K. Future milestones should budget the full delivery cycle.
5. **TASKS.md status updates ride feature branches, never direct-to-main.** Caught one violation early in M1; pattern held for all 12 tasks.
6. **Subagent tier matters.** Code review needs sonnet-level reasoning (haiku missed deep issues in M1-04); claim verification (line counts, file existence) is fine on haiku.
**Followup tasks still deferred (carry forward to M2):**
- #7: `tier=local` hardcoded in gateway-config resume branches (~262, ~317)
- #8: confirm `packages/config/dist` not git-tracked
**Next mission step:** FED-M2 (Step-CA + grant schema + admin CLI). Per TASKS.md scope rule, M2 will be decomposed when it enters active planning. Issue #461 tracks scope.

View File

@@ -0,0 +1,294 @@
/**
* Unit tests for gateway-doctor.ts (mosaic gateway doctor).
*
* All external I/O is mocked — no live services required.
*/
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import type { TierHealthReport } from '@mosaicstack/storage';
/* ------------------------------------------------------------------ */
/* Shared mock state */
/* ------------------------------------------------------------------ */
const mocks = vi.hoisted(() => {
const mockLoadConfig = vi.fn();
const mockProbeServiceHealth = vi.fn();
const mockExistsSync = vi.fn();
return { mockLoadConfig, mockProbeServiceHealth, mockExistsSync };
});
/* ------------------------------------------------------------------ */
/* Module mocks */
/* ------------------------------------------------------------------ */
vi.mock('@mosaicstack/config', () => ({
loadConfig: mocks.mockLoadConfig,
}));
vi.mock('@mosaicstack/storage', () => ({
probeServiceHealth: mocks.mockProbeServiceHealth,
}));
vi.mock('node:fs', () => ({
existsSync: mocks.mockExistsSync,
}));
/* ------------------------------------------------------------------ */
/* Import SUT */
/* ------------------------------------------------------------------ */
import { runGatewayDoctor } from './gateway-doctor.js';
import type { MosaicConfig } from '@mosaicstack/config';
/* ------------------------------------------------------------------ */
/* Fixtures */
/* ------------------------------------------------------------------ */
const STANDALONE_CONFIG: MosaicConfig = {
tier: 'standalone',
storage: { type: 'postgres', url: 'postgresql://mosaic:mosaic@localhost:5432/mosaic' },
queue: { type: 'bullmq', url: 'redis://localhost:6380' },
memory: { type: 'keyword' },
};
const GREEN_REPORT: TierHealthReport = {
tier: 'standalone',
configPath: '/some/mosaic.config.json',
overall: 'green',
services: [
{ name: 'postgres', status: 'ok', host: 'localhost', port: 5432, durationMs: 42 },
{ name: 'valkey', status: 'ok', host: 'localhost', port: 6380, durationMs: 10 },
{ name: 'pgvector', status: 'skipped', durationMs: 0 },
],
};
const RED_REPORT: TierHealthReport = {
tier: 'standalone',
configPath: '/some/mosaic.config.json',
overall: 'red',
services: [
{
name: 'postgres',
status: 'fail',
host: 'localhost',
port: 5432,
durationMs: 5001,
error: {
message: 'connection refused',
remediation: 'Start Postgres: `docker compose ...`',
},
},
{ name: 'valkey', status: 'ok', host: 'localhost', port: 6380, durationMs: 8 },
{ name: 'pgvector', status: 'skipped', durationMs: 0 },
],
};
const FEDERATED_GREEN_REPORT: TierHealthReport = {
tier: 'federated',
configPath: '/some/mosaic.config.json',
overall: 'green',
services: [
{ name: 'postgres', status: 'ok', host: 'localhost', port: 5433, durationMs: 30 },
{ name: 'valkey', status: 'ok', host: 'localhost', port: 6380, durationMs: 5 },
{ name: 'pgvector', status: 'ok', host: 'localhost', port: 5433, durationMs: 25 },
],
};
/* ------------------------------------------------------------------ */
/* Process helpers */
/* ------------------------------------------------------------------ */
let stdoutCapture = '';
let exitCode: number | undefined;
function captureOutput(): void {
stdoutCapture = '';
exitCode = undefined;
vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => {
stdoutCapture += typeof chunk === 'string' ? chunk : chunk.toString();
return true;
});
vi.spyOn(process.stderr, 'write').mockImplementation(() => true);
vi.spyOn(process, 'exit').mockImplementation((code?: string | number | null) => {
exitCode = typeof code === 'number' ? code : code != null ? Number(code) : undefined;
throw new Error(`process.exit(${String(code)})`);
});
vi.spyOn(console, 'log').mockImplementation((...args: unknown[]) => {
stdoutCapture += args.join(' ') + '\n';
});
}
/* ------------------------------------------------------------------ */
/* Tests */
/* ------------------------------------------------------------------ */
describe('runGatewayDoctor', () => {
beforeEach(() => {
vi.clearAllMocks();
captureOutput();
// By default: no config file on disk (env-detection path)
mocks.mockExistsSync.mockReturnValue(false);
mocks.mockLoadConfig.mockReturnValue(STANDALONE_CONFIG);
mocks.mockProbeServiceHealth.mockResolvedValue(GREEN_REPORT);
});
afterEach(() => {
vi.restoreAllMocks();
});
/* ---------------------------------------------------------------- */
/* 1. JSON mode: parseable JSON matching the schema */
/* ---------------------------------------------------------------- */
it('JSON mode emits parseable JSON matching TierHealthReport schema', async () => {
mocks.mockProbeServiceHealth.mockResolvedValue(GREEN_REPORT);
await runGatewayDoctor({ json: true });
const parsed = JSON.parse(stdoutCapture) as TierHealthReport;
expect(parsed.tier).toBe('standalone');
expect(parsed.overall).toBe('green');
expect(Array.isArray(parsed.services)).toBe(true);
expect(parsed.services).toHaveLength(3);
// Validate shape of each service check
for (const svc of parsed.services) {
expect(['postgres', 'valkey', 'pgvector']).toContain(svc.name);
expect(['ok', 'fail', 'skipped']).toContain(svc.status);
expect(typeof svc.durationMs).toBe('number');
}
// JSON mode must be silent on console.log — output goes to process.stdout only.
expect(console.log).not.toHaveBeenCalled();
});
it('JSON mode for federated with 3 ok services', async () => {
mocks.mockProbeServiceHealth.mockResolvedValue(FEDERATED_GREEN_REPORT);
await runGatewayDoctor({ json: true });
const parsed = JSON.parse(stdoutCapture) as TierHealthReport;
expect(parsed.tier).toBe('federated');
expect(parsed.overall).toBe('green');
expect(parsed.services.every((s) => s.status === 'ok')).toBe(true);
// JSON mode must be silent on console.log — output goes to process.stdout only.
expect(console.log).not.toHaveBeenCalled();
});
/* ---------------------------------------------------------------- */
/* 2. Plain text mode: service lines and overall verdict */
/* ---------------------------------------------------------------- */
it('plain text mode includes service lines for each service', async () => {
mocks.mockProbeServiceHealth.mockResolvedValue(GREEN_REPORT);
await runGatewayDoctor({});
expect(stdoutCapture).toContain('postgres');
expect(stdoutCapture).toContain('valkey');
expect(stdoutCapture).toContain('pgvector');
});
it('plain text mode includes Overall verdict', async () => {
mocks.mockProbeServiceHealth.mockResolvedValue(GREEN_REPORT);
await runGatewayDoctor({});
expect(stdoutCapture).toContain('Overall: GREEN');
});
it('plain text mode shows tier and config path in header', async () => {
mocks.mockProbeServiceHealth.mockResolvedValue(GREEN_REPORT);
await runGatewayDoctor({});
expect(stdoutCapture).toContain('Tier: standalone');
});
it('plain text mode shows remediation for failed services', async () => {
mocks.mockProbeServiceHealth.mockResolvedValue(RED_REPORT);
try {
await runGatewayDoctor({});
} catch {
// process.exit throws in test
}
expect(stdoutCapture).toContain('Remediations:');
expect(stdoutCapture).toContain('Start Postgres');
});
/* ---------------------------------------------------------------- */
/* 3. Exit codes */
/* ---------------------------------------------------------------- */
it('exits with code 1 when overall is red', async () => {
mocks.mockProbeServiceHealth.mockResolvedValue(RED_REPORT);
await expect(runGatewayDoctor({})).rejects.toThrow('process.exit(1)');
expect(exitCode).toBe(1);
});
it('exits with code 0 (no exit call) when overall is green', async () => {
mocks.mockProbeServiceHealth.mockResolvedValue(GREEN_REPORT);
await runGatewayDoctor({});
// process.exit should NOT have been called for green.
expect(exitCode).toBeUndefined();
});
it('JSON mode exits with code 1 when overall is red', async () => {
mocks.mockProbeServiceHealth.mockResolvedValue(RED_REPORT);
await expect(runGatewayDoctor({ json: true })).rejects.toThrow('process.exit(1)');
expect(exitCode).toBe(1);
});
/* ---------------------------------------------------------------- */
/* 4. --config path override is honored */
/* ---------------------------------------------------------------- */
it('passes --config path to loadConfig when provided', async () => {
const customPath = '/custom/path/mosaic.config.json';
await runGatewayDoctor({ config: customPath });
// loadConfig should have been called with the resolved custom path.
expect(mocks.mockLoadConfig).toHaveBeenCalledWith(
expect.stringContaining('mosaic.config.json'),
);
// The exact call should include the custom path (resolved).
const [calledPath] = mocks.mockLoadConfig.mock.calls[0] as [string | undefined];
expect(calledPath).toContain('custom/path/mosaic.config.json');
});
it('calls loadConfig without path when no --config and no file on disk', async () => {
mocks.mockExistsSync.mockReturnValue(false);
await runGatewayDoctor({});
const [calledPath] = mocks.mockLoadConfig.mock.calls[0] as [string | undefined];
// When no file found, resolveConfigPath returns undefined, so loadConfig is called with undefined
expect(calledPath).toBeUndefined();
});
it('finds config from cwd when mosaic.config.json exists there', async () => {
// First candidate (cwd/mosaic.config.json) exists.
mocks.mockExistsSync.mockImplementation((p: unknown) => {
return typeof p === 'string' && p.endsWith('mosaic.config.json');
});
await runGatewayDoctor({});
const [calledPath] = mocks.mockLoadConfig.mock.calls[0] as [string | undefined];
expect(calledPath).toBeDefined();
expect(typeof calledPath).toBe('string');
expect(calledPath!.endsWith('mosaic.config.json')).toBe(true);
});
});

View File

@@ -0,0 +1,143 @@
/**
* gateway-doctor.ts — `mosaic gateway doctor` implementation.
*
* Reports current tier and per-service health (PG, Valkey, pgvector) for the
* Mosaic gateway. Supports machine-readable JSON output for CI.
*
* Exit codes:
* 0 — overall green or yellow
* 1 — overall red (at least one required service failed)
*/
import { existsSync } from 'node:fs';
import { resolve, join } from 'node:path';
import { homedir } from 'node:os';
import { loadConfig } from '@mosaicstack/config';
import { probeServiceHealth } from '@mosaicstack/storage';
import type { TierHealthReport, ServiceCheck } from '@mosaicstack/storage';
/* ------------------------------------------------------------------ */
/* Config resolution */
/* ------------------------------------------------------------------ */
const CONFIG_CANDIDATES = [
resolve(process.cwd(), 'mosaic.config.json'),
join(homedir(), '.mosaic', 'mosaic.config.json'),
];
/**
* Resolve the config path to report in output.
*
* Priority:
* 1. Explicit `--config <path>` flag
* 2. `./mosaic.config.json` (cwd)
* 3. `~/.mosaic/mosaic.config.json`
* 4. undefined — `loadConfig()` falls back to env-var detection
*
* `loadConfig()` itself already handles priority 1-3 when passed an explicit
* path, and falls back to env-detection when none exists. We resolve here
* only so we can surface the path in the health report.
*/
function resolveConfigPath(explicit?: string): string | undefined {
if (explicit) return resolve(explicit);
for (const candidate of CONFIG_CANDIDATES) {
if (existsSync(candidate)) return candidate;
}
return undefined;
}
/* ------------------------------------------------------------------ */
/* Output helpers */
/* ------------------------------------------------------------------ */
const TICK = '\u2713'; // ✓
const CROSS = '\u2717'; // ✗
const SKIP = '-';
function padRight(s: string, n: number): string {
return s + ' '.repeat(Math.max(0, n - s.length));
}
function serviceLabel(svc: ServiceCheck): string {
const hostPort =
svc.host !== undefined && svc.port !== undefined ? `${svc.host}:${svc.port.toString()}` : '';
const duration = `(${svc.durationMs.toString()}ms)`;
switch (svc.status) {
case 'ok':
return ` ${TICK} ${padRight(svc.name, 10)} ${padRight(hostPort, 22)} ${duration}`;
case 'fail': {
const errMsg = svc.error?.message ?? 'unknown error';
return ` ${CROSS} ${padRight(svc.name, 10)} ${padRight(hostPort, 22)} ${duration} \u2192 ${errMsg}`;
}
case 'skipped':
return ` ${SKIP} ${padRight(svc.name, 10)} (skipped)`;
}
}
function printReport(report: TierHealthReport): void {
const configDisplay = report.configPath ?? '(auto-detected)';
console.log(`Tier: ${report.tier} Config: ${configDisplay}`);
console.log('');
for (const svc of report.services) {
console.log(serviceLabel(svc));
}
console.log('');
// Print remediations for failed services.
const failed = report.services.filter((s) => s.status === 'fail' && s.error);
if (failed.length > 0) {
console.log('Remediations:');
for (const svc of failed) {
if (svc.error) {
console.log(` ${svc.name}: ${svc.error.remediation}`);
}
}
console.log('');
}
console.log(`Overall: ${report.overall.toUpperCase()}`);
}
/* ------------------------------------------------------------------ */
/* Main runner */
/* ------------------------------------------------------------------ */
export interface GatewayDoctorOptions {
json?: boolean;
config?: string;
}
export async function runGatewayDoctor(opts: GatewayDoctorOptions): Promise<void> {
const configPath = resolveConfigPath(opts.config);
let mosaicConfig;
try {
mosaicConfig = loadConfig(configPath);
} catch (err) {
const msg = err instanceof Error ? err.message : String(err);
if (opts.json) {
process.stdout.write(
JSON.stringify({ error: `Failed to load config: ${msg}` }, null, 2) + '\n',
);
} else {
process.stderr.write(`Error: Failed to load config: ${msg}\n`);
}
process.exit(1);
}
const report = await probeServiceHealth(mosaicConfig, configPath);
if (opts.json) {
process.stdout.write(JSON.stringify(report, null, 2) + '\n');
} else {
printReport(report);
}
// Exit 1 if overall is red.
if (report.overall === 'red') {
process.exit(1);
}
}

View File

@@ -206,4 +206,15 @@ export function registerGatewayCommand(program: Command): void {
const { runUninstall } = await import('./gateway/uninstall.js');
await runUninstall();
});
// ─── doctor ─────────────────────────────────────────────────────────────────
gw.command('doctor')
.description('Check gateway tier and per-service health (PG, Valkey, pgvector)')
.option('--json', 'Emit TierHealthReport as JSON to stdout (suppresses all other output)')
.option('--config <path>', 'Path to mosaic.config.json (defaults to cwd or ~/.mosaic/)')
.action(async (cmdOpts: { json?: boolean; config?: string }) => {
const { runGatewayDoctor } = await import('./gateway-doctor.js');
await runGatewayDoctor({ json: cmdOpts.json, config: cmdOpts.config });
});
}

View File

@@ -6,6 +6,7 @@
"url": "https://git.mosaicstack.dev/mosaicstack/stack.git",
"directory": "packages/storage"
},
"type": "module",
"main": "dist/index.js",
"types": "dist/index.d.ts",
"exports": {
@@ -25,9 +26,11 @@
"@mosaicstack/db": "workspace:^",
"@mosaicstack/types": "workspace:*",
"commander": "^13.0.0",
"ioredis": "^5.10.0",
"postgres": "^3.4.8"
},
"devDependencies": {
"drizzle-orm": "^0.45.1",
"typescript": "^5.8.0",
"vitest": "^2.0.0"
},

View File

@@ -1,5 +1,6 @@
import type { Command } from 'commander';
import type { MigrationSource } from './migrate-tier.js';
import { redactErrMsg } from './redact-error.js';
/**
* Reads the DATABASE_URL environment variable and redacts the password portion.
@@ -73,7 +74,7 @@ export function registerStorageCommand(parent: Command): void {
console.log('[storage] reachable: yes');
} catch (err) {
console.log(
`[storage] reachable: no (${err instanceof Error ? err.message : String(err)})`,
`[storage] reachable: no (${redactErrMsg(err instanceof Error ? err.message : String(err))})`,
);
}
} else {
@@ -398,7 +399,7 @@ export function registerStorageCommand(parent: Command): void {
}
} catch (err) {
console.error(
`[migrate-tier] ERROR: ${err instanceof Error ? err.message : String(err)}`,
`[migrate-tier] ERROR: ${redactErrMsg(err instanceof Error ? err.message : String(err))}`,
);
process.exitCode = 1;
} finally {

View File

@@ -1,4 +1,6 @@
export type { StorageAdapter, StorageConfig } from './types.js';
export { TierDetectionError, detectAndAssertTier, probeServiceHealth } from './tier-detection.js';
export type { ServiceCheck, TierHealthReport } from './tier-detection.js';
export { createStorageAdapter, registerStorageAdapter } from './factory.js';
export { PostgresAdapter } from './adapters/postgres.js';
export { PgliteAdapter } from './adapters/pglite.js';

View File

@@ -0,0 +1,324 @@
/**
* FED-M1-08 — Integration test: PGlite → federated Postgres+pgvector migration.
*
* Prereq: docker compose -f docker-compose.federated.yml --profile federated up -d
* Run: FEDERATED_INTEGRATION=1 pnpm --filter @mosaicstack/storage test src/migrate-tier.integration.test.ts
*
* Skipped when FEDERATED_INTEGRATION !== '1'.
*
* Strategy: users.id (TEXT PK) uses the recognisable prefix `fed-m1-08-` for
* easy cleanup. UUID-PKed tables (teams, conversations, messages, team_members)
* use deterministic valid UUIDs in the `f0000xxx-…` namespace. Cleanup is
* explicit DELETE by id — no full-table truncation.
*/
import fs from 'node:fs/promises';
import os from 'node:os';
import path from 'node:path';
import { users, teams, teamMembers, conversations, messages } from '@mosaicstack/db';
import { createPgliteDbWithVector, runPgliteMigrations } from './test-utils/pglite-with-vector.js';
import postgres from 'postgres';
import { afterAll, describe, expect, it } from 'vitest';
import { DrizzleMigrationSource, PostgresMigrationTarget, runMigrateTier } from './migrate-tier.js';
/* ------------------------------------------------------------------ */
/* Constants */
/* ------------------------------------------------------------------ */
const run = process.env['FEDERATED_INTEGRATION'] === '1';
const FEDERATED_PG_URL = 'postgresql://mosaic:mosaic@localhost:5433/mosaic';
/**
* Deterministic IDs for the test's seed data.
*
* users.id is TEXT (any string) — we use a recognisable prefix for easy cleanup.
* All other tables use UUID primary keys — must be valid UUID v4 format.
* The 4th segment starts with '4' (version 4) and 5th starts with '8' (variant).
*/
const IDS = {
// text PK — can be any string
user1: 'fed-m1-08-user-1',
user2: 'fed-m1-08-user-2',
// UUID PKs — must be valid UUID format
team1: 'f0000001-0000-4000-8000-000000000001',
teamMember1: 'f0000002-0000-4000-8000-000000000001',
teamMember2: 'f0000002-0000-4000-8000-000000000002',
conv1: 'f0000003-0000-4000-8000-000000000001',
conv2: 'f0000003-0000-4000-8000-000000000002',
msg1: 'f0000004-0000-4000-8000-000000000001',
msg2: 'f0000004-0000-4000-8000-000000000002',
msg3: 'f0000004-0000-4000-8000-000000000003',
msg4: 'f0000004-0000-4000-8000-000000000004',
msg5: 'f0000004-0000-4000-8000-000000000005',
} as const;
/* ------------------------------------------------------------------ */
/* Shared handles for afterAll cleanup */
/* ------------------------------------------------------------------ */
let targetSql: ReturnType<typeof postgres> | undefined;
let pgliteDataDir: string | undefined;
afterAll(async () => {
if (targetSql) {
await cleanTarget(targetSql).catch(() => {});
await targetSql.end({ timeout: 5 }).catch(() => {});
}
if (pgliteDataDir) {
await fs.rm(pgliteDataDir, { recursive: true, force: true }).catch(() => {});
}
});
/* ------------------------------------------------------------------ */
/* Helpers */
/* ------------------------------------------------------------------ */
/** Delete all test-owned rows from target in safe FK order. */
async function cleanTarget(sql: ReturnType<typeof postgres>): Promise<void> {
// Reverse FK order: messages → conversations → team_members → teams → users
await sql.unsafe(`DELETE FROM messages WHERE id = ANY($1)`, [
[IDS.msg1, IDS.msg2, IDS.msg3, IDS.msg4, IDS.msg5],
] as never[]);
await sql.unsafe(`DELETE FROM conversations WHERE id = ANY($1)`, [
[IDS.conv1, IDS.conv2],
] as never[]);
await sql.unsafe(`DELETE FROM team_members WHERE id = ANY($1)`, [
[IDS.teamMember1, IDS.teamMember2],
] as never[]);
await sql.unsafe(`DELETE FROM teams WHERE id = $1`, [IDS.team1] as never[]);
await sql.unsafe(`DELETE FROM users WHERE id = ANY($1)`, [[IDS.user1, IDS.user2]] as never[]);
}
/* ------------------------------------------------------------------ */
/* Test suite */
/* ------------------------------------------------------------------ */
describe.skipIf(!run)('migrate-tier — PGlite → federated PG', () => {
it('seeds PGlite, runs migrate-tier, asserts row counts and sample rows on target', async () => {
/* ---- 1. Create a temp PGlite db ---------------------------------- */
pgliteDataDir = await fs.mkdtemp(path.join(os.tmpdir(), 'fed-m1-08-'));
const handle = createPgliteDbWithVector(pgliteDataDir);
// Run Drizzle migrations against PGlite.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
await runPgliteMigrations(handle.db as any);
/* ---- 2. Seed representative data --------------------------------- */
const now = new Date();
const db = handle.db;
// users (2 rows)
// eslint-disable-next-line @typescript-eslint/no-explicit-any
await (db as any).insert(users).values([
{
id: IDS.user1,
name: 'Fed Test User One',
email: 'fed-m1-08-user1@test.invalid',
emailVerified: false,
role: 'member',
createdAt: now,
updatedAt: now,
},
{
id: IDS.user2,
name: 'Fed Test User Two',
email: 'fed-m1-08-user2@test.invalid',
emailVerified: false,
role: 'member',
createdAt: now,
updatedAt: now,
},
]);
// teams (1 row)
// eslint-disable-next-line @typescript-eslint/no-explicit-any
await (db as any).insert(teams).values([
{
id: IDS.team1,
name: 'Fed M1-08 Team',
slug: 'fed-m1-08-team',
ownerId: IDS.user1,
managerId: IDS.user1,
createdAt: now,
updatedAt: now,
},
]);
// team_members (2 rows linking both users to the team)
// eslint-disable-next-line @typescript-eslint/no-explicit-any
await (db as any).insert(teamMembers).values([
{
id: IDS.teamMember1,
teamId: IDS.team1,
userId: IDS.user1,
role: 'manager',
joinedAt: now,
},
{
id: IDS.teamMember2,
teamId: IDS.team1,
userId: IDS.user2,
role: 'member',
joinedAt: now,
},
]);
// conversations (2 rows)
// eslint-disable-next-line @typescript-eslint/no-explicit-any
await (db as any).insert(conversations).values([
{
id: IDS.conv1,
title: 'Fed M1-08 Conversation Alpha',
userId: IDS.user1,
archived: false,
createdAt: now,
updatedAt: now,
},
{
id: IDS.conv2,
title: 'Fed M1-08 Conversation Beta',
userId: IDS.user2,
archived: false,
createdAt: now,
updatedAt: now,
},
]);
// messages (5 rows across both conversations)
// eslint-disable-next-line @typescript-eslint/no-explicit-any
await (db as any).insert(messages).values([
{
id: IDS.msg1,
conversationId: IDS.conv1,
role: 'user',
content: 'Hello from conv1 msg1',
createdAt: now,
},
{
id: IDS.msg2,
conversationId: IDS.conv1,
role: 'assistant',
content: 'Reply in conv1 msg2',
createdAt: now,
},
{
id: IDS.msg3,
conversationId: IDS.conv1,
role: 'user',
content: 'Follow-up in conv1 msg3',
createdAt: now,
},
{
id: IDS.msg4,
conversationId: IDS.conv2,
role: 'user',
content: 'Hello from conv2 msg4',
createdAt: now,
},
{
id: IDS.msg5,
conversationId: IDS.conv2,
role: 'assistant',
content: 'Reply in conv2 msg5',
createdAt: now,
},
]);
/* ---- 3. Pre-clean the target so the test is repeatable ----------- */
targetSql = postgres(FEDERATED_PG_URL, {
max: 3,
connect_timeout: 10,
idle_timeout: 30,
});
await cleanTarget(targetSql);
/* ---- 4. Build source / target adapters and run migration --------- */
const source = new DrizzleMigrationSource(db, /* sourceHasVector= */ false);
const target = new PostgresMigrationTarget(FEDERATED_PG_URL);
try {
await runMigrateTier(
source,
target,
{
targetUrl: FEDERATED_PG_URL,
dryRun: false,
allowNonEmpty: true,
batchSize: 500,
onProgress: (_msg) => {
// Uncomment for debugging: console.log(_msg);
},
},
/* sourceHasVector= */ false,
);
} finally {
await target.close();
}
/* ---- 5. Assert: row counts in target match seed ------------------ */
const countUsers = await targetSql.unsafe<Array<{ n: string }>>(
`SELECT COUNT(*)::text AS n FROM users WHERE id = ANY($1)`,
[[IDS.user1, IDS.user2]] as never[],
);
expect(Number(countUsers[0]?.n)).toBe(2);
const countTeams = await targetSql.unsafe<Array<{ n: string }>>(
`SELECT COUNT(*)::text AS n FROM teams WHERE id = $1`,
[IDS.team1] as never[],
);
expect(Number(countTeams[0]?.n)).toBe(1);
const countTeamMembers = await targetSql.unsafe<Array<{ n: string }>>(
`SELECT COUNT(*)::text AS n FROM team_members WHERE id = ANY($1)`,
[[IDS.teamMember1, IDS.teamMember2]] as never[],
);
expect(Number(countTeamMembers[0]?.n)).toBe(2);
const countConvs = await targetSql.unsafe<Array<{ n: string }>>(
`SELECT COUNT(*)::text AS n FROM conversations WHERE id = ANY($1)`,
[[IDS.conv1, IDS.conv2]] as never[],
);
expect(Number(countConvs[0]?.n)).toBe(2);
const countMsgs = await targetSql.unsafe<Array<{ n: string }>>(
`SELECT COUNT(*)::text AS n FROM messages WHERE id = ANY($1)`,
[[IDS.msg1, IDS.msg2, IDS.msg3, IDS.msg4, IDS.msg5]] as never[],
);
expect(Number(countMsgs[0]?.n)).toBe(5);
/* ---- 6. Assert: sample row field values --------------------------- */
// User 1: check email and name
const userRows = await targetSql.unsafe<Array<{ id: string; email: string; name: string }>>(
`SELECT id, email, name FROM users WHERE id = $1`,
[IDS.user1] as never[],
);
expect(userRows[0]?.email).toBe('fed-m1-08-user1@test.invalid');
expect(userRows[0]?.name).toBe('Fed Test User One');
// Conversation 1: check title and user_id
const convRows = await targetSql.unsafe<Array<{ id: string; title: string; user_id: string }>>(
`SELECT id, title, user_id FROM conversations WHERE id = $1`,
[IDS.conv1] as never[],
);
expect(convRows[0]?.title).toBe('Fed M1-08 Conversation Alpha');
expect(convRows[0]?.user_id).toBe(IDS.user1);
/* ---- 7. Cleanup: delete test rows from target -------------------- */
await cleanTarget(targetSql);
// Close PGlite
await handle.close();
}, 60_000);
});

View File

@@ -29,6 +29,7 @@
import postgres from 'postgres';
import * as schema from '@mosaicstack/db';
import { sql as drizzleSql } from '@mosaicstack/db';
import { redactErrMsg } from './redact-error.js';
/* ------------------------------------------------------------------ */
/* Types */
@@ -72,6 +73,20 @@ export interface MigrationTarget {
/** Close the target connection. */
close(): Promise<void>;
/**
* Attempt to acquire a session-level Postgres advisory lock for migrate-tier.
* Returns true if the lock was acquired, false if another process holds it.
* Targets that do not support advisory locks (e.g. test mocks) may omit this
* by not implementing the method — the caller skips locking gracefully.
*/
tryAcquireAdvisoryLock?(): Promise<boolean>;
/**
* Release the session-level advisory lock acquired by tryAcquireAdvisoryLock.
* Must be called in a finally block.
*/
releaseAdvisoryLock?(): Promise<void>;
}
export interface MigrateTierOptions {
@@ -107,9 +122,28 @@ export interface MigrateTierResult {
/**
* SKIP_TABLES: ephemeral or environment-specific tables not worth migrating.
*
* - sessions: TTL'd auth sessions — invalid in new environment.
* - verifications: one-time tokens (email verify, etc.) — already expired.
* - admin_tokens: hashed tokens bound to old environment keys — re-issue.
* WHY these tables are skipped:
* - sessions: TTL'd auth sessions — they are invalid in the new environment
* and would immediately expire or fail JWT verification anyway.
* - verifications: one-time tokens (email verify, password-reset links, etc.)
* — they have already expired or been consumed; re-sending is
* the correct action on the new environment.
* - admin_tokens: hashed tokens bound to the old environment's secret keys —
* the hash is environment-specific and must be re-issued on
* the target.
*
* WHY these tables are NOT skipped (intentionally migrated):
* - accounts (OAuth tokens): durable credentials bound to the user's identity,
* not to the deployment environment. OAuth tokens survive environment changes
* and should follow the user to the federated tier.
* - provider_credentials (AI provider keys): durable, user-owned API keys for
* AI providers (e.g. OpenAI, Anthropic). These are bound to the user, not
* the server, and must be preserved so AI features work immediately after
* migration.
*
* OPERATOR NOTE: If migrating to a shared or multi-tenant federated tier, review
* whether `accounts` and `provider_credentials` should be wiped post-migration
* to prevent unintended cross-tenant credential exposure.
*/
export const SKIP_TABLES = new Set(['sessions', 'verifications', 'admin_tokens']);
@@ -482,6 +516,33 @@ export class PostgresMigrationTarget implements MigrationTarget {
return rows.length > 0;
}
/**
* Attempt to acquire a non-blocking session-level Postgres advisory lock
* keyed by hashtext('mosaic-migrate-tier'). Returns true if acquired,
* false if another session already holds the lock.
*
* The lock is session-scoped: it is automatically released when the
* connection closes, and also explicitly released via releaseAdvisoryLock().
*/
async tryAcquireAdvisoryLock(): Promise<boolean> {
const rows = await this.sql`
SELECT pg_try_advisory_lock(hashtext('mosaic-migrate-tier')) AS acquired
`;
const row = rows[0] as { acquired: boolean } | undefined;
return row?.acquired ?? false;
}
/**
* Release the session-level advisory lock previously acquired by
* tryAcquireAdvisoryLock(). Safe to call even if the lock was not held
* (pg_advisory_unlock returns false but does not throw).
*/
async releaseAdvisoryLock(): Promise<void> {
await this.sql`
SELECT pg_advisory_unlock(hashtext('mosaic-migrate-tier'))
`;
}
async close(): Promise<void> {
await this.sql.end();
}
@@ -491,11 +552,24 @@ export class PostgresMigrationTarget implements MigrationTarget {
/* Source-row normalisation */
/* ------------------------------------------------------------------ */
/**
* Convert a camelCase key to snake_case.
* e.g. "userId" → "user_id", "emailVerified" → "email_verified".
* Keys that are already snake_case (no uppercase letters) are returned as-is.
*/
function toSnakeCase(key: string): string {
return key.replace(/[A-Z]/g, (c) => `_${c.toLowerCase()}`);
}
/**
* Drizzle returns rows as camelCase TypeScript objects (e.g. `userId`, not
* `user_id`). The PostgresMigrationTarget upserts via raw SQL and uses the
* column names as given — the `insights` no-vector path uses snake_case column
* aliases in the SELECT, so those rows already arrive as snake_case.
* column names as given. We must convert camelCase keys → snake_case before
* building the INSERT statement so column names match the PG schema.
*
* Exception: the `insights` no-vector path already returns snake_case keys
* from its raw SQL projection — toSnakeCase() is idempotent for already-
* snake_case keys so this conversion is safe in all paths.
*
* For vector tables (insights), if `embedding` is absent from the source row
* (because DrizzleMigrationSource omitted it in the no-vector projection), we
@@ -509,7 +583,11 @@ export function normaliseSourceRow(
row: Record<string, unknown>,
sourceHasVector: boolean,
): Record<string, unknown> {
const out = { ...row };
// Convert all camelCase keys to snake_case for raw-SQL target compatibility.
const out: Record<string, unknown> = {};
for (const [k, v] of Object.entries(row)) {
out[toSnakeCase(k)] = v;
}
if (VECTOR_TABLES.has(tableName) && !sourceHasVector) {
// Source cannot have embeddings — explicitly null them so ON CONFLICT
@@ -630,68 +708,92 @@ export async function runMigrateTier(
return { tables, totalRows: 0, dryRun: true };
}
// Check preconditions before writing.
await checkTargetPreconditions(target, allowNonEmpty, tablesToMigrate);
// Acquire a Postgres advisory lock on the target BEFORE checking preconditions
// so that two concurrent invocations cannot both pass the non-empty guard and
// race each other. Use non-blocking pg_try_advisory_lock so we fail fast
// instead of deadlocking.
//
// Targets that don't implement tryAcquireAdvisoryLock (e.g. test mocks) skip
// this step — the optional chaining guard handles that case.
const lockAcquired = target.tryAcquireAdvisoryLock ? await target.tryAcquireAdvisoryLock() : true; // mocks / test doubles — no locking needed
const results: TableMigrationResult[] = [];
let totalRows = 0;
if (!lockAcquired) {
throw new Error(
'Another migrate-tier process is already running against this target. ' +
'Wait for it to complete or check for stuck locks via ' +
"SELECT * FROM pg_locks WHERE locktype='advisory'.",
);
}
for (const table of tablesToMigrate) {
const sourceCount = sourceCounts.get(table) ?? 0;
try {
// Check preconditions before writing.
await checkTargetPreconditions(target, allowNonEmpty, tablesToMigrate);
if (sourceCount === 0) {
onProgress(`[migrate-tier] ${table}: 0 rows — skipping.`);
results.push({ table, rowsMigrated: 0, skipped: false });
continue;
}
const results: TableMigrationResult[] = [];
let totalRows = 0;
onProgress(`[migrate-tier] ${table}: migrating ${sourceCount.toString()} rows...`);
for (const table of tablesToMigrate) {
const sourceCount = sourceCounts.get(table) ?? 0;
let offset = 0;
let tableTotal = 0;
let lastSuccessfulId: string | undefined;
if (sourceCount === 0) {
onProgress(`[migrate-tier] ${table}: 0 rows — skipping.`);
results.push({ table, rowsMigrated: 0, skipped: false });
continue;
}
try {
while (offset < sourceCount) {
const rows = await source.readTable(table, { limit: batchSize, offset });
if (rows.length === 0) break;
onProgress(`[migrate-tier] ${table}: migrating ${sourceCount.toString()} rows...`);
const normalised = rows.map((row) => normaliseSourceRow(table, row, sourceHasVector));
let offset = 0;
let tableTotal = 0;
let lastSuccessfulId: string | undefined;
await target.upsertBatch(table, normalised);
try {
while (offset < sourceCount) {
const rows = await source.readTable(table, { limit: batchSize, offset });
if (rows.length === 0) break;
lastSuccessfulId = rows[rows.length - 1]?.['id'] as string | undefined;
tableTotal += rows.length;
offset += rows.length;
const normalised = rows.map((row) => normaliseSourceRow(table, row, sourceHasVector));
onProgress(
`[migrate-tier] ${table}: ${tableTotal.toString()}/${sourceCount.toString()} rows written`,
await target.upsertBatch(table, normalised);
lastSuccessfulId = rows[rows.length - 1]?.['id'] as string | undefined;
tableTotal += rows.length;
offset += rows.length;
onProgress(
`[migrate-tier] ${table}: ${tableTotal.toString()}/${sourceCount.toString()} rows written`,
);
}
} catch (err) {
const errMsg = redactErrMsg(err instanceof Error ? err.message : String(err));
throw new Error(
`[migrate-tier] Failed on table "${table}" after ${tableTotal.toString()} rows ` +
`(last id: ${lastSuccessfulId ?? 'none'}). Error: ${errMsg}\n` +
`Remediation: Re-run with --allow-non-empty to resume (upsert is idempotent).`,
);
}
} catch (err) {
const errMsg = err instanceof Error ? err.message : String(err);
throw new Error(
`[migrate-tier] Failed on table "${table}" after ${tableTotal.toString()} rows ` +
`(last id: ${lastSuccessfulId ?? 'none'}). Error: ${errMsg}\n` +
`Remediation: Re-run with --allow-non-empty to resume (upsert is idempotent).`,
);
results.push({ table, rowsMigrated: tableTotal, skipped: false });
totalRows += tableTotal;
onProgress(`[migrate-tier] ${table}: done (${tableTotal.toString()} rows).`);
}
results.push({ table, rowsMigrated: tableTotal, skipped: false });
totalRows += tableTotal;
onProgress(`[migrate-tier] ${table}: done (${tableTotal.toString()} rows).`);
}
// Add skipped table records.
for (const skipped of SKIP_TABLES) {
results.push({
table: skipped,
rowsMigrated: 0,
skipped: true,
skipReason: 'ephemeral or environment-specific — re-issue on target',
});
}
// Add skipped table records.
for (const skipped of SKIP_TABLES) {
results.push({
table: skipped,
rowsMigrated: 0,
skipped: true,
skipReason: 'ephemeral or environment-specific — re-issue on target',
});
onProgress(`[migrate-tier] Complete. ${totalRows.toString()} total rows migrated.`);
return { tables: results, totalRows, dryRun: false };
} finally {
// Release the advisory lock regardless of success or failure.
if (target.releaseAdvisoryLock) {
await target.releaseAdvisoryLock();
}
}
onProgress(`[migrate-tier] Complete. ${totalRows.toString()} total rows migrated.`);
return { tables: results, totalRows, dryRun: false };
}

View File

@@ -0,0 +1,62 @@
import { describe, it, expect } from 'vitest';
import { redactErrMsg } from './redact-error.js';
describe('redactErrMsg', () => {
it('redacts user:password from a postgres:// URL embedded in an error message', () => {
const msg = 'connect ECONNREFUSED postgres://admin:s3cr3t@db.example.com:5432/mosaic';
expect(redactErrMsg(msg)).toBe(
'connect ECONNREFUSED postgres://***@db.example.com:5432/mosaic',
);
});
it('redacts user:password from a postgresql:// URL', () => {
const msg = 'connection failed: postgresql://myuser:mypass@localhost:5432/testdb';
expect(redactErrMsg(msg)).toBe('connection failed: postgresql://***@localhost:5432/testdb');
});
it('handles URLs with no password (user only) — still redacts userinfo', () => {
const msg = 'error postgres://justuser@host:5432/db';
expect(redactErrMsg(msg)).toBe('error postgres://***@host:5432/db');
});
it('returns the original message unchanged when no connection URL is present', () => {
const msg = 'connection timed out after 5 seconds';
expect(redactErrMsg(msg)).toBe('connection timed out after 5 seconds');
});
it('is case-insensitive for the scheme (scheme is normalized to lowercase in output)', () => {
// The regex replacement uses a lowercase literal, so the matched scheme is
// replaced with the lowercase form regardless of the original casing.
const msg = 'POSTGRES://admin:pass@host:5432/db';
expect(redactErrMsg(msg)).toBe('postgres://***@host:5432/db');
});
it('redacts multiple URLs in a single message', () => {
const msg = 'src postgres://u:p@host1/db1 dst postgresql://v:q@host2/db2';
expect(redactErrMsg(msg)).toBe('src postgres://***@host1/db1 dst postgresql://***@host2/db2');
});
it('does not alter a message with a postgres URL that has no userinfo', () => {
// No userinfo component — pattern does not match, message unchanged.
const msg = 'error at postgres://host:5432/db';
expect(redactErrMsg(msg)).toBe('error at postgres://host:5432/db');
});
it('redacts user:password from a redis:// URL', () => {
const msg = 'connect ECONNREFUSED redis://user:pass@host:6379';
expect(redactErrMsg(msg)).toBe('connect ECONNREFUSED redis://***@host:6379');
});
it('redacts user:password from a rediss:// URL (TLS)', () => {
const msg = 'connect ECONNREFUSED rediss://user:pass@host:6379';
expect(redactErrMsg(msg)).toBe('connect ECONNREFUSED rediss://***@host:6379');
});
it('redacts both a postgres URL and a redis URL in the same message', () => {
const msg =
'primary postgres://admin:s3cr3t@db:5432/mosaic cache redis://cacheuser:cachepass@cache:6379';
expect(redactErrMsg(msg)).toBe(
'primary postgres://***@db:5432/mosaic cache redis://***@cache:6379',
);
});
});

View File

@@ -0,0 +1,39 @@
/**
* redact-error.ts — Internal credential-scrubbing helper.
*
* The `postgres` npm package can embed the full DSN (including the password)
* in connection-failure error messages. This module provides a single helper
* that strips the user:password portion from any such message before it is
* re-thrown, logged, or surfaced in a structured health report.
*
* This file is intentionally NOT re-exported from the package index — it is
* an internal utility for use within packages/storage/src only.
*/
/**
* Redacts credentials from error messages that may include connection URLs.
* The `postgres` npm package can embed the full DSN in connection-failure
* messages, and ioredis can embed `redis://` / `rediss://` URLs similarly.
* This helper strips the user:password portion before display.
*
* Handles `postgres://`, `postgresql://`, `redis://`, and `rediss://`
* schemes (case-insensitive). Everything between `://` and `@` (the userinfo
* component) is replaced with `***` so that the host, port, and database name
* remain visible for diagnostics while the secret is never written to logs or
* CI output.
*
* @example
* redactErrMsg('connect ECONNREFUSED postgres://admin:s3cr3t@db:5432/mosaic')
* // → 'connect ECONNREFUSED postgres://***@db:5432/mosaic'
*
* redactErrMsg('connect ECONNREFUSED redis://user:pass@cache:6379')
* // → 'connect ECONNREFUSED redis://***@cache:6379'
*/
const CREDENTIAL_URL_RE = /(postgres(?:ql)?|rediss?):\/\/[^@\s]*@/gi;
export function redactErrMsg(msg: string): string {
return msg.replace(
CREDENTIAL_URL_RE,
(_match, scheme: string) => `${scheme.toLowerCase()}://***@`,
);
}

View File

@@ -0,0 +1,52 @@
/**
* Test-only helpers for creating a PGlite database with the pgvector extension
* and running Drizzle migrations against it.
*
* These are intentionally NOT exported from @mosaicstack/db to avoid pulling
* the WASM vector bundle into the public API surface.
*/
import { createRequire } from 'node:module';
import { dirname, resolve } from 'node:path';
import { PGlite } from '@electric-sql/pglite';
import { vector } from '@electric-sql/pglite/vector';
import { drizzle } from 'drizzle-orm/pglite';
import { migrate as migratePglite } from 'drizzle-orm/pglite/migrator';
import type { PgliteDatabase } from 'drizzle-orm/pglite';
import * as schema from '@mosaicstack/db';
import type { DbHandle } from '@mosaicstack/db';
/**
* Create a PGlite DB handle with the pgvector extension loaded.
* Required for running Drizzle migrations that include `CREATE EXTENSION vector`.
*/
export function createPgliteDbWithVector(dataDir: string): DbHandle {
const client = new PGlite(dataDir, { extensions: { vector } });
const db = drizzle(client, { schema });
return {
db: db as unknown as DbHandle['db'],
close: async () => {
await client.close();
},
};
}
/**
* Run Drizzle migrations against an already-open PGlite database handle.
* Resolves the migrations folder from @mosaicstack/db's installed location.
*
* @param db A PgliteDatabase instance (from drizzle-orm/pglite).
*/
export async function runPgliteMigrations(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
db: PgliteDatabase<any>,
): Promise<void> {
// Resolve @mosaicstack/db package root to locate its drizzle migrations folder.
const _require = createRequire(import.meta.url);
const dbPkgMain = _require.resolve('@mosaicstack/db');
// dbPkgMain → …/packages/db/dist/index.js → dirname = dist/
// go up one level from dist/ to find the sibling drizzle/ folder
const migrationsFolder = resolve(dirname(dbPkgMain), '../drizzle');
await migratePglite(db, { migrationsFolder });
}

View File

@@ -1,5 +1,5 @@
/**
* Unit tests for tier-detector.ts.
* Unit tests for tier-detection.ts.
*
* All external I/O (postgres, ioredis) is mocked no live services required.
*
@@ -59,28 +59,26 @@ vi.mock('ioredis', () => ({
/* Import SUT after mocks are registered */
/* ------------------------------------------------------------------ */
import { detectAndAssertTier, TierDetectionError } from './tier-detector.js';
import type { MosaicConfig } from '@mosaicstack/config';
import { detectAndAssertTier, probeServiceHealth, TierDetectionError } from './tier-detection.js';
import type { TierConfig } from './tier-detection.js';
/* ------------------------------------------------------------------ */
/* Config fixtures */
/* ------------------------------------------------------------------ */
const LOCAL_CONFIG: MosaicConfig = {
const LOCAL_CONFIG: TierConfig = {
tier: 'local',
storage: { type: 'pglite', dataDir: '.mosaic/pglite' },
queue: { type: 'local', dataDir: '.mosaic/queue' },
memory: { type: 'keyword' },
queue: { type: 'local' },
};
const STANDALONE_CONFIG: MosaicConfig = {
const STANDALONE_CONFIG: TierConfig = {
tier: 'standalone',
storage: { type: 'postgres', url: 'postgresql://mosaic:mosaic@db-host:5432/mosaic' },
queue: { type: 'bullmq', url: 'redis://valkey-host:6380' },
memory: { type: 'keyword' },
};
const FEDERATED_CONFIG: MosaicConfig = {
const FEDERATED_CONFIG: TierConfig = {
tier: 'federated',
storage: {
type: 'postgres',
@@ -88,7 +86,6 @@ const FEDERATED_CONFIG: MosaicConfig = {
enableVector: true,
},
queue: { type: 'bullmq', url: 'redis://valkey-host:6380' },
memory: { type: 'pgvector' },
};
/* ------------------------------------------------------------------ */
@@ -232,15 +229,17 @@ describe('detectAndAssertTier', () => {
});
/* ---------------------------------------------------------------- */
/* 7. probeValkey honors connectTimeout */
/* 7. probeValkey honors connectTimeout and lazyConnect */
/* ---------------------------------------------------------------- */
it('constructs the ioredis Redis client with connectTimeout: 5000', async () => {
await detectAndAssertTier(STANDALONE_CONFIG);
expect(mocks.MockRedis).toHaveBeenCalledOnce();
const [, options] = mocks.MockRedis.mock.calls[0] as [string, Record<string, unknown>];
expect(options).toMatchObject({ connectTimeout: 5000 });
expect(mocks.MockRedis).toHaveBeenCalledWith(
expect.any(String),
expect.objectContaining({ connectTimeout: 5000, lazyConnect: true }),
);
});
/* ---------------------------------------------------------------- */
@@ -269,7 +268,7 @@ describe('detectAndAssertTier', () => {
/* ---------------------------------------------------------------- */
it('mentions CREATE permission or superuser in remediation for a generic pgvector error', async () => {
// SELECT 1 succeeds; CREATE EXTENSION fails with a permission error (not the library-missing message).
// SELECT 1 succeeds; CREATE EXTENSION fails with a permission error.
mocks.mockSqlFn
.mockResolvedValueOnce([]) // SELECT 1 (probePostgres)
.mockRejectedValueOnce(new Error('permission denied to create extension'));
@@ -293,15 +292,14 @@ describe('detectAndAssertTier', () => {
/* ---------------------------------------------------------------- */
it('throws TierDetectionError with service=config for federated tier with queue.type !== bullmq', async () => {
const badConfig: MosaicConfig = {
const badConfig: TierConfig = {
tier: 'federated',
storage: {
type: 'postgres',
url: 'postgresql://mosaic:mosaic@db-host:5433/mosaic',
enableVector: true,
},
queue: { type: 'local', dataDir: '.mosaic/queue' },
memory: { type: 'pgvector' },
queue: { type: 'local' },
};
try {
@@ -342,3 +340,207 @@ describe('detectAndAssertTier', () => {
expect(caught!.message).toContain('db-host:5432');
});
});
/* ------------------------------------------------------------------ */
/* probeServiceHealth tests */
/* ------------------------------------------------------------------ */
describe('probeServiceHealth', () => {
beforeEach(() => {
vi.clearAllMocks();
mocks.mockSqlFn.mockResolvedValue([]);
mocks.mockEnd.mockResolvedValue(undefined);
mocks.mockRedisConnect.mockResolvedValue(undefined);
mocks.mockRedisPing.mockResolvedValue('PONG');
mocks.mockPostgresConstructor.mockImplementation(() => {
const sql = mocks.mockSqlFn as ReturnType<typeof mocks.mockSqlFn>;
(sql as unknown as Record<string, unknown>)['end'] = mocks.mockEnd;
return sql;
});
mocks.MockRedis.mockImplementation(() => ({
connect: mocks.mockRedisConnect,
ping: mocks.mockRedisPing,
disconnect: mocks.mockRedisDisconnect,
}));
});
/* ---------------------------------------------------------------- */
/* 12. local tier — all skipped, green */
/* ---------------------------------------------------------------- */
it('returns all services as skipped and overall green for local tier', async () => {
const report = await probeServiceHealth(LOCAL_CONFIG);
expect(report.tier).toBe('local');
expect(report.overall).toBe('green');
expect(report.services).toHaveLength(3);
for (const svc of report.services) {
expect(svc.status).toBe('skipped');
}
expect(mocks.mockPostgresConstructor).not.toHaveBeenCalled();
expect(mocks.MockRedis).not.toHaveBeenCalled();
});
/* ---------------------------------------------------------------- */
/* 13. postgres fails, valkey ok → red */
/* ---------------------------------------------------------------- */
it('returns red overall with postgres fail and valkey ok for standalone when postgres fails', async () => {
mocks.mockSqlFn.mockRejectedValue(new Error('connection refused'));
const report = await probeServiceHealth(STANDALONE_CONFIG);
expect(report.overall).toBe('red');
const pgCheck = report.services.find((s) => s.name === 'postgres');
expect(pgCheck?.status).toBe('fail');
expect(pgCheck?.error).toBeDefined();
expect(pgCheck?.error?.remediation).toContain('docker compose');
const vkCheck = report.services.find((s) => s.name === 'valkey');
expect(vkCheck?.status).toBe('ok');
});
/* ---------------------------------------------------------------- */
/* 14. federated all green → 3 services ok */
/* ---------------------------------------------------------------- */
it('returns green overall with all 3 services ok for federated when all pass', async () => {
mocks.mockSqlFn.mockResolvedValue([]);
const report = await probeServiceHealth(FEDERATED_CONFIG);
expect(report.tier).toBe('federated');
expect(report.overall).toBe('green');
expect(report.services).toHaveLength(3);
for (const svc of report.services) {
expect(svc.status).toBe('ok');
}
});
/* ---------------------------------------------------------------- */
/* 15. durationMs is a non-negative number for every service check */
/* ---------------------------------------------------------------- */
it('sets durationMs as a non-negative number for every service check', async () => {
mocks.mockSqlFn.mockResolvedValue([]);
const report = await probeServiceHealth(FEDERATED_CONFIG);
for (const svc of report.services) {
expect(typeof svc.durationMs).toBe('number');
expect(svc.durationMs).toBeGreaterThanOrEqual(0);
}
});
it('sets durationMs >= 0 even for skipped services (local tier)', async () => {
const report = await probeServiceHealth(LOCAL_CONFIG);
for (const svc of report.services) {
expect(typeof svc.durationMs).toBe('number');
expect(svc.durationMs).toBeGreaterThanOrEqual(0);
}
});
/* ---------------------------------------------------------------- */
/* 16. configPath is passed through to the report */
/* ---------------------------------------------------------------- */
it('includes configPath in the report when provided', async () => {
const report = await probeServiceHealth(LOCAL_CONFIG, '/etc/mosaic/mosaic.config.json');
expect(report.configPath).toBe('/etc/mosaic/mosaic.config.json');
});
/* ---------------------------------------------------------------- */
/* 17. standalone — valkey fails, postgres ok → red */
/* ---------------------------------------------------------------- */
it('returns red with valkey fail and postgres ok for standalone when valkey fails', async () => {
mocks.mockSqlFn.mockResolvedValue([]);
mocks.mockRedisConnect.mockRejectedValue(new Error('ECONNREFUSED'));
const report = await probeServiceHealth(STANDALONE_CONFIG);
expect(report.overall).toBe('red');
const pgCheck = report.services.find((s) => s.name === 'postgres');
expect(pgCheck?.status).toBe('ok');
const vkCheck = report.services.find((s) => s.name === 'valkey');
expect(vkCheck?.status).toBe('fail');
expect(vkCheck?.error).toBeDefined();
});
/* ---------------------------------------------------------------- */
/* 18. federated — pgvector fails → red with remediation */
/* ---------------------------------------------------------------- */
it('returns red with pgvector fail for federated when pgvector probe fails', async () => {
mocks.mockSqlFn
.mockResolvedValueOnce([]) // postgres SELECT 1
.mockRejectedValueOnce(new Error('extension "vector" is not available')); // pgvector
const report = await probeServiceHealth(FEDERATED_CONFIG);
expect(report.overall).toBe('red');
const pvCheck = report.services.find((s) => s.name === 'pgvector');
expect(pvCheck?.status).toBe('fail');
expect(pvCheck?.error?.remediation).toContain('pgvector/pgvector:pg17');
});
/* ---------------------------------------------------------------- */
/* 19. federated — non-bullmq queue → red config error, no network */
/* ---------------------------------------------------------------- */
it('returns red overall with config error when federated tier has non-bullmq queue (no network call)', async () => {
const federatedBadQueueConfig: TierConfig = {
tier: 'federated',
storage: {
type: 'postgres',
url: 'postgresql://mosaic:mosaic@db-host:5433/mosaic',
enableVector: true,
},
queue: { type: 'local' },
};
const report = await probeServiceHealth(federatedBadQueueConfig);
expect(report.overall).toBe('red');
const valkey = report.services.find((s) => s.name === 'valkey');
expect(valkey?.status).toBe('fail');
expect(valkey?.error?.remediation).toMatch(/bullmq/i);
// Critically: no network call was made — MockRedis constructor must NOT have been called.
expect(mocks.MockRedis).not.toHaveBeenCalled();
});
/* ---------------------------------------------------------------- */
/* 20. durationMs actually measures real elapsed time */
/* ---------------------------------------------------------------- */
it('measures real elapsed time for service probes', async () => {
const DELAY_MS = 25;
// Make the postgres mock introduce a real wall-clock delay.
mocks.mockSqlFn.mockImplementation(
() =>
new Promise((resolve) =>
setTimeout(() => {
resolve([]);
}, DELAY_MS),
),
);
const report = await probeServiceHealth(STANDALONE_CONFIG);
const pgCheck = report.services.find((s) => s.name === 'postgres');
expect(pgCheck).toBeDefined();
// Must be >= 20ms (small slack for jitter). Would be 0 if timer were stubbed.
expect(pgCheck!.durationMs).toBeGreaterThanOrEqual(20);
});
});

View File

@@ -0,0 +1,559 @@
/**
* Tier Detection — pre-flight service reachability probes.
*
* Lifted from apps/gateway/src/bootstrap/tier-detector.ts so both the gateway
* and the mosaic CLI can share the same probe logic without duplicating code or
* creating circular workspace dependencies.
*
* Library choices:
* - Postgres: `postgres` npm package (already a dep via @mosaicstack/db / drizzle-orm).
* - Valkey: `ioredis` (compatible with Valkey; same URL convention used by bullmq).
*/
import postgres from 'postgres';
import { Redis } from 'ioredis';
import { redactErrMsg } from './redact-error.js';
/* ------------------------------------------------------------------ */
/* Local structural type — avoids circular dependency */
/* ------------------------------------------------------------------ */
/**
* Minimal structural shape required for tier detection.
* Mirrors the relevant fields of MosaicConfig (from @mosaicstack/config) without
* creating a dependency cycle (config depends on storage for StorageConfig).
* Any object that satisfies MosaicConfig also satisfies this type.
*/
export interface TierConfig {
tier: 'local' | 'standalone' | 'federated';
storage:
| { type: 'pglite'; dataDir?: string }
| { type: 'postgres'; url: string; enableVector?: boolean }
| { type: 'files'; dataDir: string; format?: 'json' | 'md' };
queue: { type: string; url?: string };
}
/* ------------------------------------------------------------------ */
/* Public types */
/* ------------------------------------------------------------------ */
export interface ServiceCheck {
name: 'postgres' | 'valkey' | 'pgvector';
status: 'ok' | 'fail' | 'skipped';
host?: string;
port?: number;
durationMs: number;
error?: { message: string; remediation: string };
}
export interface TierHealthReport {
tier: 'local' | 'standalone' | 'federated';
configPath?: string;
overall: 'green' | 'yellow' | 'red';
services: ServiceCheck[];
}
/* ------------------------------------------------------------------ */
/* Structured error type */
/* ------------------------------------------------------------------ */
export class TierDetectionError extends Error {
public readonly service: 'postgres' | 'valkey' | 'pgvector' | 'config';
public readonly host: string;
public readonly port: number;
public readonly remediation: string;
constructor(opts: {
service: 'postgres' | 'valkey' | 'pgvector' | 'config';
host: string;
port: number;
remediation: string;
cause?: unknown;
}) {
const message =
`[tier-detector] ${opts.service} unreachable or unusable at ` +
`${opts.host}:${opts.port}${opts.remediation}`;
super(message, { cause: opts.cause });
this.name = 'TierDetectionError';
this.service = opts.service;
this.host = opts.host;
this.port = opts.port;
this.remediation = opts.remediation;
}
}
/* ------------------------------------------------------------------ */
/* URL helpers */
/* ------------------------------------------------------------------ */
/** Extract host and port from a URL string, returning safe fallbacks on parse failure. */
function parseHostPort(url: string, defaultPort: number): { host: string; port: number } {
try {
const parsed = new URL(url);
const host = parsed.hostname || 'unknown';
const port = parsed.port ? parseInt(parsed.port, 10) : defaultPort;
return { host, port };
} catch {
return { host: 'unknown', port: defaultPort };
}
}
/* ------------------------------------------------------------------ */
/* Internal probe results */
/* ------------------------------------------------------------------ */
interface ProbeResult {
host: string;
port: number;
durationMs: number;
error?: { message: string; remediation: string };
}
/* ------------------------------------------------------------------ */
/* Postgres probe */
/* ------------------------------------------------------------------ */
async function probePostgres(url: string): Promise<void> {
const { host, port } = parseHostPort(url, 5432);
let sql: ReturnType<typeof postgres> | undefined;
try {
sql = postgres(url, {
max: 1,
connect_timeout: 5,
idle_timeout: 5,
});
// Run a trivial query to confirm connectivity.
await sql`SELECT 1`;
} catch (cause) {
throw new TierDetectionError({
service: 'postgres',
host,
port,
remediation:
'Start Postgres: `docker compose -f docker-compose.federated.yml --profile federated up -d postgres-federated`',
cause,
});
} finally {
if (sql) {
await sql.end({ timeout: 2 }).catch(() => {
// Ignore cleanup errors — we already have what we need.
});
}
}
}
async function probePostgresMeasured(url: string): Promise<ProbeResult> {
const { host, port } = parseHostPort(url, 5432);
const start = Date.now();
let sql: ReturnType<typeof postgres> | undefined;
try {
sql = postgres(url, {
max: 1,
connect_timeout: 5,
idle_timeout: 5,
});
await sql`SELECT 1`;
return { host, port, durationMs: Date.now() - start };
} catch (cause) {
return {
host,
port,
durationMs: Date.now() - start,
error: {
message: redactErrMsg(cause instanceof Error ? cause.message : String(cause)),
remediation:
'Start Postgres: `docker compose -f docker-compose.federated.yml --profile federated up -d postgres-federated`',
},
};
} finally {
if (sql) {
await sql.end({ timeout: 2 }).catch(() => {});
}
}
}
/* ------------------------------------------------------------------ */
/* pgvector probe */
/* ------------------------------------------------------------------ */
async function probePgvector(url: string): Promise<void> {
const { host, port } = parseHostPort(url, 5432);
let sql: ReturnType<typeof postgres> | undefined;
try {
sql = postgres(url, {
max: 1,
connect_timeout: 5,
idle_timeout: 5,
});
// This succeeds whether the extension is already installed or freshly created.
// It errors only if the pgvector shared library is missing from the Postgres binary.
await sql`CREATE EXTENSION IF NOT EXISTS vector`;
} catch (cause) {
const causeMsg = cause instanceof Error ? cause.message.toLowerCase() : '';
const isLibraryMissing = causeMsg.includes('extension "vector" is not available');
const remediation = isLibraryMissing
? 'Use the `pgvector/pgvector:pg17` image, not the stock `postgres:17` image. See `docker-compose.federated.yml`.'
: 'The database role lacks permission to CREATE EXTENSION. Grant `CREATE` on the database, or run as a superuser.';
throw new TierDetectionError({
service: 'pgvector',
host,
port,
remediation,
cause,
});
} finally {
if (sql) {
await sql.end({ timeout: 2 }).catch(() => {
// Ignore cleanup errors.
});
}
}
}
async function probePgvectorMeasured(url: string): Promise<ProbeResult> {
const { host, port } = parseHostPort(url, 5432);
const start = Date.now();
let sql: ReturnType<typeof postgres> | undefined;
try {
sql = postgres(url, {
max: 1,
connect_timeout: 5,
idle_timeout: 5,
});
await sql`CREATE EXTENSION IF NOT EXISTS vector`;
return { host, port, durationMs: Date.now() - start };
} catch (cause) {
const causeMsg = cause instanceof Error ? cause.message.toLowerCase() : '';
const isLibraryMissing = causeMsg.includes('extension "vector" is not available');
const remediation = isLibraryMissing
? 'Use the `pgvector/pgvector:pg17` image, not the stock `postgres:17` image. See `docker-compose.federated.yml`.'
: 'The database role lacks permission to CREATE EXTENSION. Grant `CREATE` on the database, or run as a superuser.';
return {
host,
port,
durationMs: Date.now() - start,
error: {
message: redactErrMsg(cause instanceof Error ? cause.message : String(cause)),
remediation,
},
};
} finally {
if (sql) {
await sql.end({ timeout: 2 }).catch(() => {});
}
}
}
/* ------------------------------------------------------------------ */
/* Valkey probe */
/* ------------------------------------------------------------------ */
const DEFAULT_VALKEY_URL = 'redis://localhost:6380';
async function probeValkey(url: string): Promise<void> {
const { host, port } = parseHostPort(url, 6380);
const client = new Redis(url, {
enableReadyCheck: false,
maxRetriesPerRequest: 0,
retryStrategy: () => null, // no retries — fail fast
lazyConnect: true,
connectTimeout: 5000, // fail-fast: 5-second hard cap on connection attempt
});
try {
await client.connect();
const pong = await client.ping();
if (pong !== 'PONG') {
throw new Error(`Unexpected PING response: ${pong}`);
}
} catch (cause) {
throw new TierDetectionError({
service: 'valkey',
host,
port,
remediation:
'Start Valkey: `docker compose -f docker-compose.federated.yml --profile federated up -d valkey-federated`',
cause,
});
} finally {
client.disconnect();
}
}
async function probeValkeyMeasured(url: string): Promise<ProbeResult> {
const { host, port } = parseHostPort(url, 6380);
const start = Date.now();
const client = new Redis(url, {
enableReadyCheck: false,
maxRetriesPerRequest: 0,
retryStrategy: () => null,
lazyConnect: true,
connectTimeout: 5000,
});
try {
await client.connect();
const pong = await client.ping();
if (pong !== 'PONG') {
throw new Error(`Unexpected PING response: ${pong}`);
}
return { host, port, durationMs: Date.now() - start };
} catch (cause) {
return {
host,
port,
durationMs: Date.now() - start,
error: {
message: redactErrMsg(cause instanceof Error ? cause.message : String(cause)),
remediation:
'Start Valkey: `docker compose -f docker-compose.federated.yml --profile federated up -d valkey-federated`',
},
};
} finally {
client.disconnect();
}
}
/* ------------------------------------------------------------------ */
/* Public entry points */
/* ------------------------------------------------------------------ */
/**
* Assert that all services required by `config.tier` are reachable.
*
* - `local` — no-op (PGlite is in-process; no external services).
* - `standalone` — assert Postgres + Valkey (if queue.type === 'bullmq').
* - `federated` — assert Postgres + Valkey + pgvector installability.
*
* Throws `TierDetectionError` on the first failure with host:port and
* a remediation hint.
*/
export async function detectAndAssertTier(config: TierConfig): Promise<void> {
if (config.tier === 'local') {
// PGlite runs in-process — nothing to probe.
return;
}
const pgUrl =
config.storage.type === 'postgres' ? config.storage.url : 'postgresql://localhost:5432/mosaic';
const valkeyUrl =
config.queue.type === 'bullmq' ? (config.queue.url ?? DEFAULT_VALKEY_URL) : null;
if (config.tier === 'standalone') {
await probePostgres(pgUrl);
if (valkeyUrl) {
await probeValkey(valkeyUrl);
}
return;
}
// tier === 'federated'
// Reject misconfigured queue upfront — federated requires bullmq + a Valkey URL.
if (config.queue.type !== 'bullmq') {
throw new TierDetectionError({
service: 'config',
host: 'localhost',
port: 0,
remediation:
"Federated tier requires queue.type === 'bullmq'. " +
"Set queue: { type: 'bullmq', url: 'redis://...' } in your mosaic.config.json.",
});
}
const federatedValkeyUrl = config.queue.url ?? DEFAULT_VALKEY_URL;
await probePostgres(pgUrl);
await probeValkey(federatedValkeyUrl);
await probePgvector(pgUrl);
}
/**
* Non-throwing variant for `mosaic gateway doctor`.
*
* Probes ALL required services even if some fail, returning a structured report.
* Services not required for the current tier are reported as `skipped`.
*
* Overall status:
* - `green` — all required services OK
* - `yellow` — all required services OK, but a non-critical check failed
* (currently unused — reserved for future optional probes)
* - `red` — at least one required service failed
*/
export async function probeServiceHealth(
config: TierConfig,
configPath?: string,
): Promise<TierHealthReport> {
const tier = config.tier;
// local tier: PGlite is in-process, no external services needed.
if (tier === 'local') {
return {
tier,
configPath,
overall: 'green',
services: [
{ name: 'postgres', status: 'skipped', durationMs: 0 },
{ name: 'valkey', status: 'skipped', durationMs: 0 },
{ name: 'pgvector', status: 'skipped', durationMs: 0 },
],
};
}
const pgUrl =
config.storage.type === 'postgres' ? config.storage.url : 'postgresql://localhost:5432/mosaic';
const valkeyUrl =
config.queue.type === 'bullmq' ? (config.queue.url ?? DEFAULT_VALKEY_URL) : null;
const services: ServiceCheck[] = [];
let hasFailure = false;
if (tier === 'standalone') {
// Postgres — required
const pgResult = await probePostgresMeasured(pgUrl);
if (pgResult.error) {
hasFailure = true;
services.push({
name: 'postgres',
status: 'fail',
host: pgResult.host,
port: pgResult.port,
durationMs: pgResult.durationMs,
error: pgResult.error,
});
} else {
services.push({
name: 'postgres',
status: 'ok',
host: pgResult.host,
port: pgResult.port,
durationMs: pgResult.durationMs,
});
}
// Valkey — required if bullmq
if (valkeyUrl) {
const vkResult = await probeValkeyMeasured(valkeyUrl);
if (vkResult.error) {
hasFailure = true;
services.push({
name: 'valkey',
status: 'fail',
host: vkResult.host,
port: vkResult.port,
durationMs: vkResult.durationMs,
error: vkResult.error,
});
} else {
services.push({
name: 'valkey',
status: 'ok',
host: vkResult.host,
port: vkResult.port,
durationMs: vkResult.durationMs,
});
}
} else {
services.push({ name: 'valkey', status: 'skipped', durationMs: 0 });
}
// pgvector — not required for standalone
services.push({ name: 'pgvector', status: 'skipped', durationMs: 0 });
return {
tier,
configPath,
overall: hasFailure ? 'red' : 'green',
services,
};
}
// tier === 'federated'
// Postgres — required
const pgResult = await probePostgresMeasured(pgUrl);
if (pgResult.error) {
hasFailure = true;
services.push({
name: 'postgres',
status: 'fail',
host: pgResult.host,
port: pgResult.port,
durationMs: pgResult.durationMs,
error: pgResult.error,
});
} else {
services.push({
name: 'postgres',
status: 'ok',
host: pgResult.host,
port: pgResult.port,
durationMs: pgResult.durationMs,
});
}
// Valkey — required for federated (queue.type must be bullmq)
if (config.queue.type !== 'bullmq') {
hasFailure = true;
services.push({
name: 'valkey',
status: 'fail',
host: 'localhost',
port: 0,
durationMs: 0,
error: {
message: "Federated tier requires queue.type === 'bullmq'",
remediation:
"Set queue: { type: 'bullmq', url: 'redis://...' } in your mosaic.config.json.",
},
});
} else {
const federatedValkeyUrl = config.queue.url ?? DEFAULT_VALKEY_URL;
const vkResult = await probeValkeyMeasured(federatedValkeyUrl);
if (vkResult.error) {
hasFailure = true;
services.push({
name: 'valkey',
status: 'fail',
host: vkResult.host,
port: vkResult.port,
durationMs: vkResult.durationMs,
error: vkResult.error,
});
} else {
services.push({
name: 'valkey',
status: 'ok',
host: vkResult.host,
port: vkResult.port,
durationMs: vkResult.durationMs,
});
}
}
// pgvector — required for federated
const pvResult = await probePgvectorMeasured(pgUrl);
if (pvResult.error) {
hasFailure = true;
services.push({
name: 'pgvector',
status: 'fail',
host: pvResult.host,
port: pvResult.port,
durationMs: pvResult.durationMs,
error: pvResult.error,
});
} else {
services.push({
name: 'pgvector',
status: 'ok',
host: pvResult.host,
port: pvResult.port,
durationMs: pvResult.durationMs,
});
}
return {
tier,
configPath,
overall: hasFailure ? 'red' : 'green',
services,
};
}

6
pnpm-lock.yaml generated
View File

@@ -648,10 +648,16 @@ importers:
commander:
specifier: ^13.0.0
version: 13.1.0
ioredis:
specifier: ^5.10.0
version: 5.10.0
postgres:
specifier: ^3.4.8
version: 3.4.8
devDependencies:
drizzle-orm:
specifier: ^0.45.1
version: 0.45.1(@electric-sql/pglite@0.2.17)(@opentelemetry/api@1.9.0)(@types/better-sqlite3@7.6.13)(@types/pg@8.15.6)(better-sqlite3@12.8.0)(kysely@0.28.11)(postgres@3.4.8)
typescript:
specifier: ^5.8.0
version: 5.9.3