/**
 * Unit tests for tier-detector.ts.
 *
 * All external I/O (postgres, ioredis) is mocked — no live services required.
 *
 * Note on hoisting: vi.mock() factories are hoisted above all imports by vitest.
 * Variables referenced inside factory callbacks must be declared via vi.hoisted()
 * so they are available at hoist time.
 */

import { describe, it, expect, vi, beforeEach } from 'vitest';

/* ------------------------------------------------------------------ */
/* Hoist shared mock state so factories can reference it               */
/* ------------------------------------------------------------------ */

const mocks = vi.hoisted(() => {
  // `sql` in postgres.js is a tagged-template function that also carries `end()`,
  // so the fake is a mock function with an `end` mock attached.
  const mockSqlFn = vi.fn();
  const mockEnd = vi.fn().mockResolvedValue(undefined);
  const mockPostgresConstructor = vi.fn(() => Object.assign(mockSqlFn, { end: mockEnd }));

  const mockRedisConnect = vi.fn().mockResolvedValue(undefined);
  const mockRedisPing = vi.fn().mockResolvedValue('PONG');
  const mockRedisDisconnect = vi.fn();
  const MockRedis = vi.fn().mockImplementation(() => ({
    connect: mockRedisConnect,
    ping: mockRedisPing,
    disconnect: mockRedisDisconnect,
  }));

  return {
    mockSqlFn,
    mockEnd,
    mockPostgresConstructor,
    mockRedisConnect,
    mockRedisPing,
    mockRedisDisconnect,
    MockRedis,
  };
});

/* ------------------------------------------------------------------ */
/* Module mocks (registered at hoist time)                             */
/* ------------------------------------------------------------------ */

vi.mock('postgres', () => ({
  default: mocks.mockPostgresConstructor,
}));

vi.mock('ioredis', () => ({
  Redis: mocks.MockRedis,
}));

/* ------------------------------------------------------------------ */
/* Import SUT after mocks are registered                               */
/* ------------------------------------------------------------------ */

import { detectAndAssertTier, TierDetectionError } from './tier-detector.js';
import type { MosaicConfig } from '@mosaicstack/config';

/* ------------------------------------------------------------------ */
/* Config fixtures                                                     */
/* ------------------------------------------------------------------ */

const LOCAL_CONFIG: MosaicConfig = {
  tier: 'local',
  storage: { type: 'pglite', dataDir: '.mosaic/pglite' },
  queue: { type: 'local', dataDir: '.mosaic/queue' },
  memory: { type: 'keyword' },
};

const STANDALONE_CONFIG: MosaicConfig = {
  tier: 'standalone',
  storage: { type: 'postgres', url: 'postgresql://mosaic:mosaic@db-host:5432/mosaic' },
  queue: { type: 'bullmq', url: 'redis://valkey-host:6380' },
  memory: { type: 'keyword' },
};

const FEDERATED_CONFIG: MosaicConfig = {
  tier: 'federated',
  storage: {
    type: 'postgres',
    url: 'postgresql://mosaic:mosaic@db-host:5433/mosaic',
    enableVector: true,
  },
  queue: { type: 'bullmq', url: 'redis://valkey-host:6380' },
  memory: { type: 'pgvector' },
};

/* ------------------------------------------------------------------ */
/* Helpers                                                             */
/* ------------------------------------------------------------------ */

/**
 * Run the detector and return the TierDetectionError it throws.
 * Fails the calling test if the detector resolves or throws anything else.
 */
async function captureTierError(config: MosaicConfig): Promise<TierDetectionError> {
  try {
    await detectAndAssertTier(config);
  } catch (err) {
    expect(err).toBeInstanceOf(TierDetectionError);
    return err as TierDetectionError;
  }
  throw new Error('detectAndAssertTier should have thrown');
}

/* ------------------------------------------------------------------ */
/* Tests                                                               */
/* ------------------------------------------------------------------ */

describe('detectAndAssertTier', () => {
  beforeEach(() => {
    vi.clearAllMocks();

    // Default: all probes succeed.
    mocks.mockSqlFn.mockResolvedValue([]);
    mocks.mockEnd.mockResolvedValue(undefined);
    mocks.mockRedisConnect.mockResolvedValue(undefined);
    mocks.mockRedisPing.mockResolvedValue('PONG');

    // Re-wire the constructors so every test starts from the same fakes.
    mocks.mockPostgresConstructor.mockImplementation(() =>
      Object.assign(mocks.mockSqlFn, { end: mocks.mockEnd }),
    );
    mocks.MockRedis.mockImplementation(() => ({
      connect: mocks.mockRedisConnect,
      ping: mocks.mockRedisPing,
      disconnect: mocks.mockRedisDisconnect,
    }));
  });

  /* ---------------------------------------------------------------- */
  /* 1. local — no-op                                                  */
  /* ---------------------------------------------------------------- */

  it('resolves immediately for tier=local without touching postgres or ioredis', async () => {
    await expect(detectAndAssertTier(LOCAL_CONFIG)).resolves.toBeUndefined();
    expect(mocks.mockPostgresConstructor).not.toHaveBeenCalled();
    expect(mocks.MockRedis).not.toHaveBeenCalled();
  });

  /* ---------------------------------------------------------------- */
  /* 2. standalone — happy path                                        */
  /* ---------------------------------------------------------------- */

  it('resolves for tier=standalone when postgres and valkey are reachable', async () => {
    await expect(detectAndAssertTier(STANDALONE_CONFIG)).resolves.toBeUndefined();

    // Postgres was probed (SELECT 1 only — no pgvector check).
    expect(mocks.mockPostgresConstructor).toHaveBeenCalledTimes(1);
    expect(mocks.mockSqlFn).toHaveBeenCalledTimes(1);
    // Valkey was probed.
    expect(mocks.MockRedis).toHaveBeenCalledTimes(1);
    expect(mocks.mockRedisPing).toHaveBeenCalledTimes(1);
  });

  it('closes the probe connections after a successful standalone check', async () => {
    await detectAndAssertTier(STANDALONE_CONFIG);

    expect(mocks.mockEnd).toHaveBeenCalledTimes(1);
    expect(mocks.mockEnd).toHaveBeenCalledWith({ timeout: 2 });
    expect(mocks.mockRedisDisconnect).toHaveBeenCalledTimes(1);
  });

  /* ---------------------------------------------------------------- */
  /* 3. standalone — postgres unreachable                              */
  /* ---------------------------------------------------------------- */

  it('throws TierDetectionError with service=postgres and skips valkey when the postgres query rejects', async () => {
    mocks.mockSqlFn.mockRejectedValueOnce(new Error('connection refused'));

    const err = await captureTierError(STANDALONE_CONFIG);
    expect(err.service).toBe('postgres');

    // Confirm no valkey probe happened (fail fast on first error).
    expect(mocks.MockRedis).not.toHaveBeenCalled();
    // The failed connection is still cleaned up.
    expect(mocks.mockEnd).toHaveBeenCalledTimes(1);
  });

  it('points at docker compose in the remediation when postgres fails', async () => {
    mocks.mockSqlFn.mockRejectedValue(new Error('connection refused'));

    const err = await captureTierError(STANDALONE_CONFIG);
    expect(err.service).toBe('postgres');
    expect(err.remediation).toContain('docker compose');
  });

  /* ---------------------------------------------------------------- */
  /* 4. standalone — valkey unreachable or misbehaving                 */
  /* ---------------------------------------------------------------- */

  it('throws TierDetectionError with service=valkey when connect fails', async () => {
    // Postgres probe succeeds; valkey connect fails.
    mocks.mockRedisConnect.mockRejectedValue(new Error('ECONNREFUSED'));

    const err = await captureTierError(STANDALONE_CONFIG);
    expect(err.service).toBe('valkey');
    expect(err.host).toBe('valkey-host');
    expect(err.port).toBe(6380);
    expect(err.message).toContain('valkey');
    expect(err.remediation).toContain('valkey-federated');
    // PING is never attempted on a connection that did not open.
    expect(mocks.mockRedisPing).not.toHaveBeenCalled();
    expect(mocks.mockRedisDisconnect).toHaveBeenCalledTimes(1);
  });

  it('throws TierDetectionError with service=valkey when ping rejects', async () => {
    mocks.mockRedisPing.mockRejectedValue(new Error('connection lost'));

    const err = await captureTierError(STANDALONE_CONFIG);
    expect(err.service).toBe('valkey');
    expect(mocks.mockRedisDisconnect).toHaveBeenCalledTimes(1);
  });

  it('throws TierDetectionError with service=valkey when ping does not answer PONG', async () => {
    mocks.mockRedisPing.mockResolvedValue('NOPE');

    const err = await captureTierError(STANDALONE_CONFIG);
    expect(err.service).toBe('valkey');
    expect(mocks.mockRedisDisconnect).toHaveBeenCalledTimes(1);
  });

  /* ---------------------------------------------------------------- */
  /* 5. federated — happy path                                         */
  /* ---------------------------------------------------------------- */

  it('resolves for tier=federated when all three checks pass', async () => {
    await expect(detectAndAssertTier(FEDERATED_CONFIG)).resolves.toBeUndefined();

    // postgres probe (SELECT 1) + pgvector probe (CREATE EXTENSION) = 2 postgres constructors.
    expect(mocks.mockPostgresConstructor).toHaveBeenCalledTimes(2);
    expect(mocks.mockSqlFn).toHaveBeenCalledTimes(2);
    // Valkey probed once.
    expect(mocks.MockRedis).toHaveBeenCalledTimes(1);
  });

  /* ---------------------------------------------------------------- */
  /* 6. federated — pgvector not installable                           */
  /* ---------------------------------------------------------------- */

  it('throws TierDetectionError with service=pgvector when CREATE EXTENSION fails', async () => {
    // SELECT 1 succeeds (first call), CREATE EXTENSION fails (second call).
    mocks.mockSqlFn
      .mockResolvedValueOnce([]) // SELECT 1
      .mockRejectedValueOnce(new Error('extension "vector" is not available'));

    const err = await captureTierError(FEDERATED_CONFIG);
    expect(err.service).toBe('pgvector');
    expect(err.message).toContain('pgvector');
    expect(err.remediation).toContain('pgvector/pgvector');
  });

  /* ---------------------------------------------------------------- */
  /* 7. probeValkey honors connectTimeout                              */
  /* ---------------------------------------------------------------- */

  it('constructs the ioredis Redis client with connectTimeout: 5000', async () => {
    await detectAndAssertTier(STANDALONE_CONFIG);

    expect(mocks.MockRedis).toHaveBeenCalledOnce();
    const [, options] = mocks.MockRedis.mock.calls[0] as [string, Record<string, unknown>];
    expect(options).toMatchObject({ connectTimeout: 5000 });
  });

  /* ---------------------------------------------------------------- */
  /* 8. probePgvector — library-not-installed remediation              */
  /* ---------------------------------------------------------------- */

  it('includes pgvector/pgvector:pg17 in remediation when pgvector library is missing', async () => {
    // SELECT 1 succeeds; CREATE EXTENSION fails with the canonical library-missing message.
    mocks.mockSqlFn
      .mockResolvedValueOnce([]) // SELECT 1 (probePostgres)
      .mockRejectedValueOnce(new Error('extension "vector" is not available')); // probePgvector

    const err = await captureTierError(FEDERATED_CONFIG);
    expect(err.service).toBe('pgvector');
    expect(err.remediation).toContain('pgvector/pgvector:pg17');
  });

  /* ---------------------------------------------------------------- */
  /* 9. probePgvector — permission error remediation                   */
  /* ---------------------------------------------------------------- */

  it('mentions CREATE permission or superuser in remediation for a pgvector permission error', async () => {
    // SELECT 1 succeeds; CREATE EXTENSION fails with a permission error (not the library-missing message).
    mocks.mockSqlFn
      .mockResolvedValueOnce([]) // SELECT 1 (probePostgres)
      .mockRejectedValueOnce(new Error('permission denied to create extension'));

    const err = await captureTierError(FEDERATED_CONFIG);
    expect(err.service).toBe('pgvector');
    // Must NOT point to the image fix — that's only for the library-missing case.
    expect(err.remediation).not.toContain('pgvector/pgvector:pg17');
    // Must mention permissions or superuser.
    expect(err.remediation).toMatch(/CREATE|superuser/i);
  });

  /* ---------------------------------------------------------------- */
  /* 10. federated tier rejects non-bullmq queue.type                  */
  /* ---------------------------------------------------------------- */

  it('throws TierDetectionError with service=config for federated tier with queue.type !== bullmq', async () => {
    const badConfig: MosaicConfig = {
      tier: 'federated',
      storage: {
        type: 'postgres',
        url: 'postgresql://mosaic:mosaic@db-host:5433/mosaic',
        enableVector: true,
      },
      queue: { type: 'local', dataDir: '.mosaic/queue' },
      memory: { type: 'pgvector' },
    };

    const err = await captureTierError(badConfig);
    expect(err.service).toBe('config');
    expect(err.remediation).toContain('bullmq');

    // No network probes should have been attempted.
    expect(mocks.mockPostgresConstructor).not.toHaveBeenCalled();
    expect(mocks.MockRedis).not.toHaveBeenCalled();
  });

  /* ---------------------------------------------------------------- */
  /* 11. Error fields populated                                        */
  /* ---------------------------------------------------------------- */

  it('populates host, port, and remediation on a thrown TierDetectionError', async () => {
    mocks.mockSqlFn.mockRejectedValue(new Error('connection refused'));

    const err = await captureTierError(STANDALONE_CONFIG);
    expect(err.service).toBe('postgres');
    // Host and port are extracted from the Postgres URL in STANDALONE_CONFIG.
    expect(err.host).toBe('db-host');
    expect(err.port).toBe(5432);
    expect(err.remediation).toMatch(/docker compose/i);
    expect(err.message).toContain('db-host:5432');
  });
});
/* ------------------------------------------------------------------ */
/* Structured error type                                               */
/* ------------------------------------------------------------------ */

/**
 * Error raised when a pre-flight tier check fails.
 *
 * Carries the failing `service`, the `host`/`port` that was attempted and a
 * human-readable `remediation` hint. The underlying error, if any, is attached
 * as the standard `cause`.
 *
 * For `service: 'config'` no network endpoint is involved, so the message
 * reports an invalid configuration instead of a misleading "unreachable at
 * host:port"; the `host` and `port` fields are still stored as passed.
 */
export class TierDetectionError extends Error {
  public readonly service: 'postgres' | 'valkey' | 'pgvector' | 'config';
  public readonly host: string;
  public readonly port: number;
  public readonly remediation: string;

  constructor(opts: {
    service: 'postgres' | 'valkey' | 'pgvector' | 'config';
    host: string;
    port: number;
    remediation: string;
    cause?: unknown;
  }) {
    const message =
      opts.service === 'config'
        ? `[tier-detector] invalid configuration — ${opts.remediation}`
        : `[tier-detector] ${opts.service} unreachable or unusable at ` +
          `${opts.host}:${opts.port} — ${opts.remediation}`;
    super(message, { cause: opts.cause });
    this.name = 'TierDetectionError';
    this.service = opts.service;
    this.host = opts.host;
    this.port = opts.port;
    this.remediation = opts.remediation;
  }
}

/* ------------------------------------------------------------------ */
/* URL helpers                                                         */
/* ------------------------------------------------------------------ */

/**
 * Extract host and port from a URL string, returning safe fallbacks on parse failure.
 *
 * @param url - Connection URL, e.g. `postgresql://user:pw@host:5432/db`.
 * @param defaultPort - Port reported when the URL carries none or cannot be parsed.
 * @returns The hostname (`'unknown'` when absent or unparseable) and the numeric port.
 */
function parseHostPort(url: string, defaultPort: number): { host: string; port: number } {
  try {
    const parsed = new URL(url);
    return {
      host: parsed.hostname || 'unknown',
      port: parsed.port ? Number.parseInt(parsed.port, 10) : defaultPort,
    };
  } catch {
    // Only used for error reporting, so a malformed URL must never throw here.
    return { host: 'unknown', port: defaultPort };
  }
}

/* ------------------------------------------------------------------ */
/* Postgres probes                                                     */
/* ------------------------------------------------------------------ */

type ProbeSql = ReturnType<typeof postgres>;

/**
 * Open a single short-lived Postgres connection, run `run` against it and
 * always close the connection afterwards. Errors from connecting or from `run`
 * propagate to the caller; errors while closing are ignored.
 */
async function withProbeConnection(
  url: string,
  run: (sql: ProbeSql) => PromiseLike<unknown>,
): Promise<void> {
  let sql: ProbeSql | undefined;
  try {
    sql = postgres(url, {
      max: 1,
      connect_timeout: 5,
      idle_timeout: 5,
    });
    await run(sql);
  } finally {
    if (sql) {
      await sql.end({ timeout: 2 }).catch(() => {
        // Ignore cleanup errors — the probe outcome is already decided.
      });
    }
  }
}

/**
 * Assert that Postgres at `url` accepts a connection and answers a trivial query.
 *
 * @throws TierDetectionError with `service: 'postgres'` on any failure.
 */
async function probePostgres(url: string): Promise<void> {
  const { host, port } = parseHostPort(url, 5432);
  try {
    // Run a trivial query to confirm connectivity.
    await withProbeConnection(url, (sql) => sql`SELECT 1`);
  } catch (cause) {
    throw new TierDetectionError({
      service: 'postgres',
      host,
      port,
      remediation:
        'Start Postgres: `docker compose -f docker-compose.federated.yml --profile federated up -d postgres-federated`',
      cause,
    });
  }
}

/* ------------------------------------------------------------------ */
/* pgvector probe                                                      */
/* ------------------------------------------------------------------ */

const PGVECTOR_IMAGE_REMEDIATION =
  'Use the `pgvector/pgvector:pg17` image, not the stock `postgres:17` image. See `docker-compose.federated.yml`.';

const PGVECTOR_PERMISSION_REMEDIATION =
  'The database role lacks permission to CREATE EXTENSION. Grant `CREATE` on the database, or run as a superuser.';

const PGVECTOR_UNKNOWN_REMEDIATION =
  'CREATE EXTENSION vector failed for an unrecognised reason. Confirm Postgres is still reachable and that the ' +
  'role may CREATE EXTENSION (grant `CREATE` on the database, or run as a superuser); the underlying error is ' +
  'attached as `cause`.';

/**
 * Pick the remediation hint matching why `CREATE EXTENSION vector` failed.
 *
 * - Extension files missing from the server ("is not available", or the
 *   "could not open extension control file" wording) → switch image.
 * - Insufficient privilege (SQLSTATE 42501 or a "permission denied" message) → grant rights.
 * - Anything else (e.g. the connection dropped) → a generic hint that does not
 *   claim a specific root cause.
 */
function pgvectorRemediation(cause: unknown): string {
  const text = cause instanceof Error ? cause.message.toLowerCase() : '';
  const sqlState =
    typeof cause === 'object' && cause !== null ? (cause as { code?: unknown }).code : undefined;

  if (
    text.includes('extension "vector" is not available') ||
    text.includes('could not open extension control file')
  ) {
    return PGVECTOR_IMAGE_REMEDIATION;
  }
  if (sqlState === '42501' || text.includes('permission denied')) {
    return PGVECTOR_PERMISSION_REMEDIATION;
  }
  return PGVECTOR_UNKNOWN_REMEDIATION;
}

/**
 * Assert that the pgvector extension is installed or can be installed at `url`.
 *
 * Note: this runs `CREATE EXTENSION IF NOT EXISTS vector`, so on success the
 * extension exists in the target database afterwards.
 *
 * @throws TierDetectionError with `service: 'pgvector'` on any failure.
 */
async function probePgvector(url: string): Promise<void> {
  const { host, port } = parseHostPort(url, 5432);
  try {
    // Succeeds whether the extension is already installed or freshly created.
    // It can fail because the extension is not shipped with the server, because
    // the role may not create extensions, or because the connection itself failed.
    await withProbeConnection(url, (sql) => sql`CREATE EXTENSION IF NOT EXISTS vector`);
  } catch (cause) {
    throw new TierDetectionError({
      service: 'pgvector',
      host,
      port,
      remediation: pgvectorRemediation(cause),
      cause,
    });
  }
}

/* ------------------------------------------------------------------ */
/* Valkey probe                                                        */
/* ------------------------------------------------------------------ */

const DEFAULT_VALKEY_URL = 'redis://localhost:6380';

/**
 * Assert that Valkey at `url` accepts a connection and answers PING with PONG.
 *
 * @throws TierDetectionError with `service: 'valkey'` on any failure, including
 *   a failure while constructing the client.
 */
async function probeValkey(url: string): Promise<void> {
  const { host, port } = parseHostPort(url, 6380);
  let client: Redis | undefined;

  try {
    // Constructed inside the try (like the Postgres probes) so that a throwing
    // constructor is reported as a TierDetectionError too.
    client = new Redis(url, {
      enableReadyCheck: false,
      maxRetriesPerRequest: 0,
      retryStrategy: () => null, // no retries — fail fast
      lazyConnect: true,
      connectTimeout: 5000, // fail-fast: 5-second hard cap on connection attempt
    });
    await client.connect();
    const pong = await client.ping();
    if (pong !== 'PONG') {
      throw new Error(`Unexpected PING response: ${pong}`);
    }
  } catch (cause) {
    throw new TierDetectionError({
      service: 'valkey',
      host,
      port,
      remediation:
        'Start Valkey: `docker compose -f docker-compose.federated.yml --profile federated up -d valkey-federated`',
      cause,
    });
  } finally {
    client?.disconnect();
  }
}
/**
 * Verify that every external service needed by `config.tier` can be reached.
 *
 * - `local`      — returns immediately; nothing is probed.
 * - `standalone` — probes Postgres, then Valkey when `queue.type === 'bullmq'`.
 * - `federated`  — requires `queue.type === 'bullmq'`, then probes Postgres,
 *                  Valkey and pgvector, in that order.
 *
 * Probes run sequentially and the first failure aborts the rest.
 *
 * @param config - The loaded Mosaic configuration.
 * @throws TierDetectionError on the first failing check, or with
 *   `service: 'config'` when the federated tier is paired with a non-bullmq queue.
 */
export async function detectAndAssertTier(config: MosaicConfig): Promise<void> {
  const { tier, storage, queue } = config;

  // PGlite runs in-process — nothing to probe.
  if (tier === 'local') {
    return;
  }

  // A non-postgres storage block falls back to the conventional local URL.
  const postgresUrl =
    storage.type === 'postgres' ? storage.url : 'postgresql://localhost:5432/mosaic';

  if (tier === 'standalone') {
    const standaloneValkeyUrl =
      queue.type === 'bullmq' ? (queue.url ?? DEFAULT_VALKEY_URL) : null;

    await probePostgres(postgresUrl);
    // Valkey is only required when the queue is backed by bullmq.
    if (standaloneValkeyUrl) {
      await probeValkey(standaloneValkeyUrl);
    }
    return;
  }

  // Everything else is the federated tier, which cannot run without bullmq —
  // reject the misconfiguration before touching the network.
  if (queue.type !== 'bullmq') {
    throw new TierDetectionError({
      service: 'config',
      host: 'localhost',
      port: 0,
      remediation:
        "Federated tier requires queue.type === 'bullmq'. " +
        "Set queue: { type: 'bullmq', url: 'redis://...' } in your mosaic.config.json.",
    });
  }

  await probePostgres(postgresUrl);
  await probeValkey(queue.url ?? DEFAULT_VALKEY_URL);
  await probePgvector(postgresUrl);
}
function bootstrap(): Promise { throw new Error('BETTER_AUTH_SECRET is required'); } + // Pre-flight: assert all external services required by the configured tier + // are reachable. Runs before NestFactory.create() so failures are visible + // immediately with actionable remediation hints. + const mosaicConfig = loadConfig(); + try { + await detectAndAssertTier(mosaicConfig); + } catch (err) { + if (err instanceof TierDetectionError) { + logger.error(`Tier detection failed: ${err.message}`); + logger.error(`Remediation: ${err.remediation}`); + } + throw err; + } + for (const warning of listSsoStartupWarnings()) { logger.warn(warning); } diff --git a/docs/federation/TASKS.md b/docs/federation/TASKS.md index 12585de..14f16a8 100644 --- a/docs/federation/TASKS.md +++ b/docs/federation/TASKS.md @@ -19,8 +19,8 @@ Goal: Gateway runs in `federated` tier with containerized PG+pgvector+Valkey. No | --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ------------------------------- | ---------- | -------- | --------------------------------------------------------------------------------------------------------------------------------- | | FED-M1-01 | done | Extend `mosaic.config.json` schema: add `"federated"` to `tier` enum in validator + TS types. Keep `local` and `standalone` working. Update schema docs/README where referenced. | #460 | sonnet | feat/federation-m1-tier-config | — | 4K | Shipped in PR #470. Renamed `team` → `standalone`; added `team` deprecation alias; added `DEFAULT_FEDERATED_CONFIG`. | | FED-M1-02 | in-progress | Author `docker-compose.federated.yml` as an overlay profile: Postgres 17 + pgvector extension (port 5433), Valkey (6380), named volumes, healthchecks. Compose-up should boot cleanly on a clean machine. 
| #460 | sonnet | feat/federation-m1-compose | FED-M1-01 | 5K | Bumped PG16→PG17 to match base compose. Overlay defines distinct `postgres-federated`/`valkey-federated` services, profile-gated. | -| FED-M1-03 | not-started | Add pgvector support to `packages/storage/src/adapters/postgres.ts`: create extension on init (idempotent), expose vector column type in schema helpers. No adapter changes for non-federated tiers. | #460 | codex | feat/federation-m1-pgvector | FED-M1-02 | 8K | Extension create is idempotent `CREATE EXTENSION IF NOT EXISTS vector`. Gate on tier = federated. | -| FED-M1-04 | not-started | Implement `apps/gateway/src/bootstrap/tier-detector.ts`: reads config, asserts PG/Valkey/pgvector reachable for `federated`, fail-fast with actionable error message on failure. Unit tests for each failure mode. | #460 | codex | feat/federation-m1-detector | FED-M1-03 | 8K | Structured error type with remediation hints. Logs which service failed, with host:port attempted. | +| FED-M1-03 | done | Add pgvector support to `packages/storage/src/adapters/postgres.ts`: create extension on init (idempotent), expose vector column type in schema helpers. No adapter changes for non-federated tiers. | #460 | sonnet | feat/federation-m1-pgvector | FED-M1-02 | 8K | Shipped in PR #472. `enableVector` flag on postgres StorageConfig; idempotent CREATE EXTENSION before migrations. | +| FED-M1-04 | in-progress | Implement `apps/gateway/src/bootstrap/tier-detector.ts`: reads config, asserts PG/Valkey/pgvector reachable for `federated`, fail-fast with actionable error message on failure. Unit tests for each failure mode. | #460 | sonnet | feat/federation-m1-detector | FED-M1-03 | 8K | Worker delivered; reviewer flagged 3 issues (Valkey timeout, pgvector error discrimination, federated/non-bullmq guard) — fixed. | | FED-M1-05 | not-started | Write `scripts/migrate-to-federated.ts`: one-way migration from `local` (PGlite) / `standalone` (PG without pgvector) → `federated`. 
Dumps, transforms, loads; dry-run + confirm UX. Idempotent on re-run. | #460 | codex | feat/federation-m1-migrate | FED-M1-04 | 10K | Do NOT run automatically. CLI subcommand `mosaic migrate tier --to federated --dry-run`. Safety rails. | | FED-M1-06 | not-started | Update `mosaic doctor`: report current tier, required services, actual health per service, pgvector presence, overall green/yellow/red. Machine-readable JSON output flag for CI use. | #460 | sonnet | feat/federation-m1-doctor | FED-M1-04 | 6K | Existing doctor output evolves; add `--json` flag. Green/yellow/red + remediation suggestions per issue. | | FED-M1-07 | not-started | Integration test: gateway boots in `federated` tier with docker-compose `federated` profile; refuses to boot when PG unreachable (asserts fail-fast); pgvector extension query succeeds. | #460 | sonnet | feat/federation-m1-integration | FED-M1-04 | 8K | Vitest + docker-compose test profile. One test file per assertion; real services, no mocks. | diff --git a/docs/scratchpads/mvp-20260312.md b/docs/scratchpads/mvp-20260312.md index 2fbd5e2..35106b1 100644 --- a/docs/scratchpads/mvp-20260312.md +++ b/docs/scratchpads/mvp-20260312.md @@ -343,3 +343,39 @@ Affected files (storage-tier semantics only — Team/workspace usages unaffected - `MVP-T04` (sync `.mosaic/orchestrator/mission.json`) still deferred. - `team` tier rename touches install wizard headless env vars (`MOSAIC_STORAGE_TIER=team`); will need 0.0.x deprecation note in scratchpad if release notes are written this milestone. 
+ +--- + +## Session 17 — 2026-04-19 — claude + +**Mode:** Delivery (W1 / FED-M1 execution; resumed after compaction) +**Branches landed this run:** `feat/federation-m1-tier-config` (PR #470), `feat/federation-m1-compose` (PR #471), `feat/federation-m1-pgvector` (PR #472) +**Branch active at end:** `feat/federation-m1-detector` (FED-M1-04, ready to push) + +**Tasks closed:** FED-M1-01, FED-M1-02, FED-M1-03 (all merged to `main` via squash, CI green, issue #460 still open as milestone). + +**FED-M1-04 — tier-detector:** Worker delivered `apps/gateway/src/bootstrap/tier-detector.ts` (~210 lines) + `tier-detector.spec.ts` (12 tests). Independent code review (sonnet) returned `changes-required` with 3 issues: + +1. CRITICAL: `probeValkey` missing `connectTimeout: 5000` on the ioredis Redis client (defaulted to 10s, violated fail-fast spec). +2. IMPORTANT: `probePgvector` catch block did not discriminate "library not installed" (use `pgvector/pgvector:pg17`) from permission errors. +3. IMPORTANT: Federated tier silently skipped Valkey probe when `queue.type !== 'bullmq'` (computed Valkey URL conditionally). + +Worker fix-up round addressed all three: + +- L147: `connectTimeout: 5000` added to Redis options +- L113-117: catch block branches on `extension "vector" is not available` substring → distinct remediation per failure mode +- L206-215: federated branch fails fast with `service: 'config'` if `queue.type !== 'bullmq'`, then probes Valkey unconditionally +- 4 new tests (8 → 12 total) cover each fix specifically + +Independent verifier (haiku) confirmed all 6 verification claims (line numbers, test presence, suite green: 12/12 PASS). + +**Process note — review pipeline working as designed:** + +Initial verifier (haiku) on the first delivery returned "OK to ship" but missed the 3 deeper issues that the sonnet code-reviewer caught. 
This validates the user's "always verify subagent claims independently with another subagent" rule — but specifically with the **right tier** for the task: code review needs sonnet-level reasoning, while haiku is fine for verifying surface claims (line counts, file existence) once review issues are known. Going forward: code review uses sonnet (`feature-dev:code-reviewer`), claim verification uses haiku. + +**Followup tasks tracked but deferred:** + +- #7: `tier=local` hardcoded in gateway-config resume branches (~262, ~317) — pre-existing bug, fix during M1-06 (doctor) or M1-09 (regression). +- #8: confirm `packages/config/dist` not git-tracked. + +**Next:** PR for FED-M1-04 → CI wait → merge. Then FED-M1-05 (migration script, codex/sonnet, 10K). diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 15eb594..f8ce05f 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -152,12 +152,18 @@ importers: fastify: specifier: ^5.0.0 version: 5.8.2 + ioredis: + specifier: ^5.10.0 + version: 5.10.0 node-cron: specifier: ^4.2.1 version: 4.2.1 openai: specifier: ^6.32.0 version: 6.32.0(ws@8.20.0)(zod@4.3.6) + postgres: + specifier: ^3.4.8 + version: 3.4.8 reflect-metadata: specifier: ^0.2.0 version: 0.2.2