feat(gateway): tier-detector with fail-fast PG/Valkey/pgvector probes (FED-M1-04)
Implements `apps/gateway/src/bootstrap/tier-detector.ts` invoked from
`main.ts` before NestJS bootstraps. For each tier:
- `local`: no-op (PGlite is in-process)
- `standalone`: probe Postgres + Valkey
- `federated`: probe Postgres + Valkey + pgvector extension; reject
config upfront if `queue.type !== 'bullmq'`
Each probe applies a 5-second connection timeout and, on failure, throws
a structured `TierDetectionError` carrying service / host / port /
remediation. The remediation field discriminates pgvector failure modes
("library not available" vs "permission denied") so operators get
actionable hints without leaking credentials.
Adds `postgres` and `ioredis` as direct gateway deps; previously only
transitive. 12 unit tests cover happy paths and each fail-fast branch.
Refs #460
This commit is contained in:
@@ -63,8 +63,10 @@
|
|||||||
"class-validator": "^0.15.1",
|
"class-validator": "^0.15.1",
|
||||||
"dotenv": "^17.3.1",
|
"dotenv": "^17.3.1",
|
||||||
"fastify": "^5.0.0",
|
"fastify": "^5.0.0",
|
||||||
|
"ioredis": "^5.10.0",
|
||||||
"node-cron": "^4.2.1",
|
"node-cron": "^4.2.1",
|
||||||
"openai": "^6.32.0",
|
"openai": "^6.32.0",
|
||||||
|
"postgres": "^3.4.8",
|
||||||
"reflect-metadata": "^0.2.0",
|
"reflect-metadata": "^0.2.0",
|
||||||
"rxjs": "^7.8.0",
|
"rxjs": "^7.8.0",
|
||||||
"socket.io": "^4.8.0",
|
"socket.io": "^4.8.0",
|
||||||
|
|||||||
344
apps/gateway/src/bootstrap/tier-detector.spec.ts
Normal file
344
apps/gateway/src/bootstrap/tier-detector.spec.ts
Normal file
@@ -0,0 +1,344 @@
|
|||||||
|
/**
|
||||||
|
* Unit tests for tier-detector.ts.
|
||||||
|
*
|
||||||
|
* All external I/O (postgres, ioredis) is mocked — no live services required.
|
||||||
|
*
|
||||||
|
* Note on hoisting: vi.mock() factories are hoisted above all imports by vitest.
|
||||||
|
* Variables referenced inside factory callbacks must be declared via vi.hoisted()
|
||||||
|
* so they are available at hoist time.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, it, expect, vi, beforeEach } from 'vitest';
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
/* Hoist shared mock state so factories can reference it */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
const mocks = vi.hoisted(() => {
  // postgres: the "client" is a single shared tagged-template mock that also
  // carries an `end()` method, mirroring the shape the detector uses.
  const mockSqlFn = vi.fn();
  const mockEnd = vi.fn().mockResolvedValue(undefined);
  const mockPostgresConstructor = vi.fn(() => Object.assign(mockSqlFn, { end: mockEnd }));

  // ioredis: every constructed client shares the same three method mocks.
  const mockRedisConnect = vi.fn().mockResolvedValue(undefined);
  const mockRedisPing = vi.fn().mockResolvedValue('PONG');
  const mockRedisDisconnect = vi.fn();
  const MockRedis = vi.fn().mockImplementation(() => ({
    connect: mockRedisConnect,
    ping: mockRedisPing,
    disconnect: mockRedisDisconnect,
  }));

  return {
    mockSqlFn,
    mockEnd,
    mockPostgresConstructor,
    mockRedisConnect,
    mockRedisPing,
    mockRedisDisconnect,
    MockRedis,
  };
});

/* ------------------------------------------------------------------ */
/* Module mocks (registered at hoist time)                             */
/* ------------------------------------------------------------------ */

// `postgres` is consumed through its default export.
vi.mock('postgres', () => ({
  default: mocks.mockPostgresConstructor,
}));

// `ioredis` is consumed through its named `Redis` export.
vi.mock('ioredis', () => ({
  Redis: mocks.MockRedis,
}));
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
/* Import SUT after mocks are registered */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
import { detectAndAssertTier, TierDetectionError } from './tier-detector.js';
|
||||||
|
import type { MosaicConfig } from '@mosaicstack/config';
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
/* Config fixtures */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/** Valkey URL shared by the standalone and federated fixtures. */
const FIXTURE_VALKEY_URL = 'redis://valkey-host:6380';

/** Build the fixture Postgres URL for `db-host` on the given port. */
const fixturePostgresUrl = (port: number): string =>
  `postgresql://mosaic:mosaic@db-host:${port}/mosaic`;

/** tier=local: everything in-process, no external services. */
const LOCAL_CONFIG: MosaicConfig = {
  tier: 'local',
  storage: { type: 'pglite', dataDir: '.mosaic/pglite' },
  queue: { type: 'local', dataDir: '.mosaic/queue' },
  memory: { type: 'keyword' },
};

/** tier=standalone: Postgres on 5432 plus a bullmq queue backed by Valkey. */
const STANDALONE_CONFIG: MosaicConfig = {
  tier: 'standalone',
  storage: { type: 'postgres', url: fixturePostgresUrl(5432) },
  queue: { type: 'bullmq', url: FIXTURE_VALKEY_URL },
  memory: { type: 'keyword' },
};

/** tier=federated: Postgres on 5433 with vectors enabled, Valkey, pgvector memory. */
const FEDERATED_CONFIG: MosaicConfig = {
  tier: 'federated',
  storage: {
    type: 'postgres',
    url: fixturePostgresUrl(5433),
    enableVector: true,
  },
  queue: { type: 'bullmq', url: FIXTURE_VALKEY_URL },
  memory: { type: 'pgvector' },
};
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
/* Tests */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
describe('detectAndAssertTier', () => {
  /**
   * Run the detector and return the `TierDetectionError` it rejects with.
   *
   * The instance check happens outside the try/catch on purpose: asserting
   * inside a `try` whose `catch` also handles the SUT's error would swallow
   * the assertion failure and report a misleading secondary error instead.
   */
  async function captureTierError(config: MosaicConfig): Promise<TierDetectionError> {
    let caught: unknown;
    try {
      await detectAndAssertTier(config);
    } catch (err) {
      caught = err;
    }
    expect(caught).toBeInstanceOf(TierDetectionError);
    return caught as TierDetectionError;
  }

  beforeEach(() => {
    vi.clearAllMocks();

    // Default: all probes succeed.
    mocks.mockSqlFn.mockResolvedValue([]);
    mocks.mockEnd.mockResolvedValue(undefined);
    mocks.mockRedisConnect.mockResolvedValue(undefined);
    mocks.mockRedisPing.mockResolvedValue('PONG');

    // Re-wire the constructors. Every postgres "client" is the same shared
    // mockSqlFn (with `end` attached), so call counts accumulate across probes.
    mocks.mockPostgresConstructor.mockImplementation(() => {
      const sql = mocks.mockSqlFn as ReturnType<typeof mocks.mockSqlFn>;
      (sql as unknown as Record<string, unknown>)['end'] = mocks.mockEnd;
      return sql;
    });
    mocks.MockRedis.mockImplementation(() => ({
      connect: mocks.mockRedisConnect,
      ping: mocks.mockRedisPing,
      disconnect: mocks.mockRedisDisconnect,
    }));
  });

  /* ---------------------------------------------------------------- */
  /* 1. local — no-op                                                  */
  /* ---------------------------------------------------------------- */

  it('resolves immediately for tier=local without touching postgres or ioredis', async () => {
    await expect(detectAndAssertTier(LOCAL_CONFIG)).resolves.toBeUndefined();
    expect(mocks.mockPostgresConstructor).not.toHaveBeenCalled();
    expect(mocks.MockRedis).not.toHaveBeenCalled();
  });

  /* ---------------------------------------------------------------- */
  /* 2. standalone — happy path                                        */
  /* ---------------------------------------------------------------- */

  it('resolves for tier=standalone when postgres and valkey are reachable', async () => {
    await expect(detectAndAssertTier(STANDALONE_CONFIG)).resolves.toBeUndefined();

    // Postgres was probed (SELECT 1 only — no pgvector check) and closed.
    expect(mocks.mockPostgresConstructor).toHaveBeenCalledTimes(1);
    expect(mocks.mockSqlFn).toHaveBeenCalledTimes(1);
    expect(mocks.mockEnd).toHaveBeenCalledTimes(1);
    // Valkey was probed and disconnected.
    expect(mocks.MockRedis).toHaveBeenCalledTimes(1);
    expect(mocks.mockRedisPing).toHaveBeenCalledTimes(1);
    expect(mocks.mockRedisDisconnect).toHaveBeenCalledTimes(1);
  });

  /* ---------------------------------------------------------------- */
  /* 3. standalone — postgres unreachable                              */
  /* ---------------------------------------------------------------- */

  it('throws TierDetectionError with service=postgres when postgres query rejects', async () => {
    mocks.mockSqlFn.mockRejectedValueOnce(new Error('connection refused'));

    await expect(detectAndAssertTier(STANDALONE_CONFIG)).rejects.toBeInstanceOf(
      TierDetectionError,
    );

    // Confirm no valkey probe happened (fail fast on first error).
    expect(mocks.MockRedis).not.toHaveBeenCalled();
  });

  it('sets service=postgres on the error when postgres fails', async () => {
    mocks.mockSqlFn.mockRejectedValue(new Error('connection refused'));

    const error = await captureTierError(STANDALONE_CONFIG);

    expect(error.service).toBe('postgres');
    expect(error.remediation).toContain('docker compose');
  });

  /* ---------------------------------------------------------------- */
  /* 4. standalone — valkey unreachable                                */
  /* ---------------------------------------------------------------- */

  it('throws TierDetectionError with service=valkey when the valkey connect fails', async () => {
    // Postgres probe succeeds; valkey connect fails.
    mocks.mockSqlFn.mockResolvedValue([]);
    mocks.mockRedisConnect.mockRejectedValue(new Error('ECONNREFUSED'));

    const error = await captureTierError(STANDALONE_CONFIG);

    expect(error.service).toBe('valkey');
    expect(error.message).toContain('valkey');
    expect(error.remediation).toContain('valkey-federated');
    // The client is released even though the probe failed.
    expect(mocks.mockRedisDisconnect).toHaveBeenCalledTimes(1);
  });

  /* ---------------------------------------------------------------- */
  /* 5. federated — happy path                                         */
  /* ---------------------------------------------------------------- */

  it('resolves for tier=federated when all three checks pass', async () => {
    // SELECT 1 and CREATE EXTENSION both succeed.
    mocks.mockSqlFn.mockResolvedValue([]);

    await expect(detectAndAssertTier(FEDERATED_CONFIG)).resolves.toBeUndefined();

    // postgres probe (SELECT 1) + pgvector probe (CREATE EXTENSION) = 2 postgres constructors.
    expect(mocks.mockPostgresConstructor).toHaveBeenCalledTimes(2);
    expect(mocks.mockSqlFn).toHaveBeenCalledTimes(2);
    expect(mocks.mockEnd).toHaveBeenCalledTimes(2);
    // Valkey probed once.
    expect(mocks.MockRedis).toHaveBeenCalledTimes(1);
  });

  /* ---------------------------------------------------------------- */
  /* 6. federated — pgvector not installable                           */
  /* ---------------------------------------------------------------- */

  it('throws TierDetectionError with service=pgvector when CREATE EXTENSION fails', async () => {
    // SELECT 1 succeeds (first call), CREATE EXTENSION fails (second call).
    mocks.mockSqlFn
      .mockResolvedValueOnce([]) // SELECT 1
      .mockRejectedValueOnce(new Error('extension "vector" is not available'));

    const error = await captureTierError(FEDERATED_CONFIG);

    expect(error.service).toBe('pgvector');
    expect(error.message).toContain('pgvector');
    expect(error.remediation).toContain('pgvector/pgvector');
  });

  /* ---------------------------------------------------------------- */
  /* 7. probeValkey honors connectTimeout                              */
  /* ---------------------------------------------------------------- */

  it('constructs the ioredis Redis client with connectTimeout: 5000', async () => {
    await detectAndAssertTier(STANDALONE_CONFIG);

    expect(mocks.MockRedis).toHaveBeenCalledOnce();
    const [, options] = mocks.MockRedis.mock.calls[0] as [string, Record<string, unknown>];
    expect(options).toMatchObject({ connectTimeout: 5000 });
  });

  /* ---------------------------------------------------------------- */
  /* 8. probePgvector — library-not-installed remediation              */
  /* ---------------------------------------------------------------- */

  it('includes pgvector/pgvector:pg17 in remediation when pgvector library is missing', async () => {
    // SELECT 1 succeeds; CREATE EXTENSION fails with the canonical library-missing message.
    mocks.mockSqlFn
      .mockResolvedValueOnce([]) // SELECT 1 (probePostgres)
      .mockRejectedValueOnce(new Error('extension "vector" is not available')); // probePgvector

    const error = await captureTierError(FEDERATED_CONFIG);

    expect(error.service).toBe('pgvector');
    expect(error.remediation).toContain('pgvector/pgvector:pg17');
  });

  /* ---------------------------------------------------------------- */
  /* 9. probePgvector — permission error remediation                   */
  /* ---------------------------------------------------------------- */

  it('mentions CREATE permission or superuser in remediation for a pgvector permission error', async () => {
    // SELECT 1 succeeds; CREATE EXTENSION fails with a permission error (not the library-missing message).
    mocks.mockSqlFn
      .mockResolvedValueOnce([]) // SELECT 1 (probePostgres)
      .mockRejectedValueOnce(new Error('permission denied to create extension'));

    const error = await captureTierError(FEDERATED_CONFIG);

    expect(error.service).toBe('pgvector');
    // Must NOT point to the image fix — that's only for the library-missing case.
    expect(error.remediation).not.toContain('pgvector/pgvector:pg17');
    // Must mention permissions or superuser.
    expect(error.remediation).toMatch(/CREATE|superuser/i);
  });

  /* ---------------------------------------------------------------- */
  /* 10. federated tier rejects non-bullmq queue.type                  */
  /* ---------------------------------------------------------------- */

  it('throws TierDetectionError with service=config for federated tier with queue.type !== bullmq', async () => {
    const badConfig: MosaicConfig = {
      tier: 'federated',
      storage: {
        type: 'postgres',
        url: 'postgresql://mosaic:mosaic@db-host:5433/mosaic',
        enableVector: true,
      },
      queue: { type: 'local', dataDir: '.mosaic/queue' },
      memory: { type: 'pgvector' },
    };

    const error = await captureTierError(badConfig);

    expect(error.service).toBe('config');
    expect(error.remediation).toContain('bullmq');

    // No network probes should have been attempted.
    expect(mocks.mockPostgresConstructor).not.toHaveBeenCalled();
    expect(mocks.MockRedis).not.toHaveBeenCalled();
  });

  /* ---------------------------------------------------------------- */
  /* 11. Error fields populated                                        */
  /* ---------------------------------------------------------------- */

  it('populates host, port, and remediation on a thrown TierDetectionError', async () => {
    mocks.mockSqlFn.mockRejectedValue(new Error('connection refused'));

    const error = await captureTierError(STANDALONE_CONFIG);

    expect(error.service).toBe('postgres');
    // Host and port are extracted from the Postgres URL in STANDALONE_CONFIG.
    expect(error.host).toBe('db-host');
    expect(error.port).toBe(5432);
    expect(error.remediation).toMatch(/docker compose/i);
    expect(error.message).toContain('db-host:5432');
  });
});
|
||||||
220
apps/gateway/src/bootstrap/tier-detector.ts
Normal file
220
apps/gateway/src/bootstrap/tier-detector.ts
Normal file
@@ -0,0 +1,220 @@
|
|||||||
|
/**
|
||||||
|
* Tier Detector — pre-flight service reachability assertions.
|
||||||
|
*
|
||||||
|
* Runs BEFORE NestFactory.create() to surface actionable errors immediately
|
||||||
|
* rather than crashing mid-boot with an opaque stack trace.
|
||||||
|
*
|
||||||
|
* Library choices:
|
||||||
|
* - Postgres: `postgres` npm package (already a dep via @mosaicstack/db / drizzle-orm).
|
||||||
|
* The spec mentions `pg`, but only `postgres` is installed in this monorepo.
|
||||||
|
* - Valkey: `ioredis` (already a dep via @mosaicstack/queue → bullmq; same URL
|
||||||
|
* convention used by the bullmq adapter).
|
||||||
|
*/
|
||||||
|
|
||||||
|
import postgres from 'postgres';
|
||||||
|
import { Redis } from 'ioredis';
|
||||||
|
import type { MosaicConfig } from '@mosaicstack/config';
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
/* Structured error type */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/**
 * Structured failure raised by the tier pre-flight checks.
 *
 * Carries the failing `service`, the `host`/`port` that was probed, and a
 * human-readable `remediation` hint. The original error, when supplied, is
 * forwarded to `Error` as `cause`.
 *
 * For `service: 'config'` the error describes an invalid configuration that
 * was rejected without probing anything, so `host`/`port` are placeholders
 * and are deliberately left out of the message.
 */
export class TierDetectionError extends Error {
  public readonly service: 'postgres' | 'valkey' | 'pgvector' | 'config';
  public readonly host: string;
  public readonly port: number;
  public readonly remediation: string;

  /**
   * @param opts.service     Which check failed.
   * @param opts.host        Host that was probed (placeholder for `config`).
   * @param opts.port        Port that was probed (placeholder for `config`).
   * @param opts.remediation Actionable hint; also embedded in `message`.
   * @param opts.cause       Underlying error, exposed via `Error.cause`.
   */
  constructor(opts: {
    service: TierDetectionError['service'];
    host: string;
    port: number;
    remediation: string;
    cause?: unknown;
  }) {
    // A config rejection has no endpoint; "unreachable at localhost:0" would
    // send operators looking for a network problem that does not exist.
    const message =
      opts.service === 'config'
        ? `[tier-detector] invalid configuration — ${opts.remediation}`
        : `[tier-detector] ${opts.service} unreachable or unusable at ` +
          `${opts.host}:${opts.port} — ${opts.remediation}`;
    super(message, { cause: opts.cause });
    this.name = 'TierDetectionError';
    this.service = opts.service;
    this.host = opts.host;
    this.port = opts.port;
    this.remediation = opts.remediation;
  }
}
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
/* URL helpers */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/**
 * Pull the hostname and port out of a connection URL.
 *
 * @param url         Connection string to inspect.
 * @param defaultPort Port to report when the URL carries none.
 * @returns The parsed host and port. The host is `'unknown'` when the URL
 *          cannot be parsed or has an empty hostname; an unparsable URL also
 *          yields `defaultPort`.
 */
function parseHostPort(url: string, defaultPort: number): { host: string; port: number } {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    // Malformed URL — report placeholders instead of throwing.
    return { host: 'unknown', port: defaultPort };
  }

  const host = parsed.hostname === '' ? 'unknown' : parsed.hostname;
  const port = parsed.port === '' ? defaultPort : Number.parseInt(parsed.port, 10);
  return { host, port };
}
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
/* Postgres probe */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/**
 * Verify that Postgres at `url` accepts a connection and answers `SELECT 1`.
 *
 * @param url Postgres connection string.
 * @throws TierDetectionError with `service: 'postgres'` when creating the
 *         client or running the query fails; the original error is attached
 *         as `cause`.
 */
async function probePostgres(url: string): Promise<void> {
  // Resolved up front so the error can name the endpoint (5432 when no port is given).
  const endpoint = parseHostPort(url, 5432);
  let client: ReturnType<typeof postgres> | undefined;

  try {
    // Single connection; timeouts are in seconds per the postgres.js options.
    client = postgres(url, { max: 1, connect_timeout: 5, idle_timeout: 5 });
    // Trivial round-trip to prove the server is actually answering queries.
    await client`SELECT 1`;
  } catch (cause) {
    throw new TierDetectionError({
      service: 'postgres',
      host: endpoint.host,
      port: endpoint.port,
      remediation:
        'Start Postgres: `docker compose -f docker-compose.federated.yml --profile federated up -d postgres-federated`',
      cause,
    });
  } finally {
    // Best-effort close; a cleanup failure must not mask the probe result.
    await client?.end({ timeout: 2 }).catch(() => undefined);
  }
}
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
/* pgvector probe */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/**
 * Map a failed `CREATE EXTENSION vector` to an operator-facing hint.
 *
 * Distinguishes three cases: the extension is not available on the server,
 * the role is not allowed to create it, and anything else (for example a
 * dropped connection), which must not be misreported as a permission issue.
 */
function describePgvectorFailure(cause: unknown): string {
  const message = cause instanceof Error ? cause.message.toLowerCase() : '';
  const sqlState =
    typeof cause === 'object' && cause !== null && 'code' in cause
      ? (cause as { code?: unknown }).code
      : undefined;

  if (message.includes('extension "vector" is not available')) {
    return 'Use the `pgvector/pgvector:pg17` image, not the stock `postgres:17` image. See `docker-compose.federated.yml`.';
  }
  // SQLSTATE 42501 is PostgreSQL's insufficient_privilege class.
  if (sqlState === '42501' || message.includes('permission denied')) {
    return 'The database role lacks permission to CREATE EXTENSION. Grant `CREATE` on the database, or run as a superuser.';
  }
  return (
    'Could not run `CREATE EXTENSION IF NOT EXISTS vector`. Check that Postgres is still reachable ' +
    'and that the database role may CREATE extensions; the underlying error is attached as `cause`.'
  );
}

/**
 * Verify that the pgvector extension is installed, or can be installed, in
 * the database at `url`.
 *
 * @param url Postgres connection string.
 * @throws TierDetectionError with `service: 'pgvector'` when the statement
 *         fails; `remediation` depends on the failure mode and the original
 *         error is attached as `cause`.
 */
async function probePgvector(url: string): Promise<void> {
  const { host, port } = parseHostPort(url, 5432);
  let sql: ReturnType<typeof postgres> | undefined;
  try {
    sql = postgres(url, {
      max: 1,
      connect_timeout: 5,
      idle_timeout: 5,
    });
    // Succeeds whether the extension is already installed or freshly created.
    // It can fail because the extension is unavailable on the server, because
    // the role may not create it, or because the connection itself failed.
    await sql`CREATE EXTENSION IF NOT EXISTS vector`;
  } catch (cause) {
    throw new TierDetectionError({
      service: 'pgvector',
      host,
      port,
      remediation: describePgvectorFailure(cause),
      cause,
    });
  } finally {
    if (sql) {
      await sql.end({ timeout: 2 }).catch(() => {
        // Ignore cleanup errors.
      });
    }
  }
}
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
/* Valkey probe */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/** Fallback Valkey URL used when a bullmq queue config does not provide `url`. */
const DEFAULT_VALKEY_URL = 'redis://localhost:6380';
|
||||||
|
|
||||||
|
/**
 * Verify that Valkey at `url` accepts a connection and answers `PING`.
 *
 * @param url Redis-protocol connection string.
 * @throws TierDetectionError with `service: 'valkey'` when connecting fails,
 *         PING fails, or PING returns anything other than `'PONG'`; the
 *         original error is attached as `cause`.
 */
async function probeValkey(url: string): Promise<void> {
  // When the URL omits a port the client connects to the Redis default, 6379,
  // so that is the port the error has to report.
  const { host, port } = parseHostPort(url, 6379);
  const client = new Redis(url, {
    enableReadyCheck: false,
    maxRetriesPerRequest: 0,
    retryStrategy: () => null, // no retries — fail fast
    lazyConnect: true, // connect explicitly below so failures surface in the try block
    connectTimeout: 5000, // fail-fast: 5-second cap on the connection attempt
    commandTimeout: 5000, // same cap on PING, so an accepted-but-silent socket cannot hang boot
  });

  try {
    await client.connect();
    const pong = await client.ping();
    if (pong !== 'PONG') {
      throw new Error(`Unexpected PING response: ${pong}`);
    }
  } catch (cause) {
    throw new TierDetectionError({
      service: 'valkey',
      host,
      port,
      remediation:
        'Start Valkey: `docker compose -f docker-compose.federated.yml --profile federated up -d valkey-federated`',
      cause,
    });
  } finally {
    // Always release the socket, whether the probe passed or failed.
    client.disconnect();
  }
}
|
||||||
|
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
/* Public entry point */
|
||||||
|
/* ------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/**
 * Pre-flight check: verify every external service the configured tier needs.
 *
 * - `local`      — nothing is probed.
 * - `standalone` — Postgres, then Valkey when `queue.type === 'bullmq'`.
 * - `federated`  — `queue.type` must be `'bullmq'` (checked before any
 *                  probe), then Postgres, Valkey and pgvector in that order.
 *
 * Probes run sequentially and the first failure aborts the rest.
 *
 * @param config Loaded Mosaic configuration.
 * @throws TierDetectionError on the first failing check, carrying host, port
 *         and a remediation hint.
 */
export async function detectAndAssertTier(config: MosaicConfig): Promise<void> {
  const { tier, storage, queue } = config;

  if (tier === 'local') {
    // PGlite runs in-process — nothing to probe.
    return;
  }

  // Non-postgres storage falls back to the conventional local URL.
  const postgresUrl =
    storage.type === 'postgres' ? storage.url : 'postgresql://localhost:5432/mosaic';

  if (tier === 'standalone') {
    const standaloneValkeyUrl =
      queue.type === 'bullmq' ? (queue.url ?? DEFAULT_VALKEY_URL) : null;

    await probePostgres(postgresUrl);
    // Valkey only matters when the queue is backed by it.
    if (standaloneValkeyUrl) {
      await probeValkey(standaloneValkeyUrl);
    }
    return;
  }

  // tier === 'federated'
  // Reject a misconfigured queue before any network I/O.
  if (queue.type !== 'bullmq') {
    throw new TierDetectionError({
      service: 'config',
      host: 'localhost',
      port: 0,
      remediation:
        "Federated tier requires queue.type === 'bullmq'. " +
        "Set queue: { type: 'bullmq', url: 'redis://...' } in your mosaic.config.json.",
    });
  }

  await probePostgres(postgresUrl);
  await probeValkey(queue.url ?? DEFAULT_VALKEY_URL);
  await probePgvector(postgresUrl);
}
|
||||||
@@ -20,10 +20,12 @@ import { Logger, ValidationPipe } from '@nestjs/common';
|
|||||||
import { FastifyAdapter, type NestFastifyApplication } from '@nestjs/platform-fastify';
|
import { FastifyAdapter, type NestFastifyApplication } from '@nestjs/platform-fastify';
|
||||||
import helmet from '@fastify/helmet';
|
import helmet from '@fastify/helmet';
|
||||||
import { listSsoStartupWarnings } from '@mosaicstack/auth';
|
import { listSsoStartupWarnings } from '@mosaicstack/auth';
|
||||||
|
import { loadConfig } from '@mosaicstack/config';
|
||||||
import { AppModule } from './app.module.js';
|
import { AppModule } from './app.module.js';
|
||||||
import { mountAuthHandler } from './auth/auth.controller.js';
|
import { mountAuthHandler } from './auth/auth.controller.js';
|
||||||
import { mountMcpHandler } from './mcp/mcp.controller.js';
|
import { mountMcpHandler } from './mcp/mcp.controller.js';
|
||||||
import { McpService } from './mcp/mcp.service.js';
|
import { McpService } from './mcp/mcp.service.js';
|
||||||
|
import { detectAndAssertTier, TierDetectionError } from './bootstrap/tier-detector.js';
|
||||||
|
|
||||||
async function bootstrap(): Promise<void> {
|
async function bootstrap(): Promise<void> {
|
||||||
const logger = new Logger('Bootstrap');
|
const logger = new Logger('Bootstrap');
|
||||||
@@ -32,6 +34,20 @@ async function bootstrap(): Promise<void> {
|
|||||||
throw new Error('BETTER_AUTH_SECRET is required');
|
throw new Error('BETTER_AUTH_SECRET is required');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Pre-flight: assert all external services required by the configured tier
|
||||||
|
// are reachable. Runs before NestFactory.create() so failures are visible
|
||||||
|
// immediately with actionable remediation hints.
|
||||||
|
const mosaicConfig = loadConfig();
|
||||||
|
try {
|
||||||
|
await detectAndAssertTier(mosaicConfig);
|
||||||
|
} catch (err) {
|
||||||
|
if (err instanceof TierDetectionError) {
|
||||||
|
logger.error(`Tier detection failed: ${err.message}`);
|
||||||
|
logger.error(`Remediation: ${err.remediation}`);
|
||||||
|
}
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
|
||||||
for (const warning of listSsoStartupWarnings()) {
|
for (const warning of listSsoStartupWarnings()) {
|
||||||
logger.warn(warning);
|
logger.warn(warning);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -19,8 +19,8 @@ Goal: Gateway runs in `federated` tier with containerized PG+pgvector+Valkey. No
|
|||||||
| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ------------------------------- | ---------- | -------- | --------------------------------------------------------------------------------------------------------------------------------- |
|
| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ------------------------------- | ---------- | -------- | --------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| FED-M1-01 | done | Extend `mosaic.config.json` schema: add `"federated"` to `tier` enum in validator + TS types. Keep `local` and `standalone` working. Update schema docs/README where referenced. | #460 | sonnet | feat/federation-m1-tier-config | — | 4K | Shipped in PR #470. Renamed `team` → `standalone`; added `team` deprecation alias; added `DEFAULT_FEDERATED_CONFIG`. |
|
| FED-M1-01 | done | Extend `mosaic.config.json` schema: add `"federated"` to `tier` enum in validator + TS types. Keep `local` and `standalone` working. Update schema docs/README where referenced. | #460 | sonnet | feat/federation-m1-tier-config | — | 4K | Shipped in PR #470. Renamed `team` → `standalone`; added `team` deprecation alias; added `DEFAULT_FEDERATED_CONFIG`. |
|
||||||
| FED-M1-02 | in-progress | Author `docker-compose.federated.yml` as an overlay profile: Postgres 17 + pgvector extension (port 5433), Valkey (6380), named volumes, healthchecks. Compose-up should boot cleanly on a clean machine. | #460 | sonnet | feat/federation-m1-compose | FED-M1-01 | 5K | Bumped PG16→PG17 to match base compose. Overlay defines distinct `postgres-federated`/`valkey-federated` services, profile-gated. |
|
| FED-M1-02 | in-progress | Author `docker-compose.federated.yml` as an overlay profile: Postgres 17 + pgvector extension (port 5433), Valkey (6380), named volumes, healthchecks. Compose-up should boot cleanly on a clean machine. | #460 | sonnet | feat/federation-m1-compose | FED-M1-01 | 5K | Bumped PG16→PG17 to match base compose. Overlay defines distinct `postgres-federated`/`valkey-federated` services, profile-gated. |
|
||||||
| FED-M1-03 | not-started | Add pgvector support to `packages/storage/src/adapters/postgres.ts`: create extension on init (idempotent), expose vector column type in schema helpers. No adapter changes for non-federated tiers. | #460 | codex | feat/federation-m1-pgvector | FED-M1-02 | 8K | Extension create is idempotent `CREATE EXTENSION IF NOT EXISTS vector`. Gate on tier = federated. |
|
| FED-M1-03 | done | Add pgvector support to `packages/storage/src/adapters/postgres.ts`: create extension on init (idempotent), expose vector column type in schema helpers. No adapter changes for non-federated tiers. | #460 | sonnet | feat/federation-m1-pgvector | FED-M1-02 | 8K | Shipped in PR #472. `enableVector` flag on postgres StorageConfig; idempotent CREATE EXTENSION before migrations. |
|
||||||
| FED-M1-04 | not-started | Implement `apps/gateway/src/bootstrap/tier-detector.ts`: reads config, asserts PG/Valkey/pgvector reachable for `federated`, fail-fast with actionable error message on failure. Unit tests for each failure mode. | #460 | codex | feat/federation-m1-detector | FED-M1-03 | 8K | Structured error type with remediation hints. Logs which service failed, with host:port attempted. |
|
| FED-M1-04 | in-progress | Implement `apps/gateway/src/bootstrap/tier-detector.ts`: reads config, asserts PG/Valkey/pgvector reachable for `federated`, fail-fast with actionable error message on failure. Unit tests for each failure mode. | #460 | sonnet | feat/federation-m1-detector | FED-M1-03 | 8K | Worker delivered; reviewer flagged 3 issues (Valkey timeout, pgvector error discrimination, federated/non-bullmq guard) — fixed. |
|
||||||
| FED-M1-05 | not-started | Write `scripts/migrate-to-federated.ts`: one-way migration from `local` (PGlite) / `standalone` (PG without pgvector) → `federated`. Dumps, transforms, loads; dry-run + confirm UX. Idempotent on re-run. | #460 | codex | feat/federation-m1-migrate | FED-M1-04 | 10K | Do NOT run automatically. CLI subcommand `mosaic migrate tier --to federated --dry-run`. Safety rails. |
|
| FED-M1-05 | not-started | Write `scripts/migrate-to-federated.ts`: one-way migration from `local` (PGlite) / `standalone` (PG without pgvector) → `federated`. Dumps, transforms, loads; dry-run + confirm UX. Idempotent on re-run. | #460 | codex | feat/federation-m1-migrate | FED-M1-04 | 10K | Do NOT run automatically. CLI subcommand `mosaic migrate tier --to federated --dry-run`. Safety rails. |
|
||||||
| FED-M1-06 | not-started | Update `mosaic doctor`: report current tier, required services, actual health per service, pgvector presence, overall green/yellow/red. Machine-readable JSON output flag for CI use. | #460 | sonnet | feat/federation-m1-doctor | FED-M1-04 | 6K | Existing doctor output evolves; add `--json` flag. Green/yellow/red + remediation suggestions per issue. |
|
| FED-M1-06 | not-started | Update `mosaic doctor`: report current tier, required services, actual health per service, pgvector presence, overall green/yellow/red. Machine-readable JSON output flag for CI use. | #460 | sonnet | feat/federation-m1-doctor | FED-M1-04 | 6K | Existing doctor output evolves; add `--json` flag. Green/yellow/red + remediation suggestions per issue. |
|
||||||
| FED-M1-07 | not-started | Integration test: gateway boots in `federated` tier with docker-compose `federated` profile; refuses to boot when PG unreachable (asserts fail-fast); pgvector extension query succeeds. | #460 | sonnet | feat/federation-m1-integration | FED-M1-04 | 8K | Vitest + docker-compose test profile. One test file per assertion; real services, no mocks. |
|
| FED-M1-07 | not-started | Integration test: gateway boots in `federated` tier with docker-compose `federated` profile; refuses to boot when PG unreachable (asserts fail-fast); pgvector extension query succeeds. | #460 | sonnet | feat/federation-m1-integration | FED-M1-04 | 8K | Vitest + docker-compose test profile. One test file per assertion; real services, no mocks. |
|
||||||
|
|||||||
@@ -343,3 +343,39 @@ Affected files (storage-tier semantics only — Team/workspace usages unaffected
|
|||||||
|
|
||||||
- `MVP-T04` (sync `.mosaic/orchestrator/mission.json`) still deferred.
|
- `MVP-T04` (sync `.mosaic/orchestrator/mission.json`) still deferred.
|
||||||
- `team` tier rename touches install wizard headless env vars (`MOSAIC_STORAGE_TIER=team`); will need 0.0.x deprecation note in scratchpad if release notes are written this milestone.
|
- `team` tier rename touches install wizard headless env vars (`MOSAIC_STORAGE_TIER=team`); will need 0.0.x deprecation note in scratchpad if release notes are written this milestone.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Session 17 — 2026-04-19 — claude
|
||||||
|
|
||||||
|
**Mode:** Delivery (W1 / FED-M1 execution; resumed after compaction)
|
||||||
|
**Branches landed this run:** `feat/federation-m1-tier-config` (PR #470), `feat/federation-m1-compose` (PR #471), `feat/federation-m1-pgvector` (PR #472)
|
||||||
|
**Branch active at end:** `feat/federation-m1-detector` (FED-M1-04, ready to push)
|
||||||
|
|
||||||
|
**Tasks closed:** FED-M1-01, FED-M1-02, FED-M1-03 (all merged to `main` via squash, CI green, issue #460 still open as milestone).
|
||||||
|
|
||||||
|
**FED-M1-04 — tier-detector:** Worker delivered `apps/gateway/src/bootstrap/tier-detector.ts` (~210 lines) + `tier-detector.spec.ts` (12 tests). Independent code review (sonnet) returned `changes-required` with 3 issues:
|
||||||
|
|
||||||
|
1. CRITICAL: `probeValkey` missing `connectTimeout: 5000` on the ioredis Redis client (defaulted to 10s, violated fail-fast spec).
|
||||||
|
2. IMPORTANT: `probePgvector` catch block did not discriminate "library not installed" (use `pgvector/pgvector:pg17`) from permission errors.
|
||||||
|
3. IMPORTANT: Federated tier silently skipped Valkey probe when `queue.type !== 'bullmq'` (computed Valkey URL conditionally).
|
||||||
|
|
||||||
|
Worker fix-up round addressed all three:
|
||||||
|
|
||||||
|
- L147: `connectTimeout: 5000` added to Redis options
|
||||||
|
- L113-117: catch block branches on `extension "vector" is not available` substring → distinct remediation per failure mode
|
||||||
|
- L206-215: federated branch fails fast with `service: 'config'` if `queue.type !== 'bullmq'`, then probes Valkey unconditionally
|
||||||
|
- 4 new tests (8 → 12 total) cover each fix specifically
|
||||||
|
|
||||||
|
Independent verifier (haiku) confirmed all 6 verification claims (line numbers, test presence, suite green: 12/12 PASS).
|
||||||
|
|
||||||
|
**Process note — review pipeline working as designed:**
|
||||||
|
|
||||||
|
The initial verifier (haiku) on the first delivery returned "OK to ship" but missed the 3 deeper issues that the sonnet code-reviewer caught. This validates the user's "always verify subagent claims independently with another subagent" rule, with one refinement: the verifying subagent must be the **right tier** for the task. Code review needs sonnet-level reasoning, while haiku is fine for verifying surface claims (line counts, file existence) once review issues are known. Going forward: code review uses sonnet (`feature-dev:code-reviewer`), claim verification uses haiku.
|
||||||
|
|
||||||
|
**Follow-up tasks tracked but deferred:**
|
||||||
|
|
||||||
|
- #7: `tier=local` hardcoded in gateway-config resume branches (~262, ~317) — pre-existing bug, fix during M1-06 (doctor) or M1-09 (regression).
|
||||||
|
- #8: confirm `packages/config/dist` not git-tracked.
|
||||||
|
|
||||||
|
**Next:** PR for FED-M1-04 → CI wait → merge. Then FED-M1-05 (migration script, codex/sonnet, 10K).
|
||||||
|
|||||||
6
pnpm-lock.yaml
generated
6
pnpm-lock.yaml
generated
@@ -152,12 +152,18 @@ importers:
|
|||||||
fastify:
|
fastify:
|
||||||
specifier: ^5.0.0
|
specifier: ^5.0.0
|
||||||
version: 5.8.2
|
version: 5.8.2
|
||||||
|
ioredis:
|
||||||
|
specifier: ^5.10.0
|
||||||
|
version: 5.10.0
|
||||||
node-cron:
|
node-cron:
|
||||||
specifier: ^4.2.1
|
specifier: ^4.2.1
|
||||||
version: 4.2.1
|
version: 4.2.1
|
||||||
openai:
|
openai:
|
||||||
specifier: ^6.32.0
|
specifier: ^6.32.0
|
||||||
version: 6.32.0(ws@8.20.0)(zod@4.3.6)
|
version: 6.32.0(ws@8.20.0)(zod@4.3.6)
|
||||||
|
postgres:
|
||||||
|
specifier: ^3.4.8
|
||||||
|
version: 3.4.8
|
||||||
reflect-metadata:
|
reflect-metadata:
|
||||||
specifier: ^0.2.0
|
specifier: ^0.2.0
|
||||||
version: 0.2.2
|
version: 0.2.2
|
||||||
|
|||||||
Reference in New Issue
Block a user