fix(storage): redact credentials in driver errors + advisory lock (FED-M1-10) (#479)
This commit was merged in pull request #479.
This commit is contained in:
@@ -16,7 +16,7 @@
|
||||
Goal: Gateway runs in `federated` tier with containerized PG+pgvector+Valkey. No federation logic yet. Existing standalone behavior does not regress.
|
||||
|
||||
| id | status | description | issue | agent | branch | depends_on | estimate | notes |
|
||||
| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ------------------------------- | ---------- | -------- | ---------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| --------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | ------ | ---------------------------------- | ---------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| FED-M1-01 | done | Extend `mosaic.config.json` schema: add `"federated"` to `tier` enum in validator + TS types. Keep `local` and `standalone` working. Update schema docs/README where referenced. | #460 | sonnet | feat/federation-m1-tier-config | — | 4K | Shipped in PR #470. Renamed `team` → `standalone`; added `team` deprecation alias; added `DEFAULT_FEDERATED_CONFIG`. |
|
||||
| FED-M1-02 | done | Author `docker-compose.federated.yml` as an overlay profile: Postgres 17 + pgvector extension (port 5433), Valkey (6380), named volumes, healthchecks. Compose-up should boot cleanly on a clean machine. | #460 | sonnet | feat/federation-m1-compose | FED-M1-01 | 5K | Shipped in PR #471. Overlay defines `postgres-federated`/`valkey-federated`, profile-gated, with pg-init for pgvector extension. |
|
||||
| FED-M1-03 | done | Add pgvector support to `packages/storage/src/adapters/postgres.ts`: create extension on init (idempotent), expose vector column type in schema helpers. No adapter changes for non-federated tiers. | #460 | sonnet | feat/federation-m1-pgvector | FED-M1-02 | 8K | Shipped in PR #472. `enableVector` flag on postgres StorageConfig; idempotent CREATE EXTENSION before migrations. |
|
||||
@@ -26,7 +26,7 @@ Goal: Gateway runs in `federated` tier with containerized PG+pgvector+Valkey. No
|
||||
| FED-M1-07 | done | Integration test: gateway boots in `federated` tier with docker-compose `federated` profile; refuses to boot when PG unreachable (asserts fail-fast); pgvector extension query succeeds. | #460 | sonnet | feat/federation-m1-integration | FED-M1-04 | 8K | Shipped in PR #476. 3 test files, 4 tests, gated by FEDERATED_INTEGRATION=1; reserved-port helper avoids host collisions. |
|
||||
| FED-M1-08 | done | Integration test for migration script: seed a local PGlite with representative data (tasks, notes, users, teams), run migration, assert row counts + key samples equal on federated PG. | #460 | sonnet | feat/federation-m1-migrate-test | FED-M1-05 | 6K | Shipped in PR #477. Caught P0 in M1-05 (camelCase→snake_case) missed by mocked unit tests; fix in same PR. |
|
||||
| FED-M1-09 | done | Standalone regression: full agent-session E2E on existing `standalone` tier with a gateway built from this branch. Must pass without referencing any federation module. | #460 | sonnet | feat/federation-m1-regression | FED-M1-07 | 4K | Clean canary. 351 gateway tests + 85 storage unit tests + full pnpm test all green; only FEDERATED_INTEGRATION-gated tests skip. |
|
||||
| FED-M1-10 | not-started | Code review pass: security-focused on the migration script (data-at-rest during migration) + tier detector (error-message sensitivity leakage). Independent reviewer, not authors of tasks 01-09. | #460 | sonnet | — | FED-M1-09 | 8K | Use `feature-dev:code-reviewer` agent. Specifically: no secrets in error messages; no partial-migration footguns. |
|
||||
| FED-M1-10 | done | Code review pass: security-focused on the migration script (data-at-rest during migration) + tier detector (error-message sensitivity leakage). Independent reviewer, not authors of tasks 01-09. | #460 | sonnet | feat/federation-m1-security-review | FED-M1-09 | 8K | 2 review rounds caught 7 issues: credential leak in pg/valkey/pgvector errors + redact-error util; missing advisory lock; SKIP_TABLES rationale. |
|
||||
| FED-M1-11 | not-started | Docs update: `docs/federation/` operator notes for tier setup; README blurb on federated tier; `docs/guides/` entry for migration. Do NOT touch runbook yet (deferred to FED-M7). | #460 | haiku | feat/federation-m1-docs | FED-M1-10 | 4K | Short, actionable. Link from MISSION-MANIFEST. No decisions captured here — those belong in PRD. |
|
||||
| FED-M1-12 | not-started | PR, CI green, merge to main, close #460. | #460 | — | (aggregate) | FED-M1-11 | 3K | Queue-guard before push; wait for green; merge squashed; tea `issue-close` #460. |
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import type { Command } from 'commander';
|
||||
import type { MigrationSource } from './migrate-tier.js';
|
||||
import { redactErrMsg } from './redact-error.js';
|
||||
|
||||
/**
|
||||
* Reads the DATABASE_URL environment variable and redacts the password portion.
|
||||
@@ -73,7 +74,7 @@ export function registerStorageCommand(parent: Command): void {
|
||||
console.log('[storage] reachable: yes');
|
||||
} catch (err) {
|
||||
console.log(
|
||||
`[storage] reachable: no (${err instanceof Error ? err.message : String(err)})`,
|
||||
`[storage] reachable: no (${redactErrMsg(err instanceof Error ? err.message : String(err))})`,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
@@ -398,7 +399,7 @@ export function registerStorageCommand(parent: Command): void {
|
||||
}
|
||||
} catch (err) {
|
||||
console.error(
|
||||
`[migrate-tier] ERROR: ${err instanceof Error ? err.message : String(err)}`,
|
||||
`[migrate-tier] ERROR: ${redactErrMsg(err instanceof Error ? err.message : String(err))}`,
|
||||
);
|
||||
process.exitCode = 1;
|
||||
} finally {
|
||||
|
||||
@@ -29,6 +29,7 @@
|
||||
import postgres from 'postgres';
|
||||
import * as schema from '@mosaicstack/db';
|
||||
import { sql as drizzleSql } from '@mosaicstack/db';
|
||||
import { redactErrMsg } from './redact-error.js';
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Types */
|
||||
@@ -72,6 +73,20 @@ export interface MigrationTarget {
|
||||
|
||||
/** Close the target connection. */
|
||||
close(): Promise<void>;
|
||||
|
||||
/**
|
||||
* Attempt to acquire a session-level Postgres advisory lock for migrate-tier.
|
||||
* Returns true if the lock was acquired, false if another process holds it.
|
||||
* Targets that do not support advisory locks (e.g. test mocks) may omit this
|
||||
* by not implementing the method — the caller skips locking gracefully.
|
||||
*/
|
||||
tryAcquireAdvisoryLock?(): Promise<boolean>;
|
||||
|
||||
/**
|
||||
* Release the session-level advisory lock acquired by tryAcquireAdvisoryLock.
|
||||
* Must be called in a finally block.
|
||||
*/
|
||||
releaseAdvisoryLock?(): Promise<void>;
|
||||
}
|
||||
|
||||
export interface MigrateTierOptions {
|
||||
@@ -107,9 +122,28 @@ export interface MigrateTierResult {
|
||||
/**
|
||||
* SKIP_TABLES: ephemeral or environment-specific tables not worth migrating.
|
||||
*
|
||||
* - sessions: TTL'd auth sessions — invalid in new environment.
|
||||
* - verifications: one-time tokens (email verify, etc.) — already expired.
|
||||
* - admin_tokens: hashed tokens bound to old environment keys — re-issue.
|
||||
* WHY these tables are skipped:
|
||||
* - sessions: TTL'd auth sessions — they are invalid in the new environment
|
||||
* and would immediately expire or fail JWT verification anyway.
|
||||
* - verifications: one-time tokens (email verify, password-reset links, etc.)
|
||||
* — they have already expired or been consumed; re-sending is
|
||||
* the correct action on the new environment.
|
||||
* - admin_tokens: hashed tokens bound to the old environment's secret keys —
|
||||
* the hash is environment-specific and must be re-issued on
|
||||
* the target.
|
||||
*
|
||||
* WHY these tables are NOT skipped (intentionally migrated):
|
||||
* - accounts (OAuth tokens): durable credentials bound to the user's identity,
|
||||
* not to the deployment environment. OAuth tokens survive environment changes
|
||||
* and should follow the user to the federated tier.
|
||||
* - provider_credentials (AI provider keys): durable, user-owned API keys for
|
||||
* AI providers (e.g. OpenAI, Anthropic). These are bound to the user, not
|
||||
* the server, and must be preserved so AI features work immediately after
|
||||
* migration.
|
||||
*
|
||||
* OPERATOR NOTE: If migrating to a shared or multi-tenant federated tier, review
|
||||
* whether `accounts` and `provider_credentials` should be wiped post-migration
|
||||
* to prevent unintended cross-tenant credential exposure.
|
||||
*/
|
||||
export const SKIP_TABLES = new Set(['sessions', 'verifications', 'admin_tokens']);
|
||||
|
||||
@@ -482,6 +516,33 @@ export class PostgresMigrationTarget implements MigrationTarget {
|
||||
return rows.length > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to acquire a non-blocking session-level Postgres advisory lock
|
||||
* keyed by hashtext('mosaic-migrate-tier'). Returns true if acquired,
|
||||
* false if another session already holds the lock.
|
||||
*
|
||||
* The lock is session-scoped: it is automatically released when the
|
||||
* connection closes, and also explicitly released via releaseAdvisoryLock().
|
||||
*/
|
||||
async tryAcquireAdvisoryLock(): Promise<boolean> {
|
||||
const rows = await this.sql`
|
||||
SELECT pg_try_advisory_lock(hashtext('mosaic-migrate-tier')) AS acquired
|
||||
`;
|
||||
const row = rows[0] as { acquired: boolean } | undefined;
|
||||
return row?.acquired ?? false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Release the session-level advisory lock previously acquired by
|
||||
* tryAcquireAdvisoryLock(). Safe to call even if the lock was not held
|
||||
* (pg_advisory_unlock returns false but does not throw).
|
||||
*/
|
||||
async releaseAdvisoryLock(): Promise<void> {
|
||||
await this.sql`
|
||||
SELECT pg_advisory_unlock(hashtext('mosaic-migrate-tier'))
|
||||
`;
|
||||
}
|
||||
|
||||
async close(): Promise<void> {
|
||||
await this.sql.end();
|
||||
}
|
||||
@@ -647,6 +708,24 @@ export async function runMigrateTier(
|
||||
return { tables, totalRows: 0, dryRun: true };
|
||||
}
|
||||
|
||||
// Acquire a Postgres advisory lock on the target BEFORE checking preconditions
|
||||
// so that two concurrent invocations cannot both pass the non-empty guard and
|
||||
// race each other. Use non-blocking pg_try_advisory_lock so we fail fast
|
||||
// instead of deadlocking.
|
||||
//
|
||||
// Targets that don't implement tryAcquireAdvisoryLock (e.g. test mocks) skip
|
||||
// this step — the optional chaining guard handles that case.
|
||||
const lockAcquired = target.tryAcquireAdvisoryLock ? await target.tryAcquireAdvisoryLock() : true; // mocks / test doubles — no locking needed
|
||||
|
||||
if (!lockAcquired) {
|
||||
throw new Error(
|
||||
'Another migrate-tier process is already running against this target. ' +
|
||||
'Wait for it to complete or check for stuck locks via ' +
|
||||
"SELECT * FROM pg_locks WHERE locktype='advisory'.",
|
||||
);
|
||||
}
|
||||
|
||||
try {
|
||||
// Check preconditions before writing.
|
||||
await checkTargetPreconditions(target, allowNonEmpty, tablesToMigrate);
|
||||
|
||||
@@ -686,7 +765,7 @@ export async function runMigrateTier(
|
||||
);
|
||||
}
|
||||
} catch (err) {
|
||||
const errMsg = err instanceof Error ? err.message : String(err);
|
||||
const errMsg = redactErrMsg(err instanceof Error ? err.message : String(err));
|
||||
throw new Error(
|
||||
`[migrate-tier] Failed on table "${table}" after ${tableTotal.toString()} rows ` +
|
||||
`(last id: ${lastSuccessfulId ?? 'none'}). Error: ${errMsg}\n` +
|
||||
@@ -711,4 +790,10 @@ export async function runMigrateTier(
|
||||
|
||||
onProgress(`[migrate-tier] Complete. ${totalRows.toString()} total rows migrated.`);
|
||||
return { tables: results, totalRows, dryRun: false };
|
||||
} finally {
|
||||
// Release the advisory lock regardless of success or failure.
|
||||
if (target.releaseAdvisoryLock) {
|
||||
await target.releaseAdvisoryLock();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
62
packages/storage/src/redact-error.test.ts
Normal file
62
packages/storage/src/redact-error.test.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { redactErrMsg } from './redact-error.js';
|
||||
|
||||
describe('redactErrMsg', () => {
|
||||
it('redacts user:password from a postgres:// URL embedded in an error message', () => {
|
||||
const msg = 'connect ECONNREFUSED postgres://admin:s3cr3t@db.example.com:5432/mosaic';
|
||||
expect(redactErrMsg(msg)).toBe(
|
||||
'connect ECONNREFUSED postgres://***@db.example.com:5432/mosaic',
|
||||
);
|
||||
});
|
||||
|
||||
it('redacts user:password from a postgresql:// URL', () => {
|
||||
const msg = 'connection failed: postgresql://myuser:mypass@localhost:5432/testdb';
|
||||
expect(redactErrMsg(msg)).toBe('connection failed: postgresql://***@localhost:5432/testdb');
|
||||
});
|
||||
|
||||
it('handles URLs with no password (user only) — still redacts userinfo', () => {
|
||||
const msg = 'error postgres://justuser@host:5432/db';
|
||||
expect(redactErrMsg(msg)).toBe('error postgres://***@host:5432/db');
|
||||
});
|
||||
|
||||
it('returns the original message unchanged when no connection URL is present', () => {
|
||||
const msg = 'connection timed out after 5 seconds';
|
||||
expect(redactErrMsg(msg)).toBe('connection timed out after 5 seconds');
|
||||
});
|
||||
|
||||
it('is case-insensitive for the scheme (scheme is normalized to lowercase in output)', () => {
|
||||
// The regex replacement uses a lowercase literal, so the matched scheme is
|
||||
// replaced with the lowercase form regardless of the original casing.
|
||||
const msg = 'POSTGRES://admin:pass@host:5432/db';
|
||||
expect(redactErrMsg(msg)).toBe('postgres://***@host:5432/db');
|
||||
});
|
||||
|
||||
it('redacts multiple URLs in a single message', () => {
|
||||
const msg = 'src postgres://u:p@host1/db1 dst postgresql://v:q@host2/db2';
|
||||
expect(redactErrMsg(msg)).toBe('src postgres://***@host1/db1 dst postgresql://***@host2/db2');
|
||||
});
|
||||
|
||||
it('does not alter a message with a postgres URL that has no userinfo', () => {
|
||||
// No userinfo component — pattern does not match, message unchanged.
|
||||
const msg = 'error at postgres://host:5432/db';
|
||||
expect(redactErrMsg(msg)).toBe('error at postgres://host:5432/db');
|
||||
});
|
||||
|
||||
it('redacts user:password from a redis:// URL', () => {
|
||||
const msg = 'connect ECONNREFUSED redis://user:pass@host:6379';
|
||||
expect(redactErrMsg(msg)).toBe('connect ECONNREFUSED redis://***@host:6379');
|
||||
});
|
||||
|
||||
it('redacts user:password from a rediss:// URL (TLS)', () => {
|
||||
const msg = 'connect ECONNREFUSED rediss://user:pass@host:6379';
|
||||
expect(redactErrMsg(msg)).toBe('connect ECONNREFUSED rediss://***@host:6379');
|
||||
});
|
||||
|
||||
it('redacts both a postgres URL and a redis URL in the same message', () => {
|
||||
const msg =
|
||||
'primary postgres://admin:s3cr3t@db:5432/mosaic cache redis://cacheuser:cachepass@cache:6379';
|
||||
expect(redactErrMsg(msg)).toBe(
|
||||
'primary postgres://***@db:5432/mosaic cache redis://***@cache:6379',
|
||||
);
|
||||
});
|
||||
});
|
||||
39
packages/storage/src/redact-error.ts
Normal file
39
packages/storage/src/redact-error.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
/**
|
||||
* redact-error.ts — Internal credential-scrubbing helper.
|
||||
*
|
||||
* The `postgres` npm package can embed the full DSN (including the password)
|
||||
* in connection-failure error messages. This module provides a single helper
|
||||
* that strips the user:password portion from any such message before it is
|
||||
* re-thrown, logged, or surfaced in a structured health report.
|
||||
*
|
||||
* This file is intentionally NOT re-exported from the package index — it is
|
||||
* an internal utility for use within packages/storage/src only.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Redacts credentials from error messages that may include connection URLs.
|
||||
* The `postgres` npm package can embed the full DSN in connection-failure
|
||||
* messages, and ioredis can embed `redis://` / `rediss://` URLs similarly.
|
||||
* This helper strips the user:password portion before display.
|
||||
*
|
||||
* Handles `postgres://`, `postgresql://`, `redis://`, and `rediss://`
|
||||
* schemes (case-insensitive). Everything between `://` and `@` (the userinfo
|
||||
* component) is replaced with `***` so that the host, port, and database name
|
||||
* remain visible for diagnostics while the secret is never written to logs or
|
||||
* CI output.
|
||||
*
|
||||
* @example
|
||||
* redactErrMsg('connect ECONNREFUSED postgres://admin:s3cr3t@db:5432/mosaic')
|
||||
* // → 'connect ECONNREFUSED postgres://***@db:5432/mosaic'
|
||||
*
|
||||
* redactErrMsg('connect ECONNREFUSED redis://user:pass@cache:6379')
|
||||
* // → 'connect ECONNREFUSED redis://***@cache:6379'
|
||||
*/
|
||||
const CREDENTIAL_URL_RE = /(postgres(?:ql)?|rediss?):\/\/[^@\s]*@/gi;
|
||||
|
||||
export function redactErrMsg(msg: string): string {
|
||||
return msg.replace(
|
||||
CREDENTIAL_URL_RE,
|
||||
(_match, scheme: string) => `${scheme.toLowerCase()}://***@`,
|
||||
);
|
||||
}
|
||||
@@ -12,6 +12,7 @@
|
||||
|
||||
import postgres from 'postgres';
|
||||
import { Redis } from 'ioredis';
|
||||
import { redactErrMsg } from './redact-error.js';
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Local structural type — avoids circular dependency */
|
||||
@@ -159,7 +160,7 @@ async function probePostgresMeasured(url: string): Promise<ProbeResult> {
|
||||
port,
|
||||
durationMs: Date.now() - start,
|
||||
error: {
|
||||
message: cause instanceof Error ? cause.message : String(cause),
|
||||
message: redactErrMsg(cause instanceof Error ? cause.message : String(cause)),
|
||||
remediation:
|
||||
'Start Postgres: `docker compose -f docker-compose.federated.yml --profile federated up -d postgres-federated`',
|
||||
},
|
||||
@@ -231,7 +232,10 @@ async function probePgvectorMeasured(url: string): Promise<ProbeResult> {
|
||||
host,
|
||||
port,
|
||||
durationMs: Date.now() - start,
|
||||
error: { message: cause instanceof Error ? cause.message : String(cause), remediation },
|
||||
error: {
|
||||
message: redactErrMsg(cause instanceof Error ? cause.message : String(cause)),
|
||||
remediation,
|
||||
},
|
||||
};
|
||||
} finally {
|
||||
if (sql) {
|
||||
@@ -299,7 +303,7 @@ async function probeValkeyMeasured(url: string): Promise<ProbeResult> {
|
||||
port,
|
||||
durationMs: Date.now() - start,
|
||||
error: {
|
||||
message: cause instanceof Error ? cause.message : String(cause),
|
||||
message: redactErrMsg(cause instanceof Error ? cause.message : String(cause)),
|
||||
remediation:
|
||||
'Start Valkey: `docker compose -f docker-compose.federated.yml --profile federated up -d valkey-federated`',
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user