Files
stack/packages/storage/src/migrate-tier.spec.ts
jason.woltje ccad30dd27
All checks were successful
ci/woodpecker/push/ci Pipeline was successful
ci/woodpecker/push/publish Pipeline was successful
feat(storage): mosaic storage migrate-tier with dry-run + idempotency (FED-M1-05) (#474)
2026-04-20 00:35:08 +00:00

496 lines
17 KiB
TypeScript

/**
* migrate-tier.spec.ts — Unit tests for the migrate-tier core logic.
*
* These are pure unit tests — no real database connections.
* FED-M1-08 will add integration tests against real services.
*/
import { describe, it, expect, vi } from 'vitest';
import {
getMigrationOrder,
topoSort,
runMigrateTier,
checkTargetPreconditions,
normaliseSourceRow,
SKIP_TABLES,
MigrationPreconditionError,
type MigrationSource,
type MigrationTarget,
} from './migrate-tier.js';
/* ------------------------------------------------------------------ */
/* Mock factories */
/* ------------------------------------------------------------------ */
/**
 * Creates an in-memory stand-in for a MigrationSource.
 *
 * `data` maps a table name to the rows held for that table. The returned
 * object exposes:
 *   - readTable(tableName, opts?): a page of rows selected by offset/limit
 *   - count(tableName): the number of rows held for the table
 *   - close(): resolves to undefined
 *   - readTableCalls: a log of every readTable invocation, in call order
 *
 * When `sourceHasVector` is false, rows read from the 'insights' table are
 * returned without their 'embedding' key. This mimics the no-pgvector
 * projection that DrizzleMigrationSource is described as applying for
 * local/PGlite sources. count() is not affected by the flag.
 */
function makeMockSource(
  data: Record<string, Record<string, unknown>[]>,
  sourceHasVector = true,
): MigrationSource & {
  readTableCalls: Array<{ table: string; opts?: { limit?: number; offset?: number } }>;
} {
  type PageOpts = { limit?: number; offset?: number };

  const readTableCalls: Array<{ table: string; opts?: PageOpts }> = [];

  // Unknown tables behave like empty tables.
  const rowsFor = (tableName: string): Record<string, unknown>[] => data[tableName] ?? [];

  const readTable = vi.fn(async (tableName: string, opts?: PageOpts) => {
    // Record the call before doing any work so the log reflects call order.
    readTableCalls.push({ table: tableName, opts });

    const dropEmbedding = tableName === 'insights' && !sourceHasVector;
    const rows = dropEmbedding
      ? rowsFor(tableName).map((row) => {
          const { embedding: _omit, ...rest } = row;
          return rest;
        })
      : rowsFor(tableName);

    // Without an explicit limit the page extends to the end of the table.
    const start = opts?.offset ?? 0;
    const end = start + (opts?.limit ?? rows.length);
    return rows.slice(start, end);
  });

  const count = vi.fn(async (tableName: string) => rowsFor(tableName).length);
  const close = vi.fn(async () => undefined);

  return { readTableCalls, readTable, count, close };
}
/**
 * Creates an in-memory stand-in for a MigrationTarget.
 *
 * - upsertBatch(table, rows) appends to `upsertCalls` and adds `rows.length`
 *   to a per-table running total (rows are counted, not de-duplicated).
 * - count(table) returns 5 when `table` equals `opts.nonEmptyTable`,
 *   otherwise the running total for that table (0 if nothing was written).
 * - hasPgvector() resolves to `opts.hasPgvector`, defaulting to true.
 * - close() resolves to undefined.
 */
function makeMockTarget(opts?: {
  hasPgvector?: boolean;
  nonEmptyTable?: string;
}): MigrationTarget & { upsertCalls: Array<{ table: string; rows: Record<string, unknown>[] }> } {
  const upsertCalls: Array<{ table: string; rows: Record<string, unknown>[] }> = [];
  const storedCounts: Record<string, number> = {};

  const upsertBatch = vi.fn(async (table: string, rows: Record<string, unknown>[]) => {
    upsertCalls.push({ table, rows });
    const previous = storedCounts[table] ?? 0;
    storedCounts[table] = previous + rows.length;
  });

  const count = vi.fn(async (table: string) => {
    // A table flagged as non-empty always reports 5 rows, regardless of writes.
    const forcedNonEmpty = opts?.nonEmptyTable === table;
    return forcedNonEmpty ? 5 : (storedCounts[table] ?? 0);
  });

  const hasPgvector = vi.fn(async () => opts?.hasPgvector ?? true);
  const close = vi.fn(async () => undefined);

  return { upsertCalls, upsertBatch, count, hasPgvector, close };
}
/**
 * Returns a new progress callback that discards every message it is given.
 */
function noopProgress(): (msg: string) => void {
  const discard = (): undefined => undefined;
  return discard;
}
/* ------------------------------------------------------------------ */
/* 1. Topological ordering */
/* ------------------------------------------------------------------ */
describe('topoSort', () => {
  /**
   * Asserts that `parent` and `child` both occur in `order` and that `parent`
   * comes first.
   *
   * The presence checks are deliberate: `indexOf` yields -1 for a missing
   * entry, and -1 is less than every valid index, so a bare index comparison
   * would still pass if the parent were absent from the result.
   */
  const expectBefore = (order: readonly string[], parent: string, child: string): void => {
    expect(order).toContain(parent);
    expect(order).toContain(child);
    expect(order.indexOf(parent)).toBeLessThan(order.indexOf(child));
  };

  it('returns empty array for empty input', () => {
    expect(topoSort(new Map())).toEqual([]);
  });

  it('orders parents before children — linear chain', () => {
    // users -> teams -> messages
    const deps = new Map([
      ['users', []],
      ['teams', ['users']],
      ['messages', ['teams']],
    ]);
    const order = topoSort(deps);
    expectBefore(order, 'users', 'teams');
    expectBefore(order, 'teams', 'messages');
  });

  it('orders parents before children — diamond graph', () => {
    // a -> (b, c) -> d
    const deps = new Map([
      ['a', []],
      ['b', ['a']],
      ['c', ['a']],
      ['d', ['b', 'c']],
    ]);
    const order = topoSort(deps);
    expectBefore(order, 'a', 'b');
    expectBefore(order, 'a', 'c');
    expectBefore(order, 'b', 'd');
    expectBefore(order, 'c', 'd');
  });

  it('throws on cyclic dependencies', () => {
    // a depends on b and b depends on a: no valid ordering exists.
    const deps = new Map([
      ['a', ['b']],
      ['b', ['a']],
    ]);
    expect(() => topoSort(deps)).toThrow('Cycle detected');
  });
});
/* ------------------------------------------------------------------ */
/* 2. getMigrationOrder — sessions / verifications / admin_tokens excluded */
/* ------------------------------------------------------------------ */
describe('getMigrationOrder', () => {
  // Tables that must never appear in the migration order.
  it.each(['sessions', 'verifications', 'admin_tokens'])('does not include "%s"', (table) => {
    expect(getMigrationOrder()).not.toContain(table);
  });

  // [parent, child] pairs: the parent must be migrated before the child.
  it.each([
    ['users', 'teams'],
    ['users', 'conversations'],
    ['conversations', 'messages'],
    ['projects', 'agents'],
    ['agents', 'conversations'],
    ['missions', 'mission_tasks'],
  ])('includes "%s" before "%s"', (parent, child) => {
    const order = getMigrationOrder();
    // Assert presence explicitly: indexOf returns -1 for a missing table, and
    // -1 is less than any valid index, so the ordering check alone would pass
    // even if the parent table were absent from the order.
    expect(order).toContain(parent);
    expect(order).toContain(child);
    expect(order.indexOf(parent)).toBeLessThan(order.indexOf(child));
  });

  it('includes all expected tables', () => {
    const order = getMigrationOrder();
    const expected = [
      'users',
      'teams',
      'accounts',
      'projects',
      'agents',
      'conversations',
      'messages',
      'insights',
    ];
    for (const t of expected) {
      expect(order).toContain(t);
    }
  });
});
/* ------------------------------------------------------------------ */
/* 3. Dry-run makes no writes */
/* ------------------------------------------------------------------ */
describe('runMigrateTier — dry-run', () => {
  /** Builds a fresh dry-run options object; progress output is discarded. */
  const dryRunOptions = () => ({
    targetUrl: 'postgresql://localhost/test',
    dryRun: true,
    allowNonEmpty: false,
    batchSize: 100,
    onProgress: noopProgress(),
  });

  it('makes no calls to upsertBatch', async () => {
    const source = makeMockSource({
      users: [{ id: 'u1', name: 'Alice', email: 'alice@example.com' }],
    });
    const target = makeMockTarget();

    const result = await runMigrateTier(source, target, dryRunOptions());

    // Nothing may be written to the target, and the result must say so.
    expect(target.upsertCalls).toHaveLength(0);
    expect(result.dryRun).toBe(true);
    expect(result.totalRows).toBe(0);
  });

  it('does not call checkTargetPreconditions in dry-run', async () => {
    // The target reports no pgvector; a dry run must still complete.
    const source = makeMockSource({});
    const target = makeMockTarget({ hasPgvector: false });

    const run = runMigrateTier(source, target, dryRunOptions());
    await expect(run).resolves.not.toThrow();

    // The pgvector probe must never have been invoked during the dry run.
    expect(target.hasPgvector).not.toHaveBeenCalled();
  });
});
/* ------------------------------------------------------------------ */
/* 4. Idempotency */
/* ------------------------------------------------------------------ */
describe('runMigrateTier — idempotency', () => {
  it('produces the same logical row count on second run (upsert semantics)', async () => {
    const userData = [
      { id: 'u1', name: 'Alice', email: 'alice@example.com' },
      { id: 'u2', name: 'Bob', email: 'bob@example.com' },
    ];
    const source = makeMockSource({ users: userData });

    // Sums the rows handed to upsertBatch for the 'users' table on a mock target.
    const usersRowsWritten = (target: ReturnType<typeof makeMockTarget>): number =>
      target.upsertCalls
        .filter((call) => call.table === 'users')
        .reduce((sum, call) => sum + call.rows.length, 0);

    // Runs one real (non-dry) migration against a brand-new mock target and
    // reports how many 'users' rows were written to it.
    const migrateIntoFreshTarget = async (allowNonEmpty: boolean): Promise<number> => {
      const target = makeMockTarget();
      await runMigrateTier(source, target, {
        targetUrl: 'postgresql://localhost/test',
        dryRun: false,
        allowNonEmpty,
        batchSize: 100,
        onProgress: noopProgress(),
      });
      return usersRowsWritten(target);
    };

    // First run: strict empty-target mode.
    const firstRunRows = await migrateIntoFreshTarget(false);

    // Second run: allowNonEmpty enabled.
    // NOTE(review): each run gets its own fresh mock target, so this verifies
    // that repeated runs write the same number of rows, not that a second run
    // over already-populated tables updates in place — confirm this is intended.
    const secondRunRows = await migrateIntoFreshTarget(true);

    // Both runs must write exactly one row per source user.
    expect(firstRunRows).toBe(userData.length);
    expect(secondRunRows).toBe(userData.length);
  });
});
/* ------------------------------------------------------------------ */
/* 5. Empty-target precondition */
/* ------------------------------------------------------------------ */
describe('checkTargetPreconditions', () => {
  it('throws when target table is non-empty and allowNonEmpty is false', async () => {
    // The mock reports 5 rows for 'users'.
    const target = makeMockTarget({ nonEmptyTable: 'users' });
    const check = checkTargetPreconditions(target, false, ['users']);
    await expect(check).rejects.toThrow(MigrationPreconditionError);
  });

  it('includes remediation hint in thrown error', async () => {
    const target = makeMockTarget({ nonEmptyTable: 'users' });
    const check = checkTargetPreconditions(target, false, ['users']);
    // The error must point the operator at the override flag.
    await expect(check).rejects.toMatchObject({
      name: 'MigrationPreconditionError',
      remediation: expect.stringContaining('--allow-non-empty'),
    });
  });

  it('does NOT throw when allowNonEmpty is true', async () => {
    const target = makeMockTarget({ nonEmptyTable: 'users' });
    const check = checkTargetPreconditions(target, true, ['users']);
    await expect(check).resolves.not.toThrow();
  });

  it('throws when pgvector extension is missing', async () => {
    // Target is empty but reports that pgvector is unavailable.
    const target = makeMockTarget({ hasPgvector: false });
    const check = checkTargetPreconditions(target, false, ['users']);
    await expect(check).rejects.toMatchObject({
      name: 'MigrationPreconditionError',
      remediation: expect.stringContaining('pgvector'),
    });
  });

  it('passes when target is empty and pgvector is present', async () => {
    const target = makeMockTarget({ hasPgvector: true });
    const check = checkTargetPreconditions(target, false, ['users']);
    await expect(check).resolves.not.toThrow();
  });
});
/* ------------------------------------------------------------------ */
/* 6. Skipped tables documented */
/* ------------------------------------------------------------------ */
describe('SKIP_TABLES', () => {
  // Tables that are expected to be skipped by the migration.
  const expectedSkipped = ['sessions', 'verifications', 'admin_tokens'];

  // One membership test per expected table.
  for (const table of expectedSkipped) {
    it(`includes "${table}"`, () => {
      expect(SKIP_TABLES.has(table)).toBe(true);
    });
  }

  it('migration result includes skipped table entries', async () => {
    const source = makeMockSource({});
    const target = makeMockTarget();

    const result = await runMigrateTier(source, target, {
      targetUrl: 'postgresql://localhost/test',
      dryRun: false,
      allowNonEmpty: false,
      batchSize: 100,
      onProgress: noopProgress(),
    });

    // Collect the names of every table the result reports as skipped.
    const skippedNames: string[] = [];
    for (const entry of result.tables) {
      if (entry.skipped) {
        skippedNames.push(entry.table);
      }
    }

    expect(skippedNames).toContain('sessions');
    expect(skippedNames).toContain('verifications');
    expect(skippedNames).toContain('admin_tokens');
  });
});
/* ------------------------------------------------------------------ */
/* 7. Embedding NULL on migrate from non-pgvector source */
/* ------------------------------------------------------------------ */
describe('normaliseSourceRow — embedding handling', () => {
  it('sets embedding to null when sourceHasVector is false and table is insights', () => {
    // The input row carries no 'embedding' key at all.
    const insightRow: Record<string, unknown> = {
      id: 'ins-1',
      content: 'Some insight',
      userId: 'u1',
    };
    const result = normaliseSourceRow('insights', insightRow, false);
    expect(result['embedding']).toBeNull();
  });

  it('preserves existing embedding when sourceHasVector is true', () => {
    const vector = [0.1, 0.2, 0.3];
    const insightRow: Record<string, unknown> = {
      id: 'ins-1',
      content: 'Some insight',
      userId: 'u1',
      embedding: vector,
    };
    const result = normaliseSourceRow('insights', insightRow, true);
    // Identity check: the very same array instance must come back.
    expect(result['embedding']).toBe(vector);
  });

  it('does not add embedding field to non-vector tables', () => {
    const userRow: Record<string, unknown> = { id: 'u1', name: 'Alice' };
    const result = normaliseSourceRow('users', userRow, false);
    const hasEmbeddingKey = 'embedding' in result;
    expect(hasEmbeddingKey).toBe(false);
  });

  it('passes through rows for non-vector tables unchanged', () => {
    const userRow: Record<string, unknown> = { id: 'u1', name: 'Alice', email: 'alice@test.com' };
    const result = normaliseSourceRow('users', userRow, false);
    expect(result).toEqual(userRow);
  });
});
/* ------------------------------------------------------------------ */
/* 8. End-to-end: correct order of upsert calls */
/* ------------------------------------------------------------------ */
describe('runMigrateTier — migration order', () => {
  it('writes users before messages', async () => {
    const source = makeMockSource({
      users: [{ id: 'u1', name: 'Alice', email: 'alice@test.com' }],
      messages: [{ id: 'm1', conversationId: 'c1', role: 'user', content: 'Hi' }],
    });
    const target = makeMockTarget();

    await runMigrateTier(source, target, {
      targetUrl: 'postgresql://localhost/test',
      dryRun: false,
      allowNonEmpty: false,
      batchSize: 100,
      onProgress: noopProgress(),
    });

    // Position of the first upsert call made for a given table (-1 if none).
    const firstUpsertIndex = (table: string): number =>
      target.upsertCalls.findIndex((call) => call.table === table);

    const usersIdx = firstUpsertIndex('users');
    const messagesIdx = firstUpsertIndex('messages');

    // Both tables must have been written, and users must come first.
    expect(usersIdx).toBeGreaterThanOrEqual(0);
    expect(messagesIdx).toBeGreaterThanOrEqual(0);
    expect(usersIdx).toBeLessThan(messagesIdx);
  });
});
/* ------------------------------------------------------------------ */
/* 9. Embedding-null projection: no-pgvector source */
/* ------------------------------------------------------------------ */
describe('DrizzleMigrationSource embedding-null projection', () => {
  it(
    'when sourceHasVector is false, readTable for insights omits embedding column ' +
      'and normaliseSourceRow sets it to null for the target insert',
    async () => {
      // The raw fixture carries a vector, but the mock source is built with
      // sourceHasVector=false, so readTable strips 'embedding' from insights rows.
      const rawInsight = {
        id: 'ins-1',
        userId: 'u1',
        content: 'Test insight',
        embedding: [0.1, 0.2, 0.3],
        source: 'agent',
        category: 'general',
        relevanceScore: 1.0,
      };
      const tables = {
        users: [{ id: 'u1', name: 'Alice', email: 'alice@test.com' }],
        insights: [rawInsight],
      };
      const source = makeMockSource(tables, /* sourceHasVector= */ false);
      const target = makeMockTarget();
      const options = {
        targetUrl: 'postgresql://localhost/test',
        dryRun: false,
        allowNonEmpty: false,
        batchSize: 100,
        onProgress: noopProgress(),
      };

      await runMigrateTier(source, target, options, /* sourceHasVector= */ false);

      // The insights table must have been read from the source.
      const insightsRead = source.readTableCalls.find((call) => call.table === 'insights');
      expect(insightsRead).toBeDefined();

      // The insights table must have been written to the target.
      const insightsUpsert = target.upsertCalls.find((call) => call.table === 'insights');
      expect(insightsUpsert).toBeDefined();

      const [upsertedRow] = insightsUpsert?.rows ?? [];
      expect(upsertedRow).toBeDefined();

      // The written embedding must be null, not the fixture's [0.1, 0.2, 0.3].
      expect(upsertedRow?.['embedding']).toBeNull();

      // Remaining fields must arrive untouched.
      expect(upsertedRow?.['id']).toBe('ins-1');
      expect(upsertedRow?.['content']).toBe('Test insight');
    },
  );
});