feat(storage): mosaic storage migrate-tier with dry-run + idempotency (FED-M1-05) (#474)
This commit was merged in pull request #474.
This commit is contained in:
697
packages/storage/src/migrate-tier.ts
Normal file
697
packages/storage/src/migrate-tier.ts
Normal file
@@ -0,0 +1,697 @@
|
||||
/**
 * migrate-tier.ts — Core logic for `mosaic storage migrate-tier`.
 *
 * Migrates data from `tier: local` (PGlite, normalized Drizzle schema) or
 * `tier: standalone` (Postgres without pgvector) → `tier: federated`
 * (Postgres + pgvector).
 *
 * Source: DrizzleMigrationSource — reads from the NORMALIZED Drizzle/relational
 *   schema tables (not the flat `id TEXT + data JSONB` PgliteAdapter schema).
 *   Both local (PGlite) and standalone (Postgres) sources use the same Drizzle
 *   abstraction via createPgliteDb() or createDb() from @mosaicstack/db.
 * Target: PostgresMigrationTarget — upserts via raw SQL into the same schema.
 *
 * Key design decisions:
 *  - Tables are migrated in topological (FK-dependency) order so that
 *    parent rows exist before child rows are inserted.
 *  - sessions + verifications are skipped — they are ephemeral / TTL'd.
 *  - adminTokens is skipped — token hashes are environment-specific
 *    and should be re-issued on the target.
 *  - insights.embedding is omitted from the source SELECT when the source lacks
 *    pgvector (local/PGlite tier); the target insert gets NULL for that column.
 *    insights.embedding is nullable per schema (no .notNull() constraint).
 *  - Each batch of a table is written inside its own transaction, so a
 *    failed batch leaves no partial rows behind.
 *  - Upsert semantics (ON CONFLICT DO UPDATE) make re-runs idempotent.
 *
 * TODO (FED-M1-08): Add integration tests against real PGlite → real PG.
 */
|
||||
|
||||
import postgres from 'postgres';
|
||||
import * as schema from '@mosaicstack/db';
|
||||
import { sql as drizzleSql } from '@mosaicstack/db';
|
||||
|
||||
/* ------------------------------------------------------------------ */
/* Types                                                               */
/* ------------------------------------------------------------------ */

/**
 * Read side of a tier migration: anything that can page through and count
 * the rows of a named table.
 */
export interface MigrationSource {
  /**
   * Return rows from a table (normalized Drizzle schema rows).
   *
   * When the source has no vector support and the table has a vector
   * column, the source MUST omit the vector column from the result; the
   * caller sets it to null afterwards (see normaliseSourceRow).
   *
   * @param tableName snake_case table name.
   * @param opts Optional paging window; omit both fields to read everything.
   */
  readTable(
    tableName: string,
    opts?: { limit?: number; offset?: number },
  ): Promise<Record<string, unknown>[]>;

  /** Count rows in a table. */
  count(tableName: string): Promise<number>;

  /** Close the source connection. */
  close(): Promise<void>;
}
|
||||
|
||||
/**
 * Write side of a tier migration: the destination database that receives
 * upserted rows and answers the pre-flight questions.
 */
export interface MigrationTarget {
  /**
   * Upsert a batch of rows into a table.
   * Must use ON CONFLICT (id) DO UPDATE semantics.
   */
  upsertBatch(tableName: string, rows: Record<string, unknown>[]): Promise<void>;

  /** Count rows in a target table. */
  count(tableName: string): Promise<number>;

  /** Check whether the pgvector extension is installed. */
  hasPgvector(): Promise<boolean>;

  /** Close the target connection. */
  close(): Promise<void>;
}
|
||||
|
||||
/** Caller-supplied settings for one `runMigrateTier` invocation. */
export interface MigrateTierOptions {
  /**
   * Target postgres connection URL.
   * NOTE(review): `runMigrateTier` in this file never reads this field — it
   * receives an already-constructed target; presumably the CLI uses the URL
   * to build that target. Confirm against the caller.
   */
  targetUrl: string;
  /** Whether to skip all writes (dry-run). */
  dryRun: boolean;
  /** Skip the non-empty target guard. */
  allowNonEmpty: boolean;
  /** Rows requested from the source (and handed to the target) per batch. */
  batchSize: number;
  /** Called with progress messages. */
  onProgress: (msg: string) => void;
}
|
||||
|
||||
/** Outcome for a single table, as reported in `MigrateTierResult.tables`. */
export interface TableMigrationResult {
  /** snake_case table name. */
  table: string;
  /** Rows handed to the target for this table (0 for dry runs and skipped tables). */
  rowsMigrated: number;
  /** True when the table was deliberately left out of the migration. */
  skipped: boolean;
  /** Human-readable explanation, set only on skipped tables. */
  skipReason?: string;
}
|
||||
|
||||
/** Aggregate outcome returned by `runMigrateTier`. */
export interface MigrateTierResult {
  /** One entry per migrated table followed by one per skipped table. */
  tables: TableMigrationResult[];
  /** Sum of `rowsMigrated` over all tables. */
  totalRows: number;
  /** Echo of the dry-run flag the migration ran with. */
  dryRun: boolean;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
/* Schema: FK-aware topological table order                            */
/* ------------------------------------------------------------------ */

/**
 * SKIP_TABLES: ephemeral or environment-specific tables not worth migrating.
 *
 * - sessions:      TTL'd auth sessions — invalid in new environment.
 * - verifications: one-time tokens (email verify, etc.) — already expired.
 * - admin_tokens:  hashed tokens bound to old environment keys — re-issue.
 */
export const SKIP_TABLES = new Set(['sessions', 'verifications', 'admin_tokens']);
|
||||
|
||||
/**
 * Topologically ordered table list (parents before children).
 *
 * Derived from FK references in packages/db/src/schema.ts:
 *
 *   users                (no FKs)
 *   teams                → users
 *   team_members         → teams, users
 *   accounts             → users
 *   projects             → users, teams
 *   agents               → projects, users
 *   missions             → projects, users
 *   tasks                → projects, missions
 *   mission_tasks        → missions, tasks, users
 *   conversations        → users, projects, agents
 *   messages             → conversations
 *   preferences          → users
 *   insights             → users  [has embedding vector column]
 *   agent_logs           → users
 *   skills               → users (installedBy, nullable)
 *   routing_rules        → users (userId, nullable)
 *   provider_credentials → users
 *   appreciations        (no FKs)
 *   events               (no FKs)
 *   tickets              (no FKs)
 *   summarization_jobs   (no FKs)
 *
 * Skipped (not in this list):
 *   sessions      → users (ephemeral)
 *   verifications (no FKs, ephemeral)
 *   admin_tokens  → users (environment-specific)
 */
export const MIGRATION_ORDER: string[] = [
  'users',
  'teams',
  'team_members',
  'accounts',
  'projects',
  'agents',
  'missions',
  'tasks',
  'mission_tasks',
  'conversations',
  'messages',
  'preferences',
  'insights',
  'agent_logs',
  'skills',
  'routing_rules',
  'provider_credentials',
  'appreciations',
  'events',
  'tickets',
  'summarization_jobs',
];
|
||||
|
||||
/**
 * Tables that carry a vector embedding column on the target.
 * Rows of these tables get special handling when the source tier has no
 * pgvector support (column left out on read, set to null before writing).
 */
const VECTOR_TABLES = new Set(['insights']);
|
||||
|
||||
/* ------------------------------------------------------------------ */
/* Utility: derive topological order from an adjacency list            */
/* ------------------------------------------------------------------ */

/**
 * Given an adjacency list (table → list of tables it depends on),
 * return a valid topological ordering (Kahn's algorithm): every table
 * appears after all of the tables it depends on.
 *
 * A dependency that is mentioned but has no entry of its own in `deps` is
 * treated as a node without dependencies and is included in the result.
 * (Previously such a dependency was never released, which made every table
 * depending on it look like part of a cycle.)
 *
 * For graphs in which every dependency is also a key, the output order is
 * driven by the map's insertion order.
 *
 * Exposed for unit testing.
 *
 * @param deps Map of node → nodes it depends on.
 * @returns All nodes, parents before children.
 * @throws Error when the graph contains a cycle (including a self-reference).
 */
export function topoSort(deps: Map<string, string[]>): string[] {
  const inDegree = new Map<string, number>();
  const dependents = new Map<string, string[]>();

  // Pass 1: register the declared nodes first so that their relative order
  // (which decides tie-breaking below) follows the map's insertion order.
  for (const node of deps.keys()) {
    inDegree.set(node, 0);
    dependents.set(node, []);
  }

  // Pass 2: count incoming edges and record the reverse edges. Dependencies
  // that were never declared as keys are registered here as root nodes.
  for (const [node, nodeDeps] of deps) {
    for (const dep of nodeDeps) {
      if (!inDegree.has(dep)) {
        inDegree.set(dep, 0);
        dependents.set(dep, []);
      }
      inDegree.set(node, (inDegree.get(node) ?? 0) + 1);
      dependents.get(dep)?.push(node);
    }
  }

  // Seed with the nodes that have no dependencies.
  const order: string[] = [];
  for (const [node, degree] of inDegree) {
    if (degree === 0) order.push(node);
  }

  // `order` doubles as the FIFO work queue: array iteration also visits the
  // elements appended while looping, so no O(n) shift() is needed.
  for (const node of order) {
    for (const dependent of dependents.get(node) ?? []) {
      const remaining = (inDegree.get(dependent) ?? 0) - 1;
      inDegree.set(dependent, remaining);
      if (remaining === 0) order.push(dependent);
    }
  }

  // Any node left with a positive in-degree sits on a cycle.
  if (order.length !== inDegree.size) {
    throw new Error('Cycle detected in FK dependency graph');
  }
  return order;
}
|
||||
|
||||
/**
 * Return the migration table order, excluding SKIP_TABLES.
 * Uses the pre-computed MIGRATION_ORDER (verified against schema.ts); the
 * filter is a safety net in case a skipped table is ever added to that list.
 *
 * @returns A new array; mutating it does not affect MIGRATION_ORDER.
 */
export function getMigrationOrder(): string[] {
  return MIGRATION_ORDER.filter((table) => !SKIP_TABLES.has(table));
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
/* TABLE_OBJECTS: migration table name → Drizzle table object          */
/* ------------------------------------------------------------------ */

/**
 * Maps MIGRATION_ORDER table names to their corresponding Drizzle table
 * objects from the normalized schema. Used by DrizzleMigrationSource to
 * execute typed `db.select().from(table)` queries.
 *
 * Keyed by snake_case table name (matching MIGRATION_ORDER + SKIP_TABLES).
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const TABLE_OBJECTS: Record<string, any> = {
  users: schema.users,
  teams: schema.teams,
  team_members: schema.teamMembers,
  accounts: schema.accounts,
  projects: schema.projects,
  agents: schema.agents,
  missions: schema.missions,
  tasks: schema.tasks,
  mission_tasks: schema.missionTasks,
  conversations: schema.conversations,
  messages: schema.messages,
  preferences: schema.preferences,
  insights: schema.insights,
  agent_logs: schema.agentLogs,
  skills: schema.skills,
  routing_rules: schema.routingRules,
  provider_credentials: schema.providerCredentials,
  appreciations: schema.appreciations,
  events: schema.events,
  tickets: schema.tickets,
  summarization_jobs: schema.summarizationJobs,
  // Skipped tables — included so count() works for preflight but never passed
  // to upsertBatch.
  sessions: schema.sessions,
  verifications: schema.verifications,
  admin_tokens: schema.adminTokens,
};
|
||||
|
||||
/* ------------------------------------------------------------------ */
/* DrizzleMigrationSource                                              */
/* ------------------------------------------------------------------ */

/**
 * MigrationSource backed by a Drizzle DB handle (works with both
 * PostgresJsDatabase and PgliteDatabase — they share the same Drizzle
 * query API for schema-defined tables).
 *
 * For the `insights` table (the only vector-column table), when the source
 * lacks pgvector (local/PGlite tier), the `embedding` column is excluded
 * from the SELECT projection via a raw `db.execute()` query that lists
 * only non-vector columns. This prevents a type-registration error from
 * PGlite, which does not know the `vector` type. The caller (runMigrateTier
 * via normaliseSourceRow) will set embedding to null on the resulting rows.
 *
 * Column projection is opt-in: pass `sourceHasVector: false` to activate it.
 *
 * Paged reads are ordered by `id`: LIMIT/OFFSET without an ORDER BY gives no
 * guarantee that consecutive pages are disjoint, which could duplicate or
 * drop rows during a batched migration.
 */
export class DrizzleMigrationSource implements MigrationSource {
  /**
   * Columns of the insights table that do NOT include the vector embedding.
   * Used for the no-pgvector projection path.
   */
  private static readonly INSIGHTS_COLUMNS_NO_VECTOR = [
    'id',
    'user_id',
    'content',
    'source',
    'category',
    'relevance_score',
    'metadata',
    'created_at',
    'updated_at',
    'decayed_at',
  ] as const;

  /**
   * @param db Drizzle database handle; its lifecycle stays with the caller.
   * @param sourceHasVector False when the source cannot read `vector`
   *   columns, which switches vector tables to the projected raw query.
   */
  constructor(
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    private readonly db: any,
    private readonly sourceHasVector: boolean = true,
  ) {}

  /**
   * Read rows of `tableName`, optionally restricted to a LIMIT/OFFSET window.
   *
   * Rows from the standard path carry the keys Drizzle produces for the
   * table; rows from the no-vector path carry the snake_case column names
   * listed in INSIGHTS_COLUMNS_NO_VECTOR.
   *
   * @throws Error for a table name that is not in TABLE_OBJECTS.
   * @throws RangeError (no-vector path) for a negative or non-integer
   *   limit/offset.
   */
  async readTable(
    tableName: string,
    opts?: { limit?: number; offset?: number },
  ): Promise<Record<string, unknown>[]> {
    const table = DrizzleMigrationSource.resolveTable(tableName);

    // For vector tables when the source lacks pgvector: use the
    // column-allowlist raw query to avoid type-registration errors.
    if (VECTOR_TABLES.has(tableName) && !this.sourceHasVector) {
      return this.readWithoutVector(tableName, opts);
    }

    // Standard Drizzle select for all other tables (and vector tables when
    // the source has pgvector registered).
    let query = this.db.select().from(table);
    // Drizzle table objects expose their columns as properties; order by the
    // `id` column when the table has one so paging is deterministic.
    if (table.id !== undefined) query = query.orderBy(table.id);
    if (opts?.limit !== undefined) query = query.limit(opts.limit);
    if (opts?.offset !== undefined) query = query.offset(opts.offset);
    return (await query) as Record<string, unknown>[];
  }

  /** Count the rows of `tableName`; throws for an unknown table name. */
  async count(tableName: string): Promise<number> {
    const table = DrizzleMigrationSource.resolveTable(tableName);
    const [row] = await this.db.select({ n: drizzleSql<number>`COUNT(*)::int` }).from(table);
    return (row as { n: number } | undefined)?.n ?? 0;
  }

  /** No-op: lifecycle is managed externally — the caller closes the db handle. */
  async close(): Promise<void> {
    // Intentionally empty.
  }

  /**
   * Look up the Drizzle table object for a table name. Only own keys of
   * TABLE_OBJECTS count, so names like "constructor" are rejected too.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private static resolveTable(tableName: string): any {
    const table = Object.prototype.hasOwnProperty.call(TABLE_OBJECTS, tableName)
      ? TABLE_OBJECTS[tableName]
      : undefined;
    if (!table) throw new Error(`DrizzleMigrationSource: unknown table "${tableName}"`);
    return table;
  }

  /** Validate a paging value before it is inlined into raw SQL text. */
  private static pagingValue(label: string, value: number): string {
    if (!Number.isSafeInteger(value) || value < 0) {
      throw new RangeError(
        `DrizzleMigrationSource: ${label} must be a non-negative integer (got ${String(value)})`,
      );
    }
    return value.toString();
  }

  /** Run the vector-free projection for a vector table. */
  private async readWithoutVector(
    tableName: string,
    opts?: { limit?: number; offset?: number },
  ): Promise<Record<string, unknown>[]> {
    const projection = DrizzleMigrationSource.INSIGHTS_COLUMNS_NO_VECTOR.map(
      (column) => `"${column}"`,
    ).join(', ');

    // tableName is a verified TABLE_OBJECTS key and the paging values are
    // validated integers, so the statement text contains no untrusted input.
    let text = `SELECT ${projection} FROM "${tableName}" ORDER BY "id"`;
    if (opts?.limit !== undefined) {
      text += ` LIMIT ${DrizzleMigrationSource.pagingValue('limit', opts.limit)}`;
    }
    if (opts?.offset !== undefined) {
      text += ` OFFSET ${DrizzleMigrationSource.pagingValue('offset', opts.offset)}`;
    }

    // db.execute() expects a Drizzle SQL value, not a bare
    // `{ sql, params }` object, so the statement is wrapped with sql.raw().
    const result: unknown = await this.db.execute(drizzleSql.raw(text));
    return DrizzleMigrationSource.rowsFromExecuteResult(result);
  }

  /**
   * Normalise the result shapes `db.execute()` can hand back:
   *  - an array of row objects (postgres.js),
   *  - `{ rows: object[] }`, or
   *  - `{ rows: unknown[][], fields?: [{ name }] }` (columnar rows).
   * Anything else yields an empty list.
   */
  private static rowsFromExecuteResult(result: unknown): Record<string, unknown>[] {
    if (Array.isArray(result)) return result as Record<string, unknown>[];

    const rows = (result as { rows?: unknown } | null | undefined)?.rows;
    if (!Array.isArray(rows) || rows.length === 0) return [];
    if (!Array.isArray(rows[0])) return rows as Record<string, unknown>[];

    // Columnar rows: name the values using the reported fields, falling back
    // to the projection order when the driver reports none.
    const reported = (result as { fields?: Array<{ name: string }> }).fields;
    const names: readonly string[] =
      reported?.map((field) => field.name) ?? DrizzleMigrationSource.INSIGHTS_COLUMNS_NO_VECTOR;
    return (rows as unknown[][]).map((values) =>
      Object.fromEntries(names.map((name, index) => [name, values[index]])),
    );
  }
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
/* Real postgres target adapter                                        */
/* ------------------------------------------------------------------ */

/**
 * Live implementation of MigrationTarget backed by a real Postgres connection.
 * Used by the CLI; mocked in tests.
 */
export class PostgresMigrationTarget implements MigrationTarget {
  /** Upper bound on rows per INSERT statement, to avoid enormous single queries. */
  private static readonly MAX_ROWS_PER_STATEMENT = 500;

  /**
   * Upper bound on bind parameters per statement. The PostgreSQL wire
   * protocol carries the parameter count as a 16-bit integer (65535 max);
   * this stays a little below that to leave headroom for driver limits.
   */
  private static readonly MAX_BIND_PARAMS = 65000;

  private sql: ReturnType<typeof postgres>;

  /** @param url Postgres connection URL of the migration target. */
  constructor(url: string) {
    this.sql = postgres(url, {
      max: 5,
      connect_timeout: 10,
      idle_timeout: 30,
    });
  }

  /** Quote an SQL identifier, doubling any embedded double quotes. */
  private static quoteIdent(name: string): string {
    return `"${name.replace(/"/g, '""')}"`;
  }

  /**
   * Upsert `rows` into `tableName` inside a single transaction.
   *
   * The column list is the union of all row keys (used verbatim as column
   * names); keys a row lacks, and undefined values, are written as NULL.
   * Existing rows with the same `id` have every non-id column overwritten.
   * When the rows carry no column besides `id` there is nothing to update,
   * so conflicts are ignored (DO NOTHING) instead of emitting an empty —
   * and therefore invalid — `DO UPDATE SET` clause.
   *
   * @throws Error when no row of the batch carries an `id` key.
   */
  async upsertBatch(tableName: string, rows: Record<string, unknown>[]): Promise<void> {
    if (rows.length === 0) return;

    // Collect all column names from the batch (union of all row keys).
    const columnSet = new Set<string>();
    for (const row of rows) {
      for (const key of Object.keys(row)) columnSet.add(key);
    }
    const cols = [...columnSet];
    if (!cols.includes('id')) {
      throw new Error(`Table ${tableName}: rows missing 'id' column`);
    }

    // Statement fragments that do not depend on the chunk being written.
    const quote = PostgresMigrationTarget.quoteIdent;
    const colList = cols.map(quote).join(', ');
    const updateList = cols
      .filter((col) => col !== 'id')
      .map((col) => `${quote(col)} = EXCLUDED.${quote(col)}`)
      .join(', ');
    const conflictAction = updateList === '' ? 'DO NOTHING' : `DO UPDATE SET ${updateList}`;

    // Rows per statement: capped both by row count and by parameter count.
    const rowsPerStatement = Math.max(
      1,
      Math.min(
        PostgresMigrationTarget.MAX_ROWS_PER_STATEMENT,
        Math.floor(PostgresMigrationTarget.MAX_BIND_PARAMS / cols.length),
      ),
    );

    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    await this.sql.begin(async (tx: any) => {
      for (let start = 0; start < rows.length; start += rowsPerStatement) {
        const chunk = rows.slice(start, start + rowsPerStatement);

        // Build the `($1, $2), ($3, $4), …` tuples and the matching flat
        // value list in one pass; missing columns become NULL.
        const values: unknown[] = [];
        const tuples = chunk.map((row) => {
          const placeholders = cols.map((col) => {
            values.push(row[col] ?? null);
            return `$${values.length.toString()}`;
          });
          return `(${placeholders.join(', ')})`;
        });

        const query = `
          INSERT INTO ${quote(tableName)} (${colList})
          VALUES ${tuples.join(', ')}
          ON CONFLICT (id) ${conflictAction}
        `;

        await tx.unsafe(query, values as never[]);
      }
    });
  }

  /** Count the rows currently in `tableName` on the target. */
  async count(tableName: string): Promise<number> {
    const rows = await this.sql.unsafe(
      `SELECT COUNT(*)::int AS n FROM ${PostgresMigrationTarget.quoteIdent(tableName)}`,
    );
    const row = rows[0] as unknown as { n: number } | undefined;
    return row?.n ?? 0;
  }

  /** True when the `vector` extension is listed in pg_extension. */
  async hasPgvector(): Promise<boolean> {
    const rows = await this.sql`
      SELECT 1 FROM pg_extension WHERE extname = 'vector'
    `;
    return rows.length > 0;
  }

  /** End the underlying postgres connection pool. */
  async close(): Promise<void> {
    await this.sql.end();
  }
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
/* Source-row normalisation                                            */
/* ------------------------------------------------------------------ */

/**
 * Convert a camelCase property name to the snake_case column name
 * (`createdAt` → `created_at`). Names that are already snake_case pass
 * through unchanged.
 */
function camelToSnakeKey(key: string): string {
  return key.replace(/([a-z0-9])([A-Z])/g, '$1_$2').toLowerCase();
}

/**
 * Prepare a source row for the target upsert.
 *
 * Drizzle returns rows as camelCase TypeScript objects (e.g. `userId`, not
 * `user_id`), while PostgresMigrationTarget upserts via raw SQL and uses the
 * row keys verbatim as column names. Keys are therefore rewritten to
 * snake_case here. Rows from the `insights` no-vector path already arrive
 * with snake_case keys and are unaffected.
 *
 * NOTE(review): this assumes every Drizzle property name is the camelCase
 * form of its column name — confirm against packages/db/src/schema.ts. If a
 * row carries both spellings of the same column, the later key wins.
 *
 * For vector tables (insights), when the source has no vector support the
 * `embedding` key is set to null explicitly (the source omitted it from the
 * projection), so the target statement writes NULL for that column.
 * insights.embedding is defined as `vector('embedding', { dimensions: 1536 })`
 * with no `.notNull()` in schema.ts — it accepts NULL.
 *
 * @param tableName snake_case table name the row belongs to.
 * @param row Source row; it is not mutated.
 * @param sourceHasVector True if the source tier supports vector columns.
 * @returns A new object keyed by snake_case column names.
 */
export function normaliseSourceRow(
  tableName: string,
  row: Record<string, unknown>,
  sourceHasVector: boolean,
): Record<string, unknown> {
  const out: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(row)) {
    out[camelToSnakeKey(key)] = value;
  }

  if (VECTOR_TABLES.has(tableName) && !sourceHasVector) {
    // Source cannot have embeddings — explicitly null them so ON CONFLICT
    // UPDATE doesn't try to write undefined.
    out['embedding'] = null;
  }

  return out;
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
/* Precondition checks                                                 */
/* ------------------------------------------------------------------ */

/**
 * Raised when the target database is not in a state that allows the
 * migration to start. Carries a remediation hint alongside the message.
 */
export class MigrationPreconditionError extends Error {
  /**
   * @param message What is wrong with the target.
   * @param remediation What the operator can do about it.
   */
  constructor(
    message: string,
    public readonly remediation: string,
  ) {
    super(message);
    this.name = 'MigrationPreconditionError';
  }
}
|
||||
|
||||
/**
 * Verify target preconditions before writing any data.
 *
 * Checks, in order:
 *   1. pgvector extension installed.
 *   2. User-data tables are empty (unless --allow-non-empty). Tables are
 *      counted one at a time and the check stops at the first non-empty one.
 *
 * @param target Target adapter to inspect.
 * @param allowNonEmpty When true, the emptiness check is skipped entirely.
 * @param tablesToMigrate Tables whose row counts are checked.
 * @throws MigrationPreconditionError when a check fails.
 */
export async function checkTargetPreconditions(
  target: MigrationTarget,
  allowNonEmpty: boolean,
  tablesToMigrate: string[],
): Promise<void> {
  if (!(await target.hasPgvector())) {
    throw new MigrationPreconditionError(
      'Target Postgres does not have the pgvector extension installed.',
      'Run: CREATE EXTENSION IF NOT EXISTS vector; — or use the pgvector/pgvector:pg17 Docker image.',
    );
  }

  if (allowNonEmpty) return;

  // Report the first non-empty user-data table.
  for (const table of tablesToMigrate) {
    const existing = await target.count(table);
    if (existing > 0) {
      throw new MigrationPreconditionError(
        `Target table "${table}" already contains ${existing.toString()} rows.`,
        'Pass --allow-non-empty to overwrite existing data (upsert semantics), ' +
          'or point to an empty target database.',
      );
    }
  }
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
/* Core migration runner                                               */
/* ------------------------------------------------------------------ */

/**
 * Run the tier migration.
 *
 * Flow: count every source table, log the plan, then either stop (dry run —
 * the target is not contacted at all) or verify the target preconditions
 * and copy each table in `batchSize` pages, in FK-dependency order.
 *
 * @param source           Adapter for reading source rows.
 * @param target           Adapter for writing rows to target.
 * @param opts             Migration options.
 * @param sourceHasVector  True if the source tier supports vector columns.
 * @returns Per-table results (migrated tables first, then skipped tables)
 *   and the total number of rows written.
 * @throws RangeError when `opts.batchSize` is not a positive integer — a
 *   zero or negative size would otherwise read empty pages and report a
 *   successful migration of nothing.
 * @throws MigrationPreconditionError when the target fails its preflight.
 * @throws Error when reading or writing a batch fails; the message names the
 *   table, the rows written so far and the last id of the last good batch.
 */
export async function runMigrateTier(
  source: MigrationSource,
  target: MigrationTarget,
  opts: MigrateTierOptions,
  sourceHasVector = false,
): Promise<MigrateTierResult> {
  const { dryRun, allowNonEmpty, batchSize, onProgress } = opts;

  if (!Number.isInteger(batchSize) || batchSize <= 0) {
    throw new RangeError(
      `[migrate-tier] batchSize must be a positive integer (got ${String(batchSize)}).`,
    );
  }

  const tablesToMigrate = getMigrationOrder();

  // Preflight: gather row counts from source.
  onProgress('[migrate-tier] Gathering source row counts...');
  const sourceCounts = new Map<string, number>();
  for (const table of tablesToMigrate) {
    sourceCounts.set(table, await source.count(table));
  }

  // Log preflight summary.
  onProgress('[migrate-tier] Tables to migrate:');
  for (const table of tablesToMigrate) {
    const n = sourceCounts.get(table) ?? 0;
    onProgress(` ${table}: ${n.toString()} rows`);
  }
  for (const skipped of SKIP_TABLES) {
    onProgress(` ${skipped}: SKIPPED (ephemeral/environment-specific)`);
  }

  // Vector column notice.
  if (!sourceHasVector) {
    onProgress(
      '[migrate-tier] NOTE: Source tier has no pgvector support. ' +
        'insights.embedding will be NULL on all migrated rows.',
    );
  }

  if (dryRun) {
    onProgress('[migrate-tier] DRY RUN — no writes will be made.');
    const tables: TableMigrationResult[] = tablesToMigrate.map((t) => ({
      table: t,
      rowsMigrated: 0,
      skipped: false,
    }));
    for (const skipped of SKIP_TABLES) {
      tables.push({ table: skipped, rowsMigrated: 0, skipped: true, skipReason: 'ephemeral' });
    }
    return { tables, totalRows: 0, dryRun: true };
  }

  // Check preconditions before writing.
  await checkTargetPreconditions(target, allowNonEmpty, tablesToMigrate);

  const results: TableMigrationResult[] = [];
  let totalRows = 0;

  for (const table of tablesToMigrate) {
    const sourceCount = sourceCounts.get(table) ?? 0;

    if (sourceCount === 0) {
      onProgress(`[migrate-tier] ${table}: 0 rows — skipping.`);
      results.push({ table, rowsMigrated: 0, skipped: false });
      continue;
    }

    onProgress(`[migrate-tier] ${table}: migrating ${sourceCount.toString()} rows...`);

    let offset = 0;
    let tableTotal = 0;
    let lastSuccessfulId: string | undefined;

    try {
      while (offset < sourceCount) {
        const rows = await source.readTable(table, { limit: batchSize, offset });
        // The source ran dry before the counted total was reached.
        if (rows.length === 0) break;

        const normalised = rows.map((row) => normaliseSourceRow(table, row, sourceHasVector));

        await target.upsertBatch(table, normalised);

        // Only advance the bookkeeping once the batch is safely written.
        lastSuccessfulId = rows[rows.length - 1]?.['id'] as string | undefined;
        tableTotal += rows.length;
        offset += rows.length;

        onProgress(
          `[migrate-tier] ${table}: ${tableTotal.toString()}/${sourceCount.toString()} rows written`,
        );
      }
    } catch (err) {
      const errMsg = err instanceof Error ? err.message : String(err);
      throw new Error(
        `[migrate-tier] Failed on table "${table}" after ${tableTotal.toString()} rows ` +
          `(last id: ${lastSuccessfulId ?? 'none'}). Error: ${errMsg}\n` +
          `Remediation: Re-run with --allow-non-empty to resume (upsert is idempotent).`,
      );
    }

    if (tableTotal < sourceCount) {
      // Surface the shortfall instead of silently reporting success.
      onProgress(
        `[migrate-tier] WARNING: ${table}: source returned ${tableTotal.toString()} rows ` +
          `but ${sourceCount.toString()} were counted during preflight.`,
      );
    }

    results.push({ table, rowsMigrated: tableTotal, skipped: false });
    totalRows += tableTotal;
    onProgress(`[migrate-tier] ${table}: done (${tableTotal.toString()} rows).`);
  }

  // Add skipped table records.
  for (const skipped of SKIP_TABLES) {
    results.push({
      table: skipped,
      rowsMigrated: 0,
      skipped: true,
      skipReason: 'ephemeral or environment-specific — re-issue on target',
    });
  }

  onProgress(`[migrate-tier] Complete. ${totalRows.toString()} total rows migrated.`);
  return { tables: results, totalRows, dryRun: false };
}
|
||||
Reference in New Issue
Block a user