candle-annotator/scripts/migrate-sqlite-to-postgres.ts
Marko Djordjevic bfe437857b feat: add Python migration script and successfully test SQLite to PostgreSQL data migration
- Created scripts/migrate-sqlite-to-postgres.py as alternative to TypeScript version
- Handles all type conversions: timestamps, booleans, and JSONB fields
- Successfully migrated all 2,836 rows from SQLite to PostgreSQL
- Verified data integrity: all 6 tables migrated correctly
- Charts: 1, Candles: 2,592, Annotations: 4, Span annotations: 223
2026-02-17 14:01:21 +01:00

494 lines
14 KiB
TypeScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env tsx
/**
* SQLite to PostgreSQL Migration Script
*
* Migrates data from the legacy SQLite database to PostgreSQL.
*
* Features:
* - Migrates all 6 tables: charts, candles, annotation_types, annotations, span_label_types, span_annotations
* - Applies type conversions: integer timestamps → PostgreSQL timestamps, integer booleans → booleans, text JSON → jsonb
* - Idempotent: Can be run multiple times safely (skips existing data by default)
* - Supports --clear flag to delete all data before migrating
*
* Usage:
* npm run migrate:sqlite-to-postgres # Migrate (skip existing)
* npm run migrate:sqlite-to-postgres -- --clear # Clear and re-migrate
* npm run migrate:sqlite-to-postgres -- --help # Show help
*/
import Database from 'better-sqlite3';
import { drizzle as drizzlePg } from 'drizzle-orm/node-postgres';
import { Pool } from 'pg';
import { sql } from 'drizzle-orm';
import * as schema from '../src/lib/db/schema';
// Command-line arguments
// Everything after the node binary and the script path.
const args = process.argv.slice(2);
// --clear: wipe the PostgreSQL tables before copying.
const shouldClear = args.some((flag) => flag === '--clear');
// --help / -h: print usage and stop.
const showHelp = ['--help', '-h'].some((flag) => args.includes(flag));

// Usage text shown for --help / -h.
const helpText = `
SQLite to PostgreSQL Migration Script
Usage:
npm run migrate:sqlite-to-postgres # Migrate (skip existing data)
npm run migrate:sqlite-to-postgres -- --clear # Clear all data before migrating
npm run migrate:sqlite-to-postgres -- --help # Show this help
Environment Variables:
DATABASE_PATH (default: ./data/candles.db) # SQLite database path
DATABASE_URL # PostgreSQL connection string
`;

if (showHelp) {
  console.log(helpText);
  process.exit(0);
}
// Configuration
// SQLite source file; defaults to ./data/candles.db when DATABASE_PATH is unset or empty.
const SQLITE_PATH = process.env.DATABASE_PATH || './data/candles.db';
// PostgreSQL connection string; required, there is no default.
const POSTGRES_URL = process.env.DATABASE_URL;
if (!POSTGRES_URL) {
  console.error('ERROR: DATABASE_URL environment variable is required');
  process.exit(1);
}

// Hide the password before the URL is written to the log.
// The pattern is anchored on "://user:" and consumes everything up to the "@"
// that ends the userinfo part, so a password containing ":" is masked in full
// and a URL without a password (e.g. "postgres://user@host:5432/db") is logged
// unchanged instead of having its "//user" part replaced.
const maskedPostgresUrl = POSTGRES_URL.replace(/(:\/\/[^:@/]*):[^@/]*@/, '$1:****@');

console.log('='.repeat(60));
console.log('SQLite to PostgreSQL Migration');
console.log('='.repeat(60));
console.log(`SQLite source: ${SQLITE_PATH}`);
console.log(`PostgreSQL target: ${maskedPostgresUrl}`);
console.log(`Mode: ${shouldClear ? 'CLEAR AND MIGRATE' : 'SKIP EXISTING'}`);
console.log('='.repeat(60));
console.log();

// Initialize databases
// The SQLite file is opened read-only: this script only reads from it.
const sqlite = new Database(SQLITE_PATH, { readonly: true });
const pgPool = new Pool({ connectionString: POSTGRES_URL });
// Drizzle wrapper over the pool; passing `schema` enables the pg.query.<table> API used below.
const pg = drizzlePg(pgPool, { schema });
/**
 * Per-table counters recorded by each migrate* function and rendered by printSummary().
 */
interface MigrationStats {
/** Name of the table these counters belong to. */
table: string;
/** Number of rows read from the SQLite table. */
sourceCount: number;
/** Number of rows inserted into PostgreSQL. */
migratedCount: number;
/** Number of rows skipped because a row with the same id already existed in PostgreSQL. */
skippedCount: number;
/** Number of rows whose migration threw and was logged. */
errorCount: number;
}
// Collected results; each migrate* function appends one entry when it finishes.
const stats: MigrationStats[] = [];
/**
 * Convert SQLite integer timestamp (Unix seconds) to JavaScript Date.
 *
 * Only a missing value (null or undefined) maps to null. A stored 0 is a valid
 * instant (1970-01-01T00:00:00Z) and is converted like any other number rather
 * than being treated as "no timestamp".
 *
 * @param timestamp Seconds since the Unix epoch, or null when the column is empty.
 * @returns The matching Date, or null when no timestamp was stored.
 */
function sqliteTimestampToDate(timestamp: number | null): Date | null {
  // `== null` matches null and undefined only; a truthiness test would also drop 0.
  if (timestamp == null) return null;
  // JavaScript dates are millisecond-based.
  return new Date(timestamp * 1000);
}
/**
 * Map a SQLite integer flag onto a JavaScript boolean.
 *
 * Only the exact value 1 yields true; 0, null and every other number yield false.
 */
function sqliteBooleanToBoolean(value: number | null): boolean {
  const SQLITE_TRUE = 1;
  return value === SQLITE_TRUE;
}
/**
 * Decode a JSON document stored as text in SQLite.
 *
 * Returns null for null or empty input. Text that is not valid JSON is
 * reported with console.warn and also yields null.
 */
function sqliteJsonToObject(json: string | null): any {
  let decoded: any = null;
  if (json) {
    try {
      decoded = JSON.parse(json);
    } catch {
      // Malformed text: warn and fall through with the null default.
      console.warn('Failed to parse JSON:', json);
    }
  }
  return decoded;
}
/**
 * Delete every row from the six PostgreSQL tables this script migrates.
 *
 * The tables are emptied one after another, in the fixed order listed below,
 * with a separate DELETE statement per table.
 */
async function clearPostgresData() {
  console.log('Clearing PostgreSQL data...');

  // NOTE(review): the order looks like "referencing tables before the tables
  // they reference" so foreign keys are not violated - confirm against the schema.
  const deletionOrder = [
    'span_annotations',
    'annotations',
    'candles',
    'span_label_types',
    'annotation_types',
    'charts',
  ];

  for (let index = 0; index < deletionOrder.length; index += 1) {
    const target = deletionOrder[index];
    await pgPool.query(`DELETE FROM ${target}`);
    console.log(` Cleared table: ${target}`);
  }

  console.log('All tables cleared.\n');
}
/**
 * Copy every row of the SQLite `charts` table into PostgreSQL.
 *
 * Unless --clear was given, a row whose id already exists in PostgreSQL is
 * counted as skipped. A failure on a single row is logged and counted, and the
 * loop continues with the next row. The totals are appended to `stats`.
 */
async function migrateCharts() {
  const tableName = 'charts';
  console.log(`Migrating ${tableName}...`);

  const sourceRows = sqlite.prepare('SELECT * FROM charts').all() as any[];
  const tally = { migrated: 0, skipped: 0, errors: 0 };

  for (const chart of sourceRows) {
    try {
      // The existence lookup is only performed when the target was not cleared first.
      const alreadyPresent =
        !shouldClear &&
        (await pg.query.charts.findFirst({
          where: sql`${schema.charts.id} = ${chart.id}`,
        }));
      if (alreadyPresent) {
        tally.skipped += 1;
        continue;
      }

      await pg.insert(schema.charts).values({
        id: chart.id,
        name: chart.name,
        created_at: sqliteTimestampToDate(chart.created_at),
      });
      tally.migrated += 1;
    } catch (e: any) {
      console.error(` Error migrating chart ${chart.id}:`, e.message);
      tally.errors += 1;
    }
  }

  stats.push({
    table: tableName,
    sourceCount: sourceRows.length,
    migratedCount: tally.migrated,
    skippedCount: tally.skipped,
    errorCount: tally.errors,
  });
  console.log(` ${tableName}: ${tally.migrated} migrated, ${tally.skipped} skipped, ${tally.errors} errors\n`);
}
/**
 * Copy every row of the SQLite `candles` table into PostgreSQL.
 *
 * The integer `time` column is converted to a Date; the price columns are
 * passed through unchanged. Unless --clear was given, rows whose id already
 * exists in PostgreSQL are skipped. Per-row failures are logged and counted
 * without stopping the loop. The totals are appended to `stats`.
 */
async function migrateCandles() {
  const tableName = 'candles';
  console.log(`Migrating ${tableName}...`);

  const sourceRows = sqlite.prepare('SELECT * FROM candles').all() as any[];
  const tally = { migrated: 0, skipped: 0, errors: 0 };

  for (const candle of sourceRows) {
    try {
      // The existence lookup is only performed when the target was not cleared first.
      const alreadyPresent =
        !shouldClear &&
        (await pg.query.candles.findFirst({
          where: sql`${schema.candles.id} = ${candle.id}`,
        }));
      if (alreadyPresent) {
        tally.skipped += 1;
        continue;
      }

      await pg.insert(schema.candles).values({
        id: candle.id,
        chart_id: candle.chart_id,
        time: sqliteTimestampToDate(candle.time),
        open: candle.open,
        high: candle.high,
        low: candle.low,
        close: candle.close,
      });
      tally.migrated += 1;
    } catch (e: any) {
      console.error(` Error migrating candle ${candle.id}:`, e.message);
      tally.errors += 1;
    }
  }

  stats.push({
    table: tableName,
    sourceCount: sourceRows.length,
    migratedCount: tally.migrated,
    skippedCount: tally.skipped,
    errorCount: tally.errors,
  });
  console.log(` ${tableName}: ${tally.migrated} migrated, ${tally.skipped} skipped, ${tally.errors} errors\n`);
}
/**
 * Copy every row of the SQLite `annotation_types` table into PostgreSQL.
 *
 * `is_active` is converted from a 0/1 integer to a boolean and `created_at`
 * from an integer timestamp to a Date. Unless --clear was given, rows whose id
 * already exists in PostgreSQL are skipped. Per-row failures are logged and
 * counted without stopping the loop. The totals are appended to `stats`.
 */
async function migrateAnnotationTypes() {
  const tableName = 'annotation_types';
  console.log(`Migrating ${tableName}...`);

  const sourceRows = sqlite.prepare('SELECT * FROM annotation_types').all() as any[];
  const tally = { migrated: 0, skipped: 0, errors: 0 };

  for (const annotationType of sourceRows) {
    try {
      // The existence lookup is only performed when the target was not cleared first.
      const alreadyPresent =
        !shouldClear &&
        (await pg.query.annotationTypes.findFirst({
          where: sql`${schema.annotationTypes.id} = ${annotationType.id}`,
        }));
      if (alreadyPresent) {
        tally.skipped += 1;
        continue;
      }

      await pg.insert(schema.annotationTypes).values({
        id: annotationType.id,
        name: annotationType.name,
        display_name: annotationType.display_name,
        color: annotationType.color,
        category: annotationType.category,
        icon: annotationType.icon,
        is_active: sqliteBooleanToBoolean(annotationType.is_active),
        created_at: sqliteTimestampToDate(annotationType.created_at),
      });
      tally.migrated += 1;
    } catch (e: any) {
      console.error(` Error migrating annotation_type ${annotationType.id}:`, e.message);
      tally.errors += 1;
    }
  }

  stats.push({
    table: tableName,
    sourceCount: sourceRows.length,
    migratedCount: tally.migrated,
    skippedCount: tally.skipped,
    errorCount: tally.errors,
  });
  console.log(` ${tableName}: ${tally.migrated} migrated, ${tally.skipped} skipped, ${tally.errors} errors\n`);
}
/**
 * Copy every row of the SQLite `annotations` table into PostgreSQL.
 *
 * `timestamp` and `created_at` are converted to Dates, `geometry` is parsed
 * from JSON text, and a falsy `color` is replaced by '#3b82f6'. Unless --clear
 * was given, rows whose id already exists in PostgreSQL are skipped. Per-row
 * failures are logged and counted without stopping the loop. The totals are
 * appended to `stats`.
 */
async function migrateAnnotations() {
  const tableName = 'annotations';
  console.log(`Migrating ${tableName}...`);

  const sourceRows = sqlite.prepare('SELECT * FROM annotations').all() as any[];
  const tally = { migrated: 0, skipped: 0, errors: 0 };

  for (const annotation of sourceRows) {
    try {
      // The existence lookup is only performed when the target was not cleared first.
      const alreadyPresent =
        !shouldClear &&
        (await pg.query.annotations.findFirst({
          where: sql`${schema.annotations.id} = ${annotation.id}`,
        }));
      if (alreadyPresent) {
        tally.skipped += 1;
        continue;
      }

      await pg.insert(schema.annotations).values({
        id: annotation.id,
        chart_id: annotation.chart_id,
        timestamp: sqliteTimestampToDate(annotation.timestamp),
        label_type: annotation.label_type,
        geometry: sqliteJsonToObject(annotation.geometry),
        color: annotation.color || '#3b82f6',
        created_at: sqliteTimestampToDate(annotation.created_at),
      });
      tally.migrated += 1;
    } catch (e: any) {
      console.error(` Error migrating annotation ${annotation.id}:`, e.message);
      tally.errors += 1;
    }
  }

  stats.push({
    table: tableName,
    sourceCount: sourceRows.length,
    migratedCount: tally.migrated,
    skippedCount: tally.skipped,
    errorCount: tally.errors,
  });
  console.log(` ${tableName}: ${tally.migrated} migrated, ${tally.skipped} skipped, ${tally.errors} errors\n`);
}
/**
 * Copy every row of the SQLite `span_label_types` table into PostgreSQL.
 *
 * `is_active` is converted to a boolean, `created_at` to a Date, and a falsy
 * `sort_order` becomes 0. Unless --clear was given, rows whose id already
 * exists in PostgreSQL are skipped. Per-row failures are logged and counted
 * without stopping the loop. The totals are appended to `stats`.
 */
async function migrateSpanLabelTypes() {
  const tableName = 'span_label_types';
  console.log(`Migrating ${tableName}...`);

  const sourceRows = sqlite.prepare('SELECT * FROM span_label_types').all() as any[];
  const tally = { migrated: 0, skipped: 0, errors: 0 };

  for (const labelType of sourceRows) {
    try {
      // The existence lookup is only performed when the target was not cleared first.
      const alreadyPresent =
        !shouldClear &&
        (await pg.query.spanLabelTypes.findFirst({
          where: sql`${schema.spanLabelTypes.id} = ${labelType.id}`,
        }));
      if (alreadyPresent) {
        tally.skipped += 1;
        continue;
      }

      await pg.insert(schema.spanLabelTypes).values({
        id: labelType.id,
        name: labelType.name,
        display_name: labelType.display_name,
        color: labelType.color,
        hotkey: labelType.hotkey,
        is_active: sqliteBooleanToBoolean(labelType.is_active),
        sort_order: labelType.sort_order || 0,
        created_at: sqliteTimestampToDate(labelType.created_at),
      });
      tally.migrated += 1;
    } catch (e: any) {
      console.error(` Error migrating span_label_type ${labelType.id}:`, e.message);
      tally.errors += 1;
    }
  }

  stats.push({
    table: tableName,
    sourceCount: sourceRows.length,
    migratedCount: tally.migrated,
    skippedCount: tally.skipped,
    errorCount: tally.errors,
  });
  console.log(` ${tableName}: ${tally.migrated} migrated, ${tally.skipped} skipped, ${tally.errors} errors\n`);
}
/**
 * Copy every row of the SQLite `span_annotations` table into PostgreSQL.
 *
 * `start_time`, `end_time` and `created_at` are converted to Dates;
 * `sub_spans` and `model_prediction` are parsed from JSON text; a falsy
 * `color` becomes '#2196F3' and a falsy `source` becomes 'human'. Unless
 * --clear was given, rows whose id already exists in PostgreSQL are skipped.
 * Per-row failures are logged and counted without stopping the loop. The
 * totals are appended to `stats`.
 */
async function migrateSpanAnnotations() {
  const tableName = 'span_annotations';
  console.log(`Migrating ${tableName}...`);

  const sourceRows = sqlite.prepare('SELECT * FROM span_annotations').all() as any[];
  const tally = { migrated: 0, skipped: 0, errors: 0 };

  for (const span of sourceRows) {
    try {
      // The existence lookup is only performed when the target was not cleared first.
      const alreadyPresent =
        !shouldClear &&
        (await pg.query.spanAnnotations.findFirst({
          where: sql`${schema.spanAnnotations.id} = ${span.id}`,
        }));
      if (alreadyPresent) {
        tally.skipped += 1;
        continue;
      }

      await pg.insert(schema.spanAnnotations).values({
        id: span.id,
        chart_id: span.chart_id,
        start_time: sqliteTimestampToDate(span.start_time),
        end_time: sqliteTimestampToDate(span.end_time),
        label: span.label,
        confidence: span.confidence,
        outcome: span.outcome,
        notes: span.notes,
        sub_spans: sqliteJsonToObject(span.sub_spans),
        color: span.color || '#2196F3',
        source: span.source || 'human',
        model_prediction: sqliteJsonToObject(span.model_prediction),
        created_at: sqliteTimestampToDate(span.created_at),
      });
      tally.migrated += 1;
    } catch (e: any) {
      console.error(` Error migrating span_annotation ${span.id}:`, e.message);
      tally.errors += 1;
    }
  }

  stats.push({
    table: tableName,
    sourceCount: sourceRows.length,
    migratedCount: tally.migrated,
    skippedCount: tally.skipped,
    errorCount: tally.errors,
  });
  console.log(` ${tableName}: ${tally.migrated} migrated, ${tally.skipped} skipped, ${tally.errors} errors\n`);
}
/**
 * Write the results table to stdout.
 *
 * Prints one line per entry in `stats`, then a TOTAL line with the column
 * sums, and ends with a warning when any errors were counted or a success
 * message otherwise.
 */
function printSummary() {
  const heavyRule = '='.repeat(60);
  const lightRule = '-'.repeat(60);

  // Lay out one table line; the pad widths are fixed per column.
  const formatRow = (label: string, source: number, migrated: number, skipped: number, errors: number): string =>
    `${label.padEnd(24)} | ${String(source).padStart(6)} | ${String(migrated).padStart(8)} | ${String(skipped).padStart(7)} | ${String(errors).padStart(6)}`;

  console.log(heavyRule);
  console.log('Migration Summary');
  console.log(heavyRule);
  console.log();
  console.log('Table | Source | Migrated | Skipped | Errors');
  console.log(lightRule);

  const totals = { source: 0, migrated: 0, skipped: 0, errors: 0 };
  stats.forEach((entry) => {
    console.log(
      formatRow(entry.table, entry.sourceCount, entry.migratedCount, entry.skippedCount, entry.errorCount)
    );
    totals.source += entry.sourceCount;
    totals.migrated += entry.migratedCount;
    totals.skipped += entry.skippedCount;
    totals.errors += entry.errorCount;
  });

  console.log(lightRule);
  console.log(formatRow('TOTAL', totals.source, totals.migrated, totals.skipped, totals.errors));
  console.log(heavyRule);

  const verdict =
    totals.errors > 0
      ? `\n⚠ Migration completed with ${totals.errors} errors. Check logs above.`
      : '\n✅ Migration completed successfully!';
  console.log(verdict);
}
/**
 * Main migration function.
 *
 * Optionally clears the PostgreSQL tables (--clear), migrates the six tables
 * one after another, and prints the summary. If any step throws, the error is
 * logged and the process exit code is set to 1.
 *
 * The failure path sets `process.exitCode` rather than calling
 * `process.exit()`: an immediate exit would end the process before the
 * `finally` block runs, leaving both database connections unclosed.
 */
async function main() {
  try {
    // Clear data if requested
    if (shouldClear) {
      await clearPostgresData();
    }

    // Migrate tables in dependency order
    await migrateCharts();
    await migrateCandles();
    await migrateAnnotationTypes();
    await migrateAnnotations();
    await migrateSpanLabelTypes();
    await migrateSpanAnnotations();

    // Print summary
    printSummary();
  } catch (error: unknown) {
    // Anything can be thrown; only Error instances are guaranteed a message.
    const message = error instanceof Error ? error.message : String(error);
    console.error('\n❌ Migration failed:', message);
    process.exitCode = 1;
  } finally {
    // Close connections. SQLite is guarded on its own so that a failure there
    // cannot prevent the PostgreSQL pool from being shut down.
    try {
      sqlite.close();
    } catch (closeError: unknown) {
      const message = closeError instanceof Error ? closeError.message : String(closeError);
      console.error('Failed to close SQLite database:', message);
      process.exitCode = 1;
    }
    await pgPool.end();
  }
}

// Run migration
// main() handles migration errors itself; this handler only covers a rejection
// raised while shutting down the PostgreSQL pool in the finally block.
main().catch((error: unknown) => {
  const message = error instanceof Error ? error.message : String(error);
  console.error('\n❌ Migration failed:', message);
  process.exitCode = 1;
});