feat(ml): add database schema, config parser, and DVC setup

- Initialize DVC with local storage backend (task 1.6)
- Create PostgreSQL schema for training_runs table (task 1.7)
- Add SQLAlchemy database connection setup (task 1.8)
- Create Pydantic config models for pipeline.yaml (task 2.1)
- Add migration runner for database setup
- Fix pyproject.toml package discovery config
This commit is contained in:
Marko Djordjevic 2026-02-15 12:08:53 +01:00
parent 1a653c5866
commit ea339a54a7
15 changed files with 412 additions and 4 deletions

View file

@ -0,0 +1,65 @@
#!/usr/bin/env python3
"""
Simple database migration runner for the ML service.
Runs all SQL files in the migrations directory in order.
"""
import os
import sys
from pathlib import Path
import psycopg2
from psycopg2 import sql
def get_db_connection():
    """Open a PostgreSQL connection configured through ``POSTGRES_*`` variables.

    Each setting is read from the environment and falls back to a hard-coded
    default when the variable is unset:

    * ``POSTGRES_HOST``     -> ``"localhost"``
    * ``POSTGRES_PORT``     -> ``"5432"``
    * ``POSTGRES_DB``       -> ``"ml_service"``
    * ``POSTGRES_USER``     -> ``"ml_user"``
    * ``POSTGRES_PASSWORD`` -> ``"ml_password"``

    Returns:
        Whatever ``psycopg2.connect`` returns for those settings.
    """
    settings = {
        "host": os.getenv("POSTGRES_HOST", "localhost"),
        "port": os.getenv("POSTGRES_PORT", "5432"),
        "database": os.getenv("POSTGRES_DB", "ml_service"),
        "user": os.getenv("POSTGRES_USER", "ml_user"),
        "password": os.getenv("POSTGRES_PASSWORD", "ml_password"),
    }
    return psycopg2.connect(**settings)
def run_migrations():
    """Execute every ``*.sql`` file located next to this script, in name order.

    The files are sorted by path, read as UTF-8, executed one at a time on a
    single cursor, and committed individually, so migrations that completed
    before a failure stay applied. Nothing records which files have already
    been run: every file is executed on every invocation, so the SQL
    presumably needs to be safe to re-run (verify against the migration
    files themselves).

    Returns:
        None. Progress messages are printed to stdout. If the directory holds
        no ``*.sql`` files, a notice is printed and no database connection is
        opened.

    Raises:
        SystemExit: with status 1 when any step after connecting fails. The
            open transaction is rolled back and the error is printed to
            stderr before exiting.
    """
    migrations_dir = Path(__file__).parent
    migration_files = sorted(migrations_dir.glob("*.sql"))
    if not migration_files:
        print("No migration files found")
        return

    print(f"Found {len(migration_files)} migration file(s)")

    conn = get_db_connection()
    try:
        # Create the cursor inside the try block so the connection is still
        # closed by ``finally`` if cursor creation itself fails.
        with conn.cursor() as cur:
            for migration_file in migration_files:
                print(f"Running migration: {migration_file.name}")
                # Decode explicitly as UTF-8 rather than relying on the
                # platform's locale encoding, which varies between hosts.
                migration_sql = migration_file.read_text(encoding="utf-8")
                cur.execute(migration_sql)
                # Commit per file so earlier migrations survive a later failure.
                conn.commit()
                print(f"{migration_file.name} completed")
        print("\nAll migrations completed successfully")
    except Exception as e:
        # Top-level boundary of the script: report the error and exit non-zero.
        conn.rollback()
        print(f"\n✗ Migration failed: {e}", file=sys.stderr)
        sys.exit(1)
    finally:
        conn.close()
# Run the migrations only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run_migrations()