- Initialize DVC with local storage backend (task 1.6) - Create PostgreSQL schema for training_runs table (task 1.7) - Add SQLAlchemy database connection setup (task 1.8) - Create Pydantic config models for pipeline.yaml (task 2.1) - Add migration runner for database setup - Fix pyproject.toml package discovery config
27 lines
1.1 KiB
SQL
27 lines
1.1 KiB
SQL
-- training_runs: one row per ML training run.
--
-- Created only when absent, so this statement may be re-run against a
-- database that already has the table.
CREATE TABLE IF NOT EXISTS training_runs (
    -- Surrogate key, auto-assigned by the SERIAL sequence.
    id                   SERIAL       PRIMARY KEY,

    -- Run identifier; required and unique across the table.
    run_id               VARCHAR(255) NOT NULL UNIQUE,

    -- Required descriptive attributes of the run.
    model_type           VARCHAR(100) NOT NULL,
    experiment_name      VARCHAR(255) NOT NULL,

    -- NOTE(review): 64 characters would fit a hex-encoded SHA-256 digest;
    -- confirm the hashing scheme with the code that writes this column.
    pipeline_config_hash VARCHAR(64)  NOT NULL,

    -- Optional: may be NULL.
    dataset_version      VARCHAR(100),
    metrics_summary      JSONB,

    -- Lifecycle state; new rows start as 'running'. Allowed values are
    -- enforced by the valid_status constraint below.
    status               VARCHAR(50)  NOT NULL DEFAULT 'running',

    -- created_at defaults to the insert-time timestamp; completed_at has no
    -- default and stays NULL until a writer sets it.
    -- TIMESTAMPTZ is PostgreSQL's alias for TIMESTAMP WITH TIME ZONE.
    created_at           TIMESTAMPTZ  DEFAULT CURRENT_TIMESTAMP,
    completed_at         TIMESTAMPTZ,

    CONSTRAINT valid_status CHECK (
        status IN ('running', 'completed', 'failed', 'cancelled')
    )
);
|
|
|
|
-- Index on run_id for faster lookups.
-- IF NOT EXISTS keeps this migration re-runnable, matching the
-- CREATE TABLE IF NOT EXISTS above; a bare CREATE INDEX fails on a second run.
-- NOTE(review): run_id is declared UNIQUE in the table definition, and
-- PostgreSQL already backs that constraint with its own unique index, so this
-- index is redundant. It is kept so the index name stays available to anything
-- that references it; consider dropping it in a follow-up migration.
CREATE INDEX IF NOT EXISTS idx_training_runs_run_id
    ON training_runs (run_id);
|
|
|
|
-- Index on experiment_name for filtering runs by experiment.
-- IF NOT EXISTS keeps this migration re-runnable, matching the
-- CREATE TABLE IF NOT EXISTS above; a bare CREATE INDEX fails on a second run.
CREATE INDEX IF NOT EXISTS idx_training_runs_experiment
    ON training_runs (experiment_name);
|
|
|
|
-- Index on status for filtering active runs.
-- IF NOT EXISTS keeps this migration re-runnable, matching the
-- CREATE TABLE IF NOT EXISTS above; a bare CREATE INDEX fails on a second run.
CREATE INDEX IF NOT EXISTS idx_training_runs_status
    ON training_runs (status);
|
|
|
|
-- Index on created_at (newest first) for chronological queries.
-- IF NOT EXISTS keeps this migration re-runnable, matching the
-- CREATE TABLE IF NOT EXISTS above; a bare CREATE INDEX fails on a second run.
CREATE INDEX IF NOT EXISTS idx_training_runs_created_at
    ON training_runs (created_at DESC);
|