feat(ml): implement training stage with MLflow tracking and model wrappers

- Create RandomForestModel and XGBoostModel wrappers with class weight support
- Implement temporal and random train/val/test splitting
- Add MLflow experiment tracking with full parameter and metric logging
- Create evaluation module for confusion matrix, feature importance, and classification reports
- Implement model training with sklearn/xgboost flavor logging and optional registry registration
- Store training run metadata in PostgreSQL
- Wire training stage into pipeline.py orchestrator
- Support both RandomForest and XGBoost models with configurable hyperparameters
2026-02-15 14:22:19 +01:00 · 2026-02-15 14:22:19 +01:00 · f4c0f9a836
commit f4c0f9a836
parent 16763b967e
8 changed files with 900 additions and 14 deletions
--- a/services/ml/pipeline.py
+++ b/services/ml/pipeline.py
@@ -93,10 +93,16 @@ def run_training(config: PipelineConfig) -> None:
        return
    
    # Import here to avoid circular dependencies
-    from training.train import run_training_stage
+    from training.train import train
    
    logger.info(f"Reading labeled data from: {config.data.labeled_path}")
-    run_training_stage(config)
+    
+    # Set output model path from config
+    output_model_path = Path(config.stages.inference.local_model_path)
+    
+    # Run training
+    run_id = train(config, Path(config.data.labeled_path), output_model_path)
+    logger.info(f"Training completed. MLflow run ID: {run_id}")
    logger.info("Training stage completed successfully")