feat(ml): implement training stage with MLflow tracking and model wrappers

- Create RandomForestModel and XGBoostModel wrappers with class weight support
- Implement temporal and random train/val/test splitting
- Add MLflow experiment tracking with full parameter and metric logging
- Create evaluation module for confusion matrix, feature importance, and classification reports
- Implement model training with sklearn/xgboost flavor logging and optional registry registration
- Store training run metadata in PostgreSQL
- Wire training stage into pipeline.py orchestrator
- Support both RandomForest and XGBoost models with configurable hyperparameters
This commit is contained in:
Marko Djordjevic 2026-02-15 14:22:19 +01:00
parent 16763b967e
commit f4c0f9a836
8 changed files with 900 additions and 14 deletions

View file

@@ -93,10 +93,16 @@ def run_training(config: PipelineConfig) -> None:
return
# Import here to avoid circular dependencies
from training.train import run_training_stage
from training.train import train
logger.info(f"Reading labeled data from: {config.data.labeled_path}")
run_training_stage(config)
# Use the inference stage's configured local model path as the training output path
output_model_path = Path(config.stages.inference.local_model_path)
# Run training
run_id = train(config, Path(config.data.labeled_path), output_model_path)
logger.info(f"Training completed. MLflow run ID: {run_id}")
logger.info("Training stage completed successfully")