feat(ml): implement training stage with MLflow tracking and model wrappers
- Create RandomForestModel and XGBoostModel wrappers with class weight support
- Implement temporal and random train/val/test splitting
- Add MLflow experiment tracking with full parameter and metric logging
- Create evaluation module for confusion matrix, feature importance, and classification reports
- Implement model training with sklearn/xgboost flavor logging and optional registry registration
- Store training run metadata in PostgreSQL
- Wire training stage into pipeline.py orchestrator
- Support both RandomForest and XGBoost models with configurable hyperparameters
This commit is contained in:
parent
16763b967e
commit
f4c0f9a836
8 changed files with 900 additions and 14 deletions
|
|
@@ -93,10 +93,16 @@ def run_training(config: PipelineConfig) -> None:
|
|||
return
|
||||
|
||||
# Import here to avoid circular dependencies
|
||||
from training.train import run_training_stage
|
||||
from training.train import train
|
||||
|
||||
logger.info(f"Reading labeled data from: {config.data.labeled_path}")
|
||||
run_training_stage(config)
|
||||
|
||||
# Set output model path from config
|
||||
output_model_path = Path(config.stages.inference.local_model_path)
|
||||
|
||||
# Run training
|
||||
run_id = train(config, Path(config.data.labeled_path), output_model_path)
|
||||
logger.info(f"Training completed. MLflow run ID: {run_id}")
|
||||
logger.info("Training stage completed successfully")
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue