# Commit note: replace hardcoded localhost:5000 with mlflow:5000 in pipeline.yaml
# and in the main.py health-check fallback so containers can reach MLflow over
# Docker's internal network.
# Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
# File listing: 152 lines, 3.6 KiB, YAML
# ML Pipeline Configuration
# Full config for feature engineering, annotation ingestion, training, and inference
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been flattened; keys, values, and ordering are unchanged.
# Confirm the hierarchy against the code that loads this file.

data:
  raw_path: "data/raw/OHLCV.csv"
  enriched_path: "data/enriched/features.csv"
  labeled_path: "data/labeled/dataset.csv"
  annotations_path: "data/annotations/export.json"

stages:
  feature_engineering:
    enabled: true

    # TA-Lib technical indicators
    # Each entry names an indicator and the keyword parameters passed to it.
    talib_indicators:
      - name: "RSI"
        params:
          timeperiod: 14
      - name: "EMA"
        params:
          timeperiod: 20
      - name: "EMA"
        params:
          timeperiod: 50
      - name: "MACD"
        params:
          fastperiod: 12
          slowperiod: 26
          signalperiod: 9
      - name: "BBANDS"
        params:
          timeperiod: 20
          nbdevup: 2
          nbdevdn: 2
      - name: "ATR"
        params:
          timeperiod: 14
      - name: "ADX"
        params:
          timeperiod: 14
      - name: "CCI"
        params:
          timeperiod: 14
      - name: "MFI"
        params:
          timeperiod: 14
      - name: "STOCH"
        params:
          fastk_period: 14
          slowk_period: 3
          slowd_period: 3

    # Candle-derived features
    candle_features: true

    # Custom feature functions (module paths)
    custom_features: []

  annotation_ingestion:
    enabled: true

    # Label encoding: "window" or "bio"
    label_encoding: "window"

    # For windowed classification
    window_size: 30

    # Context padding (candles before/after)
    context_padding: 20

    # Minimum confidence for human annotations
    min_confidence: 1

    # Programmatic TA-Lib pattern labels
    programmatic_labels:
      enabled: true
      talib_patterns:
        - "CDLENGULFING"
        - "CDLHAMMER"
        - "CDLINVERTEDHAMMER"
        - "CDLSHOOTINGSTAR"
        - "CDLDOJI"
        - "CDLDOJISTAR"
        - "CDLMORNINGSTAR"
        - "CDLEVENINGSTAR"
        - "CDLHARAMI"
        - "CDLPIERCING"
        - "CDLDARKCLOUDCOVER"
        - "CDL3WHITESOLDIERS"
        - "CDL3BLACKCROWS"

    # Label merge strategy: "human_priority", "programmatic_priority", "both"
    # NOTE(review): placed at the annotation_ingestion level; it may instead
    # belong under programmatic_labels — confirm against the loader.
    merge_strategy: "human_priority"

  training:
    enabled: true

    # Model type: "random_forest", "xgboost"
    model_type: "random_forest"

    # Train/test split
    split_method: "temporal"  # "temporal" or "random"
    test_split: 0.2
    validation_split: 0.1

    # Class balancing
    class_weights: "balanced"  # "balanced" or null

    # Hyperparameters (model-specific)
    hyperparameters:
      # RandomForest
      n_estimators: 200
      max_depth: 15
      min_samples_split: 5
      min_samples_leaf: 2
      random_state: 42
      n_jobs: -1

      # XGBoost (when model_type is "xgboost")
      # n_estimators: 500
      # max_depth: 6
      # learning_rate: 0.01
      # subsample: 0.8
      # colsample_bytree: 0.8
      # random_state: 42
      # n_jobs: -1

    # MLflow settings
    # NOTE(review): assumed to be nested under training — confirm.
    mlflow:
      # "mlflow" is the service hostname on Docker's internal network,
      # not localhost, so containers can reach the tracking server.
      tracking_uri: "http://mlflow:5000"
      experiment_name: "candlestick_patterns"
      log_artifacts: true
      register_model: false  # Set to true to register in model registry

  inference:
    enabled: true

    # Model source: "mlflow" or "local"
    model_source: "local"

    # For MLflow source
    mlflow_model_name: "candlestick_pattern_v1"
    mlflow_model_stage: "Production"  # "Production", "Staging", "None"

    # For local source
    local_model_path: "models/best_model.pkl"

    # Batch processing
    batch_size: 1000

    # Preprocessing config loaded from MLflow artifact or use current config
    use_training_config: true