feat: add ML service scaffolding with Python FastAPI, Docker, and MLflow setup
This commit is contained in:
parent
92abab5316
commit
1a653c5866
18 changed files with 1952 additions and 2593 deletions
152
services/ml/config/pipeline.yaml
Normal file
152
services/ml/config/pipeline.yaml
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
# ML Pipeline Configuration
|
||||
# Full config for feature engineering, annotation ingestion, training, and inference
|
||||
|
||||
# File locations for the raw, enriched, labeled, and annotation data.
data:
  raw_path: 'data/raw/OHLCV.csv'
  enriched_path: 'data/enriched/features.csv'
  labeled_path: 'data/labeled/dataset.csv'
  annotations_path: 'data/annotations/export.json'
|
||||
|
||||
stages:
|
||||
feature_engineering:
  enabled: true

  # TA-Lib technical indicators: one entry per indicator, given as a name
  # plus the parameters for that indicator. EMA is listed twice (periods
  # 20 and 50).
  talib_indicators:
    - name: 'RSI'
      params: {timeperiod: 14}
    - name: 'EMA'
      params: {timeperiod: 20}
    - name: 'EMA'
      params: {timeperiod: 50}
    - name: 'MACD'
      params: {fastperiod: 12, slowperiod: 26, signalperiod: 9}
    - name: 'BBANDS'
      params: {timeperiod: 20, nbdevup: 2, nbdevdn: 2}
    - name: 'ATR'
      params: {timeperiod: 14}
    - name: 'ADX'
      params: {timeperiod: 14}
    - name: 'CCI'
      params: {timeperiod: 14}
    - name: 'MFI'
      params: {timeperiod: 14}
    - name: 'STOCH'
      params: {fastk_period: 14, slowk_period: 3, slowd_period: 3}

  # Candle-derived features
  candle_features: true

  # Custom feature functions, given as module paths (none configured)
  custom_features: []
|
||||
|
||||
annotation_ingestion:
  enabled: true

  # Label encoding scheme: 'window' or 'bio'
  label_encoding: 'window'

  # Window size, used for windowed classification
  window_size: 30

  # Context padding, in candles before/after
  context_padding: 20

  # Minimum confidence for human annotations
  # NOTE(review): the confidence scale is not defined in this file - confirm
  # what a value of 1 means.
  min_confidence: 1
|
||||
|
||||
# Programmatic TA-Lib pattern labels
programmatic_labels:
  enabled: true
  # Candlestick pattern identifiers, spelled exactly as the TA-Lib function
  # names. TA-Lib names the "three ..." patterns with a digit
  # (CDL3WHITESOLDIERS, CDL3BLACKCROWS); the spelled-out forms
  # CDLTHREEWHITESOLDIERS / CDLTHREEBLACKCROWS do not exist in TA-Lib.
  # NOTE(review): presumably these are resolved by name against TA-Lib -
  # confirm against the loader.
  talib_patterns:
    - "CDLENGULFING"
    - "CDLHAMMER"
    - "CDLINVERTEDHAMMER"
    - "CDLSHOOTINGSTAR"
    - "CDLDOJI"
    - "CDLDOJISTAR"
    - "CDLMORNINGSTAR"
    - "CDLEVENINGSTAR"
    - "CDLHARAMI"
    - "CDLPIERCING"
    - "CDLDARKCLOUDCOVER"
    - "CDL3WHITESOLDIERS"
    - "CDL3BLACKCROWS"
|
||||
|
||||
# How labels are merged; one of 'human_priority', 'programmatic_priority',
# or 'both'.
merge_strategy: 'human_priority'
|
||||
|
||||
training:
  enabled: true

  # Model type: 'random_forest' or 'xgboost'
  model_type: 'random_forest'

  # Train/test split. split_method is 'temporal' or 'random'.
  split_method: 'temporal'
  test_split: 0.2
  validation_split: 0.1

  # Class balancing: 'balanced' or null
  class_weights: 'balanced'

  # Model-specific hyperparameters. The active values are the RandomForest
  # set; the XGBoost set is kept commented out below.
  hyperparameters:
    # RandomForest
    n_estimators: 200
    max_depth: 15
    min_samples_split: 5
    min_samples_leaf: 2
    random_state: 42
    n_jobs: -1

    # XGBoost (when model_type is 'xgboost')
    # n_estimators: 500
    # max_depth: 6
    # learning_rate: 0.01
    # subsample: 0.8
    # colsample_bytree: 0.8
    # random_state: 42
    # n_jobs: -1
|
||||
|
||||
# MLflow settings: tracking server, experiment name, and logging switches.
mlflow:
  tracking_uri: 'http://mlflow:5000'
  experiment_name: 'candlestick_patterns'
  log_artifacts: true
  # Set to true to register the model in the model registry
  register_model: false
|
||||
|
||||
inference:
  enabled: true

  # Model source: 'mlflow' or 'local'
  model_source: 'local'

  # For the MLflow source
  mlflow_model_name: 'candlestick_pattern_v1'
  # Stage: 'Production', 'Staging', or 'None'
  mlflow_model_stage: 'Production'

  # For the local source
  # NOTE(review): the .pkl extension suggests a pickle file - confirm it is
  # only ever loaded from a trusted location.
  local_model_path: 'models/best_model.pkl'

  # Batch processing
  batch_size: 1000

  # Preprocessing config: loaded from the MLflow artifact, or use the current
  # config.
  # NOTE(review): presumably true selects the config saved at training time -
  # confirm against the loader.
  use_training_config: true
|
||||
Loading…
Add table
Add a link
Reference in a new issue