candle-annotator/services/ml/mlruns/358560345319124639/26c245a4e6dc45e8b11e617e5702be1a/artifacts/pipeline_config.yaml
Marko Djordjevic aa81d4f3d0 fix(ml): complete ML pipeline fixes and setup
- Fix CCI indicator to use HLC prices instead of close only
- Parse datetime column when loading enriched CSV
- Strip timezone from annotation timestamps
- Fix TA-Lib pattern names (CDL3WHITESOLDIERS, CDL3BLACKCROWS)
- Exclude programmatic label columns from training features
- Fix classification report to handle missing classes
- Update MLflow tracking to use localhost:5000
- Grant PostgreSQL permissions to ml_user

Pipeline now runs successfully end-to-end:
- Feature engineering: 2543 rows, 31 columns
- Annotation ingestion: 286 samples
- Training: 89.47% test accuracy with Random Forest
2026-02-15 21:29:54 +01:00

97 lines
2.1 KiB
YAML

# Relative paths of the data files this pipeline configuration refers to.
data:
  annotations_path: data/annotations/export.json
  enriched_path: data/enriched/features.csv
  labeled_path: data/labeled/dataset.csv
  raw_path: data/raw/OHLCV.csv
# Per-stage settings; every stage below carries its own `enabled` switch.
# NOTE(review): the nesting was reconstructed from a flattened copy using the
# alphabetical key order inside each mapping — confirm against the pipeline's
# config loader before relying on it.
stages:
  annotation_ingestion:
    context_padding: 20
    enabled: true
    label_encoding: window
    merge_strategy: human_priority
    min_confidence: 1
    programmatic_labels:
      enabled: true
      # TA-Lib candlestick pattern-recognition function names.
      talib_patterns:
        - CDLENGULFING
        - CDLHAMMER
        - CDLINVERTEDHAMMER
        - CDLSHOOTINGSTAR
        - CDLDOJI
        - CDLDOJISTAR
        - CDLMORNINGSTAR
        - CDLEVENINGSTAR
        - CDLHARAMI
        - CDLPIERCING
        - CDLDARKCLOUDCOVER
        - CDL3WHITESOLDIERS
        - CDL3BLACKCROWS
    # NOTE(review): kept at stage level next to `label_encoding: window`; key
    # order alone cannot rule out that it belongs under `programmatic_labels`.
    window_size: 30

  feature_engineering:
    candle_features: true
    custom_features: []
    enabled: true
    # Each entry names a TA-Lib indicator plus the keyword parameters for it.
    # EMA is listed twice, once per period (20 and 50).
    talib_indicators:
      - name: RSI
        params:
          timeperiod: 14
      - name: EMA
        params:
          timeperiod: 20
      - name: EMA
        params:
          timeperiod: 50
      - name: MACD
        params:
          fastperiod: 12
          signalperiod: 9
          slowperiod: 26
      - name: BBANDS
        params:
          nbdevdn: 2
          nbdevup: 2
          timeperiod: 20
      - name: ATR
        params:
          timeperiod: 14
      - name: ADX
        params:
          timeperiod: 14
      - name: CCI
        params:
          timeperiod: 14
      - name: MFI
        params:
          timeperiod: 14
      - name: STOCH
        params:
          fastk_period: 14
          slowd_period: 3
          slowk_period: 3

  inference:
    batch_size: 1000
    enabled: true
    local_model_path: models/best_model.pkl
    mlflow_model_name: candlestick_pattern_v1
    mlflow_model_stage: Production
    # Presumably `local` selects local_model_path over the mlflow_model_*
    # registry settings — TODO confirm against the inference stage.
    model_source: local
    use_training_config: true

  training:
    class_weights: balanced
    enabled: true
    # Presumably forwarded to the estimator chosen by `model_type`; for
    # scikit-learn's random forest, n_jobs: -1 means "use all processors".
    hyperparameters:
      max_depth: 15
      min_samples_leaf: 2
      min_samples_split: 5
      n_estimators: 200
      n_jobs: -1
      random_state: 42
    mlflow:
      experiment_name: candlestick_patterns
      log_artifacts: true
      register_model: false
      # Points at a tracking server on the local machine; adjust when the
      # pipeline runs on a different host than the MLflow server.
      tracking_uri: http://localhost:5000
    model_type: random_forest
    split_method: temporal
    # Presumably fractions of the dataset held out for test and validation —
    # confirm how they combine with `split_method`.
    test_split: 0.2
    validation_split: 0.1