fix(ml): complete ML pipeline fixes and setup
- Fix CCI indicator to use HLC prices instead of close only
- Parse datetime column when loading enriched CSV
- Strip timezone from annotation timestamps
- Fix TA-Lib pattern names (CDL3WHITESOLDIERS, CDL3BLACKCROWS)
- Exclude programmatic label columns from training features
- Fix classification report to handle missing classes
- Update MLflow tracking to use localhost:5000
- Grant PostgreSQL permissions to ml_user

Pipeline now runs successfully end-to-end:
- Feature engineering: 2543 rows, 31 columns
- Annotation ingestion: 286 samples
- Training: 89.47% test accuracy with Random Forest
This commit is contained in:
parent
ceb4103ec4
commit
aa81d4f3d0
348 changed files with 1327 additions and 11 deletions
|
|
@ -0,0 +1,8 @@
|
|||
precision recall f1-score support
|
||||
|
||||
Bearish Engulfing 0.8065 1.0000 0.8929 25
|
||||
Bullish Engulfing 1.0000 0.8125 0.8966 32
|
||||
|
||||
accuracy 0.8947 57
|
||||
macro avg 0.9032 0.9062 0.8947 57
|
||||
weighted avg 0.9151 0.8947 0.8949 57
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 55 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 79 KiB |
|
|
@ -0,0 +1,97 @@
|
|||
data:
|
||||
annotations_path: data/annotations/export.json
|
||||
enriched_path: data/enriched/features.csv
|
||||
labeled_path: data/labeled/dataset.csv
|
||||
raw_path: data/raw/OHLCV.csv
|
||||
stages:
|
||||
annotation_ingestion:
|
||||
context_padding: 20
|
||||
enabled: true
|
||||
label_encoding: window
|
||||
merge_strategy: human_priority
|
||||
min_confidence: 1
|
||||
programmatic_labels:
|
||||
enabled: true
|
||||
talib_patterns:
|
||||
- CDLENGULFING
|
||||
- CDLHAMMER
|
||||
- CDLINVERTEDHAMMER
|
||||
- CDLSHOOTINGSTAR
|
||||
- CDLDOJI
|
||||
- CDLDOJISTAR
|
||||
- CDLMORNINGSTAR
|
||||
- CDLEVENINGSTAR
|
||||
- CDLHARAMI
|
||||
- CDLPIERCING
|
||||
- CDLDARKCLOUDCOVER
|
||||
- CDL3WHITESOLDIERS
|
||||
- CDL3BLACKCROWS
|
||||
window_size: 30
|
||||
feature_engineering:
|
||||
candle_features: true
|
||||
custom_features: []
|
||||
enabled: true
|
||||
talib_indicators:
|
||||
- name: RSI
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: EMA
|
||||
params:
|
||||
timeperiod: 20
|
||||
- name: EMA
|
||||
params:
|
||||
timeperiod: 50
|
||||
- name: MACD
|
||||
params:
|
||||
fastperiod: 12
|
||||
signalperiod: 9
|
||||
slowperiod: 26
|
||||
- name: BBANDS
|
||||
params:
|
||||
nbdevdn: 2
|
||||
nbdevup: 2
|
||||
timeperiod: 20
|
||||
- name: ATR
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: ADX
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: CCI
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: MFI
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: STOCH
|
||||
params:
|
||||
fastk_period: 14
|
||||
slowd_period: 3
|
||||
slowk_period: 3
|
||||
inference:
|
||||
batch_size: 1000
|
||||
enabled: true
|
||||
local_model_path: models/best_model.pkl
|
||||
mlflow_model_name: candlestick_pattern_v1
|
||||
mlflow_model_stage: Production
|
||||
model_source: local
|
||||
use_training_config: true
|
||||
training:
|
||||
class_weights: balanced
|
||||
enabled: true
|
||||
hyperparameters:
|
||||
max_depth: 15
|
||||
min_samples_leaf: 2
|
||||
min_samples_split: 5
|
||||
n_estimators: 200
|
||||
n_jobs: -1
|
||||
random_state: 42
|
||||
mlflow:
|
||||
experiment_name: candlestick_patterns
|
||||
log_artifacts: true
|
||||
register_model: false
|
||||
tracking_uri: http://localhost:5000
|
||||
model_type: random_forest
|
||||
split_method: temporal
|
||||
test_split: 0.2
|
||||
validation_split: 0.1
|
||||
Loading…
Add table
Add a link
Reference in a new issue