fix(ml): complete ML pipeline fixes and setup
- Fix CCI indicator to use HLC prices instead of close only
- Parse datetime column when loading enriched CSV
- Strip timezone from annotation timestamps
- Fix TA-Lib pattern names (CDL3WHITESOLDIERS, CDL3BLACKCROWS)
- Exclude programmatic label columns from training features
- Fix classification report to handle missing classes
- Update MLflow tracking to use localhost:5000
- Grant PostgreSQL permissions to ml_user

Pipeline now runs successfully end-to-end:
- Feature engineering: 2543 rows, 31 columns
- Annotation ingestion: 286 samples
- Training: 89.47% test accuracy with Random Forest
This commit is contained in:
parent
ceb4103ec4
commit
aa81d4f3d0
348 changed files with 1327 additions and 11 deletions
Binary file not shown.
|
|
@ -15,6 +15,11 @@ from sqlalchemy.orm import sessionmaker, Session
|
|||
from sqlalchemy.sql import func
|
||||
|
||||
|
||||
# CREATE DATABASE ml_service;
|
||||
# CREATE USER ml_user WITH ENCRYPTED PASSWORD 'ml_password';
|
||||
# GRANT ALL PRIVILEGES ON DATABASE ml_service TO ml_user;
|
||||
|
||||
|
||||
# Database connection configuration from environment
|
||||
DATABASE_URL = os.getenv(
|
||||
"DATABASE_URL",
|
||||
|
|
@ -43,9 +48,9 @@ Base = declarative_base()
|
|||
# Training runs model
|
||||
class TrainingRun(Base):
|
||||
"""Model for tracking ML training runs."""
|
||||
|
||||
|
||||
__tablename__ = "training_runs"
|
||||
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
run_id = Column(String(255), unique=True, nullable=False, index=True)
|
||||
model_type = Column(String(100), nullable=False)
|
||||
|
|
@ -56,7 +61,7 @@ class TrainingRun(Base):
|
|||
status = Column(String(50), nullable=False, default="running", index=True)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), index=True)
|
||||
completed_at = Column(DateTime(timezone=True))
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return f"<TrainingRun(run_id='{self.run_id}', status='{self.status}')>"
|
||||
|
||||
|
|
@ -73,14 +78,14 @@ def init_db():
|
|||
def get_db() -> Generator[Session, None, None]:
|
||||
"""
|
||||
Context manager for database sessions.
|
||||
|
||||
|
||||
Usage:
|
||||
with get_db() as db:
|
||||
# Use db session here
|
||||
training_run = TrainingRun(run_id="123", ...)
|
||||
db.add(training_run)
|
||||
db.commit()
|
||||
|
||||
|
||||
Yields:
|
||||
Database session
|
||||
"""
|
||||
|
|
@ -94,12 +99,12 @@ def get_db() -> Generator[Session, None, None]:
|
|||
def get_db_session() -> Session:
|
||||
"""
|
||||
Get a database session (for dependency injection).
|
||||
|
||||
|
||||
Usage with FastAPI:
|
||||
@app.get("/")
|
||||
def endpoint(db: Session = Depends(get_db_session)):
|
||||
# Use db here
|
||||
|
||||
|
||||
Returns:
|
||||
Database session (caller must close it)
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -127,7 +127,7 @@ stages:
|
|||
|
||||
# MLflow settings
|
||||
mlflow:
|
||||
tracking_uri: "http://mlflow:5000"
|
||||
tracking_uri: "http://localhost:5000"
|
||||
experiment_name: "candlestick_patterns"
|
||||
log_artifacts: true
|
||||
register_model: false # Set to true to register in model registry
|
||||
|
|
|
|||
Binary file not shown.
|
After Width: | Height: | Size: 55 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 79 KiB |
|
|
@ -0,0 +1,97 @@
|
|||
data:
|
||||
annotations_path: data/annotations/export.json
|
||||
enriched_path: data/enriched/features.csv
|
||||
labeled_path: data/labeled/dataset.csv
|
||||
raw_path: data/raw/OHLCV.csv
|
||||
stages:
|
||||
annotation_ingestion:
|
||||
context_padding: 20
|
||||
enabled: true
|
||||
label_encoding: window
|
||||
merge_strategy: human_priority
|
||||
min_confidence: 1
|
||||
programmatic_labels:
|
||||
enabled: true
|
||||
talib_patterns:
|
||||
- CDLENGULFING
|
||||
- CDLHAMMER
|
||||
- CDLINVERTEDHAMMER
|
||||
- CDLSHOOTINGSTAR
|
||||
- CDLDOJI
|
||||
- CDLDOJISTAR
|
||||
- CDLMORNINGSTAR
|
||||
- CDLEVENINGSTAR
|
||||
- CDLHARAMI
|
||||
- CDLPIERCING
|
||||
- CDLDARKCLOUDCOVER
|
||||
- CDL3WHITESOLDIERS
|
||||
- CDL3BLACKCROWS
|
||||
window_size: 30
|
||||
feature_engineering:
|
||||
candle_features: true
|
||||
custom_features: []
|
||||
enabled: true
|
||||
talib_indicators:
|
||||
- name: RSI
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: EMA
|
||||
params:
|
||||
timeperiod: 20
|
||||
- name: EMA
|
||||
params:
|
||||
timeperiod: 50
|
||||
- name: MACD
|
||||
params:
|
||||
fastperiod: 12
|
||||
signalperiod: 9
|
||||
slowperiod: 26
|
||||
- name: BBANDS
|
||||
params:
|
||||
nbdevdn: 2
|
||||
nbdevup: 2
|
||||
timeperiod: 20
|
||||
- name: ATR
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: ADX
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: CCI
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: MFI
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: STOCH
|
||||
params:
|
||||
fastk_period: 14
|
||||
slowd_period: 3
|
||||
slowk_period: 3
|
||||
inference:
|
||||
batch_size: 1000
|
||||
enabled: true
|
||||
local_model_path: models/best_model.pkl
|
||||
mlflow_model_name: candlestick_pattern_v1
|
||||
mlflow_model_stage: Production
|
||||
model_source: local
|
||||
use_training_config: true
|
||||
training:
|
||||
class_weights: balanced
|
||||
enabled: true
|
||||
hyperparameters:
|
||||
max_depth: 15
|
||||
min_samples_leaf: 2
|
||||
min_samples_split: 5
|
||||
n_estimators: 200
|
||||
n_jobs: -1
|
||||
random_state: 42
|
||||
mlflow:
|
||||
experiment_name: candlestick_patterns
|
||||
log_artifacts: true
|
||||
register_model: false
|
||||
tracking_uri: http://localhost:5000
|
||||
model_type: random_forest
|
||||
split_method: temporal
|
||||
test_split: 0.2
|
||||
validation_split: 0.1
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
artifact_uri: file:///home/homoludens/projekti/bitcon/candle_annotator/services/ml/mlruns/358560345319124639/0114b8ddfb2a45dabcb10e9836872de0/artifacts
|
||||
end_time: 1771187325601
|
||||
entry_point_name: ''
|
||||
experiment_id: '358560345319124639'
|
||||
lifecycle_stage: active
|
||||
run_id: 0114b8ddfb2a45dabcb10e9836872de0
|
||||
run_name: abundant-crane-122
|
||||
source_name: ''
|
||||
source_type: 4
|
||||
source_version: ''
|
||||
start_time: 1771187322888
|
||||
status: 4
|
||||
tags: []
|
||||
user_id: homoludens
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187324134 0.8947368421052632 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187324195 0.8928571428571429 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187324229 0.896551724137931 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187324147 0.8947044334975369 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187324267 0.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187324158 0.8949312937516204 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187324176 0.8064516129032258 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187324206 1.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187324241 0.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187324185 1.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187324217 0.8125 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187324252 0.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187323996 1.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187324007 1.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187324018 1.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
balanced
|
||||
|
|
@ -0,0 +1 @@
|
|||
15
|
||||
|
|
@ -0,0 +1 @@
|
|||
2
|
||||
|
|
@ -0,0 +1 @@
|
|||
5
|
||||
|
|
@ -0,0 +1 @@
|
|||
random_forest
|
||||
|
|
@ -0,0 +1 @@
|
|||
3
|
||||
|
|
@ -0,0 +1 @@
|
|||
200
|
||||
|
|
@ -0,0 +1 @@
|
|||
2820
|
||||
|
|
@ -0,0 +1 @@
|
|||
-1
|
||||
|
|
@ -0,0 +1 @@
|
|||
57
|
||||
|
|
@ -0,0 +1 @@
|
|||
207
|
||||
|
|
@ -0,0 +1 @@
|
|||
22
|
||||
|
|
@ -0,0 +1 @@
|
|||
42
|
||||
|
|
@ -0,0 +1 @@
|
|||
temporal
|
||||
|
|
@ -0,0 +1 @@
|
|||
0.2
|
||||
|
|
@ -0,0 +1 @@
|
|||
121
|
||||
|
|
@ -0,0 +1 @@
|
|||
84
|
||||
|
|
@ -0,0 +1 @@
|
|||
2
|
||||
|
|
@ -0,0 +1 @@
|
|||
0.1
|
||||
|
|
@ -0,0 +1 @@
|
|||
abundant-crane-122
|
||||
|
|
@ -0,0 +1 @@
|
|||
ceb4103ec4eefe0f0b6444db2bab6efbe6526f5e
|
||||
|
|
@ -0,0 +1 @@
|
|||
pipeline.py
|
||||
|
|
@ -0,0 +1 @@
|
|||
LOCAL
|
||||
|
|
@ -0,0 +1 @@
|
|||
homoludens
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 55 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 79 KiB |
|
|
@ -0,0 +1,97 @@
|
|||
data:
|
||||
annotations_path: data/annotations/export.json
|
||||
enriched_path: data/enriched/features.csv
|
||||
labeled_path: data/labeled/dataset.csv
|
||||
raw_path: data/raw/OHLCV.csv
|
||||
stages:
|
||||
annotation_ingestion:
|
||||
context_padding: 20
|
||||
enabled: true
|
||||
label_encoding: window
|
||||
merge_strategy: human_priority
|
||||
min_confidence: 1
|
||||
programmatic_labels:
|
||||
enabled: true
|
||||
talib_patterns:
|
||||
- CDLENGULFING
|
||||
- CDLHAMMER
|
||||
- CDLINVERTEDHAMMER
|
||||
- CDLSHOOTINGSTAR
|
||||
- CDLDOJI
|
||||
- CDLDOJISTAR
|
||||
- CDLMORNINGSTAR
|
||||
- CDLEVENINGSTAR
|
||||
- CDLHARAMI
|
||||
- CDLPIERCING
|
||||
- CDLDARKCLOUDCOVER
|
||||
- CDL3WHITESOLDIERS
|
||||
- CDL3BLACKCROWS
|
||||
window_size: 30
|
||||
feature_engineering:
|
||||
candle_features: true
|
||||
custom_features: []
|
||||
enabled: true
|
||||
talib_indicators:
|
||||
- name: RSI
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: EMA
|
||||
params:
|
||||
timeperiod: 20
|
||||
- name: EMA
|
||||
params:
|
||||
timeperiod: 50
|
||||
- name: MACD
|
||||
params:
|
||||
fastperiod: 12
|
||||
signalperiod: 9
|
||||
slowperiod: 26
|
||||
- name: BBANDS
|
||||
params:
|
||||
nbdevdn: 2
|
||||
nbdevup: 2
|
||||
timeperiod: 20
|
||||
- name: ATR
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: ADX
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: CCI
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: MFI
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: STOCH
|
||||
params:
|
||||
fastk_period: 14
|
||||
slowd_period: 3
|
||||
slowk_period: 3
|
||||
inference:
|
||||
batch_size: 1000
|
||||
enabled: true
|
||||
local_model_path: models/best_model.pkl
|
||||
mlflow_model_name: candlestick_pattern_v1
|
||||
mlflow_model_stage: Production
|
||||
model_source: local
|
||||
use_training_config: true
|
||||
training:
|
||||
class_weights: balanced
|
||||
enabled: true
|
||||
hyperparameters:
|
||||
max_depth: 15
|
||||
min_samples_leaf: 2
|
||||
min_samples_split: 5
|
||||
n_estimators: 200
|
||||
n_jobs: -1
|
||||
random_state: 42
|
||||
mlflow:
|
||||
experiment_name: candlestick_patterns
|
||||
log_artifacts: true
|
||||
register_model: false
|
||||
tracking_uri: http://localhost:5000
|
||||
model_type: random_forest
|
||||
split_method: temporal
|
||||
test_split: 0.2
|
||||
validation_split: 0.1
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
artifact_uri: file:///home/homoludens/projekti/bitcon/candle_annotator/services/ml/mlruns/358560345319124639/26c245a4e6dc45e8b11e617e5702be1a/artifacts
|
||||
end_time: 1771187348138
|
||||
entry_point_name: ''
|
||||
experiment_id: '358560345319124639'
|
||||
lifecycle_stage: active
|
||||
run_id: 26c245a4e6dc45e8b11e617e5702be1a
|
||||
run_name: bald-bear-921
|
||||
source_name: ''
|
||||
source_type: 4
|
||||
source_version: ''
|
||||
start_time: 1771187345480
|
||||
status: 4
|
||||
tags: []
|
||||
user_id: homoludens
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187346671 0.8947368421052632 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187346744 0.8928571428571429 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187346777 0.896551724137931 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187346686 0.8947044334975369 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187346814 0.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187346702 0.8949312937516204 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187346722 0.8064516129032258 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187346754 1.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187346786 0.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187346733 1.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187346766 0.8125 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187346801 0.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187346550 1.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187346559 1.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187346568 1.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
balanced
|
||||
|
|
@ -0,0 +1 @@
|
|||
15
|
||||
|
|
@ -0,0 +1 @@
|
|||
2
|
||||
|
|
@ -0,0 +1 @@
|
|||
5
|
||||
|
|
@ -0,0 +1 @@
|
|||
random_forest
|
||||
|
|
@ -0,0 +1 @@
|
|||
3
|
||||
|
|
@ -0,0 +1 @@
|
|||
200
|
||||
|
|
@ -0,0 +1 @@
|
|||
2820
|
||||
|
|
@ -0,0 +1 @@
|
|||
-1
|
||||
|
|
@ -0,0 +1 @@
|
|||
57
|
||||
|
|
@ -0,0 +1 @@
|
|||
207
|
||||
|
|
@ -0,0 +1 @@
|
|||
22
|
||||
|
|
@ -0,0 +1 @@
|
|||
42
|
||||
|
|
@ -0,0 +1 @@
|
|||
temporal
|
||||
|
|
@ -0,0 +1 @@
|
|||
0.2
|
||||
|
|
@ -0,0 +1 @@
|
|||
121
|
||||
|
|
@ -0,0 +1 @@
|
|||
84
|
||||
|
|
@ -0,0 +1 @@
|
|||
2
|
||||
|
|
@ -0,0 +1 @@
|
|||
0.1
|
||||
|
|
@ -0,0 +1 @@
|
|||
bald-bear-921
|
||||
|
|
@ -0,0 +1 @@
|
|||
ceb4103ec4eefe0f0b6444db2bab6efbe6526f5e
|
||||
|
|
@ -0,0 +1 @@
|
|||
pipeline.py
|
||||
|
|
@ -0,0 +1 @@
|
|||
LOCAL
|
||||
|
|
@ -0,0 +1 @@
|
|||
homoludens
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 55 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 79 KiB |
|
|
@ -0,0 +1,97 @@
|
|||
data:
|
||||
annotations_path: data/annotations/export.json
|
||||
enriched_path: data/enriched/features.csv
|
||||
labeled_path: data/labeled/dataset.csv
|
||||
raw_path: data/raw/OHLCV.csv
|
||||
stages:
|
||||
annotation_ingestion:
|
||||
context_padding: 20
|
||||
enabled: true
|
||||
label_encoding: window
|
||||
merge_strategy: human_priority
|
||||
min_confidence: 1
|
||||
programmatic_labels:
|
||||
enabled: true
|
||||
talib_patterns:
|
||||
- CDLENGULFING
|
||||
- CDLHAMMER
|
||||
- CDLINVERTEDHAMMER
|
||||
- CDLSHOOTINGSTAR
|
||||
- CDLDOJI
|
||||
- CDLDOJISTAR
|
||||
- CDLMORNINGSTAR
|
||||
- CDLEVENINGSTAR
|
||||
- CDLHARAMI
|
||||
- CDLPIERCING
|
||||
- CDLDARKCLOUDCOVER
|
||||
- CDL3WHITESOLDIERS
|
||||
- CDL3BLACKCROWS
|
||||
window_size: 30
|
||||
feature_engineering:
|
||||
candle_features: true
|
||||
custom_features: []
|
||||
enabled: true
|
||||
talib_indicators:
|
||||
- name: RSI
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: EMA
|
||||
params:
|
||||
timeperiod: 20
|
||||
- name: EMA
|
||||
params:
|
||||
timeperiod: 50
|
||||
- name: MACD
|
||||
params:
|
||||
fastperiod: 12
|
||||
signalperiod: 9
|
||||
slowperiod: 26
|
||||
- name: BBANDS
|
||||
params:
|
||||
nbdevdn: 2
|
||||
nbdevup: 2
|
||||
timeperiod: 20
|
||||
- name: ATR
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: ADX
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: CCI
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: MFI
|
||||
params:
|
||||
timeperiod: 14
|
||||
- name: STOCH
|
||||
params:
|
||||
fastk_period: 14
|
||||
slowd_period: 3
|
||||
slowk_period: 3
|
||||
inference:
|
||||
batch_size: 1000
|
||||
enabled: true
|
||||
local_model_path: models/best_model.pkl
|
||||
mlflow_model_name: candlestick_pattern_v1
|
||||
mlflow_model_stage: Production
|
||||
model_source: local
|
||||
use_training_config: true
|
||||
training:
|
||||
class_weights: balanced
|
||||
enabled: true
|
||||
hyperparameters:
|
||||
max_depth: 15
|
||||
min_samples_leaf: 2
|
||||
min_samples_split: 5
|
||||
n_estimators: 200
|
||||
n_jobs: -1
|
||||
random_state: 42
|
||||
mlflow:
|
||||
experiment_name: candlestick_patterns
|
||||
log_artifacts: true
|
||||
register_model: false
|
||||
tracking_uri: http://localhost:5000
|
||||
model_type: random_forest
|
||||
split_method: temporal
|
||||
test_split: 0.2
|
||||
validation_split: 0.1
|
||||
|
|
@ -0,0 +1,14 @@
|
|||
artifact_uri: file:///home/homoludens/projekti/bitcon/candle_annotator/services/ml/mlruns/358560345319124639/808c2e97d90c40feb8d35a7e348c8e4c/artifacts
|
||||
end_time: 1771187283377
|
||||
entry_point_name: ''
|
||||
experiment_id: '358560345319124639'
|
||||
lifecycle_stage: active
|
||||
run_id: 808c2e97d90c40feb8d35a7e348c8e4c
|
||||
run_name: carefree-sheep-418
|
||||
source_name: ''
|
||||
source_type: 4
|
||||
source_version: ''
|
||||
start_time: 1771187280389
|
||||
status: 4
|
||||
tags: []
|
||||
user_id: homoludens
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187281809 0.8947368421052632 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187281869 0.8928571428571429 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187281896 0.896551724137931 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187281826 0.8947044334975369 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187281930 0.0 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187281836 0.8949312937516204 0
|
||||
|
|
@ -0,0 +1 @@
|
|||
1771187281852 0.8064516129032258 0
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue