feat: add ML service scaffolding with Python FastAPI, Docker, and MLflow setup

This commit is contained in:
Marko Djordjevic 2026-02-15 11:58:31 +01:00
parent 92abab5316
commit 1a653c5866
18 changed files with 1952 additions and 2593 deletions

28
services/ml/Dockerfile Normal file
View file

@ -0,0 +1,28 @@
# Image for the ML service: Python 3.11 plus the dependencies declared in
# pyproject.toml, running the FastAPI app under uvicorn on port 8001.
FROM python:3.11-slim

# Install system dependencies including TA-Lib C library.
# --no-install-recommends keeps optional "recommended" packages out of the
# image; the apt lists are deleted in the same layer so they are never stored.
# NOTE(review): confirm that `libta-lib-dev` is actually published in this base
# image's apt repositories; if it is not, this step fails and the TA-Lib C
# library has to be provided another way (e.g. built from source).
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        wget \
        libta-lib-dev \
        libpq-dev \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy dependency files first so the dependency layer below is only rebuilt
# when pyproject.toml changes, not on every source edit.
COPY pyproject.toml ./

# Install Python dependencies (pip installs the project described by
# pyproject.toml together with everything it declares).
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir .

# Copy application code
COPY . .

# Expose port for FastAPI
EXPOSE 8001

# Run the inference server by default
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8001"]

View file

@ -0,0 +1,152 @@
---
# ML Pipeline Configuration
# Full config for feature engineering, annotation ingestion, training, and inference
#
# Layout: `data` holds dataset file paths; `stages` holds one mapping per
# pipeline stage, each with its own `enabled` switch.
# NOTE(review): nesting was reconstructed from a view that had lost its
# indentation. Confirm the placement of `merge_strategy` (here: under
# `annotation_ingestion`) and `mlflow` (here: under `training`) against the
# code that loads this file.
data:
  raw_path: "data/raw/OHLCV.csv"
  enriched_path: "data/enriched/features.csv"
  labeled_path: "data/labeled/dataset.csv"
  annotations_path: "data/annotations/export.json"

stages:
  feature_engineering:
    enabled: true
    # TA-Lib technical indicators
    # Each entry names one indicator and the parameters passed to it; the same
    # indicator may appear more than once with different parameters (EMA 20/50).
    talib_indicators:
      - name: "RSI"
        params:
          timeperiod: 14
      - name: "EMA"
        params:
          timeperiod: 20
      - name: "EMA"
        params:
          timeperiod: 50
      - name: "MACD"
        params:
          fastperiod: 12
          slowperiod: 26
          signalperiod: 9
      - name: "BBANDS"
        params:
          timeperiod: 20
          nbdevup: 2
          nbdevdn: 2
      - name: "ATR"
        params:
          timeperiod: 14
      - name: "ADX"
        params:
          timeperiod: 14
      - name: "CCI"
        params:
          timeperiod: 14
      - name: "MFI"
        params:
          timeperiod: 14
      - name: "STOCH"
        params:
          fastk_period: 14
          slowk_period: 3
          slowd_period: 3
    # Candle-derived features
    candle_features: true
    # Custom feature functions (module paths)
    custom_features: []

  annotation_ingestion:
    enabled: true
    # Label encoding: "window" or "bio"
    label_encoding: "window"
    # For windowed classification
    window_size: 30
    # Context padding (candles before/after)
    context_padding: 20
    # Minimum confidence for human annotations
    min_confidence: 1
    # Programmatic TA-Lib pattern labels
    programmatic_labels:
      enabled: true
      talib_patterns:
        - "CDLENGULFING"
        - "CDLHAMMER"
        - "CDLINVERTEDHAMMER"
        - "CDLSHOOTINGSTAR"
        - "CDLDOJI"
        - "CDLDOJISTAR"
        - "CDLMORNINGSTAR"
        - "CDLEVENINGSTAR"
        - "CDLHARAMI"
        - "CDLPIERCING"
        - "CDLDARKCLOUDCOVER"
        - "CDLTHREEWHITESOLDIERS"
        - "CDLTHREEBLACKCROWS"
    # Label merge strategy: "human_priority", "programmatic_priority", "both"
    merge_strategy: "human_priority"

  training:
    enabled: true
    # Model type: "random_forest", "xgboost"
    model_type: "random_forest"
    # Train/test split
    split_method: "temporal"  # "temporal" or "random"
    test_split: 0.2
    validation_split: 0.1
    # Class balancing
    class_weights: "balanced"  # "balanced" or null
    # Hyperparameters (model-specific)
    hyperparameters:
      # RandomForest
      n_estimators: 200
      max_depth: 15
      min_samples_split: 5
      min_samples_leaf: 2
      random_state: 42
      n_jobs: -1
      # XGBoost (when model_type is "xgboost")
      # n_estimators: 500
      # max_depth: 6
      # learning_rate: 0.01
      # subsample: 0.8
      # colsample_bytree: 0.8
      # random_state: 42
      # n_jobs: -1
    # MLflow settings
    mlflow:
      tracking_uri: "http://mlflow:5000"
      experiment_name: "candlestick_patterns"
      log_artifacts: true
      register_model: false  # Set to true to register in model registry

  inference:
    enabled: true
    # Model source: "mlflow" or "local"
    model_source: "local"
    # For MLflow source
    mlflow_model_name: "candlestick_pattern_v1"
    mlflow_model_stage: "Production"  # "Production", "Staging", "None"
    # For local source
    local_model_path: "models/best_model.pkl"
    # Batch processing
    batch_size: 1000
    # Preprocessing config loaded from MLflow artifact or use current config
    use_training_config: true

View file

@ -0,0 +1,28 @@
# Packaging metadata for the ML service. The build backend is declared first,
# followed by the project table and its runtime requirements.
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "candle-ml"
version = "0.1.0"
description = "ML service for candlestick pattern recognition"
requires-python = ">=3.11"

# Runtime requirements, grouped by the kind of library. Every entry is a lower
# bound only; no upper bounds are pinned.
dependencies = [
    # Web framework and ASGI server
    "fastapi>=0.109.0",
    "uvicorn[standard]>=0.27.0",

    # Settings and data validation
    "pydantic>=2.5.0",
    "pydantic-settings>=2.1.0",
    "pyyaml>=6.0.1",

    # Numerics and data frames
    "numpy>=1.26.0",
    "pandas>=2.2.0",

    # Technical-analysis indicators (Python wrapper; needs the TA-Lib C library)
    "TA-Lib>=0.4.28",

    # Modelling and model persistence
    "scikit-learn>=1.4.0",
    "xgboost>=2.0.3",
    "joblib>=1.3.2",

    # Experiment tracking and data versioning
    "mlflow>=2.10.0",
    "dvc>=3.40.0",

    # Database access (PostgreSQL driver + SQL toolkit)
    "sqlalchemy>=2.0.25",
    "psycopg2-binary>=2.9.9",

    # Plotting
    "matplotlib>=3.8.2",
    "seaborn>=0.13.1",
]