fix(ml): add windowed feature flattening for inference parity

The model was trained on 94-candle sliding windows flattened to 2820
features (94 candles x 30 features). Inference was sending raw per-candle
features (27 columns), so the inference input did not match the shape the
model was trained on.

Changes:
- Rewrite preprocessing to return an (X, window_times) tuple
- Add sliding window creation with correct feature ordering
- Fill missing columns (average, barCount) with 0 for feature parity
- Fill NaN from indicator warmup with 0 instead of dropping rows
- Always compute all indicators (including MFI) for feature parity
- Update predict and batch predict endpoints for the new preprocessing return value
This commit is contained in:
Marko Djordjevic 2026-02-15 22:07:06 +01:00
parent 4c7b3f2676
commit 40d6d1739e
2 changed files with 111 additions and 63 deletions

View file

@@ -524,14 +524,8 @@ async def predict(request: PredictRequest):
# Convert candles to list of dicts
candles_data = [candle.model_dump() for candle in request.candles]
-# Preprocess candles (feature engineering)
-df_preprocessed = preprocess_candles(candles_data, state.pipeline_config)
-# Keep times for results mapping
-times = df_preprocessed['time'].values
-# Extract feature columns (exclude 'time')
-X = extract_feature_columns(df_preprocessed)
+# Preprocess candles (feature engineering + windowing)
+X, window_times = preprocess_candles(candles_data, state.pipeline_config)
# Get predictions and probabilities
if hasattr(state.model, 'predict_proba'):
@@ -547,20 +541,18 @@ async def predict(request: PredictRequest):
# Get label names (handle both string and int predictions)
if state.label_encoder is not None:
# Model predicts integers, map to labels
labels = [state.label_encoder.get(int(pred), f"unknown_{pred}") for pred in y_pred]
else:
# Model predicts strings directly
labels = [str(pred) for pred in y_pred]
-# Build per-candle predictions
+# Build per-window predictions (each window maps to its last candle time)
predictions = [
PredictionResult(
time=int(time),
label=label,
confidence=float(conf)
)
-for time, label, conf in zip(times, labels, confidences)
+for time, label, conf in zip(window_times, labels, confidences)
]
# Group into spans
@@ -577,7 +569,7 @@ async def predict(request: PredictRequest):
)
logger.info(
-f"Prediction complete: {len(predictions)} candles, "
+f"Prediction complete: {len(predictions)} windows, "
f"{len(spans)} spans, {len([p for p in predictions if p.label != 'O'])} patterns"
)
@@ -675,14 +667,8 @@ async def predict_batch(request: BatchPredictRequest):
# Convert batch to candles format
batch_candles = batch_df.to_dict('records')
-# Preprocess
-df_preprocessed = preprocess_candles(batch_candles, state.pipeline_config)
-# Keep times
-times = df_preprocessed['time'].values
-# Extract features
-X = extract_feature_columns(df_preprocessed)
+# Preprocess (feature engineering + windowing)
+X, window_times = preprocess_candles(batch_candles, state.pipeline_config)
# Predict
if hasattr(state.model, 'predict_proba'):
@@ -706,7 +692,7 @@ async def predict_batch(request: BatchPredictRequest):
label=label,
confidence=float(conf)
)
-for time, label, conf in zip(times, labels, confidences)
+for time, label, conf in zip(window_times, labels, confidences)
]
all_predictions.extend(batch_predictions)