fix(ml): add windowed feature flattening for inference parity
The model was trained on 94-candle sliding windows flattened to 2820 features (94 candles x 30 features), but inference was sending raw per-candle features (27 columns).

Changes:
- Rewrite preprocessing to return an (X, window_times) tuple
- Add sliding window creation with correct feature ordering
- Fill missing columns (average, barCount) with 0 for feature parity
- Fill NaN values from indicator warmup with 0 instead of dropping rows
- Always compute all indicators (including MFI) for feature parity
- Update the predict and batch predict endpoints for the new signature
This commit is contained in:
parent
4c7b3f2676
commit
40d6d1739e
2 changed files with 111 additions and 63 deletions
|
|
@ -524,14 +524,8 @@ async def predict(request: PredictRequest):
|
|||
# Convert candles to list of dicts
|
||||
candles_data = [candle.model_dump() for candle in request.candles]
|
||||
|
||||
# Preprocess candles (feature engineering)
|
||||
df_preprocessed = preprocess_candles(candles_data, state.pipeline_config)
|
||||
|
||||
# Keep times for results mapping
|
||||
times = df_preprocessed['time'].values
|
||||
|
||||
# Extract feature columns (exclude 'time')
|
||||
X = extract_feature_columns(df_preprocessed)
|
||||
# Preprocess candles (feature engineering + windowing)
|
||||
X, window_times = preprocess_candles(candles_data, state.pipeline_config)
|
||||
|
||||
# Get predictions and probabilities
|
||||
if hasattr(state.model, 'predict_proba'):
|
||||
|
|
@ -547,20 +541,18 @@ async def predict(request: PredictRequest):
|
|||
|
||||
# Get label names (handle both string and int predictions)
|
||||
if state.label_encoder is not None:
|
||||
# Model predicts integers, map to labels
|
||||
labels = [state.label_encoder.get(int(pred), f"unknown_{pred}") for pred in y_pred]
|
||||
else:
|
||||
# Model predicts strings directly
|
||||
labels = [str(pred) for pred in y_pred]
|
||||
|
||||
# Build per-candle predictions
|
||||
# Build per-window predictions (each window maps to its last candle time)
|
||||
predictions = [
|
||||
PredictionResult(
|
||||
time=int(time),
|
||||
label=label,
|
||||
confidence=float(conf)
|
||||
)
|
||||
for time, label, conf in zip(times, labels, confidences)
|
||||
for time, label, conf in zip(window_times, labels, confidences)
|
||||
]
|
||||
|
||||
# Group into spans
|
||||
|
|
@ -577,7 +569,7 @@ async def predict(request: PredictRequest):
|
|||
)
|
||||
|
||||
logger.info(
|
||||
f"Prediction complete: {len(predictions)} candles, "
|
||||
f"Prediction complete: {len(predictions)} windows, "
|
||||
f"{len(spans)} spans, {len([p for p in predictions if p.label != 'O'])} patterns"
|
||||
)
|
||||
|
||||
|
|
@ -675,14 +667,8 @@ async def predict_batch(request: BatchPredictRequest):
|
|||
# Convert batch to candles format
|
||||
batch_candles = batch_df.to_dict('records')
|
||||
|
||||
# Preprocess
|
||||
df_preprocessed = preprocess_candles(batch_candles, state.pipeline_config)
|
||||
|
||||
# Keep times
|
||||
times = df_preprocessed['time'].values
|
||||
|
||||
# Extract features
|
||||
X = extract_feature_columns(df_preprocessed)
|
||||
# Preprocess (feature engineering + windowing)
|
||||
X, window_times = preprocess_candles(batch_candles, state.pipeline_config)
|
||||
|
||||
# Predict
|
||||
if hasattr(state.model, 'predict_proba'):
|
||||
|
|
@ -706,7 +692,7 @@ async def predict_batch(request: BatchPredictRequest):
|
|||
label=label,
|
||||
confidence=float(conf)
|
||||
)
|
||||
for time, label, conf in zip(times, labels, confidences)
|
||||
for time, label, conf in zip(window_times, labels, confidences)
|
||||
]
|
||||
|
||||
all_predictions.extend(batch_predictions)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue