feat: add candle time-sort and duplicate timestamp validation to POST /predict

Sort incoming candles by their time field in ascending chronological order before processing to ensure correct feature engineering regardless of input order. Also validate that no duplicate timestamps are present, returning HTTP 400 with details if duplicates are found. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-18 11:28:02 +01:00 · 2026-02-18 11:28:02 +01:00 · d75b05b585
commit d75b05b585
parent ff952aa083
2 changed files with 16 additions and 3 deletions
--- a/openspec/changes/code-review-fix/tasks.md
+++ b/openspec/changes/code-review-fix/tasks.md
@ -45,7 +45,7 @@
 - [x] 5.2 `[opus]` Add SHA256 model integrity check: create `models/checksums.sha256` manifest, verify hash before `joblib.load()` in `services/ml/app/main.py:266`
 - [x] 5.3 `[sonnet]` Add `_model_swap_lock` to prediction reads (not just writes) in `services/ml/app/main.py` for thread-safe model access
 - [x] 5.4 `[sonnet]` Add date range validation (max 1 year) to `POST /predict/batch` in `services/ml/app/main.py`
- [ ] 5.5 `[sonnet]` Add candle time-sort validation/auto-sort to `POST /predict` in `services/ml/app/main.py`
+- [x] 5.5 `[sonnet]` Add candle time-sort validation/auto-sort to `POST /predict` in `services/ml/app/main.py`
 - [ ] 5.6 `[sonnet]` Implement real health checks: `SELECT 1` for PostgreSQL, MLflow API ping in `services/ml/app/main.py:396-409`
 - [ ] 5.7 `[sonnet]` Add training resource limits: 500MB dataset size check, 30-minute timeout with status update on expiry in `services/ml/app/main.py:907-1030`
 - [ ] 5.8 `[haiku]` Add `run_id` format validation to `DELETE /training/runs/{run_id}` and `GET /training/runs/{run_id}` endpoints
--- a/services/ml/app/main.py
+++ b/services/ml/app/main.py
@ -657,6 +657,19 @@ async def predict(request: PredictRequest):

    logger.info(f"Predict request: {request.pair or 'unknown'} {request.timeframe or 'unknown'}, {len(request.candles)} candles")

+    # Auto-sort candles by time in ascending order to ensure chronological processing
+    # regardless of the order in which the client sends them.
+    candles_sorted = sorted(request.candles, key=lambda c: c.time)
+
+    # Validate that there are no duplicate timestamps after sorting.
+    times = [c.time for c in candles_sorted]
+    duplicate_times = [t for i, t in enumerate(times) if i > 0 and t == times[i - 1]]
+    if duplicate_times:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Duplicate candle timestamps detected: {duplicate_times[:5]}"
+        )
+
    # Grab model reference under lock to prevent reading a partially-swapped model
    with _model_swap_lock:
        current_model = state.model
@ -669,8 +682,8 @@ async def predict(request: PredictRequest):
        )

    try:
-        # Convert candles to list of dicts
-        candles_data = [candle.model_dump() for candle in request.candles]
+        # Convert sorted candles to list of dicts
+        candles_data = [candle.model_dump() for candle in candles_sorted]

        # Preprocess candles (feature engineering + windowing)
        X, window_times = preprocess_candles(candles_data, state.pipeline_config)