From d75b05b585be8fa838685358bee6c4cf1f36a960 Mon Sep 17 00:00:00 2001 From: Marko Djordjevic Date: Wed, 18 Feb 2026 11:28:02 +0100 Subject: [PATCH] feat: add candle time-sort and duplicate timestamp validation to POST /predict Sort incoming candles by their time field in ascending chronological order before processing to ensure correct feature engineering regardless of input order. Also validate that no duplicate timestamps are present, returning HTTP 400 with details if duplicates are found. Co-Authored-By: Claude Sonnet 4.6 --- openspec/changes/code-review-fix/tasks.md | 2 +- services/ml/app/main.py | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/openspec/changes/code-review-fix/tasks.md b/openspec/changes/code-review-fix/tasks.md index 868a17d..ec9fa0b 100644 --- a/openspec/changes/code-review-fix/tasks.md +++ b/openspec/changes/code-review-fix/tasks.md @@ -45,7 +45,7 @@ - [x] 5.2 `[opus]` Add SHA256 model integrity check: create `models/checksums.sha256` manifest, verify hash before `joblib.load()` in `services/ml/app/main.py:266` - [x] 5.3 `[sonnet]` Add `_model_swap_lock` to prediction reads (not just writes) in `services/ml/app/main.py` for thread-safe model access - [x] 5.4 `[sonnet]` Add date range validation (max 1 year) to `POST /predict/batch` in `services/ml/app/main.py` -- [ ] 5.5 `[sonnet]` Add candle time-sort validation/auto-sort to `POST /predict` in `services/ml/app/main.py` +- [x] 5.5 `[sonnet]` Add candle time-sort validation/auto-sort to `POST /predict` in `services/ml/app/main.py` - [ ] 5.6 `[sonnet]` Implement real health checks: `SELECT 1` for PostgreSQL, MLflow API ping in `services/ml/app/main.py:396-409` - [ ] 5.7 `[sonnet]` Add training resource limits: 500MB dataset size check, 30-minute timeout with status update on expiry in `services/ml/app/main.py:907-1030` - [ ] 5.8 `[haiku]` Add `run_id` format validation to `DELETE /training/runs/{run_id}` and `GET /training/runs/{run_id}` endpoints diff --git a/services/ml/app/main.py b/services/ml/app/main.py index a77a543..c8bb5c8 100644 --- a/services/ml/app/main.py +++ b/services/ml/app/main.py @@ -657,6 +657,19 @@ async def predict(request: PredictRequest): logger.info(f"Predict request: {request.pair or 'unknown'} {request.timeframe or 'unknown'}, {len(request.candles)} candles") + # Auto-sort candles by time in ascending order to ensure chronological processing + # regardless of the order in which the client sends them. + candles_sorted = sorted(request.candles, key=lambda c: c.time) + + # Validate that there are no duplicate timestamps after sorting. + times = [c.time for c in candles_sorted] + duplicate_times = [t for i, t in enumerate(times) if i > 0 and t == times[i - 1]] + if duplicate_times: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Duplicate candle timestamps detected: {duplicate_times[:5]}" + ) + # Grab model reference under lock to prevent reading a partially-swapped model with _model_swap_lock: current_model = state.model @@ -669,8 +682,8 @@ async def predict(request: PredictRequest): ) try: - # Convert candles to list of dicts - candles_data = [candle.model_dump() for candle in request.candles] + # Convert sorted candles to list of dicts + candles_data = [candle.model_dump() for candle in candles_sorted] # Preprocess candles (feature engineering + windowing) X, window_times = preprocess_candles(candles_data, state.pipeline_config)