diff --git a/openspec/changes/code-review-fix/tasks.md b/openspec/changes/code-review-fix/tasks.md index 868a17d..ec9fa0b 100644 --- a/openspec/changes/code-review-fix/tasks.md +++ b/openspec/changes/code-review-fix/tasks.md @@ -45,7 +45,7 @@ - [x] 5.2 `[opus]` Add SHA256 model integrity check: create `models/checksums.sha256` manifest, verify hash before `joblib.load()` in `services/ml/app/main.py:266` - [x] 5.3 `[sonnet]` Add `_model_swap_lock` to prediction reads (not just writes) in `services/ml/app/main.py` for thread-safe model access - [x] 5.4 `[sonnet]` Add date range validation (max 1 year) to `POST /predict/batch` in `services/ml/app/main.py` -- [ ] 5.5 `[sonnet]` Add candle time-sort validation/auto-sort to `POST /predict` in `services/ml/app/main.py` +- [x] 5.5 `[sonnet]` Add candle time-sort validation/auto-sort to `POST /predict` in `services/ml/app/main.py` - [ ] 5.6 `[sonnet]` Implement real health checks: `SELECT 1` for PostgreSQL, MLflow API ping in `services/ml/app/main.py:396-409` - [ ] 5.7 `[sonnet]` Add training resource limits: 500MB dataset size check, 30-minute timeout with status update on expiry in `services/ml/app/main.py:907-1030` - [ ] 5.8 `[haiku]` Add `run_id` format validation to `DELETE /training/runs/{run_id}` and `GET /training/runs/{run_id}` endpoints diff --git a/services/ml/app/main.py b/services/ml/app/main.py index a77a543..c8bb5c8 100644 --- a/services/ml/app/main.py +++ b/services/ml/app/main.py @@ -657,6 +657,19 @@ async def predict(request: PredictRequest): logger.info(f"Predict request: {request.pair or 'unknown'} {request.timeframe or 'unknown'}, {len(request.candles)} candles") + # Auto-sort candles by time in ascending order to ensure chronological processing + # regardless of the order in which the client sends them. + candles_sorted = sorted(request.candles, key=lambda c: c.time) + + # Validate that there are no duplicate timestamps after sorting. + times = [c.time for c in candles_sorted] + duplicate_times = [t for i, t in enumerate(times) if i > 0 and t == times[i - 1]] + if duplicate_times: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Duplicate candle timestamps detected: {duplicate_times[:5]}" + ) + # Grab model reference under lock to prevent reading a partially-swapped model with _model_swap_lock: current_model = state.model @@ -669,8 +682,8 @@ async def predict(request: PredictRequest): ) try: - # Convert candles to list of dicts - candles_data = [candle.model_dump() for candle in request.candles] + # Convert sorted candles to list of dicts + candles_data = [candle.model_dump() for candle in candles_sorted] # Preprocess candles (feature engineering + windowing) X, window_times = preprocess_candles(candles_data, state.pipeline_config)