security: replace exception details with generic error in ML service HTTP responses
Replace all instances of `detail=str(e)`, `detail=f"...{exc}"`, and similar
patterns that exposed internal exception messages to HTTP clients in
services/ml/app/main.py. All exception details are now logged server-side
only via logger.error(), while clients receive a generic "Internal server error"
message. Fixes 9 handlers across predict, batch predict, pattern detection,
training start, training runs fetch, training run delete, dataset info,
build dataset, and model load endpoints.
Mark task 5.1 as completed in tasks.md.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
4c53ef7cae
commit
b7f9b2e04d
2 changed files with 14 additions and 13 deletions
|
|
@ -41,7 +41,7 @@
|
||||||
|
|
||||||
## 5. ML Service Hardening (Python)
|
## 5. ML Service Hardening (Python)
|
||||||
|
|
||||||
- [ ] 5.1 `[sonnet]` Replace `error.message` / traceback details with generic `"Internal server error"` in FastAPI exception handlers at lines 640, 778, 1091, 1134, 1199, 1296 of `services/ml/app/main.py`
|
- [x] 5.1 `[sonnet]` Replace `error.message` / traceback details with generic `"Internal server error"` in FastAPI exception handlers at lines 640, 778, 1091, 1134, 1199, 1296 of `services/ml/app/main.py`
|
||||||
- [ ] 5.2 `[opus]` Add SHA256 model integrity check: create `models/checksums.sha256` manifest, verify hash before `joblib.load()` in `services/ml/app/main.py:266`
|
- [ ] 5.2 `[opus]` Add SHA256 model integrity check: create `models/checksums.sha256` manifest, verify hash before `joblib.load()` in `services/ml/app/main.py:266`
|
||||||
- [ ] 5.3 `[sonnet]` Add `_model_swap_lock` to prediction reads (not just writes) in `services/ml/app/main.py` for thread-safe model access
|
- [ ] 5.3 `[sonnet]` Add `_model_swap_lock` to prediction reads (not just writes) in `services/ml/app/main.py` for thread-safe model access
|
||||||
- [ ] 5.4 `[sonnet]` Add date range validation (max 1 year) to `POST /predict/batch` in `services/ml/app/main.py`
|
- [ ] 5.4 `[sonnet]` Add date range validation (max 1 year) to `POST /predict/batch` in `services/ml/app/main.py`
|
||||||
|
|
|
||||||
|
|
@ -651,13 +651,13 @@ async def predict(request: PredictRequest):
|
||||||
logger.error(f"Prediction validation error: {e}")
|
logger.error(f"Prediction validation error: {e}")
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
detail=str(e)
|
detail="Internal server error"
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Prediction failed: {e}", exc_info=True)
|
logger.error(f"Prediction failed: {e}", exc_info=True)
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
detail=f"Prediction failed: {str(e)}"
|
detail="Internal server error"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -795,7 +795,7 @@ async def predict_batch(request: BatchPredictRequest):
|
||||||
logger.error(f"Batch prediction failed: {e}", exc_info=True)
|
logger.error(f"Batch prediction failed: {e}", exc_info=True)
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
detail=f"Batch prediction failed: {str(e)}"
|
detail="Internal server error"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -865,9 +865,10 @@ async def patterns_detect(request: DetectPatternsRequest):
|
||||||
try:
|
try:
|
||||||
raw_annotations = detect_patterns(candles_data, request.patterns or None)
|
raw_annotations = detect_patterns(candles_data, request.patterns or None)
|
||||||
except RuntimeError as exc:
|
except RuntimeError as exc:
|
||||||
|
logger.error(f"Pattern detection runtime error: {exc}")
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
||||||
detail=str(exc),
|
detail="Internal server error",
|
||||||
)
|
)
|
||||||
|
|
||||||
annotations = [SpanAnnotation(**ann) for ann in raw_annotations]
|
annotations = [SpanAnnotation(**ann) for ann in raw_annotations]
|
||||||
|
|
@ -1185,7 +1186,7 @@ async def training_start(request: TrainingStartRequest):
|
||||||
logger.error(f"Failed to insert training run record: {exc}")
|
logger.error(f"Failed to insert training run record: {exc}")
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
detail=f"Failed to create training run record: {exc}",
|
detail="Internal server error",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Launch background thread (daemon so it doesn't block process exit)
|
# Launch background thread (daemon so it doesn't block process exit)
|
||||||
|
|
@ -1228,7 +1229,7 @@ async def training_runs():
|
||||||
logger.error(f"Failed to fetch training runs: {exc}", exc_info=True)
|
logger.error(f"Failed to fetch training runs: {exc}", exc_info=True)
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
detail=f"Failed to fetch training runs: {exc}",
|
detail="Internal server error",
|
||||||
)
|
)
|
||||||
|
|
||||||
return TrainingRunsResponse(runs=runs)
|
return TrainingRunsResponse(runs=runs)
|
||||||
|
|
@ -1301,7 +1302,7 @@ async def delete_training_run(run_id: str):
|
||||||
logger.error(f"Failed to delete training run {run_id}: {exc}")
|
logger.error(f"Failed to delete training run {run_id}: {exc}")
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
detail=f"Failed to delete training run: {exc}",
|
detail="Internal server error",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Remove model artifact if it exists
|
# Remove model artifact if it exists
|
||||||
|
|
@ -1360,7 +1361,7 @@ async def training_dataset_info():
|
||||||
logger.error(f"Failed to get dataset info: {exc}", exc_info=True)
|
logger.error(f"Failed to get dataset info: {exc}", exc_info=True)
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
detail=f"Failed to get dataset info: {exc}",
|
detail="Internal server error",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1387,12 +1388,12 @@ async def training_build_dataset():
|
||||||
result = build_dataset_from_db(config)
|
result = build_dataset_from_db(config)
|
||||||
return BuildDatasetResponse(**result)
|
return BuildDatasetResponse(**result)
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc))
|
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Internal server error")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.error(f"Failed to build dataset: {exc}", exc_info=True)
|
logger.error(f"Failed to build dataset: {exc}", exc_info=True)
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
detail=f"Failed to build dataset: {exc}",
|
detail="Internal server error",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1443,7 +1444,7 @@ async def model_load(request: ModelLoadRequest):
|
||||||
logger.error(f"DB lookup failed for run_id={request.run_id}: {exc}")
|
logger.error(f"DB lookup failed for run_id={request.run_id}: {exc}")
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
detail=f"Database error: {exc}",
|
detail="Internal server error",
|
||||||
)
|
)
|
||||||
|
|
||||||
if row is None:
|
if row is None:
|
||||||
|
|
@ -1479,7 +1480,7 @@ async def model_load(request: ModelLoadRequest):
|
||||||
logger.error(f"Failed to load model for run_id={request.run_id}: {exc}")
|
logger.error(f"Failed to load model for run_id={request.run_id}: {exc}")
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
detail=f"Failed to load model: {exc}",
|
detail="Internal server error",
|
||||||
)
|
)
|
||||||
|
|
||||||
# 4. Thread-safe model swap (3.2 – brief lock)
|
# 4. Thread-safe model swap (3.2 – brief lock)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue