From 317f925c4340d051083d632f31b50dab5cbcb0bc Mon Sep 17 00:00:00 2001 From: Marko Djordjevic Date: Sun, 15 Feb 2026 21:56:14 +0100 Subject: [PATCH] fix(ml): handle missing volume data and skip volume-dependent indicators - Fill volume with 0 when column is absent from candle data - Skip MFI/OBV/AD/ADOSC indicators when no real volume data available - Fix pandas FutureWarning for inplace fillna in candle_features - Remove temporary debug NaN logging --- services/ml/app/preprocessing.py | 19 +++++++++++++++++-- services/ml/features/candle_features.py | 2 +- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/services/ml/app/preprocessing.py b/services/ml/app/preprocessing.py index 86f4a7f..a4595ea 100644 --- a/services/ml/app/preprocessing.py +++ b/services/ml/app/preprocessing.py @@ -60,6 +60,11 @@ def preprocess_candles( except Exception as e: raise ValueError(f"Candle data validation failed: {e}") + # Handle missing volume column - fill with 0 if absent + if 'volume' not in df.columns: + logger.warning("Volume column missing from candle data, filling with 0") + df['volume'] = 0.0 + # Get feature engineering config fe_config = pipeline_config.stages.feature_engineering @@ -69,9 +74,19 @@ def preprocess_candles( # Compute TA-Lib indicators if fe_config.talib_indicators: - logger.info(f"Computing {len(fe_config.talib_indicators)} TA-Lib indicators") + indicators = fe_config.talib_indicators + # Skip volume-dependent indicators when volume data is unavailable + volume_indicators = {'MFI', 'OBV', 'AD', 'ADOSC'} + has_real_volume = df['volume'].sum() > 0 + if not has_real_volume: + skipped = [i.name for i in indicators if i.name.upper() in volume_indicators] + if skipped: + logger.warning(f"Skipping volume-dependent indicators (no volume data): {skipped}") + indicators = [i for i in indicators if i.name.upper() not in volume_indicators] + + logger.info(f"Computing {len(indicators)} TA-Lib indicators") try: - df = compute_talib_indicators(df, fe_config.talib_indicators) + df = compute_talib_indicators(df, indicators) except Exception as e: logger.error(f"Failed to compute TA-Lib indicators: {e}") raise ValueError(f"Indicator computation failed: {e}") diff --git a/services/ml/features/candle_features.py b/services/ml/features/candle_features.py index 3c42c09..3f33655 100644 --- a/services/ml/features/candle_features.py +++ b/services/ml/features/candle_features.py @@ -88,7 +88,7 @@ def compute_candle_features(df: pd.DataFrame) -> pd.DataFrame: # Gap (open - previous close) # For the first candle, gap is 0 result_df['gap'] = result_df['open'] - result_df['close'].shift(1) - result_df['gap'].fillna(0.0, inplace=True) + result_df['gap'] = result_df['gap'].fillna(0.0) logger.info("Computed 8 candle features: body_size, body_direction, upper_wick, " "lower_wick, wick_ratio, body_to_range, gap, range")