fix(ml): handle missing volume data and skip volume-dependent indicators

- Fill the volume column with 0.0 when it is absent from the candle data
- Skip the volume-dependent MFI/OBV/AD/ADOSC indicators when no real volume data is available (the volume column's sum is not greater than 0)
- Fix the pandas FutureWarning for in-place fillna on the 'gap' column in compute_candle_features by assigning the fillna result back instead of using inplace=True
- Remove temporary debug NaN logging
This commit is contained in:
Marko Djordjevic 2026-02-15 21:56:14 +01:00
parent b6b37160a7
commit 317f925c43
2 changed files with 18 additions and 3 deletions

View file

@ -60,6 +60,11 @@ def preprocess_candles(
except Exception as e: except Exception as e:
raise ValueError(f"Candle data validation failed: {e}") raise ValueError(f"Candle data validation failed: {e}")
# Handle missing volume column - fill with 0 if absent
if 'volume' not in df.columns:
logger.warning("Volume column missing from candle data, filling with 0")
df['volume'] = 0.0
# Get feature engineering config # Get feature engineering config
fe_config = pipeline_config.stages.feature_engineering fe_config = pipeline_config.stages.feature_engineering
@ -69,9 +74,19 @@ def preprocess_candles(
# Compute TA-Lib indicators # Compute TA-Lib indicators
if fe_config.talib_indicators: if fe_config.talib_indicators:
logger.info(f"Computing {len(fe_config.talib_indicators)} TA-Lib indicators") indicators = fe_config.talib_indicators
# Skip volume-dependent indicators when volume data is unavailable
volume_indicators = {'MFI', 'OBV', 'AD', 'ADOSC'}
has_real_volume = df['volume'].sum() > 0
if not has_real_volume:
skipped = [i.name for i in indicators if i.name.upper() in volume_indicators]
if skipped:
logger.warning(f"Skipping volume-dependent indicators (no volume data): {skipped}")
indicators = [i for i in indicators if i.name.upper() not in volume_indicators]
logger.info(f"Computing {len(indicators)} TA-Lib indicators")
try: try:
df = compute_talib_indicators(df, fe_config.talib_indicators) df = compute_talib_indicators(df, indicators)
except Exception as e: except Exception as e:
logger.error(f"Failed to compute TA-Lib indicators: {e}") logger.error(f"Failed to compute TA-Lib indicators: {e}")
raise ValueError(f"Indicator computation failed: {e}") raise ValueError(f"Indicator computation failed: {e}")

View file

@ -88,7 +88,7 @@ def compute_candle_features(df: pd.DataFrame) -> pd.DataFrame:
# Gap (open - previous close) # Gap (open - previous close)
# For the first candle, gap is 0 # For the first candle, gap is 0
result_df['gap'] = result_df['open'] - result_df['close'].shift(1) result_df['gap'] = result_df['open'] - result_df['close'].shift(1)
result_df['gap'].fillna(0.0, inplace=True) result_df['gap'] = result_df['gap'].fillna(0.0)
logger.info("Computed 8 candle features: body_size, body_direction, upper_wick, " logger.info("Computed 8 candle features: body_size, body_direction, upper_wick, "
"lower_wick, wick_ratio, body_to_range, gap, range") "lower_wick, wick_ratio, body_to_range, gap, range")