feat(ml): implement feature engineering pipeline

- Create pipeline.py with CLI argument parsing for running stages
- Implement TA-Lib indicator computation with multi-output support
- Add candle feature extraction (body_size, wicks, ratios, etc.)
- Create custom feature loader with dynamic module import
- Wire all feature engineering stages with NaN handling
- Tasks completed: 2.2, 2.3, 3.1, 3.2, 3.3, 3.4, 3.5
This commit is contained in:
Marko Djordjevic 2026-02-15 12:22:59 +01:00
parent ea339a54a7
commit fd29ab91e0
6 changed files with 889 additions and 7 deletions

View file

@ -0,0 +1,136 @@
"""
Custom feature function loader.
Dynamically imports and executes custom feature functions from configured module paths.
"""
import logging
import importlib
from typing import List
import pandas as pd
logger = logging.getLogger(__name__)
def load_custom_features(
    df: pd.DataFrame,
    custom_feature_paths: List[str]
) -> pd.DataFrame:
    """
    Load and apply custom feature functions.

    Each custom feature path should be a Python module path
    (e.g., "features.custom.trend_slope"). The module should define a function
    with the same name as the module's last component. The function is called
    with the working DataFrame (including columns added by earlier custom
    features in the list) and should return a pandas Series with one value
    per row.

    The returned Series is attached positionally (its index is ignored), under
    a column named after the function.

    Args:
        df: DataFrame with OHLCV + computed features
        custom_feature_paths: List of module paths to import

    Returns:
        DataFrame with original columns + custom feature columns. When
        ``custom_feature_paths`` is empty, ``df`` itself is returned (no copy).

    Raises:
        ImportError: If a custom feature module cannot be imported
        AttributeError: If the expected function is not found in the module
        ValueError: If the custom function doesn't return a Series, or the
            Series length differs from the DataFrame length
    """
    if not custom_feature_paths:
        logger.debug("No custom features configured")
        return df

    logger.info(f"Loading {len(custom_feature_paths)} custom feature(s)")

    # Work on a copy so the caller's DataFrame is never mutated.
    result_df = df.copy()

    for feature_path in custom_feature_paths:
        logger.debug(f"Loading custom feature: {feature_path}")

        # Step 1: import. Only this call is covered by the ImportError handler,
        # so an ImportError raised later *inside* the custom function is not
        # misreported as "module cannot be imported".
        try:
            module = importlib.import_module(feature_path)
        except ImportError as e:
            logger.error(f"Failed to import custom feature module '{feature_path}': {e}")
            raise ImportError(
                f"Cannot import custom feature module '{feature_path}'. "
                f"Ensure the module exists and is in the Python path. Error: {e}"
            ) from e
        except Exception as e:
            # Anything else raised while importing (e.g. by the module's
            # top-level code) is logged and propagated unchanged.
            logger.error(f"Error applying custom feature '{feature_path}': {e}")
            raise

        # Step 2: resolve the function, named after the last path component.
        function_name = feature_path.split('.')[-1]
        if not hasattr(module, function_name):
            not_found_message = (
                f"Module '{feature_path}' does not have a function named '{function_name}'. "
                f"Custom feature modules must define a function with the same name as the module."
            )
            logger.error(f"Custom feature function not found: {not_found_message}")
            raise AttributeError(not_found_message)
        feature_func = getattr(module, function_name)

        # Step 3: run the function and validate its output. Errors here come
        # from the custom code or from validation, so they are reported as
        # application errors regardless of their type.
        try:
            logger.debug(f"Calling custom feature function: {function_name}")
            feature_result = feature_func(result_df)

            if not isinstance(feature_result, pd.Series):
                raise ValueError(
                    f"Custom feature function '{function_name}' must return a pandas Series, "
                    f"but returned {type(feature_result).__name__}"
                )

            if len(feature_result) != len(result_df):
                raise ValueError(
                    f"Custom feature function '{function_name}' returned Series with "
                    f"{len(feature_result)} rows, but DataFrame has {len(result_df)} rows"
                )

            # Assign by position: `.values` drops the Series index so a
            # differing index cannot introduce NaNs through alignment.
            result_df[function_name] = feature_result.values
        except Exception as e:
            logger.error(f"Error applying custom feature '{feature_path}': {e}")
            raise

        logger.info(f"Added custom feature: {function_name}")

    return result_df
def validate_custom_feature_function(func, function_name: str) -> None:
    """
    Check that a custom feature callable takes a single DataFrame argument.

    The callable must declare exactly one parameter. If that parameter carries
    a type annotation other than ``pd.DataFrame`` (or the strings
    ``'pd.DataFrame'`` / ``'DataFrame'``), a warning is logged but no error
    is raised.

    Args:
        func: The function to validate
        function_name: Name of the function for error messages

    Raises:
        ValueError: If the function does not declare exactly one parameter
    """
    import inspect

    parameters = tuple(inspect.signature(func).parameters.values())
    param_count = len(parameters)
    if param_count != 1:
        raise ValueError(
            f"Custom feature function '{function_name}' must accept exactly 1 parameter "
            f"(a pandas DataFrame), but has {param_count} parameters"
        )

    # The annotation check is advisory only: unannotated parameters pass silently.
    (only_param,) = parameters
    annotation = only_param.annotation
    accepted_annotations = (pd.DataFrame, 'pd.DataFrame', 'DataFrame')
    is_annotated = annotation != inspect.Parameter.empty
    if is_annotated and annotation not in accepted_annotations:
        logger.warning(
            f"Custom feature function '{function_name}' parameter is annotated as "
            f"{annotation}, but should be pd.DataFrame"
        )