- Create pipeline.py with CLI argument parsing for running stages - Implement TA-Lib indicator computation with multi-output support - Add candle feature extraction (body_size, wicks, ratios, etc.) - Create custom feature loader with dynamic module import - Wire all feature engineering stages with NaN handling - Tasks completed: 2.2, 2.3, 3.1, 3.2, 3.3, 3.4, 3.5
136 lines
4.9 KiB
Python
136 lines
4.9 KiB
Python
"""
|
|
Custom feature function loader.
|
|
|
|
Dynamically imports and executes custom feature functions from configured module paths.
|
|
"""
|
|
|
|
import logging
|
|
import importlib
|
|
from typing import List
|
|
|
|
import pandas as pd
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def load_custom_features(
    df: pd.DataFrame,
    custom_feature_paths: List[str]
) -> pd.DataFrame:
    """
    Load and apply custom feature functions.

    Each custom feature path should be a Python module path
    (e.g., "features.custom.trend_slope"). The module must define a function
    with the same name as the module's last component. That function is
    called with the working DataFrame (the input copy plus any custom
    features already added by earlier paths) and must return a pandas Series
    with one value per row.

    Args:
        df: DataFrame with OHLCV + computed features. It is never modified;
            a copy is extended instead.
        custom_feature_paths: List of module paths to import, applied in order.

    Returns:
        DataFrame with original columns + one column per custom feature,
        named after the feature function. If ``custom_feature_paths`` is
        empty, ``df`` itself is returned (no copy is made).

    Raises:
        ImportError: If a custom feature module cannot be imported. The
            original import error is attached as ``__cause__``.
        AttributeError: If the expected function is not found in the module
        ValueError: If the custom function doesn't return a Series, or the
            Series length differs from the DataFrame length
    """
    if not custom_feature_paths:
        logger.debug("No custom features configured")
        return df

    logger.info("Loading %d custom feature(s)", len(custom_feature_paths))

    # Work on a copy so the caller's DataFrame is left untouched.
    result_df = df.copy()

    for feature_path in custom_feature_paths:
        logger.debug("Loading custom feature: %s", feature_path)

        # Step 1: import the module. The ImportError handler is scoped to the
        # import alone, so an ImportError raised later *inside* the feature
        # function is not misreported as "module cannot be imported".
        try:
            module = importlib.import_module(feature_path)
        except ImportError as e:
            logger.error(
                "Failed to import custom feature module '%s': %s", feature_path, e
            )
            raise ImportError(
                f"Cannot import custom feature module '{feature_path}'. "
                f"Ensure the module exists and is in the Python path. Error: {e}"
            ) from e
        except Exception as e:
            # Anything else raised while importing (bad module name, error in
            # the module's top-level code, ...) is logged and propagated as-is.
            logger.error("Error applying custom feature '%s': %s", feature_path, e)
            raise

        # Step 2: resolve the function, named after the last path component.
        function_name = feature_path.split('.')[-1]
        try:
            feature_func = getattr(module, function_name)
        except AttributeError:
            message = (
                f"Module '{feature_path}' does not have a function named '{function_name}'. "
                f"Custom feature modules must define a function with the same name as the module."
            )
            logger.error("Custom feature function not found: %s", message)
            raise AttributeError(message) from None

        # Step 3: run the function and validate/attach its result.
        try:
            logger.debug("Calling custom feature function: %s", function_name)
            feature_result = feature_func(result_df)

            if not isinstance(feature_result, pd.Series):
                raise ValueError(
                    f"Custom feature function '{function_name}' must return a pandas Series, "
                    f"but returned {type(feature_result).__name__}"
                )

            if len(feature_result) != len(result_df):
                raise ValueError(
                    f"Custom feature function '{function_name}' returned Series with "
                    f"{len(feature_result)} rows, but DataFrame has {len(result_df)} rows"
                )

            # Assigned positionally via .values: the Series' own index is
            # ignored, only its length (checked above) has to match.
            result_df[function_name] = feature_result.values
        except Exception as e:
            logger.error("Error applying custom feature '%s': %s", feature_path, e)
            raise

        logger.info("Added custom feature: %s", function_name)

    return result_df
|
|
|
|
|
|
def validate_custom_feature_function(func, function_name: str) -> None:
    """
    Validate that a custom feature function has the correct signature.

    The function must declare exactly one parameter, and that parameter must
    be able to receive the DataFrame as a positional argument. If the
    parameter carries an annotation that is not recognisably a pandas
    DataFrame, a warning is logged but validation still succeeds.

    Args:
        func: The function to validate
        function_name: Name of the function for error messages

    Raises:
        ValueError: If the function does not have exactly one parameter, or
            that parameter cannot be passed positionally (keyword-only or
            ``**kwargs``)
    """
    import inspect

    sig = inspect.signature(func)
    params = list(sig.parameters.values())

    if len(params) != 1:
        raise ValueError(
            f"Custom feature function '{function_name}' must accept exactly 1 parameter "
            f"(a pandas DataFrame), but has {len(params)} parameters"
        )

    param = params[0]

    # A lone keyword-only or **kwargs parameter satisfies the count check but
    # cannot receive a positional DataFrame, so reject it here.
    if param.kind in (inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD):
        raise ValueError(
            f"Custom feature function '{function_name}' must accept the DataFrame as a "
            f"positional argument, but parameter '{param.name}' cannot be passed positionally"
        )

    # Annotation check is advisory only: unannotated parameters are fine.
    if param.annotation is inspect.Parameter.empty:
        return

    # String forms cover annotations written as strings or left unevaluated.
    accepted_annotations = (
        pd.DataFrame,
        'pd.DataFrame',
        'DataFrame',
        'pandas.DataFrame',
    )
    if param.annotation not in accepted_annotations:
        logger.warning(
            f"Custom feature function '{function_name}' parameter is annotated as "
            f"{param.annotation}, but should be pd.DataFrame"
        )
|