feat(ml): implement feature engineering pipeline
- Create pipeline.py with CLI argument parsing for running stages
- Implement TA-Lib indicator computation with multi-output support
- Add candle feature extraction (body_size, wicks, ratios, etc.)
- Create custom feature loader with dynamic module import
- Wire all feature engineering stages with NaN handling
- Tasks completed: 2.2, 2.3, 3.1, 3.2, 3.3, 3.4, 3.5
This commit is contained in:
parent
ea339a54a7
commit
fd29ab91e0
6 changed files with 889 additions and 7 deletions
136
services/ml/features/custom_loader.py
Normal file
136
services/ml/features/custom_loader.py
Normal file
|
|
@ -0,0 +1,136 @@
|
|||
"""
|
||||
Custom feature function loader.
|
||||
|
||||
Dynamically imports and executes custom feature functions from configured module paths.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import importlib
|
||||
from typing import List
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)


def _import_feature_function(feature_path: str) -> tuple:
    """Import ``feature_path`` and return ``(function_name, callable)``.

    The function name is the last dotted component of ``feature_path``; the
    module must expose a callable attribute under that name.

    Raises:
        ImportError: If the module cannot be imported (chained to the cause).
        AttributeError: If the module has no callable attribute with the
            expected name.
    """
    try:
        module = importlib.import_module(feature_path)
    except ImportError as e:
        logger.error(
            "Failed to import custom feature module '%s': %s", feature_path, e
        )
        raise ImportError(
            f"Cannot import custom feature module '{feature_path}'. "
            f"Ensure the module exists and is in the Python path. Error: {e}"
        ) from e
    except Exception as e:
        # Anything else raised while executing the module body (or by an
        # invalid path value) is logged and propagated unchanged.
        logger.error("Error applying custom feature '%s': %s", feature_path, e)
        raise

    function_name = feature_path.split('.')[-1]
    feature_func = getattr(module, function_name, None)

    # A same-named attribute that is not callable is treated as "function
    # not found" instead of failing later with an opaque TypeError.
    if not callable(feature_func):
        error = AttributeError(
            f"Module '{feature_path}' does not have a function named '{function_name}'. "
            f"Custom feature modules must define a function with the same name as the module."
        )
        logger.error("Custom feature function not found: %s", error)
        raise error

    return function_name, feature_func


def load_custom_features(
    df: pd.DataFrame,
    custom_feature_paths: List[str]
) -> pd.DataFrame:
    """
    Load and apply custom feature functions.

    Each custom feature path should be a Python module path
    (e.g., "features.custom.trend_slope"). The module must define a function
    with the same name as the module's last component. The function is called
    with a single positional argument (the working DataFrame, which already
    contains the features added by earlier paths in the list) and must return
    a pandas Series with one value per row.

    The returned Series is attached by position (its ``.values`` are used), so
    its index is not aligned against the DataFrame index.

    Args:
        df: DataFrame with OHLCV + computed features
        custom_feature_paths: List of module paths to import

    Returns:
        DataFrame with original columns + custom feature columns. When
        ``custom_feature_paths`` is empty the input ``df`` itself is returned
        (no copy); otherwise a copy is returned and ``df`` is not assigned to.

    Raises:
        ImportError: If a custom feature module cannot be imported
        AttributeError: If the expected function is not found in the module
        ValueError: If the custom function doesn't return a Series, or the
            Series length differs from the DataFrame length
    """
    if not custom_feature_paths:
        logger.debug("No custom features configured")
        return df

    logger.info("Loading %d custom feature(s)", len(custom_feature_paths))

    # Work on a copy so the caller's DataFrame does not gain columns.
    result_df = df.copy()

    for feature_path in custom_feature_paths:
        logger.debug("Loading custom feature: %s", feature_path)

        # Import problems are handled separately from execution problems so
        # that an ImportError/AttributeError raised *inside* the feature
        # function is not misreported as a missing module or function.
        function_name, feature_func = _import_feature_function(feature_path)

        try:
            logger.debug("Calling custom feature function: %s", function_name)
            feature_result = feature_func(result_df)

            if not isinstance(feature_result, pd.Series):
                raise ValueError(
                    f"Custom feature function '{function_name}' must return a pandas Series, "
                    f"but returned {type(feature_result).__name__}"
                )

            if len(feature_result) != len(result_df):
                raise ValueError(
                    f"Custom feature function '{function_name}' returned Series with "
                    f"{len(feature_result)} rows, but DataFrame has {len(result_df)} rows"
                )

            if function_name in result_df.columns:
                logger.warning(
                    "Custom feature '%s' overwrites an existing column", function_name
                )

            # Positional assignment: the Series index is deliberately ignored.
            result_df[function_name] = feature_result.values
        except Exception as e:
            logger.error("Error applying custom feature '%s': %s", feature_path, e)
            raise

        logger.info("Added custom feature: %s", function_name)

    return result_df
|
||||
|
||||
|
||||
def validate_custom_feature_function(func, function_name: str) -> None:
    """
    Validate that a custom feature function has the correct signature.

    The function must declare exactly one parameter, and that parameter must
    be passable positionally (the loader calls ``func(df)``). If the parameter
    carries an annotation that is not recognisably ``pd.DataFrame``, a warning
    is logged but validation still succeeds.

    Args:
        func: The function to validate
        function_name: Name of the function for error messages

    Raises:
        ValueError: If the function signature is invalid, or ``func`` is not
            callable / has no inspectable signature
    """
    import inspect

    # inspect.signature raises TypeError for non-callables and ValueError when
    # no signature can be provided; surface both as the documented ValueError.
    try:
        sig = inspect.signature(func)
    except (TypeError, ValueError) as e:
        raise ValueError(
            f"Custom feature function '{function_name}' has no inspectable signature: {e}"
        ) from e

    params = list(sig.parameters.values())

    if len(params) != 1:
        raise ValueError(
            f"Custom feature function '{function_name}' must accept exactly 1 parameter "
            f"(a pandas DataFrame), but has {len(params)} parameters"
        )

    param = params[0]

    # A lone keyword-only or **kwargs parameter satisfies the count check but
    # cannot receive the DataFrame positionally, so the call would fail.
    if param.kind in (
        inspect.Parameter.KEYWORD_ONLY,
        inspect.Parameter.VAR_KEYWORD,
    ):
        raise ValueError(
            f"Custom feature function '{function_name}' must accept its DataFrame "
            f"positionally, but parameter '{param.name}' cannot be passed positionally"
        )

    # Check if the parameter is annotated as DataFrame (optional check).
    # String forms cover annotations written as forward references.
    accepted_annotations = [
        pd.DataFrame,
        'pd.DataFrame',
        'pandas.DataFrame',
        'DataFrame',
    ]
    if param.annotation != inspect.Parameter.empty:
        if param.annotation not in accepted_annotations:
            logger.warning(
                f"Custom feature function '{function_name}' parameter is annotated as "
                f"{param.annotation}, but should be pd.DataFrame"
            )
|
||||
Loading…
Add table
Add a link
Reference in a new issue