""" XGBoost model wrapper for candlestick pattern classification. Provides a wrapper around XGBoost's XGBClassifier with support for class weight balancing. """ from typing import Any, Dict, Optional import numpy as np from xgboost import XGBClassifier from sklearn.utils.class_weight import compute_class_weight from sklearn.preprocessing import LabelEncoder class XGBoostModel: """ XGBoost classifier wrapper for candlestick patterns. Attributes: model: The underlying XGBClassifier instance classes_: Fitted class labels feature_importances_: Feature importance scores (after fitting) """ def __init__(self, hyperparameters: Dict[str, Any], class_weights: Optional[str] = None): """ Initialize XGBoost model. Args: hyperparameters: Model hyperparameters from config class_weights: "balanced" for inverse-frequency weighting, None for no weighting """ self.hyperparameters = hyperparameters.copy() self.class_weights = class_weights self._sample_weights = None self.label_encoder_ = None # XGBoost doesn't have built-in class_weight parameter like sklearn # We'll compute sample weights manually when class_weights is "balanced" # Initialize XGBoost model self.model = XGBClassifier(**self.hyperparameters) def fit(self, X: np.ndarray, y: np.ndarray): """ Train the XGBoost model. Args: X: Training features (n_samples, n_features) y: Training labels (n_samples,) Returns: self """ classes = np.unique(y) if classes.size < 2: raise ValueError( f"XGBoost requires at least 2 classes for training; got {classes.size} ({classes})" ) y_encoded = y if not ( np.issubdtype(np.asarray(y).dtype, np.integer) and np.array_equal(np.sort(classes), np.arange(classes.size)) ): self.label_encoder_ = LabelEncoder() y_encoded = self.label_encoder_.fit_transform(y) # Compute sample weights if class weighting is enabled if self.class_weights == "balanced": # Compute class weights class_weights = compute_class_weight( class_weight="balanced", classes=np.unique(y_encoded), y=y_encoded ) # Map class weights to sample weights class_weight_dict = dict(zip(np.unique(y_encoded), class_weights)) sample_weights = np.array([class_weight_dict[label] for label in y_encoded]) # Fit with sample weights self.model.fit(X, y_encoded, sample_weight=sample_weights) else: # Fit without sample weights self.model.fit(X, y_encoded) return self def predict(self, X: np.ndarray) -> np.ndarray: """ Predict class labels. Args: X: Features (n_samples, n_features) Returns: Predicted labels (n_samples,) """ preds = self.model.predict(X) if self.label_encoder_ is not None: return self.label_encoder_.inverse_transform(preds.astype(int)) return preds def predict_proba(self, X: np.ndarray) -> np.ndarray: """ Predict class probabilities. Args: X: Features (n_samples, n_features) Returns: Class probabilities (n_samples, n_classes) """ return self.model.predict_proba(X) @property def classes_(self): """Get fitted class labels.""" if self.label_encoder_ is not None: return self.label_encoder_.classes_ return self.model.classes_ @property def feature_importances_(self): """Get feature importance scores.""" return self.model.feature_importances_ def get_params(self) -> Dict[str, Any]: """ Get model parameters. Returns: Dictionary of model hyperparameters """ return self.model.get_params() def __repr__(self): return f"XGBoostModel(n_estimators={self.hyperparameters.get('n_estimators', 100)})"