Fix XGBoost label encoding and add a single-class guard

This commit is contained in:
Marko Djordjevic 2026-02-18 23:58:24 +01:00
parent 73c10a4156
commit 26f2c44509

View file

@ -10,6 +10,7 @@ from typing import Any, Dict, Optional
import numpy as np import numpy as np
from xgboost import XGBClassifier from xgboost import XGBClassifier
from sklearn.utils.class_weight import compute_class_weight from sklearn.utils.class_weight import compute_class_weight
from sklearn.preprocessing import LabelEncoder
class XGBoostModel: class XGBoostModel:
@ -33,6 +34,7 @@ class XGBoostModel:
self.hyperparameters = hyperparameters.copy() self.hyperparameters = hyperparameters.copy()
self.class_weights = class_weights self.class_weights = class_weights
self._sample_weights = None self._sample_weights = None
self.label_encoder_ = None
# XGBoost doesn't have built-in class_weight parameter like sklearn # XGBoost doesn't have built-in class_weight parameter like sklearn
# We'll compute sample weights manually when class_weights is "balanced" # We'll compute sample weights manually when class_weights is "balanced"
@ -51,25 +53,38 @@ class XGBoostModel:
Returns: Returns:
self self
""" """
classes = np.unique(y)
if classes.size < 2:
raise ValueError(
f"XGBoost requires at least 2 classes for training; got {classes.size} ({classes})"
)
y_encoded = y
if not (
np.issubdtype(np.asarray(y).dtype, np.integer)
and np.array_equal(np.sort(classes), np.arange(classes.size))
):
self.label_encoder_ = LabelEncoder()
y_encoded = self.label_encoder_.fit_transform(y)
# Compute sample weights if class weighting is enabled # Compute sample weights if class weighting is enabled
if self.class_weights == "balanced": if self.class_weights == "balanced":
# Compute class weights # Compute class weights
classes = np.unique(y)
class_weights = compute_class_weight( class_weights = compute_class_weight(
class_weight="balanced", class_weight="balanced",
classes=classes, classes=np.unique(y_encoded),
y=y y=y_encoded
) )
# Map class weights to sample weights # Map class weights to sample weights
class_weight_dict = dict(zip(classes, class_weights)) class_weight_dict = dict(zip(np.unique(y_encoded), class_weights))
sample_weights = np.array([class_weight_dict[label] for label in y]) sample_weights = np.array([class_weight_dict[label] for label in y_encoded])
# Fit with sample weights # Fit with sample weights
self.model.fit(X, y, sample_weight=sample_weights) self.model.fit(X, y_encoded, sample_weight=sample_weights)
else: else:
# Fit without sample weights # Fit without sample weights
self.model.fit(X, y) self.model.fit(X, y_encoded)
return self return self
@ -83,7 +98,10 @@ class XGBoostModel:
Returns: Returns:
Predicted labels (n_samples,) Predicted labels (n_samples,)
""" """
return self.model.predict(X) preds = self.model.predict(X)
if self.label_encoder_ is not None:
return self.label_encoder_.inverse_transform(preds.astype(int))
return preds
def predict_proba(self, X: np.ndarray) -> np.ndarray: def predict_proba(self, X: np.ndarray) -> np.ndarray:
""" """
@ -100,6 +118,8 @@ class XGBoostModel:
@property @property
def classes_(self): def classes_(self):
"""Get fitted class labels.""" """Get fitted class labels."""
if self.label_encoder_ is not None:
return self.label_encoder_.classes_
return self.model.classes_ return self.model.classes_
@property @property