candle-annotator/services/ml/training/evaluation.py

"""
Model evaluation utilities.

Generate confusion matrix plots, feature importance plots, and classification reports.
"""

from pathlib import Path
from typing import List, Optional
import io

import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')  # Non-interactive backend for server
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report


def generate_confusion_matrix_plot(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    labels: Optional[List[str]] = None,
    normalize: bool = True
) -> bytes:
    """
    Generate confusion matrix plot as PNG bytes.

    Args:
        y_true: True labels
        y_pred: Predicted labels
        labels: Class label names (optional, inferred if not provided)
        normalize: Whether to normalize to percentages

    Returns:
        PNG image as bytes
    """
    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        fmt = '.2%'
    else:
        fmt = 'd'

    # Create figure
    fig, ax = plt.subplots(figsize=(10, 8))

    # Plot heatmap
    sns.heatmap(
        cm,
        annot=True,
        fmt=fmt,
        cmap='Blues',
        xticklabels=labels if labels else 'auto',
        yticklabels=labels if labels else 'auto',
        ax=ax
    )

    ax.set_xlabel('Predicted Label')
    ax.set_ylabel('True Label')
    ax.set_title('Confusion Matrix')

    # Convert to bytes
    buf = io.BytesIO()
    plt.tight_layout()
    plt.savefig(buf, format='png', dpi=150)
    plt.close(fig)
    buf.seek(0)

    return buf.read()


def generate_feature_importance_plot(
    feature_names: List[str],
    importances: np.ndarray,
    top_n: int = 20
) -> bytes:
    """
    Generate feature importance plot as PNG bytes.

    Args:
        feature_names: Names of features
        importances: Feature importance scores
        top_n: Number of top features to display

    Returns:
        PNG image as bytes
    """
    # Create DataFrame and sort by importance
    df = pd.DataFrame({
        'feature': feature_names,
        'importance': importances
    })
    df = df.sort_values('importance', ascending=False).head(top_n)

    # Create figure
    fig, ax = plt.subplots(figsize=(10, max(6, top_n * 0.3)))

    # Horizontal bar plot
    ax.barh(range(len(df)), df['importance'].values)
    ax.set_yticks(range(len(df)))
    ax.set_yticklabels(df['feature'].values)
    ax.invert_yaxis()
    ax.set_xlabel('Importance Score')
    ax.set_title(f'Top {top_n} Feature Importances')
    ax.grid(axis='x', alpha=0.3)

    # Convert to bytes
    buf = io.BytesIO()
    plt.tight_layout()
    plt.savefig(buf, format='png', dpi=150)
    plt.close(fig)
    buf.seek(0)

    return buf.read()


def generate_classification_report_text(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    labels: Optional[List[str]] = None
) -> str:
    """
    Generate classification report as text.

    Args:
        y_true: True labels
        y_pred: Predicted labels
        labels: Class label names (optional)

    Returns:
        Classification report as string
    """
    # Get unique labels present in y_true and y_pred
    present_labels = np.unique(np.concatenate([y_true, y_pred]))

    # If labels provided, use them as target names for the present labels
    if labels is not None:
        # If labels are strings, filter to only present ones
        if isinstance(labels[0], str):
            target_names = [label for label in labels if label in present_labels]
        else:
            # If labels are indices, map them
            target_names = [labels[i] if i < len(labels) else str(i)
                           for i in present_labels]
    else:
        target_names = None

    return classification_report(
        y_true,
        y_pred,
        labels=present_labels if labels is None or isinstance(labels[0], str) else None,
        target_names=target_names,
        digits=4,
        zero_division=0
    )


def save_confusion_matrix_plot(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    output_path: Path,
    labels: Optional[List[str]] = None,
    normalize: bool = True
):
    """
    Generate and save confusion matrix plot to file.

    Args:
        y_true: True labels
        y_pred: Predicted labels
        output_path: Path to save PNG file
        labels: Class label names (optional)
        normalize: Whether to normalize to percentages
    """
    png_bytes = generate_confusion_matrix_plot(y_true, y_pred, labels, normalize)

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'wb') as f:
        f.write(png_bytes)


def save_feature_importance_plot(
    feature_names: List[str],
    importances: np.ndarray,
    output_path: Path,
    top_n: int = 20
):
    """
    Generate and save feature importance plot to file.

    Args:
        feature_names: Names of features
        importances: Feature importance scores
        output_path: Path to save PNG file
        top_n: Number of top features to display
    """
    png_bytes = generate_feature_importance_plot(feature_names, importances, top_n)

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'wb') as f:
        f.write(png_bytes)


def save_classification_report(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    output_path: Path,
    labels: Optional[List[str]] = None
):
    """
    Generate and save classification report to text file.

    Args:
        y_true: True labels
        y_pred: Predicted labels
        output_path: Path to save text file
        labels: Class label names (optional)
    """
    report = generate_classification_report_text(y_true, y_pred, labels)

    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w') as f:
        f.write(report)