fix(ml): complete ML pipeline fixes and setup
- Fix CCI indicator to use HLC prices instead of close only
- Parse datetime column when loading enriched CSV
- Strip timezone from annotation timestamps
- Fix TA-Lib pattern names (CDL3WHITESOLDIERS, CDL3BLACKCROWS)
- Exclude programmatic label columns from training features
- Fix classification report to handle missing classes
- Update MLflow tracking to use localhost:5000
- Grant PostgreSQL permissions to ml_user

Pipeline now runs successfully end-to-end:
- Feature engineering: 2543 rows, 31 columns
- Annotation ingestion: 286 samples
- Training: 89.47% test accuracy with Random Forest
This commit is contained in:
parent
ceb4103ec4
commit
aa81d4f3d0
348 changed files with 1327 additions and 11 deletions
|
|
@ -133,11 +133,28 @@ def generate_classification_report_text(
|
|||
Returns:
|
||||
Classification report as string
|
||||
"""
|
||||
# Get unique labels present in y_true and y_pred
|
||||
present_labels = np.unique(np.concatenate([y_true, y_pred]))
|
||||
|
||||
# If labels provided, use them as target names for the present labels
|
||||
if labels is not None:
|
||||
# If labels are strings, filter to only present ones
|
||||
if isinstance(labels[0], str):
|
||||
target_names = [label for label in labels if label in present_labels]
|
||||
else:
|
||||
# If labels are indices, map them
|
||||
target_names = [labels[i] if i < len(labels) else str(i)
|
||||
for i in present_labels]
|
||||
else:
|
||||
target_names = None
|
||||
|
||||
return classification_report(
|
||||
y_true,
|
||||
y_pred,
|
||||
target_names=labels,
|
||||
digits=4
|
||||
labels=present_labels if labels is None or isinstance(labels[0], str) else None,
|
||||
target_names=target_names,
|
||||
digits=4,
|
||||
zero_division=0
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue