fix(ml): parse datetime column and fix TA-Lib pattern names

- Add parse_dates parameter when loading enriched CSV
- Strip timezone from annotation timestamps so they can be compared with the data's `time` column
- Fix pattern names: CDLTHREEWHITESOLDIERS -> CDL3WHITESOLDIERS
- Fix pattern names: CDLTHREEBLACKCROWS -> CDL3BLACKCROWS
This commit is contained in:
Marko Djordjevic 2026-02-15 21:13:20 +01:00
parent 2b86524436
commit ceb4103ec4
10 changed files with 5 additions and 5 deletions

Binary file not shown.

View file

@ -188,8 +188,8 @@ class AnnotationIngestion:
for ann in annotations: for ann in annotations:
label = ann['label'] label = ann['label']
start_time = pd.Timestamp(ann['start_time']) start_time = pd.Timestamp(ann['start_time']).tz_localize(None)
end_time = pd.Timestamp(ann['end_time']) end_time = pd.Timestamp(ann['end_time']).tz_localize(None)
# Find candles in span # Find candles in span
span_mask = (df['time'] >= start_time) & (df['time'] <= end_time) span_mask = (df['time'] >= start_time) & (df['time'] <= end_time)
@ -504,7 +504,7 @@ def run_annotation_ingestion(
logger.info("Running annotation ingestion stage") logger.info("Running annotation ingestion stage")
# Load enriched data # Load enriched data
enriched_df = pd.read_csv(enriched_path) enriched_df = pd.read_csv(enriched_path, parse_dates=['time'])
logger.info(f"Loaded enriched data: {enriched_df.shape}") logger.info(f"Loaded enriched data: {enriched_df.shape}")
# Process annotations # Process annotations

View file

@ -86,8 +86,8 @@ stages:
- "CDLHARAMI" - "CDLHARAMI"
- "CDLPIERCING" - "CDLPIERCING"
- "CDLDARKCLOUDCOVER" - "CDLDARKCLOUDCOVER"
- "CDLTHREEWHITESOLDIERS" - "CDL3WHITESOLDIERS"
- "CDLTHREEBLACKCROWS" - "CDL3BLACKCROWS"
# Label merge strategy: "human_priority", "programmatic_priority", "both" # Label merge strategy: "human_priority", "programmatic_priority", "both"
merge_strategy: "human_priority" merge_strategy: "human_priority"

Binary file not shown.