From a68a681c9b74dc074aa3c3e5288218fd7a378f5e Mon Sep 17 00:00:00 2001 From: Marko Djordjevic Date: Sun, 15 Feb 2026 19:30:38 +0100 Subject: [PATCH] fix(ml): handle date strings in TA-Lib annotation generator - Convert date strings to Unix timestamps in load_ohlcv() - Fix duplicate pattern names (CDL3WHITESOLDIERS/CDL3BLACKCROWS) - Ensure time column is always integer type --- services/ml/generate_talib_annotations.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/services/ml/generate_talib_annotations.py b/services/ml/generate_talib_annotations.py index 72205f2..ee1da96 100644 --- a/services/ml/generate_talib_annotations.py +++ b/services/ml/generate_talib_annotations.py @@ -42,12 +42,10 @@ TALIB_PATTERNS = { 'CDLHARAMICROSS': 'Harami Cross', 'CDLPIERCING': 'Piercing', 'CDLDARKCLOUDCOVER': 'Dark Cloud Cover', - 'CDLTHREEWHITESOLDIERS': 'Three White Soldiers', - 'CDLTHREEBLACKCROWS': 'Three Black Crows', 'CDLMARUBOZU': 'Marubozu', 'CDLSPINNINGTOP': 'Spinning Top', - 'CDL3BLACKCROWS': 'Three Black Crows', 'CDL3WHITESOLDIERS': 'Three White Soldiers', + 'CDL3BLACKCROWS': 'Three Black Crows', 'CDLABANDONEDBABY': 'Abandoned Baby', 'CDLADVANCEBLOCK': 'Advance Block', 'CDLBELTHOLD': 'Belt Hold', @@ -94,6 +92,7 @@ def load_ohlcv(input_path: str) -> pd.DataFrame: Load OHLCV data from CSV file. Expected columns: time, open, high, low, close[, volume] + Time can be Unix timestamp or date string. """ logger.info(f"Loading OHLCV data from {input_path}") df = pd.read_csv(input_path) @@ -103,6 +102,14 @@ def load_ohlcv(input_path: str) -> pd.DataFrame: if missing: raise ValueError(f"Missing required columns: {missing}") + # Convert time to Unix timestamp if it's a date string + if df['time'].dtype == 'object': + logger.info("Converting date strings to Unix timestamps...") + df['time'] = pd.to_datetime(df['time']).astype(int) // 10**9 + + # Ensure time is integer + df['time'] = df['time'].astype(int) + logger.info(f"Loaded {len(df)} candles") return df