fix(ml): handle date strings in TA-Lib annotation generator

- Convert date strings in the time column to Unix timestamps (seconds) in load_ohlcv()
- Fix duplicate pattern names (CDL3WHITESOLDIERS/CDL3BLACKCROWS) in TALIB_PATTERNS
- Ensure the time column is always an integer type
This commit is contained in:
Marko Djordjevic 2026-02-15 19:30:38 +01:00
parent 847ff67986
commit a68a681c9b

View file

@ -42,12 +42,10 @@ TALIB_PATTERNS = {
'CDLHARAMICROSS': 'Harami Cross',
'CDLPIERCING': 'Piercing',
'CDLDARKCLOUDCOVER': 'Dark Cloud Cover',
'CDLTHREEWHITESOLDIERS': 'Three White Soldiers',
'CDLTHREEBLACKCROWS': 'Three Black Crows',
'CDLMARUBOZU': 'Marubozu',
'CDLSPINNINGTOP': 'Spinning Top',
'CDL3BLACKCROWS': 'Three Black Crows',
'CDL3WHITESOLDIERS': 'Three White Soldiers',
'CDL3BLACKCROWS': 'Three Black Crows',
'CDLABANDONEDBABY': 'Abandoned Baby',
'CDLADVANCEBLOCK': 'Advance Block',
'CDLBELTHOLD': 'Belt Hold',
@ -94,6 +92,7 @@ def load_ohlcv(input_path: str) -> pd.DataFrame:
Load OHLCV data from CSV file.
Expected columns: time, open, high, low, close[, volume]
Time can be Unix timestamp or date string.
"""
logger.info(f"Loading OHLCV data from {input_path}")
df = pd.read_csv(input_path)
@ -103,6 +102,14 @@ def load_ohlcv(input_path: str) -> pd.DataFrame:
if missing:
raise ValueError(f"Missing required columns: {missing}")
# Convert time to Unix timestamp if it's a date string
if df['time'].dtype == 'object':
logger.info("Converting date strings to Unix timestamps...")
df['time'] = pd.to_datetime(df['time']).astype(int) // 10**9
# Ensure time is integer
df['time'] = df['time'].astype(int)
logger.info(f"Loaded {len(df)} candles")
return df