fix(ml): complete ML pipeline fixes and setup
- Fix CCI indicator to use HLC prices instead of close only
- Parse datetime column when loading enriched CSV
- Strip timezone from annotation timestamps
- Fix TA-Lib pattern names (CDL3WHITESOLDIERS, CDL3BLACKCROWS)
- Exclude programmatic label columns from training features
- Fix classification report to handle missing classes
- Update MLflow tracking to use localhost:5000
- Grant PostgreSQL permissions to ml_user

Pipeline now runs successfully end-to-end:
- Feature engineering: 2543 rows, 31 columns
- Annotation ingestion: 286 samples
- Training: 89.47% test accuracy with Random Forest
This commit is contained in:
parent
ceb4103ec4
commit
aa81d4f3d0
348 changed files with 1327 additions and 11 deletions
Binary file not shown.
|
|
@ -15,6 +15,11 @@ from sqlalchemy.orm import sessionmaker, Session
|
|||
from sqlalchemy.sql import func
|
||||
|
||||
|
||||
# CREATE DATABASE ml_service;
|
||||
# CREATE USER ml_user WITH ENCRYPTED PASSWORD 'ml_password';
|
||||
# GRANT ALL PRIVILEGES ON DATABASE ml_service TO ml_user;
|
||||
|
||||
|
||||
# Database connection configuration from environment
|
||||
DATABASE_URL = os.getenv(
|
||||
"DATABASE_URL",
|
||||
|
|
@ -43,9 +48,9 @@ Base = declarative_base()
|
|||
# Training runs model
|
||||
class TrainingRun(Base):
|
||||
"""Model for tracking ML training runs."""
|
||||
|
||||
|
||||
__tablename__ = "training_runs"
|
||||
|
||||
|
||||
id = Column(Integer, primary_key=True, index=True)
|
||||
run_id = Column(String(255), unique=True, nullable=False, index=True)
|
||||
model_type = Column(String(100), nullable=False)
|
||||
|
|
@ -56,7 +61,7 @@ class TrainingRun(Base):
|
|||
status = Column(String(50), nullable=False, default="running", index=True)
|
||||
created_at = Column(DateTime(timezone=True), server_default=func.now(), index=True)
|
||||
completed_at = Column(DateTime(timezone=True))
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return f"<TrainingRun(run_id='{self.run_id}', status='{self.status}')>"
|
||||
|
||||
|
|
@ -73,14 +78,14 @@ def init_db():
|
|||
def get_db() -> Generator[Session, None, None]:
|
||||
"""
|
||||
Context manager for database sessions.
|
||||
|
||||
|
||||
Usage:
|
||||
with get_db() as db:
|
||||
# Use db session here
|
||||
training_run = TrainingRun(run_id="123", ...)
|
||||
db.add(training_run)
|
||||
db.commit()
|
||||
|
||||
|
||||
Yields:
|
||||
Database session
|
||||
"""
|
||||
|
|
@ -94,12 +99,12 @@ def get_db() -> Generator[Session, None, None]:
|
|||
def get_db_session() -> Session:
|
||||
"""
|
||||
Get a database session (for dependency injection).
|
||||
|
||||
|
||||
Usage with FastAPI:
|
||||
@app.get("/")
|
||||
def endpoint(db: Session = Depends(get_db_session)):
|
||||
# Use db here
|
||||
|
||||
|
||||
Returns:
|
||||
Database session (caller must close it)
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue