feat(api): add Next.js proxy routes for ML inference service

2026-02-15 14:30:09 +01:00 · 2026-02-15 14:30:09 +01:00 · 205021e810
commit 205021e810
parent 3a83fd38e9
5 changed files with 210 additions and 4 deletions
--- a/.env.example
+++ b/.env.example
@ -1,3 +1,9 @@
 NODE_ENV=production
 PORT=3000
 DATABASE_PATH=/app/data/candles.db
 # ML Inference Service Configuration (timeout values are in milliseconds)
 INFERENCE_API_URL=http://localhost:8001
 INFERENCE_API_TIMEOUT=30000
 INFERENCE_BATCH_TIMEOUT=120000
 NEXT_PUBLIC_PREDICTIONS_ENABLED=true
--- a/openspec/changes/candle-backend/tasks.md
+++ b/openspec/changes/candle-backend/tasks.md
@ -64,10 +64,10 @@
 ## 7. Next.js API Proxy Routes
- [ ] 7.1 Create `src/app/api/predict/route.ts` — POST proxy to `${INFERENCE_API_URL}/predict` with timeout handling
+- [x] 7.1 Create `src/app/api/predict/route.ts` — POST proxy to `${INFERENCE_API_URL}/predict` with timeout handling
- [ ] 7.2 Create `src/app/api/predict/batch/route.ts` — POST proxy to `${INFERENCE_API_URL}/predict/batch` with INFERENCE_BATCH_TIMEOUT
+- [x] 7.2 Create `src/app/api/predict/batch/route.ts` — POST proxy to `${INFERENCE_API_URL}/predict/batch` with INFERENCE_BATCH_TIMEOUT
- [ ] 7.3 Create `src/app/api/model/info/route.ts` — GET proxy to `${INFERENCE_API_URL}/model/info`
+- [x] 7.3 Create `src/app/api/model/info/route.ts` — GET proxy to `${INFERENCE_API_URL}/model/info`
- [ ] 7.4 Add environment variables to `.env.local`: INFERENCE_API_URL, INFERENCE_API_TIMEOUT, INFERENCE_BATCH_TIMEOUT, NEXT_PUBLIC_PREDICTIONS_ENABLED
+- [x] 7.4 Add environment variables to `.env.local`: INFERENCE_API_URL, INFERENCE_API_TIMEOUT, INFERENCE_BATCH_TIMEOUT, NEXT_PUBLIC_PREDICTIONS_ENABLED
 ## 8. Span Annotation Export & Feedback
--- a/src/app/api/model/info/route.ts
+++ b/src/app/api/model/info/route.ts
@ -0,0 +1,70 @@
 import { NextRequest, NextResponse } from 'next/server';
 // Base URL of the Python inference service. Trailing slashes are stripped so
 // that `${INFERENCE_API_URL}/model/info` never contains a double slash; an
 // unset or empty variable falls back to the local default.
 const INFERENCE_API_URL =
  (process.env.INFERENCE_API_URL ?? '').trim().replace(/\/+$/, '') || 'http://localhost:8001';
 // Upstream timeout in milliseconds (passed to setTimeout). A missing,
 // non-numeric, or non-positive value falls back to the default: a NaN delay
 // would make the abort timer fire almost immediately and turn every request
 // into a 504.
 const DEFAULT_INFERENCE_API_TIMEOUT = 10000;
 const configuredApiTimeout = Number.parseInt(process.env.INFERENCE_API_TIMEOUT ?? '', 10);
 const INFERENCE_API_TIMEOUT =
  Number.isFinite(configuredApiTimeout) && configuredApiTimeout > 0
    ? configuredApiTimeout
    : DEFAULT_INFERENCE_API_TIMEOUT;
 // Sentinel returned by parseJsonSafely when the text is not valid JSON, so a
 // legitimately parsed `null` can be told apart from a parse failure.
 const INVALID_JSON = Symbol('invalid JSON');

 /** Parses `text` as JSON, returning INVALID_JSON instead of throwing. */
 function parseJsonSafely(text: string): unknown {
  try {
    return JSON.parse(text);
  } catch {
    return INVALID_JSON;
  }
 }

 /** Returns the `detail` property of a parsed upstream payload, if it has one. */
 function extractDetail(payload: unknown): unknown {
  if (typeof payload !== 'object' || payload === null) {
    return undefined;
  }
  return (payload as { detail?: unknown }).detail;
 }

 /** True when `error` is the rejection raised after the timeout aborted the request. */
 function isAbortError(error: unknown): boolean {
  return (
    typeof error === 'object' &&
    error !== null &&
    (error as { name?: unknown }).name === 'AbortError'
  );
 }

 /** True when `error` looks like a failure to reach the inference service at all. */
 function isConnectionError(error: unknown): boolean {
  if (typeof error !== 'object' || error === null) {
    return false;
  }
  const { cause, message } = error as { cause?: unknown; message?: unknown };
  const code =
    typeof cause === 'object' && cause !== null
      ? (cause as { code?: unknown }).code
      : undefined;
  return (
    code === 'ECONNREFUSED' ||
    (typeof message === 'string' && message.includes('fetch failed'))
  );
 }

 /**
  * GET handler that proxies to `${INFERENCE_API_URL}/model/info`.
  *
  * Responses:
  * - upstream 2xx with a JSON body: the parsed body is returned as-is
  * - upstream 503: `{ error: 'No model available' }` with status 503
  * - other upstream error: `{ error }` with the upstream status, where `error`
  *   is the upstream `detail` field or a generic message
  * - upstream 2xx with a non-JSON body: 502
  * - no response within INFERENCE_API_TIMEOUT ms: 504
  * - inference service unreachable: 503
  * - anything else: 500
  *
  * @param request Incoming request; it is not read, since nothing is forwarded upstream.
  */
 export async function GET(request: NextRequest) {
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), INFERENCE_API_TIMEOUT);
  try {
    const response = await fetch(`${INFERENCE_API_URL}/model/info`, {
      method: 'GET',
      // No request body is sent, so advertise the expected response type
      // instead of declaring a Content-Type.
      headers: {
        Accept: 'application/json',
      },
      signal: controller.signal,
    });
    // Read the body while the abort timer is still armed so that a stalled
    // body stream is also bounded by the timeout.
    const rawBody = await response.text();

    // Handle 503 (no model available) before parsing: the fixed message does
    // not depend on the body, which may not be JSON at all.
    if (response.status === 503) {
      return NextResponse.json(
        { error: 'No model available' },
        { status: 503 }
      );
    }

    const data = parseJsonSafely(rawBody);
    if (!response.ok) {
      return NextResponse.json(
        { error: extractDetail(data) || 'Failed to get model info' },
        { status: response.status }
      );
    }
    if (data === INVALID_JSON) {
      // A success status with an unparseable body is an upstream fault.
      console.error('Model info proxy error: inference service returned a non-JSON body');
      return NextResponse.json(
        { error: 'Invalid response from inference service' },
        { status: 502 }
      );
    }
    return NextResponse.json(data);
  } catch (error: unknown) {
    if (isAbortError(error)) {
      return NextResponse.json(
        { error: 'Model info request timed out' },
        { status: 504 }
      );
    }
    console.error('Model info proxy error:', error);
    if (isConnectionError(error)) {
      return NextResponse.json(
        { error: 'Inference service unavailable' },
        { status: 503 }
      );
    }
    return NextResponse.json(
      { error: 'Internal server error' },
      { status: 500 }
    );
  } finally {
    // Runs on every exit path, so the timer can never outlive the request.
    clearTimeout(timeoutId);
  }
 }
--- a/src/app/api/predict/batch/route.ts
+++ b/src/app/api/predict/batch/route.ts
@ -0,0 +1,65 @@
 import { NextRequest, NextResponse } from 'next/server';
 // Base URL of the Python inference service. Trailing slashes are stripped so
 // that `${INFERENCE_API_URL}/predict/batch` never contains a double slash; an
 // unset or empty variable falls back to the local default.
 const INFERENCE_API_URL =
  (process.env.INFERENCE_API_URL ?? '').trim().replace(/\/+$/, '') || 'http://localhost:8001';
 // Upstream timeout for batch predictions in milliseconds (passed to
 // setTimeout). A missing, non-numeric, or non-positive value falls back to the
 // default: a NaN delay would make the abort timer fire almost immediately and
 // turn every request into a 504.
 const DEFAULT_INFERENCE_BATCH_TIMEOUT = 120000;
 const configuredBatchTimeout = Number.parseInt(process.env.INFERENCE_BATCH_TIMEOUT ?? '', 10);
 const INFERENCE_BATCH_TIMEOUT =
  Number.isFinite(configuredBatchTimeout) && configuredBatchTimeout > 0
    ? configuredBatchTimeout
    : DEFAULT_INFERENCE_BATCH_TIMEOUT;
 // Sentinel returned by parseJsonSafely when the text is not valid JSON, so a
 // legitimately parsed `null` can be told apart from a parse failure.
 const INVALID_JSON = Symbol('invalid JSON');

 /** Parses `text` as JSON, returning INVALID_JSON instead of throwing. */
 function parseJsonSafely(text: string): unknown {
  try {
    return JSON.parse(text);
  } catch {
    return INVALID_JSON;
  }
 }

 /** Returns the `detail` property of a parsed upstream payload, if it has one. */
 function extractDetail(payload: unknown): unknown {
  if (typeof payload !== 'object' || payload === null) {
    return undefined;
  }
  return (payload as { detail?: unknown }).detail;
 }

 /** True when `error` is the rejection raised after the timeout aborted the request. */
 function isAbortError(error: unknown): boolean {
  return (
    typeof error === 'object' &&
    error !== null &&
    (error as { name?: unknown }).name === 'AbortError'
  );
 }

 /** True when `error` looks like a failure to reach the inference service at all. */
 function isConnectionError(error: unknown): boolean {
  if (typeof error !== 'object' || error === null) {
    return false;
  }
  const { cause, message } = error as { cause?: unknown; message?: unknown };
  const code =
    typeof cause === 'object' && cause !== null
      ? (cause as { code?: unknown }).code
      : undefined;
  return (
    code === 'ECONNREFUSED' ||
    (typeof message === 'string' && message.includes('fetch failed'))
  );
 }

 /**
  * POST handler that proxies a JSON body to `${INFERENCE_API_URL}/predict/batch`.
  *
  * Responses:
  * - request body is not valid JSON: 400
  * - upstream 2xx with a JSON body: the parsed body is returned as-is
  * - upstream error: `{ error }` with the upstream status, where `error` is the
  *   upstream `detail` field or a generic message
  * - upstream 2xx with a non-JSON body: 502
  * - no response within INFERENCE_BATCH_TIMEOUT ms: 504
  * - inference service unreachable: 503
  * - anything else: 500
  *
  * @param request Incoming request whose JSON body is re-serialized and forwarded.
  */
 export async function POST(request: NextRequest) {
  // A malformed client payload is the caller's fault, so report it as 400
  // rather than letting the parse error surface as a 500.
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return NextResponse.json(
      { error: 'Invalid JSON in request body' },
      { status: 400 }
    );
  }

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), INFERENCE_BATCH_TIMEOUT);
  try {
    const response = await fetch(`${INFERENCE_API_URL}/predict/batch`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(body),
      signal: controller.signal,
    });
    // Read the body while the abort timer is still armed so that a stalled
    // body stream is also bounded by the timeout.
    const data = parseJsonSafely(await response.text());

    if (!response.ok) {
      return NextResponse.json(
        { error: extractDetail(data) || 'Batch prediction failed' },
        { status: response.status }
      );
    }
    if (data === INVALID_JSON) {
      // A success status with an unparseable body is an upstream fault.
      console.error('Batch predict proxy error: inference service returned a non-JSON body');
      return NextResponse.json(
        { error: 'Invalid response from inference service' },
        { status: 502 }
      );
    }
    return NextResponse.json(data);
  } catch (error: unknown) {
    if (isAbortError(error)) {
      return NextResponse.json(
        { error: 'Batch prediction timed out' },
        { status: 504 }
      );
    }
    console.error('Batch predict proxy error:', error);
    if (isConnectionError(error)) {
      return NextResponse.json(
        { error: 'Inference service unavailable' },
        { status: 503 }
      );
    }
    return NextResponse.json(
      { error: 'Internal server error' },
      { status: 500 }
    );
  } finally {
    // Runs on every exit path, so the timer can never outlive the request.
    clearTimeout(timeoutId);
  }
 }
--- a/src/app/api/predict/route.ts
+++ b/src/app/api/predict/route.ts
@ -0,0 +1,65 @@
 import { NextRequest, NextResponse } from 'next/server';
 // Base URL of the Python inference service. Trailing slashes are stripped so
 // that `${INFERENCE_API_URL}/predict` never contains a double slash; an unset
 // or empty variable falls back to the local default.
 const INFERENCE_API_URL =
  (process.env.INFERENCE_API_URL ?? '').trim().replace(/\/+$/, '') || 'http://localhost:8001';
 // Upstream timeout in milliseconds (passed to setTimeout). A missing,
 // non-numeric, or non-positive value falls back to the default: a NaN delay
 // would make the abort timer fire almost immediately and turn every request
 // into a 504.
 const DEFAULT_INFERENCE_API_TIMEOUT = 30000;
 const configuredApiTimeout = Number.parseInt(process.env.INFERENCE_API_TIMEOUT ?? '', 10);
 const INFERENCE_API_TIMEOUT =
  Number.isFinite(configuredApiTimeout) && configuredApiTimeout > 0
    ? configuredApiTimeout
    : DEFAULT_INFERENCE_API_TIMEOUT;
 // Sentinel returned by parseJsonSafely when the text is not valid JSON, so a
 // legitimately parsed `null` can be told apart from a parse failure.
 const INVALID_JSON = Symbol('invalid JSON');

 /** Parses `text` as JSON, returning INVALID_JSON instead of throwing. */
 function parseJsonSafely(text: string): unknown {
  try {
    return JSON.parse(text);
  } catch {
    return INVALID_JSON;
  }
 }

 /** Returns the `detail` property of a parsed upstream payload, if it has one. */
 function extractDetail(payload: unknown): unknown {
  if (typeof payload !== 'object' || payload === null) {
    return undefined;
  }
  return (payload as { detail?: unknown }).detail;
 }

 /** True when `error` is the rejection raised after the timeout aborted the request. */
 function isAbortError(error: unknown): boolean {
  return (
    typeof error === 'object' &&
    error !== null &&
    (error as { name?: unknown }).name === 'AbortError'
  );
 }

 /** True when `error` looks like a failure to reach the inference service at all. */
 function isConnectionError(error: unknown): boolean {
  if (typeof error !== 'object' || error === null) {
    return false;
  }
  const { cause, message } = error as { cause?: unknown; message?: unknown };
  const code =
    typeof cause === 'object' && cause !== null
      ? (cause as { code?: unknown }).code
      : undefined;
  return (
    code === 'ECONNREFUSED' ||
    (typeof message === 'string' && message.includes('fetch failed'))
  );
 }

 /**
  * POST handler that proxies a JSON body to `${INFERENCE_API_URL}/predict`.
  *
  * Responses:
  * - request body is not valid JSON: 400
  * - upstream 2xx with a JSON body: the parsed body is returned as-is
  * - upstream error: `{ error }` with the upstream status, where `error` is the
  *   upstream `detail` field or a generic message
  * - upstream 2xx with a non-JSON body: 502
  * - no response within INFERENCE_API_TIMEOUT ms: 504
  * - inference service unreachable: 503
  * - anything else: 500
  *
  * @param request Incoming request whose JSON body is re-serialized and forwarded.
  */
 export async function POST(request: NextRequest) {
  // A malformed client payload is the caller's fault, so report it as 400
  // rather than letting the parse error surface as a 500.
  let body: unknown;
  try {
    body = await request.json();
  } catch {
    return NextResponse.json(
      { error: 'Invalid JSON in request body' },
      { status: 400 }
    );
  }

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), INFERENCE_API_TIMEOUT);
  try {
    const response = await fetch(`${INFERENCE_API_URL}/predict`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(body),
      signal: controller.signal,
    });
    // Read the body while the abort timer is still armed so that a stalled
    // body stream is also bounded by the timeout.
    const data = parseJsonSafely(await response.text());

    if (!response.ok) {
      return NextResponse.json(
        { error: extractDetail(data) || 'Prediction failed' },
        { status: response.status }
      );
    }
    if (data === INVALID_JSON) {
      // A success status with an unparseable body is an upstream fault.
      console.error('Predict proxy error: inference service returned a non-JSON body');
      return NextResponse.json(
        { error: 'Invalid response from inference service' },
        { status: 502 }
      );
    }
    return NextResponse.json(data);
  } catch (error: unknown) {
    if (isAbortError(error)) {
      return NextResponse.json(
        { error: 'Prediction request timed out' },
        { status: 504 }
      );
    }
    console.error('Predict proxy error:', error);
    if (isConnectionError(error)) {
      return NextResponse.json(
        { error: 'Inference service unavailable' },
        { status: 503 }
      );
    }
    return NextResponse.json(
      { error: 'Internal server error' },
      { status: 500 }
    );
  } finally {
    // Runs on every exit path, so the timer can never outlive the request.
    clearTimeout(timeoutId);
  }
 }