feat(api): add Next.js proxy routes for ML inference service

2026-02-15 14:30:09 +01:00 · 2026-02-15 14:30:09 +01:00 · 205021e810
commit 205021e810
parent 3a83fd38e9
5 changed files with 210 additions and 4 deletions
--- a/.env.example
+++ b/.env.example
@ -1,3 +1,9 @@
 NODE_ENV=production
 PORT=3000
 DATABASE_PATH=/app/data/candles.db
+
+# ML Inference Service Configuration
+INFERENCE_API_URL=http://localhost:8001
+INFERENCE_API_TIMEOUT=30000
+INFERENCE_BATCH_TIMEOUT=120000
+NEXT_PUBLIC_PREDICTIONS_ENABLED=true
--- a/openspec/changes/candle-backend/tasks.md
+++ b/openspec/changes/candle-backend/tasks.md
@ -64,10 +64,10 @@

 ## 7. Next.js API Proxy Routes

- [ ] 7.1 Create `src/app/api/predict/route.ts` — POST proxy to `${INFERENCE_API_URL}/predict` with timeout handling
- [ ] 7.2 Create `src/app/api/predict/batch/route.ts` — POST proxy to `${INFERENCE_API_URL}/predict/batch` with INFERENCE_BATCH_TIMEOUT
- [ ] 7.3 Create `src/app/api/model/info/route.ts` — GET proxy to `${INFERENCE_API_URL}/model/info`
- [ ] 7.4 Add environment variables to `.env.local`: INFERENCE_API_URL, INFERENCE_API_TIMEOUT, INFERENCE_BATCH_TIMEOUT, NEXT_PUBLIC_PREDICTIONS_ENABLED
+- [x] 7.1 Create `src/app/api/predict/route.ts` — POST proxy to `${INFERENCE_API_URL}/predict` with timeout handling
+- [x] 7.2 Create `src/app/api/predict/batch/route.ts` — POST proxy to `${INFERENCE_API_URL}/predict/batch` with INFERENCE_BATCH_TIMEOUT
+- [x] 7.3 Create `src/app/api/model/info/route.ts` — GET proxy to `${INFERENCE_API_URL}/model/info`
+- [x] 7.4 Add environment variables to `.env.local`: INFERENCE_API_URL, INFERENCE_API_TIMEOUT, INFERENCE_BATCH_TIMEOUT, NEXT_PUBLIC_PREDICTIONS_ENABLED

 ## 8. Span Annotation Export & Feedback

--- a/src/app/api/model/info/route.ts
+++ b/src/app/api/model/info/route.ts
@ -0,0 +1,70 @@
+import { NextRequest, NextResponse } from 'next/server';
+
+// Base URL of the Python inference service. Trailing slashes are stripped so
+// that `${INFERENCE_API_URL}/model/info` never contains a double slash.
+const INFERENCE_API_URL = (process.env.INFERENCE_API_URL || 'http://localhost:8001').replace(/\/+$/, '');
+
+// Upstream timeout in milliseconds. A missing, non-numeric, zero or negative
+// value falls back to the default: parseInt would otherwise yield NaN, and
+// setTimeout treats a NaN delay as "fire immediately", aborting every request.
+const DEFAULT_INFERENCE_API_TIMEOUT = 10000;
+const parsedInferenceApiTimeout = Number.parseInt(process.env.INFERENCE_API_TIMEOUT ?? '', 10);
+const INFERENCE_API_TIMEOUT =
+  Number.isFinite(parsedInferenceApiTimeout) && parsedInferenceApiTimeout > 0
+    ? parsedInferenceApiTimeout
+    : DEFAULT_INFERENCE_API_TIMEOUT;
+
+/**
+ * GET proxy: forwards to `${INFERENCE_API_URL}/model/info` and relays the result.
+ *
+ * Response mapping:
+ * - upstream 2xx with a JSON body  -> that JSON, status 200
+ * - upstream 503                   -> 503 `{ error: 'No model available' }`
+ * - any other upstream non-2xx     -> same status, `{ error: <detail or fallback> }`
+ * - upstream 2xx with non-JSON body -> 502
+ * - no complete response within INFERENCE_API_TIMEOUT ms -> 504
+ * - connection refused / fetch failed -> 503 `{ error: 'Inference service unavailable' }`
+ * - anything else                  -> 500
+ *
+ * @param request Incoming request; not read, present to match the route handler signature.
+ */
+export async function GET(request: NextRequest) {
+  const controller = new AbortController();
+  const timeoutId = setTimeout(() => controller.abort(), INFERENCE_API_TIMEOUT);
+
+  try {
+    const response = await fetch(`${INFERENCE_API_URL}/model/info`, {
+      method: 'GET',
+      // A GET carries no body, so advertise what we accept instead of a Content-Type.
+      headers: {
+        Accept: 'application/json',
+      },
+      // Model info can change when a model is (re)loaded; never serve a cached copy.
+      cache: 'no-store',
+      signal: controller.signal,
+    });
+
+    // Check 503 before touching the body: a 503 may come with an empty or
+    // non-JSON body, and parsing it first would turn this case into a 500.
+    if (response.status === 503) {
+      return NextResponse.json(
+        { error: 'No model available' },
+        { status: 503 }
+      );
+    }
+
+    // Read the body while the timeout is still armed so a stalled stream is
+    // aborted too, then parse defensively instead of letting a SyntaxError escape.
+    const rawBody = await response.text();
+    let data: unknown;
+    let isJson = true;
+    try {
+      data = JSON.parse(rawBody);
+    } catch {
+      isJson = false;
+    }
+
+    if (!response.ok) {
+      const detail = (data as { detail?: unknown } | null | undefined)?.detail;
+      return NextResponse.json(
+        { error: detail || 'Failed to get model info' },
+        { status: response.status }
+      );
+    }
+
+    if (!isJson) {
+      return NextResponse.json(
+        { error: 'Invalid response from inference service' },
+        { status: 502 }
+      );
+    }
+
+    return NextResponse.json(data);
+  } catch (error: unknown) {
+    const err = error as
+      | { name?: string; message?: string; cause?: { code?: string } }
+      | null
+      | undefined;
+
+    // Raised by fetch() or by the body read once the timeout aborts the signal.
+    if (err?.name === 'AbortError') {
+      return NextResponse.json(
+        { error: 'Model info request timed out' },
+        { status: 504 }
+      );
+    }
+
+    console.error('Model info proxy error:', error);
+
+    // Check if it's a connection error
+    if (err?.cause?.code === 'ECONNREFUSED' || err?.message?.includes('fetch failed')) {
+      return NextResponse.json(
+        { error: 'Inference service unavailable' },
+        { status: 503 }
+      );
+    }
+
+    return NextResponse.json(
+      { error: 'Internal server error' },
+      { status: 500 }
+    );
+  } finally {
+    // Single cleanup point for every exit path (success, error, timeout).
+    clearTimeout(timeoutId);
+  }
+}
--- a/src/app/api/predict/batch/route.ts
+++ b/src/app/api/predict/batch/route.ts
@ -0,0 +1,65 @@
+import { NextRequest, NextResponse } from 'next/server';
+
+// Base URL of the Python inference service. Trailing slashes are stripped so
+// that `${INFERENCE_API_URL}/predict/batch` never contains a double slash.
+const INFERENCE_API_URL = (process.env.INFERENCE_API_URL || 'http://localhost:8001').replace(/\/+$/, '');
+
+// Batch timeout in milliseconds. A missing, non-numeric, zero or negative
+// value falls back to the default: parseInt would otherwise yield NaN, and
+// setTimeout treats a NaN delay as "fire immediately", aborting every request.
+const DEFAULT_INFERENCE_BATCH_TIMEOUT = 120000;
+const parsedInferenceBatchTimeout = Number.parseInt(process.env.INFERENCE_BATCH_TIMEOUT ?? '', 10);
+const INFERENCE_BATCH_TIMEOUT =
+  Number.isFinite(parsedInferenceBatchTimeout) && parsedInferenceBatchTimeout > 0
+    ? parsedInferenceBatchTimeout
+    : DEFAULT_INFERENCE_BATCH_TIMEOUT;
+
+/**
+ * POST proxy: forwards the JSON request body to
+ * `${INFERENCE_API_URL}/predict/batch` and relays the result.
+ *
+ * Response mapping:
+ * - request body is not valid JSON  -> 400
+ * - upstream 2xx with a JSON body   -> that JSON, status 200
+ * - upstream non-2xx                -> same status, `{ error: <detail or fallback> }`
+ * - upstream 2xx with non-JSON body -> 502
+ * - no complete response within INFERENCE_BATCH_TIMEOUT ms -> 504
+ * - connection refused / fetch failed -> 503 `{ error: 'Inference service unavailable' }`
+ * - anything else                   -> 500
+ *
+ * @param request Incoming request whose JSON body is forwarded unchanged.
+ */
+export async function POST(request: NextRequest) {
+  // A malformed client payload is the caller's fault: answer 400 rather than
+  // letting the parse error surface as a 500.
+  let body: unknown;
+  try {
+    body = await request.json();
+  } catch {
+    return NextResponse.json(
+      { error: 'Request body must be valid JSON' },
+      { status: 400 }
+    );
+  }
+
+  const controller = new AbortController();
+  const timeoutId = setTimeout(() => controller.abort(), INFERENCE_BATCH_TIMEOUT);
+
+  try {
+    const response = await fetch(`${INFERENCE_API_URL}/predict/batch`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify(body),
+      signal: controller.signal,
+    });
+
+    // Read the body while the timeout is still armed so a stalled stream is
+    // aborted too, then parse defensively instead of letting a SyntaxError escape.
+    const rawBody = await response.text();
+    let data: unknown;
+    let isJson = true;
+    try {
+      data = JSON.parse(rawBody);
+    } catch {
+      isJson = false;
+    }
+
+    if (!response.ok) {
+      const detail = (data as { detail?: unknown } | null | undefined)?.detail;
+      return NextResponse.json(
+        { error: detail || 'Batch prediction failed' },
+        { status: response.status }
+      );
+    }
+
+    if (!isJson) {
+      return NextResponse.json(
+        { error: 'Invalid response from inference service' },
+        { status: 502 }
+      );
+    }
+
+    return NextResponse.json(data);
+  } catch (error: unknown) {
+    const err = error as
+      | { name?: string; message?: string; cause?: { code?: string } }
+      | null
+      | undefined;
+
+    // Raised by fetch() or by the body read once the timeout aborts the signal.
+    if (err?.name === 'AbortError') {
+      return NextResponse.json(
+        { error: 'Batch prediction timed out' },
+        { status: 504 }
+      );
+    }
+
+    console.error('Batch predict proxy error:', error);
+
+    // Check if it's a connection error
+    if (err?.cause?.code === 'ECONNREFUSED' || err?.message?.includes('fetch failed')) {
+      return NextResponse.json(
+        { error: 'Inference service unavailable' },
+        { status: 503 }
+      );
+    }
+
+    return NextResponse.json(
+      { error: 'Internal server error' },
+      { status: 500 }
+    );
+  } finally {
+    // Single cleanup point for every exit path (success, error, timeout).
+    clearTimeout(timeoutId);
+  }
+}
--- a/src/app/api/predict/route.ts
+++ b/src/app/api/predict/route.ts
@ -0,0 +1,65 @@
+import { NextRequest, NextResponse } from 'next/server';
+
+// Base URL of the Python inference service. Trailing slashes are stripped so
+// that `${INFERENCE_API_URL}/predict` never contains a double slash.
+const INFERENCE_API_URL = (process.env.INFERENCE_API_URL || 'http://localhost:8001').replace(/\/+$/, '');
+
+// Upstream timeout in milliseconds. A missing, non-numeric, zero or negative
+// value falls back to the default: parseInt would otherwise yield NaN, and
+// setTimeout treats a NaN delay as "fire immediately", aborting every request.
+const DEFAULT_INFERENCE_API_TIMEOUT = 30000;
+const parsedInferenceApiTimeout = Number.parseInt(process.env.INFERENCE_API_TIMEOUT ?? '', 10);
+const INFERENCE_API_TIMEOUT =
+  Number.isFinite(parsedInferenceApiTimeout) && parsedInferenceApiTimeout > 0
+    ? parsedInferenceApiTimeout
+    : DEFAULT_INFERENCE_API_TIMEOUT;
+
+/**
+ * POST proxy: forwards the JSON request body to `${INFERENCE_API_URL}/predict`
+ * and relays the result.
+ *
+ * Response mapping:
+ * - request body is not valid JSON  -> 400
+ * - upstream 2xx with a JSON body   -> that JSON, status 200
+ * - upstream non-2xx                -> same status, `{ error: <detail or fallback> }`
+ * - upstream 2xx with non-JSON body -> 502
+ * - no complete response within INFERENCE_API_TIMEOUT ms -> 504
+ * - connection refused / fetch failed -> 503 `{ error: 'Inference service unavailable' }`
+ * - anything else                   -> 500
+ *
+ * @param request Incoming request whose JSON body is forwarded unchanged.
+ */
+export async function POST(request: NextRequest) {
+  // A malformed client payload is the caller's fault: answer 400 rather than
+  // letting the parse error surface as a 500.
+  let body: unknown;
+  try {
+    body = await request.json();
+  } catch {
+    return NextResponse.json(
+      { error: 'Request body must be valid JSON' },
+      { status: 400 }
+    );
+  }
+
+  const controller = new AbortController();
+  const timeoutId = setTimeout(() => controller.abort(), INFERENCE_API_TIMEOUT);
+
+  try {
+    const response = await fetch(`${INFERENCE_API_URL}/predict`, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify(body),
+      signal: controller.signal,
+    });
+
+    // Read the body while the timeout is still armed so a stalled stream is
+    // aborted too, then parse defensively instead of letting a SyntaxError escape.
+    const rawBody = await response.text();
+    let data: unknown;
+    let isJson = true;
+    try {
+      data = JSON.parse(rawBody);
+    } catch {
+      isJson = false;
+    }
+
+    if (!response.ok) {
+      const detail = (data as { detail?: unknown } | null | undefined)?.detail;
+      return NextResponse.json(
+        { error: detail || 'Prediction failed' },
+        { status: response.status }
+      );
+    }
+
+    if (!isJson) {
+      return NextResponse.json(
+        { error: 'Invalid response from inference service' },
+        { status: 502 }
+      );
+    }
+
+    return NextResponse.json(data);
+  } catch (error: unknown) {
+    const err = error as
+      | { name?: string; message?: string; cause?: { code?: string } }
+      | null
+      | undefined;
+
+    // Raised by fetch() or by the body read once the timeout aborts the signal.
+    if (err?.name === 'AbortError') {
+      return NextResponse.json(
+        { error: 'Prediction request timed out' },
+        { status: 504 }
+      );
+    }
+
+    console.error('Predict proxy error:', error);
+
+    // Check if it's a connection error
+    if (err?.cause?.code === 'ECONNREFUSED' || err?.message?.includes('fetch failed')) {
+      return NextResponse.json(
+        { error: 'Inference service unavailable' },
+        { status: 503 }
+      );
+    }
+
+    return NextResponse.json(
+      { error: 'Internal server error' },
+      { status: 500 }
+    );
+  } finally {
+    // Single cleanup point for every exit path (success, error, timeout).
+    clearTimeout(timeoutId);
+  }
+}