build: token-monitor v0.1.0 — modular LLM API quota visibility

Implements modular provider probing with two distinct header schemas, plus one provider that has no billing data: (1) Teams direct (unified schema): 5h/7d utilization floats, status, reset countdown; (2) Shelley proxy (classic schema): token/request counts + Exedev-Gateway-Cost (USD/call); (3) api-ateam: reports no billing data (confirmed non-existent by recon). Key: uses claude-haiku-4-5-20251001 for minimal probe calls (max_tokens: 1). Rate-limit headers are present on ALL responses (200 and 429). 113/113 tests passing. Built from Face recon (trentuna/a-team#91) — live header capture confirmed that the unified schema with utilization floats replaces the old per-count schema.
2026-04-04 17:01:05 +00:00 · 2026-04-04 17:01:05 +00:00 · 07a544c50d
commit 07a544c50d
parent 760049a25e
10 changed files with 1093 additions and 1 deletions
--- a/providers/anthropic-api.js
+++ b/providers/anthropic-api.js
@ -0,0 +1,75 @@
+/**
+ * anthropic-api.js — api-ateam (pay-per-use, api03 keys)
+ *
+ * Anthropic's REST API does not expose billing or quota data via any endpoint
+ * (confirmed by recon, issue trentuna/a-team#91), so this module always reports
+ * that billing data is unavailable. getApiAteamStatus() returns that result
+ * statically, without an HTTP call; probeApiProvider() additionally makes one
+ * real HTTP call to check key validity, but the billing verdict is the same.
+ */
+
+import { getSeverity } from '../report.js';
+
+/**
+ * Build the fixed status record for the api-ateam provider.
+ *
+ * No network request is made: the record is assembled from constants, and the
+ * severity is delegated to getSeverity(), which receives a minimal
+ * `{ type: 'api-direct' }` descriptor.
+ *
+ * @returns {Object} record with `type`, `status`, `message` and `severity`
+ */
+export function getApiAteamStatus() {
+  const type = 'api-direct';
+  const severity = getSeverity({ type });
+
+  return {
+    type,
+    status: 'no_billing_data',
+    message: 'Anthropic API does not expose billing/quota via REST. Key validity not checked.',
+    severity,
+  };
+}
+
+/**
+ * Probe the api-ateam provider with one minimal Messages API call.
+ *
+ * The call only sanity-checks the key; the result never carries billing or
+ * quota figures because no billing endpoint exists. Outcomes:
+ *   - HTTP 401          → status 'invalid_key'
+ *   - HTTP 2xx or 429   → status 'no_billing_data', message ends "Key appears valid."
+ *   - any other status  → status 'no_billing_data', message ends
+ *                         "Key validity not confirmed (HTTP <status>)."
+ *   - fetch() rejects   → status 'error' carrying the error's message
+ *
+ * @param {string} providerName — accepted but not read by this function
+ * @param {string} baseUrl — API origin; `/v1/messages` is appended to it
+ * @param {string} apiKey — sent in the `x-api-key` request header
+ * @returns {Promise<Object>} normalized provider result; failures are reported
+ *   in the result rather than thrown
+ */
+export async function probeApiProvider(providerName, baseUrl, apiKey) {
+  try {
+    const response = await fetch(`${baseUrl}/v1/messages`, {
+      method: 'POST',
+      headers: {
+        'x-api-key': apiKey,
+        'anthropic-version': '2023-06-01',
+        'content-type': 'application/json',
+      },
+      body: JSON.stringify({
+        model: 'claude-haiku-4-5-20251001',
+        max_tokens: 1,
+        messages: [{ role: 'user', content: 'Hi' }],
+      }),
+    });
+
+    const httpStatus = response.status;
+
+    if (httpStatus === 401) {
+      return {
+        type: 'api-direct',
+        status: 'invalid_key',
+        message: 'Invalid API key (401)',
+        severity: 'unknown',
+      };
+    }
+
+    // Only a success or a rate-limit response says anything positive about the
+    // key. A 403 or 5xx must not be reported as "valid".
+    // NOTE(review): 429 is counted as "valid" on the assumption that rate
+    // limiting is applied after authentication — confirm against the API docs.
+    const keyAccepted = (httpStatus >= 200 && httpStatus < 300) || httpStatus === 429;
+    const keyNote = keyAccepted
+      ? 'Key appears valid.'
+      : `Key validity not confirmed (HTTP ${httpStatus}).`;
+
+    // No billing endpoint exists — always report no_billing_data
+    return {
+      type: 'api-direct',
+      status: 'no_billing_data',
+      message: `Anthropic API does not expose billing/quota via REST. ${keyNote}`,
+      http_status: httpStatus,
+      severity: 'unknown',
+    };
+  } catch (err) {
+    return {
+      type: 'api-direct',
+      status: 'error',
+      message: err.message,
+      severity: 'unknown',
+    };
+  }
+}
--- a/providers/anthropic-teams.js
+++ b/providers/anthropic-teams.js
@ -0,0 +1,130 @@
+/**
+ * anthropic-teams.js — Unified schema parser for Anthropic Teams direct providers.
+ *
+ * Teams providers (team-vigilio, team-ludo, team-molto, team-nadja, team-buio) use the
+ * anthropic-ratelimit-unified-* header family. Headers are present on EVERY response
+ * (200 and 429). A 429 is expected when the 7d budget is exhausted and contains valid
+ * quota data — do not treat it as an error, extract the headers normally.
+ *
+ * Header reference (from Face's recon, issue trentuna/a-team#91):
+ *   anthropic-ratelimit-unified-status            allowed|rejected
+ *   anthropic-ratelimit-unified-5h-status         allowed|rejected
+ *   anthropic-ratelimit-unified-5h-utilization    0.0–1.0
+ *   anthropic-ratelimit-unified-5h-reset          Unix timestamp
+ *   anthropic-ratelimit-unified-7d-status         allowed|rejected
+ *   anthropic-ratelimit-unified-7d-utilization    0.0–1.0
+ *   anthropic-ratelimit-unified-7d-reset          Unix timestamp
+ *   anthropic-ratelimit-unified-7d-surpassed-threshold  (present only when maxed)
+ *   anthropic-ratelimit-unified-representative-claim  five_hour|seven_day
+ *   anthropic-ratelimit-unified-fallback-percentage   0.0–1.0
+ *   anthropic-ratelimit-unified-reset             Unix timestamp (binding reset)
+ *   anthropic-ratelimit-unified-overage-status    rejected
+ *   anthropic-ratelimit-unified-overage-disabled-reason  org_level_disabled
+ *   anthropic-organization-id                     UUID
+ *   retry-after                                   seconds (only on 429)
+ */
+
+import { getSeverity } from '../report.js';
+
+/**
+ * Parse unified rate-limit headers from a Teams API response.
+ *
+ * A 401 short-circuits to an `invalid_key` result without reading any header.
+ * For every other status the `anthropic-ratelimit-unified-*` headers are read
+ * and normalized: missing or unparsable numeric headers become null, and the
+ * overall status falls back to 'rejected' (HTTP 429) or 'allowed' (otherwise)
+ * when the status header is absent. `retry_after_seconds` is added only when
+ * `retry-after` parses as an integer, and `surpassed_threshold_7d` only when
+ * its header is present. Severity is computed last via getSeverity(result).
+ *
+ * @param {Object} headers — fetch Response.headers (or compatible mock with .get(name));
+ *   a missing header may be reported as either null or undefined
+ * @param {number} httpStatus — HTTP status code of the response
+ * @param {string} providerName — name for logging/context (not read by this function)
+ * @returns {Object} normalized provider result
+ */
+export function parseTeamsHeaders(headers, httpStatus, providerName) {
+  // 401 = invalid API key — no quota data available
+  if (httpStatus === 401) {
+    return {
+      type: 'teams-direct',
+      status: 'invalid_key',
+      utilization_5h: null,
+      utilization_7d: null,
+      severity: 'unknown',
+    };
+  }
+
+  // fetch's Headers.get() yields null for a missing header, while Map-style
+  // mocks yield undefined; fold both into null so presence checks agree.
+  const h = (name) => headers.get(name) ?? null;
+  const textOrNull = (name) => h(name) || null;
+  const floatOrNull = (name) => {
+    const value = Number.parseFloat(h(name));
+    return Number.isNaN(value) ? null : value;
+  };
+  const intOrNull = (name) => {
+    const value = Number.parseInt(h(name), 10);
+    return Number.isNaN(value) ? null : value;
+  };
+
+  // Compute reset_in_seconds from the binding reset Unix timestamp; a reset
+  // that is already in the past is clamped to 0.
+  const resetTs = intOrNull('anthropic-ratelimit-unified-reset');
+  const nowSec = Math.floor(Date.now() / 1000);
+  const resetInSeconds = resetTs === null ? null : Math.max(0, resetTs - nowSec);
+
+  const result = {
+    type: 'teams-direct',
+    status: h('anthropic-ratelimit-unified-status') || (httpStatus === 429 ? 'rejected' : 'allowed'),
+    utilization_5h: floatOrNull('anthropic-ratelimit-unified-5h-utilization'),
+    utilization_7d: floatOrNull('anthropic-ratelimit-unified-7d-utilization'),
+    representative_claim: textOrNull('anthropic-ratelimit-unified-representative-claim'),
+    reset_timestamp: resetTs,
+    reset_in_seconds: resetInSeconds,
+    organization_id: textOrNull('anthropic-organization-id'),
+    // Additional detail headers
+    status_5h: textOrNull('anthropic-ratelimit-unified-5h-status'),
+    status_7d: textOrNull('anthropic-ratelimit-unified-7d-status'),
+    overage_status: textOrNull('anthropic-ratelimit-unified-overage-status'),
+    fallback_percentage: floatOrNull('anthropic-ratelimit-unified-fallback-percentage'),
+  };
+
+  // Include retry_after_seconds only when present and numeric. A value that
+  // does not parse as an integer (e.g. an HTTP-date) is left out, not NaN.
+  const retryAfter = intOrNull('retry-after');
+  if (retryAfter !== null) {
+    result.retry_after_seconds = retryAfter;
+  }
+
+  // Include surpassed threshold when the header is present (maxed budget).
+  // The raw value is still passed through parseFloat so presence is recorded
+  // even if the value is not numeric.
+  const surpassed = h('anthropic-ratelimit-unified-7d-surpassed-threshold');
+  if (surpassed !== null) {
+    result.surpassed_threshold_7d = Number.parseFloat(surpassed);
+  }
+
+  result.severity = getSeverity(result);
+  return result;
+}
+
+/**
+ * Send one minimal Messages API request to a Teams provider and hand the
+ * response headers and status to parseTeamsHeaders().
+ *
+ * The HTTP status is not inspected here: whatever the response is, its headers
+ * and status are passed on for parsing. If the request (or the parsing) throws,
+ * an `error` result with null utilization fields is returned instead.
+ *
+ * @param {string} providerName — forwarded to parseTeamsHeaders()
+ * @param {string} baseUrl — API origin; `/v1/messages` is appended to it
+ * @param {string} apiKey — sent in the `x-api-key` request header
+ * @returns {Promise<Object>} normalized provider result
+ */
+export async function probeTeamsProvider(providerName, baseUrl, apiKey) {
+  try {
+    const endpoint = `${baseUrl}/v1/messages`;
+    const requestHeaders = {
+      'x-api-key': apiKey,
+      'anthropic-version': '2023-06-01',
+      'content-type': 'application/json',
+    };
+    // Smallest useful request: one short user turn, one output token.
+    const payload = JSON.stringify({
+      model: 'claude-haiku-4-5-20251001',
+      max_tokens: 1,
+      messages: [{ role: 'user', content: 'Hi' }],
+    });
+
+    const { headers, status } = await fetch(endpoint, {
+      method: 'POST',
+      headers: requestHeaders,
+      body: payload,
+    });
+
+    return parseTeamsHeaders(headers, status, providerName);
+  } catch (err) {
+    const failure = {
+      type: 'teams-direct',
+      status: 'error',
+      message: err.message,
+      utilization_5h: null,
+      utilization_7d: null,
+      severity: 'unknown',
+    };
+    return failure;
+  }
+}
--- a/providers/index.js
+++ b/providers/index.js
@ -0,0 +1,47 @@
+/**
+ * providers/index.js — provider registry
+ *
+ * Reads ~/.pi/agent/models.json and returns typed provider config for all
+ * providers we know how to probe. Non-anthropic-messages providers (e.g. zai)
+ * are silently skipped.
+ */
+
+import { readFileSync } from 'fs';
+import { homedir } from 'os';
+
+/**
+ * Map a provider entry to the probe type that handles it.
+ *
+ * Two providers are recognized by exact name; any other entry counts as a Teams
+ * provider only when its `api` is 'anthropic-messages' and its name starts with
+ * 'team-'. Everything else yields null, which callers treat as "skip".
+ *
+ * @param {string} name — provider key
+ * @param {Object} config — provider entry; only `config.api` is read
+ * @returns {'teams-direct'|'shelley-proxy'|'api-direct'|null}
+ */
+function classifyProvider(name, config) {
+  switch (name) {
+    case 'shelley-proxy':
+      return 'shelley-proxy';
+    case 'api-ateam':
+      return 'api-direct';
+    default: {
+      const isTeamsEntry = config.api === 'anthropic-messages' && name.startsWith('team-');
+      return isTeamsEntry ? 'teams-direct' : null; // null → skip (zai, etc.)
+    }
+  }
+}
+
+/**
+ * Load and classify all providers from ~/.pi/agent/models.json.
+ *
+ * Entries that classifyProvider() does not recognize are skipped, as are
+ * entries whose value is not an object. A file without a `providers` key
+ * yields an empty map. A missing file or invalid JSON still throws, via
+ * readFileSync / JSON.parse.
+ *
+ * @returns {Object} map of provider name → { name, type, baseUrl, apiKey };
+ *   `apiKey` is null when the entry has no (or an empty) key
+ * @throws {Error} when the file cannot be read or is not valid JSON
+ */
+export function getProviders() {
+  const modelsJson = JSON.parse(
+    readFileSync(`${homedir()}/.pi/agent/models.json`, 'utf-8')
+  );
+
+  // Tolerate a config that has no `providers` section at all.
+  const entries = Object.entries(modelsJson?.providers ?? {});
+
+  const providers = {};
+  for (const [name, config] of entries) {
+    // A null or scalar entry has no baseUrl/api to read — skip it, don't crash.
+    if (config === null || typeof config !== 'object') continue;
+
+    const type = classifyProvider(name, config);
+    if (!type) continue;
+
+    providers[name] = {
+      name,
+      type,
+      baseUrl: config.baseUrl,
+      apiKey: config.apiKey || null,
+    };
+  }
+  return providers;
+}
+
+// Alias for backwards compatibility
+export const loadProviders = getProviders;
--- a/providers/shelley-proxy.js
+++ b/providers/shelley-proxy.js
@ -0,0 +1,96 @@
+/**
+ * shelley-proxy.js — Shelley/exe.dev proxy (classic schema + Exedev-Gateway-Cost)
+ *
+ * The Shelley proxy returns standard Anthropic rate-limit headers (classic schema,
+ * not the unified Teams schema) plus an Exedev-Gateway-Cost header with per-call
+ * USD cost. No API key is required — the proxy handles auth internally.
+ *
+ * Header reference:
+ *   Anthropic-Ratelimit-Tokens-Limit         total token budget
+ *   Anthropic-Ratelimit-Tokens-Remaining     remaining tokens
+ *   Anthropic-Ratelimit-Tokens-Reset         ISO 8601 reset time
+ *   Anthropic-Ratelimit-Requests-Limit       total request budget
+ *   Anthropic-Ratelimit-Requests-Remaining   remaining requests
+ *   Anthropic-Ratelimit-Requests-Reset       ISO 8601 reset time
+ *   Exedev-Gateway-Cost                      per-call USD cost (float)
+ *   anthropic-organization-id                organization UUID
+ */
+
+import { getSeverity } from '../report.js';
+
+/**
+ * Parse classic Anthropic rate-limit headers from a Shelley proxy response.
+ *
+ * Each header is looked up under its documented casing and then lower-cased.
+ * Numeric headers that are missing or unparsable, including
+ * Exedev-Gateway-Cost, are reported as null, never NaN. The status is 'ok' for
+ * HTTP 200, 'rate_limited' for HTTP 429 and 'error' for anything else.
+ * Severity is computed last via getSeverity(result).
+ *
+ * @param {Object} headers — fetch Response.headers (or compatible mock with .get(name))
+ * @param {number} httpStatus — HTTP status code
+ * @returns {Object} normalized provider result
+ */
+export function parseShelleyHeaders(headers, httpStatus) {
+  // The second lookup lets case-sensitive mocks keyed by lower-case names work.
+  const h = (name) => headers.get(name) || headers.get(name.toLowerCase());
+  const intOrNull = (name) => {
+    const value = Number.parseInt(h(name), 10);
+    return Number.isNaN(value) ? null : value;
+  };
+  const floatOrNull = (name) => {
+    const value = Number.parseFloat(h(name));
+    return Number.isNaN(value) ? null : value;
+  };
+
+  let status = 'error';
+  if (httpStatus === 429) {
+    status = 'rate_limited';
+  } else if (httpStatus === 200) {
+    status = 'ok';
+  }
+
+  const result = {
+    type: 'shelley-proxy',
+    status,
+    tokens_limit:       intOrNull('Anthropic-Ratelimit-Tokens-Limit'),
+    tokens_remaining:   intOrNull('Anthropic-Ratelimit-Tokens-Remaining'),
+    tokens_reset:       h('Anthropic-Ratelimit-Tokens-Reset') || null,
+    requests_limit:     intOrNull('Anthropic-Ratelimit-Requests-Limit'),
+    requests_remaining: intOrNull('Anthropic-Ratelimit-Requests-Remaining'),
+    requests_reset:     h('Anthropic-Ratelimit-Requests-Reset') || null,
+    // A malformed cost header becomes null, matching the other numeric fields.
+    cost_per_call_usd:  floatOrNull('Exedev-Gateway-Cost'),
+    organization_id:    h('anthropic-organization-id') || null,
+  };
+
+  result.severity = getSeverity(result);
+  return result;
+}
+
+// Alias used in some internal tooling
+export const parseClassicHeaders = parseShelleyHeaders;
+
+/**
+ * Send one minimal Messages API request through the Shelley proxy and hand the
+ * response headers and status to parseShelleyHeaders().
+ *
+ * The request carries a placeholder `x-api-key` of 'not-needed'. If the request
+ * (or the parsing) throws, an `error` result with null quota and cost fields is
+ * returned instead.
+ *
+ * @param {string} providerName — accepted but not read by this function
+ * @param {string} baseUrl — proxy origin; `/v1/messages` is appended to it
+ * @returns {Promise<Object>} normalized provider result
+ */
+export async function probeShelleyProxy(providerName, baseUrl) {
+  try {
+    const endpoint = `${baseUrl}/v1/messages`;
+    const requestHeaders = {
+      'x-api-key': 'not-needed',
+      'anthropic-version': '2023-06-01',
+      'content-type': 'application/json',
+    };
+    // Smallest useful request: one short user turn, one output token.
+    const payload = JSON.stringify({
+      model: 'claude-haiku-4-5-20251001',
+      max_tokens: 1,
+      messages: [{ role: 'user', content: 'Hi' }],
+    });
+
+    const { headers, status } = await fetch(endpoint, {
+      method: 'POST',
+      headers: requestHeaders,
+      body: payload,
+    });
+
+    return parseShelleyHeaders(headers, status);
+  } catch (err) {
+    const failure = {
+      type: 'shelley-proxy',
+      status: 'error',
+      message: err.message,
+      tokens_limit: null,
+      tokens_remaining: null,
+      requests_limit: null,
+      requests_remaining: null,
+      cost_per_call_usd: null,
+      severity: 'unknown',
+    };
+    return failure;
+  }
+}