build: token-monitor v0.1.0 — modular LLM API quota visibility
Implements modular provider probing with two distinct header schemas: - Teams direct (unified schema): 5h/7d utilization floats, status, reset countdown - Shelley proxy (classic schema): token/request counts + Exedev-Gateway-Cost (USD/call) - api-ateam: reports no billing data (confirmed non-existent by recon) Key: uses claude-haiku-4-5-20251001 for minimal probe calls (1 token). Rate-limit headers present on ALL responses (200 and 429). 113/113 tests passing. Built from Face recon (trentuna/a-team#91) — live header capture confirmed unified schema with utilization floats replaces old per-count schema.
This commit is contained in:
parent
760049a25e
commit
07a544c50d
10 changed files with 1093 additions and 1 deletions
75
providers/anthropic-api.js
Normal file
75
providers/anthropic-api.js
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
/**
|
||||
* anthropic-api.js — api-ateam (pay-per-use, api03 keys)
|
||||
*
|
||||
* Anthropic's REST API does not expose billing or quota data via any endpoint
|
||||
* (confirmed by recon, issue trentuna/a-team#91). This module reports that
|
||||
* billing data is unavailable — always. Key validity could be confirmed with
|
||||
* a real HTTP call, but for monitoring purposes the static result is authoritative.
|
||||
*/
|
||||
|
||||
import { getSeverity } from '../report.js';
|
||||
|
||||
/**
 * Build the fixed status record for the api-ateam provider.
 *
 * Nothing is fetched here: the record is assembled from constants, and the
 * only computed field is the severity that getSeverity assigns to an
 * 'api-direct' entry.
 *
 * @returns {Object} record with type, status, message and severity fields
 */
export function getApiAteamStatus() {
  const type = 'api-direct';
  const severity = getSeverity({ type });

  return {
    type,
    status: 'no_billing_data',
    message: 'Anthropic API does not expose billing/quota via REST. Key validity not checked.',
    severity,
  };
}
|
||||
|
||||
/**
 * Probe the api-ateam provider.
 *
 * Sends one minimal (max_tokens: 1) request to `${baseUrl}/v1/messages`.
 * The result never contains billing data; the call only serves to learn
 * something about the key:
 *   - 401            → status 'invalid_key'
 *   - 2xx or 429     → status 'no_billing_data', message says the key appears valid
 *   - anything else  → status 'no_billing_data', message says validity is unconfirmed
 *   - fetch rejects  → status 'error' with the error's message
 *
 * This function does not throw; every outcome is reported in the returned object.
 *
 * @param {string} providerName — accepted for signature parity with the other probes; not used here
 * @param {string} baseUrl — API origin, without the /v1/messages suffix
 * @param {string} apiKey — value sent in the x-api-key header
 * @returns {Promise<Object>} result with type, status, message, severity and,
 *   for non-401 HTTP responses, http_status
 */
export async function probeApiProvider(providerName, baseUrl, apiKey) {
  try {
    const response = await fetch(`${baseUrl}/v1/messages`, {
      method: 'POST',
      headers: {
        'x-api-key': apiKey,
        'anthropic-version': '2023-06-01',
        'content-type': 'application/json',
      },
      body: JSON.stringify({
        model: 'claude-haiku-4-5-20251001',
        max_tokens: 1,
        messages: [{ role: 'user', content: 'Hi' }],
      }),
    });

    const httpStatus = response.status;

    if (httpStatus === 401) {
      return {
        type: 'api-direct',
        status: 'invalid_key',
        message: 'Invalid API key (401)',
        severity: 'unknown',
      };
    }

    // Only a success or a rate-limit rejection is taken as evidence that the
    // key was accepted. A 5xx, 404 (wrong baseUrl), 403, etc. proves nothing
    // about the key, so the message must not claim validity for those.
    // NOTE(review): treating 429 as "authenticated" is an assumption — confirm.
    const keyAccepted = (httpStatus >= 200 && httpStatus < 300) || httpStatus === 429;
    const validityNote = keyAccepted
      ? 'Key appears valid.'
      : `Key validity not confirmed (HTTP ${httpStatus}).`;

    // No billing endpoint exists — always report no_billing_data
    return {
      type: 'api-direct',
      status: 'no_billing_data',
      message: `Anthropic API does not expose billing/quota via REST. ${validityNote}`,
      http_status: httpStatus,
      severity: 'unknown',
    };
  } catch (err) {
    // Network-level failure (DNS, connection refused, ...): report, don't throw
    return {
      type: 'api-direct',
      status: 'error',
      message: err.message,
      severity: 'unknown',
    };
  }
}
|
||||
130
providers/anthropic-teams.js
Normal file
130
providers/anthropic-teams.js
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
/**
|
||||
* anthropic-teams.js — Unified schema parser for Anthropic Teams direct providers.
|
||||
*
|
||||
* Teams providers (team-vigilio, team-ludo, team-molto, team-nadja, team-buio) use the
|
||||
* anthropic-ratelimit-unified-* header family. Headers are present on EVERY response
|
||||
* (200 and 429). A 429 is expected when the 7d budget is exhausted and contains valid
|
||||
* quota data — do not treat it as an error, extract the headers normally.
|
||||
*
|
||||
* Header reference (from Face's recon, issue trentuna/a-team#91):
|
||||
* anthropic-ratelimit-unified-status allowed|rejected
|
||||
* anthropic-ratelimit-unified-5h-status allowed|rejected
|
||||
* anthropic-ratelimit-unified-5h-utilization 0.0–1.0
|
||||
* anthropic-ratelimit-unified-5h-reset Unix timestamp
|
||||
* anthropic-ratelimit-unified-7d-status allowed|rejected
|
||||
* anthropic-ratelimit-unified-7d-utilization 0.0–1.0
|
||||
* anthropic-ratelimit-unified-7d-reset Unix timestamp
|
||||
* anthropic-ratelimit-unified-7d-surpassed-threshold (present only when maxed)
|
||||
* anthropic-ratelimit-unified-representative-claim five_hour|seven_day
|
||||
* anthropic-ratelimit-unified-fallback-percentage 0.0–1.0
|
||||
* anthropic-ratelimit-unified-reset Unix timestamp (binding reset)
|
||||
* anthropic-ratelimit-unified-overage-status rejected
|
||||
* anthropic-ratelimit-unified-overage-disabled-reason org_level_disabled
|
||||
* anthropic-organization-id UUID
|
||||
* retry-after seconds (only on 429)
|
||||
*/
|
||||
|
||||
import { getSeverity } from '../report.js';
|
||||
|
||||
/**
 * Parse unified rate-limit headers from a Teams API response.
 *
 * A 401 short-circuits to an 'invalid_key' result with no quota fields read.
 * Every other status (including 429) is parsed the same way: numeric headers
 * that are absent or malformed become null, and the two optional fields
 * (retry_after_seconds, surpassed_threshold_7d) are added only when their
 * header is actually present.
 *
 * @param {Object} headers — fetch Response.headers (or compatible mock with .get(name));
 *   a missing header may be reported as null (real Headers) or undefined (Map-style mock)
 * @param {number} httpStatus — HTTP status code of the response
 * @param {string} providerName — name for logging/context (not used by the parser itself)
 * @returns {Object} normalized provider result
 */
export function parseTeamsHeaders(headers, httpStatus, providerName) {
  const h = (name) => headers.get(name);

  // parseFloat/parseInt give NaN for absent or malformed values; report those as null
  const floatOrNull = (name) => {
    const value = parseFloat(h(name));
    return Number.isNaN(value) ? null : value;
  };
  const intOrNull = (name) => {
    const value = parseInt(h(name), 10);
    return Number.isNaN(value) ? null : value;
  };

  // 401 = invalid API key — no quota data available
  if (httpStatus === 401) {
    return {
      type: 'teams-direct',
      status: 'invalid_key',
      utilization_5h: null,
      utilization_7d: null,
      severity: 'unknown',
    };
  }

  // Fall back to the HTTP status when the explicit status header is missing
  const status = h('anthropic-ratelimit-unified-status') || (httpStatus === 429 ? 'rejected' : 'allowed');

  // Compute reset_in_seconds from the binding reset Unix timestamp (clamped at 0 once passed)
  const resetTs = intOrNull('anthropic-ratelimit-unified-reset');
  const nowSec = Math.floor(Date.now() / 1000);
  const resetInSeconds = resetTs === null ? null : Math.max(0, resetTs - nowSec);

  const result = {
    type: 'teams-direct',
    status,
    utilization_5h: floatOrNull('anthropic-ratelimit-unified-5h-utilization'),
    utilization_7d: floatOrNull('anthropic-ratelimit-unified-7d-utilization'),
    representative_claim: h('anthropic-ratelimit-unified-representative-claim') || null,
    reset_timestamp: resetTs,
    reset_in_seconds: resetInSeconds,
    organization_id: h('anthropic-organization-id') || null,
    // Additional detail headers
    status_5h: h('anthropic-ratelimit-unified-5h-status') || null,
    status_7d: h('anthropic-ratelimit-unified-7d-status') || null,
    overage_status: h('anthropic-ratelimit-unified-overage-status') || null,
    fallback_percentage: floatOrNull('anthropic-ratelimit-unified-fallback-percentage'),
  };

  // Include retry_after_seconds only when present as a number of seconds.
  // A non-numeric value (e.g. the HTTP-date form of Retry-After) is left out
  // rather than reported as NaN.
  const retryAfter = intOrNull('retry-after');
  if (retryAfter !== null) {
    result.retry_after_seconds = retryAfter;
  }

  // Include surpassed threshold when present (maxed budget). Both null and
  // undefined mean "header absent", depending on the headers implementation.
  const surpassed = h('anthropic-ratelimit-unified-7d-surpassed-threshold');
  if (surpassed !== null && surpassed !== undefined) {
    result.surpassed_threshold_7d = parseFloat(surpassed);
  }

  // Severity is derived from the assembled result, so it is attached last
  result.severity = getSeverity(result);
  return result;
}
|
||||
|
||||
/**
 * Issue one minimal Messages request against a Teams provider and hand the
 * response headers and status to parseTeamsHeaders, whatever the status is.
 *
 * Any exception raised while sending or parsing is converted into an
 * 'error' result instead of propagating.
 *
 * @param {string} providerName — forwarded to parseTeamsHeaders
 * @param {string} baseUrl — API origin, without the /v1/messages suffix
 * @param {string} apiKey — value sent in the x-api-key header
 * @returns {Promise<Object>} normalized provider result
 */
export async function probeTeamsProvider(providerName, baseUrl, apiKey) {
  try {
    const endpoint = `${baseUrl}/v1/messages`;

    // Smallest request the endpoint accepts: one short user turn, one output token
    const payload = {
      model: 'claude-haiku-4-5-20251001',
      max_tokens: 1,
      messages: [{ role: 'user', content: 'Hi' }],
    };

    const requestHeaders = {
      'x-api-key': apiKey,
      'anthropic-version': '2023-06-01',
      'content-type': 'application/json',
    };

    const reply = await fetch(endpoint, {
      method: 'POST',
      headers: requestHeaders,
      body: JSON.stringify(payload),
    });

    const { headers: replyHeaders, status: replyStatus } = reply;
    return parseTeamsHeaders(replyHeaders, replyStatus, providerName);
  } catch (failure) {
    const errorResult = { type: 'teams-direct', status: 'error' };
    errorResult.message = failure.message;
    errorResult.utilization_5h = null;
    errorResult.utilization_7d = null;
    errorResult.severity = 'unknown';
    return errorResult;
  }
}
|
||||
47
providers/index.js
Normal file
47
providers/index.js
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
/**
|
||||
* providers/index.js — provider registry
|
||||
*
|
||||
* Reads ~/.pi/agent/models.json and returns typed provider config for all
|
||||
* providers we know how to probe. Non-anthropic-messages providers (e.g. zai)
|
||||
* are silently skipped.
|
||||
*/
|
||||
|
||||
import { readFileSync } from 'fs';
|
||||
import { homedir } from 'os';
|
||||
|
||||
/**
 * Map a models.json provider entry to the probe type that handles it.
 *
 * Two providers are recognised purely by name; any other entry qualifies
 * as a Teams provider only when it speaks the anthropic-messages API and
 * its name begins with "team-". Everything else is unclassified.
 *
 * @param {string} name — provider key from models.json
 * @param {Object} config — that provider's config; only `api` is inspected
 * @returns {'teams-direct'|'shelley-proxy'|'api-direct'|null} null means "skip"
 */
function classifyProvider(name, config) {
  switch (name) {
    case 'shelley-proxy':
      return 'shelley-proxy';
    case 'api-ateam':
      return 'api-direct';
    default:
      break;
  }

  const looksLikeTeam = config.api === 'anthropic-messages' && name.startsWith('team-');
  return looksLikeTeam ? 'teams-direct' : null; // null → skip (zai, etc.)
}
|
||||
|
||||
/**
 * Load and classify all providers from ~/.pi/agent/models.json.
 *
 * Entries that classifyProvider does not recognise are skipped, as are
 * entries whose config is not an object. A file without a `providers`
 * section yields an empty registry instead of an opaque TypeError.
 *
 * @returns {Object} map of provider name → { name, type, baseUrl, apiKey };
 *   apiKey is null when the config has none
 * @throws {Error} if the file cannot be read, or {SyntaxError} if it is not valid JSON
 */
export function getProviders() {
  const modelsJson = JSON.parse(
    readFileSync(`${homedir()}/.pi/agent/models.json`, 'utf-8')
  );

  // Tolerate a missing/null `providers` section (and a top-level JSON null)
  const declared = (modelsJson && modelsJson.providers) || {};

  const providers = {};
  for (const [name, config] of Object.entries(declared)) {
    // A null or scalar entry has no fields to read; treat it like an unknown provider
    if (config === null || typeof config !== 'object') continue;

    const type = classifyProvider(name, config);
    if (!type) continue;

    providers[name] = {
      name,
      type,
      baseUrl: config.baseUrl,
      apiKey: config.apiKey || null,
    };
  }
  return providers;
}

// Second export name for the same function, kept for backwards compatibility
export const loadProviders = getProviders;
|
||||
96
providers/shelley-proxy.js
Normal file
96
providers/shelley-proxy.js
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
/**
|
||||
* shelley-proxy.js — Shelley/exe.dev proxy (classic schema + Exedev-Gateway-Cost)
|
||||
*
|
||||
* The Shelley proxy returns standard Anthropic rate-limit headers (classic schema,
|
||||
* not the unified Teams schema) plus an Exedev-Gateway-Cost header with per-call
|
||||
* USD cost. No API key is required — the proxy handles auth internally.
|
||||
*
|
||||
* Header reference:
|
||||
* Anthropic-Ratelimit-Tokens-Limit total token budget
|
||||
* Anthropic-Ratelimit-Tokens-Remaining remaining tokens
|
||||
* Anthropic-Ratelimit-Tokens-Reset ISO 8601 reset time
|
||||
* Anthropic-Ratelimit-Requests-Limit total request budget
|
||||
* Anthropic-Ratelimit-Requests-Remaining remaining requests
|
||||
* Anthropic-Ratelimit-Requests-Reset ISO 8601 reset time
|
||||
* Exedev-Gateway-Cost per-call USD cost (float)
|
||||
* anthropic-organization-id organization UUID
|
||||
*/
|
||||
|
||||
import { getSeverity } from '../report.js';
|
||||
|
||||
/**
 * Turn a Shelley proxy response's classic rate-limit headers into the
 * normalized result shape.
 *
 * Each header is looked up under its documented casing first and then in
 * lowercase. Count headers that are missing or not integers become null;
 * reset times, cost and organization id are null when absent or empty.
 * The status field is derived from the HTTP status alone:
 * 429 → 'rate_limited', 200 → 'ok', anything else → 'error'.
 *
 * @param {Object} headers — fetch Response.headers (or compatible mock with .get(name))
 * @param {number} httpStatus — HTTP status code
 * @returns {Object} normalized provider result, with severity from getSeverity
 */
export function parseShelleyHeaders(headers, httpStatus) {
  const read = (headerName) => headers.get(headerName) || headers.get(headerName.toLowerCase());

  // Integer header → number, or null when absent/unparseable
  const readCount = (headerName) => {
    const parsed = parseInt(read(headerName), 10);
    return isNaN(parsed) ? null : parsed;
  };

  let status;
  if (httpStatus === 429) {
    status = 'rate_limited';
  } else if (httpStatus === 200) {
    status = 'ok';
  } else {
    status = 'error';
  }

  const rawCost = (name) => {
    const text = read(name);
    return text ? parseFloat(text) : null;
  };

  const result = {
    type: 'shelley-proxy',
    status,
    tokens_limit: readCount('Anthropic-Ratelimit-Tokens-Limit'),
    tokens_remaining: readCount('Anthropic-Ratelimit-Tokens-Remaining'),
    tokens_reset: read('Anthropic-Ratelimit-Tokens-Reset') || null,
    requests_limit: readCount('Anthropic-Ratelimit-Requests-Limit'),
    requests_remaining: readCount('Anthropic-Ratelimit-Requests-Remaining'),
    requests_reset: read('Anthropic-Ratelimit-Requests-Reset') || null,
    cost_per_call_usd: rawCost('Exedev-Gateway-Cost'),
    organization_id: read('anthropic-organization-id') || null,
  };

  // Severity is computed from the finished record, then attached to that same object
  return Object.assign(result, { severity: getSeverity(result) });
}

// Alias used in some internal tooling
export const parseClassicHeaders = parseShelleyHeaders;
|
||||
|
||||
/**
 * Send one minimal Messages request through the Shelley proxy and pass the
 * response headers and status to parseShelleyHeaders.
 *
 * The x-api-key header carries a fixed placeholder value. Any exception
 * raised while sending or parsing is turned into an 'error' result with
 * all quota fields null.
 *
 * @param {string} providerName — accepted for signature parity with the other probes; not used here
 * @param {string} baseUrl — proxy origin, without the /v1/messages suffix
 * @returns {Promise<Object>} normalized provider result
 */
export async function probeShelleyProxy(providerName, baseUrl) {
  try {
    const endpoint = `${baseUrl}/v1/messages`;

    // Smallest request the endpoint accepts: one short user turn, one output token
    const payload = {
      model: 'claude-haiku-4-5-20251001',
      max_tokens: 1,
      messages: [{ role: 'user', content: 'Hi' }],
    };

    const requestHeaders = {
      'x-api-key': 'not-needed',
      'anthropic-version': '2023-06-01',
      'content-type': 'application/json',
    };

    const reply = await fetch(endpoint, {
      method: 'POST',
      headers: requestHeaders,
      body: JSON.stringify(payload),
    });

    return parseShelleyHeaders(reply.headers, reply.status);
  } catch (failure) {
    const errorResult = { type: 'shelley-proxy', status: 'error' };
    errorResult.message = failure.message;
    errorResult.tokens_limit = null;
    errorResult.tokens_remaining = null;
    errorResult.requests_limit = null;
    errorResult.requests_remaining = null;
    errorResult.cost_per_call_usd = null;
    errorResult.severity = 'unknown';
    return errorResult;
  }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue