build: token-monitor v0.1.0 — modular LLM API quota visibility
Implements modular provider probing with two distinct header schemas:
- Teams direct (unified schema): 5h/7d utilization floats, status, reset countdown
- Shelley proxy (classic schema): token/request counts + Exedev-Gateway-Cost (USD/call)
- api-ateam: reports no billing data (confirmed non-existent by recon)

Key: uses claude-haiku-4-5-20251001 for minimal probe calls (1 token). Rate-limit headers are present on ALL responses (200 and 429). 113/113 tests passing.

Built from Face recon (trentuna/a-team#91) — live header capture confirmed that the unified schema with utilization floats replaces the old per-count schema.
This commit is contained in:
parent
760049a25e
commit
07a544c50d
10 changed files with 1093 additions and 1 deletions
96
providers/shelley-proxy.js
Normal file
96
providers/shelley-proxy.js
Normal file
|
|
@ -0,0 +1,96 @@
|
|||
/**
|
||||
* shelley-proxy.js — Shelley/exe.dev proxy (classic schema + Exedev-Gateway-Cost)
|
||||
*
|
||||
* The Shelley proxy returns standard Anthropic rate-limit headers (classic schema,
|
||||
* not the unified Teams schema) plus an Exedev-Gateway-Cost header with per-call
|
||||
* USD cost. No API key is required — the proxy handles auth internally.
|
||||
*
|
||||
* Header reference:
|
||||
* Anthropic-Ratelimit-Tokens-Limit total token budget
|
||||
* Anthropic-Ratelimit-Tokens-Remaining remaining tokens
|
||||
* Anthropic-Ratelimit-Tokens-Reset ISO 8601 reset time
|
||||
* Anthropic-Ratelimit-Requests-Limit total request budget
|
||||
* Anthropic-Ratelimit-Requests-Remaining remaining requests
|
||||
* Anthropic-Ratelimit-Requests-Reset ISO 8601 reset time
|
||||
* Exedev-Gateway-Cost per-call USD cost (float)
|
||||
* anthropic-organization-id organization UUID
|
||||
*/
|
||||
|
||||
import { getSeverity } from '../report.js';
|
||||
|
||||
/**
 * Parse classic Anthropic rate-limit headers from a Shelley proxy response.
 *
 * Every numeric field (token/request counts and the per-call cost) is
 * normalized to `null` when its header is missing or does not parse as a
 * number, so callers never receive `NaN`.
 *
 * @param {Object} headers — fetch Response.headers (or compatible mock with .get(name))
 * @param {number} httpStatus — HTTP status code
 * @returns {Object} normalized provider result with the keys `type`, `status`,
 *   `tokens_limit`, `tokens_remaining`, `tokens_reset`, `requests_limit`,
 *   `requests_remaining`, `requests_reset`, `cost_per_call_usd`,
 *   `organization_id` and `severity`
 */
export function parseShelleyHeaders(headers, httpStatus) {
  // Look the header up under the documented casing first and fall back to the
  // lowercase name, so mocks keyed by either spelling are accepted.
  const h = (name) => headers.get(name) || headers.get(name.toLowerCase());

  // Integer header -> number, or null when absent / not numeric.
  const intOrNull = (name) => {
    const parsed = Number.parseInt(h(name), 10);
    return Number.isNaN(parsed) ? null : parsed;
  };

  // Float header -> number, or null when absent / not numeric. Without the
  // NaN check a garbage cost header would surface as NaN in the result.
  const floatOrNull = (name) => {
    const raw = h(name);
    if (!raw) {
      return null;
    }
    const parsed = Number.parseFloat(raw);
    return Number.isNaN(parsed) ? null : parsed;
  };

  // Only 429 and 200 get a dedicated status; every other code is 'error'.
  let status = 'error';
  if (httpStatus === 429) {
    status = 'rate_limited';
  } else if (httpStatus === 200) {
    status = 'ok';
  }

  const result = {
    type: 'shelley-proxy',
    status,
    tokens_limit: intOrNull('Anthropic-Ratelimit-Tokens-Limit'),
    tokens_remaining: intOrNull('Anthropic-Ratelimit-Tokens-Remaining'),
    tokens_reset: h('Anthropic-Ratelimit-Tokens-Reset') || null,
    requests_limit: intOrNull('Anthropic-Ratelimit-Requests-Limit'),
    requests_remaining: intOrNull('Anthropic-Ratelimit-Requests-Remaining'),
    requests_reset: h('Anthropic-Ratelimit-Requests-Reset') || null,
    cost_per_call_usd: floatOrNull('Exedev-Gateway-Cost'),
    organization_id: h('anthropic-organization-id') || null,
  };

  // Severity is computed from the fully populated result (which does not yet
  // carry a `severity` key) and then attached to it.
  result.severity = getSeverity(result);
  return result;
}
|
||||
|
||||
// Alias used in some internal tooling
|
||||
/**
 * Second export name bound to the very same function as parseShelleyHeaders
 * (no wrapper), so both names accept the same arguments and return the same result.
 */
export const parseClassicHeaders = parseShelleyHeaders;
|
||||
|
||||
/**
 * Probe the Shelley proxy by making a minimal API call (a one-token
 * `POST {baseUrl}/v1/messages`) and normalizing the response headers with
 * parseShelleyHeaders.
 *
 * This function does not reject on request failure: any error thrown while
 * building or sending the request is converted into an `status: 'error'`
 * result that carries the same keys as a successful result (all data fields
 * `null`) plus a `message`.
 *
 * @param {string} providerName — not read by this function
 * @param {string} baseUrl — proxy root URL; trailing slashes are ignored
 * @returns {Promise<Object>} normalized provider result
 */
export async function probeShelleyProxy(providerName, baseUrl) {
  try {
    // Strip trailing slashes so "http://host/" does not turn into
    // "http://host//v1/messages".
    let root = String(baseUrl);
    while (root.endsWith('/')) {
      root = root.slice(0, -1);
    }

    const response = await fetch(`${root}/v1/messages`, {
      method: 'POST',
      headers: {
        // Placeholder value: per the file header, the proxy handles auth itself.
        'x-api-key': 'not-needed',
        'anthropic-version': '2023-06-01',
        'content-type': 'application/json',
      },
      // Smallest possible request: one short user message, one output token.
      body: JSON.stringify({
        model: 'claude-haiku-4-5-20251001',
        max_tokens: 1,
        messages: [{ role: 'user', content: 'Hi' }],
      }),
    });

    // Only headers and status are inspected; the response body is not read.
    return parseShelleyHeaders(response.headers, response.status);
  } catch (err) {
    // Mirror the key set of a parsed result so consumers see `null` (not
    // `undefined`) for every data field when the probe itself failed.
    return {
      type: 'shelley-proxy',
      status: 'error',
      message: err instanceof Error ? err.message : String(err),
      tokens_limit: null,
      tokens_remaining: null,
      tokens_reset: null,
      requests_limit: null,
      requests_remaining: null,
      requests_reset: null,
      cost_per_call_usd: null,
      organization_id: null,
      severity: 'unknown',
    };
  }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue