build: token-monitor v0.1.0 — modular LLM API quota visibility

Implements modular provider probing with two distinct header schemas:
- Teams direct (unified schema): 5h/7d utilization floats, status, reset countdown
- Shelley proxy (classic schema): token/request counts + Exedev-Gateway-Cost (USD/call)
- api-ateam: reports no billing data (confirmed non-existent by recon)

Key: uses claude-haiku-4-5-20251001 for minimal probe calls (1 token).
Rate-limit headers present on ALL responses (200 and 429).

113/113 tests passing.

Built from Face recon (trentuna/a-team#91) — live header capture confirmed
unified schema with utilization floats replaces old per-count schema.
This commit is contained in:
Hannibal Smith 2026-04-04 17:01:05 +00:00
parent 760049a25e
commit 07a544c50d
Signed by: hannibal
GPG key ID: 6EB37F7E6190AF1C
10 changed files with 1093 additions and 1 deletions

View file

@ -0,0 +1,130 @@
/**
* anthropic-teams.js — Unified schema parser for Anthropic Teams direct providers.
*
* Teams providers (team-vigilio, team-ludo, team-molto, team-nadja, team-buio) use the
* anthropic-ratelimit-unified-* header family. Headers are present on EVERY response
* (200 and 429). A 429 is expected when the 7d budget is exhausted and contains valid
* quota data — do not treat it as an error; extract the headers normally.
*
* Header reference (from Face's recon, issue trentuna/a-team#91):
* anthropic-ratelimit-unified-status allowed|rejected
* anthropic-ratelimit-unified-5h-status allowed|rejected
* anthropic-ratelimit-unified-5h-utilization 0.0-1.0
* anthropic-ratelimit-unified-5h-reset Unix timestamp
* anthropic-ratelimit-unified-7d-status allowed|rejected
* anthropic-ratelimit-unified-7d-utilization 0.0-1.0
* anthropic-ratelimit-unified-7d-reset Unix timestamp
* anthropic-ratelimit-unified-7d-surpassed-threshold (present only when maxed)
* anthropic-ratelimit-unified-representative-claim five_hour|seven_day
* anthropic-ratelimit-unified-fallback-percentage 0.0-1.0
* anthropic-ratelimit-unified-reset Unix timestamp (binding reset)
* anthropic-ratelimit-unified-overage-status rejected
* anthropic-ratelimit-unified-overage-disabled-reason org_level_disabled
* anthropic-organization-id UUID
* retry-after seconds (only on 429)
*/
import { getSeverity } from '../report.js';
/**
 * Convert a raw header value to a finite float.
 *
 * @param {string|null|undefined} raw value as returned by headers.get(name)
 * @returns {number|null} the parsed number, or null when missing or not numeric
 */
function parseFiniteFloat(raw) {
  if (raw == null) return null;
  const value = Number.parseFloat(raw);
  return Number.isFinite(value) ? value : null;
}

/**
 * Convert a raw header value to a finite base-10 integer.
 *
 * @param {string|null|undefined} raw value as returned by headers.get(name)
 * @returns {number|null} the parsed integer, or null when missing or not numeric
 */
function parseFiniteInt(raw) {
  if (raw == null) return null;
  const value = Number.parseInt(raw, 10);
  return Number.isFinite(value) ? value : null;
}

/**
 * Parse unified rate-limit headers from a Teams API response.
 *
 * A 401 short-circuits to an `invalid_key` result without reading any header.
 * For every other status the headers are read as-is, so a 429 yields the same
 * shape of result as a 200. Numeric fields that are missing or unparsable are
 * reported as null rather than NaN.
 *
 * @param {Object} headers fetch Response.headers, or any object exposing
 *   .get(name); a missing header may be reported as null or undefined
 * @param {number} httpStatus HTTP status code of the response
 * @param {string} providerName name for logging/context (not read by this function)
 * @returns {Object} normalized provider result; `retry_after_seconds` and
 *   `surpassed_threshold_7d` are only set when their headers are present
 */
export function parseTeamsHeaders(headers, httpStatus, providerName) {
  // fetch Headers returns null for a missing header, Map-style mocks return
  // undefined; collapse both to null so presence checks below behave the same.
  const h = (name) => headers.get(name) ?? null;

  // 401 = invalid API key — no quota data available
  if (httpStatus === 401) {
    return {
      type: 'teams-direct',
      status: 'invalid_key',
      utilization_5h: null,
      utilization_7d: null,
      severity: 'unknown',
    };
  }

  // Fall back to the HTTP status when the unified status header is absent.
  const status =
    h('anthropic-ratelimit-unified-status') || (httpStatus === 429 ? 'rejected' : 'allowed');

  // Binding reset is a Unix timestamp in seconds; derive a countdown clamped at 0.
  const resetTs = parseFiniteInt(h('anthropic-ratelimit-unified-reset'));
  const nowSec = Math.floor(Date.now() / 1000);

  const result = {
    type: 'teams-direct',
    status,
    utilization_5h: parseFiniteFloat(h('anthropic-ratelimit-unified-5h-utilization')),
    utilization_7d: parseFiniteFloat(h('anthropic-ratelimit-unified-7d-utilization')),
    representative_claim: h('anthropic-ratelimit-unified-representative-claim') || null,
    reset_timestamp: resetTs,
    reset_in_seconds: resetTs === null ? null : Math.max(0, resetTs - nowSec),
    organization_id: h('anthropic-organization-id') || null,
    // Additional detail headers
    status_5h: h('anthropic-ratelimit-unified-5h-status') || null,
    status_7d: h('anthropic-ratelimit-unified-7d-status') || null,
    overage_status: h('anthropic-ratelimit-unified-overage-status') || null,
    fallback_percentage: parseFiniteFloat(h('anthropic-ratelimit-unified-fallback-percentage')),
  };

  // Include retry_after_seconds only when present as a number of seconds.
  // A non-numeric value (e.g. the HTTP-date form) is omitted instead of
  // surfacing as NaN.
  const retryAfter = parseFiniteInt(h('retry-after'));
  if (retryAfter !== null) {
    result.retry_after_seconds = retryAfter;
  }

  // Include surpassed threshold when present (maxed budget)
  const surpassed = h('anthropic-ratelimit-unified-7d-surpassed-threshold');
  if (surpassed !== null) {
    result.surpassed_threshold_7d = Number.parseFloat(surpassed);
  }

  // Severity is computed last so getSeverity sees every parsed field.
  result.severity = getSeverity(result);
  return result;
}
/** Default upper bound, in milliseconds, for a single probe request. */
const DEFAULT_PROBE_TIMEOUT_MS = 30000;

/**
 * Probe a single Teams provider by making a minimal API call.
 *
 * Sends a one-token POST to `<baseUrl>/v1/messages` and passes the response
 * headers and status to parseTeamsHeaders, regardless of whether the response
 * is 200 or 429. The response body is discarded. This function never rejects
 * for request failures: network errors and timeouts are returned as a result
 * with `status: 'error'`.
 *
 * @param {string} providerName
 * @param {string} baseUrl provider base URL; trailing slashes are ignored
 * @param {string} apiKey sent in the x-api-key header
 * @param {Object} [options]
 * @param {number} [options.timeoutMs] abort the request after this many
 *   milliseconds (default 30000)
 * @returns {Promise<Object>} normalized provider result
 */
export async function probeTeamsProvider(
  providerName,
  baseUrl,
  apiKey,
  { timeoutMs = DEFAULT_PROBE_TIMEOUT_MS } = {},
) {
  try {
    // Avoid "//v1/messages" when the configured base URL ends with a slash.
    const endpoint = `${String(baseUrl).replace(/\/+$/, '')}/v1/messages`;

    const response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'x-api-key': apiKey,
        'anthropic-version': '2023-06-01',
        'content-type': 'application/json',
      },
      body: JSON.stringify({
        model: 'claude-haiku-4-5-20251001',
        max_tokens: 1,
        messages: [{ role: 'user', content: 'Hi' }],
      }),
      // Without a deadline one stalled provider would block the whole probe run.
      signal: AbortSignal.timeout(timeoutMs),
    });

    const result = parseTeamsHeaders(response.headers, response.status, providerName);

    // Only the headers are needed; cancel the unread body so the underlying
    // connection is released promptly instead of waiting for garbage collection.
    try {
      await response.body?.cancel();
    } catch {
      // Best effort: failing to discard the body must not invalidate the
      // quota data that was already parsed from the headers.
    }

    return result;
  } catch (err) {
    return {
      type: 'teams-direct',
      status: 'error',
      message: err instanceof Error ? err.message : String(err),
      utilization_5h: null,
      utilization_7d: null,
      severity: 'unknown',
    };
  }
}