#!/usr/bin/env node
/**
 * configure-key-limits.js — Apply per-key rate limits to xAI API keys
 *
 * Uses the xAI Management API to set QPS/QPM limits on each key by role.
 * Safe to re-run: idempotent (re-applies same limits).
 *
 * Usage:
 *   node configure-key-limits.js             # apply limits
 *   node configure-key-limits.js --dry-run   # show what would be applied (no changes)
 *   node configure-key-limits.js --show      # show current limits for all keys
 *
 * Env vars required (both in ~/.secrets/keys.env):
 *   XAI_MANAGEMENT_KEY
 *   XAI_TEAM_ID
 *
 * Exit status:
 *   0  all targeted keys were updated (or nothing had to be changed)
 *   1  missing env var, listing failed, or at least one key update failed
 *
 * API docs: PUT /auth/api-keys/{api_key_id}
 *   https://management-api.x.ai
 */

const MANAGEMENT_BASE = 'https://management-api.x.ai';

// ── Target limits per key name ──────────────────────────────────────────────
//
// These are intentionally conservative. The goal is not to throttle normal
// operation but to put a ceiling on runaway usage. xAI's team-level rate
// limits are already generous; per-key limits add a per-agent budget.
//
// qps: queries per second (burst protection)
// qpm: queries per minute (sustained rate protection)
// tpm: tokens per minute (omit to leave unlimited — hard to calibrate without data)
//
const KEY_LIMITS = {
  'xai-ba': { qps: 2, qpm: 30, notes: 'builder — one task at a time, grok-code-fast-1' },
  'xai-vigilio': { qps: 3, qpm: 30, notes: 'search + ops — xai_search tool, highest legit rate' },
  'xai-face': { qps: 2, qpm: 20, notes: 'analyst — research bursts ok' },
  'xai-amy': { qps: 2, qpm: 20, notes: 'analyst — review work, occasional' },
  'xai-murdock': { qps: 2, qpm: 20, notes: 'analyst — recon + planning' },
};

// ── Helpers ─────────────────────────────────────────────────────────────────

/**
 * Read a required environment variable.
 * Prints a hint and terminates the process with status 1 when it is unset or empty.
 *
 * @param {string} name - Environment variable name.
 * @returns {string} The variable's value.
 */
function getEnv(name) {
  const val = process.env[name];
  if (!val) {
    console.error(`Missing required env var: ${name}`);
    console.error('Source ~/.secrets/keys.env before running.');
    process.exit(1);
  }
  return val;
}

/**
 * Look up the configured limits for a key name.
 *
 * Only own properties of KEY_LIMITS count: a plain `KEY_LIMITS[name]` would
 * also resolve inherited members such as `constructor` or `toString`, which
 * are truthy but carry no qps/qpm values.
 *
 * @param {string|undefined} keyName - Name reported by the API for a key.
 * @returns {{qps: number, qpm: number, notes: string}|undefined}
 */
function targetFor(keyName) {
  return Object.hasOwn(KEY_LIMITS, keyName) ? KEY_LIMITS[keyName] : undefined;
}

/**
 * Printable name for a key record; keys without a name get a placeholder so
 * table formatting never throws.
 *
 * @param {{name?: string}} key - Key record from the list response.
 * @returns {string}
 */
function displayName(key) {
  return String(key.name ?? '(unnamed)');
}

/**
 * Fetch the API keys of a team.
 *
 * @param {string} managementKey - Bearer token for the Management API.
 * @param {string} teamId - Team identifier (escaped before use in the URL path).
 * @returns {Promise<object[]>} The `apiKeys` array of the response, or [] when absent.
 * @throws {Error} When the HTTP response status is not OK.
 */
async function listKeys(managementKey, teamId) {
  const resp = await fetch(
    `${MANAGEMENT_BASE}/auth/teams/${encodeURIComponent(teamId)}/api-keys`,
    { headers: { 'Authorization': `Bearer ${managementKey}` } }
  );
  if (!resp.ok) {
    const text = await resp.text().catch(() => '');
    throw new Error(`List keys failed ${resp.status}: ${text}`);
  }
  const data = await resp.json();
  return data.apiKeys ?? [];
}

/**
 * Set the qps/qpm limits of a single key via PUT /auth/api-keys/{keyId}.
 *
 * @param {string} managementKey - Bearer token for the Management API.
 * @param {string} keyId - Identifier of the key to update (escaped before use in the URL path).
 * @param {{qps: number, qpm: number}} limits - Limits to apply; extra properties are ignored.
 * @returns {Promise<object>} Parsed JSON body of the response.
 * @throws {TypeError} When qps or qpm is not a finite number.
 * @throws {Error} When the HTTP response status is not OK.
 */
async function applyLimit(managementKey, keyId, limits) {
  const { qps, qpm } = limits;
  // JSON.stringify silently drops undefined values, so a malformed target
  // would otherwise be sent as an empty update under the same field mask.
  if (!Number.isFinite(qps) || !Number.isFinite(qpm)) {
    throw new TypeError(`Invalid limits for key ${keyId}: qps=${qps}, qpm=${qpm}`);
  }
  const body = {
    apiKey: { qps, qpm },
    // NOTE(review): presumably restricts the update to the listed fields — confirm against the API docs.
    fieldMask: 'qps,qpm',
  };
  const resp = await fetch(
    `${MANAGEMENT_BASE}/auth/api-keys/${encodeURIComponent(keyId)}`,
    {
      method: 'PUT',
      headers: {
        'Authorization': `Bearer ${managementKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(body),
    }
  );
  if (!resp.ok) {
    const text = await resp.text().catch(() => '');
    throw new Error(`PUT key ${keyId} failed ${resp.status}: ${text}`);
  }
  return resp.json();
}

/**
 * Format a limit value for display.
 *
 * @param {number|null|undefined} val - Limit as reported by the API.
 * @returns {string} 'unset' for undefined, null or 0; otherwise the value as a string.
 */
function fmtLimit(val) {
  if (val === undefined || val === null || val === 0) return 'unset';
  return String(val);
}

/**
 * Print the current qps/qpm/tpm of every key, flagging keys that have no
 * entry in KEY_LIMITS.
 *
 * @param {object[]} keys - Key records from listKeys().
 */
function showCurrentLimits(keys) {
  console.log('Current key limits:\n');
  console.log(` ${'Name'.padEnd(16)} ${'qps'.padStart(5)} ${'qpm'.padStart(6)} ${'tpm'.padStart(8)}`);
  console.log(` ${'─'.repeat(16)} ${'─'.repeat(5)} ${'─'.repeat(6)} ${'─'.repeat(8)}`);
  for (const k of keys) {
    const name = displayName(k).padEnd(16);
    const qps = fmtLimit(k.qps).padStart(5);
    const qpm = fmtLimit(k.qpm).padStart(6);
    const tpm = fmtLimit(k.tpm).padStart(8);
    const marker = targetFor(k.name) ? '' : ' (no target)';
    console.log(` ${name} ${qps} ${qpm} ${tpm}${marker}`);
  }
}

// ── Main ────────────────────────────────────────────────────────────────────

/**
 * Entry point: list the team's keys, then either show their current limits
 * (--show), preview the configured limits (--dry-run), or apply them.
 *
 * A failed update of one key does not stop the remaining keys from being
 * processed; failures are counted and turn the exit status non-zero.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);
  const dryRun = args.includes('--dry-run');
  const showOnly = args.includes('--show');

  const managementKey = getEnv('XAI_MANAGEMENT_KEY');
  const teamId = getEnv('XAI_TEAM_ID');

  const keys = await listKeys(managementKey, teamId);

  if (showOnly) {
    showCurrentLimits(keys);
    return;
  }

  if (dryRun) {
    console.log('[DRY RUN] Would apply the following limits:\n');
  } else {
    console.log('Applying rate limits to xAI API keys...\n');
  }

  console.log(` ${'Name'.padEnd(16)} ${'qps'.padStart(5)} ${'qpm'.padStart(6)} Notes`);
  console.log(` ${'─'.repeat(16)} ${'─'.repeat(5)} ${'─'.repeat(6)} ${'─'.repeat(40)}`);

  let applied = 0;
  let skipped = 0;
  let failed = 0;

  for (const k of keys) {
    const name = displayName(k);
    const target = targetFor(k.name);
    if (!target) {
      console.log(` ${name.padEnd(16)} (no target — skipping)`);
      skipped++;
      continue;
    }

    const qpsStr = String(target.qps).padStart(5);
    const qpmStr = String(target.qpm).padStart(6);
    console.log(` ${name.padEnd(16)} ${qpsStr} ${qpmStr} ${target.notes}`);

    if (dryRun) {
      applied++;
      continue;
    }

    try {
      await applyLimit(managementKey, k.apiKeyId, target);
      applied++;
    } catch (err) {
      failed++;
      console.error(` ✗ Failed to update ${name}: ${err.message}`);
    }
  }

  // A target whose name matches no key (typo, renamed or deleted key) would
  // otherwise be silently ignored and its limit never enforced.
  const listedNames = new Set(keys.map((k) => k.name));
  const unmatched = Object.keys(KEY_LIMITS).filter((n) => !listedNames.has(n));

  console.log('');
  if (unmatched.length > 0) {
    console.warn(`Warning: no API key found for target(s): ${unmatched.join(', ')}`);
  }

  if (dryRun) {
    console.log(`[DRY RUN] Would apply limits to ${applied} key(s), skip ${skipped} (no target).`);
    return;
  }

  console.log(`Applied limits to ${applied} key(s). Skipped ${skipped} (no target).`);
  if (failed > 0) {
    console.error(`Failed to update ${failed} key(s).`);
    // Let pending output flush, but make the failure visible to callers/CI.
    process.exitCode = 1;
  }
}

main().catch(err => {
  console.error('Error:', err.message);
  process.exit(1);
});