add configure-key-limits.js — per-key QPS/QPM rate limit script

- PUT /auth/api-keys/{id} with fieldMask qps,qpm
- Defines limits per role: ba=2/30, vigilio=3/30, analysts=2/20
- --dry-run and --show flags included
- Blocked on UpdateApiKey ACL for management key (needs console.x.ai)
- See token-monitor#2 for Ludo action required
This commit is contained in:
Vigilio Desto 2026-04-06 11:00:09 +00:00
parent 2371e02d57
commit 350097a46d
Signed by: vigilio
GPG key ID: 159D6AD58C8E55E9

170
configure-key-limits.js Normal file
View file

@ -0,0 +1,170 @@
#!/usr/bin/env node
/**
* configure-key-limits.js — apply per-key rate limits to xAI API keys
*
* Uses the xAI Management API to set QPS/QPM limits on each key by role.
* Safe to re-run: idempotent (re-applies same limits).
*
* Usage:
* node configure-key-limits.js # apply limits
* node configure-key-limits.js --dry-run # show what would be applied (no changes)
* node configure-key-limits.js --show # show current limits for all keys
*
* Env vars required (both in ~/.secrets/keys.env):
* XAI_MANAGEMENT_KEY
* XAI_TEAM_ID
*
* API docs: PUT /auth/api-keys/{api_key_id}
* https://management-api.x.ai
*/
// Base URL of the xAI Management API; the request paths built in this script
// (key listing and key update) are appended to it.
const MANAGEMENT_BASE = 'https://management-api.x.ai';
// ── Target limits per key name ──────────────────────────────────────────────
//
// These are intentionally conservative. The goal is not to throttle normal
// operation but to put a ceiling on runaway usage. xAI's team-level rate
// limits are already generous; per-key limits add a per-agent budget.
//
// qps: queries per second (burst protection)
// qpm: queries per minute (sustained rate protection)
// tpm: tokens per minute (omit to leave unlimited — hard to calibrate without data)
//
// Maps an API key's name to the limits this script applies to it.
//   qps / qpm — the only fields sent in the update request
//   notes     — free text printed next to the key in the output; never sent
// A key returned by the API whose name is not listed here is reported as
// "no target" and left untouched.
const KEY_LIMITS = {
'xai-ba': { qps: 2, qpm: 30, notes: 'builder — one task at a time, grok-code-fast-1' },
'xai-vigilio': { qps: 3, qpm: 30, notes: 'search + ops — xai_search tool, highest legit rate' },
'xai-face': { qps: 2, qpm: 20, notes: 'analyst — research bursts ok' },
'xai-amy': { qps: 2, qpm: 20, notes: 'analyst — review work, occasional' },
'xai-murdock': { qps: 2, qpm: 20, notes: 'analyst — recon + planning' },
};
// ── Helpers ─────────────────────────────────────────────────────────────────
/**
 * Read a required environment variable.
 *
 * When the variable is absent or empty, two diagnostic lines are written to
 * stderr and the process is terminated with exit status 1.
 *
 * @param {string} name - Name of the environment variable to read.
 * @returns {string} The variable's value.
 */
function getEnv(name) {
  const value = process.env[name];
  if (value) {
    return value;
  }

  console.error(`Missing required env var: ${name}`);
  console.error('Source ~/.secrets/keys.env before running.');
  process.exit(1);
  return value;
}
/**
 * Retrieve the API key records of a team from the Management API.
 *
 * @param {string} managementKey - Bearer token sent in the Authorization header.
 * @param {string} teamId - Team whose keys are listed; interpolated into the URL path.
 * @returns {Promise<Array>} The `apiKeys` array of the JSON response, or an
 *   empty array when that field is missing or falsy.
 * @throws {Error} When the response status is not OK; the message carries the
 *   status code and the response body (empty if the body cannot be read).
 */
async function listKeys(managementKey, teamId) {
  const endpoint = `${MANAGEMENT_BASE}/auth/teams/${teamId}/api-keys`;
  const requestOptions = {
    headers: { Authorization: `Bearer ${managementKey}` },
  };
  const response = await fetch(endpoint, requestOptions);

  if (response.ok) {
    const payload = await response.json();
    return payload.apiKeys ? payload.apiKeys : [];
  }

  // Reading the body is best-effort: a failure here must not mask the HTTP error.
  const detail = await response.text().catch(() => '');
  throw new Error(`List keys failed ${response.status}: ${detail}`);
}
/**
 * Send a PUT request that sets the qps and qpm limits of one API key.
 *
 * Only `qps` and `qpm` are taken from `limits`; the request's field mask names
 * exactly those two fields.
 *
 * @param {string} managementKey - Bearer token sent in the Authorization header.
 * @param {string} keyId - Identifier of the key; interpolated into the URL path.
 * @param {{qps: number, qpm: number}} limits - Limits to apply (extra properties are ignored).
 * @returns {Promise<*>} The parsed JSON body of a successful response.
 * @throws {Error} When the response status is not OK; the message carries the
 *   key id, the status code and the response body (empty if unreadable).
 */
async function applyLimit(managementKey, keyId, limits) {
  const update = {
    apiKey: { qps: limits.qps, qpm: limits.qpm },
    fieldMask: 'qps,qpm',
  };

  const response = await fetch(`${MANAGEMENT_BASE}/auth/api-keys/${keyId}`, {
    method: 'PUT',
    headers: {
      Authorization: `Bearer ${managementKey}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify(update),
  });

  if (response.ok) {
    return response.json();
  }

  // Reading the body is best-effort: a failure here must not mask the HTTP error.
  const detail = await response.text().catch(() => '');
  throw new Error(`PUT key ${keyId} failed ${response.status}: ${detail}`);
}
/**
 * Render a limit value for display.
 *
 * @param {*} val - Limit as found on a key record.
 * @returns {string} 'unset' for undefined, null or 0; otherwise the value
 *   converted with String().
 */
function fmtLimit(val) {
  const isUnset = val == null || val === 0;
  return isUnset ? 'unset' : String(val);
}
// ── Main ────────────────────────────────────────────────────────────────────
/**
 * Entry point: list the team's keys, then show, dry-run, or apply KEY_LIMITS.
 *
 * Flags (read from process.argv):
 *   --show     print the limits currently reported for every key; change nothing
 *   --dry-run  print the target limits without issuing any update request
 * When both are given, --show wins.
 *
 * A failed update on one key is logged and the loop moves on to the next key.
 * If any update failed, a failure count is printed and process.exitCode is set
 * to 1 so that callers can detect a partial run. Targets in KEY_LIMITS that
 * match no listed key are reported on stderr.
 *
 * @returns {Promise<void>}
 * @throws {Error} Whatever listKeys() throws when the key listing fails.
 */
async function main() {
  const args = process.argv.slice(2);
  const dryRun = args.includes('--dry-run');
  const showOnly = args.includes('--show');
  const managementKey = getEnv('XAI_MANAGEMENT_KEY');
  const teamId = getEnv('XAI_TEAM_ID');
  const keys = await listKeys(managementKey, teamId);

  // Own-property lookup only: a key that happens to be named "constructor" or
  // "toString" must not pick up something inherited from Object.prototype.
  const targetFor = (name) =>
    (typeof name === 'string' && Object.hasOwn(KEY_LIMITS, name) ? KEY_LIMITS[name] : undefined);
  // A record without a name would otherwise crash the padEnd() calls below.
  const labelOf = (key) => String(key.name ?? '(unnamed)');

  if (showOnly) {
    console.log('Current key limits:\n');
    console.log(` ${'Name'.padEnd(16)} ${'qps'.padStart(5)} ${'qpm'.padStart(6)} ${'tpm'.padStart(8)}`);
    console.log(` ${'─'.repeat(16)} ${'─'.repeat(5)} ${'─'.repeat(6)} ${'─'.repeat(8)}`);
    for (const k of keys) {
      const name = labelOf(k).padEnd(16);
      const qps = fmtLimit(k.qps).padStart(5);
      const qpm = fmtLimit(k.qpm).padStart(6);
      const tpm = fmtLimit(k.tpm).padStart(8);
      const marker = targetFor(k.name) ? '' : ' (no target)';
      console.log(` ${name} ${qps} ${qpm} ${tpm}${marker}`);
    }
    return;
  }

  if (dryRun) {
    console.log('[DRY RUN] Would apply the following limits:\n');
  } else {
    console.log('Applying rate limits to xAI API keys...\n');
  }
  console.log(` ${'Name'.padEnd(16)} ${'qps'.padStart(5)} ${'qpm'.padStart(6)} Notes`);
  console.log(` ${'─'.repeat(16)} ${'─'.repeat(5)} ${'─'.repeat(6)} ${'─'.repeat(40)}`);

  let applied = 0;
  let skipped = 0;
  let failed = 0;
  const matchedNames = new Set();

  for (const k of keys) {
    const label = labelOf(k);
    const target = targetFor(k.name);
    if (!target) {
      console.log(` ${label.padEnd(16)} (no target — skipping)`);
      skipped++;
      continue;
    }
    matchedNames.add(k.name);

    const qpsStr = String(target.qps).padStart(5);
    const qpmStr = String(target.qpm).padStart(6);
    console.log(` ${label.padEnd(16)} ${qpsStr} ${qpmStr} ${target.notes}`);

    if (dryRun) {
      applied++;
      continue;
    }
    try {
      await applyLimit(managementKey, k.apiKeyId, target);
      applied++;
    } catch (err) {
      // Keep going so one bad key does not block the rest, but remember the
      // failure so the run does not end with a success status.
      console.error(` ✗ Failed to update ${label}: ${err.message}`);
      failed++;
    }
  }

  // Surface configured targets that matched nothing (typo, deleted key, ...).
  for (const name of Object.keys(KEY_LIMITS)) {
    if (!matchedNames.has(name)) {
      console.error(` ! Target ${name} matches no key returned by the API`);
    }
  }

  console.log('');
  if (dryRun) {
    console.log(`[DRY RUN] Would apply limits to ${applied} key(s), skip ${skipped} (no target).`);
  } else {
    console.log(`Applied limits to ${applied} key(s). Skipped ${skipped} (no target).`);
  }

  if (failed > 0) {
    console.error(`Failed to update ${failed} key(s).`);
    process.exitCode = 1;
  }
}
// Start the script. Any rejection from main() is reported on stderr and the
// process ends with exit status 1.
main().catch((failure) => {
  console.error('Error:', failure.message);
  process.exit(1);
});