add recommend.js — budget-aware provider selection
Selects optimal Teams provider from chain based on real 7d utilization. Uses cached monitor data (no extra API calls if fresh cache exists). - Phase 1: first provider in chain with 7d util < SWITCH_THRESHOLD (default 75%) - Phase 2: all over threshold → pick lowest 7d allowed provider - Phase 3: all rejected → emergency=true, signals shelley-proxy needed - Always fails safe: returns team-vigilio on any error
This commit is contained in:
parent
350097a46d
commit
6e6d93f3bf
1 changed file with 159 additions and 0 deletions
159
recommend.js
Normal file
159
recommend.js
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
#!/usr/bin/env node
|
||||
/**
|
||||
* recommend.js — Select optimal provider using token budget intelligence
|
||||
*
|
||||
* Reads from the cached token-monitor run (if fresh) or probes directly.
|
||||
* Returns the best Teams provider considering 7d budget utilization.
|
||||
*
|
||||
* Selection rules:
|
||||
* 1. status must be "allowed"
|
||||
* 2. Scan chain in order; take first with utilization_7d < SWITCH_THRESHOLD
|
||||
* 3. If none under threshold, take lowest-utilization allowed provider
|
||||
* 4. If no allowed Teams providers, return emergency=true (shelley-proxy)
|
||||
*
|
||||
* Usage:
|
||||
* node recommend.js # JSON output
|
||||
* node recommend.js --threshold 0.80 # switch above 80% 7d
|
||||
* node recommend.js --chain "vigilio,ludo,molto" # provider order
|
||||
*
|
||||
* Output JSON:
|
||||
* { provider, model, reason, emergency, alternatives }
|
||||
*/
|
||||
|
||||
import { getProviders } from './providers/index.js';
|
||||
import { probeTeamsProvider } from './providers/anthropic-teams.js';
|
||||
import { getCachedRun } from './logger.js';
|
||||
|
||||
const args = process.argv.slice(2);

// Parse --threshold (a 0..1 fraction, e.g. 0.80 = switch above 80% 7d).
// Guard against a missing or non-numeric flag value: a NaN threshold would
// make every `util < SWITCH_THRESHOLD` comparison false and silently
// disable Phase 1 selection, so fall back to the 0.75 default instead.
const threshIdx = args.indexOf('--threshold');
const parsedThreshold = threshIdx !== -1 ? Number.parseFloat(args[threshIdx + 1]) : NaN;
const SWITCH_THRESHOLD = Number.isFinite(parsedThreshold) ? parsedThreshold : 0.75;

// Parse --chain (comma-separated provider names, scanned in priority order).
// Guard against `--chain` being the last arg: the original unconditional
// `.split()` would throw at module top level, *before* main().catch can
// emit the fail-safe JSON that beat.sh depends on.
const chainIdx = args.indexOf('--chain');
const DEFAULT_CHAIN = ['team-vigilio', 'team-ludo', 'team-molto', 'team-nadja'];
const chainArg = chainIdx !== -1 ? args[chainIdx + 1] : undefined;
const PROVIDER_CHAIN = chainArg
  ? chainArg.split(',').map((s) => s.trim()).filter(Boolean)
  : DEFAULT_CHAIN;

// Model reported in every recommendation, and the provider signalled when
// all Teams providers are exhausted (Phase 3 / emergency).
const DEFAULT_MODEL = 'claude-sonnet-4-6';
const EMERGENCY_FALLBACK = 'shelley-proxy';
|
||||
|
||||
// Format utilization as percentage string
|
||||
// Render a 0..1 utilization fraction as a whole-number percentage string.
// A nullish value (utilization unknown) renders as '?'.
function pct(v) {
  return v == null ? '?' : `${Math.round(v * 100)}%`;
}
|
||||
|
||||
/**
 * Gather per-provider budget data for the Teams providers in the chain.
 *
 * Prefers the cached token-monitor snapshot (≤ 20 minutes old, written by
 * monitor.js runs) so repeated invocations add zero API traffic; otherwise
 * fires targeted probes at the chain's Teams providers. The probes hit
 * independent endpoints, so they run concurrently rather than serially.
 *
 * @returns {Promise<{source: ('cache'|'fresh'), providers: Object}>}
 *   `providers` maps provider name → probe/monitor result. A failed probe
 *   is recorded as { type: 'teams-direct', status: 'error',
 *   utilization_7d: null } so selection can skip it instead of crashing.
 */
async function getProviderData() {
  // Fresh cache wins — no network needed at all.
  const cached = getCachedRun(20);
  if (cached && cached.providers) {
    return { source: 'cache', providers: cached.providers };
  }

  // No cache — probe Teams providers directly (targeted probes only, not a
  // full monitor run). Restrict to chain members that exist and are Teams.
  const allProviders = getProviders();
  const teamsNames = PROVIDER_CHAIN.filter(
    (name) => allProviders[name] && allProviders[name].type === 'teams-direct',
  );

  // Probes are independent — run them in parallel instead of one-by-one.
  const entries = await Promise.all(
    teamsNames.map(async (name) => {
      const p = allProviders[name];
      try {
        return [name, await probeTeamsProvider(p.name, p.baseUrl, p.apiKey)];
      } catch {
        // A dead probe must not abort the recommendation — mark the
        // provider errored so downstream selection treats it as unusable.
        return [name, { type: 'teams-direct', status: 'error', utilization_7d: null }];
      }
    }),
  );

  return { source: 'fresh', providers: Object.fromEntries(entries) };
}
|
||||
|
||||
/**
 * Entry point: pick the best Teams provider and print a JSON recommendation.
 *
 * Selection phases, in order:
 *   1. First chain provider with status 'allowed' and 7d util < threshold.
 *   2. Otherwise, the allowed provider with the lowest 7d utilization.
 *   3. Otherwise, emergency fallback (shelley-proxy) with emergency=true.
 *
 * Output JSON shape: { provider, model, reason, emergency, alternatives }.
 */
async function main() {
  const { source, providers } = await getProviderData();

  // Only chain members that are Teams providers with data participate,
  // preserving chain order (Phase 1 scans in priority order).
  const candidates = PROVIDER_CHAIN
    .filter((name) => providers[name] && providers[name].type === 'teams-direct')
    .map((name) => ({ name, ...providers[name] }));

  // Phase 1: first allowed provider under the switch threshold.
  let best = pickUnderThreshold(candidates, source);

  // Phase 2: everyone is over threshold — take the least-loaded allowed one.
  if (!best) {
    best = pickLowestAllowed(candidates, source);
  }

  // Alternatives summary: every candidate other than the chosen provider.
  const alternatives = candidates
    .filter((p) => !best || p.name !== best.name)
    .map((p) => ({
      name: p.name,
      status: p.status,
      utilization_7d: p.utilization_7d,
      utilization_5h: p.utilization_5h,
    }));

  // Phase 3: no allowed Teams providers at all — signal the emergency
  // fallback so the caller can route through shelley-proxy.
  if (!best) {
    console.log(JSON.stringify({
      provider: EMERGENCY_FALLBACK,
      model: DEFAULT_MODEL,
      reason: 'all Teams providers exhausted — emergency fallback',
      emergency: true,
      alternatives,
    }, null, 2));
    return;
  }

  console.log(JSON.stringify({
    provider: best.name,
    model: DEFAULT_MODEL,
    reason: best.reason,
    emergency: false,
    alternatives,
  }, null, 2));
}

// Phase 1 helper: scan candidates in chain order and return the first
// allowed provider whose 7d utilization (null counts as 0) is under
// SWITCH_THRESHOLD, or null when none qualifies.
function pickUnderThreshold(candidates, source) {
  for (const p of candidates) {
    if (p.status !== 'allowed') continue;
    const util7d = p.utilization_7d ?? 0;
    if (util7d < SWITCH_THRESHOLD) {
      return {
        name: p.name,
        reason: `7d at ${pct(util7d)}, 5h at ${pct(p.utilization_5h)} — under ${pct(SWITCH_THRESHOLD)} threshold (data: ${source})`,
      };
    }
  }
  return null;
}

// Phase 2 helper: among allowed candidates, return the one with the lowest
// 7d utilization (null counts as 0), or null when nothing is allowed.
function pickLowestAllowed(candidates, source) {
  let lowestUtil = Infinity;
  let lowest = null;
  for (const p of candidates) {
    if (p.status !== 'allowed') continue;
    const util7d = p.utilization_7d ?? 0;
    if (util7d < lowestUtil) {
      lowestUtil = util7d;
      lowest = p;
    }
  }
  if (!lowest) return null;
  return {
    name: lowest.name,
    reason: `all over threshold — best available at ${pct(lowestUtil)} 7d (data: ${source})`,
  };
}
|
||||
|
||||
// Top-level guard: beat.sh consumes our stdout, so any unexpected failure
// must still emit valid recommendation JSON and exit 0 — never crash.
main().catch((e) => {
  const fallback = {
    provider: 'team-vigilio',
    model: DEFAULT_MODEL,
    reason: `recommend.js error — defaulting: ${e.message}`,
    emergency: false,
    alternatives: [],
  };
  console.log(JSON.stringify(fallback, null, 2));
  process.exit(0);
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue