token-monitor/recommend.js
Vigilio Desto a71474e38d
recommend.js: use provider-check.json as primary availability signal
With extra-usage credit (post April 4 2026), 7d per-seat limits no
longer block sessions — broken OAuth tokens do. provider-check.json
(written hourly by health-pulse) tests actual pi session startup.

Changes:
- Load /tmp/provider-check.json (if fresh, < 2h old) before selection
- Filter candidates to pi-usable providers only
- If filter would empty the pool, fall through to budget-only logic
- Reason string includes 'pi-check' when filter was applied
- Handles stale file, missing file, parse errors gracefully

This fixes the monitoring gap where budget API probes and pi session
usability diverge (e.g. team-buio: budget OK, pi ETIMEDOUT at 12:01)

Refs: trentuna/token-monitor#4
2026-04-08 14:33:05 +02:00

220 lines
7.9 KiB
JavaScript

#!/usr/bin/env node
/**
* recommend.js — Select optimal provider using token budget intelligence
*
* Reads from the cached token-monitor run (if fresh) or probes directly.
* Returns the best Teams provider considering 7d budget utilization.
*
* Selection rules (extra-usage credit era, post April 4 2026):
* 0. Consult provider-check.json (pi session usability) — primary availability signal
* With extra-usage credit, 7d limits don't block sessions; broken OAuth tokens do.
* 1. Among pi-usable providers: status must be "allowed" or "allowed_warning"
* 2. Scan chain in order; take first with utilization_7d < SWITCH_THRESHOLD
* 3. If none under threshold, take lowest-utilization allowed/allowed_warning provider
* 4. If no usable Teams providers, return emergency=true (shelley-proxy)
*
* Note on provider-check.json:
* Written by ~/projects/provider-check/provider-check.ts (run by health-pulse hourly)
* at /tmp/provider-check.json. Stale if > 2 hours old — fallback to budget-only logic.
*
* Usage:
* node recommend.js # JSON output
* node recommend.js --threshold 0.80 # switch above 80% 7d
* node recommend.js --chain "team-vigilio,team-ludo,team-molto" # provider order (full provider names, as in DEFAULT_CHAIN)
*
* Output JSON:
* { provider, model, reason, emergency, alternatives }
*/
import { getProviders } from './providers/index.js';
import { probeTeamsProvider } from './providers/anthropic-teams.js';
import { getCachedRun } from './logger.js';
import { readFileSync, existsSync, statSync } from 'fs';
const args = process.argv.slice(2);

/**
 * Parse the value given to --threshold.
 *
 * @param {string|undefined} raw - Text that followed the flag (undefined when
 *   the flag was the last argument).
 * @param {number} [fallback=0.75] - Value used when `raw` is not a number.
 * @returns {number} The parsed threshold, or `fallback` when parsing yields NaN.
 */
function parseThreshold(raw, fallback = 0.75) {
  const value = Number.parseFloat(raw);
  // A NaN threshold would make every `util < threshold` comparison false,
  // silently disabling the "under threshold" selection phase.
  return Number.isNaN(value) ? fallback : value;
}

/**
 * Parse the comma-separated provider list given to --chain.
 *
 * @param {string|undefined} raw - Text that followed the flag (undefined when
 *   the flag was the last argument).
 * @param {string[]} fallback - Chain used when `raw` names no provider.
 * @returns {string[]} Trimmed, non-empty provider names, or `fallback`.
 */
function parseChain(raw, fallback) {
  // Guard against a missing value: calling .split on undefined would throw at
  // module load, before main()'s catch handler is installed.
  if (typeof raw !== 'string') return fallback;
  const names = raw
    .split(',')
    .map((s) => s.trim())
    .filter((s) => s.length > 0);
  return names.length > 0 ? names : fallback;
}

// Parse --threshold
const threshIdx = args.indexOf('--threshold');
const SWITCH_THRESHOLD = threshIdx !== -1 ? parseThreshold(args[threshIdx + 1]) : 0.75;

// Parse --chain
const chainIdx = args.indexOf('--chain');
const DEFAULT_CHAIN = ['team-vigilio', 'team-ludo', 'team-buio', 'team-molto', 'team-nadja'];
const PROVIDER_CHAIN = chainIdx !== -1
  ? parseChain(args[chainIdx + 1], DEFAULT_CHAIN)
  : DEFAULT_CHAIN;

const DEFAULT_MODEL = 'claude-sonnet-4-6';
const EMERGENCY_FALLBACK = 'shelley-proxy';
const PROVIDER_CHECK_PATH = '/tmp/provider-check.json';
const PROVIDER_CHECK_MAX_AGE_MS = 2 * 60 * 60 * 1000; // 2 hours
/**
 * Load the provider-check report and collect the providers that passed it.
 *
 * Any failure (file missing, stale, unreadable, malformed) yields null so the
 * caller can fall through to budget-only selection instead of crashing.
 *
 * @param {string} [path=PROVIDER_CHECK_PATH] - Location of the report file.
 * @param {number} [maxAgeMs=PROVIDER_CHECK_MAX_AGE_MS] - Maximum age, judged by
 *   the file's mtime, before the report is considered stale.
 * @returns {Set<string>|null} Provider names whose result entry has
 *   status "ok", or null when no trustworthy report is available or no
 *   provider passed.
 */
function getPiUsableProviders(path = PROVIDER_CHECK_PATH, maxAgeMs = PROVIDER_CHECK_MAX_AGE_MS) {
  try {
    if (!existsSync(path)) return null;

    const ageMs = Date.now() - statSync(path).mtimeMs;
    if (ageMs > maxAgeMs) return null; // stale — don't trust it

    const data = JSON.parse(readFileSync(path, 'utf8'));
    if (!Array.isArray(data?.results)) return null;

    // Skip malformed entries individually so one bad record does not
    // invalidate the rest of the report.
    const usable = new Set(
      data.results
        .filter((r) => r?.status === 'ok' && typeof r.provider === 'string')
        .map((r) => r.provider)
    );
    return usable.size > 0 ? usable : null;
  } catch {
    // Deliberate best-effort: an unreadable report means "no signal".
    return null;
  }
}
/**
 * Render a utilization fraction as a whole-number percentage string.
 *
 * @param {number|null|undefined} v - Fraction such as 0.42.
 * @returns {string} e.g. "42%", or "?" when the value is null/undefined.
 */
function pct(v) {
  const isMissing = v === null || v === undefined;
  return isMissing ? '?' : `${Math.round(100 * v)}%`;
}
/**
 * Collect per-provider budget data, preferring the monitor's cached run over
 * live probes.
 *
 * The cache (requested via getCachedRun(20)) is used unless none of the chain
 * members it knows about is in a usable state: when every cached chain entry
 * is "invalid_key" or "error" — or the cache has no chain entries at all — the
 * chain is probed fresh instead. Per the original note, invalid_key can be
 * transient (e.g. 401s during key rotation), so it is not worth trusting from
 * cache.
 *
 * @returns {Promise<{source: 'cache'|'fresh', providers: Object}>} Provider
 *   records keyed by provider name, plus where they came from.
 */
async function getProviderData() {
  const cached = getCachedRun(20);

  if (cached && cached.providers) {
    // Cached records for the chain members the cache actually knows about.
    const known = [];
    for (const name of PROVIDER_CHAIN) {
      const entry = cached.providers[name];
      if (entry) known.push(entry);
    }

    const isUnreliable = (entry) => entry.status === 'invalid_key' || entry.status === 'error';
    const cacheIsUseful = known.length > 0 && known.some((entry) => !isUnreliable(entry));
    if (cacheIsUseful) {
      return { source: 'cache', providers: cached.providers };
    }
    // Otherwise fall through and probe fresh.
  }

  // Targeted probes of the chain's Teams providers only, one at a time in
  // chain order (not a full monitor run).
  const registry = getProviders();
  const providers = {};
  for (const name of PROVIDER_CHAIN) {
    const definition = registry[name];
    const isTeamsProvider = Boolean(definition) && definition.type === 'teams-direct';
    if (isTeamsProvider) {
      try {
        providers[name] = await probeTeamsProvider(
          definition.name,
          definition.baseUrl,
          definition.apiKey
        );
      } catch (e) {
        // A failed probe is recorded as an error entry rather than aborting
        // the remaining probes.
        providers[name] = { type: 'teams-direct', status: 'error', utilization_7d: null };
      }
    }
  }
  return { source: 'fresh', providers };
}
/**
 * Choose a provider and print the recommendation as pretty-printed JSON on
 * stdout.
 *
 * Steps:
 *  1. Build the candidate pool: chain members with "teams-direct" data, in
 *     chain order.
 *  2. Narrow the pool to providers reported usable by getPiUsableProviders(),
 *     unless that would leave the pool empty.
 *  3. Phase 1 — first candidate (chain order) with status "allowed" or
 *     "allowed_warning" whose 7d utilization is below SWITCH_THRESHOLD.
 *  4. Phase 2 — otherwise the allowed/allowed_warning candidate with the
 *     lowest 7d utilization; ties go to the earlier chain member.
 *  5. Phase 3 — otherwise the emergency fallback provider.
 *
 * A missing 7d utilization (null/undefined) is treated as 0.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { source, providers } = await getProviderData();

  // Candidate pool, in chain order.
  let pool = [];
  for (const name of PROVIDER_CHAIN) {
    const record = providers[name];
    if (record && record.type === 'teams-direct') {
      pool.push({ name, ...record });
    }
  }

  // Pi-usability filter. It is only applied when at least one candidate
  // survives; an empty result is ignored so a stale or misconfigured check
  // cannot force the emergency fallback on its own.
  const piUsable = getPiUsableProviders();
  const survivors = piUsable ? pool.filter((c) => piUsable.has(c.name)) : [];
  const piFilterApplied = survivors.length > 0;
  if (piFilterApplied) {
    pool = survivors;
  }

  // Suffix shared by both reason strings: data origin plus the pi-check marker.
  const dataTag = `(data: ${source}${piFilterApplied ? ', pi-check' : ''})`;

  // Both statuses can serve requests.
  const servable = pool.filter(
    (c) => c.status === 'allowed' || c.status === 'allowed_warning'
  );

  let best = null;

  // Phase 1: first servable candidate under the threshold.
  const underThreshold = servable.find((c) => (c.utilization_7d ?? 0) < SWITCH_THRESHOLD);
  if (underThreshold) {
    const util7d = underThreshold.utilization_7d ?? 0;
    best = {
      name: underThreshold.name,
      reason: `7d at ${pct(util7d)}, 5h at ${pct(underThreshold.utilization_5h)} — under ${pct(SWITCH_THRESHOLD)} threshold ${dataTag}`,
    };
  }

  // Phase 2: nobody under the threshold — take the lowest 7d utilization.
  if (!best) {
    const lowest = servable.reduce(
      (acc, c) => {
        const util7d = c.utilization_7d ?? 0;
        return util7d < acc.util ? { util: util7d, candidate: c } : acc;
      },
      { util: Infinity, candidate: null }
    );
    if (lowest.candidate) {
      const warningTag = lowest.candidate.status === 'allowed_warning' ? ', warning' : '';
      best = {
        name: lowest.candidate.name,
        reason: `all over threshold — best available at ${pct(lowest.util)} 7d${warningTag} ${dataTag}`,
      };
    }
  }

  // Everything in the (possibly pi-filtered) pool except the chosen provider.
  const alternatives = pool
    .filter((c) => best === null || c.name !== best.name)
    .map(({ name, status, utilization_7d, utilization_5h }) => ({
      name,
      status,
      utilization_7d,
      utilization_5h,
    }));

  // Phase 3 applies when no servable Teams provider was found.
  const recommendation = best
    ? {
        provider: best.name,
        model: DEFAULT_MODEL,
        reason: best.reason,
        emergency: false,
        alternatives,
      }
    : {
        provider: EMERGENCY_FALLBACK,
        model: DEFAULT_MODEL,
        reason: 'all Teams providers exhausted — emergency fallback',
        emergency: true,
        alternatives,
      };

  console.log(JSON.stringify(recommendation, null, 2));
}
// Run the selection. If main() rejects, still print a well-formed
// recommendation and exit with status 0 so the calling script (beat.sh, per
// the original note) never sees a crash.
main().catch((err) => {
  const safeDefault = {
    provider: 'team-vigilio',
    model: DEFAULT_MODEL,
    reason: `recommend.js error — defaulting: ${err.message}`,
    emergency: false,
    alternatives: [],
  };
  console.log(JSON.stringify(safeDefault, null, 2));
  process.exit(0);
});