This repository has been archived on 2026-06-07. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
master-ai/vibn-agent-runner/dist/llm.js

353 lines
16 KiB
JavaScript

"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.AnthropicVertexClient = exports.GeminiClient = exports.VertexOpenAIClient = void 0;
exports.createLLM = createLLM;
exports.toOAITools = toOAITools;
const google_auth_library_1 = require("google-auth-library");
const genai_1 = require("@google/genai");
const vertex_sdk_1 = __importDefault(require("@anthropic-ai/vertex-sdk"));
const uuid_1 = require("uuid");
// ---------------------------------------------------------------------------
// Vertex AI OpenAI-compatible client
// Used for: zai-org/glm-5-maas, anthropic/claude-sonnet-4-6, etc.
// ---------------------------------------------------------------------------
let _cachedToken = '';
let _tokenExpiry = 0;
// Build GoogleAuth with explicit service account credentials when available.
// GCP_SA_KEY_BASE64: base64-encoded service account JSON key — safe to pass as
// an env var since it contains no newlines or special shell characters.
// Falls back to the GCP metadata server (works on VMs with correct scopes).
function buildGoogleAuth() {
const b64Key = process.env.GCP_SA_KEY_BASE64;
if (b64Key) {
try {
const jsonStr = Buffer.from(b64Key, 'base64').toString('utf8');
const credentials = JSON.parse(jsonStr);
return new google_auth_library_1.GoogleAuth({ credentials, scopes: ['https://www.googleapis.com/auth/cloud-platform'] });
}
catch {
console.warn('[llm] GCP_SA_KEY_BASE64 is set but failed to decode/parse — falling back to metadata server');
}
}
return new google_auth_library_1.GoogleAuth({ scopes: ['https://www.googleapis.com/auth/cloud-platform'] });
}
const _googleAuth = buildGoogleAuth();
async function getVertexToken() {
const now = Date.now();
if (_cachedToken && now < _tokenExpiry)
return _cachedToken;
const client = await _googleAuth.getClient();
const tokenResponse = await client.getAccessToken();
_cachedToken = tokenResponse.token;
_tokenExpiry = now + 55 * 60 * 1000; // tokens last 1hr, refresh at 55min
return _cachedToken;
}
class VertexOpenAIClient {
constructor(modelId, opts) {
this.modelId = modelId;
this.projectId = opts?.projectId ?? process.env.GCP_PROJECT_ID ?? 'master-ai-484822';
this.region = opts?.region ?? 'global';
this.temperature = opts?.temperature ?? 0.3;
}
async chat(messages, tools, maxTokens = 4096) {
const base = this.region === 'global'
? 'https://aiplatform.googleapis.com'
: `https://${this.region}-aiplatform.googleapis.com`;
const url = `${base}/v1/projects/${this.projectId}/locations/${this.region}/endpoints/openapi/chat/completions`;
const body = {
model: this.modelId,
messages,
max_tokens: maxTokens,
temperature: this.temperature,
stream: false
};
if (tools && tools.length > 0) {
body.tools = tools;
body.tool_choice = 'auto';
}
// Retry with exponential backoff on 429 / 503 (rate limit / overload)
const MAX_RETRIES = 4;
const RETRY_STATUSES = new Set([429, 503]);
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
const token = await getVertexToken();
const res = await fetch(url, {
method: 'POST',
headers: {
'Authorization': `Bearer ${token}`,
'Content-Type': 'application/json'
},
body: JSON.stringify(body)
});
if (res.ok) {
const data = await res.json();
const choice = data.choices?.[0];
const message = choice?.message ?? {};
return {
content: message.content ?? null,
reasoning: message.reasoning_content ?? null,
tool_calls: message.tool_calls ?? [],
finish_reason: choice?.finish_reason ?? 'stop',
usage: data.usage
};
}
const errText = await res.text();
// Force token refresh on 401
if (res.status === 401)
_tokenExpiry = 0;
if (RETRY_STATUSES.has(res.status) && attempt < MAX_RETRIES) {
// Check for Retry-After header, otherwise use exponential backoff
const retryAfter = res.headers.get('retry-after');
const waitMs = retryAfter
? Math.min(parseInt(retryAfter, 10) * 1000, 60000)
: Math.min(2 ** attempt * 2000 + Math.random() * 500, 30000);
console.warn(`[llm] Vertex ${res.status} on attempt ${attempt + 1}/${MAX_RETRIES + 1} — retrying in ${Math.round(waitMs / 1000)}s`);
await new Promise(r => setTimeout(r, waitMs));
continue;
}
throw new Error(`Vertex API ${res.status}: ${errText.slice(0, 400)}`);
}
// TypeScript requires an explicit throw after the loop (unreachable in practice)
throw new Error('Vertex API: exceeded max retries');
}
}
exports.VertexOpenAIClient = VertexOpenAIClient;
// ---------------------------------------------------------------------------
// Gemini client via @google/genai SDK
// Used for: Tier A (fast/cheap routing, summaries, log parsing)
// Converts to/from OpenAI message format internally.
// ---------------------------------------------------------------------------
class GeminiClient {
constructor(modelId = 'gemini-2.5-flash', opts) {
this.modelId = modelId;
this.temperature = opts?.temperature ?? 0.2;
}
async chat(messages, tools, maxTokens = 8192) {
const apiKey = process.env.GOOGLE_API_KEY;
if (!apiKey)
throw new Error('GOOGLE_API_KEY not set');
const genai = new genai_1.GoogleGenAI({ apiKey });
const systemMsg = messages.find(m => m.role === 'system');
const nonSystem = messages.filter(m => m.role !== 'system');
const functionDeclarations = (tools ?? []).map(t => ({
name: t.function.name,
description: t.function.description,
parameters: t.function.parameters
}));
const response = await genai.models.generateContent({
model: this.modelId,
contents: toGeminiContents(nonSystem),
config: {
systemInstruction: systemMsg?.content ?? undefined,
tools: functionDeclarations.length > 0 ? [{ functionDeclarations }] : undefined,
temperature: this.temperature,
maxOutputTokens: maxTokens
}
});
const candidate = response.candidates?.[0];
if (!candidate)
throw new Error('No response from Gemini');
const parts = candidate.content?.parts ?? [];
const textContent = parts.filter(p => p.text).map(p => p.text).join('') || null;
const fnCalls = parts.filter(p => p.functionCall);
const tool_calls = fnCalls.map(p => ({
id: `call_${(0, uuid_1.v4)().replace(/-/g, '').slice(0, 12)}`,
type: 'function',
function: {
name: p.functionCall.name ?? '',
arguments: JSON.stringify(p.functionCall.args ?? {})
}
}));
return {
content: textContent,
reasoning: null,
tool_calls,
finish_reason: fnCalls.length > 0 ? 'tool_calls' : 'stop'
};
}
}
exports.GeminiClient = GeminiClient;
/** Convert OpenAI message format → Gemini Content[] format */
function toGeminiContents(messages) {
const contents = [];
for (const msg of messages) {
if (msg.role === 'assistant') {
const parts = [];
if (msg.content)
parts.push({ text: msg.content });
for (const tc of msg.tool_calls ?? []) {
parts.push({
functionCall: {
name: tc.function.name,
args: JSON.parse(tc.function.arguments || '{}')
}
});
}
contents.push({ role: 'model', parts });
}
else if (msg.role === 'tool') {
// Parse content back — could be JSON or plain text
let resultValue = msg.content;
try {
resultValue = JSON.parse(msg.content ?? 'null');
}
catch { /* keep as string */ }
contents.push({
role: 'user',
parts: [{
functionResponse: {
name: msg.name ?? 'tool',
response: { result: resultValue }
}
}]
});
}
else {
contents.push({ role: 'user', parts: [{ text: msg.content ?? '' }] });
}
}
return contents;
}
// ---------------------------------------------------------------------------
// Anthropic Vertex client
// Used for: claude-* models via Vertex AI (proper Anthropic Messages API)
// Handles tool_calls by converting to/from Anthropic's tool_use blocks.
// ---------------------------------------------------------------------------
class AnthropicVertexClient {
constructor(modelId, opts) {
// Strip the "anthropic/" prefix if present — the SDK uses bare model names
this.modelId = modelId.startsWith('anthropic/') ? modelId.slice(10) : modelId;
this.projectId = opts?.projectId ?? process.env.GCP_PROJECT_ID ?? 'master-ai-484822';
this.region = opts?.region ?? process.env.CLAUDE_REGION ?? 'us-east5';
}
buildClient() {
const b64Key = process.env.GCP_SA_KEY_BASE64;
if (b64Key) {
try {
const jsonStr = Buffer.from(b64Key, 'base64').toString('utf8');
const credentials = JSON.parse(jsonStr);
return new vertex_sdk_1.default({
projectId: this.projectId,
region: this.region,
googleAuth: new google_auth_library_1.GoogleAuth({ credentials, scopes: ['https://www.googleapis.com/auth/cloud-platform'] }),
});
}
catch {
console.warn('[llm] AnthropicVertex: SA key decode failed, falling back to metadata server');
}
}
return new vertex_sdk_1.default({ projectId: this.projectId, region: this.region });
}
async chat(messages, tools, maxTokens = 8192) {
const client = this.buildClient();
const system = messages.find(m => m.role === 'system')?.content ?? undefined;
const nonSystem = messages.filter(m => m.role !== 'system');
// Convert OpenAI message format → Anthropic format
const anthropicMessages = nonSystem.map(m => {
if (m.role === 'assistant') {
const parts = [];
if (m.content)
parts.push({ type: 'text', text: m.content });
for (const tc of m.tool_calls ?? []) {
parts.push({
type: 'tool_use',
id: tc.id,
name: tc.function.name,
input: JSON.parse(tc.function.arguments || '{}'),
});
}
return { role: 'assistant', content: parts.length === 1 && parts[0].type === 'text' ? parts[0].text : parts };
}
if (m.role === 'tool') {
return {
role: 'user',
content: [{ type: 'tool_result', tool_use_id: m.tool_call_id, content: m.content ?? '' }],
};
}
return { role: 'user', content: m.content ?? '' };
});
const anthropicTools = (tools ?? []).map(t => ({
name: t.function.name,
description: t.function.description,
input_schema: t.function.parameters,
}));
const MAX_RETRIES = 4;
const RETRY_STATUSES = new Set([429, 503]);
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
try {
const response = await client.messages.create({
model: this.modelId,
max_tokens: maxTokens,
system: system ?? undefined,
messages: anthropicMessages,
tools: anthropicTools.length > 0 ? anthropicTools : undefined,
});
const textContent = response.content
.filter((b) => b.type === 'text')
.map((b) => b.text)
.join('') || null;
const tool_calls = response.content
.filter((b) => b.type === 'tool_use')
.map((b) => ({
id: b.id,
type: 'function',
function: { name: b.name, arguments: JSON.stringify(b.input ?? {}) },
}));
return {
content: textContent,
reasoning: null,
tool_calls,
finish_reason: response.stop_reason === 'tool_use' ? 'tool_calls' : 'stop',
usage: response.usage
? { prompt_tokens: response.usage.input_tokens, completion_tokens: response.usage.output_tokens, total_tokens: response.usage.input_tokens + response.usage.output_tokens }
: undefined,
};
}
catch (err) {
const status = err?.status ?? err?.statusCode ?? 0;
if (RETRY_STATUSES.has(status) && attempt < MAX_RETRIES) {
const waitMs = Math.min(2 ** attempt * 2000 + Math.random() * 500, 30000);
console.warn(`[llm] Anthropic Vertex ${status} on attempt ${attempt + 1}/${MAX_RETRIES + 1} — retrying in ${Math.round(waitMs / 1000)}s`);
await new Promise(r => setTimeout(r, waitMs));
continue;
}
throw new Error(`Anthropic Vertex error: ${err?.message ?? String(err)}`);
}
}
throw new Error('Anthropic Vertex: exceeded max retries');
}
}
exports.AnthropicVertexClient = AnthropicVertexClient;
const TIER_MODELS = {
A: process.env.TIER_A_MODEL ?? 'gemini-2.5-flash',
B: process.env.TIER_B_MODEL ?? 'claude-sonnet-4-6',
C: process.env.TIER_C_MODEL ?? 'claude-sonnet-4-6'
};
function createLLM(modelOrTier, opts) {
const modelId = (modelOrTier === 'A' || modelOrTier === 'B' || modelOrTier === 'C')
? TIER_MODELS[modelOrTier]
: modelOrTier;
if (modelId.startsWith('gemini-')) {
return new GeminiClient(modelId, opts);
}
if (modelId.startsWith('anthropic/') || modelId.startsWith('claude-')) {
return new AnthropicVertexClient(modelId);
}
return new VertexOpenAIClient(modelId, { temperature: opts?.temperature });
}
// ---------------------------------------------------------------------------
// Helper — convert our ToolDefinition[] → LLMTool[] (OpenAI format)
// ---------------------------------------------------------------------------
function toOAITools(tools) {
return tools.map(t => ({
type: 'function',
function: {
name: t.name,
description: t.description,
parameters: t.parameters
}
}));
}