"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.AnthropicVertexClient = exports.GeminiClient = exports.VertexOpenAIClient = void 0; exports.createLLM = createLLM; exports.toOAITools = toOAITools; const google_auth_library_1 = require("google-auth-library"); const genai_1 = require("@google/genai"); const vertex_sdk_1 = __importDefault(require("@anthropic-ai/vertex-sdk")); const uuid_1 = require("uuid"); // --------------------------------------------------------------------------- // Vertex AI OpenAI-compatible client // Used for: zai-org/glm-5-maas, anthropic/claude-sonnet-4-6, etc. // --------------------------------------------------------------------------- let _cachedToken = ''; let _tokenExpiry = 0; // Build GoogleAuth with explicit service account credentials when available. // GCP_SA_KEY_BASE64: base64-encoded service account JSON key — safe to pass as // an env var since it contains no newlines or special shell characters. // Falls back to the GCP metadata server (works on VMs with correct scopes). 
// ---------------------------------------------------------------------------
// Shared constants and helpers
// ---------------------------------------------------------------------------
/** OAuth scope requested for every Google credential created by this module. */
const CLOUD_PLATFORM_SCOPE = 'https://www.googleapis.com/auth/cloud-platform';
/** Project used when neither the caller nor GCP_PROJECT_ID supplies one. */
const DEFAULT_PROJECT_ID = 'master-ai-484822';
/** Number of retries after the first attempt (so at most MAX_RETRIES + 1 requests). */
const MAX_RETRIES = 4;
/** HTTP statuses treated as transient (rate limit / overload). */
const RETRY_STATUSES = new Set([429, 503]);
/** Upper bound for a server-requested (Retry-After) wait. */
const MAX_RETRY_AFTER_MS = 60000;
/** Upper bound for a locally computed exponential backoff wait. */
const MAX_BACKOFF_MS = 30000;

/** Resolve after `ms` milliseconds. */
function sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Exponential backoff with a little jitter: 2s, 4s, 8s, ... capped at 30s.
 * @param {number} attempt zero-based attempt index
 * @returns {number} delay in milliseconds
 */
function backoffDelayMs(attempt) {
    return Math.min(2 ** attempt * 2000 + Math.random() * 500, MAX_BACKOFF_MS);
}

/**
 * Parse a Retry-After header value, which may be either a number of seconds
 * or an HTTP-date.
 * @param {string | null | undefined} headerValue raw header value
 * @returns {number | null} the requested delay in milliseconds, or null when
 *   the header is absent or unparseable (callers should then use their own backoff)
 */
function parseRetryAfterMs(headerValue) {
    if (!headerValue)
        return null;
    const value = headerValue.trim();
    if (/^\d+/.test(value)) {
        return Number.parseInt(value, 10) * 1000;
    }
    // HTTP-date form, e.g. "Wed, 21 Oct 2015 07:28:00 GMT".
    const dateMs = Date.parse(value);
    if (Number.isNaN(dateMs))
        return null;
    return Math.max(0, dateMs - Date.now());
}

/**
 * Build a GoogleAuth from the base64-encoded service account key in
 * GCP_SA_KEY_BASE64, if that variable is set and decodes to valid JSON.
 * @param {string} failureMessage warning logged when the variable is set but unusable
 * @returns {object | null} a GoogleAuth instance, or null when no usable key is configured
 */
function tryBuildKeyedGoogleAuth(failureMessage) {
    const b64Key = process.env.GCP_SA_KEY_BASE64;
    if (!b64Key)
        return null;
    try {
        const credentials = JSON.parse(Buffer.from(b64Key, 'base64').toString('utf8'));
        return new google_auth_library_1.GoogleAuth({ credentials, scopes: [CLOUD_PLATFORM_SCOPE] });
    }
    catch {
        console.warn(failureMessage);
        return null;
    }
}

/**
 * Build the GoogleAuth used for Vertex requests.
 * Prefers explicit service account credentials from GCP_SA_KEY_BASE64 and
 * otherwise constructs GoogleAuth without credentials (library default lookup).
 * @returns {object} a GoogleAuth instance scoped to cloud-platform
 */
function buildGoogleAuth() {
    const keyed = tryBuildKeyedGoogleAuth('[llm] GCP_SA_KEY_BASE64 is set but failed to decode/parse — falling back to metadata server');
    return keyed ?? new google_auth_library_1.GoogleAuth({ scopes: [CLOUD_PLATFORM_SCOPE] });
}
const _googleAuth = buildGoogleAuth();

/** Module-level cache for the Vertex access token shared by all VertexOpenAIClient instances. */
const vertexTokenCache = { token: '', expiresAt: 0 };

/**
 * Return a bearer token for Vertex AI, reusing the cached one while it is fresh.
 * @returns {Promise<string>} access token
 * @throws {Error} when the credentials yield no token
 */
async function getVertexToken() {
    const now = Date.now();
    if (vertexTokenCache.token && now < vertexTokenCache.expiresAt) {
        return vertexTokenCache.token;
    }
    const client = await _googleAuth.getClient();
    const { token } = await client.getAccessToken();
    if (!token) {
        // Fail here rather than sending "Bearer null" and surfacing an opaque 401.
        throw new Error('Vertex auth: credentials returned no access token');
    }
    vertexTokenCache.token = token;
    vertexTokenCache.expiresAt = now + 55 * 60 * 1000; // tokens last 1hr, refresh at 55min
    return token;
}

/**
 * Client for the Vertex AI OpenAI-compatible chat completions endpoint.
 */
class VertexOpenAIClient {
    /**
     * @param {string} modelId model identifier sent as `model`
     * @param {{ projectId?: string, region?: string, temperature?: number }} [opts]
     */
    constructor(modelId, opts) {
        this.modelId = modelId;
        this.projectId = opts?.projectId ?? process.env.GCP_PROJECT_ID ?? DEFAULT_PROJECT_ID;
        this.region = opts?.region ?? 'global';
        this.temperature = opts?.temperature ?? 0.3;
    }
    /**
     * Send one non-streaming chat completion request, retrying on 429/503.
     * @param {Array<object>} messages OpenAI-format messages
     * @param {Array<object>} [tools] OpenAI-format tool definitions
     * @param {number} [maxTokens=4096]
     * @returns {Promise<{content: string|null, reasoning: string|null, tool_calls: Array<object>, finish_reason: string, usage: object|undefined}>}
     * @throws {Error} on a non-retryable status or once retries are exhausted
     */
    async chat(messages, tools, maxTokens = 4096) {
        const base = this.region === 'global'
            ? 'https://aiplatform.googleapis.com'
            : `https://${this.region}-aiplatform.googleapis.com`;
        const url = `${base}/v1/projects/${this.projectId}/locations/${this.region}/endpoints/openapi/chat/completions`;
        const body = {
            model: this.modelId,
            messages,
            max_tokens: maxTokens,
            temperature: this.temperature,
            stream: false
        };
        if (tools && tools.length > 0) {
            body.tools = tools;
            body.tool_choice = 'auto';
        }
        const payload = JSON.stringify(body);
        // Retry with exponential backoff on 429 / 503 (rate limit / overload)
        for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
            const token = await getVertexToken();
            const res = await fetch(url, {
                method: 'POST',
                headers: { 'Authorization': `Bearer ${token}`, 'Content-Type': 'application/json' },
                body: payload
            });
            if (res.ok) {
                const data = await res.json();
                const choice = data.choices?.[0];
                const message = choice?.message ?? {};
                return {
                    content: message.content ?? null,
                    reasoning: message.reasoning_content ?? null,
                    tool_calls: message.tool_calls ?? [],
                    finish_reason: choice?.finish_reason ?? 'stop',
                    usage: data.usage
                };
            }
            const errText = await res.text();
            // Force token refresh on 401 so the next call does not reuse a rejected token
            if (res.status === 401)
                vertexTokenCache.expiresAt = 0;
            if (RETRY_STATUSES.has(res.status) && attempt < MAX_RETRIES) {
                // Honour Retry-After (seconds or HTTP-date); otherwise use exponential backoff.
                // An unparseable header must not collapse into a zero-delay retry.
                const hintedMs = parseRetryAfterMs(res.headers.get('retry-after'));
                const waitMs = hintedMs === null
                    ? backoffDelayMs(attempt)
                    : Math.min(hintedMs, MAX_RETRY_AFTER_MS);
                console.warn(`[llm] Vertex ${res.status} on attempt ${attempt + 1}/${MAX_RETRIES + 1} — retrying in ${Math.round(waitMs / 1000)}s`);
                await sleep(waitMs);
                continue;
            }
            throw new Error(`Vertex API ${res.status}: ${errText.slice(0, 400)}`);
        }
        // Unreachable in practice: the final attempt always returns or throws above.
        throw new Error('Vertex API: exceeded max retries');
    }
}
exports.VertexOpenAIClient = VertexOpenAIClient;

// ---------------------------------------------------------------------------
// Gemini client via @google/genai SDK
// Used for: Tier A (fast/cheap routing, summaries, log parsing)
// Converts to/from OpenAI message format internally.
// ---------------------------------------------------------------------------
class GeminiClient {
    /**
     * @param {string} [modelId='gemini-2.5-flash']
     * @param {{ temperature?: number }} [opts]
     */
    constructor(modelId = 'gemini-2.5-flash', opts) {
        this.modelId = modelId;
        this.temperature = opts?.temperature ?? 0.2;
    }
    /**
     * Send one generateContent request built from OpenAI-format messages.
     * @param {Array<object>} messages OpenAI-format messages
     * @param {Array<object>} [tools] OpenAI-format tool definitions
     * @param {number} [maxTokens=8192]
     * @returns {Promise<{content: string|null, reasoning: null, tool_calls: Array<object>, finish_reason: string}>}
     * @throws {Error} when GOOGLE_API_KEY is unset or the response has no candidate
     */
    async chat(messages, tools, maxTokens = 8192) {
        const apiKey = process.env.GOOGLE_API_KEY;
        if (!apiKey)
            throw new Error('GOOGLE_API_KEY not set');
        const genai = new genai_1.GoogleGenAI({ apiKey });
        const systemMsg = messages.find(m => m.role === 'system');
        const nonSystem = messages.filter(m => m.role !== 'system');
        const functionDeclarations = (tools ?? []).map(t => ({
            name: t.function.name,
            description: t.function.description,
            parameters: t.function.parameters
        }));
        const response = await genai.models.generateContent({
            model: this.modelId,
            contents: toGeminiContents(nonSystem),
            config: {
                systemInstruction: systemMsg?.content ?? undefined,
                tools: functionDeclarations.length > 0 ? [{ functionDeclarations }] : undefined,
                temperature: this.temperature,
                maxOutputTokens: maxTokens
            }
        });
        const candidate = response.candidates?.[0];
        if (!candidate)
            throw new Error('No response from Gemini');
        const parts = candidate.content?.parts ?? [];
        const textContent = parts.filter(p => p.text).map(p => p.text).join('') || null;
        const fnCalls = parts.filter(p => p.functionCall);
        // Gemini function calls carry no id; synthesise an OpenAI-style one.
        const tool_calls = fnCalls.map(p => ({
            id: `call_${(0, uuid_1.v4)().replace(/-/g, '').slice(0, 12)}`,
            type: 'function',
            function: {
                name: p.functionCall.name ?? '',
                arguments: JSON.stringify(p.functionCall.args ?? {})
            }
        }));
        return {
            content: textContent,
            reasoning: null,
            tool_calls,
            finish_reason: fnCalls.length > 0 ? 'tool_calls' : 'stop'
        };
    }
}
exports.GeminiClient = GeminiClient;

/**
 * Convert OpenAI message format → Gemini Content[] format.
 *
 * - assistant messages become `model` turns (text plus functionCall parts);
 * - tool messages become functionResponse parts; consecutive tool messages are
 *   merged into a single `user` turn so the responses to one batch of parallel
 *   calls travel together;
 * - every other role becomes a `user` text turn.
 *
 * A tool message's function name is taken from `msg.name` when present,
 * otherwise looked up from the earlier assistant tool call whose id matches
 * `msg.tool_call_id`, and only then falls back to the literal 'tool'.
 *
 * @param {Array<object>} messages OpenAI-format messages (system messages already removed)
 * @returns {Array<{role: string, parts: Array<object>}>}
 * @throws {SyntaxError} when an assistant tool call's `arguments` is not valid JSON
 */
function toGeminiContents(messages) {
    const contents = [];
    const toolNameById = new Map();
    // Parts array of the user turn currently collecting tool results, if any.
    let openResponseParts = null;
    for (const msg of messages) {
        if (msg.role === 'tool') {
            // Parse content back — could be JSON or plain text
            let resultValue = msg.content;
            try {
                resultValue = JSON.parse(msg.content ?? 'null');
            }
            catch { /* keep as string */ }
            const part = {
                functionResponse: {
                    name: msg.name ?? toolNameById.get(msg.tool_call_id) ?? 'tool',
                    response: { result: resultValue }
                }
            };
            if (openResponseParts) {
                openResponseParts.push(part);
            }
            else {
                openResponseParts = [part];
                contents.push({ role: 'user', parts: openResponseParts });
            }
            continue;
        }
        openResponseParts = null;
        if (msg.role === 'assistant') {
            const parts = [];
            if (msg.content)
                parts.push({ text: msg.content });
            for (const tc of msg.tool_calls ?? []) {
                if (tc.id)
                    toolNameById.set(tc.id, tc.function.name);
                parts.push({
                    functionCall: {
                        name: tc.function.name,
                        args: JSON.parse(tc.function.arguments || '{}')
                    }
                });
            }
            contents.push({ role: 'model', parts });
        }
        else {
            contents.push({ role: 'user', parts: [{ text: msg.content ?? '' }] });
        }
    }
    return contents;
}

/**
 * Convert OpenAI-format messages (system messages already removed) into
 * Anthropic Messages API format: assistant tool calls become `tool_use`
 * blocks and tool messages become `tool_result` blocks in a user turn.
 * @param {Array<object>} messages
 * @returns {Array<{role: string, content: string | Array<object>}>}
 * @throws {SyntaxError} when an assistant tool call's `arguments` is not valid JSON
 */
function toAnthropicMessages(messages) {
    return messages.map(m => {
        if (m.role === 'assistant') {
            const parts = [];
            if (m.content)
                parts.push({ type: 'text', text: m.content });
            for (const tc of m.tool_calls ?? []) {
                parts.push({
                    type: 'tool_use',
                    id: tc.id,
                    name: tc.function.name,
                    input: JSON.parse(tc.function.arguments || '{}'),
                });
            }
            // A lone text block is sent as a plain string.
            const textOnly = parts.length === 1 && parts[0].type === 'text';
            return { role: 'assistant', content: textOnly ? parts[0].text : parts };
        }
        if (m.role === 'tool') {
            return {
                role: 'user',
                content: [{ type: 'tool_result', tool_use_id: m.tool_call_id, content: m.content ?? '' }],
            };
        }
        return { role: 'user', content: m.content ?? '' };
    });
}

// ---------------------------------------------------------------------------
// Anthropic Vertex client
// Used for: claude-* models via Vertex AI (proper Anthropic Messages API)
// Handles tool_calls by converting to/from Anthropic's tool_use blocks.
// ---------------------------------------------------------------------------
class AnthropicVertexClient {
    /**
     * @param {string} modelId model name, with or without the "anthropic/" prefix
     * @param {{ projectId?: string, region?: string, temperature?: number }} [opts]
     *   `temperature` is only sent to the API when provided; otherwise the
     *   request omits it, exactly as before.
     */
    constructor(modelId, opts) {
        // Strip the "anthropic/" prefix if present — the SDK uses bare model names
        const prefix = 'anthropic/';
        this.modelId = modelId.startsWith(prefix) ? modelId.slice(prefix.length) : modelId;
        this.projectId = opts?.projectId ?? process.env.GCP_PROJECT_ID ?? DEFAULT_PROJECT_ID;
        this.region = opts?.region ?? process.env.CLAUDE_REGION ?? 'us-east5';
        this.temperature = opts?.temperature;
        // SDK client, created lazily on first chat() and reused afterwards.
        this._client = null;
    }
    /**
     * Create a new AnthropicVertex SDK client, using the service account key
     * from GCP_SA_KEY_BASE64 when it is usable and the SDK's own default
     * authentication otherwise.
     * @returns {object} AnthropicVertex client
     */
    buildClient() {
        const googleAuth = tryBuildKeyedGoogleAuth('[llm] AnthropicVertex: SA key decode failed, falling back to metadata server');
        if (googleAuth) {
            return new vertex_sdk_1.default({ projectId: this.projectId, region: this.region, googleAuth });
        }
        return new vertex_sdk_1.default({ projectId: this.projectId, region: this.region });
    }
    /**
     * Send one Messages API request, retrying on 429/503 with exponential backoff.
     * @param {Array<object>} messages OpenAI-format messages
     * @param {Array<object>} [tools] OpenAI-format tool definitions
     * @param {number} [maxTokens=8192]
     * @returns {Promise<{content: string|null, reasoning: null, tool_calls: Array<object>, finish_reason: string, usage: object|undefined}>}
     * @throws {Error} wrapping the SDK error (available as `cause`) on a
     *   non-retryable failure or once retries are exhausted
     */
    async chat(messages, tools, maxTokens = 8192) {
        // Reuse one SDK client per instance so its credentials are not rebuilt on every call.
        if (!this._client)
            this._client = this.buildClient();
        const client = this._client;
        const system = messages.find(m => m.role === 'system')?.content ?? undefined;
        const anthropicMessages = toAnthropicMessages(messages.filter(m => m.role !== 'system'));
        const anthropicTools = (tools ?? []).map(t => ({
            name: t.function.name,
            description: t.function.description,
            input_schema: t.function.parameters,
        }));
        const request = {
            model: this.modelId,
            max_tokens: maxTokens,
            system,
            messages: anthropicMessages,
            tools: anthropicTools.length > 0 ? anthropicTools : undefined,
        };
        if (this.temperature !== undefined)
            request.temperature = this.temperature;
        for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
            try {
                const response = await client.messages.create(request);
                const textContent = response.content
                    .filter((b) => b.type === 'text')
                    .map((b) => b.text)
                    .join('') || null;
                const tool_calls = response.content
                    .filter((b) => b.type === 'tool_use')
                    .map((b) => ({
                    id: b.id,
                    type: 'function',
                    function: { name: b.name, arguments: JSON.stringify(b.input ?? {}) },
                }));
                const usage = response.usage
                    ? {
                        prompt_tokens: response.usage.input_tokens,
                        completion_tokens: response.usage.output_tokens,
                        total_tokens: response.usage.input_tokens + response.usage.output_tokens
                    }
                    : undefined;
                return {
                    content: textContent,
                    reasoning: null,
                    tool_calls,
                    finish_reason: response.stop_reason === 'tool_use' ? 'tool_calls' : 'stop',
                    usage,
                };
            }
            catch (err) {
                const status = err?.status ?? err?.statusCode ?? 0;
                if (RETRY_STATUSES.has(status) && attempt < MAX_RETRIES) {
                    const waitMs = backoffDelayMs(attempt);
                    console.warn(`[llm] Anthropic Vertex ${status} on attempt ${attempt + 1}/${MAX_RETRIES + 1} — retrying in ${Math.round(waitMs / 1000)}s`);
                    await sleep(waitMs);
                    continue;
                }
                // Keep the original error reachable for callers that need status/stack.
                throw new Error(`Anthropic Vertex error: ${err?.message ?? String(err)}`, { cause: err });
            }
        }
        // Unreachable in practice: the final attempt always returns or throws above.
        throw new Error('Anthropic Vertex: exceeded max retries');
    }
}
exports.AnthropicVertexClient = AnthropicVertexClient;

/** Default model per tier; each can be overridden through its TIER_*_MODEL environment variable. */
const TIER_MODELS = {
    A: process.env.TIER_A_MODEL ?? 'gemini-2.5-flash',
    B: process.env.TIER_B_MODEL ?? 'claude-sonnet-4-6',
    C: process.env.TIER_C_MODEL ?? 'claude-sonnet-4-6'
};

/**
 * Create the client matching a model id or tier letter.
 *
 * 'A' / 'B' / 'C' are resolved through TIER_MODELS; anything else is used as
 * the model id. Ids starting with "gemini-" use GeminiClient, ids starting
 * with "anthropic/" or "claude-" use AnthropicVertexClient, and everything
 * else uses VertexOpenAIClient. `opts` is forwarded unchanged to whichever
 * client is chosen, so each client picks up the options it understands.
 *
 * @param {string} modelOrTier tier letter or model id
 * @param {{ temperature?: number, projectId?: string, region?: string }} [opts]
 * @returns {GeminiClient | AnthropicVertexClient | VertexOpenAIClient}
 */
function createLLM(modelOrTier, opts) {
    const isTier = modelOrTier === 'A' || modelOrTier === 'B' || modelOrTier === 'C';
    const modelId = isTier ? TIER_MODELS[modelOrTier] : modelOrTier;
    if (modelId.startsWith('gemini-')) {
        return new GeminiClient(modelId, opts);
    }
    if (modelId.startsWith('anthropic/') || modelId.startsWith('claude-')) {
        return new AnthropicVertexClient(modelId, opts);
    }
    return new VertexOpenAIClient(modelId, opts);
}

// ---------------------------------------------------------------------------
// Helper — convert our ToolDefinition[] → LLMTool[] (OpenAI format)
// ---------------------------------------------------------------------------
/**
 * @param {Array<{name: string, description: string, parameters: object}>} tools
 * @returns {Array<{type: 'function', function: {name: string, description: string, parameters: object}}>}
 */
function toOAITools(tools) {
    return tools.map(({ name, description, parameters }) => ({
        type: 'function',
        function: { name, description, parameters }
    }));
}