"use strict";
// LLM client wrappers:
//  - VertexOpenAIClient: Vertex AI's OpenAI-compatible chat/completions endpoint
//  - GeminiClient: Gemini via the @google/genai SDK (OpenAI-format in/out)
// Plus tier-based factory (createLLM) and tool-schema conversion (toOAITools).
Object.defineProperty(exports, "__esModule", { value: true });
exports.GeminiClient = exports.VertexOpenAIClient = void 0;
exports.createLLM = createLLM;
exports.toOAITools = toOAITools;
const google_auth_library_1 = require("google-auth-library");
const genai_1 = require("@google/genai");
const uuid_1 = require("uuid");
// ---------------------------------------------------------------------------
// Vertex AI OpenAI-compatible client
// Used for: zai-org/glm-5-maas, anthropic/claude-sonnet-4-6, etc.
// ---------------------------------------------------------------------------
// Module-level access-token cache shared by all VertexOpenAIClient instances.
let _cachedToken = '';
let _tokenExpiry = 0; // epoch ms after which _cachedToken is considered stale
// Build GoogleAuth with explicit service account credentials when available.
// GCP_SA_KEY_BASE64: base64-encoded service account JSON key — safe to pass as
// an env var since it contains no newlines or special shell characters.
// Falls back to the GCP metadata server (works on VMs with correct scopes).
function buildGoogleAuth() {
    const b64Key = process.env.GCP_SA_KEY_BASE64;
    if (b64Key) {
        try {
            const jsonStr = Buffer.from(b64Key, 'base64').toString('utf8');
            const credentials = JSON.parse(jsonStr);
            return new google_auth_library_1.GoogleAuth({
                credentials,
                scopes: ['https://www.googleapis.com/auth/cloud-platform'],
            });
        }
        catch {
            // Deliberate best-effort: a bad key should not hard-fail startup when
            // the metadata server can still supply credentials.
            console.warn('[llm] GCP_SA_KEY_BASE64 is set but failed to decode/parse — falling back to metadata server');
        }
    }
    return new google_auth_library_1.GoogleAuth({ scopes: ['https://www.googleapis.com/auth/cloud-platform'] });
}
const _googleAuth = buildGoogleAuth();
/**
 * Return a cached GCP access token, refreshing it when within 5 minutes of
 * the (assumed 1-hour) expiry.
 * @returns {Promise<string>} a non-empty bearer token
 * @throws {Error} when the auth library yields an empty token
 */
async function getVertexToken() {
    const now = Date.now();
    if (_cachedToken && now < _tokenExpiry)
        return _cachedToken;
    const client = await _googleAuth.getClient();
    const tokenResponse = await client.getAccessToken();
    // FIX: getAccessToken() may resolve with { token: null }. Previously the
    // null was assigned to the cache and would be sent as "Authorization:
    // Bearer null", surfacing only as an opaque 401. Fail loudly instead.
    if (!tokenResponse.token) {
        throw new Error('Failed to obtain GCP access token (empty token response)');
    }
    _cachedToken = tokenResponse.token;
    _tokenExpiry = now + 55 * 60 * 1000; // tokens last 1hr, refresh at 55min
    return _cachedToken;
}
/**
 * Chat client for Vertex AI's OpenAI-compatible endpoint.
 * Speaks OpenAI message/tool format directly — no conversion needed.
 */
class VertexOpenAIClient {
    /**
     * @param {string} modelId - publisher model id, e.g. "zai-org/glm-5-maas"
     * @param {{projectId?: string, region?: string, temperature?: number}} [opts]
     */
    constructor(modelId, opts) {
        this.modelId = modelId;
        this.projectId = opts?.projectId ?? process.env.GCP_PROJECT_ID ?? 'master-ai-484822';
        this.region = opts?.region ?? 'global';
        this.temperature = opts?.temperature ?? 0.3;
    }
    /**
     * Send a non-streaming chat completion request.
     * @param {Array<object>} messages - OpenAI-format messages
     * @param {Array<object>} [tools] - OpenAI-format tool definitions
     * @param {number} [maxTokens=4096]
     * @returns {Promise<{content: string|null, reasoning: string|null, tool_calls: Array<object>, finish_reason: string, usage: object|undefined}>}
     * @throws {Error} on any non-2xx HTTP response (message includes status + body excerpt)
     */
    async chat(messages, tools, maxTokens = 4096) {
        const token = await getVertexToken();
        // The "global" location uses the bare hostname; regional locations are
        // prefixed (e.g. "us-central1-aiplatform.googleapis.com").
        const base = this.region === 'global'
            ? 'https://aiplatform.googleapis.com'
            : `https://${this.region}-aiplatform.googleapis.com`;
        const url = `${base}/v1/projects/${this.projectId}/locations/${this.region}/endpoints/openapi/chat/completions`;
        const body = {
            model: this.modelId,
            messages,
            max_tokens: maxTokens,
            temperature: this.temperature,
            stream: false,
        };
        if (tools && tools.length > 0) {
            body.tools = tools;
            body.tool_choice = 'auto';
        }
        const res = await fetch(url, {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${token}`,
                'Content-Type': 'application/json',
            },
            body: JSON.stringify(body),
        });
        if (!res.ok) {
            const errText = await res.text();
            // Force token refresh on 401
            if (res.status === 401)
                _tokenExpiry = 0;
            throw new Error(`Vertex API ${res.status}: ${errText.slice(0, 400)}`);
        }
        const data = await res.json();
        const choice = data.choices?.[0];
        const message = choice?.message ?? {};
        return {
            content: message.content ?? null,
            reasoning: message.reasoning_content ?? null,
            tool_calls: message.tool_calls ?? [],
            finish_reason: choice?.finish_reason ?? 'stop',
            usage: data.usage,
        };
    }
}
exports.VertexOpenAIClient = VertexOpenAIClient;
// ---------------------------------------------------------------------------
// Gemini client via @google/genai SDK
// Used for: Tier A (fast/cheap routing, summaries, log parsing)
// Converts to/from OpenAI message format internally.
// ---------------------------------------------------------------------------
class GeminiClient {
    /**
     * @param {string} [modelId='gemini-2.5-flash']
     * @param {{temperature?: number}} [opts]
     */
    constructor(modelId = 'gemini-2.5-flash', opts) {
        this.modelId = modelId;
        this.temperature = opts?.temperature ?? 0.2;
    }
    /**
     * Send a chat request, translating OpenAI-format messages/tools to the
     * Gemini API and the response back to OpenAI shape.
     * @param {Array<object>} messages - OpenAI-format messages
     * @param {Array<object>} [tools] - OpenAI-format tool definitions
     * @param {number} [maxTokens=8192]
     * @returns {Promise<{content: string|null, reasoning: null, tool_calls: Array<object>, finish_reason: string}>}
     * @throws {Error} if GOOGLE_API_KEY is unset or Gemini returns no candidates
     */
    async chat(messages, tools, maxTokens = 8192) {
        const apiKey = process.env.GOOGLE_API_KEY;
        if (!apiKey)
            throw new Error('GOOGLE_API_KEY not set');
        const genai = new genai_1.GoogleGenAI({ apiKey });
        // Gemini takes the system prompt out-of-band (systemInstruction).
        const systemMsg = messages.find(m => m.role === 'system');
        const nonSystem = messages.filter(m => m.role !== 'system');
        const functionDeclarations = (tools ?? []).map(t => ({
            name: t.function.name,
            description: t.function.description,
            parameters: t.function.parameters,
        }));
        const response = await genai.models.generateContent({
            model: this.modelId,
            contents: toGeminiContents(nonSystem),
            config: {
                systemInstruction: systemMsg?.content ?? undefined,
                tools: functionDeclarations.length > 0 ? [{ functionDeclarations }] : undefined,
                temperature: this.temperature,
                maxOutputTokens: maxTokens,
            },
        });
        const candidate = response.candidates?.[0];
        if (!candidate)
            throw new Error('No response from Gemini');
        const parts = candidate.content?.parts ?? [];
        const textContent = parts.filter(p => p.text).map(p => p.text).join('') || null;
        const fnCalls = parts.filter(p => p.functionCall);
        // Synthesize OpenAI-style tool_call ids (Gemini does not provide them).
        const tool_calls = fnCalls.map(p => ({
            id: `call_${(0, uuid_1.v4)().replace(/-/g, '').slice(0, 12)}`,
            type: 'function',
            function: {
                name: p.functionCall.name ?? '',
                arguments: JSON.stringify(p.functionCall.args ?? {}),
            },
        }));
        return {
            content: textContent,
            reasoning: null,
            tool_calls,
            finish_reason: fnCalls.length > 0 ? 'tool_calls' : 'stop',
        };
    }
}
exports.GeminiClient = GeminiClient;
/** Convert OpenAI message format → Gemini Content[] format */
function toGeminiContents(messages) {
    const contents = [];
    for (const msg of messages) {
        if (msg.role === 'assistant') {
            const parts = [];
            if (msg.content)
                parts.push({ text: msg.content });
            for (const tc of msg.tool_calls ?? []) {
                // FIX: arguments come from LLM output and may be malformed JSON;
                // previously an unguarded JSON.parse here aborted the whole
                // conversion. Fall back to empty args, matching the defensive
                // parsing already used for tool results below.
                let args = {};
                try {
                    args = JSON.parse(tc.function.arguments || '{}');
                }
                catch {
                    console.warn(`[llm] unparseable tool_call arguments for ${tc.function.name} — sending empty args`);
                }
                parts.push({ functionCall: { name: tc.function.name, args } });
            }
            contents.push({ role: 'model', parts });
        }
        else if (msg.role === 'tool') {
            // Parse content back — could be JSON or plain text
            let resultValue = msg.content;
            try {
                resultValue = JSON.parse(msg.content ?? 'null');
            }
            catch { /* keep as string */ }
            contents.push({
                role: 'user',
                parts: [{
                        functionResponse: {
                            name: msg.name ?? 'tool',
                            response: { result: resultValue },
                        },
                    }],
            });
        }
        else {
            // user (and any other) roles map straight to a user text part
            contents.push({ role: 'user', parts: [{ text: msg.content ?? '' }] });
        }
    }
    return contents;
}
// Tier → model routing, overridable per-tier via env vars.
const TIER_MODELS = {
    A: process.env.TIER_A_MODEL ?? 'gemini-2.5-flash',
    B: process.env.TIER_B_MODEL ?? 'zai-org/glm-5-maas',
    C: process.env.TIER_C_MODEL ?? 'zai-org/glm-5-maas',
};
/**
 * Factory: build a client from either a tier letter ('A'|'B'|'C') or an
 * explicit model id. Model ids beginning with "gemini-" route to GeminiClient;
 * everything else goes through the Vertex OpenAI-compatible endpoint.
 * @param {string} modelOrTier
 * @param {{temperature?: number}} [opts]
 */
function createLLM(modelOrTier, opts) {
    const modelId = (modelOrTier === 'A' || modelOrTier === 'B' || modelOrTier === 'C')
        ? TIER_MODELS[modelOrTier]
        : modelOrTier;
    if (modelId.startsWith('gemini-')) {
        return new GeminiClient(modelId, opts);
    }
    return new VertexOpenAIClient(modelId, { temperature: opts?.temperature });
}
// ---------------------------------------------------------------------------
// Helper — convert our ToolDefinition[] → LLMTool[] (OpenAI format)
// ---------------------------------------------------------------------------
function toOAITools(tools) {
    return tools.map(t => ({
        type: 'function',
        function: {
            name: t.name,
            description: t.description,
            parameters: t.parameters,
        },
    }));
}