"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.AnthropicVertexClient = exports.GeminiClient = exports.VertexOpenAIClient = void 0; exports.createLLM = createLLM; exports.toOAITools = toOAITools; const google_auth_library_1 = require("google-auth-library"); const genai_1 = require("@google/genai"); const vertex_sdk_1 = __importDefault(require("@anthropic-ai/vertex-sdk")); const uuid_1 = require("uuid"); /** * Strips DeepSeek-specific XML tags like and from content * so it doesn't leak into the model's history and cause subsequent hallucinations. */ function stripModelMarkup(text) { if (!text) return null; return (text .replace(/[\s\S]*?<\/tool_calls>/g, "") .replace(/[\s\S]*?<\/think>/g, "") .trim() || null); } // --------------------------------------------------------------------------- // Vertex AI OpenAI-compatible client // Used for: zai-org/glm-5-maas, anthropic/claude-sonnet-4-6, etc. // --------------------------------------------------------------------------- let _cachedToken = ""; let _tokenExpiry = 0; // Build GoogleAuth with explicit service account credentials when available. // GCP_SA_KEY_BASE64: base64-encoded service account JSON key — safe to pass as // an env var since it contains no newlines or special shell characters. // Falls back to the GCP metadata server (works on VMs with correct scopes). 
/**
 * Creates the GoogleAuth instance used for Vertex AI requests.
 *
 * When the GCP_SA_KEY_BASE64 environment variable is set it is base64-decoded
 * and parsed as JSON credentials. If decoding/parsing fails a warning is logged
 * and a GoogleAuth without explicit credentials is returned instead.
 *
 * @returns {GoogleAuth} Auth object scoped to the cloud-platform scope.
 */
function buildGoogleAuth() {
    const scopes = ["https://www.googleapis.com/auth/cloud-platform"];
    const b64Key = process.env.GCP_SA_KEY_BASE64;
    if (b64Key) {
        try {
            const jsonStr = Buffer.from(b64Key, "base64").toString("utf8");
            const credentials = JSON.parse(jsonStr);
            return new google_auth_library_1.GoogleAuth({ credentials, scopes });
        }
        catch {
            console.warn("[llm] GCP_SA_KEY_BASE64 is set but failed to decode/parse — falling back to metadata server");
        }
    }
    return new google_auth_library_1.GoogleAuth({ scopes });
}
const _googleAuth = buildGoogleAuth();
/**
 * Returns a bearer token for Vertex AI, reusing the module-level cache while
 * it is still considered valid.
 *
 * @returns {Promise<string>} Access token.
 * @throws {Error} When the credentials yield no token.
 */
async function getVertexToken() {
    const now = Date.now();
    if (_cachedToken && now < _tokenExpiry)
        return _cachedToken;
    const client = await _googleAuth.getClient();
    const tokenResponse = await client.getAccessToken();
    if (!tokenResponse?.token) {
        // Previously a null token was cached and sent as "Bearer null".
        throw new Error("Vertex auth: credentials returned no access token");
    }
    _cachedToken = tokenResponse.token;
    // Tokens last 1hr, so refresh at 55min at the latest. The auth client may
    // hand back a token it minted earlier, so when it reports an expiry we
    // never cache past it (minus a one-minute safety margin).
    // NOTE(review): relies on `client.credentials.expiry_date` being epoch
    // milliseconds as exposed by google-auth-library — confirm for the
    // credential type in use.
    const fallbackExpiry = now + 55 * 60 * 1000;
    const reportedExpiry = client.credentials?.expiry_date;
    _tokenExpiry =
        typeof reportedExpiry === "number"
            ? Math.min(fallbackExpiry, reportedExpiry - 60 * 1000)
            : fallbackExpiry;
    return _cachedToken;
}
/**
 * Exponential backoff with jitter: 2s, 4s, 8s, … capped at 30s.
 *
 * @param {number} attempt - Zero-based attempt index.
 * @returns {number} Delay in milliseconds.
 */
function backoffDelayMs(attempt) {
    return Math.min(2 ** attempt * 2000 + Math.random() * 500, 30000);
}
/**
 * Interprets a Retry-After header value, which may be either a number of
 * seconds or an HTTP-date.
 *
 * @param {string | null | undefined} headerValue - Raw header value.
 * @returns {number | null} Delay in milliseconds clamped to [0, 60000], or
 *   `null` when the header is absent or unparseable (caller should back off).
 */
function retryAfterDelayMs(headerValue) {
    const trimmed = headerValue?.trim();
    if (!trimmed)
        return null;
    let delayMs;
    const seconds = Number(trimmed);
    if (Number.isFinite(seconds)) {
        delayMs = seconds * 1000;
    }
    else {
        const retryAt = Date.parse(trimmed);
        if (Number.isNaN(retryAt))
            return null;
        delayMs = retryAt - Date.now();
    }
    return Math.min(Math.max(delayMs, 0), 60000);
}
/**
 * Parses the JSON `arguments` string of an OpenAI-style tool call.
 * Malformed JSON is logged and replaced by `{}` so that one bad tool call in
 * the history does not abort the whole request.
 *
 * @param {unknown} raw - Serialized arguments (or an already-parsed object).
 * @returns {unknown} Parsed arguments.
 */
function parseToolArguments(raw) {
    if (!raw)
        return {};
    if (typeof raw === "object")
        return raw;
    try {
        return JSON.parse(raw);
    }
    catch {
        console.warn("[llm] tool call arguments are not valid JSON — using {}");
        return {};
    }
}
/**
 * Chat client for the Vertex AI OpenAI-compatible chat/completions endpoint.
 */
class VertexOpenAIClient {
    /**
     * @param {string} modelId - Model identifier sent as `model`.
     * @param {{projectId?: string, region?: string, temperature?: number}} [opts]
     *   Defaults: projectId from GCP_PROJECT_ID (then a built-in project id),
     *   region "global", temperature 0.3.
     */
    constructor(modelId, opts) {
        this.modelId = modelId;
        this.projectId =
            opts?.projectId ?? process.env.GCP_PROJECT_ID ?? "master-ai-484822";
        this.region = opts?.region ?? "global";
        this.temperature = opts?.temperature ?? 0.3;
    }
    /**
     * Sends one non-streaming chat completion request.
     *
     * Retries with backoff on 429/503 (honouring Retry-After when parseable)
     * and retries once with a fresh token on 401.
     *
     * @param {Array<object>} messages - OpenAI-format messages.
     * @param {Array<object>} [tools] - OpenAI-format tool definitions.
     * @param {number} [maxTokens=4096] - Sent as `max_tokens`.
     * @returns {Promise<{content: string|null, reasoning: string|null,
     *   tool_calls: Array<object>, finish_reason: string, usage: unknown}>}
     * @throws {Error} On a non-retryable status or when retries are exhausted.
     */
    async chat(messages, tools, maxTokens = 4096) {
        const base = this.region === "global"
            ? "https://aiplatform.googleapis.com"
            : `https://${this.region}-aiplatform.googleapis.com`;
        const url = `${base}/v1/projects/${this.projectId}/locations/${this.region}/endpoints/openapi/chat/completions`;
        const body = {
            model: this.modelId,
            messages,
            max_tokens: maxTokens,
            temperature: this.temperature,
            stream: false,
        };
        if (tools && tools.length > 0) {
            body.tools = tools;
            body.tool_choice = "auto";
        }
        // Retry with exponential backoff on 429 / 503 (rate limit / overload)
        const MAX_RETRIES = 4;
        const RETRY_STATUSES = new Set([429, 503]);
        let retriedAfterAuthFailure = false;
        for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
            const token = await getVertexToken();
            const res = await fetch(url, {
                method: "POST",
                headers: {
                    Authorization: `Bearer ${token}`,
                    "Content-Type": "application/json",
                },
                body: JSON.stringify(body),
            });
            if (res.ok) {
                const data = (await res.json());
                const choice = data.choices?.[0];
                const message = choice?.message ?? {};
                return {
                    content: stripModelMarkup(message.content),
                    reasoning: stripModelMarkup(message.reasoning_content),
                    tool_calls: message.tool_calls ?? [],
                    finish_reason: choice?.finish_reason ?? "stop",
                    usage: data.usage,
                };
            }
            const errText = await res.text();
            if (res.status === 401) {
                // Force token refresh on 401, then give the request one more
                // chance instead of failing a call a fresh token would fix.
                _tokenExpiry = 0;
                if (!retriedAfterAuthFailure && attempt < MAX_RETRIES) {
                    retriedAfterAuthFailure = true;
                    console.warn("[llm] Vertex 401 — refreshing access token and retrying once");
                    continue;
                }
            }
            if (RETRY_STATUSES.has(res.status) && attempt < MAX_RETRIES) {
                // Prefer the server's Retry-After hint; fall back to backoff
                // when it is missing or not parseable.
                const waitMs = retryAfterDelayMs(res.headers.get("retry-after")) ??
                    backoffDelayMs(attempt);
                console.warn(`[llm] Vertex ${res.status} on attempt ${attempt + 1}/${MAX_RETRIES + 1} — retrying in ${Math.round(waitMs / 1000)}s`);
                await new Promise((r) => setTimeout(r, waitMs));
                continue;
            }
            throw new Error(`Vertex API ${res.status}: ${errText.slice(0, 400)}`);
        }
        // TypeScript requires an explicit throw after the loop (unreachable in practice)
        throw new Error("Vertex API: exceeded max retries");
    }
}
exports.VertexOpenAIClient = VertexOpenAIClient;
// ---------------------------------------------------------------------------
// Gemini client via @google/genai SDK
// Used for: Tier A (fast/cheap routing, summaries, log parsing)
// Converts to/from OpenAI message format internally.
// ---------------------------------------------------------------------------
/**
 * Chat client backed by the @google/genai SDK, authenticated with
 * GOOGLE_API_KEY.
 */
class GeminiClient {
    /**
     * @param {string} [modelId="gemini-3.1-pro-preview"] - Gemini model name.
     * @param {{temperature?: number}} [opts] - Temperature defaults to 0.2.
     */
    constructor(modelId = "gemini-3.1-pro-preview", opts) {
        this.modelId = modelId;
        this.temperature = opts?.temperature ?? 0.2;
    }
    /**
     * Sends one generateContent request built from OpenAI-format input.
     *
     * @param {Array<object>} messages - OpenAI-format messages; the first
     *   `system` message becomes the system instruction.
     * @param {Array<object>} [tools] - OpenAI-format tool definitions.
     * @param {number} [maxTokens=8192] - Sent as `maxOutputTokens`.
     * @returns {Promise<{content: string|null, reasoning: null,
     *   tool_calls: Array<object>, finish_reason: string, usage?: object}>}
     * @throws {Error} When GOOGLE_API_KEY is unset or no candidate is returned.
     */
    async chat(messages, tools, maxTokens = 8192) {
        const apiKey = process.env.GOOGLE_API_KEY;
        if (!apiKey)
            throw new Error("GOOGLE_API_KEY not set");
        const genai = new genai_1.GoogleGenAI({ apiKey });
        const systemMsg = messages.find((m) => m.role === "system");
        const nonSystem = messages.filter((m) => m.role !== "system");
        const functionDeclarations = (tools ?? []).map((t) => ({
            name: t.function.name,
            description: t.function.description,
            parameters: t.function.parameters,
        }));
        const response = await genai.models.generateContent({
            model: this.modelId,
            contents: toGeminiContents(nonSystem),
            config: {
                systemInstruction: systemMsg?.content ?? undefined,
                tools: functionDeclarations.length > 0
                    ? [{ functionDeclarations }]
                    : undefined,
                temperature: this.temperature,
                maxOutputTokens: maxTokens,
            },
        });
        const candidate = response.candidates?.[0];
        if (!candidate)
            throw new Error("No response from Gemini");
        const parts = candidate.content?.parts ?? [];
        const textContent = parts
            .filter((p) => p.text)
            .map((p) => p.text)
            .join("") || null;
        const fnCalls = parts.filter((p) => p.functionCall);
        // Gemini does not supply call ids, so synthesize OpenAI-style ones.
        const tool_calls = fnCalls.map((p) => ({
            id: `call_${(0, uuid_1.v4)().replace(/-/g, "").slice(0, 12)}`,
            type: "function",
            function: {
                name: p.functionCall.name ?? "",
                arguments: JSON.stringify(p.functionCall.args ?? {}),
            },
        }));
        // Report token usage in the same shape as the other clients.
        // NOTE(review): field names follow @google/genai `usageMetadata`.
        const meta = response.usageMetadata;
        const promptTokens = meta?.promptTokenCount ?? 0;
        const completionTokens = meta?.candidatesTokenCount ?? 0;
        const usage = meta
            ? {
                prompt_tokens: promptTokens,
                completion_tokens: completionTokens,
                total_tokens: meta.totalTokenCount ?? promptTokens + completionTokens,
            }
            : undefined;
        return {
            content: stripModelMarkup(textContent),
            reasoning: null,
            tool_calls,
            finish_reason: fnCalls.length > 0 ? "tool_calls" : "stop",
            usage,
        };
    }
}
exports.GeminiClient = GeminiClient;
/**
 * Convert OpenAI message format → Gemini Content[] format.
 *
 * Assistant messages become `model` turns (text plus functionCall parts), tool
 * messages become `user` turns carrying a functionResponse, and everything
 * else becomes a `user` text turn.
 *
 * @param {Array<object>} messages - OpenAI-format messages (no system role).
 * @returns {Array<{role: string, parts: Array<object>}>} Gemini contents.
 */
function toGeminiContents(messages) {
    const contents = [];
    // OpenAI tool messages reference their call by id and usually carry no
    // `name`; remember each call's function name so the functionResponse can
    // be labelled with the function it answers.
    const toolNameByCallId = new Map();
    for (const msg of messages) {
        if (msg.role === "assistant") {
            const parts = [];
            if (msg.content)
                parts.push({ text: msg.content });
            for (const tc of msg.tool_calls ?? []) {
                if (tc.id)
                    toolNameByCallId.set(tc.id, tc.function.name);
                parts.push({
                    functionCall: {
                        name: tc.function.name,
                        args: parseToolArguments(tc.function.arguments),
                    },
                });
            }
            contents.push({ role: "model", parts });
        }
        else if (msg.role === "tool") {
            // Parse content back — could be JSON or plain text
            let resultValue = msg.content;
            try {
                resultValue = JSON.parse(msg.content ?? "null");
            }
            catch {
                /* keep as string */
            }
            contents.push({
                role: "user",
                parts: [
                    {
                        functionResponse: {
                            name: msg.name ??
                                toolNameByCallId.get(msg.tool_call_id) ??
                                "tool",
                            response: { result: resultValue },
                        },
                    },
                ],
            });
        }
        else {
            contents.push({ role: "user", parts: [{ text: msg.content ?? "" }] });
        }
    }
    return contents;
}
// ---------------------------------------------------------------------------
// Anthropic Vertex client
// Used for: claude-* models via Vertex AI (proper Anthropic Messages API)
// Handles tool_calls by converting to/from Anthropic's tool_use blocks.
// ---------------------------------------------------------------------------
/**
 * Chat client for claude-* models through the Anthropic Vertex SDK.
 */
class AnthropicVertexClient {
    /**
     * @param {string} modelId - Model name, with or without "anthropic/".
     * @param {{projectId?: string, region?: string, temperature?: number}} [opts]
     *   Defaults: projectId from GCP_PROJECT_ID (then a built-in project id),
     *   region from CLAUDE_REGION (then "us-east5"). `temperature` is only
     *   sent when provided.
     */
    constructor(modelId, opts) {
        // Strip the "anthropic/" prefix if present — the SDK uses bare model names
        const prefix = "anthropic/";
        this.modelId = modelId.startsWith(prefix)
            ? modelId.slice(prefix.length)
            : modelId;
        this.projectId =
            opts?.projectId ?? process.env.GCP_PROJECT_ID ?? "master-ai-484822";
        this.region = opts?.region ?? process.env.CLAUDE_REGION ?? "us-east5";
        this.temperature = opts?.temperature;
        this._client = undefined;
    }
    /**
     * Builds a new SDK client, using GCP_SA_KEY_BASE64 credentials when they
     * decode and parse; otherwise lets the SDK resolve credentials itself.
     *
     * @returns {object} AnthropicVertex SDK client.
     */
    buildClient() {
        const b64Key = process.env.GCP_SA_KEY_BASE64;
        if (b64Key) {
            try {
                const jsonStr = Buffer.from(b64Key, "base64").toString("utf8");
                const credentials = JSON.parse(jsonStr);
                return new vertex_sdk_1.default({
                    projectId: this.projectId,
                    region: this.region,
                    googleAuth: new google_auth_library_1.GoogleAuth({
                        credentials,
                        scopes: ["https://www.googleapis.com/auth/cloud-platform"],
                    }),
                });
            }
            catch {
                console.warn("[llm] AnthropicVertex: SA key decode failed, falling back to metadata server");
            }
        }
        return new vertex_sdk_1.default({
            projectId: this.projectId,
            region: this.region,
        });
    }
    /**
     * Sends one Messages API request built from OpenAI-format input and maps
     * the reply back to the OpenAI-style result shape. Retries with backoff
     * on 429/503.
     *
     * @param {Array<object>} messages - OpenAI-format messages.
     * @param {Array<object>} [tools] - OpenAI-format tool definitions.
     * @param {number} [maxTokens=8192] - Sent as `max_tokens`.
     * @returns {Promise<{content: string|null, reasoning: null,
     *   tool_calls: Array<object>, finish_reason: string, usage?: object}>}
     * @throws {Error} Wrapping the SDK error (original kept as `cause`, HTTP
     *   status copied to `status` when known).
     */
    async chat(messages, tools, maxTokens = 8192) {
        // Reuse one SDK client per instance so credentials are not rebuilt
        // on every call.
        if (!this._client)
            this._client = this.buildClient();
        const client = this._client;
        const system = messages.find((m) => m.role === "system")?.content ?? undefined;
        const nonSystem = messages.filter((m) => m.role !== "system");
        // Convert OpenAI message format → Anthropic format
        const anthropicMessages = nonSystem.map((m) => {
            if (m.role === "assistant") {
                const parts = [];
                if (m.content)
                    parts.push({ type: "text", text: m.content });
                for (const tc of m.tool_calls ?? []) {
                    parts.push({
                        type: "tool_use",
                        id: tc.id,
                        name: tc.function.name,
                        input: parseToolArguments(tc.function.arguments),
                    });
                }
                return {
                    role: "assistant",
                    content: parts.length === 1 && parts[0].type === "text"
                        ? parts[0].text
                        : parts,
                };
            }
            if (m.role === "tool") {
                return {
                    role: "user",
                    content: [
                        {
                            type: "tool_result",
                            tool_use_id: m.tool_call_id,
                            content: m.content ?? "",
                        },
                    ],
                };
            }
            return { role: "user", content: m.content ?? "" };
        });
        const anthropicTools = (tools ?? []).map((t) => ({
            name: t.function.name,
            description: t.function.description,
            input_schema: t.function.parameters,
        }));
        const request = {
            model: this.modelId,
            max_tokens: maxTokens,
            system: system ?? undefined,
            messages: anthropicMessages,
            tools: anthropicTools.length > 0 ? anthropicTools : undefined,
        };
        // Only send a temperature the caller explicitly asked for.
        if (this.temperature !== undefined)
            request.temperature = this.temperature;
        const MAX_RETRIES = 4;
        const RETRY_STATUSES = new Set([429, 503]);
        for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
            try {
                const response = await client.messages.create(request);
                const textContent = response.content
                    .filter((b) => b.type === "text")
                    .map((b) => b.text)
                    .join("") || null;
                const tool_calls = response.content
                    .filter((b) => b.type === "tool_use")
                    .map((b) => ({
                    id: b.id,
                    type: "function",
                    function: {
                        name: b.name,
                        arguments: JSON.stringify(b.input ?? {}),
                    },
                }));
                return {
                    content: stripModelMarkup(textContent),
                    reasoning: null,
                    tool_calls,
                    finish_reason: response.stop_reason === "tool_use" ? "tool_calls" : "stop",
                    usage: response.usage
                        ? {
                            prompt_tokens: response.usage.input_tokens,
                            completion_tokens: response.usage.output_tokens,
                            total_tokens: response.usage.input_tokens +
                                response.usage.output_tokens,
                        }
                        : undefined,
                };
            }
            catch (err) {
                const status = err?.status ?? err?.statusCode ?? 0;
                if (RETRY_STATUSES.has(status) && attempt < MAX_RETRIES) {
                    const waitMs = backoffDelayMs(attempt);
                    console.warn(`[llm] Anthropic Vertex ${status} on attempt ${attempt + 1}/${MAX_RETRIES + 1} — retrying in ${Math.round(waitMs / 1000)}s`);
                    await new Promise((r) => setTimeout(r, waitMs));
                    continue;
                }
                // Keep the original error and its HTTP status reachable for
                // callers while preserving the existing message format.
                const wrapped = new Error(`Anthropic Vertex error: ${err?.message ?? String(err)}`, { cause: err });
                if (status)
                    wrapped.status = status;
                throw wrapped;
            }
        }
        throw new Error("Anthropic Vertex: exceeded max retries");
    }
}
exports.AnthropicVertexClient = AnthropicVertexClient;
// Default model per tier; each can be overridden through its TIER_*_MODEL
// environment variable.
const TIER_MODELS = {
    A: process.env.TIER_A_MODEL ?? "gemini-3.1-pro-preview",
    B: process.env.TIER_B_MODEL ?? "claude-sonnet-4-6",
    C: process.env.TIER_C_MODEL ?? "claude-sonnet-4-6",
};
/**
 * Creates the client matching a model id or a tier letter.
 *
 * "A" / "B" / "C" are resolved through TIER_MODELS. Ids starting with
 * "gemini-" use GeminiClient, ids starting with "anthropic/" or "claude-" use
 * AnthropicVertexClient, and anything else uses VertexOpenAIClient.
 *
 * @param {string} modelOrTier - Tier letter or explicit model id.
 * @param {{temperature?: number}} [opts] - Optional sampling temperature.
 * @returns {GeminiClient | AnthropicVertexClient | VertexOpenAIClient}
 */
function createLLM(modelOrTier, opts) {
    const modelId = modelOrTier === "A" || modelOrTier === "B" || modelOrTier === "C"
        ? TIER_MODELS[modelOrTier]
        : modelOrTier;
    if (modelId.startsWith("gemini-")) {
        return new GeminiClient(modelId, opts);
    }
    if (modelId.startsWith("anthropic/") || modelId.startsWith("claude-")) {
        // Forward the temperature so it is honoured for Claude models too.
        return new AnthropicVertexClient(modelId, {
            temperature: opts?.temperature,
        });
    }
    return new VertexOpenAIClient(modelId, { temperature: opts?.temperature });
}
// ---------------------------------------------------------------------------
// Helper — convert our ToolDefinition[] → LLMTool[] (OpenAI format)
// ---------------------------------------------------------------------------
/**
 * @param {Array<{name: string, description: string, parameters: object}>} tools
 * @returns {Array<{type: "function", function: object}>} OpenAI-format tools.
 */
function toOAITools(tools) {
    return tools.map((t) => ({
        type: "function",
        function: {
            name: t.name,
            description: t.description,
            parameters: t.parameters,
        },
    }));
}