// vibn-frontend/vibn-agent-runner/dist/llm.js

"use strict";
// Interop helper for default imports: reuses an existing `this.__importDefault`
// when one is present; otherwise returns the module unchanged if it is flagged
// `__esModule`, or wraps it as `{ default: mod }` so `.default` is always usable.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.AnthropicVertexClient = exports.GeminiClient = exports.VertexOpenAIClient = void 0;
exports.createLLM = createLLM;
exports.toOAITools = toOAITools;
const google_auth_library_1 = require("google-auth-library");
const genai_1 = require("@google/genai");
const vertex_sdk_1 = __importDefault(require("@anthropic-ai/vertex-sdk"));
const uuid_1 = require("uuid");
/**
 * Removes complete `<tool_calls>…</tool_calls>` and `<think>…</think>` blocks
 * from model output so that markup is not replayed into later turns.
 *
 * @param {string | null | undefined} text - Raw model text.
 * @returns {string | null} The trimmed remainder, or `null` when the input is
 *   falsy or nothing is left once the blocks are removed.
 */
function stripModelMarkup(text) {
    if (!text) {
        return null;
    }
    // Non-greedy patterns: each opening tag pairs with its nearest closing tag.
    const markupBlocks = [
        /<tool_calls>[\s\S]*?<\/tool_calls>/g,
        /<think>[\s\S]*?<\/think>/g,
    ];
    let cleaned = text;
    for (const pattern of markupBlocks) {
        cleaned = cleaned.replace(pattern, "");
    }
    cleaned = cleaned.trim();
    return cleaned === "" ? null : cleaned;
}
// ---------------------------------------------------------------------------
// Vertex AI OpenAI-compatible client
// Used for: zai-org/glm-5-maas, anthropic/claude-sonnet-4-6, etc.
// ---------------------------------------------------------------------------
// Module-level cache used by getVertexToken(): the last access token fetched
// ("" until the first fetch) ...
let _cachedToken = "";
// ... and the epoch-millisecond time after which it is treated as stale.
// Setting this to 0 forces the next getVertexToken() call to fetch a new token.
let _tokenExpiry = 0;
// Creates the GoogleAuth instance used for Vertex requests.
// When GCP_SA_KEY_BASE64 is set it is base64-decoded and parsed as a JSON
// service-account key, which is passed as explicit credentials. If that
// decode/parse (or the construction) throws, a warning is logged and the
// function falls through to a GoogleAuth built without explicit credentials.
// Both variants request the cloud-platform scope.
function buildGoogleAuth() {
    const cloudPlatformScopes = () => ["https://www.googleapis.com/auth/cloud-platform"];
    const encodedKey = process.env.GCP_SA_KEY_BASE64;
    if (encodedKey) {
        try {
            const decoded = Buffer.from(encodedKey, "base64").toString("utf8");
            return new google_auth_library_1.GoogleAuth({
                credentials: JSON.parse(decoded),
                scopes: cloudPlatformScopes(),
            });
        }
        catch {
            console.warn("[llm] GCP_SA_KEY_BASE64 is set but failed to decode/parse — falling back to metadata server");
        }
    }
    return new google_auth_library_1.GoogleAuth({ scopes: cloudPlatformScopes() });
}
// Built once at module load and reused by getVertexToken(); GCP_SA_KEY_BASE64
// is therefore read at load time for this instance.
const _googleAuth = buildGoogleAuth();
/**
 * Returns an access token for Vertex AI requests.
 *
 * The token is cached in the module-level `_cachedToken` and reused until
 * `_tokenExpiry` passes; otherwise a new one is requested from `_googleAuth`.
 *
 * @returns {Promise<string>} A non-empty access token.
 * @throws {Error} If the credentials resolve without a token. Failing here
 *   avoids sending a request with `Authorization: Bearer null`.
 */
async function getVertexToken() {
    const now = Date.now();
    if (_cachedToken && now < _tokenExpiry)
        return _cachedToken;
    const client = await _googleAuth.getClient();
    const tokenResponse = await client.getAccessToken();
    const token = tokenResponse.token;
    if (!token) {
        // Leave the cache untouched so the next call tries again.
        throw new Error("Vertex auth: credentials returned no access token");
    }
    _cachedToken = token;
    _tokenExpiry = now + 55 * 60 * 1000; // tokens last 1hr, refresh at 55min
    return _cachedToken;
}
/**
 * Computes how long to wait before retrying a 429/503 response.
 *
 * `Retry-After` may be delta-seconds ("120") or an HTTP-date. Values that are
 * missing, unparseable, negative, or already in the past fall back to
 * exponential backoff with jitter, so the retry loop never sleeps for NaN
 * (which `setTimeout` treats as "fire immediately").
 *
 * @param {string | null} retryAfter - Raw `Retry-After` header value, if any.
 * @param {number} attempt - Zero-based attempt index that just failed.
 * @returns {number} Delay in milliseconds (header-derived waits cap at 60s,
 *   backoff waits cap at 30s).
 */
function computeVertexRetryDelayMs(retryAfter, attempt) {
    if (retryAfter) {
        const seconds = Number.parseInt(retryAfter, 10);
        if (Number.isFinite(seconds) && seconds >= 0) {
            return Math.min(seconds * 1000, 60000);
        }
        const retryAt = Date.parse(retryAfter);
        if (Number.isFinite(retryAt) && retryAt >= Date.now()) {
            return Math.min(retryAt - Date.now(), 60000);
        }
    }
    return Math.min(2 ** attempt * 2000 + Math.random() * 500, 30000);
}
/**
 * Chat client for the Vertex AI OpenAI-compatible `chat/completions` endpoint.
 * Authenticates with a bearer token from `getVertexToken()`.
 */
class VertexOpenAIClient {
    /**
     * @param {string} modelId - Sent as `model` in the request body.
     * @param {{ projectId?: string, region?: string, temperature?: number }} [opts]
     *   `projectId` falls back to `GCP_PROJECT_ID`, then to a built-in default;
     *   `region` defaults to "global"; `temperature` defaults to 0.3.
     */
    constructor(modelId, opts) {
        this.modelId = modelId;
        this.projectId =
            opts?.projectId ?? process.env.GCP_PROJECT_ID ?? "master-ai-484822";
        this.region = opts?.region ?? "global";
        this.temperature = opts?.temperature ?? 0.3;
    }
    /**
     * Sends one non-streaming chat completion request.
     *
     * @param {Array<object>} messages - OpenAI-format messages, sent as-is.
     * @param {Array<object>} [tools] - OpenAI-format tools; when non-empty they
     *   are sent with `tool_choice: "auto"`.
     * @param {number} [maxTokens=4096] - Sent as `max_tokens`.
     * @returns {Promise<{content: (string|null), reasoning: (string|null), tool_calls: Array<object>, finish_reason: string, usage: *}>}
     *   First choice of the response, with model markup stripped from
     *   `content` and `reasoning_content`.
     * @throws {Error} `Vertex API <status>: <body prefix>` on a non-retryable
     *   status or once retries are exhausted.
     */
    async chat(messages, tools, maxTokens = 4096) {
        // The "global" location uses the un-prefixed host.
        const base = this.region === "global"
            ? "https://aiplatform.googleapis.com"
            : `https://${this.region}-aiplatform.googleapis.com`;
        const url = `${base}/v1/projects/${this.projectId}/locations/${this.region}/endpoints/openapi/chat/completions`;
        const body = {
            model: this.modelId,
            messages,
            max_tokens: maxTokens,
            temperature: this.temperature,
            stream: false,
        };
        if (tools && tools.length > 0) {
            body.tools = tools;
            body.tool_choice = "auto";
        }
        // Retry with backoff on 429 / 503 (rate limit / overload)
        const MAX_RETRIES = 4;
        const RETRY_STATUSES = new Set([429, 503]);
        for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
            // Fetched per attempt so a refreshed token is picked up on retry.
            const token = await getVertexToken();
            const res = await fetch(url, {
                method: "POST",
                headers: {
                    Authorization: `Bearer ${token}`,
                    "Content-Type": "application/json",
                },
                body: JSON.stringify(body),
            });
            if (res.ok) {
                const data = (await res.json());
                const choice = data.choices?.[0];
                const message = choice?.message ?? {};
                return {
                    content: stripModelMarkup(message.content),
                    reasoning: stripModelMarkup(message.reasoning_content),
                    tool_calls: message.tool_calls ?? [],
                    finish_reason: choice?.finish_reason ?? "stop",
                    usage: data.usage,
                };
            }
            const errText = await res.text();
            // Force token refresh on 401 (takes effect on the next token fetch;
            // 401 itself is not retried here).
            if (res.status === 401)
                _tokenExpiry = 0;
            if (RETRY_STATUSES.has(res.status) && attempt < MAX_RETRIES) {
                const waitMs = computeVertexRetryDelayMs(res.headers.get("retry-after"), attempt);
                console.warn(`[llm] Vertex ${res.status} on attempt ${attempt + 1}/${MAX_RETRIES + 1} — retrying in ${Math.round(waitMs / 1000)}s`);
                await new Promise((r) => setTimeout(r, waitMs));
                continue;
            }
            throw new Error(`Vertex API ${res.status}: ${errText.slice(0, 400)}`);
        }
        // TypeScript requires an explicit throw after the loop (unreachable in practice)
        throw new Error("Vertex API: exceeded max retries");
    }
}
exports.VertexOpenAIClient = VertexOpenAIClient;
// ---------------------------------------------------------------------------
// Gemini client via @google/genai SDK
// Used for: Tier A (fast/cheap routing, summaries, log parsing)
// Converts to/from OpenAI message format internally.
// ---------------------------------------------------------------------------
/**
 * Chat client backed by the `@google/genai` SDK. Accepts and returns the same
 * OpenAI-style shapes as the other clients in this file.
 */
class GeminiClient {
    /**
     * @param {string} [modelId="gemini-3.1-pro-preview"] - Model passed to the SDK.
     * @param {{ temperature?: number }} [opts] - `temperature` defaults to 0.2.
     */
    constructor(modelId = "gemini-3.1-pro-preview", opts) {
        this.modelId = modelId;
        this.temperature = opts?.temperature ?? 0.2;
    }
    /**
     * Runs one `generateContent` call.
     *
     * @param {Array<object>} messages - OpenAI-format messages. The first
     *   `system` message becomes the system instruction; all `system` messages
     *   are excluded from the converted contents.
     * @param {Array<object>} [tools] - OpenAI-format tools, converted to
     *   function declarations.
     * @param {number} [maxTokens=8192] - Sent as `maxOutputTokens`.
     * @returns {Promise<{content: (string|null), reasoning: null, tool_calls: Array<object>, finish_reason: string, usage: (object|undefined)}>}
     *   `usage` mirrors the `{prompt_tokens, completion_tokens, total_tokens}`
     *   shape returned by the other clients, or is `undefined` when the
     *   response carries no usage metadata.
     * @throws {Error} If `GOOGLE_API_KEY` is unset or no candidate is returned.
     */
    async chat(messages, tools, maxTokens = 8192) {
        const apiKey = process.env.GOOGLE_API_KEY;
        if (!apiKey)
            throw new Error("GOOGLE_API_KEY not set");
        const genai = new genai_1.GoogleGenAI({ apiKey });
        const systemMsg = messages.find((m) => m.role === "system");
        const nonSystem = messages.filter((m) => m.role !== "system");
        const functionDeclarations = (tools ?? []).map((t) => ({
            name: t.function.name,
            description: t.function.description,
            parameters: t.function.parameters,
        }));
        const response = await genai.models.generateContent({
            model: this.modelId,
            contents: toGeminiContents(nonSystem),
            config: {
                systemInstruction: systemMsg?.content ?? undefined,
                tools: functionDeclarations.length > 0
                    ? [{ functionDeclarations }]
                    : undefined,
                temperature: this.temperature,
                maxOutputTokens: maxTokens,
            },
        });
        const candidate = response.candidates?.[0];
        if (!candidate)
            throw new Error("No response from Gemini");
        const parts = candidate.content?.parts ?? [];
        const textContent = parts
            .filter((p) => p.text)
            .map((p) => p.text)
            .join("") || null;
        const fnCalls = parts.filter((p) => p.functionCall);
        // Each function call gets a locally generated id, built from a UUID.
        const tool_calls = fnCalls.map((p) => ({
            id: `call_${(0, uuid_1.v4)().replace(/-/g, "").slice(0, 12)}`,
            type: "function",
            function: {
                name: p.functionCall.name ?? "",
                arguments: JSON.stringify(p.functionCall.args ?? {}),
            },
        }));
        // Report token usage in the same shape as the other clients.
        const meta = response.usageMetadata;
        let usage;
        if (meta) {
            const promptTokens = meta.promptTokenCount ?? 0;
            const completionTokens = meta.candidatesTokenCount ?? 0;
            usage = {
                prompt_tokens: promptTokens,
                completion_tokens: completionTokens,
                total_tokens: meta.totalTokenCount ?? promptTokens + completionTokens,
            };
        }
        return {
            content: stripModelMarkup(textContent),
            reasoning: null,
            tool_calls,
            finish_reason: fnCalls.length > 0 ? "tool_calls" : "stop",
            usage,
        };
    }
}
exports.GeminiClient = GeminiClient;
/**
 * Convert OpenAI message format → Gemini Content[] format.
 *
 * - `assistant` → role "model", with a text part (if any) plus one
 *   `functionCall` part per tool call (arguments JSON-parsed).
 * - `tool` → role "user" with a single `functionResponse` part. Content is
 *   JSON-parsed when possible, otherwise kept as the raw string.
 * - anything else → role "user" with a text part.
 *
 * Tool messages usually identify their call by `tool_call_id` rather than
 * `name`, so the function name is looked up from the earlier assistant tool
 * call with that id. `msg.name` still wins when present; "tool" remains the
 * last-resort fallback.
 *
 * @param {Array<object>} messages - OpenAI-format messages (system messages
 *   are not treated specially here).
 * @returns {Array<{role: string, parts: Array<object>}>}
 * @throws {SyntaxError} If an assistant tool call carries malformed JSON arguments.
 */
function toGeminiContents(messages) {
    const contents = [];
    // tool_call id → function name, filled in as assistant turns are seen.
    const callNames = new Map();
    for (const msg of messages) {
        if (msg.role === "assistant") {
            const parts = [];
            if (msg.content)
                parts.push({ text: msg.content });
            for (const tc of msg.tool_calls ?? []) {
                if (tc.id)
                    callNames.set(tc.id, tc.function.name);
                parts.push({
                    functionCall: {
                        name: tc.function.name,
                        args: JSON.parse(tc.function.arguments || "{}"),
                    },
                });
            }
            contents.push({ role: "model", parts });
        }
        else if (msg.role === "tool") {
            // Parse content back — could be JSON or plain text
            let resultValue = msg.content;
            try {
                resultValue = JSON.parse(msg.content ?? "null");
            }
            catch {
                /* keep as string */
            }
            contents.push({
                role: "user",
                parts: [
                    {
                        functionResponse: {
                            name: msg.name ?? callNames.get(msg.tool_call_id) ?? "tool",
                            response: { result: resultValue },
                        },
                    },
                ],
            });
        }
        else {
            contents.push({ role: "user", parts: [{ text: msg.content ?? "" }] });
        }
    }
    return contents;
}
// ---------------------------------------------------------------------------
// Anthropic Vertex client
// Used for: claude-* models via Vertex AI (proper Anthropic Messages API)
// Handles tool_calls by converting to/from Anthropic's tool_use blocks.
// ---------------------------------------------------------------------------
/**
 * Chat client for Claude models through the Anthropic Vertex SDK. Converts
 * OpenAI-style messages and tools to Anthropic's format and back.
 */
class AnthropicVertexClient {
    /**
     * @param {string} modelId - Model name; a leading "anthropic/" is removed.
     * @param {{ projectId?: string, region?: string }} [opts]
     *   `projectId` falls back to `GCP_PROJECT_ID`, then a built-in default;
     *   `region` falls back to `CLAUDE_REGION`, then "us-east5".
     */
    constructor(modelId, opts) {
        // Strip the "anthropic/" prefix if present — the SDK uses bare model names
        this.modelId = modelId.startsWith("anthropic/")
            ? modelId.slice(10)
            : modelId;
        this.projectId =
            opts?.projectId ?? process.env.GCP_PROJECT_ID ?? "master-ai-484822";
        this.region = opts?.region ?? process.env.CLAUDE_REGION ?? "us-east5";
    }
    /**
     * Creates an SDK client for this project/region. When `GCP_SA_KEY_BASE64`
     * is set and decodes to valid JSON, it is supplied as explicit GoogleAuth
     * credentials; otherwise (or on decode failure, after a warning) the SDK
     * is constructed without an explicit `googleAuth`.
     */
    buildClient() {
        const b64Key = process.env.GCP_SA_KEY_BASE64;
        if (b64Key) {
            try {
                const jsonStr = Buffer.from(b64Key, "base64").toString("utf8");
                const credentials = JSON.parse(jsonStr);
                return new vertex_sdk_1.default({
                    projectId: this.projectId,
                    region: this.region,
                    googleAuth: new google_auth_library_1.GoogleAuth({
                        credentials,
                        scopes: ["https://www.googleapis.com/auth/cloud-platform"],
                    }),
                });
            }
            catch {
                console.warn("[llm] AnthropicVertex: SA key decode failed, falling back to metadata server");
            }
        }
        return new vertex_sdk_1.default({
            projectId: this.projectId,
            region: this.region,
        });
    }
    /**
     * Sends one Messages API request, retrying 429/503 with exponential backoff.
     *
     * @param {Array<object>} messages - OpenAI-format messages. The first
     *   `system` message is sent as `system`; all `system` messages are
     *   excluded from the converted list.
     * @param {Array<object>} [tools] - OpenAI-format tools.
     * @param {number} [maxTokens=8192] - Sent as `max_tokens`.
     * @returns {Promise<{content: (string|null), reasoning: null, tool_calls: Array<object>, finish_reason: string, usage: (object|undefined)}>}
     * @throws {Error} `Anthropic Vertex error: <message>`; the original error
     *   is available as `cause`, and its HTTP status (if any) as `status`.
     */
    async chat(messages, tools, maxTokens = 8192) {
        const client = this.buildClient();
        const system = messages.find((m) => m.role === "system")?.content ?? undefined;
        const nonSystem = messages.filter((m) => m.role !== "system");
        // Convert OpenAI message format → Anthropic format
        const anthropicMessages = nonSystem.map((m) => {
            if (m.role === "assistant") {
                const parts = [];
                if (m.content)
                    parts.push({ type: "text", text: m.content });
                for (const tc of m.tool_calls ?? []) {
                    parts.push({
                        type: "tool_use",
                        id: tc.id,
                        name: tc.function.name,
                        input: JSON.parse(tc.function.arguments || "{}"),
                    });
                }
                return {
                    role: "assistant",
                    // A lone text part is sent as a plain string.
                    content: parts.length === 1 && parts[0].type === "text"
                        ? parts[0].text
                        : parts,
                };
            }
            if (m.role === "tool") {
                // Tool results travel as a user turn holding a tool_result block.
                return {
                    role: "user",
                    content: [
                        {
                            type: "tool_result",
                            tool_use_id: m.tool_call_id,
                            content: m.content ?? "",
                        },
                    ],
                };
            }
            return { role: "user", content: m.content ?? "" };
        });
        const anthropicTools = (tools ?? []).map((t) => ({
            name: t.function.name,
            description: t.function.description,
            input_schema: t.function.parameters,
        }));
        const MAX_RETRIES = 4;
        const RETRY_STATUSES = new Set([429, 503]);
        for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
            try {
                const response = await client.messages.create({
                    model: this.modelId,
                    max_tokens: maxTokens,
                    system: system ?? undefined,
                    messages: anthropicMessages,
                    tools: anthropicTools.length > 0 ? anthropicTools : undefined,
                });
                const textContent = response.content
                    .filter((b) => b.type === "text")
                    .map((b) => b.text)
                    .join("") || null;
                const tool_calls = response.content
                    .filter((b) => b.type === "tool_use")
                    .map((b) => ({
                    id: b.id,
                    type: "function",
                    function: {
                        name: b.name,
                        arguments: JSON.stringify(b.input ?? {}),
                    },
                }));
                return {
                    content: stripModelMarkup(textContent),
                    reasoning: null,
                    tool_calls,
                    finish_reason: response.stop_reason === "tool_use" ? "tool_calls" : "stop",
                    usage: response.usage
                        ? {
                            prompt_tokens: response.usage.input_tokens,
                            completion_tokens: response.usage.output_tokens,
                            total_tokens: response.usage.input_tokens + response.usage.output_tokens,
                        }
                        : undefined,
                };
            }
            catch (err) {
                const status = err?.status ?? err?.statusCode ?? 0;
                if (RETRY_STATUSES.has(status) && attempt < MAX_RETRIES) {
                    const waitMs = Math.min(2 ** attempt * 2000 + Math.random() * 500, 30000);
                    console.warn(`[llm] Anthropic Vertex ${status} on attempt ${attempt + 1}/${MAX_RETRIES + 1} — retrying in ${Math.round(waitMs / 1000)}s`);
                    await new Promise((r) => setTimeout(r, waitMs));
                    continue;
                }
                // Keep the original error (stack, SDK fields) reachable for callers
                // and logs instead of flattening it to a message string.
                const wrapped = new Error(`Anthropic Vertex error: ${err?.message ?? String(err)}`, { cause: err });
                if (status)
                    wrapped.status = status;
                throw wrapped;
            }
        }
        throw new Error("Anthropic Vertex: exceeded max retries");
    }
}
exports.AnthropicVertexClient = AnthropicVertexClient;
// Tier letter → model id, used by createLLM() when it is given "A", "B" or "C".
// Each entry can be overridden through its TIER_*_MODEL environment variable;
// the values are read once, when this module is loaded.
const TIER_MODELS = {
    A: process.env.TIER_A_MODEL ?? "gemini-3.1-pro-preview",
    B: process.env.TIER_B_MODEL ?? "claude-sonnet-4-6",
    C: process.env.TIER_C_MODEL ?? "claude-sonnet-4-6",
};
/**
 * Builds the client that matches a model id or tier letter.
 *
 * "A" / "B" / "C" are first resolved through TIER_MODELS. Ids starting with
 * "gemini-" get a GeminiClient (with `opts`), ids starting with "anthropic/"
 * or "claude-" get an AnthropicVertexClient (model id only), and everything
 * else gets a VertexOpenAIClient carrying only `opts.temperature`.
 *
 * @param {string} modelOrTier - A tier letter or a concrete model id.
 * @param {{ temperature?: number }} [opts]
 */
function createLLM(modelOrTier, opts) {
    const isTier = ["A", "B", "C"].includes(modelOrTier);
    let modelId = modelOrTier;
    if (isTier) {
        modelId = TIER_MODELS[modelOrTier];
    }
    const hasPrefix = (prefix) => modelId.startsWith(prefix);
    if (hasPrefix("gemini-")) {
        return new GeminiClient(modelId, opts);
    }
    const isClaude = hasPrefix("anthropic/") || hasPrefix("claude-");
    return isClaude
        ? new AnthropicVertexClient(modelId)
        : new VertexOpenAIClient(modelId, { temperature: opts?.temperature });
}
// ---------------------------------------------------------------------------
// Helper — wraps each { name, description, parameters } tool definition in the
// OpenAI tool envelope: { type: "function", function: { ... } }
// ---------------------------------------------------------------------------
function toOAITools(tools) {
    const wrapAsFunctionTool = ({ name, description, parameters }) => {
        const fn = { name, description, parameters };
        return { type: "function", function: fn };
    };
    return tools.map((definition) => wrapAsFunctionTool(definition));
}