407 lines
17 KiB
JavaScript
407 lines
17 KiB
JavaScript
"use strict";
|
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
};
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.AnthropicVertexClient = exports.GeminiClient = exports.VertexOpenAIClient = void 0;
|
|
exports.createLLM = createLLM;
|
|
exports.toOAITools = toOAITools;
|
|
const google_auth_library_1 = require("google-auth-library");
|
|
const genai_1 = require("@google/genai");
|
|
const vertex_sdk_1 = __importDefault(require("@anthropic-ai/vertex-sdk"));
|
|
const uuid_1 = require("uuid");
|
|
/**
 * Removes model-emitted markup blocks from a piece of response text so the
 * markup is not carried forward into the conversation history.
 *
 * Two kinds of block are dropped, wherever they occur and however many lines
 * they span: `<tool_calls>…</tool_calls>` and `<think>…</think>`. Only properly
 * closed blocks match; an unclosed opening tag is left untouched.
 *
 * @param {string|null|undefined} text Raw text returned by a model.
 * @returns {string|null} The remaining text with surrounding whitespace
 *   trimmed, or `null` when the input is falsy or nothing is left.
 */
function stripModelMarkup(text) {
    if (!text) {
        return null;
    }
    const withoutToolCalls = text.replace(/<tool_calls>[\s\S]*?<\/tool_calls>/g, "");
    const withoutThinking = withoutToolCalls.replace(/<think>[\s\S]*?<\/think>/g, "");
    const cleaned = withoutThinking.trim();
    // An empty remainder is reported as "no content" rather than "".
    return cleaned === "" ? null : cleaned;
}
|
|
// ---------------------------------------------------------------------------
|
|
// Vertex AI OpenAI-compatible client
|
|
// Used for: zai-org/glm-5-maas, anthropic/claude-sonnet-4-6, etc. (note: createLLM routes anthropic/* and claude-* model IDs to AnthropicVertexClient instead)
|
|
// ---------------------------------------------------------------------------
|
|
let _cachedToken = "";
|
|
let _tokenExpiry = 0;
|
|
// Creates the GoogleAuth instance, scoped to cloud-platform.
//
// If GCP_SA_KEY_BASE64 is set, it is decoded from base64, parsed as JSON and
// passed to GoogleAuth as explicit `credentials`. (Base64 keeps the key free of
// newlines and shell-special characters, which makes it safe to ship as an env
// var.) If the variable is unset, or decoding/parsing/construction throws, a
// GoogleAuth without explicit credentials is returned instead — per the
// original note this relies on the GCP metadata server, which works on VMs
// with the correct scopes.
function buildGoogleAuth() {
    const cloudPlatformScope = "https://www.googleapis.com/auth/cloud-platform";
    const encodedKey = process.env.GCP_SA_KEY_BASE64;
    if (encodedKey) {
        try {
            const decodedKey = Buffer.from(encodedKey, "base64").toString("utf8");
            return new google_auth_library_1.GoogleAuth({
                credentials: JSON.parse(decodedKey),
                scopes: [cloudPlatformScope],
            });
        }
        catch {
            // The error itself is deliberately not logged here.
            console.warn("[llm] GCP_SA_KEY_BASE64 is set but failed to decode/parse — falling back to metadata server");
        }
    }
    return new google_auth_library_1.GoogleAuth({
        scopes: [cloudPlatformScope],
    });
}
|
|
const _googleAuth = buildGoogleAuth();
|
|
/**
 * Returns an access token for Vertex AI requests, using the module-level
 * cache (`_cachedToken` / `_tokenExpiry`) when it is still considered valid.
 *
 * A cached token is reused while `Date.now()` is below `_tokenExpiry`.
 * Otherwise a new token is requested through `_googleAuth` and cached for
 * 55 minutes (tokens last 1hr, so this refreshes a little early).
 *
 * @returns {Promise<string>} A non-empty access token.
 * @throws {Error} When the auth client answers without a token. Nothing is
 *   cached in that case, so the next call tries again instead of handing out
 *   an unusable value.
 */
async function getVertexToken() {
    const now = Date.now();
    if (_cachedToken && now < _tokenExpiry) {
        return _cachedToken;
    }
    const client = await _googleAuth.getClient();
    const tokenResponse = await client.getAccessToken();
    const token = tokenResponse?.token;
    if (!token) {
        // Previously a null token was cached and later sent as "Bearer null".
        throw new Error("Vertex auth: access token response contained no token");
    }
    _cachedToken = token;
    _tokenExpiry = now + 55 * 60 * 1000; // tokens last 1hr, refresh at 55min
    return _cachedToken;
}
|
|
/**
 * Converts a `Retry-After` header value into a delay in milliseconds.
 *
 * The header may carry either a whole number of seconds or an HTTP-date.
 *
 * @param {string|null|undefined} headerValue Raw header value, if any.
 * @returns {number|null} Non-negative delay in ms, or `null` when the header
 *   is absent, empty or not understood (callers then use their own backoff).
 */
function _retryAfterToMs(headerValue) {
    const raw = (headerValue ?? "").trim();
    if (raw === "") {
        return null;
    }
    if (/^\d+$/.test(raw)) {
        return Number(raw) * 1000;
    }
    const retryAt = Date.parse(raw);
    if (Number.isNaN(retryAt)) {
        return null;
    }
    // A date in the past means "retry now", never a negative delay.
    return Math.max(0, retryAt - Date.now());
}

/**
 * Chat client for the Vertex AI OpenAI-compatible `chat/completions` endpoint.
 */
class VertexOpenAIClient {
    /**
     * @param {string} modelId Model identifier sent as `model` in the request.
     * @param {{projectId?: string, region?: string, temperature?: number}} [opts]
     *   `projectId` falls back to the `GCP_PROJECT_ID` env var and then to a
     *   built-in default; `region` defaults to "global"; `temperature` to 0.3.
     */
    constructor(modelId, opts) {
        this.modelId = modelId;
        this.projectId =
            opts?.projectId ?? process.env.GCP_PROJECT_ID ?? "master-ai-484822";
        this.region = opts?.region ?? "global";
        this.temperature = opts?.temperature ?? 0.3;
    }

    /**
     * Sends one non-streaming chat completion request.
     *
     * Responses with status 429 or 503 are retried up to 4 times. The wait
     * honors a valid `Retry-After` header (capped at 60s); otherwise it is an
     * exponential backoff with jitter (capped at 30s). A 401 resets the
     * module-level token expiry so the next token lookup refreshes, but the
     * current call still fails.
     *
     * @param {Array<object>} messages OpenAI-format chat messages.
     * @param {Array<object>} [tools] OpenAI-format tool definitions; when
     *   non-empty they are sent with `tool_choice: "auto"`.
     * @param {number} [maxTokens=4096] Value sent as `max_tokens`.
     * @returns {Promise<{content: (string|null), reasoning: (string|null),
     *   tool_calls: Array<object>, finish_reason: string, usage: *}>}
     * @throws {Error} `Vertex API <status>: <body excerpt>` for any
     *   non-retryable response or once retries are exhausted.
     */
    async chat(messages, tools, maxTokens = 4096) {
        const base = this.region === "global"
            ? "https://aiplatform.googleapis.com"
            : `https://${this.region}-aiplatform.googleapis.com`;
        const url = `${base}/v1/projects/${this.projectId}/locations/${this.region}/endpoints/openapi/chat/completions`;
        const body = {
            model: this.modelId,
            messages,
            max_tokens: maxTokens,
            temperature: this.temperature,
            stream: false,
        };
        if (tools && tools.length > 0) {
            body.tools = tools;
            body.tool_choice = "auto";
        }
        // Retry with exponential backoff on 429 / 503 (rate limit / overload)
        const MAX_RETRIES = 4;
        const RETRY_STATUSES = new Set([429, 503]);
        for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
            const token = await getVertexToken();
            const res = await fetch(url, {
                method: "POST",
                headers: {
                    Authorization: `Bearer ${token}`,
                    "Content-Type": "application/json",
                },
                body: JSON.stringify(body),
            });
            if (res.ok) {
                const data = await res.json();
                const choice = data.choices?.[0];
                const message = choice?.message ?? {};
                return {
                    content: stripModelMarkup(message.content),
                    reasoning: stripModelMarkup(message.reasoning_content),
                    tool_calls: message.tool_calls ?? [],
                    finish_reason: choice?.finish_reason ?? "stop",
                    usage: data.usage,
                };
            }
            const errText = await res.text();
            // Force token refresh on 401
            if (res.status === 401) {
                _tokenExpiry = 0;
            }
            if (RETRY_STATUSES.has(res.status) && attempt < MAX_RETRIES) {
                // Prefer the server's Retry-After hint. If it is missing or
                // unparseable, fall back to exponential backoff — an
                // unparseable value must never turn into a NaN timer delay,
                // which would retry immediately.
                const hintedMs = _retryAfterToMs(res.headers.get("retry-after"));
                const waitMs = hintedMs === null
                    ? Math.min(2 ** attempt * 2000 + Math.random() * 500, 30000)
                    : Math.min(hintedMs, 60000);
                console.warn(`[llm] Vertex ${res.status} on attempt ${attempt + 1}/${MAX_RETRIES + 1} — retrying in ${Math.round(waitMs / 1000)}s`);
                await new Promise((r) => setTimeout(r, waitMs));
                continue;
            }
            throw new Error(`Vertex API ${res.status}: ${errText.slice(0, 400)}`);
        }
        // Unreachable in practice: the last attempt always returns or throws.
        throw new Error("Vertex API: exceeded max retries");
    }
}
|
|
exports.VertexOpenAIClient = VertexOpenAIClient;
|
|
// ---------------------------------------------------------------------------
|
|
// Gemini client via @google/genai SDK
|
|
// Used for: Tier A (fast/cheap routing, summaries, log parsing)
|
|
// Converts to/from OpenAI message format internally.
|
|
// ---------------------------------------------------------------------------
|
|
/**
 * Chat client backed by the @google/genai SDK.
 *
 * Accepts OpenAI-format messages and tools, and returns an OpenAI-style
 * result object.
 */
class GeminiClient {
    /**
     * @param {string} [modelId="gemini-3.1-pro-preview"] Model to call.
     * @param {{temperature?: number}} [opts] `temperature` defaults to 0.2.
     */
    constructor(modelId = "gemini-3.1-pro-preview", opts) {
        this.modelId = modelId;
        this.temperature = opts?.temperature ?? 0.2;
    }

    /**
     * Runs one `generateContent` call and converts the first candidate.
     *
     * The first message with role "system" becomes the system instruction;
     * all system messages are left out of the converted contents. Text parts
     * of the candidate are concatenated; each function-call part becomes a
     * tool call with a freshly generated `call_…` id.
     *
     * @param {Array<object>} messages OpenAI-format chat messages.
     * @param {Array<object>} [tools] OpenAI-format tool definitions.
     * @param {number} [maxTokens=8192] Sent as `maxOutputTokens`.
     * @returns {Promise<{content: (string|null), reasoning: null,
     *   tool_calls: Array<object>, finish_reason: string,
     *   usage: ({prompt_tokens: number, completion_tokens: number,
     *   total_tokens: number}|undefined)}>} `usage` is `undefined` when the
     *   response carries no usage metadata.
     * @throws {Error} When `GOOGLE_API_KEY` is unset or the response has no
     *   candidate.
     */
    async chat(messages, tools, maxTokens = 8192) {
        const apiKey = process.env.GOOGLE_API_KEY;
        if (!apiKey) {
            throw new Error("GOOGLE_API_KEY not set");
        }
        const genai = new genai_1.GoogleGenAI({ apiKey });
        const systemMsg = messages.find((m) => m.role === "system");
        const nonSystem = messages.filter((m) => m.role !== "system");
        const functionDeclarations = (tools ?? []).map((t) => ({
            name: t.function.name,
            description: t.function.description,
            parameters: t.function.parameters,
        }));
        const response = await genai.models.generateContent({
            model: this.modelId,
            contents: toGeminiContents(nonSystem),
            config: {
                systemInstruction: systemMsg?.content ?? undefined,
                tools: functionDeclarations.length > 0
                    ? [{ functionDeclarations }]
                    : undefined,
                temperature: this.temperature,
                maxOutputTokens: maxTokens,
            },
        });
        const candidate = response.candidates?.[0];
        if (!candidate) {
            throw new Error("No response from Gemini");
        }
        const parts = candidate.content?.parts ?? [];
        const textContent = parts
            .filter((p) => p.text)
            .map((p) => p.text)
            .join("") || null;
        const fnCalls = parts.filter((p) => p.functionCall);
        const tool_calls = fnCalls.map((p) => ({
            id: `call_${(0, uuid_1.v4)().replace(/-/g, "").slice(0, 12)}`,
            type: "function",
            function: {
                name: p.functionCall.name ?? "",
                arguments: JSON.stringify(p.functionCall.args ?? {}),
            },
        }));
        // Report token usage in the same OpenAI-style shape the other clients
        // in this file return, instead of dropping it.
        const meta = response.usageMetadata;
        let usage;
        if (meta) {
            const promptTokens = meta.promptTokenCount ?? 0;
            const completionTokens = meta.candidatesTokenCount ?? 0;
            usage = {
                prompt_tokens: promptTokens,
                completion_tokens: completionTokens,
                total_tokens: meta.totalTokenCount ?? promptTokens + completionTokens,
            };
        }
        return {
            content: stripModelMarkup(textContent),
            reasoning: null,
            tool_calls,
            finish_reason: fnCalls.length > 0 ? "tool_calls" : "stop",
            usage,
        };
    }
}
|
|
exports.GeminiClient = GeminiClient;
|
|
/**
 * Convert OpenAI message format → Gemini Content[] format.
 *
 * Role mapping:
 * - "assistant" → role "model", with an optional text part followed by one
 *   `functionCall` part per tool call (arguments are JSON-parsed).
 * - "tool" → role "user" with a single `functionResponse` part. The content
 *   is JSON-parsed when possible and otherwise kept as the raw string.
 * - anything else → role "user" with one text part.
 *
 * A tool result is labelled with `msg.name` when present. Otherwise the name
 * is looked up from the earlier assistant tool call whose `id` equals the
 * result's `tool_call_id`, so the response carries the same function name as
 * the call it answers. "tool" is used only when neither is available.
 *
 * @param {Array<object>} messages OpenAI-format messages, in order.
 * @returns {Array<{role: string, parts: Array<object>}>} Gemini contents.
 * @throws {SyntaxError} If an assistant tool call carries arguments that are
 *   not valid JSON.
 */
function toGeminiContents(messages) {
    const contents = [];
    // tool call id → function name, filled in as assistant messages are seen.
    const toolNamesById = new Map();
    for (const msg of messages) {
        if (msg.role === "assistant") {
            const parts = [];
            if (msg.content) {
                parts.push({ text: msg.content });
            }
            for (const tc of msg.tool_calls ?? []) {
                if (tc.id) {
                    toolNamesById.set(tc.id, tc.function.name);
                }
                parts.push({
                    functionCall: {
                        name: tc.function.name,
                        args: JSON.parse(tc.function.arguments || "{}"),
                    },
                });
            }
            contents.push({ role: "model", parts });
        }
        else if (msg.role === "tool") {
            // Parse content back — could be JSON or plain text
            let resultValue = msg.content;
            try {
                resultValue = JSON.parse(msg.content ?? "null");
            }
            catch {
                /* keep as string */
            }
            const responseName = msg.name ?? toolNamesById.get(msg.tool_call_id) ?? "tool";
            contents.push({
                role: "user",
                parts: [
                    {
                        functionResponse: {
                            name: responseName,
                            response: { result: resultValue },
                        },
                    },
                ],
            });
        }
        else {
            contents.push({ role: "user", parts: [{ text: msg.content ?? "" }] });
        }
    }
    return contents;
}
|
|
// ---------------------------------------------------------------------------
|
|
// Anthropic Vertex client
|
|
// Used for: claude-* models via Vertex AI (proper Anthropic Messages API)
|
|
// Handles tool_calls by converting to/from Anthropic's tool_use blocks.
|
|
// ---------------------------------------------------------------------------
|
|
/**
 * Chat client for claude-* models through the Anthropic Vertex SDK.
 *
 * Accepts OpenAI-format messages and tools, converts them to Anthropic
 * message blocks (`tool_use` / `tool_result`), and converts the reply back
 * to an OpenAI-style result.
 */
class AnthropicVertexClient {
    /**
     * @param {string} modelId Model name; a leading "anthropic/" is stripped
     *   because the SDK uses bare model names.
     * @param {{projectId?: string, region?: string, temperature?: number}} [opts]
     *   `projectId` falls back to `GCP_PROJECT_ID` and then a built-in
     *   default; `region` falls back to `CLAUDE_REGION` and then "us-east5".
     *   `temperature` is sent with each request only when provided.
     */
    constructor(modelId, opts) {
        // Strip the "anthropic/" prefix if present — the SDK uses bare model names
        this.modelId = modelId.startsWith("anthropic/")
            ? modelId.slice(10)
            : modelId;
        this.projectId =
            opts?.projectId ?? process.env.GCP_PROJECT_ID ?? "master-ai-484822";
        this.region = opts?.region ?? process.env.CLAUDE_REGION ?? "us-east5";
        this.temperature = opts?.temperature;
        // SDK client, created on the first chat() call and then reused.
        this._client = null;
    }

    /**
     * Creates a new SDK client for this project/region.
     *
     * With `GCP_SA_KEY_BASE64` set, the decoded service account JSON is handed
     * to the SDK through an explicit GoogleAuth. If it is unset or cannot be
     * decoded/parsed, the SDK is constructed without explicit auth.
     *
     * @returns {object} A new Anthropic Vertex SDK client.
     */
    buildClient() {
        const b64Key = process.env.GCP_SA_KEY_BASE64;
        if (b64Key) {
            try {
                const jsonStr = Buffer.from(b64Key, "base64").toString("utf8");
                const credentials = JSON.parse(jsonStr);
                return new vertex_sdk_1.default({
                    projectId: this.projectId,
                    region: this.region,
                    googleAuth: new google_auth_library_1.GoogleAuth({
                        credentials,
                        scopes: ["https://www.googleapis.com/auth/cloud-platform"],
                    }),
                });
            }
            catch {
                console.warn("[llm] AnthropicVertex: SA key decode failed, falling back to metadata server");
            }
        }
        return new vertex_sdk_1.default({
            projectId: this.projectId,
            region: this.region,
        });
    }

    /**
     * Sends one Messages API request and converts the reply.
     *
     * Errors whose `status`/`statusCode` is 429 or 503 are retried up to 4
     * times with exponential backoff plus jitter (capped at 30s).
     *
     * @param {Array<object>} messages OpenAI-format chat messages. The first
     *   "system" message becomes the `system` prompt.
     * @param {Array<object>} [tools] OpenAI-format tool definitions.
     * @param {number} [maxTokens=8192] Sent as `max_tokens`.
     * @returns {Promise<{content: (string|null), reasoning: null,
     *   tool_calls: Array<object>, finish_reason: string, usage: (object|undefined)}>}
     * @throws {Error} `Anthropic Vertex error: …`, with the original error
     *   attached as `cause`.
     */
    async chat(messages, tools, maxTokens = 8192) {
        // Reuse one client per instance so its auth state is not rebuilt on
        // every request.
        if (!this._client) {
            this._client = this.buildClient();
        }
        const client = this._client;
        const system = messages.find((m) => m.role === "system")?.content ?? undefined;
        const nonSystem = messages.filter((m) => m.role !== "system");
        // Convert OpenAI message format → Anthropic format
        const anthropicMessages = nonSystem.map((m) => {
            if (m.role === "assistant") {
                const parts = [];
                if (m.content) {
                    parts.push({ type: "text", text: m.content });
                }
                for (const tc of m.tool_calls ?? []) {
                    parts.push({
                        type: "tool_use",
                        id: tc.id,
                        name: tc.function.name,
                        input: JSON.parse(tc.function.arguments || "{}"),
                    });
                }
                return {
                    role: "assistant",
                    // A lone text part is sent as a plain string.
                    content: parts.length === 1 && parts[0].type === "text"
                        ? parts[0].text
                        : parts,
                };
            }
            if (m.role === "tool") {
                return {
                    role: "user",
                    content: [
                        {
                            type: "tool_result",
                            tool_use_id: m.tool_call_id,
                            content: m.content ?? "",
                        },
                    ],
                };
            }
            return { role: "user", content: m.content ?? "" };
        });
        const anthropicTools = (tools ?? []).map((t) => ({
            name: t.function.name,
            description: t.function.description,
            input_schema: t.function.parameters,
        }));
        const request = {
            model: this.modelId,
            max_tokens: maxTokens,
            system: system ?? undefined,
            messages: anthropicMessages,
            tools: anthropicTools.length > 0 ? anthropicTools : undefined,
        };
        // Only send a temperature when the caller configured one.
        if (this.temperature !== undefined) {
            request.temperature = this.temperature;
        }
        const MAX_RETRIES = 4;
        const RETRY_STATUSES = new Set([429, 503]);
        for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
            try {
                const response = await client.messages.create(request);
                const textContent = response.content
                    .filter((b) => b.type === "text")
                    .map((b) => b.text)
                    .join("") || null;
                const tool_calls = response.content
                    .filter((b) => b.type === "tool_use")
                    .map((b) => ({
                        id: b.id,
                        type: "function",
                        function: {
                            name: b.name,
                            arguments: JSON.stringify(b.input ?? {}),
                        },
                    }));
                return {
                    content: stripModelMarkup(textContent),
                    reasoning: null,
                    tool_calls,
                    finish_reason: response.stop_reason === "tool_use" ? "tool_calls" : "stop",
                    usage: response.usage
                        ? {
                            prompt_tokens: response.usage.input_tokens,
                            completion_tokens: response.usage.output_tokens,
                            total_tokens: response.usage.input_tokens + response.usage.output_tokens,
                        }
                        : undefined,
                };
            }
            catch (err) {
                const status = err?.status ?? err?.statusCode ?? 0;
                if (RETRY_STATUSES.has(status) && attempt < MAX_RETRIES) {
                    const waitMs = Math.min(2 ** attempt * 2000 + Math.random() * 500, 30000);
                    console.warn(`[llm] Anthropic Vertex ${status} on attempt ${attempt + 1}/${MAX_RETRIES + 1} — retrying in ${Math.round(waitMs / 1000)}s`);
                    await new Promise((r) => setTimeout(r, waitMs));
                    continue;
                }
                // Keep the original error (status, stack) reachable via `cause`.
                throw new Error(`Anthropic Vertex error: ${err?.message ?? String(err)}`, { cause: err });
            }
        }
        throw new Error("Anthropic Vertex: exceeded max retries");
    }
}
|
|
exports.AnthropicVertexClient = AnthropicVertexClient;
|
|
// Model used for each tier; every entry can be overridden through the
// matching TIER_A_MODEL / TIER_B_MODEL / TIER_C_MODEL env var.
const TIER_MODELS = {
    A: process.env.TIER_A_MODEL ?? "gemini-3.1-pro-preview",
    B: process.env.TIER_B_MODEL ?? "claude-sonnet-4-6",
    C: process.env.TIER_C_MODEL ?? "claude-sonnet-4-6",
};

/**
 * Creates the chat client for a tier ("A", "B" or "C") or an explicit model ID.
 *
 * Routing is by model ID prefix:
 * - "gemini-" → GeminiClient (receives `opts` as given)
 * - "anthropic/" or "claude-" → AnthropicVertexClient
 * - anything else → VertexOpenAIClient
 *
 * The requested temperature is forwarded on every route, so the Claude route
 * no longer silently discards it.
 *
 * @param {string} modelOrTier Tier letter or model ID.
 * @param {{temperature?: number}} [opts] Optional sampling settings.
 * @returns {GeminiClient|AnthropicVertexClient|VertexOpenAIClient}
 */
function createLLM(modelOrTier, opts) {
    const isTier = modelOrTier === "A" || modelOrTier === "B" || modelOrTier === "C";
    const modelId = isTier ? TIER_MODELS[modelOrTier] : modelOrTier;
    if (modelId.startsWith("gemini-")) {
        return new GeminiClient(modelId, opts);
    }
    if (modelId.startsWith("anthropic/") || modelId.startsWith("claude-")) {
        return new AnthropicVertexClient(modelId, { temperature: opts?.temperature });
    }
    return new VertexOpenAIClient(modelId, { temperature: opts?.temperature });
}
|
|
// ---------------------------------------------------------------------------
|
|
// Helper — convert our ToolDefinition[] → LLMTool[] (OpenAI format)
|
|
// ---------------------------------------------------------------------------
|
|
/**
 * Wraps each tool definition in the OpenAI "function" tool envelope.
 *
 * @param {Array<{name: *, description: *, parameters: *}>} tools Tool
 *   definitions; only `name`, `description` and `parameters` are copied.
 * @returns {Array<{type: string, function: {name: *, description: *, parameters: *}}>}
 *   One entry per input tool, in the same order.
 */
function toOAITools(tools) {
    const asOpenAITool = ({ name, description, parameters }) => ({
        type: "function",
        function: { name, description, parameters },
    });
    return tools.map((tool) => asOpenAITool(tool));
}
|