import { GoogleAuth } from 'google-auth-library';
import { GoogleGenAI } from '@google/genai';
import { v4 as uuidv4 } from 'uuid';

// =============================================================================
// Unified LLM client — OpenAI-compatible message format throughout
//
// Two backends:
//   VertexOpenAIClient — for GLM-5, Claude Sonnet, etc. via Vertex global endpoint
//   GeminiClient       — for Gemini Flash/Pro via @google/genai SDK
//
// Model tier defaults (overridable via env):
//   Tier A: gemini-2.5-flash   ($0.15/$0.60 per 1M) — routing, summaries, log parsing
//   Tier B: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — coding, feature work
//   Tier C: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — complex decisions, escalation
// =============================================================================

// ---------------------------------------------------------------------------
// Shared message types (OpenAI format — used everywhere internally)
// ---------------------------------------------------------------------------

/** A single chat message in OpenAI chat-completions format. */
export interface LLMMessage {
  role: 'system' | 'user' | 'assistant' | 'tool';
  content: string | null;
  tool_calls?: LLMToolCall[];
  tool_call_id?: string; // set on role=tool messages
  name?: string; // function name on role=tool messages
}

/** A function call requested by the model. */
export interface LLMToolCall {
  id: string;
  type: 'function';
  function: {
    name: string;
    arguments: string; // JSON-encoded string
  };
}

/** A tool (function) definition offered to the model. */
export interface LLMTool {
  type: 'function';
  function: {
    name: string;
    description: string;
    parameters: Record<string, unknown>;
  };
}

/** Normalised result of one chat call, identical for both backends. */
export interface LLMResponse {
  content: string | null;
  reasoning: string | null; // GLM-5 chain-of-thought
  tool_calls: LLMToolCall[];
  finish_reason: string;
  usage?: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}

/** Common interface implemented by every backend client. */
export interface LLMClient {
  modelId: string;
  chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens?: number): Promise<LLMResponse>;
}

// ---------------------------------------------------------------------------
// Vertex AI OpenAI-compatible client
// Used for: zai-org/glm-5-maas, anthropic/claude-sonnet-4-6, etc.
// ---------------------------------------------------------------------------

/** How long a fetched token is reused: tokens last 1hr, refresh at 55min. */
const TOKEN_TTL_MS = 55 * 60 * 1000;

let _cachedToken = '';
let _tokenExpiry = 0;
const _googleAuth = new GoogleAuth({
  scopes: ['https://www.googleapis.com/auth/cloud-platform'],
});

/**
 * Returns a bearer token for the Vertex AI REST API, reusing the module-level
 * cached token until `_tokenExpiry` has passed.
 *
 * @throws Error if the auth client returns no token.
 */
async function getVertexToken(): Promise<string> {
  const now = Date.now();
  if (_cachedToken && now < _tokenExpiry) return _cachedToken;

  const client = await _googleAuth.getClient();
  const tokenResponse = await client.getAccessToken();
  const token = tokenResponse.token;
  // Fail loudly instead of caching null and sending "Bearer null".
  if (!token) throw new Error('Failed to obtain a Vertex AI access token');

  _cachedToken = token;
  _tokenExpiry = now + TOKEN_TTL_MS;
  return token;
}

/** The subset of the chat-completions response body that this module reads. */
interface VertexChatCompletion {
  choices?: Array<{
    message?: {
      content?: string | null;
      reasoning_content?: string | null;
      tool_calls?: LLMToolCall[];
    };
    finish_reason?: string | null;
  }>;
  usage?: LLMResponse['usage'];
}

/** Client for models served through Vertex AI's OpenAI-compatible chat endpoint. */
export class VertexOpenAIClient implements LLMClient {
  modelId: string;
  private readonly projectId: string;
  private readonly region: string;
  private readonly temperature: number;

  /**
   * @param modelId Model identifier sent as `model` in the request body.
   * @param opts Optional overrides. `projectId` falls back to the
   *   `GCP_PROJECT_ID` env var, `region` to `'global'`, `temperature` to 0.3.
   */
  constructor(
    modelId: string,
    opts?: { projectId?: string; region?: string; temperature?: number },
  ) {
    this.modelId = modelId;
    // NOTE(review): hard-coded fallback project id — consider requiring
    // GCP_PROJECT_ID instead; kept so existing deployments behave the same.
    this.projectId = opts?.projectId ?? process.env.GCP_PROJECT_ID ?? 'master-ai-484822';
    this.region = opts?.region ?? 'global';
    this.temperature = opts?.temperature ?? 0.3;
  }

  /**
   * Sends one non-streaming chat-completions request.
   *
   * @param messages Conversation in OpenAI format, forwarded as-is.
   * @param tools Optional tool definitions; when non-empty, `tool_choice` is `'auto'`.
   * @param maxTokens Value sent as `max_tokens`.
   * @returns The first choice, normalised to {@link LLMResponse}.
   * @throws Error on any non-2xx response (message includes status and up to
   *   400 characters of the response body).
   */
  async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 4096): Promise<LLMResponse> {
    const token = await getVertexToken();

    // The global location uses the un-prefixed host; regions use "<region>-aiplatform".
    const base =
      this.region === 'global'
        ? 'https://aiplatform.googleapis.com'
        : `https://${this.region}-aiplatform.googleapis.com`;
    const url = `${base}/v1/projects/${this.projectId}/locations/${this.region}/endpoints/openapi/chat/completions`;

    const body: Record<string, unknown> = {
      model: this.modelId,
      messages,
      max_tokens: maxTokens,
      temperature: this.temperature,
      stream: false,
    };
    if (tools && tools.length > 0) {
      body.tools = tools;
      body.tool_choice = 'auto';
    }

    const res = await fetch(url, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(body),
    });

    if (!res.ok) {
      const errText = await res.text();
      // Force token refresh on 401
      if (res.status === 401) _tokenExpiry = 0;
      throw new Error(`Vertex API ${res.status}: ${errText.slice(0, 400)}`);
    }

    const data = (await res.json()) as VertexChatCompletion;
    const choice = data.choices?.[0];
    const message = choice?.message ?? {};

    return {
      content: message.content ?? null,
      reasoning: message.reasoning_content ?? null,
      tool_calls: message.tool_calls ?? [],
      finish_reason: choice?.finish_reason ?? 'stop',
      usage: data.usage,
    };
  }
}

// ---------------------------------------------------------------------------
// Gemini client via @google/genai SDK
// Used for: Tier A (fast/cheap routing, summaries, log parsing)
// Converts to/from OpenAI message format internally.
// ---------------------------------------------------------------------------

/** One part of a Gemini content turn (only the fields this module produces). */
interface GeminiPart {
  text?: string;
  functionCall?: { name: string; args: Record<string, unknown> };
  functionResponse?: { name: string; response: Record<string, unknown> };
}

/** One Gemini conversation turn. */
interface GeminiContent {
  role: 'user' | 'model';
  parts: GeminiPart[];
}

/** Client for Gemini models, authenticated with the `GOOGLE_API_KEY` env var. */
export class GeminiClient implements LLMClient {
  modelId: string;
  private readonly temperature: number;

  /**
   * @param modelId Gemini model name; defaults to `'gemini-2.5-flash'`.
   * @param opts Optional overrides. `temperature` defaults to 0.2.
   */
  constructor(modelId = 'gemini-2.5-flash', opts?: { temperature?: number }) {
    this.modelId = modelId;
    this.temperature = opts?.temperature ?? 0.2;
  }

  /**
   * Sends one `generateContent` request and maps the first candidate back to
   * OpenAI format.
   *
   * @param messages Conversation in OpenAI format. System messages are sent as
   *   the system instruction; the rest are converted by `toGeminiContents`.
   * @param tools Optional tool definitions, sent as function declarations.
   * @param maxTokens Value sent as `maxOutputTokens`.
   * @throws Error if `GOOGLE_API_KEY` is unset or no candidate is returned.
   */
  async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 8192): Promise<LLMResponse> {
    const apiKey = process.env.GOOGLE_API_KEY;
    if (!apiKey) throw new Error('GOOGLE_API_KEY not set');
    const genai = new GoogleGenAI({ apiKey });

    // Join every system message rather than silently dropping all but the first.
    const systemTexts = messages
      .filter(m => m.role === 'system' && m.content !== null)
      .map(m => m.content as string);
    const systemInstruction = systemTexts.length > 0 ? systemTexts.join('\n\n') : undefined;
    const nonSystem = messages.filter(m => m.role !== 'system');

    const functionDeclarations = (tools ?? []).map(t => ({
      name: t.function.name,
      description: t.function.description,
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- JSON-schema object handed to the SDK's schema type
      parameters: t.function.parameters as any,
    }));

    const response = await genai.models.generateContent({
      model: this.modelId,
      contents: toGeminiContents(nonSystem),
      config: {
        systemInstruction,
        tools: functionDeclarations.length > 0 ? [{ functionDeclarations }] : undefined,
        temperature: this.temperature,
        maxOutputTokens: maxTokens,
      },
    });

    const candidate = response.candidates?.[0];
    if (!candidate) throw new Error('No response from Gemini');

    const parts = candidate.content?.parts ?? [];
    const textContent = parts.map(p => p.text ?? '').join('') || null;

    // Gemini function calls are given a synthetic id so callers can echo it
    // back as tool_call_id, as they would with an OpenAI-style backend.
    const tool_calls: LLMToolCall[] = parts.flatMap(p =>
      p.functionCall
        ? [
            {
              id: `call_${uuidv4().replace(/-/g, '').slice(0, 12)}`,
              type: 'function' as const,
              function: {
                name: p.functionCall.name ?? '',
                arguments: JSON.stringify(p.functionCall.args ?? {}),
              },
            },
          ]
        : [],
    );

    let finish_reason = 'stop';
    if (tool_calls.length > 0) {
      finish_reason = 'tool_calls';
    } else if (String(candidate.finishReason) === 'MAX_TOKENS') {
      // Surface truncation the way OpenAI-style backends do.
      finish_reason = 'length';
    }

    const meta = response.usageMetadata;
    const usage = meta
      ? {
          prompt_tokens: meta.promptTokenCount ?? 0,
          completion_tokens: meta.candidatesTokenCount ?? 0,
          total_tokens: meta.totalTokenCount ?? 0,
        }
      : undefined;

    return { content: textContent, reasoning: null, tool_calls, finish_reason, usage };
  }
}

/**
 * Parses the JSON-encoded arguments of a tool call.
 * Returns `{}` when the string is empty, malformed, or not a JSON object, so a
 * single bad tool call in the history cannot crash the whole conversion.
 */
function parseToolArguments(raw: string): Record<string, unknown> {
  try {
    const parsed: unknown = JSON.parse(raw || '{}');
    if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
      return parsed as Record<string, unknown>;
    }
  } catch {
    /* malformed JSON — fall through to the empty object */
  }
  return {};
}

/** Parses a tool result as JSON, falling back to the raw string when it is plain text. */
function parseToolResult(content: string | null): unknown {
  try {
    return JSON.parse(content ?? 'null');
  } catch {
    return content;
  }
}

/**
 * Convert OpenAI message format → Gemini Content[] format.
 *
 * - `assistant` → role `model`, with text and `functionCall` parts.
 * - `tool` → role `user`, with a `functionResponse` part. Consecutive tool
 *   messages are merged into one turn so that the responses to parallel
 *   function calls travel together.
 * - anything else → role `user`, with a single text part.
 */
function toGeminiContents(messages: LLMMessage[]): GeminiContent[] {
  const contents: GeminiContent[] = [];
  // tool_call_id → function name, learned from earlier assistant messages, so a
  // tool message that carries no `name` can still be matched to its call.
  const toolNamesById = new Map<string, string>();
  // Parts array of the tool-response turn currently being built, if any.
  let openToolTurn: GeminiPart[] | null = null;

  for (const msg of messages) {
    if (msg.role === 'tool') {
      const resolvedName =
        msg.name ??
        (msg.tool_call_id !== undefined ? toolNamesById.get(msg.tool_call_id) : undefined) ??
        'tool';
      const part: GeminiPart = {
        functionResponse: {
          name: resolvedName,
          response: { result: parseToolResult(msg.content) },
        },
      };
      if (openToolTurn) {
        openToolTurn.push(part);
      } else {
        openToolTurn = [part];
        contents.push({ role: 'user', parts: openToolTurn });
      }
      continue;
    }

    // Any non-tool message ends the current run of tool responses.
    openToolTurn = null;

    if (msg.role === 'assistant') {
      const parts: GeminiPart[] = [];
      if (msg.content) parts.push({ text: msg.content });
      for (const tc of msg.tool_calls ?? []) {
        toolNamesById.set(tc.id, tc.function.name);
        parts.push({
          functionCall: {
            name: tc.function.name,
            args: parseToolArguments(tc.function.arguments),
          },
        });
      }
      contents.push({ role: 'model', parts });
    } else {
      contents.push({ role: 'user', parts: [{ text: msg.content ?? '' }] });
    }
  }

  return contents;
}

// ---------------------------------------------------------------------------
// Factory — createLLM(modelId | tier)
// ---------------------------------------------------------------------------

export type ModelTier = 'A' | 'B' | 'C';

/** Tier → model id, resolved once at module load from env with built-in defaults. */
const TIER_MODELS: Record<ModelTier, string> = {
  A: process.env.TIER_A_MODEL ?? 'gemini-2.5-flash',
  B: process.env.TIER_B_MODEL ?? 'zai-org/glm-5-maas',
  C: process.env.TIER_C_MODEL ?? 'zai-org/glm-5-maas',
};

/**
 * Creates a client for a model id or a tier letter.
 *
 * @param modelOrTier `'A'`, `'B'` or `'C'` to use the configured tier model, or
 *   an explicit model id.
 * @param opts Optional sampling temperature passed to the client.
 * @returns A {@link GeminiClient} when the resolved id starts with `gemini-`,
 *   otherwise a {@link VertexOpenAIClient}.
 */
export function createLLM(
  modelOrTier: string | ModelTier,
  opts?: { temperature?: number },
): LLMClient {
  const modelId =
    modelOrTier === 'A' || modelOrTier === 'B' || modelOrTier === 'C'
      ? TIER_MODELS[modelOrTier]
      : modelOrTier;

  if (modelId.startsWith('gemini-')) {
    return new GeminiClient(modelId, opts);
  }
  return new VertexOpenAIClient(modelId, { temperature: opts?.temperature });
}

// ---------------------------------------------------------------------------
// Helper — convert our ToolDefinition[] → LLMTool[] (OpenAI format)
// ---------------------------------------------------------------------------

/**
 * Wraps flat tool definitions in the OpenAI `{ type: 'function', function }` envelope.
 *
 * @param tools Tool definitions with a name, description and JSON-schema parameters.
 * @returns One {@link LLMTool} per input, in the same order.
 */
export function toOAITools(
  tools: Array<{ name: string; description: string; parameters: Record<string, unknown> }>,
): LLMTool[] {
  return tools.map(t => ({
    type: 'function',
    function: {
      name: t.name,
      description: t.description,
      parameters: t.parameters,
    },
  }));
}