// NOTE: gcloud is not available inside the Docker container. This module uses
// google-auth-library instead, which reads credentials from the GCP metadata
// server (works on any GCP VM) or the GOOGLE_APPLICATION_CREDENTIALS env var.
// Remember to rebuild dist/ after changes. Made-with: Cursor
import { GoogleAuth } from 'google-auth-library';
|
|
import { GoogleGenAI } from '@google/genai';
|
|
import { v4 as uuidv4 } from 'uuid';
|
|
|
|
// =============================================================================
|
|
// Unified LLM client — OpenAI-compatible message format throughout
|
|
//
|
|
// Two backends:
|
|
// VertexOpenAIClient — for GLM-5, Claude Sonnet, etc. via Vertex global endpoint
|
|
// GeminiFlashClient — for Gemini Flash/Pro via @google/genai SDK
|
|
//
|
|
// Model tier defaults (overridable via env):
|
|
// Tier A: gemini-2.5-flash ($0.15/$0.60 per 1M) — routing, summaries, log parsing
|
|
// Tier B: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — coding, feature work
|
|
// Tier C: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — complex decisions, escalation
|
|
// =============================================================================
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Shared message types (OpenAI format — used everywhere internally)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
/** One chat turn in OpenAI wire format — the internal lingua franca for both backends. */
export interface LLMMessage {
  role: 'system' | 'user' | 'assistant' | 'tool';
  /** Text content; null when an assistant turn carries only tool calls. */
  content: string | null;
  /** Present on assistant turns that request tool invocations. */
  tool_calls?: LLMToolCall[];
  /** Set on role=tool messages: id of the tool call being answered. */
  tool_call_id?: string;
  /** Function name on role=tool messages. */
  name?: string;
}
|
|
|
|
/** A single function invocation requested by the model (OpenAI tool-call shape). */
export interface LLMToolCall {
  id: string;
  type: 'function';
  function: {
    name: string;
    /** JSON-encoded argument object — parse before use. */
    arguments: string;
  };
}
|
|
|
|
/** Tool definition advertised to the model (OpenAI function-tool shape). */
export interface LLMTool {
  type: 'function';
  function: {
    name: string;
    description: string;
    /** JSON Schema describing the function's arguments. */
    parameters: Record<string, unknown>;
  };
}
|
|
|
|
/** Normalized completion result returned by every backend. */
export interface LLMResponse {
  content: string | null;
  /** GLM-5 chain-of-thought text; null for models that do not emit it. */
  reasoning: string | null;
  tool_calls: LLMToolCall[];
  finish_reason: string;
  /** Token accounting — optional; present when the backend reports it. */
  usage?: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}
|
|
|
|
/** Minimal backend contract: one chat-completion call in OpenAI message format. */
export interface LLMClient {
  modelId: string;
  /** Run one completion; maxTokens defaults are backend-specific. */
  chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens?: number): Promise<LLMResponse>;
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Vertex AI OpenAI-compatible client
|
|
// Used for: zai-org/glm-5-maas, anthropic/claude-sonnet-4-6, etc.
|
|
// ---------------------------------------------------------------------------
|
|
|
|
let _cachedToken = '';
|
|
let _tokenExpiry = 0;
|
|
|
|
const _googleAuth = new GoogleAuth({
|
|
scopes: ['https://www.googleapis.com/auth/cloud-platform']
|
|
});
|
|
|
|
async function getVertexToken(): Promise<string> {
|
|
const now = Date.now();
|
|
if (_cachedToken && now < _tokenExpiry) return _cachedToken;
|
|
const client = await _googleAuth.getClient();
|
|
const tokenResponse = await client.getAccessToken();
|
|
_cachedToken = tokenResponse.token!;
|
|
_tokenExpiry = now + 55 * 60 * 1000; // tokens last 1hr, refresh at 55min
|
|
return _cachedToken;
|
|
}
|
|
|
|
export class VertexOpenAIClient implements LLMClient {
|
|
modelId: string;
|
|
private projectId: string;
|
|
private region: string;
|
|
private temperature: number;
|
|
|
|
constructor(modelId: string, opts?: { projectId?: string; region?: string; temperature?: number }) {
|
|
this.modelId = modelId;
|
|
this.projectId = opts?.projectId ?? process.env.GCP_PROJECT_ID ?? 'master-ai-484822';
|
|
this.region = opts?.region ?? 'global';
|
|
this.temperature = opts?.temperature ?? 0.3;
|
|
}
|
|
|
|
async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 4096): Promise<LLMResponse> {
|
|
const token = await getVertexToken();
|
|
const base = this.region === 'global'
|
|
? 'https://aiplatform.googleapis.com'
|
|
: `https://${this.region}-aiplatform.googleapis.com`;
|
|
const url = `${base}/v1/projects/${this.projectId}/locations/${this.region}/endpoints/openapi/chat/completions`;
|
|
|
|
const body: Record<string, unknown> = {
|
|
model: this.modelId,
|
|
messages,
|
|
max_tokens: maxTokens,
|
|
temperature: this.temperature,
|
|
stream: false
|
|
};
|
|
|
|
if (tools && tools.length > 0) {
|
|
body.tools = tools;
|
|
body.tool_choice = 'auto';
|
|
}
|
|
|
|
const res = await fetch(url, {
|
|
method: 'POST',
|
|
headers: {
|
|
'Authorization': `Bearer ${token}`,
|
|
'Content-Type': 'application/json'
|
|
},
|
|
body: JSON.stringify(body)
|
|
});
|
|
|
|
if (!res.ok) {
|
|
const errText = await res.text();
|
|
// Force token refresh on 401
|
|
if (res.status === 401) _tokenExpiry = 0;
|
|
throw new Error(`Vertex API ${res.status}: ${errText.slice(0, 400)}`);
|
|
}
|
|
|
|
const data = await res.json() as any;
|
|
const choice = data.choices?.[0];
|
|
const message = choice?.message ?? {};
|
|
|
|
return {
|
|
content: message.content ?? null,
|
|
reasoning: message.reasoning_content ?? null,
|
|
tool_calls: message.tool_calls ?? [],
|
|
finish_reason: choice?.finish_reason ?? 'stop',
|
|
usage: data.usage
|
|
};
|
|
}
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Gemini client via @google/genai SDK
|
|
// Used for: Tier A (fast/cheap routing, summaries, log parsing)
|
|
// Converts to/from OpenAI message format internally.
|
|
// ---------------------------------------------------------------------------
|
|
|
|
export class GeminiClient implements LLMClient {
|
|
modelId: string;
|
|
private temperature: number;
|
|
|
|
constructor(modelId = 'gemini-2.5-flash', opts?: { temperature?: number }) {
|
|
this.modelId = modelId;
|
|
this.temperature = opts?.temperature ?? 0.2;
|
|
}
|
|
|
|
async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 8192): Promise<LLMResponse> {
|
|
const apiKey = process.env.GOOGLE_API_KEY;
|
|
if (!apiKey) throw new Error('GOOGLE_API_KEY not set');
|
|
|
|
const genai = new GoogleGenAI({ apiKey });
|
|
|
|
const systemMsg = messages.find(m => m.role === 'system');
|
|
const nonSystem = messages.filter(m => m.role !== 'system');
|
|
|
|
const functionDeclarations = (tools ?? []).map(t => ({
|
|
name: t.function.name,
|
|
description: t.function.description,
|
|
parameters: t.function.parameters as any
|
|
}));
|
|
|
|
const response = await genai.models.generateContent({
|
|
model: this.modelId,
|
|
contents: toGeminiContents(nonSystem),
|
|
config: {
|
|
systemInstruction: systemMsg?.content ?? undefined,
|
|
tools: functionDeclarations.length > 0 ? [{ functionDeclarations }] : undefined,
|
|
temperature: this.temperature,
|
|
maxOutputTokens: maxTokens
|
|
}
|
|
});
|
|
|
|
const candidate = response.candidates?.[0];
|
|
if (!candidate) throw new Error('No response from Gemini');
|
|
|
|
const parts = candidate.content?.parts ?? [];
|
|
const textContent = parts.filter(p => p.text).map(p => p.text).join('') || null;
|
|
const fnCalls = parts.filter(p => p.functionCall);
|
|
|
|
const tool_calls: LLMToolCall[] = fnCalls.map(p => ({
|
|
id: `call_${uuidv4().replace(/-/g, '').slice(0, 12)}`,
|
|
type: 'function' as const,
|
|
function: {
|
|
name: p.functionCall!.name ?? '',
|
|
arguments: JSON.stringify(p.functionCall!.args ?? {})
|
|
}
|
|
}));
|
|
|
|
return {
|
|
content: textContent,
|
|
reasoning: null,
|
|
tool_calls,
|
|
finish_reason: fnCalls.length > 0 ? 'tool_calls' : 'stop'
|
|
};
|
|
}
|
|
}
|
|
|
|
/** Convert OpenAI message format → Gemini Content[] format */
|
|
function toGeminiContents(messages: LLMMessage[]): any[] {
|
|
const contents: any[] = [];
|
|
for (const msg of messages) {
|
|
if (msg.role === 'assistant') {
|
|
const parts: any[] = [];
|
|
if (msg.content) parts.push({ text: msg.content });
|
|
for (const tc of msg.tool_calls ?? []) {
|
|
parts.push({
|
|
functionCall: {
|
|
name: tc.function.name,
|
|
args: JSON.parse(tc.function.arguments || '{}')
|
|
}
|
|
});
|
|
}
|
|
contents.push({ role: 'model', parts });
|
|
} else if (msg.role === 'tool') {
|
|
// Parse content back — could be JSON or plain text
|
|
let resultValue: unknown = msg.content;
|
|
try { resultValue = JSON.parse(msg.content ?? 'null'); } catch { /* keep as string */ }
|
|
contents.push({
|
|
role: 'user',
|
|
parts: [{
|
|
functionResponse: {
|
|
name: msg.name ?? 'tool',
|
|
response: { result: resultValue }
|
|
}
|
|
}]
|
|
});
|
|
} else {
|
|
contents.push({ role: 'user', parts: [{ text: msg.content ?? '' }] });
|
|
}
|
|
}
|
|
return contents;
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Factory — createLLM(modelId | tier)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
export type ModelTier = 'A' | 'B' | 'C';
|
|
|
|
const TIER_MODELS: Record<ModelTier, string> = {
|
|
A: process.env.TIER_A_MODEL ?? 'gemini-2.5-flash',
|
|
B: process.env.TIER_B_MODEL ?? 'zai-org/glm-5-maas',
|
|
C: process.env.TIER_C_MODEL ?? 'zai-org/glm-5-maas'
|
|
};
|
|
|
|
export function createLLM(modelOrTier: string | ModelTier, opts?: { temperature?: number }): LLMClient {
|
|
const modelId = (modelOrTier === 'A' || modelOrTier === 'B' || modelOrTier === 'C')
|
|
? TIER_MODELS[modelOrTier]
|
|
: modelOrTier;
|
|
|
|
if (modelId.startsWith('gemini-')) {
|
|
return new GeminiClient(modelId, opts);
|
|
}
|
|
|
|
return new VertexOpenAIClient(modelId, { temperature: opts?.temperature });
|
|
}
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Helper — convert our ToolDefinition[] → LLMTool[] (OpenAI format)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
export function toOAITools(
|
|
tools: Array<{ name: string; description: string; parameters: Record<string, unknown> }>
|
|
): LLMTool[] {
|
|
return tools.map(t => ({
|
|
type: 'function',
|
|
function: {
|
|
name: t.name,
|
|
description: t.description,
|
|
parameters: t.parameters
|
|
}
|
|
}));
|
|
}
|