fix: add all missing source files (llm.ts, updated agent-runner/agents)
src/llm.ts was never committed — this caused the Docker build to fail with "Cannot find module './llm'". Also commit updated agent-runner.ts, agents.ts, and .env.example that reference the new LLM client. Made-with: Cursor
This commit is contained in:
285
src/llm.ts
Normal file
285
src/llm.ts
Normal file
@@ -0,0 +1,285 @@
|
||||
import { execSync } from 'child_process';
|
||||
import { GoogleGenAI } from '@google/genai';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
|
||||
// =============================================================================
|
||||
// Unified LLM client — OpenAI-compatible message format throughout
|
||||
//
|
||||
// Two backends:
|
||||
// VertexOpenAIClient — for GLM-5, Claude Sonnet, etc. via Vertex global endpoint
|
||||
// GeminiClient — for Gemini Flash/Pro via @google/genai SDK
|
||||
//
|
||||
// Model tier defaults (overridable via env):
|
||||
// Tier A: gemini-2.5-flash ($0.15/$0.60 per 1M) — routing, summaries, log parsing
|
||||
// Tier B: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — coding, feature work
|
||||
// Tier C: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — complex decisions, escalation
|
||||
// =============================================================================
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Shared message types (OpenAI format — used everywhere internally)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * A single chat message in OpenAI wire format — the shared currency of both
 * backends in this module.
 */
export interface LLMMessage {
  /** Message author; 'tool' carries a tool-execution result back to the model. */
  role: 'system' | 'user' | 'assistant' | 'tool';
  /** Text body; null on assistant messages that only carry tool_calls. */
  content: string | null;
  /** Function calls requested by the model (assistant messages only). */
  tool_calls?: LLMToolCall[];
  tool_call_id?: string; // set on role=tool messages
  name?: string; // function name on role=tool messages
}
|
||||
|
||||
/** One function-call request emitted by the model (OpenAI tool-call shape). */
export interface LLMToolCall {
  /** Unique call id; echoed back via LLMMessage.tool_call_id on the result message. */
  id: string;
  type: 'function';
  function: {
    name: string;
    arguments: string; // JSON-encoded string
  };
}
|
||||
|
||||
/** A callable tool advertised to the model (OpenAI function-tool shape). */
export interface LLMTool {
  type: 'function';
  function: {
    name: string;
    description: string;
    // JSON-Schema-style argument spec — passed through verbatim to the backend.
    parameters: Record<string, unknown>;
  };
}
|
||||
|
||||
/** Normalized completion result returned by both backends. */
export interface LLMResponse {
  content: string | null;
  reasoning: string | null; // GLM-5 chain-of-thought
  tool_calls: LLMToolCall[];
  finish_reason: string; // e.g. 'stop' or 'tool_calls'
  /** Token accounting, when the backend reports it. */
  usage?: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}
|
||||
|
||||
/** Minimal client contract implemented by both backends; construct via createLLM(). */
export interface LLMClient {
  modelId: string;
  chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens?: number): Promise<LLMResponse>;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Vertex AI OpenAI-compatible client
|
||||
// Used for: zai-org/glm-5-maas, anthropic/claude-sonnet-4-6, etc.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
let _cachedToken = '';
|
||||
let _tokenExpiry = 0;
|
||||
|
||||
function getVertexToken(): string {
|
||||
const now = Date.now();
|
||||
if (_cachedToken && now < _tokenExpiry) return _cachedToken;
|
||||
_cachedToken = execSync('gcloud auth print-access-token', { encoding: 'utf8' }).trim();
|
||||
_tokenExpiry = now + 55 * 60 * 1000; // tokens last 1hr, refresh at 55min
|
||||
return _cachedToken;
|
||||
}
|
||||
|
||||
export class VertexOpenAIClient implements LLMClient {
|
||||
modelId: string;
|
||||
private projectId: string;
|
||||
private region: string;
|
||||
private temperature: number;
|
||||
|
||||
constructor(modelId: string, opts?: { projectId?: string; region?: string; temperature?: number }) {
|
||||
this.modelId = modelId;
|
||||
this.projectId = opts?.projectId ?? process.env.GCP_PROJECT_ID ?? 'master-ai-484822';
|
||||
this.region = opts?.region ?? 'global';
|
||||
this.temperature = opts?.temperature ?? 0.3;
|
||||
}
|
||||
|
||||
async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 4096): Promise<LLMResponse> {
|
||||
const token = getVertexToken();
|
||||
const base = this.region === 'global'
|
||||
? 'https://aiplatform.googleapis.com'
|
||||
: `https://${this.region}-aiplatform.googleapis.com`;
|
||||
const url = `${base}/v1/projects/${this.projectId}/locations/${this.region}/endpoints/openapi/chat/completions`;
|
||||
|
||||
const body: Record<string, unknown> = {
|
||||
model: this.modelId,
|
||||
messages,
|
||||
max_tokens: maxTokens,
|
||||
temperature: this.temperature,
|
||||
stream: false
|
||||
};
|
||||
|
||||
if (tools && tools.length > 0) {
|
||||
body.tools = tools;
|
||||
body.tool_choice = 'auto';
|
||||
}
|
||||
|
||||
const res = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Authorization': `Bearer ${token}`,
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
body: JSON.stringify(body)
|
||||
});
|
||||
|
||||
if (!res.ok) {
|
||||
const errText = await res.text();
|
||||
// Force token refresh on 401
|
||||
if (res.status === 401) _tokenExpiry = 0;
|
||||
throw new Error(`Vertex API ${res.status}: ${errText.slice(0, 400)}`);
|
||||
}
|
||||
|
||||
const data = await res.json() as any;
|
||||
const choice = data.choices?.[0];
|
||||
const message = choice?.message ?? {};
|
||||
|
||||
return {
|
||||
content: message.content ?? null,
|
||||
reasoning: message.reasoning_content ?? null,
|
||||
tool_calls: message.tool_calls ?? [],
|
||||
finish_reason: choice?.finish_reason ?? 'stop',
|
||||
usage: data.usage
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Gemini client via @google/genai SDK
|
||||
// Used for: Tier A (fast/cheap routing, summaries, log parsing)
|
||||
// Converts to/from OpenAI message format internally.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export class GeminiClient implements LLMClient {
|
||||
modelId: string;
|
||||
private temperature: number;
|
||||
|
||||
constructor(modelId = 'gemini-2.5-flash', opts?: { temperature?: number }) {
|
||||
this.modelId = modelId;
|
||||
this.temperature = opts?.temperature ?? 0.2;
|
||||
}
|
||||
|
||||
async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 8192): Promise<LLMResponse> {
|
||||
const apiKey = process.env.GOOGLE_API_KEY;
|
||||
if (!apiKey) throw new Error('GOOGLE_API_KEY not set');
|
||||
|
||||
const genai = new GoogleGenAI({ apiKey });
|
||||
|
||||
const systemMsg = messages.find(m => m.role === 'system');
|
||||
const nonSystem = messages.filter(m => m.role !== 'system');
|
||||
|
||||
const functionDeclarations = (tools ?? []).map(t => ({
|
||||
name: t.function.name,
|
||||
description: t.function.description,
|
||||
parameters: t.function.parameters as any
|
||||
}));
|
||||
|
||||
const response = await genai.models.generateContent({
|
||||
model: this.modelId,
|
||||
contents: toGeminiContents(nonSystem),
|
||||
config: {
|
||||
systemInstruction: systemMsg?.content ?? undefined,
|
||||
tools: functionDeclarations.length > 0 ? [{ functionDeclarations }] : undefined,
|
||||
temperature: this.temperature,
|
||||
maxOutputTokens: maxTokens
|
||||
}
|
||||
});
|
||||
|
||||
const candidate = response.candidates?.[0];
|
||||
if (!candidate) throw new Error('No response from Gemini');
|
||||
|
||||
const parts = candidate.content?.parts ?? [];
|
||||
const textContent = parts.filter(p => p.text).map(p => p.text).join('') || null;
|
||||
const fnCalls = parts.filter(p => p.functionCall);
|
||||
|
||||
const tool_calls: LLMToolCall[] = fnCalls.map(p => ({
|
||||
id: `call_${uuidv4().replace(/-/g, '').slice(0, 12)}`,
|
||||
type: 'function' as const,
|
||||
function: {
|
||||
name: p.functionCall!.name ?? '',
|
||||
arguments: JSON.stringify(p.functionCall!.args ?? {})
|
||||
}
|
||||
}));
|
||||
|
||||
return {
|
||||
content: textContent,
|
||||
reasoning: null,
|
||||
tool_calls,
|
||||
finish_reason: fnCalls.length > 0 ? 'tool_calls' : 'stop'
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/** Convert OpenAI message format → Gemini Content[] format */
|
||||
function toGeminiContents(messages: LLMMessage[]): any[] {
|
||||
const contents: any[] = [];
|
||||
for (const msg of messages) {
|
||||
if (msg.role === 'assistant') {
|
||||
const parts: any[] = [];
|
||||
if (msg.content) parts.push({ text: msg.content });
|
||||
for (const tc of msg.tool_calls ?? []) {
|
||||
parts.push({
|
||||
functionCall: {
|
||||
name: tc.function.name,
|
||||
args: JSON.parse(tc.function.arguments || '{}')
|
||||
}
|
||||
});
|
||||
}
|
||||
contents.push({ role: 'model', parts });
|
||||
} else if (msg.role === 'tool') {
|
||||
// Parse content back — could be JSON or plain text
|
||||
let resultValue: unknown = msg.content;
|
||||
try { resultValue = JSON.parse(msg.content ?? 'null'); } catch { /* keep as string */ }
|
||||
contents.push({
|
||||
role: 'user',
|
||||
parts: [{
|
||||
functionResponse: {
|
||||
name: msg.name ?? 'tool',
|
||||
response: { result: resultValue }
|
||||
}
|
||||
}]
|
||||
});
|
||||
} else {
|
||||
contents.push({ role: 'user', parts: [{ text: msg.content ?? '' }] });
|
||||
}
|
||||
}
|
||||
return contents;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Factory — createLLM(modelId | tier)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Cost/capability tier: A = cheap routing/summaries, B = coding, C = escalation. */
export type ModelTier = 'A' | 'B' | 'C';

// Tier → default model id, overridable via TIER_*_MODEL env vars.
// NOTE(review): env is read once at module load — changes made to these vars
// after import are not picked up; confirm that is intended.
const TIER_MODELS: Record<ModelTier, string> = {
  A: process.env.TIER_A_MODEL ?? 'gemini-2.5-flash',
  B: process.env.TIER_B_MODEL ?? 'zai-org/glm-5-maas',
  C: process.env.TIER_C_MODEL ?? 'zai-org/glm-5-maas'
};
|
||||
|
||||
export function createLLM(modelOrTier: string | ModelTier, opts?: { temperature?: number }): LLMClient {
|
||||
const modelId = (modelOrTier === 'A' || modelOrTier === 'B' || modelOrTier === 'C')
|
||||
? TIER_MODELS[modelOrTier]
|
||||
: modelOrTier;
|
||||
|
||||
if (modelId.startsWith('gemini-')) {
|
||||
return new GeminiClient(modelId, opts);
|
||||
}
|
||||
|
||||
return new VertexOpenAIClient(modelId, { temperature: opts?.temperature });
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper — convert our ToolDefinition[] → LLMTool[] (OpenAI format)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export function toOAITools(
|
||||
tools: Array<{ name: string; description: string; parameters: Record<string, unknown> }>
|
||||
): LLMTool[] {
|
||||
return tools.map(t => ({
|
||||
type: 'function',
|
||||
function: {
|
||||
name: t.name,
|
||||
description: t.description,
|
||||
parameters: t.parameters
|
||||
}
|
||||
}));
|
||||
}
|
||||
Reference in New Issue
Block a user