fix: add all missing source files (llm.ts, updated agent-runner/agents)

src/llm.ts was never committed — this caused the Docker build to fail with "Cannot find module './llm'". Also commit updated agent-runner.ts, agents.ts, and .env.example that reference the new LLM client. Made-with: Cursor
2026-02-27 19:33:59 -08:00
parent f2fc976637
commit 59fe313963
4 changed files with 426 additions and 166 deletions
--- a/.env.example
+++ b/.env.example
@@ -1,23 +1,48 @@
-# Google AI — required for all agents
+# ---------------------------------------------------------------------------
+# AI Models — 3-tier routing
+# ---------------------------------------------------------------------------
+
+# Tier A — fast/cheap: routing, summaries, log parsing (Gemini Flash)
+TIER_A_MODEL=gemini-2.5-flash
+
+# Tier B — workhorse coder: features, diffs, standard bug fixes (GLM-5 on Vertex)
+TIER_B_MODEL=zai-org/glm-5-maas
+
+# Tier C — premium escalation: architecture decisions, complex debugging
+# Options: zai-org/glm-5-maas | anthropic/claude-sonnet-4-6
+TIER_C_MODEL=zai-org/glm-5-maas
+
+# Orchestrator model (defaults to Tier B if not set)
+ORCHESTRATOR_MODEL=B
+
+# Gemini API key — required whenever a tier resolves to a gemini-* model (Tier A by default)
 GOOGLE_API_KEY=your-gemini-api-key

-# Gitea — required for git push, issue management
+# GCP project for Vertex AI (GLM-5, Claude Sonnet via Vertex)
+GCP_PROJECT_ID=master-ai-484822
+
+# ---------------------------------------------------------------------------
+# Gitea — required for git push and issue management
+# ---------------------------------------------------------------------------
 GITEA_API_URL=https://git.vibnai.com
 GITEA_API_TOKEN=your-gitea-token
 GITEA_USERNAME=your-gitea-username

+# ---------------------------------------------------------------------------
 # Coolify — required for deployment tools
+# ---------------------------------------------------------------------------
 COOLIFY_API_URL=https://coolify.vibnai.com
 COOLIFY_API_TOKEN=your-coolify-token

+# ---------------------------------------------------------------------------
 # Server config
+# ---------------------------------------------------------------------------
 PORT=3333

-# Base path where agent workspaces are cloned
-# Each repo gets a subfolder: /workspaces/owner_reponame
+# Base path where agent workspaces are cloned (owner_reponame subdirs)
 WORKSPACE_BASE=/workspaces

-# Optional: internal URL of this service (used by spawn_agent to self-call)
+# Internal URL of this service (used by spawn_agent to self-call)
 AGENT_RUNNER_URL=http://localhost:3333

 # Optional: shared secret for validating Gitea webhook POSTs
--- a/src/agent-runner.ts
+++ b/src/agent-runner.ts
@@ -1,22 +1,22 @@
-import { GoogleGenAI, Content, Tool, FunctionDeclaration } from '@google/genai';
+import { createLLM, toOAITools, LLMMessage } from './llm';
 import { AgentConfig } from './agents';
 import { executeTool, ToolContext } from './tools';
 import { Job, updateJob } from './job-store';

-const MAX_TURNS = 40; // safety cap — prevents infinite loops
+const MAX_TURNS = 40;

 export interface RunResult {
    finalText: string;
    toolCallCount: number;
    turns: number;
+    model: string;
 }

 /**
- * Core Gemini agent loop.
+ * Core agent execution loop — model-agnostic via the unified LLM client.
 *
- * Sends the task to Gemini with the agent's system prompt and tools,
- * then loops: execute tool calls → send results back → repeat until
- * the model stops calling tools or MAX_TURNS is reached.
+ * Agents use their configured model tier (A/B/C) or a specific model ID.
+ * Tool calling uses OpenAI format throughout.
 */
 export async function runAgent(
    job: Job,
@@ -24,126 +24,79 @@ export async function runAgent(
    task: string,
    ctx: ToolContext
 ): Promise<RunResult> {
-    const apiKey = process.env.GOOGLE_API_KEY;
-    if (!apiKey) {
-        throw new Error('GOOGLE_API_KEY environment variable is not set');
-    }
+    const llm = createLLM(config.model, { temperature: 0.2 });
+    const oaiTools = toOAITools(config.tools);

-    const genai = new GoogleGenAI({ apiKey });
-
-    // Build Gemini function declarations from our tool definitions
-    const functionDeclarations: FunctionDeclaration[] = config.tools.map(tool => ({
-        name: tool.name,
-        description: tool.description,
-        parameters: tool.parameters as any
-    }));
-
-    const tools: Tool[] = functionDeclarations.length > 0
-        ? [{ functionDeclarations }]
-        : [];
-
-    const model = genai.models;
-
-    // Build conversation history
-    const history: Content[] = [];
-
-    // Initial user message
-    let currentMessage: Content = {
-        role: 'user',
-        parts: [{ text: task }]
-    };
+    const history: LLMMessage[] = [
+        { role: 'user', content: task }
+    ];

    let toolCallCount = 0;
    let turn = 0;
    let finalText = '';

-    updateJob(job.id, { status: 'running', progress: `Starting ${config.name} agent...` });
+    updateJob(job.id, { status: 'running', progress: `Starting ${config.name} (${llm.modelId})…` });

    while (turn < MAX_TURNS) {
        turn++;

-        // Add current message to history
-        history.push(currentMessage);
+        const messages: LLMMessage[] = [
+            { role: 'system', content: config.systemPrompt },
+            ...history
+        ];

-        // Call Gemini
-        const response = await model.generateContent({
-            model: config.model || 'gemini-2.0-flash',
-            contents: history,
-            config: {
-                systemInstruction: config.systemPrompt,
-                tools: tools.length > 0 ? tools : undefined,
-                temperature: 0.2,
-                maxOutputTokens: 8192
-            }
-        });
+        const response = await llm.chat(messages, oaiTools, 8192);

-        const candidate = response.candidates?.[0];
-        if (!candidate) {
-            throw new Error('No response from Gemini');
-        }
-
-        // Add model response to history
-        const modelContent: Content = {
-            role: 'model',
-            parts: candidate.content?.parts || []
+        // Build assistant message for history
+        const assistantMsg: LLMMessage = {
+            role: 'assistant',
+            content: response.content,
+            tool_calls: response.tool_calls.length > 0 ? response.tool_calls : undefined
        };
-        history.push(modelContent);
+        history.push(assistantMsg);

-        // Extract function calls from the response
-        const functionCalls = candidate.content?.parts?.filter(p => p.functionCall) ?? [];
-
-        if (functionCalls.length === 0) {
-            // No tool calls — the agent is done
-            finalText = candidate.content?.parts
-                ?.filter(p => p.text)
-                .map(p => p.text)
-                .join('') ?? '';
+        // No tool calls — agent is done
+        if (response.tool_calls.length === 0) {
+            finalText = response.content ?? '';
            break;
        }

-        // Execute all tool calls
-        const toolResultParts: any[] = [];
-        for (const part of functionCalls) {
-            const call = part.functionCall!;
-            const callName = call.name ?? 'unknown';
-            const callArgs = (call.args ?? {}) as Record<string, unknown>;
-            toolCallCount++;
+        // Execute tool calls
+        for (const tc of response.tool_calls) {
+            const fnName = tc.function.name;
+            let fnArgs: Record<string, unknown> = {};
+            try { fnArgs = JSON.parse(tc.function.arguments || '{}'); } catch { /* bad JSON */ }

+            toolCallCount++;
            updateJob(job.id, {
-                progress: `Turn ${turn}: calling ${callName}...`,
+                progress: `Turn ${turn}: calling ${fnName}…`,
                toolCalls: [...(job.toolCalls || []), {
                    turn,
-                    tool: callName,
-                    args: callArgs,
+                    tool: fnName,
+                    args: fnArgs,
                    timestamp: new Date().toISOString()
                }]
            });

            let result: unknown;
            try {
-                result = await executeTool(callName, callArgs, ctx);
+                result = await executeTool(fnName, fnArgs, ctx);
            } catch (err) {
                result = { error: err instanceof Error ? err.message : String(err) };
            }

-            toolResultParts.push({
-                functionResponse: {
-                    name: callName,
-                    response: { result }
-                }
+            history.push({
+                role: 'tool',
+                tool_call_id: tc.id,
+                name: fnName,
+                content: typeof result === 'string' ? result : JSON.stringify(result)
            });
        }
-
-        // Next turn: send tool results back to the model
-        currentMessage = {
-            role: 'user',
-            parts: toolResultParts
-        };
    }

    if (turn >= MAX_TURNS && !finalText) {
-        finalText = `Agent reached the ${MAX_TURNS}-turn safety limit. Last tool call count: ${toolCallCount}.`;
+        finalText = `Agent hit the ${MAX_TURNS}-turn safety limit. Tool calls made: ${toolCallCount}.`;
    }

-    return { finalText, toolCallCount, turns: turn };
+    return { finalText, toolCallCount, turns: turn, model: llm.modelId };
 }
--- a/src/agents.ts
+++ b/src/agents.ts
@@ -1,23 +1,23 @@
 import { ToolDefinition, ALL_TOOLS } from './tools';

 // ---------------------------------------------------------------------------
-// Agent configuration — which tools each agent gets + system prompt
+// Agent configuration
 // ---------------------------------------------------------------------------

 export interface AgentConfig {
    name: string;
    description: string;
-    model: string;
+    model: string; // model ID or tier ('A' | 'B' | 'C')
    systemPrompt: string;
    tools: ToolDefinition[];
 }

-const FILE_TOOLS = ['read_file', 'write_file', 'replace_in_file', 'list_directory', 'find_files', 'search_code'];
-const SHELL_TOOLS = ['execute_command'];
-const GIT_TOOLS = ['git_commit_and_push'];
+const FILE_TOOLS    = ['read_file', 'write_file', 'replace_in_file', 'list_directory', 'find_files', 'search_code'];
+const SHELL_TOOLS   = ['execute_command'];
+const GIT_TOOLS     = ['git_commit_and_push'];
 const COOLIFY_TOOLS = ['coolify_list_projects', 'coolify_list_applications', 'coolify_deploy', 'coolify_get_logs'];
-const GITEA_TOOLS = ['gitea_create_issue', 'gitea_list_issues', 'gitea_close_issue'];
-const SPAWN_TOOL = ['spawn_agent'];
+const GITEA_TOOLS   = ['gitea_create_issue', 'gitea_list_issues', 'gitea_close_issue'];
+const SPAWN_TOOL    = ['spawn_agent'];

 function pick(names: string[]): ToolDefinition[] {
    return ALL_TOOLS.filter(t => names.includes(t.name));
@@ -25,112 +25,109 @@ function pick(names: string[]): ToolDefinition[] {

 // ---------------------------------------------------------------------------
 // Agent definitions
+//
+// model is a tier ('A' | 'B' | 'C') or a specific model ID.
+// Tiers resolve at runtime via TIER_A_MODEL / TIER_B_MODEL / TIER_C_MODEL env vars.
+//
+//   Tier A = gemini-2.5-flash  — fast, cheap: routing, summaries, monitoring
+//   Tier B = zai-org/glm-5-maas — workhorse coding model
+//   Tier C = zai-org/glm-5-maas — complex decisions (or Claude Sonnet via TIER_C_MODEL)
 // ---------------------------------------------------------------------------

 export const AGENTS: Record<string, AgentConfig> = {
+
    Orchestrator: {
        name: 'Orchestrator',
-        description: 'Master coordinator that breaks down high-level goals and delegates to specialist agents',
-        model: 'gemini-2.5-flash',
-        systemPrompt: `You are the Orchestrator for Vibn, an autonomous AI system for software development.
+        description: 'Master coordinator — breaks down goals and delegates to specialist agents',
+        model: 'B', // GLM-5 — good planner, chain-of-thought reasoning
+        systemPrompt: `You are the Orchestrator for Vibn, an autonomous AI platform for software development.

-Your role is to:
-1. Understand the high-level goal provided in the task.
-2. Break it down into concrete sub-tasks.
-3. Delegate sub-tasks to the appropriate specialist agents using the spawn_agent tool.
-4. Use Gitea to track progress: create an issue at the start, close it when done.
-5. Summarize what was done when complete.
+Your role:
+1. Understand the high-level goal.
+2. Break it into concrete sub-tasks.
+3. Delegate to the right specialist agents via spawn_agent.
+4. Track progress via Gitea issues.
+5. Summarize results when done.

-Available specialist agents and when to use them:
- **Coder**: Any code changes — features, bug fixes, refactors, tests.
- **PM**: Project management — issue triage, sprint planning, documentation updates.
- **Marketing**: Content and copy — blog posts, landing page copy, release notes.
+Agents available:
+- Coder: code changes, features, bug fixes, tests.
+- PM: issue triage, docs, sprint planning.
+- Marketing: copy, blog posts, release notes.

 Rules:
- Always create a Gitea issue first to track the work.
- Delegate to ONE agent at a time unless tasks are fully independent.
- Check back on progress by listing issues.
- Never try to write code yourself — delegate to Coder.
- Be concise in your task descriptions when spawning agents.`,
+- Create a Gitea issue first to track the work.
+- Delegate one agent at a time unless tasks are fully independent.
+- Never write code yourself — delegate to Coder.
+- Be specific in task descriptions when spawning agents.`,
        tools: pick([...GITEA_TOOLS, ...SPAWN_TOOL, ...COOLIFY_TOOLS])
    },

    Coder: {
        name: 'Coder',
-        description: 'Senior software engineer — writes, edits, and tests code. Commits and pushes when done.',
-        model: 'gemini-2.5-flash',
-        systemPrompt: `You are an expert senior software engineer working autonomously on a git repository.
+        description: 'Senior software engineer — writes, edits, tests, commits, and pushes code',
+        model: 'B', // GLM-5 — strong at code generation and diffs
+        systemPrompt: `You are an expert senior software engineer working autonomously on a Git repository.

-Your job is to complete the coding task given to you. Follow these rules:
-
-**Workflow:**
-1. Start by exploring the codebase: list_directory, find_files, read_file to understand structure.
-2. Search for relevant code: search_code to find existing patterns.
+Workflow:
+1. Explore the codebase: list_directory, find_files, read_file.
+2. Search for patterns: search_code.
 3. Plan your changes before making them.
 4. Read every file BEFORE editing it.
 5. Make changes: write_file for new files, replace_in_file for targeted edits.
-6. Run tests or lint if applicable: execute_command.
-7. Commit and push when the task is complete: git_commit_and_push.
+6. Run tests/lint if applicable: execute_command.
+7. Commit and push when complete: git_commit_and_push.

-**Code quality rules:**
- Match existing code style exactly.
- Never leave TODO comments — implement or skip.
+Code quality:
+- Match existing style exactly.
+- No TODO comments — implement or skip.
 - Write complete files, not partial snippets.
- If tests exist, run them and fix failures before committing.
- Commit message should be concise and in imperative mood (e.g. "add user authentication").
+- Run tests and fix failures before committing.
+- Commit messages: imperative mood, concise (e.g. "add user authentication").

-**Safety rules:**
- Never delete files unless explicitly instructed.
- Never modify .env files or credentials.
+Safety:
+- Never delete files unless explicitly told to.
+- Never touch .env files or credentials.
 - Never commit secrets or API keys.

-**If you were triggered by a Gitea issue:**
- After committing, close the issue using gitea_close_issue.
- The repo name is in the format "owner/name".
-
-Be methodical. Read before you write. Test before you commit.`,
+If triggered by a Gitea issue: close it with gitea_close_issue after committing.`,
        tools: pick([...FILE_TOOLS, ...SHELL_TOOLS, ...GIT_TOOLS, ...GITEA_TOOLS])
    },

    PM: {
        name: 'PM',
-        description: 'Product manager — manages Gitea issues, writes documentation, tracks project health',
-        model: 'gemini-2.5-flash',
+        description: 'Product manager — docs, issue management, project health reports',
+        model: 'A', // Gemini Flash — lightweight, cheap for docs/issue work
        systemPrompt: `You are an autonomous Product Manager for a software project hosted on Gitea.

-Your responsibilities:
-1. Create, update, and close Gitea issues to track work.
-2. Write and update documentation files in the repository.
+Responsibilities:
+1. Create, update, and close Gitea issues.
+2. Write and update docs in the repository.
 3. Summarize project state and create reports.
-4. Prioritize and triage bugs/features based on impact.
+4. Triage bugs and features by impact.

-When writing documentation:
- Be clear and concise.
- Use markdown formatting.
- Focus on what users and developers need to know.
- Keep docs up to date with the actual codebase state.
-
-Always commit documentation updates after writing them.`,
+When writing docs:
+- Clear and concise.
+- Markdown formatting.
+- Keep docs in sync with the codebase.
+- Always commit after writing.`,
        tools: pick([...GITEA_TOOLS, ...FILE_TOOLS, ...GIT_TOOLS])
    },

    Marketing: {
        name: 'Marketing',
-        description: 'Marketing specialist — writes copy, blog posts, release notes, and landing page content',
-        model: 'gemini-2.5-flash',
+        description: 'Marketing specialist — copy, blog posts, release notes, landing page content',
+        model: 'A', // Gemini Flash — cheap for content generation
        systemPrompt: `You are an autonomous Marketing specialist for a SaaS product called Vibn.

-Vibn is a cloud-based AI-powered development environment. It helps development teams build faster with AI agents that can write code, manage projects, and deploy automatically.
+Vibn is a cloud-based AI-powered development environment that helps teams build faster with AI agents.

-Your responsibilities:
-1. Write compelling marketing copy for landing pages, email campaigns, and social media.
-2. Write technical blog posts that explain features in an accessible way.
+Responsibilities:
+1. Write landing page copy, emails, and social media content.
+2. Write technical blog posts explaining features accessibly.
 3. Write release notes that highlight user-facing value.
-4. Ensure all copy is on-brand: professional, clear, forward-thinking, and developer-friendly.
+4. Maintain brand voice: smart, confident, practical. No hype, no jargon.

-Brand voice: Smart, confident, practical. No hype. No jargon. Show don't tell.
-
-When writing content, create actual files in the repository (e.g. blog/2026-02-release.md) and commit them.`,
+Always create real files in the repo (e.g. blog/2026-02-release.md) and commit them.`,
        tools: pick([...FILE_TOOLS, ...GIT_TOOLS])
    }
 };
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -0,0 +1,285 @@
+import { execSync } from 'child_process';
+import { GoogleGenAI } from '@google/genai';
+import { v4 as uuidv4 } from 'uuid';
+
+// =============================================================================
+// Unified LLM client — OpenAI-compatible message format throughout
+//
+// Two backends:
+//   VertexOpenAIClient  — for GLM-5, Claude Sonnet, etc. via Vertex global endpoint
+//   GeminiClient        — for Gemini Flash/Pro via @google/genai SDK
+//
+// Model tier defaults (overridable via env):
+//   Tier A: gemini-2.5-flash  ($0.15/$0.60 per 1M)  — routing, summaries, log parsing
+//   Tier B: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — coding, feature work
+//   Tier C: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — complex decisions, escalation
+// =============================================================================
+
+// ---------------------------------------------------------------------------
+// Shared message types (OpenAI format — used everywhere internally)
+// ---------------------------------------------------------------------------
+
+/**
+ * One chat message in OpenAI chat-completions shape.
+ * `content` is nullable; `tool_calls` is used on assistant messages, while
+ * `tool_call_id` and `name` are used on role=tool messages.
+ */
+export interface LLMMessage {
+    role: 'system' | 'user' | 'assistant' | 'tool';
+    content: string | null;
+    tool_calls?: LLMToolCall[]; // tool invocations requested by an assistant message
+    tool_call_id?: string; // set on role=tool messages
+    name?: string;         // function name on role=tool messages
+}
+
+/** A single function call requested by the model, in OpenAI `tool_calls` shape. */
+export interface LLMToolCall {
+    id: string; // call identifier; role=tool messages carry it as tool_call_id
+    type: 'function';
+    function: {
+        name: string;
+        arguments: string; // JSON-encoded string
+    };
+}
+
+/** A tool (function) definition offered to the model, in OpenAI `tools` shape. */
+export interface LLMTool {
+    type: 'function';
+    function: {
+        name: string;
+        description: string;
+        parameters: Record<string, unknown>; // presumably a JSON Schema object — confirm against ToolDefinition
+    };
+}
+
+/** Normalized result of one chat call, in the same shape for every backend. */
+export interface LLMResponse {
+    content: string | null; // assistant text, or null when the model returned none
+    reasoning: string | null; // GLM-5 chain-of-thought
+    tool_calls: LLMToolCall[]; // empty array when the model requested no tools
+    finish_reason: string;
+    usage?: { // token accounting, when the backend reports it
+        prompt_tokens: number;
+        completion_tokens: number;
+        total_tokens: number;
+    };
+}
+
+/** Backend-agnostic chat client interface implemented by the concrete clients in this file. */
+export interface LLMClient {
+    modelId: string; // model identifier this client sends requests to
+    // Runs one chat turn; `tools` and `maxTokens` are optional and each implementation picks its own default.
+    chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens?: number): Promise<LLMResponse>;
+}
+
+// ---------------------------------------------------------------------------
+// Vertex AI OpenAI-compatible client
+// Used for: zai-org/glm-5-maas, anthropic/claude-sonnet-4-6, etc.
+// ---------------------------------------------------------------------------
+
+// Module-level cache for the gcloud access token, shared by every caller in this module.
+let _cachedToken = '';
+let _tokenExpiry = 0; // epoch ms after which the cached token is treated as stale
+
+/**
+ * Return an access token for Vertex AI, reusing a cached one for 55 minutes.
+ *
+ * Shells out synchronously to `gcloud auth print-access-token`, so the gcloud
+ * CLI must be installed and authenticated wherever this runs.
+ *
+ * @returns A non-empty bearer token.
+ * @throws Error if gcloud cannot be executed or prints an empty token.
+ */
+function getVertexToken(): string {
+    const now = Date.now();
+    if (_cachedToken && now < _tokenExpiry) return _cachedToken;
+
+    let token: string;
+    try {
+        token = execSync('gcloud auth print-access-token', { encoding: 'utf8' }).trim();
+    } catch (err) {
+        // Surface an actionable message instead of a raw child-process failure.
+        const reason = err instanceof Error ? err.message : String(err);
+        throw new Error(`Failed to obtain Vertex access token via gcloud: ${reason}`);
+    }
+    if (!token) {
+        // An empty token would only produce an opaque 401 later; fail here instead.
+        throw new Error('gcloud auth print-access-token returned an empty token');
+    }
+
+    _cachedToken = token;
+    _tokenExpiry = now + 55 * 60 * 1000; // tokens last 1hr, refresh at 55min
+    return _cachedToken;
+}
+
+/**
+ * Chat client for models reached through Vertex AI's OpenAI-compatible
+ * `chat/completions` endpoint, authenticated via getVertexToken().
+ */
+export class VertexOpenAIClient implements LLMClient {
+    modelId: string;
+    private projectId: string;
+    private region: string;
+    private temperature: number;
+
+    /**
+     * @param modelId Model identifier sent as `model` in the request body.
+     * @param opts    Optional overrides: `projectId` (else GCP_PROJECT_ID, else a
+     *                built-in default), `region` (default 'global') and
+     *                `temperature` (default 0.3).
+     */
+    constructor(modelId: string, opts?: { projectId?: string; region?: string; temperature?: number }) {
+        this.modelId = modelId;
+        this.projectId = opts?.projectId ?? process.env.GCP_PROJECT_ID ?? 'master-ai-484822';
+        this.region = opts?.region ?? 'global';
+        this.temperature = opts?.temperature ?? 0.3;
+    }
+
+    /**
+     * Send one non-streaming chat completion request.
+     *
+     * @param messages  Conversation in OpenAI format; sent to the API as-is.
+     * @param tools     Optional tools; when non-empty, `tool_choice` is set to 'auto'.
+     * @param maxTokens Value sent as `max_tokens` (default 4096).
+     * @returns The first choice, normalized to LLMResponse.
+     * @throws Error on a non-OK HTTP status (body truncated to 400 chars) or when
+     *         the response contains no choices.
+     */
+    async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 4096): Promise<LLMResponse> {
+        const token = getVertexToken();
+        // The 'global' location uses the un-prefixed host; other regions use a regional host.
+        const base = this.region === 'global'
+            ? 'https://aiplatform.googleapis.com'
+            : `https://${this.region}-aiplatform.googleapis.com`;
+        const url = `${base}/v1/projects/${this.projectId}/locations/${this.region}/endpoints/openapi/chat/completions`;
+
+        const body: Record<string, unknown> = {
+            model: this.modelId,
+            messages,
+            max_tokens: maxTokens,
+            temperature: this.temperature,
+            stream: false
+        };
+
+        if (tools && tools.length > 0) {
+            body.tools = tools;
+            body.tool_choice = 'auto';
+        }
+
+        const res = await fetch(url, {
+            method: 'POST',
+            headers: {
+                'Authorization': `Bearer ${token}`,
+                'Content-Type': 'application/json'
+            },
+            body: JSON.stringify(body)
+        });
+
+        if (!res.ok) {
+            const errText = await res.text();
+            // Force token refresh on 401
+            if (res.status === 401) _tokenExpiry = 0;
+            throw new Error(`Vertex API ${res.status}: ${errText.slice(0, 400)}`);
+        }
+
+        const data = await res.json() as any;
+        const choice = data.choices?.[0];
+        if (!choice) {
+            // Without this, an empty payload would look like a normal "stop" with no
+            // content and silently end the caller's loop with blank output.
+            throw new Error(`Vertex API returned no choices for model ${this.modelId}`);
+        }
+        const message = choice.message ?? {};
+
+        return {
+            content: message.content ?? null,
+            reasoning: message.reasoning_content ?? null,
+            tool_calls: message.tool_calls ?? [],
+            finish_reason: choice.finish_reason ?? 'stop',
+            usage: data.usage
+        };
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Gemini client via @google/genai SDK
+// Used for: Tier A (fast/cheap routing, summaries, log parsing)
+// Converts to/from OpenAI message format internally.
+// ---------------------------------------------------------------------------
+
+/**
+ * Chat client for Gemini models via the @google/genai SDK.
+ * Accepts and returns OpenAI-format messages; conversion happens internally.
+ */
+export class GeminiClient implements LLMClient {
+    modelId: string;
+    private temperature: number;
+
+    /**
+     * @param modelId Gemini model name (default 'gemini-2.5-flash').
+     * @param opts    Optional `temperature` override (default 0.2).
+     */
+    constructor(modelId = 'gemini-2.5-flash', opts?: { temperature?: number }) {
+        this.modelId = modelId;
+        this.temperature = opts?.temperature ?? 0.2;
+    }
+
+    /**
+     * Run one generateContent call and normalize the first candidate.
+     *
+     * @param messages  Conversation in OpenAI format. The first system message
+     *                  becomes the system instruction; the rest become contents.
+     * @param tools     Optional tools, converted to Gemini function declarations.
+     * @param maxTokens Value passed as `maxOutputTokens` (default 8192).
+     * @throws Error if GOOGLE_API_KEY is unset or the response has no candidates.
+     */
+    async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 8192): Promise<LLMResponse> {
+        const apiKey = process.env.GOOGLE_API_KEY;
+        if (!apiKey) throw new Error('GOOGLE_API_KEY not set');
+
+        const genai = new GoogleGenAI({ apiKey });
+
+        const systemMsg = messages.find(m => m.role === 'system');
+        const nonSystem = messages.filter(m => m.role !== 'system');
+
+        const functionDeclarations = (tools ?? []).map(t => ({
+            name: t.function.name,
+            description: t.function.description,
+            parameters: t.function.parameters as any
+        }));
+
+        const response = await genai.models.generateContent({
+            model: this.modelId,
+            contents: toGeminiContents(nonSystem),
+            config: {
+                systemInstruction: systemMsg?.content ?? undefined,
+                tools: functionDeclarations.length > 0 ? [{ functionDeclarations }] : undefined,
+                temperature: this.temperature,
+                maxOutputTokens: maxTokens
+            }
+        });
+
+        const candidate = response.candidates?.[0];
+        if (!candidate) throw new Error('No response from Gemini');
+
+        const parts = candidate.content?.parts ?? [];
+        const textContent = parts.filter(p => p.text).map(p => p.text).join('') || null;
+        const fnCalls = parts.filter(p => p.functionCall);
+
+        // Gemini does not supply call IDs, so synthesize OpenAI-style ones.
+        const tool_calls: LLMToolCall[] = fnCalls.map(p => ({
+            id: `call_${uuidv4().replace(/-/g, '').slice(0, 12)}`,
+            type: 'function' as const,
+            function: {
+                name: p.functionCall!.name ?? '',
+                arguments: JSON.stringify(p.functionCall!.args ?? {})
+            }
+        }));
+
+        // Map Gemini token accounting onto the shared usage shape so callers get
+        // the same field they receive from the Vertex client.
+        const usageMeta = response.usageMetadata;
+
+        return {
+            content: textContent,
+            reasoning: null,
+            tool_calls,
+            finish_reason: fnCalls.length > 0 ? 'tool_calls' : 'stop',
+            usage: usageMeta
+                ? {
+                    prompt_tokens: usageMeta.promptTokenCount ?? 0,
+                    completion_tokens: usageMeta.candidatesTokenCount ?? 0,
+                    total_tokens: usageMeta.totalTokenCount ?? 0
+                }
+                : undefined
+        };
+    }
+}
+
+/**
+ * Convert OpenAI message format → Gemini Content[] format.
+ *
+ * - assistant messages become role 'model' with text and functionCall parts;
+ * - a run of consecutive tool messages becomes ONE role 'user' turn holding one
+ *   functionResponse part per result, so the responses to a multi-call model
+ *   turn are delivered together;
+ * - any other message becomes a role 'user' text turn (callers are expected to
+ *   strip system messages beforehand).
+ *
+ * Malformed tool-call argument JSON is replaced by `{}` instead of throwing.
+ */
+function toGeminiContents(messages: LLMMessage[]): any[] {
+    const contents: any[] = [];
+    // The user turn currently collecting functionResponse parts, if any.
+    let pendingToolTurn: { role: 'user'; parts: any[] } | null = null;
+
+    for (const msg of messages) {
+        if (msg.role === 'tool') {
+            // Parse content back — could be JSON or plain text
+            let resultValue: unknown = msg.content;
+            try { resultValue = JSON.parse(msg.content ?? 'null'); } catch { /* keep as string */ }
+
+            if (!pendingToolTurn) {
+                pendingToolTurn = { role: 'user', parts: [] };
+                contents.push(pendingToolTurn);
+            }
+            pendingToolTurn.parts.push({
+                functionResponse: {
+                    name: msg.name ?? 'tool',
+                    response: { result: resultValue }
+                }
+            });
+            continue;
+        }
+
+        // Any non-tool message ends the current batch of tool results.
+        pendingToolTurn = null;
+
+        if (msg.role === 'assistant') {
+            const parts: any[] = [];
+            if (msg.content) parts.push({ text: msg.content });
+            for (const tc of msg.tool_calls ?? []) {
+                let args: Record<string, unknown> = {};
+                try {
+                    const parsed: unknown = JSON.parse(tc.function.arguments || '{}');
+                    // Only plain objects are usable as function-call args.
+                    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
+                        args = parsed as Record<string, unknown>;
+                    }
+                } catch { /* malformed arguments — fall back to {} */ }
+                parts.push({
+                    functionCall: {
+                        name: tc.function.name,
+                        args
+                    }
+                });
+            }
+            contents.push({ role: 'model', parts });
+        } else {
+            contents.push({ role: 'user', parts: [{ text: msg.content ?? '' }] });
+        }
+    }
+    return contents;
+}
+
+// ---------------------------------------------------------------------------
+// Factory — createLLM(modelId | tier)
+// ---------------------------------------------------------------------------
+
+// Tier letters accepted anywhere a model ID is accepted.
+export type ModelTier = 'A' | 'B' | 'C';
+
+// Tier → model ID table. The env vars are read once, when this module is first
+// loaded, so they must be set before import; unset vars fall back to the defaults.
+const TIER_MODELS: Record<ModelTier, string> = {
+    A: process.env.TIER_A_MODEL ?? 'gemini-2.5-flash',
+    B: process.env.TIER_B_MODEL ?? 'zai-org/glm-5-maas',
+    C: process.env.TIER_C_MODEL ?? 'zai-org/glm-5-maas'
+};
+
+/**
+ * Build an LLM client from either a tier letter or an explicit model ID.
+ *
+ * 'A' | 'B' | 'C' are looked up in TIER_MODELS; anything else is used verbatim.
+ * Model IDs starting with 'gemini-' get a GeminiClient; all others get a
+ * VertexOpenAIClient.
+ *
+ * @param modelOrTier Tier letter or model ID.
+ * @param opts        Optional settings; only `temperature` is forwarded.
+ */
+export function createLLM(modelOrTier: string | ModelTier, opts?: { temperature?: number }): LLMClient {
+    let resolvedId: string = modelOrTier;
+    if (modelOrTier === 'A' || modelOrTier === 'B' || modelOrTier === 'C') {
+        resolvedId = TIER_MODELS[modelOrTier];
+    }
+
+    const isGeminiModel = resolvedId.startsWith('gemini-');
+    if (!isGeminiModel) {
+        return new VertexOpenAIClient(resolvedId, { temperature: opts?.temperature });
+    }
+    return new GeminiClient(resolvedId, opts);
+}
+
+// ---------------------------------------------------------------------------
+// Helper — convert our ToolDefinition[] → LLMTool[] (OpenAI format)
+// ---------------------------------------------------------------------------
+
+/**
+ * Wrap each tool definition in the OpenAI `{ type: 'function', function: … }`
+ * envelope. Name, description and parameters are passed through unchanged and
+ * input order is preserved.
+ */
+export function toOAITools(
+    tools: Array<{ name: string; description: string; parameters: Record<string, unknown> }>
+): LLMTool[] {
+    const wrapped: LLMTool[] = [];
+    for (const def of tools) {
+        wrapped.push({
+            type: 'function',
+            function: {
+                name: def.name,
+                description: def.description,
+                parameters: def.parameters
+            }
+        });
+    }
+    return wrapped;
+}