diff --git a/.env.example b/.env.example index 11f6867..1d77531 100644 --- a/.env.example +++ b/.env.example @@ -1,23 +1,48 @@ -# Google AI — required for all agents +# --------------------------------------------------------------------------- +# AI Models — 3-tier routing +# --------------------------------------------------------------------------- + +# Tier A — fast/cheap: routing, summaries, log parsing (Gemini Flash) +TIER_A_MODEL=gemini-2.5-flash + +# Tier B — workhorse coder: features, diffs, standard bug fixes (GLM-5 on Vertex) +TIER_B_MODEL=zai-org/glm-5-maas + +# Tier C — premium escalation: architecture decisions, complex debugging +# Options: zai-org/glm-5-maas | anthropic/claude-sonnet-4-6 +TIER_C_MODEL=zai-org/glm-5-maas + +# Orchestrator model (defaults to Tier B if not set) +ORCHESTRATOR_MODEL=B + +# Tier A fallback — Gemini API key (required if using gemini-* models) GOOGLE_API_KEY=your-gemini-api-key -# Gitea — required for git push, issue management +# GCP project for Vertex AI (GLM-5, Claude Sonnet via Vertex) +GCP_PROJECT_ID=master-ai-484822 + +# --------------------------------------------------------------------------- +# Gitea — required for git push and issue management +# --------------------------------------------------------------------------- GITEA_API_URL=https://git.vibnai.com GITEA_API_TOKEN=your-gitea-token GITEA_USERNAME=your-gitea-username +# --------------------------------------------------------------------------- # Coolify — required for deployment tools +# --------------------------------------------------------------------------- COOLIFY_API_URL=https://coolify.vibnai.com COOLIFY_API_TOKEN=your-coolify-token +# --------------------------------------------------------------------------- # Server config +# --------------------------------------------------------------------------- PORT=3333 -# Base path where agent workspaces are cloned -# Each repo gets a subfolder: /workspaces/owner_reponame +# Base path where agent 
workspaces are cloned (owner_reponame subdirs) WORKSPACE_BASE=/workspaces -# Optional: internal URL of this service (used by spawn_agent to self-call) +# Internal URL of this service (used by spawn_agent to self-call) AGENT_RUNNER_URL=http://localhost:3333 # Optional: shared secret for validating Gitea webhook POSTs diff --git a/src/agent-runner.ts b/src/agent-runner.ts index 602e2d8..3230c56 100644 --- a/src/agent-runner.ts +++ b/src/agent-runner.ts @@ -1,22 +1,22 @@ -import { GoogleGenAI, Content, Tool, FunctionDeclaration } from '@google/genai'; +import { createLLM, toOAITools, LLMMessage } from './llm'; import { AgentConfig } from './agents'; import { executeTool, ToolContext } from './tools'; import { Job, updateJob } from './job-store'; -const MAX_TURNS = 40; // safety cap — prevents infinite loops +const MAX_TURNS = 40; export interface RunResult { finalText: string; toolCallCount: number; turns: number; + model: string; } /** - * Core Gemini agent loop. + * Core agent execution loop — model-agnostic via the unified LLM client. * - * Sends the task to Gemini with the agent's system prompt and tools, - * then loops: execute tool calls → send results back → repeat until - * the model stops calling tools or MAX_TURNS is reached. + * Agents use their configured model tier (A/B/C) or a specific model ID. + * Tool calling uses OpenAI format throughout. 
*/ export async function runAgent( job: Job, @@ -24,126 +24,79 @@ export async function runAgent( task: string, ctx: ToolContext ): Promise { - const apiKey = process.env.GOOGLE_API_KEY; - if (!apiKey) { - throw new Error('GOOGLE_API_KEY environment variable is not set'); - } + const llm = createLLM(config.model, { temperature: 0.2 }); + const oaiTools = toOAITools(config.tools); - const genai = new GoogleGenAI({ apiKey }); - - // Build Gemini function declarations from our tool definitions - const functionDeclarations: FunctionDeclaration[] = config.tools.map(tool => ({ - name: tool.name, - description: tool.description, - parameters: tool.parameters as any - })); - - const tools: Tool[] = functionDeclarations.length > 0 - ? [{ functionDeclarations }] - : []; - - const model = genai.models; - - // Build conversation history - const history: Content[] = []; - - // Initial user message - let currentMessage: Content = { - role: 'user', - parts: [{ text: task }] - }; + const history: LLMMessage[] = [ + { role: 'user', content: task } + ]; let toolCallCount = 0; let turn = 0; let finalText = ''; - updateJob(job.id, { status: 'running', progress: `Starting ${config.name} agent...` }); + updateJob(job.id, { status: 'running', progress: `Starting ${config.name} (${llm.modelId})…` }); while (turn < MAX_TURNS) { turn++; - // Add current message to history - history.push(currentMessage); + const messages: LLMMessage[] = [ + { role: 'system', content: config.systemPrompt }, + ...history + ]; - // Call Gemini - const response = await model.generateContent({ - model: config.model || 'gemini-2.0-flash', - contents: history, - config: { - systemInstruction: config.systemPrompt, - tools: tools.length > 0 ? 
tools : undefined, - temperature: 0.2, - maxOutputTokens: 8192 - } - }); + const response = await llm.chat(messages, oaiTools, 8192); - const candidate = response.candidates?.[0]; - if (!candidate) { - throw new Error('No response from Gemini'); - } - - // Add model response to history - const modelContent: Content = { - role: 'model', - parts: candidate.content?.parts || [] + // Build assistant message for history + const assistantMsg: LLMMessage = { + role: 'assistant', + content: response.content, + tool_calls: response.tool_calls.length > 0 ? response.tool_calls : undefined }; - history.push(modelContent); + history.push(assistantMsg); - // Extract function calls from the response - const functionCalls = candidate.content?.parts?.filter(p => p.functionCall) ?? []; - - if (functionCalls.length === 0) { - // No tool calls — the agent is done - finalText = candidate.content?.parts - ?.filter(p => p.text) - .map(p => p.text) - .join('') ?? ''; + // No tool calls — agent is done + if (response.tool_calls.length === 0) { + finalText = response.content ?? ''; break; } - // Execute all tool calls - const toolResultParts: any[] = []; - for (const part of functionCalls) { - const call = part.functionCall!; - const callName = call.name ?? 'unknown'; - const callArgs = (call.args ?? 
{}) as Record; - toolCallCount++; + // Execute tool calls + for (const tc of response.tool_calls) { + const fnName = tc.function.name; + let fnArgs: Record = {}; + try { fnArgs = JSON.parse(tc.function.arguments || '{}'); } catch { /* bad JSON */ } + toolCallCount++; updateJob(job.id, { - progress: `Turn ${turn}: calling ${callName}...`, + progress: `Turn ${turn}: calling ${fnName}…`, toolCalls: [...(job.toolCalls || []), { turn, - tool: callName, - args: callArgs, + tool: fnName, + args: fnArgs, timestamp: new Date().toISOString() }] }); let result: unknown; try { - result = await executeTool(callName, callArgs, ctx); + result = await executeTool(fnName, fnArgs, ctx); } catch (err) { result = { error: err instanceof Error ? err.message : String(err) }; } - toolResultParts.push({ - functionResponse: { - name: callName, - response: { result } - } + history.push({ + role: 'tool', + tool_call_id: tc.id, + name: fnName, + content: typeof result === 'string' ? result : JSON.stringify(result) }); } - - // Next turn: send tool results back to the model - currentMessage = { - role: 'user', - parts: toolResultParts - }; } if (turn >= MAX_TURNS && !finalText) { - finalText = `Agent reached the ${MAX_TURNS}-turn safety limit. Last tool call count: ${toolCallCount}.`; + finalText = `Agent hit the ${MAX_TURNS}-turn safety limit. 
Tool calls made: ${toolCallCount}.`; } - return { finalText, toolCallCount, turns: turn }; + return { finalText, toolCallCount, turns: turn, model: llm.modelId }; } diff --git a/src/agents.ts b/src/agents.ts index 952706b..ea2cec9 100644 --- a/src/agents.ts +++ b/src/agents.ts @@ -1,23 +1,23 @@ import { ToolDefinition, ALL_TOOLS } from './tools'; // --------------------------------------------------------------------------- -// Agent configuration — which tools each agent gets + system prompt +// Agent configuration // --------------------------------------------------------------------------- export interface AgentConfig { name: string; description: string; - model: string; + model: string; // model ID or tier ('A' | 'B' | 'C') systemPrompt: string; tools: ToolDefinition[]; } -const FILE_TOOLS = ['read_file', 'write_file', 'replace_in_file', 'list_directory', 'find_files', 'search_code']; -const SHELL_TOOLS = ['execute_command']; -const GIT_TOOLS = ['git_commit_and_push']; +const FILE_TOOLS = ['read_file', 'write_file', 'replace_in_file', 'list_directory', 'find_files', 'search_code']; +const SHELL_TOOLS = ['execute_command']; +const GIT_TOOLS = ['git_commit_and_push']; const COOLIFY_TOOLS = ['coolify_list_projects', 'coolify_list_applications', 'coolify_deploy', 'coolify_get_logs']; -const GITEA_TOOLS = ['gitea_create_issue', 'gitea_list_issues', 'gitea_close_issue']; -const SPAWN_TOOL = ['spawn_agent']; +const GITEA_TOOLS = ['gitea_create_issue', 'gitea_list_issues', 'gitea_close_issue']; +const SPAWN_TOOL = ['spawn_agent']; function pick(names: string[]): ToolDefinition[] { return ALL_TOOLS.filter(t => names.includes(t.name)); @@ -25,112 +25,109 @@ function pick(names: string[]): ToolDefinition[] { // --------------------------------------------------------------------------- // Agent definitions +// +// model is a tier ('A' | 'B' | 'C') or a specific model ID. +// Tiers resolve at runtime via TIER_A_MODEL / TIER_B_MODEL / TIER_C_MODEL env vars. 
+// +// Tier A = gemini-2.5-flash — fast, cheap: routing, summaries, monitoring +// Tier B = zai-org/glm-5-maas — workhorse coding model +// Tier C = zai-org/glm-5-maas — complex decisions (or Claude Sonnet via TIER_C_MODEL) // --------------------------------------------------------------------------- export const AGENTS: Record = { + Orchestrator: { name: 'Orchestrator', - description: 'Master coordinator that breaks down high-level goals and delegates to specialist agents', - model: 'gemini-2.5-flash', - systemPrompt: `You are the Orchestrator for Vibn, an autonomous AI system for software development. + description: 'Master coordinator — breaks down goals and delegates to specialist agents', + model: 'B', // GLM-5 — good planner, chain-of-thought reasoning + systemPrompt: `You are the Orchestrator for Vibn, an autonomous AI platform for software development. -Your role is to: -1. Understand the high-level goal provided in the task. -2. Break it down into concrete sub-tasks. -3. Delegate sub-tasks to the appropriate specialist agents using the spawn_agent tool. -4. Use Gitea to track progress: create an issue at the start, close it when done. -5. Summarize what was done when complete. +Your role: +1. Understand the high-level goal. +2. Break it into concrete sub-tasks. +3. Delegate to the right specialist agents via spawn_agent. +4. Track progress via Gitea issues. +5. Summarize results when done. -Available specialist agents and when to use them: -- **Coder**: Any code changes — features, bug fixes, refactors, tests. -- **PM**: Project management — issue triage, sprint planning, documentation updates. -- **Marketing**: Content and copy — blog posts, landing page copy, release notes. +Agents available: +- Coder: code changes, features, bug fixes, tests. +- PM: issue triage, docs, sprint planning. +- Marketing: copy, blog posts, release notes. Rules: -- Always create a Gitea issue first to track the work. 
-- Delegate to ONE agent at a time unless tasks are fully independent. -- Check back on progress by listing issues. -- Never try to write code yourself — delegate to Coder. -- Be concise in your task descriptions when spawning agents.`, +- Create a Gitea issue first to track the work. +- Delegate one agent at a time unless tasks are fully independent. +- Never write code yourself — delegate to Coder. +- Be specific in task descriptions when spawning agents.`, tools: pick([...GITEA_TOOLS, ...SPAWN_TOOL, ...COOLIFY_TOOLS]) }, Coder: { name: 'Coder', - description: 'Senior software engineer — writes, edits, and tests code. Commits and pushes when done.', - model: 'gemini-2.5-flash', - systemPrompt: `You are an expert senior software engineer working autonomously on a git repository. + description: 'Senior software engineer — writes, edits, tests, commits, and pushes code', + model: 'B', // GLM-5 — strong at code generation and diffs + systemPrompt: `You are an expert senior software engineer working autonomously on a Git repository. -Your job is to complete the coding task given to you. Follow these rules: - -**Workflow:** -1. Start by exploring the codebase: list_directory, find_files, read_file to understand structure. -2. Search for relevant code: search_code to find existing patterns. +Workflow: +1. Explore the codebase: list_directory, find_files, read_file. +2. Search for patterns: search_code. 3. Plan your changes before making them. 4. Read every file BEFORE editing it. 5. Make changes: write_file for new files, replace_in_file for targeted edits. -6. Run tests or lint if applicable: execute_command. -7. Commit and push when the task is complete: git_commit_and_push. +6. Run tests/lint if applicable: execute_command. +7. Commit and push when complete: git_commit_and_push. -**Code quality rules:** -- Match existing code style exactly. -- Never leave TODO comments — implement or skip. +Code quality: +- Match existing style exactly. 
+- No TODO comments — implement or skip. - Write complete files, not partial snippets. -- If tests exist, run them and fix failures before committing. -- Commit message should be concise and in imperative mood (e.g. "add user authentication"). +- Run tests and fix failures before committing. +- Commit messages: imperative mood, concise (e.g. "add user authentication"). -**Safety rules:** -- Never delete files unless explicitly instructed. -- Never modify .env files or credentials. +Safety: +- Never delete files unless explicitly told to. +- Never touch .env files or credentials. - Never commit secrets or API keys. -**If you were triggered by a Gitea issue:** -- After committing, close the issue using gitea_close_issue. -- The repo name is in the format "owner/name". - -Be methodical. Read before you write. Test before you commit.`, +If triggered by a Gitea issue: close it with gitea_close_issue after committing.`, tools: pick([...FILE_TOOLS, ...SHELL_TOOLS, ...GIT_TOOLS, ...GITEA_TOOLS]) }, PM: { name: 'PM', - description: 'Product manager — manages Gitea issues, writes documentation, tracks project health', - model: 'gemini-2.5-flash', + description: 'Product manager — docs, issue management, project health reports', + model: 'A', // Gemini Flash — lightweight, cheap for docs/issue work systemPrompt: `You are an autonomous Product Manager for a software project hosted on Gitea. -Your responsibilities: -1. Create, update, and close Gitea issues to track work. -2. Write and update documentation files in the repository. +Responsibilities: +1. Create, update, and close Gitea issues. +2. Write and update docs in the repository. 3. Summarize project state and create reports. -4. Prioritize and triage bugs/features based on impact. +4. Triage bugs and features by impact. -When writing documentation: -- Be clear and concise. -- Use markdown formatting. -- Focus on what users and developers need to know. -- Keep docs up to date with the actual codebase state. 
- -Always commit documentation updates after writing them.`, +When writing docs: +- Clear and concise. +- Markdown formatting. +- Keep docs in sync with the codebase. +- Always commit after writing.`, tools: pick([...GITEA_TOOLS, ...FILE_TOOLS, ...GIT_TOOLS]) }, Marketing: { name: 'Marketing', - description: 'Marketing specialist — writes copy, blog posts, release notes, and landing page content', - model: 'gemini-2.5-flash', + description: 'Marketing specialist — copy, blog posts, release notes, landing page content', + model: 'A', // Gemini Flash — cheap for content generation systemPrompt: `You are an autonomous Marketing specialist for a SaaS product called Vibn. -Vibn is a cloud-based AI-powered development environment. It helps development teams build faster with AI agents that can write code, manage projects, and deploy automatically. +Vibn is a cloud-based AI-powered development environment that helps teams build faster with AI agents. -Your responsibilities: -1. Write compelling marketing copy for landing pages, email campaigns, and social media. -2. Write technical blog posts that explain features in an accessible way. +Responsibilities: +1. Write landing page copy, emails, and social media content. +2. Write technical blog posts explaining features accessibly. 3. Write release notes that highlight user-facing value. -4. Ensure all copy is on-brand: professional, clear, forward-thinking, and developer-friendly. +4. Maintain brand voice: smart, confident, practical. No hype, no jargon. -Brand voice: Smart, confident, practical. No hype. No jargon. Show don't tell. - -When writing content, create actual files in the repository (e.g. blog/2026-02-release.md) and commit them.`, +Always create real files in the repo (e.g. 
blog/2026-02-release.md) and commit them.`, tools: pick([...FILE_TOOLS, ...GIT_TOOLS]) } }; diff --git a/src/llm.ts b/src/llm.ts new file mode 100644 index 0000000..0cc429d --- /dev/null +++ b/src/llm.ts @@ -0,0 +1,285 @@ +import { execSync } from 'child_process'; +import { GoogleGenAI } from '@google/genai'; +import { v4 as uuidv4 } from 'uuid'; + +// ============================================================================= +// Unified LLM client — OpenAI-compatible message format throughout +// +// Two backends: +// VertexOpenAIClient — for GLM-5, Claude Sonnet, etc. via Vertex global endpoint +// GeminiClient — for Gemini Flash/Pro via @google/genai SDK +// +// Model tier defaults (overridable via env): +// Tier A: gemini-2.5-flash ($0.15/$0.60 per 1M) — routing, summaries, log parsing +// Tier B: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — coding, feature work +// Tier C: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — complex decisions, escalation +// ============================================================================= + +// --------------------------------------------------------------------------- +// Shared message types (OpenAI format — used everywhere internally) +// --------------------------------------------------------------------------- + +export interface LLMMessage { + role: 'system' | 'user' | 'assistant' | 'tool'; + content: string | null; + tool_calls?: LLMToolCall[]; + tool_call_id?: string; // set on role=tool messages + name?: string; // function name on role=tool messages +} + +export interface LLMToolCall { + id: string; + type: 'function'; + function: { + name: string; + arguments: string; // JSON-encoded string + }; +} + +export interface LLMTool { + type: 'function'; + function: { + name: string; + description: string; + parameters: Record; + }; +} + +export interface LLMResponse { + content: string | null; + reasoning: string | null; // GLM-5 chain-of-thought + tool_calls: LLMToolCall[]; + finish_reason: string; + usage?: {
prompt_tokens: number; + completion_tokens: number; + total_tokens: number; + }; +} + +export interface LLMClient { + modelId: string; + chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens?: number): Promise; +} + +// --------------------------------------------------------------------------- +// Vertex AI OpenAI-compatible client +// Used for: zai-org/glm-5-maas, anthropic/claude-sonnet-4-6, etc. +// --------------------------------------------------------------------------- + +let _cachedToken = ''; +let _tokenExpiry = 0; + +function getVertexToken(): string { + const now = Date.now(); + if (_cachedToken && now < _tokenExpiry) return _cachedToken; + _cachedToken = execSync('gcloud auth print-access-token', { encoding: 'utf8' }).trim(); + _tokenExpiry = now + 55 * 60 * 1000; // tokens last 1hr, refresh at 55min + return _cachedToken; +} + +export class VertexOpenAIClient implements LLMClient { + modelId: string; + private projectId: string; + private region: string; + private temperature: number; + + constructor(modelId: string, opts?: { projectId?: string; region?: string; temperature?: number }) { + this.modelId = modelId; + this.projectId = opts?.projectId ?? process.env.GCP_PROJECT_ID ?? 'master-ai-484822'; + this.region = opts?.region ?? 'global'; + this.temperature = opts?.temperature ?? 0.3; + } + + async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 4096): Promise { + const token = getVertexToken(); + const base = this.region === 'global' + ? 
'https://aiplatform.googleapis.com' + : `https://${this.region}-aiplatform.googleapis.com`; + const url = `${base}/v1/projects/${this.projectId}/locations/${this.region}/endpoints/openapi/chat/completions`; + + const body: Record = { + model: this.modelId, + messages, + max_tokens: maxTokens, + temperature: this.temperature, + stream: false + }; + + if (tools && tools.length > 0) { + body.tools = tools; + body.tool_choice = 'auto'; + } + + const res = await fetch(url, { + method: 'POST', + headers: { + 'Authorization': `Bearer ${token}`, + 'Content-Type': 'application/json' + }, + body: JSON.stringify(body) + }); + + if (!res.ok) { + const errText = await res.text(); + // Force token refresh on 401 + if (res.status === 401) _tokenExpiry = 0; + throw new Error(`Vertex API ${res.status}: ${errText.slice(0, 400)}`); + } + + const data = await res.json() as any; + const choice = data.choices?.[0]; + const message = choice?.message ?? {}; + + return { + content: message.content ?? null, + reasoning: message.reasoning_content ?? null, + tool_calls: message.tool_calls ?? [], + finish_reason: choice?.finish_reason ?? 'stop', + usage: data.usage + }; + } +} + +// --------------------------------------------------------------------------- +// Gemini client via @google/genai SDK +// Used for: Tier A (fast/cheap routing, summaries, log parsing) +// Converts to/from OpenAI message format internally. +// --------------------------------------------------------------------------- + +export class GeminiClient implements LLMClient { + modelId: string; + private temperature: number; + + constructor(modelId = 'gemini-2.5-flash', opts?: { temperature?: number }) { + this.modelId = modelId; + this.temperature = opts?.temperature ?? 
0.2; + } + + async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 8192): Promise { + const apiKey = process.env.GOOGLE_API_KEY; + if (!apiKey) throw new Error('GOOGLE_API_KEY not set'); + + const genai = new GoogleGenAI({ apiKey }); + + const systemMsg = messages.find(m => m.role === 'system'); + const nonSystem = messages.filter(m => m.role !== 'system'); + + const functionDeclarations = (tools ?? []).map(t => ({ + name: t.function.name, + description: t.function.description, + parameters: t.function.parameters as any + })); + + const response = await genai.models.generateContent({ + model: this.modelId, + contents: toGeminiContents(nonSystem), + config: { + systemInstruction: systemMsg?.content ?? undefined, + tools: functionDeclarations.length > 0 ? [{ functionDeclarations }] : undefined, + temperature: this.temperature, + maxOutputTokens: maxTokens + } + }); + + const candidate = response.candidates?.[0]; + if (!candidate) throw new Error('No response from Gemini'); + + const parts = candidate.content?.parts ?? []; + const textContent = parts.filter(p => p.text).map(p => p.text).join('') || null; + const fnCalls = parts.filter(p => p.functionCall); + + const tool_calls: LLMToolCall[] = fnCalls.map(p => ({ + id: `call_${uuidv4().replace(/-/g, '').slice(0, 12)}`, + type: 'function' as const, + function: { + name: p.functionCall!.name ?? '', + arguments: JSON.stringify(p.functionCall!.args ?? {}) + } + })); + + return { + content: textContent, + reasoning: null, + tool_calls, + finish_reason: fnCalls.length > 0 ? 'tool_calls' : 'stop' + }; + } +} + +/** Convert OpenAI message format → Gemini Content[] format */ +function toGeminiContents(messages: LLMMessage[]): any[] { + const contents: any[] = []; + for (const msg of messages) { + if (msg.role === 'assistant') { + const parts: any[] = []; + if (msg.content) parts.push({ text: msg.content }); + for (const tc of msg.tool_calls ?? 
[]) { + parts.push({ + functionCall: { + name: tc.function.name, + args: JSON.parse(tc.function.arguments || '{}') + } + }); + } + contents.push({ role: 'model', parts }); + } else if (msg.role === 'tool') { + // Parse content back — could be JSON or plain text + let resultValue: unknown = msg.content; + try { resultValue = JSON.parse(msg.content ?? 'null'); } catch { /* keep as string */ } + contents.push({ + role: 'user', + parts: [{ + functionResponse: { + name: msg.name ?? 'tool', + response: { result: resultValue } + } + }] + }); + } else { + contents.push({ role: 'user', parts: [{ text: msg.content ?? '' }] }); + } + } + return contents; +} + +// --------------------------------------------------------------------------- +// Factory — createLLM(modelId | tier) +// --------------------------------------------------------------------------- + +export type ModelTier = 'A' | 'B' | 'C'; + +const TIER_MODELS: Record = { + A: process.env.TIER_A_MODEL ?? 'gemini-2.5-flash', + B: process.env.TIER_B_MODEL ?? 'zai-org/glm-5-maas', + C: process.env.TIER_C_MODEL ?? 'zai-org/glm-5-maas' +}; + +export function createLLM(modelOrTier: string | ModelTier, opts?: { temperature?: number }): LLMClient { + const modelId = (modelOrTier === 'A' || modelOrTier === 'B' || modelOrTier === 'C') + ? 
TIER_MODELS[modelOrTier] + : modelOrTier; + + if (modelId.startsWith('gemini-')) { + return new GeminiClient(modelId, opts); + } + + return new VertexOpenAIClient(modelId, { temperature: opts?.temperature }); +} + +// --------------------------------------------------------------------------- +// Helper — convert our ToolDefinition[] → LLMTool[] (OpenAI format) +// --------------------------------------------------------------------------- + +export function toOAITools( + tools: Array<{ name: string; description: string; parameters: Record }> +): LLMTool[] { + return tools.map(t => ({ + type: 'function', + function: { + name: t.name, + description: t.description, + parameters: t.parameters + } + })); +}