fix: add all missing source files (llm.ts, updated agent-runner/agents)

src/llm.ts was never committed — this caused the Docker build to fail
with "Cannot find module './llm'". Also commit updated agent-runner.ts,
agents.ts, and .env.example that reference the new LLM client.

Made-with: Cursor
This commit is contained in:
2026-02-27 19:33:59 -08:00
parent f2fc976637
commit 59fe313963
4 changed files with 426 additions and 166 deletions

View File

@@ -1,23 +1,48 @@
# Google AI — required for all agents # ---------------------------------------------------------------------------
# AI Models — 3-tier routing
# ---------------------------------------------------------------------------
# Tier A — fast/cheap: routing, summaries, log parsing (Gemini Flash)
TIER_A_MODEL=gemini-2.5-flash
# Tier B — workhorse coder: features, diffs, standard bug fixes (GLM-5 on Vertex)
TIER_B_MODEL=zai-org/glm-5-maas
# Tier C — premium escalation: architecture decisions, complex debugging
# Options: zai-org/glm-5-maas | anthropic/claude-sonnet-4-6
TIER_C_MODEL=zai-org/glm-5-maas
# Orchestrator model (defaults to Tier B if not set)
ORCHESTRATOR_MODEL=B
# Tier A fallback — Gemini API key (required if using gemini-* models)
GOOGLE_API_KEY=your-gemini-api-key GOOGLE_API_KEY=your-gemini-api-key
# Gitea — required for git push, issue management # GCP project for Vertex AI (GLM-5, Claude Sonnet via Vertex)
GCP_PROJECT_ID=master-ai-484822
# ---------------------------------------------------------------------------
# Gitea — required for git push and issue management
# ---------------------------------------------------------------------------
GITEA_API_URL=https://git.vibnai.com GITEA_API_URL=https://git.vibnai.com
GITEA_API_TOKEN=your-gitea-token GITEA_API_TOKEN=your-gitea-token
GITEA_USERNAME=your-gitea-username GITEA_USERNAME=your-gitea-username
# ---------------------------------------------------------------------------
# Coolify — required for deployment tools # Coolify — required for deployment tools
# ---------------------------------------------------------------------------
COOLIFY_API_URL=https://coolify.vibnai.com COOLIFY_API_URL=https://coolify.vibnai.com
COOLIFY_API_TOKEN=your-coolify-token COOLIFY_API_TOKEN=your-coolify-token
# ---------------------------------------------------------------------------
# Server config # Server config
# ---------------------------------------------------------------------------
PORT=3333 PORT=3333
# Base path where agent workspaces are cloned # Base path where agent workspaces are cloned (owner_reponame subdirs)
# Each repo gets a subfolder: /workspaces/owner_reponame
WORKSPACE_BASE=/workspaces WORKSPACE_BASE=/workspaces
# Optional: internal URL of this service (used by spawn_agent to self-call) # Internal URL of this service (used by spawn_agent to self-call)
AGENT_RUNNER_URL=http://localhost:3333 AGENT_RUNNER_URL=http://localhost:3333
# Optional: shared secret for validating Gitea webhook POSTs # Optional: shared secret for validating Gitea webhook POSTs

View File

@@ -1,22 +1,22 @@
import { GoogleGenAI, Content, Tool, FunctionDeclaration } from '@google/genai'; import { createLLM, toOAITools, LLMMessage } from './llm';
import { AgentConfig } from './agents'; import { AgentConfig } from './agents';
import { executeTool, ToolContext } from './tools'; import { executeTool, ToolContext } from './tools';
import { Job, updateJob } from './job-store'; import { Job, updateJob } from './job-store';
const MAX_TURNS = 40; // safety cap — prevents infinite loops const MAX_TURNS = 40;
export interface RunResult { export interface RunResult {
finalText: string; finalText: string;
toolCallCount: number; toolCallCount: number;
turns: number; turns: number;
model: string;
} }
/** /**
* Core Gemini agent loop. * Core agent execution loop — model-agnostic via the unified LLM client.
* *
* Sends the task to Gemini with the agent's system prompt and tools, * Agents use their configured model tier (A/B/C) or a specific model ID.
* then loops: execute tool calls → send results back → repeat until * Tool calling uses OpenAI format throughout.
* the model stops calling tools or MAX_TURNS is reached.
*/ */
export async function runAgent( export async function runAgent(
job: Job, job: Job,
@@ -24,126 +24,79 @@ export async function runAgent(
task: string, task: string,
ctx: ToolContext ctx: ToolContext
): Promise<RunResult> { ): Promise<RunResult> {
const apiKey = process.env.GOOGLE_API_KEY; const llm = createLLM(config.model, { temperature: 0.2 });
if (!apiKey) { const oaiTools = toOAITools(config.tools);
throw new Error('GOOGLE_API_KEY environment variable is not set');
}
const genai = new GoogleGenAI({ apiKey }); const history: LLMMessage[] = [
{ role: 'user', content: task }
// Build Gemini function declarations from our tool definitions ];
const functionDeclarations: FunctionDeclaration[] = config.tools.map(tool => ({
name: tool.name,
description: tool.description,
parameters: tool.parameters as any
}));
const tools: Tool[] = functionDeclarations.length > 0
? [{ functionDeclarations }]
: [];
const model = genai.models;
// Build conversation history
const history: Content[] = [];
// Initial user message
let currentMessage: Content = {
role: 'user',
parts: [{ text: task }]
};
let toolCallCount = 0; let toolCallCount = 0;
let turn = 0; let turn = 0;
let finalText = ''; let finalText = '';
updateJob(job.id, { status: 'running', progress: `Starting ${config.name} agent...` }); updateJob(job.id, { status: 'running', progress: `Starting ${config.name} (${llm.modelId})…` });
while (turn < MAX_TURNS) { while (turn < MAX_TURNS) {
turn++; turn++;
// Add current message to history const messages: LLMMessage[] = [
history.push(currentMessage); { role: 'system', content: config.systemPrompt },
...history
];
// Call Gemini const response = await llm.chat(messages, oaiTools, 8192);
const response = await model.generateContent({
model: config.model || 'gemini-2.0-flash',
contents: history,
config: {
systemInstruction: config.systemPrompt,
tools: tools.length > 0 ? tools : undefined,
temperature: 0.2,
maxOutputTokens: 8192
}
});
const candidate = response.candidates?.[0]; // Build assistant message for history
if (!candidate) { const assistantMsg: LLMMessage = {
throw new Error('No response from Gemini'); role: 'assistant',
} content: response.content,
tool_calls: response.tool_calls.length > 0 ? response.tool_calls : undefined
// Add model response to history
const modelContent: Content = {
role: 'model',
parts: candidate.content?.parts || []
}; };
history.push(modelContent); history.push(assistantMsg);
// Extract function calls from the response // No tool calls — agent is done
const functionCalls = candidate.content?.parts?.filter(p => p.functionCall) ?? []; if (response.tool_calls.length === 0) {
finalText = response.content ?? '';
if (functionCalls.length === 0) {
// No tool calls — the agent is done
finalText = candidate.content?.parts
?.filter(p => p.text)
.map(p => p.text)
.join('') ?? '';
break; break;
} }
// Execute all tool calls // Execute tool calls
const toolResultParts: any[] = []; for (const tc of response.tool_calls) {
for (const part of functionCalls) { const fnName = tc.function.name;
const call = part.functionCall!; let fnArgs: Record<string, unknown> = {};
const callName = call.name ?? 'unknown'; try { fnArgs = JSON.parse(tc.function.arguments || '{}'); } catch { /* bad JSON */ }
const callArgs = (call.args ?? {}) as Record<string, unknown>;
toolCallCount++;
toolCallCount++;
updateJob(job.id, { updateJob(job.id, {
progress: `Turn ${turn}: calling ${callName}...`, progress: `Turn ${turn}: calling ${fnName}`,
toolCalls: [...(job.toolCalls || []), { toolCalls: [...(job.toolCalls || []), {
turn, turn,
tool: callName, tool: fnName,
args: callArgs, args: fnArgs,
timestamp: new Date().toISOString() timestamp: new Date().toISOString()
}] }]
}); });
let result: unknown; let result: unknown;
try { try {
result = await executeTool(callName, callArgs, ctx); result = await executeTool(fnName, fnArgs, ctx);
} catch (err) { } catch (err) {
result = { error: err instanceof Error ? err.message : String(err) }; result = { error: err instanceof Error ? err.message : String(err) };
} }
toolResultParts.push({ history.push({
functionResponse: { role: 'tool',
name: callName, tool_call_id: tc.id,
response: { result } name: fnName,
} content: typeof result === 'string' ? result : JSON.stringify(result)
}); });
} }
// Next turn: send tool results back to the model
currentMessage = {
role: 'user',
parts: toolResultParts
};
} }
if (turn >= MAX_TURNS && !finalText) { if (turn >= MAX_TURNS && !finalText) {
finalText = `Agent reached the ${MAX_TURNS}-turn safety limit. Last tool call count: ${toolCallCount}.`; finalText = `Agent hit the ${MAX_TURNS}-turn safety limit. Tool calls made: ${toolCallCount}.`;
} }
return { finalText, toolCallCount, turns: turn }; return { finalText, toolCallCount, turns: turn, model: llm.modelId };
} }

View File

@@ -1,23 +1,23 @@
import { ToolDefinition, ALL_TOOLS } from './tools'; import { ToolDefinition, ALL_TOOLS } from './tools';
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Agent configuration — which tools each agent gets + system prompt // Agent configuration
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
export interface AgentConfig { export interface AgentConfig {
name: string; name: string;
description: string; description: string;
model: string; model: string; // model ID or tier ('A' | 'B' | 'C')
systemPrompt: string; systemPrompt: string;
tools: ToolDefinition[]; tools: ToolDefinition[];
} }
const FILE_TOOLS = ['read_file', 'write_file', 'replace_in_file', 'list_directory', 'find_files', 'search_code']; const FILE_TOOLS = ['read_file', 'write_file', 'replace_in_file', 'list_directory', 'find_files', 'search_code'];
const SHELL_TOOLS = ['execute_command']; const SHELL_TOOLS = ['execute_command'];
const GIT_TOOLS = ['git_commit_and_push']; const GIT_TOOLS = ['git_commit_and_push'];
const COOLIFY_TOOLS = ['coolify_list_projects', 'coolify_list_applications', 'coolify_deploy', 'coolify_get_logs']; const COOLIFY_TOOLS = ['coolify_list_projects', 'coolify_list_applications', 'coolify_deploy', 'coolify_get_logs'];
const GITEA_TOOLS = ['gitea_create_issue', 'gitea_list_issues', 'gitea_close_issue']; const GITEA_TOOLS = ['gitea_create_issue', 'gitea_list_issues', 'gitea_close_issue'];
const SPAWN_TOOL = ['spawn_agent']; const SPAWN_TOOL = ['spawn_agent'];
function pick(names: string[]): ToolDefinition[] { function pick(names: string[]): ToolDefinition[] {
return ALL_TOOLS.filter(t => names.includes(t.name)); return ALL_TOOLS.filter(t => names.includes(t.name));
@@ -25,112 +25,109 @@ function pick(names: string[]): ToolDefinition[] {
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Agent definitions // Agent definitions
//
// model is a tier ('A' | 'B' | 'C') or a specific model ID.
// Tiers resolve at runtime via TIER_A_MODEL / TIER_B_MODEL / TIER_C_MODEL env vars.
//
// Tier A = gemini-2.5-flash — fast, cheap: routing, summaries, monitoring
// Tier B = zai-org/glm-5-maas — workhorse coding model
// Tier C = zai-org/glm-5-maas — complex decisions (or Claude Sonnet via TIER_C_MODEL)
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
export const AGENTS: Record<string, AgentConfig> = { export const AGENTS: Record<string, AgentConfig> = {
Orchestrator: { Orchestrator: {
name: 'Orchestrator', name: 'Orchestrator',
description: 'Master coordinator that breaks down high-level goals and delegates to specialist agents', description: 'Master coordinator breaks down goals and delegates to specialist agents',
model: 'gemini-2.5-flash', model: 'B', // GLM-5 — good planner, chain-of-thought reasoning
systemPrompt: `You are the Orchestrator for Vibn, an autonomous AI system for software development. systemPrompt: `You are the Orchestrator for Vibn, an autonomous AI platform for software development.
Your role is to: Your role:
1. Understand the high-level goal provided in the task. 1. Understand the high-level goal.
2. Break it down into concrete sub-tasks. 2. Break it into concrete sub-tasks.
3. Delegate sub-tasks to the appropriate specialist agents using the spawn_agent tool. 3. Delegate to the right specialist agents via spawn_agent.
4. Use Gitea to track progress: create an issue at the start, close it when done. 4. Track progress via Gitea issues.
5. Summarize what was done when complete. 5. Summarize results when done.
Available specialist agents and when to use them: Agents available:
- **Coder**: Any code changes features, bug fixes, refactors, tests. - Coder: code changes, features, bug fixes, tests.
- **PM**: Project management — issue triage, sprint planning, documentation updates. - PM: issue triage, docs, sprint planning.
- **Marketing**: Content and copy blog posts, landing page copy, release notes. - Marketing: copy, blog posts, release notes.
Rules: Rules:
- Always create a Gitea issue first to track the work. - Create a Gitea issue first to track the work.
- Delegate to ONE agent at a time unless tasks are fully independent. - Delegate one agent at a time unless tasks are fully independent.
- Check back on progress by listing issues. - Never write code yourself — delegate to Coder.
- Never try to write code yourself — delegate to Coder. - Be specific in task descriptions when spawning agents.`,
- Be concise in your task descriptions when spawning agents.`,
tools: pick([...GITEA_TOOLS, ...SPAWN_TOOL, ...COOLIFY_TOOLS]) tools: pick([...GITEA_TOOLS, ...SPAWN_TOOL, ...COOLIFY_TOOLS])
}, },
Coder: { Coder: {
name: 'Coder', name: 'Coder',
description: 'Senior software engineer — writes, edits, and tests code. Commits and pushes when done.', description: 'Senior software engineer — writes, edits, tests, commits, and pushes code',
model: 'gemini-2.5-flash', model: 'B', // GLM-5 — strong at code generation and diffs
systemPrompt: `You are an expert senior software engineer working autonomously on a git repository. systemPrompt: `You are an expert senior software engineer working autonomously on a Git repository.
Your job is to complete the coding task given to you. Follow these rules: Workflow:
1. Explore the codebase: list_directory, find_files, read_file.
**Workflow:** 2. Search for patterns: search_code.
1. Start by exploring the codebase: list_directory, find_files, read_file to understand structure.
2. Search for relevant code: search_code to find existing patterns.
3. Plan your changes before making them. 3. Plan your changes before making them.
4. Read every file BEFORE editing it. 4. Read every file BEFORE editing it.
5. Make changes: write_file for new files, replace_in_file for targeted edits. 5. Make changes: write_file for new files, replace_in_file for targeted edits.
6. Run tests or lint if applicable: execute_command. 6. Run tests/lint if applicable: execute_command.
7. Commit and push when the task is complete: git_commit_and_push. 7. Commit and push when complete: git_commit_and_push.
**Code quality rules:** Code quality:
- Match existing code style exactly. - Match existing style exactly.
- Never leave TODO comments — implement or skip. - No TODO comments — implement or skip.
- Write complete files, not partial snippets. - Write complete files, not partial snippets.
- If tests exist, run them and fix failures before committing. - Run tests and fix failures before committing.
- Commit message should be concise and in imperative mood (e.g. "add user authentication"). - Commit messages: imperative mood, concise (e.g. "add user authentication").
**Safety rules:** Safety:
- Never delete files unless explicitly instructed. - Never delete files unless explicitly told to.
- Never modify .env files or credentials. - Never touch .env files or credentials.
- Never commit secrets or API keys. - Never commit secrets or API keys.
**If you were triggered by a Gitea issue:** If triggered by a Gitea issue: close it with gitea_close_issue after committing.`,
- After committing, close the issue using gitea_close_issue.
- The repo name is in the format "owner/name".
Be methodical. Read before you write. Test before you commit.`,
tools: pick([...FILE_TOOLS, ...SHELL_TOOLS, ...GIT_TOOLS, ...GITEA_TOOLS]) tools: pick([...FILE_TOOLS, ...SHELL_TOOLS, ...GIT_TOOLS, ...GITEA_TOOLS])
}, },
PM: { PM: {
name: 'PM', name: 'PM',
description: 'Product manager — manages Gitea issues, writes documentation, tracks project health', description: 'Product manager — docs, issue management, project health reports',
model: 'gemini-2.5-flash', model: 'A', // Gemini Flash — lightweight, cheap for docs/issue work
systemPrompt: `You are an autonomous Product Manager for a software project hosted on Gitea. systemPrompt: `You are an autonomous Product Manager for a software project hosted on Gitea.
Your responsibilities: Responsibilities:
1. Create, update, and close Gitea issues to track work. 1. Create, update, and close Gitea issues.
2. Write and update documentation files in the repository. 2. Write and update docs in the repository.
3. Summarize project state and create reports. 3. Summarize project state and create reports.
4. Prioritize and triage bugs/features based on impact. 4. Triage bugs and features by impact.
When writing documentation: When writing docs:
- Be clear and concise. - Clear and concise.
- Use markdown formatting. - Markdown formatting.
- Focus on what users and developers need to know. - Keep docs in sync with the codebase.
- Keep docs up to date with the actual codebase state. - Always commit after writing.`,
Always commit documentation updates after writing them.`,
tools: pick([...GITEA_TOOLS, ...FILE_TOOLS, ...GIT_TOOLS]) tools: pick([...GITEA_TOOLS, ...FILE_TOOLS, ...GIT_TOOLS])
}, },
Marketing: { Marketing: {
name: 'Marketing', name: 'Marketing',
description: 'Marketing specialist — writes copy, blog posts, release notes, and landing page content', description: 'Marketing specialist — copy, blog posts, release notes, landing page content',
model: 'gemini-2.5-flash', model: 'A', // Gemini Flash — cheap for content generation
systemPrompt: `You are an autonomous Marketing specialist for a SaaS product called Vibn. systemPrompt: `You are an autonomous Marketing specialist for a SaaS product called Vibn.
Vibn is a cloud-based AI-powered development environment. It helps development teams build faster with AI agents that can write code, manage projects, and deploy automatically. Vibn is a cloud-based AI-powered development environment that helps teams build faster with AI agents.
Your responsibilities: Responsibilities:
1. Write compelling marketing copy for landing pages, email campaigns, and social media. 1. Write landing page copy, emails, and social media content.
2. Write technical blog posts that explain features in an accessible way. 2. Write technical blog posts explaining features accessibly.
3. Write release notes that highlight user-facing value. 3. Write release notes that highlight user-facing value.
4. Ensure all copy is on-brand: professional, clear, forward-thinking, and developer-friendly. 4. Maintain brand voice: smart, confident, practical. No hype, no jargon.
Brand voice: Smart, confident, practical. No hype. No jargon. Show don't tell. Always create real files in the repo (e.g. blog/2026-02-release.md) and commit them.`,
When writing content, create actual files in the repository (e.g. blog/2026-02-release.md) and commit them.`,
tools: pick([...FILE_TOOLS, ...GIT_TOOLS]) tools: pick([...FILE_TOOLS, ...GIT_TOOLS])
} }
}; };

285
src/llm.ts Normal file
View File

@@ -0,0 +1,285 @@
import { execSync } from 'child_process';
import { GoogleGenAI } from '@google/genai';
import { v4 as uuidv4 } from 'uuid';
// =============================================================================
// Unified LLM client — OpenAI-compatible message format throughout
//
// Two backends:
// VertexOpenAIClient — for GLM-5, Claude Sonnet, etc. via Vertex global endpoint
// GeminiFlashClient — for Gemini Flash/Pro via @google/genai SDK
//
// Model tier defaults (overridable via env):
// Tier A: gemini-2.5-flash ($0.15/$0.60 per 1M) — routing, summaries, log parsing
// Tier B: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — coding, feature work
// Tier C: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — complex decisions, escalation
// =============================================================================
// ---------------------------------------------------------------------------
// Shared message types (OpenAI format — used everywhere internally)
// ---------------------------------------------------------------------------
/**
 * A single chat message in OpenAI chat-completions format.
 * This is the shared wire shape for both backends: the Vertex client sends
 * it as-is; the Gemini client converts it via toGeminiContents().
 */
export interface LLMMessage {
  role: 'system' | 'user' | 'assistant' | 'tool';
  content: string | null; // null when an assistant turn is tool-calls only
  tool_calls?: LLMToolCall[]; // present on assistant messages that request tools
  tool_call_id?: string; // set on role=tool messages — links the result to its call
  name?: string; // function name on role=tool messages
}

/** One tool invocation requested by the model (OpenAI format). */
export interface LLMToolCall {
  id: string; // opaque call id; callers echo it back as tool_call_id
  type: 'function';
  function: {
    name: string;
    arguments: string; // JSON-encoded string — may be malformed, so callers should parse defensively
  };
}

/** A function-tool declaration advertised to the model (OpenAI format). */
export interface LLMTool {
  type: 'function';
  function: {
    name: string;
    description: string;
    parameters: Record<string, unknown>; // JSON Schema describing the arguments
  };
}

/** Normalized completion result returned by every backend. */
export interface LLMResponse {
  content: string | null; // assistant text, or null when the turn is tool-calls only
  reasoning: string | null; // GLM-5 chain-of-thought (reasoning_content); null from Gemini
  tool_calls: LLMToolCall[]; // empty array when no tools were called
  finish_reason: string;
  // Token accounting — optional; only set when the backend reports it.
  usage?: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}

/** Minimal contract implemented by both backend clients. */
export interface LLMClient {
  modelId: string;
  chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens?: number): Promise<LLMResponse>;
}
// ---------------------------------------------------------------------------
// Vertex AI OpenAI-compatible client
// Used for: zai-org/glm-5-maas, anthropic/claude-sonnet-4-6, etc.
// ---------------------------------------------------------------------------
// Module-level token cache. _tokenExpiry is also reset to 0 externally
// (by the Vertex client on a 401) to force a refresh on the next call.
let _cachedToken = '';
let _tokenExpiry = 0;

// gcloud access tokens last one hour; refresh at 55 minutes to stay safe.
const TOKEN_TTL_MS = 55 * 60 * 1000;

/**
 * Return a gcloud access token for Vertex AI, shelling out to
 * `gcloud auth print-access-token` only when the cache is empty or stale.
 */
function getVertexToken(): string {
  const now = Date.now();
  const cacheIsFresh = _cachedToken !== '' && now < _tokenExpiry;
  if (!cacheIsFresh) {
    const raw = execSync('gcloud auth print-access-token', { encoding: 'utf8' });
    _cachedToken = raw.trim();
    _tokenExpiry = now + TOKEN_TTL_MS;
  }
  return _cachedToken;
}
/**
 * OpenAI-compatible chat client for models served through the Vertex AI
 * "openapi" endpoint (e.g. zai-org/glm-5-maas, anthropic/claude-sonnet-4-6).
 *
 * Messages and tools pass through untouched because this endpoint speaks
 * the OpenAI chat-completions wire format.
 */
export class VertexOpenAIClient implements LLMClient {
  modelId: string;
  private projectId: string;
  private region: string;
  private temperature: number;

  constructor(modelId: string, opts?: { projectId?: string; region?: string; temperature?: number }) {
    this.modelId = modelId;
    this.projectId = opts?.projectId ?? process.env.GCP_PROJECT_ID ?? 'master-ai-484822';
    this.region = opts?.region ?? 'global';
    this.temperature = opts?.temperature ?? 0.3;
  }

  /**
   * Send one chat-completion request.
   *
   * @param messages  Conversation in OpenAI format (system/user/assistant/tool).
   * @param tools     Optional function declarations; sets tool_choice=auto.
   * @param maxTokens Completion token cap (default 4096).
   * @throws Error on a non-2xx API response (after one retry on 401).
   */
  async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 4096): Promise<LLMResponse> {
    const base = this.region === 'global'
      ? 'https://aiplatform.googleapis.com'
      : `https://${this.region}-aiplatform.googleapis.com`;
    const url = `${base}/v1/projects/${this.projectId}/locations/${this.region}/endpoints/openapi/chat/completions`;

    const body: Record<string, unknown> = {
      model: this.modelId,
      messages,
      max_tokens: maxTokens,
      temperature: this.temperature,
      stream: false
    };
    if (tools && tools.length > 0) {
      body.tools = tools;
      body.tool_choice = 'auto';
    }
    const payload = JSON.stringify(body);

    // The cached gcloud token can go stale before its 55-minute window is up
    // (revocation, clock skew). On a 401, invalidate the cache and retry once
    // with a fresh token instead of failing the caller's request outright.
    let res = await this.post(url, payload);
    if (res.status === 401) {
      _tokenExpiry = 0; // force getVertexToken() to re-fetch
      res = await this.post(url, payload);
    }
    if (!res.ok) {
      const errText = await res.text();
      if (res.status === 401) _tokenExpiry = 0; // still unauthorized — refresh next call too
      throw new Error(`Vertex API ${res.status}: ${errText.slice(0, 400)}`);
    }

    const data = await res.json() as any;
    const choice = data.choices?.[0];
    const message = choice?.message ?? {};
    return {
      content: message.content ?? null,
      reasoning: message.reasoning_content ?? null, // GLM-5 chain-of-thought field
      tool_calls: message.tool_calls ?? [],
      finish_reason: choice?.finish_reason ?? 'stop',
      usage: data.usage
    };
  }

  /** POST the JSON payload with a (cached) gcloud bearer token. */
  private post(url: string, payload: string) {
    return fetch(url, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${getVertexToken()}`,
        'Content-Type': 'application/json'
      },
      body: payload
    });
  }
}
// ---------------------------------------------------------------------------
// Gemini client via @google/genai SDK
// Used for: Tier A (fast/cheap routing, summaries, log parsing)
// Converts to/from OpenAI message format internally.
// ---------------------------------------------------------------------------
/**
 * Gemini backend via the @google/genai SDK — used for Tier A work
 * (routing, summaries, log parsing).
 *
 * Accepts and returns the shared OpenAI-format types, converting to and
 * from Gemini's Content[] representation internally.
 */
export class GeminiClient implements LLMClient {
  modelId: string;
  private temperature: number;

  constructor(modelId = 'gemini-2.5-flash', opts?: { temperature?: number }) {
    this.modelId = modelId;
    this.temperature = opts?.temperature ?? 0.2;
  }

  /**
   * Send one generateContent request.
   *
   * @param messages  Conversation in OpenAI format; the system message is
   *                  lifted out into Gemini's systemInstruction.
   * @param tools     Optional OpenAI-format tools, converted to functionDeclarations.
   * @param maxTokens Output token cap (default 8192).
   * @throws Error when GOOGLE_API_KEY is unset or Gemini returns no candidate.
   */
  async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 8192): Promise<LLMResponse> {
    // Read the key lazily (per call) so env loaded after module import works.
    const apiKey = process.env.GOOGLE_API_KEY;
    if (!apiKey) throw new Error('GOOGLE_API_KEY not set');
    const genai = new GoogleGenAI({ apiKey });

    const systemMsg = messages.find(m => m.role === 'system');
    const nonSystem = messages.filter(m => m.role !== 'system');

    const functionDeclarations = (tools ?? []).map(t => ({
      name: t.function.name,
      description: t.function.description,
      parameters: t.function.parameters as any
    }));

    const response = await genai.models.generateContent({
      model: this.modelId,
      contents: toGeminiContents(nonSystem),
      config: {
        systemInstruction: systemMsg?.content ?? undefined,
        tools: functionDeclarations.length > 0 ? [{ functionDeclarations }] : undefined,
        temperature: this.temperature,
        maxOutputTokens: maxTokens
      }
    });

    const candidate = response.candidates?.[0];
    if (!candidate) throw new Error('No response from Gemini');

    const parts = candidate.content?.parts ?? [];
    const textContent = parts.filter(p => p.text).map(p => p.text).join('') || null;

    // Gemini function calls carry no id, so synthesize OpenAI-style ids;
    // the agent loop echoes them back as tool_call_id on role=tool messages.
    const fnCalls = parts.filter(p => p.functionCall);
    const tool_calls: LLMToolCall[] = fnCalls.map(p => ({
      id: `call_${uuidv4().replace(/-/g, '').slice(0, 12)}`,
      type: 'function' as const,
      function: {
        name: p.functionCall!.name ?? '',
        arguments: JSON.stringify(p.functionCall!.args ?? {})
      }
    }));

    // Surface token usage so Gemini responses match the Vertex backend,
    // which already fills LLMResponse.usage. The SDK reports this as
    // usageMetadata on the response.
    const um = (response as any).usageMetadata;
    return {
      content: textContent,
      reasoning: null, // Gemini exposes no chain-of-thought field here
      tool_calls,
      finish_reason: fnCalls.length > 0 ? 'tool_calls' : 'stop',
      usage: um
        ? {
            prompt_tokens: um.promptTokenCount ?? 0,
            completion_tokens: um.candidatesTokenCount ?? 0,
            total_tokens: um.totalTokenCount ?? 0
          }
        : undefined
    };
  }
}
/**
 * Convert OpenAI-format messages into Gemini `Content[]`.
 *
 * assistant → role "model": optional text part plus one functionCall part
 *             per tool call (arguments string re-parsed into an object).
 * tool      → role "user": a functionResponse part; the content string is
 *             parsed as JSON when possible, otherwise kept verbatim.
 * other     → role "user": a plain text part.
 */
function toGeminiContents(messages: LLMMessage[]): any[] {
  return messages.map((msg): any => {
    if (msg.role === 'assistant') {
      const parts: any[] = [];
      if (msg.content) parts.push({ text: msg.content });
      const calls = msg.tool_calls ?? [];
      parts.push(
        ...calls.map(call => ({
          functionCall: {
            name: call.function.name,
            args: JSON.parse(call.function.arguments || '{}')
          }
        }))
      );
      return { role: 'model', parts };
    }

    if (msg.role === 'tool') {
      // Tool output may be JSON or plain text; prefer structured when parseable.
      let result: unknown = msg.content;
      try {
        result = JSON.parse(msg.content ?? 'null');
      } catch {
        // not JSON — keep the raw string
      }
      return {
        role: 'user',
        parts: [
          {
            functionResponse: {
              name: msg.name ?? 'tool',
              response: { result }
            }
          }
        ]
      };
    }

    return { role: 'user', parts: [{ text: msg.content ?? '' }] };
  });
}
// ---------------------------------------------------------------------------
// Factory — createLLM(modelId | tier)
// ---------------------------------------------------------------------------
export type ModelTier = 'A' | 'B' | 'C';

// Built-in per-tier defaults; TIER_A_MODEL / TIER_B_MODEL / TIER_C_MODEL
// env vars override them at call time.
const TIER_DEFAULTS: Record<ModelTier, string> = {
  A: 'gemini-2.5-flash',
  B: 'zai-org/glm-5-maas',
  C: 'zai-org/glm-5-maas'
};

/**
 * Resolve a tier letter to a concrete model ID.
 *
 * Env vars are read when this is called rather than at module load, so
 * configuration loaded after import (e.g. dotenv.config()) is honored —
 * the previous module-level snapshot silently ignored late-set TIER_* vars.
 */
export function resolveTierModel(tier: ModelTier): string {
  return process.env[`TIER_${tier}_MODEL`] ?? TIER_DEFAULTS[tier];
}

/**
 * Factory: build an LLM client from either a tier letter ('A' | 'B' | 'C')
 * or a concrete model ID.
 *
 * gemini-* IDs → GeminiClient (Google AI API key);
 * anything else → VertexOpenAIClient (Vertex openapi endpoint).
 */
export function createLLM(modelOrTier: string | ModelTier, opts?: { temperature?: number }): LLMClient {
  const modelId = (modelOrTier === 'A' || modelOrTier === 'B' || modelOrTier === 'C')
    ? resolveTierModel(modelOrTier)
    : modelOrTier;
  if (modelId.startsWith('gemini-')) {
    return new GeminiClient(modelId, opts);
  }
  return new VertexOpenAIClient(modelId, { temperature: opts?.temperature });
}
// ---------------------------------------------------------------------------
// Helper — convert our ToolDefinition[] → LLMTool[] (OpenAI format)
// ---------------------------------------------------------------------------
/**
 * Convert the runner's ToolDefinition shape into OpenAI function-tool
 * declarations — the shared wire format consumed by both backends.
 */
export function toOAITools(
  tools: Array<{ name: string; description: string; parameters: Record<string, unknown> }>
): LLMTool[] {
  const wrap = (def: { name: string; description: string; parameters: Record<string, unknown> }): LLMTool => ({
    type: 'function',
    function: {
      name: def.name,
      description: def.description,
      parameters: def.parameters
    }
  });
  return tools.map(wrap);
}