fix: add all missing source files (llm.ts, updated agent-runner/agents)
src/llm.ts was never committed — this caused the Docker build to fail with "Cannot find module './llm'". Also commit updated agent-runner.ts, agents.ts, and .env.example that reference the new LLM client. Made-with: Cursor
This commit is contained in:
35
.env.example
35
.env.example
@@ -1,23 +1,48 @@
|
||||
# Google AI — required for all agents
|
||||
# ---------------------------------------------------------------------------
|
||||
# AI Models — 3-tier routing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Tier A — fast/cheap: routing, summaries, log parsing (Gemini Flash)
|
||||
TIER_A_MODEL=gemini-2.5-flash
|
||||
|
||||
# Tier B — workhorse coder: features, diffs, standard bug fixes (GLM-5 on Vertex)
|
||||
TIER_B_MODEL=zai-org/glm-5-maas
|
||||
|
||||
# Tier C — premium escalation: architecture decisions, complex debugging
|
||||
# Options: zai-org/glm-5-maas | anthropic/claude-sonnet-4-6
|
||||
TIER_C_MODEL=zai-org/glm-5-maas
|
||||
|
||||
# Orchestrator model (defaults to Tier B if not set)
|
||||
ORCHESTRATOR_MODEL=B
|
||||
|
||||
# Tier A fallback — Gemini API key (required if using gemini-* models)
|
||||
GOOGLE_API_KEY=your-gemini-api-key
|
||||
|
||||
# Gitea — required for git push, issue management
|
||||
# GCP project for Vertex AI (GLM-5, Claude Sonnet via Vertex)
|
||||
GCP_PROJECT_ID=master-ai-484822
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Gitea — required for git push and issue management
|
||||
# ---------------------------------------------------------------------------
|
||||
GITEA_API_URL=https://git.vibnai.com
|
||||
GITEA_API_TOKEN=your-gitea-token
|
||||
GITEA_USERNAME=your-gitea-username
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Coolify — required for deployment tools
|
||||
# ---------------------------------------------------------------------------
|
||||
COOLIFY_API_URL=https://coolify.vibnai.com
|
||||
COOLIFY_API_TOKEN=your-coolify-token
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Server config
|
||||
# ---------------------------------------------------------------------------
|
||||
PORT=3333
|
||||
|
||||
# Base path where agent workspaces are cloned
|
||||
# Each repo gets a subfolder: /workspaces/owner_reponame
|
||||
# Base path where agent workspaces are cloned (owner_reponame subdirs)
|
||||
WORKSPACE_BASE=/workspaces
|
||||
|
||||
# Optional: internal URL of this service (used by spawn_agent to self-call)
|
||||
# Internal URL of this service (used by spawn_agent to self-call)
|
||||
AGENT_RUNNER_URL=http://localhost:3333
|
||||
|
||||
# Optional: shared secret for validating Gitea webhook POSTs
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
import { GoogleGenAI, Content, Tool, FunctionDeclaration } from '@google/genai';
|
||||
import { createLLM, toOAITools, LLMMessage } from './llm';
|
||||
import { AgentConfig } from './agents';
|
||||
import { executeTool, ToolContext } from './tools';
|
||||
import { Job, updateJob } from './job-store';
|
||||
|
||||
const MAX_TURNS = 40; // safety cap — prevents infinite loops
|
||||
const MAX_TURNS = 40;
|
||||
|
||||
export interface RunResult {
|
||||
finalText: string;
|
||||
toolCallCount: number;
|
||||
turns: number;
|
||||
model: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Core Gemini agent loop.
|
||||
* Core agent execution loop — model-agnostic via the unified LLM client.
|
||||
*
|
||||
* Sends the task to Gemini with the agent's system prompt and tools,
|
||||
* then loops: execute tool calls → send results back → repeat until
|
||||
* the model stops calling tools or MAX_TURNS is reached.
|
||||
* Agents use their configured model tier (A/B/C) or a specific model ID.
|
||||
* Tool calling uses OpenAI format throughout.
|
||||
*/
|
||||
export async function runAgent(
|
||||
job: Job,
|
||||
@@ -24,126 +24,79 @@ export async function runAgent(
|
||||
task: string,
|
||||
ctx: ToolContext
|
||||
): Promise<RunResult> {
|
||||
const apiKey = process.env.GOOGLE_API_KEY;
|
||||
if (!apiKey) {
|
||||
throw new Error('GOOGLE_API_KEY environment variable is not set');
|
||||
}
|
||||
const llm = createLLM(config.model, { temperature: 0.2 });
|
||||
const oaiTools = toOAITools(config.tools);
|
||||
|
||||
const genai = new GoogleGenAI({ apiKey });
|
||||
|
||||
// Build Gemini function declarations from our tool definitions
|
||||
const functionDeclarations: FunctionDeclaration[] = config.tools.map(tool => ({
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
parameters: tool.parameters as any
|
||||
}));
|
||||
|
||||
const tools: Tool[] = functionDeclarations.length > 0
|
||||
? [{ functionDeclarations }]
|
||||
: [];
|
||||
|
||||
const model = genai.models;
|
||||
|
||||
// Build conversation history
|
||||
const history: Content[] = [];
|
||||
|
||||
// Initial user message
|
||||
let currentMessage: Content = {
|
||||
role: 'user',
|
||||
parts: [{ text: task }]
|
||||
};
|
||||
const history: LLMMessage[] = [
|
||||
{ role: 'user', content: task }
|
||||
];
|
||||
|
||||
let toolCallCount = 0;
|
||||
let turn = 0;
|
||||
let finalText = '';
|
||||
|
||||
updateJob(job.id, { status: 'running', progress: `Starting ${config.name} agent...` });
|
||||
updateJob(job.id, { status: 'running', progress: `Starting ${config.name} (${llm.modelId})…` });
|
||||
|
||||
while (turn < MAX_TURNS) {
|
||||
turn++;
|
||||
|
||||
// Add current message to history
|
||||
history.push(currentMessage);
|
||||
const messages: LLMMessage[] = [
|
||||
{ role: 'system', content: config.systemPrompt },
|
||||
...history
|
||||
];
|
||||
|
||||
// Call Gemini
|
||||
const response = await model.generateContent({
|
||||
model: config.model || 'gemini-2.0-flash',
|
||||
contents: history,
|
||||
config: {
|
||||
systemInstruction: config.systemPrompt,
|
||||
tools: tools.length > 0 ? tools : undefined,
|
||||
temperature: 0.2,
|
||||
maxOutputTokens: 8192
|
||||
}
|
||||
});
|
||||
const response = await llm.chat(messages, oaiTools, 8192);
|
||||
|
||||
const candidate = response.candidates?.[0];
|
||||
if (!candidate) {
|
||||
throw new Error('No response from Gemini');
|
||||
}
|
||||
|
||||
// Add model response to history
|
||||
const modelContent: Content = {
|
||||
role: 'model',
|
||||
parts: candidate.content?.parts || []
|
||||
// Build assistant message for history
|
||||
const assistantMsg: LLMMessage = {
|
||||
role: 'assistant',
|
||||
content: response.content,
|
||||
tool_calls: response.tool_calls.length > 0 ? response.tool_calls : undefined
|
||||
};
|
||||
history.push(modelContent);
|
||||
history.push(assistantMsg);
|
||||
|
||||
// Extract function calls from the response
|
||||
const functionCalls = candidate.content?.parts?.filter(p => p.functionCall) ?? [];
|
||||
|
||||
if (functionCalls.length === 0) {
|
||||
// No tool calls — the agent is done
|
||||
finalText = candidate.content?.parts
|
||||
?.filter(p => p.text)
|
||||
.map(p => p.text)
|
||||
.join('') ?? '';
|
||||
// No tool calls — agent is done
|
||||
if (response.tool_calls.length === 0) {
|
||||
finalText = response.content ?? '';
|
||||
break;
|
||||
}
|
||||
|
||||
// Execute all tool calls
|
||||
const toolResultParts: any[] = [];
|
||||
for (const part of functionCalls) {
|
||||
const call = part.functionCall!;
|
||||
const callName = call.name ?? 'unknown';
|
||||
const callArgs = (call.args ?? {}) as Record<string, unknown>;
|
||||
toolCallCount++;
|
||||
// Execute tool calls
|
||||
for (const tc of response.tool_calls) {
|
||||
const fnName = tc.function.name;
|
||||
let fnArgs: Record<string, unknown> = {};
|
||||
try { fnArgs = JSON.parse(tc.function.arguments || '{}'); } catch { /* bad JSON */ }
|
||||
|
||||
toolCallCount++;
|
||||
updateJob(job.id, {
|
||||
progress: `Turn ${turn}: calling ${callName}...`,
|
||||
progress: `Turn ${turn}: calling ${fnName}…`,
|
||||
toolCalls: [...(job.toolCalls || []), {
|
||||
turn,
|
||||
tool: callName,
|
||||
args: callArgs,
|
||||
tool: fnName,
|
||||
args: fnArgs,
|
||||
timestamp: new Date().toISOString()
|
||||
}]
|
||||
});
|
||||
|
||||
let result: unknown;
|
||||
try {
|
||||
result = await executeTool(callName, callArgs, ctx);
|
||||
result = await executeTool(fnName, fnArgs, ctx);
|
||||
} catch (err) {
|
||||
result = { error: err instanceof Error ? err.message : String(err) };
|
||||
}
|
||||
|
||||
toolResultParts.push({
|
||||
functionResponse: {
|
||||
name: callName,
|
||||
response: { result }
|
||||
}
|
||||
history.push({
|
||||
role: 'tool',
|
||||
tool_call_id: tc.id,
|
||||
name: fnName,
|
||||
content: typeof result === 'string' ? result : JSON.stringify(result)
|
||||
});
|
||||
}
|
||||
|
||||
// Next turn: send tool results back to the model
|
||||
currentMessage = {
|
||||
role: 'user',
|
||||
parts: toolResultParts
|
||||
};
|
||||
}
|
||||
|
||||
if (turn >= MAX_TURNS && !finalText) {
|
||||
finalText = `Agent reached the ${MAX_TURNS}-turn safety limit. Last tool call count: ${toolCallCount}.`;
|
||||
finalText = `Agent hit the ${MAX_TURNS}-turn safety limit. Tool calls made: ${toolCallCount}.`;
|
||||
}
|
||||
|
||||
return { finalText, toolCallCount, turns: turn };
|
||||
return { finalText, toolCallCount, turns: turn, model: llm.modelId };
|
||||
}
|
||||
|
||||
139
src/agents.ts
139
src/agents.ts
@@ -1,23 +1,23 @@
|
||||
import { ToolDefinition, ALL_TOOLS } from './tools';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Agent configuration — which tools each agent gets + system prompt
|
||||
// Agent configuration
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export interface AgentConfig {
|
||||
name: string;
|
||||
description: string;
|
||||
model: string;
|
||||
model: string; // model ID or tier ('A' | 'B' | 'C')
|
||||
systemPrompt: string;
|
||||
tools: ToolDefinition[];
|
||||
}
|
||||
|
||||
const FILE_TOOLS = ['read_file', 'write_file', 'replace_in_file', 'list_directory', 'find_files', 'search_code'];
|
||||
const SHELL_TOOLS = ['execute_command'];
|
||||
const GIT_TOOLS = ['git_commit_and_push'];
|
||||
const FILE_TOOLS = ['read_file', 'write_file', 'replace_in_file', 'list_directory', 'find_files', 'search_code'];
|
||||
const SHELL_TOOLS = ['execute_command'];
|
||||
const GIT_TOOLS = ['git_commit_and_push'];
|
||||
const COOLIFY_TOOLS = ['coolify_list_projects', 'coolify_list_applications', 'coolify_deploy', 'coolify_get_logs'];
|
||||
const GITEA_TOOLS = ['gitea_create_issue', 'gitea_list_issues', 'gitea_close_issue'];
|
||||
const SPAWN_TOOL = ['spawn_agent'];
|
||||
const GITEA_TOOLS = ['gitea_create_issue', 'gitea_list_issues', 'gitea_close_issue'];
|
||||
const SPAWN_TOOL = ['spawn_agent'];
|
||||
|
||||
function pick(names: string[]): ToolDefinition[] {
|
||||
return ALL_TOOLS.filter(t => names.includes(t.name));
|
||||
@@ -25,112 +25,109 @@ function pick(names: string[]): ToolDefinition[] {
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Agent definitions
|
||||
//
|
||||
// model is a tier ('A' | 'B' | 'C') or a specific model ID.
|
||||
// Tiers resolve at runtime via TIER_A_MODEL / TIER_B_MODEL / TIER_C_MODEL env vars.
|
||||
//
|
||||
// Tier A = gemini-2.5-flash — fast, cheap: routing, summaries, monitoring
|
||||
// Tier B = zai-org/glm-5-maas — workhorse coding model
|
||||
// Tier C = zai-org/glm-5-maas — complex decisions (or Claude Sonnet via TIER_C_MODEL)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
export const AGENTS: Record<string, AgentConfig> = {
|
||||
|
||||
Orchestrator: {
|
||||
name: 'Orchestrator',
|
||||
description: 'Master coordinator that breaks down high-level goals and delegates to specialist agents',
|
||||
model: 'gemini-2.5-flash',
|
||||
systemPrompt: `You are the Orchestrator for Vibn, an autonomous AI system for software development.
|
||||
description: 'Master coordinator — breaks down goals and delegates to specialist agents',
|
||||
model: 'B', // GLM-5 — good planner, chain-of-thought reasoning
|
||||
systemPrompt: `You are the Orchestrator for Vibn, an autonomous AI platform for software development.
|
||||
|
||||
Your role is to:
|
||||
1. Understand the high-level goal provided in the task.
|
||||
2. Break it down into concrete sub-tasks.
|
||||
3. Delegate sub-tasks to the appropriate specialist agents using the spawn_agent tool.
|
||||
4. Use Gitea to track progress: create an issue at the start, close it when done.
|
||||
5. Summarize what was done when complete.
|
||||
Your role:
|
||||
1. Understand the high-level goal.
|
||||
2. Break it into concrete sub-tasks.
|
||||
3. Delegate to the right specialist agents via spawn_agent.
|
||||
4. Track progress via Gitea issues.
|
||||
5. Summarize results when done.
|
||||
|
||||
Available specialist agents and when to use them:
|
||||
- **Coder**: Any code changes — features, bug fixes, refactors, tests.
|
||||
- **PM**: Project management — issue triage, sprint planning, documentation updates.
|
||||
- **Marketing**: Content and copy — blog posts, landing page copy, release notes.
|
||||
Agents available:
|
||||
- Coder: code changes, features, bug fixes, tests.
|
||||
- PM: issue triage, docs, sprint planning.
|
||||
- Marketing: copy, blog posts, release notes.
|
||||
|
||||
Rules:
|
||||
- Always create a Gitea issue first to track the work.
|
||||
- Delegate to ONE agent at a time unless tasks are fully independent.
|
||||
- Check back on progress by listing issues.
|
||||
- Never try to write code yourself — delegate to Coder.
|
||||
- Be concise in your task descriptions when spawning agents.`,
|
||||
- Create a Gitea issue first to track the work.
|
||||
- Delegate one agent at a time unless tasks are fully independent.
|
||||
- Never write code yourself — delegate to Coder.
|
||||
- Be specific in task descriptions when spawning agents.`,
|
||||
tools: pick([...GITEA_TOOLS, ...SPAWN_TOOL, ...COOLIFY_TOOLS])
|
||||
},
|
||||
|
||||
Coder: {
|
||||
name: 'Coder',
|
||||
description: 'Senior software engineer — writes, edits, and tests code. Commits and pushes when done.',
|
||||
model: 'gemini-2.5-flash',
|
||||
systemPrompt: `You are an expert senior software engineer working autonomously on a git repository.
|
||||
description: 'Senior software engineer — writes, edits, tests, commits, and pushes code',
|
||||
model: 'B', // GLM-5 — strong at code generation and diffs
|
||||
systemPrompt: `You are an expert senior software engineer working autonomously on a Git repository.
|
||||
|
||||
Your job is to complete the coding task given to you. Follow these rules:
|
||||
|
||||
**Workflow:**
|
||||
1. Start by exploring the codebase: list_directory, find_files, read_file to understand structure.
|
||||
2. Search for relevant code: search_code to find existing patterns.
|
||||
Workflow:
|
||||
1. Explore the codebase: list_directory, find_files, read_file.
|
||||
2. Search for patterns: search_code.
|
||||
3. Plan your changes before making them.
|
||||
4. Read every file BEFORE editing it.
|
||||
5. Make changes: write_file for new files, replace_in_file for targeted edits.
|
||||
6. Run tests or lint if applicable: execute_command.
|
||||
7. Commit and push when the task is complete: git_commit_and_push.
|
||||
6. Run tests/lint if applicable: execute_command.
|
||||
7. Commit and push when complete: git_commit_and_push.
|
||||
|
||||
**Code quality rules:**
|
||||
- Match existing code style exactly.
|
||||
- Never leave TODO comments — implement or skip.
|
||||
Code quality:
|
||||
- Match existing style exactly.
|
||||
- No TODO comments — implement or skip.
|
||||
- Write complete files, not partial snippets.
|
||||
- If tests exist, run them and fix failures before committing.
|
||||
- Commit message should be concise and in imperative mood (e.g. "add user authentication").
|
||||
- Run tests and fix failures before committing.
|
||||
- Commit messages: imperative mood, concise (e.g. "add user authentication").
|
||||
|
||||
**Safety rules:**
|
||||
- Never delete files unless explicitly instructed.
|
||||
- Never modify .env files or credentials.
|
||||
Safety:
|
||||
- Never delete files unless explicitly told to.
|
||||
- Never touch .env files or credentials.
|
||||
- Never commit secrets or API keys.
|
||||
|
||||
**If you were triggered by a Gitea issue:**
|
||||
- After committing, close the issue using gitea_close_issue.
|
||||
- The repo name is in the format "owner/name".
|
||||
|
||||
Be methodical. Read before you write. Test before you commit.`,
|
||||
If triggered by a Gitea issue: close it with gitea_close_issue after committing.`,
|
||||
tools: pick([...FILE_TOOLS, ...SHELL_TOOLS, ...GIT_TOOLS, ...GITEA_TOOLS])
|
||||
},
|
||||
|
||||
PM: {
|
||||
name: 'PM',
|
||||
description: 'Product manager — manages Gitea issues, writes documentation, tracks project health',
|
||||
model: 'gemini-2.5-flash',
|
||||
description: 'Product manager — docs, issue management, project health reports',
|
||||
model: 'A', // Gemini Flash — lightweight, cheap for docs/issue work
|
||||
systemPrompt: `You are an autonomous Product Manager for a software project hosted on Gitea.
|
||||
|
||||
Your responsibilities:
|
||||
1. Create, update, and close Gitea issues to track work.
|
||||
2. Write and update documentation files in the repository.
|
||||
Responsibilities:
|
||||
1. Create, update, and close Gitea issues.
|
||||
2. Write and update docs in the repository.
|
||||
3. Summarize project state and create reports.
|
||||
4. Prioritize and triage bugs/features based on impact.
|
||||
4. Triage bugs and features by impact.
|
||||
|
||||
When writing documentation:
|
||||
- Be clear and concise.
|
||||
- Use markdown formatting.
|
||||
- Focus on what users and developers need to know.
|
||||
- Keep docs up to date with the actual codebase state.
|
||||
|
||||
Always commit documentation updates after writing them.`,
|
||||
When writing docs:
|
||||
- Clear and concise.
|
||||
- Markdown formatting.
|
||||
- Keep docs in sync with the codebase.
|
||||
- Always commit after writing.`,
|
||||
tools: pick([...GITEA_TOOLS, ...FILE_TOOLS, ...GIT_TOOLS])
|
||||
},
|
||||
|
||||
Marketing: {
|
||||
name: 'Marketing',
|
||||
description: 'Marketing specialist — writes copy, blog posts, release notes, and landing page content',
|
||||
model: 'gemini-2.5-flash',
|
||||
description: 'Marketing specialist — copy, blog posts, release notes, landing page content',
|
||||
model: 'A', // Gemini Flash — cheap for content generation
|
||||
systemPrompt: `You are an autonomous Marketing specialist for a SaaS product called Vibn.
|
||||
|
||||
Vibn is a cloud-based AI-powered development environment. It helps development teams build faster with AI agents that can write code, manage projects, and deploy automatically.
|
||||
Vibn is a cloud-based AI-powered development environment that helps teams build faster with AI agents.
|
||||
|
||||
Your responsibilities:
|
||||
1. Write compelling marketing copy for landing pages, email campaigns, and social media.
|
||||
2. Write technical blog posts that explain features in an accessible way.
|
||||
Responsibilities:
|
||||
1. Write landing page copy, emails, and social media content.
|
||||
2. Write technical blog posts explaining features accessibly.
|
||||
3. Write release notes that highlight user-facing value.
|
||||
4. Ensure all copy is on-brand: professional, clear, forward-thinking, and developer-friendly.
|
||||
4. Maintain brand voice: smart, confident, practical. No hype, no jargon.
|
||||
|
||||
Brand voice: Smart, confident, practical. No hype. No jargon. Show don't tell.
|
||||
|
||||
When writing content, create actual files in the repository (e.g. blog/2026-02-release.md) and commit them.`,
|
||||
Always create real files in the repo (e.g. blog/2026-02-release.md) and commit them.`,
|
||||
tools: pick([...FILE_TOOLS, ...GIT_TOOLS])
|
||||
}
|
||||
};
|
||||
|
||||
285
src/llm.ts
Normal file
285
src/llm.ts
Normal file
@@ -0,0 +1,285 @@
|
||||
import { execSync } from 'child_process';
|
||||
import { GoogleGenAI } from '@google/genai';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
|
||||
// =============================================================================
|
||||
// Unified LLM client — OpenAI-compatible message format throughout
|
||||
//
|
||||
// Two backends:
|
||||
// VertexOpenAIClient — for GLM-5, Claude Sonnet, etc. via Vertex global endpoint
|
||||
//   GeminiClient       — for Gemini Flash/Pro via @google/genai SDK
|
||||
//
|
||||
// Model tier defaults (overridable via env):
|
||||
// Tier A: gemini-2.5-flash ($0.15/$0.60 per 1M) — routing, summaries, log parsing
|
||||
// Tier B: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — coding, feature work
|
||||
// Tier C: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — complex decisions, escalation
|
||||
// =============================================================================
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Shared message types (OpenAI format — used everywhere internally)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * One chat message in OpenAI chat-completions shape.
 * This is the message format exchanged with every LLMClient in this module.
 */
export interface LLMMessage {
  /** Author of the message. */
  role: 'system' | 'user' | 'assistant' | 'tool';
  /** Text body; null is allowed for messages that carry no text. */
  content: string | null;
  /** Tool invocations attached to the message, if any. */
  tool_calls?: LLMToolCall[];
  /** Set on role=tool messages. */
  tool_call_id?: string;
  /** Function name on role=tool messages. */
  name?: string;
}
|
||||
|
||||
/** A single function invocation requested by the model (OpenAI tool-call shape). */
export interface LLMToolCall {
  /** Identifier of this call; tool-result messages carry it as `tool_call_id`. */
  id: string;
  type: 'function';
  function: {
    /** Name of the function to invoke. */
    name: string;
    /** Call arguments as a JSON-encoded string. */
    arguments: string;
  };
}
|
||||
|
||||
/** A tool definition offered to the model (OpenAI function-tool shape). */
export interface LLMTool {
  type: 'function';
  function: {
    /** Function name the model uses when calling the tool. */
    name: string;
    /** Human-readable description of the tool. */
    description: string;
    /** Parameter schema object, passed through to the backend unchanged. */
    parameters: Record<string, unknown>;
  };
}
|
||||
|
||||
/** Normalised result of one chat call, identical across backends. */
export interface LLMResponse {
  /** Text produced by the model, or null when it produced none. */
  content: string | null;
  /** GLM-5 chain-of-thought; null when the backend supplies none. */
  reasoning: string | null;
  /** Tool invocations requested by the model; empty when there are none. */
  tool_calls: LLMToolCall[];
  /** Why generation ended, as reported or derived by the backend client. */
  finish_reason: string;
  /** Token accounting, present only when the backend returns it. */
  usage?: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}
|
||||
|
||||
/** Contract implemented by every model backend in this module. */
export interface LLMClient {
  /** Concrete model identifier this client sends requests to. */
  modelId: string;
  /**
   * Run one chat turn.
   *
   * @param messages  Conversation so far, in OpenAI message format.
   * @param tools     Tools the model may call; optional.
   * @param maxTokens Output-token cap; each implementation applies its own default.
   * @returns The model's reply in normalised form.
   */
  chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens?: number): Promise<LLMResponse>;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Vertex AI OpenAI-compatible client
|
||||
// Used for: zai-org/glm-5-maas, anthropic/claude-sonnet-4-6, etc.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Process-wide access-token cache shared by all Vertex clients in this module.
let _cachedToken = '';
let _tokenExpiry = 0;

/**
 * Return an access token for Vertex AI, shelling out to the gcloud CLI
 * only when no unexpired token is cached.
 *
 * @returns The trimmed output of `gcloud auth print-access-token`.
 * @throws Whatever `execSync` throws when the command fails.
 */
function getVertexToken(): string {
  const requestedAt = Date.now();
  const cacheIsFresh = _cachedToken !== '' && requestedAt < _tokenExpiry;

  if (!cacheIsFresh) {
    const rawOutput = execSync('gcloud auth print-access-token', { encoding: 'utf8' });
    _cachedToken = rawOutput.trim();
    // tokens last 1hr, refresh at 55min
    _tokenExpiry = requestedAt + 55 * 60 * 1000;
  }

  return _cachedToken;
}
|
||||
|
||||
/**
 * LLM client that talks to Vertex AI's OpenAI-compatible chat-completions
 * endpoint. Messages and tools are sent in OpenAI format unchanged.
 */
export class VertexOpenAIClient implements LLMClient {
  modelId: string;
  private projectId: string;
  private region: string;
  private temperature: number;

  /**
   * @param modelId Model identifier sent as the request's `model` field.
   * @param opts    Optional overrides: `projectId` (falls back to the
   *                GCP_PROJECT_ID env var, then a built-in default),
   *                `region` (default 'global') and `temperature` (default 0.3).
   */
  constructor(modelId: string, opts?: { projectId?: string; region?: string; temperature?: number }) {
    this.modelId = modelId;
    this.projectId = opts?.projectId ?? process.env.GCP_PROJECT_ID ?? 'master-ai-484822';
    this.region = opts?.region ?? 'global';
    this.temperature = opts?.temperature ?? 0.3;
  }

  /**
   * Send one non-streaming chat-completions request.
   *
   * @param messages  Conversation in OpenAI message format.
   * @param tools     Optional tool definitions; when non-empty they are sent
   *                  with `tool_choice: 'auto'`.
   * @param maxTokens Output-token cap (default 4096).
   * @returns The first choice of the response, normalised to LLMResponse.
   * @throws Error on a non-2xx HTTP status, or when the response has no choices.
   */
  async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 4096): Promise<LLMResponse> {
    const token = getVertexToken();

    // The global location uses the un-prefixed host; other regions use a
    // region-prefixed host.
    const base = this.region === 'global'
      ? 'https://aiplatform.googleapis.com'
      : `https://${this.region}-aiplatform.googleapis.com`;
    const url = `${base}/v1/projects/${this.projectId}/locations/${this.region}/endpoints/openapi/chat/completions`;

    const body: Record<string, unknown> = {
      model: this.modelId,
      messages,
      max_tokens: maxTokens,
      temperature: this.temperature,
      stream: false
    };

    if (tools && tools.length > 0) {
      body.tools = tools;
      body.tool_choice = 'auto';
    }

    const res = await fetch(url, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(body)
    });

    if (!res.ok) {
      const errText = await res.text();
      // Force token refresh on 401 so the next call fetches a new token.
      if (res.status === 401) _tokenExpiry = 0;
      throw new Error(`Vertex API ${res.status}: ${errText.slice(0, 400)}`);
    }

    const data = await res.json() as any;
    const choice = data.choices?.[0];

    // A successful status with no choices carries no model output. Fail
    // loudly, as GeminiClient does for a missing candidate, instead of
    // returning an empty "stop" response that looks like a finished run.
    if (!choice) {
      throw new Error('No response from Vertex');
    }

    const message = choice.message ?? {};

    return {
      content: message.content ?? null,
      reasoning: message.reasoning_content ?? null,
      tool_calls: message.tool_calls ?? [],
      finish_reason: choice.finish_reason ?? 'stop',
      usage: data.usage
    };
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Gemini client via @google/genai SDK
|
||||
// Used for: Tier A (fast/cheap routing, summaries, log parsing)
|
||||
// Converts to/from OpenAI message format internally.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * LLM client backed by the @google/genai SDK. Accepts and returns the
 * module's OpenAI-style types, translating to Gemini's format per call.
 */
export class GeminiClient implements LLMClient {
  modelId: string;
  private temperature: number;

  /**
   * @param modelId Gemini model identifier (default 'gemini-2.5-flash').
   * @param opts    Optional `temperature` override (default 0.2).
   */
  constructor(modelId = 'gemini-2.5-flash', opts?: { temperature?: number }) {
    this.modelId = modelId;
    this.temperature = opts?.temperature ?? 0.2;
  }

  /**
   * Run one generateContent request and normalise the first candidate.
   *
   * @param messages  Conversation in OpenAI format; the first system message
   *                  becomes the system instruction, the rest are converted.
   * @param tools     Optional tools, sent as Gemini function declarations.
   * @param maxTokens Output-token cap (default 8192).
   * @throws Error when GOOGLE_API_KEY is unset or no candidate is returned.
   */
  async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 8192): Promise<LLMResponse> {
    const apiKey = process.env.GOOGLE_API_KEY;
    if (!apiKey) throw new Error('GOOGLE_API_KEY not set');

    const genai = new GoogleGenAI({ apiKey });

    // Gemini takes the system prompt as config, not as a conversation turn.
    const systemMsg = messages.find(entry => entry.role === 'system');
    const conversation = messages.filter(entry => entry.role !== 'system');

    const declarations: Array<{ name: string; description: string; parameters: any }> = [];
    for (const { function: fn } of tools ?? []) {
      declarations.push({
        name: fn.name,
        description: fn.description,
        parameters: fn.parameters as any
      });
    }
    const geminiTools = declarations.length > 0
      ? [{ functionDeclarations: declarations }]
      : undefined;

    const response = await genai.models.generateContent({
      model: this.modelId,
      contents: toGeminiContents(conversation),
      config: {
        systemInstruction: systemMsg?.content ?? undefined,
        tools: geminiTools,
        temperature: this.temperature,
        maxOutputTokens: maxTokens
      }
    });

    const candidate = response.candidates?.[0];
    if (!candidate) throw new Error('No response from Gemini');

    // Single pass over the parts: concatenate text, collect function calls.
    let text = '';
    const tool_calls: LLMToolCall[] = [];
    for (const part of candidate.content?.parts ?? []) {
      if (part.text) text += part.text;

      const call = part.functionCall;
      if (call) {
        tool_calls.push({
          // Locally generated id in OpenAI's `call_…` style.
          id: `call_${uuidv4().replace(/-/g, '').slice(0, 12)}`,
          type: 'function' as const,
          function: {
            name: call.name ?? '',
            arguments: JSON.stringify(call.args ?? {})
          }
        });
      }
    }

    return {
      content: text || null,
      reasoning: null,
      tool_calls,
      finish_reason: tool_calls.length > 0 ? 'tool_calls' : 'stop'
    };
  }
}
|
||||
|
||||
/**
 * Convert OpenAI message format → Gemini Content[] format.
 *
 * - `assistant` messages become `model` turns: optional text part followed by
 *   one `functionCall` part per tool call.
 * - `tool` messages become `functionResponse` parts. Consecutive tool messages
 *   are merged into ONE `user` turn, so the results of a multi-call model turn
 *   are sent back together rather than as several separate user turns.
 * - Every other role becomes a `user` text turn.
 *
 * Malformed tool-call argument JSON is converted to `{}` instead of throwing.
 *
 * @param messages Conversation in OpenAI format (system messages are expected
 *                 to have been removed by the caller).
 * @returns Gemini content objects in conversation order.
 */
function toGeminiContents(messages: LLMMessage[]): any[] {
  type GeminiPart = Record<string, unknown>;
  interface GeminiTurn { role: 'user' | 'model'; parts: GeminiPart[] }

  const contents: GeminiTurn[] = [];
  // The user turn currently collecting tool results; reset by any non-tool message.
  let openToolTurn: GeminiTurn | null = null;

  for (const msg of messages) {
    if (msg.role === 'tool') {
      // Parse content back — could be JSON or plain text
      let resultValue: unknown = msg.content;
      try { resultValue = JSON.parse(msg.content ?? 'null'); } catch { /* keep as string */ }

      const responsePart: GeminiPart = {
        functionResponse: {
          name: msg.name ?? 'tool',
          response: { result: resultValue }
        }
      };

      if (openToolTurn) {
        openToolTurn.parts.push(responsePart);
      } else {
        openToolTurn = { role: 'user', parts: [responsePart] };
        contents.push(openToolTurn);
      }
      continue;
    }

    openToolTurn = null;

    if (msg.role === 'assistant') {
      const parts: GeminiPart[] = [];
      if (msg.content) parts.push({ text: msg.content });
      for (const tc of msg.tool_calls ?? []) {
        parts.push({
          functionCall: {
            name: tc.function.name,
            args: parseToolArguments(tc.function.arguments)
          }
        });
      }
      contents.push({ role: 'model', parts });
    } else {
      contents.push({ role: 'user', parts: [{ text: msg.content ?? '' }] });
    }
  }

  return contents;
}

/** Decode a JSON-encoded tool-argument string; anything that is not a JSON object yields `{}`. */
function parseToolArguments(raw: string): Record<string, unknown> {
  try {
    const parsed: unknown = JSON.parse(raw || '{}');
    if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
      return parsed as Record<string, unknown>;
    }
  } catch {
    /* malformed JSON from the model — fall through to empty args */
  }
  return {};
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Factory — createLLM(modelId | tier)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Symbolic model tiers; each resolves to a concrete model ID via TIER_MODELS. */
export type ModelTier = 'A' | 'B' | 'C';

/**
 * Pick the model ID for a tier from its env override.
 * A missing or blank value (e.g. `TIER_B_MODEL=` left empty in a .env file)
 * falls back to the default, so a tier never resolves to an empty model ID.
 */
function tierModelFromEnv(value: string | undefined, fallback: string): string {
  const trimmed = value?.trim();
  return trimmed ? trimmed : fallback;
}

// Resolved once at module load from TIER_A_MODEL / TIER_B_MODEL / TIER_C_MODEL.
const TIER_MODELS: Record<ModelTier, string> = {
  A: tierModelFromEnv(process.env.TIER_A_MODEL, 'gemini-2.5-flash'),
  B: tierModelFromEnv(process.env.TIER_B_MODEL, 'zai-org/glm-5-maas'),
  C: tierModelFromEnv(process.env.TIER_C_MODEL, 'zai-org/glm-5-maas')
};
|
||||
|
||||
/**
 * Build the LLM client for a tier letter or an explicit model ID.
 *
 * 'A' | 'B' | 'C' are looked up in TIER_MODELS; any other string is used as
 * the model ID as-is. IDs starting with 'gemini-' get a GeminiClient, all
 * others a VertexOpenAIClient.
 *
 * @param modelOrTier Tier letter or concrete model ID.
 * @param opts        Optional sampling settings; only `temperature` is read.
 */
export function createLLM(modelOrTier: string | ModelTier, opts?: { temperature?: number }): LLMClient {
  let resolvedId: string;
  switch (modelOrTier) {
    case 'A':
    case 'B':
    case 'C':
      resolvedId = TIER_MODELS[modelOrTier];
      break;
    default:
      resolvedId = modelOrTier;
  }

  const servedByGemini = resolvedId.startsWith('gemini-');
  return servedByGemini
    ? new GeminiClient(resolvedId, opts)
    : new VertexOpenAIClient(resolvedId, { temperature: opts?.temperature });
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper — convert our ToolDefinition[] → LLMTool[] (OpenAI format)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Wrap tool definitions in the OpenAI function-tool envelope.
 *
 * @param tools Definitions carrying `name`, `description` and `parameters`.
 * @returns One `{ type: 'function', function: {...} }` entry per input, in
 *          the same order; only those three fields are copied.
 */
export function toOAITools(
  tools: Array<{ name: string; description: string; parameters: Record<string, unknown> }>
): LLMTool[] {
  const wrapped: LLMTool[] = [];
  for (const { name, description, parameters } of tools) {
    wrapped.push({
      type: 'function',
      function: { name, description, parameters }
    });
  }
  return wrapped;
}
|
||||
Reference in New Issue
Block a user