fix: add all missing source files (llm.ts, updated agent-runner/agents)
src/llm.ts was never committed — this caused the Docker build to fail with "Cannot find module './llm'". Also commit updated agent-runner.ts, agents.ts, and .env.example that reference the new LLM client. Made-with: Cursor
This commit is contained in:
35
.env.example
35
.env.example
@@ -1,23 +1,48 @@
|
|||||||
# Google AI — required for all agents
|
# ---------------------------------------------------------------------------
|
||||||
|
# AI Models — 3-tier routing
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Tier A — fast/cheap: routing, summaries, log parsing (Gemini Flash)
|
||||||
|
TIER_A_MODEL=gemini-2.5-flash
|
||||||
|
|
||||||
|
# Tier B — workhorse coder: features, diffs, standard bug fixes (GLM-5 on Vertex)
|
||||||
|
TIER_B_MODEL=zai-org/glm-5-maas
|
||||||
|
|
||||||
|
# Tier C — premium escalation: architecture decisions, complex debugging
|
||||||
|
# Options: zai-org/glm-5-maas | anthropic/claude-sonnet-4-6
|
||||||
|
TIER_C_MODEL=zai-org/glm-5-maas
|
||||||
|
|
||||||
|
# Orchestrator model (defaults to Tier B if not set)
|
||||||
|
ORCHESTRATOR_MODEL=B
|
||||||
|
|
||||||
|
# Gemini API key — required whenever a tier resolves to a gemini-* model (Tier A by default)
|
||||||
GOOGLE_API_KEY=your-gemini-api-key
|
GOOGLE_API_KEY=your-gemini-api-key
|
||||||
|
|
||||||
# Gitea — required for git push, issue management
|
# GCP project for Vertex AI (GLM-5, Claude Sonnet via Vertex)
|
||||||
|
GCP_PROJECT_ID=master-ai-484822
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Gitea — required for git push and issue management
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
GITEA_API_URL=https://git.vibnai.com
|
GITEA_API_URL=https://git.vibnai.com
|
||||||
GITEA_API_TOKEN=your-gitea-token
|
GITEA_API_TOKEN=your-gitea-token
|
||||||
GITEA_USERNAME=your-gitea-username
|
GITEA_USERNAME=your-gitea-username
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
# Coolify — required for deployment tools
|
# Coolify — required for deployment tools
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
COOLIFY_API_URL=https://coolify.vibnai.com
|
COOLIFY_API_URL=https://coolify.vibnai.com
|
||||||
COOLIFY_API_TOKEN=your-coolify-token
|
COOLIFY_API_TOKEN=your-coolify-token
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
# Server config
|
# Server config
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
PORT=3333
|
PORT=3333
|
||||||
|
|
||||||
# Base path where agent workspaces are cloned
|
# Base path where agent workspaces are cloned (owner_reponame subdirs)
|
||||||
# Each repo gets a subfolder: /workspaces/owner_reponame
|
|
||||||
WORKSPACE_BASE=/workspaces
|
WORKSPACE_BASE=/workspaces
|
||||||
|
|
||||||
# Optional: internal URL of this service (used by spawn_agent to self-call)
|
# Internal URL of this service (used by spawn_agent to self-call)
|
||||||
AGENT_RUNNER_URL=http://localhost:3333
|
AGENT_RUNNER_URL=http://localhost:3333
|
||||||
|
|
||||||
# Optional: shared secret for validating Gitea webhook POSTs
|
# Optional: shared secret for validating Gitea webhook POSTs
|
||||||
|
|||||||
@@ -1,22 +1,22 @@
|
|||||||
import { GoogleGenAI, Content, Tool, FunctionDeclaration } from '@google/genai';
|
import { createLLM, toOAITools, LLMMessage } from './llm';
|
||||||
import { AgentConfig } from './agents';
|
import { AgentConfig } from './agents';
|
||||||
import { executeTool, ToolContext } from './tools';
|
import { executeTool, ToolContext } from './tools';
|
||||||
import { Job, updateJob } from './job-store';
|
import { Job, updateJob } from './job-store';
|
||||||
|
|
||||||
const MAX_TURNS = 40; // safety cap — prevents infinite loops
|
const MAX_TURNS = 40;
|
||||||
|
|
||||||
export interface RunResult {
|
export interface RunResult {
|
||||||
finalText: string;
|
finalText: string;
|
||||||
toolCallCount: number;
|
toolCallCount: number;
|
||||||
turns: number;
|
turns: number;
|
||||||
|
model: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Core Gemini agent loop.
|
* Core agent execution loop — model-agnostic via the unified LLM client.
|
||||||
*
|
*
|
||||||
* Sends the task to Gemini with the agent's system prompt and tools,
|
* Agents use their configured model tier (A/B/C) or a specific model ID.
|
||||||
* then loops: execute tool calls → send results back → repeat until
|
* Tool calling uses OpenAI format throughout.
|
||||||
* the model stops calling tools or MAX_TURNS is reached.
|
|
||||||
*/
|
*/
|
||||||
export async function runAgent(
|
export async function runAgent(
|
||||||
job: Job,
|
job: Job,
|
||||||
@@ -24,126 +24,79 @@ export async function runAgent(
|
|||||||
task: string,
|
task: string,
|
||||||
ctx: ToolContext
|
ctx: ToolContext
|
||||||
): Promise<RunResult> {
|
): Promise<RunResult> {
|
||||||
const apiKey = process.env.GOOGLE_API_KEY;
|
const llm = createLLM(config.model, { temperature: 0.2 });
|
||||||
if (!apiKey) {
|
const oaiTools = toOAITools(config.tools);
|
||||||
throw new Error('GOOGLE_API_KEY environment variable is not set');
|
|
||||||
}
|
|
||||||
|
|
||||||
const genai = new GoogleGenAI({ apiKey });
|
const history: LLMMessage[] = [
|
||||||
|
{ role: 'user', content: task }
|
||||||
// Build Gemini function declarations from our tool definitions
|
];
|
||||||
const functionDeclarations: FunctionDeclaration[] = config.tools.map(tool => ({
|
|
||||||
name: tool.name,
|
|
||||||
description: tool.description,
|
|
||||||
parameters: tool.parameters as any
|
|
||||||
}));
|
|
||||||
|
|
||||||
const tools: Tool[] = functionDeclarations.length > 0
|
|
||||||
? [{ functionDeclarations }]
|
|
||||||
: [];
|
|
||||||
|
|
||||||
const model = genai.models;
|
|
||||||
|
|
||||||
// Build conversation history
|
|
||||||
const history: Content[] = [];
|
|
||||||
|
|
||||||
// Initial user message
|
|
||||||
let currentMessage: Content = {
|
|
||||||
role: 'user',
|
|
||||||
parts: [{ text: task }]
|
|
||||||
};
|
|
||||||
|
|
||||||
let toolCallCount = 0;
|
let toolCallCount = 0;
|
||||||
let turn = 0;
|
let turn = 0;
|
||||||
let finalText = '';
|
let finalText = '';
|
||||||
|
|
||||||
updateJob(job.id, { status: 'running', progress: `Starting ${config.name} agent...` });
|
updateJob(job.id, { status: 'running', progress: `Starting ${config.name} (${llm.modelId})…` });
|
||||||
|
|
||||||
while (turn < MAX_TURNS) {
|
while (turn < MAX_TURNS) {
|
||||||
turn++;
|
turn++;
|
||||||
|
|
||||||
// Add current message to history
|
const messages: LLMMessage[] = [
|
||||||
history.push(currentMessage);
|
{ role: 'system', content: config.systemPrompt },
|
||||||
|
...history
|
||||||
|
];
|
||||||
|
|
||||||
// Call Gemini
|
const response = await llm.chat(messages, oaiTools, 8192);
|
||||||
const response = await model.generateContent({
|
|
||||||
model: config.model || 'gemini-2.0-flash',
|
|
||||||
contents: history,
|
|
||||||
config: {
|
|
||||||
systemInstruction: config.systemPrompt,
|
|
||||||
tools: tools.length > 0 ? tools : undefined,
|
|
||||||
temperature: 0.2,
|
|
||||||
maxOutputTokens: 8192
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
const candidate = response.candidates?.[0];
|
// Build assistant message for history
|
||||||
if (!candidate) {
|
const assistantMsg: LLMMessage = {
|
||||||
throw new Error('No response from Gemini');
|
role: 'assistant',
|
||||||
}
|
content: response.content,
|
||||||
|
tool_calls: response.tool_calls.length > 0 ? response.tool_calls : undefined
|
||||||
// Add model response to history
|
|
||||||
const modelContent: Content = {
|
|
||||||
role: 'model',
|
|
||||||
parts: candidate.content?.parts || []
|
|
||||||
};
|
};
|
||||||
history.push(modelContent);
|
history.push(assistantMsg);
|
||||||
|
|
||||||
// Extract function calls from the response
|
// No tool calls — agent is done
|
||||||
const functionCalls = candidate.content?.parts?.filter(p => p.functionCall) ?? [];
|
if (response.tool_calls.length === 0) {
|
||||||
|
finalText = response.content ?? '';
|
||||||
if (functionCalls.length === 0) {
|
|
||||||
// No tool calls — the agent is done
|
|
||||||
finalText = candidate.content?.parts
|
|
||||||
?.filter(p => p.text)
|
|
||||||
.map(p => p.text)
|
|
||||||
.join('') ?? '';
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Execute all tool calls
|
// Execute tool calls
|
||||||
const toolResultParts: any[] = [];
|
for (const tc of response.tool_calls) {
|
||||||
for (const part of functionCalls) {
|
const fnName = tc.function.name;
|
||||||
const call = part.functionCall!;
|
let fnArgs: Record<string, unknown> = {};
|
||||||
const callName = call.name ?? 'unknown';
|
try { fnArgs = JSON.parse(tc.function.arguments || '{}'); } catch { /* bad JSON */ }
|
||||||
const callArgs = (call.args ?? {}) as Record<string, unknown>;
|
|
||||||
toolCallCount++;
|
|
||||||
|
|
||||||
|
toolCallCount++;
|
||||||
updateJob(job.id, {
|
updateJob(job.id, {
|
||||||
progress: `Turn ${turn}: calling ${callName}...`,
|
progress: `Turn ${turn}: calling ${fnName}…`,
|
||||||
toolCalls: [...(job.toolCalls || []), {
|
toolCalls: [...(job.toolCalls || []), {
|
||||||
turn,
|
turn,
|
||||||
tool: callName,
|
tool: fnName,
|
||||||
args: callArgs,
|
args: fnArgs,
|
||||||
timestamp: new Date().toISOString()
|
timestamp: new Date().toISOString()
|
||||||
}]
|
}]
|
||||||
});
|
});
|
||||||
|
|
||||||
let result: unknown;
|
let result: unknown;
|
||||||
try {
|
try {
|
||||||
result = await executeTool(callName, callArgs, ctx);
|
result = await executeTool(fnName, fnArgs, ctx);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
result = { error: err instanceof Error ? err.message : String(err) };
|
result = { error: err instanceof Error ? err.message : String(err) };
|
||||||
}
|
}
|
||||||
|
|
||||||
toolResultParts.push({
|
history.push({
|
||||||
functionResponse: {
|
role: 'tool',
|
||||||
name: callName,
|
tool_call_id: tc.id,
|
||||||
response: { result }
|
name: fnName,
|
||||||
}
|
content: typeof result === 'string' ? result : JSON.stringify(result)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Next turn: send tool results back to the model
|
|
||||||
currentMessage = {
|
|
||||||
role: 'user',
|
|
||||||
parts: toolResultParts
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (turn >= MAX_TURNS && !finalText) {
|
if (turn >= MAX_TURNS && !finalText) {
|
||||||
finalText = `Agent reached the ${MAX_TURNS}-turn safety limit. Last tool call count: ${toolCallCount}.`;
|
finalText = `Agent hit the ${MAX_TURNS}-turn safety limit. Tool calls made: ${toolCallCount}.`;
|
||||||
}
|
}
|
||||||
|
|
||||||
return { finalText, toolCallCount, turns: turn };
|
return { finalText, toolCallCount, turns: turn, model: llm.modelId };
|
||||||
}
|
}
|
||||||
|
|||||||
129
src/agents.ts
129
src/agents.ts
@@ -1,13 +1,13 @@
|
|||||||
import { ToolDefinition, ALL_TOOLS } from './tools';
|
import { ToolDefinition, ALL_TOOLS } from './tools';
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Agent configuration — which tools each agent gets + system prompt
|
// Agent configuration
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
export interface AgentConfig {
|
export interface AgentConfig {
|
||||||
name: string;
|
name: string;
|
||||||
description: string;
|
description: string;
|
||||||
model: string;
|
model: string; // model ID or tier ('A' | 'B' | 'C')
|
||||||
systemPrompt: string;
|
systemPrompt: string;
|
||||||
tools: ToolDefinition[];
|
tools: ToolDefinition[];
|
||||||
}
|
}
|
||||||
@@ -25,112 +25,109 @@ function pick(names: string[]): ToolDefinition[] {
|
|||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Agent definitions
|
// Agent definitions
|
||||||
|
//
|
||||||
|
// model is a tier ('A' | 'B' | 'C') or a specific model ID.
|
||||||
|
// Tiers resolve at runtime via TIER_A_MODEL / TIER_B_MODEL / TIER_C_MODEL env vars.
|
||||||
|
//
|
||||||
|
// Tier A = gemini-2.5-flash — fast, cheap: routing, summaries, monitoring
|
||||||
|
// Tier B = zai-org/glm-5-maas — workhorse coding model
|
||||||
|
// Tier C = zai-org/glm-5-maas — complex decisions (or Claude Sonnet via TIER_C_MODEL)
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
export const AGENTS: Record<string, AgentConfig> = {
|
export const AGENTS: Record<string, AgentConfig> = {
|
||||||
|
|
||||||
Orchestrator: {
|
Orchestrator: {
|
||||||
name: 'Orchestrator',
|
name: 'Orchestrator',
|
||||||
description: 'Master coordinator that breaks down high-level goals and delegates to specialist agents',
|
description: 'Master coordinator — breaks down goals and delegates to specialist agents',
|
||||||
model: 'gemini-2.5-flash',
|
model: 'B', // GLM-5 — good planner, chain-of-thought reasoning
|
||||||
systemPrompt: `You are the Orchestrator for Vibn, an autonomous AI system for software development.
|
systemPrompt: `You are the Orchestrator for Vibn, an autonomous AI platform for software development.
|
||||||
|
|
||||||
Your role is to:
|
Your role:
|
||||||
1. Understand the high-level goal provided in the task.
|
1. Understand the high-level goal.
|
||||||
2. Break it down into concrete sub-tasks.
|
2. Break it into concrete sub-tasks.
|
||||||
3. Delegate sub-tasks to the appropriate specialist agents using the spawn_agent tool.
|
3. Delegate to the right specialist agents via spawn_agent.
|
||||||
4. Use Gitea to track progress: create an issue at the start, close it when done.
|
4. Track progress via Gitea issues.
|
||||||
5. Summarize what was done when complete.
|
5. Summarize results when done.
|
||||||
|
|
||||||
Available specialist agents and when to use them:
|
Agents available:
|
||||||
- **Coder**: Any code changes — features, bug fixes, refactors, tests.
|
- Coder: code changes, features, bug fixes, tests.
|
||||||
- **PM**: Project management — issue triage, sprint planning, documentation updates.
|
- PM: issue triage, docs, sprint planning.
|
||||||
- **Marketing**: Content and copy — blog posts, landing page copy, release notes.
|
- Marketing: copy, blog posts, release notes.
|
||||||
|
|
||||||
Rules:
|
Rules:
|
||||||
- Always create a Gitea issue first to track the work.
|
- Create a Gitea issue first to track the work.
|
||||||
- Delegate to ONE agent at a time unless tasks are fully independent.
|
- Delegate one agent at a time unless tasks are fully independent.
|
||||||
- Check back on progress by listing issues.
|
- Never write code yourself — delegate to Coder.
|
||||||
- Never try to write code yourself — delegate to Coder.
|
- Be specific in task descriptions when spawning agents.`,
|
||||||
- Be concise in your task descriptions when spawning agents.`,
|
|
||||||
tools: pick([...GITEA_TOOLS, ...SPAWN_TOOL, ...COOLIFY_TOOLS])
|
tools: pick([...GITEA_TOOLS, ...SPAWN_TOOL, ...COOLIFY_TOOLS])
|
||||||
},
|
},
|
||||||
|
|
||||||
Coder: {
|
Coder: {
|
||||||
name: 'Coder',
|
name: 'Coder',
|
||||||
description: 'Senior software engineer — writes, edits, and tests code. Commits and pushes when done.',
|
description: 'Senior software engineer — writes, edits, tests, commits, and pushes code',
|
||||||
model: 'gemini-2.5-flash',
|
model: 'B', // GLM-5 — strong at code generation and diffs
|
||||||
systemPrompt: `You are an expert senior software engineer working autonomously on a git repository.
|
systemPrompt: `You are an expert senior software engineer working autonomously on a Git repository.
|
||||||
|
|
||||||
Your job is to complete the coding task given to you. Follow these rules:
|
Workflow:
|
||||||
|
1. Explore the codebase: list_directory, find_files, read_file.
|
||||||
**Workflow:**
|
2. Search for patterns: search_code.
|
||||||
1. Start by exploring the codebase: list_directory, find_files, read_file to understand structure.
|
|
||||||
2. Search for relevant code: search_code to find existing patterns.
|
|
||||||
3. Plan your changes before making them.
|
3. Plan your changes before making them.
|
||||||
4. Read every file BEFORE editing it.
|
4. Read every file BEFORE editing it.
|
||||||
5. Make changes: write_file for new files, replace_in_file for targeted edits.
|
5. Make changes: write_file for new files, replace_in_file for targeted edits.
|
||||||
6. Run tests or lint if applicable: execute_command.
|
6. Run tests/lint if applicable: execute_command.
|
||||||
7. Commit and push when the task is complete: git_commit_and_push.
|
7. Commit and push when complete: git_commit_and_push.
|
||||||
|
|
||||||
**Code quality rules:**
|
Code quality:
|
||||||
- Match existing code style exactly.
|
- Match existing style exactly.
|
||||||
- Never leave TODO comments — implement or skip.
|
- No TODO comments — implement or skip.
|
||||||
- Write complete files, not partial snippets.
|
- Write complete files, not partial snippets.
|
||||||
- If tests exist, run them and fix failures before committing.
|
- Run tests and fix failures before committing.
|
||||||
- Commit message should be concise and in imperative mood (e.g. "add user authentication").
|
- Commit messages: imperative mood, concise (e.g. "add user authentication").
|
||||||
|
|
||||||
**Safety rules:**
|
Safety:
|
||||||
- Never delete files unless explicitly instructed.
|
- Never delete files unless explicitly told to.
|
||||||
- Never modify .env files or credentials.
|
- Never touch .env files or credentials.
|
||||||
- Never commit secrets or API keys.
|
- Never commit secrets or API keys.
|
||||||
|
|
||||||
**If you were triggered by a Gitea issue:**
|
If triggered by a Gitea issue: close it with gitea_close_issue after committing.`,
|
||||||
- After committing, close the issue using gitea_close_issue.
|
|
||||||
- The repo name is in the format "owner/name".
|
|
||||||
|
|
||||||
Be methodical. Read before you write. Test before you commit.`,
|
|
||||||
tools: pick([...FILE_TOOLS, ...SHELL_TOOLS, ...GIT_TOOLS, ...GITEA_TOOLS])
|
tools: pick([...FILE_TOOLS, ...SHELL_TOOLS, ...GIT_TOOLS, ...GITEA_TOOLS])
|
||||||
},
|
},
|
||||||
|
|
||||||
PM: {
|
PM: {
|
||||||
name: 'PM',
|
name: 'PM',
|
||||||
description: 'Product manager — manages Gitea issues, writes documentation, tracks project health',
|
description: 'Product manager — docs, issue management, project health reports',
|
||||||
model: 'gemini-2.5-flash',
|
model: 'A', // Gemini Flash — lightweight, cheap for docs/issue work
|
||||||
systemPrompt: `You are an autonomous Product Manager for a software project hosted on Gitea.
|
systemPrompt: `You are an autonomous Product Manager for a software project hosted on Gitea.
|
||||||
|
|
||||||
Your responsibilities:
|
Responsibilities:
|
||||||
1. Create, update, and close Gitea issues to track work.
|
1. Create, update, and close Gitea issues.
|
||||||
2. Write and update documentation files in the repository.
|
2. Write and update docs in the repository.
|
||||||
3. Summarize project state and create reports.
|
3. Summarize project state and create reports.
|
||||||
4. Prioritize and triage bugs/features based on impact.
|
4. Triage bugs and features by impact.
|
||||||
|
|
||||||
When writing documentation:
|
When writing docs:
|
||||||
- Be clear and concise.
|
- Clear and concise.
|
||||||
- Use markdown formatting.
|
- Markdown formatting.
|
||||||
- Focus on what users and developers need to know.
|
- Keep docs in sync with the codebase.
|
||||||
- Keep docs up to date with the actual codebase state.
|
- Always commit after writing.`,
|
||||||
|
|
||||||
Always commit documentation updates after writing them.`,
|
|
||||||
tools: pick([...GITEA_TOOLS, ...FILE_TOOLS, ...GIT_TOOLS])
|
tools: pick([...GITEA_TOOLS, ...FILE_TOOLS, ...GIT_TOOLS])
|
||||||
},
|
},
|
||||||
|
|
||||||
Marketing: {
|
Marketing: {
|
||||||
name: 'Marketing',
|
name: 'Marketing',
|
||||||
description: 'Marketing specialist — writes copy, blog posts, release notes, and landing page content',
|
description: 'Marketing specialist — copy, blog posts, release notes, landing page content',
|
||||||
model: 'gemini-2.5-flash',
|
model: 'A', // Gemini Flash — cheap for content generation
|
||||||
systemPrompt: `You are an autonomous Marketing specialist for a SaaS product called Vibn.
|
systemPrompt: `You are an autonomous Marketing specialist for a SaaS product called Vibn.
|
||||||
|
|
||||||
Vibn is a cloud-based AI-powered development environment. It helps development teams build faster with AI agents that can write code, manage projects, and deploy automatically.
|
Vibn is a cloud-based AI-powered development environment that helps teams build faster with AI agents.
|
||||||
|
|
||||||
Your responsibilities:
|
Responsibilities:
|
||||||
1. Write compelling marketing copy for landing pages, email campaigns, and social media.
|
1. Write landing page copy, emails, and social media content.
|
||||||
2. Write technical blog posts that explain features in an accessible way.
|
2. Write technical blog posts explaining features accessibly.
|
||||||
3. Write release notes that highlight user-facing value.
|
3. Write release notes that highlight user-facing value.
|
||||||
4. Ensure all copy is on-brand: professional, clear, forward-thinking, and developer-friendly.
|
4. Maintain brand voice: smart, confident, practical. No hype, no jargon.
|
||||||
|
|
||||||
Brand voice: Smart, confident, practical. No hype. No jargon. Show don't tell.
|
Always create real files in the repo (e.g. blog/2026-02-release.md) and commit them.`,
|
||||||
|
|
||||||
When writing content, create actual files in the repository (e.g. blog/2026-02-release.md) and commit them.`,
|
|
||||||
tools: pick([...FILE_TOOLS, ...GIT_TOOLS])
|
tools: pick([...FILE_TOOLS, ...GIT_TOOLS])
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
285
src/llm.ts
Normal file
285
src/llm.ts
Normal file
@@ -0,0 +1,285 @@
|
|||||||
|
import { execSync } from 'child_process';
|
||||||
|
import { GoogleGenAI } from '@google/genai';
|
||||||
|
import { v4 as uuidv4 } from 'uuid';
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Unified LLM client — OpenAI-compatible message format throughout
|
||||||
|
//
|
||||||
|
// Two backends:
|
||||||
|
// VertexOpenAIClient — for GLM-5, Claude Sonnet, etc. via Vertex global endpoint
|
||||||
|
// GeminiClient — for Gemini Flash/Pro via @google/genai SDK
|
||||||
|
//
|
||||||
|
// Model tier defaults (overridable via env):
|
||||||
|
// Tier A: gemini-2.5-flash ($0.15/$0.60 per 1M) — routing, summaries, log parsing
|
||||||
|
// Tier B: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — coding, feature work
|
||||||
|
// Tier C: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — complex decisions, escalation
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Shared message types (OpenAI format — used everywhere internally)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * A single conversation entry in OpenAI chat format.
 *
 * This is the one message shape used internally; backends that speak another
 * wire format convert to and from it.
 */
export interface LLMMessage {
  /** Who produced the message. */
  role: 'system' | 'user' | 'assistant' | 'tool';

  /** Message text; may be `null` (e.g. an assistant turn that only calls tools). */
  content: string | null;

  /** Tool invocations requested by the model. */
  tool_calls?: LLMToolCall[];

  /** Set on `role: 'tool'` messages: id of the tool call being answered. */
  tool_call_id?: string;

  /** Set on `role: 'tool'` messages: name of the function that ran. */
  name?: string;
}
|
||||||
|
|
||||||
|
/** One function invocation requested by the model, in OpenAI tool-call shape. */
export interface LLMToolCall {
  /** Identifier of this call; the matching tool-result message echoes it as `tool_call_id`. */
  id: string;

  /** Always `'function'`. */
  type: 'function';

  /** The function to run and its arguments. */
  function: {
    /** Name of the function to invoke. */
    name: string;
    /** Arguments as a JSON-encoded string (not a parsed object). */
    arguments: string;
  };
}
|
||||||
|
|
||||||
|
/** A tool the model is allowed to call, described in OpenAI function-tool shape. */
export interface LLMTool {
  /** Always `'function'`. */
  type: 'function';

  /** Declaration of the callable function. */
  function: {
    /** Function name the model uses when calling the tool. */
    name: string;
    /** Human-readable explanation of what the tool does. */
    description: string;
    /** Parameter schema object, passed through to the backend unchanged. */
    parameters: Record<string, unknown>;
  };
}
|
||||||
|
|
||||||
|
/** Normalised result of one chat call, identical in shape for every backend. */
export interface LLMResponse {
  /** Text answer, or `null` when the model returned no text. */
  content: string | null;

  /** Chain-of-thought text when the backend exposes it (GLM-5); otherwise `null`. */
  reasoning: string | null;

  /** Tool calls requested by the model; an empty array when there are none. */
  tool_calls: LLMToolCall[];

  /** Why generation ended, e.g. `'stop'` or `'tool_calls'`. */
  finish_reason: string;

  /** Token accounting, when the backend reports it. */
  usage?: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}
|
||||||
|
|
||||||
|
/** Contract implemented by every model backend. */
export interface LLMClient {
  /** Concrete model identifier this client talks to. */
  modelId: string;

  /**
   * Send a conversation to the model and return its normalised reply.
   *
   * @param messages  Conversation so far, in OpenAI message format.
   * @param tools     Tools the model may call; omit for a plain completion.
   * @param maxTokens Upper bound on generated tokens; each client picks its own default.
   */
  chat(
    messages: LLMMessage[],
    tools?: LLMTool[],
    maxTokens?: number
  ): Promise<LLMResponse>;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Vertex AI OpenAI-compatible client
|
||||||
|
// Used for: zai-org/glm-5-maas, anthropic/claude-sonnet-4-6, etc.
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// Module-level cache for the Vertex access token and the time (epoch ms)
// after which it must be fetched again.
let _cachedToken = '';
let _tokenExpiry = 0;

/**
 * Return an access token for Vertex AI, reusing the cached one while it is
 * still inside its validity window.
 *
 * On a cache miss this runs `gcloud auth print-access-token` synchronously and
 * stores the trimmed output. If the command fails, the error propagates and
 * the cache is left untouched.
 */
function getVertexToken(): string {
  const requestedAt = Date.now();
  const cacheIsFresh = _cachedToken !== '' && requestedAt < _tokenExpiry;

  if (!cacheIsFresh) {
    const rawOutput = execSync('gcloud auth print-access-token', { encoding: 'utf8' });
    _cachedToken = rawOutput.trim();
    // tokens last 1hr, refresh at 55min
    _tokenExpiry = requestedAt + 55 * 60 * 1000;
  }

  return _cachedToken;
}
|
||||||
|
|
||||||
|
/**
 * Chat client for models served through Vertex AI's OpenAI-compatible
 * `chat/completions` endpoint (e.g. zai-org/glm-5-maas,
 * anthropic/claude-sonnet-4-6).
 *
 * Messages and tools are sent in OpenAI format as-is; authentication uses the
 * bearer token returned by `getVertexToken()`.
 */
export class VertexOpenAIClient implements LLMClient {
  modelId: string;
  private projectId: string;
  private region: string;
  private temperature: number;

  /**
   * @param modelId Model identifier sent as the `model` field of the request.
   * @param opts    Optional overrides. `projectId` falls back to the
   *                `GCP_PROJECT_ID` env var and then to a built-in default;
   *                `region` defaults to `'global'`; `temperature` to `0.3`.
   */
  constructor(modelId: string, opts?: { projectId?: string; region?: string; temperature?: number }) {
    this.modelId = modelId;
    this.projectId = opts?.projectId ?? process.env.GCP_PROJECT_ID ?? 'master-ai-484822';
    this.region = opts?.region ?? 'global';
    this.temperature = opts?.temperature ?? 0.3;
  }

  /**
   * Send one non-streaming chat completion request.
   *
   * @param messages  Conversation in OpenAI message format.
   * @param tools     Tools the model may call; when non-empty, `tool_choice`
   *                  is set to `'auto'`.
   * @param maxTokens Value for `max_tokens` (default 4096).
   * @returns The first choice, normalised to {@link LLMResponse}.
   * @throws Error `Vertex API <status>: <body>` on a non-2xx response, or
   *         `No response from Vertex` when the payload contains no choices.
   */
  async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 4096): Promise<LLMResponse> {
    // The global location uses the un-prefixed host; regional ones are prefixed.
    const base = this.region === 'global'
      ? 'https://aiplatform.googleapis.com'
      : `https://${this.region}-aiplatform.googleapis.com`;
    const url = `${base}/v1/projects/${this.projectId}/locations/${this.region}/endpoints/openapi/chat/completions`;

    const body: Record<string, unknown> = {
      model: this.modelId,
      messages,
      max_tokens: maxTokens,
      temperature: this.temperature,
      stream: false
    };

    if (tools && tools.length > 0) {
      body.tools = tools;
      body.tool_choice = 'auto';
    }

    const payload = JSON.stringify(body);

    // The token is read on every attempt so a retry picks up a refreshed one.
    const post = () => fetch(url, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${getVertexToken()}`,
        'Content-Type': 'application/json'
      },
      body: payload
    });

    let res = await post();

    // A 401 usually means the cached token went stale early. Invalidating the
    // cache alone would still fail this call, so retry exactly once with a
    // freshly fetched token.
    if (res.status === 401) {
      await res.text(); // drain the rejected response before retrying
      _tokenExpiry = 0;
      res = await post();
    }

    if (!res.ok) {
      const errText = await res.text();
      // Still unauthorised after the retry: make the next call refresh again.
      if (res.status === 401) _tokenExpiry = 0;
      throw new Error(`Vertex API ${res.status}: ${errText.slice(0, 400)}`);
    }

    const data = (await res.json()) as {
      choices?: Array<{
        message?: {
          content?: string | null;
          reasoning_content?: string | null;
          tool_calls?: LLMToolCall[] | null;
        };
        finish_reason?: string | null;
      }>;
      usage?: LLMResponse['usage'];
    };

    const choice = data.choices?.[0];
    // Mirror GeminiClient: an answer without any choice is an error, not an
    // empty "stop" reply that would look like a successful completion.
    if (!choice) throw new Error('No response from Vertex');

    const message = choice.message ?? {};

    return {
      content: message.content ?? null,
      reasoning: message.reasoning_content ?? null,
      tool_calls: message.tool_calls ?? [],
      finish_reason: choice.finish_reason ?? 'stop',
      usage: data.usage
    };
  }
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Gemini client via @google/genai SDK
|
||||||
|
// Used for: Tier A (fast/cheap routing, summaries, log parsing)
|
||||||
|
// Converts to/from OpenAI message format internally.
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Chat client for Gemini models via the `@google/genai` SDK.
 *
 * Accepts and returns OpenAI-format messages; conversion to Gemini's content
 * format happens inside `chat` (history via `toGeminiContents`).
 */
export class GeminiClient implements LLMClient {
  modelId: string;
  private temperature: number;

  /**
   * @param modelId Gemini model name (default `'gemini-2.5-flash'`).
   * @param opts    Optional overrides; `temperature` defaults to `0.2`.
   */
  constructor(modelId = 'gemini-2.5-flash', opts?: { temperature?: number }) {
    this.modelId = modelId;
    this.temperature = opts?.temperature ?? 0.2;
  }

  /**
   * Send one generateContent request.
   *
   * @param messages  Conversation in OpenAI message format. All `system`
   *                  messages are combined into the system instruction; the
   *                  rest become the content history.
   * @param tools     Tools the model may call.
   * @param maxTokens Value for `maxOutputTokens` (default 8192).
   * @returns The first candidate, normalised to {@link LLMResponse}.
   * @throws Error when `GOOGLE_API_KEY` is not set or no candidate is returned.
   */
  async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 8192): Promise<LLMResponse> {
    // Read at call time so the key can be provided after module load.
    const apiKey = process.env.GOOGLE_API_KEY;
    if (!apiKey) throw new Error('GOOGLE_API_KEY not set');

    const genai = new GoogleGenAI({ apiKey });

    // Gemini takes one system instruction, so every system message is joined
    // in order rather than keeping only the first and dropping the rest.
    const systemTexts = messages.flatMap(m =>
      m.role === 'system' && m.content !== null ? [m.content] : []
    );
    const nonSystem = messages.filter(m => m.role !== 'system');

    const functionDeclarations = (tools ?? []).map(t => ({
      name: t.function.name,
      description: t.function.description,
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- schema is passed through to the SDK untyped
      parameters: t.function.parameters as any
    }));

    const response = await genai.models.generateContent({
      model: this.modelId,
      contents: toGeminiContents(nonSystem),
      config: {
        systemInstruction: systemTexts.length > 0 ? systemTexts.join('\n\n') : undefined,
        tools: functionDeclarations.length > 0 ? [{ functionDeclarations }] : undefined,
        temperature: this.temperature,
        maxOutputTokens: maxTokens
      }
    });

    const candidate = response.candidates?.[0];
    if (!candidate) throw new Error('No response from Gemini');

    const parts = candidate.content?.parts ?? [];
    // Empty text collapses to null, matching the OpenAI convention.
    const textContent = parts.filter(p => p.text).map(p => p.text).join('') || null;
    const fnCalls = parts.filter(p => p.functionCall);

    // The response parts carry no call id here, so one is synthesised for each
    // call to satisfy the OpenAI tool-call shape.
    const tool_calls: LLMToolCall[] = fnCalls.map(p => ({
      id: `call_${uuidv4().replace(/-/g, '').slice(0, 12)}`,
      type: 'function' as const,
      function: {
        name: p.functionCall!.name ?? '',
        arguments: JSON.stringify(p.functionCall!.args ?? {})
      }
    }));

    // Report token usage like the Vertex client does, when the SDK provides it.
    const meta = response.usageMetadata;
    const usage = meta
      ? {
          prompt_tokens: meta.promptTokenCount ?? 0,
          completion_tokens: meta.candidatesTokenCount ?? 0,
          total_tokens: meta.totalTokenCount ?? 0
        }
      : undefined;

    return {
      content: textContent,
      reasoning: null,
      tool_calls,
      finish_reason: fnCalls.length > 0 ? 'tool_calls' : 'stop',
      usage
    };
  }
}
|
||||||
|
|
||||||
|
/**
 * Convert OpenAI-format messages into Gemini `Content[]` entries.
 *
 * - `assistant` → one `model` turn holding its text (if any) followed by one
 *   `functionCall` part per tool call.
 * - `tool` → a `functionResponse` part. Consecutive tool messages are merged
 *   into a single `user` turn, so the results of a multi-call model turn go
 *   back together, as the Gemini-native loop used to send them.
 * - anything else → a `user` turn with the message text.
 *
 * Callers are expected to have removed `system` messages already; any that
 * remain are treated as user text.
 *
 * @param messages Conversation in OpenAI message format.
 * @returns Gemini content entries in the same order.
 */
function toGeminiContents(messages: LLMMessage[]): any[] {
  const contents: any[] = [];
  // Parts array of the user turn currently collecting tool results, or null
  // when the previous message was not a tool result.
  let openToolTurn: any[] | null = null;

  for (const msg of messages) {
    if (msg.role === 'tool') {
      // Tool output may be JSON or plain text; hand Gemini the parsed value
      // when it parses and the raw string otherwise.
      let resultValue: unknown = msg.content;
      try { resultValue = JSON.parse(msg.content ?? 'null'); } catch { /* keep as string */ }

      const part = {
        functionResponse: {
          name: msg.name ?? 'tool',
          response: { result: resultValue }
        }
      };

      if (openToolTurn) {
        openToolTurn.push(part);
      } else {
        openToolTurn = [part];
        contents.push({ role: 'user', parts: openToolTurn });
      }
      continue;
    }

    // Any non-tool message closes the current tool-result turn.
    openToolTurn = null;

    if (msg.role === 'assistant') {
      const parts: any[] = [];
      if (msg.content) parts.push({ text: msg.content });
      for (const tc of msg.tool_calls ?? []) {
        // Arguments come from the model and may be malformed; fall back to
        // `{}` (as the agent loop does) so one bad call cannot make the
        // whole history unconvertible.
        let args: unknown = {};
        try { args = JSON.parse(tc.function.arguments || '{}'); } catch { args = {}; }
        parts.push({
          functionCall: {
            name: tc.function.name,
            args
          }
        });
      }
      contents.push({ role: 'model', parts });
    } else {
      contents.push({ role: 'user', parts: [{ text: msg.content ?? '' }] });
    }
  }

  return contents;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Factory — createLLM(modelId | tier)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** Shorthand for one of the three model tiers. */
export type ModelTier = 'A' | 'B' | 'C';

/**
 * Model ID behind each tier. Each entry comes from its `TIER_*_MODEL`
 * environment variable when that is defined, else from the built-in default.
 * The environment is read once, when this module is loaded.
 */
const TIER_MODELS: Record<ModelTier, string> = (() => {
  const env = process.env;
  return {
    A: env.TIER_A_MODEL ?? 'gemini-2.5-flash',
    B: env.TIER_B_MODEL ?? 'zai-org/glm-5-maas',
    C: env.TIER_C_MODEL ?? 'zai-org/glm-5-maas'
  };
})();
|
||||||
|
|
||||||
|
/**
 * Build the client for a tier or an explicit model ID.
 *
 * `'A'`, `'B'` and `'C'` are looked up in `TIER_MODELS`; any other string is
 * used as the model ID unchanged. IDs starting with `gemini-` get a
 * `GeminiClient`, everything else a `VertexOpenAIClient`.
 *
 * @param modelOrTier Tier letter or concrete model ID.
 * @param opts        Optional settings; only `temperature` is supported.
 * @returns A client whose `modelId` is the resolved model ID.
 */
export function createLLM(modelOrTier: string | ModelTier, opts?: { temperature?: number }): LLMClient {
  let modelId: string;
  switch (modelOrTier) {
    case 'A':
    case 'B':
    case 'C':
      modelId = TIER_MODELS[modelOrTier];
      break;
    default:
      modelId = modelOrTier;
  }

  const servedByGeminiSdk = modelId.startsWith('gemini-');
  if (!servedByGeminiSdk) {
    return new VertexOpenAIClient(modelId, { temperature: opts?.temperature });
  }
  return new GeminiClient(modelId, opts);
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helper — convert our ToolDefinition[] → LLMTool[] (OpenAI format)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Wrap tool definitions in the OpenAI function-tool envelope.
 *
 * Only `name`, `description` and `parameters` are carried over; the
 * `parameters` object is passed by reference, not copied.
 *
 * @param tools Tool definitions to convert.
 * @returns One `LLMTool` per input, in the same order.
 */
export function toOAITools(
  tools: Array<{ name: string; description: string; parameters: Record<string, unknown> }>
): LLMTool[] {
  const converted: LLMTool[] = [];
  for (const { name, description, parameters } of tools) {
    converted.push({
      type: 'function',
      function: { name, description, parameters }
    });
  }
  return converted;
}
|
||||||
Reference in New Issue
Block a user