fix: compile dist from source in Docker, fix ChatResult interface
- Dockerfile now runs tsc during build so committed dist/ is never stale
- ChatResult interface was missing history[] and memoryUpdates[] fields
- Re-add missing MemoryUpdate import in orchestrator.ts
- Rebuild dist/ with all new fields included

Made-with: Cursor
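For context, a minimal sketch of what the interface fix implies. Only the field names history[] and memoryUpdates[] and the MemoryUpdate import come from the message above; the element types, the import path, and the placeholder ChatMessage type are assumptions, since the real definitions live in the TypeScript source rather than in this diff:

// Hypothetical sketch: element types, paths, and ChatMessage are assumed;
// only history[]/memoryUpdates[] and the MemoryUpdate import are from the commit.
import { MemoryUpdate } from './memory';

type ChatMessage = { role: string; content: string | null };

interface ChatResult {
    // ...existing fields (not shown in this diff)...
    history: ChatMessage[];          // added: message history for the turn
    memoryUpdates: MemoryUpdate[];   // added: memory writes produced this turn
}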
dist/llm.js (new file, vendored, 197 lines)
@@ -0,0 +1,197 @@
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.GeminiClient = exports.VertexOpenAIClient = void 0;
exports.createLLM = createLLM;
exports.toOAITools = toOAITools;
const child_process_1 = require("child_process");
const genai_1 = require("@google/genai");
const uuid_1 = require("uuid");
// ---------------------------------------------------------------------------
// Vertex AI OpenAI-compatible client
// Used for: zai-org/glm-5-maas, anthropic/claude-sonnet-4-6, etc.
// ---------------------------------------------------------------------------
let _cachedToken = '';
let _tokenExpiry = 0;
function getVertexToken() {
    const now = Date.now();
    if (_cachedToken && now < _tokenExpiry)
        return _cachedToken;
    _cachedToken = (0, child_process_1.execSync)('gcloud auth print-access-token', { encoding: 'utf8' }).trim();
    _tokenExpiry = now + 55 * 60 * 1000; // tokens last 1hr, refresh at 55min
    return _cachedToken;
}
class VertexOpenAIClient {
    constructor(modelId, opts) {
        this.modelId = modelId;
        this.projectId = opts?.projectId ?? process.env.GCP_PROJECT_ID ?? 'master-ai-484822';
        this.region = opts?.region ?? 'global';
        this.temperature = opts?.temperature ?? 0.3;
    }
    async chat(messages, tools, maxTokens = 4096) {
        const token = getVertexToken();
        const base = this.region === 'global'
            ? 'https://aiplatform.googleapis.com'
            : `https://${this.region}-aiplatform.googleapis.com`;
        const url = `${base}/v1/projects/${this.projectId}/locations/${this.region}/endpoints/openapi/chat/completions`;
        const body = {
            model: this.modelId,
            messages,
            max_tokens: maxTokens,
            temperature: this.temperature,
            stream: false
        };
        if (tools && tools.length > 0) {
            body.tools = tools;
            body.tool_choice = 'auto';
        }
        const res = await fetch(url, {
            method: 'POST',
            headers: {
                'Authorization': `Bearer ${token}`,
                'Content-Type': 'application/json'
            },
            body: JSON.stringify(body)
        });
        if (!res.ok) {
            const errText = await res.text();
            // Force token refresh on 401
            if (res.status === 401)
                _tokenExpiry = 0;
            throw new Error(`Vertex API ${res.status}: ${errText.slice(0, 400)}`);
        }
        const data = await res.json();
        const choice = data.choices?.[0];
        const message = choice?.message ?? {};
        return {
            content: message.content ?? null,
            reasoning: message.reasoning_content ?? null,
            tool_calls: message.tool_calls ?? [],
            finish_reason: choice?.finish_reason ?? 'stop',
            usage: data.usage
        };
    }
}
exports.VertexOpenAIClient = VertexOpenAIClient;
// ---------------------------------------------------------------------------
// Gemini client via @google/genai SDK
// Used for: Tier A (fast/cheap routing, summaries, log parsing)
// Converts to/from OpenAI message format internally.
// ---------------------------------------------------------------------------
class GeminiClient {
    constructor(modelId = 'gemini-2.5-flash', opts) {
        this.modelId = modelId;
        this.temperature = opts?.temperature ?? 0.2;
    }
    async chat(messages, tools, maxTokens = 8192) {
        const apiKey = process.env.GOOGLE_API_KEY;
        if (!apiKey)
            throw new Error('GOOGLE_API_KEY not set');
        const genai = new genai_1.GoogleGenAI({ apiKey });
        const systemMsg = messages.find(m => m.role === 'system');
        const nonSystem = messages.filter(m => m.role !== 'system');
        const functionDeclarations = (tools ?? []).map(t => ({
            name: t.function.name,
            description: t.function.description,
            parameters: t.function.parameters
        }));
        const response = await genai.models.generateContent({
            model: this.modelId,
            contents: toGeminiContents(nonSystem),
            config: {
                systemInstruction: systemMsg?.content ?? undefined,
                tools: functionDeclarations.length > 0 ? [{ functionDeclarations }] : undefined,
                temperature: this.temperature,
                maxOutputTokens: maxTokens
            }
        });
        const candidate = response.candidates?.[0];
        if (!candidate)
            throw new Error('No response from Gemini');
        const parts = candidate.content?.parts ?? [];
        const textContent = parts.filter(p => p.text).map(p => p.text).join('') || null;
        const fnCalls = parts.filter(p => p.functionCall);
        const tool_calls = fnCalls.map(p => ({
            id: `call_${(0, uuid_1.v4)().replace(/-/g, '').slice(0, 12)}`,
            type: 'function',
            function: {
                name: p.functionCall.name ?? '',
                arguments: JSON.stringify(p.functionCall.args ?? {})
            }
        }));
        return {
            content: textContent,
            reasoning: null,
            tool_calls,
            finish_reason: fnCalls.length > 0 ? 'tool_calls' : 'stop'
        };
    }
}
exports.GeminiClient = GeminiClient;
/** Convert OpenAI message format → Gemini Content[] format */
function toGeminiContents(messages) {
    const contents = [];
    for (const msg of messages) {
        if (msg.role === 'assistant') {
            const parts = [];
            if (msg.content)
                parts.push({ text: msg.content });
            for (const tc of msg.tool_calls ?? []) {
                parts.push({
                    functionCall: {
                        name: tc.function.name,
                        args: JSON.parse(tc.function.arguments || '{}')
                    }
                });
            }
            contents.push({ role: 'model', parts });
        }
        else if (msg.role === 'tool') {
            // Parse content back — could be JSON or plain text
            let resultValue = msg.content;
            try {
                resultValue = JSON.parse(msg.content ?? 'null');
            }
            catch { /* keep as string */ }
            contents.push({
                role: 'user',
                parts: [{
                    functionResponse: {
                        name: msg.name ?? 'tool',
                        response: { result: resultValue }
                    }
                }]
            });
        }
        else {
            contents.push({ role: 'user', parts: [{ text: msg.content ?? '' }] });
        }
    }
    return contents;
}
const TIER_MODELS = {
    A: process.env.TIER_A_MODEL ?? 'gemini-2.5-flash',
    B: process.env.TIER_B_MODEL ?? 'zai-org/glm-5-maas',
    C: process.env.TIER_C_MODEL ?? 'zai-org/glm-5-maas'
};
function createLLM(modelOrTier, opts) {
    const modelId = (modelOrTier === 'A' || modelOrTier === 'B' || modelOrTier === 'C')
        ? TIER_MODELS[modelOrTier]
        : modelOrTier;
    if (modelId.startsWith('gemini-')) {
        return new GeminiClient(modelId, opts);
    }
    return new VertexOpenAIClient(modelId, { temperature: opts?.temperature });
}
// ---------------------------------------------------------------------------
// Helper — convert our ToolDefinition[] → LLMTool[] (OpenAI format)
// ---------------------------------------------------------------------------
function toOAITools(tools) {
    return tools.map(t => ({
        type: 'function',
        function: {
            name: t.name,
            description: t.description,
            parameters: t.parameters
        }
    }));
}
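To make the format conversion concrete, here is a worked example of toGeminiContents; the behavior is read directly from the function in this diff, and the get_time tool name is illustrative:

// Illustrative OpenAI-style input; get_time is a made-up tool name.
const openaiMessages = [
    { role: 'user', content: 'What time is it?' },
    { role: 'assistant', content: null, tool_calls: [{
        id: 'call_1', type: 'function',
        function: { name: 'get_time', arguments: '{}' }
    }] },
    { role: 'tool', name: 'get_time', content: '{"time":"12:00"}' }
];
// toGeminiContents(openaiMessages) returns:
// [
//   { role: 'user',  parts: [{ text: 'What time is it?' }] },
//   { role: 'model', parts: [{ functionCall: { name: 'get_time', args: {} } }] },
//   { role: 'user',  parts: [{ functionResponse: { name: 'get_time',
//                                response: { result: { time: '12:00' } } } }] }
// ]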
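And a minimal end-to-end usage sketch, assuming the compiled module is imported from dist/ as committed here. The exported names (createLLM, toOAITools, chat) and the tool-definition shape (name, description, parameters) are taken from the file above; the tool itself and the prompts are illustrative:

// Usage sketch, not part of the committed file; import path is assumed.
import { createLLM, toOAITools } from './dist/llm';

// Tool definition in the shape toOAITools expects (see helper above).
const tools = toOAITools([{
    name: 'get_time',
    description: 'Return the current time as an ISO string',
    parameters: { type: 'object', properties: {} }
}]);

async function main() {
    // 'A' routes to TIER_A_MODEL ?? 'gemini-2.5-flash' per createLLM above.
    const llm = createLLM('A');
    const result = await llm.chat([
        { role: 'system', content: 'Be terse.' },
        { role: 'user', content: 'What time is it?' }
    ], tools);
    console.log(result.content, result.tool_calls);
}

main().catch(console.error);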