feat: add AnthropicVertexClient for claude-* models
- @anthropic-ai/vertex-sdk: proper Anthropic Messages API on Vertex
- AnthropicVertexClient: converts OpenAI message format ↔ Anthropic format, handles tool_use blocks, retries 429/503 with backoff
- createLLM: routes anthropic/* and claude-* models through the new client
- Tier B/C default: claude-sonnet-4-6 via the us-east5 Vertex endpoint
- /generate endpoint: accepts a region param for regional endpoint testing

Made-with: Cursor
This commit is contained in:
219
package-lock.json
generated
219
package-lock.json
generated
@@ -8,6 +8,8 @@
|
||||
"name": "vibn-agent-runner",
|
||||
"version": "0.1.0",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.78.0",
|
||||
"@anthropic-ai/vertex-sdk": "^0.14.4",
|
||||
"@google/genai": "^1.0.0",
|
||||
"cors": "^2.8.5",
|
||||
"express": "^4.19.2",
|
||||
@@ -24,6 +26,121 @@
|
||||
"typescript": "^5.4.5"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/sdk": {
|
||||
"version": "0.78.0",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.78.0.tgz",
|
||||
"integrity": "sha512-PzQhR715td/m1UaaN5hHXjYB8Gl2lF9UVhrrGrZeysiF6Rb74Wc9GCB8hzLdzmQtBd1qe89F9OptgB9Za1Ib5w==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"json-schema-to-ts": "^3.1.1"
|
||||
},
|
||||
"bin": {
|
||||
"anthropic-ai-sdk": "bin/cli"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"zod": "^3.25.0 || ^4.0.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"zod": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/vertex-sdk": {
|
||||
"version": "0.14.4",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.14.4.tgz",
|
||||
"integrity": "sha512-BZUPRWghZxfSFtAxU563wH+jfWBPoedAwsVxG35FhmNsjeV8tyfN+lFriWhCpcZApxA4NdT6Soov+PzfnxxD5g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": ">=0.50.3 <1",
|
||||
"google-auth-library": "^9.4.2"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/vertex-sdk/node_modules/gaxios": {
|
||||
"version": "6.7.1",
|
||||
"resolved": "https://registry.npmjs.org/gaxios/-/gaxios-6.7.1.tgz",
|
||||
"integrity": "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"extend": "^3.0.2",
|
||||
"https-proxy-agent": "^7.0.1",
|
||||
"is-stream": "^2.0.0",
|
||||
"node-fetch": "^2.6.9",
|
||||
"uuid": "^9.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/vertex-sdk/node_modules/gcp-metadata": {
|
||||
"version": "6.1.1",
|
||||
"resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.1.tgz",
|
||||
"integrity": "sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"gaxios": "^6.1.1",
|
||||
"google-logging-utils": "^0.0.2",
|
||||
"json-bigint": "^1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/vertex-sdk/node_modules/google-auth-library": {
|
||||
"version": "9.15.1",
|
||||
"resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.1.tgz",
|
||||
"integrity": "sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"base64-js": "^1.3.0",
|
||||
"ecdsa-sig-formatter": "^1.0.11",
|
||||
"gaxios": "^6.1.1",
|
||||
"gcp-metadata": "^6.1.0",
|
||||
"gtoken": "^7.0.0",
|
||||
"jws": "^4.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/vertex-sdk/node_modules/google-logging-utils": {
|
||||
"version": "0.0.2",
|
||||
"resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-0.0.2.tgz",
|
||||
"integrity": "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/vertex-sdk/node_modules/node-fetch": {
|
||||
"version": "2.7.0",
|
||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
|
||||
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"whatwg-url": "^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": "4.x || >=6.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"encoding": "^0.1.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"encoding": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/runtime": {
|
||||
"version": "7.28.6",
|
||||
"resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.6.tgz",
|
||||
"integrity": "sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=6.9.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@cspotcode/source-map-support": {
|
||||
"version": "0.8.1",
|
||||
"resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz",
|
||||
@@ -1051,6 +1168,55 @@
|
||||
"url": "https://github.com/sponsors/ljharb"
|
||||
}
|
||||
},
|
||||
"node_modules/gtoken": {
|
||||
"version": "7.1.0",
|
||||
"resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz",
|
||||
"integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"gaxios": "^6.0.0",
|
||||
"jws": "^4.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/gtoken/node_modules/gaxios": {
|
||||
"version": "6.7.1",
|
||||
"resolved": "https://registry.npmjs.org/gaxios/-/gaxios-6.7.1.tgz",
|
||||
"integrity": "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"extend": "^3.0.2",
|
||||
"https-proxy-agent": "^7.0.1",
|
||||
"is-stream": "^2.0.0",
|
||||
"node-fetch": "^2.6.9",
|
||||
"uuid": "^9.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
}
|
||||
},
|
||||
"node_modules/gtoken/node_modules/node-fetch": {
|
||||
"version": "2.7.0",
|
||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
|
||||
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"whatwg-url": "^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": "4.x || >=6.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"encoding": "^0.1.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"encoding": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/has-symbols": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz",
|
||||
@@ -1167,6 +1333,18 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/is-stream": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz",
|
||||
"integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/isexe": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz",
|
||||
@@ -1197,6 +1375,19 @@
|
||||
"bignumber.js": "^9.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/json-schema-to-ts": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/json-schema-to-ts/-/json-schema-to-ts-3.1.1.tgz",
|
||||
"integrity": "sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/runtime": "^7.18.3",
|
||||
"ts-algebra": "^2.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16"
|
||||
}
|
||||
},
|
||||
"node_modules/jwa": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.1.tgz",
|
||||
@@ -1871,6 +2062,18 @@
|
||||
"node": ">=0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/tr46": {
|
||||
"version": "0.0.3",
|
||||
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
|
||||
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ts-algebra": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz",
|
||||
"integrity": "sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ts-node": {
|
||||
"version": "10.9.2",
|
||||
"resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz",
|
||||
@@ -2004,6 +2207,22 @@
|
||||
"node": ">= 8"
|
||||
}
|
||||
},
|
||||
"node_modules/webidl-conversions": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
|
||||
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
|
||||
"license": "BSD-2-Clause"
|
||||
},
|
||||
"node_modules/whatwg-url": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
|
||||
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"tr46": "~0.0.3",
|
||||
"webidl-conversions": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/which": {
|
||||
"version": "2.0.2",
|
||||
"resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz",
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
"test": "ts-node src/test.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "^0.78.0",
|
||||
"@anthropic-ai/vertex-sdk": "^0.14.4",
|
||||
"@google/genai": "^1.0.0",
|
||||
"cors": "^2.8.5",
|
||||
"express": "^4.19.2",
|
||||
|
||||
146
src/llm.ts
146
src/llm.ts
@@ -1,20 +1,20 @@
|
||||
import { GoogleAuth } from 'google-auth-library';
|
||||
import { GoogleGenAI } from '@google/genai';
|
||||
import AnthropicVertex from '@anthropic-ai/vertex-sdk';
|
||||
import { v4 as uuidv4 } from 'uuid';
|
||||
|
||||
// =============================================================================
|
||||
// Unified LLM client — OpenAI-compatible message format throughout
|
||||
//
|
||||
// Two backends:
|
||||
// VertexOpenAIClient — for GLM-5 and other Vertex MaaS models (openai-compat endpoint)
|
||||
// GeminiClient — for Gemini Flash/Pro via @google/genai SDK (API key)
|
||||
// Three backends:
|
||||
// VertexOpenAIClient — GLM-5 and other Vertex MaaS models (openai-compat endpoint)
|
||||
// GeminiClient — Gemini Flash/Pro via @google/genai SDK (API key)
|
||||
// AnthropicVertexClient — Claude models via Anthropic Messages API on Vertex (us-east5)
|
||||
//
|
||||
// Model tier defaults (overridable via TIER_A/B/C_MODEL env vars):
|
||||
// Tier A: gemini-2.5-flash — routing, summaries, log parsing (API key, high quota)
|
||||
// Tier B: zai-org/glm-5-maas — coding, feature work (Vertex MaaS, retry on 429)
|
||||
// Tier C: zai-org/glm-5-maas — complex decisions, escalation
|
||||
//
|
||||
// Claude models are NOT available in this GCP project — do not use anthropic/* IDs.
|
||||
// Tier A: gemini-2.5-flash — routing, summaries (API key, high quota)
|
||||
// Tier B: claude-sonnet-4-6 — coding, feature work (Anthropic Vertex, us-east5)
|
||||
// Tier C: claude-sonnet-4-6 — complex decisions
|
||||
// =============================================================================
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -288,6 +288,128 @@ function toGeminiContents(messages: LLMMessage[]): any[] {
|
||||
return contents;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Anthropic Vertex client
|
||||
// Used for: claude-* models via Vertex AI (proper Anthropic Messages API)
|
||||
// Handles tool_calls by converting to/from Anthropic's tool_use blocks.
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Parses the JSON-encoded `arguments` string of an OpenAI-style tool call into
 * the plain object Anthropic expects as a `tool_use` block's `input`.
 *
 * Malformed JSON, or JSON that is not an object (e.g. `null`, an array, a
 * number), yields `{}` with a warning instead of throwing, so that one bad
 * tool call in the history cannot make every later request fail.
 */
function parseAnthropicToolInput(raw: string | null | undefined): Record<string, unknown> {
  if (!raw) return {};
  try {
    const parsed: unknown = JSON.parse(raw);
    if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
      return parsed as Record<string, unknown>;
    }
  } catch {
    // Malformed JSON — handled by the shared fallback below.
  }
  console.warn('[llm] AnthropicVertex: tool call arguments are not a JSON object, sending empty input');
  return {};
}

/** Normalises Anthropic message content (plain string or block array) to a block array. */
function toAnthropicBlocks(content: string | any[]): any[] {
  if (typeof content === 'string') {
    return content ? [{ type: 'text', text: content }] : [];
  }
  return content;
}

/**
 * Converts OpenAI-format messages into an Anthropic `system` string plus an
 * Anthropic `messages` array.
 *
 * - All non-empty `system` messages are joined with a blank line (the
 *   Messages API takes the system prompt as a separate field).
 * - Assistant `tool_calls` become `tool_use` blocks.
 * - `tool` messages become `tool_result` blocks inside a user turn.
 * - Assistant messages with neither text nor tool calls are dropped, since an
 *   empty content array is not a valid turn.
 * - Consecutive messages that map to the same Anthropic role are merged into
 *   one turn, so the results of several tool calls made in one assistant turn
 *   end up together in a single user message.
 */
function toAnthropicMessages(messages: LLMMessage[]): { system: string | undefined; messages: any[] } {
  const systemParts: string[] = [];
  const converted: any[] = [];

  const append = (role: 'user' | 'assistant', content: string | any[]): void => {
    const previous = converted[converted.length - 1];
    if (previous && previous.role === role) {
      previous.content = [...toAnthropicBlocks(previous.content), ...toAnthropicBlocks(content)];
    } else {
      converted.push({ role, content });
    }
  };

  for (const m of messages) {
    if (m.role === 'system') {
      if (m.content) systemParts.push(m.content);
      continue;
    }

    if (m.role === 'assistant') {
      const parts: any[] = [];
      if (m.content) parts.push({ type: 'text', text: m.content });
      for (const tc of m.tool_calls ?? []) {
        parts.push({
          type: 'tool_use',
          id: tc.id,
          name: tc.function.name,
          input: parseAnthropicToolInput(tc.function.arguments),
        });
      }
      if (parts.length === 0) continue;
      // A lone text part is sent as a plain string, everything else as blocks.
      append('assistant', parts.length === 1 && parts[0].type === 'text' ? parts[0].text : parts);
      continue;
    }

    if (m.role === 'tool') {
      append('user', [{ type: 'tool_result', tool_use_id: m.tool_call_id, content: m.content ?? '' }]);
      continue;
    }

    // Any remaining role is treated as a user turn.
    append('user', m.content ?? '');
  }

  return {
    system: systemParts.length > 0 ? systemParts.join('\n\n') : undefined,
    messages: converted,
  };
}

/**
 * LLM client for Claude models served through Vertex AI using the Anthropic
 * Messages API (`@anthropic-ai/vertex-sdk`).
 *
 * Accepts and returns the OpenAI-compatible message format used by the other
 * clients in this file, translating tool calls to and from Anthropic
 * `tool_use` / `tool_result` blocks.
 */
export class AnthropicVertexClient implements LLMClient {
  modelId: string;
  private projectId: string;
  private region: string;
  /** SDK client, created on first use and reused by later chat() calls on this instance. */
  private client?: AnthropicVertex;

  /**
   * @param modelId Claude model name; a leading "anthropic/" prefix is stripped
   *   because the SDK uses bare model names.
   * @param opts Optional overrides. `projectId` falls back to `GCP_PROJECT_ID`,
   *   `region` falls back to `CLAUDE_REGION`, then to built-in defaults. Empty
   *   strings are treated as "not set" so a blank env var cannot select an
   *   empty project or region.
   */
  constructor(modelId: string, opts?: { projectId?: string; region?: string }) {
    const prefix = 'anthropic/';
    this.modelId = modelId.startsWith(prefix) ? modelId.slice(prefix.length) : modelId;
    this.projectId = opts?.projectId || process.env.GCP_PROJECT_ID || 'master-ai-484822';
    this.region = opts?.region || process.env.CLAUDE_REGION || 'us-east5';
  }

  /**
   * Builds the SDK client. Uses the base64-encoded service-account key from
   * `GCP_SA_KEY_BASE64` when it decodes to valid JSON; otherwise falls back to
   * the SDK's default credential lookup.
   */
  private buildClient(): AnthropicVertex {
    const b64Key = process.env.GCP_SA_KEY_BASE64;
    if (b64Key) {
      try {
        const jsonStr = Buffer.from(b64Key, 'base64').toString('utf8');
        const credentials = JSON.parse(jsonStr);
        return new AnthropicVertex({
          projectId: this.projectId,
          region: this.region,
          googleAuth: new GoogleAuth({ credentials, scopes: ['https://www.googleapis.com/auth/cloud-platform'] }) as any,
        });
      } catch {
        console.warn('[llm] AnthropicVertex: SA key decode failed, falling back to metadata server');
      }
    }
    return new AnthropicVertex({ projectId: this.projectId, region: this.region });
  }

  /**
   * Sends a chat request and returns the reply in OpenAI-compatible shape.
   *
   * @param messages Conversation in OpenAI message format.
   * @param tools Optional OpenAI-format function tools offered to the model.
   * @param maxTokens Upper bound on generated tokens (default 8192).
   * @returns Text content (or null), tool calls, finish reason and token usage.
   * @throws Error prefixed with "Anthropic Vertex error:" when the request fails
   *   with a non-retryable status or the retries for 429/503 are exhausted; the
   *   original error is attached as `cause`.
   */
  async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 8192): Promise<LLMResponse> {
    if (!this.client) this.client = this.buildClient();
    const client = this.client;

    const { system, messages: anthropicMessages } = toAnthropicMessages(messages);

    const anthropicTools = (tools ?? []).map(t => ({
      name: t.function.name,
      description: t.function.description,
      input_schema: t.function.parameters,
    }));

    const MAX_RETRIES = 4;
    const RETRY_STATUSES = new Set([429, 503]);

    for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
      try {
        const response = await (client.messages.create as Function)({
          model: this.modelId,
          max_tokens: maxTokens,
          system,
          messages: anthropicMessages,
          tools: anthropicTools.length > 0 ? anthropicTools : undefined,
        });

        const textContent = response.content
          .filter((b: any) => b.type === 'text')
          .map((b: any) => b.text)
          .join('') || null;

        const tool_calls: LLMToolCall[] = response.content
          .filter((b: any) => b.type === 'tool_use')
          .map((b: any) => ({
            id: b.id,
            type: 'function' as const,
            function: { name: b.name, arguments: JSON.stringify(b.input ?? {}) },
          }));

        return {
          content: textContent,
          reasoning: null,
          tool_calls,
          finish_reason: response.stop_reason === 'tool_use' ? 'tool_calls' : 'stop',
          usage: response.usage
            ? {
                prompt_tokens: response.usage.input_tokens,
                completion_tokens: response.usage.output_tokens,
                total_tokens: response.usage.input_tokens + response.usage.output_tokens,
              }
            : undefined,
        };
      } catch (err: any) {
        const status = err?.status ?? err?.statusCode ?? 0;
        if (RETRY_STATUSES.has(status) && attempt < MAX_RETRIES) {
          // Exponential backoff (2s, 4s, 8s, 16s) plus jitter, capped at 30s.
          const waitMs = Math.min(2 ** attempt * 2000 + Math.random() * 500, 30_000);
          console.warn(`[llm] Anthropic Vertex ${status} on attempt ${attempt + 1}/${MAX_RETRIES + 1} — retrying in ${Math.round(waitMs / 1000)}s`);
          await new Promise(r => setTimeout(r, waitMs));
          continue;
        }
        // Keep the original error reachable for debugging without changing the message.
        throw Object.assign(new Error(`Anthropic Vertex error: ${err?.message ?? String(err)}`), { cause: err });
      }
    }
    throw new Error('Anthropic Vertex: exceeded max retries');
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Factory — createLLM(modelId | tier)
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -296,8 +418,8 @@ export type ModelTier = 'A' | 'B' | 'C';
|
||||
|
||||
const TIER_MODELS: Record<ModelTier, string> = {
|
||||
A: process.env.TIER_A_MODEL ?? 'gemini-2.5-flash',
|
||||
B: process.env.TIER_B_MODEL ?? 'zai-org/glm-5-maas',
|
||||
C: process.env.TIER_C_MODEL ?? 'zai-org/glm-5-maas'
|
||||
B: process.env.TIER_B_MODEL ?? 'claude-sonnet-4-6',
|
||||
C: process.env.TIER_C_MODEL ?? 'claude-sonnet-4-6'
|
||||
};
|
||||
|
||||
export function createLLM(modelOrTier: string | ModelTier, opts?: { temperature?: number }): LLMClient {
|
||||
@@ -309,6 +431,10 @@ export function createLLM(modelOrTier: string | ModelTier, opts?: { temperature?
|
||||
return new GeminiClient(modelId, opts);
|
||||
}
|
||||
|
||||
if (modelId.startsWith('anthropic/') || modelId.startsWith('claude-')) {
|
||||
return new AnthropicVertexClient(modelId);
|
||||
}
|
||||
|
||||
return new VertexOpenAIClient(modelId, { temperature: opts?.temperature });
|
||||
}
|
||||
|
||||
|
||||
@@ -550,18 +550,24 @@ app.post('/agent/approve', async (req: Request, res: Response) => {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * POST /generate — one-shot text generation.
 *
 * Body: `{ prompt: string; model?: string; region?: string }`.
 * - `prompt` is required; a missing prompt yields 400.
 * - `model` is passed to createLLM and defaults to tier 'A'.
 * - `region` temporarily overrides CLAUDE_REGION while the client is created
 *   (for regional endpoint testing).
 *
 * Responds with `{ reply, model }` on success or `{ error, model }` with 500.
 */
app.post('/generate', async (req: Request, res: Response) => {
  const { prompt, model, region } = req.body as { prompt?: string; model?: string; region?: string };
  if (!prompt) { res.status(400).json({ error: '"prompt" is required' }); return; }

  // Only a non-empty string is accepted as an override; anything else is ignored.
  const regionOverride = typeof region === 'string' && region.trim() !== '' ? region.trim() : undefined;

  try {
    // The override is applied only around the synchronous createLLM() call and
    // restored before any await, so concurrent requests never observe each
    // other's region. Assumes the client reads CLAUDE_REGION at construction
    // time rather than on each request.
    const prevRegion = process.env.CLAUDE_REGION;
    let llm: ReturnType<typeof createLLM>;
    if (regionOverride) process.env.CLAUDE_REGION = regionOverride;
    try {
      llm = createLLM(model ?? 'A', { temperature: 0.3 });
    } finally {
      if (regionOverride) {
        // Restore the exact prior state: a variable that was unset must be
        // deleted, not set to '' (an empty string would not fall through a
        // `??` default and would be used as the region by later requests).
        if (prevRegion === undefined) delete process.env.CLAUDE_REGION;
        else process.env.CLAUDE_REGION = prevRegion;
      }
    }

    const messages: import('./llm').LLMMessage[] = [
      { role: 'user', content: prompt }
    ];
    const response = await llm.chat(messages, [], 8192);
    res.json({ reply: response.content ?? '', model: llm.modelId });
  } catch (err) {
    res.status(500).json({ error: err instanceof Error ? err.message : String(err), model });
  }
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user