From 551fdb9e54882c2dcf09bcc7da247056a7fd830f Mon Sep 17 00:00:00 2001 From: mawkone Date: Sat, 7 Mar 2026 12:54:39 -0800 Subject: [PATCH] feat: add AnthropicVertexClient for claude-* models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - @anthropic-ai/vertex-sdk: proper Anthropic Messages API on Vertex - AnthropicVertexClient: converts OAI message format ↔ Anthropic format, handles tool_use blocks, retries 429/503 with backoff - createLLM: routes anthropic/* and claude-* models through new client - Tier B/C default: claude-sonnet-4-6 via us-east5 Vertex endpoint - /generate endpoint: accepts region param for regional endpoint testing Made-with: Cursor --- package-lock.json | 219 ++++++++++++++++++++++++++++++++++++++++++++++ package.json | 2 + src/llm.ts | 146 ++++++++++++++++++++++++++++--- src/server.ts | 12 ++- 4 files changed, 366 insertions(+), 13 deletions(-) diff --git a/package-lock.json b/package-lock.json index 342f124..ce7d3c7 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,6 +8,8 @@ "name": "vibn-agent-runner", "version": "0.1.0", "dependencies": { + "@anthropic-ai/sdk": "^0.78.0", + "@anthropic-ai/vertex-sdk": "^0.14.4", "@google/genai": "^1.0.0", "cors": "^2.8.5", "express": "^4.19.2", @@ -24,6 +26,121 @@ "typescript": "^5.4.5" } }, + "node_modules/@anthropic-ai/sdk": { + "version": "0.78.0", + "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.78.0.tgz", + "integrity": "sha512-PzQhR715td/m1UaaN5hHXjYB8Gl2lF9UVhrrGrZeysiF6Rb74Wc9GCB8hzLdzmQtBd1qe89F9OptgB9Za1Ib5w==", + "license": "MIT", + "dependencies": { + "json-schema-to-ts": "^3.1.1" + }, + "bin": { + "anthropic-ai-sdk": "bin/cli" + }, + "peerDependencies": { + "zod": "^3.25.0 || ^4.0.0" + }, + "peerDependenciesMeta": { + "zod": { + "optional": true + } + } + }, + "node_modules/@anthropic-ai/vertex-sdk": { + "version": "0.14.4", + "resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.14.4.tgz", + 
"integrity": "sha512-BZUPRWghZxfSFtAxU563wH+jfWBPoedAwsVxG35FhmNsjeV8tyfN+lFriWhCpcZApxA4NdT6Soov+PzfnxxD5g==", + "license": "MIT", + "dependencies": { + "@anthropic-ai/sdk": ">=0.50.3 <1", + "google-auth-library": "^9.4.2" + } + }, + "node_modules/@anthropic-ai/vertex-sdk/node_modules/gaxios": { + "version": "6.7.1", + "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-6.7.1.tgz", + "integrity": "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==", + "license": "Apache-2.0", + "dependencies": { + "extend": "^3.0.2", + "https-proxy-agent": "^7.0.1", + "is-stream": "^2.0.0", + "node-fetch": "^2.6.9", + "uuid": "^9.0.1" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@anthropic-ai/vertex-sdk/node_modules/gcp-metadata": { + "version": "6.1.1", + "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.1.tgz", + "integrity": "sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==", + "license": "Apache-2.0", + "dependencies": { + "gaxios": "^6.1.1", + "google-logging-utils": "^0.0.2", + "json-bigint": "^1.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@anthropic-ai/vertex-sdk/node_modules/google-auth-library": { + "version": "9.15.1", + "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.1.tgz", + "integrity": "sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==", + "license": "Apache-2.0", + "dependencies": { + "base64-js": "^1.3.0", + "ecdsa-sig-formatter": "^1.0.11", + "gaxios": "^6.1.1", + "gcp-metadata": "^6.1.0", + "gtoken": "^7.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@anthropic-ai/vertex-sdk/node_modules/google-logging-utils": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-0.0.2.tgz", + "integrity": 
"sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==", + "license": "Apache-2.0", + "engines": { + "node": ">=14" + } + }, + "node_modules/@anthropic-ai/vertex-sdk/node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, + "node_modules/@babel/runtime": { + "version": "7.28.6", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.28.6.tgz", + "integrity": "sha512-05WQkdpL9COIMz4LjTxGpPNCdlpyimKppYNoJ5Di5EUObifl8t4tuLuUBBZEpoLYOmfvIWrsp9fCl0HoPRVTdA==", + "license": "MIT", + "engines": { + "node": ">=6.9.0" + } + }, "node_modules/@cspotcode/source-map-support": { "version": "0.8.1", "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", @@ -1051,6 +1168,55 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/gtoken": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz", + "integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==", + "license": "MIT", + "dependencies": { + "gaxios": "^6.0.0", + "jws": "^4.0.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/gtoken/node_modules/gaxios": { + "version": "6.7.1", + "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-6.7.1.tgz", + "integrity": "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==", + "license": "Apache-2.0", + "dependencies": { + "extend": "^3.0.2", + "https-proxy-agent": "^7.0.1", + "is-stream": "^2.0.0", + 
"node-fetch": "^2.6.9", + "uuid": "^9.0.1" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/gtoken/node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, "node_modules/has-symbols": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", @@ -1167,6 +1333,18 @@ "node": ">=8" } }, + "node_modules/is-stream": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==", + "license": "MIT", + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/isexe": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", @@ -1197,6 +1375,19 @@ "bignumber.js": "^9.0.0" } }, + "node_modules/json-schema-to-ts": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/json-schema-to-ts/-/json-schema-to-ts-3.1.1.tgz", + "integrity": "sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.18.3", + "ts-algebra": "^2.0.0" + }, + "engines": { + "node": ">=16" + } + }, "node_modules/jwa": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.1.tgz", @@ -1871,6 +2062,18 @@ "node": ">=0.6" } }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": 
"sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, + "node_modules/ts-algebra": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz", + "integrity": "sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==", + "license": "MIT" + }, "node_modules/ts-node": { "version": "10.9.2", "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", @@ -2004,6 +2207,22 @@ "node": ">= 8" } }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/package.json b/package.json index c78abf2..27565ba 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,8 @@ "test": "ts-node src/test.ts" }, "dependencies": { + "@anthropic-ai/sdk": "^0.78.0", + "@anthropic-ai/vertex-sdk": "^0.14.4", "@google/genai": "^1.0.0", "cors": "^2.8.5", "express": "^4.19.2", diff --git a/src/llm.ts b/src/llm.ts index 6290dc2..a62061e 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -1,20 +1,20 @@ import { GoogleAuth } from 'google-auth-library'; import { GoogleGenAI } from '@google/genai'; +import AnthropicVertex from '@anthropic-ai/vertex-sdk'; import { v4 as uuidv4 } from 'uuid'; // 
============================================================================= // Unified LLM client — OpenAI-compatible message format throughout // -// Two backends: -// VertexOpenAIClient — for GLM-5 and other Vertex MaaS models (openai-compat endpoint) -// GeminiClient — for Gemini Flash/Pro via @google/genai SDK (API key) +// Three backends: +// VertexOpenAIClient — GLM-5 and other Vertex MaaS models (openai-compat endpoint) +// GeminiClient — Gemini Flash/Pro via @google/genai SDK (API key) +// AnthropicVertexClient — Claude models via Anthropic Messages API on Vertex (us-east5) // // Model tier defaults (overridable via TIER_A/B/C_MODEL env vars): -// Tier A: gemini-2.5-flash — routing, summaries, log parsing (API key, high quota) -// Tier B: zai-org/glm-5-maas — coding, feature work (Vertex MaaS, retry on 429) -// Tier C: zai-org/glm-5-maas — complex decisions, escalation -// -// Claude models are NOT available in this GCP project — do not use anthropic/* IDs. +// Tier A: gemini-2.5-flash — routing, summaries (API key, high quota) +// Tier B: claude-sonnet-4-6 — coding, feature work (Anthropic Vertex, us-east5) +// Tier C: claude-sonnet-4-6 — complex decisions // ============================================================================= // --------------------------------------------------------------------------- @@ -288,6 +288,128 @@ function toGeminiContents(messages: LLMMessage[]): any[] { return contents; } +// --------------------------------------------------------------------------- +// Anthropic Vertex client +// Used for: claude-* models via Vertex AI (proper Anthropic Messages API) +// Handles tool_calls by converting to/from Anthropic's tool_use blocks. 
+// --------------------------------------------------------------------------- + +export class AnthropicVertexClient implements LLMClient { + modelId: string; + private projectId: string; + private region: string; + + constructor(modelId: string, opts?: { projectId?: string; region?: string }) { + // Strip the "anthropic/" prefix if present — the SDK uses bare model names + this.modelId = modelId.startsWith('anthropic/') ? modelId.slice(10) : modelId; + this.projectId = opts?.projectId ?? process.env.GCP_PROJECT_ID ?? 'master-ai-484822'; + this.region = opts?.region ?? process.env.CLAUDE_REGION ?? 'us-east5'; + } + + private buildClient(): AnthropicVertex { + const b64Key = process.env.GCP_SA_KEY_BASE64; + if (b64Key) { + try { + const jsonStr = Buffer.from(b64Key, 'base64').toString('utf8'); + const credentials = JSON.parse(jsonStr); + return new AnthropicVertex({ + projectId: this.projectId, + region: this.region, + googleAuth: new GoogleAuth({ credentials, scopes: ['https://www.googleapis.com/auth/cloud-platform'] }) as any, + }); + } catch { + console.warn('[llm] AnthropicVertex: SA key decode failed, falling back to metadata server'); + } + } + return new AnthropicVertex({ projectId: this.projectId, region: this.region }); + } + + async chat(messages: LLMMessage[], tools?: LLMTool[], maxTokens = 8192): Promise { + const client = this.buildClient(); + + const system = messages.find(m => m.role === 'system')?.content ?? undefined; + const nonSystem = messages.filter(m => m.role !== 'system'); + + // Convert OpenAI message format → Anthropic format + const anthropicMessages: any[] = nonSystem.map(m => { + if (m.role === 'assistant') { + const parts: any[] = []; + if (m.content) parts.push({ type: 'text', text: m.content }); + for (const tc of m.tool_calls ?? 
[]) { + parts.push({ + type: 'tool_use', + id: tc.id, + name: tc.function.name, + input: JSON.parse(tc.function.arguments || '{}'), + }); + } + return { role: 'assistant', content: parts.length === 1 && parts[0].type === 'text' ? parts[0].text : parts }; + } + if (m.role === 'tool') { + return { + role: 'user', + content: [{ type: 'tool_result', tool_use_id: m.tool_call_id, content: m.content ?? '' }], + }; + } + return { role: 'user', content: m.content ?? '' }; + }); + + const anthropicTools = (tools ?? []).map(t => ({ + name: t.function.name, + description: t.function.description, + input_schema: t.function.parameters, + })); + + const MAX_RETRIES = 4; + const RETRY_STATUSES = new Set([429, 503]); + + for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + try { + const response = await (client.messages.create as Function)({ + model: this.modelId, + max_tokens: maxTokens, + system: system ?? undefined, + messages: anthropicMessages, + tools: anthropicTools.length > 0 ? anthropicTools : undefined, + }); + + const textContent = response.content + .filter((b: any) => b.type === 'text') + .map((b: any) => b.text) + .join('') || null; + + const tool_calls: LLMToolCall[] = response.content + .filter((b: any) => b.type === 'tool_use') + .map((b: any) => ({ + id: b.id, + type: 'function' as const, + function: { name: b.name, arguments: JSON.stringify(b.input ?? {}) }, + })); + + return { + content: textContent, + reasoning: null, + tool_calls, + finish_reason: response.stop_reason === 'tool_use' ? 'tool_calls' : 'stop', + usage: response.usage + ? { prompt_tokens: response.usage.input_tokens, completion_tokens: response.usage.output_tokens, total_tokens: response.usage.input_tokens + response.usage.output_tokens } + : undefined, + }; + } catch (err: any) { + const status = err?.status ?? err?.statusCode ?? 
0; + if (RETRY_STATUSES.has(status) && attempt < MAX_RETRIES) { + const waitMs = Math.min(2 ** attempt * 2000 + Math.random() * 500, 30_000); + console.warn(`[llm] Anthropic Vertex ${status} on attempt ${attempt + 1}/${MAX_RETRIES + 1} — retrying in ${Math.round(waitMs / 1000)}s`); + await new Promise(r => setTimeout(r, waitMs)); + continue; + } + throw new Error(`Anthropic Vertex error: ${err?.message ?? String(err)}`); + } + } + throw new Error('Anthropic Vertex: exceeded max retries'); + } +} + // --------------------------------------------------------------------------- // Factory — createLLM(modelId | tier) // --------------------------------------------------------------------------- @@ -296,8 +418,8 @@ export type ModelTier = 'A' | 'B' | 'C'; const TIER_MODELS: Record = { A: process.env.TIER_A_MODEL ?? 'gemini-2.5-flash', - B: process.env.TIER_B_MODEL ?? 'zai-org/glm-5-maas', - C: process.env.TIER_C_MODEL ?? 'zai-org/glm-5-maas' + B: process.env.TIER_B_MODEL ?? 'claude-sonnet-4-6', + C: process.env.TIER_C_MODEL ?? 'claude-sonnet-4-6' }; export function createLLM(modelOrTier: string | ModelTier, opts?: { temperature?: number }): LLMClient { @@ -309,6 +431,10 @@ export function createLLM(modelOrTier: string | ModelTier, opts?: { temperature? 
return new GeminiClient(modelId, opts); } + if (modelId.startsWith('anthropic/') || modelId.startsWith('claude-')) { + return new AnthropicVertexClient(modelId); + } + return new VertexOpenAIClient(modelId, { temperature: opts?.temperature }); } diff --git a/src/server.ts b/src/server.ts index 2e71c56..312210b 100644 --- a/src/server.ts +++ b/src/server.ts @@ -550,18 +550,24 @@ app.post('/agent/approve', async (req: Request, res: Response) => { // --------------------------------------------------------------------------- app.post('/generate', async (req: Request, res: Response) => { - const { prompt, model } = req.body as { prompt?: string; model?: string }; + const { prompt, model, region } = req.body as { prompt?: string; model?: string; region?: string }; if (!prompt) { res.status(400).json({ error: '"prompt" is required' }); return; } + // Allow overriding CLAUDE_REGION per-request for testing (NOTE: mutates process-global env — concurrent requests may race; acceptable for test-only use) + const prevRegion = process.env.CLAUDE_REGION; + if (region) process.env.CLAUDE_REGION = region; + try { const llm = createLLM(model ?? 'A', { temperature: 0.3 }); const messages: import('./llm').LLMMessage[] = [ { role: 'user', content: prompt } ]; const response = await llm.chat(messages, [], 8192); - res.json({ reply: response.content ?? '' }); + res.json({ reply: response.content ?? '', model: llm.modelId }); } catch (err) { - res.status(500).json({ error: err instanceof Error ? err.message : String(err) }); + res.status(500).json({ error: err instanceof Error ? err.message : String(err), model }); + } finally { + if (region) { if (prevRegion === undefined) delete process.env.CLAUDE_REGION; else process.env.CLAUDE_REGION = prevRegion; } } });