diff --git a/src/llm.ts b/src/llm.ts
index 84408f0..6290dc2 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -6,13 +6,15 @@ import { v4 as uuidv4 } from 'uuid';
 // Unified LLM client — OpenAI-compatible message format throughout
 //
 // Two backends:
-// VertexOpenAIClient — for GLM-5, Claude Sonnet, etc. via Vertex global endpoint
-// GeminiFlashClient — for Gemini Flash/Pro via @google/genai SDK
+// VertexOpenAIClient — for GLM-5 and other Vertex MaaS models (openai-compat endpoint)
+// GeminiClient — for Gemini Flash/Pro via @google/genai SDK (API key)
 //
-// Model tier defaults (overridable via env):
-// Tier A: gemini-2.5-flash ($0.15/$0.60 per 1M) — routing, summaries, log parsing
-// Tier B: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — coding, feature work
-// Tier C: zai-org/glm-5-maas ($1.00/$3.20 per 1M) — complex decisions, escalation
+// Model tier defaults (overridable via TIER_A/B/C_MODEL env vars):
+// Tier A: gemini-2.5-flash — routing, summaries, log parsing (API key, high quota)
+// Tier B: zai-org/glm-5-maas — coding, feature work (Vertex MaaS, retry on 429)
+// Tier C: zai-org/glm-5-maas — complex decisions, escalation
+//
+// Claude models are NOT available in this GCP project — do not use anthropic/* IDs.
 // =============================================================================
 
 // ---------------------------------------------------------------------------
@@ -294,8 +296,8 @@ export type ModelTier = 'A' | 'B' | 'C';
 
 const TIER_MODELS: Record<ModelTier, string> = {
   A: process.env.TIER_A_MODEL ?? 'gemini-2.5-flash',
-  B: process.env.TIER_B_MODEL ?? 'claude-sonnet-4-6',
-  C: process.env.TIER_C_MODEL ?? 'claude-sonnet-4-6'
+  B: process.env.TIER_B_MODEL ?? 'zai-org/glm-5-maas',
+  C: process.env.TIER_C_MODEL ?? 'zai-org/glm-5-maas'
 };
 
 export function createLLM(modelOrTier: string | ModelTier, opts?: { temperature?: number }): LLMClient {