From 8872ab606b8482a6ffd21be4c2e95970be8eb88a Mon Sep 17 00:00:00 2001 From: Mark Henderson Date: Mon, 27 Apr 2026 17:18:34 -0700 Subject: [PATCH] Fix tool calling: use non-streaming generateContent for tool rounds Gemini 3.1 Pro thinking model requires thought_signature to be echoed in functionResponse. SSE stream doesn't reliably include it in individual chunks. Switch tool-calling rounds to non-streaming generateContent which always returns the complete response with thought_signature present. Made-with: Cursor --- app/api/chat/route.ts | 65 ++++++++--------- lib/ai/gemini-chat.ts | 161 +++++++++++++++++++++++------------------- 2 files changed, 118 insertions(+), 108 deletions(-) diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts index 5f5a945f..3012a9b5 100644 --- a/app/api/chat/route.ts +++ b/app/api/chat/route.ts @@ -15,7 +15,7 @@ import { NextResponse } from 'next/server'; import { authSession } from '@/lib/auth/session-server'; import { query } from '@/lib/db-postgres'; -import { streamGeminiChat } from '@/lib/ai/gemini-chat'; +import { callGeminiChat, streamGeminiChat } from '@/lib/ai/gemini-chat'; import { VIBN_TOOL_DEFINITIONS, executeMcpTool } from '@/lib/ai/vibn-tools'; import type { ChatMessage, ToolCall } from '@/lib/ai/gemini-chat'; @@ -134,59 +134,56 @@ export async function POST(request: Request) { const assistantToolCalls: ToolCall[] = []; try { + // Tool-calling loop: use non-streaming so thought_signature is + // always present in the complete response (required by thinking models). while (round < MAX_TOOL_ROUNDS) { round++; - let pendingToolCalls: ToolCall[] = []; - let roundText = ''; - for await (const chunk of streamGeminiChat({ - systemPrompt, - messages, - tools: mcp_token ? VIBN_TOOL_DEFINITIONS : [], - temperature: 0.7, - })) { - if (chunk.type === 'text' && chunk.text) { - roundText += chunk.text; - assistantText += chunk.text; - emit({ type: 'text', text: chunk.text }); - } else if (chunk.type === 'tool_call' && chunk.toolCall) { - pendingToolCalls.push(chunk.toolCall); - assistantToolCalls.push(chunk.toolCall); - emit({ type: 'tool_start', name: chunk.toolCall.name, args: chunk.toolCall.args }); - } else if (chunk.type === 'error') { - emit({ type: 'error', error: chunk.error }); - controller.close(); - return; - } + const toolDefs = mcp_token ? VIBN_TOOL_DEFINITIONS : []; + const resp = await callGeminiChat({ systemPrompt, messages, tools: toolDefs, temperature: 0.7 }); + + if (resp.error) { + emit({ type: 'error', error: resp.error }); + controller.close(); + return; + } + + // Stream text to client + if (resp.text) { + assistantText += resp.text; + emit({ type: 'text', text: resp.text }); + } + + // Announce tool calls + for (const tc of resp.toolCalls) { + assistantToolCalls.push(tc); + emit({ type: 'tool_start', name: tc.name, args: tc.args }); } // Save assistant turn - const assistantMsg: ChatMessage = { + messages.push({ role: 'assistant', - content: roundText, - toolCalls: pendingToolCalls.length ? pendingToolCalls : undefined, - }; - messages.push(assistantMsg); + content: resp.text, + toolCalls: resp.toolCalls.length ? resp.toolCalls : undefined, + }); - if (!pendingToolCalls.length) break; + if (!resp.toolCalls.length) break; - // Execute tool calls - for (const tc of pendingToolCalls) { + // Execute tool calls and add results + for (const tc of resp.toolCalls) { const result = mcp_token ? await executeMcpTool(tc.name, tc.args, mcp_token, baseUrl) : JSON.stringify({ error: 'No MCP token — read-only mode.' }); emit({ type: 'tool_result', name: tc.name, result: result.slice(0, 500) }); - const toolMsg: ChatMessage = { + messages.push({ role: 'tool', content: result, toolCallId: tc.id, toolName: tc.name, - // Echo thought_signature back — required by Gemini thinking models thoughtSignature: tc.thoughtSignature, - }; - messages.push(toolMsg); + }); } } diff --git a/lib/ai/gemini-chat.ts b/lib/ai/gemini-chat.ts index 14ecd36e..0ccb3f3b 100644 --- a/lib/ai/gemini-chat.ts +++ b/lib/ai/gemini-chat.ts @@ -1,14 +1,12 @@ /** - * Gemini 3.1 Pro streaming chat client with tool-calling support. + * Gemini 3.1 Pro chat client with tool-calling support. * - * Uses the Gemini API (generativelanguage.googleapis.com) with the - * existing GOOGLE_API_KEY. Drop-in upgrade to Vertex AI when needed - * by swapping GEMINI_BASE_URL. - * - * NOTE: Gemini thinking models (2.5+, 3.x) attach a `thought_signature` - * to functionCall parts. This signature MUST be echoed back in the - * functionResponse or the API returns a 400. We carry it through our - * ToolCall type and re-attach it when building contents[]. + * Architecture: + * - Tool-calling rounds use generateContent (non-streaming) so we always + * get the complete response including thought_signature. Thinking models + * (2.5+, 3.x) require this field to be echoed back in functionResponse + * and it is not reliably present in individual SSE chunks. + * - Final text-only response uses streamGenerateContent for good UX. */ const GEMINI_API_KEY = process.env.GOOGLE_API_KEY || ''; @@ -18,12 +16,9 @@ const GEMINI_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta'; export interface ChatMessage { role: 'user' | 'assistant' | 'tool'; content: string; - /** Populated when role === 'assistant' and model made tool calls */ toolCalls?: ToolCall[]; - /** Populated when role === 'tool' */ toolCallId?: string; toolName?: string; - /** Echo thought_signature back for tool responses (thinking models) */ thoughtSignature?: string; } @@ -31,7 +26,7 @@ export interface ToolCall { id: string; name: string; args: Record; - /** Gemini thinking-model signature — must be echoed in functionResponse */ + /** Must be echoed back in functionResponse for Gemini thinking models */ thoughtSignature?: string; } @@ -48,7 +43,7 @@ export interface ChatChunk { error?: string; } -/** Convert our flat ChatMessage[] to Gemini's contents[] format */ +/** Convert our ChatMessage[] to Gemini's contents[] format */ function toGeminiContents(messages: ChatMessage[]) { const contents: any[] = []; @@ -72,9 +67,7 @@ function toGeminiContents(messages: ChatMessage[]) { id: msg.toolCallId, response: { content: msg.content }, }; - // Echo the thought_signature back — required for Gemini thinking models if (msg.thoughtSignature) fr.thought_signature = msg.thoughtSignature; - const part = { functionResponse: fr }; const last = contents[contents.length - 1]; if (last?.role === 'user') { @@ -87,23 +80,84 @@ function toGeminiContents(messages: ChatMessage[]) { return contents; } -/** Convert our ToolDefinition[] to Gemini functionDeclarations */ function toGeminiFunctions(tools: ToolDefinition[]) { if (!tools.length) return undefined; - return [ - { - functionDeclarations: tools.map((t) => ({ - name: t.name, - description: t.description, - parameters: t.parameters, - })), - }, - ]; + return [{ + functionDeclarations: tools.map((t) => ({ + name: t.name, + description: t.description, + parameters: t.parameters, + })), + }]; +} + +function buildBody(opts: { + systemPrompt: string; + messages: ChatMessage[]; + tools?: ToolDefinition[]; + temperature?: number; +}) { + const body: any = { + contents: toGeminiContents(opts.messages), + systemInstruction: { parts: [{ text: opts.systemPrompt }] }, + generationConfig: { temperature: opts.temperature ?? 0.7, maxOutputTokens: 8192 }, + }; + const fns = toGeminiFunctions(opts.tools ?? []); + if (fns) body.tools = fns; + return body; } /** - * Stream a Gemini response with optional tool-calling. - * Yields ChatChunk objects: text deltas, tool_call requests, and a final done. + * Non-streaming call — used for tool-calling rounds. + * Returns complete response with thought_signature guaranteed. + */ +export async function callGeminiChat(opts: { + systemPrompt: string; + messages: ChatMessage[]; + tools?: ToolDefinition[]; + temperature?: number; +}): Promise<{ text: string; toolCalls: ToolCall[]; error?: string }> { + const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:generateContent?key=${GEMINI_API_KEY}`; + + let res: Response; + try { + res = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(buildBody(opts)), + }); + } catch (e) { + return { text: '', toolCalls: [], error: `Network error: ${e instanceof Error ? e.message : String(e)}` }; + } + + const data = await res.json().catch(() => ({})); + if (!res.ok) { + const msg = data?.error?.message || JSON.stringify(data).slice(0, 200); + return { text: '', toolCalls: [], error: `Gemini API error ${res.status}: ${msg}` }; + } + + const parts: any[] = data?.candidates?.[0]?.content?.parts ?? []; + let text = ''; + const toolCalls: ToolCall[] = []; + + for (const part of parts) { + if (part.text) text += part.text; + if (part.functionCall) { + toolCalls.push({ + id: part.functionCall.id || `tc-${Date.now()}-${Math.random().toString(36).slice(2)}`, + name: part.functionCall.name, + args: part.functionCall.args ?? {}, + thoughtSignature: part.functionCall.thought_signature, + }); + } + } + + return { text, toolCalls }; +} + +/** + * Streaming call — used for the final text-only response. + * Yields ChatChunk objects. */ export async function* streamGeminiChat(opts: { systemPrompt: string; @@ -111,20 +165,6 @@ export async function* streamGeminiChat(opts: { tools?: ToolDefinition[]; temperature?: number; }): AsyncGenerator { - const { systemPrompt, messages, tools = [], temperature = 0.7 } = opts; - - const body: any = { - contents: toGeminiContents(messages), - systemInstruction: { parts: [{ text: systemPrompt }] }, - generationConfig: { - temperature, - maxOutputTokens: 8192, - }, - }; - - const fns = toGeminiFunctions(tools); - if (fns) body.tools = fns; - const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:streamGenerateContent?key=${GEMINI_API_KEY}&alt=sse`; let res: Response; @@ -132,7 +172,7 @@ export async function* streamGeminiChat(opts: { res = await fetch(url, { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify(body), + body: JSON.stringify(buildBody(opts)), }); } catch (e) { yield { type: 'error', error: `Network error: ${e instanceof Error ? e.message : String(e)}` }; @@ -146,10 +186,7 @@ export async function* streamGeminiChat(opts: { } const reader = res.body?.getReader(); - if (!reader) { - yield { type: 'error', error: 'No response body' }; - return; - } + if (!reader) { yield { type: 'error', error: 'No response body' }; return; } const decoder = new TextDecoder(); let buffer = ''; @@ -159,7 +196,6 @@ export async function* streamGeminiChat(opts: { const { done, value } = await reader.read(); if (done) break; buffer += decoder.decode(value, { stream: true }); - const lines = buffer.split('\n'); buffer = lines.pop() ?? ''; @@ -167,34 +203,11 @@ export async function* streamGeminiChat(opts: { if (!line.startsWith('data: ')) continue; const data = line.slice(6).trim(); if (!data || data === '[DONE]') continue; - let chunk: any; - try { - chunk = JSON.parse(data); - } catch { - continue; - } - - const candidate = chunk?.candidates?.[0]; - if (!candidate) continue; - const parts = candidate?.content?.parts ?? []; - + try { chunk = JSON.parse(data); } catch { continue; } + const parts = chunk?.candidates?.[0]?.content?.parts ?? []; for (const part of parts) { - if (part.text) { - yield { type: 'text', text: part.text }; - } - if (part.functionCall) { - yield { - type: 'tool_call', - toolCall: { - id: part.functionCall.id || `tc-${Date.now()}`, - name: part.functionCall.name, - args: part.functionCall.args ?? {}, - // Carry the thought_signature so the chat route can echo it back - thoughtSignature: part.functionCall.thought_signature, - }, - }; - } + if (part.text) yield { type: 'text', text: part.text }; } } }