From 8872ab606b8482a6ffd21be4c2e95970be8eb88a Mon Sep 17 00:00:00 2001
From: Mark Henderson <mark@getacquired.com>
Date: Mon, 27 Apr 2026 17:18:34 -0700
Subject: [PATCH] Fix tool calling: use non-streaming generateContent for tool
 rounds

The Gemini 3.1 Pro thinking model requires thought_signature to be
echoed in functionResponse. The SSE stream doesn't reliably include it
in individual chunks, so switch tool-calling rounds to non-streaming
generateContent, which always returns the complete response with
thought_signature present.

Made-with: Cursor
---
 app/api/chat/route.ts |  65 ++++++++---------
 lib/ai/gemini-chat.ts | 161 +++++++++++++++++++++++-------------------
 2 files changed, 118 insertions(+), 108 deletions(-)

diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts
index 5f5a945f..3012a9b5 100644
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -15,7 +15,7 @@
 import { NextResponse } from 'next/server';
 import { authSession } from '@/lib/auth/session-server';
 import { query } from '@/lib/db-postgres';
-import { streamGeminiChat } from '@/lib/ai/gemini-chat';
+import { callGeminiChat, streamGeminiChat } from '@/lib/ai/gemini-chat';
 import { VIBN_TOOL_DEFINITIONS, executeMcpTool } from '@/lib/ai/vibn-tools';
 import type { ChatMessage, ToolCall } from '@/lib/ai/gemini-chat';
 
@@ -134,59 +134,56 @@ export async function POST(request: Request) {
       const assistantToolCalls: ToolCall[] = [];
 
       try {
+        // Tool-calling loop: use non-streaming so thought_signature is
+        // always present in the complete response (required by thinking models).
         while (round < MAX_TOOL_ROUNDS) {
           round++;
-          let pendingToolCalls: ToolCall[] = [];
-          let roundText = '';
 
-          for await (const chunk of streamGeminiChat({
-            systemPrompt,
-            messages,
-            tools: mcp_token ? VIBN_TOOL_DEFINITIONS : [],
-            temperature: 0.7,
-          })) {
-            if (chunk.type === 'text' && chunk.text) {
-              roundText += chunk.text;
-              assistantText += chunk.text;
-              emit({ type: 'text', text: chunk.text });
-            } else if (chunk.type === 'tool_call' && chunk.toolCall) {
-              pendingToolCalls.push(chunk.toolCall);
-              assistantToolCalls.push(chunk.toolCall);
-              emit({ type: 'tool_start', name: chunk.toolCall.name, args: chunk.toolCall.args });
-            } else if (chunk.type === 'error') {
-              emit({ type: 'error', error: chunk.error });
-              controller.close();
-              return;
-            }
+          const toolDefs = mcp_token ? VIBN_TOOL_DEFINITIONS : [];
+          const resp = await callGeminiChat({ systemPrompt, messages, tools: toolDefs, temperature: 0.7 });
+
+          if (resp.error) {
+            emit({ type: 'error', error: resp.error });
+            controller.close();
+            return;
+          }
+
+          // Forward this round's complete text to the client in one event
+          if (resp.text) {
+            assistantText += resp.text;
+            emit({ type: 'text', text: resp.text });
+          }
+
+          // Announce tool calls
+          for (const tc of resp.toolCalls) {
+            assistantToolCalls.push(tc);
+            emit({ type: 'tool_start', name: tc.name, args: tc.args });
           }
 
           // Save assistant turn
-          const assistantMsg: ChatMessage = {
+          messages.push({
             role: 'assistant',
-            content: roundText,
-            toolCalls: pendingToolCalls.length ? pendingToolCalls : undefined,
-          };
-          messages.push(assistantMsg);
+            content: resp.text,
+            toolCalls: resp.toolCalls.length ? resp.toolCalls : undefined,
+          });
 
-          if (!pendingToolCalls.length) break;
+          if (!resp.toolCalls.length) break;
 
-          // Execute tool calls
-          for (const tc of pendingToolCalls) {
+          // Execute tool calls and add results
+          for (const tc of resp.toolCalls) {
             const result = mcp_token
               ? await executeMcpTool(tc.name, tc.args, mcp_token, baseUrl)
               : JSON.stringify({ error: 'No MCP token — read-only mode.' });
 
             emit({ type: 'tool_result', name: tc.name, result: result.slice(0, 500) });
 
-            const toolMsg: ChatMessage = {
+            messages.push({
               role: 'tool',
               content: result,
               toolCallId: tc.id,
               toolName: tc.name,
-              // Echo thought_signature back — required by Gemini thinking models
               thoughtSignature: tc.thoughtSignature,
-            };
-            messages.push(toolMsg);
+            });
           }
         }
 
diff --git a/lib/ai/gemini-chat.ts b/lib/ai/gemini-chat.ts
index 14ecd36e..0ccb3f3b 100644
--- a/lib/ai/gemini-chat.ts
+++ b/lib/ai/gemini-chat.ts
@@ -1,14 +1,12 @@
 /**
- * Gemini 3.1 Pro streaming chat client with tool-calling support.
+ * Gemini 3.1 Pro chat client with tool-calling support.
  *
- * Uses the Gemini API (generativelanguage.googleapis.com) with the
- * existing GOOGLE_API_KEY. Drop-in upgrade to Vertex AI when needed
- * by swapping GEMINI_BASE_URL.
- *
- * NOTE: Gemini thinking models (2.5+, 3.x) attach a `thought_signature`
- * to functionCall parts. This signature MUST be echoed back in the
- * functionResponse or the API returns a 400. We carry it through our
- * ToolCall type and re-attach it when building contents[].
+ * Architecture:
+ *   - Tool-calling rounds use generateContent (non-streaming) so we always
+ *     get the complete response including thought_signature. Thinking models
+ *     (2.5+, 3.x) require this field to be echoed back in functionResponse
+ *     and it is not reliably present in individual SSE chunks.
+ *   - Final text-only response uses streamGenerateContent for good UX.
  */
 
 const GEMINI_API_KEY = process.env.GOOGLE_API_KEY || '';
@@ -18,12 +16,9 @@ const GEMINI_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta';
 export interface ChatMessage {
   role: 'user' | 'assistant' | 'tool';
   content: string;
-  /** Populated when role === 'assistant' and model made tool calls */
   toolCalls?: ToolCall[];
-  /** Populated when role === 'tool' */
   toolCallId?: string;
   toolName?: string;
-  /** Echo thought_signature back for tool responses (thinking models) */
   thoughtSignature?: string;
 }
 
@@ -31,7 +26,7 @@ export interface ToolCall {
   id: string;
   name: string;
   args: Record<string, unknown>;
-  /** Gemini thinking-model signature — must be echoed in functionResponse */
+  /** Must be echoed back in functionResponse for Gemini thinking models */
   thoughtSignature?: string;
 }
 
@@ -48,7 +43,7 @@ export interface ChatChunk {
   error?: string;
 }
 
-/** Convert our flat ChatMessage[] to Gemini's contents[] format */
+/** Convert our ChatMessage[] to Gemini's contents[] format */
 function toGeminiContents(messages: ChatMessage[]) {
   const contents: any[] = [];
 
@@ -72,9 +67,7 @@ function toGeminiContents(messages: ChatMessage[]) {
         id: msg.toolCallId,
         response: { content: msg.content },
       };
-      // Echo the thought_signature back — required for Gemini thinking models
       if (msg.thoughtSignature) fr.thought_signature = msg.thoughtSignature;
-
       const part = { functionResponse: fr };
       const last = contents[contents.length - 1];
       if (last?.role === 'user') {
@@ -87,23 +80,84 @@ function toGeminiContents(messages: ChatMessage[]) {
   return contents;
 }
 
-/** Convert our ToolDefinition[] to Gemini functionDeclarations */
 function toGeminiFunctions(tools: ToolDefinition[]) {
   if (!tools.length) return undefined;
-  return [
-    {
-      functionDeclarations: tools.map((t) => ({
-        name: t.name,
-        description: t.description,
-        parameters: t.parameters,
-      })),
-    },
-  ];
+  return [{
+    functionDeclarations: tools.map((t) => ({
+      name: t.name,
+      description: t.description,
+      parameters: t.parameters,
+    })),
+  }];
+}
+
+/**
+ * Assembles the JSON request body; shared by callGeminiChat and streamGeminiChat.
+ */
+function buildBody(opts: { systemPrompt: string; messages: ChatMessage[]; tools?: ToolDefinition[]; temperature?: number }) {
+  const { systemPrompt, messages, tools, temperature } = opts;
+  const payload: any = {
+    contents: toGeminiContents(messages),
+    systemInstruction: { parts: [{ text: systemPrompt }] },
+    generationConfig: { temperature: temperature ?? 0.7, maxOutputTokens: 8192 },
+  };
+  // toGeminiFunctions returns undefined for an empty list, so `tools` is omitted then.
+  const declarations = toGeminiFunctions(tools ?? []);
+  if (declarations) payload.tools = declarations;
+  return payload;
 }
 
 /**
- * Stream a Gemini response with optional tool-calling.
- * Yields ChatChunk objects: text deltas, tool_call requests, and a final done.
+ * Non-streaming call — used for tool-calling rounds.
+ * Returns the complete response; thoughtSignature is set when the API supplies one.
+ */
+export async function callGeminiChat(opts: {
+  systemPrompt: string;
+  messages: ChatMessage[];
+  tools?: ToolDefinition[];
+  temperature?: number;
+}): Promise<{ text: string; toolCalls: ToolCall[]; error?: string }> {
+  const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:generateContent`;
+
+  let res: Response;
+  try {
+    res = await fetch(url, {
+      method: 'POST',
+      // Send the key as a header so it stays out of the request URL (and URL logs).
+      headers: { 'Content-Type': 'application/json', 'x-goog-api-key': GEMINI_API_KEY },
+      body: JSON.stringify(buildBody(opts)),
+    });
+  } catch (e) {
+    return { text: '', toolCalls: [], error: `Network error: ${e instanceof Error ? e.message : String(e)}` };
+  }
+
+  // A body that is not valid JSON degrades to {} so the status check still runs.
+  const data = await res.json().catch(() => ({}));
+  if (!res.ok) {
+    const msg = data?.error?.message || JSON.stringify(data).slice(0, 200);
+    return { text: '', toolCalls: [], error: `Gemini API error ${res.status}: ${msg}` };
+  }
+
+  const parts: any[] = data?.candidates?.[0]?.content?.parts ?? [];
+  let text = '';
+  const toolCalls: ToolCall[] = [];
+  for (const part of parts) {
+    if (part.text) text += part.text;
+    if (!part.functionCall) continue;
+    toolCalls.push({
+      id: part.functionCall.id || `tc-${Date.now()}-${Math.random().toString(36).slice(2)}`,
+      name: part.functionCall.name,
+      args: part.functionCall.args ?? {},
+      // Gemini returns the signature on the part itself (thoughtSignature); older spots are fallbacks.
+      thoughtSignature: part.thoughtSignature ?? part.thought_signature ?? part.functionCall.thought_signature,
+    });
+  }
+  return { text, toolCalls };
+}
+
+/**
+ * Streaming call — used for the final text-only response.
+ * Yields ChatChunk objects.
  */
 export async function* streamGeminiChat(opts: {
   systemPrompt: string;
@@ -111,20 +165,6 @@ export async function* streamGeminiChat(opts: {
   tools?: ToolDefinition[];
   temperature?: number;
 }): AsyncGenerator<ChatChunk> {
-  const { systemPrompt, messages, tools = [], temperature = 0.7 } = opts;
-
-  const body: any = {
-    contents: toGeminiContents(messages),
-    systemInstruction: { parts: [{ text: systemPrompt }] },
-    generationConfig: {
-      temperature,
-      maxOutputTokens: 8192,
-    },
-  };
-
-  const fns = toGeminiFunctions(tools);
-  if (fns) body.tools = fns;
-
   const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:streamGenerateContent?key=${GEMINI_API_KEY}&alt=sse`;
 
   let res: Response;
@@ -132,7 +172,7 @@ export async function* streamGeminiChat(opts: {
     res = await fetch(url, {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify(body),
+      body: JSON.stringify(buildBody(opts)),
     });
   } catch (e) {
     yield { type: 'error', error: `Network error: ${e instanceof Error ? e.message : String(e)}` };
@@ -146,10 +186,7 @@ export async function* streamGeminiChat(opts: {
   }
 
   const reader = res.body?.getReader();
-  if (!reader) {
-    yield { type: 'error', error: 'No response body' };
-    return;
-  }
+  if (!reader) { yield { type: 'error', error: 'No response body' }; return; }
 
   const decoder = new TextDecoder();
   let buffer = '';
@@ -159,7 +196,6 @@ export async function* streamGeminiChat(opts: {
       const { done, value } = await reader.read();
       if (done) break;
       buffer += decoder.decode(value, { stream: true });
-
       const lines = buffer.split('\n');
       buffer = lines.pop() ?? '';
 
@@ -167,34 +203,11 @@ export async function* streamGeminiChat(opts: {
         if (!line.startsWith('data: ')) continue;
         const data = line.slice(6).trim();
         if (!data || data === '[DONE]') continue;
-
         let chunk: any;
-        try {
-          chunk = JSON.parse(data);
-        } catch {
-          continue;
-        }
-
-        const candidate = chunk?.candidates?.[0];
-        if (!candidate) continue;
-        const parts = candidate?.content?.parts ?? [];
-
+        try { chunk = JSON.parse(data); } catch { continue; }
+        const parts = chunk?.candidates?.[0]?.content?.parts ?? [];
         for (const part of parts) {
-          if (part.text) {
-            yield { type: 'text', text: part.text };
-          }
-          if (part.functionCall) {
-            yield {
-              type: 'tool_call',
-              toolCall: {
-                id: part.functionCall.id || `tc-${Date.now()}`,
-                name: part.functionCall.name,
-                args: part.functionCall.args ?? {},
-                // Carry the thought_signature so the chat route can echo it back
-                thoughtSignature: part.functionCall.thought_signature,
-              },
-            };
-          }
+          if (part.text) yield { type: 'text', text: part.text };
         }
       }
     }