Fix tool calling: use non-streaming generateContent for tool rounds
Gemini 3.1 Pro is a thinking model and requires the thought_signature to be echoed back in the functionResponse. The SSE stream doesn't reliably include it in individual chunks, so tool-calling rounds now use the non-streaming generateContent endpoint, which always returns the complete response with thought_signature present. Made-with: Cursor
This commit is contained in:
@@ -15,7 +15,7 @@
|
||||
import { NextResponse } from 'next/server';
|
||||
import { authSession } from '@/lib/auth/session-server';
|
||||
import { query } from '@/lib/db-postgres';
|
||||
import { streamGeminiChat } from '@/lib/ai/gemini-chat';
|
||||
import { callGeminiChat, streamGeminiChat } from '@/lib/ai/gemini-chat';
|
||||
import { VIBN_TOOL_DEFINITIONS, executeMcpTool } from '@/lib/ai/vibn-tools';
|
||||
import type { ChatMessage, ToolCall } from '@/lib/ai/gemini-chat';
|
||||
|
||||
@@ -134,59 +134,56 @@ export async function POST(request: Request) {
|
||||
const assistantToolCalls: ToolCall[] = [];
|
||||
|
||||
try {
|
||||
// Tool-calling loop: use non-streaming so thought_signature is
|
||||
// always present in the complete response (required by thinking models).
|
||||
while (round < MAX_TOOL_ROUNDS) {
|
||||
round++;
|
||||
let pendingToolCalls: ToolCall[] = [];
|
||||
let roundText = '';
|
||||
|
||||
for await (const chunk of streamGeminiChat({
|
||||
systemPrompt,
|
||||
messages,
|
||||
tools: mcp_token ? VIBN_TOOL_DEFINITIONS : [],
|
||||
temperature: 0.7,
|
||||
})) {
|
||||
if (chunk.type === 'text' && chunk.text) {
|
||||
roundText += chunk.text;
|
||||
assistantText += chunk.text;
|
||||
emit({ type: 'text', text: chunk.text });
|
||||
} else if (chunk.type === 'tool_call' && chunk.toolCall) {
|
||||
pendingToolCalls.push(chunk.toolCall);
|
||||
assistantToolCalls.push(chunk.toolCall);
|
||||
emit({ type: 'tool_start', name: chunk.toolCall.name, args: chunk.toolCall.args });
|
||||
} else if (chunk.type === 'error') {
|
||||
emit({ type: 'error', error: chunk.error });
|
||||
controller.close();
|
||||
return;
|
||||
}
|
||||
const toolDefs = mcp_token ? VIBN_TOOL_DEFINITIONS : [];
|
||||
const resp = await callGeminiChat({ systemPrompt, messages, tools: toolDefs, temperature: 0.7 });
|
||||
|
||||
if (resp.error) {
|
||||
emit({ type: 'error', error: resp.error });
|
||||
controller.close();
|
||||
return;
|
||||
}
|
||||
|
||||
// Stream text to client
|
||||
if (resp.text) {
|
||||
assistantText += resp.text;
|
||||
emit({ type: 'text', text: resp.text });
|
||||
}
|
||||
|
||||
// Announce tool calls
|
||||
for (const tc of resp.toolCalls) {
|
||||
assistantToolCalls.push(tc);
|
||||
emit({ type: 'tool_start', name: tc.name, args: tc.args });
|
||||
}
|
||||
|
||||
// Save assistant turn
|
||||
const assistantMsg: ChatMessage = {
|
||||
messages.push({
|
||||
role: 'assistant',
|
||||
content: roundText,
|
||||
toolCalls: pendingToolCalls.length ? pendingToolCalls : undefined,
|
||||
};
|
||||
messages.push(assistantMsg);
|
||||
content: resp.text,
|
||||
toolCalls: resp.toolCalls.length ? resp.toolCalls : undefined,
|
||||
});
|
||||
|
||||
if (!pendingToolCalls.length) break;
|
||||
if (!resp.toolCalls.length) break;
|
||||
|
||||
// Execute tool calls
|
||||
for (const tc of pendingToolCalls) {
|
||||
// Execute tool calls and add results
|
||||
for (const tc of resp.toolCalls) {
|
||||
const result = mcp_token
|
||||
? await executeMcpTool(tc.name, tc.args, mcp_token, baseUrl)
|
||||
: JSON.stringify({ error: 'No MCP token — read-only mode.' });
|
||||
|
||||
emit({ type: 'tool_result', name: tc.name, result: result.slice(0, 500) });
|
||||
|
||||
const toolMsg: ChatMessage = {
|
||||
messages.push({
|
||||
role: 'tool',
|
||||
content: result,
|
||||
toolCallId: tc.id,
|
||||
toolName: tc.name,
|
||||
// Echo thought_signature back — required by Gemini thinking models
|
||||
thoughtSignature: tc.thoughtSignature,
|
||||
};
|
||||
messages.push(toolMsg);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
/**
|
||||
* Gemini 3.1 Pro streaming chat client with tool-calling support.
|
||||
* Gemini 3.1 Pro chat client with tool-calling support.
|
||||
*
|
||||
* Uses the Gemini API (generativelanguage.googleapis.com) with the
|
||||
* existing GOOGLE_API_KEY. Drop-in upgrade to Vertex AI when needed
|
||||
* by swapping GEMINI_BASE_URL.
|
||||
*
|
||||
* NOTE: Gemini thinking models (2.5+, 3.x) attach a `thought_signature`
|
||||
* to functionCall parts. This signature MUST be echoed back in the
|
||||
* functionResponse or the API returns a 400. We carry it through our
|
||||
* ToolCall type and re-attach it when building contents[].
|
||||
* Architecture:
|
||||
* - Tool-calling rounds use generateContent (non-streaming) so we always
|
||||
* get the complete response including thought_signature. Thinking models
|
||||
* (2.5+, 3.x) require this field to be echoed back in functionResponse
|
||||
* and it is not reliably present in individual SSE chunks.
|
||||
* - Final text-only response uses streamGenerateContent for good UX.
|
||||
*/
|
||||
|
||||
const GEMINI_API_KEY = process.env.GOOGLE_API_KEY || '';
|
||||
@@ -18,12 +16,9 @@ const GEMINI_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta';
|
||||
export interface ChatMessage {
|
||||
role: 'user' | 'assistant' | 'tool';
|
||||
content: string;
|
||||
/** Populated when role === 'assistant' and model made tool calls */
|
||||
toolCalls?: ToolCall[];
|
||||
/** Populated when role === 'tool' */
|
||||
toolCallId?: string;
|
||||
toolName?: string;
|
||||
/** Echo thought_signature back for tool responses (thinking models) */
|
||||
thoughtSignature?: string;
|
||||
}
|
||||
|
||||
@@ -31,7 +26,7 @@ export interface ToolCall {
|
||||
id: string;
|
||||
name: string;
|
||||
args: Record<string, unknown>;
|
||||
/** Gemini thinking-model signature — must be echoed in functionResponse */
|
||||
/** Must be echoed back in functionResponse for Gemini thinking models */
|
||||
thoughtSignature?: string;
|
||||
}
|
||||
|
||||
@@ -48,7 +43,7 @@ export interface ChatChunk {
|
||||
error?: string;
|
||||
}
|
||||
|
||||
/** Convert our flat ChatMessage[] to Gemini's contents[] format */
|
||||
/** Convert our ChatMessage[] to Gemini's contents[] format */
|
||||
function toGeminiContents(messages: ChatMessage[]) {
|
||||
const contents: any[] = [];
|
||||
|
||||
@@ -72,9 +67,7 @@ function toGeminiContents(messages: ChatMessage[]) {
|
||||
id: msg.toolCallId,
|
||||
response: { content: msg.content },
|
||||
};
|
||||
// Echo the thought_signature back — required for Gemini thinking models
|
||||
if (msg.thoughtSignature) fr.thought_signature = msg.thoughtSignature;
|
||||
|
||||
const part = { functionResponse: fr };
|
||||
const last = contents[contents.length - 1];
|
||||
if (last?.role === 'user') {
|
||||
@@ -87,23 +80,84 @@ function toGeminiContents(messages: ChatMessage[]) {
|
||||
return contents;
|
||||
}
|
||||
|
||||
/**
 * Convert our ToolDefinition[] to Gemini's `tools` request field.
 *
 * @param tools Tool definitions to expose to the model.
 * @returns A one-element array wrapping `functionDeclarations` (only the
 *   `name`, `description` and `parameters` of each tool are forwarded), or
 *   `undefined` when `tools` is empty so the caller can omit the field.
 */
function toGeminiFunctions(tools: ToolDefinition[]) {
  if (!tools.length) return undefined;

  // Single return: the previous text carried two identical array returns
  // back to back, the second of which could never execute.
  const functionDeclarations = tools.map((t) => ({
    name: t.name,
    description: t.description,
    parameters: t.parameters,
  }));
  return [{ functionDeclarations }];
}
|
||||
|
||||
/**
 * Assemble the JSON request payload used by both the streaming and the
 * non-streaming Gemini calls.
 *
 * - `contents` comes from `toGeminiContents(opts.messages)`.
 * - `systemInstruction` wraps `opts.systemPrompt` in a single text part.
 * - `generationConfig` uses `opts.temperature` (0.7 when absent) and a fixed
 *   `maxOutputTokens` of 8192.
 * - `tools` is only added when `toGeminiFunctions` returns a value.
 */
function buildBody(opts: {
  systemPrompt: string;
  messages: ChatMessage[];
  tools?: ToolDefinition[];
  temperature?: number;
}) {
  const contents = toGeminiContents(opts.messages);
  const toolSpec = toGeminiFunctions(opts.tools ?? []);

  const payload: any = {
    contents,
    systemInstruction: { parts: [{ text: opts.systemPrompt }] },
    generationConfig: {
      temperature: opts.temperature ?? 0.7,
      maxOutputTokens: 8192,
    },
    // Spread keeps `tools` as the last key and leaves it out entirely when
    // there is nothing to declare.
    ...(toolSpec ? { tools: toolSpec } : {}),
  };

  return payload;
}
|
||||
|
||||
/**
 * Non-streaming call — used for tool-calling rounds.
 *
 * Issues a single `generateContent` request and returns the whole reply at
 * once: the concatenated text of every part plus every function call the
 * model requested.
 *
 * @param opts.systemPrompt System instruction sent with the request.
 * @param opts.messages     Conversation so far.
 * @param opts.tools        Tool definitions to expose (optional).
 * @param opts.temperature  Sampling temperature (optional).
 * @returns `{ text, toolCalls }` on success. When the request cannot be sent
 *   or the API answers with a non-2xx status, `text` is empty, `toolCalls`
 *   is empty and `error` carries a human-readable message.
 */
export async function callGeminiChat(opts: {
  systemPrompt: string;
  messages: ChatMessage[];
  tools?: ToolDefinition[];
  temperature?: number;
}): Promise<{ text: string; toolCalls: ToolCall[]; error?: string }> {
  const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:generateContent?key=${GEMINI_API_KEY}`;

  let res: Response;
  try {
    res = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(buildBody(opts)),
    });
  } catch (e) {
    return { text: '', toolCalls: [], error: `Network error: ${e instanceof Error ? e.message : String(e)}` };
  }

  // A body that is not valid JSON is treated as an empty object so the
  // status check below can still produce an error message.
  const data = await res.json().catch(() => ({}));
  if (!res.ok) {
    const msg = data?.error?.message || JSON.stringify(data).slice(0, 200);
    return { text: '', toolCalls: [], error: `Gemini API error ${res.status}: ${msg}` };
  }

  const parts: any[] = data?.candidates?.[0]?.content?.parts ?? [];
  let text = '';
  const toolCalls: ToolCall[] = [];

  for (const part of parts) {
    if (part.text) text += part.text;

    const fc = part.functionCall;
    if (fc) {
      // The REST API puts the signature on the Part itself, as
      // `thoughtSignature`, next to `functionCall`. Reading only
      // `functionCall.thought_signature` always gave `undefined`. The older
      // locations/spellings are kept as fallbacks.
      const signature: string | undefined =
        part.thoughtSignature ??
        part.thought_signature ??
        fc.thoughtSignature ??
        fc.thought_signature;

      toolCalls.push({
        // Synthesize an id when the API does not provide one; the random
        // suffix keeps ids distinct for calls created in the same millisecond.
        id: fc.id || `tc-${Date.now()}-${Math.random().toString(36).slice(2)}`,
        name: fc.name,
        args: fc.args ?? {},
        thoughtSignature: signature,
      });
    }
  }

  return { text, toolCalls };
}
|
||||
|
||||
/**
|
||||
* Streaming call — used for the final text-only response.
|
||||
* Yields ChatChunk objects.
|
||||
*/
|
||||
export async function* streamGeminiChat(opts: {
|
||||
systemPrompt: string;
|
||||
@@ -111,20 +165,6 @@ export async function* streamGeminiChat(opts: {
|
||||
tools?: ToolDefinition[];
|
||||
temperature?: number;
|
||||
}): AsyncGenerator<ChatChunk> {
|
||||
const { systemPrompt, messages, tools = [], temperature = 0.7 } = opts;
|
||||
|
||||
const body: any = {
|
||||
contents: toGeminiContents(messages),
|
||||
systemInstruction: { parts: [{ text: systemPrompt }] },
|
||||
generationConfig: {
|
||||
temperature,
|
||||
maxOutputTokens: 8192,
|
||||
},
|
||||
};
|
||||
|
||||
const fns = toGeminiFunctions(tools);
|
||||
if (fns) body.tools = fns;
|
||||
|
||||
const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:streamGenerateContent?key=${GEMINI_API_KEY}&alt=sse`;
|
||||
|
||||
let res: Response;
|
||||
@@ -132,7 +172,7 @@ export async function* streamGeminiChat(opts: {
|
||||
res = await fetch(url, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify(body),
|
||||
body: JSON.stringify(buildBody(opts)),
|
||||
});
|
||||
} catch (e) {
|
||||
yield { type: 'error', error: `Network error: ${e instanceof Error ? e.message : String(e)}` };
|
||||
@@ -146,10 +186,7 @@ export async function* streamGeminiChat(opts: {
|
||||
}
|
||||
|
||||
const reader = res.body?.getReader();
|
||||
if (!reader) {
|
||||
yield { type: 'error', error: 'No response body' };
|
||||
return;
|
||||
}
|
||||
if (!reader) { yield { type: 'error', error: 'No response body' }; return; }
|
||||
|
||||
const decoder = new TextDecoder();
|
||||
let buffer = '';
|
||||
@@ -159,7 +196,6 @@ export async function* streamGeminiChat(opts: {
|
||||
const { done, value } = await reader.read();
|
||||
if (done) break;
|
||||
buffer += decoder.decode(value, { stream: true });
|
||||
|
||||
const lines = buffer.split('\n');
|
||||
buffer = lines.pop() ?? '';
|
||||
|
||||
@@ -167,34 +203,11 @@ export async function* streamGeminiChat(opts: {
|
||||
if (!line.startsWith('data: ')) continue;
|
||||
const data = line.slice(6).trim();
|
||||
if (!data || data === '[DONE]') continue;
|
||||
|
||||
let chunk: any;
|
||||
try {
|
||||
chunk = JSON.parse(data);
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
|
||||
const candidate = chunk?.candidates?.[0];
|
||||
if (!candidate) continue;
|
||||
const parts = candidate?.content?.parts ?? [];
|
||||
|
||||
try { chunk = JSON.parse(data); } catch { continue; }
|
||||
const parts = chunk?.candidates?.[0]?.content?.parts ?? [];
|
||||
for (const part of parts) {
|
||||
if (part.text) {
|
||||
yield { type: 'text', text: part.text };
|
||||
}
|
||||
if (part.functionCall) {
|
||||
yield {
|
||||
type: 'tool_call',
|
||||
toolCall: {
|
||||
id: part.functionCall.id || `tc-${Date.now()}`,
|
||||
name: part.functionCall.name,
|
||||
args: part.functionCall.args ?? {},
|
||||
// Carry the thought_signature so the chat route can echo it back
|
||||
thoughtSignature: part.functionCall.thought_signature,
|
||||
},
|
||||
};
|
||||
}
|
||||
if (part.text) yield { type: 'text', text: part.text };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user