Today the chat shows ✓-icon tool trays with no narration between
calls — the user has no idea WHY the AI just called fs_edit or
ship. Meanwhile Gemini is producing 500-1000 chars of first-person
reasoning per round ("Updating the Express Server: A Quick
Production Deployment / Right, so we have a basic Express server
here, nothing fancy. I need to get a new version live...") and
billing us for those tokens — we just weren't asking for the thought summaries to be returned.
Three layers:
1. lib/ai/gemini-chat.ts
- generationConfig.thinkingConfig.includeThoughts = true (default
true, opt-out via includeThoughts: false). We're already paying
for thinking tokens regardless of this flag — it just controls
whether the model returns the human-readable summary or only the
compressed signature.
- callGeminiChat now returns { text, thoughts, toolCalls,
finishReason } and the parser splits parts by `part.thought`.
CRITICAL bug avoided: previously `if (part.text) text += ...`
would have lumped thoughts into the chat bubble verbatim.
- streamGeminiChat yields `{ type: 'thinking' }` for thought parts.
2. app/api/chat/route.ts
- New SSE event: `data: {"type":"thinking","text":"..."}`
- Emitted on every round alongside text + tool_start.
- Recovery-summary branch also emits thoughts so even when the
model produces no user-facing prose, the user sees the model's
reasoning instead of dead silence.
3. components/vibn-chat/chat-panel.tsx
- Message gains optional `thoughts` field (in-memory only — we do
NOT persist thoughts to fs_chat_messages; they're ephemeral and
cheap to drop).
- New ThinkingBubble component: dashed-border italic pill above
the assistant bubble, collapsed by default to show one-line
preview, click to expand for full chain. Strips Gemini's
"**Section Heading**" prefixes from the preview.
- SSE handler accumulates thinking chunks onto the in-flight
assistant message.
UX impact: instead of staring at fs.read ✓ fs.edit ✓ ship ✓ icons,
the user sees "Examining the target server file..." → "Shipping the
twenty-crm project..." in real time. Costs zero additional tokens
(we already paid for the thoughts).
Cleanup: removed scripts/probe-gemini-raw.ts and
scripts/probe-recovery-summary.ts — diagnostic scripts that
identified this opportunity, no longer needed in-tree.
Made-with: Cursor
267 lines
8.0 KiB
TypeScript
267 lines
8.0 KiB
TypeScript
/**
 * Gemini 3.1 Pro chat client with tool-calling support.
 *
 * Architecture:
 * - Tool-calling rounds use generateContent (non-streaming) so we always
 *   get the complete response including thought_signature. Thinking models
 *   (2.5+, 3.x) require this field to be echoed back on the next request,
 *   as a sibling of the replayed functionCall part, and it is not reliably
 *   present in individual SSE chunks.
 * - Final text-only response uses streamGenerateContent for good UX.
 */

/** API key read from GOOGLE_API_KEY; falls back to an empty string when unset. */
const GEMINI_API_KEY = process.env.GOOGLE_API_KEY || '';
/** Model id placed in the request URL; VIBN_CHAT_MODEL overrides the default. */
const GEMINI_MODEL = process.env.VIBN_CHAT_MODEL || 'gemini-3.1-pro-preview';
/** Root of the Generative Language v1beta REST endpoints. */
const GEMINI_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta';

/**
 * One entry of the provider-agnostic conversation history that gets
 * translated into Gemini `contents[]` before each request.
 */
export interface ChatMessage {
  /** Who produced the message. `assistant` is sent to Gemini as `model`; `tool` becomes a functionResponse part. */
  role: 'user' | 'assistant' | 'tool';
  /** Plain text of the message, or the tool result when `role` is `tool`. */
  content: string;
  /** Function calls requested by the assistant in this turn. */
  toolCalls?: ToolCall[];
  /** For `tool` messages: id of the call this result answers. */
  toolCallId?: string;
  /** For `tool` messages: name of the function that ran (sent as `unknown` when missing). */
  toolName?: string;
  /** NOTE(review): not read by the request builder in this file — presumably kept for callers; confirm usage. */
  thoughtSignature?: string;
}

/** A single function call requested by the model. */
export interface ToolCall {
  /** Call id; taken from Gemini when present, otherwise generated locally. */
  id: string;
  /** Name of the function to invoke. */
  name: string;
  /** Arguments object supplied by the model. */
  args: Record<string, unknown>;
  /**
   * Opaque signature Gemini thinking models attach next to the functionCall.
   * It must be echoed back unchanged — as a sibling of that functionCall —
   * when the call is replayed in the conversation history.
   */
  thoughtSignature?: string;
}

/** Declaration of a callable tool, forwarded to Gemini as a function declaration. */
export interface ToolDefinition {
  /** Function name the model uses to call the tool. */
  name: string;
  /** Natural-language description shown to the model. */
  description: string;
  /** Parameter schema object, passed through to Gemini untouched. */
  parameters: Record<string, unknown>;
}

/** One event yielded by the streaming client. */
export interface ChatChunk {
  /**
   * Event kind. `text` is user-facing prose, `thinking` is a thought-summary
   * fragment, `done` marks a clean end of stream and `error` carries `error`.
   */
  type: 'text' | 'thinking' | 'tool_call' | 'done' | 'error';
  /** Payload for `text` and `thinking` chunks. */
  text?: string;
  /** Payload for `tool_call` chunks. */
  toolCall?: ToolCall;
  /** Human-readable message for `error` chunks. */
  error?: string;
}

/**
 * Convert our ChatMessage[] to Gemini's contents[] format.
 *
 * - `user` messages become a `user` turn with a single text part.
 * - `assistant` messages become a `model` turn holding the optional text
 *   part followed by one functionCall part per tool call; an assistant
 *   message with neither text nor tool calls is dropped.
 * - `tool` messages become functionResponse parts. Consecutive results are
 *   merged into the preceding `user` turn so all responses to one model
 *   turn travel together.
 *
 * @param messages Conversation history in our own format.
 * @returns The equivalent Gemini `contents` array.
 */
function toGeminiContents(messages: ChatMessage[]) {
  type Part = Record<string, unknown>;
  interface Content {
    role: 'user' | 'model';
    parts: Part[];
  }

  const contents: Content[] = [];

  for (const msg of messages) {
    switch (msg.role) {
      case 'user':
        contents.push({ role: 'user', parts: [{ text: msg.content }] });
        break;

      case 'assistant': {
        const parts: Part[] = [];
        if (msg.content) parts.push({ text: msg.content });
        for (const tc of msg.toolCalls ?? []) {
          // thoughtSignature is a SIBLING of functionCall in the part object,
          // not nested inside it. See: ai.google.dev/gemini-api/docs/thought-signatures
          const part: Part = { functionCall: { name: tc.name, args: tc.args, id: tc.id } };
          if (tc.thoughtSignature) part.thoughtSignature = tc.thoughtSignature;
          parts.push(part);
        }
        if (parts.length) contents.push({ role: 'model', parts });
        break;
      }

      case 'tool': {
        const part: Part = {
          functionResponse: {
            name: msg.toolName || 'unknown',
            id: msg.toolCallId,
            response: { content: msg.content },
          },
        };
        // Append to the previous user turn when there is one, otherwise open a new turn.
        const last = contents[contents.length - 1];
        if (last?.role === 'user') {
          last.parts.push(part);
        } else {
          contents.push({ role: 'user', parts: [part] });
        }
        break;
      }
    }
  }

  return contents;
}

/**
 * Wrap our tool definitions in Gemini's `tools` structure.
 *
 * Only `name`, `description` and `parameters` are forwarded; any other
 * property on a definition is left out of the request.
 *
 * @param tools Tool definitions to expose to the model.
 * @returns A one-element `tools` array, or `undefined` when there are no
 *   tools so the caller can omit the field entirely.
 */
function toGeminiFunctions(tools: ToolDefinition[]) {
  if (!tools.length) return undefined;

  const functionDeclarations = tools.map(({ name, description, parameters }) => ({
    name,
    description,
    parameters,
  }));

  return [{ functionDeclarations }];
}

/**
 * Assemble the JSON request body shared by the streaming and
 * non-streaming endpoints.
 *
 * @returns An object with `contents`, `systemInstruction` and
 *   `generationConfig`; `tools` is added only when at least one tool is given.
 */
function buildBody(opts: {
  systemPrompt: string;
  messages: ChatMessage[];
  tools?: ToolDefinition[];
  /** Sampling temperature; 0.7 when omitted. */
  temperature?: number;
  /**
   * Ask Gemini to return its thought summaries as parts marked
   * `thought: true`. We pay for thinking tokens regardless; this just
   * makes them visible so the UI can show "Reading server.js…",
   * "Shipping to production…" between tool calls instead of leaving
   * the user staring at a silent tool tray. Defaults to true.
   */
  includeThoughts?: boolean;
}): Record<string, unknown> {
  const generationConfig = {
    temperature: opts.temperature ?? 0.7,
    maxOutputTokens: 8192,
    thinkingConfig: { includeThoughts: opts.includeThoughts ?? true },
  };

  const body: Record<string, unknown> = {
    contents: toGeminiContents(opts.messages),
    systemInstruction: { parts: [{ text: opts.systemPrompt }] },
    generationConfig,
  };

  // Leave `tools` out entirely when nothing is declared.
  const fns = toGeminiFunctions(opts.tools ?? []);
  if (fns) body.tools = fns;

  return body;
}

/**
 * Non-streaming call — used for tool-calling rounds.
 * Returns the complete response, so the thought signature that
 * accompanies each functionCall part is available to echo back later.
 *
 * Never throws for transport or API failures: those are reported through
 * the `error` field with `text`, `thoughts` and `toolCalls` left empty.
 *
 * @returns `text` — user-facing prose; `thoughts` — concatenated parts
 *   tagged `thought: true`; `toolCalls` — requested function calls;
 *   `finishReason` — taken from the first candidate when present.
 */
export async function callGeminiChat(opts: {
  systemPrompt: string;
  messages: ChatMessage[];
  tools?: ToolDefinition[];
  temperature?: number;
  includeThoughts?: boolean;
}): Promise<{
  text: string;
  /** First-person reasoning narration; meant for a "thinking" UI panel, not the main bubble. */
  thoughts: string;
  toolCalls: ToolCall[];
  finishReason?: string;
  error?: string;
}> {
  const failure = (error: string) => ({ text: '', thoughts: '', toolCalls: [] as ToolCall[], error });

  // The key travels in a header rather than the query string so it does not
  // end up in URLs captured by logs, proxies or error reports.
  const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:generateContent`;

  let res: Response;
  try {
    res = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'x-goog-api-key': GEMINI_API_KEY },
      body: JSON.stringify(buildBody(opts)),
    });
  } catch (e) {
    return failure(`Network error: ${e instanceof Error ? e.message : String(e)}`);
  }

  // Untyped REST payload; an unparsable body degrades to an empty object.
  const data: any = await res.json().catch(() => ({}));
  if (!res.ok) {
    const msg = data?.error?.message || JSON.stringify(data).slice(0, 200);
    return failure(`Gemini API error ${res.status}: ${msg}`);
  }

  const cand = data?.candidates?.[0];

  // A blocked prompt comes back as HTTP 200 with no candidates; surface it
  // instead of returning an indistinguishable empty result.
  const blockReason = data?.promptFeedback?.blockReason;
  if (!cand && blockReason) {
    return failure(`Gemini blocked the prompt: ${blockReason}`);
  }

  const parts: any[] = cand?.content?.parts ?? [];
  let text = '';
  let thoughts = '';
  const toolCalls: ToolCall[] = [];

  for (const part of parts) {
    if (part.text) {
      // CRITICAL: Gemini tags reasoning parts with `thought: true`. If
      // we lump them into `text` they leak into the chat bubble as if
      // they were prose for the user. Keep them in their own bucket so
      // the route can stream them as a separate SSE event type.
      if (part.thought) thoughts += part.text;
      else text += part.text;
    }
    if (part.functionCall) {
      toolCalls.push({
        // Fall back to a locally generated id when Gemini supplies none.
        id: part.functionCall.id || `tc-${Date.now()}-${Math.random().toString(36).slice(2)}`,
        name: part.functionCall.name,
        args: part.functionCall.args ?? {},
        // thoughtSignature is a SIBLING of functionCall in the part, not inside it
        thoughtSignature: part.thoughtSignature,
      });
    }
  }

  return { text, thoughts, toolCalls, finishReason: cand?.finishReason };
}

/**
 * Streaming call — used for the final text-only response.
 *
 * Yields ChatChunk objects: `thinking` for parts tagged `thought: true`,
 * `text` for every other text part, `error` (then stops) when the request
 * cannot be made or is rejected, and a final `done` after a clean end of
 * stream. Function-call parts in the stream are ignored.
 *
 * @param opts.includeThoughts Forwarded to the request body; thought
 *   summaries are requested unless this is `false`.
 */
export async function* streamGeminiChat(opts: {
  systemPrompt: string;
  messages: ChatMessage[];
  tools?: ToolDefinition[];
  temperature?: number;
  includeThoughts?: boolean;
}): AsyncGenerator<ChatChunk> {
  // The key travels in a header rather than the query string so it does not
  // end up in URLs captured by logs, proxies or error reports.
  const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:streamGenerateContent?alt=sse`;

  let res: Response;
  try {
    res = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'x-goog-api-key': GEMINI_API_KEY },
      body: JSON.stringify(buildBody(opts)),
    });
  } catch (e) {
    yield { type: 'error', error: `Network error: ${e instanceof Error ? e.message : String(e)}` };
    return;
  }

  if (!res.ok) {
    const errText = await res.text().catch(() => '');
    yield { type: 'error', error: `Gemini API error ${res.status}: ${errText.slice(0, 300)}` };
    return;
  }

  const reader = res.body?.getReader();
  if (!reader) {
    yield { type: 'error', error: 'No response body' };
    return;
  }

  /** Turn one SSE line into zero or more chunks; non-data and malformed lines give none. */
  const chunksFromLine = (line: string): ChatChunk[] => {
    if (!line.startsWith('data:')) return [];
    const payload = line.slice(5).trim();
    if (!payload || payload === '[DONE]') return [];

    let event: any;
    try {
      event = JSON.parse(payload);
    } catch {
      return [];
    }

    const out: ChatChunk[] = [];
    for (const part of event?.candidates?.[0]?.content?.parts ?? []) {
      if (part.text) {
        out.push({ type: part.thought ? 'thinking' : 'text', text: part.text });
      }
    }
    return out;
  };

  const decoder = new TextDecoder();
  let buffer = '';
  let finished = false;

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split('\n');
      // The last element is an incomplete line; keep it for the next read.
      buffer = lines.pop() ?? '';

      for (const line of lines) {
        for (const chunk of chunksFromLine(line)) yield chunk;
      }
    }

    // Flush bytes still held by the decoder and parse a final line that
    // arrived without a trailing newline.
    buffer += decoder.decode();
    if (buffer) {
      for (const chunk of chunksFromLine(buffer)) yield chunk;
    }
    finished = true;
  } finally {
    // When the consumer stops early or a read fails, cancel the body so the
    // upstream connection is released instead of left open.
    if (!finished) await reader.cancel().catch(() => undefined);
    reader.releaseLock();
  }

  yield { type: 'done' };
}