From 4184baca772e86dd0b6fa654db4ed317d60ea77e Mon Sep 17 00:00:00 2001 From: Mark Henderson Date: Tue, 28 Apr 2026 15:24:49 -0700 Subject: [PATCH] feat(chat): expose Gemini's reasoning narration as a thinking pill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Today the chat shows ✓-icon tool trays with no narration between calls — the user has no idea WHY the AI just called fs_edit or ship. Meanwhile Gemini is producing 500-1000 chars of first-person reasoning per round ("Updating the Express Server: A Quick Production Deployment / Right, so we have a basic Express server here, nothing fancy. I need to get a new version live...") and billing us for those tokens — we just weren't asking for them. Three layers: 1. lib/ai/gemini-chat.ts - generationConfig.thinkingConfig.includeThoughts = true (default true, opt-out via includeThoughts: false). We're already paying for thinking tokens regardless of this flag — it just controls whether the model returns the human-readable summary or only the compressed signature. - callGeminiChat now returns { text, thoughts, toolCalls, finishReason } and the parser splits parts by `part.thought`. CRITICAL bug avoided: previously `if (part.text) text += ...` would have lumped thoughts into the chat bubble verbatim. - streamGeminiChat yields `{ type: 'thinking' }` for thought parts. 2. app/api/chat/route.ts - New SSE event: `data: {"type":"thinking","text":"..."}` - Emitted on every round alongside text + tool_start. - Recovery-summary branch also emits thoughts so even when the model produces no user-facing prose, the user sees the model's reasoning instead of dead silence. 3. components/vibn-chat/chat-panel.tsx - Message gains optional `thoughts` field (in-memory only — we do NOT persist thoughts to fs_chat_messages; they're ephemeral and cheap to drop). - New ThinkingBubble component: dashed-border italic pill above the assistant bubble, collapsed by default to show one-line preview, click to expand for full chain. Strips Gemini's "**Section Heading**" prefixes from the preview. - SSE handler accumulates thinking chunks onto the in-flight assistant message. UX impact: instead of staring at fs.read ✓ fs.edit ✓ ship ✓ icons, the user sees "Examining the target server file..." → "Shipping the twenty-crm project..." in real time. Costs zero additional tokens (we already paid for the thoughts). Cleanup: removed scripts/probe-gemini-raw.ts and scripts/probe-recovery-summary.ts — diagnostic scripts that identified this opportunity, no longer needed in-tree. Made-with: Cursor --- app/api/chat/route.ts | 17 +++- components/vibn-chat/chat-panel.tsx | 119 +++++++++++++++++++++++++--- lib/ai/gemini-chat.ts | 62 ++++++++++++--- 3 files changed, 176 insertions(+), 22 deletions(-) diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts index 99a70940..1b272b21 100644 --- a/app/api/chat/route.ts +++ b/app/api/chat/route.ts @@ -7,8 +7,10 @@ * * SSE event shapes: * data: {"type":"text","text":"..."} + * data: {"type":"thinking","text":"..."} // model's first-person reasoning * data: {"type":"tool_start","name":"...","args":{}} * data: {"type":"tool_result","name":"...","result":"..."} + * data: {"type":"aborted"} * data: {"type":"done"} * data: {"type":"error","error":"..."} */ @@ -54,7 +56,7 @@ async function ensureChatTables() { chatTablesReady = true; } -function buildSystemPrompt(projects: any[], workspace: string): string { +export function buildSystemPrompt(projects: any[], workspace: string): string { const projectsText = projects.length ? projects .map( @@ -291,12 +293,20 @@ export async function POST(request: Request) { return; } - // Stream text to client + // Stream user-facing text to client if (resp.text) { assistantText += resp.text; emit({ type: 'text', text: resp.text }); } + // Stream the model's reasoning narration as a separate SSE + // event type. We pay for thinking tokens whether or not we + // ask for them, so making them visible is free transparency + // — and it cures the "tool tray with no narrative" feel. + if (resp.thoughts) { + emit({ type: 'thinking', text: resp.thoughts }); + } + // Announce tool calls for (const tc of resp.toolCalls) { assistantToolCalls.push(tc); @@ -366,6 +376,9 @@ export async function POST(request: Request) { assistantText += summary.text; emit({ type: 'text', text: summary.text }); } + if (summary.thoughts) { + emit({ type: 'thinking', text: summary.thoughts }); + } } catch { // Don't let a failed summary kill the stream. } diff --git a/components/vibn-chat/chat-panel.tsx b/components/vibn-chat/chat-panel.tsx index 66514727..f1801dde 100644 --- a/components/vibn-chat/chat-panel.tsx +++ b/components/vibn-chat/chat-panel.tsx @@ -31,6 +31,14 @@ interface Message { toolCalls?: { id: string; name: string; args: Record }[]; toolName?: string; createdAt?: string; + /** + * First-person reasoning narration streamed alongside tool calls. + * Rendered as collapsed italic text above the message bubble; the + * user can expand for the full chain of thought. Discarded on + * persistence (we pay tokens regardless, but the bytes aren't + * worth keeping in PG). + */ + thoughts?: string; } interface ToolEvent { @@ -80,6 +88,72 @@ function renderMarkdown(text: string): string { // ── Message bubble ──────────────────────────────────────────────────────────── +/** + * Strip the markdown-bold "**Section Heading**" lines that Gemini + * loves to start each thought with so the collapsed pill shows the + * actual sentence rather than "**Examining the Target Server File**". + * The full text is still available in the expanded view. + */ +function thoughtPreview(thoughts: string): string { + const stripped = thoughts + .replace(/^\s*\*\*[^*]+\*\*\s*/gm, "") + .replace(/\s+/g, " ") + .trim(); + if (stripped.length <= 90) return stripped; + return stripped.slice(0, 87) + "…"; +} + +function ThinkingBubble({ thoughts }: { thoughts: string }) { + const [expanded, setExpanded] = useState(false); + const preview = thoughtPreview(thoughts); + if (!thoughts.trim()) return null; + return ( +
setExpanded(v => !v)} + title={expanded ? "Click to collapse" : "Click to see full reasoning"} + style={{ + display: "flex", + alignItems: expanded ? "flex-start" : "center", + gap: 8, + padding: "6px 12px", + margin: "4px 0", + background: "#faf8f5", + border: "1px dashed #e0dad0", + borderRadius: 8, + fontSize: "0.72rem", + color: "#8a847e", + fontStyle: "italic", + fontFamily: "var(--font-inter),ui-sans-serif,sans-serif", + cursor: "pointer", + userSelect: "text", + lineHeight: 1.55, + }} + > + + {expanded ? ( + + ) : ( + + {preview} + + )} +
+ ); +} + function MessageBubble({ msg }: { msg: Message }) { const isUser = msg.role === "user"; return ( @@ -95,18 +169,26 @@ function MessageBubble({ msg }: { msg: Message }) { )}
- {isUser ? ( - {msg.content} - ) : ( - + {!isUser && msg.thoughts && } + {(msg.content || isUser) && ( +
+ {isUser ? ( + {msg.content} + ) : ( + + )} +
)}
@@ -336,6 +418,21 @@ export function ChatPanel() { } return next; }); + } else if (ev.type === "thinking" && ev.text) { + // Accumulate reasoning narration on the in-flight + // assistant message. The renderer collapses it by + // default and shows the latest sentence as a pill. + setMessages((prev) => { + const next = [...prev]; + if (msgIndex >= 0 && next[msgIndex]) { + const existing = next[msgIndex].thoughts ?? ""; + next[msgIndex] = { + ...next[msgIndex], + thoughts: existing + ev.text, + }; + } + return next; + }); } else if (ev.type === "tool_start") { setToolEvents((prev) => [...prev, { name: ev.name, status: "running" }]); } else if (ev.type === "tool_result") { diff --git a/lib/ai/gemini-chat.ts b/lib/ai/gemini-chat.ts index d693e9c5..8690962b 100644 --- a/lib/ai/gemini-chat.ts +++ b/lib/ai/gemini-chat.ts @@ -37,7 +37,7 @@ export interface ToolDefinition { } export interface ChatChunk { - type: 'text' | 'tool_call' | 'done' | 'error'; + type: 'text' | 'thinking' | 'tool_call' | 'done' | 'error'; text?: string; toolCall?: ToolCall; error?: string; @@ -98,11 +98,23 @@ function buildBody(opts: { messages: ChatMessage[]; tools?: ToolDefinition[]; temperature?: number; + /** + * Ask Gemini to return its thought summaries as parts marked + * `thought: true`. We pay for thinking tokens regardless; this just + * makes them visible so the UI can show "Reading server.js…", + * "Shipping to production…" between tool calls instead of leaving + * the user staring at a silent tool tray. Defaults to true. + */ + includeThoughts?: boolean; }) { const body: any = { contents: toGeminiContents(opts.messages), systemInstruction: { parts: [{ text: opts.systemPrompt }] }, - generationConfig: { temperature: opts.temperature ?? 0.7, maxOutputTokens: 8192 }, + generationConfig: { + temperature: opts.temperature ?? 0.7, + maxOutputTokens: 8192, + thinkingConfig: { includeThoughts: opts.includeThoughts ?? true }, + }, }; const fns = toGeminiFunctions(opts.tools ?? []); if (fns) body.tools = fns; @@ -118,7 +130,15 @@ export async function callGeminiChat(opts: { messages: ChatMessage[]; tools?: ToolDefinition[]; temperature?: number; -}): Promise<{ text: string; toolCalls: ToolCall[]; error?: string }> { + includeThoughts?: boolean; +}): Promise<{ + text: string; + /** First-person reasoning narration; meant for a "thinking" UI panel, not the main bubble. */ + thoughts: string; + toolCalls: ToolCall[]; + finishReason?: string; + error?: string; +}> { const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:generateContent?key=${GEMINI_API_KEY}`; let res: Response; @@ -129,21 +149,41 @@ export async function callGeminiChat(opts: { body: JSON.stringify(buildBody(opts)), }); } catch (e) { - return { text: '', toolCalls: [], error: `Network error: ${e instanceof Error ? e.message : String(e)}` }; + return { + text: '', + thoughts: '', + toolCalls: [], + error: `Network error: ${e instanceof Error ? e.message : String(e)}`, + }; } const data = await res.json().catch(() => ({})); if (!res.ok) { const msg = data?.error?.message || JSON.stringify(data).slice(0, 200); - return { text: '', toolCalls: [], error: `Gemini API error ${res.status}: ${msg}` }; + return { + text: '', + thoughts: '', + toolCalls: [], + error: `Gemini API error ${res.status}: ${msg}`, + }; } - const parts: any[] = data?.candidates?.[0]?.content?.parts ?? []; + const cand = data?.candidates?.[0]; + const parts: any[] = cand?.content?.parts ?? []; let text = ''; + let thoughts = ''; const toolCalls: ToolCall[] = []; for (const part of parts) { - if (part.text) text += part.text; + if (part.text) { + // CRITICAL: Gemini tags reasoning parts with `thought: true`. If + // we lump them into `text` they leak into the chat bubble as if + // they were prose for the user — which is the opposite of what + // the user wants. Keep them in their own bucket so the route + // can stream them as a separate SSE event type. + if (part.thought) thoughts += part.text; + else text += part.text; + } if (part.functionCall) { toolCalls.push({ id: part.functionCall.id || `tc-${Date.now()}-${Math.random().toString(36).slice(2)}`, @@ -155,7 +195,7 @@ export async function callGeminiChat(opts: { } } - return { text, toolCalls }; + return { text, thoughts, toolCalls, finishReason: cand?.finishReason }; } /** @@ -210,7 +250,11 @@ export async function* streamGeminiChat(opts: { try { chunk = JSON.parse(data); } catch { continue; } const parts = chunk?.candidates?.[0]?.content?.parts ?? []; for (const part of parts) { - if (part.text) yield { type: 'text', text: part.text }; + if (part.text) { + yield part.thought + ? { type: 'thinking', text: part.text } + : { type: 'text', text: part.text }; + } } } }