From 4184baca772e86dd0b6fa654db4ed317d60ea77e Mon Sep 17 00:00:00 2001
From: Mark Henderson <mark@getacquired.com>
Date: Tue, 28 Apr 2026 15:24:49 -0700
Subject: [PATCH] feat(chat): expose Gemini's reasoning narration as a thinking
 pill
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Today the chat shows ✓-icon tool trays with no narration between
calls — the user has no idea WHY the AI just called fs_edit or
ship. Meanwhile Gemini is producing 500-1000 chars of first-person
reasoning per round ("Updating the Express Server: A Quick
Production Deployment / Right, so we have a basic Express server
here, nothing fancy. I need to get a new version live...") and
billing us for those tokens — we just weren't asking for them.

Three layers:

1. lib/ai/gemini-chat.ts
   - generationConfig.thinkingConfig.includeThoughts = true (default
     true, opt-out via includeThoughts: false). We're already paying
     for thinking tokens regardless of this flag — it just controls
     whether the model returns the human-readable summary or only the
     compressed signature.
   - callGeminiChat now returns { text, thoughts, toolCalls,
     finishReason } and the parser splits parts by `part.thought`.
     CRITICAL bug avoided: previously `if (part.text) text += ...`
     would have lumped thoughts into the chat bubble verbatim.
   - streamGeminiChat yields `{ type: 'thinking' }` for thought parts.

2. app/api/chat/route.ts
   - New SSE event: `data: {"type":"thinking","text":"..."}`
   - Emitted on every round that returns thoughts, alongside text +
     tool_start.
   - Recovery-summary branch also emits thoughts so even when the
     model produces no user-facing prose, the user sees the model's
     reasoning instead of dead silence.
   - Header comment now also lists the `aborted` SSE event, and
     buildSystemPrompt is exported.

3. components/vibn-chat/chat-panel.tsx
   - Message gains optional `thoughts` field (in-memory only — we do
     NOT persist thoughts to fs_chat_messages; they're ephemeral and
     cheap to drop).
   - New ThinkingBubble component: dashed-border italic pill above
     the assistant bubble, collapsed by default to show one-line
     preview, click to expand for full chain. Strips Gemini's
     "**Section Heading**" prefixes from the preview.
   - SSE handler accumulates thinking chunks onto the in-flight
     assistant message.

UX impact: instead of staring at fs.read ✓ fs.edit ✓ ship ✓ icons,
the user sees "Examining the target server file..." → "Shipping the
twenty-crm project..." in real time. Costs zero additional tokens
(we already paid for the thoughts).

Cleanup: removed scripts/probe-gemini-raw.ts and
scripts/probe-recovery-summary.ts — diagnostic scripts that
identified this opportunity, no longer needed in-tree.

Made-with: Cursor
---
 app/api/chat/route.ts               |  17 +++-
 components/vibn-chat/chat-panel.tsx | 119 +++++++++++++++++++++++++---
 lib/ai/gemini-chat.ts               |  62 ++++++++++++---
 3 files changed, 176 insertions(+), 22 deletions(-)

diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts
index 99a70940..1b272b21 100644
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -7,8 +7,10 @@
  *
  * SSE event shapes:
  *   data: {"type":"text","text":"..."}
+ *   data: {"type":"thinking","text":"..."}    // model's first-person reasoning
  *   data: {"type":"tool_start","name":"...","args":{}}
  *   data: {"type":"tool_result","name":"...","result":"..."}
+ *   data: {"type":"aborted"}
  *   data: {"type":"done"}
  *   data: {"type":"error","error":"..."}
  */
@@ -54,7 +56,7 @@ async function ensureChatTables() {
   chatTablesReady = true;
 }
 
-function buildSystemPrompt(projects: any[], workspace: string): string {
+export function buildSystemPrompt(projects: any[], workspace: string): string {
   const projectsText = projects.length
     ? projects
         .map(
@@ -291,12 +293,20 @@ export async function POST(request: Request) {
             return;
           }
 
-          // Stream text to client
+          // Stream user-facing text to client
           if (resp.text) {
             assistantText += resp.text;
             emit({ type: 'text', text: resp.text });
           }
 
+          // Stream the model's reasoning narration as a separate SSE
+          // event type. We pay for thinking tokens whether or not we
+          // ask for them, so making them visible is free transparency
+          // — and it cures the "tool tray with no narrative" feel.
+          if (resp.thoughts) {
+            emit({ type: 'thinking', text: resp.thoughts });
+          }
+
           // Announce tool calls
           for (const tc of resp.toolCalls) {
             assistantToolCalls.push(tc);
@@ -366,6 +376,9 @@ export async function POST(request: Request) {
               assistantText += summary.text;
               emit({ type: 'text', text: summary.text });
             }
+            if (summary.thoughts) {
+              emit({ type: 'thinking', text: summary.thoughts });
+            }
           } catch {
             // Don't let a failed summary kill the stream.
           }
diff --git a/components/vibn-chat/chat-panel.tsx b/components/vibn-chat/chat-panel.tsx
index 66514727..f1801dde 100644
--- a/components/vibn-chat/chat-panel.tsx
+++ b/components/vibn-chat/chat-panel.tsx
@@ -31,6 +31,14 @@ interface Message {
   toolCalls?: { id: string; name: string; args: Record<string, unknown> }[];
   toolName?: string;
   createdAt?: string;
+  /**
+   * First-person reasoning narration streamed alongside tool calls.
+   * Rendered as collapsed italic text above the message bubble; the
+   * user can expand for the full chain of thought. Discarded on
+   * persistence (we pay tokens regardless, but the bytes aren't
+   * worth keeping in PG).
+   */
+  thoughts?: string;
 }
 
 interface ToolEvent {
@@ -80,6 +88,72 @@ function renderMarkdown(text: string): string {
 
 // ── Message bubble ────────────────────────────────────────────────────────────
 
+/**
+ * One-line preview for the collapsed pill: the body of the MOST RECENT
+ * thought section, so the pill advances as new rounds stream in. Gemini's
+ * "**Section Heading**" lines are treated as section breaks and dropped;
+ * whitespace is collapsed; text over 90 chars is cut to 87 chars + "…".
+ */
+function thoughtPreview(thoughts: string): string {
+  const sections = thoughts
+    .split(/^\s*\*\*[^*]+\*\*\s*/m) // heading lines double as section breaks
+    .map((body) => body.replace(/\s+/g, " ").trim())
+    .filter(Boolean);
+  const latest = sections[sections.length - 1] ?? "";
+  return latest.length <= 90 ? latest : latest.slice(0, 87) + "…";
+}
+
+/** Collapsible reasoning pill: one-line preview, expands to the full chain on click/Enter/Space. */
+function ThinkingBubble({ thoughts }: { thoughts: string }) {
+  const [expanded, setExpanded] = useState(false);
+  if (!thoughts.trim()) return null;
+  // Heading-only thoughts preview as ""; fall back to the bare heading text.
+  const preview = thoughtPreview(thoughts) || thoughts.replace(/\*+/g, "").replace(/\s+/g, " ").trim();
+  const toggle = () => setExpanded((v) => !v);
+  return (
+    <div
+      role="button"
+      tabIndex={0}
+      aria-expanded={expanded}
+      // Drag-selecting text in the pill also fires click; don't toggle mid-copy.
+      onClick={() => { if (!window.getSelection()?.toString()) toggle(); }}
+      onKeyDown={(e) => {
+        if (e.key === "Enter" || e.key === " ") { e.preventDefault(); toggle(); }
+      }}
+      title={expanded ? "Click to collapse" : "Click to see full reasoning"}
+      style={{
+        display: "flex", gap: 8,
+        alignItems: expanded ? "flex-start" : "center",
+        padding: "6px 12px", margin: "4px 0",
+        background: "#faf8f5", border: "1px dashed #e0dad0",
+        borderRadius: 8, fontSize: "0.72rem",
+        color: "#8a847e", fontStyle: "italic",
+        fontFamily: "var(--font-inter),ui-sans-serif,sans-serif",
+        cursor: "pointer", userSelect: "text", lineHeight: 1.55,
+      }}
+    >
+      <ChevronRight
+        style={{
+          width: 11, height: 11, flexShrink: 0, color: "#b0a99e",
+          marginTop: expanded ? 4 : 0,
+          transform: expanded ? "rotate(90deg)" : "none",
+          transition: "transform 0.15s",
+        }}
+      />
+      {expanded ? (
+        <span
+          style={{ whiteSpace: "pre-wrap" }}
+          dangerouslySetInnerHTML={{ __html: renderMarkdown(thoughts) }}
+        />
+      ) : (
+        <span style={{ overflow: "hidden", textOverflow: "ellipsis", whiteSpace: "nowrap", flex: 1 }}>
+          {preview}
+        </span>
+      )}
+    </div>
+  );
+}
+
 function MessageBubble({ msg }: { msg: Message }) {
   const isUser = msg.role === "user";
   return (
@@ -95,18 +169,26 @@ function MessageBubble({ msg }: { msg: Message }) {
       )}
       <div style={{
         maxWidth: "82%",
-        padding: isUser ? "9px 14px" : "10px 14px",
-        borderRadius: isUser ? "14px 14px 4px 14px" : "4px 14px 14px 14px",
-        background: isUser ? "#1a1a1a" : "#f7f4ef",
-        color: isUser ? "#fff" : "#1a1a1a",
-        fontSize: "0.84rem",
-        lineHeight: 1.6,
-        fontFamily: "var(--font-inter),ui-sans-serif,sans-serif",
+        display: "flex",
+        flexDirection: "column",
       }}>
-        {isUser ? (
-          <span style={{ whiteSpace: "pre-wrap" }}>{msg.content}</span>
-        ) : (
-          <span dangerouslySetInnerHTML={{ __html: renderMarkdown(msg.content) }} />
+        {!isUser && msg.thoughts && <ThinkingBubble thoughts={msg.thoughts} />}
+        {(msg.content || isUser) && (
+          <div style={{
+            padding: isUser ? "9px 14px" : "10px 14px",
+            borderRadius: isUser ? "14px 14px 4px 14px" : "4px 14px 14px 14px",
+            background: isUser ? "#1a1a1a" : "#f7f4ef",
+            color: isUser ? "#fff" : "#1a1a1a",
+            fontSize: "0.84rem",
+            lineHeight: 1.6,
+            fontFamily: "var(--font-inter),ui-sans-serif,sans-serif",
+          }}>
+            {isUser ? (
+              <span style={{ whiteSpace: "pre-wrap" }}>{msg.content}</span>
+            ) : (
+              <span dangerouslySetInnerHTML={{ __html: renderMarkdown(msg.content) }} />
+            )}
+          </div>
         )}
       </div>
     </div>
@@ -336,6 +418,21 @@ export function ChatPanel() {
               }
               return next;
             });
+          } else if (ev.type === "thinking" && ev.text) {
+            // Accumulate reasoning narration on the in-flight
+            // assistant message. The renderer collapses it by
+            // default into a one-line preview pill.
+            setMessages((prev) => {
+              const next = [...prev];
+              if (msgIndex >= 0 && next[msgIndex]) {
+                const existing = next[msgIndex].thoughts ?? "";
+                next[msgIndex] = {
+                  ...next[msgIndex],
+                  thoughts: existing + ev.text,
+                };
+              }
+              return next;
+            });
           } else if (ev.type === "tool_start") {
             setToolEvents((prev) => [...prev, { name: ev.name, status: "running" }]);
           } else if (ev.type === "tool_result") {
diff --git a/lib/ai/gemini-chat.ts b/lib/ai/gemini-chat.ts
index d693e9c5..8690962b 100644
--- a/lib/ai/gemini-chat.ts
+++ b/lib/ai/gemini-chat.ts
@@ -37,7 +37,7 @@ export interface ToolDefinition {
 }
 
 export interface ChatChunk {
-  type: 'text' | 'tool_call' | 'done' | 'error';
+  type: 'text' | 'thinking' | 'tool_call' | 'done' | 'error';
   text?: string;
   toolCall?: ToolCall;
   error?: string;
@@ -98,11 +98,23 @@ function buildBody(opts: {
   messages: ChatMessage[];
   tools?: ToolDefinition[];
   temperature?: number;
+  /**
+   * Ask Gemini to return its thought summaries as parts marked
+   * `thought: true`. We pay for thinking tokens regardless; this just
+   * makes them visible so the UI can show "Reading server.js…",
+   * "Shipping to production…" between tool calls instead of leaving
+   * the user staring at a silent tool tray. Defaults to true.
+   */
+  includeThoughts?: boolean;
 }) {
   const body: any = {
     contents: toGeminiContents(opts.messages),
     systemInstruction: { parts: [{ text: opts.systemPrompt }] },
-    generationConfig: { temperature: opts.temperature ?? 0.7, maxOutputTokens: 8192 },
+    generationConfig: {
+      temperature: opts.temperature ?? 0.7,
+      maxOutputTokens: 8192,
+      thinkingConfig: { includeThoughts: opts.includeThoughts ?? true },
+    },
   };
   const fns = toGeminiFunctions(opts.tools ?? []);
   if (fns) body.tools = fns;
@@ -118,7 +130,15 @@ export async function callGeminiChat(opts: {
   messages: ChatMessage[];
   tools?: ToolDefinition[];
   temperature?: number;
-}): Promise<{ text: string; toolCalls: ToolCall[]; error?: string }> {
+  includeThoughts?: boolean;
+}): Promise<{
+  text: string;
+  /** First-person reasoning narration; meant for a "thinking" UI panel, not the main bubble. */
+  thoughts: string;
+  toolCalls: ToolCall[];
+  finishReason?: string;
+  error?: string;
+}> {
   const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:generateContent?key=${GEMINI_API_KEY}`;
 
   let res: Response;
@@ -129,21 +149,41 @@ export async function callGeminiChat(opts: {
       body: JSON.stringify(buildBody(opts)),
     });
   } catch (e) {
-    return { text: '', toolCalls: [], error: `Network error: ${e instanceof Error ? e.message : String(e)}` };
+    return {
+      text: '',
+      thoughts: '',
+      toolCalls: [],
+      error: `Network error: ${e instanceof Error ? e.message : String(e)}`,
+    };
   }
 
   const data = await res.json().catch(() => ({}));
   if (!res.ok) {
     const msg = data?.error?.message || JSON.stringify(data).slice(0, 200);
-    return { text: '', toolCalls: [], error: `Gemini API error ${res.status}: ${msg}` };
+    return {
+      text: '',
+      thoughts: '',
+      toolCalls: [],
+      error: `Gemini API error ${res.status}: ${msg}`,
+    };
   }
 
-  const parts: any[] = data?.candidates?.[0]?.content?.parts ?? [];
+  const cand = data?.candidates?.[0];
+  const parts: any[] = cand?.content?.parts ?? [];
   let text = '';
+  let thoughts = '';
   const toolCalls: ToolCall[] = [];
 
   for (const part of parts) {
-    if (part.text) text += part.text;
+    if (part.text) {
+      // CRITICAL: Gemini tags reasoning parts with `thought: true`. If
+      // we lump them into `text` they leak into the chat bubble as if
+      // they were prose for the user — which is the opposite of what
+      // the user wants. Keep them in their own bucket so the route
+      // can stream them as a separate SSE event type.
+      if (part.thought) thoughts += part.text;
+      else text += part.text;
+    }
     if (part.functionCall) {
       toolCalls.push({
         id: part.functionCall.id || `tc-${Date.now()}-${Math.random().toString(36).slice(2)}`,
@@ -155,7 +195,7 @@ export async function callGeminiChat(opts: {
     }
   }
 
-  return { text, toolCalls };
+  return { text, thoughts, toolCalls, finishReason: cand?.finishReason };
 }
 
 /**
@@ -210,7 +250,11 @@ export async function* streamGeminiChat(opts: {
         try { chunk = JSON.parse(data); } catch { continue; }
         const parts = chunk?.candidates?.[0]?.content?.parts ?? [];
         for (const part of parts) {
-          if (part.text) yield { type: 'text', text: part.text };
+          if (part.text) {
+            yield part.thought
+              ? { type: 'thinking', text: part.text }
+              : { type: 'text', text: part.text };
+          }
         }
       }
     }