feat(chat): expose Gemini's reasoning narration as a thinking pill

Today the chat shows ✓-icon tool trays with no narration between
calls — the user has no idea WHY the AI just called fs_edit or
ship. Meanwhile Gemini is producing 500-1000 chars of first-person
reasoning per round ("Updating the Express Server: A Quick
Production Deployment / Right, so we have a basic Express server
here, nothing fancy. I need to get a new version live...") and
billing us for those tokens — we just weren't asking for them.

Three layers:

1. lib/ai/gemini-chat.ts
   - generationConfig.thinkingConfig.includeThoughts = true (default
     true, opt-out via includeThoughts: false). We're already paying
     for thinking tokens regardless of this flag — it just controls
     whether the model returns the human-readable summary or only the
     compressed signature.
   - callGeminiChat now returns { text, thoughts, toolCalls,
     finishReason } and the parser splits parts by `part.thought`.
     CRITICAL bug avoided: previously `if (part.text) text += ...`
     would have lumped thoughts into the chat bubble verbatim.
   - streamGeminiChat yields `{ type: 'thinking' }` for thought parts.

2. app/api/chat/route.ts
   - New SSE event: `data: {"type":"thinking","text":"..."}`
   - Emitted on every round alongside text + tool_start.
   - Recovery-summary branch also emits thoughts so even when the
     model produces no user-facing prose, the user sees the model's
     reasoning instead of dead silence.

3. components/vibn-chat/chat-panel.tsx
   - Message gains optional `thoughts` field (in-memory only — we do
     NOT persist thoughts to fs_chat_messages; they're ephemeral and
     cheap to drop).
   - New ThinkingBubble component: dashed-border italic pill above
     the assistant bubble, collapsed by default to show one-line
     preview, click to expand for full chain. Strips Gemini's
     "**Section Heading**" prefixes from the preview.
   - SSE handler accumulates thinking chunks onto the in-flight
     assistant message.

UX impact: instead of staring at fs.read ✓ fs.edit ✓ ship ✓ icons,
the user sees "Examining the target server file..." → "Shipping the
twenty-crm project..." in real time. Costs zero additional tokens
(we already paid for the thoughts).

Cleanup: removed scripts/probe-gemini-raw.ts and
scripts/probe-recovery-summary.ts — diagnostic scripts that
identified this opportunity, no longer needed in-tree.

Made-with: Cursor
This commit is contained in:
2026-04-28 15:24:49 -07:00
parent 4f84a19e75
commit 4184baca77
3 changed files with 176 additions and 22 deletions

View File

@@ -7,8 +7,10 @@
*
* SSE event shapes:
* data: {"type":"text","text":"..."}
* data: {"type":"thinking","text":"..."} // model's first-person reasoning
* data: {"type":"tool_start","name":"...","args":{}}
* data: {"type":"tool_result","name":"...","result":"..."}
* data: {"type":"aborted"}
* data: {"type":"done"}
* data: {"type":"error","error":"..."}
*/
@@ -54,7 +56,7 @@ async function ensureChatTables() {
chatTablesReady = true;
}
function buildSystemPrompt(projects: any[], workspace: string): string {
export function buildSystemPrompt(projects: any[], workspace: string): string {
const projectsText = projects.length
? projects
.map(
@@ -291,12 +293,20 @@ export async function POST(request: Request) {
return;
}
// Stream text to client
// Stream user-facing text to client
if (resp.text) {
assistantText += resp.text;
emit({ type: 'text', text: resp.text });
}
// Stream the model's reasoning narration as a separate SSE
// event type. We pay for thinking tokens whether or not we
// ask for them, so making them visible is free transparency
// — and it cures the "tool tray with no narrative" feel.
if (resp.thoughts) {
emit({ type: 'thinking', text: resp.thoughts });
}
// Announce tool calls
for (const tc of resp.toolCalls) {
assistantToolCalls.push(tc);
@@ -366,6 +376,9 @@ export async function POST(request: Request) {
assistantText += summary.text;
emit({ type: 'text', text: summary.text });
}
if (summary.thoughts) {
emit({ type: 'thinking', text: summary.thoughts });
}
} catch {
// Don't let a failed summary kill the stream.
}

View File

@@ -31,6 +31,14 @@ interface Message {
toolCalls?: { id: string; name: string; args: Record<string, unknown> }[];
toolName?: string;
createdAt?: string;
/**
* First-person reasoning narration streamed alongside tool calls.
* Rendered as collapsed italic text above the message bubble; the
* user can expand for the full chain of thought. Discarded on
* persistence (we pay tokens regardless, but the bytes aren't
* worth keeping in PG).
*/
thoughts?: string;
}
interface ToolEvent {
@@ -80,6 +88,72 @@ function renderMarkdown(text: string): string {
// ── Message bubble ────────────────────────────────────────────────────────────
/**
 * Build the one-line preview shown in the collapsed thinking pill.
 *
 * Gemini tends to open each thought with a markdown-bold
 * "**Section Heading**" line. Those headings are stripped so the pill
 * shows the actual sentence rather than
 * "**Examining the Target Server File**". The full text is still
 * available in the expanded view.
 *
 * If stripping headings leaves nothing (the thought so far is *only* a
 * heading, which is common for the first streamed chunk), the heading
 * text itself is used, minus the `**` markers, so the pill is never
 * blank for non-empty input.
 *
 * @param thoughts Raw accumulated reasoning text (may contain markdown).
 * @returns A whitespace-collapsed preview of at most 90 UTF-16 code
 *   units; longer text is cut to 87 units and suffixed with "…".
 */
function thoughtPreview(thoughts: string): string {
  const collapseWhitespace = (s: string): string => s.replace(/\s+/g, " ").trim();

  let preview = collapseWhitespace(thoughts.replace(/^\s*\*\*[^*]+\*\*\s*/gm, ""));
  if (!preview) {
    // Heading-only thought: fall back to the heading text without the
    // bold markers instead of rendering an empty pill.
    preview = collapseWhitespace(thoughts.replace(/\*\*/g, ""));
  }
  if (preview.length <= 90) return preview;

  let cut = preview.slice(0, 87);
  // Don't leave a dangling high surrogate (half of an emoji / astral
  // character) right before the ellipsis.
  const last = cut.charCodeAt(cut.length - 1);
  if (last >= 0xd800 && last <= 0xdbff) cut = cut.slice(0, -1);
  return cut + "…";
}
/**
 * Collapsible "thinking" pill rendered above an assistant message.
 *
 * Collapsed (default): a single-line, ellipsized preview produced by
 * `thoughtPreview`. Expanded: the full reasoning text rendered through
 * `renderMarkdown`. Renders nothing when `thoughts` is blank.
 *
 * The pill behaves as a disclosure button: it is focusable, toggles on
 * click, Enter or Space, and exposes its state via `aria-expanded`.
 * A click that merely finishes a text selection inside the pill does
 * not toggle it, so the reasoning can be selected and copied.
 */
function ThinkingBubble({ thoughts }: { thoughts: string }) {
  const [expanded, setExpanded] = useState(false);
  // Hooks must run before any early return; everything else can wait.
  if (!thoughts.trim()) return null;

  const preview = thoughtPreview(thoughts);
  const toggle = () => setExpanded(v => !v);

  return (
    <div
      role="button"
      tabIndex={0}
      aria-expanded={expanded}
      onClick={(e) => {
        // `userSelect: "text"` invites copying; don't collapse the
        // panel when the click is just the end of a drag-selection.
        const sel = window.getSelection();
        if (sel && !sel.isCollapsed && e.currentTarget.contains(sel.anchorNode)) return;
        toggle();
      }}
      onKeyDown={(e) => {
        // Only react to keys pressed on the pill itself, not on
        // focusable markup (e.g. links) inside the expanded content.
        if (e.target !== e.currentTarget) return;
        if (e.key === "Enter" || e.key === " ") {
          e.preventDefault(); // stop Space from scrolling the page
          toggle();
        }
      }}
      title={expanded ? "Click to collapse" : "Click to see full reasoning"}
      style={{
        display: "flex",
        alignItems: expanded ? "flex-start" : "center",
        gap: 8,
        padding: "6px 12px",
        margin: "4px 0",
        background: "#faf8f5",
        border: "1px dashed #e0dad0",
        borderRadius: 8,
        fontSize: "0.72rem",
        color: "#8a847e",
        fontStyle: "italic",
        fontFamily: "var(--font-inter),ui-sans-serif,sans-serif",
        cursor: "pointer",
        userSelect: "text",
        lineHeight: 1.55,
      }}
    >
      <ChevronRight
        style={{
          width: 11,
          height: 11,
          flexShrink: 0,
          marginTop: expanded ? 4 : 0,
          transform: expanded ? "rotate(90deg)" : "none",
          transition: "transform 0.15s",
          color: "#b0a99e",
        }}
      />
      {expanded ? (
        // NOTE(review): `thoughts` is model output injected as HTML.
        // This relies on renderMarkdown escaping raw HTML — confirm.
        <span
          style={{ whiteSpace: "pre-wrap" }}
          dangerouslySetInnerHTML={{ __html: renderMarkdown(thoughts) }}
        />
      ) : (
        <span style={{ overflow: "hidden", textOverflow: "ellipsis", whiteSpace: "nowrap", flex: 1 }}>
          {preview}
        </span>
      )}
    </div>
  );
}
function MessageBubble({ msg }: { msg: Message }) {
const isUser = msg.role === "user";
return (
@@ -95,18 +169,26 @@ function MessageBubble({ msg }: { msg: Message }) {
)}
<div style={{
maxWidth: "82%",
padding: isUser ? "9px 14px" : "10px 14px",
borderRadius: isUser ? "14px 14px 4px 14px" : "4px 14px 14px 14px",
background: isUser ? "#1a1a1a" : "#f7f4ef",
color: isUser ? "#fff" : "#1a1a1a",
fontSize: "0.84rem",
lineHeight: 1.6,
fontFamily: "var(--font-inter),ui-sans-serif,sans-serif",
display: "flex",
flexDirection: "column",
}}>
{isUser ? (
<span style={{ whiteSpace: "pre-wrap" }}>{msg.content}</span>
) : (
<span dangerouslySetInnerHTML={{ __html: renderMarkdown(msg.content) }} />
{!isUser && msg.thoughts && <ThinkingBubble thoughts={msg.thoughts} />}
{(msg.content || isUser) && (
<div style={{
padding: isUser ? "9px 14px" : "10px 14px",
borderRadius: isUser ? "14px 14px 4px 14px" : "4px 14px 14px 14px",
background: isUser ? "#1a1a1a" : "#f7f4ef",
color: isUser ? "#fff" : "#1a1a1a",
fontSize: "0.84rem",
lineHeight: 1.6,
fontFamily: "var(--font-inter),ui-sans-serif,sans-serif",
}}>
{isUser ? (
<span style={{ whiteSpace: "pre-wrap" }}>{msg.content}</span>
) : (
<span dangerouslySetInnerHTML={{ __html: renderMarkdown(msg.content) }} />
)}
</div>
)}
</div>
</div>
@@ -336,6 +418,21 @@ export function ChatPanel() {
}
return next;
});
} else if (ev.type === "thinking" && ev.text) {
// Accumulate reasoning narration on the in-flight
// assistant message. The renderer collapses it by default
// and shows the opening of the narration as a one-line pill.
setMessages((prev) => {
const next = [...prev];
if (msgIndex >= 0 && next[msgIndex]) {
const existing = next[msgIndex].thoughts ?? "";
next[msgIndex] = {
...next[msgIndex],
thoughts: existing + ev.text,
};
}
return next;
});
} else if (ev.type === "tool_start") {
setToolEvents((prev) => [...prev, { name: ev.name, status: "running" }]);
} else if (ev.type === "tool_result") {

View File

@@ -37,7 +37,7 @@ export interface ToolDefinition {
}
export interface ChatChunk {
type: 'text' | 'tool_call' | 'done' | 'error';
type: 'text' | 'thinking' | 'tool_call' | 'done' | 'error';
text?: string;
toolCall?: ToolCall;
error?: string;
@@ -98,11 +98,23 @@ function buildBody(opts: {
messages: ChatMessage[];
tools?: ToolDefinition[];
temperature?: number;
/**
* Ask Gemini to return its thought summaries as parts marked
* `thought: true`. We pay for thinking tokens regardless; this just
* makes them visible so the UI can show "Reading server.js…",
* "Shipping to production…" between tool calls instead of leaving
* the user staring at a silent tool tray. Defaults to true.
*/
includeThoughts?: boolean;
}) {
const body: any = {
contents: toGeminiContents(opts.messages),
systemInstruction: { parts: [{ text: opts.systemPrompt }] },
generationConfig: { temperature: opts.temperature ?? 0.7, maxOutputTokens: 8192 },
generationConfig: {
temperature: opts.temperature ?? 0.7,
maxOutputTokens: 8192,
thinkingConfig: { includeThoughts: opts.includeThoughts ?? true },
},
};
const fns = toGeminiFunctions(opts.tools ?? []);
if (fns) body.tools = fns;
@@ -118,7 +130,15 @@ export async function callGeminiChat(opts: {
messages: ChatMessage[];
tools?: ToolDefinition[];
temperature?: number;
}): Promise<{ text: string; toolCalls: ToolCall[]; error?: string }> {
includeThoughts?: boolean;
}): Promise<{
text: string;
/** First-person reasoning narration; meant for a "thinking" UI panel, not the main bubble. */
thoughts: string;
toolCalls: ToolCall[];
finishReason?: string;
error?: string;
}> {
const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:generateContent?key=${GEMINI_API_KEY}`;
let res: Response;
@@ -129,21 +149,41 @@ export async function callGeminiChat(opts: {
body: JSON.stringify(buildBody(opts)),
});
} catch (e) {
return { text: '', toolCalls: [], error: `Network error: ${e instanceof Error ? e.message : String(e)}` };
return {
text: '',
thoughts: '',
toolCalls: [],
error: `Network error: ${e instanceof Error ? e.message : String(e)}`,
};
}
const data = await res.json().catch(() => ({}));
if (!res.ok) {
const msg = data?.error?.message || JSON.stringify(data).slice(0, 200);
return { text: '', toolCalls: [], error: `Gemini API error ${res.status}: ${msg}` };
return {
text: '',
thoughts: '',
toolCalls: [],
error: `Gemini API error ${res.status}: ${msg}`,
};
}
const parts: any[] = data?.candidates?.[0]?.content?.parts ?? [];
const cand = data?.candidates?.[0];
const parts: any[] = cand?.content?.parts ?? [];
let text = '';
let thoughts = '';
const toolCalls: ToolCall[] = [];
for (const part of parts) {
if (part.text) text += part.text;
if (part.text) {
// CRITICAL: Gemini tags reasoning parts with `thought: true`. If
// we lump them into `text` they leak into the chat bubble as if
// they were prose for the user — which is the opposite of what
// the user wants. Keep them in their own bucket so the route
// can stream them as a separate SSE event type.
if (part.thought) thoughts += part.text;
else text += part.text;
}
if (part.functionCall) {
toolCalls.push({
id: part.functionCall.id || `tc-${Date.now()}-${Math.random().toString(36).slice(2)}`,
@@ -155,7 +195,7 @@ export async function callGeminiChat(opts: {
}
}
return { text, toolCalls };
return { text, thoughts, toolCalls, finishReason: cand?.finishReason };
}
/**
@@ -210,7 +250,11 @@ export async function* streamGeminiChat(opts: {
try { chunk = JSON.parse(data); } catch { continue; }
const parts = chunk?.candidates?.[0]?.content?.parts ?? [];
for (const part of parts) {
if (part.text) yield { type: 'text', text: part.text };
if (part.text) {
yield part.thought
? { type: 'thinking', text: part.text }
: { type: 'text', text: part.text };
}
}
}
}