feat(chat): expose Gemini's reasoning narration as a thinking pill
Today the chat shows ✓-icon tool trays with no narration between
calls — the user has no idea WHY the AI just called fs_edit or
ship. Meanwhile Gemini is producing 500-1000 chars of first-person
reasoning per round ("Updating the Express Server: A Quick
Production Deployment / Right, so we have a basic Express server
here, nothing fancy. I need to get a new version live...") and
billing us for those tokens — we just weren't asking for the summaries to be returned.
Three layers:
1. lib/ai/gemini-chat.ts
- generationConfig.thinkingConfig.includeThoughts = true (default
true, opt-out via includeThoughts: false). We're already paying
for thinking tokens regardless of this flag — it just controls
whether the model returns the human-readable summary or only the
compressed signature.
- callGeminiChat now returns { text, thoughts, toolCalls,
finishReason } and the parser splits parts by `part.thought`.
CRITICAL bug avoided: previously `if (part.text) text += ...`
would have lumped thoughts into the chat bubble verbatim.
- streamGeminiChat yields `{ type: 'thinking' }` for thought parts.
2. app/api/chat/route.ts
- New SSE event: `data: {"type":"thinking","text":"..."}`
- Emitted on every round that produced thoughts, alongside text + tool_start.
- Recovery-summary branch also emits thoughts so even when the
model produces no user-facing prose, the user sees the model's
reasoning instead of dead silence.
3. components/vibn-chat/chat-panel.tsx
- Message gains optional `thoughts` field (in-memory only — we do
NOT persist thoughts to fs_chat_messages; they're ephemeral and
cheap to drop).
- New ThinkingBubble component: dashed-border italic pill above
the assistant bubble, collapsed by default to show one-line
preview, click to expand for full chain. Strips Gemini's
"**Section Heading**" prefixes from the preview.
- SSE handler accumulates thinking chunks onto the in-flight
assistant message.
UX impact: instead of staring at fs.read ✓ fs.edit ✓ ship ✓ icons,
the user sees "Examining the target server file..." → "Shipping the
twenty-crm project..." in real time. Costs zero additional tokens
(we already paid for the thoughts).
Cleanup: removed scripts/probe-gemini-raw.ts and
scripts/probe-recovery-summary.ts — diagnostic scripts that
identified this opportunity, no longer needed in-tree.
Made-with: Cursor
This commit is contained in:
@@ -7,8 +7,10 @@
|
||||
*
|
||||
* SSE event shapes:
|
||||
* data: {"type":"text","text":"..."}
|
||||
* data: {"type":"thinking","text":"..."} // model's first-person reasoning
|
||||
* data: {"type":"tool_start","name":"...","args":{}}
|
||||
* data: {"type":"tool_result","name":"...","result":"..."}
|
||||
* data: {"type":"aborted"}
|
||||
* data: {"type":"done"}
|
||||
* data: {"type":"error","error":"..."}
|
||||
*/
|
||||
@@ -54,7 +56,7 @@ async function ensureChatTables() {
|
||||
chatTablesReady = true;
|
||||
}
|
||||
|
||||
function buildSystemPrompt(projects: any[], workspace: string): string {
|
||||
export function buildSystemPrompt(projects: any[], workspace: string): string {
|
||||
const projectsText = projects.length
|
||||
? projects
|
||||
.map(
|
||||
@@ -291,12 +293,20 @@ export async function POST(request: Request) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Stream text to client
|
||||
// Stream user-facing text to client
|
||||
if (resp.text) {
|
||||
assistantText += resp.text;
|
||||
emit({ type: 'text', text: resp.text });
|
||||
}
|
||||
|
||||
// Stream the model's reasoning narration as a separate SSE
|
||||
// event type. We pay for thinking tokens whether or not we
|
||||
// ask for them, so making them visible is free transparency
|
||||
// — and it cures the "tool tray with no narrative" feel.
|
||||
if (resp.thoughts) {
|
||||
emit({ type: 'thinking', text: resp.thoughts });
|
||||
}
|
||||
|
||||
// Announce tool calls
|
||||
for (const tc of resp.toolCalls) {
|
||||
assistantToolCalls.push(tc);
|
||||
@@ -366,6 +376,9 @@ export async function POST(request: Request) {
|
||||
assistantText += summary.text;
|
||||
emit({ type: 'text', text: summary.text });
|
||||
}
|
||||
if (summary.thoughts) {
|
||||
emit({ type: 'thinking', text: summary.thoughts });
|
||||
}
|
||||
} catch {
|
||||
// Don't let a failed summary kill the stream.
|
||||
}
|
||||
|
||||
@@ -31,6 +31,14 @@ interface Message {
|
||||
toolCalls?: { id: string; name: string; args: Record<string, unknown> }[];
|
||||
toolName?: string;
|
||||
createdAt?: string;
|
||||
/**
|
||||
* First-person reasoning narration streamed alongside tool calls.
|
||||
* Rendered as collapsed italic text above the message bubble; the
|
||||
* user can expand for the full chain of thought. Discarded on
|
||||
* persistence (we pay tokens regardless, but the bytes aren't
|
||||
* worth keeping in PG).
|
||||
*/
|
||||
thoughts?: string;
|
||||
}
|
||||
|
||||
interface ToolEvent {
|
||||
@@ -80,6 +88,72 @@ function renderMarkdown(text: string): string {
|
||||
|
||||
// ── Message bubble ────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Build the one-line preview shown in the collapsed thinking pill.
 *
 * Gemini tends to start each thought with a markdown-bold
 * "**Section Heading**" line. Those headings are stripped so the pill
 * shows the actual sentence rather than
 * "**Examining the Target Server File**". Whitespace runs (including
 * newlines) are collapsed to single spaces and the result is capped at
 * 90 characters, with "…" replacing the tail when truncated.
 *
 * If stripping headings leaves nothing — e.g. the only chunk streamed so
 * far is a heading — the heading text itself (without the `**` markers)
 * is used, so the pill is never rendered blank for non-blank input.
 *
 * The full text is still available in the expanded view.
 *
 * @param thoughts Raw accumulated reasoning text.
 * @returns A single-line preview of at most 90 characters; empty only
 *   when `thoughts` is empty or whitespace.
 */
function thoughtPreview(thoughts: string): string {
  const collapseWhitespace = (s: string): string => s.replace(/\s+/g, " ").trim();

  const withoutHeadings = collapseWhitespace(
    thoughts.replace(/^\s*\*\*[^*]+\*\*\s*/gm, ""),
  );

  // Heading-only input would otherwise yield "" and an empty pill; fall
  // back to the heading words with the bold markers removed.
  const stripped =
    withoutHeadings || collapseWhitespace(thoughts.replace(/\*\*/g, ""));

  if (stripped.length <= 90) return stripped;
  return stripped.slice(0, 87) + "…";
}
|
||||
|
||||
/**
 * Collapsible pill that shows the model's reasoning narration.
 *
 * Collapsed (the default) it renders a one-line preview produced by
 * `thoughtPreview`; expanded it renders the full text through
 * `renderMarkdown`. Renders nothing when `thoughts` is empty or
 * whitespace.
 *
 * The pill behaves as a toggle button: it is focusable, announces its
 * expanded state, and responds to Enter / Space as well as clicks.
 *
 * @param thoughts Accumulated reasoning text for one assistant message.
 */
function ThinkingBubble({ thoughts }: { thoughts: string }) {
  const [expanded, setExpanded] = useState(false);

  // The hook above must run unconditionally; everything else can wait
  // until we know there is something to show.
  if (!thoughts.trim()) return null;

  const preview = thoughtPreview(thoughts);
  const toggle = () => setExpanded(v => !v);

  return (
    <div
      role="button"
      tabIndex={0}
      aria-expanded={expanded}
      onClick={(e) => {
        // The text is selectable (userSelect: "text"). Releasing the
        // mouse after a drag-selection inside the pill also fires a
        // click; don't treat that as a toggle or the selection is lost.
        const selection = window.getSelection();
        if (
          selection &&
          !selection.isCollapsed &&
          e.currentTarget.contains(selection.anchorNode)
        ) {
          return;
        }
        toggle();
      }}
      onKeyDown={(e) => {
        if (e.key === "Enter" || e.key === " ") {
          // Prevent Space from scrolling the chat while toggling.
          e.preventDefault();
          toggle();
        }
      }}
      title={expanded ? "Click to collapse" : "Click to see full reasoning"}
      style={{
        display: "flex",
        alignItems: expanded ? "flex-start" : "center",
        gap: 8,
        padding: "6px 12px",
        margin: "4px 0",
        background: "#faf8f5",
        border: "1px dashed #e0dad0",
        borderRadius: 8,
        fontSize: "0.72rem",
        color: "#8a847e",
        fontStyle: "italic",
        fontFamily: "var(--font-inter),ui-sans-serif,sans-serif",
        cursor: "pointer",
        userSelect: "text",
        lineHeight: 1.55,
      }}
    >
      <ChevronRight
        style={{
          width: 11,
          height: 11,
          flexShrink: 0,
          marginTop: expanded ? 4 : 0,
          transform: expanded ? "rotate(90deg)" : "none",
          transition: "transform 0.15s",
          color: "#b0a99e",
        }}
      />
      {expanded ? (
        // NOTE(review): this injects model-generated text as HTML; it is
        // only safe if renderMarkdown escapes raw HTML — confirm.
        <span
          style={{ whiteSpace: "pre-wrap" }}
          dangerouslySetInnerHTML={{ __html: renderMarkdown(thoughts) }}
        />
      ) : (
        <span style={{ overflow: "hidden", textOverflow: "ellipsis", whiteSpace: "nowrap", flex: 1 }}>
          {preview}
        </span>
      )}
    </div>
  );
}
|
||||
|
||||
function MessageBubble({ msg }: { msg: Message }) {
|
||||
const isUser = msg.role === "user";
|
||||
return (
|
||||
@@ -95,18 +169,26 @@ function MessageBubble({ msg }: { msg: Message }) {
|
||||
)}
|
||||
<div style={{
|
||||
maxWidth: "82%",
|
||||
padding: isUser ? "9px 14px" : "10px 14px",
|
||||
borderRadius: isUser ? "14px 14px 4px 14px" : "4px 14px 14px 14px",
|
||||
background: isUser ? "#1a1a1a" : "#f7f4ef",
|
||||
color: isUser ? "#fff" : "#1a1a1a",
|
||||
fontSize: "0.84rem",
|
||||
lineHeight: 1.6,
|
||||
fontFamily: "var(--font-inter),ui-sans-serif,sans-serif",
|
||||
display: "flex",
|
||||
flexDirection: "column",
|
||||
}}>
|
||||
{isUser ? (
|
||||
<span style={{ whiteSpace: "pre-wrap" }}>{msg.content}</span>
|
||||
) : (
|
||||
<span dangerouslySetInnerHTML={{ __html: renderMarkdown(msg.content) }} />
|
||||
{!isUser && msg.thoughts && <ThinkingBubble thoughts={msg.thoughts} />}
|
||||
{(msg.content || isUser) && (
|
||||
<div style={{
|
||||
padding: isUser ? "9px 14px" : "10px 14px",
|
||||
borderRadius: isUser ? "14px 14px 4px 14px" : "4px 14px 14px 14px",
|
||||
background: isUser ? "#1a1a1a" : "#f7f4ef",
|
||||
color: isUser ? "#fff" : "#1a1a1a",
|
||||
fontSize: "0.84rem",
|
||||
lineHeight: 1.6,
|
||||
fontFamily: "var(--font-inter),ui-sans-serif,sans-serif",
|
||||
}}>
|
||||
{isUser ? (
|
||||
<span style={{ whiteSpace: "pre-wrap" }}>{msg.content}</span>
|
||||
) : (
|
||||
<span dangerouslySetInnerHTML={{ __html: renderMarkdown(msg.content) }} />
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
@@ -336,6 +418,21 @@ export function ChatPanel() {
|
||||
}
|
||||
return next;
|
||||
});
|
||||
} else if (ev.type === "thinking" && ev.text) {
|
||||
// Accumulate reasoning narration on the in-flight
|
||||
// assistant message. The renderer collapses it by
|
||||
// default and shows a one-line preview of the accumulated text as a pill.
|
||||
setMessages((prev) => {
|
||||
const next = [...prev];
|
||||
if (msgIndex >= 0 && next[msgIndex]) {
|
||||
const existing = next[msgIndex].thoughts ?? "";
|
||||
next[msgIndex] = {
|
||||
...next[msgIndex],
|
||||
thoughts: existing + ev.text,
|
||||
};
|
||||
}
|
||||
return next;
|
||||
});
|
||||
} else if (ev.type === "tool_start") {
|
||||
setToolEvents((prev) => [...prev, { name: ev.name, status: "running" }]);
|
||||
} else if (ev.type === "tool_result") {
|
||||
|
||||
@@ -37,7 +37,7 @@ export interface ToolDefinition {
|
||||
}
|
||||
|
||||
export interface ChatChunk {
|
||||
type: 'text' | 'tool_call' | 'done' | 'error';
|
||||
type: 'text' | 'thinking' | 'tool_call' | 'done' | 'error';
|
||||
text?: string;
|
||||
toolCall?: ToolCall;
|
||||
error?: string;
|
||||
@@ -98,11 +98,23 @@ function buildBody(opts: {
|
||||
messages: ChatMessage[];
|
||||
tools?: ToolDefinition[];
|
||||
temperature?: number;
|
||||
/**
|
||||
* Ask Gemini to return its thought summaries as parts marked
|
||||
* `thought: true`. We pay for thinking tokens regardless; this just
|
||||
* makes them visible so the UI can show "Reading server.js…",
|
||||
* "Shipping to production…" between tool calls instead of leaving
|
||||
* the user staring at a silent tool tray. Defaults to true.
|
||||
*/
|
||||
includeThoughts?: boolean;
|
||||
}) {
|
||||
const body: any = {
|
||||
contents: toGeminiContents(opts.messages),
|
||||
systemInstruction: { parts: [{ text: opts.systemPrompt }] },
|
||||
generationConfig: { temperature: opts.temperature ?? 0.7, maxOutputTokens: 8192 },
|
||||
generationConfig: {
|
||||
temperature: opts.temperature ?? 0.7,
|
||||
maxOutputTokens: 8192,
|
||||
thinkingConfig: { includeThoughts: opts.includeThoughts ?? true },
|
||||
},
|
||||
};
|
||||
const fns = toGeminiFunctions(opts.tools ?? []);
|
||||
if (fns) body.tools = fns;
|
||||
@@ -118,7 +130,15 @@ export async function callGeminiChat(opts: {
|
||||
messages: ChatMessage[];
|
||||
tools?: ToolDefinition[];
|
||||
temperature?: number;
|
||||
}): Promise<{ text: string; toolCalls: ToolCall[]; error?: string }> {
|
||||
includeThoughts?: boolean;
|
||||
}): Promise<{
|
||||
text: string;
|
||||
/** First-person reasoning narration; meant for a "thinking" UI panel, not the main bubble. */
|
||||
thoughts: string;
|
||||
toolCalls: ToolCall[];
|
||||
finishReason?: string;
|
||||
error?: string;
|
||||
}> {
|
||||
const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:generateContent?key=${GEMINI_API_KEY}`;
|
||||
|
||||
let res: Response;
|
||||
@@ -129,21 +149,41 @@ export async function callGeminiChat(opts: {
|
||||
body: JSON.stringify(buildBody(opts)),
|
||||
});
|
||||
} catch (e) {
|
||||
return { text: '', toolCalls: [], error: `Network error: ${e instanceof Error ? e.message : String(e)}` };
|
||||
return {
|
||||
text: '',
|
||||
thoughts: '',
|
||||
toolCalls: [],
|
||||
error: `Network error: ${e instanceof Error ? e.message : String(e)}`,
|
||||
};
|
||||
}
|
||||
|
||||
const data = await res.json().catch(() => ({}));
|
||||
if (!res.ok) {
|
||||
const msg = data?.error?.message || JSON.stringify(data).slice(0, 200);
|
||||
return { text: '', toolCalls: [], error: `Gemini API error ${res.status}: ${msg}` };
|
||||
return {
|
||||
text: '',
|
||||
thoughts: '',
|
||||
toolCalls: [],
|
||||
error: `Gemini API error ${res.status}: ${msg}`,
|
||||
};
|
||||
}
|
||||
|
||||
const parts: any[] = data?.candidates?.[0]?.content?.parts ?? [];
|
||||
const cand = data?.candidates?.[0];
|
||||
const parts: any[] = cand?.content?.parts ?? [];
|
||||
let text = '';
|
||||
let thoughts = '';
|
||||
const toolCalls: ToolCall[] = [];
|
||||
|
||||
for (const part of parts) {
|
||||
if (part.text) text += part.text;
|
||||
if (part.text) {
|
||||
// CRITICAL: Gemini tags reasoning parts with `thought: true`. If
|
||||
// we lump them into `text` they leak into the chat bubble as if
|
||||
// they were prose for the user — which is the opposite of what
|
||||
// the user wants. Keep them in their own bucket so the route
|
||||
// can stream them as a separate SSE event type.
|
||||
if (part.thought) thoughts += part.text;
|
||||
else text += part.text;
|
||||
}
|
||||
if (part.functionCall) {
|
||||
toolCalls.push({
|
||||
id: part.functionCall.id || `tc-${Date.now()}-${Math.random().toString(36).slice(2)}`,
|
||||
@@ -155,7 +195,7 @@ export async function callGeminiChat(opts: {
|
||||
}
|
||||
}
|
||||
|
||||
return { text, toolCalls };
|
||||
return { text, thoughts, toolCalls, finishReason: cand?.finishReason };
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -210,7 +250,11 @@ export async function* streamGeminiChat(opts: {
|
||||
try { chunk = JSON.parse(data); } catch { continue; }
|
||||
const parts = chunk?.candidates?.[0]?.content?.parts ?? [];
|
||||
for (const part of parts) {
|
||||
if (part.text) yield { type: 'text', text: part.text };
|
||||
if (part.text) {
|
||||
yield part.thought
|
||||
? { type: 'thinking', text: part.text }
|
||||
: { type: 'text', text: part.text };
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user