diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts index 99a70940..1b272b21 100644 --- a/app/api/chat/route.ts +++ b/app/api/chat/route.ts @@ -7,8 +7,10 @@ * * SSE event shapes: * data: {"type":"text","text":"..."} + * data: {"type":"thinking","text":"..."} // model's first-person reasoning * data: {"type":"tool_start","name":"...","args":{}} * data: {"type":"tool_result","name":"...","result":"..."} + * data: {"type":"aborted"} * data: {"type":"done"} * data: {"type":"error","error":"..."} */ @@ -54,7 +56,7 @@ async function ensureChatTables() { chatTablesReady = true; } -function buildSystemPrompt(projects: any[], workspace: string): string { +export function buildSystemPrompt(projects: any[], workspace: string): string { const projectsText = projects.length ? projects .map( @@ -291,12 +293,20 @@ export async function POST(request: Request) { return; } - // Stream text to client + // Stream user-facing text to client if (resp.text) { assistantText += resp.text; emit({ type: 'text', text: resp.text }); } + // Stream the model's reasoning narration as a separate SSE + // event type. We pay for thinking tokens whether or not we + // ask for them, so making them visible is free transparency + // — and it cures the "tool tray with no narrative" feel. + if (resp.thoughts) { + emit({ type: 'thinking', text: resp.thoughts }); + } + // Announce tool calls for (const tc of resp.toolCalls) { assistantToolCalls.push(tc); @@ -366,6 +376,9 @@ export async function POST(request: Request) { assistantText += summary.text; emit({ type: 'text', text: summary.text }); } + if (summary.thoughts) { + emit({ type: 'thinking', text: summary.thoughts }); + } } catch { // Don't let a failed summary kill the stream. } diff --git a/components/vibn-chat/chat-panel.tsx b/components/vibn-chat/chat-panel.tsx index 66514727..f1801dde 100644 --- a/components/vibn-chat/chat-panel.tsx +++ b/components/vibn-chat/chat-panel.tsx @@ -31,6 +31,14 @@ interface Message { toolCalls?: { id: string; name: string; args: Record }[]; toolName?: string; createdAt?: string; + /** + * First-person reasoning narration streamed alongside tool calls. + * Rendered as collapsed italic text above the message bubble; the + * user can expand for the full chain of thought. Discarded on + * persistence (we pay tokens regardless, but the bytes aren't + * worth keeping in PG). + */ + thoughts?: string; } interface ToolEvent { @@ -80,6 +88,72 @@ function renderMarkdown(text: string): string { // ── Message bubble ──────────────────────────────────────────────────────────── +/** + * Strip the markdown-bold "**Section Heading**" lines that Gemini + * loves to start each thought with so the collapsed pill shows the + * actual sentence rather than "**Examining the Target Server File**". + * The full text is still available in the expanded view. + */ +function thoughtPreview(thoughts: string): string { + const stripped = thoughts + .replace(/^\s*\*\*[^*]+\*\*\s*/gm, "") + .replace(/\s+/g, " ") + .trim(); + if (stripped.length <= 90) return stripped; + return stripped.slice(0, 87) + "…"; +} + +function ThinkingBubble({ thoughts }: { thoughts: string }) { + const [expanded, setExpanded] = useState(false); + const preview = thoughtPreview(thoughts); + if (!thoughts.trim()) return null; + return ( +
setExpanded(v => !v)} + title={expanded ? "Click to collapse" : "Click to see full reasoning"} + style={{ + display: "flex", + alignItems: expanded ? "flex-start" : "center", + gap: 8, + padding: "6px 12px", + margin: "4px 0", + background: "#faf8f5", + border: "1px dashed #e0dad0", + borderRadius: 8, + fontSize: "0.72rem", + color: "#8a847e", + fontStyle: "italic", + fontFamily: "var(--font-inter),ui-sans-serif,sans-serif", + cursor: "pointer", + userSelect: "text", + lineHeight: 1.55, + }} + > + + {expanded ? ( + + ) : ( + + {preview} + + )} +
+ ); +} + function MessageBubble({ msg }: { msg: Message }) { const isUser = msg.role === "user"; return ( @@ -95,18 +169,26 @@ function MessageBubble({ msg }: { msg: Message }) { )}
- {isUser ? ( - {msg.content} - ) : ( - + {!isUser && msg.thoughts && } + {(msg.content || isUser) && ( +
+ {isUser ? ( + {msg.content} + ) : ( + + )} +
)}
@@ -336,6 +418,21 @@ export function ChatPanel() { } return next; }); + } else if (ev.type === "thinking" && ev.text) { + // Accumulate reasoning narration on the in-flight + // assistant message. The renderer collapses it by + // default and shows the latest sentence as a pill. + setMessages((prev) => { + const next = [...prev]; + if (msgIndex >= 0 && next[msgIndex]) { + const existing = next[msgIndex].thoughts ?? ""; + next[msgIndex] = { + ...next[msgIndex], + thoughts: existing + ev.text, + }; + } + return next; + }); } else if (ev.type === "tool_start") { setToolEvents((prev) => [...prev, { name: ev.name, status: "running" }]); } else if (ev.type === "tool_result") { diff --git a/lib/ai/gemini-chat.ts b/lib/ai/gemini-chat.ts index d693e9c5..8690962b 100644 --- a/lib/ai/gemini-chat.ts +++ b/lib/ai/gemini-chat.ts @@ -37,7 +37,7 @@ export interface ToolDefinition { } export interface ChatChunk { - type: 'text' | 'tool_call' | 'done' | 'error'; + type: 'text' | 'thinking' | 'tool_call' | 'done' | 'error'; text?: string; toolCall?: ToolCall; error?: string; @@ -98,11 +98,23 @@ function buildBody(opts: { messages: ChatMessage[]; tools?: ToolDefinition[]; temperature?: number; + /** + * Ask Gemini to return its thought summaries as parts marked + * `thought: true`. We pay for thinking tokens regardless; this just + * makes them visible so the UI can show "Reading server.js…", + * "Shipping to production…" between tool calls instead of leaving + * the user staring at a silent tool tray. Defaults to true. + */ + includeThoughts?: boolean; }) { const body: any = { contents: toGeminiContents(opts.messages), systemInstruction: { parts: [{ text: opts.systemPrompt }] }, - generationConfig: { temperature: opts.temperature ?? 0.7, maxOutputTokens: 8192 }, + generationConfig: { + temperature: opts.temperature ?? 0.7, + maxOutputTokens: 8192, + thinkingConfig: { includeThoughts: opts.includeThoughts ?? true }, + }, }; const fns = toGeminiFunctions(opts.tools ?? []); if (fns) body.tools = fns; @@ -118,7 +130,15 @@ export async function callGeminiChat(opts: { messages: ChatMessage[]; tools?: ToolDefinition[]; temperature?: number; -}): Promise<{ text: string; toolCalls: ToolCall[]; error?: string }> { + includeThoughts?: boolean; +}): Promise<{ + text: string; + /** First-person reasoning narration; meant for a "thinking" UI panel, not the main bubble. */ + thoughts: string; + toolCalls: ToolCall[]; + finishReason?: string; + error?: string; +}> { const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:generateContent?key=${GEMINI_API_KEY}`; let res: Response; @@ -129,21 +149,41 @@ export async function callGeminiChat(opts: { body: JSON.stringify(buildBody(opts)), }); } catch (e) { - return { text: '', toolCalls: [], error: `Network error: ${e instanceof Error ? e.message : String(e)}` }; + return { + text: '', + thoughts: '', + toolCalls: [], + error: `Network error: ${e instanceof Error ? e.message : String(e)}`, + }; } const data = await res.json().catch(() => ({})); if (!res.ok) { const msg = data?.error?.message || JSON.stringify(data).slice(0, 200); - return { text: '', toolCalls: [], error: `Gemini API error ${res.status}: ${msg}` }; + return { + text: '', + thoughts: '', + toolCalls: [], + error: `Gemini API error ${res.status}: ${msg}`, + }; } - const parts: any[] = data?.candidates?.[0]?.content?.parts ?? []; + const cand = data?.candidates?.[0]; + const parts: any[] = cand?.content?.parts ?? []; let text = ''; + let thoughts = ''; const toolCalls: ToolCall[] = []; for (const part of parts) { - if (part.text) text += part.text; + if (part.text) { + // CRITICAL: Gemini tags reasoning parts with `thought: true`. If + // we lump them into `text` they leak into the chat bubble as if + // they were prose for the user — which is the opposite of what + // the user wants. Keep them in their own bucket so the route + // can stream them as a separate SSE event type. + if (part.thought) thoughts += part.text; + else text += part.text; + } if (part.functionCall) { toolCalls.push({ id: part.functionCall.id || `tc-${Date.now()}-${Math.random().toString(36).slice(2)}`, @@ -155,7 +195,7 @@ export async function callGeminiChat(opts: { } } - return { text, toolCalls }; + return { text, thoughts, toolCalls, finishReason: cand?.finishReason }; } /** @@ -210,7 +250,11 @@ export async function* streamGeminiChat(opts: { try { chunk = JSON.parse(data); } catch { continue; } const parts = chunk?.candidates?.[0]?.content?.parts ?? []; for (const part of parts) { - if (part.text) yield { type: 'text', text: part.text }; + if (part.text) { + yield part.thought + ? { type: 'thinking', text: part.text } + : { type: 'text', text: part.text }; + } } } }