From 3d1a0e00c7b810abbb64ea77d487bedadd0235b0 Mon Sep 17 00:00:00 2001 From: Mark Henderson Date: Mon, 4 May 2026 10:44:27 -0700 Subject: [PATCH] fix(chat): render multi-round assistant turns as separate bubbles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Smoke test surfaced a UX bug: when the model fired multiple tool rounds with interleaved text, the client concatenated every text SSE event into one growing assistantContent string and rendered it as a single chat bubble. Result: 'now.Spinning up...first boot... The dev container is ready!' — three distinct narrative beats mashed into one wall of run-on text with no visual breaks. Server (app/api/chat/route.ts): - Added assistantTextSegments[] alongside the legacy assistantText. Each non-empty resp.text per round pushes one segment. - assistantText is still produced (joined with blank lines) for backward compat — old consumers still get a single-string content. - finalMsg now persists textSegments[] so reloaded threads can reconstruct per-round segmentation. - Stop-marker / round-cap recovery / loop-break paths all push to segments AND content, with the leading '\n\n' stripped from the segment form so bubble joins look clean. Client (components/vibn-chat/chat-panel.tsx): - TimelineEntry gains a 'text' kind. - text SSE events push a new TimelineEntry instead of growing a single content string. Subsequent tool/thought events land in between, so the renderer naturally groups text-tools-text-tools. - New TimelineText component renders each segment as its own bubble inline with thoughts and tool pills. - MessageBubble's bottom content slot is now skipped for assistant messages that have any timeline entries (their prose is rendered inside the timeline), so we don't duplicate the prose below the timeline. - loadThread() rehydrates timeline from persisted textSegments + toolCalls so reload preserves bubble segmentation. Per-round interleaving of tool calls is not persisted, so on reload the tool pills are appended after the text segments as an approximation. 
Backwards compat: messages without textSegments fall through to the old single-bubble content rendering — no migration needed for existing chat history. Co-authored-by: Cursor --- app/api/chat/route.ts | 38 +++++++-- components/vibn-chat/chat-panel.tsx | 127 ++++++++++++++++++++++++---- 2 files changed, 138 insertions(+), 27 deletions(-) diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts index c2377cdf..d1a28fa1 100644 --- a/app/api/chat/route.ts +++ b/app/api/chat/route.ts @@ -416,6 +416,15 @@ export async function POST(request: Request) { let messages = [...history]; let round = 0; let assistantText = ''; + // Per-round text segments. The model emits one `resp.text` per + // tool-loop round; we used to concatenate them all into one + // `assistantText` blob and render that as a single chat bubble. + // That made multi-round turns look like one giant run-on + // paragraph ("now.Spinning up...first boot...The dev container + // is ready!" with no breaks). Keeping them separate on the + // server lets the client render each as its own bubble and + // restores the segmentation on reload. + const assistantTextSegments: string[] = []; const assistantToolCalls: ToolCall[] = []; let aborted = clientSignal.aborted; const onAbort = () => { @@ -463,7 +472,8 @@ export async function POST(request: Request) { // Stream user-facing text to client if (resp.text) { - assistantText += resp.text; + assistantText += (assistantText ? '\n\n' : '') + resp.text; + assistantTextSegments.push(resp.text); emit({ type: 'text', text: resp.text }); roundsSinceText = 0; } else if (resp.toolCalls.length) { @@ -562,6 +572,7 @@ export async function POST(request: Request) { ? 
'\n\n_(stopped by user)_' : '_(stopped by user before any response)_'; assistantText += stopMarker; + assistantTextSegments.push(stopMarker.trimStart()); emit({ type: 'text', text: stopMarker }); emit({ type: 'aborted' }); } @@ -597,32 +608,41 @@ export async function POST(request: Request) { temperature: 0.3, }); if (summary.text && summary.text.trim()) { - assistantText += summary.text; + assistantText += (assistantText ? '\n\n' : '') + summary.text; + assistantTextSegments.push(summary.text); emit({ type: 'text', text: summary.text }); } else { // Gemini returned empty — fall back to a deterministic // status so the user never sees silent ✓ pills. const fallback = loopBreakReason - ? `\n\nI hit a loop while working on this — ${loopBreakReason}. Want me to try a different approach, or do you want to take a look?` - : `\n\nI ran a chain of ${assistantToolCalls.length} tool calls but didn't reach a clean stopping point. Want me to keep going, or take a different angle?`; - assistantText += fallback; + ? `I hit a loop while working on this — ${loopBreakReason}. Want me to try a different approach, or do you want to take a look?` + : `I ran a chain of ${assistantToolCalls.length} tool calls but didn't reach a clean stopping point. Want me to keep going, or take a different angle?`; + assistantText += (assistantText ? '\n\n' : '') + fallback; + assistantTextSegments.push(fallback); emit({ type: 'text', text: fallback }); } if (summary.thoughts) { emit({ type: 'thinking', text: summary.thoughts }); } } catch { - const fallback = `\n\nI ran ${assistantToolCalls.length} tool calls but the wrap-up failed. Want me to retry, or try a different approach?`; - assistantText += fallback; + const fallback = `I ran ${assistantToolCalls.length} tool calls but the wrap-up failed. Want me to retry, or try a different approach?`; + assistantText += (assistantText ? 
'\n\n' : '') + fallback; + assistantTextSegments.push(fallback); emit({ type: 'text', text: fallback }); } } - // Persist final assistant message - const finalMsg: ChatMessage = { + // Persist final assistant message. We include `textSegments` + // alongside the legacy concatenated `content` so the client + // can render reloaded threads with the same per-round bubble + // segmentation it shows during streaming. Older messages + // (pre-this-fix) won't have textSegments and fall back to + // single-bubble content rendering. + const finalMsg: ChatMessage & { textSegments?: string[] } = { role: 'assistant', content: assistantText, toolCalls: assistantToolCalls.length ? assistantToolCalls : undefined, + textSegments: assistantTextSegments.length ? assistantTextSegments : undefined, }; await query( `INSERT INTO fs_chat_messages (thread_id, user_id, data) VALUES ($1, $2, $3)`, diff --git a/components/vibn-chat/chat-panel.tsx b/components/vibn-chat/chat-panel.tsx index 8a353f96..3a7c2956 100644 --- a/components/vibn-chat/chat-panel.tsx +++ b/components/vibn-chat/chat-panel.tsx @@ -47,7 +47,13 @@ interface Message { type TimelineEntry = | { kind: "thought"; text: string } - | { kind: "tool"; name: string; status: "running" | "done"; result?: string }; + | { kind: "tool"; name: string; status: "running" | "done"; result?: string } + // A text segment from one round of the assistant's tool loop. + // Every text SSE event from the server pushes a new entry; the + // client never merges text events into an existing entry. Tool + // and thought entries that arrive between text events sit between + // the segments, so multi-round turns render as separate bubbles. 
+ | { kind: "text"; text: string }; interface ToolEvent { name: string; @@ -183,7 +189,18 @@ function MessageBubble({ msg }: { msg: Message }) { {!isUser && msg.timeline && msg.timeline.length > 0 && ( )} - {(msg.content || isUser) && ( + {/* + Render the legacy bottom content bubble ONLY when: + - the message is from the user (their bubble is always the + content slot), OR + - the assistant message has no timeline at all (very old + messages from before timeline existed). + When the timeline contains text entries the prose is already + rendered there, and showing it again here would duplicate + every paragraph below the timeline. + */} + {((msg.content && isUser) || + (msg.content && !isUser && (!msg.timeline || msg.timeline.length === 0))) && (
> }; const items: Item[] = []; for (const e of entries) { if (e.kind === "thought") { items.push({ kind: "thought", text: e.text }); + } else if (e.kind === "text") { + items.push({ kind: "text", text: e.text }); } else { const last = items[items.length - 1]; if (last && last.kind === "toolGroup" && last.name === e.name) { @@ -233,13 +253,39 @@ function Timeline({ entries }: { entries: TimelineEntry[] }) { } return (
- {items.map((item, i) => - item.kind === "thought" ? ( - - ) : ( - - ) - )} + {items.map((item, i) => { + if (item.kind === "thought") { + return ; + } + if (item.kind === "text") { + return ; + } + return ; + })} +
+ ); +} + +/** + * One text segment in the assistant's timeline. Rendered as its own + * bubble so each round of multi-tool-loop output reads as a discrete + * step instead of concatenating into a wall of text. + */ +function TimelineText({ text }: { text: string }) { + return ( +
+
); } @@ -449,7 +495,29 @@ export function ChatPanel() { try { const res = await fetch(`/api/chat/threads/${id}`); const data = await res.json(); - setMessages(data.messages || []); + // Hydrate the timeline from persisted textSegments + toolCalls + // so a reloaded thread renders the same per-round bubbles the + // user saw during streaming. Older messages without + // textSegments fall back to the legacy single-bubble path. + const hydrated = (data.messages || []).map((m: any) => { + if (m.role !== "assistant") return m; + const segs: string[] = Array.isArray(m.textSegments) ? m.textSegments : []; + if (segs.length === 0) return m; + const timeline: TimelineEntry[] = segs.map((t) => ({ kind: "text", text: t })); + // We don't have round-level interleaving for tool calls in + // the persisted shape (the schema flattens them), so we drop + // the toolCalls into the timeline at the end. The streamed + // shape preserves true ordering; this is just a reload + // approximation. Good enough — what the user really cares + // about is the text segments not run-on'ing into one blob. + if (Array.isArray(m.toolCalls)) { + for (const tc of m.toolCalls) { + timeline.push({ kind: "tool", name: tc.name, status: "done" }); + } + } + return { ...m, timeline, content: "" }; + }); + setMessages(hydrated); } catch { /* silent */ } }, []); @@ -547,11 +615,29 @@ export function ChatPanel() { try { ev = JSON.parse(line.slice(6)); } catch { continue; } if (ev.type === "text" && ev.text) { - assistantContent += ev.text; + // Each text SSE event = one round of the model's text + // output. Push a new "text" timeline entry so the + // renderer can show multi-round turns as separate + // bubbles instead of one run-on paragraph. We still + // maintain `assistantContent` (joined with blank lines) + // so the legacy single-bubble fallback path and any + // post-stream consumers still work. + assistantContent += (assistantContent ? 
"\n\n" : "") + ev.text; setMessages((prev) => { const next = [...prev]; if (msgIndex >= 0 && next[msgIndex]) { - next[msgIndex] = { ...next[msgIndex], content: assistantContent }; + const tl = next[msgIndex].timeline ?? []; + next[msgIndex] = { + ...next[msgIndex], + // Don't write to msg.content during streaming — + // the timeline is the source of truth. Setting + // content on every text event re-renders one + // giant bubble in the bottom slot AND the + // segmented timeline above it, duplicating the + // same prose. Persisted messages pick up + // content via the final flush below. + timeline: [...tl, { kind: "text", text: ev.text }], + }; } return next; }); @@ -610,13 +696,18 @@ export function ChatPanel() { } else if (ev.type === "error") { const errText = ev.error || "Unknown error"; const isToolErr = /tool|mcp|coolify|gitea/i.test(errText); - assistantContent += isToolErr - ? `\n\n⚠️ **Tool error:** ${errText}` - : `\n\n⚠️ ${errText}`; + const errBubble = isToolErr + ? `⚠️ **Tool error:** ${errText}` + : `⚠️ ${errText}`; + assistantContent += (assistantContent ? "\n\n" : "") + errBubble; setMessages((prev) => { const next = [...prev]; if (msgIndex >= 0 && next[msgIndex]) { - next[msgIndex] = { ...next[msgIndex], content: assistantContent }; + const tl = next[msgIndex].timeline ?? []; + next[msgIndex] = { + ...next[msgIndex], + timeline: [...tl, { kind: "text", text: errBubble }], + }; } return next; });