fix(chat): render multi-round assistant turns as separate bubbles

Smoke test surfaced a UX bug: when the model fired multiple tool rounds with interleaved text, the client concatenated every text SSE event into one growing assistantContent string and rendered it as a single chat bubble. Result: 'now.Spinning up...first boot... The dev container is ready!' — three distinct narrative beats mashed into one wall of run-on text with no visual breaks. Server (app/api/chat/route.ts): - Added assistantTextSegments[] alongside the legacy assistantText. Each non-empty resp.text per round pushes one segment. - assistantText is still produced (joined with blank lines) for backward compat — old consumers still get a single-string content. - finalMsg now persists textSegments[] so reloaded threads can reconstruct per-round segmentation. - Stop-marker / round-cap recovery / loop-break paths all push to segments AND content, with the leading '\n\n' stripped from the segment form so bubble joins look clean. Client (components/vibn-chat/chat-panel.tsx): - TimelineEntry gains a 'text' kind. - text SSE events push a new TimelineEntry instead of growing a single content string. Subsequent tool/thought events land in between, so the renderer naturally groups text-tools-text-tools. - New TimelineText component renders each segment as its own bubble inline with thoughts and tool pills. - MessageBubble's bottom content slot is now skipped for assistant messages whose timeline has any entries at all (the guard checks for an empty timeline, not specifically for text entries), so we don't duplicate the prose below the timeline. - loadThread() rehydrates timeline from persisted textSegments + toolCalls so reload preserves bubble segmentation (on reload the tool pills are appended after all text segments, because round-level interleaving isn't persisted). Backwards compat: messages without textSegments fall through to the old single-bubble content rendering — no migration needed for existing chat history. Co-authored-by: Cursor <cursoragent@cursor.com>
2026-05-04 10:44:27 -07:00
parent 4dd8974b43
commit 3d1a0e00c7
2 changed files with 138 additions and 27 deletions
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -416,6 +416,15 @@ export async function POST(request: Request) {
      let messages = [...history];
      let round = 0;
      let assistantText = '';
+      // Per-round text segments. The model emits one `resp.text` per
+      // tool-loop round; we used to concatenate them all into one
+      // `assistantText` blob and render that as a single chat bubble.
+      // That made multi-round turns look like one giant run-on
+      // paragraph ("now.Spinning up...first boot...The dev container
+      // is ready!" with no breaks). Keeping them separate on the
+      // server lets the client render each as its own bubble and
+      // restores the segmentation on reload.
+      const assistantTextSegments: string[] = [];
      const assistantToolCalls: ToolCall[] = [];
      let aborted = clientSignal.aborted;
      const onAbort = () => {
@@ -463,7 +472,8 @@ export async function POST(request: Request) {

          // Stream user-facing text to client
          if (resp.text) {
-            assistantText += resp.text;
+            assistantText += (assistantText ? '\n\n' : '') + resp.text;
+            assistantTextSegments.push(resp.text);
            emit({ type: 'text', text: resp.text });
            roundsSinceText = 0;
          } else if (resp.toolCalls.length) {
@@ -562,6 +572,7 @@ export async function POST(request: Request) {
            ? '\n\n_(stopped by user)_'
            : '_(stopped by user before any response)_';
          assistantText += stopMarker;
+          assistantTextSegments.push(stopMarker.trimStart());
          emit({ type: 'text', text: stopMarker });
          emit({ type: 'aborted' });
        }
@@ -597,32 +608,41 @@ export async function POST(request: Request) {
              temperature: 0.3,
            });
            if (summary.text && summary.text.trim()) {
-              assistantText += summary.text;
+              assistantText += (assistantText ? '\n\n' : '') + summary.text;
+              assistantTextSegments.push(summary.text);
              emit({ type: 'text', text: summary.text });
            } else {
              // Gemini returned empty — fall back to a deterministic
              // status so the user never sees silent ✓ pills.
              const fallback = loopBreakReason
-                ? `\n\nI hit a loop while working on this — ${loopBreakReason}. Want me to try a different approach, or do you want to take a look?`
-                : `\n\nI ran a chain of ${assistantToolCalls.length} tool calls but didn't reach a clean stopping point. Want me to keep going, or take a different angle?`;
-              assistantText += fallback;
+                ? `I hit a loop while working on this — ${loopBreakReason}. Want me to try a different approach, or do you want to take a look?`
+                : `I ran a chain of ${assistantToolCalls.length} tool calls but didn't reach a clean stopping point. Want me to keep going, or take a different angle?`;
+              assistantText += (assistantText ? '\n\n' : '') + fallback;
+              assistantTextSegments.push(fallback);
              emit({ type: 'text', text: fallback });
            }
            if (summary.thoughts) {
              emit({ type: 'thinking', text: summary.thoughts });
            }
          } catch {
-            const fallback = `\n\nI ran ${assistantToolCalls.length} tool calls but the wrap-up failed. Want me to retry, or try a different approach?`;
-            assistantText += fallback;
+            const fallback = `I ran ${assistantToolCalls.length} tool calls but the wrap-up failed. Want me to retry, or try a different approach?`;
+            assistantText += (assistantText ? '\n\n' : '') + fallback;
+            assistantTextSegments.push(fallback);
            emit({ type: 'text', text: fallback });
          }
        }

-        // Persist final assistant message
-        const finalMsg: ChatMessage = {
+        // Persist final assistant message. We include `textSegments`
+        // alongside the legacy concatenated `content` so the client
+        // can render reloaded threads with the same per-round bubble
+        // segmentation it shows during streaming. Older messages
+        // (pre-this-fix) won't have textSegments and fall back to
+        // single-bubble content rendering.
+        const finalMsg: ChatMessage & { textSegments?: string[] } = {
          role: 'assistant',
          content: assistantText,
          toolCalls: assistantToolCalls.length ? assistantToolCalls : undefined,
+          textSegments: assistantTextSegments.length ? assistantTextSegments : undefined,
        };
        await query(
          `INSERT INTO fs_chat_messages (thread_id, user_id, data) VALUES ($1, $2, $3)`,
--- a/components/vibn-chat/chat-panel.tsx
+++ b/components/vibn-chat/chat-panel.tsx
@@ -47,7 +47,13 @@ interface Message {

 type TimelineEntry =
  | { kind: "thought"; text: string }
-  | { kind: "tool"; name: string; status: "running" | "done"; result?: string };
+  | { kind: "tool"; name: string; status: "running" | "done"; result?: string }
+  // A text segment from one round of the assistant's tool loop.
+  // Each text SSE event from the server pushes a new entry (nothing
+  // is appended to an existing one), so every event renders as its
+  // own bubble; tool/thought entries emitted between rounds land in
+  // between. Client-side error events are also added as text entries.
+  | { kind: "text"; text: string };

 interface ToolEvent {
  name: string;
@@ -183,7 +189,18 @@ function MessageBubble({ msg }: { msg: Message }) {
        {!isUser && msg.timeline && msg.timeline.length > 0 && (
          <Timeline entries={msg.timeline} />
        )}
-        {(msg.content || isUser) && (
+        {/*
+          Render the legacy bottom content bubble ONLY when content is
+          non-empty AND either:
+          - the message is from the user, OR
+          - the assistant message has no timeline entries at all.
+          Any non-empty assistant timeline suppresses this slot, on
+          the assumption that its prose already lives in text entries
+          (rendering it again would duplicate every paragraph).
+          NOTE(review): a tool/thought-only timeline also hides content.
+        */}
+        {((msg.content && isUser) ||
+          (msg.content && !isUser && (!msg.timeline || msg.timeline.length === 0))) && (
          <div style={{
            padding: isUser ? "9px 14px" : "10px 14px",
            borderRadius: isUser ? "14px 14px 4px 14px" : "4px 14px 14px 14px",
@@ -213,15 +230,18 @@ function MessageBubble({ msg }: { msg: Message }) {
 */
 function Timeline({ entries }: { entries: TimelineEntry[] }) {
  // Walk the entries and emit a renderable list. Adjacent same-named
-  // tool entries get bundled into a TimelineToolGroup; everything
-  // else passes through as-is.
+  // tool entries get bundled into a TimelineToolGroup; thought and
+  // text entries pass through as-is.
  type Item =
    | { kind: "thought"; text: string }
+    | { kind: "text"; text: string }
    | { kind: "toolGroup"; name: string; entries: Array<Extract<TimelineEntry, { kind: "tool" }>> };
  const items: Item[] = [];
  for (const e of entries) {
    if (e.kind === "thought") {
      items.push({ kind: "thought", text: e.text });
+    } else if (e.kind === "text") {
+      items.push({ kind: "text", text: e.text });
    } else {
      const last = items[items.length - 1];
      if (last && last.kind === "toolGroup" && last.name === e.name) {
@@ -233,13 +253,39 @@ function Timeline({ entries }: { entries: TimelineEntry[] }) {
  }
  return (
    <div style={{ marginBottom: 6 }}>
-      {items.map((item, i) =>
-        item.kind === "thought" ? (
-          <ThinkingBubble key={i} thoughts={item.text} />
-        ) : (
-          <TimelineToolGroup key={i} name={item.name} entries={item.entries} />
-        )
-      )}
+      {items.map((item, i) => {
+        if (item.kind === "thought") {
+          return <ThinkingBubble key={i} thoughts={item.text} />;
+        }
+        if (item.kind === "text") {
+          return <TimelineText key={i} text={item.text} />;
+        }
+        return <TimelineToolGroup key={i} name={item.name} entries={item.entries} />;
+      })}
+    </div>
+  );
+}
+
+/**
+ * One text segment in the assistant's timeline. Rendered as its own
+ * bubble so each round of multi-tool-loop output reads as a discrete
+ * step instead of concatenating into a wall of text.
+ */
+function TimelineText({ text }: { text: string }) {
+  return (
+    <div
+      style={{
+        padding: "10px 14px",
+        borderRadius: "4px 14px 14px 14px",
+        background: "#f7f4ef",
+        color: "#1a1a1a",
+        fontSize: "0.84rem",
+        lineHeight: 1.6,
+        fontFamily: "var(--font-inter),ui-sans-serif,sans-serif",
+        marginBottom: 6,
+      }}
+    >
+      <span dangerouslySetInnerHTML={{ __html: renderMarkdown(text) }} />
    </div>
  );
 }
@@ -449,7 +495,29 @@ export function ChatPanel() {
    try {
      const res = await fetch(`/api/chat/threads/${id}`);
      const data = await res.json();
-      setMessages(data.messages || []);
+      // Hydrate the timeline from persisted textSegments + toolCalls
+      // so a reloaded thread renders the same per-round bubbles the
+      // user saw during streaming. Older messages without
+      // textSegments fall back to the legacy single-bubble path.
+      const hydrated = (data.messages || []).map((m: any) => {
+        if (m.role !== "assistant") return m;
+        const segs: string[] = Array.isArray(m.textSegments) ? m.textSegments : [];
+        if (segs.length === 0) return m;
+        const timeline: TimelineEntry[] = segs.map((t) => ({ kind: "text", text: t }));
+        // We don't have round-level interleaving for tool calls in
+        // the persisted shape (the schema flattens them), so we drop
+        // the toolCalls into the timeline at the end. The streamed
+        // shape preserves true ordering; this is just a reload
+        // approximation. Good enough — what the user really cares
+        // about is the text segments not run-on'ing into one blob.
+        if (Array.isArray(m.toolCalls)) {
+          for (const tc of m.toolCalls) {
+            timeline.push({ kind: "tool", name: tc.name, status: "done" });
+          }
+        }
+        return { ...m, timeline, content: "" };
+      });
+      setMessages(hydrated);
    } catch { /* silent */ }
  }, []);

@@ -547,11 +615,29 @@ export function ChatPanel() {
          try { ev = JSON.parse(line.slice(6)); } catch { continue; }

          if (ev.type === "text" && ev.text) {
-            assistantContent += ev.text;
+            // Each text SSE event = one round of the model's text
+            // output. Push a new "text" timeline entry so the
+            // renderer can show multi-round turns as separate
+            // bubbles instead of one run-on paragraph. We still
+            // maintain `assistantContent` (joined with blank lines)
+            // so the legacy single-bubble fallback path and any
+            // post-stream consumers still work.
+            assistantContent += (assistantContent ? "\n\n" : "") + ev.text;
            setMessages((prev) => {
              const next = [...prev];
              if (msgIndex >= 0 && next[msgIndex]) {
-                next[msgIndex] = { ...next[msgIndex], content: assistantContent };
+                const tl = next[msgIndex].timeline ?? [];
+                next[msgIndex] = {
+                  ...next[msgIndex],
+                  // Don't write to msg.content during streaming —
+                  // the timeline is the source of truth. Setting
+                  // content on every text event re-renders one
+                  // giant bubble in the bottom slot AND the
+                  // segmented timeline above it, duplicating the
+                  // same prose. Persisted messages pick up
+                  // content via the final flush below.
+                  timeline: [...tl, { kind: "text", text: ev.text }],
+                };
              }
              return next;
            });
@@ -610,13 +696,18 @@ export function ChatPanel() {
          } else if (ev.type === "error") {
            const errText = ev.error || "Unknown error";
            const isToolErr = /tool|mcp|coolify|gitea/i.test(errText);
-            assistantContent += isToolErr
-              ? `\n\n⚠️ **Tool error:** ${errText}`
-              : `\n\n⚠️ ${errText}`;
+            const errBubble = isToolErr
+              ? `⚠️ **Tool error:** ${errText}`
+              : `⚠️ ${errText}`;
+            assistantContent += (assistantContent ? "\n\n" : "") + errBubble;
            setMessages((prev) => {
              const next = [...prev];
              if (msgIndex >= 0 && next[msgIndex]) {
-                next[msgIndex] = { ...next[msgIndex], content: assistantContent };
+                const tl = next[msgIndex].timeline ?? [];
+                next[msgIndex] = {
+                  ...next[msgIndex],
+                  timeline: [...tl, { kind: "text", text: errBubble }],
+                };
              }
              return next;
            });