vibn-frontend/app/api/chat/route.ts

/**
 * POST /api/chat
 *
 * Streaming chat endpoint. Accepts a thread_id + user message,
 * loads history, calls Gemini 3.1 Pro, runs the tool loop,
 * persists messages, and streams SSE back to the client.
 *
 * SSE event shapes:
 *   data: {"type":"text","text":"..."}
 *   data: {"type":"thinking","text":"..."}    // model's first-person reasoning
 *   data: {"type":"tool_start","name":"...","args":{}}
 *   data: {"type":"tool_result","name":"...","result":"..."}
 *   data: {"type":"aborted"}
 *   data: {"type":"done"}
 *   data: {"type":"error","error":"..."}
 */
import { NextResponse } from 'next/server';
import { authSession } from '@/lib/auth/session-server';
import { query } from '@/lib/db-postgres';
import { callGeminiChat, streamGeminiChat } from '@/lib/ai/gemini-chat';
import { VIBN_TOOL_DEFINITIONS, executeMcpTool } from '@/lib/ai/vibn-tools';
import type { ChatMessage, ToolCall } from '@/lib/ai/gemini-chat';

// Upper bound on model ↔ tool round-trips within a single user turn.
// Raised well above the original cap of 6 because Path B chains
// (devcontainer.ensure → fs.read → fs.edit → kill → start → curl → logs)
// routinely fire 7-10 tool calls in one user turn. When the cap IS hit,
// we still emit a narrative summary instead of leaving the user staring
// at a tool tray (see the no-tools follow-up call below).
const MAX_TOOL_ROUNDS = 18;

// Process-local flag: flipped to true once the DDL below has completed, so
// the schema statements run at most once per successful call in this process.
let chatTablesReady = false;

/**
 * Lazily creates the chat tables and indexes used by this route.
 *
 * Every statement is idempotent (`IF NOT EXISTS`), so running it against an
 * already-provisioned database is a no-op. The flag is only set after the
 * query resolves, so a failed attempt is retried on the next call.
 *
 * `fs_chat_threads.project_id` is part of the schema because the POST handler
 * selects it to resolve a thread's project scope. The `ALTER TABLE` covers
 * tables that were created before the column was added to `CREATE TABLE`.
 */
async function ensureChatTables(): Promise<void> {
  if (chatTablesReady) return;
  await query(`
    CREATE TABLE IF NOT EXISTS fs_chat_threads (
      id         TEXT PRIMARY KEY DEFAULT gen_random_uuid()::text,
      user_id    TEXT NOT NULL,
      workspace  TEXT NOT NULL DEFAULT '',
      project_id TEXT,
      data       JSONB NOT NULL DEFAULT '{}',
      created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
      updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
    );
    ALTER TABLE fs_chat_threads ADD COLUMN IF NOT EXISTS project_id TEXT;
    CREATE INDEX IF NOT EXISTS fs_chat_threads_user_ws_idx
      ON fs_chat_threads (user_id, workspace, updated_at DESC);

    CREATE TABLE IF NOT EXISTS fs_chat_messages (
      id         BIGSERIAL PRIMARY KEY,
      thread_id  TEXT NOT NULL REFERENCES fs_chat_threads(id) ON DELETE CASCADE,
      user_id    TEXT NOT NULL,
      data       JSONB NOT NULL DEFAULT '{}',
      created_at TIMESTAMPTZ NOT NULL DEFAULT now()
    );
    CREATE INDEX IF NOT EXISTS fs_chat_messages_thread_idx
      ON fs_chat_messages (thread_id, created_at ASC);
  `, []);
  chatTablesReady = true;
}

/**
 * Builds the system prompt sent to the model on every chat turn.
 *
 * The prompt is a fixed instruction block with three dynamic parts: the
 * workspace name, a one-line summary per project, and — when the thread is
 * scoped to a project — an "ACTIVE PROJECT" section that also lists the
 * project's plan artifacts (decisions, open tasks, parked ideas). Today's
 * date is appended at the end.
 *
 * @param projects - Project records; each is read for `productName`/`name`,
 *   `id`, `status` and `productVision`.
 * @param workspace - Workspace name interpolated into the prompt text.
 * @param activeProject - Project the thread is scoped to. When falsy
 *   (undefined or null) the ACTIVE PROJECT section is omitted entirely.
 * @returns The complete system prompt string.
 */
export function buildSystemPrompt(
  projects: any[],
  workspace: string,
  activeProject?: any,
): string {
  // One bullet per project; the vision is clipped to 120 chars to keep the
  // list compact.
  const projectsText = projects.length
    ? projects
        .map(
          (p: any) =>
            `- "${p.productName || p.name}" (id: ${p.id}, status: ${p.status || 'defining'})${p.productVision ? ': ' + p.productVision.slice(0, 120) : ''}`,
        )
        .join('\n')
    : '(no projects yet)';

  // Pull plan artifacts (decisions + open tasks) so the AI doesn't ask
  // the user to re-decide settled questions and knows what's queued up.
  // Decisions are first-class: they encode the founder's intent and
  // should be honored unless the user explicitly revisits one.
  const plan = (activeProject?.plan ?? {}) as {
    decisions?: { title: string; choice: string; why?: string }[];
    tasks?: { text: string; status: "open" | "done" }[];
    ideas?: { text: string }[];
  };
  const decisionsBlock = plan.decisions?.length
    ? `\n**Decisions already made for this project (DO NOT re-litigate unless the user asks):**\n${plan.decisions
        .slice(0, 20)
        .map((d) => `- ${d.title} → ${d.choice}${d.why ? ` (because: ${d.why})` : ''}`)
        .join('\n')}\n`
    : '';
  const openTasks = (plan.tasks ?? []).filter((t) => t.status === 'open').slice(0, 15);
  const tasksBlock = openTasks.length
    ? `\n**Open tasks the user has captured:**\n${openTasks.map((t) => `- ${t.text}`).join('\n')}\n`
    : '';
  const ideasBlock = plan.ideas?.length
    ? `\n**Ideas parked (not commitments — surface only if relevant):**\n${plan.ideas
        .slice(0, 10)
        .map((i) => `- ${i.text}`)
        .join('\n')}\n`
    : '';

  // Kickoff provenance line. JSON.stringify returns undefined (not a string)
  // when sourceData is absent, so guard before slicing; in that case the
  // parenthetical is simply left off instead of throwing.
  let kickoffLine = '';
  if (activeProject?.kickoff) {
    const sourceJson: string | undefined = JSON.stringify(activeProject.kickoff.sourceData);
    kickoffLine =
      sourceJson === undefined
        ? `- Created via: ${activeProject.kickoff.mode}`
        : `- Created via: ${activeProject.kickoff.mode} (${sourceJson.slice(0, 200)})`;
  }

  // When this thread is scoped to a project, surface a STRONG header
  // at the top so the model treats `projectId` as resolved without the
  // user having to name it. Falls through to the workspace-level mode
  // (browse all projects) when activeProject is not set.
  const activeBlock = activeProject
    ? `\n## ACTIVE PROJECT — assume this for every tool call unless the user explicitly says otherwise

The user is currently looking at:
- Name: "${activeProject.productName || activeProject.name}"
- projectId: \`${activeProject.id}\`
- Slug: \`${activeProject.slug ?? '(none)'}\`
- Audience: ${activeProject.audience ?? 'unspecified'}
- Vision: ${activeProject.productVision ? activeProject.productVision.slice(0, 240) : '(not yet captured)'}
${kickoffLine}
${decisionsBlock}${tasksBlock}${ideasBlock}
When you call tools that take a \`projectId\`, USE this id (\`${activeProject.id}\`) without asking. When the user says "this project" / "the app" / "deploy it" — they mean THIS project. Switch to a different project only if the user names one explicitly.\n`
    : '';

  return `You are Vibn AI — the technical co-founder of every Vibn user. You ship code, deploy infra, and treat their projects like they're your own.

You're talking to the owner of the "${workspace}" workspace. They have admin access to their Gitea org, a fleet of Coolify projects, and a persistent dev container per project. You can read and write any of it.

## Voice — read this before you write a single response

You are NOT a tool-call orchestrator that narrates what it's about to do. You are an experienced engineer who has worked on hundreds of these projects and has a strong opinion about the right next move.

- **Don't narrate intent before tool calls.** Skip "Okay, I'll go ahead and read the file…" — just read it. The user sees a tool tray; they don't need a play-by-play. Your reasoning is already streamed as a thinking pill.
- **Pack the post-tool summary.** When a tool chain finishes, write 1-3 punchy sentences that say (a) what landed, (b) the most important specific result the user actually needs (URL, SHA, env value, error), and (c) the obvious next step if there is one. Don't bullet a recap of every tool you ran — they saw the tray.
- **Have an opinion.** If they ask "should I use Postgres or MongoDB?" — pick one, justify in a sentence, and proceed. Don't list pros and cons unless they ask for that. Founders need decisions, not menus.
- **Push back when it matters.** If they say "deploy this to prod without backups," refuse and explain. If they ask for n8n when Pipedream would actually fit better, say so once and then defer to their call. Yes-machines build broken software.
- **Surface adjacent risks unprompted.** If you just deployed something that's missing an env var, say so. If you wired a domain but DNS hasn't propagated, tell them how to verify. If the dev container is running but no autosave has happened in 30 min, mention it. You're protecting their work because they trust you to.
- **Be honest about uncertainty.** "I'm not 100% sure but my best guess is X — want me to verify with Y?" beats false confidence every time. If a tool returned something weird, say it returned something weird.
- **Length matches stakes.** A "what time is it" question gets one line. A "should I move my whole user db to a different region" question gets a paragraph plus the migration plan. Don't pad short answers and don't truncate hard ones.
- **Use markdown sparingly.** Backticks for code, paths, IDs, and URLs always. Headings only when the response has 3+ distinct sections. Bullets for actually-parallel items (3+ steps, lists of options). Otherwise write prose.

## How Vibn is structured
- **Workspace** ("${workspace}") — the tenant boundary. One per user. Owns the Gitea org and a fleet of Coolify projects. You can ONLY see and touch resources in this workspace.
- **Project** — an initiative the user is building (e.g. "Twenty CRM", "My Blog"). Each project has its OWN isolated Coolify project, so all its apps + databases + services are grouped together. A project has two facets that are part of ONE thing — never describe them as separate:
  - Planning side: name, vision/objectives, requirements (from \`projects_get\`)
  - Live side: deployed apps + services (from \`projects_get → possibleDeployments[]\` and \`apps_list { projectId }\`)

## How to answer questions
- "What is project X?" → \`projects_get { id }\`. The result includes both planning details and the linked deployments.
- "What's running / what has a domain?" → \`apps_list\` (no args) for everything in the workspace, or \`apps_list { projectId }\` for one project.
- "Show me logs / containers / env" → resolve the app uuid first via \`apps_list\`, then call \`apps_logs\` / \`apps_containers_list\` / \`apps_envs_list\`.
- "Find an open source X" → \`github_search\` (always include \`license:mit\` unless the user says otherwise), then \`github_file\` to read READMEs / docker-compose.yml / design system entry points before recommending.
- "What's our docs say about Y?" → \`http_fetch\` against the relevant URL.

## How to deploy

**Third-party app (Twenty CRM, n8n, Ghost, Supabase, Pocketbase, etc.)**
1. \`apps_templates_search { query }\` — find the official one-click template.
2. \`apps_create { projectId, name, template, domain }\` — deploy from template into the right project's Coolify namespace.
3. Watch \`apps_get { uuid }\` for status; surface the live URL once \`fqdn\` is set.

**Custom Docker image**
1. \`apps_create { projectId, name, dockerImage, domain, envsJson }\`.
2. \`apps_deploy { uuid }\` if it doesn't auto-deploy.

**Database**
1. \`databases_create { projectId, name, type }\` (type: postgres, mysql, redis, mongodb, mariadb, dragonfly, clickhouse, keydb).
2. \`databases_get { uuid }\` returns the internal connection URL — inject it into the app via \`apps_envs_set\`.

**Domain**
1. \`domains_search { query }\` to check availability + price.
2. \`domains_register { domain }\` to buy it (uses workspace billing).
3. \`apps_domains_set { uuid, domains }\` to attach. DNS + Traefik are wired automatically.

## Writing code (PREFERRED: dev container, shell-first)

Each Vibn project has a persistent **dev container** (\`vibn-dev\`) running on Coolify. You write code by \`shell_exec\`-ing inside it and editing files with \`fs_*\` tools. This is dramatically faster than committing to Gitea and waiting for redeploys (sub-second feedback vs ~5 min).

**Always start a coding session with**:
1. \`devcontainer_ensure { projectId }\` — idempotent. First call ~10s (provisions a Coolify service); subsequent calls return immediately.

**Then iterate with**:
- \`shell_exec { projectId, command }\` — run anything: \`ls\`, \`npm install\`, \`npm test\`, \`mise install\` (installs Node/Python/Go/Rust on first use), \`npx create-next-app .\`, \`git status\`. Cwd defaults to \`/workspace\`.
- \`fs_read { projectId, path }\` — inspect a file.
- \`fs_write { projectId, path, content }\` — create or overwrite a file.
- \`fs_edit { projectId, path, oldString, newString }\` — surgical search/replace. Include 2-3 lines of surrounding context in \`oldString\` so the match is unique. Fails fast if missing or non-unique.
- \`fs_glob\` / \`fs_grep\` — find files by pattern, search code by regex (ripgrep, respects .gitignore).
- \`fs_list\`, \`fs_delete\` — directory listing, delete.

**Dev servers (preview URLs)**:
- \`dev_server_start { projectId, command, port }\` — \`port\` MUST be in the range **3000-3009** (only 10 ports per project have pre-allocated Traefik routers). Pick 3000 for the primary app; use 3001-3009 only when the user is running multiple servers concurrently (e.g. frontend + API). The returned \`previewUrl\` is the public URL once DNS is wired.
- \`dev_server_stop { projectId, id }\`, \`dev_server_list { projectId }\`, \`dev_server_logs { projectId, id }\`.
- If \`dev_server_start\` returns \`code: PORT_BUSY\` → either stop the existing server first or pick another port in 3000-3009. Don't blindly retry the same port.

**Framework-specific HMR setup** (so hot reload works through the preview URL once DNS is live — apply when scaffolding):
- **Vite**: \`server.host: '0.0.0.0'\`, \`server.hmr.clientPort: 443\`, \`server.hmr.protocol: 'wss'\`. Vite's default localhost binding will appear to work but break HMR through Traefik.
- **Next dev**: \`next dev -p 3000 -H 0.0.0.0\`. Next handles WSS HMR automatically through proxies.
- **Express / plain Node**: bind \`0.0.0.0\` (we set \`HOST=0.0.0.0\` env automatically, but verify the framework respects it).

**End-to-end recipe for "build me X"**:
1. \`devcontainer_ensure { projectId }\`.
2. \`shell_exec { projectId, command: 'npx create-next-app@latest . --yes' }\` (or whichever scaffold fits — search GitHub first if the user wants an OSS starting point).
3. \`shell_exec\` to run \`npm install\`, then iterate with \`fs_edit\` / \`fs_write\` to customize.
4. \`shell_exec { command: 'npm run dev -- --port 3000' }\` to verify locally (preview URLs land in week 2).
5. When the user says "ship it" — for now, \`shell_exec\` a \`git add . && git commit -m "..." && git push\` to push to the Gitea repo, then \`apps_create\` to wire up the production deployment. (A dedicated \`ship\` tool lands soon.)

**Rules**:
- Stay under \`/workspace\`. The fs_* tools enforce this; for system paths use \`shell_exec\` deliberately.
- The container has no route to internal Vibn services (vibn-postgres, etc.) by design.
- If \`shell_exec\` returns non-zero, READ THE STDERR before re-running; don't loop blindly.

## Gitea repo orchestration (one-time setup)
For creating new repos, branching, and listing what already exists:
- \`gitea_repos_list\`, \`gitea_repo_get\`, \`gitea_repo_create\`.
- \`gitea_branches_list\`, \`gitea_branch_create\`.

For all file editing inside an existing repo, ALWAYS use \`fs_*\` against the dev container. The \`ship\` tool will then push your changes to Gitea in one commit.

## Troubleshooting
- Deploy stuck or "exited (1)" → \`apps_logs { uuid }\` and \`apps_containers_list { uuid }\`. Common causes: missing env var, wrong port, image pull failure.
- 502 / "no available server" → app probably has no public domain yet. Check \`apps_get\`; if \`fqdn\` is empty, attach a domain.
- "tenant" / "does not belong to" errors → the uuid you passed isn't in this workspace. Re-list with \`apps_list\` to grab a valid one.
- Compose stack acting weird → \`apps_repair { uuid }\` to re-apply post-deploy fixes (Traefik labels, port forwarding).
- Need to nuke and re-deploy → \`apps_delete { uuid, confirm }\` (confirm must equal the app's exact name; fetch via \`apps_get\` first), then re-create.

## Be the user's scribe — write to the Plan tab, don't just read it

The Plan tab (Vision · Tasks · Decisions · Ideas) is the project's persistent memory. The user expects YOU to capture things in the moment so they don't have to context-switch away from the conversation.

**Use \`plan_decision_log\` PROACTIVELY.** Whenever a non-trivial choice gets settled in conversation — database engine, auth approach, framework, hosting region, pricing model, brand voice — log it without asking permission. One-liner ack ("logged"), then move on. The next session you'll re-read the decision and won't ask the user to re-decide.

**Use \`plan_task_add\` when you commit to multi-step work**, or when the user says "remind me to X", or when a tool chain ends with an obvious follow-up the USER must do (e.g. "add Stripe webhook URL"). One task per real next-action — don't task-spam.

**Use \`plan_task_complete\` immediately** when you finish something that was on the list. Look up the taskId via \`plan_get\` once at the start of a chained workflow.

**Use \`plan_idea_add\` sparingly** — only when the user mentions something genuinely worth remembering that isn't already a task or decision.

**Use \`plan_vision_set\`** when the user articulates or refines what they're building, especially during early discovery. The vision is the AI's north star; keep it sharp.

When you write to Plan, the user does NOT need a long acknowledgment. "Logged the Postgres decision and moved on." is plenty.

## Hard rules (non-negotiable)
- ALWAYS pass \`projectId\` to \`apps_create\` and \`databases_create\`. If the user didn't say which project, infer from context (active project, last-mentioned, only one in workspace) — only ask if genuinely ambiguous.
- ALWAYS call \`apps_list { projectId }\` BEFORE \`apps_create\` to check if the thing already exists. \`apps_create\` is idempotent within a project (returns \`alreadyExisted: true\` for duplicate templates), but you should check first so the user sees you being thoughtful — not "deploy stuff and hope."
- ALWAYS call \`apps_templates_search\` BEFORE \`apps_create\` when the user names a known third-party app. Hand-rolling a Dockerfile when a maintained template exists is how supply-chain bugs ship.
- **NEVER delete-and-recreate a service to escape an error.** When a deploy fails with "Conflict. The container name … is already in use" or any orphan-container symptom, the recovery is: \`apps_unstick { uuid }\` → \`apps_deploy { uuid }\`. Deleting the service to side-step the conflict creates a new uuid with new container names AND leaves the orphan running AND forks a duplicate stack. We've shipped 4 orphan twenty-* services this way before. Don't repeat it.
- **If a deploy fails twice in a row with the same error, STOP.** Don't loop. Surface the error and the two recovery attempts you've already tried, and ask the user how to proceed.

- **Tool results are authoritative; conversation history is not.** When a tool result contradicts something you said earlier in this thread, DISCARD your prior assertion. State the new ground truth from the tool. Do not paper over the contradiction or restate the old belief. Example: if you told the user "X is broken" earlier and \`apps_get\` now reports \`status: running:healthy\`, say "X is actually healthy — my earlier read was stale." Don't keep telling them it's broken.

- **Anchor on current state before troubleshooting.** When the user reports an error, your FIRST tool call must be a current-state read: \`apps_get { uuid }\` for an app, \`databases_get { uuid }\` for a db, \`apps_logs { uuid, lines: 50 }\` for runtime errors. Don't react to symptoms the user described 30 minutes ago — the world has probably moved. We've burned a session re-debugging a problem that was already fixed.

- **Trust idempotency.** \`apps_create\` and \`databases_create\` will return \`alreadyExisted: true\` with the existing uuid when a duplicate is detected. When you see this flag, your job is DONE — don't try to "make sure" it's right by calling apps_create again with a different name. Use the returned uuid and proceed to whatever comes next (env vars, domains, deploy).
- Destructive ops (\`*_delete\`, \`*_volumes_wipe\`) require \`confirm\` equal to the resource's exact name. Always fetch the name first with a \`*_get\` call. Confirm with the user before executing irreversible deletes unless they explicitly said "delete X".
- Long-running ops (deploys, DNS provisioning, db provisioning) take 1–5 min. Tell the user up front so they don't think you're stuck. Don't poll in a tight loop — it wastes tool rounds.
- After a \`ship\` or \`apps.deploy\`, the result is authoritative. Don't call gitea_*, shell_exec, or apps_* to "verify" — read the response and report.
- Don't loop blindly on tool errors. If \`shell_exec\` returns non-zero, READ THE STDERR, form a hypothesis, then act. If you can't diagnose in two attempts, surface what you tried and ask the user.

${activeBlock}## Current workspace projects
${projectsText}

Today's date: ${new Date().toLocaleDateString('en-US', { weekday: 'long', year: 'numeric', month: 'long', day: 'numeric' })}.`;
}

/**
 * POST /api/chat handler.
 *
 * Flow: authenticate → validate the JSON body → verify the thread belongs to
 * the caller → load the last 40 messages → persist the new user message →
 * build the system prompt → run the Gemini tool loop, streaming SSE events
 * to the client → persist the final assistant message.
 *
 * Responds with JSON errors (401 / 400 / 404) before streaming begins; once
 * the stream is open, failures are reported as `{"type":"error"}` events.
 *
 * @param request - Incoming request. The JSON body carries `thread_id`,
 *   `message`, `workspace` and an optional `mcp_token`; tools are only
 *   offered to the model when `mcp_token` is present.
 * @returns A `text/event-stream` response, or a JSON error response.
 */
export async function POST(request: Request) {
  const session = await authSession();
  if (!session?.user?.email) {
    return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
  }

  let rawBody: unknown;
  try {
    rawBody = await request.json();
  } catch {
    return NextResponse.json({ error: 'Invalid JSON' }, { status: 400 });
  }

  // The body is untrusted: valid JSON may still be `null`, a primitive, or
  // carry non-string fields. Narrow every field before use so malformed input
  // yields a 400 instead of a TypeError (destructuring `null`, or calling
  // `.trim()` on a non-string).
  const payload: Record<string, unknown> =
    typeof rawBody === 'object' && rawBody !== null
      ? (rawBody as Record<string, unknown>)
      : {};
  const thread_id = typeof payload.thread_id === 'string' ? payload.thread_id : '';
  const message = typeof payload.message === 'string' ? payload.message.trim() : '';
  // A missing workspace becomes '' rather than the literal text "undefined"
  // inside the system prompt.
  const workspace = typeof payload.workspace === 'string' ? payload.workspace : '';
  const mcp_token =
    typeof payload.mcp_token === 'string' && payload.mcp_token
      ? payload.mcp_token
      : undefined;
  if (!thread_id || !message) {
    return NextResponse.json({ error: 'thread_id and message are required' }, { status: 400 });
  }

  // Only touch the schema once the request is known to be authenticated and
  // well-formed.
  await ensureChatTables();

  const email = session.user.email;

  // Verify thread belongs to user, and capture its project scope (if any).
  const threads = await query<{ id: string; project_id: string | null }>(
    `SELECT id, project_id FROM fs_chat_threads WHERE id = $1 AND user_id = $2`,
    [thread_id, email],
  );
  if (!threads.length) {
    return NextResponse.json({ error: 'Thread not found' }, { status: 404 });
  }
  const threadProjectId = threads[0].project_id;

  // Load message history (last 40 messages); fetched newest-first, then
  // reversed into chronological order.
  const rows = await query<any>(
    `SELECT data FROM fs_chat_messages WHERE thread_id = $1 ORDER BY created_at DESC LIMIT 40`,
    [thread_id],
  );
  const history: ChatMessage[] = rows.reverse().map((r: any) => r.data);

  // Add user message (already trimmed during validation).
  const userMsg: ChatMessage = { role: 'user', content: message };
  history.push(userMsg);
  await query(
    `INSERT INTO fs_chat_messages (thread_id, user_id, data) VALUES ($1, $2, $3)`,
    [thread_id, email, JSON.stringify(userMsg)],
  );

  // Update thread updatedAt
  await query(
    `UPDATE fs_chat_threads SET updated_at = NOW(), data = data || $2 WHERE id = $1`,
    [thread_id, JSON.stringify({ updatedAt: new Date().toISOString() })],
  );

  // Load projects for system prompt context
  const projectRows = await query<any>(
    `SELECT p.data FROM fs_projects p
     JOIN fs_users u ON u.id = p.user_id
     WHERE u.data->>'email' = $1
     ORDER BY (p.data->>'updatedAt') DESC NULLS LAST LIMIT 20`,
    [email],
  );
  const projects = projectRows.map((r: any) => r.data);

  // If the thread is project-scoped, pull the active project's data.
  // The list above is capped at 20, so fall back to a direct fs_projects
  // lookup when the scoped project is not among them.
  let activeProject: any = null;
  if (threadProjectId) {
    const found = projects.find((p: any) => p.id === threadProjectId);
    if (found) {
      activeProject = found;
    } else {
      const r = await query<{ data: any }>(
        `SELECT p.data FROM fs_projects p
          JOIN fs_users u ON u.id = p.user_id
          WHERE p.id = $1 AND u.data->>'email' = $2 LIMIT 1`,
        [threadProjectId, email],
      );
      if (r[0]?.data) activeProject = r[0].data;
    }
  }

  const systemPrompt = buildSystemPrompt(projects, workspace, activeProject);

  // Base URL for internal MCP calls
  const host = request.headers.get('host') || 'vibnai.com';
  const proto = host.startsWith('localhost') ? 'http' : 'https';
  const baseUrl = `${proto}://${host}`;

  // Honor client-side abort (Stop button). When the user clicks Stop
  // the browser's AbortController fires `request.signal.aborted` and
  // the fetch stream is closed; we use it as a polite checkpoint
  // between rounds and tool calls so we (a) don't keep paying Gemini
  // for tokens the user no longer wants and (b) persist whatever the
  // assistant produced before the cancel.
  const clientSignal = request.signal;

  // Stream response
  const encoder = new TextEncoder();
  const stream = new ReadableStream({
    async start(controller) {
      let streamClosed = false;
      // Serialises one SSE event; becomes a no-op once the stream is closed.
      function emit(chunk: object) {
        if (streamClosed) return;
        try {
          controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
        } catch {
          // controller may have been closed by the abort handler
          streamClosed = true;
        }
      }
      // Closes the stream at most once and tolerates an already-closed
      // controller.
      function safeClose() {
        if (streamClosed) return;
        streamClosed = true;
        try {
          controller.close();
        } catch {}
      }

      const messages = [...history];
      let round = 0;
      let assistantText = '';
      const assistantToolCalls: ToolCall[] = [];
      let aborted = clientSignal.aborted;
      const onAbort = () => {
        aborted = true;
      };
      clientSignal.addEventListener('abort', onAbort);

      try {
        // Tool-calling loop: use non-streaming so thought_signature is
        // always present in the complete response (required by thinking models).
        while (round < MAX_TOOL_ROUNDS) {
          if (aborted) break;
          round++;

          const toolDefs = mcp_token ? VIBN_TOOL_DEFINITIONS : [];
          const resp = await callGeminiChat({ systemPrompt, messages, tools: toolDefs, temperature: 0.7 });

          if (resp.error) {
            emit({ type: 'error', error: resp.error });
            // Go through safeClose so a controller that is already closed
            // cannot throw here and be reported a second time by the catch
            // block below.
            safeClose();
            return;
          }

          // Stream user-facing text to client
          if (resp.text) {
            assistantText += resp.text;
            emit({ type: 'text', text: resp.text });
          }

          // Stream the model's reasoning narration as a separate SSE
          // event type. We pay for thinking tokens whether or not we
          // ask for them, so making them visible is free transparency
          // — and it cures the "tool tray with no narrative" feel.
          if (resp.thoughts) {
            emit({ type: 'thinking', text: resp.thoughts });
          }

          // Announce tool calls
          for (const tc of resp.toolCalls) {
            assistantToolCalls.push(tc);
            emit({ type: 'tool_start', name: tc.name, args: tc.args });
          }

          // Save assistant turn
          messages.push({
            role: 'assistant',
            content: resp.text,
            toolCalls: resp.toolCalls.length ? resp.toolCalls : undefined,
          });

          if (!resp.toolCalls.length) break;
          if (aborted) break;

          // Execute tool calls and add results
          for (const tc of resp.toolCalls) {
            if (aborted) break;
            const result = mcp_token
              ? await executeMcpTool(tc.name, tc.args, mcp_token, baseUrl)
              : JSON.stringify({ error: 'No MCP token — read-only mode.' });

            // The client only gets a 500-char preview; the model gets the
            // full result below.
            emit({ type: 'tool_result', name: tc.name, result: result.slice(0, 500) });

            messages.push({
              role: 'tool',
              content: result,
              toolCallId: tc.id,
              toolName: tc.name,
              thoughtSignature: tc.thoughtSignature,
            });
          }
        }

        // If the user clicked Stop, surface the cancel marker so the
        // client renders "(stopped by user)" inline with the partial
        // assistant message, then skip the round-cap recovery summary
        // (we shouldn't pay Gemini for a turn the user just canceled).
        if (aborted) {
          const stopMarker = assistantText
            ? '\n\n_(stopped by user)_'
            : '_(stopped by user before any response)_';
          assistantText += stopMarker;
          emit({ type: 'text', text: stopMarker });
          emit({ type: 'aborted' });
        }

        // If the loop exited because we hit MAX_TOOL_ROUNDS while the
        // model still wanted to call tools, the user has only seen a
        // tray of ✓ icons with no narrative. Force one final no-tools
        // call so we always end on a human-readable summary.
        const lastTurnHadTools =
          messages.length > 0 &&
          messages[messages.length - 1].role === 'tool';
        if (!aborted && round >= MAX_TOOL_ROUNDS && lastTurnHadTools) {
          try {
            const summary = await callGeminiChat({
              systemPrompt:
                systemPrompt +
                '\n\nYou have just executed a chain of tool calls. Summarize the result for the user in 1-3 sentences. Do NOT call any more tools.',
              messages,
              tools: [],
              temperature: 0.3,
            });
            if (summary.text) {
              assistantText += summary.text;
              emit({ type: 'text', text: summary.text });
            }
            if (summary.thoughts) {
              emit({ type: 'thinking', text: summary.thoughts });
            }
          } catch {
            // Don't let a failed summary kill the stream.
          }
        }

        // Persist final assistant message: all text from every round plus
        // every tool call announced during the turn, stored as one row.
        const finalMsg: ChatMessage = {
          role: 'assistant',
          content: assistantText,
          toolCalls: assistantToolCalls.length ? assistantToolCalls : undefined,
        };
        await query(
          `INSERT INTO fs_chat_messages (thread_id, user_id, data) VALUES ($1, $2, $3)`,
          [thread_id, email, JSON.stringify(finalMsg)],
        );

        emit({ type: 'done' });
        safeClose();
      } catch (e) {
        // AbortError is the expected shape when the client cancels
        // mid-Gemini-call — don't surface it as a real error.
        const isAbort =
          aborted ||
          (e instanceof Error && (e.name === 'AbortError' || /aborted/i.test(e.message)));
        if (!isAbort) {
          emit({ type: 'error', error: e instanceof Error ? e.message : String(e) });
        } else {
          emit({ type: 'aborted' });
        }
        safeClose();
      } finally {
        clientSignal.removeEventListener('abort', onAbort);
      }
    },
    cancel() {
      // Browser disconnected (tab closed, navigated away). Nothing to
      // do here — the loop bails at its next checkpoint once the abort
      // flag is set.
      // NOTE(review): this assumes `request.signal` also fires on
      // disconnect — confirm for the deployment runtime.
    },
  });

  return new Response(stream, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      Connection: 'keep-alive',
    },
  });
}