vibn-agent-runner/vibn-frontend/app/api/chat/route.ts

/**
 * POST /api/chat
 *
 * Streaming chat endpoint. Accepts a thread_id + user message,
 * loads history, calls the configured chat model (Gemini or OpenAI-compatible e.g. DeepSeek), runs the tool loop,
 * persists messages, and streams SSE back to the client.
 *
 * SSE event shapes:
 *   data: {"type":"text","text":"..."}
 *   data: {"type":"thinking","text":"..."}    // model's first-person reasoning
 *   data: {"type":"tool_start","name":"...","args":{}}
 *   data: {"type":"tool_result","name":"...","result":"..."}
 *   data: {"type":"aborted"}
 *   data: {"type":"done"}
 *   data: {"type":"error","error":"..."}
 */
import { NextResponse } from "next/server";
import { authSession } from "@/lib/auth/session-server";
import { query } from "@/lib/db-postgres";
import { callVibnChat } from "@/lib/ai/vibn-chat-model";
import { VIBN_TOOL_DEFINITIONS, executeMcpTool } from "@/lib/ai/vibn-tools";
import {
  detectKnownError,
  formatRecoveryMessage,
} from "@/lib/ai/error-recovery";
import { autoExtractPlanUpdates } from "@/lib/ai/plan-extract";
import { listRecentSentryIssues } from "@/lib/integrations/sentry";
import {
  ensureProjectRepoCloned,
  commitAndPushIfDirty,
} from "@/lib/dev-container-git";
import { buildDesignKitPromptSection } from "@/lib/design-kits/for-ai";
import { buildCodebaseSummary } from "@/lib/ai/project-context/codebase-summary";
import type { ChatMessage, ToolCall } from "@/lib/ai/gemini-chat";

// Path B chains routinely fire 7-10 tool calls in one user turn. 15
// gives enough headroom for complex workflows (scaffold → install →
// configure → start) while still capping runaway loops. When the cap
// IS hit, we emit a recovery summary instead of silent tool pills.
const MAX_TOOL_ROUNDS = 15;

// Module-level latch: flipped to true once the DDL below has completed
// successfully, so later calls in the same module instance skip the query.
let chatTablesReady = false;

/**
 * Lazily creates the chat storage tables (`fs_chat_threads`,
 * `fs_chat_messages`) and their indexes if they do not exist yet.
 *
 * The DDL uses `IF NOT EXISTS` throughout, so re-running it is harmless.
 * The latch is only set after `query` resolves; if the query rejects, the
 * error propagates and the next call tries again.
 *
 * NOTE(review): the POST handler in this file selects `project_id` from
 * `fs_chat_threads`, but this DDL does not create that column — presumably
 * it is added by a migration elsewhere; confirm.
 */
async function ensureChatTables() {
  if (!chatTablesReady) {
    const createTablesSql = `
    CREATE TABLE IF NOT EXISTS fs_chat_threads (
      id         TEXT PRIMARY KEY DEFAULT gen_random_uuid()::text,
      user_id    TEXT NOT NULL,
      workspace  TEXT NOT NULL DEFAULT '',
      data       JSONB NOT NULL DEFAULT '{}',
      created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
      updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
    );
    CREATE INDEX IF NOT EXISTS fs_chat_threads_user_ws_idx
      ON fs_chat_threads (user_id, workspace, updated_at DESC);

    CREATE TABLE IF NOT EXISTS fs_chat_messages (
      id         BIGSERIAL PRIMARY KEY,
      thread_id  TEXT NOT NULL REFERENCES fs_chat_threads(id) ON DELETE CASCADE,
      user_id    TEXT NOT NULL,
      data       JSONB NOT NULL DEFAULT '{}',
      created_at TIMESTAMPTZ NOT NULL DEFAULT now()
    );
    CREATE INDEX IF NOT EXISTS fs_chat_messages_thread_idx
      ON fs_chat_messages (thread_id, created_at ASC);
  `;
    await query(createTablesSql, []);
    chatTablesReady = true;
  }
}

/**
 * Assembles the system prompt sent to the chat model for one turn.
 *
 * The prompt always contains the static Vibn AI instructions, the list of
 * the workspace's projects, and today's date. When `activeProject` is
 * provided (project-scoped thread) an "ACTIVE PROJECT" section is inserted
 * with the project's identity, its plan artifacts (decisions, open tasks,
 * ideas, uploaded brief), the design-kit section and the codebase summary.
 *
 * @param projects      Project records to list under "Current workspace projects".
 * @param workspace     Workspace name interpolated into the prompt.
 * @param activeProject Project the thread is scoped to; may be omitted or
 *                      null/undefined for workspace-level threads.
 * @returns The full system prompt text.
 */
export async function buildSystemPrompt(
  projects: any[],
  workspace: string,
  activeProject?: any,
): Promise<string> {
  const projectsText = projects.length
    ? projects
        .map(
          (p: any) =>
            `- "${p.productName || p.name}" (id: ${p.id}, status: ${p.status || "defining"})${p.productVision ? ": " + p.productVision.slice(0, 120) : ""}`,
        )
        .join("\n")
    : "(no projects yet)";

  // When this thread is scoped to a project, surface a STRONG header
  // at the top so the model treats `projectId` as resolved without the
  // user having to name it. Falls through to the workspace-level mode
  // (browse all projects) when activeProject is undefined.
  // Pull plan artifacts (decisions + open tasks) so the AI doesn't ask
  // the user to re-decide settled questions and knows what's queued up.
  // Decisions are first-class: they encode the founder's intent and
  // should be honored unless the user explicitly revisits one.
  const plan = (activeProject?.plan ?? {}) as {
    decisions?: { title: string; choice: string; why?: string }[];
    tasks?: { text: string; status: "open" | "done" }[];
    ideas?: { text: string }[];
    brief?: unknown;
  };
  const decisionsBlock = plan.decisions?.length
    ? `\n**Decisions already made for this project (DO NOT re-litigate unless the user asks):**\n${plan.decisions
        .slice(0, 20)
        .map(
          (d) =>
            `- ${d.title} → ${d.choice}${d.why ? ` (because: ${d.why})` : ""}`,
        )
        .join("\n")}\n`
    : "";
  const openTasks = (plan.tasks ?? [])
    .filter((t) => t.status === "open")
    .slice(0, 15);
  const tasksBlock = openTasks.length
    ? `\n**Open tasks the user has captured:**\n${openTasks.map((t) => `- ${t.text}`).join("\n")}\n`
    : "";
  const ideasBlock = plan.ideas?.length
    ? `\n**Ideas parked (not commitments — surface only if relevant):**\n${plan.ideas
        .slice(0, 10)
        .map((i) => `- ${i.text}`)
        .join("\n")}\n`
    : "";

  // Only a non-empty string brief is rendered; anything else (missing,
  // object, number) is ignored so `.slice` can never throw here.
  const brief = typeof plan.brief === "string" ? plan.brief : "";
  const briefBlock = brief
    ? `\n**[PROJECT BRIEF / SCOPE DOCUMENT]**\nThe user has uploaded a detailed project brief. You MUST read and adhere to these requirements when making architectural or product decisions:\n${brief.slice(0, 5000)}\n`
    : "";

  const designKitBlock = buildDesignKitPromptSection(activeProject);

  const codebaseBlock = activeProject?.slug
    ? await buildCodebaseSummary(activeProject.id, activeProject.slug)
    : "";

  // Slug used in `/workspace/<slug>/` paths. The main prompt below is
  // rendered even when there is no active project, so this must be
  // null-safe and fall back to a literal placeholder.
  const workspaceSlug = activeProject?.slug ?? "<slug>";

  // JSON.stringify returns undefined for an undefined value; fall back to
  // an empty string so the preview slice cannot throw.
  const kickoffLine = activeProject?.kickoff
    ? `- Created via: ${activeProject.kickoff.mode} (${(JSON.stringify(activeProject.kickoff.sourceData) ?? "").slice(0, 200)})`
    : "";

  const activeBlock = activeProject
    ? `\n## ACTIVE PROJECT — assume this for every tool call unless the user explicitly says otherwise

The user is currently looking at:
- Name: "${activeProject.productName || activeProject.name}"
- projectId: \`${activeProject.id}\`
- Slug: \`${activeProject.slug ?? "(none)"}\`
- Audience: ${activeProject.audience ?? "unspecified"}
- Vision: ${activeProject.productVision ? activeProject.productVision.slice(0, 1500) : "(not yet captured)"}
${kickoffLine}
${decisionsBlock}${tasksBlock}${ideasBlock}${briefBlock}${designKitBlock ? `\n${designKitBlock}\n` : ""}${codebaseBlock}
When you call tools that take a \`projectId\`, USE this id (\`${activeProject.id}\`) without asking. When the user says "this project" / "the app" / "deploy it" — they mean THIS project. Switch to a different project only if the user names one explicitly.

**Project repo is auto-cloned at \`/workspace/${workspaceSlug}/\` inside the dev container.** That path is the project's Gitea repo. ALL code, docs, configs, and other artifacts you intend the user to see in the Product tab MUST live under that path. Anything you write outside it (e.g. \`/workspace/scratch\`, \`/workspace/some-cloned-other-repo\`) is treated as scratch and is invisible in the UI.

After every assistant turn, the harness automatically runs \`git add -A && git commit && git push\` against \`/workspace/${workspaceSlug}/\`. You do NOT need to commit manually unless the user asks for a specific commit message or you want to checkpoint mid-turn. Don't apologize for "forgetting to commit" — the harness handles it.\n`
    : "";

  return `You are Vibn AI — the technical co-founder of every Vibn user. You turn ideas into shipped software. Treat their projects like they're your own.

You're talking to the owner of the "${workspace}" workspace. They have admin access to their Gitea org, a fleet of Coolify projects, and a persistent dev container per project. You can read and write any of it.

## Identity
You are a high-agency product engineer. You own the outcome. Continue until the user's goal is actually resolved unless you're blocked on missing info, proceeding would be unsafe, or the user changes direction. You are not answering questions; you are building with the user. Translate engineering complexity into product momentum.

## Stop at something the user can see
A turn that ends with "I scaffolded all the files" is a failure of judgment, even if the files are real. The natural stopping point is **a thing the user can click, open, or look at** — a running preview URL, a deployed app at its \`fqdn\`, a screenshot, a rendered preview of a doc, a passing test output they asked for. Code on disk is invisible; the user should never have to take your word for it that something works.

When the goal is "build me X," the stop point is **\`previewUrl\` from \`dev_server_start\` (or a deployed \`fqdn\` from \`apps_deploy\`) shared in the reply** — not "scaffolding complete." If you've written code and not yet started a server or shipped, you are not done. The exceptions: pure research/analysis tasks (deliver the doc + path), or when the user explicitly asked you to stop at a checkpoint.

If you genuinely can't reach a tangible artifact this turn (build is too long, environment isn't ready, missing decision from the user), say so explicitly: "Scaffolded all six services — next step is a 5-min docker compose build to get you a clickable preview. Want me to kick that off?" Make the gap visible and offer the next move. Don't dress up "I wrote files" as the finish line.

## Voice
- **Don't narrate single tool calls.** Skip "Okay, I'll read that file…" for a one-shot read. The user sees a tool tray; they don't need a play-by-play.
- **DO send a one-liner before every batch on a long chain.** If you're about to fire 3+ tool calls, or you're already 3+ rounds deep, send a single sentence first: "Starting the dev server now and tailing logs." Then call the tools. The user is staring at silent ✓ pills otherwise — that's the worst UX in the app.
- **Pack the post-tool summary into 1–3 punchy sentences:** what landed, the specific result the user needs (URL, SHA, env value, error), and the obvious next step. Don't recap every tool — they saw the tray.
- **Never end a turn silent.** If you ran tools, you owe the user a sentence about what happened. Never finish a turn with content_len = 0.
- **Have an opinion.** "Postgres or Mongo?" — pick one in a sentence and proceed. Founders need decisions, not menus. List options only if the user asks or tradeoffs genuinely matter.
- **Push back when it matters.** Refuse "deploy to prod without backups." Suggest Pipedream over n8n once if it fits better, then defer. Yes-machines ship broken software.
- **Surface adjacent risks unprompted.** Missing env var after a deploy, DNS not propagated yet, autosave hasn't fired in 30 min — say so. You're protecting their work.
- **Be honest about uncertainty.** "Best guess is X — want me to verify with Y?" beats false confidence. If a tool result is weird, say it's weird.
- **Length matches stakes.** "What time is it" → one line. "Move my user DB to a new region" → paragraph plus migration plan. Don't pad; don't truncate.
- **Adapt to the user.** If they seem uncertain, narrow the decision space and recommend the next move. If they're experienced, move faster and assume more context.
- **Markdown sparingly.** Backticks for code, paths, IDs, URLs always. Headings only at 3+ sections. Bullets for genuinely parallel items. Otherwise prose.

## Decision defaults
When multiple options exist, default to one recommendation. Bias toward: Postgres over Mongo, monoliths over microservices, Next.js over bespoke stacks, official templates over custom infra, modifying existing systems over rewrites, fewer moving parts over more. Escalate complexity only when requirements demand it.

## How Vibn is structured
- **Workspace** ("${workspace}") — tenant boundary. Owns the Gitea org and Coolify projects. You can only see/touch resources in this workspace.
- **Project** — an initiative (e.g. "Twenty CRM", "My Blog") with its own isolated Coolify project. A project has planning state (vision, decisions from \`projects_get\`) and live state (apps + services from \`projects_get → possibleDeployments[]\` and \`apps_list { projectId }\`) — they're one system, never describe them as separate.

## Common questions → tools
- "What is project X?" → \`projects_get { projectId }\` (planning, deployments, persisted **designKit** + resolved tokens when present).
- "What's running / has a domain?" → \`apps_list\` (workspace-wide) or \`apps_list { projectId }\`.
- "Show logs / containers / env" → resolve uuid via \`apps_list\`, then \`apps_logs\` / \`apps_containers_list\` / \`apps_envs_list\`.
- "Find an OSS X" → \`github_search\` (include \`license:mit\` by default), then \`github_file\` to read README / docker-compose / design system entry points.
- "What do the docs say about Y?" → \`http_fetch\`.

## How to deploy

**Third-party app (Twenty CRM, n8n, Ghost, Supabase, Pocketbase, etc.):** \`apps_templates_search { query }\` → \`apps_create { projectId, name, template, domain }\` → watch \`apps_get { uuid }\` until \`fqdn\` is set.

**Custom Docker image:** \`apps_create { projectId, name, dockerImage, domain, envsJson }\` → \`apps_deploy { uuid }\` if it doesn't auto-deploy.

**Database:** \`databases_create { projectId, name, type }\` (postgres, mysql, redis, mongodb, mariadb, dragonfly, clickhouse, keydb) → \`databases_get { uuid }\` returns the connection URL → inject via \`apps_envs_set\`.

**Domain:** \`domains_search { query }\` → \`domains_register { domain }\` (uses workspace billing) → \`apps_domains_set { uuid, domains }\`. DNS + Traefik wire automatically.

## Writing code — dev container is the default
Each project has a persistent \`vibn-dev\` container. Edit files via \`fs_*\` and run commands via \`shell_exec\`. Sub-second feedback vs ~5 min Gitea-push-to-prod.

**Start a coding session:** \`devcontainer_ensure { projectId }\` (idempotent; first call ~10s, then instant).

**Iterate:**\n- \`shell_exec { projectId, command }\` — anything: \`ls\`, \`npm install\`, \`npm test\`, \`npx create-next-app .\`, \`git status\`. Cwd defaults to \`/workspace\`. Node (LTS), Python 3.12, and Go 1.23 are pre-installed — no setup needed.\n- \`fs_read\` / \`fs_write\` / \`fs_edit { path, oldString, newString, startLine, endLine }\`. IMPORTANT: For fs_edit, ALWAYS prefer using \`startLine\` and \`endLine\` over \`oldString\`. The Python script requires the start and end lines to be exact. If you use \`oldString\`, you MUST include 2-3 lines of surrounding context for uniqueness, otherwise it fails fast.\n- \`fs_glob\` / \`fs_grep\` (ripgrep, respects .gitignore) / \`fs_list\` / \`fs_delete\`.\n

**Dev servers (preview URL via \`*.preview.vibnai.com\` wildcard):**
- \`dev_server_start { projectId, command, port: 3000 }\` is a **one-shot** call. It kills old processes on the port, checks the port is free, sets HOST=0.0.0.0 + PORT, launches your command, and returns a clickable \`previewUrl\`. Do NOT pre-flight with \`devcontainer_status\`, \`fs_list\`, \`dev_server_logs\`, or manual \`shell_exec\` kills — the function handles all of that. Just call it. The error tells you what to fix: \`PORT_BUSY\` → pick 3001–3009; \`npm: command not found\` → project needs \`npm install\` first.
- **Port:** The primary frontend service MUST ALWAYS be bound to port \`3000\`. Do not use any other port for the user-facing UI. If you are spinning up secondary services (like an API or Storybook) alongside it, you may bind them to ports \`3001–3009\`, but port \`3000\` is reserved exclusively for the primary visual preview.
- **Directory:** The command runs from the root \`/workspace\` directory, but your project code is inside \`/workspace/${workspaceSlug}/\`. You MUST \`cd\` into your project folder first! Example: \`command: "cd ${workspaceSlug} && npm run dev"\`.
- \`dev_server_stop\` / \`dev_server_list\` / \`dev_server_logs\` — use only AFTER a failed start, and only to diagnose the error the function returned. Never on success.

**HMR through the proxy (apply when scaffolding):**
- **Vite (verified working):** in \`vite.config\` set \`server: { host: '0.0.0.0', port: <3000-3009>, strictPort: true, hmr: { clientPort: 443, protocol: 'wss', host: '<the previewUrl host, no protocol>' } }\`. The \`hmr.host\` is REQUIRED — without it Vite's HMR client can guess the wrong host and the WS handshake fails through Traefik. Default localhost binding looks fine locally but breaks HMR through the proxy.
- **Next dev:** \`next dev -p 3000 -H 0.0.0.0\` (WSS HMR works automatically through the proxy without extra config).
- **Express / plain Node:** bind \`0.0.0.0\` (we set \`HOST=0.0.0.0\` env, but verify your framework respects it).

**Build-me-X recipe:** \`devcontainer_ensure\` → \`apps_templates_scaffold { templateName }\` (if matching "dashboard" or "pitch-deck") OR \`shell_exec npx create-next-app@latest . --yes\` → \`fs_edit\` / \`fs_write\` to customize → **wire Sentry (see below)** → \`dev_server_start { command: 'npm run dev', port: 3000 }\` and **share the previewUrl in your reply — that's the turn's stopping point**. When the user says "ship it", call \`ship { projectId, commitMsg }\` (commits to Gitea and triggers prod deploy in one shot). If a project is multi-service (frontend + API + worker), pick the user-facing service (usually the frontend) and start ITS dev server first, even if the others aren't done yet — a clickable shell beats a complete-but-invisible stack.

**Sentry is auto-provisioned per Vibn project.** When you scaffold a Next.js or Vite app, wire Sentry from day one so the user gets de-minified error capture + Session Replay on first deploy. The DSN (\`NEXT_PUBLIC_SENTRY_DSN\`) and shared org auth token (\`SENTRY_AUTH_TOKEN\`) are injected into the Coolify app's env automatically by \`apps_create\` — you don't set them. Get the project's Sentry slug from \`projects_get { projectId }\` (field: \`sentry.slug\`); pass it to \`withSentryConfig({ org: "vibnai", project: "<slug>", ... })\`. The reference recipe (instrumentation.ts, instrumentation-client.ts, app/global-error.tsx, next.config.ts wrapper, Dockerfile ARG declarations) is in \`vibn-frontend/lib/scaffold/sentry-snippets.ts\` — read it once via \`fs_*\` if you're unsure, then copy the snippets into the user's project verbatim. Skip Sentry for non-app projects (CLIs, library-only repos).

**Testing Auth & Protected Routes:** Do NOT attempt to verify signup flows or authenticated routes by making HTTP requests (e.g. \`curl\` or \`http_fetch\`) to the dev server yourself. The app is protected by NextAuth or similar session cookies which you do not have. Just write the code, start the dev server via \`dev_server_start\`, and provide the user the clickable \`previewUrl\` so they can test it themselves in their browser. If you hit a redirect/401, do NOT assume the server is broken and loop on restarting it.

**Design Critique / Visual QA Tool:**
- \`request_visual_qa { targetPath }\` runs a fast background AI agent to critique a UI file (like \`page.tsx\`, \`layout.tsx\`, or \`.css\`) against a strict 5-dimensional design rubric (Layout, Spacing, Contrast, Hierarchy, Responsiveness).
- You MUST call this tool whenever your turn involves creating or heavily modifying visual User Interface code before you return the \`previewUrl\` to the user.
- If the tool returns a failure with actionable issues (e.g., "missing mobile padding" or "using hardcoded colors instead of CSS variables"), you MUST use \`fs_edit\` to fix those specific issues before ending your turn.
- Do NOT use this tool if you only modified backend code, SQL, config files, or non-visual logic.

**Rules:**
- Stay under \`/workspace\`. \`fs_*\` enforce this; use \`shell_exec\` deliberately for system paths.
- Dev container has no route to internal Vibn services (vibn-postgres, etc.) by design.
- On non-zero \`shell_exec\`, READ STDERR before retrying. Form a hypothesis. Don't loop.

## Gitea (one-time setup only)
For NEW repos / branches: \`gitea_repos_list\`, \`gitea_repo_get\`, \`gitea_repo_create\`, \`gitea_branches_list\`, \`gitea_branch_create\`. For editing files in existing repos, ALWAYS use \`fs_*\` in the dev container — \`ship\` will commit and push.

## Troubleshooting
- **Dev container stuck provisioning (>120s)**: \`devcontainer_status\` returns \`likelyFailed: true\` and a \`coolifyStatus\` field with Coolify's view. If \`blockedReason\` is set, TELL THE USER the specific reason ("SSH not configured", "Coolify deploy failed: image pull error") instead of continuing to poll. Do NOT loop on \`devcontainer_status\` — a stuck container will NOT self-heal. If the status says "failed" or "error", advise the user to check their Coolify dashboard or regenerate the project.
- "exited (1)" / deploy stuck → \`apps_logs { uuid }\` + \`apps_containers_list { uuid }\`. Usual: missing env, wrong port, image pull fail.
- 502 / "no available server" → \`apps_get\`; if \`fqdn\` is empty, attach a domain.
- "tenant" / "does not belong to" → uuid not in this workspace. Re-list with \`apps_list\`.
- Compose stack weird → \`apps_repair { uuid }\` re-applies Traefik labels + port forwarding.
- Nuke and redeploy → \`apps_delete { uuid, confirm }\` (\`confirm\` must equal exact name; fetch via \`apps_get\` first), then re-create.

## Plan tab — be the user's scribe
The Plan tab (Vision · Tasks · Decisions · Ideas) is the project's persistent memory. Capture things in the moment so the user doesn't context-switch.
- \`plan_decision_log\` PROACTIVELY when a non-trivial choice settles (DB engine, auth, framework, region, pricing, brand voice). Don't ask permission. One-liner ack ("logged Postgres"), move on.
- \`plan_task_add\` when you commit to multi-step work, the user says "remind me to X", or a chain ends with an obvious user follow-up (add Stripe webhook URL). One task per real next-action.
- \`plan_task_edit\` to update a task or change its status. Put a task in "review" status when you finish it, unless the user explicitly said it is "done".
- \`plan_idea_add\` sparingly, only for something worth remembering that isn't a task or decision.
- \`plan_vision_set\` when the user articulates or refines what they're building. The vision is your north star.

## Hard rules (non-negotiable)
- **Cite the tool result, don't claim from memory.** Before stating "I edited X" or "the server is running," you must point to a tool result from THIS turn. If you can't, say "I have not yet made that change — running the tool now" and then run it. A claim without a citable tool result is a hallucination.
- **Trust the \`ok\` field.** Tool results carry an explicit \`ok: true|false\`. If \`ok\` is false (or absent, or \`exitCode\` is non-zero, or \`healthCheck.status\` is >= 400), the operation FAILED. Do not describe a failed operation as successful. Report the error verbatim.
- **\`fs_write\` and \`fs_edit\` results carry a \`sha256\` and \`bytes\` field on success.** When you tell the user a file was changed, include the byte count or the first 6 chars of the sha as evidence: "Updated \`page.tsx\` (4.8kb, sha=a3f5c2…)." This protects both of you from drift.
- **\`dev_server_start\` results carry a \`healthCheck\` field on success.** Before telling the user "the preview is ready," confirm \`healthCheck.status === 200\`. If it's 502 or empty, the server isn't actually serving — report that, don't paper over it.
- ALWAYS pass \`projectId\` to \`apps_create\` / \`databases_create\`. Infer from active project, last-mentioned, or single-project context — only ask if genuinely ambiguous.
- ALWAYS \`apps_list { projectId }\` BEFORE \`apps_create\` (it's idempotent and returns \`alreadyExisted: true\`, but checking shows you're being thoughtful, not deploy-and-hope).
- ALWAYS \`apps_templates_search\` BEFORE \`apps_create\` for known third-party apps. Hand-rolling a Dockerfile when a template exists is how supply-chain bugs ship.
- **NEVER delete-and-recreate to escape an error.** When a deploy fails with "Conflict. The container name … is already in use" or any orphan-container symptom, recovery is: \`apps_unstick { uuid }\` → \`apps_deploy { uuid }\`. Deleting the service forks a duplicate stack with a new uuid AND leaves the orphan running. We've shipped 4 orphan twenty-* services this way before. Don't repeat it.
- **If a deploy fails twice with the same error, STOP.** Surface the error and the two attempts; ask the user.
- **Tool results are authoritative; conversation history is not.** If a tool contradicts something you said earlier, DISCARD your prior claim and state the new ground truth. ("X is actually healthy — my earlier read was stale.") Do not paper over the contradiction.
- **Anchor on current state before troubleshooting.** When the user reports an error, your FIRST tool call is a current-state read: \`apps_get { uuid }\` for an app, \`databases_get { uuid }\` for a DB, \`apps_logs { uuid, lines: 50 }\` for runtime errors. The world has probably moved since they typed.
- **Trust idempotency.** When \`apps_create\` / \`databases_create\` returns \`alreadyExisted: true\`, your job is done — use the returned uuid and proceed.
- Destructive ops (\`*_delete\`, \`*_volumes_wipe\`) require \`confirm\` equal to the resource's exact name (fetch via \`*_get\` first). Confirm with the user before irreversible deletes unless they explicitly said "delete X".
- Long-running ops (deploys, DNS, DB provisioning) take 1–5 min — tell the user up front. Don't tight-loop polling.
- After \`ship\` or \`apps_deploy\`, the result is authoritative. Don't call \`gitea_*\` / \`shell_exec\` / \`apps_*\` to "verify" — read the response and report.
- Never fake success. Never imply something worked if it didn't.

${activeBlock}## Current workspace projects
${projectsText}

Today's date: ${new Date().toLocaleDateString("en-US", { weekday: "long", year: "numeric", month: "long", day: "numeric" })}.`;
}

/**
 * Reports whether any of the most recent tool results looks like a failure.
 *
 * Only messages with `role === "tool"` are considered, and only the last
 * `lookback` of them. A result counts as failed when its JSON payload has
 * `ok === false`, a non-zero numeric `exitCode`, a truthy
 * `healthCheck.status` that is >= 400, or a non-empty string `error`.
 * Results that are not strings, not valid JSON, or not JSON objects are
 * ignored.
 *
 * @param messages Conversation messages to scan.
 * @param lookback How many trailing tool messages to inspect (default 3).
 *                 A value that is not greater than zero inspects nothing.
 * @returns `true` if at least one inspected tool result signals failure.
 */
function lastToolResultsHadFailure(messages: ChatMessage[], lookback = 3) {
  // `slice(-0)` is `slice(0)` and would scan EVERY tool message, so an
  // empty (or invalid) window has to be handled before slicing.
  if (!(lookback > 0)) return false;

  const recentToolMsgs = messages
    .filter((m) => m.role === "tool")
    .slice(-lookback);

  for (const toolMsg of recentToolMsgs) {
    if (typeof toolMsg.content !== "string") continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(toolMsg.content);
    } catch {
      // non-JSON result, skip
      continue;
    }
    // Valid JSON that is not an object (null, number, string) carries no
    // status fields to inspect.
    if (parsed === null || typeof parsed !== "object") continue;

    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- tool payloads are free-form JSON
    const result = parsed as Record<string, any>;
    if (result.ok === false) return true;
    if (typeof result.exitCode === "number" && result.exitCode !== 0) {
      return true;
    }
    const healthStatus = result.healthCheck?.status;
    if (healthStatus && healthStatus >= 400) return true;
    if (typeof result.error === "string" && result.error.length > 0) {
      return true;
    }
  }
  return false;
}

export async function POST(request: Request) {
  await ensureChatTables();

  const session = await authSession();
  if (!session?.user?.email) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }

  let body: {
    thread_id: string;
    message: string;
    workspace: string;
    mcp_token?: string;
  };
  try {
    body = await request.json();
  } catch {
    return NextResponse.json({ error: "Invalid JSON" }, { status: 400 });
  }

  const { thread_id, message, workspace, mcp_token } = body;
  if (!thread_id || !message?.trim()) {
    return NextResponse.json(
      { error: "thread_id and message are required" },
      { status: 400 },
    );
  }

  const email = session.user.email;

  // Verify thread belongs to user, and capture its project scope (if any).
  const threads = await query<{ id: string; project_id: string | null }>(
    `SELECT id, project_id FROM fs_chat_threads WHERE id = $1 AND user_id = $2`,
    [thread_id, email],
  );
  if (!threads.length) {
    return NextResponse.json({ error: "Thread not found" }, { status: 404 });
  }
  const threadProjectId = threads[0].project_id;

  // Load message history (last 40 messages)
  const rows = await query<any>(
    `SELECT data FROM fs_chat_messages WHERE thread_id = $1 ORDER BY created_at DESC LIMIT 40`,
    [thread_id],
  );
  // Strip toolCalls from historical assistant messages because tool
  // responses are not persisted between turns. Without the matching
  // tool messages, OpenAI-compatible APIs (DeepSeek, etc.) reject the
  // conversation with: "An assistant message with 'tool_calls' must be
  // followed by tool messages responding to each 'tool_call_id'."
  // Gemini silently tolerates stale toolCalls, so we only hit this on
  // non-Gemini providers.
  const history: ChatMessage[] = rows.reverse().map((r: any) => {
    const msg = r.data;
    if (msg.role === "assistant" && msg.toolCalls?.length) {
      const rawResults = msg._rawToolResults ?? [];
      const summary = msg.toolCalls
        .map((tc: any) => {
          const tr = rawResults.find((r: any) => r.name === tc.name);
          let resultSig = "(no result captured)";
          if (tr) {
            try {
              const parsed =
                typeof tr.result === "string"
                  ? JSON.parse(tr.result)
                  : tr.result;
              if (parsed && typeof parsed === "object") {
                if (parsed.ok === false) {
                  resultSig = `ERROR: ${parsed.error ?? "unknown"}`;
                } else if (parsed.sha256) {
                  resultSig = `ok bytes=${parsed.bytes} sha=${parsed.sha256.slice(0, 8)}`;
                } else if (parsed.previewUrl) {
                  resultSig = `ok previewUrl=${parsed.previewUrl} health=${parsed.healthCheck?.status ?? "?"}`;
                } else if (parsed.uuid) {
                  resultSig = `ok uuid=${parsed.uuid}`;
                } else {
                  resultSig = "ok";
                }
              }
            } catch {
              resultSig = String(tr.result).slice(0, 80);
            }
          }
          const argSig = JSON.stringify(tc.args ?? {}).slice(0, 100);
          return `  - ${tc.name}(${argSig}) → ${resultSig}`;
        })
        .join("\n");
      const suffix = `\n\n[tools executed this turn:\n${summary}\n]`;
      msg.content = (msg.content ?? "") + suffix;
      msg.toolCalls = undefined;
    }
    if (typeof msg.content === "string") {
      msg.content = msg.content
        .replace(/<tool_calls>[\s\S]*?<\/tool_calls>/g, "")
        .replace(/<think>[\s\S]*?<\/think>/g, "")
        .trim();
    }
    return msg;
  });

  // Add user message
  const userMsg: ChatMessage = { role: "user", content: message.trim() };
  history.push(userMsg);
  await query(
    `INSERT INTO fs_chat_messages (thread_id, user_id, data) VALUES ($1, $2, $3)`,
    [thread_id, email, JSON.stringify(userMsg)],
  );

  // Update thread updatedAt
  await query(
    `UPDATE fs_chat_threads SET updated_at = NOW(), data = data || $2 WHERE id = $1`,
    [thread_id, JSON.stringify({ updatedAt: new Date().toISOString() })],
  );

  // Load projects for system prompt context
  const projectRows = await query<any>(
    `SELECT p.data FROM fs_projects p
     JOIN fs_users u ON u.id = p.user_id
     WHERE u.data->>'email' = $1
     ORDER BY (p.data->>'updatedAt') DESC NULLS LAST LIMIT 20`,
    [email],
  );
  const projects = projectRows.map((r: any) => r.data);

  // If the thread is project-scoped, pull the active project's data
  // (preferring fs_projects since the projects array is capped at 20).
  let activeProject: any = null;
  if (threadProjectId) {
    const found = projects.find((p: any) => p.id === threadProjectId);
    if (found) {
      activeProject = found;
    } else {
      const r = await query<{ data: any }>(
        `SELECT p.data FROM fs_projects p
          JOIN fs_users u ON u.id = p.user_id
          WHERE p.id = $1 AND u.data->>'email' = $2 LIMIT 1`,
        [threadProjectId, email],
      );
      if (r[0]?.data) activeProject = r[0].data;
    }
  }

  let systemPrompt = await buildSystemPrompt(
    projects,
    workspace,
    activeProject,
  );

  // Sentry-as-product Stage 4: auto-surface unresolved errors at
  // chat-turn start. We pull the last 6 hours' unresolved issues
  // for the active project; if anything has fired ≥2 times, we
  // append a [PROJECT HEALTH] block to the system prompt so the
  // AI is aware before the user even speaks. The AI decides
  // whether to mention them — usually yes if the user's first
  // message touches the affected area, otherwise a one-line FYI.
  // Single-occurrence errors are filtered out to avoid noise from
  // bots / one-off network blips.
  if (activeProject?.id) {
    try {
      const issues = await listRecentSentryIssues(activeProject.id, {
        sinceHours: 6,
        limit: 5,
      });
      const noteworthy = issues.filter((i) => i.count >= 2);
      if (noteworthy.length > 0) {
        const lines = noteworthy.map((i) => {
          const culprit = i.culprit ? ` — ${i.culprit}` : "";
          return `- ${i.title} (×${i.count}, last seen ${i.lastSeen})${culprit}`;
        });
        const healthBlock =
          `\n\n[PROJECT HEALTH — last 6 hours]\n` +
          `${noteworthy.length} unresolved Sentry issue${noteworthy.length === 1 ? "" : "s"}, count ≥ 2 (one-offs filtered):\n` +
          lines.join("\n") +
          `\n\nIf the user's message is about something that's broken, prefer the matching issue's stack trace over guessing — call \`project_error_detail { projectId, issueId }\` to fetch it. ` +
          `If the user's message is unrelated to these errors, you MAY proactively surface a one-liner ("FYI: X has been failing for users — want me to look?") but do not derail their actual question.`;
        systemPrompt += healthBlock;
      }
    } catch (err) {
      console.warn("[chat] auto-surface Sentry errors failed (non-fatal)", err);
    }
  }

  // Make sure the project's Gitea repo is cloned into the dev
  // container at /workspace/<slug>/ before the AI runs any
  // filesystem-mutating tools. Without this, anything the AI writes
  // gets stranded in a scratch volume and is invisible in the
  // Product/Hosting/Infrastructure tabs (those tabs read from Gitea
  // and Coolify, not from the dev container's volume).
  //
  // The clone is always kicked off fire-and-forget here (`void` +
  // `.catch`), so a slow or failing clone never blocks the chat turn;
  // it is a fast no-op when the repo is already present. Nothing in
  // this handler awaits it before the tool loop starts, so on a
  // project that doesn't have a dev container yet the AI's first
  // ensureDevContainer + shell.exec can race the clone and land in
  // an empty /workspace. The post-turn auto-commit step re-runs
  // ensureProjectRepoCloned (awaited) as a best-effort backstop.
  if (
    activeProject?.id &&
    activeProject?.slug &&
    typeof activeProject?.giteaCloneUrl === "string"
  ) {
    void ensureProjectRepoCloned({
      projectId: activeProject.id,
      projectSlug: activeProject.slug,
      giteaCloneUrl: activeProject.giteaCloneUrl,
    }).catch((err) => {
      console.warn(
        "[chat] pre-loop ensureProjectRepoCloned failed (non-fatal)",
        err,
      );
    });
  }

  // Base URL for internal MCP calls — pinned to the canonical origin,
  // not the incoming Host header (which can be spoofed).
  //
  // Vercel's VERCEL_URL system variable is a bare hostname (no
  // protocol), so the chosen origin is normalised to carry a scheme;
  // origins that already start with http:// or https:// pass through
  // unchanged.
  const withHttpsScheme = (origin: string): string =>
    /^https?:\/\//i.test(origin) ? origin : `https://${origin}`;
  const baseUrl =
    process.env.NODE_ENV === "development"
      ? "http://localhost:3000"
      : withHttpsScheme(
          process.env.NEXT_PUBLIC_SITE_URL ||
            process.env.VERCEL_URL ||
            "https://vibnai.com",
        );

  // Honor client-side abort (Stop button). When the user clicks Stop
  // the browser's AbortController fires `request.signal.aborted` and
  // the fetch stream is closed; we use it as a polite checkpoint
  // between rounds and tool calls so we (a) don't keep paying Gemini
  // for tokens the user no longer wants and (b) persist whatever the
  // assistant produced before the cancel.
  const clientSignal = request.signal;

  // Stream response
  const encoder = new TextEncoder();
  const stream = new ReadableStream({
    async start(controller) {
      let streamClosed = false;
      /**
       * Sends one SSE frame (`data: <json>` followed by a blank line) to
       * the client. Does nothing once the stream has been marked closed;
       * any failure while building or enqueueing the frame marks the
       * stream closed so later calls become no-ops.
       */
      function emit(chunk: object) {
        if (!streamClosed) {
          try {
            const payload = JSON.stringify(chunk);
            const frame = encoder.encode("data: " + payload + "\n\n");
            controller.enqueue(frame);
          } catch {
            // The controller may already have been closed by the abort
            // handler — stop trying to write to it.
            streamClosed = true;
          }
        }
      }
      /**
       * Closes the stream at most once. The closed flag is set before
       * calling `controller.close()`, and an error thrown by `close()`
       * is deliberately ignored.
       */
      function safeClose() {
        if (!streamClosed) {
          streamClosed = true;
          try {
            controller.close();
          } catch {
            // Ignored on purpose: closing is best-effort.
          }
        }
      }

      let messages = [...history];
      let round = 0;
      let assistantText = "";
      // Per-round text segments. The model emits one `resp.text` per
      // tool-loop round; we used to concatenate them all into one
      // `assistantText` blob and render that as a single chat bubble.
      // That made multi-round turns look like one giant run-on
      // paragraph ("now.Spinning up...first boot...The dev container
      // is ready!" with no breaks). Keeping them separate on the
      // server lets the client render each as its own bubble and
      // restores the segmentation on reload.
      const assistantTextSegments: string[] = [];
      const assistantToolCalls: ToolCall[] = [];
      let aborted = clientSignal.aborted;
      const onAbort = () => {
        aborted = true;
      };
      clientSignal.addEventListener("abort", onAbort);

      // Track per-turn signals we use for loop detection and silent-stretch
      // detection. The model has a strong tendency to grind through a
      // dozen+ tool calls in total silence (the user just sees ✓ pills
      // pile up); both safeguards below break that pattern.
      const toolFingerprints: string[] = [];
      let roundsSinceText = 0;
      let toolCallsSinceText = 0;
      let loopBreakReason: string | null = null;

      /**
       * Builds a coarse identity string for a tool call. The loop
       * detector counts these strings, so two calls with the same
       * fingerprint are treated as "the same call".
       *
       * - `shell_exec`: keyed by the first command verb that is not a
       *   `cd ...` segment of an `&&` chain; falls back to `shell` when
       *   no such verb exists.
       * - `fs_write` / `fs_edit` / `fs_read`: keyed by tool name + path.
       * - `dev_server_*`: all four tools share one `dev_server:<port>`
       *   key, so start/logs/stop cycles on one port count together.
       * - `apps_get` / `apps_logs` / `apps_deploy` / `apps_unstick`:
       *   keyed by tool name + uuid.
       * - anything else: tool name + first 80 chars of the JSON args.
       *
       * @param tc Tool call with a `name` and optional `args` object.
       * @returns The fingerprint string.
       */
      function fingerprintToolCall(tc: any) {
        const args = tc.args ?? {};
        switch (tc.name) {
          case "shell_exec": {
            // First non-cd verb (pkill, npm, curl, etc.). Empty segments
            // (empty command, leading/trailing `&&`) are skipped so they
            // cannot produce an empty verb that bypasses the fallback.
            const verb = String(args.command ?? "")
              .split("&&")
              .map((segment) => segment.trim())
              .find((segment) => segment !== "" && !segment.startsWith("cd "))
              ?.split(/\s+/)[0];
            return `shell_exec:${verb || "shell"}`;
          }
          case "fs_write":
          case "fs_edit":
          case "fs_read":
            return `${tc.name}:${args.path ?? ""}`;
          case "dev_server_start":
          case "dev_server_stop":
          case "dev_server_logs":
          case "dev_server_list":
            return `dev_server:${args.port ?? "?"}`;
          case "apps_get":
          case "apps_logs":
          case "apps_deploy":
          case "apps_unstick":
            return `${tc.name}:${args.uuid ?? ""}`;
          default: {
            const argSig = JSON.stringify(args).slice(0, 80);
            return `${tc.name}:${argSig}`;
          }
        }
      }

      try {
        // Tool-calling loop: use non-streaming so thought_signature is
        // always present in the complete response (required by thinking models).
        while (round < MAX_TOOL_ROUNDS) {
          if (aborted) break;
          round++;

          const toolDefs = mcp_token ? VIBN_TOOL_DEFINITIONS : [];

          // Every 2 silent rounds or 5 tool calls, nudge the model to surface a one-liner
          // status before continuing. This is the user's only signal of
          // life when a tool chain runs long.
          const isSilent = roundsSinceText >= 2 || toolCallsSinceText >= 5;
          let extraSystem = isSilent
            ? "\n\n[STATUS NUDGE] You have run " +
              `${toolCallsSinceText} tool call(s) over ${roundsSinceText} round(s) ` +
              "without sending the user any text. Before any more tool calls, " +
              "send ONE short sentence describing what you are currently working " +
              "on and why. The user is staring at silent tool pills."
            : "";

          if (MAX_TOOL_ROUNDS - round <= 3) {
            extraSystem += `\n\n[WARNING] You only have ${MAX_TOOL_ROUNDS - round} tool calls left before you are forcefully terminated. Stop exploring, make your final edits, and write your final response to the user NOW.`;
          }

          const resp = await callVibnChat({
            systemPrompt: systemPrompt + extraSystem,
            messages,
            tools: toolDefs,
            temperature: 0.7,
          });

          if (resp.error) {
            emit({ type: "error", error: resp.error });
            safeClose();
            return;
          }

          // Stream user-facing text to client
          if (resp.text) {
            assistantText += (assistantText ? "\n\n" : "") + resp.text;
            assistantTextSegments.push(resp.text);
            emit({ type: "text", text: resp.text });
            roundsSinceText = 0;
            toolCallsSinceText = 0;
          } else if (resp.toolCalls.length) {
            roundsSinceText++;
            toolCallsSinceText += resp.toolCalls.length;
          }

          // Stream the model's reasoning narration as a separate SSE
          // event type. We pay for thinking tokens whether or not we
          // ask for them, so making them visible is free transparency
          // — and it cures the "tool tray with no narrative" feel.
          if (resp.thoughts) {
            emit({ type: "thinking", text: resp.thoughts });
          }

          // Announce tool calls
          for (const tc of resp.toolCalls) {
            assistantToolCalls.push(tc);
            emit({ type: "tool_start", name: tc.name, args: tc.args });
          }

          // Save assistant turn
          messages.push({
            role: "assistant",
            content: resp.text,
            toolCalls: resp.toolCalls.length ? resp.toolCalls : undefined,
          });

          if (!resp.toolCalls.length) break;
          if (aborted) break;

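          // Loop detection. If the same tool-call fingerprint (see
          // fingerprintToolCall) shows up 3+ times among the last 10
          // tool calls of this turn, the user is watching it spin. We
          // record the reason here, still execute this round's tool
          // calls below (every tool_call_id must be answered), then
          // bail out and hand control back to the user with the last
          // tool result as context. The classic case:
          // dev_server.start → logs → stop → start → logs → stop → ...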
          for (const tc of resp.toolCalls) {
            toolFingerprints.push(fingerprintToolCall(tc));
          }
          // Sliding window of 10 (was 8); threshold 3 stays the same
          const window = toolFingerprints.slice(-10);
          const counts = new Map<string, number>();
          for (const fp of window) counts.set(fp, (counts.get(fp) ?? 0) + 1);
          const repeated = [...counts.entries()].find(([, n]) => n >= 3);
          if (repeated) {
            loopBreakReason = `Repeated ${repeated[0]} ${repeated[1]}× in last 10 calls`;
          }

          // Execute tool calls and add results. OpenAI-compatible APIs
          // (DeepSeek, etc.) require every tool_call_id to be answered with
          // a tool message before any user/assistant message — so recovery
          // nudges must run AFTER all tools from this assistant turn.
          const recoveryLines: string[] = [];
          for (const tc of resp.toolCalls) {
            if (aborted) break;
            const result = mcp_token
              ? await executeMcpTool(
                  tc.name,
                  tc.args,
                  mcp_token,
                  baseUrl,
                  activeProject?.id,
                )
              : JSON.stringify({ error: "No MCP token — read-only mode." });

            emit({
              type: "tool_result",
              name: tc.name,
              result: result.slice(0, 500),
            });

            messages.push({
              role: "tool",
              content: result,
              toolCallId: tc.id,
              toolName: tc.name,
              thoughtSignature: tc.thoughtSignature,
            });

            const recovery = detectKnownError(result);
            if (recovery) recoveryLines.push(formatRecoveryMessage(recovery));
          }
          for (const line of recoveryLines) {
            messages.push({ role: "user", content: line });
          }

          if (loopBreakReason) break;
        }

        // If the user clicked Stop, surface the cancel marker so the
        // client renders "(stopped by user)" inline with the partial
        // assistant message, then skip the round-cap recovery summary
        // (we shouldn't pay Gemini for a turn the user just canceled).
        if (aborted) {
          const stopMarker = assistantText
            ? "\n\n_(stopped by user)_"
            : "_(stopped by user before any response)_";
          assistantText += stopMarker;
          assistantTextSegments.push(stopMarker.trimStart());
          emit({ type: "text", text: stopMarker });
          emit({ type: "aborted" });
        }

        // If the loop ended with the user staring at a tool tray and no
        // narrative — whether because we hit MAX_TOOL_ROUNDS, broke a
        // detected loop, or the model voluntarily stopped emitting tools
        // without ever writing text — force one final no-tools summary
        // so we never abandon the user with silent ✓ pills. Confirmed
        // failure mode in prod: turn persisted with content_len=0 and
        // 20 toolCalls, user had to re-prompt to get any answer.
        // Known-error recovery nudges are appended as `user` messages
        // AFTER a round's tool results, so the raw last message can be
        // a nudge rather than a tool result. Skip trailing `user`
        // messages before checking; otherwise a turn that hit the
        // round cap or a detected loop right after a known error would
        // never get its wrap-up summary.
        let lastNonUserIdx = messages.length - 1;
        while (
          lastNonUserIdx >= 0 &&
          messages[lastNonUserIdx].role === "user"
        ) {
          lastNonUserIdx--;
        }
        const lastTurnHadTools =
          lastNonUserIdx >= 0 && messages[lastNonUserIdx].role === "tool";
        const needsRecovery =
          !aborted &&
          lastTurnHadTools &&
          (round >= MAX_TOOL_ROUNDS ||
            !!loopBreakReason ||
            assistantText.trim().length === 0 ||
            lastToolResultsHadFailure(messages));

        if (needsRecovery) {
          const failureNote = lastToolResultsHadFailure(messages)
            ? "Your last tool calls returned failures or non-2xx health checks. " +
              "Do NOT claim those operations succeeded. "
            : "";
          const reason = loopBreakReason
            ? `LOOP DETECTED: ${loopBreakReason}. Stop trying that approach. `
            : round >= MAX_TOOL_ROUNDS
              ? "You hit the tool-round cap. "
              : "";
          try {
            const summary = await callVibnChat({
              systemPrompt:
                systemPrompt +
                `\n\n[RECOVERY] ${reason}${failureNote}Send the user 1–3 short sentences right now: (a) what you actually accomplished or learned, (b) the specific blocker (last error message verbatim if there is one), (c) what you'll try next OR a question for the user. Do NOT call any tools.`,
              messages,
              tools: [],
              temperature: 0.3,
            });
            if (summary.text && summary.text.trim()) {
              assistantText += (assistantText ? "\n\n" : "") + summary.text;
              assistantTextSegments.push(summary.text);
              emit({ type: "text", text: summary.text });
            } else {
              // Gemini returned empty — fall back to a deterministic
              // status so the user never sees silent ✓ pills.
              const fallback = loopBreakReason
                ? `I hit a loop while working on this — ${loopBreakReason}. Want me to try a different approach, or do you want to take a look?`
                : `I ran a chain of ${assistantToolCalls.length} tool calls but didn't reach a clean stopping point. Want me to keep going, or take a different angle?`;
              assistantText += (assistantText ? "\n\n" : "") + fallback;
              assistantTextSegments.push(fallback);
              emit({ type: "text", text: fallback });
            }
            if (summary.thoughts) {
              emit({ type: "thinking", text: summary.thoughts });
            }
          } catch {
            const fallback = `I ran ${assistantToolCalls.length} tool calls but the wrap-up failed. Want me to retry, or try a different approach?`;
            assistantText += (assistantText ? "\n\n" : "") + fallback;
            assistantTextSegments.push(fallback);
            emit({ type: "text", text: fallback });
          }
        }

        // Persist final assistant message. We include `textSegments`
        // alongside the legacy concatenated `content` so the client
        // can render reloaded threads with the same per-round bubble
        // segmentation it shows during streaming. Older messages
        // (pre-this-fix) won't have textSegments and fall back to
        // single-bubble content rendering.
        const finalMsg: ChatMessage & {
          textSegments?: string[];
          _rawToolResults?: Array<{ name: string; args: any; result: string }>;
        } = {
          role: "assistant",
          content: assistantText,
          toolCalls: assistantToolCalls.length ? assistantToolCalls : undefined,
          textSegments: assistantTextSegments.length
            ? assistantTextSegments
            : undefined,
          _rawToolResults: assistantToolCalls.length ? [] : undefined,
        };

        // Option 1 implemented: Save the raw tool results directly into the database row
        // alongside the assistant message so it can be extracted later for fine-tuning.
        if (finalMsg._rawToolResults) {
          // We slice out the tool messages from the internal messages array we just built
          // during the loop and attach them to the final row payload.
          const toolResults = messages.filter((m) => m.role === "tool");
          finalMsg._rawToolResults = assistantToolCalls.map((tc) => {
            const tr = toolResults.find((m) => m.toolCallId === tc.id);
            return {
              name: tc.name,
              args: tc.args,
              result:
                typeof tr?.content === "string"
                  ? tr.content
                  : JSON.stringify(tr?.content || ""),
            };
          });
        }

        await query(
          `INSERT INTO fs_chat_messages (thread_id, user_id, data) VALUES ($1, $2, $3)`,
          [thread_id, email, JSON.stringify(finalMsg)],
        );

        // Commit any AI-made filesystem changes to the project's
        // Gitea repo and push to origin. This is what makes the AI's
        // work appear in the Product tab's Codebases view — without
        // it, every fs.write / shell.exec mutation stays trapped in
        // the dev container's volume.
        //
        // This step is awaited, not fire-and-forget (the commit/push
        // is raced against an 8 s timeout below): it runs BEFORE the
        // final done event so we can surface the commit result in the
        // UI (Fix 10).
        if (
          activeProject?.id &&
          activeProject?.slug &&
          typeof activeProject?.giteaCloneUrl === "string"
        ) {
          try {
            // Best-effort clone in case the pre-loop kick-off was
            // racing with container provisioning and never landed.
            await ensureProjectRepoCloned({
              projectId: activeProject.id,
              projectSlug: activeProject.slug,
              giteaCloneUrl: activeProject.giteaCloneUrl,
            }).catch(() => null);
            // Commit message: prefer the assistant's own first
            // sentence (one line, ≤200 chars). Falls back to a
            // generic checkpoint when the assistant only made
            // tool calls without prose.
            const firstSentence = (assistantText || "")
              .split(/(?<=[.!?])\s+/)[0]
              ?.trim()
              ?.slice(0, 180);
            const commitMessage = firstSentence || "AI checkpoint";

            const commitPromise = commitAndPushIfDirty({
              projectId: activeProject.id,
              projectSlug: activeProject.slug,
              message: commitMessage,
            });
            const timeoutPromise = new Promise<{
              committed: false;
              reason: string;
            }>((resolve) =>
              setTimeout(
                () => resolve({ committed: false, reason: "timeout" }),
                8000,
              ),
            );

            const result = (await Promise.race([
              commitPromise,
              timeoutPromise,
            ])) as any;

            if (result.committed) {
              emit({ type: "commit", sha: result.sha, pushed: result.pushed });
              console.log(
                `[chat] auto-commit project=${activeProject.slug} sha=${result.sha} pushed=${result.pushed}`,
              );
            } else if (
              result.reason &&
              result.reason !== "clean" &&
              result.reason !== "no_repo"
            ) {
              emit({ type: "commit_failed", reason: result.reason });
              console.warn(
                `[chat] auto-commit failed project=${activeProject.slug} reason=${result.reason}`,
              );
            }
          } catch (err) {
            emit({ type: "commit_failed", reason: String(err) });
            console.warn("[chat] auto-commit failed", err);
          }
        }

        // Fire-and-forget: ask the configured chat model for a 1-2
        // sentence "what got done" summary of the conversation so far,
        // persist it on the thread, and use the first user message
        // (truncated) as a stable title when one isn't set yet. This is
        // what powers the Sessions tab on the project Plan page — a
        // read-only chronological progress log.
        // Wrapped in try/catch + .catch — the task is not awaited, so
        // it may finish after `done` has been emitted and the stream
        // closed, and we don't want a summary failure to surface as an
        // error to the user.
        (async () => {
          try {
            const allMessages = [...history, finalMsg];
            // Only summarize if there's something worth summarizing.
            if (allMessages.length < 2) return;
            const transcript = allMessages
              .map((m) => {
                const text =
                  typeof m.content === "string"
                    ? m.content
                    : JSON.stringify(m.content);
                return `${m.role.toUpperCase()}: ${text.slice(0, 1200)}`;
              })
              .join("\n\n");
            const sumResp = await callVibnChat({
              systemPrompt:
                "You are summarizing a chat session for a project log. " +
                "Write 1-2 sentences (max 200 chars) describing what was actually attempted, decided, or shipped in this conversation. " +
                "Past tense, plain language, no preamble, no headings. " +
                "If nothing of substance happened, write a single short sentence describing the topic.",
              messages: [{ role: "user", content: transcript.slice(0, 8000) }],
              temperature: 0.3,
            });
            const summary = (sumResp.text || "").trim().slice(0, 280);
            // Pick a title only if the existing one is missing or generic.
            const firstUser = allMessages.find((m) => m.role === "user");
            const firstText =
              typeof firstUser?.content === "string" ? firstUser.content : "";
            const fallbackTitle = firstText
              .replace(/\s+/g, " ")
              .trim()
              .slice(0, 60);
            const update: Record<string, unknown> = {};
            if (summary) update.summary = summary;
            if (fallbackTitle) update.title = fallbackTitle;
            if (Object.keys(update).length > 0) {
              await query(
                `UPDATE fs_chat_threads
                    SET data = data || $2
                  WHERE id = $1
                    AND (
                      ($2::jsonb ? 'title') IS FALSE
                      OR data->>'title' IS NULL
                      OR data->>'title' = ''
                      OR data->>'title' = 'New conversation'
                      OR ($2::jsonb ? 'summary')
                    )`,
                [thread_id, JSON.stringify(update)],
              );
            }
          } catch {
            // best-effort; silent failure
          }
        })().catch(() => {});

        // Fire-and-forget: auto-extract plan updates (tasks, decisions,
        // vision) from the conversation using a cheap Gemini Flash model.
        // Deduplicates against existing plan items by title.
        (async () => {
          try {
            if (!threadProjectId) return;
            const allMessages = [...history, finalMsg];
            if (allMessages.length < 2) return;
            const transcript = allMessages
              .map((m) => {
                const text =
                  typeof m.content === "string"
                    ? m.content
                    : JSON.stringify(m.content);
                return `${m.role.toUpperCase()}: ${text.slice(0, 1200)}`;
              })
              .join("\n\n");
            const result = await autoExtractPlanUpdates(
              threadProjectId,
              transcript,
            );
            if (result) {
              console.log(
                "[chat] plan-extract:",
                `${result.tasks} tasks, ${result.decisions} decisions, vision=${result.vision}`,
              );
            }
          } catch (err) {
            console.warn("[chat] plan-extract failed (non-fatal):", err);
          }
        })().catch(() => {});

        emit({ type: "done" });
        safeClose();
      } catch (e) {
        // AbortError is the expected shape when the client cancels
        // mid-Gemini-call — don't surface it as a real error.
        const isAbort =
          aborted ||
          (e instanceof Error &&
            (e.name === "AbortError" || /aborted/i.test(e.message)));
        if (!isAbort) {
          emit({
            type: "error",
            error: e instanceof Error ? e.message : String(e),
          });
        } else {
          emit({ type: "aborted" });
        }
        safeClose();
      } finally {
        clientSignal.removeEventListener("abort", onAbort);
      }
    },
    cancel() {
      // Called when the stream's consumer goes away, e.g. the browser
      // disconnected (tab closed, navigated away). Intentionally empty:
      // `start` registers an abort listener on `request.signal` that
      // sets its `aborted` flag, and the tool loop checks that flag
      // between rounds and before each tool call, so it bails at the
      // next checkpoint.
      // NOTE(review): this relies on the runtime aborting
      // `request.signal` when the client disconnects — confirm, since
      // nothing here flips the flag directly.
    },
  });

  return new Response(stream, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      Connection: "keep-alive",
    },
  });
}