/**
 * POST /api/chat
 *
 * Streaming chat endpoint. Accepts a thread_id + user message,
 * loads history, calls Gemini 3.1 Pro, runs the tool loop,
 * persists messages, and streams SSE back to the client.
 *
 * SSE event shapes:
 *   data: {"type":"text","text":"..."}
 *   data: {"type":"tool_start","name":"...","args":{}}
 *   data: {"type":"tool_result","name":"...","result":"..."}
 *   data: {"type":"aborted"}
 *   data: {"type":"done"}
 *   data: {"type":"error","error":"..."}
 */
import { NextResponse } from 'next/server';
import { authSession } from '@/lib/auth/session-server';
import { query } from '@/lib/db-postgres';
import { callGeminiChat, streamGeminiChat } from '@/lib/ai/gemini-chat';
import { VIBN_TOOL_DEFINITIONS, executeMcpTool } from '@/lib/ai/vibn-tools';
import type { ChatMessage, ToolCall } from '@/lib/ai/gemini-chat';

// Raised from the original 6 (the cap is currently 18) because Path B
// chains (devcontainer.ensure → fs.read → fs.edit → kill → start →
// curl → logs) routinely fire 7-10 tool calls in one user turn. When
// the cap IS hit, we still emit a narrative summary instead of leaving
// the user staring at a tool tray (see the no-tools follow-up call below).
// Upper bound on model → tool round-trips within a single user turn.
const MAX_TOOL_ROUNDS = 18;

// True once the chat tables/indexes have been created by this process.
let chatTablesReady = false;
// In-flight bootstrap shared by concurrent callers; reset when it settles.
let chatTablesInit: Promise<void> | null = null;

/**
 * Lazily creates the chat tables and indexes (idempotent DDL).
 *
 * Concurrent callers share one in-flight run so the DDL is not issued
 * several times in parallel on a cold start. If the run fails, the
 * shared promise is cleared so a later request can retry.
 */
async function ensureChatTables() {
  if (chatTablesReady) return;
  if (!chatTablesInit) {
    chatTablesInit = (async () => {
      await query(
        `
        CREATE TABLE IF NOT EXISTS fs_chat_threads (
          id TEXT PRIMARY KEY DEFAULT gen_random_uuid()::text,
          user_id TEXT NOT NULL,
          workspace TEXT NOT NULL DEFAULT '',
          data JSONB NOT NULL DEFAULT '{}',
          created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
          updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
        );
        CREATE INDEX IF NOT EXISTS fs_chat_threads_user_ws_idx
          ON fs_chat_threads (user_id, workspace, updated_at DESC);
        CREATE TABLE IF NOT EXISTS fs_chat_messages (
          id BIGSERIAL PRIMARY KEY,
          thread_id TEXT NOT NULL REFERENCES fs_chat_threads(id) ON DELETE CASCADE,
          user_id TEXT NOT NULL,
          data JSONB NOT NULL DEFAULT '{}',
          created_at TIMESTAMPTZ NOT NULL DEFAULT now()
        );
        CREATE INDEX IF NOT EXISTS fs_chat_messages_thread_idx
          ON fs_chat_messages (thread_id, created_at ASC);
        `,
        [],
      );
      chatTablesReady = true;
    })().finally(() => {
      chatTablesInit = null;
    });
  }
  await chatTablesInit;
}

/** Fields of a project record that the system prompt reads. */
interface PromptProject {
  id?: string;
  name?: string;
  productName?: string;
  status?: string;
  productVision?: string;
}

/**
 * Builds the system prompt for one chat turn.
 *
 * @param projects  Project records to list under "Current workspace projects".
 * @param workspace Workspace name interpolated into the prompt text.
 * @returns The full prompt, ending with today's date (server locale clock).
 */
function buildSystemPrompt(projects: readonly PromptProject[], workspace: string): string {
  const projectsText = projects.length
    ? projects
        .map((p) => {
          // Only strings can be sliced; any other value is simply omitted.
          const vision =
            typeof p.productVision === 'string' && p.productVision
              ? ': ' + p.productVision.slice(0, 120)
              : '';
          return `- "${p.productName || p.name}" (id: ${p.id}, status: ${p.status || 'defining'})${vision}`;
        })
        .join('\n')
    : '(no projects yet)';

  return `You are Vibn AI, an expert product and infrastructure assistant embedded in the Vibn platform. You are talking to the owner of the "${workspace}" workspace.

## How Vibn is structured

- **Workspace** ("${workspace}") — the tenant boundary. One per user. Owns the Gitea org and a fleet of Coolify projects. You can ONLY see and touch resources in this workspace.
- **Project** — an initiative the user is building (e.g. "Twenty CRM", "My Blog"). Each project has its OWN isolated Coolify project, so all its apps + databases + services are grouped together.

A project has two facets that are part of ONE thing — never describe them as separate:
- Planning side: name, vision/objectives, requirements (from \`projects_get\`)
- Live side: deployed apps + services (from \`projects_get → possibleDeployments[]\` and \`apps_list { projectId }\`)

## How to answer questions

- "What is project X?" → \`projects_get { id }\`. The result includes both planning details and the linked deployments.
- "What's running / what has a domain?" → \`apps_list\` (no args) for everything in the workspace, or \`apps_list { projectId }\` for one project.
- "Show me logs / containers / env" → resolve the app uuid first via \`apps_list\`, then call \`apps_logs\` / \`apps_containers_list\` / \`apps_envs_list\`.
- "Find an open source X" → \`github_search\` (always include \`license:mit\` unless the user says otherwise), then \`github_file\` to read READMEs / docker-compose.yml / design system entry points before recommending.
- "What's our docs say about Y?" → \`http_fetch\` against the relevant URL.

## How to deploy

**Third-party app (Twenty CRM, n8n, Ghost, Supabase, Pocketbase, etc.)**
1. \`apps_templates_search { query }\` — find the official one-click template.
2. \`apps_create { projectId, name, template, domain }\` — deploy from template into the right project's Coolify namespace.
3. Watch \`apps_get { uuid }\` for status; surface the live URL once \`fqdn\` is set.

**Custom Docker image**
1. \`apps_create { projectId, name, dockerImage, domain, envsJson }\`.
2. \`apps_deploy { uuid }\` if it doesn't auto-deploy.

**Database**
1. \`databases_create { projectId, name, type }\` (type: postgres, mysql, redis, mongodb, mariadb, dragonfly, clickhouse, keydb).
2. \`databases_get { uuid }\` returns the internal connection URL — inject it into the app via \`apps_envs_set\`.

**Domain**
1. \`domains_search { query }\` to check availability + price.
2. \`domains_register { domain }\` to buy it (uses workspace billing).
3. \`apps_domains_set { uuid, domains }\` to attach. DNS + Traefik are wired automatically.

## Writing code (PREFERRED: dev container, shell-first)

Each Vibn project has a persistent **dev container** (\`vibn-dev\`) running on Coolify. You write code by \`shell_exec\`-ing inside it and editing files with \`fs_*\` tools. This is dramatically faster than committing to Gitea and waiting for redeploys (sub-second feedback vs ~5 min).

**Always start a coding session with**:
1. \`devcontainer_ensure { projectId }\` — idempotent. First call ~10s (provisions a Coolify service); subsequent calls return immediately.

**Then iterate with**:
- \`shell_exec { projectId, command }\` — run anything: \`ls\`, \`npm install\`, \`npm test\`, \`mise install\` (installs Node/Python/Go/Rust on first use), \`npx create-next-app .\`, \`git status\`. Cwd defaults to \`/workspace\`.
- \`fs_read { projectId, path }\` — inspect a file.
- \`fs_write { projectId, path, content }\` — create or overwrite a file.
- \`fs_edit { projectId, path, oldString, newString }\` — surgical search/replace. Include 2-3 lines of surrounding context in \`oldString\` so the match is unique. Fails fast if missing or non-unique.
- \`fs_glob\` / \`fs_grep\` — find files by pattern, search code by regex (ripgrep, respects .gitignore).
- \`fs_list\`, \`fs_delete\` — directory listing, delete.

**Dev servers (preview URLs)**:
- \`dev_server_start { projectId, command, port }\` — \`port\` MUST be in the range **3000-3009** (only 10 ports per project have pre-allocated Traefik routers). Pick 3000 for the primary app; use 3001-3009 only when the user is running multiple servers concurrently (e.g. frontend + API). The returned \`previewUrl\` is the public URL once DNS is wired.
- \`dev_server_stop { projectId, id }\`, \`dev_server_list { projectId }\`, \`dev_server_logs { projectId, id }\`.
- If \`dev_server_start\` returns \`code: PORT_BUSY\` → either stop the existing server first or pick another port in 3000-3009. Don't blindly retry the same port.

**Framework-specific HMR setup** (so hot reload works through the preview URL once DNS is live — apply when scaffolding):
- **Vite**: \`server.host: '0.0.0.0'\`, \`server.hmr.clientPort: 443\`, \`server.hmr.protocol: 'wss'\`. Vite's default localhost binding will appear to work but break HMR through Traefik.
- **Next dev**: \`next dev -p 3000 -H 0.0.0.0\`. Next handles WSS HMR automatically through proxies.
- **Express / plain Node**: bind \`0.0.0.0\` (we set \`HOST=0.0.0.0\` env automatically, but verify the framework respects it).

**End-to-end recipe for "build me X"**:
1. \`devcontainer_ensure { projectId }\`.
2. \`shell_exec { projectId, command: 'npx create-next-app@latest . --yes' }\` (or whichever scaffold fits — search GitHub first if the user wants an OSS starting point).
3. \`shell_exec\` to run \`npm install\`, then iterate with \`fs_edit\` / \`fs_write\` to customize.
4. \`shell_exec { command: 'npm run dev -- --port 3000' }\` to verify locally (preview URLs land in week 2).
5. When the user says "ship it" — for now, \`shell_exec\` a \`git add . && git commit -m "..." && git push\` to push to the Gitea repo, then \`apps_create\` to wire up the production deployment. (A dedicated \`ship\` tool lands soon.)

**Rules**:
- Stay under \`/workspace\`. The fs_* tools enforce this; for system paths use \`shell_exec\` deliberately.
- The container has no route to internal Vibn services (vibn-postgres, etc.) by design.
- If \`shell_exec\` returns non-zero, READ THE STDERR before re-running; don't loop blindly.

## Gitea repo orchestration (one-time setup)

For creating new repos, branching, and listing what already exists:
- \`gitea_repos_list\`, \`gitea_repo_get\`, \`gitea_repo_create\`.
- \`gitea_branches_list\`, \`gitea_branch_create\`.

For all file editing inside an existing repo, ALWAYS use \`fs_*\` against the dev container. The \`ship\` tool will then push your changes to Gitea in one commit.

## Troubleshooting

- Deploy stuck or "exited (1)" → \`apps_logs { uuid }\` and \`apps_containers_list { uuid }\`. Common causes: missing env var, wrong port, image pull failure.
- 502 / "no available server" → app probably has no public domain yet. Check \`apps_get\`; if \`fqdn\` is empty, attach a domain.
- "tenant" / "does not belong to" errors → the uuid you passed isn't in this workspace. Re-list with \`apps_list\` to grab a valid one.
- Compose stack acting weird → \`apps_repair { uuid }\` to re-apply post-deploy fixes (Traefik labels, port forwarding).
- Need to nuke and re-deploy → \`apps_delete { uuid, confirm }\` (confirm must equal the app's exact name; fetch via \`apps_get\` first), then re-create.

## Hard rules

- ALWAYS pass \`projectId\` to \`apps_create\` and \`databases_create\`. If the user didn't say which project, ask once, then proceed.
- ALWAYS call \`apps_templates_search\` BEFORE \`apps_create\` when the user names a known third-party app — don't hand-roll a Docker image when a maintained template exists.
- Destructive ops (\`*_delete\`, \`*_volumes_wipe\`) require \`confirm\` equal to the resource's exact name. Always fetch the name first with a \`*_get\` call.
- Long-running ops (deploys, DNS provisioning, db provisioning) take 1–5 min. Tell the user up front so they don't think you're stuck.
- Be concise and action-oriented. If the user says "deploy X", do it — don't write a tutorial.
- After every tool call, summarize the result in 1–2 sentences. Don't dump raw JSON unless asked.
- Format app names, URLs, env keys, UUIDs, and file paths in backticks.
- If a tool errors and you don't understand why, say so honestly and suggest the next diagnostic call.

## Current workspace projects

${projectsText}

Today's date: ${new Date().toLocaleDateString('en-US', { weekday: 'long', year: 'numeric', month: 'long', day: 'numeric' })}.`;
}

/** Validated shape of the POST body. */
interface ChatRequestBody {
  thread_id: string;
  message: string;
  workspace: string;
  mcp_token?: string;
}

/**
 * Validates an already-JSON-parsed request body.
 *
 * @param raw Whatever `request.json()` produced (may be null, an array, a number…).
 * @returns The typed body, or `null` when `thread_id` / `message` are missing,
 *          not strings, or `message` is blank. A non-string `workspace`
 *          becomes `''`; a non-string or empty `mcp_token` becomes `undefined`.
 */
function parseChatBody(raw: unknown): ChatRequestBody | null {
  if (typeof raw !== 'object' || raw === null) return null;
  const { thread_id, message, workspace, mcp_token } = raw as Record<string, unknown>;
  if (typeof thread_id !== 'string' || !thread_id) return null;
  if (typeof message !== 'string' || !message.trim()) return null;
  return {
    thread_id,
    message,
    workspace: typeof workspace === 'string' ? workspace : '',
    mcp_token: typeof mcp_token === 'string' && mcp_token ? mcp_token : undefined,
  };
}

/**
 * POST handler: authenticates, validates the body, stores the user message,
 * runs the Gemini tool loop and streams the result back as SSE.
 *
 * Responds 401 (no session), 400 (bad JSON / missing fields) or 404 (thread
 * not owned by the caller) as JSON; otherwise returns a `text/event-stream`.
 */
export async function POST(request: Request) {
  const session = await authSession();
  if (!session?.user?.email) {
    return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
  }

  let rawBody: unknown;
  try {
    rawBody = await request.json();
  } catch {
    return NextResponse.json({ error: 'Invalid JSON' }, { status: 400 });
  }

  // Reject wrong-typed fields up front instead of crashing on `.trim()`.
  const body = parseChatBody(rawBody);
  if (!body) {
    return NextResponse.json({ error: 'thread_id and message are required' }, { status: 400 });
  }
  const { thread_id, message, workspace, mcp_token } = body;

  const email = session.user.email;

  // Only touch the schema once the request is known to be authenticated and well-formed.
  await ensureChatTables();

  // Verify thread belongs to user
  const threads = await query(
    `SELECT id FROM fs_chat_threads WHERE id = $1 AND user_id = $2`,
    [thread_id, email],
  );
  if (!threads.length) {
    return NextResponse.json({ error: 'Thread not found' }, { status: 404 });
  }

  // Load message history (last 40 messages)
  const rows = await query(
    `SELECT data FROM fs_chat_messages WHERE thread_id = $1 ORDER BY created_at DESC LIMIT 40`,
    [thread_id],
  );
  const history: ChatMessage[] = rows.reverse().map((r: any) => r.data);

  // Add user message
  const userMsg: ChatMessage = { role: 'user', content: message.trim() };
  history.push(userMsg);

  await query(
    `INSERT INTO fs_chat_messages (thread_id, user_id, data) VALUES ($1, $2, $3)`,
    [thread_id, email, JSON.stringify(userMsg)],
  );

  // Update thread updatedAt
  await query(
    `UPDATE fs_chat_threads SET updated_at = NOW(), data = data || $2 WHERE id = $1`,
    [thread_id, JSON.stringify({ updatedAt: new Date().toISOString() })],
  );

  // Load projects for system prompt context
  const projectRows = await query(
    `SELECT p.data FROM fs_projects p
       JOIN fs_users u ON u.id = p.user_id
      WHERE u.data->>'email' = $1
      ORDER BY (p.data->>'updatedAt') DESC NULLS LAST
      LIMIT 20`,
    [email],
  );
  const projects = projectRows.map((r: any) => r.data);

  const systemPrompt = buildSystemPrompt(projects, workspace);

  // Base URL for internal MCP calls
  const host = request.headers.get('host') || 'vibnai.com';
  const proto = host.startsWith('localhost') ? 'http' : 'https';
  const baseUrl = `${proto}://${host}`;

  // Honor client-side abort (Stop button). When the user clicks Stop
  // the browser's AbortController fires `request.signal.aborted` and
  // the fetch stream is closed; we use it as a polite checkpoint
  // between rounds and tool calls so we (a) don't keep paying Gemini
  // for tokens the user no longer wants and (b) persist whatever the
  // assistant produced before the cancel.
  const clientSignal = request.signal;

  // Declared outside the stream so both `start()` and `cancel()` can set it.
  let aborted = clientSignal.aborted;
  const onAbort = () => {
    aborted = true;
  };

  // Stream response
  const encoder = new TextEncoder();
  const stream = new ReadableStream<Uint8Array>({
    async start(controller) {
      let streamClosed = false;

      // Writes one SSE frame; becomes a no-op once the stream is closed.
      function emit(chunk: object) {
        if (streamClosed) return;
        try {
          controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
        } catch {
          // controller may have been closed by the abort handler
          streamClosed = true;
        }
      }

      // Closes the stream at most once and tolerates an already-closed controller.
      function safeClose() {
        if (streamClosed) return;
        streamClosed = true;
        try {
          controller.close();
        } catch {}
      }

      const messages = [...history];
      let round = 0;
      let assistantText = '';
      const assistantToolCalls: ToolCall[] = [];

      clientSignal.addEventListener('abort', onAbort);

      try {
        // Tool-calling loop: use non-streaming so thought_signature is
        // always present in the complete response (required by thinking models).
        while (round < MAX_TOOL_ROUNDS) {
          if (aborted) break;
          round++;
          const toolDefs = mcp_token ? VIBN_TOOL_DEFINITIONS : [];
          const resp = await callGeminiChat({ systemPrompt, messages, tools: toolDefs, temperature: 0.7 });

          if (resp.error) {
            emit({ type: 'error', error: resp.error });
            // Use the guarded close so `streamClosed` stays accurate and an
            // already-closed controller cannot throw here.
            safeClose();
            return;
          }

          // Stream text to client
          if (resp.text) {
            assistantText += resp.text;
            emit({ type: 'text', text: resp.text });
          }

          // Announce tool calls
          for (const tc of resp.toolCalls) {
            assistantToolCalls.push(tc);
            emit({ type: 'tool_start', name: tc.name, args: tc.args });
          }

          // Save assistant turn
          messages.push({
            role: 'assistant',
            content: resp.text,
            toolCalls: resp.toolCalls.length ? resp.toolCalls : undefined,
          });

          if (!resp.toolCalls.length) break;
          if (aborted) break;

          // Execute tool calls and add results
          for (const tc of resp.toolCalls) {
            if (aborted) break;
            const result = mcp_token
              ? await executeMcpTool(tc.name, tc.args, mcp_token, baseUrl)
              : JSON.stringify({ error: 'No MCP token — read-only mode.' });

            // The client only gets a 500-char preview; the model gets the full result.
            emit({ type: 'tool_result', name: tc.name, result: result.slice(0, 500) });

            messages.push({
              role: 'tool',
              content: result,
              toolCallId: tc.id,
              toolName: tc.name,
              thoughtSignature: tc.thoughtSignature,
            });
          }
        }

        // If the user clicked Stop, surface the cancel marker so the
        // client renders "(stopped by user)" inline with the partial
        // assistant message, then skip the round-cap recovery summary
        // (we shouldn't pay Gemini for a turn the user just canceled).
        if (aborted) {
          const stopMarker = assistantText
            ? '\n\n_(stopped by user)_'
            : '_(stopped by user before any response)_';
          assistantText += stopMarker;
          emit({ type: 'text', text: stopMarker });
          emit({ type: 'aborted' });
        }

        // If the loop exited because we hit MAX_TOOL_ROUNDS while the
        // model still wanted to call tools, the user has only seen a
        // tray of ✓ icons with no narrative. Force one final no-tools
        // call so we always end on a human-readable summary.
        const lastTurnHadTools = messages.length > 0 && messages[messages.length - 1].role === 'tool';
        if (!aborted && round >= MAX_TOOL_ROUNDS && lastTurnHadTools) {
          try {
            const summary = await callGeminiChat({
              systemPrompt:
                systemPrompt +
                '\n\nYou have just executed a chain of tool calls. Summarize the result for the user in 1-3 sentences. Do NOT call any more tools.',
              messages,
              tools: [],
              temperature: 0.3,
            });
            if (summary.text) {
              assistantText += summary.text;
              emit({ type: 'text', text: summary.text });
            }
          } catch {
            // Don't let a failed summary kill the stream.
          }
        }

        // Persist final assistant message
        // NOTE(review): only the user message and this final assistant message
        // are stored — intermediate `tool` results are not — so replayed history
        // carries toolCalls without matching results. Confirm callGeminiChat
        // tolerates that shape.
        const finalMsg: ChatMessage = {
          role: 'assistant',
          content: assistantText,
          toolCalls: assistantToolCalls.length ? assistantToolCalls : undefined,
        };
        await query(
          `INSERT INTO fs_chat_messages (thread_id, user_id, data) VALUES ($1, $2, $3)`,
          [thread_id, email, JSON.stringify(finalMsg)],
        );

        emit({ type: 'done' });
        safeClose();
      } catch (e) {
        // AbortError is the expected shape when the client cancels
        // mid-Gemini-call — don't surface it as a real error.
        const isAbort =
          aborted ||
          (e instanceof Error && (e.name === 'AbortError' || /aborted/i.test(e.message)));
        if (!isAbort) {
          emit({ type: 'error', error: e instanceof Error ? e.message : String(e) });
        } else {
          emit({ type: 'aborted' });
        }
        safeClose();
      } finally {
        clientSignal.removeEventListener('abort', onAbort);
      }
    },
    cancel() {
      // Consumer went away (tab closed, navigated away). Flip the flag
      // here as well, so the loop bails at its next checkpoint even if
      // the request signal's `abort` event never fires.
      aborted = true;
    },
  });

  return new Response(stream, {
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache',
      Connection: 'keep-alive',
    },
  });
}