Files
vibn-frontend/app/api/chat/route.ts
Mark Henderson 4f84a19e75 feat(chat): Stop button to cancel in-flight AI turns
Standard chat-app pattern: while the AI is streaming or running
tools, the Send button morphs into a Stop control (filled square
inside a faded spinner). Click it (or press Esc) to abort the turn.

Why: with MAX_TOOL_ROUNDS=18, a confused tool-loop can chew through
60-90s of compute and tokens. The user had no way to interrupt — they
just watched ✓ icons accumulate. Stop fixes that.

How:

Client (chat-panel.tsx):
- abortRef holds the in-flight AbortController; lives in a ref so the
  Stop button can reach it without re-rendering on every chunk.
- sendMessage creates a fresh controller and passes signal to fetch.
- cancelMessage calls .abort(); also bound to Escape while sending.
- Button morph: while `sending`, render lucide Square overlaid on a
  faded Loader2 spin, switch onClick to cancelMessage, swap aria/title
  to "Stop generating (Esc)".
- Catch DOMException AbortError separately from network errors and
  append "(stopped by user)" to the partial assistant message.
- Textarea no longer disabled during streaming so users can queue
  the next prompt; Enter still won't submit until the turn ends.

Server (app/api/chat/route.ts):
- request.signal is captured before the ReadableStream and an `aborted`
  flag is flipped on the addEventListener('abort', ...) callback.
- Loop checkpoints `aborted` (a) at the top of every round, (b) before
  the inner tool-call loop, (c) before each individual executeMcpTool
  call. Picks the next safe boundary instead of yanking mid-call.
- On abort: emit a "(stopped by user)" text chunk + an "aborted" event,
  skip the round-cap recovery summary (don't pay for tokens the user
  just canceled), persist the partial assistant message normally.
- Fetch errors that come from the abort propagating into Gemini's HTTP
  client are recognized and downgraded from "error" to "aborted".
- safeClose() guards against double controller.close() when the abort
  races with normal completion.

Made-with: Cursor
2026-04-28 14:56:35 -07:00

418 lines
19 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* POST /api/chat
*
* Streaming chat endpoint. Accepts a thread_id + user message,
* loads history, calls Gemini 3.1 Pro, runs the tool loop,
* persists messages, and streams SSE back to the client.
*
* SSE event shapes:
* data: {"type":"text","text":"..."}
* data: {"type":"tool_start","name":"...","args":{}}
* data: {"type":"tool_result","name":"...","result":"..."}
* data: {"type":"done"}
* data: {"type":"error","error":"..."}
*/
import { NextResponse } from 'next/server';
import { authSession } from '@/lib/auth/session-server';
import { query } from '@/lib/db-postgres';
import { callGeminiChat, streamGeminiChat } from '@/lib/ai/gemini-chat';
import { VIBN_TOOL_DEFINITIONS, executeMcpTool } from '@/lib/ai/vibn-tools';
import type { ChatMessage, ToolCall } from '@/lib/ai/gemini-chat';
// Bumped from 6 to 12 because Path B chains (devcontainer.ensure →
// fs.read → fs.edit → kill → start → curl → logs) routinely fire 7-10
// tool calls in one user turn. When the cap IS hit, we still emit a
// narrative summary instead of leaving the user staring at a tool tray
// (see the no-tools follow-up call below).
const MAX_TOOL_ROUNDS = 18;
let chatTablesReady = false;
async function ensureChatTables() {
if (chatTablesReady) return;
await query(`
CREATE TABLE IF NOT EXISTS fs_chat_threads (
id TEXT PRIMARY KEY DEFAULT gen_random_uuid()::text,
user_id TEXT NOT NULL,
workspace TEXT NOT NULL DEFAULT '',
data JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE INDEX IF NOT EXISTS fs_chat_threads_user_ws_idx
ON fs_chat_threads (user_id, workspace, updated_at DESC);
CREATE TABLE IF NOT EXISTS fs_chat_messages (
id BIGSERIAL PRIMARY KEY,
thread_id TEXT NOT NULL REFERENCES fs_chat_threads(id) ON DELETE CASCADE,
user_id TEXT NOT NULL,
data JSONB NOT NULL DEFAULT '{}',
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE INDEX IF NOT EXISTS fs_chat_messages_thread_idx
ON fs_chat_messages (thread_id, created_at ASC);
`, []);
chatTablesReady = true;
}
function buildSystemPrompt(projects: any[], workspace: string): string {
const projectsText = projects.length
? projects
.map(
(p: any) =>
`- "${p.productName || p.name}" (id: ${p.id}, status: ${p.status || 'defining'})${p.productVision ? ': ' + p.productVision.slice(0, 120) : ''}`,
)
.join('\n')
: '(no projects yet)';
return `You are Vibn AI, an expert product and infrastructure assistant embedded in the Vibn platform.
You are talking to the owner of the "${workspace}" workspace.
## How Vibn is structured
- **Workspace** ("${workspace}") — the tenant boundary. One per user. Owns the Gitea org and a fleet of Coolify projects. You can ONLY see and touch resources in this workspace.
- **Project** — an initiative the user is building (e.g. "Twenty CRM", "My Blog"). Each project has its OWN isolated Coolify project, so all its apps + databases + services are grouped together. A project has two facets that are part of ONE thing — never describe them as separate:
- Planning side: name, vision/objectives, requirements (from \`projects_get\`)
- Live side: deployed apps + services (from \`projects_get → possibleDeployments[]\` and \`apps_list { projectId }\`)
## How to answer questions
- "What is project X?" → \`projects_get { id }\`. The result includes both planning details and the linked deployments.
- "What's running / what has a domain?" → \`apps_list\` (no args) for everything in the workspace, or \`apps_list { projectId }\` for one project.
- "Show me logs / containers / env" → resolve the app uuid first via \`apps_list\`, then call \`apps_logs\` / \`apps_containers_list\` / \`apps_envs_list\`.
- "Find an open source X" → \`github_search\` (always include \`license:mit\` unless the user says otherwise), then \`github_file\` to read READMEs / docker-compose.yml / design system entry points before recommending.
- "What's our docs say about Y?" → \`http_fetch\` against the relevant URL.
## How to deploy
**Third-party app (Twenty CRM, n8n, Ghost, Supabase, Pocketbase, etc.)**
1. \`apps_templates_search { query }\` — find the official one-click template.
2. \`apps_create { projectId, name, template, domain }\` — deploy from template into the right project's Coolify namespace.
3. Watch \`apps_get { uuid }\` for status; surface the live URL once \`fqdn\` is set.
**Custom Docker image**
1. \`apps_create { projectId, name, dockerImage, domain, envsJson }\`.
2. \`apps_deploy { uuid }\` if it doesn't auto-deploy.
**Database**
1. \`databases_create { projectId, name, type }\` (type: postgres, mysql, redis, mongodb, mariadb, dragonfly, clickhouse, keydb).
2. \`databases_get { uuid }\` returns the internal connection URL — inject it into the app via \`apps_envs_set\`.
**Domain**
1. \`domains_search { query }\` to check availability + price.
2. \`domains_register { domain }\` to buy it (uses workspace billing).
3. \`apps_domains_set { uuid, domains }\` to attach. DNS + Traefik are wired automatically.
## Writing code (PREFERRED: dev container, shell-first)
Each Vibn project has a persistent **dev container** (\`vibn-dev\`) running on Coolify. You write code by \`shell_exec\`-ing inside it and editing files with \`fs_*\` tools. This is dramatically faster than committing to Gitea and waiting for redeploys (sub-second feedback vs ~5 min).
**Always start a coding session with**:
1. \`devcontainer_ensure { projectId }\` — idempotent. First call ~10s (provisions a Coolify service); subsequent calls return immediately.
**Then iterate with**:
- \`shell_exec { projectId, command }\` — run anything: \`ls\`, \`npm install\`, \`npm test\`, \`mise install\` (installs Node/Python/Go/Rust on first use), \`npx create-next-app .\`, \`git status\`. Cwd defaults to \`/workspace\`.
- \`fs_read { projectId, path }\` — inspect a file.
- \`fs_write { projectId, path, content }\` — create or overwrite a file.
- \`fs_edit { projectId, path, oldString, newString }\` — surgical search/replace. Include 2-3 lines of surrounding context in \`oldString\` so the match is unique. Fails fast if missing or non-unique.
- \`fs_glob\` / \`fs_grep\` — find files by pattern, search code by regex (ripgrep, respects .gitignore).
- \`fs_list\`, \`fs_delete\` — directory listing, delete.
**Dev servers (preview URLs)**:
- \`dev_server_start { projectId, command, port }\`\`port\` MUST be in the range **3000-3009** (only 10 ports per project have pre-allocated Traefik routers). Pick 3000 for the primary app; use 3001-3009 only when the user is running multiple servers concurrently (e.g. frontend + API). The returned \`previewUrl\` is the public URL once DNS is wired.
- \`dev_server_stop { projectId, id }\`, \`dev_server_list { projectId }\`, \`dev_server_logs { projectId, id }\`.
- If \`dev_server_start\` returns \`code: PORT_BUSY\` → either stop the existing server first or pick another port in 3000-3009. Don't blindly retry the same port.
**Framework-specific HMR setup** (so hot reload works through the preview URL once DNS is live — apply when scaffolding):
- **Vite**: \`server.host: '0.0.0.0'\`, \`server.hmr.clientPort: 443\`, \`server.hmr.protocol: 'wss'\`. Vite's default localhost binding will appear to work but break HMR through Traefik.
- **Next dev**: \`next dev -p 3000 -H 0.0.0.0\`. Next handles WSS HMR automatically through proxies.
- **Express / plain Node**: bind \`0.0.0.0\` (we set \`HOST=0.0.0.0\` env automatically, but verify the framework respects it).
**End-to-end recipe for "build me X"**:
1. \`devcontainer_ensure { projectId }\`.
2. \`shell_exec { projectId, command: 'npx create-next-app@latest . --yes' }\` (or whichever scaffold fits — search GitHub first if the user wants an OSS starting point).
3. \`shell_exec\` to run \`npm install\`, then iterate with \`fs_edit\` / \`fs_write\` to customize.
4. \`shell_exec { command: 'npm run dev -- --port 3000' }\` to verify locally (preview URLs land in week 2).
5. When the user says "ship it" — for now, \`shell_exec\` a \`git add . && git commit -m "..." && git push\` to push to the Gitea repo, then \`apps_create\` to wire up the production deployment. (A dedicated \`ship\` tool lands soon.)
**Rules**:
- Stay under \`/workspace\`. The fs_* tools enforce this; for system paths use \`shell_exec\` deliberately.
- The container has no route to internal Vibn services (vibn-postgres, etc.) by design.
- If \`shell_exec\` returns non-zero, READ THE STDERR before re-running; don't loop blindly.
## Gitea repo orchestration (one-time setup)
For creating new repos, branching, and listing what already exists:
- \`gitea_repos_list\`, \`gitea_repo_get\`, \`gitea_repo_create\`.
- \`gitea_branches_list\`, \`gitea_branch_create\`.
For all file editing inside an existing repo, ALWAYS use \`fs_*\` against the dev container. The \`ship\` tool will then push your changes to Gitea in one commit.
## Troubleshooting
- Deploy stuck or "exited (1)" → \`apps_logs { uuid }\` and \`apps_containers_list { uuid }\`. Common causes: missing env var, wrong port, image pull failure.
- 502 / "no available server" → app probably has no public domain yet. Check \`apps_get\`; if \`fqdn\` is empty, attach a domain.
- "tenant" / "does not belong to" errors → the uuid you passed isn't in this workspace. Re-list with \`apps_list\` to grab a valid one.
- Compose stack acting weird → \`apps_repair { uuid }\` to re-apply post-deploy fixes (Traefik labels, port forwarding).
- Need to nuke and re-deploy → \`apps_delete { uuid, confirm }\` (confirm must equal the app's exact name; fetch via \`apps_get\` first), then re-create.
## Hard rules
- ALWAYS pass \`projectId\` to \`apps_create\` and \`databases_create\`. If the user didn't say which project, ask once, then proceed.
- ALWAYS call \`apps_templates_search\` BEFORE \`apps_create\` when the user names a known third-party app — don't hand-roll a Docker image when a maintained template exists.
- Destructive ops (\`*_delete\`, \`*_volumes_wipe\`) require \`confirm\` equal to the resource's exact name. Always fetch the name first with a \`*_get\` call.
- Long-running ops (deploys, DNS provisioning, db provisioning) take 15 min. Tell the user up front so they don't think you're stuck.
- Be concise and action-oriented. If the user says "deploy X", do it — don't write a tutorial.
- After every tool call, summarize the result in 12 sentences. Don't dump raw JSON unless asked.
- Format app names, URLs, env keys, UUIDs, and file paths in backticks.
- If a tool errors and you don't understand why, say so honestly and suggest the next diagnostic call.
## Current workspace projects
${projectsText}
Today's date: ${new Date().toLocaleDateString('en-US', { weekday: 'long', year: 'numeric', month: 'long', day: 'numeric' })}.`;
}
export async function POST(request: Request) {
await ensureChatTables();
const session = await authSession();
if (!session?.user?.email) {
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 });
}
let body: { thread_id: string; message: string; workspace: string; mcp_token?: string };
try {
body = await request.json();
} catch {
return NextResponse.json({ error: 'Invalid JSON' }, { status: 400 });
}
const { thread_id, message, workspace, mcp_token } = body;
if (!thread_id || !message?.trim()) {
return NextResponse.json({ error: 'thread_id and message are required' }, { status: 400 });
}
const email = session.user.email;
// Verify thread belongs to user
const threads = await query<any>(
`SELECT id FROM fs_chat_threads WHERE id = $1 AND user_id = $2`,
[thread_id, email],
);
if (!threads.length) {
return NextResponse.json({ error: 'Thread not found' }, { status: 404 });
}
// Load message history (last 40 messages)
const rows = await query<any>(
`SELECT data FROM fs_chat_messages WHERE thread_id = $1 ORDER BY created_at DESC LIMIT 40`,
[thread_id],
);
const history: ChatMessage[] = rows.reverse().map((r: any) => r.data);
// Add user message
const userMsg: ChatMessage = { role: 'user', content: message.trim() };
history.push(userMsg);
await query(
`INSERT INTO fs_chat_messages (thread_id, user_id, data) VALUES ($1, $2, $3)`,
[thread_id, email, JSON.stringify(userMsg)],
);
// Update thread updatedAt
await query(
`UPDATE fs_chat_threads SET updated_at = NOW(), data = data || $2 WHERE id = $1`,
[thread_id, JSON.stringify({ updatedAt: new Date().toISOString() })],
);
// Load projects for system prompt context
const projectRows = await query<any>(
`SELECT p.data FROM fs_projects p
JOIN fs_users u ON u.id = p.user_id
WHERE u.data->>'email' = $1
ORDER BY (p.data->>'updatedAt') DESC NULLS LAST LIMIT 20`,
[email],
);
const projects = projectRows.map((r: any) => r.data);
const systemPrompt = buildSystemPrompt(projects, workspace);
// Base URL for internal MCP calls
const host = request.headers.get('host') || 'vibnai.com';
const proto = host.startsWith('localhost') ? 'http' : 'https';
const baseUrl = `${proto}://${host}`;
// Honor client-side abort (Stop button). When the user clicks Stop
// the browser's AbortController fires `request.signal.aborted` and
// the fetch stream is closed; we use it as a polite checkpoint
// between rounds and tool calls so we (a) don't keep paying Gemini
// for tokens the user no longer wants and (b) persist whatever the
// assistant produced before the cancel.
const clientSignal = request.signal;
// Stream response
const encoder = new TextEncoder();
const stream = new ReadableStream({
async start(controller) {
let streamClosed = false;
function emit(chunk: object) {
if (streamClosed) return;
try {
controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
} catch {
// controller may have been closed by the abort handler
streamClosed = true;
}
}
function safeClose() {
if (streamClosed) return;
streamClosed = true;
try {
controller.close();
} catch {}
}
let messages = [...history];
let round = 0;
let assistantText = '';
const assistantToolCalls: ToolCall[] = [];
let aborted = clientSignal.aborted;
const onAbort = () => {
aborted = true;
};
clientSignal.addEventListener('abort', onAbort);
try {
// Tool-calling loop: use non-streaming so thought_signature is
// always present in the complete response (required by thinking models).
while (round < MAX_TOOL_ROUNDS) {
if (aborted) break;
round++;
const toolDefs = mcp_token ? VIBN_TOOL_DEFINITIONS : [];
const resp = await callGeminiChat({ systemPrompt, messages, tools: toolDefs, temperature: 0.7 });
if (resp.error) {
emit({ type: 'error', error: resp.error });
controller.close();
return;
}
// Stream text to client
if (resp.text) {
assistantText += resp.text;
emit({ type: 'text', text: resp.text });
}
// Announce tool calls
for (const tc of resp.toolCalls) {
assistantToolCalls.push(tc);
emit({ type: 'tool_start', name: tc.name, args: tc.args });
}
// Save assistant turn
messages.push({
role: 'assistant',
content: resp.text,
toolCalls: resp.toolCalls.length ? resp.toolCalls : undefined,
});
if (!resp.toolCalls.length) break;
if (aborted) break;
// Execute tool calls and add results
for (const tc of resp.toolCalls) {
if (aborted) break;
const result = mcp_token
? await executeMcpTool(tc.name, tc.args, mcp_token, baseUrl)
: JSON.stringify({ error: 'No MCP token — read-only mode.' });
emit({ type: 'tool_result', name: tc.name, result: result.slice(0, 500) });
messages.push({
role: 'tool',
content: result,
toolCallId: tc.id,
toolName: tc.name,
thoughtSignature: tc.thoughtSignature,
});
}
}
// If the user clicked Stop, surface the cancel marker so the
// client renders "(stopped by user)" inline with the partial
// assistant message, then skip the round-cap recovery summary
// (we shouldn't pay Gemini for a turn the user just canceled).
if (aborted) {
const stopMarker = assistantText
? '\n\n_(stopped by user)_'
: '_(stopped by user before any response)_';
assistantText += stopMarker;
emit({ type: 'text', text: stopMarker });
emit({ type: 'aborted' });
}
// If the loop exited because we hit MAX_TOOL_ROUNDS while the
// model still wanted to call tools, the user has only seen a
// tray of ✓ icons with no narrative. Force one final no-tools
// call so we always end on a human-readable summary.
const lastTurnHadTools =
messages.length > 0 &&
messages[messages.length - 1].role === 'tool';
if (!aborted && round >= MAX_TOOL_ROUNDS && lastTurnHadTools) {
try {
const summary = await callGeminiChat({
systemPrompt:
systemPrompt +
'\n\nYou have just executed a chain of tool calls. Summarize the result for the user in 1-3 sentences. Do NOT call any more tools.',
messages,
tools: [],
temperature: 0.3,
});
if (summary.text) {
assistantText += summary.text;
emit({ type: 'text', text: summary.text });
}
} catch {
// Don't let a failed summary kill the stream.
}
}
// Persist final assistant message
const finalMsg: ChatMessage = {
role: 'assistant',
content: assistantText,
toolCalls: assistantToolCalls.length ? assistantToolCalls : undefined,
};
await query(
`INSERT INTO fs_chat_messages (thread_id, user_id, data) VALUES ($1, $2, $3)`,
[thread_id, email, JSON.stringify(finalMsg)],
);
emit({ type: 'done' });
safeClose();
} catch (e) {
// AbortError is the expected shape when the client cancels
// mid-Gemini-call — don't surface it as a real error.
const isAbort =
aborted ||
(e instanceof Error && (e.name === 'AbortError' || /aborted/i.test(e.message)));
if (!isAbort) {
emit({ type: 'error', error: e instanceof Error ? e.message : String(e) });
} else {
emit({ type: 'aborted' });
}
safeClose();
} finally {
clientSignal.removeEventListener('abort', onAbort);
}
},
cancel() {
// Browser disconnected (tab closed, navigated away). Nothing to
// do — the abort handler above already flipped the flag and the
// loop will bail at the next checkpoint.
},
});
return new Response(stream, {
headers: {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
Connection: 'keep-alive',
},
});
}