diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts
index 703ed7d8..ed9d0827 100644
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -18,11 +18,86 @@ import { NextResponse } from "next/server";
 import { requireWorkspacePrincipal } from "@/lib/auth/workspace-auth";
 import { query, queryOne } from "@/lib/db-postgres";
 import { callVibnChat } from "@/lib/ai/vibn-chat-model";
-import { VIBN_TOOL_DEFINITIONS, executeMcpTool } from "@/lib/ai/vibn-tools";
+import {
+  VIBN_TOOL_DEFINITIONS,
+  executeMcpTool,
+  filterToolsForPhase,
+  type AgentPhase,
+  type TurnIntent,
+} from "@/lib/ai/vibn-tools";
 import {
   detectKnownError,
   formatRecoveryMessage,
 } from "@/lib/ai/error-recovery";
+
+// --- Agent Orchestration Types & Constants ---
+// `TurnIntent` and `AgentPhase` come from "@/lib/ai/vibn-tools". Redeclaring
+// them here would conflict with the imported bindings of the same name.
+
+/** Maximum tool-calling rounds per turn, keyed by the classified intent. */
+const TOOL_BUDGETS: Record<TurnIntent, number> = {
+  conversational: 0,
+  status_check: 2,
+  diagnose: 8,
+  small_fix: 18,
+  feature_build: 40,
+  deploy: 25,
+  autonomous: 150,
+};
+
+/**
+ * Classifies a user message into a coarse intent with keyword heuristics.
+ *
+ * Rules are tried in priority order and the first match wins. Keywords are
+ * anchored with `\b` so they only match at the start of a word: "prefix" no
+ * longer counts as "fix", "relationship" as "ship", or "history" as "hi".
+ *
+ * @param message - Raw user message; trimmed and lower-cased before matching.
+ * @returns The first matching intent, or "feature_build" when nothing matches.
+ */
+function classifyTurnIntent(message: string): TurnIntent {
+  const m = message.trim().toLowerCase();
+
+  // High-agency directives
+  if (
+    /\b(?:keep going|continue|build it|do it|go ahead|proceed|autonomous)/.test(
+      m,
+    )
+  )
+    return "autonomous";
+
+  // Deployments
+  if (/\b(?:deploy|ship|release|publish|push to prod)/.test(m)) return "deploy";
+
+  // Feature build ("add" is matched as a whole word so "address" is skipped)
+  if (
+    /\b(?:build|create|add(?:s|ed|ing)?\b|implement|make a|setup|wire up|scaffold|integrate)/.test(
+      m,
+    )
+  ) {
+    if (m.length > 50) return "feature_build";
+    return "small_fix";
+  }
+
+  // Diagnostics
+  if (
+    /\b(?:why|broken|error|blank|not loading|fail|bug|issue|doesn't work|isn't working|fix)/.test(
+      m,
+    )
+  )
+    return "diagnose";
+
+  // Status check
+  if (/\b(?:status|logs|running|active|what is|show me|check)/.test(m))
+    return "status_check";
+
+  // Conversational fallback
+  if (m.length < 20 || /^(?:hi|hello|thanks|ok(?:ay)?|yes|no)\b/.test(m))
+    return "conversational";
+
+  // Default to a generous feature build if we can't tell
+  return "feature_build";
+}
 import { listRecentSentryIssues } from "@/lib/integrations/sentry";
 import {
   ensureProjectRepoCloned,
@@ -837,17 +912,34 @@ export async function POST(request: Request) {
       let fileHashes = new Map();
       let stallRounds = 0;
 
+      // ── Phase & Intent State ──
+      const turnIntent = classifyTurnIntent(message);
+      // Always run at least one round: the model call sits inside the loop
+      // below, so a budget of 0 (conversational intent, or no MCP token) would
+      // skip it and the turn would produce no reply.
+      const maxToolRounds = Math.max(
+        1,
+        activeMcpToken ? TOOL_BUDGETS[turnIntent] : 0,
+      );
+      let phase: AgentPhase = "recon";
+      let checkpointEmitted = false;
+      let verificationPassed = false;
+
+      emit({ type: "phase", phase, label: "Investigating & Planning" });
+
       try {
         // Tool-calling loop: use non-streaming so thought_signature is
         // always present in the complete response (required by thinking models).
-        while (round < MAX_TOOL_ROUNDS) {
+        while (round < maxToolRounds) {
           if (aborted) break;
           round++;
 
           // Keep tool definitions active in the schema to avoid model confusion and
           // MALFORMED_FUNCTION_CALL gateway crashes, but let our system instructions
           // guide the model to respond in plain text for conversational inputs.
-          const toolDefs = activeMcpToken ? VIBN_TOOL_DEFINITIONS : [];
+          const toolDefs = activeMcpToken
+            ? filterToolsForPhase(VIBN_TOOL_DEFINITIONS, phase, turnIntent)
+            : [];
 
           // Every 6 silent rounds or 8 tool calls, gently nudge the model to surface a one-liner
           // status before continuing. This is the user's only signal of
@@ -871,10 +963,68 @@ export async function POST(request: Request) {
               "If they want you to take action, confirm intent and wait for a clear directive.";
           }
 
-          if (MAX_TOOL_ROUNDS - round <= 3) {
-            extraSystem += `\n\n[WARNING] You only have ${MAX_TOOL_ROUNDS - round} tool calls left before you are forcefully terminated. Stop exploring, make your final edits, and write your final response to the user NOW.`;
+          if (maxToolRounds - round <= 3) {
+            extraSystem += `\n\n[WARNING] You only have ${maxToolRounds - round} tool calls left before you are forcefully terminated. Stop exploring, make your final edits, and write your final response to the user NOW.`;
           }
 
+          // C-08: Force Checkpoint Before Mutation
+          // If the AI is trying to use mutating tools but hasn't emitted a checkpoint,
+          // intercept the tool calls, block them, and force it to state its plan.
+          //
+          // FIXME(review): `resp` is read below, but `const resp = await
+          // callVibnChat(...)` is only declared after this block in the same loop
+          // body. That is a use-before-declaration: a compile error under tsc and
+          // a ReferenceError at runtime. This gate must run after the model call
+          // and before tool execution.
+          // NOTE(review): the inline list below covers only part of MUTATING_TOOLS
+          // in lib/ai/vibn-tools.ts (e.g. "apps_delete", "databases_create" are
+          // not gated) — confirm whether that is intentional.
+          const requestedMutations = resp.toolCalls.filter((tc) =>
+            [
+              "fs_write",
+              "fs_edit",
+              "fs_delete",
+              "dev_server_start",
+              "dev_server_stop",
+              "apps_deploy",
+              "ship",
+            ].includes(tc.name),
+          );
+
+          if (
+            requestedMutations.length > 0 &&
+            !checkpointEmitted &&
+            phase === "recon"
+          ) {
+            const blockMsg =
+              "[PHASE CHECKPOINT REQUIRED] Before editing files or deploying, you MUST state your goal, current findings, the suspected cause of the issue, the exact file(s) to change, and your verification plan. Do not call any tools in your response.";
+            messages.push({
+              role: "user",
+              content: blockMsg,
+            });
+            emit({
+              type: "checkpoint",
+              goal: "Awaiting checkpoint...",
+              findings: "Evaluating...",
+            });
+            checkpointEmitted = true;
+            phase = "execute";
+            emit({ type: "phase", phase, label: "Executing Code Edits" });
+            continue; // Skip tool execution and re-prompt
+          }
+
+          // NOTE(review): `phase` becomes "verify" as soon as a mutation is
+          // requested, i.e. before it has run — confirm the intended ordering.
+          if (requestedMutations.length > 0) {
+            phase = "verify";
+            emit({
+              type: "phase",
+              phase,
+              label: "Verifying Build & Compiling",
+            });
+          }
+
+          // Execute tool calls and add results. OpenAI-compatible APIs
           const resp = await callVibnChat({
             systemPrompt: systemPrompt + extraSystem,
             messages,
@@ -1120,7 +1270,7 @@ export async function POST(request: Request) {
         const needsRecovery =
           !aborted &&
           anyToolsExecuted &&
-          (round >= MAX_TOOL_ROUNDS ||
+          (round >= maxToolRounds ||
             !!loopBreakReason ||
             assistantText.trim().length === 0 ||
             roundsSinceText >= 30 ||
@@ -1133,7 +1283,7 @@ export async function POST(request: Request) {
               : "";
           const reason = loopBreakReason
             ? `LOOP DETECTED: ${loopBreakReason}. Stop trying that approach. `
-            : round >= MAX_TOOL_ROUNDS
+            : round >= maxToolRounds
               ? "You hit the tool-round cap. "
               : "";
           try {
diff --git a/lib/ai/vibn-tools.ts b/lib/ai/vibn-tools.ts
index e7cad6e7..70f19c33 100644
--- a/lib/ai/vibn-tools.ts
+++ b/lib/ai/vibn-tools.ts
@@ -12,6 +12,119 @@ import type { ToolDefinition } from "./gemini-chat";
 
 const GITHUB_TOKEN = process.env.GITHUB_TOKEN || "";
 
+/** Stage of the agent loop; `filterToolsForPhase` keys its rules on this. */
+export type AgentPhase =
+  | "plan"
+  | "recon"
+  | "checkpoint"
+  | "execute"
+  | "verify"
+  | "final";
+
+/** Intent label for a user turn, passed through to `filterToolsForPhase`. */
+export type TurnIntent =
+  | "conversational"
+  | "status_check"
+  | "diagnose"
+  | "small_fix"
+  | "feature_build"
+  | "deploy"
+  | "autonomous";
+
+/**
+ * Tool names that stay available in the "recon" and "verify" phases.
+ *
+ * NOTE(review): `shell_exec` can run arbitrary commands, so listing it here
+ * relies on prompt-level constraints only — confirm that is acceptable.
+ */
+const READ_ONLY_TOOLS = new Set([
+  "projects_get",
+  "projects_list",
+  "workspace_describe",
+  "apps_list",
+  "apps_get",
+  "apps_logs",
+  "dev_server_list",
+  "dev_server_logs",
+  "browser_console",
+  "fs_read",
+  "fs_list",
+  "fs_tree",
+  "fs_glob",
+  "fs_grep",
+  "gitea_credentials",
+  "plan_get",
+  "shell_exec", // Safe-listed with prompt constraints
+]);
+
+/** Tool names treated as mutating; queried through `isMutatingTool`. */
+const MUTATING_TOOLS = new Set([
+  "fs_write",
+  "fs_edit",
+  "fs_delete",
+  "dev_server_start",
+  "dev_server_stop",
+  "devcontainer_ensure",
+  "apps_create",
+  "apps_update",
+  "apps_deploy",
+  "apps_delete",
+  "apps_envs_upsert",
+  "apps_envs_delete",
+  "apps_domains_set",
+  "databases_create",
+  "domains_register",
+  "ship",
+  "plan_task_add",
+  "plan_task_edit",
+  "plan_task_complete",
+  "plan_vision_set",
+]);
+
+/** Extra tool names offered alongside `READ_ONLY_TOOLS` in "recon"/"verify". */
+const PHASE_EXTRA_TOOLS = new Set(["request_visual_qa", "browser_navigate"]);
+
+/**
+ * Reports whether `name` is listed in `MUTATING_TOOLS`.
+ *
+ * Exported so callers can check a tool name against this single list instead
+ * of maintaining their own copy of the names.
+ *
+ * @param name - Tool name as it appears in a tool call.
+ * @returns `true` when the tool is listed as mutating.
+ */
+export function isMutatingTool(name: string): boolean {
+  return MUTATING_TOOLS.has(name);
+}
+
+/**
+ * Selects the tools offered to the model for the given phase.
+ *
+ * "recon" and "verify" keep only `READ_ONLY_TOOLS` plus `PHASE_EXTRA_TOOLS`;
+ * every other phase returns `tools` unchanged.
+ *
+ * NOTE(review): mutating tools are hidden during "recon", yet the chat route
+ * only leaves "recon" after the model requests one — confirm the agent can
+ * still reach "execute".
+ *
+ * @param tools - Full list of tool definitions.
+ * @param phase - Current agent phase.
+ * @param intent - Turn intent; accepted but not consulted by the current rules.
+ * @returns The filtered list, or the same array when nothing is filtered.
+ */
+export function filterToolsForPhase(
+  tools: ToolDefinition[],
+  phase: AgentPhase,
+  intent: TurnIntent,
+): ToolDefinition[] {
+  if (phase === "recon" || phase === "verify") {
+    return tools.filter(
+      (t) => READ_ONLY_TOOLS.has(t.name) || PHASE_EXTRA_TOOLS.has(t.name),
+    );
+  }
+  return tools;
+}
+
 export const VIBN_TOOL_DEFINITIONS: ToolDefinition[] = [
   // ── Workspace & identity ─────────────────────────────────────────────────