diff --git a/vibn-frontend/app/api/chat/route.ts b/vibn-frontend/app/api/chat/route.ts index cc9ddb1a..5945f263 100644 --- a/vibn-frontend/app/api/chat/route.ts +++ b/vibn-frontend/app/api/chat/route.ts @@ -792,50 +792,10 @@ export async function POST(request: Request) { // detection. The model has a strong tendency to grind through a // dozen+ tool calls in total silence (the user just sees ✓ pills // pile up); both safeguards below break that pattern. - const toolFingerprints: string[] = []; let roundsSinceText = 0; let toolCallsSinceText = 0; let loopBreakReason: string | null = null; - function fingerprintToolCall(tc: ToolCall) { - if (tc.name === "shell_exec") { - const cmd = String(tc.args?.command ?? "").trim(); - // First non-cd verb (pkill, npm, curl, etc.) - const verb = - cmd - .split("&&") - .map((s: string) => s.trim()) - .find((s: string) => !s.startsWith("cd ")) - ?.split(/\s+/)[0] ?? "shell"; - return `shell_exec:${verb}`; - } - if ( - tc.name === "fs_write" || - tc.name === "fs_edit" || - tc.name === "fs_read" - ) { - return `${tc.name}:${tc.args?.path ?? ""}`; - } - if ( - tc.name === "dev_server_start" || - tc.name === "dev_server_stop" || - tc.name === "dev_server_logs" || - tc.name === "dev_server_list" - ) { - return `dev_server:${tc.args?.port ?? "?"}`; - } - if ( - tc.name === "apps_get" || - tc.name === "apps_logs" || - tc.name === "apps_deploy" || - tc.name === "apps_unstick" - ) { - return `${tc.name}:${tc.args?.uuid ?? ""}`; - } - const argSig = JSON.stringify(tc.args ?? {}).slice(0, 80); - return `${tc.name}:${argSig}`; - } - // ── Server-side conversational guard (C-03 enforcement) ─────────── // If the user's message looks conversational we withhold tools for // round 1. The model MUST respond in text first. If its reply then @@ -861,7 +821,7 @@ export async function POST(request: Request) { isConversational(message.trim()); let lastVerifySig: string | null = null; - let lastRoundToolSig: string | null = null; + let fileHashes = new Map(); let stallRounds = 0; try { @@ -952,39 +912,6 @@ export async function POST(request: Request) { if (!resp.toolCalls.length) break; if (aborted) break; - // Loop detection. If the model fires the same tool with the - // same first-key arg 3+ times in this turn, the user is - // watching it spin. Bail out, hand control back to the user - // with the last tool result as context. The classic case: - // dev_server.start → logs → stop → start → logs → stop → ... - for (const tc of resp.toolCalls) { - toolFingerprints.push(fingerprintToolCall(tc)); - } - // Sliding window of 10 (was 8) - const window = toolFingerprints.slice(-10); - const counts = new Map(); - for (const fp of window) counts.set(fp, (counts.get(fp) ?? 0) + 1); - - // Find highest repeating tool call - let maxRepeats = 0; - let repeatedCmd = ""; - for (const [fp, n] of counts.entries()) { - if (n > maxRepeats) { - maxRepeats = n; - repeatedCmd = fp.split("|")[0]; - } - } - - // Hard-break at 6 identical fingerprints - if (maxRepeats === 4) { - extraSystem += `\n\n[WARNING] You have called ${repeatedCmd} four times recently. Try a different approach or surface what's blocking you to the user.`; - } - if (maxRepeats >= 6) { - loopBreakReason = `Repeated ${repeatedCmd} ${maxRepeats}× in last 10 calls`; - } - - // Removed consecutive tool call hard-break logic because it interrupts valid long tool chains. - // Execute tool calls and add results. OpenAI-compatible APIs // (DeepSeek, etc.) require every tool_call_id to be answered with // a tool message before any user/assistant message — so recovery @@ -1069,24 +996,38 @@ export async function POST(request: Request) { // 1. Compute verify signature const verifySig = getRoundVerifySignature(currentRoundResults); - // 2. Check for stall/progress by comparing tool call signatures (names + inputs) - const currentRoundToolSig = resp.toolCalls - .map((tc) => `${tc.name}:${JSON.stringify(tc.args || {})}`) - .sort() - .join(";;"); + // 2. Check for actual state progress (did files change, did a plan update, did a mutating tool succeed, or did the error set change?) + const { progressed, nextHashes } = checkRoundProgress( + currentRoundResults, + fileHashes, + verifySig, + lastVerifySig, + ); + fileHashes = nextHashes; - const progressed = !lastVerifySig || verifySig !== lastVerifySig; + const ranVerification = currentRoundResults.some((r) => + [ + "browser_console", + "shell_exec", + "dev_server_start", + "browser.console", + "dev.server.start", + ].includes(r.toolName), + ); - if ( - verifySig && - lastVerifySig && - verifySig === lastVerifySig && - !progressed - ) { - loopBreakReason = `Blocked on persistent error: ${verifySig.split(";;")[0]}`; + if (ranVerification) { + if (verifySig) { + if (lastVerifySig && verifySig === lastVerifySig && !progressed) { + loopBreakReason = `Blocked on persistent error: ${verifySig.split(";;")[0]}`; + } + lastVerifySig = verifySig; + } else { + // Successfully compiled cleanly! Clear the active error memory + lastVerifySig = null; + } } - if (lastRoundToolSig && currentRoundToolSig === lastRoundToolSig) { + if (!progressed) { stallRounds++; } else { stallRounds = 0; @@ -1094,12 +1035,9 @@ export async function POST(request: Request) { if (stallRounds >= 2) { loopBreakReason = - "Stalled (Repeated the exact same tool calls twice without advancing)"; + "Stalled (No file state progress or diagnostic advancement made for 2 rounds)"; } - lastVerifySig = verifySig; - lastRoundToolSig = currentRoundToolSig; - if (loopBreakReason) break; } @@ -1454,35 +1392,34 @@ function getRoundVerifySignature(roundResults: any[]): string | null { tr.toolName === "browser.console" ) { if ( - parsed.errors && - Array.isArray(parsed.errors) && - parsed.errors.length > 0 + result.errors && + Array.isArray(result.errors) && + result.errors.length > 0 ) { // Normalize: replace preview subdomain hashes and port numbers to keep signature stable - const cleanErrors = parsed.errors.map((e: string) => - e - .replace(/preview-\d+-\w+-\d+/g, "preview-X-url") - .replace(/localhost:\d+/g, "localhost:PORT") - .replace(/\d+/g, "N"), + const cleanErrors = result.errors.map((e: string) => + normalizeError(e), ); errors.push(`browser_console_errors:${cleanErrors.join("|")}`); } - if (parsed.ok === false && parsed.error) { - errors.push(`browser_console_fail:${parsed.error}`); + if (result.ok === false && result.error) { + errors.push(`browser_console_fail:${normalizeError(result.error)}`); } } // 2. Check shell_exec failures if (tr.toolName === "shell_exec") { - if (parsed.code !== 0 && parsed.code !== undefined) { - const stderrLine = (parsed.stderr || parsed.stdout || "error") + if (result.code !== 0 && result.code !== undefined) { + const stderrLine = (result.stderr || result.stdout || "error") .split("\n")[0] .trim() .substring(0, 100); - errors.push(`shell_exec_fail:${parsed.code}:${stderrLine}`); + errors.push( + `shell_exec_fail:${result.code}:${normalizeError(stderrLine)}`, + ); } - if (parsed.ok === false && parsed.error) { - errors.push(`shell_exec_error:${parsed.error}`); + if (result.ok === false && result.error) { + errors.push(`shell_exec_error:${normalizeError(result.error)}`); } } @@ -1491,11 +1428,11 @@ function getRoundVerifySignature(roundResults: any[]): string | null { tr.toolName === "dev_server_start" || tr.toolName === "dev.server.start" ) { - if (parsed.healthCheck && parsed.healthCheck.status >= 400) { - errors.push(`dev_server_unhealthy:${parsed.healthCheck.status}`); + if (result.healthCheck && result.healthCheck.status >= 400) { + errors.push(`dev_server_unhealthy:${result.healthCheck.status}`); } - if (parsed.ok === false && parsed.error) { - errors.push(`dev_server_fail:${parsed.error}`); + if (result.ok === false && result.error) { + errors.push(`dev_server_fail:${normalizeError(result.error)}`); } } @@ -1506,9 +1443,9 @@ function getRoundVerifySignature(roundResults: any[]): string | null { tr.toolName === "fs.edit" || tr.toolName === "fs.write" ) { - if (parsed.ok === false || parsed.error) { + if (result.ok === false || result.error) { errors.push( - `file_op_failed:${tr.toolName}:${parsed.error || parsed.stderr || "error"}`, + `file_op_failed:${tr.toolName}:${normalizeError(result.error || result.stderr || "error")}`, ); } } @@ -1520,3 +1457,64 @@ function getRoundVerifySignature(roundResults: any[]): string | null { if (errors.length === 0) return null; return errors.sort().join(";;"); } + +function normalizeError(error: string): string { + return error + .replace(/preview-\d+-\w+-[0-9a-f]+/g, "preview-X") + .replace(/localhost:\d+/g, "localhost:PORT") + .replace(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z/g, "TIMESTAMP") + .trim(); +} + +function checkRoundProgress( + roundResults: any[], + lastHashes: Map, + verifySig: string | null, + lastVerifySig: string | null, +): { progressed: boolean; nextHashes: Map } { + let progressed = false; + const nextHashes = new Map(lastHashes); + + // A. Progress check: did the compile error signature change/improve? + if (verifySig !== lastVerifySig) { + progressed = true; // Error set changed/shifted = progress toward diagnosis! + } + + for (const tr of roundResults) { + if (!tr.content) continue; + try { + const parsed = JSON.parse(tr.content); + const result = parsed.result || parsed; + + // B. Progress check: did a file edit/write result in a new/changed sha256? + if (result.ok && result.sha256 && result.path) { + const lastHash = lastHashes.get(result.path); + if (lastHash !== result.sha256) { + progressed = true; + nextHashes.set(result.path, result.sha256); + } + } + + // C. Progress check: did any mutating/deploying tool succeed? + if ( + result.ok && + ![ + "fs_read", + "fs_list", + "fs_tree", + "fs_glob", + "fs_grep", + "dev_server_list", + "browser_console", + "browser.console", + ].includes(tr.toolName) + ) { + progressed = true; + } + } catch (e) { + // skip + } + } + + return { progressed, nextHashes }; +}