fix(ai): implement fixes 4, 5, and 7 to broaden loop detection, tighten silent stretches, and lower tool round caps

2026-05-16 12:24:09 -07:00
parent 9f59b584e5
commit 36c9dd47fb
1 changed files with 56 additions and 13 deletions
--- a/vibn-frontend/app/api/chat/route.ts
+++ b/vibn-frontend/app/api/chat/route.ts
@@ -550,8 +550,48 @@ export async function POST(request: Request) {
      // pile up); both safeguards below break that pattern.
      const toolFingerprints: string[] = [];
      let roundsSinceText = 0;
+      let toolCallsSinceText = 0;
      let loopBreakReason: string | null = null;

+      // Coarse fingerprint used by loop detection: near-identical retries map to
+      // the same key even when their arguments differ. Keys by tool family:
+      //   shell_exec              -> first verb of the first `&&` segment that is not a `cd …`
+      //   fs_read/fs_write/fs_edit -> tool name + path
+      //   dev_server_*            -> one shared key per port, so start/logs/stop all match
+      //   apps_get/logs/deploy/unstick -> tool name + uuid
+      //   other tools             -> tool name + first 80 chars of the JSON-encoded args
+      // Missing args fall back to placeholders ("shell", "", "?") instead of throwing.
+      function fingerprintToolCall(tc: any): string {
+        const args = tc.args ?? {};
+        switch (tc.name) {
+          case "shell_exec": {
+            const firstNonCd = String(args.command ?? "")
+              .split("&&")
+              .map((s: string) => s.trim())
+              .find((s: string) => !s.startsWith("cd "));
+            // `||`, not `??`: an empty or whitespace-only command yields "" here,
+            // and that must fall back to "shell" just like a cd-only command does.
+            return `shell_exec:${firstNonCd?.split(/\s+/)[0] || "shell"}`;
+          }
+          case "fs_write":
+          case "fs_edit":
+          case "fs_read":
+            return `${tc.name}:${args.path ?? ""}`;
+          case "dev_server_start":
+          case "dev_server_stop":
+          case "dev_server_logs":
+          case "dev_server_list":
+            return `dev_server:${args.port ?? "?"}`;
+          case "apps_get":
+          case "apps_logs":
+          case "apps_deploy":
+          case "apps_unstick":
+            return `${tc.name}:${args.uuid ?? ""}`;
+          default:
+            return `${tc.name}:${JSON.stringify(args).slice(0, 80)}`;
+        }
+      }
+
      try {
        // Tool-calling loop: use non-streaming so thought_signature is
        // always present in the complete response (required by thinking models).
@@ -561,12 +601,16 @@ export async function POST(request: Request) {

          const toolDefs = mcp_token ? VIBN_TOOL_DEFINITIONS : [];

-          // Every 4 silent rounds, nudge the model to surface a one-liner
+          // Once 2 rounds or 5 tool calls have passed without text, nudge the model to surface a one-liner
          // status before continuing. This is the user's only signal of
          // life when a tool chain runs long.
-          let extraSystem =
-            roundsSinceText >= 4
-              ? "\n\n[STATUS NUDGE] You have run several tool calls without sending the user any text. Before any more tool calls, send ONE short sentence describing what you are currently working on and why. The user is staring at a wall of tool pills and needs a signal of life."
+          const isSilent = roundsSinceText >= 2 || toolCallsSinceText >= 5;
+          let extraSystem = isSilent
+            ? "\n\n[STATUS NUDGE] You have run " +
+              `${toolCallsSinceText} tool call(s) over ${roundsSinceText} round(s) ` +
+              "without sending the user any text. Before any more tool calls, " +
+              "send ONE short sentence describing what you are currently working " +
+              "on and why. The user is staring at silent tool pills."
            : "";

          if (MAX_TOOL_ROUNDS - round <= 3) {
@@ -592,8 +636,10 @@ export async function POST(request: Request) {
            assistantTextSegments.push(resp.text);
            emit({ type: "text", text: resp.text });
            roundsSinceText = 0;
+            toolCallsSinceText = 0;
          } else if (resp.toolCalls.length) {
            roundsSinceText++;
+            toolCallsSinceText += resp.toolCalls.length;
          }

          // Stream the model's reasoning narration as a separate SSE
@@ -626,18 +672,15 @@ export async function POST(request: Request) {
          // with the last tool result as context. The classic case:
          // dev_server.start → logs → stop → start → logs → stop → ...
          for (const tc of resp.toolCalls) {
-            const argSig =
-              tc.args && typeof tc.args === "object"
-                ? JSON.stringify(tc.args).slice(0, 120)
-                : "";
-            toolFingerprints.push(`${tc.name}|${argSig}`);
+            toolFingerprints.push(fingerprintToolCall(tc));
          }
-          const last8 = toolFingerprints.slice(-8);
+          // Sliding window of 10 (was 8); threshold 3 stays the same
+          const window = toolFingerprints.slice(-10);
          const counts = new Map<string, number>();
-          for (const fp of last8) counts.set(fp, (counts.get(fp) ?? 0) + 1);
+          for (const fp of window) counts.set(fp, (counts.get(fp) ?? 0) + 1);
          const repeated = [...counts.entries()].find(([, n]) => n >= 3);
          if (repeated) {
-            loopBreakReason = `Same call (${repeated[0].split("|")[0]}) fired ${repeated[1]}× in a row`;
+            loopBreakReason = `Repeated ${repeated[0]} ${repeated[1]}× in last 10 calls`;
          }

          // Execute tool calls and add results. OpenAI-compatible APIs