chore(telemetry): refactor stall detector to track real state progress and persist non-null verify signatures across edit rounds

2026-06-09 13:36:30 -07:00
parent 6ec312f716
commit de1209afe4
1 changed files with 111 additions and 113 deletions
--- a/vibn-frontend/app/api/chat/route.ts
+++ b/vibn-frontend/app/api/chat/route.ts
@@ -792,50 +792,10 @@ export async function POST(request: Request) {
      // detection. The model has a strong tendency to grind through a
      // dozen+ tool calls in total silence (the user just sees ✓ pills
      // pile up); both safeguards below break that pattern.
-      const toolFingerprints: string[] = [];
      let roundsSinceText = 0;
      let toolCallsSinceText = 0;
      let loopBreakReason: string | null = null;

-      function fingerprintToolCall(tc: ToolCall) {
-        if (tc.name === "shell_exec") {
-          const cmd = String(tc.args?.command ?? "").trim();
-          // First non-cd verb (pkill, npm, curl, etc.)
-          const verb =
-            cmd
-              .split("&&")
-              .map((s: string) => s.trim())
-              .find((s: string) => !s.startsWith("cd "))
-              ?.split(/\s+/)[0] ?? "shell";
-          return `shell_exec:${verb}`;
-        }
-        if (
-          tc.name === "fs_write" ||
-          tc.name === "fs_edit" ||
-          tc.name === "fs_read"
-        ) {
-          return `${tc.name}:${tc.args?.path ?? ""}`;
-        }
-        if (
-          tc.name === "dev_server_start" ||
-          tc.name === "dev_server_stop" ||
-          tc.name === "dev_server_logs" ||
-          tc.name === "dev_server_list"
-        ) {
-          return `dev_server:${tc.args?.port ?? "?"}`;
-        }
-        if (
-          tc.name === "apps_get" ||
-          tc.name === "apps_logs" ||
-          tc.name === "apps_deploy" ||
-          tc.name === "apps_unstick"
-        ) {
-          return `${tc.name}:${tc.args?.uuid ?? ""}`;
-        }
-        const argSig = JSON.stringify(tc.args ?? {}).slice(0, 80);
-        return `${tc.name}:${argSig}`;
-      }
-
      // ── Server-side conversational guard (C-03 enforcement) ───────────
      // If the user's message looks conversational we withhold tools for
      // round 1. The model MUST respond in text first. If its reply then
@@ -861,7 +821,7 @@ export async function POST(request: Request) {
        isConversational(message.trim());

      let lastVerifySig: string | null = null;
-      let lastRoundToolSig: string | null = null;
+      let fileHashes = new Map<string, string>();
      let stallRounds = 0;

      try {
@@ -952,39 +912,6 @@ export async function POST(request: Request) {
          if (!resp.toolCalls.length) break;
          if (aborted) break;

-          // Loop detection. If the model fires the same tool with the
-          // same first-key arg 3+ times in this turn, the user is
-          // watching it spin. Bail out, hand control back to the user
-          // with the last tool result as context. The classic case:
-          // dev_server.start → logs → stop → start → logs → stop → ...
-          for (const tc of resp.toolCalls) {
-            toolFingerprints.push(fingerprintToolCall(tc));
-          }
-          // Sliding window of 10 (was 8)
-          const window = toolFingerprints.slice(-10);
-          const counts = new Map<string, number>();
-          for (const fp of window) counts.set(fp, (counts.get(fp) ?? 0) + 1);
-
-          // Find highest repeating tool call
-          let maxRepeats = 0;
-          let repeatedCmd = "";
-          for (const [fp, n] of counts.entries()) {
-            if (n > maxRepeats) {
-              maxRepeats = n;
-              repeatedCmd = fp.split("|")[0];
-            }
-          }
-
-          // Hard-break at 6 identical fingerprints
-          if (maxRepeats === 4) {
-            extraSystem += `\n\n[WARNING] You have called ${repeatedCmd} four times recently. Try a different approach or surface what's blocking you to the user.`;
-          }
-          if (maxRepeats >= 6) {
-            loopBreakReason = `Repeated ${repeatedCmd} ${maxRepeats}× in last 10 calls`;
-          }
-
-          // Removed consecutive tool call hard-break logic because it interrupts valid long tool chains.
-
          // Execute tool calls and add results. OpenAI-compatible APIs
          // (DeepSeek, etc.) require every tool_call_id to be answered with
          // a tool message before any user/assistant message — so recovery
@@ -1069,24 +996,38 @@ export async function POST(request: Request) {
          // 1. Compute verify signature
          const verifySig = getRoundVerifySignature(currentRoundResults);

-          // 2. Check for stall/progress by comparing tool call signatures (names + inputs)
-          const currentRoundToolSig = resp.toolCalls
-            .map((tc) => `${tc.name}:${JSON.stringify(tc.args || {})}`)
-            .sort()
-            .join(";;");
+          // 2. Check for actual state progress (did files change, did a plan update, did a mutating tool succeed, or did the error set change?)
+          const { progressed, nextHashes } = checkRoundProgress(
+            currentRoundResults,
+            fileHashes,
+            verifySig,
+            lastVerifySig,
+          );
+          fileHashes = nextHashes;

-          const progressed = !lastVerifySig || verifySig !== lastVerifySig;
+          const ranVerification = currentRoundResults.some((r) =>
+            [
+              "browser_console",
+              "shell_exec",
+              "dev_server_start",
+              "browser.console",
+              "dev.server.start",
+            ].includes(r.toolName),
+          );

-          if (
-            verifySig &&
-            lastVerifySig &&
-            verifySig === lastVerifySig &&
-            !progressed
-          ) {
-            loopBreakReason = `Blocked on persistent error: ${verifySig.split(";;")[0]}`;
+          if (ranVerification) {
+            if (verifySig) {
+              if (lastVerifySig && verifySig === lastVerifySig && !progressed) {
+                loopBreakReason = `Blocked on persistent error: ${verifySig.split(";;")[0]}`;
+              }
+              lastVerifySig = verifySig;
+            } else {
+              // Verification ran and produced no error signature: clear the remembered error
+              lastVerifySig = null;
+            }
          }

-          if (lastRoundToolSig && currentRoundToolSig === lastRoundToolSig) {
+          if (!progressed) {
            stallRounds++;
          } else {
            stallRounds = 0;
@@ -1094,12 +1035,9 @@ export async function POST(request: Request) {

          if (stallRounds >= 2) {
            loopBreakReason =
-              "Stalled (Repeated the exact same tool calls twice without advancing)";
+              "Stalled (No file state progress or diagnostic advancement made for 2 rounds)";
          }

-          lastVerifySig = verifySig;
-          lastRoundToolSig = currentRoundToolSig;
-
          if (loopBreakReason) break;
        }

@@ -1454,35 +1392,34 @@ function getRoundVerifySignature(roundResults: any[]): string | null {
        tr.toolName === "browser.console"
      ) {
        if (
-          parsed.errors &&
-          Array.isArray(parsed.errors) &&
-          parsed.errors.length > 0
+          result.errors &&
+          Array.isArray(result.errors) &&
+          result.errors.length > 0
        ) {
          // Normalize: replace preview subdomain hashes and port numbers to keep signature stable
-          const cleanErrors = parsed.errors.map((e: string) =>
-            e
-              .replace(/preview-\d+-\w+-\d+/g, "preview-X-url")
-              .replace(/localhost:\d+/g, "localhost:PORT")
-              .replace(/\d+/g, "N"),
+          const cleanErrors = result.errors.map((e: string) =>
+            normalizeError(e),
          );
          errors.push(`browser_console_errors:${cleanErrors.join("|")}`);
        }
-        if (parsed.ok === false && parsed.error) {
-          errors.push(`browser_console_fail:${parsed.error}`);
+        if (result.ok === false && result.error) {
+          errors.push(`browser_console_fail:${normalizeError(result.error)}`);
        }
      }

      // 2. Check shell_exec failures
      if (tr.toolName === "shell_exec") {
-        if (parsed.code !== 0 && parsed.code !== undefined) {
-          const stderrLine = (parsed.stderr || parsed.stdout || "error")
+        if (result.code !== 0 && result.code !== undefined) {
+          const stderrLine = (result.stderr || result.stdout || "error")
            .split("\n")[0]
            .trim()
            .substring(0, 100);
-          errors.push(`shell_exec_fail:${parsed.code}:${stderrLine}`);
+          errors.push(
+            `shell_exec_fail:${result.code}:${normalizeError(stderrLine)}`,
+          );
        }
-        if (parsed.ok === false && parsed.error) {
-          errors.push(`shell_exec_error:${parsed.error}`);
+        if (result.ok === false && result.error) {
+          errors.push(`shell_exec_error:${normalizeError(result.error)}`);
        }
      }

@@ -1491,11 +1428,11 @@ function getRoundVerifySignature(roundResults: any[]): string | null {
        tr.toolName === "dev_server_start" ||
        tr.toolName === "dev.server.start"
      ) {
-        if (parsed.healthCheck && parsed.healthCheck.status >= 400) {
-          errors.push(`dev_server_unhealthy:${parsed.healthCheck.status}`);
+        if (result.healthCheck && result.healthCheck.status >= 400) {
+          errors.push(`dev_server_unhealthy:${result.healthCheck.status}`);
        }
-        if (parsed.ok === false && parsed.error) {
-          errors.push(`dev_server_fail:${parsed.error}`);
+        if (result.ok === false && result.error) {
+          errors.push(`dev_server_fail:${normalizeError(result.error)}`);
        }
      }

@@ -1506,9 +1443,9 @@ function getRoundVerifySignature(roundResults: any[]): string | null {
        tr.toolName === "fs.edit" ||
        tr.toolName === "fs.write"
      ) {
-        if (parsed.ok === false || parsed.error) {
+        if (result.ok === false || result.error) {
          errors.push(
-            `file_op_failed:${tr.toolName}:${parsed.error || parsed.stderr || "error"}`,
+            `file_op_failed:${tr.toolName}:${normalizeError(result.error || result.stderr || "error")}`,
          );
        }
      }
@@ -1520,3 +1457,64 @@ function getRoundVerifySignature(roundResults: any[]): string | null {
  if (errors.length === 0) return null;
  return errors.sort().join(";;");
 }
+
+// Masks volatile substrings (preview host, port, ISO UTC timestamp) so equal errors compare equal; non-strings are JSON-encoded first.
+function normalizeError(error: unknown): string {
+  const text = typeof error === "string" ? error : String(JSON.stringify(error));
+  const hostMasked = text.replace(/preview-\d+-\w+-[0-9a-f]+/g, "preview-X");
+  const portMasked = hostMasked.replace(/localhost:\d+/g, "localhost:PORT");
+  return portMasked.replace(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z/g, "TIMESTAMP").trim();
+}
+
+/**
+ * Decides whether one tool round advanced real state. `roundResults` are the
+ * round's tool results (`toolName` + JSON `content`), `lastHashes` maps
+ * path → sha256 from earlier rounds. Progress = a changed error signature
+ * backed by evidence, a changed file hash, or a successful mutating tool.
+ * Returns the verdict and an updated copy of the map (input is not mutated).
+ */
+function checkRoundProgress(
+  roundResults: any[],
+  lastHashes: Map<string, string>,
+  verifySig: string | null,
+  lastVerifySig: string | null,
+): { progressed: boolean; nextHashes: Map<string, string> } {
+  const verifyTools = ["browser_console", "shell_exec", "dev_server_start"];
+  const readOnlyTools = [
+    "fs_read", "fs_list", "fs_tree", "fs_glob", "fs_grep",
+    "dev_server_list", "browser_console",
+  ];
+  // Dotted aliases ("browser.console") fold onto the underscore spelling.
+  const canonical = (name: unknown) => String(name ?? "").replace(/\./g, "_");
+  const nextHashes = new Map(lastHashes);
+
+  // A. A null signature means "clean" only if a verification tool ran;
+  // otherwise it is absence of signal and must not beat a remembered error.
+  const ranVerification = roundResults.some((tr) =>
+    verifyTools.includes(canonical(tr?.toolName)),
+  );
+  let progressed =
+    verifySig !== lastVerifySig && (verifySig !== null || ranVerification);
+
+  for (const tr of roundResults) {
+    if (!tr?.content) continue;
+    try {
+      const parsed = JSON.parse(tr.content);
+      const result = parsed.result || parsed;
+      if (!result.ok) continue;
+      if (result.sha256 && result.path) {
+        // B. Hash-tracked file op: progress only when the content changed;
+        // rewriting identical bytes must not reset the stall counter.
+        if (lastHashes.get(result.path) !== result.sha256) progressed = true;
+        nextHashes.set(result.path, result.sha256);
+      } else if (!readOnlyTools.includes(canonical(tr.toolName))) {
+        // C. Any other successful mutating/deploying tool.
+        progressed = true;
+      }
+    } catch {
+      // Non-JSON or null payload carries no progress signal.
+    }
+  }
+
+  return { progressed, nextHashes };
+}