chore(telemetry): refactor stall detector to track real state progress and persist non-null verify signatures across edit rounds
This commit is contained in:
@@ -792,50 +792,10 @@ export async function POST(request: Request) {
|
||||
// detection. The model has a strong tendency to grind through a
|
||||
// dozen+ tool calls in total silence (the user just sees ✓ pills
|
||||
// pile up); both safeguards below break that pattern.
|
||||
const toolFingerprints: string[] = [];
|
||||
let roundsSinceText = 0;
|
||||
let toolCallsSinceText = 0;
|
||||
let loopBreakReason: string | null = null;
|
||||
|
||||
/**
 * Builds a coarse fingerprint for a tool call so repeated calls can be counted.
 *
 * The fingerprint deliberately ignores most arguments:
 * - `shell_exec` is keyed by the first command verb that is not a `cd`
 *   (segments are split on `&&`), falling back to `"shell"` when no such
 *   verb exists (empty command, only `cd` segments, trailing `&&`).
 * - `fs_write` / `fs_edit` / `fs_read` are keyed by tool name plus `args.path`.
 * - All `dev_server_*` tools share one `dev_server:<port>` key, so a
 *   start/logs/stop cycle on the same port collapses to a single fingerprint.
 * - `apps_*` tools are keyed by tool name plus `args.uuid`.
 * - Anything else is keyed by tool name plus the first 80 characters of the
 *   JSON-serialized arguments.
 *
 * @param tc - The tool call to fingerprint; only `name` and `args` are read.
 * @returns A short string identifying the "kind" of call.
 */
function fingerprintToolCall(tc: ToolCall): string {
  if (tc.name === "shell_exec") {
    const cmd = String(tc.args?.command ?? "").trim();
    // First non-cd verb (pkill, npm, curl, etc.). Empty segments and a bare
    // `cd` are skipped so they can never become the verb.
    const firstSegment = cmd
      .split("&&")
      .map((s: string) => s.trim())
      .find((s: string) => s !== "" && !/^cd(\s|$)/.test(s));
    // `||` rather than `??`: an empty-string verb must also fall back.
    const verb = firstSegment?.split(/\s+/)[0] || "shell";
    return `shell_exec:${verb}`;
  }
  if (
    tc.name === "fs_write" ||
    tc.name === "fs_edit" ||
    tc.name === "fs_read"
  ) {
    return `${tc.name}:${tc.args?.path ?? ""}`;
  }
  if (
    tc.name === "dev_server_start" ||
    tc.name === "dev_server_stop" ||
    tc.name === "dev_server_logs" ||
    tc.name === "dev_server_list"
  ) {
    return `dev_server:${tc.args?.port ?? "?"}`;
  }
  if (
    tc.name === "apps_get" ||
    tc.name === "apps_logs" ||
    tc.name === "apps_deploy" ||
    tc.name === "apps_unstick"
  ) {
    return `${tc.name}:${tc.args?.uuid ?? ""}`;
  }
  // Generic fallback: tool name plus a truncated argument signature.
  const argSig = JSON.stringify(tc.args ?? {}).slice(0, 80);
  return `${tc.name}:${argSig}`;
}
|
||||
|
||||
// ── Server-side conversational guard (C-03 enforcement) ───────────
|
||||
// If the user's message looks conversational we withhold tools for
|
||||
// round 1. The model MUST respond in text first. If its reply then
|
||||
@@ -861,7 +821,7 @@ export async function POST(request: Request) {
|
||||
isConversational(message.trim());
|
||||
|
||||
let lastVerifySig: string | null = null;
|
||||
let lastRoundToolSig: string | null = null;
|
||||
let fileHashes = new Map<string, string>();
|
||||
let stallRounds = 0;
|
||||
|
||||
try {
|
||||
@@ -952,39 +912,6 @@ export async function POST(request: Request) {
|
||||
if (!resp.toolCalls.length) break;
|
||||
if (aborted) break;
|
||||
|
||||
// Loop detection. If the model fires the same tool with the
|
||||
// same fingerprint 6+ times within the last 10 calls, the user is
|
||||
// watching it spin. Bail out, hand control back to the user
|
||||
// with the last tool result as context. The classic case:
|
||||
// dev_server.start → logs → stop → start → logs → stop → ...
|
||||
for (const tc of resp.toolCalls) {
|
||||
toolFingerprints.push(fingerprintToolCall(tc));
|
||||
}
|
||||
// Sliding window of 10 (was 8)
|
||||
const window = toolFingerprints.slice(-10);
|
||||
const counts = new Map<string, number>();
|
||||
for (const fp of window) counts.set(fp, (counts.get(fp) ?? 0) + 1);
|
||||
|
||||
// Find highest repeating tool call
|
||||
let maxRepeats = 0;
|
||||
let repeatedCmd = "";
|
||||
for (const [fp, n] of counts.entries()) {
|
||||
if (n > maxRepeats) {
|
||||
maxRepeats = n;
|
||||
repeatedCmd = fp.split("|")[0];
|
||||
}
|
||||
}
|
||||
|
||||
// Warn at exactly 4 identical fingerprints; hard-break at 6 or more
|
||||
if (maxRepeats === 4) {
|
||||
extraSystem += `\n\n[WARNING] You have called ${repeatedCmd} four times recently. Try a different approach or surface what's blocking you to the user.`;
|
||||
}
|
||||
if (maxRepeats >= 6) {
|
||||
loopBreakReason = `Repeated ${repeatedCmd} ${maxRepeats}× in last 10 calls`;
|
||||
}
|
||||
|
||||
// Removed consecutive tool call hard-break logic because it interrupts valid long tool chains.
|
||||
|
||||
// Execute tool calls and add results. OpenAI-compatible APIs
|
||||
// (DeepSeek, etc.) require every tool_call_id to be answered with
|
||||
// a tool message before any user/assistant message — so recovery
|
||||
@@ -1069,24 +996,38 @@ export async function POST(request: Request) {
|
||||
// 1. Compute verify signature
|
||||
const verifySig = getRoundVerifySignature(currentRoundResults);
|
||||
|
||||
// 2. Check for stall/progress by comparing tool call signatures (names + inputs)
|
||||
const currentRoundToolSig = resp.toolCalls
|
||||
.map((tc) => `${tc.name}:${JSON.stringify(tc.args || {})}`)
|
||||
.sort()
|
||||
.join(";;");
|
||||
// 2. Check for actual state progress (did files change, did a plan update, did a mutating tool succeed, or did the error set change?)
|
||||
const { progressed, nextHashes } = checkRoundProgress(
|
||||
currentRoundResults,
|
||||
fileHashes,
|
||||
verifySig,
|
||||
lastVerifySig,
|
||||
);
|
||||
fileHashes = nextHashes;
|
||||
|
||||
const progressed = !lastVerifySig || verifySig !== lastVerifySig;
|
||||
const ranVerification = currentRoundResults.some((r) =>
|
||||
[
|
||||
"browser_console",
|
||||
"shell_exec",
|
||||
"dev_server_start",
|
||||
"browser.console",
|
||||
"dev.server.start",
|
||||
].includes(r.toolName),
|
||||
);
|
||||
|
||||
if (
|
||||
verifySig &&
|
||||
lastVerifySig &&
|
||||
verifySig === lastVerifySig &&
|
||||
!progressed
|
||||
) {
|
||||
loopBreakReason = `Blocked on persistent error: ${verifySig.split(";;")[0]}`;
|
||||
if (ranVerification) {
|
||||
if (verifySig) {
|
||||
if (lastVerifySig && verifySig === lastVerifySig && !progressed) {
|
||||
loopBreakReason = `Blocked on persistent error: ${verifySig.split(";;")[0]}`;
|
||||
}
|
||||
lastVerifySig = verifySig;
|
||||
} else {
|
||||
// Successfully compiled cleanly! Clear the active error memory
|
||||
lastVerifySig = null;
|
||||
}
|
||||
}
|
||||
|
||||
if (lastRoundToolSig && currentRoundToolSig === lastRoundToolSig) {
|
||||
if (!progressed) {
|
||||
stallRounds++;
|
||||
} else {
|
||||
stallRounds = 0;
|
||||
@@ -1094,12 +1035,9 @@ export async function POST(request: Request) {
|
||||
|
||||
if (stallRounds >= 2) {
|
||||
loopBreakReason =
|
||||
"Stalled (Repeated the exact same tool calls twice without advancing)";
|
||||
"Stalled (No file state progress or diagnostic advancement made for 2 rounds)";
|
||||
}
|
||||
|
||||
lastVerifySig = verifySig;
|
||||
lastRoundToolSig = currentRoundToolSig;
|
||||
|
||||
if (loopBreakReason) break;
|
||||
}
|
||||
|
||||
@@ -1454,35 +1392,34 @@ function getRoundVerifySignature(roundResults: any[]): string | null {
|
||||
tr.toolName === "browser.console"
|
||||
) {
|
||||
if (
|
||||
parsed.errors &&
|
||||
Array.isArray(parsed.errors) &&
|
||||
parsed.errors.length > 0
|
||||
result.errors &&
|
||||
Array.isArray(result.errors) &&
|
||||
result.errors.length > 0
|
||||
) {
|
||||
// Normalize: replace preview subdomain hashes and port numbers to keep signature stable
|
||||
const cleanErrors = parsed.errors.map((e: string) =>
|
||||
e
|
||||
.replace(/preview-\d+-\w+-\d+/g, "preview-X-url")
|
||||
.replace(/localhost:\d+/g, "localhost:PORT")
|
||||
.replace(/\d+/g, "N"),
|
||||
const cleanErrors = result.errors.map((e: string) =>
|
||||
normalizeError(e),
|
||||
);
|
||||
errors.push(`browser_console_errors:${cleanErrors.join("|")}`);
|
||||
}
|
||||
if (parsed.ok === false && parsed.error) {
|
||||
errors.push(`browser_console_fail:${parsed.error}`);
|
||||
if (result.ok === false && result.error) {
|
||||
errors.push(`browser_console_fail:${normalizeError(result.error)}`);
|
||||
}
|
||||
}
|
||||
|
||||
// 2. Check shell_exec failures
|
||||
if (tr.toolName === "shell_exec") {
|
||||
if (parsed.code !== 0 && parsed.code !== undefined) {
|
||||
const stderrLine = (parsed.stderr || parsed.stdout || "error")
|
||||
if (result.code !== 0 && result.code !== undefined) {
|
||||
const stderrLine = (result.stderr || result.stdout || "error")
|
||||
.split("\n")[0]
|
||||
.trim()
|
||||
.substring(0, 100);
|
||||
errors.push(`shell_exec_fail:${parsed.code}:${stderrLine}`);
|
||||
errors.push(
|
||||
`shell_exec_fail:${result.code}:${normalizeError(stderrLine)}`,
|
||||
);
|
||||
}
|
||||
if (parsed.ok === false && parsed.error) {
|
||||
errors.push(`shell_exec_error:${parsed.error}`);
|
||||
if (result.ok === false && result.error) {
|
||||
errors.push(`shell_exec_error:${normalizeError(result.error)}`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1491,11 +1428,11 @@ function getRoundVerifySignature(roundResults: any[]): string | null {
|
||||
tr.toolName === "dev_server_start" ||
|
||||
tr.toolName === "dev.server.start"
|
||||
) {
|
||||
if (parsed.healthCheck && parsed.healthCheck.status >= 400) {
|
||||
errors.push(`dev_server_unhealthy:${parsed.healthCheck.status}`);
|
||||
if (result.healthCheck && result.healthCheck.status >= 400) {
|
||||
errors.push(`dev_server_unhealthy:${result.healthCheck.status}`);
|
||||
}
|
||||
if (parsed.ok === false && parsed.error) {
|
||||
errors.push(`dev_server_fail:${parsed.error}`);
|
||||
if (result.ok === false && result.error) {
|
||||
errors.push(`dev_server_fail:${normalizeError(result.error)}`);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1506,9 +1443,9 @@ function getRoundVerifySignature(roundResults: any[]): string | null {
|
||||
tr.toolName === "fs.edit" ||
|
||||
tr.toolName === "fs.write"
|
||||
) {
|
||||
if (parsed.ok === false || parsed.error) {
|
||||
if (result.ok === false || result.error) {
|
||||
errors.push(
|
||||
`file_op_failed:${tr.toolName}:${parsed.error || parsed.stderr || "error"}`,
|
||||
`file_op_failed:${tr.toolName}:${normalizeError(result.error || result.stderr || "error")}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1520,3 +1457,64 @@ function getRoundVerifySignature(roundResults: any[]): string | null {
|
||||
if (errors.length === 0) return null;
|
||||
return errors.sort().join(";;");
|
||||
}
|
||||
|
||||
/**
 * Normalizes an error message so that the same underlying error yields the
 * same text across rounds.
 *
 * Volatile fragments are replaced with fixed placeholders:
 * - `preview-<digits>-<word>-<hex>` becomes `preview-X`
 * - `localhost:<port>` becomes `localhost:PORT`
 * - ISO-8601 UTC timestamps (`YYYY-MM-DDTHH:MM:SS[.fff]Z`) become `TIMESTAMP`
 *
 * The value usually comes from parsed JSON, so it is not guaranteed to be a
 * string at runtime. Non-string input is converted to text first instead of
 * throwing on `.replace`.
 *
 * @param error - The raw error value (normally a string).
 * @returns The trimmed, normalized message; `""` for `null`/`undefined`.
 */
function normalizeError(error: unknown): string {
  let text: string;
  if (typeof error === "string") {
    text = error;
  } else if (error == null) {
    text = "";
  } else if (error instanceof Error) {
    text = error.message;
  } else if (typeof error === "object") {
    // Structured error payloads: serialize so distinct errors stay distinct
    // (String(obj) would collapse them all to "[object Object]").
    try {
      text = JSON.stringify(error);
    } catch {
      text = String(error);
    }
  } else {
    text = String(error);
  }

  return text
    .replace(/preview-\d+-\w+-[0-9a-f]+/g, "preview-X")
    .replace(/localhost:\d+/g, "localhost:PORT")
    .replace(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?Z/g, "TIMESTAMP")
    .trim();
}
|
||||
|
||||
/**
 * Decides whether the latest round of tool results changed any state, and
 * returns the updated per-path file hashes.
 *
 * A round counts as progress when any of the following holds:
 *  A. the verify signature differs from the previous round's signature;
 *  B. a successful result carrying `path` + `sha256` reports a hash that
 *     differs from the last hash recorded for that path;
 *  C. a successful result without hash information comes from a tool that is
 *     not in the read-only list.
 *
 * Results that carry a hash are judged only by rule B, so rewriting a file
 * with identical content is not progress. Tool names are compared with dots
 * mapped to underscores, so `fs.read` is treated the same as `fs_read`.
 *
 * @param roundResults - Tool results for the round; each entry is expected to
 *   expose `toolName` and a JSON string `content`. Entries without content or
 *   with unparseable content are ignored.
 * @param lastHashes - Path-to-sha256 map from previous rounds (not mutated).
 * @param verifySig - Verify signature of this round, or `null` if none.
 * @param lastVerifySig - Verify signature of the previous round, or `null`.
 * @returns `progressed` plus `nextHashes`, a copy of `lastHashes` updated
 *   with every changed hash seen in this round.
 */
function checkRoundProgress(
  roundResults: any[],
  lastHashes: Map<string, string>,
  verifySig: string | null,
  lastVerifySig: string | null,
): { progressed: boolean; nextHashes: Map<string, string> } {
  // Tools whose success does not change any state.
  // NOTE(review): tools such as dev_server_logs / apps_get / apps_logs look
  // read-only as well — confirm before adding them here.
  const readOnlyTools = new Set([
    "fs_read",
    "fs_list",
    "fs_tree",
    "fs_glob",
    "fs_grep",
    "dev_server_list",
    "browser_console",
  ]);

  const nextHashes = new Map(lastHashes);

  // A. Progress check: did the error signature change since the last round?
  let progressed = verifySig !== lastVerifySig;

  for (const tr of roundResults) {
    if (!tr?.content) continue;

    let result: any;
    try {
      const parsed = JSON.parse(tr.content);
      // Some results are wrapped as { result: {...} }; unwrap when present.
      result = parsed?.result || parsed;
    } catch {
      // Non-JSON tool output carries no structured progress signal.
      continue;
    }
    if (!result?.ok) continue;

    // B. Progress check: did a file operation report a new/changed sha256?
    if (result.sha256 && result.path) {
      // Compare against the running map so that several writes to the same
      // path within one round leave the most recent hash recorded.
      if (nextHashes.get(result.path) !== result.sha256) {
        progressed = true;
        nextHashes.set(result.path, result.sha256);
      }
      // Hash-bearing results are decided by content change alone; an
      // identical rewrite must not fall through to rule C.
      continue;
    }

    // C. Progress check: did any non-read-only tool succeed?
    const toolName = String(tr.toolName ?? "").replace(/\./g, "_");
    if (!readOnlyTools.has(toolName)) {
      progressed = true;
    }
  }

  return { progressed, nextHashes };
}
|
||||
|
||||
Reference in New Issue
Block a user