chore(telemetry): implement state-based loop governor, 180s tool timeout, visual-qa path fix, and fs_write diff-guard

This commit is contained in:
2026-06-09 12:05:15 -07:00
parent c442921ccb
commit 7b6cac5462
2 changed files with 210 additions and 8 deletions

View File

@@ -860,6 +860,10 @@ export async function POST(request: Request) {
activeMcpToken !== undefined && // tools available
isConversational(message.trim());
let lastVerifySig: string | null = null;
let fileHashes = new Map<string, string>();
let stallRounds = 0;
try {
// Tool-calling loop: use non-streaming so thought_signature is
// always present in the complete response (required by thinking models).
@@ -989,7 +993,7 @@ export async function POST(request: Request) {
for (const tc of resp.toolCalls) {
if (aborted) break;
// C-05: Per-tool timeout. A hung MCP call would freeze the whole turn.
const TOOL_TIMEOUT_MS = 45_000;
const TOOL_TIMEOUT_MS = 180_000;
const toolTimeout = new Promise<string>((resolve) =>
setTimeout(
() =>
@@ -1055,6 +1059,45 @@ export async function POST(request: Request) {
messages.push({ role: "user", content: line });
}
// --- STATE-BASED LOOP GOVERNOR (Part 2) ---
const currentRoundResults = messages.filter(
(m) =>
m.role === "tool" &&
resp.toolCalls.some((tc) => tc.id === m.toolCallId),
);
// 1. Compute verify signature
const verifySig = getRoundVerifySignature(currentRoundResults);
// 2. Check for stall/progress
const { progressed, nextHashes } = checkRoundProgress(
currentRoundResults,
fileHashes,
);
fileHashes = nextHashes;
if (
verifySig &&
lastVerifySig &&
verifySig === lastVerifySig &&
!progressed
) {
loopBreakReason = `Blocked on persistent error: ${verifySig.split(";;")[0]}`;
}
if (!progressed) {
stallRounds++;
} else {
stallRounds = 0;
}
if (stallRounds >= 2) {
loopBreakReason =
"Stalled (No state changes or progress made for 2 rounds)";
}
lastVerifySig = verifySig;
if (loopBreakReason) break;
}
@@ -1391,3 +1434,132 @@ export async function POST(request: Request) {
},
});
}
// ── State-Based Loop Governor Helpers ─────────────────────────────────
/**
 * Builds a stable "failure signature" for one round of tool results so the
 * caller can tell whether the same errors keep recurring across rounds.
 *
 * Each entry is expected to carry a `toolName` and a JSON string in
 * `content`. The JSON may be the tool payload itself or an envelope of the
 * form `{ result: payload }`; fields are read from the inner payload first
 * and fall back to the envelope. Entries that are empty, not valid JSON, or
 * not JSON objects are ignored.
 *
 * Recognised failures:
 *  - browser console: a non-empty `errors` array, or `ok === false` with `error`
 *  - shell_exec: a defined, non-zero `code`, or `ok === false` with `error`
 *  - dev server start: `healthCheck.status >= 400`, or `ok === false` with `error`
 *  - fs_edit / fs_write: `ok === false`, or any truthy `error`
 *
 * Tool names are matched with dots treated as underscores, so
 * `browser.console` and `browser_console` are equivalent.
 *
 * @param roundResults Tool-result messages produced during the current round.
 * @returns The failure fragments, sorted and joined with ";;", or `null`
 *   when no recognised failure was found.
 */
function getRoundVerifySignature(roundResults: any[]): string | null {
  const errors: string[] = [];

  // Tool payloads are untrusted JSON: a field expected to be a string may be
  // an object or number. Coerce instead of letting a string method throw.
  const asText = (value: unknown): string =>
    typeof value === "string" ? value : JSON.stringify(value);

  for (const tr of roundResults) {
    if (!tr || !tr.content) continue;
    try {
      const parsed = JSON.parse(tr.content);
      if (parsed === null || typeof parsed !== "object") continue;

      // Prefer the unwrapped payload, but still honour envelope-level fields
      // (e.g. an `ok: false` reported next to `result`).
      const inner =
        parsed.result !== null && typeof parsed.result === "object"
          ? parsed.result
          : undefined;
      const field = (key: string): any => inner?.[key] ?? parsed[key];

      const tool =
        typeof tr.toolName === "string" ? tr.toolName.replace(/\./g, "_") : "";

      // 1. Browser console errors.
      if (tool === "browser_console") {
        const consoleErrors = field("errors");
        if (Array.isArray(consoleErrors) && consoleErrors.length > 0) {
          // Normalize preview hostnames, ports and every other number so the
          // signature stays stable from one round to the next.
          const cleanErrors = consoleErrors.map((e: unknown) =>
            asText(e)
              .replace(/preview-\d+-\w+-\d+/g, "preview-X-url")
              .replace(/localhost:\d+/g, "localhost:PORT")
              .replace(/\d+/g, "N"),
          );
          errors.push(`browser_console_errors:${cleanErrors.join("|")}`);
        }
        if (field("ok") === false && field("error")) {
          errors.push(`browser_console_fail:${asText(field("error"))}`);
        }
      }

      // 2. shell_exec failures.
      if (tool === "shell_exec") {
        const code = field("code");
        if (code !== 0 && code !== undefined) {
          // Only the first output line (capped at 100 chars) goes into the
          // signature, to keep it short.
          const output = asText(field("stderr") || field("stdout") || "error");
          const firstLine = (output.split("\n")[0] ?? "")
            .trim()
            .substring(0, 100);
          errors.push(`shell_exec_fail:${code}:${firstLine}`);
        }
        if (field("ok") === false && field("error")) {
          errors.push(`shell_exec_error:${asText(field("error"))}`);
        }
      }

      // 3. Dev server start failures.
      if (tool === "dev_server_start") {
        const health = field("healthCheck");
        if (health && health.status >= 400) {
          errors.push(`dev_server_unhealthy:${health.status}`);
        }
        if (field("ok") === false && field("error")) {
          errors.push(`dev_server_fail:${asText(field("error"))}`);
        }
      }

      // 4. fs_edit / fs_write failures. The original tool name is kept in the
      // fragment so existing signatures keep their exact text.
      if (tool === "fs_edit" || tool === "fs_write") {
        if (field("ok") === false || field("error")) {
          const reason = asText(field("error") || field("stderr") || "error");
          errors.push(`file_op_failed:${tr.toolName}:${reason}`);
        }
      }
    } catch {
      // Best-effort: a result that is not valid JSON (or is otherwise
      // malformed) contributes nothing to the signature.
    }
  }

  if (errors.length === 0) return null;
  // Sorting makes the signature independent of tool-call order.
  return errors.sort().join(";;");
}
/**
 * Decides whether a round of tool results changed any state, and returns the
 * updated per-path content hashes.
 *
 * Each entry is expected to carry a `toolName` and a JSON string in
 * `content`; the payload is `parsed.result` when present, otherwise the
 * parsed value itself. Only payloads with a truthy `ok` are considered.
 *
 *  - A payload reporting both `sha256` and `path` counts as progress only
 *    when the hash differs from the last hash recorded for that path, so
 *    rewriting a file with identical content is not progress.
 *  - Any other successful payload counts as progress unless it comes from a
 *    read-only lookup tool. Tool names are compared with dots treated as
 *    underscores, so `fs.read` is handled like `fs_read`.
 *
 * Entries that are empty or not valid JSON are ignored.
 *
 * @param roundResults Tool-result messages produced during the current round.
 * @param lastHashes Path → sha256 map from the previous rounds; not mutated.
 * @returns `progressed`, and `nextHashes` — a copy of `lastHashes` updated
 *   with the latest hash seen for each path.
 */
function checkRoundProgress(
  roundResults: any[],
  lastHashes: Map<string, string>,
): { progressed: boolean; nextHashes: Map<string, string> } {
  const readOnlyTools = new Set([
    "fs_read",
    "fs_list",
    "fs_tree",
    "fs_glob",
    "fs_grep",
    "dev_server_list",
    "browser_console",
  ]);

  let progressed = false;
  const nextHashes = new Map(lastHashes);

  for (const tr of roundResults) {
    if (!tr || !tr.content) continue;
    try {
      const parsed = JSON.parse(tr.content);
      const result = parsed.result || parsed;
      if (!result.ok) continue;

      // Hash-bearing results are judged by the hash alone. Comparing against
      // the running map (not the round-start snapshot) keeps the recorded
      // hash current when one path is written several times in a round.
      if (result.sha256 && result.path) {
        if (nextHashes.get(result.path) !== result.sha256) {
          progressed = true;
          nextHashes.set(result.path, result.sha256);
        }
        continue;
      }

      // Any other successful action is progress unless it is a pure lookup.
      const tool =
        typeof tr.toolName === "string"
          ? tr.toolName.replace(/\./g, "_")
          : tr.toolName;
      if (!readOnlyTools.has(tool)) {
        progressed = true;
      }
    } catch {
      // Best-effort: a result that is not valid JSON cannot show progress.
    }
  }

  return { progressed, nextHashes };
}

View File

@@ -4657,7 +4657,7 @@ async function toolRequestVisualQA(
);
}
const absPath = normalizeFsPath(targetPath);
const absPath = normalizeFsPath(targetPath, project.slug);
if (absPath instanceof NextResponse) return absPath;
const r = await runFsCmd(
@@ -4969,20 +4969,50 @@ async function toolFsWrite(principal: Principal, params: Record<string, any>) {
const path = normalizeFsPath(String(params.path ?? ""), project.slug);
if (path instanceof NextResponse) return path;
const content = typeof params.content === "string" ? params.content : "";
const force = Boolean(params.force);
// Stream content via base64 to avoid shell-quoting headaches with
// arbitrary binary / multibyte input.
const b64 = Buffer.from(content, "utf8").toString("base64");
const cmd =
`mkdir -p ${shq(path.replace(/\/[^/]+$/, "") || FS_ROOT)} && ` +
`printf %s ${shq(b64)} | base64 -d > ${shq(path)}`;
const py = `import sys, os, difflib, base64
path = sys.argv[1]
new_b64 = sys.argv[2]
force_overwrite = sys.argv[3] == 'true'
new_content = base64.b64decode(new_b64).decode('utf-8')
if os.path.exists(path) and not force_overwrite:
try:
with open(path, 'r', encoding='utf-8', errors='ignore') as f:
old_content = f.read()
old_lines = old_content.splitlines()
new_lines = new_content.splitlines()
if len(old_lines) > 5:
diff = list(difflib.unified_diff(old_lines, new_lines))
add_rem = len([l for l in diff if l.startswith('+') or l.startswith('-')]) - 2
change_pct = add_rem / max(1, len(old_lines))
if change_pct > 0.60:
sys.stderr.write(f"REWRITE_GUARD_TRIGGERED: Your fs_write would overwrite {int(change_pct*100)}% of this {len(old_lines)}-line file. To replace large blocks or the entire file, please use surgical 'fs_edit' anchors instead, or pass 'force: true' on fs_write if you genuinely need a full rewrite.\\n")
sys.exit(4)
except Exception as e:
pass
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, 'w', encoding='utf-8') as f:
f.write(new_content)
`;
const pyB64 = Buffer.from(py, "utf8").toString("base64");
const cmd = `python3 -c "$(printf %s ${shq(pyB64)} | base64 -d)" ${shq(path)} ${shq(b64)} ${shq(String(force))} && sha256sum ${shq(path)} | cut -d' ' -f1 && wc -c < ${shq(path)}`;
const r = await runFsCmd(principal, project, cmd);
if (r.code !== 0) {
const status = r.code === 4 ? 409 : 500;
return NextResponse.json(
{ error: `fs.write failed: ${r.stderr.trim() || "unknown error"}` },
{ status: 500 },
{ status },
);
}
const stdoutParts = r.stdout.split("\n").filter(Boolean);
const { createHash } = require("crypto");
const bytes = Buffer.byteLength(content, "utf8");
const sha256 = createHash("sha256").update(content, "utf8").digest("hex");