chore(telemetry): implement state-based loop governor, 180s tool timeout, visual-qa path fix, and fs_write diff-guard
This commit is contained in:
@@ -860,6 +860,10 @@ export async function POST(request: Request) {
|
||||
activeMcpToken !== undefined && // tools available
|
||||
isConversational(message.trim());
|
||||
|
||||
let lastVerifySig: string | null = null;
|
||||
let fileHashes = new Map<string, string>();
|
||||
let stallRounds = 0;
|
||||
|
||||
try {
|
||||
// Tool-calling loop: use non-streaming so thought_signature is
|
||||
// always present in the complete response (required by thinking models).
|
||||
@@ -989,7 +993,7 @@ export async function POST(request: Request) {
|
||||
for (const tc of resp.toolCalls) {
|
||||
if (aborted) break;
|
||||
// C-05: Per-tool timeout. A hung MCP call would freeze the whole turn.
|
||||
const TOOL_TIMEOUT_MS = 45_000;
|
||||
const TOOL_TIMEOUT_MS = 180_000;
|
||||
const toolTimeout = new Promise<string>((resolve) =>
|
||||
setTimeout(
|
||||
() =>
|
||||
@@ -1055,6 +1059,45 @@ export async function POST(request: Request) {
|
||||
messages.push({ role: "user", content: line });
|
||||
}
|
||||
|
||||
// --- STATE-BASED LOOP GOVERNOR (Part 2) ---
|
||||
const currentRoundResults = messages.filter(
|
||||
(m) =>
|
||||
m.role === "tool" &&
|
||||
resp.toolCalls.some((tc) => tc.id === m.toolCallId),
|
||||
);
|
||||
|
||||
// 1. Compute verify signature
|
||||
const verifySig = getRoundVerifySignature(currentRoundResults);
|
||||
|
||||
// 2. Check for stall/progress
|
||||
const { progressed, nextHashes } = checkRoundProgress(
|
||||
currentRoundResults,
|
||||
fileHashes,
|
||||
);
|
||||
fileHashes = nextHashes;
|
||||
|
||||
if (
|
||||
verifySig &&
|
||||
lastVerifySig &&
|
||||
verifySig === lastVerifySig &&
|
||||
!progressed
|
||||
) {
|
||||
loopBreakReason = `Blocked on persistent error: ${verifySig.split(";;")[0]}`;
|
||||
}
|
||||
|
||||
if (!progressed) {
|
||||
stallRounds++;
|
||||
} else {
|
||||
stallRounds = 0;
|
||||
}
|
||||
|
||||
if (stallRounds >= 2) {
|
||||
loopBreakReason =
|
||||
"Stalled (No state changes or progress made for 2 rounds)";
|
||||
}
|
||||
|
||||
lastVerifySig = verifySig;
|
||||
|
||||
if (loopBreakReason) break;
|
||||
}
|
||||
|
||||
@@ -1391,3 +1434,132 @@ export async function POST(request: Request) {
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// ── State-Based Loop Governor Helpers ─────────────────────────────────
|
||||
|
||||
/**
 * Builds a signature string describing every failure reported by the tool
 * results of one round, or `null` when no failure was found.
 *
 * Each entry of `roundResults` is expected to carry a JSON string in
 * `content` and the tool identifier in `toolName`. Entries whose content is
 * missing, empty, not valid JSON, or not a JSON object are skipped.
 *
 * Both the top-level payload and, when present, its nested `result` object
 * are inspected, so failures are detected whether or not the tool output is
 * wrapped in a `{ result: ... }` envelope.
 *
 * @param roundResults Tool-result messages produced during the round.
 * @returns The collected error strings sorted and joined with `";;"`, or
 *   `null` if there are none.
 */
function getRoundVerifySignature(roundResults: any[]): string | null {
  const errors: string[] = [];

  for (const tr of roundResults) {
    if (typeof tr?.content !== "string" || tr.content === "") continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(tr.content);
    } catch {
      // Not JSON: nothing structured to inspect for this result.
      continue;
    }
    if (typeof parsed !== "object" || parsed === null) continue;

    const envelope = parsed as Record<string, any>;
    const views: Record<string, any>[] = [envelope];
    if (typeof envelope.result === "object" && envelope.result !== null) {
      views.push(envelope.result);
    }

    const toolName = typeof tr.toolName === "string" ? tr.toolName : "";

    // De-duplicate per tool result so an envelope that mirrors its inner
    // `result` does not report the same failure twice.
    const perResult = new Set<string>();
    for (const view of views) {
      for (const entry of collectVerifyErrors(toolName, view)) {
        perResult.add(entry);
      }
    }
    errors.push(...perResult);
  }

  if (errors.length === 0) return null;
  return errors.sort().join(";;");
}

/**
 * Returns the error strings that a single payload object yields for the
 * given tool. Dotted tool names ("fs.write") are matched like their
 * underscore form ("fs_write"); the raw name is kept in emitted strings.
 */
function collectVerifyErrors(
  rawToolName: string,
  payload: Record<string, any>,
): string[] {
  const found: string[] = [];
  const tool = rawToolName.replace(/\./g, "_");
  const explicitFailure = payload.ok === false && Boolean(payload.error);

  // 1. Browser console errors
  if (tool === "browser_console") {
    if (Array.isArray(payload.errors) && payload.errors.length > 0) {
      const cleanErrors = payload.errors.map(normalizeConsoleErrorForSignature);
      found.push(`browser_console_errors:${cleanErrors.join("|")}`);
    }
    if (explicitFailure) {
      found.push(`browser_console_fail:${payload.error}`);
    }
  }

  // 2. Shell command failures
  if (tool === "shell_exec") {
    if (payload.code !== 0 && payload.code !== undefined) {
      // Only the first output line, capped at 100 chars, enters the signature.
      const firstLine =
        String(payload.stderr || payload.stdout || "error").split("\n")[0] ?? "";
      const stderrLine = firstLine.trim().substring(0, 100);
      found.push(`shell_exec_fail:${payload.code}:${stderrLine}`);
    }
    if (explicitFailure) {
      found.push(`shell_exec_error:${payload.error}`);
    }
  }

  // 3. Dev server start failures
  if (tool === "dev_server_start") {
    if (payload.healthCheck?.status >= 400) {
      found.push(`dev_server_unhealthy:${payload.healthCheck.status}`);
    }
    if (explicitFailure) {
      found.push(`dev_server_fail:${payload.error}`);
    }
  }

  // 4. File edit / write failures
  if (tool === "fs_edit" || tool === "fs_write") {
    if (payload.ok === false || payload.error) {
      found.push(
        `file_op_failed:${rawToolName}:${payload.error || payload.stderr || "error"}`,
      );
    }
  }

  return found;
}

/**
 * Replaces preview host fragments, localhost ports and any remaining digit
 * runs with placeholders so the same console error compares equal across
 * rounds. Non-string entries are serialized instead of throwing.
 */
function normalizeConsoleErrorForSignature(entry: unknown): string {
  const text =
    typeof entry === "string" ? entry : (JSON.stringify(entry) ?? String(entry));
  return text
    .replace(/preview-\d+-\w+-\d+/g, "preview-X-url")
    .replace(/localhost:\d+/g, "localhost:PORT")
    .replace(/\d+/g, "N");
}
|
||||
|
||||
/**
 * Decides whether the tool results of one round changed any state, and
 * returns the updated per-path content hashes.
 *
 * Each entry of `roundResults` is expected to carry a JSON string in
 * `content` and the tool identifier in `toolName`; the payload inspected is
 * the parsed object's `result` field when truthy, otherwise the parsed value
 * itself. Entries that are missing content or fail to parse are skipped.
 *
 * A round counts as progress when:
 *  - a successful result reporting `path` and `sha256` carries a hash that
 *    differs from the one currently recorded for that path, or
 *  - any other successful result comes from a tool that is not a read-only
 *    lookup.
 *
 * A successful write whose hash is unchanged is NOT progress: rewriting a
 * file with identical content leaves the state exactly as it was.
 *
 * @param roundResults Tool-result messages produced during the round.
 * @param lastHashes Hashes recorded after the previous round; not mutated.
 * @returns `progressed` plus a new map holding the latest hash per path.
 */
function checkRoundProgress(
  roundResults: any[],
  lastHashes: Map<string, string>,
): { progressed: boolean; nextHashes: Map<string, string> } {
  // Lookup-only tools never count as progress by themselves.
  const readOnlyTools = new Set([
    "fs_read",
    "fs_list",
    "fs_tree",
    "fs_glob",
    "fs_grep",
    "dev_server_list",
    "browser_console",
  ]);

  let progressed = false;
  const nextHashes = new Map(lastHashes);

  for (const tr of roundResults) {
    if (!tr?.content) continue;

    let result: any;
    try {
      const parsed = JSON.parse(tr.content);
      result = parsed.result || parsed;
    } catch {
      // Unparseable (or null) payload: nothing to evaluate for this result.
      continue;
    }
    if (!result || !result.ok) continue;

    if (result.sha256 && result.path) {
      // Compare against the running map so that several writes to the same
      // path within one round leave the final hash recorded.
      if (nextHashes.get(result.path) !== result.sha256) {
        progressed = true;
        nextHashes.set(result.path, result.sha256);
      }
      continue;
    }

    // Tool names may arrive dotted ("fs.read") or underscored ("fs_read").
    const tool =
      typeof tr.toolName === "string" ? tr.toolName.replace(/\./g, "_") : "";
    if (!readOnlyTools.has(tool)) {
      progressed = true;
    }
  }

  return { progressed, nextHashes };
}
|
||||
|
||||
@@ -4657,7 +4657,7 @@ async function toolRequestVisualQA(
|
||||
);
|
||||
}
|
||||
|
||||
const absPath = normalizeFsPath(targetPath);
|
||||
const absPath = normalizeFsPath(targetPath, project.slug);
|
||||
if (absPath instanceof NextResponse) return absPath;
|
||||
|
||||
const r = await runFsCmd(
|
||||
@@ -4969,20 +4969,50 @@ async function toolFsWrite(principal: Principal, params: Record<string, any>) {
|
||||
const path = normalizeFsPath(String(params.path ?? ""), project.slug);
|
||||
if (path instanceof NextResponse) return path;
|
||||
const content = typeof params.content === "string" ? params.content : "";
|
||||
const force = Boolean(params.force);
|
||||
|
||||
// Stream content via base64 to avoid shell-quoting headaches with
|
||||
// arbitrary binary / multibyte input.
|
||||
const b64 = Buffer.from(content, "utf8").toString("base64");
|
||||
const cmd =
|
||||
`mkdir -p ${shq(path.replace(/\/[^/]+$/, "") || FS_ROOT)} && ` +
|
||||
`printf %s ${shq(b64)} | base64 -d > ${shq(path)}`;
|
||||
|
||||
const py = `import sys, os, difflib, base64
|
||||
path = sys.argv[1]
|
||||
new_b64 = sys.argv[2]
|
||||
force_overwrite = sys.argv[3] == 'true'
|
||||
|
||||
new_content = base64.b64decode(new_b64).decode('utf-8')
|
||||
|
||||
if os.path.exists(path) and not force_overwrite:
|
||||
try:
|
||||
with open(path, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
old_content = f.read()
|
||||
old_lines = old_content.splitlines()
|
||||
new_lines = new_content.splitlines()
|
||||
if len(old_lines) > 5:
|
||||
diff = list(difflib.unified_diff(old_lines, new_lines))
|
||||
add_rem = len([l for l in diff if l.startswith('+') or l.startswith('-')]) - 2
|
||||
change_pct = add_rem / max(1, len(old_lines))
|
||||
if change_pct > 0.60:
|
||||
sys.stderr.write(f"REWRITE_GUARD_TRIGGERED: Your fs_write would overwrite {int(change_pct*100)}% of this {len(old_lines)}-line file. To replace large blocks or the entire file, please use surgical 'fs_edit' anchors instead, or pass 'force: true' on fs_write if you genuinely need a full rewrite.\\n")
|
||||
sys.exit(4)
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
os.makedirs(os.path.dirname(path), exist_ok=True)
|
||||
with open(path, 'w', encoding='utf-8') as f:
|
||||
f.write(new_content)
|
||||
`;
|
||||
|
||||
const pyB64 = Buffer.from(py, "utf8").toString("base64");
|
||||
const cmd = `python3 -c "$(printf %s ${shq(pyB64)} | base64 -d)" ${shq(path)} ${shq(b64)} ${shq(String(force))} && sha256sum ${shq(path)} | cut -d' ' -f1 && wc -c < ${shq(path)}`;
|
||||
|
||||
const r = await runFsCmd(principal, project, cmd);
|
||||
if (r.code !== 0) {
|
||||
const status = r.code === 4 ? 409 : 500;
|
||||
return NextResponse.json(
|
||||
{ error: `fs.write failed: ${r.stderr.trim() || "unknown error"}` },
|
||||
{ status: 500 },
|
||||
{ status },
|
||||
);
|
||||
}
|
||||
const stdoutParts = r.stdout.split("\n").filter(Boolean);
|
||||
const { createHash } = require("crypto");
|
||||
const bytes = Buffer.byteLength(content, "utf8");
|
||||
const sha256 = createHash("sha256").update(content, "utf8").digest("hex");
|
||||
|
||||
Reference in New Issue
Block a user