Stop falsely labeling log-reading tools as failed when they read stack traces

This commit is contained in:
2026-06-15 14:15:17 -07:00
parent d67d8e2052
commit f41f309c3f
2 changed files with 38 additions and 8 deletions

View File

@@ -628,6 +628,30 @@ function extractPreviewUrl(messages: ChatMessage[]): string | undefined {
return undefined; return undefined;
} }
/**
 * Produces a compact, UI-friendly rendition of a tool result string.
 *
 * When `raw` is a JSON object, the result stays valid JSON: bulky fields under
 * `result` (`log`, `content`, `listing`) are replaced by a "..." placeholder,
 * and top-level `stdout` / `stderr` strings longer than 200 characters are cut
 * to 200 characters plus "...". Anything else (non-JSON text, JSON primitives,
 * `null`, or a top-level JSON array) is returned as the first 500 characters
 * of `raw`.
 *
 * @param raw - The tool result exactly as produced by the tool.
 * @returns The summarized string to send to the UI.
 */
function summarizeForUI(raw: string): string {
  const MAX_RAW_CHARS = 500;
  const MAX_STREAM_CHARS = 200;
  const PLACEHOLDER = "...";

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Not JSON (e.g. plain command output): best-effort blunt truncation.
    return raw.slice(0, MAX_RAW_CHARS);
  }

  // Only plain objects are summarized. Arrays are deliberately excluded:
  // spreading an array into an object literal would re-serialize `[1,2]`
  // as `{"0":1,"1":2}` and hand the UI a different shape than the tool sent.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return raw.slice(0, MAX_RAW_CHARS);
  }

  const summary: Record<string, unknown> = {
    ...(parsed as Record<string, unknown>),
  };

  // Strip massive payload fields so the UI gets intact JSON. The nested
  // object is copied first so the parsed value is never mutated through the
  // shallow clone. An array-valued `result` is left untouched.
  const inner = summary.result;
  if (inner !== null && typeof inner === "object" && !Array.isArray(inner)) {
    const trimmed: Record<string, unknown> = {
      ...(inner as Record<string, unknown>),
    };
    for (const field of ["log", "content", "listing"]) {
      // Truthiness check: empty or absent fields are left exactly as they are.
      if (trimmed[field]) trimmed[field] = PLACEHOLDER;
    }
    summary.result = trimmed;
  }

  // Long process output is shortened but kept recognisable.
  for (const stream of ["stdout", "stderr"]) {
    const text = summary[stream];
    if (typeof text === "string" && text.length > MAX_STREAM_CHARS) {
      summary[stream] = text.slice(0, MAX_STREAM_CHARS) + PLACEHOLDER;
    }
  }

  return JSON.stringify(summary);
}
export async function POST(request: Request) { export async function POST(request: Request) {
await ensureChatTables(); await ensureChatTables();
@@ -1109,7 +1133,7 @@ export async function POST(request: Request) {
emit({ emit({
type: "tool_result", type: "tool_result",
name: tc.name, name: tc.name,
result: result.slice(0, 500), result: summarizeForUI(result),
}); });
messages.push({ messages.push({
role: "tool", role: "tool",
@@ -1322,7 +1346,7 @@ export async function POST(request: Request) {
emit({ emit({
type: "tool_result", type: "tool_result",
name: tc.name, name: tc.name,
result: result.slice(0, 500), result: summarizeForUI(result),
}); });
messages.push({ messages.push({

View File

@@ -284,11 +284,14 @@ function summarizeToolResult(result?: string): {
} }
// Plain-text heuristics // Plain-text heuristics
// We explicitly ignore 'error' and 'exception' here because tools like dev_server_logs
// or browser_console legitimately return stack traces when working correctly.
// A raw string with 'error' inside it shouldn't auto-fail the tool execution pill.
const lower = raw.toLowerCase(); const lower = raw.toLowerCase();
if ( if (
/(econnrefused|enoent|error|failed|traceback|exception|not found|permission denied|cannot)/.test( /(econnrefused|enoent|permission denied|command not found)/.test(
lower, lower,
) ) && !raw.includes("dev_server_logs") && !raw.includes("browser_console")
) { ) {
return { ok: false, label: `Failed — ${firstLine(raw)}` }; return { ok: false, label: `Failed — ${firstLine(raw)}` };
} }
@@ -1548,12 +1551,15 @@ export function ChatPanel({
} }
} catch { } catch {
// 2. If it's a raw string (like a bash crash), scan for fatal keywords // 2. If it's a raw string (like a bash crash), scan for fatal keywords
// We skip this check for log-reading tools since they legitimately contain errors.
const lower = ev.result.toLowerCase(); const lower = ev.result.toLowerCase();
if ( if (
lower.includes("error") || !ev.name?.includes("logs") &&
lower.includes("failed") || !ev.name?.includes("console") &&
lower.includes("unexpected") || (lower.includes("econnrefused") ||
lower.includes("not found") lower.includes("enoent") ||
lower.includes("permission denied") ||
lower.includes("command not found"))
) { ) {
isToolErr = true; isToolErr = true;
} }