Compare commits

...

6 Commits

4 changed files with 70 additions and 44 deletions

View File

@@ -1879,7 +1879,8 @@ export async function executeMcpTool(
if (toolName === "http_fetch") return executeHttpFetch(args);
// Convert underscore tool name → dotted MCP action (apps_create → apps.create)
const action = toolName.replace(/_/g, ".");
let action = toolName.replace(/_/g, ".");
if (toolName === "workspace_db_query") action = "workspace.db_query";
// Unpack JSON-string args (Gemini schemas can't represent free-form objects,
// so we accept *Json string fields and parse them server-side).

View File

@@ -14,7 +14,7 @@
* data: {"type":"done"}
* data: {"type":"error","error":"..."}
*/
import { NextResponse } from "next/server";
import { NextResponse, after } from "next/server";
import { requireWorkspacePrincipal } from "@/lib/auth/workspace-auth";
import { query, queryOne } from "@/lib/db-postgres";
import { callVibnChat, streamVibnChat } from "@/lib/ai/vibn-chat-model";
@@ -53,16 +53,15 @@ type TurnIntent =
const TOOL_BUDGETS: Record<TurnIntent, number> = {
conversational: 1, // Must be at least 1 so the LLM gets called for a text reply
// Investigative questions ("is the auth connected?", "what's the test user?")
// routinely need to read several files THEN synthesize an answer. Budgets of
// 5/8 were cutting these off at the cap before the model could answer
// (telemetry showed 100% round_cap on these turns). Raised so a read-only
// investigation can actually finish.
status_check: 16,
diagnose: 22,
small_fix: 18,
feature_build: 40,
deploy: 25,
// With the Verification Harness and Anti-Stall Governor now unconditionally enabled,
// we no longer need to rely on artificially tight tool budgets to prevent infinite loops.
// The system will intelligently halt if it detects a stall or unfixable error, so we can
// safely give the AI a massive runway to complete complex tasks.
status_check: 40,
diagnose: 60,
small_fix: 40,
feature_build: 80,
deploy: 40,
autonomous: 150,
};
@@ -628,6 +627,29 @@ function extractPreviewUrl(messages: ChatMessage[]): string | undefined {
return undefined;
}
/**
 * Produce a compact version of a tool result for the UI event stream.
 *
 * When `raw` is JSON the output is still valid JSON (rather than a string cut
 * mid-token), with the bulky fields reduced:
 *  - `result.log`, `result.content` and `result.listing` are replaced by "..."
 *    when present and truthy;
 *  - top-level `stdout` / `stderr` strings longer than 200 characters are cut
 *    to 200 characters followed by "...".
 *
 * JSON arrays are re-serialised unchanged. Anything that is not a JSON object
 * or array (plain text, or a JSON primitive such as `null` or a number) falls
 * back to the first 500 characters of `raw`.
 *
 * @param raw Tool result as received, either JSON text or plain text.
 * @returns A string suitable for the `tool_result` UI event.
 */
function summarizeForUI(raw: string): string {
  try {
    const parsed: unknown = JSON.parse(raw);

    // Spreading an array into an object literal would rewrite it as
    // {"0":…,"1":…}; keep arrays in their original shape instead.
    if (Array.isArray(parsed)) {
      return JSON.stringify(parsed);
    }

    if (parsed && typeof parsed === "object") {
      const clone: Record<string, unknown> = {
        ...(parsed as Record<string, unknown>),
      };

      // Strip massive payload fields so the UI gets intact JSON.
      const inner = clone.result;
      if (inner && typeof inner === "object" && !Array.isArray(inner)) {
        // Work on a copy so the parsed value itself is never mutated.
        const trimmed: Record<string, unknown> = {
          ...(inner as Record<string, unknown>),
        };
        for (const key of ["log", "content", "listing"]) {
          if (trimmed[key]) trimmed[key] = "...";
        }
        clone.result = trimmed;
      }

      for (const key of ["stdout", "stderr"]) {
        const value = clone[key];
        if (typeof value === "string" && value.length > 200) {
          clone[key] = value.slice(0, 200) + "...";
        }
      }

      return JSON.stringify(clone);
    }
  } catch {
    // Not JSON — deliberately fall through to plain-text truncation.
  }
  return raw.slice(0, 500);
}
export async function POST(request: Request) {
await ensureChatTables();
@@ -1109,7 +1131,7 @@ export async function POST(request: Request) {
emit({
type: "tool_result",
name: tc.name,
result: result.slice(0, 500),
result: summarizeForUI(result),
});
messages.push({
role: "tool",
@@ -1322,7 +1344,7 @@ export async function POST(request: Request) {
emit({
type: "tool_result",
name: tc.name,
result: result.slice(0, 500),
result: summarizeForUI(result),
});
messages.push({
@@ -1484,13 +1506,7 @@ export async function POST(request: Request) {
const mutated = assistantToolCalls.some((tc) =>
MUTATION_TOOLS.includes(tc.name),
);
if (
process.env.VIBN_VERIFICATION_ENABLED === "1" &&
!aborted &&
mutated &&
activeProject?.id &&
activeMcpToken
) {
if (!aborted && mutated && activeProject?.id && activeMcpToken) {
emit({ type: "phase", phase: "verify", label: "Verifying & fixing" });
const previewUrl = extractPreviewUrl(messages);
const verifyExec: ToolExecutor = async (name, args) =>
@@ -1874,7 +1890,7 @@ export async function POST(request: Request) {
// Wrapped in try/catch + .catch — the response stream is already
// closed and we don't want a summary failure to surface as an
// error to the user.
(async () => {
after(async () => {
try {
const allMessages = [...history, finalMsg];
// Only summarize if there's something worth summarizing.
@@ -1927,7 +1943,7 @@ export async function POST(request: Request) {
} catch {
// best-effort; silent failure
}
})().catch(() => {});
});
// Plan extraction is handled inline during tool calls or proactively.
emit({ type: "done" });

View File

@@ -284,11 +284,14 @@ function summarizeToolResult(result?: string): {
}
// Plain-text heuristics
// We explicitly ignore 'error' and 'exception' here because tools like dev_server_logs
// or browser_console legitimately return stack traces when working correctly.
// A raw string with 'error' inside it shouldn't auto-fail the tool execution pill.
const lower = raw.toLowerCase();
if (
/(econnrefused|enoent|error|failed|traceback|exception|not found|permission denied|cannot)/.test(
/(econnrefused|enoent|permission denied|command not found)/.test(
lower,
)
) && !raw.includes("dev_server_logs") && !raw.includes("browser_console")
) {
return { ok: false, label: `Failed — ${firstLine(raw)}` };
}
@@ -1548,12 +1551,15 @@ export function ChatPanel({
}
} catch {
// 2. If it's a raw string (like a bash crash), scan for fatal keywords
// We skip this check for log-reading tools since they legitimately contain errors.
const lower = ev.result.toLowerCase();
if (
lower.includes("error") ||
lower.includes("failed") ||
lower.includes("unexpected") ||
lower.includes("not found")
!ev.name?.includes("logs") &&
!ev.name?.includes("console") &&
(lower.includes("econnrefused") ||
lower.includes("enoent") ||
lower.includes("permission denied") ||
lower.includes("command not found"))
) {
isToolErr = true;
}

View File

@@ -6,12 +6,7 @@
* injected ToolExecutor, so they are fully unit-testable with mocked outputs.
*/
import type {
AcceptanceCheck,
CheckKind,
CheckResult,
ExecCtx,
} from "./types";
import type { AcceptanceCheck, CheckKind, CheckResult, ExecCtx } from "./types";
// ── helpers ────────────────────────────────────────────────────────────────
@@ -29,7 +24,11 @@ export function redact(s: string): string {
}
export function clip(s: string, n = 400): string {
const out = redact(String(s ?? "").replace(/\s+/g, " ").trim());
const out = redact(
String(s ?? "")
.replace(/\s+/g, " ")
.trim(),
);
return out.length > n ? out.slice(0, n) + "…" : out;
}
@@ -105,11 +104,7 @@ function str(spec: Record<string, unknown>, key: string, dflt = ""): string {
const v = spec[key];
return typeof v === "string" ? v : dflt;
}
function num(
spec: Record<string, unknown>,
key: string,
dflt: number,
): number {
function num(spec: Record<string, unknown>, key: string, dflt: number): number {
const v = spec[key];
return typeof v === "number" ? v : dflt;
}
@@ -137,7 +132,12 @@ const RUNNERS: Record<
(check: AcceptanceCheck, ctx: ExecCtx) => Promise<CheckResult>
> = {
build: (c, ctx) =>
runShellExit(c, ctx, str(c.spec, "command", "npm run build"), "build"),
runShellExit(
c,
ctx,
str(c.spec, "command", "npx next build --no-turbopack"),
"build",
),
typecheck: (c, ctx) =>
runShellExit(
@@ -156,7 +156,7 @@ const RUNNERS: Record<
server_up: async (c, ctx) => {
const raw = await ctx.exec("dev_server_start", {
projectId: ctx.projectId,
command: str(c.spec, "command", "npm run dev"),
command: str(c.spec, "command", "npx next dev -H 0.0.0.0 --no-turbopack"),
port: num(c.spec, "port", 3000),
});
const r = parseToolResult(raw);
@@ -183,7 +183,10 @@ const RUNNERS: Record<
const codeStr = (r.stdout || r.raw).trim().match(/\d{3}/)?.[0];
if (codeStr && Number(codeStr) === expected)
return ok(c, `${url}${codeStr}`);
return fail(c, `${url} returned ${codeStr ?? "no response"} (expected ${expected})`);
return fail(
c,
`${url} returned ${codeStr ?? "no response"} (expected ${expected})`,
);
},
console_clean: async (c, ctx) => {