Disable Turbopack in Verification Harness auto-build checks

Significantly increase AI tool budgets now that the Verification Harness prevents runaway loops
Enable verification harness unconditionally so AI auto-checks builds and console errors
2026-06-15 17:25:40 -07:00 · 2026-06-15 17:19:37 -07:00 · 2026-06-15 17:02:26 -07:00 · 2026-06-15 16:48:40 -07:00 · 2026-06-15 16:32:35 -07:00 · 2026-06-15 13:55:21 -07:00
4 changed files with 70 additions and 44 deletions
--- a/vibn-agent-runner/src/tools/vibn-tools.ts
+++ b/vibn-agent-runner/src/tools/vibn-tools.ts
@@ -1879,7 +1879,8 @@ export async function executeMcpTool(
  if (toolName === "http_fetch") return executeHttpFetch(args);

  // Convert underscore tool name → dotted MCP action (apps_create → apps.create)
-  const action = toolName.replace(/_/g, ".");
+  let action = toolName.replace(/_/g, ".");
+  if (toolName === "workspace_db_query") action = "workspace.db_query";

  // Unpack JSON-string args (Gemini schemas can't represent free-form objects,
  // so we accept *Json string fields and parse them server-side).
--- a/vibn-frontend/app/api/chat/route.ts
+++ b/vibn-frontend/app/api/chat/route.ts
@@ -14,7 +14,7 @@
 *   data: {"type":"done"}
 *   data: {"type":"error","error":"..."}
 */
-import { NextResponse } from "next/server";
+import { NextResponse, after } from "next/server";
 import { requireWorkspacePrincipal } from "@/lib/auth/workspace-auth";
 import { query, queryOne } from "@/lib/db-postgres";
 import { callVibnChat, streamVibnChat } from "@/lib/ai/vibn-chat-model";
@@ -53,16 +53,15 @@ type TurnIntent =

 const TOOL_BUDGETS: Record<TurnIntent, number> = {
  conversational: 1, // Must be at least 1 so the LLM gets called for a text reply
-  // Investigative questions ("is the auth connected?", "what's the test user?")
-  // routinely need to read several files THEN synthesize an answer. Budgets of
-  // 5/8 were cutting these off at the cap before the model could answer
-  // (telemetry showed 100% round_cap on these turns). Raised so a read-only
-  // investigation can actually finish.
-  status_check: 16,
-  diagnose: 22,
-  small_fix: 18,
-  feature_build: 40,
-  deploy: 25,
+  // With the Verification Harness and Anti-Stall Governor now unconditionally enabled,
+  // we no longer need to rely on artificially tight tool budgets to prevent infinite loops.
+  // The system will intelligently halt if it detects a stall or unfixable error, so we can
+  // safely give the AI a massive runway to complete complex tasks.
+  status_check: 40,
+  diagnose: 60,
+  small_fix: 40,
+  feature_build: 80,
+  deploy: 40,
  autonomous: 150,
 };

@@ -628,6 +627,29 @@ function extractPreviewUrl(messages: ChatMessage[]): string | undefined {
  return undefined;
 }

+function summarizeForUI(raw: string): string { // Shrinks a raw tool-result string for the UI event; returns re-serialized JSON when `raw` parses to an object, else the first 500 chars of `raw`.
+  try {
+    const p = JSON.parse(raw); // throws on non-JSON input; the empty catch below then falls through to the plain slice
+    if (p && typeof p === "object") { // NOTE(review): arrays also pass this check and the spread below re-serializes them as an index-keyed object — confirm that is intended
+      const clone = { ...p }; // shallow copy: nested objects such as `result` are still shared with `p`
+      // Replace bulky result.log / result.content / result.listing payloads with "..." so the emitted string stays parseable JSON
+      if (clone.result && typeof clone.result === "object") {
+        if (clone.result.log) clone.result.log = "..."; // only truthy values are replaced; the original content is dropped entirely
+        if (clone.result.content) clone.result.content = "...";
+        if (clone.result.listing) clone.result.listing = "...";
+      }
+      if (typeof clone.stdout === "string" && clone.stdout.length > 200) { // top-level stdout keeps its first 200 chars
+        clone.stdout = clone.stdout.slice(0, 200) + "...";
+      }
+      if (typeof clone.stderr === "string" && clone.stderr.length > 200) { // top-level stderr keeps its first 200 chars
+        clone.stderr = clone.stderr.slice(0, 200) + "...";
+      }
+      return JSON.stringify(clone); // no overall length cap on this path; any other fields are emitted in full
+    }
+  } catch {} // parse failure is deliberately ignored: non-JSON results use the fallback below
+  return raw.slice(0, 500); // fallback for non-JSON text and for JSON primitives/null: first 500 chars of the raw string
+}
+
 export async function POST(request: Request) {
  await ensureChatTables();

@@ -1109,7 +1131,7 @@ export async function POST(request: Request) {
            emit({
              type: "tool_result",
              name: tc.name,
-              result: result.slice(0, 500),
+              result: summarizeForUI(result),
            });
            messages.push({
              role: "tool",
@@ -1322,7 +1344,7 @@ export async function POST(request: Request) {
            emit({
              type: "tool_result",
              name: tc.name,
-              result: result.slice(0, 500),
+              result: summarizeForUI(result),
            });

            messages.push({
@@ -1484,13 +1506,7 @@ export async function POST(request: Request) {
        const mutated = assistantToolCalls.some((tc) =>
          MUTATION_TOOLS.includes(tc.name),
        );
-        if (
-          process.env.VIBN_VERIFICATION_ENABLED === "1" &&
-          !aborted &&
-          mutated &&
-          activeProject?.id &&
-          activeMcpToken
-        ) {
+        if (!aborted && mutated && activeProject?.id && activeMcpToken) {
          emit({ type: "phase", phase: "verify", label: "Verifying & fixing" });
          const previewUrl = extractPreviewUrl(messages);
          const verifyExec: ToolExecutor = async (name, args) =>
@@ -1874,7 +1890,7 @@ export async function POST(request: Request) {
        // Wrapped in try/catch + .catch — the response stream is already
        // closed and we don't want a summary failure to surface as an
        // error to the user.
-        (async () => {
+        after(async () => {
          try {
            const allMessages = [...history, finalMsg];
            // Only summarize if there's something worth summarizing.
@@ -1927,7 +1943,7 @@ export async function POST(request: Request) {
          } catch {
            // best-effort; silent failure
          }
-        })().catch(() => {});
+        });

        // Plan extraction is handled inline during tool calls or proactively.
        emit({ type: "done" });
--- a/vibn-frontend/components/vibn-chat/chat-panel.tsx
+++ b/vibn-frontend/components/vibn-chat/chat-panel.tsx
@@ -284,11 +284,14 @@ function summarizeToolResult(result?: string): {
  }

  // Plain-text heuristics
+  // We explicitly ignore 'error' and 'exception' here because tools like dev_server_logs
+  // or browser_console legitimately return stack traces when working correctly.
+  // A raw string with 'error' inside it shouldn't auto-fail the tool execution pill.
  const lower = raw.toLowerCase();
  if (
-    /(econnrefused|enoent|error|failed|traceback|exception|not found|permission denied|cannot)/.test(
+    /(econnrefused|enoent|permission denied|command not found)/.test(
      lower,
-    )
+    ) && !raw.includes("dev_server_logs") && !raw.includes("browser_console")
  ) {
    return { ok: false, label: `Failed — ${firstLine(raw)}` };
  }
@@ -1548,12 +1551,15 @@ export function ChatPanel({
                      }
                    } catch {
                      // 2. If it's a raw string (like a bash crash), scan for fatal keywords
+                      // We skip this check for log-reading tools since they legitimately contain errors.
                      const lower = ev.result.toLowerCase();
                      if (
-                        lower.includes("error") ||
-                        lower.includes("failed") ||
-                        lower.includes("unexpected") ||
-                        lower.includes("not found")
+                        !ev.name?.includes("logs") &&
+                        !ev.name?.includes("console") &&
+                        (lower.includes("econnrefused") ||
+                         lower.includes("enoent") ||
+                         lower.includes("permission denied") ||
+                         lower.includes("command not found"))
                      ) {
                        isToolErr = true;
                      }
--- a/vibn-frontend/lib/ai/verification/runners.ts
+++ b/vibn-frontend/lib/ai/verification/runners.ts
@@ -6,12 +6,7 @@
 * injected ToolExecutor, so they are fully unit-testable with mocked outputs.
 */

-import type {
-  AcceptanceCheck,
-  CheckKind,
-  CheckResult,
-  ExecCtx,
-} from "./types";
+import type { AcceptanceCheck, CheckKind, CheckResult, ExecCtx } from "./types";

 // ── helpers ────────────────────────────────────────────────────────────────

@@ -29,7 +24,11 @@ export function redact(s: string): string {
 }

 export function clip(s: string, n = 400): string {
-  const out = redact(String(s ?? "").replace(/\s+/g, " ").trim());
+  const out = redact(
+    String(s ?? "")
+      .replace(/\s+/g, " ")
+      .trim(),
+  );
  return out.length > n ? out.slice(0, n) + "…" : out;
 }

@@ -105,11 +104,7 @@ function str(spec: Record<string, unknown>, key: string, dflt = ""): string {
  const v = spec[key];
  return typeof v === "string" ? v : dflt;
 }
-function num(
-  spec: Record<string, unknown>,
-  key: string,
-  dflt: number,
-): number {
+function num(spec: Record<string, unknown>, key: string, dflt: number): number {
  const v = spec[key];
  return typeof v === "number" ? v : dflt;
 }
@@ -137,7 +132,12 @@ const RUNNERS: Record<
  (check: AcceptanceCheck, ctx: ExecCtx) => Promise<CheckResult>
 > = {
  build: (c, ctx) =>
-    runShellExit(c, ctx, str(c.spec, "command", "npm run build"), "build"),
+    runShellExit(
+      c,
+      ctx,
+      str(c.spec, "command", "npx next build --no-turbopack"),
+      "build",
+    ),

  typecheck: (c, ctx) =>
    runShellExit(
@@ -156,7 +156,7 @@ const RUNNERS: Record<
  server_up: async (c, ctx) => {
    const raw = await ctx.exec("dev_server_start", {
      projectId: ctx.projectId,
-      command: str(c.spec, "command", "npm run dev"),
+      command: str(c.spec, "command", "npx next dev -H 0.0.0.0 --no-turbopack"),
      port: num(c.spec, "port", 3000),
    });
    const r = parseToolResult(raw);
@@ -183,7 +183,10 @@ const RUNNERS: Record<
    const codeStr = (r.stdout || r.raw).trim().match(/\d{3}/)?.[0];
    if (codeStr && Number(codeStr) === expected)
      return ok(c, `${url} → ${codeStr}`);
-    return fail(c, `${url} returned ${codeStr ?? "no response"} (expected ${expected})`);
+    return fail(
+      c,
+      `${url} returned ${codeStr ?? "no response"} (expected ${expected})`,
+    );
  },

  console_clean: async (c, ctx) => {
Author	SHA1	Message	Date
mawkone	273247e98d	Disable Turbopack in Verification Harness auto-build checks	2026-06-15 17:25:40 -07:00
mawkone	e88a566609	Significantly increase AI tool budgets now that the Verification Harness prevents runaway loops	2026-06-15 17:19:37 -07:00
mawkone	0f90a21302	Enable verification harness unconditionally so AI auto-checks builds and console errors	2026-06-15 17:02:26 -07:00
mawkone	74f81f23d0	Fix UI stop button delay by using Next.js after() for background chat summaries	2026-06-15 16:48:40 -07:00
mawkone	7aa3056f59	Stop falsely labeling log-reading tools as failed when they read stack traces	2026-06-15 16:32:35 -07:00
mawkone	d67d8e2052	Fix MCP action routing for workspace_db_query	2026-06-15 13:55:21 -07:00