Compare commits
6 Commits
9b19befa0a
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 273247e98d | |||
| e88a566609 | |||
| 0f90a21302 | |||
| 74f81f23d0 | |||
| 7aa3056f59 | |||
| d67d8e2052 |
@@ -1879,7 +1879,8 @@ export async function executeMcpTool(
|
||||
if (toolName === "http_fetch") return executeHttpFetch(args);
|
||||
|
||||
// Convert underscore tool name → dotted MCP action (apps_create → apps.create)
|
||||
const action = toolName.replace(/_/g, ".");
|
||||
let action = toolName.replace(/_/g, ".");
|
||||
if (toolName === "workspace_db_query") action = "workspace.db_query";
|
||||
|
||||
// Unpack JSON-string args (Gemini schemas can't represent free-form objects,
|
||||
// so we accept *Json string fields and parse them server-side).
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
* data: {"type":"done"}
|
||||
* data: {"type":"error","error":"..."}
|
||||
*/
|
||||
import { NextResponse } from "next/server";
|
||||
import { NextResponse, after } from "next/server";
|
||||
import { requireWorkspacePrincipal } from "@/lib/auth/workspace-auth";
|
||||
import { query, queryOne } from "@/lib/db-postgres";
|
||||
import { callVibnChat, streamVibnChat } from "@/lib/ai/vibn-chat-model";
|
||||
@@ -53,16 +53,15 @@ type TurnIntent =
|
||||
|
||||
const TOOL_BUDGETS: Record<TurnIntent, number> = {
|
||||
conversational: 1, // Must be at least 1 so the LLM gets called for a text reply
|
||||
// Investigative questions ("is the auth connected?", "what's the test user?")
|
||||
// routinely need to read several files THEN synthesize an answer. Budgets of
|
||||
// 5/8 were cutting these off at the cap before the model could answer
|
||||
// (telemetry showed 100% round_cap on these turns). Raised so a read-only
|
||||
// investigation can actually finish.
|
||||
status_check: 16,
|
||||
diagnose: 22,
|
||||
small_fix: 18,
|
||||
feature_build: 40,
|
||||
deploy: 25,
|
||||
// With the Verification Harness and Anti-Stall Governor now unconditionally enabled,
|
||||
// we no longer need to rely on artificially tight tool budgets to prevent infinite loops.
|
||||
// The system will intelligently halt if it detects a stall or unfixable error, so we can
|
||||
// safely give the AI a massive runway to complete complex tasks.
|
||||
status_check: 40,
|
||||
diagnose: 60,
|
||||
small_fix: 40,
|
||||
feature_build: 80,
|
||||
deploy: 40,
|
||||
autonomous: 150,
|
||||
};
|
||||
|
||||
@@ -628,6 +627,29 @@ function extractPreviewUrl(messages: ChatMessage[]): string | undefined {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
/**
 * Build a compact, still-parseable version of a tool result for the UI stream.
 *
 * When `raw` is a JSON object, the bulky fields are shortened and the object
 * is re-serialized, so the client receives intact JSON instead of a string
 * cut off mid-document:
 *   - `result.log`, `result.content`, `result.listing` are replaced by "..."
 *     whenever they are truthy;
 *   - top-level `stdout` / `stderr` strings longer than 200 characters are
 *     cut to 200 characters followed by "...".
 *
 * Anything else (non-JSON text, JSON arrays, JSON primitives, `null`) falls
 * back to the first 500 characters of `raw`.
 *
 * @param raw - Tool output as text; may or may not be JSON.
 * @returns A JSON string for object payloads, otherwise a prefix of `raw`.
 */
function summarizeForUI(raw: string): string {
  const MAX_STREAM_CHARS = 200;
  const MAX_FALLBACK_CHARS = 500;
  const ELIDED = "...";

  // Arrays are excluded on purpose: spreading an array into an object literal
  // would turn `[a, b]` into `{"0": a, "1": b}` and change the payload shape.
  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Not JSON (e.g. plain-text tool output): send a bounded prefix instead.
    return raw.slice(0, MAX_FALLBACK_CHARS);
  }

  if (!isPlainObject(parsed)) {
    return raw.slice(0, MAX_FALLBACK_CHARS);
  }

  const summary: Record<string, unknown> = { ...parsed };

  if (isPlainObject(summary.result)) {
    // Copy before editing so the parsed value itself is never mutated.
    const inner: Record<string, unknown> = { ...summary.result };
    for (const key of ["log", "content", "listing"]) {
      if (inner[key]) inner[key] = ELIDED;
    }
    summary.result = inner;
  }

  for (const key of ["stdout", "stderr"]) {
    const value = summary[key];
    if (typeof value === "string" && value.length > MAX_STREAM_CHARS) {
      summary[key] = value.slice(0, MAX_STREAM_CHARS) + ELIDED;
    }
  }

  return JSON.stringify(summary);
}
|
||||
|
||||
export async function POST(request: Request) {
|
||||
await ensureChatTables();
|
||||
|
||||
@@ -1109,7 +1131,7 @@ export async function POST(request: Request) {
|
||||
emit({
|
||||
type: "tool_result",
|
||||
name: tc.name,
|
||||
result: result.slice(0, 500),
|
||||
result: summarizeForUI(result),
|
||||
});
|
||||
messages.push({
|
||||
role: "tool",
|
||||
@@ -1322,7 +1344,7 @@ export async function POST(request: Request) {
|
||||
emit({
|
||||
type: "tool_result",
|
||||
name: tc.name,
|
||||
result: result.slice(0, 500),
|
||||
result: summarizeForUI(result),
|
||||
});
|
||||
|
||||
messages.push({
|
||||
@@ -1484,13 +1506,7 @@ export async function POST(request: Request) {
|
||||
const mutated = assistantToolCalls.some((tc) =>
|
||||
MUTATION_TOOLS.includes(tc.name),
|
||||
);
|
||||
if (
|
||||
process.env.VIBN_VERIFICATION_ENABLED === "1" &&
|
||||
!aborted &&
|
||||
mutated &&
|
||||
activeProject?.id &&
|
||||
activeMcpToken
|
||||
) {
|
||||
if (!aborted && mutated && activeProject?.id && activeMcpToken) {
|
||||
emit({ type: "phase", phase: "verify", label: "Verifying & fixing" });
|
||||
const previewUrl = extractPreviewUrl(messages);
|
||||
const verifyExec: ToolExecutor = async (name, args) =>
|
||||
@@ -1874,7 +1890,7 @@ export async function POST(request: Request) {
|
||||
// Wrapped in try/catch + .catch — the response stream is already
|
||||
// closed and we don't want a summary failure to surface as an
|
||||
// error to the user.
|
||||
(async () => {
|
||||
after(async () => {
|
||||
try {
|
||||
const allMessages = [...history, finalMsg];
|
||||
// Only summarize if there's something worth summarizing.
|
||||
@@ -1927,7 +1943,7 @@ export async function POST(request: Request) {
|
||||
} catch {
|
||||
// best-effort; silent failure
|
||||
}
|
||||
})().catch(() => {});
|
||||
});
|
||||
|
||||
// Plan extraction is handled inline during tool calls or proactively.
|
||||
emit({ type: "done" });
|
||||
|
||||
@@ -284,11 +284,14 @@ function summarizeToolResult(result?: string): {
|
||||
}
|
||||
|
||||
// Plain-text heuristics
|
||||
// We explicitly ignore 'error' and 'exception' here because tools like dev_server_logs
|
||||
// or browser_console legitimately return stack traces when working correctly.
|
||||
// A raw string with 'error' inside it shouldn't auto-fail the tool execution pill.
|
||||
const lower = raw.toLowerCase();
|
||||
if (
|
||||
/(econnrefused|enoent|error|failed|traceback|exception|not found|permission denied|cannot)/.test(
|
||||
/(econnrefused|enoent|permission denied|command not found)/.test(
|
||||
lower,
|
||||
)
|
||||
) && !raw.includes("dev_server_logs") && !raw.includes("browser_console")
|
||||
) {
|
||||
return { ok: false, label: `Failed — ${firstLine(raw)}` };
|
||||
}
|
||||
@@ -1548,12 +1551,15 @@ export function ChatPanel({
|
||||
}
|
||||
} catch {
|
||||
// 2. If it's a raw string (like a bash crash), scan for fatal keywords
|
||||
// We skip this check for log-reading tools since they legitimately contain errors.
|
||||
const lower = ev.result.toLowerCase();
|
||||
if (
|
||||
lower.includes("error") ||
|
||||
lower.includes("failed") ||
|
||||
lower.includes("unexpected") ||
|
||||
lower.includes("not found")
|
||||
!ev.name?.includes("logs") &&
|
||||
!ev.name?.includes("console") &&
|
||||
(lower.includes("econnrefused") ||
|
||||
lower.includes("enoent") ||
|
||||
lower.includes("permission denied") ||
|
||||
lower.includes("command not found"))
|
||||
) {
|
||||
isToolErr = true;
|
||||
}
|
||||
|
||||
@@ -6,12 +6,7 @@
|
||||
* injected ToolExecutor, so they are fully unit-testable with mocked outputs.
|
||||
*/
|
||||
|
||||
import type {
|
||||
AcceptanceCheck,
|
||||
CheckKind,
|
||||
CheckResult,
|
||||
ExecCtx,
|
||||
} from "./types";
|
||||
import type { AcceptanceCheck, CheckKind, CheckResult, ExecCtx } from "./types";
|
||||
|
||||
// ── helpers ────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -29,7 +24,11 @@ export function redact(s: string): string {
|
||||
}
|
||||
|
||||
export function clip(s: string, n = 400): string {
|
||||
const out = redact(String(s ?? "").replace(/\s+/g, " ").trim());
|
||||
const out = redact(
|
||||
String(s ?? "")
|
||||
.replace(/\s+/g, " ")
|
||||
.trim(),
|
||||
);
|
||||
return out.length > n ? out.slice(0, n) + "…" : out;
|
||||
}
|
||||
|
||||
@@ -105,11 +104,7 @@ function str(spec: Record<string, unknown>, key: string, dflt = ""): string {
|
||||
const v = spec[key];
|
||||
return typeof v === "string" ? v : dflt;
|
||||
}
|
||||
function num(
|
||||
spec: Record<string, unknown>,
|
||||
key: string,
|
||||
dflt: number,
|
||||
): number {
|
||||
function num(spec: Record<string, unknown>, key: string, dflt: number): number {
|
||||
const v = spec[key];
|
||||
return typeof v === "number" ? v : dflt;
|
||||
}
|
||||
@@ -137,7 +132,12 @@ const RUNNERS: Record<
|
||||
(check: AcceptanceCheck, ctx: ExecCtx) => Promise<CheckResult>
|
||||
> = {
|
||||
build: (c, ctx) =>
|
||||
runShellExit(c, ctx, str(c.spec, "command", "npm run build"), "build"),
|
||||
runShellExit(
|
||||
c,
|
||||
ctx,
|
||||
str(c.spec, "command", "npx next build --no-turbopack"),
|
||||
"build",
|
||||
),
|
||||
|
||||
typecheck: (c, ctx) =>
|
||||
runShellExit(
|
||||
@@ -156,7 +156,7 @@ const RUNNERS: Record<
|
||||
server_up: async (c, ctx) => {
|
||||
const raw = await ctx.exec("dev_server_start", {
|
||||
projectId: ctx.projectId,
|
||||
command: str(c.spec, "command", "npm run dev"),
|
||||
command: str(c.spec, "command", "npx next dev -H 0.0.0.0 --no-turbopack"),
|
||||
port: num(c.spec, "port", 3000),
|
||||
});
|
||||
const r = parseToolResult(raw);
|
||||
@@ -183,7 +183,10 @@ const RUNNERS: Record<
|
||||
const codeStr = (r.stdout || r.raw).trim().match(/\d{3}/)?.[0];
|
||||
if (codeStr && Number(codeStr) === expected)
|
||||
return ok(c, `${url} → ${codeStr}`);
|
||||
return fail(c, `${url} returned ${codeStr ?? "no response"} (expected ${expected})`);
|
||||
return fail(
|
||||
c,
|
||||
`${url} returned ${codeStr ?? "no response"} (expected ${expected})`,
|
||||
);
|
||||
},
|
||||
|
||||
console_clean: async (c, ctx) => {
|
||||
|
||||
Reference in New Issue
Block a user