Compare commits

..

1 Commit

2 changed files with 42 additions and 37 deletions

View File

@@ -14,7 +14,7 @@
* data: {"type":"done"}
* data: {"type":"error","error":"..."}
*/
import { NextResponse, after } from "next/server";
import { NextResponse } from "next/server";
import { requireWorkspacePrincipal } from "@/lib/auth/workspace-auth";
import { query, queryOne } from "@/lib/db-postgres";
import { callVibnChat, streamVibnChat } from "@/lib/ai/vibn-chat-model";
@@ -53,15 +53,16 @@ type TurnIntent =
const TOOL_BUDGETS: Record<TurnIntent, number> = {
conversational: 1, // Must be at least 1 so the LLM gets called for a text reply
// With the Verification Harness and Anti-Stall Governor now unconditionally enabled,
// we no longer need to rely on artificially tight tool budgets to prevent infinite loops.
// The system will intelligently halt if it detects a stall or unfixable error, so we can
// safely give the AI a massive runway to complete complex tasks.
status_check: 40,
diagnose: 60,
small_fix: 40,
feature_build: 80,
deploy: 40,
// Investigative questions ("is the auth connected?", "what's the test user?")
// routinely need to read several files THEN synthesize an answer. Budgets of
// 5/8 were cutting these off at the cap before the model could answer
// (telemetry showed 100% round_cap on these turns). Raised so a read-only
// investigation can actually finish.
status_check: 16,
diagnose: 22,
small_fix: 18,
feature_build: 40,
deploy: 25,
autonomous: 150,
};
@@ -627,21 +628,22 @@ function extractPreviewUrl(messages: ChatMessage[]): string | undefined {
return undefined;
}
function summarizeForUI(raw: string): string {
try {
const p = JSON.parse(raw);
if (p && typeof p === "object") {
const clone = { ...p };
// Strip massive payload fields so the UI gets intact JSON
if (clone.result && typeof clone.result === "object") {
if (clone.result.log) clone.result.log = "...";
if (clone.result.content) clone.result.content = "...";
if (clone.result.listing) clone.result.listing = "...";
if (clone.result && typeof clone.result === 'object') {
if (clone.result.log) clone.result.log = "...";
if (clone.result.content) clone.result.content = "...";
if (clone.result.listing) clone.result.listing = "...";
}
if (typeof clone.stdout === "string" && clone.stdout.length > 200) {
if (typeof clone.stdout === 'string' && clone.stdout.length > 200) {
clone.stdout = clone.stdout.slice(0, 200) + "...";
}
if (typeof clone.stderr === "string" && clone.stderr.length > 200) {
if (typeof clone.stderr === 'string' && clone.stderr.length > 200) {
clone.stderr = clone.stderr.slice(0, 200) + "...";
}
return JSON.stringify(clone);
@@ -1506,7 +1508,13 @@ export async function POST(request: Request) {
const mutated = assistantToolCalls.some((tc) =>
MUTATION_TOOLS.includes(tc.name),
);
if (!aborted && mutated && activeProject?.id && activeMcpToken) {
if (
process.env.VIBN_VERIFICATION_ENABLED === "1" &&
!aborted &&
mutated &&
activeProject?.id &&
activeMcpToken
) {
emit({ type: "phase", phase: "verify", label: "Verifying & fixing" });
const previewUrl = extractPreviewUrl(messages);
const verifyExec: ToolExecutor = async (name, args) =>
@@ -1890,7 +1898,7 @@ export async function POST(request: Request) {
// Wrapped in try/catch + .catch — the response stream is already
// closed and we don't want a summary failure to surface as an
// error to the user.
after(async () => {
(async () => {
try {
const allMessages = [...history, finalMsg];
// Only summarize if there's something worth summarizing.
@@ -1943,7 +1951,7 @@ export async function POST(request: Request) {
} catch {
// best-effort; silent failure
}
});
})().catch(() => {});
// Plan extraction is handled inline during tool calls or proactively.
emit({ type: "done" });

View File

@@ -6,7 +6,12 @@
* injected ToolExecutor, so they are fully unit-testable with mocked outputs.
*/
import type { AcceptanceCheck, CheckKind, CheckResult, ExecCtx } from "./types";
import type {
AcceptanceCheck,
CheckKind,
CheckResult,
ExecCtx,
} from "./types";
// ── helpers ────────────────────────────────────────────────────────────────
@@ -24,11 +29,7 @@ export function redact(s: string): string {
}
export function clip(s: string, n = 400): string {
const out = redact(
String(s ?? "")
.replace(/\s+/g, " ")
.trim(),
);
const out = redact(String(s ?? "").replace(/\s+/g, " ").trim());
return out.length > n ? out.slice(0, n) + "…" : out;
}
@@ -104,7 +105,11 @@ function str(spec: Record<string, unknown>, key: string, dflt = ""): string {
const v = spec[key];
return typeof v === "string" ? v : dflt;
}
function num(spec: Record<string, unknown>, key: string, dflt: number): number {
function num(
spec: Record<string, unknown>,
key: string,
dflt: number,
): number {
const v = spec[key];
return typeof v === "number" ? v : dflt;
}
@@ -132,12 +137,7 @@ const RUNNERS: Record<
(check: AcceptanceCheck, ctx: ExecCtx) => Promise<CheckResult>
> = {
build: (c, ctx) =>
runShellExit(
c,
ctx,
str(c.spec, "command", "npx next build --no-turbopack"),
"build",
),
runShellExit(c, ctx, str(c.spec, "command", "npm run build"), "build"),
typecheck: (c, ctx) =>
runShellExit(
@@ -156,7 +156,7 @@ const RUNNERS: Record<
server_up: async (c, ctx) => {
const raw = await ctx.exec("dev_server_start", {
projectId: ctx.projectId,
command: str(c.spec, "command", "npx next dev -H 0.0.0.0 --no-turbopack"),
command: str(c.spec, "command", "npm run dev"),
port: num(c.spec, "port", 3000),
});
const r = parseToolResult(raw);
@@ -183,10 +183,7 @@ const RUNNERS: Record<
const codeStr = (r.stdout || r.raw).trim().match(/\d{3}/)?.[0];
if (codeStr && Number(codeStr) === expected)
return ok(c, `${url}${codeStr}`);
return fail(
c,
`${url} returned ${codeStr ?? "no response"} (expected ${expected})`,
);
return fail(c, `${url} returned ${codeStr ?? "no response"} (expected ${expected})`);
},
console_clean: async (c, ctx) => {