Files
vibn-frontend/lib/ai/verification/runners.ts

270 lines
9.0 KiB
TypeScript

/**
* Acceptance check runners.
*
* Each runner maps a single AcceptanceCheck to a deterministic tool invocation
* and returns a structured { pass, evidence }. Runners depend only on the
* injected ToolExecutor, so they are fully unit-testable with mocked outputs.
*/
import type {
AcceptanceCheck,
CheckKind,
CheckResult,
ExecCtx,
} from "./types";
// ── helpers ────────────────────────────────────────────────────────────────
/**
 * Mask credential-looking substrings so they never end up in check evidence.
 *
 * Substitutions run in a fixed order, each over the output of the previous:
 *  1. Postgres connection URLs carrying `user:password@host/db`.
 *  2. JWT-shaped tokens: three dot-separated base64url segments, the first
 *     starting with `eyJ`.
 *  3. Any remaining word-bounded run of 40+ `[A-Za-z0-9_-]` characters.
 *
 * @param s Text to sanitize.
 * @returns The text with every match replaced by a fixed placeholder.
 */
export function redact(s: string): string {
  // Order matters: the generic long-token rule must run last so it does not
  // pre-empt the more specific placeholders.
  const rules: ReadonlyArray<readonly [RegExp, string]> = [
    [
      /postgres(?:ql)?:\/\/[^:\s]+:[^@\s]+@[^/\s]+\/[^\s"']+/gi,
      "postgresql://[REDACTED_DB_URL]",
    ],
    [
      /eyJ[a-zA-Z0-9_-]{5,}\.[a-zA-Z0-9_-]{5,}\.[a-zA-Z0-9_-]{5,}/g,
      "[REDACTED_JWT]",
    ],
    [/\b[A-Za-z0-9_-]{40,}\b/g, "[REDACTED_SECRET]"],
  ];

  let masked = s;
  for (const [pattern, placeholder] of rules) {
    masked = masked.replace(pattern, placeholder);
  }
  return masked;
}
/**
 * Produce a compact, redacted, single-line excerpt of `s`.
 *
 * Nullish input is treated as the empty string. Every run of whitespace is
 * collapsed to one space, the text is trimmed and passed through `redact`,
 * and anything longer than `n` characters is cut to `n` characters followed
 * by "…".
 *
 * @param s Text to condense.
 * @param n Maximum length before truncation (default 400).
 * @returns The condensed, redacted text.
 */
export function clip(s: string, n = 400): string {
  const flattened = String(s ?? "").replace(/\s+/g, " ").trim();
  const safe = redact(flattened);
  if (safe.length > n) {
    return safe.slice(0, n) + "…";
  }
  return safe;
}
/**
 * Normalized view of a tool result, as produced by `parseToolResult`.
 */
export interface ParsedToolResult {
  /** Exit code taken from `exitCode` (preferred) or `code`; null when neither is a number. */
  code: number | null;
  /** The payload's `stdout` string; "" when absent. */
  stdout: string;
  /** The payload's `stderr` string; "" when absent. */
  stderr: string;
  /** Numeric status from a top-level `status` (preferred) or `healthCheck.status`; null when absent. */
  status: number | null; // healthCheck.status, etc.
  /** The input exactly as received (coerced to a string). */
  raw: string;
  /** The outer parsed JSON value before any unwrapping; null when the input is not a JSON object. */
  obj: Record<string, unknown> | null;
}

/**
 * Parse a raw tool result into a normalized shape. Tool results come back as a
 * JSON string; shapes vary by tool, so fields are extracted defensively and
 * anything missing keeps its default (`null` / `""`).
 *
 * Some tools double-wrap: the outer object carries only a `stdout` field whose
 * value is itself the JSON-encoded result. One such level is unwrapped, but
 * only when the outer object has no result fields of its own (`code`,
 * `exitCode`, `healthCheck`) and the inner value is a non-array object.
 * Otherwise a command that merely prints JSON would have its exit code and
 * output discarded.
 *
 * @param raw JSON text returned by a tool; non-JSON input is tolerated.
 * @returns The normalized result. Never throws on malformed input.
 */
export function parseToolResult(raw: string): ParsedToolResult {
  const base: ParsedToolResult = {
    code: null,
    stdout: "",
    stderr: "",
    status: null,
    raw: String(raw ?? ""),
    obj: null,
  };

  let obj: Record<string, unknown> | null = null;
  try {
    const p: unknown = JSON.parse(base.raw);
    if (p && typeof p === "object") obj = p as Record<string, unknown>;
  } catch {
    // Not JSON at all: callers fall back to `raw`.
    return base;
  }
  if (!obj) return base;
  base.obj = obj;

  // Some wrappers nest the real payload under `stdout` as a JSON string.
  let target = obj;
  if (
    typeof obj.stdout === "string" &&
    obj.code === undefined &&
    obj.exitCode === undefined &&
    obj.healthCheck === undefined
  ) {
    try {
      const inner: unknown = JSON.parse(obj.stdout);
      // An array can never be a wrapped result object; keep the outer payload.
      if (inner && typeof inner === "object" && !Array.isArray(inner))
        target = inner as Record<string, unknown>;
    } catch {
      /* stdout is plain text, keep outer */
    }
  }

  // `exitCode` is applied after `code`, so it wins when both are present.
  if (typeof target.code === "number") base.code = target.code;
  if (typeof target.exitCode === "number") base.code = target.exitCode;
  if (typeof target.stdout === "string") base.stdout = target.stdout;
  if (typeof target.stderr === "string") base.stderr = target.stderr;

  // A top-level `status` is applied after `healthCheck.status`, so it wins.
  const hc = target.healthCheck as { status?: number } | undefined;
  if (hc && typeof hc.status === "number") base.status = hc.status;
  if (typeof target.status === "number") base.status = target.status;
  return base;
}
/**
 * Build a passing result for `check`.
 *
 * @param check The check that passed.
 * @param evidence Human-readable proof; condensed via `clip` before storing.
 * @returns A `CheckResult` with `pass: true`.
 */
function ok(check: AcceptanceCheck, evidence = "passed"): CheckResult {
  const summary = clip(evidence);
  return { check, pass: true, evidence: summary };
}
/**
 * Build a failing result for `check`.
 *
 * @param check The check that failed.
 * @param evidence Human-readable reason; condensed via `clip` before storing.
 * @returns A `CheckResult` with `pass: false`.
 */
function fail(check: AcceptanceCheck, evidence: string): CheckResult {
  const summary = clip(evidence);
  return { check, pass: false, evidence: summary };
}
/**
 * Read a string-valued entry from a check spec.
 *
 * @param spec The spec object to read from.
 * @param key Property name to look up.
 * @param dflt Value returned when the entry is missing or not a string.
 * @returns The entry if it is a string (including ""), otherwise `dflt`.
 */
function str(spec: Record<string, unknown>, key: string, dflt = ""): string {
  const candidate = spec[key];
  if (typeof candidate === "string") return candidate;
  return dflt;
}
/**
 * Read a number-valued entry from a check spec.
 *
 * @param spec The spec object to read from.
 * @param key Property name to look up.
 * @param dflt Value returned when the entry is missing or not a number.
 * @returns The entry if its type is `number`, otherwise `dflt`. Numeric
 *   strings are not converted.
 */
function num(
  spec: Record<string, unknown>,
  key: string,
  dflt: number,
): number {
  const candidate = spec[key];
  if (typeof candidate === "number") return candidate;
  return dflt;
}
// ── runners ────────────────────────────────────────────────────────────────
/**
 * Run `command` through the `shell_exec` tool and judge it by exit code.
 *
 * Exit code 0 passes. Anything else, including a result with no recognizable
 * exit code, fails; the failure evidence is stderr if present, else stdout,
 * else the exit code.
 *
 * A blank command is rejected up front rather than sent to the shell. An
 * empty shell command would normally exit 0, which would make a
 * misconfigured check (for example a `data` check with no `command`) pass
 * without verifying anything.
 *
 * @param check The check being evaluated; echoed back in the result.
 * @param ctx Execution context supplying `exec` and `projectId`.
 * @param command Shell command line to execute.
 * @param label Short name used in the evidence text (e.g. "build").
 * @returns The pass/fail result with clipped evidence.
 */
async function runShellExit(
  check: AcceptanceCheck,
  ctx: ExecCtx,
  command: string,
  label: string,
): Promise<CheckResult> {
  if (!command.trim()) return fail(check, `${label}: no command specified`);

  const raw = await ctx.exec("shell_exec", {
    projectId: ctx.projectId,
    command,
  });
  const r = parseToolResult(raw);
  if (r.code === 0) return ok(check, `${label} passed`);

  const detail = r.stderr || r.stdout || `exit ${r.code ?? "?"}`;
  return fail(check, `${label} failed (exit ${r.code ?? "?"}): ${detail}`);
}
/**
 * Quote `value` as a single POSIX-shell word.
 *
 * The value is wrapped in single quotes, inside which the shell performs no
 * expansion at all; each embedded single quote is emitted as `'\''`. This is
 * used instead of `JSON.stringify`, whose double-quoted output still lets the
 * shell expand `$(...)`, backticks and `$VAR` inside a spec-supplied URL.
 */
function shellQuote(value: string): string {
  return "'" + value.replace(/'/g, "'\\''") + "'";
}

/**
 * One runner per check kind. Each runner reads its parameters from
 * `check.spec`, performs exactly one tool call through `ctx.exec`, and turns
 * the parsed tool output into a pass/fail `CheckResult`.
 */
const RUNNERS: Record<
  CheckKind,
  (check: AcceptanceCheck, ctx: ExecCtx) => Promise<CheckResult>
> = {
  // Shell-based checks: pass iff the command exits 0.
  build: (c, ctx) =>
    runShellExit(c, ctx, str(c.spec, "command", "npm run build"), "build"),

  typecheck: (c, ctx) =>
    runShellExit(
      c,
      ctx,
      str(c.spec, "command", "npx tsc --noEmit"),
      "typecheck",
    ),

  test: (c, ctx) =>
    runShellExit(c, ctx, str(c.spec, "command", "npm test"), "tests"),

  data: (c, ctx) =>
    runShellExit(c, ctx, str(c.spec, "command", ""), "data check"),

  // Starts the dev server and passes only when the reported status is 200.
  server_up: async (c, ctx) => {
    const raw = await ctx.exec("dev_server_start", {
      projectId: ctx.projectId,
      command: str(c.spec, "command", "npm run dev"),
      port: num(c.spec, "port", 3000),
    });
    const r = parseToolResult(raw);
    if (r.status === 200) return ok(c, "dev server returned 200");
    return fail(
      c,
      `dev server not healthy (status ${r.status ?? "none"}): ${
        r.stderr || r.stdout || r.raw
      }`,
    );
  },

  // Fetches `url` with curl and compares the HTTP status to `expectedStatus`.
  route_ok: async (c, ctx) => {
    const url = str(c.spec, "url");
    const expected = num(c.spec, "expectedStatus", 200);
    if (!url) return fail(c, "route_ok check is missing a url");
    const raw = await ctx.exec("shell_exec", {
      projectId: ctx.projectId,
      command: `curl -s -o /dev/null -w "%{http_code}" --max-time 20 ${shellQuote(
        url,
      )}`,
    });
    const r = parseToolResult(raw);
    // curl prints only the status code; take the first 3-digit run.
    const codeStr = (r.stdout || r.raw).trim().match(/\d{3}/)?.[0];
    if (codeStr && Number(codeStr) === expected)
      return ok(c, `${url} returned ${codeStr}`);
    return fail(
      c,
      `${url} returned ${codeStr ?? "no response"} (expected ${expected})`,
    );
  },

  // Scans the raw browser-console output for error-level entries.
  console_clean: async (c, ctx) => {
    const url = str(c.spec, "url", ctx.previewUrl ?? "");
    if (!url) return fail(c, "console_clean check is missing a url");
    const raw = await ctx.exec("browser_console", { url });
    const r = parseToolResult(raw);
    const text = (r.raw || "").toLowerCase();
    // Look for error-level console output or framework error overlays.
    const errorHit =
      /"type"\s*:\s*"error"/.test(text) ||
      /\berror\b[^"]{0,40}(overlay|boundary|uncaught|unhandled)/.test(text) ||
      /failed to compile|module not found|referenceerror|typeerror:/.test(text);
    if (!errorHit) return ok(c, "no console errors");
    return fail(c, `console errors on ${url}: ${clip(r.raw, 240)}`);
  },

  // Fetches `url` with curl and looks for the literal `contains` text.
  content: async (c, ctx) => {
    const url = str(c.spec, "url", ctx.previewUrl ?? "");
    const needle = str(c.spec, "contains");
    if (!url || !needle)
      return fail(c, "content check requires both `url` and `contains`");
    const raw = await ctx.exec("shell_exec", {
      projectId: ctx.projectId,
      command: `curl -s --max-time 20 ${shellQuote(url)}`,
    });
    const r = parseToolResult(raw);
    const body = r.stdout || r.raw;
    if (body.includes(needle)) return ok(c, `found "${needle}"`);
    return fail(c, `"${needle}" not found on ${url}`);
  },

  flow: async (c, ctx) => {
    // A basic journey assertion: navigate to startUrl, then assert the page
    // body contains `expectContains` (or, when none is given, that the page
    // does not look like an error page).
    const startUrl = str(c.spec, "startUrl", ctx.previewUrl ?? "");
    const expectContains = str(c.spec, "expectContains");
    if (!startUrl) return fail(c, "flow check is missing a startUrl");
    const raw = await ctx.exec("browser_navigate", { url: startUrl });
    const r = parseToolResult(raw);
    const body = r.stdout || r.raw;
    if (expectContains && !body.includes(expectContains))
      return fail(c, `flow on ${startUrl}: did not reach "${expectContains}"`);
    if (/error|cannot|failed/i.test(body) && !expectContains)
      return fail(c, `flow on ${startUrl} hit an error page`);
    return ok(c, `flow reached expected state`);
  },

  // Delegates to the visual-QA tool and interprets its `passed` / `score`.
  visual: async (c, ctx) => {
    const targetPath = str(c.spec, "targetPath");
    if (!targetPath) return fail(c, "visual check is missing a targetPath");
    const raw = await ctx.exec("request_visual_qa", {
      projectId: ctx.projectId,
      targetPath,
    });
    const r = parseToolResult(raw);
    const obj = r.obj as { score?: number; passed?: boolean } | null;
    const threshold = num(c.spec, "minScore", 7);
    if (obj?.passed === true) return ok(c, "visual QA passed");
    if (typeof obj?.score === "number")
      return obj.score >= threshold
        ? ok(c, `visual QA score ${obj.score}`)
        : fail(c, `visual QA score ${obj.score} < ${threshold}`);
    // An explicit negative verdict with no score must not be reported as a
    // pass by the advisory fallback below.
    if (obj?.passed === false) return fail(c, "visual QA reported failure");
    // No structured score — treat as advisory pass (soft checks won't block).
    return ok(c, "visual QA ran (no numeric score)");
  },
};
/**
 * Execute a single acceptance check and always resolve to a `CheckResult`.
 *
 * The runner is looked up by `check.kind`. Unknown kinds and any exception
 * thrown by a runner are converted into failing results, so this function
 * does not reject.
 *
 * @param check The check to run.
 * @param ctx Execution context handed to the runner.
 * @returns The runner's result, or a failing result describing the problem.
 */
export async function runCheck(
  check: AcceptanceCheck,
  ctx: ExecCtx,
): Promise<CheckResult> {
  // Own-property lookup only: a kind such as "toString" or "constructor"
  // would otherwise resolve to an inherited Object.prototype member and be
  // invoked as if it were a runner.
  const runner = Object.prototype.hasOwnProperty.call(RUNNERS, check.kind)
    ? RUNNERS[check.kind]
    : undefined;
  if (!runner) return fail(check, `unknown check kind: ${check.kind}`);

  try {
    return await runner(check, ctx);
  } catch (e) {
    return fail(
      check,
      `check runner errored: ${e instanceof Error ? e.message : String(e)}`,
    );
  }
}