// vibn-agent-runner/vibn-frontend/lib/ai/verification/generation.ts

/**
 * Acceptance-check generation + feedback formatting.
 *
 * - The Planner emits `acceptanceChecks` per task using a strict schema; we
 *   validate/normalize that output here (models are not trustworthy emitters).
 * - On a failed verification we format the failures into concrete, structured
 *   feedback that the next execution round consumes — this is what makes the
 *   model FIX rather than guess.
 */

import type { AcceptanceCheck, CheckKind, CheckResult } from "./types";

/**
 * Every check kind accepted from the model. `normalizeAcceptanceChecks` drops
 * any entry whose `kind` is not listed here.
 */
const VALID_KINDS: CheckKind[] = [
  // compile / test level
  "build", "typecheck", "test",
  // running-app level
  "server_up", "route_ok", "console_clean",
  // page content, user flow, appearance, stored data
  "content", "flow", "visual", "data",
];

/**
 * Kinds that are soft by default (advisory, never block "done"): a check of
 * one of these kinds is only hard when it carries an explicit boolean `hard`.
 */
const SOFT_KINDS: Set<CheckKind> = new Set<CheckKind>([
  "visual",
]);

/**
 * Validate and normalize a raw `acceptanceChecks` array from the model.
 *
 * Non-array input yields `[]`. Entries that are not objects, or whose `kind`
 * is not one of `VALID_KINDS`, are dropped. For each surviving entry:
 * - `spec` is kept only when it is a non-null, non-array object; otherwise `{}`.
 * - `hard` is kept when it is a boolean; otherwise it defaults to `true`
 *   unless the kind is in `SOFT_KINDS`.
 * - `description` is trimmed; when missing, non-string, or blank it falls
 *   back to the kind name.
 * Output preserves input order and is capped at three checks.
 *
 * @param raw - Untrusted value taken from model output.
 * @returns The normalized checks (possibly empty).
 */
export function normalizeAcceptanceChecks(raw: unknown): AcceptanceCheck[] {
  if (!Array.isArray(raw)) return [];
  const maxChecks = 3; // keep contracts tight (1–3 checks)
  const out: AcceptanceCheck[] = [];
  for (const item of raw) {
    if (!item || typeof item !== "object") continue;
    const o = item as Record<string, unknown>;
    // Look the kind up instead of asserting its type: `find` only ever returns
    // a member of VALID_KINDS, so no unchecked cast is needed.
    const kind = VALID_KINDS.find((k) => k === o.kind);
    if (kind === undefined) continue;
    // `typeof [] === "object"`, so arrays must be excluded explicitly or an
    // array would be passed downstream as if it were a key/value spec.
    const spec =
      o.spec && typeof o.spec === "object" && !Array.isArray(o.spec)
        ? (o.spec as Record<string, unknown>)
        : {};
    const hard =
      typeof o.hard === "boolean" ? o.hard : !SOFT_KINDS.has(kind);
    const description =
      typeof o.description === "string" && o.description.trim()
        ? o.description.trim()
        : kind;
    out.push({ kind, hard, description, spec });
    if (out.length >= maxChecks) break;
  }
  return out;
}

/**
 * Prompt fragment appended to the Planner's system prompt so that every task
 * it creates carries a checkable contract.
 *
 * The text lists the allowed check kinds with their `spec` shapes, followed by
 * the rules for choosing checks. It has no leading or trailing whitespace.
 */
export const CHECK_GENERATION_PROMPT = `[ACCEPTANCE CHECKS] For every task you create, attach \`acceptanceChecks\`: a JSON
array of 1–3 checks that objectively prove THIS task is done.
Each check: { "kind": <kind>, "hard": <bool>, "description": <string>, "spec": { ... } }
Allowed kinds and their spec:
- build        spec: {}                                  (compiles)
- typecheck    spec: {}                                  (no type errors)
- test         spec: { command?: string }               (tests pass)
- server_up    spec: { port?: number }                   (app boots, 200)
- route_ok     spec: { url: string, expectedStatus?: number }
- console_clean spec: { url?: string }                   (no JS errors)
- content      spec: { url: string, contains: string }   (text present)
- flow         spec: { startUrl: string, expectContains: string }
- visual       spec: { targetPath: string, minScore?: number }   (soft)
- data         spec: { command: string }                 (records exist)
Rules:
- build + server_up + console_clean are added AUTOMATICALLY. Do NOT repeat them.
- Add only checks that prove THIS task's specific behavior.
- Prefer the cheapest proof: route_ok/content over flow, flow over visual.
- If a task is not objectively verifiable (e.g. "make the copy friendlier"),
  return an empty acceptanceChecks array and set "requiresHumanConfirm": true.
  Do NOT fabricate a check you cannot actually verify.`;

/**
 * Render failed checks as concrete feedback for the next execution round.
 *
 * The message is a fixed header, one bullet per failure (kind, description
 * and evidence), and a closing directive, joined by newlines.
 *
 * @param failures - Failed check results to report.
 * @returns The feedback message, or `""` when `failures` is empty.
 */
export function formatFailureFeedback(failures: CheckResult[]): string {
  if (failures.length === 0) {
    return "";
  }

  const header =
    "[VERIFICATION FAILED] Your last changes did not pass these checks:";
  const directive =
    "Fix these specific failures. Do not claim success until every check passes. " +
    "Address the exact errors above — read the relevant files first if needed.";

  const bullets: string[] = [];
  for (const failure of failures) {
    const { kind, description } = failure.check;
    bullets.push(`- ${kind} (${description}): FAILED — ${failure.evidence}`);
  }

  return [header, ...bullets, directive].join("\n");
}

/**
 * Stable signature of a report's failures — used to detect no-progress.
 *
 * Each failure contributes `kind:evidence`; the entries are sorted before
 * being joined with `;;`, so the result does not depend on input order.
 * The input array is not modified.
 *
 * @param failures - Failed check results to fingerprint.
 * @returns The joined signature, or `""` when `failures` is empty.
 */
export function failureSignature(failures: CheckResult[]): string {
  const entries: string[] = [];
  for (const failure of failures) {
    entries.push(`${failure.check.kind}:${failure.evidence}`);
  }
  entries.sort();
  return entries.join(";;");
}