/**
 * Acceptance-check generation + feedback formatting.
 *
 * - The Planner emits `acceptanceChecks` per task using a strict schema; we
 *   validate/normalize that output here (models are not trustworthy emitters).
 * - On a failed verification we format the failures into concrete, structured
 *   feedback that the next execution round consumes — this is what makes the
 *   model FIX rather than guess.
 */

import type { AcceptanceCheck, CheckKind, CheckResult } from "./types";
/**
 * Check kinds accepted from the model. `normalizeAcceptanceChecks` drops any
 * entry whose `kind` is not listed here. The table is read-only: it is only
 * ever consulted, never extended at runtime.
 */
const VALID_KINDS: readonly CheckKind[] = [
  "build",
  "typecheck",
  "test",
  "server_up",
  "route_ok",
  "console_clean",
  "content",
  "flow",
  "visual",
  "data",
];

// Soft-by-default kinds (advisory, never block "done"). A check of one of these
// kinds is normalized to `hard: false` unless the model sets `hard` explicitly.
const SOFT_KINDS: ReadonlySet<CheckKind> = new Set<CheckKind>(["visual"]);

/**
 * Validate and normalize a raw `acceptanceChecks` array from the model.
 *
 * - Non-array input yields an empty list.
 * - Entries that are not objects, or whose `kind` is not in `VALID_KINDS`, are
 *   dropped.
 * - `spec` is kept only when it is a non-null, non-array object; anything else
 *   (missing, primitive, array) becomes `{}`.
 * - `hard` is kept when it is a boolean; otherwise it defaults to `true` for
 *   every kind except those in `SOFT_KINDS`.
 * - `description` is trimmed; when missing or blank it falls back to the kind.
 * - At most three checks are returned (the first three valid ones, in order).
 *
 * @param raw Untrusted value taken from the model's output.
 * @returns The normalized checks; never throws on malformed input.
 */
export function normalizeAcceptanceChecks(raw: unknown): AcceptanceCheck[] {
  // Keep contracts tight (1–3 checks).
  const MAX_CHECKS = 3;

  if (!Array.isArray(raw)) return [];

  const out: AcceptanceCheck[] = [];
  for (const item of raw) {
    if (!item || typeof item !== "object") continue;
    const o = item as Record<string, unknown>;

    // Look the kind up in the table instead of asserting its type: the result
    // is either a genuine CheckKind or undefined (unknown / non-string kind).
    const kind = VALID_KINDS.find((k) => k === o.kind);
    if (kind === undefined) continue;

    // `typeof [] === "object"`, so arrays must be excluded explicitly; a spec
    // is always a keyed object.
    const spec =
      typeof o.spec === "object" && o.spec !== null && !Array.isArray(o.spec)
        ? (o.spec as Record<string, unknown>)
        : {};

    const hard = typeof o.hard === "boolean" ? o.hard : !SOFT_KINDS.has(kind);

    const trimmed = typeof o.description === "string" ? o.description.trim() : "";
    const description = trimmed !== "" ? trimmed : kind;

    out.push({ kind, hard, description, spec });
    if (out.length >= MAX_CHECKS) break;
  }
  return out;
}

/**
 * Instruction appended to the Planner's system prompt so each task it creates
 * carries a checkable contract.
 *
 * The kinds listed in the prompt are the same ones `VALID_KINDS` accepts, and
 * the "1–3 checks" limit matches the cap applied by
 * `normalizeAcceptanceChecks`; keep them in sync when either changes.
 * The surrounding newlines of the template are removed with `.trim()`.
 */
export const CHECK_GENERATION_PROMPT = `
[ACCEPTANCE CHECKS] For every task you create, attach \`acceptanceChecks\`: a JSON
array of 1–3 checks that objectively prove THIS task is done.
Each check: { "kind": <kind>, "hard": <bool>, "description": <string>, "spec": { ... } }
Allowed kinds and their spec:
- build spec: {} (compiles)
- typecheck spec: {} (no type errors)
- test spec: { command?: string } (tests pass)
- server_up spec: { port?: number } (app boots, 200)
- route_ok spec: { url: string, expectedStatus?: number }
- console_clean spec: { url?: string } (no JS errors)
- content spec: { url: string, contains: string } (text present)
- flow spec: { startUrl: string, expectContains: string }
- visual spec: { targetPath: string, minScore?: number } (soft)
- data spec: { command: string } (records exist)
Rules:
- build + server_up + console_clean are added AUTOMATICALLY. Do NOT repeat them.
- Add only checks that prove THIS task's specific behavior.
- Prefer the cheapest proof: route_ok/content over flow, flow over visual.
- If a task is not objectively verifiable (e.g. "make the copy friendlier"),
return an empty acceptanceChecks array and set "requiresHumanConfirm": true.
Do NOT fabricate a check you cannot actually verify.
`.trim();

/**
 * Turn hard failures into specific, actionable feedback for the next execution
 * round. Not "it didn't work" — the exact check, evidence, and a directive.
 *
 * The result is a header line, one `- <kind> (<description>): FAILED — <evidence>`
 * bullet per failure in input order, and a closing directive, joined by
 * newlines. No filtering is done here: every entry passed in is reported.
 *
 * @param failures Failed check results to report.
 * @returns The feedback text, or `""` when there are no failures.
 */
export function formatFailureFeedback(failures: readonly CheckResult[]): string {
  if (failures.length === 0) return "";

  const bullets = failures.map(
    (f) => `- ${f.check.kind} (${f.check.description}): FAILED — ${f.evidence}`,
  );

  return [
    "[VERIFICATION FAILED] Your last changes did not pass these checks:",
    ...bullets,
    "Fix these specific failures. Do not claim success until every check passes. " +
      "Address the exact errors above — read the relevant files first if needed.",
  ].join("\n");
}

/**
 * Stable signature of a report's hard failures — used to detect no-progress.
 *
 * Each failure contributes `<kind>:<evidence>`; the parts are sorted with the
 * default string ordering and joined with `;;`, so the same set of failures
 * yields the same signature regardless of the order they arrive in. The input
 * array is not reordered (sorting happens on the mapped copy).
 *
 * @param failures Failed check results to fingerprint.
 * @returns The signature string; `""` for an empty list.
 */
export function failureSignature(failures: readonly CheckResult[]): string {
  const parts = failures.map((f) => `${f.check.kind}:${f.evidence}`);
  parts.sort();
  return parts.join(";;");
}