Files

109 lines
4.2 KiB
TypeScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* Acceptance-check generation + feedback formatting.
*
* - The Planner emits `acceptanceChecks` per task using a strict schema; we
* validate/normalize that output here (models are not trustworthy emitters).
* - On a failed verification we format the failures into concrete, structured
* feedback that the next execution round consumes — this is what makes the
* model FIX rather than guess.
*/
import type { AcceptanceCheck, CheckKind, CheckResult } from "./types";
/**
 * Check kinds accepted by `normalizeAcceptanceChecks`; entries carrying any
 * other `kind` value are dropped.
 */
const VALID_KINDS: CheckKind[] = [
  "build",
  "typecheck",
  "test",
  "server_up",
  "route_ok",
  "console_clean",
  "content",
  "flow",
  "visual",
  "data",
];

// Soft-by-default kinds (advisory, never block "done"): when the model omits
// `hard`, these default to `hard: false`; every other kind defaults to true.
const SOFT_KINDS = new Set<CheckKind>(["visual"]);

/** Maximum number of checks kept per task — keeps contracts tight (1-3 checks). */
const MAX_ACCEPTANCE_CHECKS = 3;

/** True for non-null, non-array objects, i.e. values usable as a key/value record. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** True when `value` is a string listed in `VALID_KINDS`. */
function isCheckKind(value: unknown): value is CheckKind {
  return (
    typeof value === "string" &&
    (VALID_KINDS as readonly string[]).includes(value)
  );
}

/**
 * Validate and normalize a raw `acceptanceChecks` array from the model.
 *
 * - Anything that is not an array yields `[]`.
 * - Entries that are not objects, or whose `kind` is not in `VALID_KINDS`,
 *   are dropped.
 * - `spec` is kept only when it is a plain object; a missing, primitive, or
 *   array-valued `spec` is replaced with `{}`.
 * - `hard` is kept when it is a boolean; otherwise it defaults to `true`
 *   unless the kind is in `SOFT_KINDS`.
 * - `description` is trimmed; when missing or blank it falls back to the kind.
 * - At most `MAX_ACCEPTANCE_CHECKS` valid entries are returned, in input order.
 *
 * @param raw - Untrusted value taken from the model's output.
 * @returns The normalized checks (possibly empty).
 */
export function normalizeAcceptanceChecks(raw: unknown): AcceptanceCheck[] {
  if (!Array.isArray(raw)) return [];

  const out: AcceptanceCheck[] = [];
  for (const item of raw) {
    if (!isPlainRecord(item)) continue;

    const kind = item.kind;
    if (!isCheckKind(kind)) continue;

    // Arrays are objects too, but a spec must be a key/value record.
    const spec = isPlainRecord(item.spec) ? item.spec : {};

    const hard =
      typeof item.hard === "boolean" ? item.hard : !SOFT_KINDS.has(kind);

    // `||` is deliberate: a blank description must fall back to the kind.
    const trimmed =
      typeof item.description === "string" ? item.description.trim() : "";
    const description = trimmed || kind;

    out.push({ kind, hard, description, spec });
    if (out.length >= MAX_ACCEPTANCE_CHECKS) break;
  }
  return out;
}
/**
 * Instruction appended to the Planner's system prompt so each task it creates
 * carries a checkable contract.
 *
 * The requested count ("1-3 checks") matches the limit of three entries that
 * `normalizeAcceptanceChecks` keeps; an ASCII hyphen is used so the range
 * cannot be lost to Unicode handling.
 */
export const CHECK_GENERATION_PROMPT = `
[ACCEPTANCE CHECKS] For every task you create, attach \`acceptanceChecks\`: a JSON
array of 1-3 checks that objectively prove THIS task is done.
Each check: { "kind": <kind>, "hard": <bool>, "description": <string>, "spec": { ... } }
Allowed kinds and their spec:
- build spec: {} (compiles)
- typecheck spec: {} (no type errors)
- test spec: { command?: string } (tests pass)
- server_up spec: { port?: number } (app boots, 200)
- route_ok spec: { url: string, expectedStatus?: number }
- console_clean spec: { url?: string } (no JS errors)
- content spec: { url: string, contains: string } (text present)
- flow spec: { startUrl: string, expectContains: string }
- visual spec: { targetPath: string, minScore?: number } (soft)
- data spec: { command: string } (records exist)
Rules:
- build + server_up + console_clean are added AUTOMATICALLY. Do NOT repeat them.
- Add only checks that prove THIS task's specific behavior.
- Prefer the cheapest proof: route_ok/content over flow, flow over visual.
- If a task is not objectively verifiable (e.g. "make the copy friendlier"),
return an empty acceptanceChecks array and set "requiresHumanConfirm": true.
Do NOT fabricate a check you cannot actually verify.
`.trim();
/**
 * Render failed checks as specific, actionable feedback for the next execution
 * round: one bullet per failure naming the check kind, its description, and the
 * recorded evidence, followed by a directive to fix exactly those failures.
 *
 * @param failures - Failed check results to report.
 * @returns The feedback text, or an empty string when there are no failures.
 */
export function formatFailureFeedback(failures: CheckResult[]): string {
  if (failures.length === 0) {
    return "";
  }

  const header =
    "[VERIFICATION FAILED] Your last changes did not pass these checks:";

  const bullets: string[] = [];
  for (const { check, evidence } of failures) {
    bullets.push(`- ${check.kind} (${check.description}): FAILED — ${evidence}`);
  }

  const directive =
    "Fix these specific failures. Do not claim success until every check passes. " +
    "Address the exact errors above — read the relevant files first if needed.";

  // Header, each bullet, and the directive each sit on their own line.
  return [header, ...bullets, directive].join("\n");
}
/**
 * Stable signature of a report's hard failures — used to detect no-progress.
 *
 * Each failure contributes a `kind:evidence` key; the keys are sorted so the
 * result does not depend on the order of `failures`, then joined with ";;".
 * The input array is not modified.
 *
 * @param failures - Failed check results to summarize.
 * @returns The joined signature, or an empty string for an empty list.
 */
export function failureSignature(failures: CheckResult[]): string {
  const keys: string[] = [];
  for (const failure of failures) {
    keys.push(`${failure.check.kind}:${failure.evidence}`);
  }
  keys.sort();
  return keys.join(";;");
}