// Files · 128 lines · 3.8 KiB · TypeScript

/**
 * Task executor — the iterate-to-green loop.
 *
 *   EXECUTE (model edits toward the goal, with prior failures as context)
 *     → TEST (run the verification contract)
 *         → pass?  → FINALIZE (task done)
 *         → fail?  → KEEP FIXING (feed concrete failures back)
 *         → stuck? → ESCALATE (re-plan or honest blocker to the user)
 *
 * This module is pure orchestration over injected dependencies, so the
 * finalize / keep-fixing / escalate decisions are fully unit-testable without
 * a live dev container.
 */
import type { CheckResult, VerificationReport, VerificationTask } from "./types";
import { failureSignature, formatFailureFeedback } from "./generation";
/**
 * Collaborators and limits injected into {@link executeTask}.
 *
 * Only `runExecution` and `verify` are required; the remaining members are
 * optional and fall back to the defaults noted on each one.
 */
export interface ExecuteTaskDeps {
  /**
   * Performs a single execution pass in which the model edits toward the
   * task's goal.
   *
   * `failureFeedback` carries the structured "[VERIFICATION FAILED] …" message
   * built from the previous attempt; it is the empty string on the first
   * attempt. `attempt` is the value of `task.attempts` for the current cycle.
   */
  runExecution: (args: {
    task: VerificationTask;
    failureFeedback: string;
    attempt: number;
  }) => Promise<void>;

  /** Runs the verification contract for the task and yields a structured report. */
  verify: (task: VerificationTask) => Promise<VerificationReport>;

  /**
   * Saves task progress (attempts + lastFailures) so that a later turn can
   * resume. May be synchronous or asynchronous; skipped when not provided.
   */
  persist?: (task: VerificationTask) => Promise<void> | void;

  /** Ceiling on execute→verify cycles before escalating. Defaults to 5. */
  maxAttempts?: number;

  /** Number of consecutive no-progress attempts that triggers a stop. Defaults to 2. */
  noProgressLimit?: number;
}
/**
 * Result of {@link executeTask}, discriminated on `status`.
 *
 * - `"done"`: verification passed; `report` is the passing report.
 * - `"blocked"`: the loop gave up. `reason` says why (`executeTask` emits
 *   `"no_progress"` and `"max_attempts"`), `failures` lists the failing
 *   checks, and `report` is `null` when no verification ran during the call.
 *
 * `attempts` is the task's attempt counter at the time the outcome was produced.
 */
export type ExecuteTaskOutcome =
  | {
      status: "done";
      report: VerificationReport;
      attempts: number;
    }
  | {
      status: "blocked";
      report: VerificationReport | null;
      attempts: number;
      reason: string;
      failures: CheckResult[];
    };
/**
 * Drive a task through the execute → verify loop until it goes green or has
 * to be escalated.
 *
 * Each cycle increments `task.attempts`, calls `deps.runExecution` with the
 * previous failures formatted as feedback (empty string when there are none),
 * then calls `deps.verify`. The `task` object is mutated in place (`status`,
 * `attempts`, `lastFailures`) and handed to `deps.persist` after every verdict.
 *
 * @param task - Task to work on. `task.attempts` is not reset here, so a task
 *   restored from persisted state keeps counting from where it left off.
 * @param deps - Injected execution, verification and persistence callbacks,
 *   plus the optional `maxAttempts` (default 5) and `noProgressLimit`
 *   (default 2) limits.
 * @returns `done` with the passing report, or `blocked` with reason
 *   `"no_progress"` (the failure signature repeated `noProgressLimit` times in
 *   a row) or `"max_attempts"` (attempt ceiling reached). When the ceiling was
 *   already reached on entry, no cycle runs: `report` is `null` and `failures`
 *   falls back to the task's persisted `lastFailures`.
 * @throws Whatever `runExecution`, `verify` or `persist` reject with; nothing
 *   is caught here.
 */
export async function executeTask(
  task: VerificationTask,
  deps: ExecuteTaskDeps,
): Promise<ExecuteTaskOutcome> {
  const maxAttempts = deps.maxAttempts ?? 5;
  const noProgressLimit = deps.noProgressLimit ?? 2;

  task.status = "in_progress";

  let prevSig: string | null = null;
  let noProgressStreak = 0;
  let lastReport: VerificationReport | null = null;

  while (task.attempts < maxAttempts) {
    task.attempts++;

    // EXECUTE — with the prior failures fed back as concrete instructions.
    const failureFeedback = task.lastFailures?.length
      ? formatFailureFeedback(task.lastFailures)
      : "";
    await deps.runExecution({
      task,
      failureFeedback,
      attempt: task.attempts,
    });

    // TEST
    const report = await deps.verify(task);
    lastReport = report;

    if (report.passed) {
      // FINALIZE — clear stale failures so a persisted "done" task carries none.
      task.status = "done";
      task.lastFailures = [];
      await deps.persist?.(task);
      return { status: "done", report, attempts: task.attempts };
    }

    // KEEP FIXING — persist the concrete failures so the next attempt (even in
    // a later HTTP turn) resumes with full context.
    task.lastFailures = report.failures;
    await deps.persist?.(task);

    // Detect no progress: an identical failure signature on consecutive
    // failing reports extends the streak; any change resets it.
    const sig = failureSignature(report.failures);
    if (prevSig !== null && sig === prevSig) {
      noProgressStreak++;
    } else {
      noProgressStreak = 0;
    }
    prevSig = sig;

    if (noProgressStreak >= noProgressLimit) {
      // ESCALATE — persist again so the stored task reflects the blocked status.
      task.status = "blocked";
      await deps.persist?.(task);
      return {
        status: "blocked",
        report,
        attempts: task.attempts,
        reason: "no_progress",
        failures: report.failures,
      };
    }
  }

  // Hit the attempt ceiling without going green.
  task.status = "blocked";
  await deps.persist?.(task);
  return {
    status: "blocked",
    report: lastReport,
    attempts: task.attempts,
    reason: "max_attempts",
    // A task resumed at the ceiling never enters the loop, so there is no
    // report from this call; surface the persisted failures instead of
    // reporting a blocker with no evidence.
    failures: lastReport?.failures ?? task.lastFailures ?? [],
  };
}