128 lines
3.8 KiB
TypeScript
128 lines
3.8 KiB
TypeScript
/**
 * Task executor — the iterate-to-green loop.
 *
 *   EXECUTE (model edits toward the goal, with prior failures as context)
 *     → TEST (run the verification contract)
 *         → pass?  → FINALIZE (task done)
 *         → fail?  → KEEP FIXING (feed concrete failures back)
 *         → stuck? → ESCALATE (re-plan or honest blocker to the user)
 *
 * This module is pure orchestration over injected dependencies, so the
 * finalize / keep-fixing / escalate decisions are fully unit-testable without
 * a live dev container.
 */
|
|
|
|
import type { CheckResult, VerificationReport, VerificationTask } from "./types";
|
|
import { failureSignature, formatFailureFeedback } from "./generation";
|
|
|
|
/**
 * Collaborators and tuning knobs injected into `executeTask`.
 *
 * Everything the loop needs from the outside world is supplied here, so the
 * loop itself performs no I/O of its own.
 */
export interface ExecuteTaskDeps {
  /**
   * Performs a single execution pass in which the model edits toward the
   * task's goal.
   *
   * - `task`: the task being worked on.
   * - `failureFeedback`: the structured "[VERIFICATION FAILED] …" message
   *   built from the task's recorded failures, or an empty string when the
   *   task has none recorded (e.g. on a fresh first attempt).
   * - `attempt`: the value of `task.attempts` for this pass (already
   *   incremented for the pass).
   */
  runExecution: (args: {
    task: VerificationTask;
    failureFeedback: string;
    attempt: number;
  }) => Promise<void>;

  /**
   * Runs the verification contract for the task and resolves with a
   * structured report.
   */
  verify: (task: VerificationTask) => Promise<VerificationReport>;

  /**
   * Optional hook that saves task progress (attempts + lastFailures) so a
   * later turn can resume. Invoked after every verification and again when
   * the task is marked blocked; may be synchronous or asynchronous.
   */
  persist?: (task: VerificationTask) => void | Promise<void>;

  /**
   * Upper bound on execute→verify cycles before escalating.
   * Defaults to 5 when omitted.
   */
  maxAttempts?: number;

  /**
   * Number of consecutive no-progress attempts tolerated before stopping.
   * Defaults to 2 when omitted.
   */
  noProgressLimit?: number;
}
|
|
|
|
/**
 * Result of `executeTask`, discriminated on `status`.
 *
 * - `"done"`: verification passed; `report` is the passing report and
 *   `attempts` is the task's attempt count at that point.
 * - `"blocked"`: the loop gave up. `reason` says why (`executeTask` emits
 *   `"no_progress"` and `"max_attempts"`), `failures` lists the failing
 *   checks, and `report` is the last verification report, or `null` when no
 *   verification ran during the call.
 */
export type ExecuteTaskOutcome =
  | {
      status: "done";
      report: VerificationReport;
      attempts: number;
    }
  | {
      status: "blocked";
      report: VerificationReport | null;
      attempts: number;
      reason: string;
      failures: CheckResult[];
    };
|
|
|
|
/**
 * Drives one task through the execute → verify loop until it goes green or
 * the loop has to escalate.
 *
 * The task object is mutated in place: `status`, `attempts` and
 * `lastFailures` are updated as the loop advances, and `deps.persist` (when
 * provided) is awaited after every verification and whenever the task is
 * marked blocked. `task.attempts` is never reset here, so a resumed task
 * keeps counting against the same ceiling.
 *
 * @param task - The task to work on; its recorded `lastFailures` (if any)
 *   are fed into the first execution pass as feedback.
 * @param deps - Injected execution / verification / persistence hooks plus
 *   the optional `maxAttempts` (default 5) and `noProgressLimit` (default 2).
 * @returns `"done"` with the passing report, or `"blocked"` with reason
 *   `"no_progress"` (the failure signature repeated `noProgressLimit` times
 *   in a row) or `"max_attempts"` (the attempt ceiling was reached).
 *
 * Errors thrown by `runExecution`, `verify` or `persist` are not caught and
 * reject the returned promise.
 */
export async function executeTask(
  task: VerificationTask,
  deps: ExecuteTaskDeps,
): Promise<ExecuteTaskOutcome> {
  const maxAttempts = deps.maxAttempts ?? 5;
  const noProgressLimit = deps.noProgressLimit ?? 2;

  task.status = "in_progress";
  let prevSig: string | null = null;
  let noProgressStreak = 0;
  let lastReport: VerificationReport | null = null;

  while (task.attempts < maxAttempts) {
    task.attempts++;

    // EXECUTE — with the prior failures fed back as concrete instructions.
    const failureFeedback = task.lastFailures?.length
      ? formatFailureFeedback(task.lastFailures)
      : "";
    await deps.runExecution({
      task,
      failureFeedback,
      attempt: task.attempts,
    });

    // TEST
    const report = await deps.verify(task);
    lastReport = report;

    if (report.passed) {
      // FINALIZE — clear stale failures so a later read does not see them.
      task.status = "done";
      task.lastFailures = [];
      await deps.persist?.(task);
      return { status: "done", report, attempts: task.attempts };
    }

    // KEEP FIXING — persist the concrete failures so the next attempt (even in
    // a later HTTP turn) resumes with full context.
    task.lastFailures = report.failures;
    await deps.persist?.(task);

    // Detect no progress: the failure signature is identical to the previous
    // attempt's. The first failing attempt of a call has nothing to compare
    // against, so it never counts towards the streak.
    const sig = failureSignature(report.failures);
    if (prevSig !== null && sig === prevSig) {
      noProgressStreak++;
    } else {
      noProgressStreak = 0;
    }
    prevSig = sig;

    if (noProgressStreak >= noProgressLimit) {
      // ESCALATE — repeated identical failures; stop burning attempts.
      task.status = "blocked";
      await deps.persist?.(task);
      return {
        status: "blocked",
        report,
        attempts: task.attempts,
        reason: "no_progress",
        failures: report.failures,
      };
    }
  }

  // Hit the attempt ceiling without going green.
  task.status = "blocked";
  await deps.persist?.(task);
  return {
    status: "blocked",
    report: lastReport,
    attempts: task.attempts,
    reason: "max_attempts",
    // When the task was already at the ceiling on entry (e.g. resumed in a
    // later turn) no verification ran in this call, so fall back to the
    // failures recorded on the task instead of reporting an empty list.
    failures: lastReport?.failures ?? task.lastFailures ?? [],
  };
}
|