// vibn-agent-runner/vibn-frontend/lib/ai/verification/executor.ts

/**
 * Task executor — the iterate-to-green loop.
 *
 *   EXECUTE (model edits toward the goal, with prior failures as context)
 *     → TEST (run the verification contract)
 *       → pass?  → FINALIZE (task done)
 *       → fail?  → KEEP FIXING (feed concrete failures back)
 *       → stuck? → ESCALATE (re-plan or honest blocker to the user)
 *
 * This module is pure orchestration over injected dependencies, so the
 * finalize / keep-fixing / escalate decisions are fully unit-testable without
 * a live dev container.
 */

import type { CheckResult, VerificationReport, VerificationTask } from "./types";
import { failureSignature, formatFailureFeedback } from "./generation";

/**
 * Dependencies injected into `executeTask`.
 *
 * The loop itself performs no I/O: model execution, verification, and
 * persistence all arrive through this object so tests can supply stubs.
 */
export interface ExecuteTaskDeps {
  /**
   * Run one execution pass: let the model make edits toward the task's goal.
   *
   * - `failureFeedback` is the structured "[VERIFICATION FAILED] …" message
   *   built from `task.lastFailures`; it is the empty string when the task has
   *   no recorded failures (e.g. the first attempt of a fresh task).
   * - `attempt` is the value of `task.attempts` after it has been incremented
   *   for this pass, so it keeps counting across resumed calls.
   */
  runExecution: (args: {
    task: VerificationTask;
    failureFeedback: string;
    attempt: number;
  }) => Promise<void>;

  /** Run the verification contract and return a structured report. */
  verify: (task: VerificationTask) => Promise<VerificationReport>;

  /**
   * Persist task progress (attempts + lastFailures) so a turn can resume.
   * Optional. When provided it is awaited after every verification and again
   * whenever the task's status changes to "blocked".
   */
  persist?: (task: VerificationTask) => void | Promise<void>;

  /**
   * Max execute→verify cycles before escalating. Default 5.
   * Compared against `task.attempts`, i.e. the count stored on the task, not
   * the number of cycles run in the current call.
   */
  maxAttempts?: number;

  /**
   * Stop after this many consecutive no-progress attempts. Default 2.
   * An attempt counts as no-progress when its failure signature equals the
   * signature of the attempt immediately before it.
   */
  noProgressLimit?: number;
}

/**
 * Result of `executeTask`, discriminated on `status`.
 *
 * - `"done"`: verification passed; `report` is the passing report.
 * - `"blocked"`: the loop gave up. `reason` is `"no_progress"` (the same
 *   failure signature kept repeating) or `"max_attempts"` (the attempt
 *   ceiling was reached). `report` is the last report produced during the
 *   call, or `null` if no verification ran in that call.
 *
 * In both variants `attempts` is the task's attempt count at return time.
 */
export type ExecuteTaskOutcome =
  | { status: "done"; report: VerificationReport; attempts: number }
  | {
      status: "blocked";
      report: VerificationReport | null;
      attempts: number;
      reason: string;
      failures: CheckResult[];
    };

/**
 * Drive `task` through the execute → verify loop until it goes green or the
 * loop has to escalate.
 *
 * `task` is mutated in place: `status`, `attempts` and `lastFailures` are
 * updated as the loop advances, and `deps.persist` (when provided) is awaited
 * after every verification and on every terminal transition.
 *
 * @param task - The task to run. `task.attempts` and `task.lastFailures` are
 *   honored as-is, so a task persisted by an earlier call resumes with its
 *   prior attempt count and failure feedback.
 * @param deps - Injected execution, verification and persistence hooks plus
 *   the optional `maxAttempts` / `noProgressLimit` thresholds.
 * @returns `"done"` with the passing report, or `"blocked"` with reason
 *   `"no_progress"` or `"max_attempts"` and the failures that block the task.
 *   Errors thrown by the injected hooks are not caught here; they propagate.
 */
export async function executeTask(
  task: VerificationTask,
  deps: ExecuteTaskDeps,
): Promise<ExecuteTaskOutcome> {
  const maxAttempts = deps.maxAttempts ?? 5;
  const noProgressLimit = deps.noProgressLimit ?? 2;

  task.status = "in_progress";
  let prevSig: string | null = null;
  let noProgressStreak = 0;
  let lastReport: VerificationReport | null = null;

  while (task.attempts < maxAttempts) {
    task.attempts++;

    // EXECUTE — with the prior failures fed back as concrete instructions.
    const failureFeedback = task.lastFailures?.length
      ? formatFailureFeedback(task.lastFailures)
      : "";
    await deps.runExecution({
      task,
      failureFeedback,
      attempt: task.attempts,
    });

    // TEST
    const report = await deps.verify(task);
    lastReport = report;

    if (report.passed) {
      // FINALIZE
      task.status = "done";
      task.lastFailures = [];
      await deps.persist?.(task);
      return { status: "done", report, attempts: task.attempts };
    }

    // KEEP FIXING — persist the concrete failures so the next attempt (even in
    // a later HTTP turn) resumes with full context.
    task.lastFailures = report.failures;
    await deps.persist?.(task);

    // Detect no progress: the same hard failures with the same evidence.
    // The first failing attempt of a call only records its signature; the
    // streak starts counting from the second identical result onward.
    const sig = failureSignature(report.failures);
    if (prevSig !== null && sig === prevSig) {
      noProgressStreak++;
    } else {
      noProgressStreak = 0;
    }
    prevSig = sig;

    if (noProgressStreak >= noProgressLimit) {
      // ESCALATE — repeating the same edit/verify cycle is not helping.
      task.status = "blocked";
      await deps.persist?.(task);
      return {
        status: "blocked",
        report,
        attempts: task.attempts,
        reason: "no_progress",
        failures: report.failures,
      };
    }
  }

  // Hit the attempt ceiling without going green.
  task.status = "blocked";
  await deps.persist?.(task);
  return {
    status: "blocked",
    report: lastReport,
    attempts: task.attempts,
    reason: "max_attempts",
    // If the ceiling was already reached on entry (e.g. a resumed task), no
    // verification ran in this call; surface the failures persisted on the
    // task instead of reporting a blocker with no evidence.
    failures: lastReport?.failures ?? task.lastFailures ?? [],
  };
}