feat(telemetry): implement phase-based execution loop and adaptive tool budgets

This commit is contained in:
2026-06-09 18:58:12 -07:00
parent cd1fdd1d48
commit b1ad4fb363
2 changed files with 219 additions and 7 deletions

View File

@@ -18,11 +18,86 @@ import { NextResponse } from "next/server";
import { requireWorkspacePrincipal } from "@/lib/auth/workspace-auth";
import { query, queryOne } from "@/lib/db-postgres";
import { callVibnChat } from "@/lib/ai/vibn-chat-model";
import { VIBN_TOOL_DEFINITIONS, executeMcpTool } from "@/lib/ai/vibn-tools";
import {
VIBN_TOOL_DEFINITIONS,
executeMcpTool,
filterToolsForPhase,
type AgentPhase,
} from "@/lib/ai/vibn-tools";
import {
detectKnownError,
formatRecoveryMessage,
} from "@/lib/ai/error-recovery";
// --- Agent Orchestration Types & Constants ---
/**
 * Coarse category assigned to a single user turn.
 *
 * Produced by `classifyTurnIntent` and used as the key into `TOOL_BUDGETS`.
 */
type TurnIntent =
  // No action requested.
  | "conversational"
  // Read-only style requests.
  | "status_check"
  | "diagnose"
  // Requests that change the project.
  | "small_fix"
  | "feature_build"
  | "deploy"
  // Open-ended "keep going" directives.
  | "autonomous";
// `AgentPhase` is deliberately NOT redeclared here. It is already imported as
// a type from "@/lib/ai/vibn-tools" in this file's import list, and a local
// alias with the same name conflicts with that import (TS2440: "Import
// declaration conflicts with local declaration"). The alias that used to live
// here listed: "plan" | "recon" | "checkpoint" | "execute" | "verify" | "final".
// NOTE(review): confirm the exported type in vibn-tools carries the same members.
/**
 * Per-intent budget for the tool-calling loop.
 *
 * The value for the current turn is used as the loop's upper bound
 * (`while (round < maxToolRounds)`), so it counts loop rounds.
 *
 * NOTE(review): a budget of 0 means that loop body never runs. If the model
 * call itself lives inside the loop, a "conversational" turn would get no
 * model call at all — confirm this is handled after the loop.
 */
const TOOL_BUDGETS: Record<TurnIntent, number> = {
  // Chit-chat: no tool rounds.
  conversational: 0,

  // Look-but-don't-touch requests.
  status_check: 2,
  diagnose: 8,

  // Requests that modify the project.
  small_fix: 18,
  feature_build: 40,
  deploy: 25,

  // Explicit "keep going" directives get the largest allowance.
  autonomous: 150,
};
/**
 * Heuristically classifies a user message into a turn intent.
 *
 * The message is trimmed and lower-cased, then checked against keyword
 * patterns in priority order; the first matching rule wins:
 *   1. high-agency directives  -> "autonomous"
 *   2. deployment keywords     -> "deploy"
 *   3. build keywords          -> "feature_build" if longer than 50 chars,
 *                                 otherwise "small_fix"
 *   4. problem keywords        -> "diagnose"
 *   5. status keywords         -> "status_check"
 *   6. short / greeting        -> "conversational"
 *   7. anything else           -> "feature_build"
 *
 * @param message Raw user message for the current turn.
 * @returns The intent used to pick the turn's tool budget.
 */
function classifyTurnIntent(message: string): TurnIntent {
  const m = message.trim().toLowerCase();

  // High-agency directives
  if (
    /(keep going|continue|build it|do it|go ahead|proceed|autonomous)/.test(m)
  ) {
    return "autonomous";
  }

  // Deployments
  if (/(deploy|ship|release|publish|push to prod)/.test(m)) {
    return "deploy";
  }

  // Feature build: long requests are treated as a full feature, short ones
  // as a small fix.
  if (
    /(build|create|add|implement|make a|setup|wire up|scaffold|integrate)/.test(
      m,
    )
  ) {
    return m.length > 50 ? "feature_build" : "small_fix";
  }

  // Diagnostics
  if (
    /(why|broken|error|blank|not loading|fail|bug|issue|doesn't work|isn't working|fix)/.test(
      m,
    )
  ) {
    return "diagnose";
  }

  // Status check
  if (/(status|logs|running|active|what is|show me|check)/.test(m)) {
    return "status_check";
  }

  // Conversational fallback: very short messages, or messages that open with
  // a greeting/acknowledgement WORD. The trailing `\b` matters: without it,
  // any message that merely starts with those letters ("notifications ...",
  // "history ...", "okta ...") would be classified as conversational.
  if (m.length < 20 || /^(hi|hello|thanks|ok|yes|no)\b/.test(m)) {
    return "conversational";
  }

  // Default to a generous feature build if we can't tell
  return "feature_build";
}
import { listRecentSentryIssues } from "@/lib/integrations/sentry";
import {
ensureProjectRepoCloned,
@@ -837,17 +912,28 @@ export async function POST(request: Request) {
let fileHashes = new Map<string, string>();
let stallRounds = 0;
// ── Phase & Intent State ──
const turnIntent = classifyTurnIntent(message);
const maxToolRounds = activeMcpToken ? TOOL_BUDGETS[turnIntent] : 0;
let phase: AgentPhase = "recon";
let checkpointEmitted = false;
let verificationPassed = false;
emit({ type: "phase", phase, label: "Investigating & Planning" });
try {
// Tool-calling loop: use non-streaming so thought_signature is
// always present in the complete response (required by thinking models).
while (round < MAX_TOOL_ROUNDS) {
while (round < maxToolRounds) {
if (aborted) break;
round++;
// Keep tool definitions active in the schema to avoid model confusion and
// MALFORMED_FUNCTION_CALL gateway crashes, but let our system instructions
// guide the model to respond in plain text for conversational inputs.
const toolDefs = activeMcpToken ? VIBN_TOOL_DEFINITIONS : [];
const toolDefs = activeMcpToken
? filterToolsForPhase(VIBN_TOOL_DEFINITIONS, phase, turnIntent)
: [];
// Every 6 silent rounds or 8 tool calls, gently nudge the model to surface a one-liner
// status before continuing. This is the user's only signal of
@@ -871,10 +957,57 @@ export async function POST(request: Request) {
"If they want you to take action, confirm intent and wait for a clear directive.";
}
if (MAX_TOOL_ROUNDS - round <= 3) {
extraSystem += `\n\n[WARNING] You only have ${MAX_TOOL_ROUNDS - round} tool calls left before you are forcefully terminated. Stop exploring, make your final edits, and write your final response to the user NOW.`;
if (maxToolRounds - round <= 3) {
extraSystem += `\n\n[WARNING] You only have ${maxToolRounds - round} tool calls left before you are forcefully terminated. Stop exploring, make your final edits, and write your final response to the user NOW.`;
}
// C-08: Force Checkpoint Before Mutation
// If the AI is trying to use mutating tools but hasn't emitted a checkpoint,
// intercept the tool calls, block them, and force it to state its plan.
//
// NOTE(review): `resp` is read on the next line, but the only declaration
// visible in this view (`const resp = await callVibnChat(...)`) appears AFTER
// this block. If both sit in the same loop body, this read happens before
// `resp` is initialized — confirm this gate runs after the model call.
const requestedMutations = resp.toolCalls.filter((tc) =>
// Tool names treated as mutating for the purposes of this gate.
[
"fs_write",
"fs_edit",
"fs_delete",
"dev_server_start",
"dev_server_stop",
"apps_deploy",
"ship",
].includes(tc.name),
);
// Gate: fires only for the first mutation request made while still in "recon".
if (
requestedMutations.length > 0 &&
!checkpointEmitted &&
phase === "recon"
) {
const blockMsg =
"[PHASE CHECKPOINT REQUIRED] Before editing files or deploying, you MUST state your goal, current findings, the suspected cause of the issue, the exact file(s) to change, and your verification plan. Do not call any tools in your response.";
// The block message is appended to the conversation as a user-role message.
messages.push({
role: "user",
content: blockMsg,
});
// The checkpoint event sent to the client carries placeholder text only.
emit({
type: "checkpoint",
goal: "Awaiting checkpoint...",
findings: "Evaluating...",
});
// The flag and phase advance here, i.e. as soon as the block message is
// queued and before any checkpoint text has come back from the model.
checkpointEmitted = true;
phase = "execute";
emit({ type: "phase", phase, label: "Executing Code Edits" });
continue; // Skip tool execution and re-prompt
}
// Any mutation request that was not blocked above moves the turn to "verify".
if (requestedMutations.length > 0) {
phase = "verify";
emit({
type: "phase",
phase,
label: "Verifying Build & Compiling",
});
}
// Execute tool calls and add results. OpenAI-compatible APIs
const resp = await callVibnChat({
systemPrompt: systemPrompt + extraSystem,
messages,
@@ -1120,7 +1253,7 @@ export async function POST(request: Request) {
const needsRecovery =
!aborted &&
anyToolsExecuted &&
(round >= MAX_TOOL_ROUNDS ||
(round >= maxToolRounds ||
!!loopBreakReason ||
assistantText.trim().length === 0 ||
roundsSinceText >= 30 ||
@@ -1133,7 +1266,7 @@ export async function POST(request: Request) {
: "";
const reason = loopBreakReason
? `LOOP DETECTED: ${loopBreakReason}. Stop trying that approach. `
: round >= MAX_TOOL_ROUNDS
: round >= maxToolRounds
? "You hit the tool-round cap. "
: "";
try {