feat(ai): tool-error recovery middleware
Pattern-matches known-recoverable MCP tool failures and injects a synthetic imperative message into the conversation right after the failing tool result. Static prompt rules lose to accumulated tool reality (we've shipped 4 orphan twenty-* services because the model ignored the "no delete-and-recreate" rule); a fresh role:'user' message at decision time does not. Initial rules cover the three highest-confidence Docker failure patterns: orphan container conflict (use apps_unstick), image pull denied (use apps_repair), port already allocated (identify holder). Each rule names the wrong-but-tempting move explicitly. See AI_HARNESS_GAPS.md §1 for the failure case this addresses.
This commit is contained in:
@@ -19,6 +19,7 @@ import { authSession } from '@/lib/auth/session-server';
|
|||||||
import { query } from '@/lib/db-postgres';
|
import { query } from '@/lib/db-postgres';
|
||||||
import { callGeminiChat } from '@/lib/ai/gemini-chat';
|
import { callGeminiChat } from '@/lib/ai/gemini-chat';
|
||||||
import { VIBN_TOOL_DEFINITIONS, executeMcpTool } from '@/lib/ai/vibn-tools';
|
import { VIBN_TOOL_DEFINITIONS, executeMcpTool } from '@/lib/ai/vibn-tools';
|
||||||
|
import { detectKnownError, formatRecoveryMessage } from '@/lib/ai/error-recovery';
|
||||||
import type { ChatMessage, ToolCall } from '@/lib/ai/gemini-chat';
|
import type { ChatMessage, ToolCall } from '@/lib/ai/gemini-chat';
|
||||||
|
|
||||||
// Bumped from 6 to 12 because Path B chains (devcontainer.ensure →
|
// Bumped from 6 to 12 because Path B chains (devcontainer.ensure →
|
||||||
@@ -457,6 +458,24 @@ export async function POST(request: Request) {
|
|||||||
toolName: tc.name,
|
toolName: tc.name,
|
||||||
thoughtSignature: tc.thoughtSignature,
|
thoughtSignature: tc.thoughtSignature,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Harness-layer error recovery: if the tool result matches
|
||||||
|
// a known-recoverable failure (e.g. orphan container
|
||||||
|
// conflict), inject a synthetic user-role message
|
||||||
|
// immediately after the tool result. This puts a fresh
|
||||||
|
// imperative ("CALL apps_unstick. DO NOT delete-and-
|
||||||
|
// recreate.") in the conversation right where the model
|
||||||
|
// is about to decide what to do next. Static prompt
|
||||||
|
// rules lose to accumulated tool reality; an injected
|
||||||
|
// message at decision time does not. See
|
||||||
|
// lib/ai/error-recovery.ts.
|
||||||
|
const recovery = detectKnownError(result);
|
||||||
|
if (recovery) {
|
||||||
|
messages.push({
|
||||||
|
role: 'user',
|
||||||
|
content: formatRecoveryMessage(recovery),
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (loopBreakReason) break;
|
if (loopBreakReason) break;
|
||||||
|
|||||||
111
lib/ai/error-recovery.ts
Normal file
111
lib/ai/error-recovery.ts
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
/**
|
||||||
|
* Tool-error recovery middleware.
|
||||||
|
*
|
||||||
|
* Pattern-matches known-recoverable error strings in MCP tool results
|
||||||
|
* and produces a synthetic user-role message instructing the model on the
|
||||||
|
* exact recovery action. Injected into the conversation before the
|
||||||
|
* next model round.
|
||||||
|
*
|
||||||
|
* Why this exists (vs just a system-prompt rule):
|
||||||
|
* Static prompt rules against accumulating tool reality lose. We've
|
||||||
|
* shipped 4 orphan twenty-* services because the model kept doing
|
||||||
|
* delete-and-recreate even though the prompt told it not to. The
|
||||||
|
* model treats prompt rules as soft guidance; it cannot ignore a
|
||||||
|
* fresh `role: "user"` message that arrives between tool result
|
||||||
|
* and next call. See AI_HARNESS_GAPS.md §1 for the full case.
|
||||||
|
*
|
||||||
|
* Adding a rule:
|
||||||
|
* 1. Pick a regex that matches the error string with NO false
|
||||||
|
* positives. If it could fire on a legitimate success or
|
||||||
|
* unrelated failure, leave it out — silent miss > wrong fix.
|
||||||
|
* 2. Write the `diagnosis` as a sentence the model can use as-is
|
||||||
|
* in a status update to the user.
|
||||||
|
* 3. Write `requiredAction` as the literal next tool call(s) the
|
||||||
|
* model should make, with arg shapes if non-obvious.
|
||||||
|
* 4. Write `antipattern` as the wrong-but-tempting move the model
|
||||||
|
* keeps doing. The injected message tells it explicitly NOT
|
||||||
|
* to do this.
|
||||||
|
*
|
||||||
|
* Rules are checked in registration order. First match wins.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * One known-recoverable tool failure and the guidance to inject when
 * it is detected. All text fields are interpolated verbatim into the
 * message built by `formatRecoveryMessage`.
 */
export interface RecoveryRule {
  /** Stable identifier; appears in the `[RECOVERY: <id>]` header line. */
  id: string;
  /**
   * Regex that identifies this error in tool output. It is applied
   * with `RegExp.prototype.test` to the raw string result, or to the
   * `JSON.stringify` form of a non-string result — so quoted text may
   * appear with escaped quotes (`\"`).
   */
  pattern: RegExp;
  /** Human-readable explanation of what went wrong. */
  diagnosis: string;
  /** Exact next tool call(s) the model should make. */
  requiredAction: string;
  /** The wrong-but-tempting move the model must be told NOT to make. */
  antipattern: string;
}
|
||||||
|
|
||||||
|
/**
 * Registered recovery rules, checked in array order (first match wins).
 *
 * Patterns are run against either the raw tool-result string or its
 * JSON.stringify form, so any pattern that anchors on quoted text must
 * accept both `"` and the JSON-escaped `\"`.
 */
const RULES: RecoveryRule[] = [
  {
    id: 'orphan-container-conflict',
    // Matches: `Conflict. The container name "/postgres-..." is already in use`
    // Real prod example, twenty-crm thread, 2026-04-30.
    // The optional `\\?"` on each side of the name accepts a bare name,
    // a quoted name, and a JSON-escaped quoted name (`\"/postgres-...\"`),
    // which is what the text looks like after a structured tool result
    // has been serialized.
    pattern: /Conflict\.\s+The container name\s+(?:\\?")?[\w./-]+(?:\\?")?\s+is already in use/i,
    diagnosis:
      'A previous deploy left an orphan Docker container holding this service\'s container name. The new boot is colliding with the orphan. This is a recoverable state.',
    requiredAction:
      'Call `apps_unstick { uuid }` against the SAME app uuid you were just trying to deploy, then `apps_deploy { uuid }`. Both calls use the existing uuid; do not create a new app.',
    antipattern:
      'Do NOT delete the failing app and create a new one with a different name. That keeps the orphan running, doubles the stack, and ships another shadow service. We have shipped 4 orphan twenty-* services this way before. Do not repeat it.',
  },
  {
    id: 'image-pull-denied',
    // Matches: `pull access denied for ...`, `manifest unknown`, and
    // `repository does not exist` from the registry.
    pattern: /(pull access denied for|manifest unknown|repository does not exist)/i,
    diagnosis:
      'The Docker image referenced by this app is not on the host, and the registry pull failed (private repo, missing credentials, or wrong tag).',
    requiredAction:
      'Call `apps_repair { uuid }` to re-attempt the post-deploy fixes. If that fails too, surface the exact image reference to the user and ask whether the image should be pulled from a different registry or rebuilt.',
    antipattern:
      'Do NOT retry the same `apps_deploy` blindly hoping the registry will respond differently. The pull failure is persistent until the underlying image-availability issue is fixed.',
  },
  {
    id: 'port-already-allocated',
    // Matches: `port <p> is already allocated`, `bind: address already in use`,
    // and `Ports are not available`.
    pattern: /(port\s+\S+\s+is already allocated|bind:\s+address already in use|Ports are not available)/i,
    diagnosis:
      'A different container or process on the host is already bound to the port this app is trying to claim.',
    requiredAction:
      'Use `apps_containers_list { uuid }` plus `shell_exec` (e.g. `docker ps --filter publish=<port>`) to identify the holder. If the holder is a stale Coolify-managed container, call `apps_unstick { uuid }` on its app. If it is a legitimate other app, surface the conflict to the user and ask which one should get the port.',
    antipattern:
      'Do NOT pick a random different port and retry. Port choice is part of the user\'s product configuration; a silent change will break their docs / DNS / clients.',
  },
];
|
||||||
|
|
||||||
|
/**
 * Inspect a tool result and return the first matching recovery rule,
 * or null if nothing matches.
 *
 * String results are matched as-is. Any other value is serialized with
 * `JSON.stringify` so error text nested inside a structured result is
 * visible to the patterns. Serialization escapes double quotes and
 * newlines inside string values (`"` becomes `\"`), so a pattern that
 * anchors on quoted text has to accept the escaped form as well.
 *
 * Detection is advisory, so this function never throws: a result that
 * cannot be serialized (circular reference, BigInt) or that serializes
 * to nothing (function, symbol) is treated as "no known error".
 *
 * @param toolResult Raw value returned by the tool call.
 * @returns The first rule in `RULES` whose pattern matches, else null.
 */
export function detectKnownError(toolResult: unknown): RecoveryRule | null {
  if (toolResult == null) return null;

  let text: string | undefined;
  if (typeof toolResult === 'string') {
    text = toolResult;
  } else {
    try {
      text = JSON.stringify(toolResult);
    } catch {
      // Unserializable result: a silent miss is better than crashing the
      // caller's tool loop.
      return null;
    }
  }
  // JSON.stringify returns undefined at runtime for functions and symbols,
  // despite its declared `string` return type.
  if (typeof text !== 'string') return null;

  for (const rule of RULES) {
    // Rules are shared module-level RegExp objects; resetting lastIndex
    // keeps `test` stateless even if a rule is ever given a `g`/`y` flag.
    rule.pattern.lastIndex = 0;
    if (rule.pattern.test(text)) return rule;
  }
  return null;
}
|
||||||
|
|
||||||
|
/**
 * Render a recovery rule as the text of the synthetic message that is
 * injected into the conversation before the next model round (the
 * caller pushes it with `role: 'user'`, directly after the failing
 * tool result).
 *
 * The wording is deliberately imperative ("CALL X. DO NOT do Y.").
 * Output is five newline-separated lines: a `[RECOVERY: <id>]` header,
 * the diagnosis, the required next action, the antipattern, and a
 * fixed reminder to send the user a one-line status first.
 *
 * @param rule The matched rule, typically from `detectKnownError`.
 * @returns The message body; rule fields are interpolated verbatim.
 */
export function formatRecoveryMessage(rule: RecoveryRule): string {
  const { id, diagnosis, requiredAction, antipattern } = rule;
  const lines = [
    `[RECOVERY: ${id}]`,
    `Diagnosis: ${diagnosis}`,
    `Required next action: ${requiredAction}`,
    `Do NOT: ${antipattern}`,
    `Send the user a one-line status before the recovery call so they know what you are doing.`,
  ];
  return lines.join('\n');
}
|
||||||
Reference in New Issue
Block a user