From 035cdca84677c5cbff915784986730511c334e8b Mon Sep 17 00:00:00 2001 From: mawkone Date: Mon, 15 Jun 2026 14:47:36 -0700 Subject: [PATCH] Implement LLM context compression and persistent memory --- vibn-agent-runner/src/tools/vibn-tools.ts | 23 +++ vibn-frontend/app/api/chat/route.ts | 183 +++++++++++++++++----- vibn-frontend/app/api/mcp/route.ts | 40 +++++ 3 files changed, 208 insertions(+), 38 deletions(-) diff --git a/vibn-agent-runner/src/tools/vibn-tools.ts b/vibn-agent-runner/src/tools/vibn-tools.ts index daecf2f4..32458e42 100644 --- a/vibn-agent-runner/src/tools/vibn-tools.ts +++ b/vibn-agent-runner/src/tools/vibn-tools.ts @@ -831,6 +831,28 @@ After this returns, ALWAYS call apps_deploy { uuid } to regenerate the live Trae // ── Databases ───────────────────────────────────────────────────────────── + { + name: "update_memory", + description: "Write a persistent fact about this project that should be remembered across all turns. Use this when you discover something that will affect future actions — e.g. the correct start command, the actual entry point file, a broken dependency. Do NOT use this for temporary observations.", + parameters: { + type: "OBJECT", + properties: { + operation: { + type: "STRING", + description: "set_fact: store a key/value fact. set_plan: record your current high-level plan. clear_plan: mark the plan as complete." + }, + key: { + type: "STRING", + description: "For set_fact: the fact name (e.g. 'start_command', 'entry_point')" + }, + value: { + type: "STRING", + description: "For set_fact or set_plan: the value to store" + } + }, + required: ["operation"], + }, + }, { name: "workspace_db_query", description: "Run a read-only SQL query against the workspace's main production/telemetry database (the one powering Next.js + Telemetry). 
/** Options for {@link keepHeadAndTail}. */
interface HeadTailOptions {
  /** Texts with at most this many lines are left untouched. */
  maxLines: number;
  /** Number of lines kept at the start and again at the end. */
  keep: number;
  /** Builds the first line of the summary from the total line count. */
  header: (totalLines: number) => string;
  /** When true, label the kept ranges with line numbers instead of "First/Last". */
  numbered?: boolean;
}

/** Returns `value` as a plain object, or null for primitives, arrays and null. */
function toolResultAsObject(value: unknown): Record<string, unknown> | null {
  return value !== null && typeof value === "object" && !Array.isArray(value)
    ? (value as Record<string, unknown>)
    : null;
}

/** True for a non-empty array or non-empty string (the shapes an `errors` field is checked for). */
function hasEntries(value: unknown): boolean {
  return (Array.isArray(value) || typeof value === "string") && value.length > 0;
}

/**
 * Keeps the first and last `keep` lines of `text` and replaces the middle with
 * an "omitted" marker. Returns null when `text` has at most `maxLines` lines,
 * i.e. when nothing needs to be trimmed.
 */
function keepHeadAndTail(text: string, options: HeadTailOptions): string | null {
  const { maxLines, keep, header, numbered = false } = options;
  const lines = text.split("\n");
  const total = lines.length;
  if (total <= maxLines) return null;

  return [
    header(total),
    numbered ? `[Lines 1-${keep}]:` : `[First ${keep} lines]:`,
    lines.slice(0, keep).join("\n"),
    `... [${total - 2 * keep} lines omitted] ...`,
    // The tail covers lines (total - keep + 1)..total, 1-based and inclusive.
    numbered ? `[Lines ${total - keep + 1}-${total}]:` : `[Last ${keep} lines]:`,
    lines.slice(-keep).join("\n"),
  ].join("\n");
}

/**
 * Trims one string field of a JSON tool result, or the raw result when that
 * field is absent, empty or not a string. Returns null when nothing was trimmed.
 *
 * The field is only rewritten when it was the text that got trimmed; otherwise
 * the head/tail summary of the raw text is returned on its own, so the output
 * is never the full payload plus a summary.
 */
function trimResultField(
  obj: Record<string, unknown> | null,
  field: string,
  raw: string,
  options: HeadTailOptions,
): string | null {
  const fieldText = obj ? obj[field] : undefined;
  if (obj && typeof fieldText === "string" && fieldText.length > 0) {
    const trimmed = keepHeadAndTail(fieldText, options);
    return trimmed === null ? null : JSON.stringify({ ...obj, [field]: trimmed });
  }
  return keepHeadAndTail(raw, options);
}

/**
 * Shrinks a tool result before it is placed in the LLM context.
 *
 * - Results that look like errors (`ok === false`, truthy `error`, non-empty
 *   `errors`) are returned unchanged.
 * - `fs_read`: more than 50 lines → first 20 + last 20 lines of `content`.
 * - `shell_exec`: `stdout` and `stderr` are each trimmed independently to
 *   first 15 + last 15 lines once they exceed 30 lines.
 * - tools whose name contains `logs` or `console`: same 15 + 15 rule on `log`.
 * - anything else: results longer than 2000 characters keep the first and
 *   last 1000 characters.
 *
 * @param toolName Name of the tool that produced `result`.
 * @param args Arguments the tool was called with (`path` / `command` are used in headers).
 * @param result Raw tool output, either JSON text or plain text.
 * @returns The original `result` when nothing needed trimming, else the compressed form.
 */
function compressToolResultForLLM(toolName: string, args: any, result: string): string {
  let parsed: unknown;
  try {
    parsed = JSON.parse(result);
  } catch {
    // Not JSON: handled as plain text below.
  }
  const obj = toolResultAsObject(parsed);

  // Don't compress errors, they are high signal.
  if (obj && (obj.ok === false || Boolean(obj.error) || hasEntries(obj.errors))) {
    return result;
  }

  if (toolName === "fs_read") {
    const compressed = trimResultField(obj, "content", result, {
      maxLines: 50,
      keep: 20,
      header: (total) => `[fs_read: ${args?.path} — ${total} lines total]`,
      numbered: true,
    });
    return compressed ?? result;
  }

  if (toolName === "shell_exec") {
    const options: HeadTailOptions = {
      maxLines: 30,
      keep: 15,
      header: () => `[shell_exec: ${args?.command}]`,
    };
    const stdout = obj && typeof obj.stdout === "string" ? obj.stdout : "";
    const stderr = obj && typeof obj.stderr === "string" ? obj.stderr : "";
    if (!obj || (!stdout && !stderr)) {
      return keepHeadAndTail(result, options) ?? result;
    }

    // Each stream is trimmed on its own so a short stderr cannot let a huge
    // stdout through, and one stream's text never replaces the other's.
    const trimmedStdout = stdout ? keepHeadAndTail(stdout, options) : null;
    const trimmedStderr = stderr ? keepHeadAndTail(stderr, options) : null;
    if (trimmedStdout === null && trimmedStderr === null) return result;
    return JSON.stringify({
      ...obj,
      ...(trimmedStdout !== null ? { stdout: trimmedStdout } : {}),
      ...(trimmedStderr !== null ? { stderr: trimmedStderr } : {}),
    });
  }

  if (toolName.includes("logs") || toolName.includes("console")) {
    const compressed = trimResultField(obj, "log", result, {
      maxLines: 30,
      keep: 15,
      header: () => `[${toolName}]`,
    });
    return compressed ?? result;
  }

  if (result.length > 2000) {
    return (
      result.substring(0, 1000) +
      `\n... [${result.length - 2000} chars omitted] ...\n` +
      result.substring(result.length - 1000)
    );
  }

  return result;
}

/**
 * Decides whether a tool result represents a failure.
 *
 * JSON object results are judged only by their fields: a non-zero numeric
 * `code`, `ok === false`, a truthy `error` other than the text "null", or a
 * non-empty `errors`. Anything else falls back to matching well-known failure
 * phrases in the raw text, except for log/console tools, whose output is
 * never treated as a failure by that text match.
 */
function isToolError(toolName: string, result: string): boolean {
  let parsed: unknown;
  try {
    parsed = JSON.parse(result);
  } catch {
    parsed = undefined;
  }

  if (parsed !== null && typeof parsed === "object") {
    const fields = parsed as Record<string, unknown>;
    if (typeof fields.code === "number" && fields.code !== 0) return true;
    if (fields.ok === false) return true;
    if (fields.error && !/^null$/i.test(String(fields.error))) return true;
    return hasEntries(fields.errors);
  }

  if (toolName.includes("logs") || toolName.includes("console")) return false;
  return /(econnrefused|enoent|permission denied|command not found)/.test(result.toLowerCase());
}

/**
 * Produces a short, single-line reason for a failed tool result: the first
 * three lines of `stderr`, else the `error` string, else the first three lines
 * of the raw result. Every path is capped at 200 characters so the reason
 * stays small wherever it is stored.
 */
function extractErrorReason(result: string): string {
  const maxChars = 200;
  const firstLines = (text: string): string =>
    text.split("\n").slice(0, 3).join(" ").substring(0, maxChars);

  let parsed: unknown;
  try {
    parsed = JSON.parse(result);
  } catch {
    parsed = undefined;
  }

  if (parsed !== null && typeof parsed === "object") {
    const fields = parsed as Record<string, unknown>;
    if (typeof fields.stderr === "string" && fields.stderr) return firstLines(fields.stderr);
    if (typeof fields.error === "string" && fields.error) return fields.error.substring(0, maxChars);
  }
  return firstLines(result);
}
- const history: ChatMessage[] = rows - .reverse() - .map((r: { data: ChatMessage }) => { + const history: ChatMessage[] = []; + + rows.reverse().forEach((r: { data: ChatMessage }) => { const msg = r.data as unknown as { role: string; content?: string; - toolCalls?: unknown; - _rawToolResults?: unknown; + toolCalls?: any[]; + _rawToolResults?: any[]; }; - if ( - msg.role === "assistant" && - Array.isArray(msg.toolCalls) && - msg.toolCalls.length - ) { - // Remove any tool calls completely from the history payload. - // This is the clean, standard way to pass assistant history without - // polluting the context or inducing model hallucinations. - msg.toolCalls = undefined; - msg._rawToolResults = undefined; - } + if (typeof msg.content === "string") { msg.content = msg.content .replace(/[\s\S]*?<\/tool_calls>/g, "") .replace(/[\s\S]*?<\/think>/g, "") - // Completely strip any legacy leaked "[tools executed this turn]" strings in case they exist in older messages .replace(/(?:\r?\n)*\[tools executed this turn:[\s\S]*?\]/g, "") - // Strip legacy "### Phase Checkpoint" planning walls (Goal / Findings / - // Suspected Cause / Verification Plan) from historical assistant - // messages. That flow was removed, but old threads still contain it, - // and replaying it as context biases the model into re-emitting the - // same walls + verify-everything behavior. Drop from the heading to - // the end of the message; any plain narration before it is kept. .replace(/(?:^|\n)\s*#{1,6}\s*Phase Checkpoint[\s\S]*$/i, "") .trim(); } - return msg as unknown as ChatMessage; - }) - // Drop assistant messages that became empty after stripping the internal - // checkpoint/QA walls so they don't inject blank turns into the context. 
- .filter((msg) => { - if (msg.role !== "assistant") return true; - const hasText = - typeof msg.content === "string" && msg.content.trim().length > 0; - const hasTools = - Array.isArray((msg as { toolCalls?: unknown[] }).toolCalls) && - ((msg as { toolCalls?: unknown[] }).toolCalls?.length ?? 0) > 0; - return hasText || hasTools; - }); + const isAssistant = msg.role === "assistant"; + const hasText = typeof msg.content === "string" && msg.content.trim().length > 0; + const hasTools = Array.isArray(msg.toolCalls) && msg.toolCalls.length > 0; + + if (!isAssistant || hasText || hasTools) { + history.push(msg as unknown as ChatMessage); + + // Reconstruct compressed tool messages from _rawToolResults so the LLM remembers its actions! + if (isAssistant && hasTools && Array.isArray(msg._rawToolResults)) { + for (const tc of msg.toolCalls!) { + const rawRes = msg._rawToolResults.find(tr => tr.name === tc.name && JSON.stringify(tr.args) === JSON.stringify(tc.args)); + const resultString = typeof rawRes?.result === 'string' ? 
rawRes.result : JSON.stringify(rawRes?.result || { ok: true }); + + history.push({ + role: "tool", + content: compressToolResultForLLM(tc.name, tc.args, resultString), + toolCallId: tc.id, + toolName: tc.name, + }); + } + } + } + + msg._rawToolResults = undefined; // Don't send this custom field to the LLM + }); // Add user message const userMsg: ChatMessage = { role: "user", content: message.trim() }; @@ -1137,7 +1227,7 @@ export async function POST(request: Request) { }); messages.push({ role: "tool", - content: result, + content: compressToolResultForLLM(tc.name, tc.args, result), toolCallId: tc.id, toolName: tc.name, thoughtSignature: tc.thoughtSignature, @@ -1351,11 +1441,28 @@ export async function POST(request: Request) { messages.push({ role: "tool", - content: result, + content: compressToolResultForLLM(tc.name, tc.args, result), toolCallId: tc.id, toolName: tc.name, thoughtSignature: tc.thoughtSignature, }); + + // Auto-append failures to agent memory + if (isToolError(tc.name, result) && activeProject?.id) { + agentMemory.failed_strategies.push({ + tool: tc.name, + args: tc.args, + reason: extractErrorReason(result), + timestamp: Date.now() + }); + if (agentMemory.failed_strategies.length > 20) { + agentMemory.failed_strategies = agentMemory.failed_strategies.slice(-20); + } + query( + `UPDATE fs_projects SET data = jsonb_set(data, '{agent_memory}', $2::jsonb, true) WHERE id = $1`, + [activeProject.id, JSON.stringify(agentMemory)] + ).catch(() => {}); + } const recovery = detectKnownError(result); if (recovery) recoveryLines.push(formatRecoveryMessage(recovery)); diff --git a/vibn-frontend/app/api/mcp/route.ts b/vibn-frontend/app/api/mcp/route.ts index 73123fe8..5229b118 100644 --- a/vibn-frontend/app/api/mcp/route.ts +++ b/vibn-frontend/app/api/mcp/route.ts @@ -367,6 +367,8 @@ export async function POST(request: Request) { case "apps.templates.search": return await toolAppsTemplatesSearch(params); + case "update.memory": + return await 
/**
 * MCP handler for the `update.memory` action: stores a persistent fact or the
 * current plan under `data.agent_memory` of an `fs_projects` row.
 *
 * Supported `params.operation` values:
 * - `set_fact`   — requires `key` and `value`; writes `facts[key] = value`.
 * - `set_plan`   — requires `value`; writes `current_plan`.
 * - `clear_plan` — sets `current_plan` to null.
 *
 * @param principal Caller identity.
 * @param params Action parameters: `operation`, `projectId`, and optionally `key` / `value`.
 * @returns A JSON response: 400 on invalid input, 404 when the project row is
 *          missing, otherwise a success message.
 */
async function toolUpdateMemory(
  principal: Principal,
  params: Record<string, unknown>
) {
  // NOTE(review): `principal` is not consulted in this function, so nothing here
  // ties `projectId` to the caller — confirm access is verified before dispatch.
  const { operation, key, value } = params;
  const rawProjectId = params.projectId;
  const projectId =
    typeof rawProjectId === "string" || typeof rawProjectId === "number"
      ? String(rawProjectId)
      : "";
  if (!projectId) return NextResponse.json({ error: "projectId required" }, { status: 400 });

  // Only null/undefined/"" count as missing, so values such as 0 or false are
  // stored (as "0" / "false") instead of being rejected as absent.
  const isMissing = (input: unknown): boolean => input == null || String(input) === "";

  const projectRow = await queryOne<{ data: { agent_memory?: unknown } | null }>(
    `SELECT data FROM fs_projects WHERE id = $1`,
    [projectId]
  );
  if (!projectRow) return NextResponse.json({ error: "Project not found" }, { status: 404 });

  // Start from the stored memory when it is a plain object; anything else
  // (missing, array, primitive) is replaced by an empty memory.
  const stored = projectRow.data?.agent_memory;
  const agentMemory: Record<string, unknown> =
    stored !== null && typeof stored === "object" && !Array.isArray(stored)
      ? (stored as Record<string, unknown>)
      : {};
  const existingFacts = agentMemory.facts;
  const facts: Record<string, unknown> =
    existingFacts !== null && typeof existingFacts === "object" && !Array.isArray(existingFacts)
      ? (existingFacts as Record<string, unknown>)
      : {};
  agentMemory.facts = facts;
  if (!Array.isArray(agentMemory.failed_strategies)) agentMemory.failed_strategies = [];

  if (operation === "set_fact") {
    if (isMissing(key) || isMissing(value)) {
      return NextResponse.json({ error: "key and value required for set_fact" }, { status: 400 });
    }
    facts[String(key)] = String(value);
  } else if (operation === "set_plan") {
    if (isMissing(value)) {
      return NextResponse.json({ error: "value required for set_plan" }, { status: 400 });
    }
    agentMemory.current_plan = String(value);
  } else if (operation === "clear_plan") {
    agentMemory.current_plan = null;
  } else {
    return NextResponse.json({ error: "Invalid operation" }, { status: 400 });
  }

  // The whole agent_memory object is read above and written back here, so a
  // write by another request between the SELECT and this UPDATE is overwritten.
  await query(
    `UPDATE fs_projects SET data = jsonb_set(data, '{agent_memory}', $2::jsonb, true) WHERE id = $1`,
    [projectId, JSON.stringify(agentMemory)]
  );

  return NextResponse.json({ result: `Memory updated successfully via ${operation}` });
}