Implement LLM context compression and persistent memory

2026-06-15 14:47:36 -07:00
parent 17c8681073
commit 035cdca846
3 changed files with 208 additions and 38 deletions
--- a/vibn-agent-runner/src/tools/vibn-tools.ts
+++ b/vibn-agent-runner/src/tools/vibn-tools.ts
@@ -831,6 +831,28 @@ After this returns, ALWAYS call apps_deploy { uuid } to regenerate the live Trae

  // ── Databases ─────────────────────────────────────────────────────────────

+  {
+    name: "update_memory",
+    description: "Write a persistent fact about this project that should be remembered across all turns. Use this when you discover something that will affect future actions — e.g. the correct start command, the actual entry point file, a broken dependency. Do NOT use this for temporary observations.",
+    parameters: {
+      type: "OBJECT",
+      properties: {
+        operation: {
+          type: "STRING",
+          description: "set_fact: store a key/value fact. set_plan: record your current high-level plan. clear_plan: mark the plan as complete."
+        },
+        key: {
+          type: "STRING",
+          description: "For set_fact: the fact name (e.g. 'start_command', 'entry_point')"
+        },
+        value: {
+          type: "STRING", 
+          description: "For set_fact or set_plan: the value to store"
+        }
+      },
+      required: ["operation"],
+    },
+  },
  {
    name: "workspace_db_query",
    description: "Run a read-only SQL query against the workspace's main production/telemetry database (the one powering Next.js + Telemetry). ONLY USE THIS IF THE USER ASKS FOR LOGS OR TELEMETRY USAGE DATA.",
@@ -1881,6 +1903,7 @@ export async function executeMcpTool(
  // Convert underscore tool name → dotted MCP action (apps_create → apps.create)
  let action = toolName.replace(/_/g, ".");
  if (toolName === "workspace_db_query") action = "workspace.db_query";
+  if (toolName === "update_memory") action = "update.memory";

  // Unpack JSON-string args (Gemini schemas can't represent free-form objects,
  // so we accept *Json string fields and parse them server-side).
--- a/vibn-frontend/app/api/chat/route.ts
+++ b/vibn-frontend/app/api/chat/route.ts
@@ -629,6 +629,72 @@ function extractPreviewUrl(messages: ChatMessage[]): string | undefined {
 }


+
+/**
+ * Shrinks a tool result before it is placed in the LLM context.
+ *
+ * Rules, applied in order:
+ *  - JSON results that look like failures (`ok === false`, a truthy `error`, or a
+ *    non-empty `errors`) are returned untouched.
+ *  - `fs_read`: text longer than 50 lines keeps its first and last 20 lines.
+ *  - `shell_exec`: `stdout` and `stderr` are each reduced to their first and last
+ *    15 lines when longer than 30 lines; each stream is handled independently.
+ *  - tool names containing `logs` or `console`: same 15/15 rule for the `log` text.
+ *  - anything else (including results whose expected text field is missing or
+ *    not a string) keeps its first and last 1000 characters when longer than 2000.
+ *
+ * When the result is a JSON object only the affected field is replaced and the
+ * object is re-serialised; for plain text the excerpt itself is returned.
+ *
+ * @param toolName name of the tool that produced `result`
+ * @param args     tool arguments; only `path` / `command` are read, for the header line
+ * @param result   raw tool output (JSON or plain text)
+ * @returns `result` unchanged, or a shortened version of it
+ */
+function compressToolResultForLLM(toolName: string, args: any, result: string): string {
+  let parsed: any;
+  try { parsed = JSON.parse(result); } catch { /* not JSON: handled as plain text below */ }
+
+  if (parsed && (parsed.ok === false || parsed.error || parsed.errors?.length > 0)) {
+    return result; // Don't compress errors, they are high signal
+  }
+
+  // Only a plain JSON object can have one field swapped out and be re-serialised;
+  // arrays and primitives are treated like raw text.
+  const obj = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed) ? parsed : undefined;
+
+  // First/last 15 lines of `text`, or undefined when it has 30 lines or fewer.
+  const firstLast15 = (text: string, header: string): string | undefined => {
+    const lines = text.split('\n');
+    if (lines.length <= 30) return undefined;
+    return [
+      header,
+      `[First 15 lines]:`,
+      lines.slice(0, 15).join('\n'),
+      `... [${lines.length - 30} lines omitted] ...`,
+      `[Last 15 lines]:`,
+      lines.slice(-15).join('\n')
+    ].join('\n');
+  };
+
+  if (toolName === 'fs_read') {
+    const content = obj ? obj.content : result;
+    // A missing or non-string `content` cannot be split; it falls through to the generic cap.
+    if (typeof content === 'string' && content.length > 0) {
+      const lines = content.split('\n');
+      if (lines.length <= 50) return result;
+      const compressed = [
+        `[fs_read: ${args?.path} — ${lines.length} lines total]`,
+        `[Lines 1-20]:`,
+        lines.slice(0, 20).join('\n'),
+        `... [${lines.length - 40} lines omitted] ...`,
+        // The last 20 lines are numbered (n - 19) through n.
+        `[Lines ${lines.length - 19}-${lines.length}]:`,
+        lines.slice(-20).join('\n')
+      ].join('\n');
+      return obj ? JSON.stringify({ ...obj, content: compressed }) : compressed;
+    }
+  }
+
+  if (toolName === 'shell_exec') {
+    const header = `[run_terminal: ${args?.command}]`;
+    if (!obj) return firstLast15(result, header) ?? result;
+
+    // Compress each stream on its own so stdout is never replaced by stderr's
+    // excerpt and a long stdout is still shortened when stderr is short.
+    const patch: Record<string, string> = {};
+    for (const stream of ['stdout', 'stderr'] as const) {
+      const text = obj[stream];
+      const excerpt = typeof text === 'string' ? firstLast15(text, header) : undefined;
+      if (excerpt !== undefined) patch[stream] = excerpt;
+    }
+    if (Object.keys(patch).length > 0) return JSON.stringify({ ...obj, ...patch });
+    // Streams are present but already short: nothing to do.
+    if (typeof obj.stdout === 'string' || typeof obj.stderr === 'string') return result;
+  }
+
+  if (toolName.includes('logs') || toolName.includes('console')) {
+    const log = obj ? obj.log : result;
+    if (typeof log === 'string' && log.length > 0) {
+      const compressed = firstLast15(log, `[${toolName}]`);
+      if (compressed === undefined) return result;
+      return obj ? JSON.stringify({ ...obj, log: compressed }) : compressed;
+    }
+  }
+
+  // Generic cap: keep the first and last 1000 characters.
+  if (result.length > 2000) {
+    return result.substring(0, 1000) + `\n... [${result.length - 2000} chars omitted] ...\n` + result.substring(result.length - 1000);
+  }
+
+  return result;
+}
+
 function summarizeForUI(raw: string): string {
  try {
    const p = JSON.parse(raw);
@@ -652,6 +718,35 @@ function summarizeForUI(raw: string): string {
  return raw.slice(0, 500);
 }

+
+/**
+ * Decides whether a tool result represents a failure.
+ *
+ * A result that parses to a JSON object is judged only by its fields: a
+ * non-zero numeric `code`, `ok === false`, a truthy `error` whose string form
+ * is not "null" (any case), or a non-empty `errors`. Any other result is
+ * scanned, case-insensitively, for a few well-known failure phrases — unless
+ * the tool name contains `logs` or `console`, in which case it is never
+ * reported as an error.
+ *
+ * @param toolName name of the tool that produced `result`
+ * @param result   raw tool output (JSON or plain text)
+ * @returns true when the result looks like an error
+ */
+function isToolError(toolName: string, result: string): boolean {
+  let payload: any;
+  try {
+    payload = JSON.parse(result);
+  } catch {
+    // Not JSON: payload stays undefined and the text heuristics below apply.
+  }
+
+  if (payload && typeof payload === "object") {
+    const nonZeroCode = typeof payload.code === "number" && payload.code !== 0;
+    const markedNotOk = payload.ok === false;
+    const hasError = Boolean(payload.error) && !/^null$/i.test(String(payload.error));
+    const hasErrors = Boolean(payload.errors) && payload.errors.length > 0;
+    return nonZeroCode || markedNotOk || hasError || hasErrors;
+  }
+
+  const isLogLikeTool = toolName.includes('logs') || toolName.includes('console');
+  if (isLogLikeTool) {
+    return false;
+  }
+
+  return /(econnrefused|enoent|permission denied|command not found)/.test(result.toLowerCase());
+}
+
+/**
+ * Produces a short reason string for a failed tool result.
+ *
+ * For a JSON object result, the first three lines of a non-blank string
+ * `stderr` are used, otherwise a non-empty string `error`. For anything else
+ * the first three lines of the raw text are used. Every path is capped at 200
+ * characters so a large error payload cannot bloat whatever stores the reason.
+ *
+ * @param result raw tool output (JSON or plain text)
+ * @returns a reason of at most 200 characters
+ */
+function extractErrorReason(result: string): string {
+  const MAX_REASON_CHARS = 200;
+  // Joins the first three lines with spaces and applies the length cap.
+  const summarize = (text: string): string =>
+    text.split('\n').slice(0, 3).join(' ').substring(0, MAX_REASON_CHARS);
+
+  try {
+    const p = JSON.parse(result);
+    if (p && typeof p === "object") {
+      // A whitespace-only stderr carries no information; fall through to `error`.
+      if (typeof p.stderr === "string" && p.stderr.trim()) return summarize(p.stderr.trim());
+      if (typeof p.error === "string" && p.error) return p.error.substring(0, MAX_REASON_CHARS);
+    }
+  } catch {
+    // Not JSON: summarise the raw text below.
+  }
+  return summarize(result);
+}
+
 export async function POST(request: Request) {
  await ensureChatTables();

@@ -727,55 +822,50 @@ export async function POST(request: Request) {
  // followed by tool messages responding to each 'tool_call_id'."
  // Gemini silently tolerates stale toolCalls, so we only hit this on
  // non-Gemini providers.
-  const history: ChatMessage[] = rows
-    .reverse()
-    .map((r: { data: ChatMessage }) => {
+  const history: ChatMessage[] = [];
+  
+  rows.reverse().forEach((r: { data: ChatMessage }) => {
      const msg = r.data as unknown as {
        role: string;
        content?: string;
-        toolCalls?: unknown;
-        _rawToolResults?: unknown;
+        toolCalls?: any[];
+        _rawToolResults?: any[];
      };
-      if (
-        msg.role === "assistant" &&
-        Array.isArray(msg.toolCalls) &&
-        msg.toolCalls.length
-      ) {
-        // Remove any tool calls completely from the history payload.
-        // This is the clean, standard way to pass assistant history without
-        // polluting the context or inducing model hallucinations.
-        msg.toolCalls = undefined;
-        msg._rawToolResults = undefined;
-      }
+      
      if (typeof msg.content === "string") {
        msg.content = msg.content
          .replace(/<tool_calls>[\s\S]*?<\/tool_calls>/g, "")
          .replace(/<think>[\s\S]*?<\/think>/g, "")
-          // Completely strip any legacy leaked "[tools executed this turn]" strings in case they exist in older messages
          .replace(/(?:\r?\n)*\[tools executed this turn:[\s\S]*?\]/g, "")
-          // Strip legacy "### Phase Checkpoint" planning walls (Goal / Findings /
-          // Suspected Cause / Verification Plan) from historical assistant
-          // messages. That flow was removed, but old threads still contain it,
-          // and replaying it as context biases the model into re-emitting the
-          // same walls + verify-everything behavior. Drop from the heading to
-          // the end of the message; any plain narration before it is kept.
          .replace(/(?:^|\n)\s*#{1,6}\s*Phase Checkpoint[\s\S]*$/i, "")
          .trim();
      }

-      return msg as unknown as ChatMessage;
-    })
-    // Drop assistant messages that became empty after stripping the internal
-    // checkpoint/QA walls so they don't inject blank turns into the context.
-    .filter((msg) => {
-      if (msg.role !== "assistant") return true;
-      const hasText =
-        typeof msg.content === "string" && msg.content.trim().length > 0;
-      const hasTools =
-        Array.isArray((msg as { toolCalls?: unknown[] }).toolCalls) &&
-        ((msg as { toolCalls?: unknown[] }).toolCalls?.length ?? 0) > 0;
-      return hasText || hasTools;
-    });
+      const isAssistant = msg.role === "assistant";
+      const hasText = typeof msg.content === "string" && msg.content.trim().length > 0;
+      const hasTools = Array.isArray(msg.toolCalls) && msg.toolCalls.length > 0;
+
+      if (!isAssistant || hasText || hasTools) {
+        history.push(msg as unknown as ChatMessage);
+        
+        // Reconstruct compressed tool messages from _rawToolResults so the LLM remembers its actions!
+        if (isAssistant && hasTools && Array.isArray(msg._rawToolResults)) {
+           for (const tc of msg.toolCalls!) {
+             const rawRes = msg._rawToolResults.find(tr => tr.name === tc.name && JSON.stringify(tr.args) === JSON.stringify(tc.args));
+             const resultString = typeof rawRes?.result === 'string' ? rawRes.result : JSON.stringify(rawRes?.result || { ok: true });
+             
+             history.push({
+               role: "tool",
+               content: compressToolResultForLLM(tc.name, tc.args, resultString),
+               toolCallId: tc.id,
+               toolName: tc.name,
+             });
+           }
+        }
+      }
+      
+      msg._rawToolResults = undefined; // Don't send this custom field to the LLM
+  });

  // Add user message
  const userMsg: ChatMessage = { role: "user", content: message.trim() };
@@ -1137,7 +1227,7 @@ export async function POST(request: Request) {
            });
            messages.push({
              role: "tool",
-              content: result,
+              content: compressToolResultForLLM(tc.name, tc.args, result),
              toolCallId: tc.id,
              toolName: tc.name,
              thoughtSignature: tc.thoughtSignature,
@@ -1351,11 +1441,28 @@ export async function POST(request: Request) {

            messages.push({
              role: "tool",
-              content: result,
+              content: compressToolResultForLLM(tc.name, tc.args, result),
              toolCallId: tc.id,
              toolName: tc.name,
              thoughtSignature: tc.thoughtSignature,
            });
+            
+            // Auto-append failures to agent memory
+            if (isToolError(tc.name, result) && activeProject?.id) {
+              agentMemory.failed_strategies.push({
+                tool: tc.name,
+                args: tc.args,
+                reason: extractErrorReason(result),
+                timestamp: Date.now()
+              });
+              if (agentMemory.failed_strategies.length > 20) {
+                agentMemory.failed_strategies = agentMemory.failed_strategies.slice(-20);
+              }
+              query(
+                `UPDATE fs_projects SET data = jsonb_set(data, '{agent_memory}', $2::jsonb, true) WHERE id = $1`,
+                [activeProject.id, JSON.stringify(agentMemory)]
+              ).catch(() => {});
+            }

            const recovery = detectKnownError(result);
            if (recovery) recoveryLines.push(formatRecoveryMessage(recovery));
--- a/vibn-frontend/app/api/mcp/route.ts
+++ b/vibn-frontend/app/api/mcp/route.ts
@@ -367,6 +367,8 @@ export async function POST(request: Request) {
      case "apps.templates.search":
        return await toolAppsTemplatesSearch(params);

+      case "update.memory":
+        return await toolUpdateMemory(principal, params);
      case "workspace.db_query":
        return await toolWorkspaceDbQuery(principal, params);
      case "databases.list":
@@ -3260,6 +3262,44 @@ const DB_TYPES: readonly CoolifyDatabaseType[] = [



+
+/**
+ * MCP handler for `update.memory`: updates the `agent_memory` object stored in
+ * a project's `data` JSON column.
+ *
+ * Supported `operation` values:
+ *  - `set_fact`   — stores `value` under `facts[key]`
+ *  - `set_plan`   — stores `value` as `current_plan`
+ *  - `clear_plan` — sets `current_plan` to null
+ *
+ * `key` and `value` must be non-empty strings, numbers or booleans; objects are
+ * rejected rather than being stored as "[object Object]".
+ *
+ * NOTE(review): `principal` is not consulted in this handler, so it does not
+ * itself verify that the caller may modify `projectId` — confirm access is
+ * enforced upstream or add an ownership check here.
+ * NOTE(review): this is a read-modify-write of the whole `agent_memory` value;
+ * a concurrent writer to the same project could overwrite this update.
+ *
+ * @param principal authenticated caller (currently unused, see note above)
+ * @param params    `{ operation, key?, value?, projectId }`
+ * @returns a JSON response: 400 on invalid input, 404 when the project does not
+ *          exist, otherwise a success message
+ */
+async function toolUpdateMemory(
+  principal: Principal,
+  params: Record<string, unknown>
+) {
+  const { operation, key, value, projectId } = params;
+  if (!projectId) return NextResponse.json({ error: "projectId required" }, { status: 400 });
+
+  // Accept only primitives that have a meaningful string form.
+  const asText = (input: unknown): string | undefined => {
+    if (typeof input === "string") return input.length > 0 ? input : undefined;
+    if (typeof input === "number" || typeof input === "boolean") return String(input);
+    return undefined;
+  };
+
+  const projectRow = await queryOne<{ id: string; data: any }>(
+    `SELECT data FROM fs_projects WHERE id = $1`,
+    [projectId]
+  );
+  if (!projectRow) return NextResponse.json({ error: "Project not found" }, { status: 404 });
+
+  // Start from the stored memory only when it has the expected shape.
+  const stored = projectRow.data?.agent_memory;
+  const agentMemory = stored && typeof stored === "object" && !Array.isArray(stored) ? stored : {};
+  if (!agentMemory.facts || typeof agentMemory.facts !== "object" || Array.isArray(agentMemory.facts)) {
+    agentMemory.facts = {};
+  }
+  if (!Array.isArray(agentMemory.failed_strategies)) agentMemory.failed_strategies = [];
+
+  if (operation === 'set_fact') {
+    const factKey = asText(key);
+    const factValue = asText(value);
+    if (factKey === undefined || factValue === undefined) {
+      return NextResponse.json({ error: "key and value required for set_fact" }, { status: 400 });
+    }
+    // A computed key always creates an own property, so a key such as
+    // "__proto__" is stored as data instead of being silently discarded.
+    agentMemory.facts = { ...agentMemory.facts, [factKey]: factValue };
+  } else if (operation === 'set_plan') {
+    const plan = asText(value);
+    if (plan === undefined) return NextResponse.json({ error: "value required for set_plan" }, { status: 400 });
+    agentMemory.current_plan = plan;
+  } else if (operation === 'clear_plan') {
+    agentMemory.current_plan = null;
+  } else {
+    return NextResponse.json({ error: "Invalid operation" }, { status: 400 });
+  }
+
+  await query(
+    `UPDATE fs_projects SET data = jsonb_set(data, '{agent_memory}', $2::jsonb, true) WHERE id = $1`,
+    [projectId, JSON.stringify(agentMemory)]
+  );
+
+  return NextResponse.json({ result: `Memory updated successfully via ${operation}` });
+}
+
 async function toolWorkspaceDbQuery(
  principal: Principal,
  params: Record<string, unknown>