Implement LLM context compression and persistent memory

This commit is contained in:
2026-06-15 14:47:36 -07:00
parent 17c8681073
commit 035cdca846
3 changed files with 208 additions and 38 deletions

View File

@@ -831,6 +831,28 @@ After this returns, ALWAYS call apps_deploy { uuid } to regenerate the live Trae
// ── Databases ─────────────────────────────────────────────────────────────
// Tool declaration for `update_memory`: lets the model persist project-level
// facts and its current plan across turns.
// NOTE(review): this entry sits directly under the "Databases" section header,
// but it is a memory tool rather than a database tool — consider moving it.
{
name: "update_memory",
description: "Write a persistent fact about this project that should be remembered across all turns. Use this when you discover something that will affect future actions — e.g. the correct start command, the actual entry point file, a broken dependency. Do NOT use this for temporary observations.",
parameters: {
type: "OBJECT",
properties: {
// Selects the behavior; the description lists set_fact, set_plan and clear_plan.
operation: {
type: "STRING",
description: "set_fact: store a key/value fact. set_plan: record your current high-level plan. clear_plan: mark the plan as complete."
},
// Only meaningful for set_fact, per its description.
key: {
type: "STRING",
description: "For set_fact: the fact name (e.g. 'start_command', 'entry_point')"
},
// Payload for set_fact and set_plan, per its description.
value: {
type: "STRING",
description: "For set_fact or set_plan: the value to store"
}
},
// Only `operation` is required by the schema; `key` and `value` are optional here.
// NOTE(review): no `projectId` property is declared — presumably the caller
// injects it before the tool is dispatched; verify.
required: ["operation"],
},
},
{
name: "workspace_db_query",
description: "Run a read-only SQL query against the workspace's main production/telemetry database (the one powering Next.js + Telemetry). ONLY USE THIS IF THE USER ASKS FOR LOGS OR TELEMETRY USAGE DATA.",
@@ -1881,6 +1903,7 @@ export async function executeMcpTool(
// Convert underscore tool name → dotted MCP action (apps_create → apps.create)
let action = toolName.replace(/_/g, ".");
if (toolName === "workspace_db_query") action = "workspace.db_query";
if (toolName === "update_memory") action = "update.memory";
// Unpack JSON-string args (Gemini schemas can't represent free-form objects,
// so we accept *Json string fields and parse them server-side).

View File

@@ -629,6 +629,72 @@ function extractPreviewUrl(messages: ChatMessage[]): string | undefined {
}
/**
 * Build a head/tail excerpt of `text`, keeping the first and last `keep` lines.
 *
 * @param text - Text to excerpt; split on `\n`.
 * @param opts.keep - Number of lines kept at each end.
 * @param opts.maxLines - Texts with at most this many lines are left alone.
 * @param opts.header - Builds the first line of the excerpt from the total line count.
 * @param opts.numbered - When true, label the kept ranges with 1-based line numbers
 *   instead of "First/Last N lines".
 * @returns The excerpt, or `null` when `text` has `maxLines` lines or fewer.
 */
function elideMiddleLinesForLLM(
  text: string,
  opts: { keep: number; maxLines: number; header: (total: number) => string; numbered?: boolean },
): string | null {
  const lines = text.split('\n');
  const total = lines.length;
  if (total <= opts.maxLines) return null;

  const { keep } = opts;
  const headLabel = opts.numbered ? `[Lines 1-${keep}]:` : `[First ${keep} lines]:`;
  // The tail shows exactly `keep` lines, so it starts at total - keep + 1 (1-based).
  const tailLabel = opts.numbered
    ? `[Lines ${total - keep + 1}-${total}]:`
    : `[Last ${keep} lines]:`;

  return [
    opts.header(total),
    headLabel,
    lines.slice(0, keep).join('\n'),
    `... [${total - 2 * keep} lines omitted] ...`,
    tailLabel,
    lines.slice(-keep).join('\n'),
  ].join('\n');
}

/**
 * Shrink a tool result before it is placed in the LLM context.
 *
 * - Results that look like errors (`ok === false`, a truthy `error`, or a
 *   non-empty `errors`) are returned untouched: they are high signal.
 * - `fs_read`: file content over 50 lines is reduced to the first/last 20 lines.
 * - `shell_exec`: `stdout` and `stderr` are each reduced independently to their
 *   first/last 15 lines when they exceed 30 lines.
 * - Tools whose name contains `logs` or `console`: `log` is reduced like shell output.
 * - Anything else over 2000 characters keeps its first and last 1000 characters.
 *
 * When the result is a JSON object, the object is re-serialized with only the
 * compressed field(s) replaced; otherwise the excerpt is returned as plain text.
 *
 * @param toolName - Name of the tool that produced `result`.
 * @param args - Tool arguments; only `path` / `command` are read, for the header line.
 * @param result - Raw tool output (JSON or plain text).
 * @returns The compressed result, or `result` itself when nothing needed trimming.
 */
function compressToolResultForLLM(toolName: string, args: any, result: string): string {
  let parsed: unknown;
  try {
    parsed = JSON.parse(result);
  } catch {
    // Not JSON: handled as plain text below.
  }

  // Only plain objects are treated as structured results; spreading a parsed
  // string, number or array would produce garbage keys.
  const obj =
    parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)
      ? (parsed as Record<string, unknown>)
      : undefined;

  if (obj) {
    const errorCount = (obj.errors as { length?: unknown } | null | undefined)?.length;
    const hasErrors = typeof errorCount === 'number' && errorCount > 0;
    if (obj.ok === false || obj.error || hasErrors) {
      return result; // Don't compress errors, they are high signal
    }
  }

  // Non-empty string field of the parsed object, or undefined (guards `.split`).
  const field = (name: string): string | undefined => {
    const value = obj?.[name];
    return typeof value === 'string' && value.length > 0 ? value : undefined;
  };

  if (toolName === 'fs_read') {
    const content = field('content');
    const excerpt = elideMiddleLinesForLLM(content ?? result, {
      keep: 20,
      maxLines: 50,
      numbered: true,
      header: (total) => `[fs_read: ${args?.path} - ${total} lines total]`,
    });
    if (excerpt === null) return result;
    return content !== undefined ? JSON.stringify({ ...obj, content: excerpt }) : excerpt;
  }

  if (toolName === 'shell_exec') {
    const opts = {
      keep: 15,
      maxLines: 30,
      header: () => `[run_terminal: ${args?.command}]`,
    };
    const stdout = field('stdout');
    const stderr = field('stderr');
    if (stdout !== undefined || stderr !== undefined) {
      // Compress each stream on its own so stdout is never overwritten with
      // stderr's excerpt, and a short stderr cannot hide an oversized stdout.
      const stdoutExcerpt = stdout !== undefined ? elideMiddleLinesForLLM(stdout, opts) : null;
      const stderrExcerpt = stderr !== undefined ? elideMiddleLinesForLLM(stderr, opts) : null;
      if (stdoutExcerpt === null && stderrExcerpt === null) return result;
      const next: Record<string, unknown> = { ...obj };
      if (stdoutExcerpt !== null) next.stdout = stdoutExcerpt;
      if (stderrExcerpt !== null) next.stderr = stderrExcerpt;
      return JSON.stringify(next);
    }
    return elideMiddleLinesForLLM(result, opts) ?? result;
  }

  if (toolName.includes('logs') || toolName.includes('console')) {
    const log = field('log');
    const excerpt = elideMiddleLinesForLLM(log ?? result, {
      keep: 15,
      maxLines: 30,
      header: () => `[${toolName}]`,
    });
    if (excerpt === null) return result;
    return log !== undefined ? JSON.stringify({ ...obj, log: excerpt }) : excerpt;
  }

  if (result.length > 2000) {
    return (
      result.substring(0, 1000) +
      `\n... [${result.length - 2000} chars omitted] ...\n` +
      result.substring(result.length - 1000)
    );
  }
  return result;
}
function summarizeForUI(raw: string): string {
try {
const p = JSON.parse(raw);
@@ -652,6 +718,35 @@ function summarizeForUI(raw: string): string {
return raw.slice(0, 500);
}
/**
 * Decide whether a tool result represents a failure.
 *
 * JSON object results are judged only by their structured fields: a non-zero
 * numeric `code`, `ok === false`, a truthy `error` other than the literal text
 * "null", or a non-empty `errors`. Anything else falls back to scanning the raw
 * text for well-known failure phrases — except for log/console tools, whose
 * output is never treated as an error on text alone.
 *
 * @param toolName - Name of the tool that produced `result`.
 * @param result - Raw tool output (JSON or plain text).
 * @returns `true` when the result looks like a failure.
 */
function isToolError(toolName: string, result: string): boolean {
  try {
    const payload = JSON.parse(result);
    if (payload && typeof payload === "object") {
      // Evaluated lazily, in order, so the first matching signal wins.
      const signals: Array<() => boolean> = [
        () => typeof payload.code === "number" && payload.code !== 0,
        () => payload.ok === false,
        () => Boolean(payload.error) && !/^null$/i.test(String(payload.error)),
        () => Boolean(payload.errors) && payload.errors.length > 0,
      ];
      return signals.some((signal) => signal());
    }
  } catch {}

  const isLogTool = toolName.includes('logs') || toolName.includes('console');
  if (isLogTool) return false;

  const failurePhrases = /(econnrefused|enoent|permission denied|command not found)/;
  return failurePhrases.test(result.toLowerCase());
}
/**
 * Produce a short, single-line reason string from a failed tool result.
 *
 * Preference order: the `stderr` field of a JSON result, then its `error`
 * field, then the raw result text. Whichever source is used, only its first
 * three lines are kept (joined with spaces) and the outcome is capped at 200
 * characters, so the reason stays small no matter which field it came from.
 *
 * @param result - Raw tool output (JSON or plain text).
 * @returns A reason of at most 200 characters.
 */
function extractErrorReason(result: string): string {
  const MAX_REASON_CHARS = 200;
  const summarize = (text: string): string =>
    text.split('\n').slice(0, 3).join(' ').substring(0, MAX_REASON_CHARS);

  try {
    const parsed: unknown = JSON.parse(result);
    if (parsed && typeof parsed === "object") {
      const { stderr, error } = parsed as { stderr?: unknown; error?: unknown };
      // Skip blank stderr so it cannot mask a real `error` message.
      if (typeof stderr === "string" && stderr.trim().length > 0) return summarize(stderr);
      if (typeof error === "string" && error.trim().length > 0) return summarize(error);
    }
  } catch {
    // Not JSON: summarize the raw text below.
  }
  return summarize(result);
}
export async function POST(request: Request) {
await ensureChatTables();
@@ -727,55 +822,50 @@ export async function POST(request: Request) {
// followed by tool messages responding to each 'tool_call_id'."
// Gemini silently tolerates stale toolCalls, so we only hit this on
// non-Gemini providers.
const history: ChatMessage[] = rows
.reverse()
.map((r: { data: ChatMessage }) => {
const history: ChatMessage[] = [];
rows.reverse().forEach((r: { data: ChatMessage }) => {
const msg = r.data as unknown as {
role: string;
content?: string;
toolCalls?: unknown;
_rawToolResults?: unknown;
toolCalls?: any[];
_rawToolResults?: any[];
};
if (
msg.role === "assistant" &&
Array.isArray(msg.toolCalls) &&
msg.toolCalls.length
) {
// Remove any tool calls completely from the history payload.
// This is the clean, standard way to pass assistant history without
// polluting the context or inducing model hallucinations.
msg.toolCalls = undefined;
msg._rawToolResults = undefined;
}
if (typeof msg.content === "string") {
msg.content = msg.content
.replace(/<tool_calls>[\s\S]*?<\/tool_calls>/g, "")
.replace(/<think>[\s\S]*?<\/think>/g, "")
// Completely strip any legacy leaked "[tools executed this turn]" strings in case they exist in older messages
.replace(/(?:\r?\n)*\[tools executed this turn:[\s\S]*?\]/g, "")
// Strip legacy "### Phase Checkpoint" planning walls (Goal / Findings /
// Suspected Cause / Verification Plan) from historical assistant
// messages. That flow was removed, but old threads still contain it,
// and replaying it as context biases the model into re-emitting the
// same walls + verify-everything behavior. Drop from the heading to
// the end of the message; any plain narration before it is kept.
.replace(/(?:^|\n)\s*#{1,6}\s*Phase Checkpoint[\s\S]*$/i, "")
.trim();
}
return msg as unknown as ChatMessage;
})
// Drop assistant messages that became empty after stripping the internal
// checkpoint/QA walls so they don't inject blank turns into the context.
.filter((msg) => {
if (msg.role !== "assistant") return true;
const hasText =
typeof msg.content === "string" && msg.content.trim().length > 0;
const hasTools =
Array.isArray((msg as { toolCalls?: unknown[] }).toolCalls) &&
((msg as { toolCalls?: unknown[] }).toolCalls?.length ?? 0) > 0;
return hasText || hasTools;
});
const isAssistant = msg.role === "assistant";
const hasText = typeof msg.content === "string" && msg.content.trim().length > 0;
const hasTools = Array.isArray(msg.toolCalls) && msg.toolCalls.length > 0;
if (!isAssistant || hasText || hasTools) {
history.push(msg as unknown as ChatMessage);
// Reconstruct compressed tool messages from _rawToolResults so the LLM remembers its actions!
if (isAssistant && hasTools && Array.isArray(msg._rawToolResults)) {
for (const tc of msg.toolCalls!) {
const rawRes = msg._rawToolResults.find(tr => tr.name === tc.name && JSON.stringify(tr.args) === JSON.stringify(tc.args));
const resultString = typeof rawRes?.result === 'string' ? rawRes.result : JSON.stringify(rawRes?.result || { ok: true });
history.push({
role: "tool",
content: compressToolResultForLLM(tc.name, tc.args, resultString),
toolCallId: tc.id,
toolName: tc.name,
});
}
}
}
msg._rawToolResults = undefined; // Don't send this custom field to the LLM
});
// Add user message
const userMsg: ChatMessage = { role: "user", content: message.trim() };
@@ -1137,7 +1227,7 @@ export async function POST(request: Request) {
});
messages.push({
role: "tool",
content: result,
content: compressToolResultForLLM(tc.name, tc.args, result),
toolCallId: tc.id,
toolName: tc.name,
thoughtSignature: tc.thoughtSignature,
@@ -1351,11 +1441,28 @@ export async function POST(request: Request) {
messages.push({
role: "tool",
content: result,
content: compressToolResultForLLM(tc.name, tc.args, result),
toolCallId: tc.id,
toolName: tc.name,
thoughtSignature: tc.thoughtSignature,
});
// Auto-append failures to agent memory
if (isToolError(tc.name, result) && activeProject?.id) {
agentMemory.failed_strategies.push({
tool: tc.name,
args: tc.args,
reason: extractErrorReason(result),
timestamp: Date.now()
});
if (agentMemory.failed_strategies.length > 20) {
agentMemory.failed_strategies = agentMemory.failed_strategies.slice(-20);
}
query(
`UPDATE fs_projects SET data = jsonb_set(data, '{agent_memory}', $2::jsonb, true) WHERE id = $1`,
[activeProject.id, JSON.stringify(agentMemory)]
).catch(() => {});
}
const recovery = detectKnownError(result);
if (recovery) recoveryLines.push(formatRecoveryMessage(recovery));

View File

@@ -367,6 +367,8 @@ export async function POST(request: Request) {
case "apps.templates.search":
return await toolAppsTemplatesSearch(params);
case "update.memory":
return await toolUpdateMemory(principal, params);
case "workspace.db_query":
return await toolWorkspaceDbQuery(principal, params);
case "databases.list":
@@ -3260,6 +3262,44 @@ const DB_TYPES: readonly CoolifyDatabaseType[] = [
/**
 * Handle the `update.memory` action: persist a fact or the current plan in the
 * project's `agent_memory`, stored under `data.agent_memory` in `fs_projects`.
 *
 * Supported `operation` values:
 * - `set_fact`   — requires `key` and `value`; stores `facts[key] = value` (both coerced to strings).
 * - `set_plan`   — requires `value`; stores it as `current_plan`.
 * - `clear_plan` — sets `current_plan` to `null`.
 *
 * @param principal - Caller identity. NOTE(review): it is not consulted here, so
 *   any caller can write to any `projectId` — confirm access is authorized upstream.
 * @param params - Tool arguments: `operation`, `key`, `value`, `projectId`.
 * @returns A JSON response: 400 for missing/invalid arguments, 404 when the
 *   project does not exist, otherwise a success message.
 */
async function toolUpdateMemory(
  principal: Principal,
  params: Record<string, unknown>
) {
  const { operation, key, value, projectId } = params;
  if (!projectId) return NextResponse.json({ error: "projectId required" }, { status: 400 });

  // Missing or empty after string coercion. Unlike a bare truthiness test this
  // accepts legitimate falsy values such as 0 or false.
  const isBlank = (input: unknown): boolean => input == null || String(input) === "";

  const projectRow = await queryOne<{ id: string; data: any }>(
    `SELECT data FROM fs_projects WHERE id = $1`,
    [projectId]
  );
  if (!projectRow) return NextResponse.json({ error: "Project not found" }, { status: 404 });

  // Start from the stored memory only when it is a plain object; anything else
  // (null, string, array) is replaced so the property writes below are safe.
  const stored = projectRow.data?.agent_memory;
  const agentMemory =
    stored !== null && typeof stored === "object" && !Array.isArray(stored) ? stored : {};
  if (!agentMemory.facts || typeof agentMemory.facts !== "object") agentMemory.facts = {};
  if (!Array.isArray(agentMemory.failed_strategies)) agentMemory.failed_strategies = [];

  if (operation === 'set_fact') {
    if (isBlank(key) || isBlank(value)) return NextResponse.json({ error: "key and value required for set_fact" }, { status: 400 });
    const factName = String(key);
    // Assigning a string to "__proto__" on a plain object is silently ignored,
    // so the fact would be lost while success is reported. Reject it instead.
    if (factName === "__proto__") return NextResponse.json({ error: "Invalid key" }, { status: 400 });
    agentMemory.facts[factName] = String(value);
  } else if (operation === 'set_plan') {
    if (isBlank(value)) return NextResponse.json({ error: "value required for set_plan" }, { status: 400 });
    agentMemory.current_plan = String(value);
  } else if (operation === 'clear_plan') {
    agentMemory.current_plan = null;
  } else {
    return NextResponse.json({ error: "Invalid operation" }, { status: 400 });
  }

  // jsonb_set returns NULL when its target is NULL, so a project whose `data`
  // column is NULL would silently lose the update; COALESCE to an empty object.
  // NOTE(review): this is a read-modify-write of the whole agent_memory value;
  // a concurrent writer to the same path can overwrite this change or be overwritten.
  await query(
    `UPDATE fs_projects SET data = jsonb_set(COALESCE(data, '{}'::jsonb), '{agent_memory}', $2::jsonb, true) WHERE id = $1`,
    [projectId, JSON.stringify(agentMemory)]
  );

  return NextResponse.json({ result: `Memory updated successfully via ${operation}` });
}
async function toolWorkspaceDbQuery(
principal: Principal,
params: Record<string, unknown>