Implement LLM context compression and persistent memory
This commit is contained in:
@@ -831,6 +831,28 @@ After this returns, ALWAYS call apps_deploy { uuid } to regenerate the live Trae
|
||||
|
||||
// ── Databases ─────────────────────────────────────────────────────────────
|
||||
|
||||
{
|
||||
name: "update_memory",
|
||||
description: "Write a persistent fact about this project that should be remembered across all turns. Use this when you discover something that will affect future actions — e.g. the correct start command, the actual entry point file, a broken dependency. Do NOT use this for temporary observations.",
|
||||
parameters: {
|
||||
type: "OBJECT",
|
||||
properties: {
|
||||
operation: {
|
||||
type: "STRING",
|
||||
description: "set_fact: store a key/value fact. set_plan: record your current high-level plan. clear_plan: mark the plan as complete."
|
||||
},
|
||||
key: {
|
||||
type: "STRING",
|
||||
description: "For set_fact: the fact name (e.g. 'start_command', 'entry_point')"
|
||||
},
|
||||
value: {
|
||||
type: "STRING",
|
||||
description: "For set_fact or set_plan: the value to store"
|
||||
}
|
||||
},
|
||||
required: ["operation"],
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "workspace_db_query",
|
||||
description: "Run a read-only SQL query against the workspace's main production/telemetry database (the one powering Next.js + Telemetry). ONLY USE THIS IF THE USER ASKS FOR LOGS OR TELEMETRY USAGE DATA.",
|
||||
@@ -1881,6 +1903,7 @@ export async function executeMcpTool(
|
||||
// Convert underscore tool name → dotted MCP action (apps_create → apps.create)
|
||||
let action = toolName.replace(/_/g, ".");
|
||||
if (toolName === "workspace_db_query") action = "workspace.db_query";
|
||||
if (toolName === "update_memory") action = "update.memory";
|
||||
|
||||
// Unpack JSON-string args (Gemini schemas can't represent free-form objects,
|
||||
// so we accept *Json string fields and parse them server-side).
|
||||
|
||||
@@ -629,6 +629,72 @@ function extractPreviewUrl(messages: ChatMessage[]): string | undefined {
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
 * Shrinks a tool result before it is handed to the LLM as a `tool` message.
 *
 * Behavior by tool:
 * - Results whose JSON payload signals a failure (`ok === false`, a truthy
 *   `error`, or a non-empty `errors`) are returned untouched.
 * - `fs_read`: content longer than 50 lines is reduced to its first and last
 *   20 lines.
 * - `shell_exec`: `stdout` and `stderr` are each reduced independently to
 *   their first and last 15 lines when longer than 30 lines.
 * - Tools whose name contains `logs` or `console`: the `log` text is reduced
 *   to its first and last 15 lines when longer than 30 lines.
 * - Anything else: results over 2000 characters keep the first and last 1000.
 *
 * When the text came from a string field of a JSON object, the compressed
 * text is written back into that same field and the object is re-serialized;
 * otherwise the compressed text itself is returned.
 *
 * @param toolName Name of the tool that produced the result.
 * @param args Arguments the tool was called with; only `path` and `command`
 *   are read, for the summary header.
 * @param result Raw tool output, either JSON or plain text.
 * @returns The result, possibly shortened.
 */
function compressToolResultForLLM(toolName: string, args: any, result: string): string {
  let parsed: unknown;
  try {
    parsed = JSON.parse(result);
  } catch {
    // Not JSON: every branch below falls back to treating `result` as raw text.
  }

  // Only plain JSON objects carry named fields. Strings, numbers and arrays
  // must not be spread into an object when re-serializing.
  const payload: Record<string, any> | undefined =
    typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)
      ? (parsed as Record<string, any>)
      : undefined;

  if (payload && (payload.ok === false || payload.error || payload.errors?.length > 0)) {
    return result; // Don't compress errors, they are high signal
  }

  // Reads a field only when it is really a string, so `.split` can never
  // be called on an object or number.
  const stringField = (name: string): string | undefined => {
    const candidate = payload?.[name];
    return typeof candidate === "string" ? candidate : undefined;
  };

  // Builds "header / first `keep` lines / omitted marker / last `keep` lines".
  const headAndTail = (
    lines: string[],
    keep: number,
    header: string,
    headLabel: string,
    tailLabel: string
  ): string =>
    [
      header,
      headLabel,
      lines.slice(0, keep).join("\n"),
      `... [${lines.length - keep * 2} lines omitted] ...`,
      tailLabel,
      lines.slice(-keep).join("\n"),
    ].join("\n");

  if (toolName === "fs_read") {
    const content = stringField("content");
    const lines = (content ?? result).split("\n");
    if (lines.length <= 50) return result;
    const compressed = headAndTail(
      lines,
      20,
      `[fs_read: ${args?.path} — ${lines.length} lines total]`,
      `[Lines 1-20]:`,
      // The last 20 lines of an N-line file are lines N-19..N (1-indexed).
      `[Lines ${lines.length - 19}-${lines.length}]:`
    );
    return payload && content !== undefined
      ? JSON.stringify({ ...payload, content: compressed })
      : compressed;
  }

  if (toolName === "shell_exec") {
    const squeeze = (text: string): string => {
      const lines = text.split("\n");
      return lines.length <= 30
        ? text
        : headAndTail(
            lines,
            15,
            `[run_terminal: ${args?.command}]`,
            `[First 15 lines]:`,
            `[Last 15 lines]:`
          );
    };

    const stdout = stringField("stdout");
    const stderr = stringField("stderr");
    if (payload && (stdout || stderr)) {
      // Each stream is summarized from its own text so stdout is never
      // replaced by a summary of stderr (or vice versa).
      const patch: Record<string, string> = {};
      if (stdout !== undefined) patch.stdout = squeeze(stdout);
      if (stderr !== undefined) patch.stderr = squeeze(stderr);
      const unchanged = patch.stdout === stdout && patch.stderr === stderr;
      return unchanged ? result : JSON.stringify({ ...payload, ...patch });
    }

    // No usable stream fields: treat the whole result as terminal output.
    return squeeze(result);
  }

  if (toolName.includes("logs") || toolName.includes("console")) {
    const log = stringField("log");
    const lines = (log ?? result).split("\n");
    if (lines.length <= 30) return result;
    const compressed = headAndTail(
      lines,
      15,
      `[${toolName}]`,
      `[First 15 lines]:`,
      `[Last 15 lines]:`
    );
    return payload && log !== undefined
      ? JSON.stringify({ ...payload, log: compressed })
      : compressed;
  }

  if (result.length > 2000) {
    return (
      result.substring(0, 1000) +
      `\n... [${result.length - 2000} chars omitted] ...\n` +
      result.substring(result.length - 1000)
    );
  }

  return result;
}
|
||||
|
||||
function summarizeForUI(raw: string): string {
|
||||
try {
|
||||
const p = JSON.parse(raw);
|
||||
@@ -652,6 +718,35 @@ function summarizeForUI(raw: string): string {
|
||||
return raw.slice(0, 500);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Heuristically decides whether a tool result represents a failure.
 *
 * A result that parses as a JSON object (or array) is judged only by its own
 * fields: a non-zero numeric `code`, `ok === false`, a truthy `error` whose
 * text is not "null", or a non-empty `errors`. Anything else is scanned as
 * text for a few well-known failure phrases, except for log/console tools,
 * whose output is never treated as a tool failure.
 *
 * @param toolName Name of the tool that produced the result.
 * @param result Raw tool output, either JSON or plain text.
 * @returns `true` when the result looks like an error.
 */
function isToolError(toolName: string, result: string): boolean {
  try {
    const payload = JSON.parse(result);
    if (payload && typeof payload === "object") {
      // Checks run lazily and in order; the first hit decides.
      const failureSignals: Array<() => boolean> = [
        () => typeof payload.code === "number" && payload.code !== 0,
        () => payload.ok === false,
        () => Boolean(payload.error) && !/^null$/i.test(String(payload.error)),
        () => Boolean(payload.errors) && payload.errors.length > 0,
      ];
      return failureSignals.some((signal) => signal());
    }
  } catch {
    // Unparseable or uninspectable payload: fall through to the text scan.
  }

  const isLogLike = ["logs", "console"].some((marker) => toolName.includes(marker));
  if (isLogLike) return false;

  const failurePhrases = /(econnrefused|enoent|permission denied|command not found)/;
  return failurePhrases.test(result.toLowerCase());
}
|
||||
|
||||
/**
 * Produces a short, single-purpose failure reason from a tool result.
 *
 * Preference order: the first three lines of a JSON `stderr` string, then a
 * JSON `error` string, then the first three lines of the raw result. Every
 * path is capped at 200 characters so a single noisy failure cannot bloat
 * whatever stores the reason.
 *
 * @param result Raw tool output, either JSON or plain text.
 * @returns A reason of at most 200 characters.
 */
function extractErrorReason(result: string): string {
  const maxReasonChars = 200;
  const firstLines = (text: string): string =>
    text.split("\n").slice(0, 3).join(" ").substring(0, maxReasonChars);

  try {
    const parsed: unknown = JSON.parse(result);
    if (parsed && typeof parsed === "object") {
      const { stderr, error } = parsed as { stderr?: unknown; error?: unknown };
      if (stderr && typeof stderr === "string") return firstLines(stderr);
      if (error && typeof error === "string") return error.substring(0, maxReasonChars);
    }
  } catch {
    // Not JSON: summarize the raw text below.
  }

  return firstLines(result);
}
|
||||
|
||||
export async function POST(request: Request) {
|
||||
await ensureChatTables();
|
||||
|
||||
@@ -727,54 +822,49 @@ export async function POST(request: Request) {
|
||||
// followed by tool messages responding to each 'tool_call_id'."
|
||||
// Gemini silently tolerates stale toolCalls, so we only hit this on
|
||||
// non-Gemini providers.
|
||||
const history: ChatMessage[] = rows
|
||||
.reverse()
|
||||
.map((r: { data: ChatMessage }) => {
|
||||
const history: ChatMessage[] = [];
|
||||
|
||||
rows.reverse().forEach((r: { data: ChatMessage }) => {
|
||||
const msg = r.data as unknown as {
|
||||
role: string;
|
||||
content?: string;
|
||||
toolCalls?: unknown;
|
||||
_rawToolResults?: unknown;
|
||||
toolCalls?: any[];
|
||||
_rawToolResults?: any[];
|
||||
};
|
||||
if (
|
||||
msg.role === "assistant" &&
|
||||
Array.isArray(msg.toolCalls) &&
|
||||
msg.toolCalls.length
|
||||
) {
|
||||
// Remove any tool calls completely from the history payload.
|
||||
// This is the clean, standard way to pass assistant history without
|
||||
// polluting the context or inducing model hallucinations.
|
||||
msg.toolCalls = undefined;
|
||||
msg._rawToolResults = undefined;
|
||||
}
|
||||
|
||||
if (typeof msg.content === "string") {
|
||||
msg.content = msg.content
|
||||
.replace(/<tool_calls>[\s\S]*?<\/tool_calls>/g, "")
|
||||
.replace(/<think>[\s\S]*?<\/think>/g, "")
|
||||
// Completely strip any legacy leaked "[tools executed this turn]" strings in case they exist in older messages
|
||||
.replace(/(?:\r?\n)*\[tools executed this turn:[\s\S]*?\]/g, "")
|
||||
// Strip legacy "### Phase Checkpoint" planning walls (Goal / Findings /
|
||||
// Suspected Cause / Verification Plan) from historical assistant
|
||||
// messages. That flow was removed, but old threads still contain it,
|
||||
// and replaying it as context biases the model into re-emitting the
|
||||
// same walls + verify-everything behavior. Drop from the heading to
|
||||
// the end of the message; any plain narration before it is kept.
|
||||
.replace(/(?:^|\n)\s*#{1,6}\s*Phase Checkpoint[\s\S]*$/i, "")
|
||||
.trim();
|
||||
}
|
||||
|
||||
return msg as unknown as ChatMessage;
|
||||
})
|
||||
// Drop assistant messages that became empty after stripping the internal
|
||||
// checkpoint/QA walls so they don't inject blank turns into the context.
|
||||
.filter((msg) => {
|
||||
if (msg.role !== "assistant") return true;
|
||||
const hasText =
|
||||
typeof msg.content === "string" && msg.content.trim().length > 0;
|
||||
const hasTools =
|
||||
Array.isArray((msg as { toolCalls?: unknown[] }).toolCalls) &&
|
||||
((msg as { toolCalls?: unknown[] }).toolCalls?.length ?? 0) > 0;
|
||||
return hasText || hasTools;
|
||||
const isAssistant = msg.role === "assistant";
|
||||
const hasText = typeof msg.content === "string" && msg.content.trim().length > 0;
|
||||
const hasTools = Array.isArray(msg.toolCalls) && msg.toolCalls.length > 0;
|
||||
|
||||
if (!isAssistant || hasText || hasTools) {
|
||||
history.push(msg as unknown as ChatMessage);
|
||||
|
||||
// Reconstruct compressed tool messages from _rawToolResults so the LLM remembers its actions!
|
||||
if (isAssistant && hasTools && Array.isArray(msg._rawToolResults)) {
|
||||
for (const tc of msg.toolCalls!) {
|
||||
const rawRes = msg._rawToolResults.find(tr => tr.name === tc.name && JSON.stringify(tr.args) === JSON.stringify(tc.args));
|
||||
const resultString = typeof rawRes?.result === 'string' ? rawRes.result : JSON.stringify(rawRes?.result || { ok: true });
|
||||
|
||||
history.push({
|
||||
role: "tool",
|
||||
content: compressToolResultForLLM(tc.name, tc.args, resultString),
|
||||
toolCallId: tc.id,
|
||||
toolName: tc.name,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
msg._rawToolResults = undefined; // Don't send this custom field to the LLM
|
||||
});
|
||||
|
||||
// Add user message
|
||||
@@ -1137,7 +1227,7 @@ export async function POST(request: Request) {
|
||||
});
|
||||
messages.push({
|
||||
role: "tool",
|
||||
content: result,
|
||||
content: compressToolResultForLLM(tc.name, tc.args, result),
|
||||
toolCallId: tc.id,
|
||||
toolName: tc.name,
|
||||
thoughtSignature: tc.thoughtSignature,
|
||||
@@ -1351,12 +1441,29 @@ export async function POST(request: Request) {
|
||||
|
||||
messages.push({
|
||||
role: "tool",
|
||||
content: result,
|
||||
content: compressToolResultForLLM(tc.name, tc.args, result),
|
||||
toolCallId: tc.id,
|
||||
toolName: tc.name,
|
||||
thoughtSignature: tc.thoughtSignature,
|
||||
});
|
||||
|
||||
// Auto-append failures to agent memory
|
||||
if (isToolError(tc.name, result) && activeProject?.id) {
|
||||
agentMemory.failed_strategies.push({
|
||||
tool: tc.name,
|
||||
args: tc.args,
|
||||
reason: extractErrorReason(result),
|
||||
timestamp: Date.now()
|
||||
});
|
||||
if (agentMemory.failed_strategies.length > 20) {
|
||||
agentMemory.failed_strategies = agentMemory.failed_strategies.slice(-20);
|
||||
}
|
||||
query(
|
||||
`UPDATE fs_projects SET data = jsonb_set(data, '{agent_memory}', $2::jsonb, true) WHERE id = $1`,
|
||||
[activeProject.id, JSON.stringify(agentMemory)]
|
||||
).catch(() => {});
|
||||
}
|
||||
|
||||
const recovery = detectKnownError(result);
|
||||
if (recovery) recoveryLines.push(formatRecoveryMessage(recovery));
|
||||
|
||||
|
||||
@@ -367,6 +367,8 @@ export async function POST(request: Request) {
|
||||
case "apps.templates.search":
|
||||
return await toolAppsTemplatesSearch(params);
|
||||
|
||||
case "update.memory":
|
||||
return await toolUpdateMemory(principal, params);
|
||||
case "workspace.db_query":
|
||||
return await toolWorkspaceDbQuery(principal, params);
|
||||
case "databases.list":
|
||||
@@ -3260,6 +3262,44 @@ const DB_TYPES: readonly CoolifyDatabaseType[] = [
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Handles the `update.memory` action: stores a fact, records the current
 * plan, or clears the plan inside the project's `agent_memory` JSON.
 *
 * Supported `operation` values:
 * - `set_fact`: requires `key` and `value`; writes `facts[key] = value`.
 * - `set_plan`: requires `value`; writes `current_plan`.
 * - `clear_plan`: sets `current_plan` to `null`.
 *
 * The whole `agent_memory` object is rewritten from the snapshot read at the
 * top of this function, so a concurrent writer's changes made between the
 * read and the write are overwritten.
 *
 * NOTE(review): `principal` is not consulted here — no ownership/permission
 * check on `projectId` is visible in this function; confirm that one is
 * enforced by the caller.
 *
 * @param principal Caller identity (currently unused in this function).
 * @param params Tool parameters: `operation`, `key`, `value`, `projectId`.
 * @returns A JSON response: 400 for missing/invalid input, 404 when the
 *   project does not exist, otherwise a success message.
 */
async function toolUpdateMemory(
  principal: Principal,
  params: Record<string, unknown>
) {
  const { operation, key, value, projectId } = params;
  if (!projectId) return NextResponse.json({ error: "projectId required" }, { status: 400 });

  const projectRow = await queryOne<{ id: string; data: any }>(
    `SELECT data FROM fs_projects WHERE id = $1`,
    [projectId]
  );
  if (!projectRow) return NextResponse.json({ error: "Project not found" }, { status: 404 });

  const agentMemory = projectRow.data?.agent_memory || { facts: {}, failed_strategies: [] };
  if (!agentMemory.facts) agentMemory.facts = {};
  if (!agentMemory.failed_strategies) agentMemory.failed_strategies = [];

  // Objects/arrays are stored as JSON text; String() would collapse them to
  // "[object Object]" and lose the information.
  const asText = (raw: unknown): string =>
    typeof raw === "object" && raw !== null ? JSON.stringify(raw) : String(raw);

  if (operation === 'set_fact') {
    if (!key || !value) return NextResponse.json({ error: "key and value required for set_fact" }, { status: 400 });
    agentMemory.facts[String(key)] = asText(value);
  } else if (operation === 'set_plan') {
    if (!value) return NextResponse.json({ error: "value required for set_plan" }, { status: 400 });
    agentMemory.current_plan = asText(value);
  } else if (operation === 'clear_plan') {
    agentMemory.current_plan = null;
  } else {
    return NextResponse.json({ error: "Invalid operation" }, { status: 400 });
  }

  // jsonb_set returns NULL when its target is NULL, which would silently
  // drop the write for a project whose `data` is NULL; COALESCE gives it an
  // empty object to write into.
  await query(
    `UPDATE fs_projects SET data = jsonb_set(COALESCE(data, '{}'::jsonb), '{agent_memory}', $2::jsonb, true) WHERE id = $1`,
    [projectId, JSON.stringify(agentMemory)]
  );

  return NextResponse.json({ result: `Memory updated successfully via ${operation}` });
}
|
||||
|
||||
async function toolWorkspaceDbQuery(
|
||||
principal: Principal,
|
||||
params: Record<string, unknown>
|
||||
|
||||
Reference in New Issue
Block a user