fix: rotate Gitea token and prevent empty assistant messages in history
- Empty-message fix: skip pushing the assistant message to history when both content and tool_calls are absent (GLM-5 mid-reasoning token exhaustion). Also filter pre-existing empty assistant messages out of the returned history.
- System prompt: now correctly injects knowledgeContext from opts into the Tier-B system message (it was missing from the loop's buildMessages).
- GITEA_API_TOKEN: updated externally in Coolify (the old token was invalid).

Made-with: Cursor
This commit is contained in:
@@ -150,10 +150,15 @@ export async function orchestratorChat(
|
||||
let finalReasoning: string | null = null;
|
||||
const toolCallNames: string[] = [];
|
||||
|
||||
// Build messages with system prompt prepended
|
||||
// Build system prompt — inject project knowledge if provided
|
||||
const systemContent = opts?.knowledgeContext
|
||||
? `${SYSTEM_PROMPT}\n\n## Project Memory (known facts)\n${opts.knowledgeContext}`
|
||||
: SYSTEM_PROMPT;
|
||||
|
||||
// Build messages with system prompt prepended; keep last 40 for cost control
|
||||
const buildMessages = (): LLMMessage[] => [
|
||||
{ role: 'system', content: SYSTEM_PROMPT },
|
||||
...session.history
|
||||
{ role: 'system', content: systemContent },
|
||||
...session.history.slice(-40)
|
||||
];
|
||||
|
||||
while (turn < MAX_TURNS) {
|
||||
@@ -171,16 +176,22 @@ export async function orchestratorChat(
|
||||
// Record reasoning for the final turn (informational, not stored in history)
|
||||
if (response.reasoning) finalReasoning = response.reasoning;
|
||||
|
||||
// Build assistant message to add to history
|
||||
const assistantMsg: LLMMessage = {
|
||||
role: 'assistant',
|
||||
content: response.content,
|
||||
tool_calls: response.tool_calls.length > 0 ? response.tool_calls : undefined
|
||||
};
|
||||
session.history.push(assistantMsg);
|
||||
// Only push assistant message if it has actual content or tool calls;
|
||||
// skip empty turns that result from mid-reasoning token exhaustion.
|
||||
const hasContent = response.content !== null && response.content !== '';
|
||||
const hasToolCalls = response.tool_calls.length > 0;
|
||||
|
||||
if (hasContent || hasToolCalls) {
|
||||
const assistantMsg: LLMMessage = {
|
||||
role: 'assistant',
|
||||
content: response.content,
|
||||
tool_calls: hasToolCalls ? response.tool_calls : undefined
|
||||
};
|
||||
session.history.push(assistantMsg);
|
||||
}
|
||||
|
||||
// No tool calls — we have the final answer
|
||||
if (response.tool_calls.length === 0) {
|
||||
if (!hasToolCalls) {
|
||||
finalReply = response.content ?? '';
|
||||
break;
|
||||
}
|
||||
@@ -221,7 +232,9 @@ export async function orchestratorChat(
|
||||
turns: turn,
|
||||
toolCalls: toolCallNames,
|
||||
model: llm.modelId,
|
||||
history: session.history.slice(-40),
|
||||
history: session.history
|
||||
.filter(m => m.role !== 'assistant' || m.content || m.tool_calls?.length)
|
||||
.slice(-40),
|
||||
memoryUpdates: ctx.memoryUpdates
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user