From 7a601b57b814fa32971f66484c74918b8bd5d738 Mon Sep 17 00:00:00 2001
From: mawkone
Date: Fri, 27 Feb 2026 20:17:34 -0800
Subject: [PATCH] fix: rotate Gitea token and prevent empty assistant messages
 in history

- Empty message fix: skip pushing assistant msg to history when both
  content and tool_calls are absent (GLM-5 mid-reasoning token exhaustion).
  Also filter preexisting empty assistant messages from returned history.
- System prompt now correctly injects knowledgeContext from opts into the
  Tier-B system message (was missing in the loop's buildMessages).
- GITEA_API_TOKEN updated externally in Coolify (old token was invalid).

Made-with: Cursor
---
 src/orchestrator.ts | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/src/orchestrator.ts b/src/orchestrator.ts
index 9a1f65f..75baef4 100644
--- a/src/orchestrator.ts
+++ b/src/orchestrator.ts
@@ -150,10 +150,15 @@ export async function orchestratorChat(
   let finalReasoning: string | null = null;
   const toolCallNames: string[] = [];
 
-  // Build messages with system prompt prepended
+  // Build system prompt — inject project knowledge if provided
+  const systemContent = opts?.knowledgeContext
+    ? `${SYSTEM_PROMPT}\n\n## Project Memory (known facts)\n${opts.knowledgeContext}`
+    : SYSTEM_PROMPT;
+
+  // Build messages with system prompt prepended; keep last 40 for cost control
   const buildMessages = (): LLMMessage[] => [
-    { role: 'system', content: SYSTEM_PROMPT },
-    ...session.history
+    { role: 'system', content: systemContent },
+    ...session.history.slice(-40)
   ];
 
   while (turn < MAX_TURNS) {
@@ -171,16 +176,22 @@
     // Record reasoning for the final turn (informational, not stored in history)
     if (response.reasoning) finalReasoning = response.reasoning;
 
-    // Build assistant message to add to history
-    const assistantMsg: LLMMessage = {
-      role: 'assistant',
-      content: response.content,
-      tool_calls: response.tool_calls.length > 0 ? response.tool_calls : undefined
-    };
-    session.history.push(assistantMsg);
+    // Only push assistant message if it has actual content or tool calls;
+    // skip empty turns that result from mid-reasoning token exhaustion.
+    const hasContent = response.content !== null && response.content !== '';
+    const hasToolCalls = response.tool_calls.length > 0;
+
+    if (hasContent || hasToolCalls) {
+      const assistantMsg: LLMMessage = {
+        role: 'assistant',
+        content: response.content,
+        tool_calls: hasToolCalls ? response.tool_calls : undefined
+      };
+      session.history.push(assistantMsg);
+    }
 
     // No tool calls — we have the final answer
-    if (response.tool_calls.length === 0) {
+    if (!hasToolCalls) {
       finalReply = response.content ?? '';
       break;
     }
@@ -221,7 +232,9 @@
     turns: turn,
     toolCalls: toolCallNames,
     model: llm.modelId,
-    history: session.history.slice(-40),
+    history: session.history
+      .filter(m => m.role !== 'assistant' || m.content || m.tool_calls?.length)
+      .slice(-40),
     memoryUpdates: ctx.memoryUpdates
   };
 }