import { createLLM, toOAITools, LLMMessage } from './llm';
import { ALL_TOOLS, executeTool, ToolContext, MemoryUpdate } from './tools';
import { resolvePrompt } from './prompts/loader';

/** Maximum number of LLM round-trips per orchestratorChat() call. */
const MAX_TURNS = 20;

// ---------------------------------------------------------------------------
// Session store — one conversation history per session_id
// ---------------------------------------------------------------------------

/** In-memory conversation state for a single session id. */
interface Session {
  id: string;
  history: LLMMessage[]; // OpenAI message format
  /** ISO-8601 timestamp of when the session entry was created. */
  createdAt: string;
  /** ISO-8601 timestamp of the most recent getOrCreateSession() call for this id. */
  lastActiveAt: string;
}

// Typed explicitly: a bare `new Map()` is Map<any, any>, which would let
// session values leak through the rest of the module as `any`.
const sessions = new Map<string, Session>();

/**
 * Returns the session stored under `sessionId`, creating an empty one if none
 * exists, and refreshes its `lastActiveAt` timestamp.
 *
 * The clock is read once per call, so a freshly created session has
 * `createdAt === lastActiveAt`.
 */
function getOrCreateSession(sessionId: string): Session {
  const now = new Date().toISOString();

  const existing = sessions.get(sessionId);
  if (existing !== undefined) {
    existing.lastActiveAt = now;
    return existing;
  }

  const created: Session = {
    id: sessionId,
    history: [],
    createdAt: now,
    lastActiveAt: now
  };
  sessions.set(sessionId, created);
  return created;
}

/**
 * Lists a summary of every session currently held in memory.
 *
 * @returns One entry per session: its id, the number of messages in its
 *          history, and its creation / last-activity timestamps.
 */
export function listSessions(): Array<{
  id: string;
  messages: number;
  createdAt: string;
  lastActiveAt: string;
}> {
  return Array.from(sessions.values()).map(s => ({
    id: s.id,
    messages: s.history.length,
    createdAt: s.createdAt,
    lastActiveAt: s.lastActiveAt
  }));
}

/** Removes the session stored under `sessionId`; a no-op if there is none. */
export function clearSession(sessionId: string): void {
  sessions.delete(sessionId);
}

// Prompt text lives in src/prompts/orchestrator.ts — imported via agents/index.ts
// which is loaded before orchestratorChat() is first called.
// ---------------------------------------------------------------------------
// Chat types
// ---------------------------------------------------------------------------

/** Outcome of a single orchestratorChat() call. */
export interface ChatResult {
  reply: string;
  reasoning: string | null;
  sessionId: string;
  turns: number;
  toolCalls: string[];
  model: string;
  /** Updated conversation history — caller should persist this */
  history: LLMMessage[];
  /** Knowledge items the AI chose to save this turn */
  memoryUpdates: MemoryUpdate[];
}

/** Result of decoding a tool call's JSON argument string. */
type ParsedToolArgs =
  | { ok: true; args: Record<string, unknown> }
  | { ok: false; error: string };

/**
 * Returns at most the last `limit` messages of `history`, then drops any
 * leading `tool` messages.
 *
 * A plain tail slice can cut between an assistant `tool_calls` message and its
 * tool results, leaving results at the head of the window with no preceding
 * call; those orphaned results are removed so the window stays well-formed.
 */
function windowHistory(history: readonly LLMMessage[], limit = 40): LLMMessage[] {
  if (limit <= 0) return [];
  const recent = history.slice(-limit);
  const firstNonTool = recent.findIndex(m => m.role !== 'tool');
  if (firstNonTool === -1) return [];
  return firstNonTool === 0 ? recent : recent.slice(firstNonTool);
}

/**
 * Decodes a tool call's argument string. An empty/missing string and a JSON
 * `null` both mean "no arguments"; anything that is not a JSON object is
 * reported as an error instead of being silently replaced by `{}`.
 */
function parseToolArgs(raw: string | null | undefined): ParsedToolArgs {
  if (!raw) return { ok: true, args: {} };
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    return { ok: false, error: err instanceof Error ? err.message : String(err) };
  }
  if (parsed === null) return { ok: true, args: {} };
  if (typeof parsed !== 'object' || Array.isArray(parsed)) {
    return { ok: false, error: 'arguments must be a JSON object' };
  }
  return { ok: true, args: parsed as Record<string, unknown> };
}

/**
 * Converts a tool result into the string stored as the tool message content.
 * Strings pass through unchanged; everything else is JSON-encoded.
 */
function serializeToolResult(result: unknown): string {
  if (typeof result === 'string') return result;
  try {
    // JSON.stringify yields undefined for undefined/functions/symbols.
    return JSON.stringify(result) ?? 'null';
  } catch (err) {
    // Circular structures and BigInt values make JSON.stringify throw.
    const reason = err instanceof Error ? err.message : String(err);
    return JSON.stringify({ error: `Tool result could not be serialized: ${reason}` });
  }
}

// ---------------------------------------------------------------------------
// Main orchestrator chat — uses GLM-5 (Tier B) by default
// ---------------------------------------------------------------------------

/**
 * Runs one user message through the orchestrator model, executing any tool
 * calls the model requests until it produces a final answer or MAX_TURNS
 * model round-trips have been made.
 *
 * The user message, assistant messages and tool results are appended to the
 * in-memory session for `sessionId`.
 *
 * @param sessionId   Key of the in-memory session to use (created if absent).
 * @param userMessage Text appended to the session history as the user turn.
 * @param ctx         Context passed to every executeTool() call; its
 *                    `memoryUpdates` is returned in the result.
 * @param opts        Optional preloaded history and knowledge context.
 * @returns The final reply plus metadata about the run (see ChatResult).
 * @throws Whatever `llm.chat()` or `resolvePrompt()` throw — those calls are
 *         not wrapped. Errors thrown by tools are caught and reported to the
 *         model as `{ error }` tool results.
 */
export async function orchestratorChat(
  sessionId: string,
  userMessage: string,
  ctx: ToolContext,
  opts?: {
    /**
     * History loaded from the DB. Used to seed the in-memory session only when
     * that session has no messages yet; it does not overwrite a live session.
     */
    preloadedHistory?: LLMMessage[];
    /** Knowledge items to inject as context at start of conversation */
    knowledgeContext?: string;
  }
): Promise<ChatResult> {
  const modelId = process.env.ORCHESTRATOR_MODEL ?? 'B'; // Tier B = GLM-5
  const llm = createLLM(modelId, { temperature: 0.3 });
  const session = getOrCreateSession(sessionId);

  // Seed session from DB history if provided and session is fresh
  if (opts?.preloadedHistory && opts.preloadedHistory.length > 0 && session.history.length === 0) {
    session.history = [...opts.preloadedHistory];
  }

  const oaiTools = toOAITools(ALL_TOOLS);

  // Append user message
  session.history.push({ role: 'user', content: userMessage });

  let turn = 0;
  let finalReply = '';
  let finalReasoning: string | null = null;
  const toolCallNames: string[] = [];

  // Resolve system prompt from template — {{knowledge}} injects project/COO context
  const systemContent = resolvePrompt('orchestrator', {
    knowledge: opts?.knowledgeContext ?? ''
  });

  // Build messages with system prompt prepended; keep last 40 for cost control.
  // windowHistory() also drops tool results whose originating assistant
  // tool_calls message fell outside the window.
  const buildMessages = (): LLMMessage[] => [
    { role: 'system', content: systemContent },
    ...windowHistory(session.history)
  ];

  while (turn < MAX_TURNS) {
    turn++;
    const response = await llm.chat(buildMessages(), oaiTools, 4096);

    // If GLM-5 is still reasoning (content null, finish_reason length) give it more tokens
    if (
      response.content === null &&
      response.tool_calls.length === 0 &&
      response.finish_reason === 'length'
    ) {
      // Retry with more tokens — model hit max_tokens during reasoning
      const retry = await llm.chat(buildMessages(), oaiTools, 8192);
      Object.assign(response, retry);
    }

    // Keep the most recent non-empty reasoning (informational, not stored in history)
    if (response.reasoning) finalReasoning = response.reasoning;

    // Only push assistant message if it has actual content or tool calls;
    // skip empty turns that result from mid-reasoning token exhaustion.
    const hasContent = response.content !== null && response.content !== '';
    const hasToolCalls = response.tool_calls.length > 0;
    if (hasContent || hasToolCalls) {
      const assistantMsg: LLMMessage = {
        role: 'assistant',
        content: response.content,
        tool_calls: hasToolCalls ? response.tool_calls : undefined
      };
      session.history.push(assistantMsg);
    }

    // No tool calls — we have the final answer
    if (!hasToolCalls) {
      finalReply = response.content ?? '';
      break;
    }

    // Execute each tool call and collect results. Every call gets exactly one
    // tool message, including failures, so each tool_call id is answered.
    for (const tc of response.tool_calls) {
      const fnName = tc.function.name;
      toolCallNames.push(fnName);

      let result: unknown;
      const parsed = parseToolArgs(tc.function.arguments);
      if (!parsed.ok) {
        // Do not run the tool with silently-dropped arguments; tell the model
        // what was wrong so it can re-issue the call.
        result = { error: `Invalid JSON arguments for tool '${fnName}': ${parsed.error}` };
      } else {
        try {
          result = await executeTool(fnName, parsed.args, ctx);
        } catch (err) {
          result = { error: err instanceof Error ? err.message : String(err) };
        }
      }

      // Add tool result to history
      session.history.push({
        role: 'tool',
        tool_call_id: tc.id,
        name: fnName,
        content: serializeToolResult(result)
      });
    }
  }

  if (turn >= MAX_TURNS && !finalReply) {
    finalReply = 'Hit the turn limit. Try a more specific request.';
  }

  // Drop empty assistant messages (no content and no tool calls) before
  // windowing the history handed back to the caller.
  const persistable = session.history.filter(
    m => m.role !== 'assistant' || Boolean(m.content) || (m.tool_calls?.length ?? 0) > 0
  );

  return {
    reply: finalReply,
    reasoning: finalReasoning,
    sessionId,
    turns: turn,
    toolCalls: toolCallNames,
    model: llm.modelId,
    history: windowHistory(persistable),
    memoryUpdates: ctx.memoryUpdates
  };
}