// vibn-api/src/orchestrator.ts

import { createLLM, toOAITools, LLMMessage } from './llm';
import { ALL_TOOLS, executeTool, ToolContext, MemoryUpdate } from './tools';
import { resolvePrompt } from './prompts/loader';

// Upper bound on iterations of the model/tool loop inside a single orchestratorChat() call.
const MAX_TURNS = 20;

// ---------------------------------------------------------------------------
// Session store — one conversation history per session_id
// ---------------------------------------------------------------------------

/** One conversation tracked in memory, keyed by its session id. */
interface Session {
    /** The session id this entry was created under. */
    id: string;
    /** Conversation messages in order (user, assistant and tool messages). */
    history: LLMMessage[]; // OpenAI message format
    /** ISO-8601 timestamp taken when the session entry was created. */
    createdAt: string;
    /** ISO-8601 timestamp refreshed every time the session is looked up. */
    lastActiveAt: string;
}

// In-memory registry of sessions, keyed by session id.
// NOTE(review): within this file entries are only ever removed by clearSession();
// no expiry or size cap is visible here — confirm that is intended.
const sessions = new Map<string, Session>();

/**
 * Returns the session registered under `sessionId`, registering a new empty
 * one first when none exists. Every lookup refreshes `lastActiveAt`.
 *
 * @param sessionId Key of the session in the in-memory registry.
 * @returns The stored (mutable) session object — not a copy.
 */
function getOrCreateSession(sessionId: string): Session {
    let entry = sessions.get(sessionId);

    if (entry === undefined) {
        // First time we see this id: start with an empty history.
        entry = {
            id: sessionId,
            history: [],
            createdAt: new Date().toISOString(),
            lastActiveAt: new Date().toISOString()
        };
        sessions.set(sessionId, entry);
    }

    // Touch the session on every access, new or existing.
    entry.lastActiveAt = new Date().toISOString();
    return entry;
}

/**
 * Summarises every session currently held in memory.
 *
 * @returns One entry per session, in registry order, carrying the session id,
 *          the number of messages in its history and its two timestamps.
 */
export function listSessions() {
    const summaries: Array<{
        id: string;
        messages: number;
        createdAt: string;
        lastActiveAt: string;
    }> = [];

    for (const { id, history, createdAt, lastActiveAt } of sessions.values()) {
        summaries.push({ id, messages: history.length, createdAt, lastActiveAt });
    }

    return summaries;
}

/**
 * Removes the session stored under `sessionId` from the in-memory registry.
 * Does nothing when no such session exists.
 */
export function clearSession(sessionId: string): void {
    sessions.delete(sessionId);
}

// Prompt text lives in src/prompts/orchestrator.ts — imported via agents/index.ts
// which is loaded before orchestratorChat() is first called.

// ---------------------------------------------------------------------------
// Chat types
// ---------------------------------------------------------------------------

/** Outcome of one orchestratorChat() call. */
export interface ChatResult {
    /**
     * Final assistant text. Empty string when the model's last message had no
     * content; a fallback notice when the turn limit was reached without a reply.
     */
    reply: string;
    /** Most recent non-empty reasoning text returned by the model during the call, or null */
    reasoning: string | null;
    /** The session id the call was made with */
    sessionId: string;
    /** Number of model/tool loop iterations used by the call */
    turns: number;
    /** Names of the tools the model requested, in request order */
    toolCalls: string[];
    /** `modelId` reported by the LLM client that served the call */
    model: string;
    /** Updated conversation history (at most the last 40 messages) — caller should persist this */
    history: LLMMessage[];
    /** Knowledge items the AI chose to save this turn (taken from the tool context's `memoryUpdates`) */
    memoryUpdates: MemoryUpdate[];
}

// ---------------------------------------------------------------------------
// Main orchestrator chat — uses GLM-5 (Tier B) by default
// ---------------------------------------------------------------------------

/** Number of trailing history messages sent to the model and returned to the caller. */
const HISTORY_WINDOW = 40;

/**
 * Returns the last `limit` messages of `history`, minus any leading `tool` messages.
 *
 * A plain `slice(-limit)` can cut a tool-call exchange in half, leaving `tool`
 * results at the front whose originating assistant `tool_calls` message fell
 * outside the window. OpenAI-style chat APIs reject such orphaned tool messages,
 * so they are dropped here.
 */
function recentHistoryWindow(history: LLMMessage[], limit: number = HISTORY_WINDOW): LLMMessage[] {
    if (limit <= 0) return [];
    const recent = history.slice(-limit);
    const firstUsable = recent.findIndex(m => m.role !== 'tool');
    return firstUsable === -1 ? [] : recent.slice(firstUsable);
}

/**
 * Parses the raw JSON argument string of a tool call.
 *
 * @throws Error when the text is not valid JSON or does not decode to a plain object.
 */
function parseToolArguments(raw: string | null | undefined): Record<string, unknown> {
    let parsed: unknown;
    try {
        parsed = JSON.parse(raw || '{}');
    } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        throw new Error(`Tool arguments are not valid JSON: ${reason}`);
    }
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
        throw new Error('Tool arguments must be a JSON object.');
    }
    return parsed as Record<string, unknown>;
}

/** Converts a tool result into the string content of a `tool` message; never throws. */
function serializeToolResult(result: unknown): string {
    if (typeof result === 'string') return result;
    try {
        // JSON.stringify(undefined) yields undefined, which is not valid message content.
        return JSON.stringify(result) ?? 'null';
    } catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return JSON.stringify({ error: `Tool result could not be serialized: ${reason}` });
    }
}

/**
 * Runs one user message through the orchestrator model, executing any tools the
 * model requests until it produces a final answer or MAX_TURNS is reached.
 *
 * The user message, assistant messages and tool results are appended to the
 * in-memory session identified by `sessionId`.
 *
 * @param sessionId   Key of the in-memory session to continue (created if missing).
 * @param userMessage Text of the new user message.
 * @param ctx         Tool context passed to every executeTool() call; its
 *                    `memoryUpdates` are returned in the result.
 * @param opts        Optional history seed and knowledge text for the system prompt.
 * @returns The final reply plus bookkeeping (turn count, tool names, trimmed history).
 *          Errors thrown by the LLM client propagate to the caller; tool errors and
 *          malformed tool arguments are reported back to the model instead.
 */
export async function orchestratorChat(
    sessionId: string,
    userMessage: string,
    ctx: ToolContext,
    opts?: {
        /** History loaded from the DB — seeds the in-memory session only when that session is still empty */
        preloadedHistory?: LLMMessage[];
        /** Knowledge items to inject as context at start of conversation */
        knowledgeContext?: string;
    }
): Promise<ChatResult> {
    const modelId = process.env.ORCHESTRATOR_MODEL ?? 'B'; // Tier B = GLM-5
    const llm = createLLM(modelId, { temperature: 0.3 });

    const session = getOrCreateSession(sessionId);

    // Seed session from DB history if provided and session is fresh
    if (opts?.preloadedHistory && opts.preloadedHistory.length > 0 && session.history.length === 0) {
        session.history = [...opts.preloadedHistory];
    }

    const oaiTools = toOAITools(ALL_TOOLS);

    // Append user message
    session.history.push({ role: 'user', content: userMessage });

    let turn = 0;
    let finalReply = '';
    let finalReasoning: string | null = null;
    const toolCallNames: string[] = [];

    // Resolve system prompt from template — {{knowledge}} injects project/COO context
    const systemContent = resolvePrompt('orchestrator', {
        knowledge: opts?.knowledgeContext ?? ''
    });

    // System prompt + a bounded, well-formed tail of the history (cost control)
    const buildMessages = (): LLMMessage[] => [
        { role: 'system', content: systemContent },
        ...recentHistoryWindow(session.history)
    ];

    while (turn < MAX_TURNS) {
        turn++;

        let response = await llm.chat(buildMessages(), oaiTools, 4096);

        // If GLM-5 is still reasoning (content null, finish_reason length) give it more tokens
        if (response.content === null && response.tool_calls.length === 0 && response.finish_reason === 'length') {
            // Retry with more tokens — model hit max_tokens during reasoning.
            // The retry fully replaces the truncated response so no stale fields survive.
            response = await llm.chat(buildMessages(), oaiTools, 8192);
        }

        // Keep the most recent non-empty reasoning (informational, not stored in history)
        if (response.reasoning) finalReasoning = response.reasoning;

        // Only push assistant message if it has actual content or tool calls;
        // skip empty turns that result from mid-reasoning token exhaustion.
        const hasContent = response.content !== null && response.content !== '';
        const hasToolCalls = response.tool_calls.length > 0;

        if (hasContent || hasToolCalls) {
            const assistantMsg: LLMMessage = {
                role: 'assistant',
                content: response.content,
                tool_calls: hasToolCalls ? response.tool_calls : undefined
            };
            session.history.push(assistantMsg);
        }

        // No tool calls — we have the final answer
        if (!hasToolCalls) {
            finalReply = response.content ?? '';
            break;
        }

        // Execute each tool call and collect results
        for (const tc of response.tool_calls) {
            const fnName = tc.function.name;
            toolCallNames.push(fnName);

            let result: unknown;
            try {
                // Malformed arguments throw here and are reported to the model like any
                // other tool failure, instead of running the tool with empty arguments.
                const fnArgs = parseToolArguments(tc.function.arguments);
                result = await executeTool(fnName, fnArgs, ctx);
            } catch (err) {
                result = { error: err instanceof Error ? err.message : String(err) };
            }

            // Every tool call gets exactly one tool message so the exchange stays well-formed
            session.history.push({
                role: 'tool',
                tool_call_id: tc.id,
                name: fnName,
                content: serializeToolResult(result)
            });
        }
    }

    if (turn >= MAX_TURNS && !finalReply) {
        finalReply = 'Hit the turn limit. Try a more specific request.';
    }

    // Drop empty assistant messages, then return a bounded tail that does not
    // start with orphaned tool results (callers persist and later preload this).
    const persistable = session.history.filter(
        m => m.role !== 'assistant' || m.content || m.tool_calls?.length
    );

    return {
        reply: finalReply,
        reasoning: finalReasoning,
        sessionId,
        turns: turn,
        toolCalls: toolCallNames,
        model: llm.modelId,
        history: recentHistoryWindow(persistable),
        memoryUpdates: ctx.memoryUpdates
    };
}