// vibn-frontend/lib/ai/gemini-chat.ts

/**
 * Gemini 3.1 Pro chat client with tool-calling support.
 *
 * Architecture:
 *   - Tool-calling rounds use generateContent (non-streaming) so we always
 *     get the complete response, including the thought signature. Thinking
 *     models (2.5+, 3.x) require this field to be echoed back alongside the
 *     functionCall part on the next request, and it is not reliably present
 *     in individual SSE chunks.
 *   - Final text-only response uses streamGenerateContent for good UX.
 */

// API key used to authenticate requests. `||` means an unset OR empty
// GOOGLE_API_KEY both end up as the empty string.
const GEMINI_API_KEY = process.env.GOOGLE_API_KEY || '';
// Model id interpolated into the request URL; an unset or empty
// VIBN_CHAT_MODEL falls back to the default below.
const GEMINI_MODEL = process.env.VIBN_CHAT_MODEL || 'gemini-3.1-pro-preview';
// REST base URL (v1beta) that the `/models/<model>:<method>` paths are appended to.
const GEMINI_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta';

/**
 * One turn of conversation history in this module's own shape.
 * `toGeminiContents` maps these onto Gemini's `contents[]` entries.
 */
export interface ChatMessage {
  /** 'user' and 'tool' turns are sent with Gemini role 'user'; 'assistant' turns are sent as 'model'. */
  role: 'user' | 'assistant' | 'tool';
  /** Message text. For 'tool' turns this is the tool result, sent as `response.content`. */
  content: string;
  /** Function calls issued in this turn; only read for 'assistant' messages. */
  toolCalls?: ToolCall[];
  /** For 'tool' turns: id of the call being answered, sent as `functionResponse.id`. */
  toolCallId?: string;
  /** For 'tool' turns: function name, sent as `functionResponse.name` ('unknown' when omitted). */
  toolName?: string;
  /**
   * NOTE(review): not read by the conversion code visible in this module —
   * per-call signatures are taken from `ToolCall.thoughtSignature` instead.
   */
  thoughtSignature?: string;
}

/** A function call requested by the model. */
export interface ToolCall {
  /** Call id: the one Gemini returned, or a locally generated `tc-…` id when it sent none. */
  id: string;
  /** Name of the function the model wants to invoke. */
  name: string;
  /** Arguments supplied by the model (`{}` when it sent none). */
  args: Record<string, unknown>;
  /**
   * Signature read from the response part that carried the functionCall.
   * When set, it is sent back as a sibling of `functionCall` (not inside it)
   * when this call is replayed as part of the history.
   */
  thoughtSignature?: string;
}

/** A tool the model may call; forwarded to Gemini as one `functionDeclarations` entry. */
export interface ToolDefinition {
  /** Function name the model uses when it emits a call. */
  name: string;
  /** Description forwarded unchanged to the API. */
  description: string;
  /** Parameter schema object, forwarded unchanged (presumably a JSON-Schema-style object — confirm with callers). */
  parameters: Record<string, unknown>;
}

/**
 * One event yielded by `streamGeminiChat`.
 *
 * The streaming code visible in this module yields 'text', 'thinking',
 * 'error' and 'done'; 'tool_call' is declared here but not produced by it.
 */
export interface ChatChunk {
  /** Discriminator telling which of the optional fields below is populated. */
  type: 'text' | 'thinking' | 'tool_call' | 'done' | 'error';
  /** Text fragment for 'text' chunks, or thought-summary fragment for 'thinking' chunks. */
  text?: string;
  /** Call payload for 'tool_call' chunks. */
  toolCall?: ToolCall;
  /** Human-readable failure description for 'error' chunks. */
  error?: string;
}

/**
 * Translate our ChatMessage[] history into Gemini's `contents[]` array.
 *
 *   - 'user'      -> a `user` turn with a single text part.
 *   - 'assistant' -> a `model` turn: optional text part followed by one
 *                    functionCall part per tool call. Turns that end up
 *                    with no parts are omitted.
 *   - 'tool'      -> a functionResponse part. It is appended to the
 *                    preceding turn when that turn has role `user`,
 *                    otherwise it starts a new `user` turn.
 */
function toGeminiContents(messages: ChatMessage[]) {
  const turns: any[] = [];

  for (const message of messages) {
    switch (message.role) {
      case 'user':
        turns.push({ role: 'user', parts: [{ text: message.content }] });
        break;

      case 'assistant': {
        const textParts: any[] = message.content ? [{ text: message.content }] : [];
        const callParts = (message.toolCalls ?? []).map((call) => {
          // The signature sits NEXT TO functionCall on the part, never inside it.
          // See: ai.google.dev/gemini-api/docs/thought-signatures
          const callPart: any = {
            functionCall: { name: call.name, args: call.args, id: call.id },
          };
          if (call.thoughtSignature) callPart.thoughtSignature = call.thoughtSignature;
          return callPart;
        });
        const modelParts = [...textParts, ...callParts];
        if (modelParts.length > 0) turns.push({ role: 'model', parts: modelParts });
        break;
      }

      case 'tool': {
        const responsePart = {
          functionResponse: {
            name: message.toolName || 'unknown',
            id: message.toolCallId,
            response: { content: message.content },
          },
        };
        const previous = turns[turns.length - 1];
        if (previous?.role === 'user') {
          // Fold consecutive tool results into the same user turn.
          previous.parts.push(responsePart);
        } else {
          turns.push({ role: 'user', parts: [responsePart] });
        }
        break;
      }
    }
  }

  return turns;
}

/**
 * Wrap tool definitions in the shape Gemini expects for `tools`: a
 * one-element array whose entry lists every function declaration.
 * Returns undefined for an empty list so the caller can leave `tools`
 * out of the request entirely.
 */
function toGeminiFunctions(tools: ToolDefinition[]) {
  if (tools.length === 0) return undefined;

  const functionDeclarations = tools.map(({ name, description, parameters }) => ({
    name,
    description,
    parameters,
  }));
  return [{ functionDeclarations }];
}

/**
 * Assemble the JSON request body shared by the streaming and
 * non-streaming endpoints. `tools` is only set when at least one tool
 * definition was supplied.
 */
function buildBody(opts: {
  systemPrompt: string;
  messages: ChatMessage[];
  tools?: ToolDefinition[];
  temperature?: number;
  /**
   * When true (the default), Gemini is asked to include its thought
   * summaries as parts flagged `thought: true`, so the UI has something
   * to show between tool calls instead of a silent tool tray.
   */
  includeThoughts?: boolean;
}) {
  const generationConfig = {
    temperature: opts.temperature ?? 0.7,
    maxOutputTokens: 8192,
    thinkingConfig: { includeThoughts: opts.includeThoughts ?? true },
  };

  const request: any = {
    contents: toGeminiContents(opts.messages),
    systemInstruction: { parts: [{ text: opts.systemPrompt }] },
    generationConfig,
  };

  const declaredTools = toGeminiFunctions(opts.tools ?? []);
  if (declaredTools) {
    request.tools = declaredTools;
  }
  return request;
}

/**
 * Non-streaming call — used for tool-calling rounds.
 *
 * Sends a single `generateContent` request and splits the first
 * candidate's parts into visible text, thought summaries (parts flagged
 * `thought: true`) and function calls. Each function call keeps the
 * `thoughtSignature` found on its part so it can be replayed in the
 * history of the next request.
 *
 * Transport failures, non-2xx responses, unparseable response bodies and
 * blocked prompts are reported through the returned `error` string (with
 * empty `text` / `thoughts` / `toolCalls`) rather than thrown.
 *
 * @param opts Prompt, history, optional tools and generation settings;
 *   passed straight to `buildBody`.
 * @returns The parsed first candidate, or an `error` description.
 */
export async function callGeminiChat(opts: {
  systemPrompt: string;
  messages: ChatMessage[];
  tools?: ToolDefinition[];
  temperature?: number;
  includeThoughts?: boolean;
}): Promise<{
  text: string;
  /** First-person reasoning narration; meant for a "thinking" UI panel, not the main bubble. */
  thoughts: string;
  toolCalls: ToolCall[];
  finishReason?: string;
  error?: string;
}> {
  const failure = (error: string) => ({
    text: '',
    thoughts: '',
    toolCalls: [] as ToolCall[],
    error,
  });

  // The API key travels in a header instead of the query string so it
  // does not end up in logged or traced request URLs.
  const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:generateContent`;

  let res: Response;
  try {
    res = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-goog-api-key': GEMINI_API_KEY,
      },
      body: JSON.stringify(buildBody(opts)),
    });
  } catch (e) {
    return failure(`Network error: ${e instanceof Error ? e.message : String(e)}`);
  }

  // `null` marks "the body could not be parsed as JSON".
  const data: any = await res.json().catch(() => null);

  if (!res.ok) {
    const msg = data?.error?.message || JSON.stringify(data ?? {}).slice(0, 200);
    return failure(`Gemini API error ${res.status}: ${msg}`);
  }

  // A 2xx status with an unusable body must not look like an empty answer.
  if (data == null) {
    return failure(`Gemini API error ${res.status}: missing or malformed response body`);
  }

  const cand = data.candidates?.[0];
  if (!cand) {
    // When the prompt itself is rejected there are no candidates; the
    // reason is reported under promptFeedback.
    const blockReason = data.promptFeedback?.blockReason;
    if (blockReason) return failure(`Gemini blocked the prompt: ${blockReason}`);
  }

  const parts: any[] = cand?.content?.parts ?? [];
  let text = '';
  let thoughts = '';
  const toolCalls: ToolCall[] = [];

  for (const part of parts) {
    if (part.text) {
      // CRITICAL: reasoning parts are tagged `thought: true`. They go in
      // their own bucket so they never leak into the chat bubble as if
      // they were prose written for the user.
      if (part.thought) thoughts += part.text;
      else text += part.text;
    }
    if (part.functionCall) {
      toolCalls.push({
        // Synthesize an id when the API omitted one so results can be matched to calls.
        id: part.functionCall.id || `tc-${Date.now()}-${Math.random().toString(36).slice(2)}`,
        name: part.functionCall.name,
        args: part.functionCall.args ?? {},
        // thoughtSignature is a SIBLING of functionCall in the part, not inside it
        thoughtSignature: part.thoughtSignature,
      });
    }
  }

  return { text, thoughts, toolCalls, finishReason: cand?.finishReason };
}

/**
 * Turn one line of the SSE response into zero or more chunks.
 * Lines that are not `data: ` lines, are empty, are the `[DONE]` marker,
 * or do not contain valid JSON produce no chunks.
 */
function parseGeminiSseLine(line: string): ChatChunk[] {
  if (!line.startsWith('data: ')) return [];
  const payload = line.slice(6).trim();
  if (!payload || payload === '[DONE]') return [];

  let event: any;
  try {
    event = JSON.parse(payload);
  } catch {
    return [];
  }

  const chunks: ChatChunk[] = [];
  const parts: any[] = event?.candidates?.[0]?.content?.parts ?? [];
  for (const part of parts) {
    if (part?.text) {
      // Parts flagged `thought: true` are reasoning summaries, not answer text.
      chunks.push({ type: part.thought ? 'thinking' : 'text', text: part.text });
    }
  }
  return chunks;
}

/**
 * Streaming call — used for the final text-only response.
 *
 * Yields 'text' / 'thinking' chunks as they arrive, then a single 'done'
 * chunk. Any failure — network error, non-2xx status, missing body, or the
 * stream breaking part-way — is yielded as one 'error' chunk, after which
 * the generator ends without 'done'.
 *
 * If the consumer stops iterating early, the response body is cancelled so
 * the underlying connection is not left open.
 *
 * @param opts Prompt, history, optional tools and temperature; passed
 *   straight to `buildBody`.
 */
export async function* streamGeminiChat(opts: {
  systemPrompt: string;
  messages: ChatMessage[];
  tools?: ToolDefinition[];
  temperature?: number;
}): AsyncGenerator<ChatChunk> {
  // The API key travels in a header instead of the query string so it
  // does not end up in logged or traced request URLs.
  const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:streamGenerateContent?alt=sse`;

  let res: Response;
  try {
    res = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-goog-api-key': GEMINI_API_KEY,
      },
      body: JSON.stringify(buildBody(opts)),
    });
  } catch (e) {
    yield { type: 'error', error: `Network error: ${e instanceof Error ? e.message : String(e)}` };
    return;
  }

  if (!res.ok) {
    const errText = await res.text().catch(() => '');
    yield { type: 'error', error: `Gemini API error ${res.status}: ${errText.slice(0, 300)}` };
    return;
  }

  const reader = res.body?.getReader();
  if (!reader) { yield { type: 'error', error: 'No response body' }; return; }

  const decoder = new TextDecoder();
  let buffer = '';
  let finished = false;
  let readError: string | undefined;

  try {
    while (true) {
      let step: Awaited<ReturnType<typeof reader.read>>;
      try {
        step = await reader.read();
      } catch (e) {
        // Report a broken stream the same way as every other failure
        // instead of letting the exception escape the generator.
        readError = `Stream error: ${e instanceof Error ? e.message : String(e)}`;
        break;
      }
      if (step.done) { finished = true; break; }

      buffer += decoder.decode(step.value, { stream: true });
      const lines = buffer.split('\n');
      // The last element is an incomplete line (or ''); keep it for the next read.
      buffer = lines.pop() ?? '';
      for (const line of lines) yield* parseGeminiSseLine(line);
    }

    if (finished) {
      // Flush the decoder and handle a final line that had no trailing newline.
      buffer += decoder.decode();
      for (const line of buffer.split('\n')) yield* parseGeminiSseLine(line);
    }
  } finally {
    // Reached early when the consumer stops iterating or the read failed:
    // cancel so the HTTP body is not left dangling.
    if (!finished) await reader.cancel().catch(() => {});
    reader.releaseLock();
  }

  if (readError) {
    yield { type: 'error', error: readError };
    return;
  }
  yield { type: 'done' };
}