master-ai/vibn-frontend/lib/ai/gemini-chat.ts

/**
 * Gemini 3.1 Pro chat client with tool-calling support.
 *
 * Architecture:
 *   - Tool-calling rounds use generateContent (non-streaming) so we always
 *     get the complete response including thought_signature. Thinking models
 *     (2.5+, 3.x) require this field to be echoed back on the functionCall
 *     part when the functionResponse is sent, and it is not reliably present
 *     in individual SSE chunks.
 *   - Final text-only response uses streamGenerateContent for good UX.
 */

// API key sent with every Gemini request; "" when GOOGLE_API_KEY is unset or empty.
const GEMINI_API_KEY = process.env.GOOGLE_API_KEY || "";
// Model id interpolated into the request URL. `||` (not `??`) means an empty
// VIBN_CHAT_MODEL also falls back to the default.
const GEMINI_MODEL = process.env.VIBN_CHAT_MODEL || "gemini-3.1-pro-preview";
// Root of the REST endpoints; `/models/<model>:<method>` is appended per request.
const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta";

/**
 * One entry of conversation history in this module's own format.
 * toGeminiContents translates these into Gemini `contents` turns.
 */
export interface ChatMessage {
  /** "assistant" is sent to Gemini as role "model"; "tool" carries a function result. */
  role: "user" | "assistant" | "tool";
  /** Message text; for "tool" messages this is the tool's output. */
  content: string;
  /** Tool calls requested by the model; only read on "assistant" messages. */
  toolCalls?: ToolCall[];
  /** Id of the call being answered; only read on "tool" messages. */
  toolCallId?: string;
  /** Name of the tool that ran; only read on "tool" messages ("unknown" is sent when absent). */
  toolName?: string;
  /**
   * NOTE(review): nothing in this file reads this field — per-call signatures
   * live on ToolCall. Confirm whether any caller still relies on it.
   */
  thoughtSignature?: string;
}

/** A single function call requested by the model. */
export interface ToolCall {
  /** Call id; callGeminiChat generates a `tc-…` id when Gemini does not supply one. */
  id: string;
  /** Name of the function the model wants to invoke. */
  name: string;
  /** Arguments for the call; `{}` when Gemini sends none. */
  args: Record<string, unknown>;
  /**
   * Opaque signature Gemini thinking models attach to a functionCall part.
   * It must be sent back unchanged next to that same functionCall when the
   * history is replayed together with the functionResponse.
   */
  thoughtSignature?: string;
}

/**
 * Declaration of a tool the model may call. All three fields are forwarded
 * unchanged as one entry of Gemini's `functionDeclarations`.
 */
export interface ToolDefinition {
  /** Function name the model will use in its functionCall. */
  name: string;
  /** Natural-language description shown to the model. */
  description: string;
  /** Parameter schema — presumably a JSON-Schema-style object; verify against callers. */
  parameters: Record<string, unknown>;
}

/** One event emitted by streamGeminiChat. */
export interface ChatChunk {
  /**
   * "text" and "thinking" carry `text`; "error" carries `error`; "done"
   * marks a clean end of stream. NOTE(review): "tool_call" is not emitted
   * by the code in this file — presumably produced by a caller; confirm.
   */
  type: "text" | "thinking" | "tool_call" | "done" | "error";
  /** Text fragment for "text" / "thinking" chunks. */
  text?: string;
  /** Payload for "tool_call" chunks (not set anywhere in this file). */
  toolCall?: ToolCall;
  /** Human-readable failure description for "error" chunks. */
  error?: string;
}

/**
 * Translate ChatMessage history into the `contents` array of a Gemini request.
 *
 * - "user" messages become a user turn with a single text part.
 * - "assistant" messages become a "model" turn: the optional text part
 *   followed by one functionCall part per tool call. An assistant message
 *   with neither text nor tool calls is dropped.
 * - "tool" messages become functionResponse parts. They are appended to the
 *   preceding turn when that turn has role "user" (so consecutive tool
 *   results share one turn); otherwise they open a new user turn.
 */
function toGeminiContents(messages: ChatMessage[]) {
  const turns: any[] = [];

  for (const message of messages) {
    switch (message.role) {
      case "user":
        turns.push({ role: "user", parts: [{ text: message.content }] });
        break;

      case "assistant": {
        const modelParts: any[] = message.content
          ? [{ text: message.content }]
          : [];
        for (const call of message.toolCalls ?? []) {
          // The signature sits NEXT TO `functionCall` inside the part, never
          // within it. See: ai.google.dev/gemini-api/docs/thought-signatures
          const { name, args, id, thoughtSignature } = call;
          modelParts.push(
            thoughtSignature
              ? { functionCall: { name, args, id }, thoughtSignature }
              : { functionCall: { name, args, id } },
          );
        }
        if (modelParts.length > 0) {
          turns.push({ role: "model", parts: modelParts });
        }
        break;
      }

      case "tool": {
        const responsePart = {
          functionResponse: {
            name: message.toolName || "unknown",
            id: message.toolCallId,
            response: { content: message.content },
          },
        };
        const previous = turns[turns.length - 1];
        const joinsPreviousTurn = previous?.role === "user";
        if (joinsPreviousTurn) {
          previous.parts.push(responsePart);
        } else {
          turns.push({ role: "user", parts: [responsePart] });
        }
        break;
      }
    }
  }

  return turns;
}

/**
 * Wrap tool definitions in the shape Gemini expects for `tools`: a single
 * entry whose `functionDeclarations` lists each tool's name, description
 * and parameters. Returns undefined for an empty list so the caller can
 * leave `tools` out of the request entirely.
 */
function toGeminiFunctions(tools: ToolDefinition[]) {
  if (tools.length === 0) return undefined;

  // Copy only the three declared fields; anything else on the object is not forwarded.
  const functionDeclarations = tools.map(
    ({ name, description, parameters }) => ({ name, description, parameters }),
  );
  return [{ functionDeclarations }];
}

/**
 * Assemble the JSON body shared by the streaming and non-streaming calls:
 * converted history, system instruction, generation config and — only when
 * at least one tool is declared — the `tools` array.
 */
function buildBody(opts: {
  systemPrompt: string;
  messages: ChatMessage[];
  tools?: ToolDefinition[];
  temperature?: number;
  /**
   * When true (the default), Gemini returns its thought summaries as parts
   * flagged `thought: true`. Thinking tokens are billed either way; turning
   * this on merely exposes them so the UI can narrate progress
   * ("Reading server.js…", "Shipping to production…") between tool calls
   * rather than showing a silent tool tray.
   */
  includeThoughts?: boolean;
}) {
  const generationConfig = {
    temperature: opts.temperature ?? 0.7,
    maxOutputTokens: 8192,
    thinkingConfig: { includeThoughts: opts.includeThoughts ?? true },
  };

  const requestBody: any = {
    contents: toGeminiContents(opts.messages),
    systemInstruction: { parts: [{ text: opts.systemPrompt }] },
    generationConfig,
  };

  // `tools` is omitted (not sent empty) when there is nothing to declare.
  const functionTools = toGeminiFunctions(opts.tools ?? []);
  if (functionTools) {
    requestBody.tools = functionTools;
  }

  return requestBody;
}

/**
 * Non-streaming call — used for tool-calling rounds.
 *
 * Because the whole response arrives at once, any thoughtSignature attached
 * to a functionCall part is captured together with that call.
 *
 * Transport and API failures are reported through the `error` field (with
 * empty `text`, `thoughts` and `toolCalls`) rather than thrown. A 2xx
 * response that carries no candidate — e.g. because the prompt was blocked —
 * is also reported as an error instead of looking like an empty success.
 *
 * @param opts Prompt, history, optional tools and sampling options; passed to buildBody as-is.
 * @returns `text` (user-facing prose), `thoughts` (parts flagged `thought: true`),
 *   `toolCalls` (one per functionCall part) and the candidate's `finishReason`.
 */
export async function callGeminiChat(opts: {
  systemPrompt: string;
  messages: ChatMessage[];
  tools?: ToolDefinition[];
  temperature?: number;
  includeThoughts?: boolean;
}): Promise<{
  text: string;
  /** First-person reasoning narration; meant for a "thinking" UI panel, not the main bubble. */
  thoughts: string;
  toolCalls: ToolCall[];
  finishReason?: string;
  error?: string;
}> {
  // The key travels in the `x-goog-api-key` header, not the query string,
  // so it cannot end up in URL-based access logs or error reports.
  const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:generateContent`;

  // Every failure path returns the same empty shape plus a message.
  const failure = (error: string) => ({
    text: "",
    thoughts: "",
    toolCalls: [] as ToolCall[],
    error,
  });

  let res: Response;
  try {
    res = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "x-goog-api-key": GEMINI_API_KEY,
      },
      body: JSON.stringify(buildBody(opts)),
    });
  } catch (e) {
    return failure(
      `Network error: ${e instanceof Error ? e.message : String(e)}`,
    );
  }

  // A body that is not JSON degrades to `{}` and is handled by the checks below.
  const data = await res.json().catch(() => ({}));
  if (!res.ok) {
    const msg = data?.error?.message || JSON.stringify(data).slice(0, 200);
    return failure(`Gemini API error ${res.status}: ${msg}`);
  }

  const cand = data?.candidates?.[0];
  if (!cand) {
    // A blocked prompt comes back as 2xx with promptFeedback and no candidates.
    const blockReason = data?.promptFeedback?.blockReason;
    return failure(
      blockReason
        ? `Gemini blocked the prompt: ${blockReason}`
        : "Gemini returned no candidates",
    );
  }

  const parts: any[] = Array.isArray(cand.content?.parts)
    ? cand.content.parts
    : [];
  let text = "";
  let thoughts = "";
  const toolCalls: ToolCall[] = [];

  for (const part of parts) {
    if (part.text) {
      // CRITICAL: Gemini tags reasoning parts with `thought: true`. Mixing
      // them into `text` would leak them into the chat bubble as if they
      // were prose for the user. They get their own bucket so the route
      // can stream them as a separate SSE event type.
      if (part.thought) thoughts += part.text;
      else text += part.text;
    }
    if (part.functionCall) {
      toolCalls.push({
        // Fall back to a locally generated id when Gemini supplies none.
        id:
          part.functionCall.id ||
          `tc-${Date.now()}-${Math.random().toString(36).slice(2)}`,
        name: part.functionCall.name,
        args: part.functionCall.args ?? {},
        // thoughtSignature is a SIBLING of functionCall in the part, not inside it
        thoughtSignature: part.thoughtSignature,
      });
    }
  }

  return { text, thoughts, toolCalls, finishReason: cand.finishReason };
}

/**
 * Turn one SSE line into the chunks it carries. Only `data:` lines hold a
 * payload; blank lines, other fields, `[DONE]` and unparseable JSON yield
 * nothing. Parts flagged `thought: true` become "thinking" chunks, every
 * other text part becomes a "text" chunk.
 */
function chunksFromSseLine(line: string): ChatChunk[] {
  // The space after the colon is optional in SSE, so match on "data:" only.
  if (!line.startsWith("data:")) return [];
  const payload = line.slice(5).trim();
  if (!payload || payload === "[DONE]") return [];

  let event: any;
  try {
    event = JSON.parse(payload);
  } catch {
    return [];
  }

  const parts: unknown = event?.candidates?.[0]?.content?.parts;
  if (!Array.isArray(parts)) return [];

  const chunks: ChatChunk[] = [];
  for (const part of parts) {
    if (!part?.text) continue;
    chunks.push(
      part.thought
        ? { type: "thinking", text: part.text }
        : { type: "text", text: part.text },
    );
  }
  return chunks;
}

/**
 * Streaming call — used for the final text-only response.
 *
 * Yields "text" / "thinking" chunks as they arrive and a final "done" chunk
 * on a clean end of stream. Any failure — network error, non-2xx status,
 * missing body, or a read error in the middle of the stream — is reported
 * as a single "error" chunk, after which the generator ends without "done".
 * Function-call parts are not surfaced here.
 *
 * @param opts Prompt, history, optional tools and sampling options; passed to buildBody as-is.
 */
export async function* streamGeminiChat(opts: {
  systemPrompt: string;
  messages: ChatMessage[];
  tools?: ToolDefinition[];
  temperature?: number;
  includeThoughts?: boolean;
}): AsyncGenerator<ChatChunk> {
  // The key travels in the `x-goog-api-key` header, not the query string,
  // so it cannot end up in URL-based access logs or error reports.
  const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:streamGenerateContent?alt=sse`;

  let res: Response;
  try {
    res = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "x-goog-api-key": GEMINI_API_KEY,
      },
      body: JSON.stringify(buildBody(opts)),
    });
  } catch (e) {
    yield {
      type: "error",
      error: `Network error: ${e instanceof Error ? e.message : String(e)}`,
    };
    return;
  }

  if (!res.ok) {
    const errText = await res.text().catch(() => "");
    yield {
      type: "error",
      error: `Gemini API error ${res.status}: ${errText.slice(0, 300)}`,
    };
    return;
  }

  const reader = res.body?.getReader();
  if (!reader) {
    yield { type: "error", error: "No response body" };
    return;
  }

  const decoder = new TextDecoder();
  let buffer = "";
  // True once the server has closed the stream; decides whether cleanup must cancel.
  let streamEnded = false;

  try {
    while (true) {
      let step: Awaited<ReturnType<typeof reader.read>>;
      try {
        step = await reader.read();
      } catch (e) {
        // Only the read itself is guarded, so a connection drop is reported
        // the same way as the initial fetch failure above.
        yield {
          type: "error",
          error: `Stream error: ${e instanceof Error ? e.message : String(e)}`,
        };
        return;
      }
      if (step.done) break;

      buffer += decoder.decode(step.value, { stream: true });
      const lines = buffer.split("\n");
      // The last element is an incomplete line (or ""); keep it for the next read.
      buffer = lines.pop() ?? "";

      for (const line of lines) {
        yield* chunksFromSseLine(line);
      }
    }
    streamEnded = true;

    // Flush bytes still held by the decoder and process a final line that
    // was not newline-terminated; otherwise the last event would be lost.
    buffer += decoder.decode();
    if (buffer) {
      yield* chunksFromSseLine(buffer);
    }
  } finally {
    // On early exit (consumer stopped iterating, or a read failed) cancel the
    // stream so the underlying connection is released, not merely unlocked.
    if (!streamEnded) {
      await reader.cancel().catch(() => {});
    }
    reader.releaseLock();
  }

  yield { type: "done" };
}