This repository has been archived on 2026-06-07. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
master-ai/vibn-frontend/lib/ai/gemini-chat.ts

286 lines
8.2 KiB
TypeScript

/**
* Gemini 3.1 Pro chat client with tool-calling support.
*
* Architecture:
* - Tool-calling rounds use generateContent (non-streaming) so we always
* get the complete response including thought_signature. Thinking models
* (2.5+, 3.x) require this field to be echoed back in functionResponse
* and it is not reliably present in individual SSE chunks.
* - Final text-only response uses streamGenerateContent for good UX.
*/
/** API key read from the GOOGLE_API_KEY environment variable; an empty string when it is unset. */
const GEMINI_API_KEY = process.env.GOOGLE_API_KEY || "";
/** Model id placed in request URLs; VIBN_CHAT_MODEL overrides the default when set to a non-empty value. */
const GEMINI_MODEL = process.env.VIBN_CHAT_MODEL || "gemini-3.1-pro-preview";
/** Root of the Gemini REST API (v1beta) that the `/models/...` endpoint paths are appended to. */
const GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta";
/**
* One conversation turn in this module's own format, before it is converted
* to Gemini's `contents[]` shape.
*/
export interface ChatMessage {
/**
* Who produced the turn. When converted for Gemini, "assistant" is sent as
* role "model" and "tool" is sent as a functionResponse part under role "user".
*/
role: "user" | "assistant" | "tool";
/** Text of the turn; for "tool" messages this is the tool result, sent as `response.content`. */
content: string;
/** Function calls requested by an "assistant" turn; each one becomes a functionCall part. */
toolCalls?: ToolCall[];
/** For "tool" messages: sent as the functionResponse `id`. */
toolCallId?: string;
/** For "tool" messages: sent as the functionResponse `name` ("unknown" is sent when it is missing). */
toolName?: string;
/** NOTE(review): not read by the conversion code in this file; only ToolCall.thoughtSignature is forwarded — confirm whether other modules use it. */
thoughtSignature?: string;
}
/** A function call requested by the model, as parsed from a `functionCall` response part. */
export interface ToolCall {
/** Call id; taken from the response when present, otherwise generated locally by callGeminiChat. */
id: string;
/** Name of the function the model wants to invoke. */
name: string;
/** Arguments supplied by the model; an empty object when the response carries none. */
args: Record<string, unknown>;
/** Must be echoed back in functionResponse for Gemini thinking models */
thoughtSignature?: string;
}
/** Describes one callable tool; forwarded to Gemini as an entry of `functionDeclarations`. */
export interface ToolDefinition {
/** Function name the model uses when requesting a call. */
name: string;
/** Natural-language description sent alongside the name. */
description: string;
/** Parameter specification, passed through unchanged — presumably a JSON-schema-style object; confirm against the Gemini function-calling docs. */
parameters: Record<string, unknown>;
}
/** One event yielded by the streaming chat generator. */
export interface ChatChunk {
/**
* Event kind. The streaming function in this file yields "text", "thinking",
* "done" and "error"; it does not itself yield "tool_call".
*/
type: "text" | "thinking" | "tool_call" | "done" | "error";
/** Text fragment for "text" and "thinking" chunks. */
text?: string;
/** Tool call payload — presumably populated by other producers of "tool_call" chunks; verify against callers. */
toolCall?: ToolCall;
/** Human-readable failure description for "error" chunks. */
error?: string;
}
/**
 * Translate the module's chat history into the `contents` array sent to Gemini.
 *
 * - "user" messages become a user turn holding a single text part.
 * - "assistant" messages become a "model" turn: the text part (if any) first,
 *   followed by one functionCall part per tool call. An assistant message with
 *   neither text nor tool calls produces no turn at all.
 * - "tool" messages become functionResponse parts under the "user" role. When
 *   the turn built just before is already a user turn, the part is appended to
 *   it, so several tool results in a row end up in a single turn.
 *
 * @param messages Conversation history in chronological order.
 * @returns A freshly built array; the input messages are not modified.
 */
function toGeminiContents(messages: ChatMessage[]) {
  const turns: any[] = [];

  for (const message of messages) {
    switch (message.role) {
      case "user": {
        turns.push({ role: "user", parts: [{ text: message.content }] });
        break;
      }

      case "assistant": {
        const modelParts: any[] = message.content ? [{ text: message.content }] : [];
        for (const call of message.toolCalls ?? []) {
          const { id, name, args, thoughtSignature } = call;
          // The signature sits NEXT TO functionCall on the part object, never
          // inside it. See: ai.google.dev/gemini-api/docs/thought-signatures
          modelParts.push(
            thoughtSignature
              ? { functionCall: { name, args, id }, thoughtSignature }
              : { functionCall: { name, args, id } },
          );
        }
        if (modelParts.length > 0) {
          turns.push({ role: "model", parts: modelParts });
        }
        break;
      }

      case "tool": {
        const responsePart = {
          functionResponse: {
            name: message.toolName || "unknown",
            id: message.toolCallId,
            response: { content: message.content },
          },
        };
        const previous = turns[turns.length - 1];
        if (previous?.role === "user") {
          previous.parts.push(responsePart);
        } else {
          turns.push({ role: "user", parts: [responsePart] });
        }
        break;
      }
    }
  }

  return turns;
}
/**
 * Wrap tool definitions in the `tools` structure of a Gemini request.
 *
 * @param tools Tools the model may call.
 * @returns `undefined` when there are no tools; otherwise a one-element array
 *   whose `functionDeclarations` holds the name, description and parameters of
 *   each tool. Other properties on the definitions are not copied.
 */
function toGeminiFunctions(tools: ToolDefinition[]) {
  if (tools.length === 0) {
    return undefined;
  }

  const functionDeclarations: Array<{
    name: string;
    description: string;
    parameters: Record<string, unknown>;
  }> = [];
  for (const { name, description, parameters } of tools) {
    functionDeclarations.push({ name, description, parameters });
  }

  return [{ functionDeclarations }];
}
/**
 * Assemble the JSON request body shared by the streaming and non-streaming
 * Gemini endpoints.
 *
 * The body always carries the converted conversation, the system instruction
 * and a generation config (temperature 0.7 unless overridden, 8192 max output
 * tokens). A `tools` entry is added only when at least one tool is supplied.
 */
function buildBody(opts: {
  systemPrompt: string;
  messages: ChatMessage[];
  tools?: ToolDefinition[];
  temperature?: number;
  /**
   * Ask Gemini to return its thought summaries as parts marked
   * `thought: true`. Thinking tokens are billed either way; enabling this
   * only makes them visible, so the UI can narrate progress ("Reading
   * server.js…", "Shipping to production…") between tool calls instead of
   * showing a silent tool tray. Defaults to true.
   */
  includeThoughts?: boolean;
}) {
  const contents = toGeminiContents(opts.messages);
  const declaredTools = toGeminiFunctions(opts.tools ?? []);

  const generationConfig = {
    temperature: opts.temperature ?? 0.7,
    maxOutputTokens: 8192,
    thinkingConfig: { includeThoughts: opts.includeThoughts ?? true },
  };

  const request: any = {
    contents,
    systemInstruction: { parts: [{ text: opts.systemPrompt }] },
    generationConfig,
  };
  // Leave `tools` out entirely when nothing was declared.
  if (declaredTools) {
    request.tools = declaredTools;
  }
  return request;
}
/**
 * Non-streaming call — used for tool-calling rounds.
 *
 * Posts the conversation to `generateContent` and splits the parts of the
 * first candidate into user-facing text, thought summaries and function
 * calls. Because the whole response arrives at once, each function call keeps
 * the `thoughtSignature` found on its part.
 *
 * Failures are reported through the `error` field rather than thrown:
 * transport errors, non-2xx statuses, a success response whose body is not
 * valid JSON, and prompts the API refused (`promptFeedback.blockReason` with
 * no candidates).
 *
 * The API key is sent in the `x-goog-api-key` header, not the query string,
 * so it does not end up in URLs recorded by logs or proxies.
 *
 * @param opts Conversation, system prompt and optional tools / sampling settings.
 * @returns Parsed reply; `text`, `thoughts` and `toolCalls` are empty when `error` is set.
 */
export async function callGeminiChat(opts: {
  systemPrompt: string;
  messages: ChatMessage[];
  tools?: ToolDefinition[];
  temperature?: number;
  includeThoughts?: boolean;
}): Promise<{
  text: string;
  /** First-person reasoning narration; meant for a "thinking" UI panel, not the main bubble. */
  thoughts: string;
  toolCalls: ToolCall[];
  finishReason?: string;
  error?: string;
}> {
  // Every failure path returns the same empty payload plus a message.
  const failure = (error: string) => ({ text: "", thoughts: "", toolCalls: [], error });

  const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:generateContent`;
  let res: Response;
  try {
    res = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "x-goog-api-key": GEMINI_API_KEY,
      },
      body: JSON.stringify(buildBody(opts)),
    });
  } catch (e) {
    return failure(`Network error: ${e instanceof Error ? e.message : String(e)}`);
  }

  let data: any = {};
  let bodyParsed = true;
  try {
    data = await res.json();
  } catch {
    bodyParsed = false;
  }

  if (!res.ok) {
    const msg = data?.error?.message || JSON.stringify(data).slice(0, 200);
    return failure(`Gemini API error ${res.status}: ${msg}`);
  }
  if (!bodyParsed) {
    // A 2xx with an unreadable body must not look like an empty model reply.
    return failure("Gemini API returned a response body that is not valid JSON");
  }

  const cand = data?.candidates?.[0];
  if (!cand) {
    // A refused prompt comes back with no candidates and a block reason.
    const blockReason = data?.promptFeedback?.blockReason;
    if (blockReason) return failure(`Gemini blocked the prompt: ${blockReason}`);
  }

  const parts: any[] = cand?.content?.parts ?? [];
  let text = "";
  let thoughts = "";
  const toolCalls: ToolCall[] = [];
  for (const part of parts) {
    if (part.text) {
      // Gemini tags reasoning parts with `thought: true`. They are kept out of
      // `text` so they never appear in the chat bubble as if they were prose
      // for the user; the caller can surface them separately.
      if (part.thought) thoughts += part.text;
      else text += part.text;
    }
    if (part.functionCall) {
      toolCalls.push({
        // Generate a local id when the API did not supply one.
        id:
          part.functionCall.id ||
          `tc-${Date.now()}-${Math.random().toString(36).slice(2)}`,
        name: part.functionCall.name,
        args: part.functionCall.args ?? {},
        // thoughtSignature is a SIBLING of functionCall in the part, not inside it
        thoughtSignature: part.thoughtSignature,
      });
    }
  }
  return { text, thoughts, toolCalls, finishReason: cand?.finishReason };
}
/**
 * Turn one line of the SSE response into zero or more chat chunks.
 * Lines that are not `data:` fields, are empty, are the `[DONE]` sentinel or
 * do not contain valid JSON produce nothing.
 */
function* chunksFromSseLine(line: string): Generator<ChatChunk> {
  // The SSE format allows "data:" with or without a following space.
  if (!line.startsWith("data:")) return;
  const payload = line.slice(5).trim();
  if (!payload || payload === "[DONE]") return;

  let event: any;
  try {
    event = JSON.parse(payload);
  } catch {
    return;
  }

  const parts: any[] = event?.candidates?.[0]?.content?.parts ?? [];
  for (const part of parts) {
    if (!part?.text) continue;
    yield part.thought
      ? { type: "thinking", text: part.text }
      : { type: "text", text: part.text };
  }
}

/**
 * Streaming call — used for the final text-only response.
 *
 * Posts the conversation to `streamGenerateContent` (SSE) and yields a "text"
 * or "thinking" chunk for every text part as it arrives, followed by a single
 * "done" chunk. Any failure — transport error, non-2xx status, missing body,
 * or a read error in the middle of the stream — is reported as one "error"
 * chunk, after which the generator ends without yielding "done".
 *
 * The API key is sent in the `x-goog-api-key` header, not the query string.
 * If the consumer stops iterating early, the response body is cancelled so the
 * underlying connection is not left open.
 *
 * @param opts Conversation, system prompt and optional tools / temperature.
 */
export async function* streamGeminiChat(opts: {
  systemPrompt: string;
  messages: ChatMessage[];
  tools?: ToolDefinition[];
  temperature?: number;
}): AsyncGenerator<ChatChunk> {
  const url = `${GEMINI_BASE_URL}/models/${GEMINI_MODEL}:streamGenerateContent?alt=sse`;
  let res: Response;
  try {
    res = await fetch(url, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "x-goog-api-key": GEMINI_API_KEY,
      },
      body: JSON.stringify(buildBody(opts)),
    });
  } catch (e) {
    yield {
      type: "error",
      error: `Network error: ${e instanceof Error ? e.message : String(e)}`,
    };
    return;
  }
  if (!res.ok) {
    const errText = await res.text().catch(() => "");
    yield {
      type: "error",
      error: `Gemini API error ${res.status}: ${errText.slice(0, 300)}`,
    };
    return;
  }
  const reader = res.body?.getReader();
  if (!reader) {
    yield { type: "error", error: "No response body" };
    return;
  }

  const decoder = new TextDecoder();
  let buffer = "";
  let drained = false;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      // The last element is an incomplete line; keep it for the next read.
      buffer = lines.pop() ?? "";
      for (const line of lines) {
        yield* chunksFromSseLine(line);
      }
    }
    // Flush the decoder and handle a final line that had no trailing newline.
    buffer += decoder.decode();
    yield* chunksFromSseLine(buffer);
    drained = true;
  } catch (e) {
    yield {
      type: "error",
      error: `Stream error: ${e instanceof Error ? e.message : String(e)}`,
    };
    return;
  } finally {
    // Reached with drained === false when the consumer stopped early or a
    // read failed: cancel so the body is not left half-consumed.
    if (!drained) await reader.cancel().catch(() => {});
    reader.releaseLock();
  }
  yield { type: "done" };
}