VIBN Frontend for Coolify deployment

2026-02-15 19:25:52 -08:00
commit 40bf8428cd
398 changed files with 76513 additions and 0 deletions
--- a/lib/ai/embeddings.ts
+++ b/lib/ai/embeddings.ts
@@ -0,0 +1,173 @@
+/**
+ * Embedding generation using Gemini API
+ * 
+ * Converts text into vector embeddings for semantic search.
+ */
+
+import { GoogleGenerativeAI } from '@google/generative-ai';
+
+// Read the key once at import; a missing key only logs a warning here.
+const GEMINI_API_KEY = process.env.GEMINI_API_KEY;
+const genAI = GEMINI_API_KEY ? new GoogleGenerativeAI(GEMINI_API_KEY) : null;
+
+if (genAI === null) {
+  console.warn('[Embeddings] GEMINI_API_KEY not set - embedding functions will fail');
+}
+
+// text-embedding-004 produces 768-dim embeddings. When switching models, update
+// the dimension here and in knowledge-chunks-schema.sql so the two stay in sync.
+const EMBEDDING_DIMENSION = 768;
+const EMBEDDING_MODEL = 'text-embedding-004';
+
+/**
+ * Turn one piece of text into its Gemini embedding vector.
+ *
+ * A vector whose length differs from EMBEDDING_DIMENSION is still returned;
+ * the mismatch is only logged as a warning.
+ * @param text - Text to embed; must contain a non-whitespace character
+ * @returns The embedding values reported by the API
+ * @throws Error if no API key is configured, the text is blank, or the request
+ *   fails or returns no values
+ */
+export async function embedText(text: string): Promise<number[]> {
+  if (genAI === null) {
+    throw new Error('GEMINI_API_KEY not configured - cannot generate embeddings');
+  }
+  const isBlank = !text || text.trim() === '';
+  if (isBlank) {
+    throw new Error('Cannot embed empty text');
+  }
+
+  try {
+    const embedder = genAI.getGenerativeModel({ model: EMBEDDING_MODEL });
+    const response = await embedder.embedContent(text);
+    const values = response.embedding?.values;
+
+    if (!values || values.length === 0) {
+      throw new Error('Gemini returned empty embedding');
+    }
+    if (values.length !== EMBEDDING_DIMENSION) {
+      // Reported, not rejected: the vector is still handed back below.
+      console.warn(
+        `[Embeddings] Unexpected dimension: got ${values.length}, expected ${EMBEDDING_DIMENSION}`
+      );
+    }
+
+    return values;
+  } catch (error) {
+    console.error('[Embeddings] Failed to embed text:', error);
+    const reason = error instanceof Error ? error.message : String(error);
+    throw new Error(`Embedding generation failed: ${reason}`);
+  }
+}
+
+/**
+ * Generate embeddings for multiple texts, one API call per text.
+ *
+ * Calls embedText() sequentially, pausing `delayMs` between calls to avoid
+ * rate limiting. Blank texts and failed requests yield an all-zero vector of
+ * EMBEDDING_DIMENSION entries, so the result always lines up with `texts`.
+ *
+ * @param texts - Array of texts to embed
+ * @param options - `delayMs`: pause between API calls (default 100 ms);
+ *   `skipEmpty`: use a zero vector for blank texts without calling the API
+ * @returns Array of embeddings (same order as input texts)
+ * @example
+ * ```typescript
+ * const embeddings = await embedTextBatch(["First chunk...", "Second chunk..."]);
+ * // embeddings[0] corresponds to the first text, etc.
+ * ```
+ */
+export async function embedTextBatch(
+  texts: string[],
+  options: { delayMs?: number; skipEmpty?: boolean } = {}
+): Promise<number[][]> {
+  const { delayMs = 100, skipEmpty = true } = options;
+
+  if (texts.length === 0) {
+    return [];
+  }
+
+  const zeroVector = (): number[] => new Array<number>(EMBEDDING_DIMENSION).fill(0);
+  const embeddings: number[][] = [];
+  let calledApi = false;
+
+  for (let i = 0; i < texts.length; i++) {
+    const text = texts[i];
+
+    if (skipEmpty && (!text || text.trim().length === 0)) {
+      console.warn(`[Embeddings] Skipping empty text at index ${i}`);
+      embeddings.push(zeroVector());
+      continue;
+    }
+
+    // Pause before every call except the first, so a failed call (e.g. a
+    // rate-limit rejection) is also followed by a pause before the next one.
+    if (calledApi && delayMs > 0) {
+      await new Promise((resolve) => setTimeout(resolve, delayMs));
+    }
+    calledApi = true;
+
+    try {
+      embeddings.push(await embedText(text));
+    } catch (error) {
+      console.error(`[Embeddings] Failed to embed text at index ${i}:`, error);
+      embeddings.push(zeroVector()); // fallback keeps output aligned with input
+    }
+  }
+
+  console.log(`[Embeddings] Generated ${embeddings.length} embeddings`);
+  return embeddings;
+}
+
+/**
+ * Compute cosine similarity between two embeddings
+ *
+ * @param a - First embedding vector
+ * @param b - Second embedding vector
+ * @returns Score in [-1, 1] (1 = same direction, 0 = orthogonal, -1 = opposite);
+ *   0 when either vector has zero magnitude (e.g. an all-zero vector)
+ * @throws Error if the vectors have different lengths
+ */
+export function cosineSimilarity(a: number[], b: number[]): number {
+  if (a.length !== b.length) {
+    throw new Error('Embedding dimensions do not match');
+  }
+  let dotProduct = 0;
+  let normA = 0;
+  let normB = 0;
+  for (let i = 0; i < a.length; i++) {
+    dotProduct += a[i] * b[i];
+    normA += a[i] * a[i];
+    normB += b[i] * b[i];
+  }
+
+  const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
+  if (magnitude === 0) {
+    return 0;
+  }
+
+  // Rounding can push the ratio just past ±1; clamp so callers can rely on the range.
+  return Math.max(-1, Math.min(1, dotProduct / magnitude));
+}
+
+/**
+ * Return EMBEDDING_DIMENSION, the vector length expected from EMBEDDING_MODEL.
+ */
+export function getEmbeddingDimension(): number {
+  return EMBEDDING_DIMENSION;
+}
+
+/**
+ * Probe the embeddings API; true only if a vector of the expected size returns.
+ * Failures are logged and reported as false, never thrown.
+ */
+export async function checkEmbeddingsHealth(): Promise<boolean> {
+  try {
+    return (await embedText('health check')).length === EMBEDDING_DIMENSION;
+  } catch (error) {
+    console.error('[Embeddings Health Check] Failed:', error);
+    return false;
+  }
+}
+