VIBN Frontend for Coolify deployment

2026-02-15 19:25:52 -08:00
commit 40bf8428cd
398 changed files with 76513 additions and 0 deletions
--- a/lib/server/vector-memory.ts
+++ b/lib/server/vector-memory.ts
@@ -0,0 +1,453 @@
+/**
+ * Server-side helpers for AlloyDB vector memory operations
+ * 
+ * Handles CRUD operations on knowledge_chunks and semantic search.
+ */
+
+import { getAlloyDbClient, executeQuery, getPooledClient } from '@/lib/db/alloydb';
+import type {
+  KnowledgeChunk,
+  KnowledgeChunkRow,
+  KnowledgeChunkSearchResult,
+  VectorSearchOptions,
+  CreateKnowledgeChunkInput,
+  BatchCreateKnowledgeChunksInput,
+} from '@/lib/types/vector-memory';
+
+/**
+ * Convert database row (snake_case) to TypeScript object (camelCase)
+ */
+function rowToKnowledgeChunk(row: KnowledgeChunkRow): KnowledgeChunk {
+  return {
+    id: row.id,
+    projectId: row.project_id,
+    knowledgeItemId: row.knowledge_item_id,
+    chunkIndex: row.chunk_index,
+    content: row.content,
+    sourceType: row.source_type,
+    importance: row.importance,
+    createdAt: row.created_at,
+    updatedAt: row.updated_at,
+  };
+}
+
+/**
+ * Retrieve relevant knowledge chunks using vector similarity search
+ * 
+ * @param projectId - Firestore project ID to filter by
+ * @param queryEmbedding - Vector embedding of the query (e.g., user's question)
+ * @param options - Search options (limit, filters, etc.)
+ * @returns Array of chunks ordered by similarity (most relevant first)
+ * 
+ * @example
+ * ```typescript
+ * const embedding = await embedText("What's the MVP scope?");
+ * const chunks = await retrieveRelevantChunks('proj123', embedding, { limit: 10, minSimilarity: 0.7 });
+ * ```
+ */
+export async function retrieveRelevantChunks(
+  projectId: string,
+  queryEmbedding: number[],
+  options: VectorSearchOptions = {}
+): Promise<KnowledgeChunkSearchResult[]> {
+  const {
+    limit = 10,
+    minSimilarity,
+    sourceTypes,
+    importanceLevels,
+  } = options;
+
+  try {
+    // Build the query with optional filters
+    let queryText = `
+      SELECT 
+        id, 
+        project_id, 
+        knowledge_item_id, 
+        chunk_index, 
+        content, 
+        source_type, 
+        importance, 
+        created_at, 
+        updated_at,
+        1 - (embedding <=> $1::vector) AS similarity
+      FROM knowledge_chunks
+      WHERE project_id = $2
+    `;
+
+    const params: any[] = [JSON.stringify(queryEmbedding), projectId];
+    let paramIndex = 3;
+
+    // Filter by source types
+    if (sourceTypes && sourceTypes.length > 0) {
+      queryText += ` AND source_type = ANY($${paramIndex})`;
+      params.push(sourceTypes);
+      paramIndex++;
+    }
+
+    // Filter by importance levels
+    if (importanceLevels && importanceLevels.length > 0) {
+      queryText += ` AND importance = ANY($${paramIndex})`;
+      params.push(importanceLevels);
+      paramIndex++;
+    }
+
+    // Filter by minimum similarity
+    if (minSimilarity !== undefined) {
+      queryText += ` AND (1 - (embedding <=> $1::vector)) >= $${paramIndex}`;
+      params.push(minSimilarity);
+      paramIndex++;
+    }
+
+    // Order by similarity and limit
+    queryText += ` ORDER BY embedding <=> $1::vector LIMIT $${paramIndex}`;
+    params.push(limit);
+
+    const result = await executeQuery<KnowledgeChunkRow & { similarity: number }>(
+      queryText,
+      params
+    );
+
+    return result.rows.map((row) => ({
+      ...rowToKnowledgeChunk(row),
+      similarity: row.similarity,
+    }));
+  } catch (error) {
+    console.error('[Vector Memory] Failed to retrieve relevant chunks:', error);
+    throw new Error(
+      `Failed to retrieve chunks: ${error instanceof Error ? error.message : String(error)}`
+    );
+  }
+}
+
+/**
+ * Create a single knowledge chunk
+ */
+export async function createKnowledgeChunk(
+  input: CreateKnowledgeChunkInput
+): Promise<KnowledgeChunk> {
+  const {
+    projectId,
+    knowledgeItemId,
+    chunkIndex,
+    content,
+    embedding,
+    sourceType = null,
+    importance = null,
+  } = input;
+
+  try {
+    const queryText = `
+      INSERT INTO knowledge_chunks (
+        project_id, 
+        knowledge_item_id, 
+        chunk_index, 
+        content, 
+        embedding, 
+        source_type, 
+        importance
+      )
+      VALUES ($1, $2, $3, $4, $5::vector, $6, $7)
+      RETURNING 
+        id, 
+        project_id, 
+        knowledge_item_id, 
+        chunk_index, 
+        content, 
+        source_type, 
+        importance, 
+        created_at, 
+        updated_at
+    `;
+
+    const result = await executeQuery<KnowledgeChunkRow>(queryText, [
+      projectId,
+      knowledgeItemId,
+      chunkIndex,
+      content,
+      JSON.stringify(embedding),
+      sourceType,
+      importance,
+    ]);
+
+    if (result.rows.length === 0) {
+      throw new Error('Failed to insert knowledge chunk');
+    }
+
+    return rowToKnowledgeChunk(result.rows[0]);
+  } catch (error) {
+    console.error('[Vector Memory] Failed to create knowledge chunk:', error);
+    throw new Error(
+      `Failed to create chunk: ${error instanceof Error ? error.message : String(error)}`
+    );
+  }
+}
+
+/**
+ * Batch create multiple knowledge chunks efficiently
+ * 
+ * Uses a transaction to ensure atomicity.
+ */
+export async function batchCreateKnowledgeChunks(
+  input: BatchCreateKnowledgeChunksInput
+): Promise<KnowledgeChunk[]> {
+  const { projectId, knowledgeItemId, chunks } = input;
+
+  if (chunks.length === 0) {
+    return [];
+  }
+
+  const client = await getPooledClient();
+
+  try {
+    await client.query('BEGIN');
+
+    const createdChunks: KnowledgeChunk[] = [];
+
+    for (const chunk of chunks) {
+      const queryText = `
+        INSERT INTO knowledge_chunks (
+          project_id, 
+          knowledge_item_id, 
+          chunk_index, 
+          content, 
+          embedding, 
+          source_type, 
+          importance
+        )
+        VALUES ($1, $2, $3, $4, $5::vector, $6, $7)
+        RETURNING 
+          id, 
+          project_id, 
+          knowledge_item_id, 
+          chunk_index, 
+          content, 
+          source_type, 
+          importance, 
+          created_at, 
+          updated_at
+      `;
+
+      const result = await client.query<KnowledgeChunkRow>(queryText, [
+        projectId,
+        knowledgeItemId,
+        chunk.chunkIndex,
+        chunk.content,
+        JSON.stringify(chunk.embedding),
+        chunk.sourceType ?? null,
+        chunk.importance ?? null,
+      ]);
+
+      if (result.rows.length > 0) {
+        createdChunks.push(rowToKnowledgeChunk(result.rows[0]));
+      }
+    }
+
+    await client.query('COMMIT');
+
+    console.log(
+      `[Vector Memory] Batch created ${createdChunks.length} chunks for knowledge_item ${knowledgeItemId}`
+    );
+
+    return createdChunks;
+  } catch (error) {
+    await client.query('ROLLBACK');
+    console.error('[Vector Memory] Failed to batch create chunks:', error);
+    throw new Error(
+      `Failed to batch create chunks: ${error instanceof Error ? error.message : String(error)}`
+    );
+  } finally {
+    client.release();
+  }
+}
+
+/**
+ * Delete all chunks for a specific knowledge_item
+ * 
+ * Used when regenerating chunks or removing a knowledge_item.
+ */
+export async function deleteChunksForKnowledgeItem(
+  knowledgeItemId: string
+): Promise<number> {
+  try {
+    const queryText = `
+      DELETE FROM knowledge_chunks 
+      WHERE knowledge_item_id = $1
+      RETURNING id
+    `;
+
+    const result = await executeQuery(queryText, [knowledgeItemId]);
+
+    console.log(
+      `[Vector Memory] Deleted ${result.rowCount ?? 0} chunks for knowledge_item ${knowledgeItemId}`
+    );
+
+    return result.rowCount ?? 0;
+  } catch (error) {
+    console.error('[Vector Memory] Failed to delete chunks:', error);
+    throw new Error(
+      `Failed to delete chunks: ${error instanceof Error ? error.message : String(error)}`
+    );
+  }
+}
+
+/**
+ * Delete all chunks for a specific project
+ * 
+ * Used when cleaning up or resetting a project.
+ */
+export async function deleteChunksForProject(projectId: string): Promise<number> {
+  try {
+    const queryText = `
+      DELETE FROM knowledge_chunks 
+      WHERE project_id = $1
+      RETURNING id
+    `;
+
+    const result = await executeQuery(queryText, [projectId]);
+
+    console.log(
+      `[Vector Memory] Deleted ${result.rowCount ?? 0} chunks for project ${projectId}`
+    );
+
+    return result.rowCount ?? 0;
+  } catch (error) {
+    console.error('[Vector Memory] Failed to delete project chunks:', error);
+    throw new Error(
+      `Failed to delete project chunks: ${error instanceof Error ? error.message : String(error)}`
+    );
+  }
+}
+
+/**
+ * Get chunk count for a knowledge_item
+ */
+export async function getChunkCountForKnowledgeItem(
+  knowledgeItemId: string
+): Promise<number> {
+  try {
+    const result = await executeQuery<{ count: string }>(
+      'SELECT COUNT(*) as count FROM knowledge_chunks WHERE knowledge_item_id = $1',
+      [knowledgeItemId]
+    );
+
+    return parseInt(result.rows[0]?.count ?? '0', 10);
+  } catch (error) {
+    console.error('[Vector Memory] Failed to get chunk count:', error);
+    return 0;
+  }
+}
+
+/**
+ * Get chunk count for a project
+ */
+export async function getChunkCountForProject(projectId: string): Promise<number> {
+  try {
+    const result = await executeQuery<{ count: string }>(
+      'SELECT COUNT(*) as count FROM knowledge_chunks WHERE project_id = $1',
+      [projectId]
+    );
+
+    return parseInt(result.rows[0]?.count ?? '0', 10);
+  } catch (error) {
+    console.error('[Vector Memory] Failed to get project chunk count:', error);
+    return 0;
+  }
+}
+
+/**
+ * Regenerate knowledge_chunks for a single knowledge_item
+ * 
+ * This is the main pipeline that:
+ * 1. Chunks the knowledge_item.content
+ * 2. Generates embeddings for each chunk
+ * 3. Deletes existing chunks for this item
+ * 4. Inserts new chunks into AlloyDB
+ * 
+ * @param knowledgeItem - The knowledge item to process
+ * 
+ * @example
+ * ```typescript
+ * const knowledgeItem = await getKnowledgeItem(projectId, itemId);
+ * await writeKnowledgeChunksForItem(knowledgeItem);
+ * ```
+ */
+export async function writeKnowledgeChunksForItem(
+  knowledgeItem: {
+    id: string;
+    projectId: string;
+    content: string;
+    sourceMeta?: { sourceType?: string; importance?: 'primary' | 'supporting' | 'irrelevant' };
+  }
+): Promise<void> {
+  const { chunkText } = await import('@/lib/ai/chunking');
+  const { embedTextBatch } = await import('@/lib/ai/embeddings');
+
+  try {
+    console.log(
+      `[Vector Memory] Starting chunking pipeline for knowledge_item ${knowledgeItem.id}`
+    );
+
+    // Step 1: Chunk the content
+    const textChunks = chunkText(knowledgeItem.content, {
+      maxTokens: 800,
+      overlapChars: 200,
+      preserveParagraphs: true,
+    });
+
+    if (textChunks.length === 0) {
+      console.warn(
+        `[Vector Memory] No chunks generated for knowledge_item ${knowledgeItem.id} - content may be empty`
+      );
+      return;
+    }
+
+    console.log(
+      `[Vector Memory] Generated ${textChunks.length} chunks for knowledge_item ${knowledgeItem.id}`
+    );
+
+    // Step 2: Generate embeddings for all chunks
+    const chunkTexts = textChunks.map((chunk) => chunk.text);
+    const embeddings = await embedTextBatch(chunkTexts, {
+      delayMs: 50, // Small delay to avoid rate limiting
+      skipEmpty: true,
+    });
+
+    if (embeddings.length !== textChunks.length) {
+      throw new Error(
+        `Embedding count mismatch: got ${embeddings.length}, expected ${textChunks.length}`
+      );
+    }
+
+    // Step 3: Delete existing chunks for this knowledge_item
+    await deleteChunksForKnowledgeItem(knowledgeItem.id);
+
+    // Step 4: Insert new chunks
+    const chunksToInsert = textChunks.map((chunk, index) => ({
+      chunkIndex: chunk.index,
+      content: chunk.text,
+      embedding: embeddings[index],
+      sourceType: knowledgeItem.sourceMeta?.sourceType ?? null,
+      importance: knowledgeItem.sourceMeta?.importance ?? null,
+    }));
+
+    await batchCreateKnowledgeChunks({
+      projectId: knowledgeItem.projectId,
+      knowledgeItemId: knowledgeItem.id,
+      chunks: chunksToInsert,
+    });
+
+    console.log(
+      `[Vector Memory] Successfully processed ${chunksToInsert.length} chunks for knowledge_item ${knowledgeItem.id}`
+    );
+  } catch (error) {
+    console.error(
+      `[Vector Memory] Failed to write chunks for knowledge_item ${knowledgeItem.id}:`,
+      error
+    );
+    throw new Error(
+      `Failed to write chunks: ${error instanceof Error ? error.message : String(error)}`
+    );
+  }
+}
+