/**
 * Server-side helpers for AlloyDB vector memory operations
 *
 * Handles CRUD operations on knowledge_chunks and semantic search.
 */

import { getAlloyDbClient, executeQuery, getPooledClient } from '@/lib/db/alloydb';
import type {
  KnowledgeChunk,
  KnowledgeChunkRow,
  KnowledgeChunkSearchResult,
  VectorSearchOptions,
  CreateKnowledgeChunkInput,
  BatchCreateKnowledgeChunksInput,
} from '@/lib/types/vector-memory';

/** Row returned by the similarity search: the chunk columns plus the computed score. */
type KnowledgeChunkSearchRow = KnowledgeChunkRow & { similarity: number };

/** Connection handle produced by `getPooledClient()`. */
type PooledClient = Awaited<ReturnType<typeof getPooledClient>>;

/**
 * INSERT statement shared by the single and batch create paths.
 * Parameters: project_id, knowledge_item_id, chunk_index, content,
 * embedding (JSON array text cast to vector), source_type, importance.
 */
const INSERT_CHUNK_SQL = `
  INSERT INTO knowledge_chunks (
    project_id, knowledge_item_id, chunk_index, content,
    embedding, source_type, importance
  )
  VALUES ($1, $2, $3, $4, $5::vector, $6, $7)
  RETURNING
    id, project_id, knowledge_item_id, chunk_index, content,
    source_type, importance, created_at, updated_at
`;

/** Render a caught value as a message, whether or not it is an `Error`. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Convert database row (snake_case) to TypeScript object (camelCase)
 */
function rowToKnowledgeChunk(row: KnowledgeChunkRow): KnowledgeChunk {
  return {
    id: row.id,
    projectId: row.project_id,
    knowledgeItemId: row.knowledge_item_id,
    chunkIndex: row.chunk_index,
    content: row.content,
    sourceType: row.source_type,
    importance: row.importance,
    createdAt: row.created_at,
    updatedAt: row.updated_at,
  };
}

/**
 * Run `work` between BEGIN and COMMIT on a pooled client.
 *
 * On any failure a ROLLBACK is attempted and the ORIGINAL error is rethrown;
 * a failing ROLLBACK is only logged so it cannot mask the root cause.
 * The client is always released.
 */
async function runInTransaction<T>(
  work: (client: PooledClient) => Promise<T>
): Promise<T> {
  const client = await getPooledClient();

  try {
    await client.query('BEGIN');
    const value = await work(client);
    await client.query('COMMIT');
    return value;
  } catch (error) {
    try {
      await client.query('ROLLBACK');
    } catch (rollbackError) {
      console.error('[Vector Memory] Failed to roll back transaction:', rollbackError);
    }
    throw error;
  } finally {
    client.release();
  }
}

/**
 * Insert every chunk of `input` using the given client.
 * The caller owns the surrounding transaction.
 */
async function insertChunksWithClient(
  client: PooledClient,
  input: BatchCreateKnowledgeChunksInput
): Promise<KnowledgeChunk[]> {
  const { projectId, knowledgeItemId, chunks } = input;
  const created: KnowledgeChunk[] = [];

  // Awaited one at a time so the statements run in order on the single
  // transaction client.
  for (const chunk of chunks) {
    const result = await client.query(INSERT_CHUNK_SQL, [
      projectId,
      knowledgeItemId,
      chunk.chunkIndex,
      chunk.content,
      JSON.stringify(chunk.embedding),
      chunk.sourceType ?? null,
      chunk.importance ?? null,
    ]);

    if (result.rows.length > 0) {
      created.push(rowToKnowledgeChunk(result.rows[0]));
    }
  }

  return created;
}

/**
 * Atomically replace all chunks of a knowledge_item: the DELETE of the old
 * chunks and the INSERTs of the new ones share one transaction, so a failed
 * insert leaves the previous chunks in place.
 */
async function replaceChunksForKnowledgeItem(
  input: BatchCreateKnowledgeChunksInput
): Promise<KnowledgeChunk[]> {
  const { knowledgeItemId } = input;

  const { deletedCount, created } = await runInTransaction(async (client) => {
    const deleted = await client.query(
      'DELETE FROM knowledge_chunks WHERE knowledge_item_id = $1 RETURNING id',
      [knowledgeItemId]
    );
    const inserted = await insertChunksWithClient(client, input);
    return { deletedCount: deleted.rowCount ?? 0, created: inserted };
  });

  // Logged only after COMMIT so the message never describes rolled-back work.
  console.log(
    `[Vector Memory] Replaced ${deletedCount} existing chunks with ${created.length} new chunks for knowledge_item ${knowledgeItemId}`
  );

  return created;
}

/**
 * Retrieve relevant knowledge chunks using vector similarity search
 *
 * @param projectId - Firestore project ID to filter by
 * @param queryEmbedding - Vector embedding of the query (e.g., user's question)
 * @param options - Search options (limit, filters, etc.)
 * @returns Array of chunks ordered by similarity (most relevant first)
 * @throws Error prefixed with "Failed to retrieve chunks:" when the query fails
 *
 * @example
 * ```typescript
 * const embedding = await embedText("What's the MVP scope?");
 * const chunks = await retrieveRelevantChunks('proj123', embedding, { limit: 10, minSimilarity: 0.7 });
 * ```
 */
export async function retrieveRelevantChunks(
  projectId: string,
  queryEmbedding: number[],
  options: VectorSearchOptions = {}
): Promise<KnowledgeChunkSearchResult[]> {
  const { limit = 10, minSimilarity, sourceTypes, importanceLevels } = options;

  try {
    // $1 is the query vector (JSON array text cast to vector), $2 the project.
    const params: unknown[] = [JSON.stringify(queryEmbedding), projectId];

    // `push` returns the new length, which is exactly the 1-based placeholder
    // index of the value just appended.
    const addParam = (value: unknown): string => `$${params.push(value)}`;

    let queryText = `
      SELECT
        id, project_id, knowledge_item_id, chunk_index, content,
        source_type, importance, created_at, updated_at,
        1 - (embedding <=> $1::vector) AS similarity
      FROM knowledge_chunks
      WHERE project_id = $2
    `;

    // Filter by source types
    if (sourceTypes && sourceTypes.length > 0) {
      queryText += ` AND source_type = ANY(${addParam(sourceTypes)})`;
    }

    // Filter by importance levels
    if (importanceLevels && importanceLevels.length > 0) {
      queryText += ` AND importance = ANY(${addParam(importanceLevels)})`;
    }

    // Filter by minimum similarity (same expression as the selected score)
    if (minSimilarity !== undefined) {
      queryText += ` AND (1 - (embedding <=> $1::vector)) >= ${addParam(minSimilarity)}`;
    }

    // Smallest distance first, i.e. highest similarity first
    queryText += ` ORDER BY embedding <=> $1::vector LIMIT ${addParam(limit)}`;

    const result = await executeQuery<KnowledgeChunkSearchRow>(queryText, params);

    return result.rows.map((row) => ({
      ...rowToKnowledgeChunk(row),
      similarity: row.similarity,
    }));
  } catch (error) {
    console.error('[Vector Memory] Failed to retrieve relevant chunks:', error);
    throw new Error(`Failed to retrieve chunks: ${describeError(error)}`);
  }
}

/**
 * Create a single knowledge chunk
 *
 * @param input - Chunk fields; `sourceType` and `importance` default to null
 * @returns The inserted chunk as returned by the database
 * @throws Error prefixed with "Failed to create chunk:" when the insert fails
 *   or returns no row
 */
export async function createKnowledgeChunk(
  input: CreateKnowledgeChunkInput
): Promise<KnowledgeChunk> {
  const {
    projectId,
    knowledgeItemId,
    chunkIndex,
    content,
    embedding,
    sourceType = null,
    importance = null,
  } = input;

  try {
    const result = await executeQuery<KnowledgeChunkRow>(INSERT_CHUNK_SQL, [
      projectId,
      knowledgeItemId,
      chunkIndex,
      content,
      JSON.stringify(embedding),
      sourceType,
      importance,
    ]);

    const [row] = result.rows;
    if (!row) {
      throw new Error('Failed to insert knowledge chunk');
    }

    return rowToKnowledgeChunk(row);
  } catch (error) {
    console.error('[Vector Memory] Failed to create knowledge chunk:', error);
    throw new Error(`Failed to create chunk: ${describeError(error)}`);
  }
}

/**
 * Batch create multiple knowledge chunks efficiently
 *
 * Uses a transaction to ensure atomicity.
 *
 * @param input - Project, knowledge item and the chunks to insert
 * @returns The inserted chunks in input order; `[]` when `chunks` is empty
 *   (no database access happens in that case)
 * @throws Error prefixed with "Failed to batch create chunks:" on any failure;
 *   the transaction is rolled back first
 */
export async function batchCreateKnowledgeChunks(
  input: BatchCreateKnowledgeChunksInput
): Promise<KnowledgeChunk[]> {
  const { knowledgeItemId, chunks } = input;

  if (chunks.length === 0) {
    return [];
  }

  try {
    const createdChunks = await runInTransaction((client) =>
      insertChunksWithClient(client, input)
    );

    console.log(
      `[Vector Memory] Batch created ${createdChunks.length} chunks for knowledge_item ${knowledgeItemId}`
    );

    return createdChunks;
  } catch (error) {
    console.error('[Vector Memory] Failed to batch create chunks:', error);
    throw new Error(`Failed to batch create chunks: ${describeError(error)}`);
  }
}

/**
 * Delete all chunks for a specific knowledge_item
 *
 * Used when regenerating chunks or removing a knowledge_item.
 *
 * @returns Number of deleted rows (0 when the driver reports no row count)
 * @throws Error prefixed with "Failed to delete chunks:" when the query fails
 */
export async function deleteChunksForKnowledgeItem(
  knowledgeItemId: string
): Promise<number> {
  try {
    const queryText = `
      DELETE FROM knowledge_chunks
      WHERE knowledge_item_id = $1
      RETURNING id
    `;

    const result = await executeQuery(queryText, [knowledgeItemId]);
    const deletedCount = result.rowCount ?? 0;

    console.log(
      `[Vector Memory] Deleted ${deletedCount} chunks for knowledge_item ${knowledgeItemId}`
    );

    return deletedCount;
  } catch (error) {
    console.error('[Vector Memory] Failed to delete chunks:', error);
    throw new Error(`Failed to delete chunks: ${describeError(error)}`);
  }
}

/**
 * Delete all chunks for a specific project
 *
 * Used when cleaning up or resetting a project.
 *
 * @returns Number of deleted rows (0 when the driver reports no row count)
 * @throws Error prefixed with "Failed to delete project chunks:" when the query fails
 */
export async function deleteChunksForProject(projectId: string): Promise<number> {
  try {
    const queryText = `
      DELETE FROM knowledge_chunks
      WHERE project_id = $1
      RETURNING id
    `;

    const result = await executeQuery(queryText, [projectId]);
    const deletedCount = result.rowCount ?? 0;

    console.log(
      `[Vector Memory] Deleted ${deletedCount} chunks for project ${projectId}`
    );

    return deletedCount;
  } catch (error) {
    console.error('[Vector Memory] Failed to delete project chunks:', error);
    throw new Error(`Failed to delete project chunks: ${describeError(error)}`);
  }
}

/**
 * Get chunk count for a knowledge_item
 *
 * Best-effort: a failing query is logged and reported as 0 rather than thrown.
 */
export async function getChunkCountForKnowledgeItem(
  knowledgeItemId: string
): Promise<number> {
  try {
    const result = await executeQuery<{ count: string }>(
      'SELECT COUNT(*) as count FROM knowledge_chunks WHERE knowledge_item_id = $1',
      [knowledgeItemId]
    );

    // The count column is typed as a string here, hence the parse.
    return parseInt(result.rows[0]?.count ?? '0', 10);
  } catch (error) {
    console.error('[Vector Memory] Failed to get chunk count:', error);
    return 0;
  }
}

/**
 * Get chunk count for a project
 *
 * Best-effort: a failing query is logged and reported as 0 rather than thrown.
 */
export async function getChunkCountForProject(projectId: string): Promise<number> {
  try {
    const result = await executeQuery<{ count: string }>(
      'SELECT COUNT(*) as count FROM knowledge_chunks WHERE project_id = $1',
      [projectId]
    );

    // The count column is typed as a string here, hence the parse.
    return parseInt(result.rows[0]?.count ?? '0', 10);
  } catch (error) {
    console.error('[Vector Memory] Failed to get project chunk count:', error);
    return 0;
  }
}

/**
 * Regenerate knowledge_chunks for a single knowledge_item
 *
 * This is the main pipeline that:
 * 1. Chunks the knowledge_item.content
 * 2. Generates embeddings for each chunk
 * 3. Replaces the item's existing chunks with the new ones in AlloyDB, in a
 *    single transaction (a failed insert keeps the previous chunks)
 *
 * The chunking and embedding modules are loaded with dynamic `import()` at
 * call time.
 *
 * @param knowledgeItem - The knowledge item to process
 * @throws Error prefixed with "Failed to write chunks:" when chunking,
 *   embedding or the database replace fails
 *
 * @example
 * ```typescript
 * const knowledgeItem = await getKnowledgeItem(projectId, itemId);
 * await writeKnowledgeChunksForItem(knowledgeItem);
 * ```
 */
export async function writeKnowledgeChunksForItem(
  knowledgeItem: {
    id: string;
    projectId: string;
    content: string;
    sourceMeta?: {
      sourceType?: string;
      importance?: 'primary' | 'supporting' | 'irrelevant';
    };
  }
): Promise<void> {
  const { chunkText } = await import('@/lib/ai/chunking');
  const { embedTextBatch } = await import('@/lib/ai/embeddings');

  try {
    console.log(
      `[Vector Memory] Starting chunking pipeline for knowledge_item ${knowledgeItem.id}`
    );

    // Step 1: Chunk the content
    const textChunks = chunkText(knowledgeItem.content, {
      maxTokens: 800,
      overlapChars: 200,
      preserveParagraphs: true,
    });

    if (textChunks.length === 0) {
      // NOTE(review): chunks already stored for this item are left untouched
      // here — confirm that keeping them when content becomes empty is intended.
      console.warn(
        `[Vector Memory] No chunks generated for knowledge_item ${knowledgeItem.id} - content may be empty`
      );
      return;
    }

    console.log(
      `[Vector Memory] Generated ${textChunks.length} chunks for knowledge_item ${knowledgeItem.id}`
    );

    // Step 2: Generate embeddings for all chunks
    const chunkTexts = textChunks.map((chunk) => chunk.text);
    const embeddings = await embedTextBatch(chunkTexts, {
      delayMs: 50, // Small delay to avoid rate limiting
      skipEmpty: true,
    });

    // Embeddings are matched to chunks by position, so the counts must agree.
    if (embeddings.length !== textChunks.length) {
      throw new Error(
        `Embedding count mismatch: got ${embeddings.length}, expected ${textChunks.length}`
      );
    }

    // Step 3: Swap old chunks for new ones atomically
    const chunksToInsert = textChunks.map((chunk, index) => ({
      chunkIndex: chunk.index,
      content: chunk.text,
      embedding: embeddings[index],
      sourceType: knowledgeItem.sourceMeta?.sourceType ?? null,
      importance: knowledgeItem.sourceMeta?.importance ?? null,
    }));

    await replaceChunksForKnowledgeItem({
      projectId: knowledgeItem.projectId,
      knowledgeItemId: knowledgeItem.id,
      chunks: chunksToInsert,
    });

    console.log(
      `[Vector Memory] Successfully processed ${chunksToInsert.length} chunks for knowledge_item ${knowledgeItem.id}`
    );
  } catch (error) {
    console.error(
      `[Vector Memory] Failed to write chunks for knowledge_item ${knowledgeItem.id}:`,
      error
    );
    throw new Error(`Failed to write chunks: ${describeError(error)}`);
  }
}