// File: vibn-frontend/lib/server/vector-memory.ts
/**
* Server-side helpers for AlloyDB vector memory operations
*
* Handles CRUD operations on knowledge_chunks and semantic search.
*/
import { getAlloyDbClient, executeQuery, getPooledClient } from '@/lib/db/alloydb';
import type {
KnowledgeChunk,
KnowledgeChunkRow,
KnowledgeChunkSearchResult,
VectorSearchOptions,
CreateKnowledgeChunkInput,
BatchCreateKnowledgeChunksInput,
} from '@/lib/types/vector-memory';
/**
 * Map a knowledge_chunks database row (snake_case columns) onto the
 * camelCase KnowledgeChunk shape used throughout the application.
 */
function rowToKnowledgeChunk(row: KnowledgeChunkRow): KnowledgeChunk {
  const {
    id,
    project_id: projectId,
    knowledge_item_id: knowledgeItemId,
    chunk_index: chunkIndex,
    content,
    source_type: sourceType,
    importance,
    created_at: createdAt,
    updated_at: updatedAt,
  } = row;
  return {
    id,
    projectId,
    knowledgeItemId,
    chunkIndex,
    content,
    sourceType,
    importance,
    createdAt,
    updatedAt,
  };
}
/**
 * Retrieve relevant knowledge chunks using vector similarity search
 *
 * Similarity is computed as `1 - (embedding <=> query)` using pgvector's
 * cosine-distance operator, so 1.0 is most similar.
 *
 * @param projectId - Firestore project ID to filter by
 * @param queryEmbedding - Vector embedding of the query (e.g., user's question)
 * @param options - Search options (limit, similarity floor, source/importance filters)
 * @returns Array of chunks ordered by similarity (most relevant first)
 * @throws Error when the underlying query fails
 *
 * @example
 * ```typescript
 * const embedding = await embedText("What's the MVP scope?");
 * const chunks = await retrieveRelevantChunks('proj123', embedding, { limit: 10, minSimilarity: 0.7 });
 * ```
 */
export async function retrieveRelevantChunks(
  projectId: string,
  queryEmbedding: number[],
  options: VectorSearchOptions = {}
): Promise<KnowledgeChunkSearchResult[]> {
  const {
    limit = 10,
    minSimilarity,
    sourceTypes,
    importanceLevels,
  } = options;
  try {
    // Base query: $1 is the query vector, $2 the project filter.
    let queryText = `
      SELECT
        id,
        project_id,
        knowledge_item_id,
        chunk_index,
        content,
        source_type,
        importance,
        created_at,
        updated_at,
        1 - (embedding <=> $1::vector) AS similarity
      FROM knowledge_chunks
      WHERE project_id = $2
    `;
    // Typed parameter list (previously any[]) so accidentally pushing a bad
    // value is caught by the compiler instead of by the database driver.
    const params: Array<string | number | string[]> = [
      JSON.stringify(queryEmbedding),
      projectId,
    ];
    let paramIndex = 3;
    // Optional filter: restrict to specific source types
    if (sourceTypes && sourceTypes.length > 0) {
      queryText += ` AND source_type = ANY($${paramIndex})`;
      params.push(sourceTypes);
      paramIndex++;
    }
    // Optional filter: restrict to specific importance levels
    if (importanceLevels && importanceLevels.length > 0) {
      queryText += ` AND importance = ANY($${paramIndex})`;
      params.push(importanceLevels);
      paramIndex++;
    }
    // Optional filter: drop rows below the minimum similarity floor
    if (minSimilarity !== undefined) {
      queryText += ` AND (1 - (embedding <=> $1::vector)) >= $${paramIndex}`;
      params.push(minSimilarity);
      paramIndex++;
    }
    // Nearest-first: ordering by raw distance ascending is equivalent to
    // ordering by similarity descending and lets pgvector use its index.
    queryText += ` ORDER BY embedding <=> $1::vector LIMIT $${paramIndex}`;
    params.push(limit);
    const result = await executeQuery<KnowledgeChunkRow & { similarity: number }>(
      queryText,
      params
    );
    return result.rows.map((row) => ({
      ...rowToKnowledgeChunk(row),
      similarity: row.similarity,
    }));
  } catch (error) {
    console.error('[Vector Memory] Failed to retrieve relevant chunks:', error);
    throw new Error(
      `Failed to retrieve chunks: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
/**
 * Create a single knowledge chunk
 *
 * Inserts one row into knowledge_chunks and returns the stored record
 * (the embedding column is not read back).
 */
export async function createKnowledgeChunk(
  input: CreateKnowledgeChunkInput
): Promise<KnowledgeChunk> {
  const insertSql = `
    INSERT INTO knowledge_chunks (
      project_id,
      knowledge_item_id,
      chunk_index,
      content,
      embedding,
      source_type,
      importance
    )
    VALUES ($1, $2, $3, $4, $5::vector, $6, $7)
    RETURNING
      id,
      project_id,
      knowledge_item_id,
      chunk_index,
      content,
      source_type,
      importance,
      created_at,
      updated_at
  `;
  try {
    // pgvector accepts the embedding as a JSON-array string cast to ::vector.
    const values = [
      input.projectId,
      input.knowledgeItemId,
      input.chunkIndex,
      input.content,
      JSON.stringify(input.embedding),
      input.sourceType ?? null,
      input.importance ?? null,
    ];
    const result = await executeQuery<KnowledgeChunkRow>(insertSql, values);
    const inserted = result.rows[0];
    if (!inserted) {
      throw new Error('Failed to insert knowledge chunk');
    }
    return rowToKnowledgeChunk(inserted);
  } catch (error) {
    console.error('[Vector Memory] Failed to create knowledge chunk:', error);
    throw new Error(
      `Failed to create chunk: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
/**
 * Batch create multiple knowledge chunks efficiently
 *
 * Runs all inserts on one pooled connection inside a single transaction so
 * the batch is atomic: either every chunk is stored or none are.
 *
 * @param input - Project/item identifiers plus the chunk payloads to insert
 * @returns The created chunks (without embeddings), in input order
 * @throws Error when any insert fails (the transaction is rolled back)
 */
export async function batchCreateKnowledgeChunks(
  input: BatchCreateKnowledgeChunksInput
): Promise<KnowledgeChunk[]> {
  const { projectId, knowledgeItemId, chunks } = input;
  // Nothing to do — avoid checking out a connection for an empty batch.
  if (chunks.length === 0) {
    return [];
  }
  // Loop-invariant statement text, hoisted so it is built once per batch
  // rather than once per chunk.
  const insertSql = `
    INSERT INTO knowledge_chunks (
      project_id,
      knowledge_item_id,
      chunk_index,
      content,
      embedding,
      source_type,
      importance
    )
    VALUES ($1, $2, $3, $4, $5::vector, $6, $7)
    RETURNING
      id,
      project_id,
      knowledge_item_id,
      chunk_index,
      content,
      source_type,
      importance,
      created_at,
      updated_at
  `;
  const client = await getPooledClient();
  try {
    await client.query('BEGIN');
    const createdChunks: KnowledgeChunk[] = [];
    for (const chunk of chunks) {
      const result = await client.query<KnowledgeChunkRow>(insertSql, [
        projectId,
        knowledgeItemId,
        chunk.chunkIndex,
        chunk.content,
        JSON.stringify(chunk.embedding),
        chunk.sourceType ?? null,
        chunk.importance ?? null,
      ]);
      if (result.rows.length > 0) {
        createdChunks.push(rowToKnowledgeChunk(result.rows[0]));
      }
    }
    await client.query('COMMIT');
    console.log(
      `[Vector Memory] Batch created ${createdChunks.length} chunks for knowledge_item ${knowledgeItemId}`
    );
    return createdChunks;
  } catch (error) {
    // Guard the rollback: on a broken connection a failing ROLLBACK would
    // otherwise be thrown here and mask the original, more useful error.
    try {
      await client.query('ROLLBACK');
    } catch (rollbackError) {
      console.error('[Vector Memory] Rollback failed:', rollbackError);
    }
    console.error('[Vector Memory] Failed to batch create chunks:', error);
    throw new Error(
      `Failed to batch create chunks: ${error instanceof Error ? error.message : String(error)}`
    );
  } finally {
    // Always return the connection to the pool, success or failure.
    client.release();
  }
}
/**
 * Delete all chunks for a specific knowledge_item
 *
 * Used when regenerating chunks or removing a knowledge_item.
 *
 * @param knowledgeItemId - The knowledge_item whose chunks are removed
 * @returns Number of rows deleted
 * @throws Error when the delete fails
 */
export async function deleteChunksForKnowledgeItem(
  knowledgeItemId: string
): Promise<number> {
  try {
    // Plain DELETE: the driver reports the affected-row count via rowCount,
    // so there is no need for RETURNING id (which shipped every deleted id
    // back over the wire just to count rows).
    const queryText = `
      DELETE FROM knowledge_chunks
      WHERE knowledge_item_id = $1
    `;
    const result = await executeQuery(queryText, [knowledgeItemId]);
    const deleted = result.rowCount ?? 0;
    console.log(
      `[Vector Memory] Deleted ${deleted} chunks for knowledge_item ${knowledgeItemId}`
    );
    return deleted;
  } catch (error) {
    console.error('[Vector Memory] Failed to delete chunks:', error);
    throw new Error(
      `Failed to delete chunks: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
/**
 * Delete all chunks for a specific project
 *
 * Used when cleaning up or resetting a project.
 *
 * @param projectId - The project whose chunks are removed
 * @returns Number of rows deleted
 * @throws Error when the delete fails
 */
export async function deleteChunksForProject(projectId: string): Promise<number> {
  try {
    // Plain DELETE: rowCount is populated without RETURNING, so avoid
    // transferring every deleted id back just to count rows.
    const queryText = `
      DELETE FROM knowledge_chunks
      WHERE project_id = $1
    `;
    const result = await executeQuery(queryText, [projectId]);
    const deleted = result.rowCount ?? 0;
    console.log(
      `[Vector Memory] Deleted ${deleted} chunks for project ${projectId}`
    );
    return deleted;
  } catch (error) {
    console.error('[Vector Memory] Failed to delete project chunks:', error);
    throw new Error(
      `Failed to delete project chunks: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
/**
 * Get chunk count for a knowledge_item
 *
 * Best-effort: logs and returns 0 instead of throwing when the query fails.
 */
export async function getChunkCountForKnowledgeItem(
  knowledgeItemId: string
): Promise<number> {
  try {
    // COUNT(*) comes back from the driver as a string.
    const result = await executeQuery<{ count: string }>(
      'SELECT COUNT(*) as count FROM knowledge_chunks WHERE knowledge_item_id = $1',
      [knowledgeItemId]
    );
    const countText = result.rows[0]?.count;
    return countText === undefined ? 0 : parseInt(countText, 10);
  } catch (error) {
    console.error('[Vector Memory] Failed to get chunk count:', error);
    return 0;
  }
}
/**
 * Get chunk count for a project
 *
 * Best-effort: logs and returns 0 instead of throwing when the query fails.
 */
export async function getChunkCountForProject(projectId: string): Promise<number> {
  try {
    // COUNT(*) comes back from the driver as a string.
    const result = await executeQuery<{ count: string }>(
      'SELECT COUNT(*) as count FROM knowledge_chunks WHERE project_id = $1',
      [projectId]
    );
    const countText = result.rows[0]?.count;
    return countText === undefined ? 0 : parseInt(countText, 10);
  } catch (error) {
    console.error('[Vector Memory] Failed to get project chunk count:', error);
    return 0;
  }
}
/**
 * Regenerate knowledge_chunks for a single knowledge_item
 *
 * This is the main pipeline that:
 * 1. Chunks the knowledge_item.content
 * 2. Generates embeddings for each chunk
 * 3. Deletes existing chunks for this item
 * 4. Inserts new chunks into AlloyDB
 *
 * NOTE(review): steps 3 and 4 are separate operations, not one transaction —
 * if the batch insert fails after the delete, the item has no chunks until
 * the pipeline is re-run. Confirm callers retry on failure.
 *
 * @param knowledgeItem - The knowledge item to process
 * @throws Error when chunking, embedding, or either database write fails
 *
 * @example
 * ```typescript
 * const knowledgeItem = await getKnowledgeItem(projectId, itemId);
 * await writeKnowledgeChunksForItem(knowledgeItem);
 * ```
 */
export async function writeKnowledgeChunksForItem(
  knowledgeItem: {
    id: string;
    projectId: string;
    content: string;
    sourceMeta?: { sourceType?: string; importance?: 'primary' | 'supporting' | 'irrelevant' };
  }
): Promise<void> {
  // Dynamic imports keep the chunking/embedding modules out of code paths
  // that never run this pipeline.
  const { chunkText } = await import('@/lib/ai/chunking');
  const { embedTextBatch } = await import('@/lib/ai/embeddings');
  try {
    console.log(
      `[Vector Memory] Starting chunking pipeline for knowledge_item ${knowledgeItem.id}`
    );
    // Step 1: Chunk the content
    const textChunks = chunkText(knowledgeItem.content, {
      maxTokens: 800,
      overlapChars: 200,
      preserveParagraphs: true,
    });
    // Empty content is not an error: warn, leave any existing chunks
    // untouched, and exit early.
    if (textChunks.length === 0) {
      console.warn(
        `[Vector Memory] No chunks generated for knowledge_item ${knowledgeItem.id} - content may be empty`
      );
      return;
    }
    console.log(
      `[Vector Memory] Generated ${textChunks.length} chunks for knowledge_item ${knowledgeItem.id}`
    );
    // Step 2: Generate embeddings for all chunks
    const chunkTexts = textChunks.map((chunk) => chunk.text);
    const embeddings = await embedTextBatch(chunkTexts, {
      delayMs: 50, // Small delay to avoid rate limiting
      skipEmpty: true,
    });
    // Guard the 1:1 index pairing used below. NOTE(review): with skipEmpty
    // enabled, an empty chunk text would presumably shorten `embeddings` and
    // trip this check — confirm chunkText never emits empty chunks.
    if (embeddings.length !== textChunks.length) {
      throw new Error(
        `Embedding count mismatch: got ${embeddings.length}, expected ${textChunks.length}`
      );
    }
    // Step 3: Delete existing chunks for this knowledge_item
    await deleteChunksForKnowledgeItem(knowledgeItem.id);
    // Step 4: Insert new chunks — each chunk is paired with its embedding by
    // position, and item-level source metadata is copied onto every chunk.
    const chunksToInsert = textChunks.map((chunk, index) => ({
      chunkIndex: chunk.index,
      content: chunk.text,
      embedding: embeddings[index],
      sourceType: knowledgeItem.sourceMeta?.sourceType ?? null,
      importance: knowledgeItem.sourceMeta?.importance ?? null,
    }));
    await batchCreateKnowledgeChunks({
      projectId: knowledgeItem.projectId,
      knowledgeItemId: knowledgeItem.id,
      chunks: chunksToInsert,
    });
    console.log(
      `[Vector Memory] Successfully processed ${chunksToInsert.length} chunks for knowledge_item ${knowledgeItem.id}`
    );
  } catch (error) {
    console.error(
      `[Vector Memory] Failed to write chunks for knowledge_item ${knowledgeItem.id}:`,
      error
    );
    // Re-wrap with context; the original error is preserved in its message.
    throw new Error(
      `Failed to write chunks: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}