VIBN Frontend for Coolify deployment

This commit is contained in:
2026-02-15 19:25:52 -08:00
commit 40bf8428cd
398 changed files with 76513 additions and 0 deletions

View File

@@ -0,0 +1,228 @@
/**
* Backend Extraction Module
*
* Runs extraction as a pure backend job, not in chat.
* Called when Collector phase completes.
*/
import { getAdminDb } from '@/lib/firebase/admin';
import { GeminiLlmClient } from '@/lib/ai/gemini-client';
import { BACKEND_EXTRACTOR_SYSTEM_PROMPT } from '@/lib/ai/prompts/extractor';
import { writeKnowledgeChunksForItem } from '@/lib/server/vector-memory';
import type { ExtractionOutput, ExtractedInsight } from '@/lib/types/extraction-output';
import type { PhaseHandoff } from '@/lib/types/phase-handoff';
import { z } from 'zod';
// Zod schema mirroring the ExtractionOutput type. Passed to the LLM
// structured-call in runBackendExtractionForProject so the model's JSON
// response is validated before use.
const ExtractionOutputSchema = z.object({
  // Per-document insights, each carrying provenance back to the source text.
  insights: z.array(z.object({
    id: z.string(),
    type: z.enum(["problem", "user", "feature", "constraint", "opportunity", "other"]),
    title: z.string(),
    description: z.string(),
    sourceText: z.string(),
    sourceKnowledgeItemId: z.string(),
    importance: z.enum(["primary", "supporting"]),
    confidence: z.number().min(0).max(1), // per-insight confidence in [0, 1]
  })),
  // Flat, already-summarized string lists per category.
  problems: z.array(z.string()),
  targetUsers: z.array(z.string()),
  features: z.array(z.string()),
  constraints: z.array(z.string()),
  opportunities: z.array(z.string()),
  uncertainties: z.array(z.string()),
  missingInformation: z.array(z.string()),
  overallConfidence: z.number().min(0).max(1), // document-level confidence in [0, 1]
});
/**
 * Run document extraction for a project as a pure backend job (not in chat).
 *
 * Steps:
 *  1. Verify the project exists.
 *  2. Load imported documents (knowledge_items with sourceType
 *     'imported_document'). If none exist, write an empty extraction handoff
 *     and advance the project to extraction_review so the UI can prompt the
 *     user for materials.
 *  3. Run a structured LLM extraction per document (a failing document is
 *     logged and skipped).
 *  4. Persist each extraction to `chat_extractions`.
 *  5. Write vector chunks for primary insights (a failing insight is logged
 *     and skipped).
 *  6-7. Aggregate everything into an extraction PhaseHandoff and advance the
 *     project to extraction_review.
 *
 * @param projectId - Firestore project document ID
 * @throws If the project does not exist, or on a fatal Firestore error.
 */
export async function runBackendExtractionForProject(projectId: string): Promise<void> {
  console.log(`[Backend Extractor] Starting extraction for project ${projectId}`);
  const adminDb = getAdminDb();
  try {
    // 1. Verify the project exists before doing any work.
    // (The original also loaded projectDoc.data() into an unused local;
    // removed — only existence is needed here.)
    const projectDoc = await adminDb.collection('projects').doc(projectId).get();
    if (!projectDoc.exists) {
      throw new Error(`Project ${projectId} not found`);
    }
    // 2. Load imported documents for this project.
    const knowledgeSnapshot = await adminDb
      .collection('knowledge_items')
      .where('projectId', '==', projectId)
      .where('sourceType', '==', 'imported_document')
      .get();
    if (knowledgeSnapshot.empty) {
      console.log(`[Backend Extractor] No documents to extract for project ${projectId} - creating empty handoff`);
      // Create a minimal extraction handoff even with no documents so the
      // phase machinery still advances to review.
      const emptyHandoff: PhaseHandoff = {
        phase: 'extraction',
        readyForNextPhase: false, // Not ready - no materials to extract from
        confidence: 0,
        confirmed: {
          problems: [],
          targetUsers: [],
          features: [],
          constraints: [],
          opportunities: [],
        },
        uncertain: {},
        missing: ['No documents uploaded - need product requirements, specs, or notes'],
        questionsForUser: [
          'You haven\'t uploaded any documents yet. Do you have any product specs, requirements, or notes to share?',
        ],
        sourceEvidence: [],
        version: 'extraction_v1',
        timestamp: new Date().toISOString(),
      };
      await adminDb.collection('projects').doc(projectId).update({
        'phaseData.phaseHandoffs.extraction': emptyHandoff,
        currentPhase: 'extraction_review',
        phaseStatus: 'in_progress',
        'phaseData.extractionCompletedAt': new Date().toISOString(),
        updatedAt: new Date().toISOString(),
      });
      console.log(`[Backend Extractor] Set phase to extraction_review with empty handoff`);
      return;
    }
    console.log(`[Backend Extractor] Found ${knowledgeSnapshot.size} documents to process`);
    const llm = new GeminiLlmClient();
    const allExtractionOutputs: ExtractionOutput[] = [];
    const processedKnowledgeItemIds: string[] = [];
    // 3. Process each document independently.
    for (const knowledgeDoc of knowledgeSnapshot.docs) {
      const knowledgeData = knowledgeDoc.data();
      const knowledgeItemId = knowledgeDoc.id;
      try {
        console.log(`[Backend Extractor] Processing document: ${knowledgeData.title || knowledgeItemId}`);
        // Structured LLM call with thinking mode enabled.
        const extraction = await llm.structuredCall<ExtractionOutput>({
          model: 'gemini',
          systemPrompt: BACKEND_EXTRACTOR_SYSTEM_PROMPT,
          messages: [{
            role: 'user',
            content: `Document Title: ${knowledgeData.title || 'Untitled'}\nSource Type: ${knowledgeData.sourceType}\n\nContent:\n${knowledgeData.content}`,
          }],
          schema: ExtractionOutputSchema as any,
          temperature: 1.0, // Gemini 3 default (changed from 0.3)
          thinking_config: {
            thinking_level: 'high', // Enable deep reasoning for document analysis
            include_thoughts: false, // Don't include thought tokens in output (saves cost)
          },
        });
        // Stamp each insight with the knowledge item it came from.
        extraction.insights.forEach(insight => {
          insight.sourceKnowledgeItemId = knowledgeItemId;
        });
        allExtractionOutputs.push(extraction);
        processedKnowledgeItemIds.push(knowledgeItemId);
        // 4. Persist extraction to chat_extractions.
        await adminDb.collection('chat_extractions').add({
          projectId,
          knowledgeItemId,
          data: extraction,
          overallConfidence: extraction.overallConfidence,
          overallCompletion: extraction.overallConfidence > 0.7 ? 0.9 : 0.6,
          createdAt: new Date().toISOString(),
          updatedAt: new Date().toISOString(),
        });
        console.log(`[Backend Extractor] Extracted ${extraction.insights.length} insights from ${knowledgeData.title || knowledgeItemId}`);
        // 5. Write vector chunks for primary insights only.
        const primaryInsights = extraction.insights.filter(i => i.importance === 'primary');
        for (const insight of primaryInsights) {
          try {
            // Create a knowledge chunk for this insight
            await writeKnowledgeChunksForItem({
              id: knowledgeItemId,
              projectId,
              content: `${insight.title}\n\n${insight.description}\n\nSource: ${insight.sourceText}`,
              sourceMeta: {
                sourceType: 'extracted_insight',
                importance: 'primary',
              },
            });
          } catch (chunkError) {
            console.error(`[Backend Extractor] Failed to write chunk for insight ${insight.id}:`, chunkError);
            // Continue processing other insights
          }
        }
      } catch (docError) {
        console.error(`[Backend Extractor] Failed to process document ${knowledgeItemId}:`, docError);
        // Continue with next document
      }
    }
    // 6. Build extraction PhaseHandoff. Items are plain strings, so de-dupe
    // with Set after flattening across all documents.
    const allProblems = [...new Set(allExtractionOutputs.flatMap(e => e.problems))];
    const allUsers = [...new Set(allExtractionOutputs.flatMap(e => e.targetUsers))];
    const allFeatures = [...new Set(allExtractionOutputs.flatMap(e => e.features))];
    const allConstraints = [...new Set(allExtractionOutputs.flatMap(e => e.constraints))];
    const allOpportunities = [...new Set(allExtractionOutputs.flatMap(e => e.opportunities))];
    const allUncertainties = [...new Set(allExtractionOutputs.flatMap(e => e.uncertainties))];
    const allMissing = [...new Set(allExtractionOutputs.flatMap(e => e.missingInformation))];
    const avgConfidence = allExtractionOutputs.length > 0
      ? allExtractionOutputs.reduce((sum, e) => sum + e.overallConfidence, 0) / allExtractionOutputs.length
      : 0;
    const readyForNextPhase = allProblems.length > 0 && allFeatures.length > 0 && avgConfidence > 0.5;
    const extractionHandoff: PhaseHandoff = {
      phase: 'extraction',
      readyForNextPhase,
      confidence: avgConfidence,
      confirmed: {
        problems: allProblems,
        targetUsers: allUsers,
        features: allFeatures,
        constraints: allConstraints,
        opportunities: allOpportunities,
      },
      uncertain: {},
      missing: allMissing,
      questionsForUser: allUncertainties,
      sourceEvidence: processedKnowledgeItemIds,
      version: 'extraction_v1',
      timestamp: new Date().toISOString(),
    };
    // 7. Persist handoff and advance the project to extraction_review.
    await adminDb.collection('projects').doc(projectId).update({
      'phaseData.phaseHandoffs.extraction': extractionHandoff,
      currentPhase: 'extraction_review',
      phaseStatus: 'in_progress',
      'phaseData.extractionCompletedAt': new Date().toISOString(),
      updatedAt: new Date().toISOString(),
    });
    console.log(`[Backend Extractor] ✅ Extraction complete for project ${projectId}`);
    console.log(`[Backend Extractor] - Problems: ${allProblems.length}`);
    console.log(`[Backend Extractor] - Users: ${allUsers.length}`);
    console.log(`[Backend Extractor] - Features: ${allFeatures.length}`);
    console.log(`[Backend Extractor] - Confidence: ${(avgConfidence * 100).toFixed(1)}%`);
    console.log(`[Backend Extractor] - Ready for next phase: ${readyForNextPhase}`);
  } catch (error) {
    console.error(`[Backend Extractor] Fatal error during extraction:`, error);
    throw error;
  }
}

402
lib/server/chat-context.ts Normal file
View File

@@ -0,0 +1,402 @@
/**
* Project Context Builder for Chat
*
* Loads project state from Firestore and AlloyDB vector memory,
* building a compact context object for LLM consumption.
*/
import { getAdminDb } from '@/lib/firebase/admin';
import { retrieveRelevantChunks } from '@/lib/server/vector-memory';
import { embedText } from '@/lib/ai/embeddings';
import {
summarizeKnowledgeItems,
summarizeExtractions,
} from '@/lib/server/chat-mode-resolver';
import type { ChatMode } from '@/lib/ai/chat-modes';
import type { ProjectPhaseData, ProjectPhaseScores } from '@/lib/types/project-artifacts';
import type { PhaseHandoff } from '@/lib/types/phase-handoff';
/**
 * Compact project context for LLM consumption.
 * Produced by buildProjectContextForChat and rendered into the system prompt
 * by formatContextForPrompt.
 */
export interface ProjectChatContext {
  /** Basic project info */
  project: {
    id: string;
    name: string;
    currentPhase: string;
    phaseStatus: string;
    githubRepo?: string | null;
    githubRepoUrl?: string | null;
    extensionLinked?: boolean;
    visionAnswers?: {
      q1?: string;
      q2?: string;
      q3?: string;
      updatedAt?: string;
    };
  };
  /** Phase-specific artifacts (null when not yet produced) */
  phaseData: {
    canonicalProductModel?: any;
    mvpPlan?: any;
    marketingPlan?: any;
  };
  /** Phase scores and progress */
  phaseScores: ProjectPhaseScores;
  /** Phase handoffs for smart transitions */
  phaseHandoffs: Partial<Record<'collector' | 'extraction' | 'vision' | 'mvp' | 'marketing', PhaseHandoff>>;
  /** Knowledge summary (counts, types) */
  knowledgeSummary: {
    totalCount: number;
    bySourceType: Record<string, number>;
    recentTitles: string[];
  };
  /** Extraction summary */
  extractionSummary: {
    totalCount: number;
    avgConfidence: number;
    avgCompletion: number;
  };
  /** Relevant chunks from vector search (empty for collector/extraction-review modes) */
  retrievedChunks: {
    content: string;
    sourceType?: string | null;
    importance?: string | null;
    similarity: number;
  }[];
  /**
   * Repository analysis (if GitHub connected).
   * NOTE(review): analyzeGitHubRepository returns directories/keyFiles nested
   * under `fileStructure`, while this type declares them flattened; the
   * `as any` cast when the context is assembled hides the mismatch — confirm
   * which shape consumers expect.
   */
  repositoryAnalysis?: {
    repoFullName: string;
    totalFiles: number;
    directories: string[];
    keyFiles: string[];
    techStack: string[];
    readme: string | null;
    summary: string;
  } | null;
  /** Session history from linked Cursor sessions (chronological) */
  sessionHistory: {
    totalSessions: number;
    messages: Array<{
      role: string;
      content: string;
      timestamp: string;
      sessionId?: string;
    }>;
  };
}
/**
 * Build project context for a chat interaction.
 *
 * Aggregates Firestore project state, knowledge/extraction summaries,
 * vector-search results, optional GitHub repository analysis, and linked
 * Cursor session history into one compact object for LLM consumption.
 * Each optional sub-fetch fails soft (logged, then skipped); only a missing
 * project or a summary failure is fatal.
 *
 * @param projectId - Firestore project ID
 * @param mode - Current chat mode (collector/extraction-review modes skip vector search)
 * @param userMessage - User's message; embedded and used as the vector query
 * @param options - Context building options
 * @returns Compact context object
 * @throws Error wrapping the underlying failure when context cannot be built
 */
export async function buildProjectContextForChat(
  projectId: string,
  mode: ChatMode,
  userMessage: string,
  options: {
    retrievalLimit?: number;
    includeVectorSearch?: boolean;
    includeGitHubAnalysis?: boolean;
  } = {}
): Promise<ProjectChatContext> {
  const {
    retrievalLimit = 10,
    includeVectorSearch = true,
    includeGitHubAnalysis = true,
  } = options;
  try {
    const adminDb = getAdminDb();
    // Load project document
    const projectSnapshot = await adminDb.collection('projects').doc(projectId).get();
    if (!projectSnapshot.exists) {
      throw new Error(`Project ${projectId} not found`);
    }
    const projectData = projectSnapshot.data() ?? {};
    // Load summaries in parallel
    const [knowledgeSummary, extractionSummary] = await Promise.all([
      summarizeKnowledgeItems(projectId),
      summarizeExtractions(projectId),
    ]);
    // Vector retrieval. collector_mode gathers raw material and
    // extraction_review_mode reviews extraction results, so neither loads
    // vector chunks; an empty message also skips the search.
    let retrievedChunks: ProjectChatContext['retrievedChunks'] = [];
    if (includeVectorSearch && mode !== 'collector_mode' && mode !== 'extraction_review_mode' && userMessage.trim().length > 0) {
      try {
        const queryEmbedding = await embedText(userMessage);
        const chunks = await retrieveRelevantChunks(projectId, queryEmbedding, {
          limit: retrievalLimit,
          minSimilarity: 0.7, // Only include reasonably relevant chunks
        });
        retrievedChunks = chunks.map((chunk) => ({
          content: chunk.content,
          sourceType: chunk.sourceType,
          importance: chunk.importance,
          similarity: chunk.similarity,
        }));
        console.log(
          `[Chat Context] Retrieved ${retrievedChunks.length} chunks for project ${projectId}`
        );
      } catch (vectorError) {
        console.error('[Chat Context] Vector retrieval failed:', vectorError);
        // Continue without vector results
      }
    }
    // GitHub repository analysis (analyzer imported lazily).
    let repositoryAnalysis = null;
    if (includeGitHubAnalysis && projectData.githubRepo && projectData.userId) {
      try {
        const { analyzeGitHubRepository } = await import('@/lib/server/github-analyzer');
        repositoryAnalysis = await analyzeGitHubRepository(
          projectData.userId,
          projectData.githubRepo,
          projectData.githubDefaultBranch || 'main'
        );
      } catch (githubError) {
        console.error('[Chat Context] GitHub analysis failed:', githubError);
        // Continue without GitHub analysis
      }
    }
    // Fetch linked Cursor session history. The binding is never reassigned —
    // only its properties are filled in — so it is const (was `let`).
    const sessionHistory = {
      totalSessions: 0,
      messages: [] as Array<{
        role: string;
        content: string;
        timestamp: string;
        sessionId?: string;
      }>,
    };
    try {
      // Query sessions linked to this project
      const sessionsSnapshot = await adminDb
        .collection('sessions')
        .where('projectId', '==', projectId)
        .orderBy('startTime', 'asc')
        .get();
      if (!sessionsSnapshot.empty) {
        sessionHistory.totalSessions = sessionsSnapshot.size;
        // Extract all messages from all sessions in chronological order
        const allMessages: Array<{
          role: string;
          content: string;
          timestamp: string;
          sessionId: string;
        }> = [];
        for (const sessionDoc of sessionsSnapshot.docs) {
          const sessionData = sessionDoc.data();
          const conversation = sessionData.conversation || [];
          // Add messages from this session, skipping empty/whitespace content
          for (const msg of conversation) {
            if (msg.content && msg.content.trim()) {
              allMessages.push({
                role: msg.role || 'unknown',
                content: msg.content,
                // Normalize Date / ISO string / missing timestamps to ISO strings.
                timestamp: msg.timestamp instanceof Date
                  ? msg.timestamp.toISOString()
                  : (typeof msg.timestamp === 'string' ? msg.timestamp : new Date().toISOString()),
                sessionId: sessionDoc.id,
              });
            }
          }
        }
        // Sort all messages by timestamp (chronological order)
        allMessages.sort((a, b) =>
          new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()
        );
        sessionHistory.messages = allMessages;
        console.log(
          `[Chat Context] Loaded ${sessionHistory.totalSessions} sessions with ${allMessages.length} total messages for project ${projectId}`
        );
      } else {
        console.log(`[Chat Context] No linked sessions found for project ${projectId}`);
      }
    } catch (sessionError) {
      console.error('[Chat Context] Session history fetch failed:', sessionError);
      // Continue without session history
    }
    // Build context object
    const context: ProjectChatContext = {
      project: {
        id: projectId,
        name: projectData.name ?? 'Unnamed Project',
        currentPhase: projectData.currentPhase ?? 'collector',
        phaseStatus: projectData.phaseStatus ?? 'not_started',
        githubRepo: projectData.githubRepo ?? null,
        githubRepoUrl: projectData.githubRepoUrl ?? null,
        extensionLinked: projectData.extensionLinked ?? false,
        visionAnswers: projectData.visionAnswers ?? {},
      },
      phaseData: {
        canonicalProductModel: projectData.phaseData?.canonicalProductModel ?? null,
        mvpPlan: projectData.phaseData?.mvpPlan ?? null,
        marketingPlan: projectData.phaseData?.marketingPlan ?? null,
      },
      phaseScores: projectData.phaseScores ?? {},
      phaseHandoffs: projectData.phaseData?.phaseHandoffs ?? {},
      knowledgeSummary,
      extractionSummary,
      retrievedChunks,
      // NOTE(review): the analyzer nests directories/keyFiles under
      // fileStructure while ProjectChatContext declares them flattened; the
      // cast hides that mismatch — confirm which shape consumers use.
      repositoryAnalysis: repositoryAnalysis as any,
      sessionHistory,
    };
    return context;
  } catch (error) {
    console.error('[Chat Context] Failed to build context:', error);
    throw new Error(
      `Failed to build chat context: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
/**
 * List the artifact sources that fed into a chat context.
 *
 * Lets the UI show what the AI is drawing from (plans, vector chunks,
 * repo analysis, knowledge items, extractions, Cursor sessions).
 */
export function determineArtifactsUsed(context: ProjectChatContext): string[] {
  const used: string[] = [];
  const {
    phaseData,
    retrievedChunks,
    repositoryAnalysis,
    knowledgeSummary,
    extractionSummary,
    sessionHistory,
  } = context;

  if (phaseData.canonicalProductModel) used.push('Product Model');
  if (phaseData.mvpPlan) used.push('MVP Plan');
  if (phaseData.marketingPlan) used.push('Marketing Plan');
  if (retrievedChunks.length > 0) used.push(`${retrievedChunks.length} Vector Chunks`);
  if (repositoryAnalysis) used.push('GitHub Repo Analysis');
  if (knowledgeSummary.totalCount > 0) used.push(`${knowledgeSummary.totalCount} Knowledge Items`);
  if (extractionSummary.totalCount > 0) used.push(`${extractionSummary.totalCount} Extractions`);
  if (sessionHistory.totalSessions > 0) {
    used.push(`${sessionHistory.totalSessions} Cursor Sessions (${sessionHistory.messages.length} messages)`);
  }
  return used;
}
/**
 * Format project context as a string for an LLM system prompt.
 *
 * Produces a human-readable summary: project info, knowledge/extraction
 * counts, the top 3 retrieved chunks, GitHub repo facts, phase handoffs,
 * and the full chronological Cursor session transcript.
 */
export function formatContextForPrompt(context: ProjectChatContext): string {
  const sections: string[] = [];
  // Project info
  sections.push(`Project: ${context.project.name} (ID: ${context.project.id})`);
  sections.push(
    `Phase: ${context.project.currentPhase} (${context.project.phaseStatus})`
  );
  // Knowledge summary
  if (context.knowledgeSummary.totalCount > 0) {
    sections.push(`\nKnowledge Items: ${context.knowledgeSummary.totalCount} total`);
    if (Object.keys(context.knowledgeSummary.bySourceType).length > 0) {
      sections.push(
        ` By type: ${JSON.stringify(context.knowledgeSummary.bySourceType)}`
      );
    }
  }
  // Extraction summary
  if (context.extractionSummary.totalCount > 0) {
    sections.push(
      `\nExtractions: ${context.extractionSummary.totalCount} analyzed (avg confidence: ${(context.extractionSummary.avgConfidence * 100).toFixed(1)}%)`
    );
  }
  // Retrieved chunks (only top 3, each truncated to 150 chars)
  if (context.retrievedChunks.length > 0) {
    sections.push(`\nRelevant Context (vector search):`);
    context.retrievedChunks.slice(0, 3).forEach((chunk, i) => {
      sections.push(
        ` ${i + 1}. [${chunk.sourceType ?? 'unknown'}] (similarity: ${(chunk.similarity * 100).toFixed(1)}%)`
      );
      sections.push(` ${chunk.content.substring(0, 150)}...`);
    });
  }
  // GitHub repo
  if (context.repositoryAnalysis) {
    sections.push(`\nGitHub Repository: ${context.repositoryAnalysis.repoFullName}`);
    sections.push(` Files: ${context.repositoryAnalysis.totalFiles}`);
    sections.push(` Tech: ${context.repositoryAnalysis.techStack.join(', ')}`);
  }
  // Phase handoffs
  const handoffs = Object.keys(context.phaseHandoffs);
  if (handoffs.length > 0) {
    sections.push(`\nPhase Handoffs: ${handoffs.join(', ')}`);
  }
  // Session history: full transcript in chronological order.
  if (context.sessionHistory.totalSessions > 0) {
    sections.push(`\n## Cursor Session History (${context.sessionHistory.totalSessions} sessions, ${context.sessionHistory.messages.length} messages)`);
    sections.push(`This is your complete conversation history with the user from Cursor IDE, in chronological order.`);
    sections.push(`Use this to understand what has been built, discussed, and decided so far.\n`);
    // (Unused index parameter removed from the original forEach callback.)
    context.sessionHistory.messages.forEach((msg) => {
      const timestamp = new Date(msg.timestamp).toLocaleString();
      sections.push(`[${timestamp}] ${msg.role}:`);
      sections.push(msg.content);
      sections.push(''); // Empty line between messages
    });
  }
  return sections.join('\n');
}

View File

@@ -0,0 +1,64 @@
import { getAdminDb } from '@/lib/firebase/admin';
import { FieldValue } from 'firebase-admin/firestore';
import type { ChatExtractionRecord } from '@/lib/types/chat-extraction';
// Firestore collection holding per-document extraction results.
const COLLECTION = 'chat_extractions';

// Input for createChatExtraction. TData is the extraction payload shape
// (e.g. an ExtractionOutput), stored verbatim under `data`.
interface CreateChatExtractionInput<TData> {
  projectId: string;         // owning project
  knowledgeItemId: string;   // source knowledge item the extraction came from
  data: TData;               // raw extraction payload
  overallCompletion: number; // aggregate completion score (callers pass values in [0, 1])
  overallConfidence: number; // aggregate confidence score (callers pass values in [0, 1])
}
export async function createChatExtraction<TData>(
input: CreateChatExtractionInput<TData>,
): Promise<ChatExtractionRecord<TData>> {
const adminDb = getAdminDb();
const docRef = adminDb.collection(COLLECTION).doc();
const payload = {
id: docRef.id,
projectId: input.projectId,
knowledgeItemId: input.knowledgeItemId,
data: input.data,
overallCompletion: input.overallCompletion,
overallConfidence: input.overallConfidence,
createdAt: FieldValue.serverTimestamp(),
updatedAt: FieldValue.serverTimestamp(),
};
await docRef.set(payload);
const snapshot = await docRef.get();
return snapshot.data() as ChatExtractionRecord<TData>;
}
export async function listChatExtractions<TData>(
projectId: string,
): Promise<ChatExtractionRecord<TData>[]> {
const adminDb = getAdminDb();
const querySnapshot = await adminDb
.collection(COLLECTION)
.where('projectId', '==', projectId)
.orderBy('createdAt', 'desc')
.get();
return querySnapshot.docs.map(
(doc) => doc.data() as ChatExtractionRecord<TData>,
);
}
export async function getChatExtraction<TData>(
extractionId: string,
): Promise<ChatExtractionRecord<TData> | null> {
const adminDb = getAdminDb();
const docRef = adminDb.collection(COLLECTION).doc(extractionId);
const snapshot = await docRef.get();
if (!snapshot.exists) {
return null;
}
return snapshot.data() as ChatExtractionRecord<TData>;
}

View File

@@ -0,0 +1,190 @@
/**
* Chat Mode Resolution Logic
*
* Determines which chat mode (collector, extraction_review, vision, mvp, marketing, general)
* should be active based on project state.
*/
import { getAdminDb } from '@/lib/firebase/admin';
import type { ChatMode } from '@/lib/ai/chat-modes';
/**
 * Resolve the appropriate chat mode for a project.
 *
 * Resolution order:
 * 1. Explicit currentPhase values win ('extraction_review'/'analyzed',
 *    'vision', 'mvp', 'marketing') — these are set by phase transitions.
 * 2. No knowledge_items → collector_mode
 * 3. Has knowledge but no extractions → collector_mode (extraction pending)
 * 4. Has extractions but no canonicalProductModel → extraction_review_mode
 * 5. canonicalProductModel but no mvpPlan → vision_mode
 * 6. mvpPlan but no marketingPlan → mvp_mode
 * 7. marketingPlan → marketing_mode
 *
 * On any error the resolver falls back to collector_mode.
 *
 * @param projectId - Firestore project ID
 * @returns The appropriate chat mode
 */
export async function resolveChatMode(projectId: string): Promise<ChatMode> {
  try {
    const adminDb = getAdminDb();
    // Load project data
    const projectSnapshot = await adminDb.collection('projects').doc(projectId).get();
    if (!projectSnapshot.exists) {
      throw new Error(`Project ${projectId} not found`);
    }
    const projectData = projectSnapshot.data() ?? {};
    const phaseData = (projectData.phaseData ?? {}) as Record<string, any>;
    // PRIORITY: explicit phase transitions decide the mode outright, so check
    // them BEFORE issuing further Firestore reads. (The original ran the
    // knowledge/extraction queries unconditionally and then ignored their
    // results on these branches — two wasted reads per call.)
    if (projectData.currentPhase === 'extraction_review' || projectData.currentPhase === 'analyzed') {
      return 'extraction_review_mode';
    }
    if (projectData.currentPhase === 'vision') {
      return 'vision_mode';
    }
    if (projectData.currentPhase === 'mvp') {
      return 'mvp_mode';
    }
    if (projectData.currentPhase === 'marketing') {
      return 'marketing_mode';
    }
    // Check for knowledge_items (top-level collection)
    const knowledgeSnapshot = await adminDb
      .collection('knowledge_items')
      .where('projectId', '==', projectId)
      .limit(1)
      .get();
    if (knowledgeSnapshot.empty) {
      return 'collector_mode'; // No materials yet
    }
    // Check for chat_extractions (top-level collection)
    const extractionsSnapshot = await adminDb
      .collection('chat_extractions')
      .where('projectId', '==', projectId)
      .limit(1)
      .get();
    if (extractionsSnapshot.empty) {
      return 'collector_mode'; // Has knowledge but extraction hasn't run
    }
    // Fallback chain driven by which phase artifacts exist.
    if (!phaseData.canonicalProductModel) {
      return 'extraction_review_mode';
    }
    if (!phaseData.mvpPlan) {
      return 'vision_mode';
    }
    if (!phaseData.marketingPlan) {
      return 'mvp_mode';
    }
    return 'marketing_mode';
  } catch (error) {
    console.error('[Chat Mode Resolver] Failed to resolve mode:', error);
    // Default to collector on error
    return 'collector_mode';
  }
}
/**
 * Summarize knowledge_items for context building.
 *
 * Fetches the 20 most recent items for the project and reports a count,
 * a tally by sourceType, and up to 5 recent titles. Fails soft to an
 * empty summary on error.
 */
export async function summarizeKnowledgeItems(
  projectId: string
): Promise<{
  totalCount: number;
  bySourceType: Record<string, number>;
  recentTitles: string[];
}> {
  try {
    const adminDb = getAdminDb();
    const snapshot = await adminDb
      .collection('knowledge_items')
      .where('projectId', '==', projectId)
      .orderBy('createdAt', 'desc')
      .limit(20)
      .get();
    const bySourceType: Record<string, number> = {};
    const recentTitles: string[] = [];
    for (const doc of snapshot.docs) {
      const data = doc.data();
      const sourceType = data.sourceType ?? 'unknown';
      bySourceType[sourceType] = (bySourceType[sourceType] ?? 0) + 1;
      if (data.title && recentTitles.length < 5) {
        recentTitles.push(data.title);
      }
    }
    // NOTE(review): totalCount reflects the fetched page (query is limited to
    // 20), so projects with more items report at most 20 — confirm intended.
    return { totalCount: snapshot.size, bySourceType, recentTitles };
  } catch (error) {
    console.error('[Chat Mode Resolver] Failed to summarize knowledge:', error);
    return { totalCount: 0, bySourceType: {}, recentTitles: [] };
  }
}
/**
 * Summarize chat_extractions for context building.
 *
 * Returns the record count and the mean of the stored overallConfidence /
 * overallCompletion fields (missing fields count as 0). Fails soft to a
 * zeroed summary on error.
 */
export async function summarizeExtractions(
  projectId: string
): Promise<{
  totalCount: number;
  avgConfidence: number;
  avgCompletion: number;
}> {
  try {
    const adminDb = getAdminDb();
    const snapshot = await adminDb
      .collection('chat_extractions')
      .where('projectId', '==', projectId)
      .get();
    if (snapshot.empty) {
      return { totalCount: 0, avgConfidence: 0, avgCompletion: 0 };
    }
    let confidenceTotal = 0;
    let completionTotal = 0;
    for (const doc of snapshot.docs) {
      const data = doc.data();
      confidenceTotal += data.overallConfidence ?? 0;
      completionTotal += data.overallCompletion ?? 0;
    }
    const total = snapshot.docs.length;
    return {
      totalCount: total,
      avgConfidence: confidenceTotal / total,
      avgCompletion: completionTotal / total,
    };
  } catch (error) {
    console.error('[Chat Mode Resolver] Failed to summarize extractions:', error);
    return { totalCount: 0, avgConfidence: 0, avgCompletion: 0 };
  }
}

View File

@@ -0,0 +1,298 @@
/**
* GitHub Repository Analyzer
* Fetches and analyzes repository structure and key files for AI context
*/
import { getAdminDb } from '@/lib/firebase/admin';
// Result shape produced by analyzeGitHubRepository.
interface RepositoryAnalysis {
  repoFullName: string;   // "owner/repo"
  totalFiles: number;     // count of blobs in the repo tree
  fileStructure: {
    directories: string[]; // top-level directories (capped at 20 in the result)
    keyFiles: string[];    // manifest/config files found in the tree
  };
  readme: string | null;  // truncated README text, if present
  packageJson: Record<string, unknown> | null; // parsed package.json, if present
  techStack: string[];    // detected technologies (see detectTechStack)
  summary: string;        // human-readable markdown summary
}
/**
 * Analyze a GitHub repository to extract key information for AI context.
 *
 * Uses the user's stored OAuth token to fetch the recursive repo tree and a
 * few key files (README, package.json), detects a tech stack, and produces a
 * human-readable summary. Returns null on any failure (missing connection,
 * API error, exception) rather than throwing.
 */
export async function analyzeGitHubRepository(
  userId: string,
  repoFullName: string,
  branch = 'main'
): Promise<RepositoryAnalysis | null> {
  try {
    const adminDb = getAdminDb();
    // Get GitHub access token
    const connectionDoc = await adminDb
      .collection('githubConnections')
      .doc(userId)
      .get();
    if (!connectionDoc.exists) {
      console.log('[GitHub Analyzer] No GitHub connection found');
      return null;
    }
    const connection = connectionDoc.data()!;
    const accessToken = connection.accessToken;
    const [owner, repo] = repoFullName.split('/');
    // Fetch the full recursive repository tree in one request.
    const treeResponse = await fetch(
      `https://api.github.com/repos/${owner}/${repo}/git/trees/${branch}?recursive=1`,
      {
        headers: {
          Authorization: `Bearer ${accessToken}`,
          Accept: 'application/vnd.github.v3+json',
        },
      }
    );
    if (!treeResponse.ok) {
      console.error('[GitHub Analyzer] Failed to fetch tree:', treeResponse.statusText);
      return null;
    }
    const treeData = await treeResponse.json();
    // Extract directories and key files
    const directories = new Set<string>();
    const keyFiles: string[] = [];
    let totalFiles = 0;
    treeData.tree?.forEach((item: { path: string; type: string }) => {
      if (item.type === 'blob') {
        totalFiles++;
        // Track manifest/config files used for tech-stack detection.
        // FIX: pyproject.toml and build.gradle are now collected — the
        // original never captured them, so detectTechStack's Python/Java
        // checks for those files could never fire.
        const fileName = item.path.toLowerCase();
        if (
          fileName === 'readme.md' ||
          fileName === 'package.json' ||
          fileName === 'requirements.txt' ||
          fileName === 'pyproject.toml' ||
          fileName === 'cargo.toml' ||
          fileName === 'go.mod' ||
          fileName === 'pom.xml' ||
          fileName === 'build.gradle' ||
          fileName.startsWith('dockerfile')
        ) {
          keyFiles.push(item.path);
        }
      }
      // Track top-level directories (from both blob and tree entries)
      const parts = item.path.split('/');
      if (parts.length > 1) {
        directories.add(parts[0]);
      }
    });
    // Fetch README content (truncate to first 3000 chars to avoid bloating prompts)
    let readme: string | null = null;
    const readmePath = keyFiles.find(f => f.toLowerCase().endsWith('readme.md'));
    if (readmePath) {
      const fullReadme = await fetchFileContent(accessToken, owner, repo, readmePath, branch);
      if (fullReadme) {
        // Truncate to first 3000 characters (roughly 750 tokens)
        readme = fullReadme.length > 3000
          ? fullReadme.substring(0, 3000) + '\n\n[... README truncated for brevity ...]'
          : fullReadme;
      }
    }
    // Fetch package.json content
    let packageJson: Record<string, unknown> | null = null;
    const packageJsonPath = keyFiles.find(f => f.toLowerCase().endsWith('package.json'));
    if (packageJsonPath) {
      const content = await fetchFileContent(accessToken, owner, repo, packageJsonPath, branch);
      if (content) {
        try {
          packageJson = JSON.parse(content);
        } catch (e) {
          console.error('[GitHub Analyzer] Failed to parse package.json');
        }
      }
    }
    // Detect tech stack
    const techStack = detectTechStack(keyFiles, Array.from(directories), packageJson);
    // Generate summary
    const summary = generateRepositorySummary({
      repoFullName,
      totalFiles,
      directories: Array.from(directories),
      keyFiles,
      techStack,
      readme,
      packageJson,
    });
    return {
      repoFullName,
      totalFiles,
      fileStructure: {
        directories: Array.from(directories).slice(0, 20), // Limit to top 20
        keyFiles,
      },
      // NOTE(review): readme was already capped at 3000 chars above; this
      // second substring(0, 2000) can cut off the truncation marker — confirm
      // which limit is intended.
      readme: readme ? readme.substring(0, 2000) : null, // First 2000 chars
      packageJson,
      techStack,
      summary,
    };
  } catch (error) {
    console.error('[GitHub Analyzer] Error analyzing repository:', error);
    return null;
  }
}
/**
 * Download a single file's contents via the GitHub contents API.
 *
 * Returns the decoded UTF-8 text, or null on any failure (non-OK response,
 * network error). GitHub returns file bodies base64-encoded.
 */
async function fetchFileContent(
  accessToken: string,
  owner: string,
  repo: string,
  path: string,
  branch: string
): Promise<string | null> {
  const url = `https://api.github.com/repos/${owner}/${repo}/contents/${encodeURIComponent(path)}?ref=${branch}`;
  try {
    const response = await fetch(url, {
      headers: {
        Authorization: `Bearer ${accessToken}`,
        Accept: 'application/vnd.github.v3+json',
      },
    });
    if (!response.ok) return null;
    const body = await response.json();
    return Buffer.from(body.content, 'base64').toString('utf-8');
  } catch (error) {
    console.error(`[GitHub Analyzer] Failed to fetch ${path}:`, error);
    return null;
  }
}
/**
 * Detect a repository's tech stack from its key files, top-level
 * directories, and parsed package.json (when present).
 */
function detectTechStack(
  keyFiles: string[],
  directories: string[],
  packageJson: Record<string, unknown> | null
): string[] {
  const lowered = keyFiles.map((f) => f.toLowerCase());
  const hasFile = (needle: string): boolean => lowered.some((f) => f.includes(needle));
  const detected: string[] = [];

  // Node.js ecosystem, refined via dependency names.
  if (hasFile('package.json')) {
    detected.push('Node.js/JavaScript');
    if (packageJson) {
      const deps: Record<string, unknown> = {
        ...((packageJson.dependencies as Record<string, unknown>) ?? {}),
        ...((packageJson.devDependencies as Record<string, unknown>) ?? {}),
      };
      if (deps.next) detected.push('Next.js');
      if (deps.react) detected.push('React');
      if (deps.vue) detected.push('Vue');
      if (deps.express) detected.push('Express');
      if (deps.typescript) detected.push('TypeScript');
    }
  }
  // Other language manifests.
  if (hasFile('requirements.txt') || hasFile('pyproject.toml')) detected.push('Python');
  if (hasFile('cargo.toml')) detected.push('Rust');
  if (hasFile('go.mod')) detected.push('Go');
  if (hasFile('pom.xml') || hasFile('build.gradle')) detected.push('Java');
  // Dockerfiles are matched by prefix (e.g. "dockerfile.prod").
  if (lowered.some((f) => f.startsWith('dockerfile'))) detected.push('Docker');

  // Directory-based signals.
  if (directories.includes('.github')) detected.push('GitHub Actions');
  if (directories.includes('terraform') || directories.includes('infrastructure')) {
    detected.push('Infrastructure as Code');
  }
  return detected;
}
/**
 * Build a markdown-style, human-readable summary of a repository analysis:
 * structure, tech stack, package info, and a README excerpt.
 */
function generateRepositorySummary(analysis: {
  repoFullName: string;
  totalFiles: number;
  directories: string[];
  keyFiles: string[];
  techStack: string[];
  readme: string | null;
  packageJson: Record<string, unknown> | null;
}): string {
  const lines: string[] = [
    `## Repository Analysis: ${analysis.repoFullName}`,
    `\n**Structure:**`,
    `- Total files: ${analysis.totalFiles}`,
  ];
  if (analysis.directories.length > 0) {
    lines.push(`- Main directories: ${analysis.directories.slice(0, 15).join(', ')}`);
  }
  if (analysis.techStack.length > 0) {
    lines.push(`\n**Tech Stack:** ${analysis.techStack.join(', ')}`);
  }
  const pkg = analysis.packageJson;
  if (pkg) {
    lines.push(`\n**Package Info:**`);
    if (pkg.name) lines.push(`- Name: ${pkg.name}`);
    if (pkg.description) lines.push(`- Description: ${pkg.description}`);
    if (pkg.version) lines.push(`- Version: ${pkg.version}`);
    // Show up to 10 dependency names (runtime deps first, then dev deps).
    const merged = {
      ...((pkg.dependencies as Record<string, string>) || {}),
      ...((pkg.devDependencies as Record<string, string>) || {}),
    };
    const topDeps = Object.keys(merged).slice(0, 10);
    if (topDeps.length > 0) {
      lines.push(`- Key dependencies: ${topDeps.join(', ')}`);
    }
  }
  if (analysis.readme) {
    lines.push(`\n**README Content:**`);
    // First 1000 chars only, with an explicit truncation marker.
    lines.push(analysis.readme.substring(0, 1000));
    if (analysis.readme.length > 1000) {
      lines.push('...(truncated)');
    }
  }
  return lines.join('\n');
}

74
lib/server/knowledge.ts Normal file
View File

@@ -0,0 +1,74 @@
import { getAdminDb } from '@/lib/firebase/admin';
import { FieldValue } from 'firebase-admin/firestore';
import type {
KnowledgeItem,
KnowledgeSourceMeta,
KnowledgeSourceType,
} from '@/lib/types/knowledge';
// Firestore collection that stores knowledge items.
const COLLECTION = 'knowledge_items';

// Input payload for createKnowledgeItem; `title` and `sourceMeta` are optional
// and default to null when omitted.
interface CreateKnowledgeItemInput {
  projectId: string;
  sourceType: KnowledgeSourceType;
  title?: string | null;
  content: string;
  sourceMeta?: KnowledgeSourceMeta;
}
export async function createKnowledgeItem(
input: CreateKnowledgeItemInput,
): Promise<KnowledgeItem> {
const adminDb = getAdminDb();
const docRef = adminDb.collection(COLLECTION).doc();
const payload = {
id: docRef.id,
projectId: input.projectId,
sourceType: input.sourceType,
title: input.title ?? null,
content: input.content,
sourceMeta: input.sourceMeta ?? null,
createdAt: FieldValue.serverTimestamp(),
updatedAt: FieldValue.serverTimestamp(),
};
await docRef.set(payload);
const snapshot = await docRef.get();
return snapshot.data() as KnowledgeItem;
}
export async function getKnowledgeItem(
projectId: string,
knowledgeItemId: string,
): Promise<KnowledgeItem | null> {
const adminDb = getAdminDb();
const docRef = adminDb.collection(COLLECTION).doc(knowledgeItemId);
const snapshot = await docRef.get();
if (!snapshot.exists) {
return null;
}
const data = snapshot.data() as KnowledgeItem;
if (data.projectId !== projectId) {
return null;
}
return data;
}
export async function listKnowledgeItems(
projectId: string,
): Promise<KnowledgeItem[]> {
const adminDb = getAdminDb();
const querySnapshot = await adminDb
.collection(COLLECTION)
.where('projectId', '==', projectId)
.orderBy('createdAt', 'desc')
.get();
return querySnapshot.docs.map((doc) => doc.data() as KnowledgeItem);
}

232
lib/server/logs.ts Normal file
View File

@@ -0,0 +1,232 @@
/**
* Server-side logging utilities
*
* Logs project events to Firestore for monitoring, debugging, and analytics.
*/
import { getAdminDb } from '@/lib/firebase/admin';
import { FieldValue } from 'firebase-admin/firestore';
import type { CreateProjectLogInput, ProjectLogEntry, ProjectLogFilters, ProjectLogStats } from '@/lib/types/logs';
/**
* Log a project-related event
*
* This is a fire-and-forget operation - errors are logged but not thrown
* to avoid impacting the main request flow.
*
* @param input - Log entry data
*
* @example
* ```typescript
* await logProjectEvent({
* projectId: 'proj123',
* userId: 'user456',
* eventType: 'chat_interaction',
* mode: 'vision_mode',
* phase: 'vision_ready',
* artifactsUsed: ['Product Model', '5 Vector Chunks'],
* usedVectorSearch: true,
* vectorChunkCount: 5,
* promptVersion: '1.0',
* modelUsed: 'gemini-2.0-flash-exp',
* success: true,
* errorMessage: null,
* });
* ```
*/
export async function logProjectEvent(input: CreateProjectLogInput): Promise<void> {
try {
const adminDb = getAdminDb();
const docRef = adminDb.collection('project_logs').doc();
await docRef.set({
...input,
id: docRef.id,
createdAt: FieldValue.serverTimestamp(),
});
// Silent success
} catch (error) {
// Log to console but don't throw - logging should never break the main flow
console.error('[Logs] Failed to log project event:', error);
}
}
/**
* Query project logs with filters
*
* @param filters - Query filters
* @returns Array of log entries
*/
export async function queryProjectLogs(
filters: ProjectLogFilters
): Promise<ProjectLogEntry[]> {
try {
const adminDb = getAdminDb();
let query = adminDb.collection('project_logs').orderBy('createdAt', 'desc');
// Apply filters
if (filters.projectId) {
query = query.where('projectId', '==', filters.projectId) as any;
}
if (filters.userId) {
query = query.where('userId', '==', filters.userId) as any;
}
if (filters.eventType) {
query = query.where('eventType', '==', filters.eventType) as any;
}
if (filters.mode) {
query = query.where('mode', '==', filters.mode) as any;
}
if (filters.phase) {
query = query.where('phase', '==', filters.phase) as any;
}
if (filters.success !== undefined) {
query = query.where('success', '==', filters.success) as any;
}
if (filters.startDate) {
query = query.where('createdAt', '>=', filters.startDate) as any;
}
if (filters.endDate) {
query = query.where('createdAt', '<=', filters.endDate) as any;
}
if (filters.limit) {
query = query.limit(filters.limit) as any;
}
const snapshot = await query.get();
return snapshot.docs.map((doc) => {
const data = doc.data();
return {
...data,
createdAt: data.createdAt?.toDate?.() ?? data.createdAt,
} as ProjectLogEntry;
});
} catch (error) {
console.error('[Logs] Failed to query project logs:', error);
return [];
}
}
/**
 * Get aggregated stats for a project
 *
 * Reads up to 1000 recent logs and aggregates success/error counts,
 * per-event-type and per-mode tallies, and vector-search usage metrics.
 *
 * @param projectId - Project ID to analyze
 * @param since - Optional date to filter from
 * @returns Aggregated statistics (all zeros on error)
 */
export async function getProjectLogStats(
  projectId: string,
  since?: Date
): Promise<ProjectLogStats> {
  try {
    const filters: ProjectLogFilters = since
      ? { projectId, limit: 1000, startDate: since }
      : { projectId, limit: 1000 };
    const logs = await queryProjectLogs(filters);

    const stats: ProjectLogStats = {
      totalLogs: logs.length,
      successCount: 0,
      errorCount: 0,
      byEventType: {},
      byMode: {},
      avgVectorChunks: 0,
      vectorSearchUsageRate: 0,
    };
    let totalVectorChunks = 0;
    let vectorSearchCount = 0;

    for (const log of logs) {
      // Success/error tallies
      if (log.success) stats.successCount++;
      else stats.errorCount++;
      // Per-event-type and per-mode tallies
      stats.byEventType[log.eventType] = (stats.byEventType[log.eventType] ?? 0) + 1;
      if (log.mode) {
        stats.byMode[log.mode] = (stats.byMode[log.mode] ?? 0) + 1;
      }
      // Vector-search usage
      if (log.usedVectorSearch) {
        vectorSearchCount++;
        if (log.vectorChunkCount) {
          totalVectorChunks += log.vectorChunkCount;
        }
      }
    }

    // Averages only make sense when vector search was used at least once.
    if (vectorSearchCount > 0) {
      stats.avgVectorChunks = totalVectorChunks / vectorSearchCount;
      stats.vectorSearchUsageRate = vectorSearchCount / logs.length;
    }
    return stats;
  } catch (error) {
    console.error('[Logs] Failed to get project log stats:', error);
    return {
      totalLogs: 0,
      successCount: 0,
      errorCount: 0,
      byEventType: {},
      byMode: {},
      avgVectorChunks: 0,
      vectorSearchUsageRate: 0,
    };
  }
}
/**
 * Delete old logs (for maintenance/cleanup)
 *
 * Deletes in batches of 500 (Firestore's batched-write limit), looping until
 * no matching documents remain — so a single call removes ALL logs older than
 * the cutoff, not just the first batch.
 *
 * @param before - Delete logs older than this date
 * @returns Total number of logs deleted (0 on error)
 */
export async function deleteOldLogs(before: Date): Promise<number> {
  try {
    const adminDb = getAdminDb();
    let totalDeleted = 0;
    // Each pass deletes one batch; stop when the query comes back empty.
    for (;;) {
      const snapshot = await adminDb
        .collection('project_logs')
        .where('createdAt', '<', before)
        .limit(500) // Firestore batched writes cap at 500 operations
        .get();
      if (snapshot.empty) {
        break;
      }
      const batch = adminDb.batch();
      snapshot.docs.forEach((doc) => {
        batch.delete(doc.ref);
      });
      await batch.commit();
      totalDeleted += snapshot.size;
    }
    if (totalDeleted > 0) {
      console.log(`[Logs] Deleted ${totalDeleted} old logs`);
    }
    return totalDeleted;
  } catch (error) {
    console.error('[Logs] Failed to delete old logs:', error);
    return 0;
  }
}

102
lib/server/product-model.ts Normal file
View File

@@ -0,0 +1,102 @@
import { listChatExtractions } from '@/lib/server/chat-extraction';
import { clamp, nowIso, persistPhaseArtifacts, uniqueStrings, toStage } from '@/lib/server/projects';
import type { CanonicalProductModel } from '@/lib/types/product-model';
import type { ChatExtractionRecord } from '@/lib/types/chat-extraction';
// Arithmetic mean of a list; 0 for an empty list (avoids NaN from 0/0).
const average = (numbers: number[]): number => {
  if (numbers.length === 0) return 0;
  let total = 0;
  for (const value of numbers) {
    total += value;
  }
  return total / numbers.length;
};
/**
 * Build the canonical product model for a project from its chat extractions.
 *
 * Averages completion/confidence across all extractions, maps the single
 * highest-confidence extraction onto the canonical shape, persists the result
 * as the `vision` phase artifact, and advances the phase to `vision_ready`.
 *
 * @throws Error when the project has no chat extractions to work from.
 */
export async function buildCanonicalProductModel(projectId: string): Promise<CanonicalProductModel> {
  const extractions = await listChatExtractions(projectId);
  if (!extractions.length) {
    throw new Error('No chat extractions found for project');
  }
  // summary_scores is read via `as any` — presumably an optional field not in
  // the declared record type (TODO confirm); falls back to top-level scores.
  const completionAvg = average(
    extractions.map(
      (record) =>
        (record.data as any)?.summary_scores?.overall_completion ?? record.overallCompletion ?? 0,
    ),
  );
  const confidenceAvg = average(
    extractions.map(
      (record) =>
        (record.data as any)?.summary_scores?.overall_confidence ?? record.overallConfidence ?? 0,
    ),
  );
  // The canonical model's fields come from the single best extraction, but it
  // carries the project-wide averaged scores computed above.
  const canonical = mapExtractionToCanonical(
    projectId,
    pickHighestConfidence(extractions as any),
    completionAvg,
    confidenceAvg,
  );
  // Persist as the `vision` phase artifact and advance to `vision_ready`.
  await persistPhaseArtifacts(projectId, (phaseData, phaseScores, phaseHistory) => {
    phaseData.canonicalProductModel = canonical;
    phaseScores.vision = {
      overallCompletion: canonical.overallCompletion,
      overallConfidence: canonical.overallConfidence,
      updatedAt: nowIso(),
    };
    phaseHistory.push({ phase: 'vision', status: 'completed', timestamp: nowIso() });
    return { phaseData, phaseScores, phaseHistory, nextPhase: 'vision_ready' };
  });
  return canonical;
}
// Return the record with the highest overallConfidence.
// Ties keep the earlier record (strict `>` comparison); an empty input
// throws, matching Array.prototype.reduce with no initial value.
function pickHighestConfidence(records: ChatExtractionRecord[]) {
  if (records.length === 0) {
    throw new TypeError('Reduce of empty array with no initial value');
  }
  let best = records[0];
  for (let i = 1; i < records.length; i++) {
    if (records[i].overallConfidence > best.overallConfidence) {
      best = records[i];
    }
  }
  return best;
}
/**
 * Map a single chat extraction onto the CanonicalProductModel shape.
 *
 * Scalar fields fall back to null when absent; list fields are de-duplicated
 * via uniqueStrings. Scores are the project-wide averages, clamped to [0, 1].
 */
function mapExtractionToCanonical(
  projectId: string,
  record: ChatExtractionRecord,
  completionAvg: number,
  confidenceAvg: number,
): CanonicalProductModel {
  const data = record.data;
  const summary = data.project_summary;
  const vision = data.product_vision;
  const solution = data.solution_and_features;
  const market = data.market_and_competition;
  const tech = data.tech_and_constraints;
  const goals = data.goals_and_success;

  return {
    projectId,
    workingTitle: summary.working_title ?? null,
    oneLiner: summary.one_liner ?? null,
    problem: vision.problem_statement.description ?? null,
    targetUser: data.target_users.primary_segment.description ?? null,
    desiredOutcome: vision.target_outcome.description ?? null,
    coreSolution: solution.core_solution.description ?? null,
    // Prefer a feature's name; fall back to its description.
    coreFeatures: uniqueStrings(
      solution.core_features.map((feature) => feature.name || feature.description),
    ),
    niceToHaveFeatures: uniqueStrings(
      solution.nice_to_have_features.map((feature) => feature.name || feature.description),
    ),
    marketCategory: market.market_category.description ?? null,
    competitors: uniqueStrings(market.competitors.map((competitor) => competitor.name)),
    techStack: uniqueStrings(tech.stack_mentions.map((item) => item.description)),
    constraints: uniqueStrings(tech.constraints.map((constraint) => constraint.description)),
    currentStage: toStage(summary.stage),
    shortTermGoals: uniqueStrings(goals.short_term_goals.map((goal) => goal.description)),
    longTermGoals: uniqueStrings(goals.long_term_goals.map((goal) => goal.description)),
    overallCompletion: clamp(completionAvg),
    overallConfidence: clamp(confidenceAvg),
  };
}

View File

@@ -0,0 +1,2 @@
export {};

64
lib/server/projects.ts Normal file
View File

@@ -0,0 +1,64 @@
import { FieldValue } from 'firebase-admin/firestore';
import { getAdminDb } from '@/lib/firebase/admin';
import type {
ProjectPhase,
ProjectPhaseData,
ProjectPhaseScores,
ProjectStage,
} from '@/lib/types/project-artifacts';
// Clamp a number into the inclusive range [0, 1]; NaN passes through.
export const clamp = (value: number): number => {
  if (value < 0) return 0;
  if (value > 1) return 1;
  return value;
};
// Current time as an ISO-8601 UTC string (e.g. "2024-01-01T00:00:00.000Z").
export const nowIso = (): string => {
  return new Date().toISOString();
};
// De-duplicate a list while dropping falsy entries (null, undefined, ''),
// preserving first-occurrence order.
export function uniqueStrings(values: Array<string | null | undefined>): string[] {
  const seen = new Set<string>();
  const result: string[] = [];
  for (const value of values) {
    if (value && !seen.has(value)) {
      seen.add(value);
      result.push(value);
    }
  }
  return result;
}
// Normalize a free-form stage string to a known ProjectStage value,
// defaulting to 'unknown' for missing or unrecognized input.
export function toStage(stage?: string | null): ProjectStage {
  switch (stage) {
    case 'idea':
    case 'prototype':
    case 'mvp_in_progress':
    case 'live_beta':
    case 'live_paid':
    case 'unknown':
      return stage;
    default:
      return 'unknown';
  }
}
export async function loadPhaseContainers(projectId: string) {
const adminDb = getAdminDb();
const projectRef = adminDb.collection('projects').doc(projectId);
const snapshot = await projectRef.get();
const doc = snapshot.data() || {};
const phaseData = (doc.phaseData ?? {}) as ProjectPhaseData;
const phaseScores = (doc.phaseScores ?? {}) as ProjectPhaseScores;
const phaseHistory = Array.isArray(doc.phaseHistory) ? [...doc.phaseHistory] : [];
return { projectRef, phaseData, phaseScores, phaseHistory };
}
// Result of a persistPhaseArtifacts builder callback: the (possibly mutated)
// phase containers to write back, plus an optional phase transition.
interface PersistencePayload {
  phaseData: ProjectPhaseData;
  phaseScores: ProjectPhaseScores;
  phaseHistory: Array<Record<string, unknown>>;
  // When set, the project advances to this phase with status 'completed'.
  nextPhase?: ProjectPhase;
}
/**
 * Load a project's phase containers, let `builder` transform them, and merge
 * the result back onto the project document.
 *
 * When the builder returns `nextPhase`, the project's currentPhase is
 * advanced and phaseStatus is marked 'completed' in the same write.
 */
export async function persistPhaseArtifacts(
  projectId: string,
  builder: (
    phaseData: ProjectPhaseData,
    phaseScores: ProjectPhaseScores,
    phaseHistory: Array<Record<string, unknown>>,
  ) => PersistencePayload,
) {
  const containers = await loadPhaseContainers(projectId);
  const payload = builder(containers.phaseData, containers.phaseScores, containers.phaseHistory);

  const update: Record<string, unknown> = {
    phaseData: payload.phaseData,
    phaseScores: payload.phaseScores,
    phaseHistory: payload.phaseHistory,
    updatedAt: FieldValue.serverTimestamp(),
  };
  if (payload.nextPhase) {
    update.currentPhase = payload.nextPhase;
    update.phaseStatus = 'completed' as const;
  }

  await containers.projectRef.set(update, { merge: true });
}

453
lib/server/vector-memory.ts Normal file
View File

@@ -0,0 +1,453 @@
/**
* Server-side helpers for AlloyDB vector memory operations
*
* Handles CRUD operations on knowledge_chunks and semantic search.
*/
import { getAlloyDbClient, executeQuery, getPooledClient } from '@/lib/db/alloydb';
import type {
KnowledgeChunk,
KnowledgeChunkRow,
KnowledgeChunkSearchResult,
VectorSearchOptions,
CreateKnowledgeChunkInput,
BatchCreateKnowledgeChunksInput,
} from '@/lib/types/vector-memory';
/**
 * Convert database row (snake_case) to TypeScript object (camelCase).
 * Only the known columns are copied; extra row fields are ignored.
 */
function rowToKnowledgeChunk(row: KnowledgeChunkRow): KnowledgeChunk {
  const {
    id,
    project_id: projectId,
    knowledge_item_id: knowledgeItemId,
    chunk_index: chunkIndex,
    content,
    source_type: sourceType,
    importance,
    created_at: createdAt,
    updated_at: updatedAt,
  } = row;
  return {
    id,
    projectId,
    knowledgeItemId,
    chunkIndex,
    content,
    sourceType,
    importance,
    createdAt,
    updatedAt,
  };
}
/**
 * Retrieve relevant knowledge chunks using vector similarity search
 *
 * @param projectId - Firestore project ID to filter by
 * @param queryEmbedding - Vector embedding of the query (e.g., user's question)
 * @param options - Search options (limit, filters, etc.)
 * @returns Array of chunks ordered by similarity (most relevant first)
 *
 * @example
 * ```typescript
 * const embedding = await embedText("What's the MVP scope?");
 * const chunks = await retrieveRelevantChunks('proj123', embedding, { limit: 10, minSimilarity: 0.7 });
 * ```
 */
export async function retrieveRelevantChunks(
  projectId: string,
  queryEmbedding: number[],
  options: VectorSearchOptions = {}
): Promise<KnowledgeChunkSearchResult[]> {
  const {
    limit = 10,
    minSimilarity,
    sourceTypes,
    importanceLevels,
  } = options;
  try {
    // Build the query with optional filters.
    // `<=>` is pgvector's cosine-distance operator; similarity = 1 - distance,
    // so $1 (the query embedding) is reused for scoring, filtering, ordering.
    let queryText = `
      SELECT
        id,
        project_id,
        knowledge_item_id,
        chunk_index,
        content,
        source_type,
        importance,
        created_at,
        updated_at,
        1 - (embedding <=> $1::vector) AS similarity
      FROM knowledge_chunks
      WHERE project_id = $2
    `;
    // $1 and $2 are fixed; paramIndex tracks the next free positional slot
    // as optional filters are appended. Order of appends must match pushes.
    const params: any[] = [JSON.stringify(queryEmbedding), projectId];
    let paramIndex = 3;
    // Filter by source types (ANY over a text array parameter)
    if (sourceTypes && sourceTypes.length > 0) {
      queryText += ` AND source_type = ANY($${paramIndex})`;
      params.push(sourceTypes);
      paramIndex++;
    }
    // Filter by importance levels
    if (importanceLevels && importanceLevels.length > 0) {
      queryText += ` AND importance = ANY($${paramIndex})`;
      params.push(importanceLevels);
      paramIndex++;
    }
    // Filter by minimum similarity (recomputed from the same $1 embedding)
    if (minSimilarity !== undefined) {
      queryText += ` AND (1 - (embedding <=> $1::vector)) >= $${paramIndex}`;
      params.push(minSimilarity);
      paramIndex++;
    }
    // Order by distance ascending (= similarity descending) and limit
    queryText += ` ORDER BY embedding <=> $1::vector LIMIT $${paramIndex}`;
    params.push(limit);
    const result = await executeQuery<KnowledgeChunkRow & { similarity: number }>(
      queryText,
      params
    );
    return result.rows.map((row) => ({
      ...rowToKnowledgeChunk(row),
      similarity: row.similarity,
    }));
  } catch (error) {
    console.error('[Vector Memory] Failed to retrieve relevant chunks:', error);
    throw new Error(
      `Failed to retrieve chunks: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
/**
 * Create a single knowledge chunk
 *
 * Inserts one row into knowledge_chunks and returns the stored record
 * (without its embedding). Throws a wrapped Error on failure.
 */
export async function createKnowledgeChunk(
  input: CreateKnowledgeChunkInput
): Promise<KnowledgeChunk> {
  const insertSql = `
    INSERT INTO knowledge_chunks (
      project_id,
      knowledge_item_id,
      chunk_index,
      content,
      embedding,
      source_type,
      importance
    )
    VALUES ($1, $2, $3, $4, $5::vector, $6, $7)
    RETURNING
      id,
      project_id,
      knowledge_item_id,
      chunk_index,
      content,
      source_type,
      importance,
      created_at,
      updated_at
  `;
  try {
    const result = await executeQuery<KnowledgeChunkRow>(insertSql, [
      input.projectId,
      input.knowledgeItemId,
      input.chunkIndex,
      input.content,
      // pgvector accepts a JSON-style array literal for the vector cast.
      JSON.stringify(input.embedding),
      input.sourceType ?? null,
      input.importance ?? null,
    ]);
    const [inserted] = result.rows;
    if (!inserted) {
      throw new Error('Failed to insert knowledge chunk');
    }
    return rowToKnowledgeChunk(inserted);
  } catch (error) {
    console.error('[Vector Memory] Failed to create knowledge chunk:', error);
    throw new Error(
      `Failed to create chunk: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
/**
 * Batch create multiple knowledge chunks efficiently
 *
 * Uses a transaction to ensure atomicity: either every chunk is inserted or
 * none are. Returns the inserted chunks (without their embeddings).
 */
export async function batchCreateKnowledgeChunks(
  input: BatchCreateKnowledgeChunksInput
): Promise<KnowledgeChunk[]> {
  const { projectId, knowledgeItemId, chunks } = input;
  if (chunks.length === 0) {
    return [];
  }
  // The INSERT statement is identical for every chunk — build it once
  // instead of re-creating the string on each loop iteration.
  const insertSql = `
    INSERT INTO knowledge_chunks (
      project_id,
      knowledge_item_id,
      chunk_index,
      content,
      embedding,
      source_type,
      importance
    )
    VALUES ($1, $2, $3, $4, $5::vector, $6, $7)
    RETURNING
      id,
      project_id,
      knowledge_item_id,
      chunk_index,
      content,
      source_type,
      importance,
      created_at,
      updated_at
  `;
  const client = await getPooledClient();
  try {
    await client.query('BEGIN');
    const createdChunks: KnowledgeChunk[] = [];
    for (const chunk of chunks) {
      const result = await client.query<KnowledgeChunkRow>(insertSql, [
        projectId,
        knowledgeItemId,
        chunk.chunkIndex,
        chunk.content,
        JSON.stringify(chunk.embedding),
        chunk.sourceType ?? null,
        chunk.importance ?? null,
      ]);
      if (result.rows.length > 0) {
        createdChunks.push(rowToKnowledgeChunk(result.rows[0]));
      }
    }
    await client.query('COMMIT');
    console.log(
      `[Vector Memory] Batch created ${createdChunks.length} chunks for knowledge_item ${knowledgeItemId}`
    );
    return createdChunks;
  } catch (error) {
    await client.query('ROLLBACK');
    console.error('[Vector Memory] Failed to batch create chunks:', error);
    throw new Error(
      `Failed to batch create chunks: ${error instanceof Error ? error.message : String(error)}`
    );
  } finally {
    // Always return the connection to the pool, even after ROLLBACK.
    client.release();
  }
}
/**
 * Delete all chunks for a specific knowledge_item
 *
 * Used when regenerating chunks or removing a knowledge_item.
 *
 * @returns Number of rows deleted
 */
export async function deleteChunksForKnowledgeItem(
  knowledgeItemId: string
): Promise<number> {
  try {
    // No RETURNING clause: DELETE's rowCount already gives the count, so we
    // avoid shipping every deleted id back to the client just to count them.
    const queryText = `
      DELETE FROM knowledge_chunks
      WHERE knowledge_item_id = $1
    `;
    const result = await executeQuery(queryText, [knowledgeItemId]);
    console.log(
      `[Vector Memory] Deleted ${result.rowCount ?? 0} chunks for knowledge_item ${knowledgeItemId}`
    );
    return result.rowCount ?? 0;
  } catch (error) {
    console.error('[Vector Memory] Failed to delete chunks:', error);
    throw new Error(
      `Failed to delete chunks: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
/**
 * Delete all chunks for a specific project
 *
 * Used when cleaning up or resetting a project.
 *
 * @returns Number of rows deleted
 */
export async function deleteChunksForProject(projectId: string): Promise<number> {
  try {
    // No RETURNING clause: DELETE's rowCount already gives the count, so we
    // avoid shipping every deleted id back to the client just to count them.
    const queryText = `
      DELETE FROM knowledge_chunks
      WHERE project_id = $1
    `;
    const result = await executeQuery(queryText, [projectId]);
    console.log(
      `[Vector Memory] Deleted ${result.rowCount ?? 0} chunks for project ${projectId}`
    );
    return result.rowCount ?? 0;
  } catch (error) {
    console.error('[Vector Memory] Failed to delete project chunks:', error);
    throw new Error(
      `Failed to delete project chunks: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
/**
 * Get chunk count for a knowledge_item
 *
 * Best-effort: returns 0 if the query fails.
 */
export async function getChunkCountForKnowledgeItem(
  knowledgeItemId: string
): Promise<number> {
  const sql = 'SELECT COUNT(*) as count FROM knowledge_chunks WHERE knowledge_item_id = $1';
  try {
    const result = await executeQuery<{ count: string }>(sql, [knowledgeItemId]);
    // COUNT(*) comes back as a string from the driver; parse it.
    return parseInt(result.rows[0]?.count ?? '0', 10);
  } catch (error) {
    console.error('[Vector Memory] Failed to get chunk count:', error);
    return 0;
  }
}
/**
 * Get chunk count for a project
 *
 * Best-effort: returns 0 if the query fails.
 */
export async function getChunkCountForProject(projectId: string): Promise<number> {
  const sql = 'SELECT COUNT(*) as count FROM knowledge_chunks WHERE project_id = $1';
  try {
    const result = await executeQuery<{ count: string }>(sql, [projectId]);
    // COUNT(*) comes back as a string from the driver; parse it.
    return parseInt(result.rows[0]?.count ?? '0', 10);
  } catch (error) {
    console.error('[Vector Memory] Failed to get project chunk count:', error);
    return 0;
  }
}
/**
 * Regenerate knowledge_chunks for a single knowledge_item
 *
 * This is the main pipeline that:
 * 1. Chunks the knowledge_item.content
 * 2. Generates embeddings for each chunk
 * 3. Deletes existing chunks for this item
 * 4. Inserts new chunks into AlloyDB
 *
 * NOTE(review): steps 3-4 are not transactional — if the batch insert fails
 * after the delete, previously stored chunks are gone.
 *
 * @param knowledgeItem - The knowledge item to process
 * @throws Error (wrapped) when chunking, embedding, or DB writes fail
 *
 * @example
 * ```typescript
 * const knowledgeItem = await getKnowledgeItem(projectId, itemId);
 * await writeKnowledgeChunksForItem(knowledgeItem);
 * ```
 */
export async function writeKnowledgeChunksForItem(
  knowledgeItem: {
    id: string;
    projectId: string;
    content: string;
    sourceMeta?: { sourceType?: string; importance?: 'primary' | 'supporting' | 'irrelevant' };
  }
): Promise<void> {
  // Dynamic imports keep the chunking/embedding modules out of the base bundle.
  const { chunkText } = await import('@/lib/ai/chunking');
  const { embedTextBatch } = await import('@/lib/ai/embeddings');
  try {
    console.log(
      `[Vector Memory] Starting chunking pipeline for knowledge_item ${knowledgeItem.id}`
    );
    // Step 1: Chunk the content (~800-token chunks, 200-char overlap).
    const textChunks = chunkText(knowledgeItem.content, {
      maxTokens: 800,
      overlapChars: 200,
      preserveParagraphs: true,
    });
    if (textChunks.length === 0) {
      // Nothing to embed — leave any existing chunks untouched.
      console.warn(
        `[Vector Memory] No chunks generated for knowledge_item ${knowledgeItem.id} - content may be empty`
      );
      return;
    }
    console.log(
      `[Vector Memory] Generated ${textChunks.length} chunks for knowledge_item ${knowledgeItem.id}`
    );
    // Step 2: Generate embeddings for all chunks.
    const chunkTexts = textChunks.map((chunk) => chunk.text);
    const embeddings = await embedTextBatch(chunkTexts, {
      delayMs: 50, // Small delay to avoid rate limiting
      skipEmpty: true,
    });
    // Embeddings are matched to chunks by position, so counts must agree.
    if (embeddings.length !== textChunks.length) {
      throw new Error(
        `Embedding count mismatch: got ${embeddings.length}, expected ${textChunks.length}`
      );
    }
    // Step 3: Delete existing chunks for this knowledge_item.
    await deleteChunksForKnowledgeItem(knowledgeItem.id);
    // Step 4: Insert new chunks; source metadata is copied onto every chunk.
    const chunksToInsert = textChunks.map((chunk, index) => ({
      chunkIndex: chunk.index,
      content: chunk.text,
      embedding: embeddings[index],
      sourceType: knowledgeItem.sourceMeta?.sourceType ?? null,
      importance: knowledgeItem.sourceMeta?.importance ?? null,
    }));
    await batchCreateKnowledgeChunks({
      projectId: knowledgeItem.projectId,
      knowledgeItemId: knowledgeItem.id,
      chunks: chunksToInsert,
    });
    console.log(
      `[Vector Memory] Successfully processed ${chunksToInsert.length} chunks for knowledge_item ${knowledgeItem.id}`
    );
  } catch (error) {
    console.error(
      `[Vector Memory] Failed to write chunks for knowledge_item ${knowledgeItem.id}:`,
      error
    );
    throw new Error(
      `Failed to write chunks: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}