VIBN Frontend for Coolify deployment
This commit is contained in:
228
lib/server/backend-extractor.ts
Normal file
228
lib/server/backend-extractor.ts
Normal file
@@ -0,0 +1,228 @@
|
||||
/**
|
||||
* Backend Extraction Module
|
||||
*
|
||||
* Runs extraction as a pure backend job, not in chat.
|
||||
* Called when Collector phase completes.
|
||||
*/
|
||||
|
||||
import { getAdminDb } from '@/lib/firebase/admin';
|
||||
import { GeminiLlmClient } from '@/lib/ai/gemini-client';
|
||||
import { BACKEND_EXTRACTOR_SYSTEM_PROMPT } from '@/lib/ai/prompts/extractor';
|
||||
import { writeKnowledgeChunksForItem } from '@/lib/server/vector-memory';
|
||||
import type { ExtractionOutput, ExtractedInsight } from '@/lib/types/extraction-output';
|
||||
import type { PhaseHandoff } from '@/lib/types/phase-handoff';
|
||||
import { z } from 'zod';
|
||||
|
||||
const ExtractionOutputSchema = z.object({
|
||||
insights: z.array(z.object({
|
||||
id: z.string(),
|
||||
type: z.enum(["problem", "user", "feature", "constraint", "opportunity", "other"]),
|
||||
title: z.string(),
|
||||
description: z.string(),
|
||||
sourceText: z.string(),
|
||||
sourceKnowledgeItemId: z.string(),
|
||||
importance: z.enum(["primary", "supporting"]),
|
||||
confidence: z.number().min(0).max(1),
|
||||
})),
|
||||
problems: z.array(z.string()),
|
||||
targetUsers: z.array(z.string()),
|
||||
features: z.array(z.string()),
|
||||
constraints: z.array(z.string()),
|
||||
opportunities: z.array(z.string()),
|
||||
uncertainties: z.array(z.string()),
|
||||
missingInformation: z.array(z.string()),
|
||||
overallConfidence: z.number().min(0).max(1),
|
||||
});
|
||||
|
||||
export async function runBackendExtractionForProject(projectId: string): Promise<void> {
|
||||
console.log(`[Backend Extractor] Starting extraction for project ${projectId}`);
|
||||
|
||||
const adminDb = getAdminDb();
|
||||
|
||||
try {
|
||||
// 1. Load project
|
||||
const projectDoc = await adminDb.collection('projects').doc(projectId).get();
|
||||
if (!projectDoc.exists) {
|
||||
throw new Error(`Project ${projectId} not found`);
|
||||
}
|
||||
|
||||
const projectData = projectDoc.data();
|
||||
|
||||
// 2. Load knowledge items
|
||||
const knowledgeSnapshot = await adminDb
|
||||
.collection('knowledge_items')
|
||||
.where('projectId', '==', projectId)
|
||||
.where('sourceType', '==', 'imported_document')
|
||||
.get();
|
||||
|
||||
if (knowledgeSnapshot.empty) {
|
||||
console.log(`[Backend Extractor] No documents to extract for project ${projectId} - creating empty handoff`);
|
||||
|
||||
// Create a minimal extraction handoff even with no documents
|
||||
const emptyHandoff: PhaseHandoff = {
|
||||
phase: 'extraction',
|
||||
readyForNextPhase: false, // Not ready - no materials to extract from
|
||||
confidence: 0,
|
||||
confirmed: {
|
||||
problems: [],
|
||||
targetUsers: [],
|
||||
features: [],
|
||||
constraints: [],
|
||||
opportunities: [],
|
||||
},
|
||||
uncertain: {},
|
||||
missing: ['No documents uploaded - need product requirements, specs, or notes'],
|
||||
questionsForUser: [
|
||||
'You haven\'t uploaded any documents yet. Do you have any product specs, requirements, or notes to share?',
|
||||
],
|
||||
sourceEvidence: [],
|
||||
version: 'extraction_v1',
|
||||
timestamp: new Date().toISOString(),
|
||||
};
|
||||
|
||||
await adminDb.collection('projects').doc(projectId).update({
|
||||
'phaseData.phaseHandoffs.extraction': emptyHandoff,
|
||||
currentPhase: 'extraction_review',
|
||||
phaseStatus: 'in_progress',
|
||||
'phaseData.extractionCompletedAt': new Date().toISOString(),
|
||||
updatedAt: new Date().toISOString(),
|
||||
});
|
||||
|
||||
console.log(`[Backend Extractor] Set phase to extraction_review with empty handoff`);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`[Backend Extractor] Found ${knowledgeSnapshot.size} documents to process`);
|
||||
|
||||
const llm = new GeminiLlmClient();
|
||||
const allExtractionOutputs: ExtractionOutput[] = [];
|
||||
const processedKnowledgeItemIds: string[] = [];
|
||||
|
||||
// 3. Process each document
|
||||
for (const knowledgeDoc of knowledgeSnapshot.docs) {
|
||||
const knowledgeData = knowledgeDoc.data();
|
||||
const knowledgeItemId = knowledgeDoc.id;
|
||||
|
||||
try {
|
||||
console.log(`[Backend Extractor] Processing document: ${knowledgeData.title || knowledgeItemId}`);
|
||||
|
||||
// Call LLM with structured extraction + thinking mode
|
||||
const extraction = await llm.structuredCall<ExtractionOutput>({
|
||||
model: 'gemini',
|
||||
systemPrompt: BACKEND_EXTRACTOR_SYSTEM_PROMPT,
|
||||
messages: [{
|
||||
role: 'user',
|
||||
content: `Document Title: ${knowledgeData.title || 'Untitled'}\nSource Type: ${knowledgeData.sourceType}\n\nContent:\n${knowledgeData.content}`,
|
||||
}],
|
||||
schema: ExtractionOutputSchema as any,
|
||||
temperature: 1.0, // Gemini 3 default (changed from 0.3)
|
||||
thinking_config: {
|
||||
thinking_level: 'high', // Enable deep reasoning for document analysis
|
||||
include_thoughts: false, // Don't include thought tokens in output (saves cost)
|
||||
},
|
||||
});
|
||||
|
||||
// Add knowledgeItemId to each insight
|
||||
extraction.insights.forEach(insight => {
|
||||
insight.sourceKnowledgeItemId = knowledgeItemId;
|
||||
});
|
||||
|
||||
allExtractionOutputs.push(extraction);
|
||||
processedKnowledgeItemIds.push(knowledgeItemId);
|
||||
|
||||
// 4. Persist extraction to chat_extractions
|
||||
await adminDb.collection('chat_extractions').add({
|
||||
projectId,
|
||||
knowledgeItemId,
|
||||
data: extraction,
|
||||
overallConfidence: extraction.overallConfidence,
|
||||
overallCompletion: extraction.overallConfidence > 0.7 ? 0.9 : 0.6,
|
||||
createdAt: new Date().toISOString(),
|
||||
updatedAt: new Date().toISOString(),
|
||||
});
|
||||
|
||||
console.log(`[Backend Extractor] Extracted ${extraction.insights.length} insights from ${knowledgeData.title || knowledgeItemId}`);
|
||||
|
||||
// 5. Write vector chunks for primary insights
|
||||
const primaryInsights = extraction.insights.filter(i => i.importance === 'primary');
|
||||
for (const insight of primaryInsights) {
|
||||
try {
|
||||
// Create a knowledge chunk for this insight
|
||||
await writeKnowledgeChunksForItem({
|
||||
id: knowledgeItemId,
|
||||
projectId,
|
||||
content: `${insight.title}\n\n${insight.description}\n\nSource: ${insight.sourceText}`,
|
||||
sourceMeta: {
|
||||
sourceType: 'extracted_insight',
|
||||
importance: 'primary',
|
||||
},
|
||||
});
|
||||
} catch (chunkError) {
|
||||
console.error(`[Backend Extractor] Failed to write chunk for insight ${insight.id}:`, chunkError);
|
||||
// Continue processing other insights
|
||||
}
|
||||
}
|
||||
|
||||
} catch (docError) {
|
||||
console.error(`[Backend Extractor] Failed to process document ${knowledgeItemId}:`, docError);
|
||||
// Continue with next document
|
||||
}
|
||||
}
|
||||
|
||||
// 6. Build extraction PhaseHandoff
|
||||
// Flatten all extracted items (they're already strings, not objects)
|
||||
const allProblems = [...new Set(allExtractionOutputs.flatMap(e => e.problems))];
|
||||
const allUsers = [...new Set(allExtractionOutputs.flatMap(e => e.targetUsers))];
|
||||
const allFeatures = [...new Set(allExtractionOutputs.flatMap(e => e.features))];
|
||||
const allConstraints = [...new Set(allExtractionOutputs.flatMap(e => e.constraints))];
|
||||
const allOpportunities = [...new Set(allExtractionOutputs.flatMap(e => e.opportunities))];
|
||||
const allUncertainties = [...new Set(allExtractionOutputs.flatMap(e => e.uncertainties))];
|
||||
const allMissing = [...new Set(allExtractionOutputs.flatMap(e => e.missingInformation))];
|
||||
|
||||
const avgConfidence = allExtractionOutputs.length > 0
|
||||
? allExtractionOutputs.reduce((sum, e) => sum + e.overallConfidence, 0) / allExtractionOutputs.length
|
||||
: 0;
|
||||
|
||||
const readyForNextPhase = allProblems.length > 0 && allFeatures.length > 0 && avgConfidence > 0.5;
|
||||
|
||||
const extractionHandoff: PhaseHandoff = {
|
||||
phase: 'extraction',
|
||||
readyForNextPhase,
|
||||
confidence: avgConfidence,
|
||||
confirmed: {
|
||||
problems: allProblems,
|
||||
targetUsers: allUsers,
|
||||
features: allFeatures,
|
||||
constraints: allConstraints,
|
||||
opportunities: allOpportunities,
|
||||
},
|
||||
uncertain: {},
|
||||
missing: allMissing,
|
||||
questionsForUser: allUncertainties,
|
||||
sourceEvidence: processedKnowledgeItemIds,
|
||||
version: 'extraction_v1',
|
||||
timestamp: new Date().toISOString(),
|
||||
};
|
||||
|
||||
// 7. Persist handoff and update phase
|
||||
await adminDb.collection('projects').doc(projectId).update({
|
||||
'phaseData.phaseHandoffs.extraction': extractionHandoff,
|
||||
currentPhase: 'extraction_review',
|
||||
phaseStatus: 'in_progress',
|
||||
'phaseData.extractionCompletedAt': new Date().toISOString(),
|
||||
updatedAt: new Date().toISOString(),
|
||||
});
|
||||
|
||||
console.log(`[Backend Extractor] ✅ Extraction complete for project ${projectId}`);
|
||||
console.log(`[Backend Extractor] - Problems: ${allProblems.length}`);
|
||||
console.log(`[Backend Extractor] - Users: ${allUsers.length}`);
|
||||
console.log(`[Backend Extractor] - Features: ${allFeatures.length}`);
|
||||
console.log(`[Backend Extractor] - Confidence: ${(avgConfidence * 100).toFixed(1)}%`);
|
||||
console.log(`[Backend Extractor] - Ready for next phase: ${readyForNextPhase}`);
|
||||
|
||||
} catch (error) {
|
||||
console.error(`[Backend Extractor] Fatal error during extraction:`, error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
402
lib/server/chat-context.ts
Normal file
402
lib/server/chat-context.ts
Normal file
@@ -0,0 +1,402 @@
|
||||
/**
|
||||
* Project Context Builder for Chat
|
||||
*
|
||||
* Loads project state from Firestore and AlloyDB vector memory,
|
||||
* building a compact context object for LLM consumption.
|
||||
*/
|
||||
|
||||
import { getAdminDb } from '@/lib/firebase/admin';
|
||||
import { retrieveRelevantChunks } from '@/lib/server/vector-memory';
|
||||
import { embedText } from '@/lib/ai/embeddings';
|
||||
import {
|
||||
summarizeKnowledgeItems,
|
||||
summarizeExtractions,
|
||||
} from '@/lib/server/chat-mode-resolver';
|
||||
import type { ChatMode } from '@/lib/ai/chat-modes';
|
||||
import type { ProjectPhaseData, ProjectPhaseScores } from '@/lib/types/project-artifacts';
|
||||
import type { PhaseHandoff } from '@/lib/types/phase-handoff';
|
||||
|
||||
/**
 * Compact project context for LLM
 *
 * Aggregates everything the chat LLM needs about a project: core metadata,
 * phase artifacts, knowledge/extraction summaries, vector-search hits,
 * optional GitHub repository analysis, and linked Cursor session history.
 */
export interface ProjectChatContext {
  /** Basic project info */
  project: {
    id: string;
    name: string;
    currentPhase: string;
    phaseStatus: string;
    githubRepo?: string | null;
    githubRepoUrl?: string | null;
    extensionLinked?: boolean;
    /** Answers to the vision questionnaire, when the user has provided them. */
    visionAnswers?: {
      q1?: string;
      q2?: string;
      q3?: string;
      updatedAt?: string;
    };
  };

  /** Phase-specific artifacts */
  // NOTE(review): typed `any` because artifact shapes are defined elsewhere —
  // consider tightening once the artifact schemas are stable.
  phaseData: {
    canonicalProductModel?: any;
    mvpPlan?: any;
    marketingPlan?: any;
  };

  /** Phase scores and progress */
  phaseScores: ProjectPhaseScores;

  /** Phase handoffs for smart transitions */
  phaseHandoffs: Partial<Record<'collector' | 'extraction' | 'vision' | 'mvp' | 'marketing', PhaseHandoff>>;

  /** Knowledge summary (counts, types) */
  knowledgeSummary: {
    totalCount: number;
    // Count of items keyed by their sourceType value.
    bySourceType: Record<string, number>;
    recentTitles: string[];
  };

  /** Extraction summary */
  extractionSummary: {
    totalCount: number;
    // Averages are in the 0..1 range (see summarizeExtractions).
    avgConfidence: number;
    avgCompletion: number;
  };

  /** Relevant chunks from vector search */
  retrievedChunks: {
    content: string;
    sourceType?: string | null;
    importance?: string | null;
    // Cosine-style similarity score from the vector store; 0..1 presumed —
    // TODO confirm against retrieveRelevantChunks.
    similarity: number;
  }[];

  /** Repository analysis (if GitHub connected); null/absent otherwise */
  repositoryAnalysis?: {
    repoFullName: string;
    totalFiles: number;
    directories: string[];
    keyFiles: string[];
    techStack: string[];
    readme: string | null;
    summary: string;
  } | null;

  /** Session history from linked Cursor sessions */
  sessionHistory: {
    totalSessions: number;
    // All messages across sessions, in chronological order.
    messages: Array<{
      role: string;
      content: string;
      // ISO-8601 string (normalized by buildProjectContextForChat).
      timestamp: string;
      sessionId?: string;
    }>;
  };
}
|
||||
|
||||
/**
|
||||
* Build project context for a chat interaction
|
||||
*
|
||||
* @param projectId - Firestore project ID
|
||||
* @param mode - Current chat mode
|
||||
* @param userMessage - User's message (used for vector retrieval)
|
||||
* @param options - Context building options
|
||||
* @returns Compact context object
|
||||
*/
|
||||
export async function buildProjectContextForChat(
|
||||
projectId: string,
|
||||
mode: ChatMode,
|
||||
userMessage: string,
|
||||
options: {
|
||||
retrievalLimit?: number;
|
||||
includeVectorSearch?: boolean;
|
||||
includeGitHubAnalysis?: boolean;
|
||||
} = {}
|
||||
): Promise<ProjectChatContext> {
|
||||
const {
|
||||
retrievalLimit = 10,
|
||||
includeVectorSearch = true,
|
||||
includeGitHubAnalysis = true,
|
||||
} = options;
|
||||
|
||||
try {
|
||||
const adminDb = getAdminDb();
|
||||
|
||||
// Load project document
|
||||
const projectSnapshot = await adminDb.collection('projects').doc(projectId).get();
|
||||
if (!projectSnapshot.exists) {
|
||||
throw new Error(`Project ${projectId} not found`);
|
||||
}
|
||||
|
||||
const projectData = projectSnapshot.data() ?? {};
|
||||
|
||||
// Load summaries in parallel
|
||||
const [knowledgeSummary, extractionSummary] = await Promise.all([
|
||||
summarizeKnowledgeItems(projectId),
|
||||
summarizeExtractions(projectId),
|
||||
]);
|
||||
|
||||
// Vector retrieval
|
||||
let retrievedChunks: ProjectChatContext['retrievedChunks'] = [];
|
||||
|
||||
// extraction_review_mode does NOT load documents - it reviews extraction results
|
||||
// Normal vector search for modes that need it
|
||||
if (includeVectorSearch && mode !== 'collector_mode' && mode !== 'extraction_review_mode' && userMessage.trim().length > 0) {
|
||||
try {
|
||||
const queryEmbedding = await embedText(userMessage);
|
||||
const chunks = await retrieveRelevantChunks(projectId, queryEmbedding, {
|
||||
limit: retrievalLimit,
|
||||
minSimilarity: 0.7, // Only include reasonably relevant chunks
|
||||
});
|
||||
|
||||
retrievedChunks = chunks.map((chunk) => ({
|
||||
content: chunk.content,
|
||||
sourceType: chunk.sourceType,
|
||||
importance: chunk.importance,
|
||||
similarity: chunk.similarity,
|
||||
}));
|
||||
|
||||
console.log(
|
||||
`[Chat Context] Retrieved ${retrievedChunks.length} chunks for project ${projectId}`
|
||||
);
|
||||
} catch (vectorError) {
|
||||
console.error('[Chat Context] Vector retrieval failed:', vectorError);
|
||||
// Continue without vector results
|
||||
}
|
||||
}
|
||||
|
||||
// GitHub repository analysis
|
||||
let repositoryAnalysis = null;
|
||||
if (includeGitHubAnalysis && projectData.githubRepo && projectData.userId) {
|
||||
try {
|
||||
const { analyzeGitHubRepository } = await import('@/lib/server/github-analyzer');
|
||||
repositoryAnalysis = await analyzeGitHubRepository(
|
||||
projectData.userId,
|
||||
projectData.githubRepo,
|
||||
projectData.githubDefaultBranch || 'main'
|
||||
);
|
||||
} catch (githubError) {
|
||||
console.error('[Chat Context] GitHub analysis failed:', githubError);
|
||||
// Continue without GitHub analysis
|
||||
}
|
||||
}
|
||||
|
||||
// Fetch linked Cursor session history
|
||||
let sessionHistory = {
|
||||
totalSessions: 0,
|
||||
messages: [] as Array<{
|
||||
role: string;
|
||||
content: string;
|
||||
timestamp: string;
|
||||
sessionId?: string;
|
||||
}>,
|
||||
};
|
||||
|
||||
try {
|
||||
// Query sessions linked to this project
|
||||
const sessionsSnapshot = await adminDb
|
||||
.collection('sessions')
|
||||
.where('projectId', '==', projectId)
|
||||
.orderBy('startTime', 'asc')
|
||||
.get();
|
||||
|
||||
if (!sessionsSnapshot.empty) {
|
||||
sessionHistory.totalSessions = sessionsSnapshot.size;
|
||||
|
||||
// Extract all messages from all sessions in chronological order
|
||||
const allMessages: Array<{
|
||||
role: string;
|
||||
content: string;
|
||||
timestamp: string;
|
||||
sessionId: string;
|
||||
}> = [];
|
||||
|
||||
for (const sessionDoc of sessionsSnapshot.docs) {
|
||||
const sessionData = sessionDoc.data();
|
||||
const conversation = sessionData.conversation || [];
|
||||
|
||||
// Add messages from this session
|
||||
for (const msg of conversation) {
|
||||
if (msg.content && msg.content.trim()) {
|
||||
allMessages.push({
|
||||
role: msg.role || 'unknown',
|
||||
content: msg.content,
|
||||
timestamp: msg.timestamp instanceof Date
|
||||
? msg.timestamp.toISOString()
|
||||
: (typeof msg.timestamp === 'string' ? msg.timestamp : new Date().toISOString()),
|
||||
sessionId: sessionDoc.id,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sort all messages by timestamp (chronological order)
|
||||
allMessages.sort((a, b) =>
|
||||
new Date(a.timestamp).getTime() - new Date(b.timestamp).getTime()
|
||||
);
|
||||
|
||||
sessionHistory.messages = allMessages;
|
||||
|
||||
console.log(
|
||||
`[Chat Context] Loaded ${sessionHistory.totalSessions} sessions with ${allMessages.length} total messages for project ${projectId}`
|
||||
);
|
||||
} else {
|
||||
console.log(`[Chat Context] No linked sessions found for project ${projectId}`);
|
||||
}
|
||||
} catch (sessionError) {
|
||||
console.error('[Chat Context] Session history fetch failed:', sessionError);
|
||||
// Continue without session history
|
||||
}
|
||||
|
||||
// Build context object
|
||||
const context: ProjectChatContext = {
|
||||
project: {
|
||||
id: projectId,
|
||||
name: projectData.name ?? 'Unnamed Project',
|
||||
currentPhase: projectData.currentPhase ?? 'collector',
|
||||
phaseStatus: projectData.phaseStatus ?? 'not_started',
|
||||
githubRepo: projectData.githubRepo ?? null,
|
||||
githubRepoUrl: projectData.githubRepoUrl ?? null,
|
||||
extensionLinked: projectData.extensionLinked ?? false,
|
||||
visionAnswers: projectData.visionAnswers ?? {},
|
||||
},
|
||||
phaseData: {
|
||||
canonicalProductModel: projectData.phaseData?.canonicalProductModel ?? null,
|
||||
mvpPlan: projectData.phaseData?.mvpPlan ?? null,
|
||||
marketingPlan: projectData.phaseData?.marketingPlan ?? null,
|
||||
},
|
||||
phaseScores: projectData.phaseScores ?? {},
|
||||
phaseHandoffs: projectData.phaseData?.phaseHandoffs ?? {},
|
||||
knowledgeSummary,
|
||||
extractionSummary,
|
||||
retrievedChunks,
|
||||
repositoryAnalysis: repositoryAnalysis as any,
|
||||
sessionHistory, // ✅ Include session history in context
|
||||
};
|
||||
|
||||
return context;
|
||||
} catch (error) {
|
||||
console.error('[Chat Context] Failed to build context:', error);
|
||||
throw new Error(
|
||||
`Failed to build chat context: ${error instanceof Error ? error.message : String(error)}`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine which artifacts were used in building the context
|
||||
*
|
||||
* This helps the UI show what sources the AI is drawing from.
|
||||
*/
|
||||
export function determineArtifactsUsed(context: ProjectChatContext): string[] {
|
||||
const artifacts: string[] = [];
|
||||
|
||||
if (context.phaseData.canonicalProductModel) {
|
||||
artifacts.push('Product Model');
|
||||
}
|
||||
|
||||
if (context.phaseData.mvpPlan) {
|
||||
artifacts.push('MVP Plan');
|
||||
}
|
||||
|
||||
if (context.phaseData.marketingPlan) {
|
||||
artifacts.push('Marketing Plan');
|
||||
}
|
||||
|
||||
if (context.retrievedChunks.length > 0) {
|
||||
artifacts.push(`${context.retrievedChunks.length} Vector Chunks`);
|
||||
}
|
||||
|
||||
if (context.repositoryAnalysis) {
|
||||
artifacts.push('GitHub Repo Analysis');
|
||||
}
|
||||
|
||||
if (context.knowledgeSummary.totalCount > 0) {
|
||||
artifacts.push(`${context.knowledgeSummary.totalCount} Knowledge Items`);
|
||||
}
|
||||
|
||||
if (context.extractionSummary.totalCount > 0) {
|
||||
artifacts.push(`${context.extractionSummary.totalCount} Extractions`);
|
||||
}
|
||||
|
||||
if (context.sessionHistory.totalSessions > 0) {
|
||||
artifacts.push(`${context.sessionHistory.totalSessions} Cursor Sessions (${context.sessionHistory.messages.length} messages)`);
|
||||
}
|
||||
|
||||
return artifacts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Format project context as a string for LLM system prompt
|
||||
*
|
||||
* Provides a human-readable summary of the context.
|
||||
*/
|
||||
export function formatContextForPrompt(context: ProjectChatContext): string {
|
||||
const sections: string[] = [];
|
||||
|
||||
// Project info
|
||||
sections.push(`Project: ${context.project.name} (ID: ${context.project.id})`);
|
||||
sections.push(
|
||||
`Phase: ${context.project.currentPhase} (${context.project.phaseStatus})`
|
||||
);
|
||||
|
||||
// Knowledge summary
|
||||
if (context.knowledgeSummary.totalCount > 0) {
|
||||
sections.push(`\nKnowledge Items: ${context.knowledgeSummary.totalCount} total`);
|
||||
if (Object.keys(context.knowledgeSummary.bySourceType).length > 0) {
|
||||
sections.push(
|
||||
` By type: ${JSON.stringify(context.knowledgeSummary.bySourceType)}`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Extraction summary
|
||||
if (context.extractionSummary.totalCount > 0) {
|
||||
sections.push(
|
||||
`\nExtractions: ${context.extractionSummary.totalCount} analyzed (avg confidence: ${(context.extractionSummary.avgConfidence * 100).toFixed(1)}%)`
|
||||
);
|
||||
}
|
||||
|
||||
// Retrieved chunks
|
||||
if (context.retrievedChunks.length > 0) {
|
||||
sections.push(`\nRelevant Context (vector search):`);
|
||||
context.retrievedChunks.slice(0, 3).forEach((chunk, i) => {
|
||||
sections.push(
|
||||
` ${i + 1}. [${chunk.sourceType ?? 'unknown'}] (similarity: ${(chunk.similarity * 100).toFixed(1)}%)`
|
||||
);
|
||||
sections.push(` ${chunk.content.substring(0, 150)}...`);
|
||||
});
|
||||
}
|
||||
|
||||
// GitHub repo
|
||||
if (context.repositoryAnalysis) {
|
||||
sections.push(`\nGitHub Repository: ${context.repositoryAnalysis.repoFullName}`);
|
||||
sections.push(` Files: ${context.repositoryAnalysis.totalFiles}`);
|
||||
sections.push(` Tech: ${context.repositoryAnalysis.techStack.join(', ')}`);
|
||||
}
|
||||
|
||||
// Phase handoffs
|
||||
const handoffs = Object.keys(context.phaseHandoffs);
|
||||
if (handoffs.length > 0) {
|
||||
sections.push(`\nPhase Handoffs: ${handoffs.join(', ')}`);
|
||||
}
|
||||
|
||||
// Session history
|
||||
if (context.sessionHistory.totalSessions > 0) {
|
||||
sections.push(`\n## Cursor Session History (${context.sessionHistory.totalSessions} sessions, ${context.sessionHistory.messages.length} messages)`);
|
||||
sections.push(`This is your complete conversation history with the user from Cursor IDE, in chronological order.`);
|
||||
sections.push(`Use this to understand what has been built, discussed, and decided so far.\n`);
|
||||
|
||||
// Include all messages chronologically
|
||||
context.sessionHistory.messages.forEach((msg, i) => {
|
||||
const timestamp = new Date(msg.timestamp).toLocaleString();
|
||||
sections.push(`[${timestamp}] ${msg.role}:`);
|
||||
sections.push(msg.content);
|
||||
sections.push(''); // Empty line between messages
|
||||
});
|
||||
}
|
||||
|
||||
return sections.join('\n');
|
||||
}
|
||||
|
||||
64
lib/server/chat-extraction.ts
Normal file
64
lib/server/chat-extraction.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
import { getAdminDb } from '@/lib/firebase/admin';
|
||||
import { FieldValue } from 'firebase-admin/firestore';
|
||||
import type { ChatExtractionRecord } from '@/lib/types/chat-extraction';
|
||||
|
||||
// Top-level Firestore collection holding one record per extraction run.
const COLLECTION = 'chat_extractions';

/**
 * Input for createChatExtraction.
 *
 * TData is the shape of the extractor's structured output payload.
 */
interface CreateChatExtractionInput<TData> {
  // Project the extraction belongs to.
  projectId: string;
  // Knowledge item (source document) the extraction was derived from.
  knowledgeItemId: string;
  // The structured extraction payload itself.
  data: TData;
  // Presumably a 0..1 completeness score — TODO confirm scale with callers.
  overallCompletion: number;
  // Presumably a 0..1 confidence score — TODO confirm scale with callers.
  overallConfidence: number;
}
|
||||
|
||||
export async function createChatExtraction<TData>(
|
||||
input: CreateChatExtractionInput<TData>,
|
||||
): Promise<ChatExtractionRecord<TData>> {
|
||||
const adminDb = getAdminDb();
|
||||
const docRef = adminDb.collection(COLLECTION).doc();
|
||||
|
||||
const payload = {
|
||||
id: docRef.id,
|
||||
projectId: input.projectId,
|
||||
knowledgeItemId: input.knowledgeItemId,
|
||||
data: input.data,
|
||||
overallCompletion: input.overallCompletion,
|
||||
overallConfidence: input.overallConfidence,
|
||||
createdAt: FieldValue.serverTimestamp(),
|
||||
updatedAt: FieldValue.serverTimestamp(),
|
||||
};
|
||||
|
||||
await docRef.set(payload);
|
||||
const snapshot = await docRef.get();
|
||||
return snapshot.data() as ChatExtractionRecord<TData>;
|
||||
}
|
||||
|
||||
export async function listChatExtractions<TData>(
|
||||
projectId: string,
|
||||
): Promise<ChatExtractionRecord<TData>[]> {
|
||||
const adminDb = getAdminDb();
|
||||
const querySnapshot = await adminDb
|
||||
.collection(COLLECTION)
|
||||
.where('projectId', '==', projectId)
|
||||
.orderBy('createdAt', 'desc')
|
||||
.get();
|
||||
|
||||
return querySnapshot.docs.map(
|
||||
(doc) => doc.data() as ChatExtractionRecord<TData>,
|
||||
);
|
||||
}
|
||||
|
||||
export async function getChatExtraction<TData>(
|
||||
extractionId: string,
|
||||
): Promise<ChatExtractionRecord<TData> | null> {
|
||||
const adminDb = getAdminDb();
|
||||
const docRef = adminDb.collection(COLLECTION).doc(extractionId);
|
||||
const snapshot = await docRef.get();
|
||||
if (!snapshot.exists) {
|
||||
return null;
|
||||
}
|
||||
return snapshot.data() as ChatExtractionRecord<TData>;
|
||||
}
|
||||
|
||||
|
||||
190
lib/server/chat-mode-resolver.ts
Normal file
190
lib/server/chat-mode-resolver.ts
Normal file
@@ -0,0 +1,190 @@
|
||||
/**
|
||||
* Chat Mode Resolution Logic
|
||||
*
|
||||
* Determines which chat mode (collector, extraction_review, vision, mvp, marketing, general)
|
||||
* should be active based on project state.
|
||||
*/
|
||||
|
||||
import { getAdminDb } from '@/lib/firebase/admin';
|
||||
import type { ChatMode } from '@/lib/ai/chat-modes';
|
||||
|
||||
/**
|
||||
* Resolve the appropriate chat mode for a project
|
||||
*
|
||||
* Logic:
|
||||
* 1. No knowledge_items → collector_mode
|
||||
* 2. Has knowledge but no extractions → collector_mode (needs to run extraction)
|
||||
* 3. Has extractions but no canonicalProductModel → extraction_review_mode
|
||||
* 4. Has canonicalProductModel but no mvpPlan → vision_mode
|
||||
* 5. Has mvpPlan but no marketingPlan → mvp_mode
|
||||
* 6. Has marketingPlan → marketing_mode
|
||||
* 7. Otherwise → general_chat_mode
|
||||
*
|
||||
* @param projectId - Firestore project ID
|
||||
* @returns The appropriate chat mode
|
||||
*/
|
||||
export async function resolveChatMode(projectId: string): Promise<ChatMode> {
|
||||
try {
|
||||
const adminDb = getAdminDb();
|
||||
|
||||
// Load project data
|
||||
const projectSnapshot = await adminDb.collection('projects').doc(projectId).get();
|
||||
if (!projectSnapshot.exists) {
|
||||
throw new Error(`Project ${projectId} not found`);
|
||||
}
|
||||
|
||||
const projectData = projectSnapshot.data() ?? {};
|
||||
const phaseData = (projectData.phaseData ?? {}) as Record<string, any>;
|
||||
|
||||
// Check for knowledge_items (top-level collection)
|
||||
const knowledgeSnapshot = await adminDb
|
||||
.collection('knowledge_items')
|
||||
.where('projectId', '==', projectId)
|
||||
.limit(1)
|
||||
.get();
|
||||
|
||||
const hasKnowledge = !knowledgeSnapshot.empty;
|
||||
|
||||
// Check for chat_extractions (top-level collection)
|
||||
const extractionsSnapshot = await adminDb
|
||||
.collection('chat_extractions')
|
||||
.where('projectId', '==', projectId)
|
||||
.limit(1)
|
||||
.get();
|
||||
|
||||
const hasExtractions = !extractionsSnapshot.empty;
|
||||
|
||||
// Apply resolution logic
|
||||
// PRIORITY: Check explicit phase transitions FIRST (overrides knowledge checks)
|
||||
if (projectData.currentPhase === 'extraction_review' || projectData.currentPhase === 'analyzed') {
|
||||
return 'extraction_review_mode';
|
||||
}
|
||||
|
||||
if (projectData.currentPhase === 'vision') {
|
||||
return 'vision_mode';
|
||||
}
|
||||
|
||||
if (projectData.currentPhase === 'mvp') {
|
||||
return 'mvp_mode';
|
||||
}
|
||||
|
||||
if (projectData.currentPhase === 'marketing') {
|
||||
return 'marketing_mode';
|
||||
}
|
||||
|
||||
if (!hasKnowledge) {
|
||||
return 'collector_mode';
|
||||
}
|
||||
|
||||
if (hasKnowledge && !hasExtractions) {
|
||||
return 'collector_mode'; // Has knowledge but needs extraction
|
||||
}
|
||||
|
||||
// Fallback: Has extractions but no canonicalProductModel
|
||||
if (hasExtractions && !phaseData.canonicalProductModel) {
|
||||
return 'extraction_review_mode';
|
||||
}
|
||||
|
||||
if (phaseData.canonicalProductModel && !phaseData.mvpPlan) {
|
||||
return 'vision_mode';
|
||||
}
|
||||
|
||||
if (phaseData.mvpPlan && !phaseData.marketingPlan) {
|
||||
return 'mvp_mode';
|
||||
}
|
||||
|
||||
if (phaseData.marketingPlan) {
|
||||
return 'marketing_mode';
|
||||
}
|
||||
|
||||
return 'general_chat_mode';
|
||||
} catch (error) {
|
||||
console.error('[Chat Mode Resolver] Failed to resolve mode:', error);
|
||||
// Default to collector on error
|
||||
return 'collector_mode';
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a summary of knowledge_items for context building
|
||||
*/
|
||||
export async function summarizeKnowledgeItems(
|
||||
projectId: string
|
||||
): Promise<{
|
||||
totalCount: number;
|
||||
bySourceType: Record<string, number>;
|
||||
recentTitles: string[];
|
||||
}> {
|
||||
try {
|
||||
const adminDb = getAdminDb();
|
||||
const snapshot = await adminDb
|
||||
.collection('knowledge_items')
|
||||
.where('projectId', '==', projectId)
|
||||
.orderBy('createdAt', 'desc')
|
||||
.limit(20)
|
||||
.get();
|
||||
|
||||
const totalCount = snapshot.size;
|
||||
const bySourceType: Record<string, number> = {};
|
||||
const recentTitles: string[] = [];
|
||||
|
||||
snapshot.docs.forEach((doc) => {
|
||||
const data = doc.data();
|
||||
const sourceType = data.sourceType ?? 'unknown';
|
||||
bySourceType[sourceType] = (bySourceType[sourceType] ?? 0) + 1;
|
||||
|
||||
if (data.title && recentTitles.length < 5) {
|
||||
recentTitles.push(data.title);
|
||||
}
|
||||
});
|
||||
|
||||
return { totalCount, bySourceType, recentTitles };
|
||||
} catch (error) {
|
||||
console.error('[Chat Mode Resolver] Failed to summarize knowledge:', error);
|
||||
return { totalCount: 0, bySourceType: {}, recentTitles: [] };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a summary of chat_extractions for context building
|
||||
*/
|
||||
export async function summarizeExtractions(
|
||||
projectId: string
|
||||
): Promise<{
|
||||
totalCount: number;
|
||||
avgConfidence: number;
|
||||
avgCompletion: number;
|
||||
}> {
|
||||
try {
|
||||
const adminDb = getAdminDb();
|
||||
const snapshot = await adminDb
|
||||
.collection('chat_extractions')
|
||||
.where('projectId', '==', projectId)
|
||||
.get();
|
||||
|
||||
if (snapshot.empty) {
|
||||
return { totalCount: 0, avgConfidence: 0, avgCompletion: 0 };
|
||||
}
|
||||
|
||||
let sumConfidence = 0;
|
||||
let sumCompletion = 0;
|
||||
let count = 0;
|
||||
|
||||
snapshot.docs.forEach((doc) => {
|
||||
const data = doc.data();
|
||||
sumConfidence += data.overallConfidence ?? 0;
|
||||
sumCompletion += data.overallCompletion ?? 0;
|
||||
count++;
|
||||
});
|
||||
|
||||
return {
|
||||
totalCount: count,
|
||||
avgConfidence: count > 0 ? sumConfidence / count : 0,
|
||||
avgCompletion: count > 0 ? sumCompletion / count : 0,
|
||||
};
|
||||
} catch (error) {
|
||||
console.error('[Chat Mode Resolver] Failed to summarize extractions:', error);
|
||||
return { totalCount: 0, avgConfidence: 0, avgCompletion: 0 };
|
||||
}
|
||||
}
|
||||
|
||||
298
lib/server/github-analyzer.ts
Normal file
298
lib/server/github-analyzer.ts
Normal file
@@ -0,0 +1,298 @@
|
||||
/**
|
||||
* GitHub Repository Analyzer
|
||||
* Fetches and analyzes repository structure and key files for AI context
|
||||
*/
|
||||
|
||||
import { getAdminDb } from '@/lib/firebase/admin';
|
||||
|
||||
/**
 * Condensed view of a GitHub repository, used to build AI context.
 */
interface RepositoryAnalysis {
  repoFullName: string; // "owner/repo"
  totalFiles: number; // number of blobs in the fetched git tree
  fileStructure: {
    directories: string[]; // top-level directory names (capped to 20 by the analyzer)
    keyFiles: string[]; // root-level manifest/readme/docker files found in the tree
  };
  readme: string | null; // truncated README text, or null when absent
  packageJson: Record<string, unknown> | null; // parsed package.json, or null
  techStack: string[]; // technologies detected by detectTechStack()
  summary: string; // human-readable markdown summary from generateRepositorySummary()
}
|
||||
|
||||
/**
 * Analyze a GitHub repository to extract key information for AI context.
 *
 * Fetches the full git tree for `branch`, picks out root-level manifest
 * files, downloads the README and package.json, detects a tech stack, and
 * assembles a markdown summary.
 *
 * @param userId - Owner of the stored GitHub connection (githubConnections doc id)
 * @param repoFullName - "owner/repo"
 * @param branch - Branch to analyze (default 'main')
 * @returns The analysis, or null on any failure (missing connection, API error)
 */
export async function analyzeGitHubRepository(
  userId: string,
  repoFullName: string,
  branch = 'main'
): Promise<RepositoryAnalysis | null> {
  try {
    const adminDb = getAdminDb();

    // Get GitHub access token from the user's stored connection.
    const connectionDoc = await adminDb
      .collection('githubConnections')
      .doc(userId)
      .get();

    if (!connectionDoc.exists) {
      console.log('[GitHub Analyzer] No GitHub connection found');
      return null;
    }

    const connection = connectionDoc.data()!;
    const accessToken = connection.accessToken;
    const [owner, repo] = repoFullName.split('/');

    // Fetch the entire repository tree in one call.
    // NOTE(review): for very large repos GitHub sets `truncated: true` on the
    // response and omits entries; that case is not handled here — confirm
    // whether it matters for the target repositories.
    const treeResponse = await fetch(
      `https://api.github.com/repos/${owner}/${repo}/git/trees/${branch}?recursive=1`,
      {
        headers: {
          Authorization: `Bearer ${accessToken}`,
          Accept: 'application/vnd.github.v3+json',
        },
      }
    );

    if (!treeResponse.ok) {
      console.error('[GitHub Analyzer] Failed to fetch tree:', treeResponse.statusText);
      return null;
    }

    const treeData = await treeResponse.json();

    // Extract directories and key files from the tree listing.
    const directories = new Set<string>();
    const keyFiles: string[] = [];
    let totalFiles = 0;

    treeData.tree?.forEach((item: { path: string; type: string }) => {
      if (item.type === 'blob') {
        totalFiles++;

        // Track key files. The exact-equality checks mean only ROOT-level
        // manifests match (a nested "src/package.json" has path != "package.json").
        const fileName = item.path.toLowerCase();
        if (
          fileName === 'readme.md' ||
          fileName === 'package.json' ||
          fileName === 'requirements.txt' ||
          fileName === 'cargo.toml' ||
          fileName === 'go.mod' ||
          fileName === 'pom.xml' ||
          fileName.startsWith('dockerfile')
        ) {
          keyFiles.push(item.path);
        }
      }

      // Track top-level directories (any path with more than one segment
      // contributes its first segment — this also runs for 'tree' entries).
      const parts = item.path.split('/');
      if (parts.length > 1) {
        directories.add(parts[0]);
      }
    });

    // Fetch README content (truncate to first 3000 chars to avoid bloating prompts)
    let readme: string | null = null;
    const readmePath = keyFiles.find(f => f.toLowerCase().endsWith('readme.md'));
    if (readmePath) {
      const fullReadme = await fetchFileContent(accessToken, owner, repo, readmePath, branch);
      if (fullReadme) {
        // Truncate to first 3000 characters (roughly 750 tokens)
        readme = fullReadme.length > 3000
          ? fullReadme.substring(0, 3000) + '\n\n[... README truncated for brevity ...]'
          : fullReadme;
      }
    }

    // Fetch package.json content
    let packageJson: Record<string, unknown> | null = null;
    const packageJsonPath = keyFiles.find(f => f.toLowerCase().endsWith('package.json'));
    if (packageJsonPath) {
      const content = await fetchFileContent(accessToken, owner, repo, packageJsonPath, branch);
      if (content) {
        try {
          packageJson = JSON.parse(content);
        } catch (e) {
          console.error('[GitHub Analyzer] Failed to parse package.json');
        }
      }
    }

    // Detect tech stack from manifests, directories and dependencies.
    const techStack = detectTechStack(keyFiles, Array.from(directories), packageJson);

    // Generate a markdown summary (gets the 3000-char readme).
    const summary = generateRepositorySummary({
      repoFullName,
      totalFiles,
      directories: Array.from(directories),
      keyFiles,
      techStack,
      readme,
      packageJson,
    });

    return {
      repoFullName,
      totalFiles,
      fileStructure: {
        directories: Array.from(directories).slice(0, 20), // Limit to top 20
        keyFiles,
      },
      // NOTE(review): the readme was already truncated to 3000 chars above;
      // cutting to 2000 here can clip the "[... README truncated ...]"
      // marker mid-way — confirm whether the two limits should agree.
      readme: readme ? readme.substring(0, 2000) : null, // First 2000 chars
      packageJson,
      techStack,
      summary,
    };
  } catch (error) {
    console.error('[GitHub Analyzer] Error analyzing repository:', error);
    return null;
  }
}
|
||||
|
||||
/**
|
||||
* Fetch file content from GitHub
|
||||
*/
|
||||
async function fetchFileContent(
|
||||
accessToken: string,
|
||||
owner: string,
|
||||
repo: string,
|
||||
path: string,
|
||||
branch: string
|
||||
): Promise<string | null> {
|
||||
try {
|
||||
const response = await fetch(
|
||||
`https://api.github.com/repos/${owner}/${repo}/contents/${encodeURIComponent(path)}?ref=${branch}`,
|
||||
{
|
||||
headers: {
|
||||
Authorization: `Bearer ${accessToken}`,
|
||||
Accept: 'application/vnd.github.v3+json',
|
||||
},
|
||||
}
|
||||
);
|
||||
|
||||
if (!response.ok) return null;
|
||||
|
||||
const data = await response.json();
|
||||
return Buffer.from(data.content, 'base64').toString('utf-8');
|
||||
} catch (error) {
|
||||
console.error(`[GitHub Analyzer] Failed to fetch ${path}:`, error);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect tech stack from repository structure
|
||||
*/
|
||||
function detectTechStack(
|
||||
keyFiles: string[],
|
||||
directories: string[],
|
||||
packageJson: Record<string, unknown> | null
|
||||
): string[] {
|
||||
const stack: string[] = [];
|
||||
|
||||
// From key files
|
||||
if (keyFiles.some(f => f.toLowerCase().includes('package.json'))) {
|
||||
stack.push('Node.js/JavaScript');
|
||||
|
||||
if (packageJson) {
|
||||
const deps = {
|
||||
...(packageJson.dependencies as Record<string, unknown> || {}),
|
||||
...(packageJson.devDependencies as Record<string, unknown> || {})
|
||||
};
|
||||
|
||||
if (deps.next) stack.push('Next.js');
|
||||
if (deps.react) stack.push('React');
|
||||
if (deps.vue) stack.push('Vue');
|
||||
if (deps.express) stack.push('Express');
|
||||
if (deps.typescript) stack.push('TypeScript');
|
||||
}
|
||||
}
|
||||
|
||||
if (keyFiles.some(f => f.toLowerCase().includes('requirements.txt') || f.toLowerCase().includes('pyproject.toml'))) {
|
||||
stack.push('Python');
|
||||
}
|
||||
|
||||
if (keyFiles.some(f => f.toLowerCase().includes('cargo.toml'))) {
|
||||
stack.push('Rust');
|
||||
}
|
||||
|
||||
if (keyFiles.some(f => f.toLowerCase().includes('go.mod'))) {
|
||||
stack.push('Go');
|
||||
}
|
||||
|
||||
if (keyFiles.some(f => f.toLowerCase().includes('pom.xml') || f.toLowerCase().includes('build.gradle'))) {
|
||||
stack.push('Java');
|
||||
}
|
||||
|
||||
if (keyFiles.some(f => f.toLowerCase().startsWith('dockerfile'))) {
|
||||
stack.push('Docker');
|
||||
}
|
||||
|
||||
// From directories
|
||||
if (directories.includes('.github')) stack.push('GitHub Actions');
|
||||
if (directories.includes('terraform') || directories.includes('infrastructure')) {
|
||||
stack.push('Infrastructure as Code');
|
||||
}
|
||||
|
||||
return stack;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a human-readable summary
|
||||
*/
|
||||
function generateRepositorySummary(analysis: {
|
||||
repoFullName: string;
|
||||
totalFiles: number;
|
||||
directories: string[];
|
||||
keyFiles: string[];
|
||||
techStack: string[];
|
||||
readme: string | null;
|
||||
packageJson: Record<string, unknown> | null;
|
||||
}): string {
|
||||
const parts: string[] = [];
|
||||
|
||||
parts.push(`## Repository Analysis: ${analysis.repoFullName}`);
|
||||
parts.push(`\n**Structure:**`);
|
||||
parts.push(`- Total files: ${analysis.totalFiles}`);
|
||||
|
||||
if (analysis.directories.length > 0) {
|
||||
parts.push(`- Main directories: ${analysis.directories.slice(0, 15).join(', ')}`);
|
||||
}
|
||||
|
||||
if (analysis.techStack.length > 0) {
|
||||
parts.push(`\n**Tech Stack:** ${analysis.techStack.join(', ')}`);
|
||||
}
|
||||
|
||||
if (analysis.packageJson) {
|
||||
const pkg = analysis.packageJson;
|
||||
parts.push(`\n**Package Info:**`);
|
||||
if (pkg.name) parts.push(`- Name: ${pkg.name}`);
|
||||
if (pkg.description) parts.push(`- Description: ${pkg.description}`);
|
||||
if (pkg.version) parts.push(`- Version: ${pkg.version}`);
|
||||
|
||||
// Show key dependencies
|
||||
const deps = pkg.dependencies as Record<string, string> || {};
|
||||
const devDeps = pkg.devDependencies as Record<string, string> || {};
|
||||
const allDeps = { ...deps, ...devDeps };
|
||||
const keyDeps = Object.keys(allDeps).slice(0, 10);
|
||||
if (keyDeps.length > 0) {
|
||||
parts.push(`- Key dependencies: ${keyDeps.join(', ')}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (analysis.readme) {
|
||||
parts.push(`\n**README Content:**`);
|
||||
// Get first few paragraphs or up to 1000 chars
|
||||
const readmeExcerpt = analysis.readme.substring(0, 1000);
|
||||
parts.push(readmeExcerpt);
|
||||
if (analysis.readme.length > 1000) {
|
||||
parts.push('...(truncated)');
|
||||
}
|
||||
}
|
||||
|
||||
return parts.join('\n');
|
||||
}
|
||||
|
||||
74
lib/server/knowledge.ts
Normal file
74
lib/server/knowledge.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
import { getAdminDb } from '@/lib/firebase/admin';
|
||||
import { FieldValue } from 'firebase-admin/firestore';
|
||||
import type {
|
||||
KnowledgeItem,
|
||||
KnowledgeSourceMeta,
|
||||
KnowledgeSourceType,
|
||||
} from '@/lib/types/knowledge';
|
||||
|
||||
// Firestore collection holding per-project knowledge items.
const COLLECTION = 'knowledge_items';

/**
 * Input required to create a knowledge_items document.
 */
interface CreateKnowledgeItemInput {
  projectId: string; // owning project
  sourceType: KnowledgeSourceType;
  title?: string | null; // optional display title (stored as null when absent)
  content: string; // raw captured content
  sourceMeta?: KnowledgeSourceMeta; // provenance metadata, if any
}
|
||||
|
||||
/**
 * Create a knowledge_items document and return the persisted record.
 *
 * The document is read back after the write so that server-generated
 * timestamps are resolved in the returned object (costs one extra read).
 *
 * @param input - Item fields; optional title/sourceMeta default to null
 * @returns The stored document as a KnowledgeItem
 */
export async function createKnowledgeItem(
  input: CreateKnowledgeItemInput,
): Promise<KnowledgeItem> {
  const adminDb = getAdminDb();
  const docRef = adminDb.collection(COLLECTION).doc();

  const payload = {
    id: docRef.id, // mirror the document id into the payload for query results
    projectId: input.projectId,
    sourceType: input.sourceType,
    title: input.title ?? null,
    content: input.content,
    sourceMeta: input.sourceMeta ?? null,
    createdAt: FieldValue.serverTimestamp(),
    updatedAt: FieldValue.serverTimestamp(),
  };

  await docRef.set(payload);
  const snapshot = await docRef.get();
  return snapshot.data() as KnowledgeItem;
}
|
||||
|
||||
export async function getKnowledgeItem(
|
||||
projectId: string,
|
||||
knowledgeItemId: string,
|
||||
): Promise<KnowledgeItem | null> {
|
||||
const adminDb = getAdminDb();
|
||||
const docRef = adminDb.collection(COLLECTION).doc(knowledgeItemId);
|
||||
const snapshot = await docRef.get();
|
||||
|
||||
if (!snapshot.exists) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const data = snapshot.data() as KnowledgeItem;
|
||||
if (data.projectId !== projectId) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
export async function listKnowledgeItems(
|
||||
projectId: string,
|
||||
): Promise<KnowledgeItem[]> {
|
||||
const adminDb = getAdminDb();
|
||||
const querySnapshot = await adminDb
|
||||
.collection(COLLECTION)
|
||||
.where('projectId', '==', projectId)
|
||||
.orderBy('createdAt', 'desc')
|
||||
.get();
|
||||
|
||||
return querySnapshot.docs.map((doc) => doc.data() as KnowledgeItem);
|
||||
}
|
||||
|
||||
|
||||
232
lib/server/logs.ts
Normal file
232
lib/server/logs.ts
Normal file
@@ -0,0 +1,232 @@
|
||||
/**
|
||||
* Server-side logging utilities
|
||||
*
|
||||
* Logs project events to Firestore for monitoring, debugging, and analytics.
|
||||
*/
|
||||
|
||||
import { getAdminDb } from '@/lib/firebase/admin';
|
||||
import { FieldValue } from 'firebase-admin/firestore';
|
||||
import type { CreateProjectLogInput, ProjectLogEntry, ProjectLogFilters, ProjectLogStats } from '@/lib/types/logs';
|
||||
|
||||
/**
|
||||
* Log a project-related event
|
||||
*
|
||||
* This is a fire-and-forget operation - errors are logged but not thrown
|
||||
* to avoid impacting the main request flow.
|
||||
*
|
||||
* @param input - Log entry data
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* await logProjectEvent({
|
||||
* projectId: 'proj123',
|
||||
* userId: 'user456',
|
||||
* eventType: 'chat_interaction',
|
||||
* mode: 'vision_mode',
|
||||
* phase: 'vision_ready',
|
||||
* artifactsUsed: ['Product Model', '5 Vector Chunks'],
|
||||
* usedVectorSearch: true,
|
||||
* vectorChunkCount: 5,
|
||||
* promptVersion: '1.0',
|
||||
* modelUsed: 'gemini-2.0-flash-exp',
|
||||
* success: true,
|
||||
* errorMessage: null,
|
||||
* });
|
||||
* ```
|
||||
*/
|
||||
export async function logProjectEvent(input: CreateProjectLogInput): Promise<void> {
|
||||
try {
|
||||
const adminDb = getAdminDb();
|
||||
const docRef = adminDb.collection('project_logs').doc();
|
||||
|
||||
await docRef.set({
|
||||
...input,
|
||||
id: docRef.id,
|
||||
createdAt: FieldValue.serverTimestamp(),
|
||||
});
|
||||
|
||||
// Silent success
|
||||
} catch (error) {
|
||||
// Log to console but don't throw - logging should never break the main flow
|
||||
console.error('[Logs] Failed to log project event:', error);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Query project logs with filters
|
||||
*
|
||||
* @param filters - Query filters
|
||||
* @returns Array of log entries
|
||||
*/
|
||||
export async function queryProjectLogs(
|
||||
filters: ProjectLogFilters
|
||||
): Promise<ProjectLogEntry[]> {
|
||||
try {
|
||||
const adminDb = getAdminDb();
|
||||
let query = adminDb.collection('project_logs').orderBy('createdAt', 'desc');
|
||||
|
||||
// Apply filters
|
||||
if (filters.projectId) {
|
||||
query = query.where('projectId', '==', filters.projectId) as any;
|
||||
}
|
||||
|
||||
if (filters.userId) {
|
||||
query = query.where('userId', '==', filters.userId) as any;
|
||||
}
|
||||
|
||||
if (filters.eventType) {
|
||||
query = query.where('eventType', '==', filters.eventType) as any;
|
||||
}
|
||||
|
||||
if (filters.mode) {
|
||||
query = query.where('mode', '==', filters.mode) as any;
|
||||
}
|
||||
|
||||
if (filters.phase) {
|
||||
query = query.where('phase', '==', filters.phase) as any;
|
||||
}
|
||||
|
||||
if (filters.success !== undefined) {
|
||||
query = query.where('success', '==', filters.success) as any;
|
||||
}
|
||||
|
||||
if (filters.startDate) {
|
||||
query = query.where('createdAt', '>=', filters.startDate) as any;
|
||||
}
|
||||
|
||||
if (filters.endDate) {
|
||||
query = query.where('createdAt', '<=', filters.endDate) as any;
|
||||
}
|
||||
|
||||
if (filters.limit) {
|
||||
query = query.limit(filters.limit) as any;
|
||||
}
|
||||
|
||||
const snapshot = await query.get();
|
||||
|
||||
return snapshot.docs.map((doc) => {
|
||||
const data = doc.data();
|
||||
return {
|
||||
...data,
|
||||
createdAt: data.createdAt?.toDate?.() ?? data.createdAt,
|
||||
} as ProjectLogEntry;
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('[Logs] Failed to query project logs:', error);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get aggregated stats for a project
|
||||
*
|
||||
* @param projectId - Project ID to analyze
|
||||
* @param since - Optional date to filter from
|
||||
* @returns Aggregated statistics
|
||||
*/
|
||||
export async function getProjectLogStats(
|
||||
projectId: string,
|
||||
since?: Date
|
||||
): Promise<ProjectLogStats> {
|
||||
try {
|
||||
const filters: ProjectLogFilters = { projectId, limit: 1000 };
|
||||
if (since) {
|
||||
filters.startDate = since;
|
||||
}
|
||||
|
||||
const logs = await queryProjectLogs(filters);
|
||||
|
||||
const stats: ProjectLogStats = {
|
||||
totalLogs: logs.length,
|
||||
successCount: 0,
|
||||
errorCount: 0,
|
||||
byEventType: {},
|
||||
byMode: {},
|
||||
avgVectorChunks: 0,
|
||||
vectorSearchUsageRate: 0,
|
||||
};
|
||||
|
||||
let totalVectorChunks = 0;
|
||||
let vectorSearchCount = 0;
|
||||
|
||||
logs.forEach((log) => {
|
||||
// Success/error counts
|
||||
if (log.success) {
|
||||
stats.successCount++;
|
||||
} else {
|
||||
stats.errorCount++;
|
||||
}
|
||||
|
||||
// By event type
|
||||
stats.byEventType[log.eventType] = (stats.byEventType[log.eventType] ?? 0) + 1;
|
||||
|
||||
// By mode
|
||||
if (log.mode) {
|
||||
stats.byMode[log.mode] = (stats.byMode[log.mode] ?? 0) + 1;
|
||||
}
|
||||
|
||||
// Vector search stats
|
||||
if (log.usedVectorSearch) {
|
||||
vectorSearchCount++;
|
||||
if (log.vectorChunkCount) {
|
||||
totalVectorChunks += log.vectorChunkCount;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Calculate averages
|
||||
if (vectorSearchCount > 0) {
|
||||
stats.avgVectorChunks = totalVectorChunks / vectorSearchCount;
|
||||
stats.vectorSearchUsageRate = vectorSearchCount / logs.length;
|
||||
}
|
||||
|
||||
return stats;
|
||||
} catch (error) {
|
||||
console.error('[Logs] Failed to get project log stats:', error);
|
||||
return {
|
||||
totalLogs: 0,
|
||||
successCount: 0,
|
||||
errorCount: 0,
|
||||
byEventType: {},
|
||||
byMode: {},
|
||||
avgVectorChunks: 0,
|
||||
vectorSearchUsageRate: 0,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete old logs (for maintenance/cleanup)
|
||||
*
|
||||
* @param before - Delete logs older than this date
|
||||
* @returns Number of logs deleted
|
||||
*/
|
||||
export async function deleteOldLogs(before: Date): Promise<number> {
|
||||
try {
|
||||
const adminDb = getAdminDb();
|
||||
const snapshot = await adminDb
|
||||
.collection('project_logs')
|
||||
.where('createdAt', '<', before)
|
||||
.limit(500) // Process in batches to avoid overwhelming Firestore
|
||||
.get();
|
||||
|
||||
if (snapshot.empty) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
const batch = adminDb.batch();
|
||||
snapshot.docs.forEach((doc) => {
|
||||
batch.delete(doc.ref);
|
||||
});
|
||||
|
||||
await batch.commit();
|
||||
|
||||
console.log(`[Logs] Deleted ${snapshot.size} old logs`);
|
||||
|
||||
return snapshot.size;
|
||||
} catch (error) {
|
||||
console.error('[Logs] Failed to delete old logs:', error);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
102
lib/server/product-model.ts
Normal file
102
lib/server/product-model.ts
Normal file
@@ -0,0 +1,102 @@
|
||||
import { listChatExtractions } from '@/lib/server/chat-extraction';
|
||||
import { clamp, nowIso, persistPhaseArtifacts, uniqueStrings, toStage } from '@/lib/server/projects';
|
||||
import type { CanonicalProductModel } from '@/lib/types/product-model';
|
||||
import type { ChatExtractionRecord } from '@/lib/types/chat-extraction';
|
||||
|
||||
const average = (numbers: number[]) =>
|
||||
numbers.length ? numbers.reduce((sum, value) => sum + value, 0) / numbers.length : 0;
|
||||
|
||||
/**
 * Build the canonical product model for a project from its chat
 * extractions, persist it into the project's phase artifacts, and advance
 * the project to the 'vision_ready' phase.
 *
 * @param projectId - Project whose extractions are aggregated
 * @returns The canonical model that was persisted
 * @throws Error when the project has no chat extractions
 */
export async function buildCanonicalProductModel(projectId: string): Promise<CanonicalProductModel> {
  const extractions = await listChatExtractions(projectId);
  if (!extractions.length) {
    throw new Error('No chat extractions found for project');
  }

  // Average completion/confidence across ALL extractions, preferring the
  // nested summary_scores when present and falling back to top-level fields.
  // NOTE(review): the `as any` casts suggest `record.data` is not typed with
  // summary_scores — confirm against the ChatExtractionRecord data type.
  const completionAvg = average(
    extractions.map(
      (record) =>
        (record.data as any)?.summary_scores?.overall_completion ?? record.overallCompletion ?? 0,
    ),
  );
  const confidenceAvg = average(
    extractions.map(
      (record) =>
        (record.data as any)?.summary_scores?.overall_confidence ?? record.overallConfidence ?? 0,
    ),
  );

  // The single highest-confidence extraction supplies the scalar fields;
  // the averaged scores computed above are carried along separately.
  const canonical = mapExtractionToCanonical(
    projectId,
    pickHighestConfidence(extractions as any),
    completionAvg,
    confidenceAvg,
  );

  // Persist the model, record vision-phase scores, append a history entry,
  // and transition the project to 'vision_ready' in one write.
  await persistPhaseArtifacts(projectId, (phaseData, phaseScores, phaseHistory) => {
    phaseData.canonicalProductModel = canonical;
    phaseScores.vision = {
      overallCompletion: canonical.overallCompletion,
      overallConfidence: canonical.overallConfidence,
      updatedAt: nowIso(),
    };
    phaseHistory.push({ phase: 'vision', status: 'completed', timestamp: nowIso() });
    return { phaseData, phaseScores, phaseHistory, nextPhase: 'vision_ready' };
  });

  return canonical;
}
|
||||
|
||||
/**
 * Pick the extraction record with the highest overallConfidence.
 *
 * Precondition: `records` must be non-empty — reduce without an initial
 * value throws a TypeError on an empty array (the caller guards this).
 * Ties keep the earlier record.
 */
function pickHighestConfidence(records: ChatExtractionRecord[]) {
  return records.reduce((best, record) =>
    record.overallConfidence > best.overallConfidence ? record : best,
  );
}
|
||||
|
||||
/**
 * Map a single chat-extraction record onto the CanonicalProductModel shape.
 *
 * Scalar fields come from the chosen record; completion/confidence are the
 * caller-supplied averages across all records, clamped into [0, 1]. List
 * fields are de-duplicated with empty/null entries dropped (uniqueStrings).
 *
 * @param projectId - Owning project
 * @param record - The extraction chosen as canonical (highest confidence)
 * @param completionAvg - Average overall completion across extractions
 * @param confidenceAvg - Average overall confidence across extractions
 */
function mapExtractionToCanonical(
  projectId: string,
  record: ChatExtractionRecord,
  completionAvg: number,
  confidenceAvg: number,
): CanonicalProductModel {
  const data = record.data;

  // Prefer a feature's name; fall back to its description when unnamed.
  const coreFeatures = data.solution_and_features.core_features.map(
    (feature) => feature.name || feature.description,
  );
  const niceToHaveFeatures = data.solution_and_features.nice_to_have_features.map(
    (feature) => feature.name || feature.description,
  );

  return {
    projectId,
    workingTitle: data.project_summary.working_title ?? null,
    oneLiner: data.project_summary.one_liner ?? null,
    problem: data.product_vision.problem_statement.description ?? null,
    targetUser: data.target_users.primary_segment.description ?? null,
    desiredOutcome: data.product_vision.target_outcome.description ?? null,
    coreSolution: data.solution_and_features.core_solution.description ?? null,
    coreFeatures: uniqueStrings(coreFeatures),
    niceToHaveFeatures: uniqueStrings(niceToHaveFeatures),
    marketCategory: data.market_and_competition.market_category.description ?? null,
    competitors: uniqueStrings(
      data.market_and_competition.competitors.map((competitor) => competitor.name),
    ),
    techStack: uniqueStrings(
      data.tech_and_constraints.stack_mentions.map((item) => item.description),
    ),
    constraints: uniqueStrings(
      data.tech_and_constraints.constraints.map((constraint) => constraint.description),
    ),
    // Free-form stage strings are normalized to a known ProjectStage.
    currentStage: toStage(data.project_summary.stage),
    shortTermGoals: uniqueStrings(
      data.goals_and_success.short_term_goals.map((goal) => goal.description),
    ),
    longTermGoals: uniqueStrings(
      data.goals_and_success.long_term_goals.map((goal) => goal.description),
    ),
    overallCompletion: clamp(completionAvg),
    overallConfidence: clamp(confidenceAvg),
  };
}
|
||||
|
||||
|
||||
2
lib/server/project-artifacts.ts
Normal file
2
lib/server/project-artifacts.ts
Normal file
@@ -0,0 +1,2 @@
|
||||
export {};
|
||||
|
||||
64
lib/server/projects.ts
Normal file
64
lib/server/projects.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
import { FieldValue } from 'firebase-admin/firestore';
|
||||
import { getAdminDb } from '@/lib/firebase/admin';
|
||||
import type {
|
||||
ProjectPhase,
|
||||
ProjectPhaseData,
|
||||
ProjectPhaseScores,
|
||||
ProjectStage,
|
||||
} from '@/lib/types/project-artifacts';
|
||||
|
||||
export const clamp = (value: number) => Math.max(0, Math.min(1, value));
|
||||
export const nowIso = () => new Date().toISOString();
|
||||
|
||||
export function uniqueStrings(values: Array<string | null | undefined>): string[] {
|
||||
return Array.from(new Set(values.filter((value): value is string => Boolean(value))));
|
||||
}
|
||||
|
||||
export function toStage(stage?: string | null): ProjectStage {
|
||||
const allowed: ProjectStage[] = ['idea', 'prototype', 'mvp_in_progress', 'live_beta', 'live_paid', 'unknown'];
|
||||
if (!stage) return 'unknown';
|
||||
return allowed.includes(stage as ProjectStage) ? (stage as ProjectStage) : 'unknown';
|
||||
}
|
||||
|
||||
export async function loadPhaseContainers(projectId: string) {
|
||||
const adminDb = getAdminDb();
|
||||
const projectRef = adminDb.collection('projects').doc(projectId);
|
||||
const snapshot = await projectRef.get();
|
||||
const doc = snapshot.data() || {};
|
||||
const phaseData = (doc.phaseData ?? {}) as ProjectPhaseData;
|
||||
const phaseScores = (doc.phaseScores ?? {}) as ProjectPhaseScores;
|
||||
const phaseHistory = Array.isArray(doc.phaseHistory) ? [...doc.phaseHistory] : [];
|
||||
return { projectRef, phaseData, phaseScores, phaseHistory };
|
||||
}
|
||||
|
||||
/**
 * Result of a phase-artifact builder: the updated containers plus an
 * optional phase transition to apply in the same write.
 */
interface PersistencePayload {
  phaseData: ProjectPhaseData;
  phaseScores: ProjectPhaseScores;
  phaseHistory: Array<Record<string, unknown>>;
  nextPhase?: ProjectPhase; // when set, currentPhase is advanced and phaseStatus set to 'completed'
}
|
||||
|
||||
/**
 * Load a project's phase containers, let `builder` transform them, and
 * write the result back in a single merged update.
 *
 * When the builder returns `nextPhase`, currentPhase is advanced and
 * phaseStatus is set to 'completed' in the same write.
 *
 * NOTE(review): this is a read-modify-write without a transaction, so
 * concurrent callers can clobber each other's phase data — confirm whether
 * that is acceptable for the call sites.
 *
 * @param projectId - Project document to update
 * @param builder - Pure-ish callback producing the new containers
 */
export async function persistPhaseArtifacts(
  projectId: string,
  builder: (
    phaseData: ProjectPhaseData,
    phaseScores: ProjectPhaseScores,
    phaseHistory: Array<Record<string, unknown>>,
  ) => PersistencePayload,
) {
  const { projectRef, phaseData, phaseScores, phaseHistory } = await loadPhaseContainers(projectId);
  const payload = builder(phaseData, phaseScores, phaseHistory);

  await projectRef.set(
    {
      phaseData: payload.phaseData,
      phaseScores: payload.phaseScores,
      phaseHistory: payload.phaseHistory,
      // Only transition the phase when the builder requested it.
      ...(payload.nextPhase ? { currentPhase: payload.nextPhase, phaseStatus: 'completed' as const } : {}),
      updatedAt: FieldValue.serverTimestamp(),
    },
    { merge: true },
  );
}
|
||||
|
||||
|
||||
453
lib/server/vector-memory.ts
Normal file
453
lib/server/vector-memory.ts
Normal file
@@ -0,0 +1,453 @@
|
||||
/**
|
||||
* Server-side helpers for AlloyDB vector memory operations
|
||||
*
|
||||
* Handles CRUD operations on knowledge_chunks and semantic search.
|
||||
*/
|
||||
|
||||
import { getAlloyDbClient, executeQuery, getPooledClient } from '@/lib/db/alloydb';
|
||||
import type {
|
||||
KnowledgeChunk,
|
||||
KnowledgeChunkRow,
|
||||
KnowledgeChunkSearchResult,
|
||||
VectorSearchOptions,
|
||||
CreateKnowledgeChunkInput,
|
||||
BatchCreateKnowledgeChunksInput,
|
||||
} from '@/lib/types/vector-memory';
|
||||
|
||||
/**
|
||||
* Convert database row (snake_case) to TypeScript object (camelCase)
|
||||
*/
|
||||
function rowToKnowledgeChunk(row: KnowledgeChunkRow): KnowledgeChunk {
|
||||
return {
|
||||
id: row.id,
|
||||
projectId: row.project_id,
|
||||
knowledgeItemId: row.knowledge_item_id,
|
||||
chunkIndex: row.chunk_index,
|
||||
content: row.content,
|
||||
sourceType: row.source_type,
|
||||
importance: row.importance,
|
||||
createdAt: row.created_at,
|
||||
updatedAt: row.updated_at,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Retrieve relevant knowledge chunks using vector similarity search
 *
 * @param projectId - Firestore project ID to filter by
 * @param queryEmbedding - Vector embedding of the query (e.g., user's question)
 * @param options - Search options (limit, filters, etc.)
 * @returns Array of chunks ordered by similarity (most relevant first)
 * @throws Error (wrapping the underlying failure) when the query fails
 *
 * @example
 * ```typescript
 * const embedding = await embedText("What's the MVP scope?");
 * const chunks = await retrieveRelevantChunks('proj123', embedding, { limit: 10, minSimilarity: 0.7 });
 * ```
 */
export async function retrieveRelevantChunks(
  projectId: string,
  queryEmbedding: number[],
  options: VectorSearchOptions = {}
): Promise<KnowledgeChunkSearchResult[]> {
  const {
    limit = 10,
    minSimilarity,
    sourceTypes,
    importanceLevels,
  } = options;

  try {
    // Build the query with optional filters. `<=>` is pgvector's cosine
    // distance operator, so `1 - distance` yields cosine similarity.
    let queryText = `
      SELECT
        id,
        project_id,
        knowledge_item_id,
        chunk_index,
        content,
        source_type,
        importance,
        created_at,
        updated_at,
        1 - (embedding <=> $1::vector) AS similarity
      FROM knowledge_chunks
      WHERE project_id = $2
    `;

    // $1 = embedding, $2 = projectId; optional filters take $3 onward, so
    // paramIndex must advance in lock-step with params.push below.
    const params: any[] = [JSON.stringify(queryEmbedding), projectId];
    let paramIndex = 3;

    // Filter by source types
    if (sourceTypes && sourceTypes.length > 0) {
      queryText += ` AND source_type = ANY($${paramIndex})`;
      params.push(sourceTypes);
      paramIndex++;
    }

    // Filter by importance levels
    if (importanceLevels && importanceLevels.length > 0) {
      queryText += ` AND importance = ANY($${paramIndex})`;
      params.push(importanceLevels);
      paramIndex++;
    }

    // Filter by minimum similarity
    if (minSimilarity !== undefined) {
      queryText += ` AND (1 - (embedding <=> $1::vector)) >= $${paramIndex}`;
      params.push(minSimilarity);
      paramIndex++;
    }

    // Order by distance (ascending distance == descending similarity) and limit.
    queryText += ` ORDER BY embedding <=> $1::vector LIMIT $${paramIndex}`;
    params.push(limit);

    const result = await executeQuery<KnowledgeChunkRow & { similarity: number }>(
      queryText,
      params
    );

    return result.rows.map((row) => ({
      ...rowToKnowledgeChunk(row),
      similarity: row.similarity,
    }));
  } catch (error) {
    console.error('[Vector Memory] Failed to retrieve relevant chunks:', error);
    throw new Error(
      `Failed to retrieve chunks: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
|
||||
|
||||
/**
 * Create a single knowledge chunk row.
 *
 * @param input - Chunk data including its embedding vector
 * @returns The created chunk (the embedding column is not returned)
 * @throws Error (wrapping the underlying failure) when the insert fails
 */
export async function createKnowledgeChunk(
  input: CreateKnowledgeChunkInput
): Promise<KnowledgeChunk> {
  const {
    projectId,
    knowledgeItemId,
    chunkIndex,
    content,
    embedding,
    sourceType = null,
    importance = null,
  } = input;

  try {
    // RETURNING deliberately omits the embedding column — callers only
    // need the chunk metadata back.
    const queryText = `
      INSERT INTO knowledge_chunks (
        project_id,
        knowledge_item_id,
        chunk_index,
        content,
        embedding,
        source_type,
        importance
      )
      VALUES ($1, $2, $3, $4, $5::vector, $6, $7)
      RETURNING
        id,
        project_id,
        knowledge_item_id,
        chunk_index,
        content,
        source_type,
        importance,
        created_at,
        updated_at
    `;

    // The embedding array is serialized to JSON text and cast to pgvector's
    // `vector` type by the $5::vector parameter.
    const result = await executeQuery<KnowledgeChunkRow>(queryText, [
      projectId,
      knowledgeItemId,
      chunkIndex,
      content,
      JSON.stringify(embedding),
      sourceType,
      importance,
    ]);

    if (result.rows.length === 0) {
      throw new Error('Failed to insert knowledge chunk');
    }

    return rowToKnowledgeChunk(result.rows[0]);
  } catch (error) {
    console.error('[Vector Memory] Failed to create knowledge chunk:', error);
    throw new Error(
      `Failed to create chunk: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
|
||||
|
||||
/**
|
||||
* Batch create multiple knowledge chunks efficiently
|
||||
*
|
||||
* Uses a transaction to ensure atomicity.
|
||||
*/
|
||||
export async function batchCreateKnowledgeChunks(
|
||||
input: BatchCreateKnowledgeChunksInput
|
||||
): Promise<KnowledgeChunk[]> {
|
||||
const { projectId, knowledgeItemId, chunks } = input;
|
||||
|
||||
if (chunks.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
const client = await getPooledClient();
|
||||
|
||||
try {
|
||||
await client.query('BEGIN');
|
||||
|
||||
const createdChunks: KnowledgeChunk[] = [];
|
||||
|
||||
for (const chunk of chunks) {
|
||||
const queryText = `
|
||||
INSERT INTO knowledge_chunks (
|
||||
project_id,
|
||||
knowledge_item_id,
|
||||
chunk_index,
|
||||
content,
|
||||
embedding,
|
||||
source_type,
|
||||
importance
|
||||
)
|
||||
VALUES ($1, $2, $3, $4, $5::vector, $6, $7)
|
||||
RETURNING
|
||||
id,
|
||||
project_id,
|
||||
knowledge_item_id,
|
||||
chunk_index,
|
||||
content,
|
||||
source_type,
|
||||
importance,
|
||||
created_at,
|
||||
updated_at
|
||||
`;
|
||||
|
||||
const result = await client.query<KnowledgeChunkRow>(queryText, [
|
||||
projectId,
|
||||
knowledgeItemId,
|
||||
chunk.chunkIndex,
|
||||
chunk.content,
|
||||
JSON.stringify(chunk.embedding),
|
||||
chunk.sourceType ?? null,
|
||||
chunk.importance ?? null,
|
||||
]);
|
||||
|
||||
if (result.rows.length > 0) {
|
||||
createdChunks.push(rowToKnowledgeChunk(result.rows[0]));
|
||||
}
|
||||
}
|
||||
|
||||
await client.query('COMMIT');
|
||||
|
||||
console.log(
|
||||
`[Vector Memory] Batch created ${createdChunks.length} chunks for knowledge_item ${knowledgeItemId}`
|
||||
);
|
||||
|
||||
return createdChunks;
|
||||
} catch (error) {
|
||||
await client.query('ROLLBACK');
|
||||
console.error('[Vector Memory] Failed to batch create chunks:', error);
|
||||
throw new Error(
|
||||
`Failed to batch create chunks: ${error instanceof Error ? error.message : String(error)}`
|
||||
);
|
||||
} finally {
|
||||
client.release();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete all chunks for a specific knowledge_item
|
||||
*
|
||||
* Used when regenerating chunks or removing a knowledge_item.
|
||||
*/
|
||||
export async function deleteChunksForKnowledgeItem(
|
||||
knowledgeItemId: string
|
||||
): Promise<number> {
|
||||
try {
|
||||
const queryText = `
|
||||
DELETE FROM knowledge_chunks
|
||||
WHERE knowledge_item_id = $1
|
||||
RETURNING id
|
||||
`;
|
||||
|
||||
const result = await executeQuery(queryText, [knowledgeItemId]);
|
||||
|
||||
console.log(
|
||||
`[Vector Memory] Deleted ${result.rowCount ?? 0} chunks for knowledge_item ${knowledgeItemId}`
|
||||
);
|
||||
|
||||
return result.rowCount ?? 0;
|
||||
} catch (error) {
|
||||
console.error('[Vector Memory] Failed to delete chunks:', error);
|
||||
throw new Error(
|
||||
`Failed to delete chunks: ${error instanceof Error ? error.message : String(error)}`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete all chunks for a specific project
|
||||
*
|
||||
* Used when cleaning up or resetting a project.
|
||||
*/
|
||||
export async function deleteChunksForProject(projectId: string): Promise<number> {
|
||||
try {
|
||||
const queryText = `
|
||||
DELETE FROM knowledge_chunks
|
||||
WHERE project_id = $1
|
||||
RETURNING id
|
||||
`;
|
||||
|
||||
const result = await executeQuery(queryText, [projectId]);
|
||||
|
||||
console.log(
|
||||
`[Vector Memory] Deleted ${result.rowCount ?? 0} chunks for project ${projectId}`
|
||||
);
|
||||
|
||||
return result.rowCount ?? 0;
|
||||
} catch (error) {
|
||||
console.error('[Vector Memory] Failed to delete project chunks:', error);
|
||||
throw new Error(
|
||||
`Failed to delete project chunks: ${error instanceof Error ? error.message : String(error)}`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get chunk count for a knowledge_item
|
||||
*/
|
||||
export async function getChunkCountForKnowledgeItem(
|
||||
knowledgeItemId: string
|
||||
): Promise<number> {
|
||||
try {
|
||||
const result = await executeQuery<{ count: string }>(
|
||||
'SELECT COUNT(*) as count FROM knowledge_chunks WHERE knowledge_item_id = $1',
|
||||
[knowledgeItemId]
|
||||
);
|
||||
|
||||
return parseInt(result.rows[0]?.count ?? '0', 10);
|
||||
} catch (error) {
|
||||
console.error('[Vector Memory] Failed to get chunk count:', error);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get chunk count for a project
|
||||
*/
|
||||
export async function getChunkCountForProject(projectId: string): Promise<number> {
|
||||
try {
|
||||
const result = await executeQuery<{ count: string }>(
|
||||
'SELECT COUNT(*) as count FROM knowledge_chunks WHERE project_id = $1',
|
||||
[projectId]
|
||||
);
|
||||
|
||||
return parseInt(result.rows[0]?.count ?? '0', 10);
|
||||
} catch (error) {
|
||||
console.error('[Vector Memory] Failed to get project chunk count:', error);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Regenerate knowledge_chunks for a single knowledge_item
|
||||
*
|
||||
* This is the main pipeline that:
|
||||
* 1. Chunks the knowledge_item.content
|
||||
* 2. Generates embeddings for each chunk
|
||||
* 3. Deletes existing chunks for this item
|
||||
* 4. Inserts new chunks into AlloyDB
|
||||
*
|
||||
* @param knowledgeItem - The knowledge item to process
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* const knowledgeItem = await getKnowledgeItem(projectId, itemId);
|
||||
* await writeKnowledgeChunksForItem(knowledgeItem);
|
||||
* ```
|
||||
*/
|
||||
export async function writeKnowledgeChunksForItem(
|
||||
knowledgeItem: {
|
||||
id: string;
|
||||
projectId: string;
|
||||
content: string;
|
||||
sourceMeta?: { sourceType?: string; importance?: 'primary' | 'supporting' | 'irrelevant' };
|
||||
}
|
||||
): Promise<void> {
|
||||
const { chunkText } = await import('@/lib/ai/chunking');
|
||||
const { embedTextBatch } = await import('@/lib/ai/embeddings');
|
||||
|
||||
try {
|
||||
console.log(
|
||||
`[Vector Memory] Starting chunking pipeline for knowledge_item ${knowledgeItem.id}`
|
||||
);
|
||||
|
||||
// Step 1: Chunk the content
|
||||
const textChunks = chunkText(knowledgeItem.content, {
|
||||
maxTokens: 800,
|
||||
overlapChars: 200,
|
||||
preserveParagraphs: true,
|
||||
});
|
||||
|
||||
if (textChunks.length === 0) {
|
||||
console.warn(
|
||||
`[Vector Memory] No chunks generated for knowledge_item ${knowledgeItem.id} - content may be empty`
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(
|
||||
`[Vector Memory] Generated ${textChunks.length} chunks for knowledge_item ${knowledgeItem.id}`
|
||||
);
|
||||
|
||||
// Step 2: Generate embeddings for all chunks
|
||||
const chunkTexts = textChunks.map((chunk) => chunk.text);
|
||||
const embeddings = await embedTextBatch(chunkTexts, {
|
||||
delayMs: 50, // Small delay to avoid rate limiting
|
||||
skipEmpty: true,
|
||||
});
|
||||
|
||||
if (embeddings.length !== textChunks.length) {
|
||||
throw new Error(
|
||||
`Embedding count mismatch: got ${embeddings.length}, expected ${textChunks.length}`
|
||||
);
|
||||
}
|
||||
|
||||
// Step 3: Delete existing chunks for this knowledge_item
|
||||
await deleteChunksForKnowledgeItem(knowledgeItem.id);
|
||||
|
||||
// Step 4: Insert new chunks
|
||||
const chunksToInsert = textChunks.map((chunk, index) => ({
|
||||
chunkIndex: chunk.index,
|
||||
content: chunk.text,
|
||||
embedding: embeddings[index],
|
||||
sourceType: knowledgeItem.sourceMeta?.sourceType ?? null,
|
||||
importance: knowledgeItem.sourceMeta?.importance ?? null,
|
||||
}));
|
||||
|
||||
await batchCreateKnowledgeChunks({
|
||||
projectId: knowledgeItem.projectId,
|
||||
knowledgeItemId: knowledgeItem.id,
|
||||
chunks: chunksToInsert,
|
||||
});
|
||||
|
||||
console.log(
|
||||
`[Vector Memory] Successfully processed ${chunksToInsert.length} chunks for knowledge_item ${knowledgeItem.id}`
|
||||
);
|
||||
} catch (error) {
|
||||
console.error(
|
||||
`[Vector Memory] Failed to write chunks for knowledge_item ${knowledgeItem.id}:`,
|
||||
error
|
||||
);
|
||||
throw new Error(
|
||||
`Failed to write chunks: ${error instanceof Error ? error.message : String(error)}`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user