VIBN Frontend for Coolify deployment

2026-02-15 19:25:52 -08:00
commit 40bf8428cd
398 changed files with 76513 additions and 0 deletions
--- a/lib/server/backend-extractor.ts
+++ b/lib/server/backend-extractor.ts
@@ -0,0 +1,228 @@
+/**
+ * Backend Extraction Module
+ * 
+ * Runs extraction as a pure backend job, not in chat.
+ * Called when Collector phase completes.
+ */
+
+import { getAdminDb } from '@/lib/firebase/admin';
+import { GeminiLlmClient } from '@/lib/ai/gemini-client';
+import { BACKEND_EXTRACTOR_SYSTEM_PROMPT } from '@/lib/ai/prompts/extractor';
+import { writeKnowledgeChunksForItem } from '@/lib/server/vector-memory';
+import type { ExtractionOutput, ExtractedInsight } from '@/lib/types/extraction-output';
+import type { PhaseHandoff } from '@/lib/types/phase-handoff';
+import { z } from 'zod';
+
+// Leaf validators are built through factories so that every field receives its
+// own schema instance, exactly as if each one had been written out inline.
+const confidenceScore = () => z.number().min(0).max(1);
+const stringList = () => z.array(z.string());
+
+// A single insight pulled out of a document.
+const InsightSchema = z.object({
+  id: z.string(),
+  type: z.enum(["problem", "user", "feature", "constraint", "opportunity", "other"]),
+  title: z.string(),
+  description: z.string(),
+  sourceText: z.string(),
+  sourceKnowledgeItemId: z.string(),
+  importance: z.enum(["primary", "supporting"]),
+  confidence: confidenceScore(),
+});
+
+/**
+ * Zod schema for the structured result requested from the LLM for one document:
+ * the detailed insights, flat string summaries per category, open questions,
+ * known gaps, and an overall confidence between 0 and 1.
+ */
+const ExtractionOutputSchema = z.object({
+  insights: z.array(InsightSchema),
+  problems: stringList(),
+  targetUsers: stringList(),
+  features: stringList(),
+  constraints: stringList(),
+  opportunities: stringList(),
+  uncertainties: stringList(),
+  missingInformation: stringList(),
+  overallConfidence: confidenceScore(),
+});
+
+/** Returns the distinct strings from `values`, keeping first-seen order. */
+function uniqueStrings(values: string[]): string[] {
+  return [...new Set(values)];
+}
+
+/**
+ * Stores an extraction handoff on the project document and moves the project
+ * into the `extraction_review` phase.
+ */
+async function persistExtractionHandoff(
+  adminDb: ReturnType<typeof getAdminDb>,
+  projectId: string,
+  handoff: PhaseHandoff,
+): Promise<void> {
+  // One timestamp so both date fields written by this update agree.
+  const completedAt = new Date().toISOString();
+  await adminDb.collection('projects').doc(projectId).update({
+    'phaseData.phaseHandoffs.extraction': handoff,
+    currentPhase: 'extraction_review',
+    phaseStatus: 'in_progress',
+    'phaseData.extractionCompletedAt': completedAt,
+    updatedAt: completedAt,
+  });
+}
+
+/**
+ * Runs document extraction for a project as a backend job.
+ *
+ * Loads the project's `imported_document` knowledge items, asks the LLM for a
+ * structured extraction of each one, stores every result in `chat_extractions`,
+ * writes a vector chunk per primary insight, and finally saves an aggregated
+ * extraction `PhaseHandoff` on the project while switching it to the
+ * `extraction_review` phase. When the project has no imported documents an
+ * empty, not-ready handoff is saved instead.
+ *
+ * A failure on one document (or one insight chunk) is logged and does not stop
+ * the remaining work; documents that produced no extraction are reported in the
+ * handoff's `missing` list.
+ *
+ * @param projectId - ID of the document in the `projects` collection.
+ * @returns Resolves once the handoff has been written to the project.
+ * @throws Error if the project does not exist. Any other error raised outside
+ *   the per-document handling is logged and rethrown.
+ */
+export async function runBackendExtractionForProject(projectId: string): Promise<void> {
+  console.log(`[Backend Extractor] Starting extraction for project ${projectId}`);
+
+  const adminDb = getAdminDb();
+
+  try {
+    // 1. Make sure the project exists before doing any work
+    const projectDoc = await adminDb.collection('projects').doc(projectId).get();
+    if (!projectDoc.exists) {
+      throw new Error(`Project ${projectId} not found`);
+    }
+
+    // 2. Load knowledge items
+    const knowledgeSnapshot = await adminDb
+      .collection('knowledge_items')
+      .where('projectId', '==', projectId)
+      .where('sourceType', '==', 'imported_document')
+      .get();
+
+    if (knowledgeSnapshot.empty) {
+      console.log(`[Backend Extractor] No documents to extract for project ${projectId} - creating empty handoff`);
+
+      // Create a minimal extraction handoff even with no documents
+      const emptyHandoff: PhaseHandoff = {
+        phase: 'extraction',
+        readyForNextPhase: false, // Not ready - no materials to extract from
+        confidence: 0,
+        confirmed: {
+          problems: [],
+          targetUsers: [],
+          features: [],
+          constraints: [],
+          opportunities: [],
+        },
+        uncertain: {},
+        missing: ['No documents uploaded - need product requirements, specs, or notes'],
+        questionsForUser: [
+          'You haven\'t uploaded any documents yet. Do you have any product specs, requirements, or notes to share?',
+        ],
+        sourceEvidence: [],
+        version: 'extraction_v1',
+        timestamp: new Date().toISOString(),
+      };
+
+      await persistExtractionHandoff(adminDb, projectId, emptyHandoff);
+
+      console.log(`[Backend Extractor] Set phase to extraction_review with empty handoff`);
+      return;
+    }
+
+    console.log(`[Backend Extractor] Found ${knowledgeSnapshot.size} documents to process`);
+
+    const llm = new GeminiLlmClient();
+    const allExtractionOutputs: ExtractionOutput[] = [];
+    const processedKnowledgeItemIds: string[] = [];
+    // Documents that yielded no extraction at all; surfaced in the handoff below.
+    const failedDocumentLabels: string[] = [];
+
+    // 3. Process each document
+    for (const knowledgeDoc of knowledgeSnapshot.docs) {
+      const knowledgeData = knowledgeDoc.data();
+      const knowledgeItemId = knowledgeDoc.id;
+      const documentLabel: string = knowledgeData.title || knowledgeItemId;
+
+      try {
+        console.log(`[Backend Extractor] Processing document: ${documentLabel}`);
+
+        // Without this guard a missing body would reach the prompt as the
+        // literal text "undefined" and spend an LLM call on nothing.
+        if (typeof knowledgeData.content !== 'string' || knowledgeData.content.trim() === '') {
+          throw new Error(`Knowledge item ${knowledgeItemId} has no text content to extract from`);
+        }
+
+        // Call LLM with structured extraction + thinking mode
+        const extraction = await llm.structuredCall<ExtractionOutput>({
+          model: 'gemini',
+          systemPrompt: BACKEND_EXTRACTOR_SYSTEM_PROMPT,
+          messages: [{
+            role: 'user',
+            content: `Document Title: ${knowledgeData.title || 'Untitled'}\nSource Type: ${knowledgeData.sourceType}\n\nContent:\n${knowledgeData.content}`,
+          }],
+          // NOTE(review): the cast hides any mismatch between the zod schema and
+          // ExtractionOutput; tighten once structuredCall's schema type is known.
+          schema: ExtractionOutputSchema as any,
+          temperature: 1.0, // Gemini 3 default (changed from 0.3)
+          thinking_config: {
+            thinking_level: 'high', // Enable deep reasoning for document analysis
+            include_thoughts: false, // Don't include thought tokens in output (saves cost)
+          },
+        });
+
+        // The document ID is authoritative; overwrite whatever the model put there
+        for (const insight of extraction.insights) {
+          insight.sourceKnowledgeItemId = knowledgeItemId;
+        }
+
+        allExtractionOutputs.push(extraction);
+        processedKnowledgeItemIds.push(knowledgeItemId);
+
+        // 4. Persist extraction to chat_extractions
+        const recordedAt = new Date().toISOString();
+        await adminDb.collection('chat_extractions').add({
+          projectId,
+          knowledgeItemId,
+          data: extraction,
+          overallConfidence: extraction.overallConfidence,
+          overallCompletion: extraction.overallConfidence > 0.7 ? 0.9 : 0.6,
+          createdAt: recordedAt,
+          updatedAt: recordedAt,
+        });
+
+        console.log(`[Backend Extractor] Extracted ${extraction.insights.length} insights from ${documentLabel}`);
+
+        // 5. Write vector chunks for primary insights
+        // NOTE(review): every insight is written under the same knowledge item
+        // id. If writeKnowledgeChunksForItem replaces existing chunks for an id,
+        // only the last insight would survive - confirm against that helper.
+        // Writes stay sequential for the same reason.
+        const primaryInsights = extraction.insights.filter(i => i.importance === 'primary');
+        for (const insight of primaryInsights) {
+          try {
+            await writeKnowledgeChunksForItem({
+              id: knowledgeItemId,
+              projectId,
+              content: `${insight.title}\n\n${insight.description}\n\nSource: ${insight.sourceText}`,
+              sourceMeta: {
+                sourceType: 'extracted_insight',
+                importance: 'primary',
+              },
+            });
+          } catch (chunkError) {
+            console.error(`[Backend Extractor] Failed to write chunk for insight ${insight.id}:`, chunkError);
+            // Continue processing other insights
+          }
+        }
+      } catch (docError) {
+        console.error(`[Backend Extractor] Failed to process document ${knowledgeItemId}:`, docError);
+        // A failure after the extraction was collected (e.g. the chat_extractions
+        // write) still contributes to the handoff, so only report documents
+        // that produced nothing.
+        if (!processedKnowledgeItemIds.includes(knowledgeItemId)) {
+          failedDocumentLabels.push(documentLabel);
+        }
+        // Continue with next document
+      }
+    }
+
+    // 6. Build extraction PhaseHandoff
+    // Flatten all extracted items (they're already strings, not objects)
+    const allProblems = uniqueStrings(allExtractionOutputs.flatMap(e => e.problems));
+    const allUsers = uniqueStrings(allExtractionOutputs.flatMap(e => e.targetUsers));
+    const allFeatures = uniqueStrings(allExtractionOutputs.flatMap(e => e.features));
+    const allConstraints = uniqueStrings(allExtractionOutputs.flatMap(e => e.constraints));
+    const allOpportunities = uniqueStrings(allExtractionOutputs.flatMap(e => e.opportunities));
+    const allUncertainties = uniqueStrings(allExtractionOutputs.flatMap(e => e.uncertainties));
+    const allMissing = uniqueStrings(allExtractionOutputs.flatMap(e => e.missingInformation));
+
+    // Make failed documents visible to whoever reviews the handoff instead of
+    // leaving them only in the server log.
+    const failureNotes = failedDocumentLabels.map(
+      label => `Extraction failed for document: ${label}`,
+    );
+
+    const avgConfidence = allExtractionOutputs.length > 0
+      ? allExtractionOutputs.reduce((sum, e) => sum + e.overallConfidence, 0) / allExtractionOutputs.length
+      : 0;
+
+    const readyForNextPhase = allProblems.length > 0 && allFeatures.length > 0 && avgConfidence > 0.5;
+
+    const extractionHandoff: PhaseHandoff = {
+      phase: 'extraction',
+      readyForNextPhase,
+      confidence: avgConfidence,
+      confirmed: {
+        problems: allProblems,
+        targetUsers: allUsers,
+        features: allFeatures,
+        constraints: allConstraints,
+        opportunities: allOpportunities,
+      },
+      uncertain: {},
+      missing: [...allMissing, ...failureNotes],
+      questionsForUser: allUncertainties,
+      sourceEvidence: processedKnowledgeItemIds,
+      version: 'extraction_v1',
+      timestamp: new Date().toISOString(),
+    };
+
+    // 7. Persist handoff and update phase
+    await persistExtractionHandoff(adminDb, projectId, extractionHandoff);
+
+    console.log(`[Backend Extractor] ✅ Extraction complete for project ${projectId}`);
+    console.log(`[Backend Extractor] - Problems: ${allProblems.length}`);
+    console.log(`[Backend Extractor] - Users: ${allUsers.length}`);
+    console.log(`[Backend Extractor] - Features: ${allFeatures.length}`);
+    console.log(`[Backend Extractor] - Confidence: ${(avgConfidence * 100).toFixed(1)}%`);
+    console.log(`[Backend Extractor] - Ready for next phase: ${readyForNextPhase}`);
+
+  } catch (error) {
+    console.error(`[Backend Extractor] Fatal error during extraction:`, error);
+    throw error;
+  }
+}
+