VIBN Frontend for Coolify deployment
This commit is contained in:
228
lib/server/backend-extractor.ts
Normal file
228
lib/server/backend-extractor.ts
Normal file
@@ -0,0 +1,228 @@
|
||||
/**
|
||||
* Backend Extraction Module
|
||||
*
|
||||
* Runs extraction as a pure backend job, not in chat.
|
||||
* Called when Collector phase completes.
|
||||
*/
|
||||
|
||||
import { getAdminDb } from '@/lib/firebase/admin';
|
||||
import { GeminiLlmClient } from '@/lib/ai/gemini-client';
|
||||
import { BACKEND_EXTRACTOR_SYSTEM_PROMPT } from '@/lib/ai/prompts/extractor';
|
||||
import { writeKnowledgeChunksForItem } from '@/lib/server/vector-memory';
|
||||
import type { ExtractionOutput, ExtractedInsight } from '@/lib/types/extraction-output';
|
||||
import type { PhaseHandoff } from '@/lib/types/phase-handoff';
|
||||
import { z } from 'zod';
|
||||
|
||||
const ExtractionOutputSchema = z.object({
|
||||
insights: z.array(z.object({
|
||||
id: z.string(),
|
||||
type: z.enum(["problem", "user", "feature", "constraint", "opportunity", "other"]),
|
||||
title: z.string(),
|
||||
description: z.string(),
|
||||
sourceText: z.string(),
|
||||
sourceKnowledgeItemId: z.string(),
|
||||
importance: z.enum(["primary", "supporting"]),
|
||||
confidence: z.number().min(0).max(1),
|
||||
})),
|
||||
problems: z.array(z.string()),
|
||||
targetUsers: z.array(z.string()),
|
||||
features: z.array(z.string()),
|
||||
constraints: z.array(z.string()),
|
||||
opportunities: z.array(z.string()),
|
||||
uncertainties: z.array(z.string()),
|
||||
missingInformation: z.array(z.string()),
|
||||
overallConfidence: z.number().min(0).max(1),
|
||||
});
|
||||
|
||||
export async function runBackendExtractionForProject(projectId: string): Promise<void> {
|
||||
console.log(`[Backend Extractor] Starting extraction for project ${projectId}`);
|
||||
|
||||
const adminDb = getAdminDb();
|
||||
|
||||
try {
|
||||
// 1. Load project
|
||||
const projectDoc = await adminDb.collection('projects').doc(projectId).get();
|
||||
if (!projectDoc.exists) {
|
||||
throw new Error(`Project ${projectId} not found`);
|
||||
}
|
||||
|
||||
const projectData = projectDoc.data();
|
||||
|
||||
// 2. Load knowledge items
|
||||
const knowledgeSnapshot = await adminDb
|
||||
.collection('knowledge_items')
|
||||
.where('projectId', '==', projectId)
|
||||
.where('sourceType', '==', 'imported_document')
|
||||
.get();
|
||||
|
||||
if (knowledgeSnapshot.empty) {
|
||||
console.log(`[Backend Extractor] No documents to extract for project ${projectId} - creating empty handoff`);
|
||||
|
||||
// Create a minimal extraction handoff even with no documents
|
||||
const emptyHandoff: PhaseHandoff = {
|
||||
phase: 'extraction',
|
||||
readyForNextPhase: false, // Not ready - no materials to extract from
|
||||
confidence: 0,
|
||||
confirmed: {
|
||||
problems: [],
|
||||
targetUsers: [],
|
||||
features: [],
|
||||
constraints: [],
|
||||
opportunities: [],
|
||||
},
|
||||
uncertain: {},
|
||||
missing: ['No documents uploaded - need product requirements, specs, or notes'],
|
||||
questionsForUser: [
|
||||
'You haven\'t uploaded any documents yet. Do you have any product specs, requirements, or notes to share?',
|
||||
],
|
||||
sourceEvidence: [],
|
||||
version: 'extraction_v1',
|
||||
timestamp: new Date().toISOString(),
|
||||
};
|
||||
|
||||
await adminDb.collection('projects').doc(projectId).update({
|
||||
'phaseData.phaseHandoffs.extraction': emptyHandoff,
|
||||
currentPhase: 'extraction_review',
|
||||
phaseStatus: 'in_progress',
|
||||
'phaseData.extractionCompletedAt': new Date().toISOString(),
|
||||
updatedAt: new Date().toISOString(),
|
||||
});
|
||||
|
||||
console.log(`[Backend Extractor] Set phase to extraction_review with empty handoff`);
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`[Backend Extractor] Found ${knowledgeSnapshot.size} documents to process`);
|
||||
|
||||
const llm = new GeminiLlmClient();
|
||||
const allExtractionOutputs: ExtractionOutput[] = [];
|
||||
const processedKnowledgeItemIds: string[] = [];
|
||||
|
||||
// 3. Process each document
|
||||
for (const knowledgeDoc of knowledgeSnapshot.docs) {
|
||||
const knowledgeData = knowledgeDoc.data();
|
||||
const knowledgeItemId = knowledgeDoc.id;
|
||||
|
||||
try {
|
||||
console.log(`[Backend Extractor] Processing document: ${knowledgeData.title || knowledgeItemId}`);
|
||||
|
||||
// Call LLM with structured extraction + thinking mode
|
||||
const extraction = await llm.structuredCall<ExtractionOutput>({
|
||||
model: 'gemini',
|
||||
systemPrompt: BACKEND_EXTRACTOR_SYSTEM_PROMPT,
|
||||
messages: [{
|
||||
role: 'user',
|
||||
content: `Document Title: ${knowledgeData.title || 'Untitled'}\nSource Type: ${knowledgeData.sourceType}\n\nContent:\n${knowledgeData.content}`,
|
||||
}],
|
||||
schema: ExtractionOutputSchema as any,
|
||||
temperature: 1.0, // Gemini 3 default (changed from 0.3)
|
||||
thinking_config: {
|
||||
thinking_level: 'high', // Enable deep reasoning for document analysis
|
||||
include_thoughts: false, // Don't include thought tokens in output (saves cost)
|
||||
},
|
||||
});
|
||||
|
||||
// Add knowledgeItemId to each insight
|
||||
extraction.insights.forEach(insight => {
|
||||
insight.sourceKnowledgeItemId = knowledgeItemId;
|
||||
});
|
||||
|
||||
allExtractionOutputs.push(extraction);
|
||||
processedKnowledgeItemIds.push(knowledgeItemId);
|
||||
|
||||
// 4. Persist extraction to chat_extractions
|
||||
await adminDb.collection('chat_extractions').add({
|
||||
projectId,
|
||||
knowledgeItemId,
|
||||
data: extraction,
|
||||
overallConfidence: extraction.overallConfidence,
|
||||
overallCompletion: extraction.overallConfidence > 0.7 ? 0.9 : 0.6,
|
||||
createdAt: new Date().toISOString(),
|
||||
updatedAt: new Date().toISOString(),
|
||||
});
|
||||
|
||||
console.log(`[Backend Extractor] Extracted ${extraction.insights.length} insights from ${knowledgeData.title || knowledgeItemId}`);
|
||||
|
||||
// 5. Write vector chunks for primary insights
|
||||
const primaryInsights = extraction.insights.filter(i => i.importance === 'primary');
|
||||
for (const insight of primaryInsights) {
|
||||
try {
|
||||
// Create a knowledge chunk for this insight
|
||||
await writeKnowledgeChunksForItem({
|
||||
id: knowledgeItemId,
|
||||
projectId,
|
||||
content: `${insight.title}\n\n${insight.description}\n\nSource: ${insight.sourceText}`,
|
||||
sourceMeta: {
|
||||
sourceType: 'extracted_insight',
|
||||
importance: 'primary',
|
||||
},
|
||||
});
|
||||
} catch (chunkError) {
|
||||
console.error(`[Backend Extractor] Failed to write chunk for insight ${insight.id}:`, chunkError);
|
||||
// Continue processing other insights
|
||||
}
|
||||
}
|
||||
|
||||
} catch (docError) {
|
||||
console.error(`[Backend Extractor] Failed to process document ${knowledgeItemId}:`, docError);
|
||||
// Continue with next document
|
||||
}
|
||||
}
|
||||
|
||||
// 6. Build extraction PhaseHandoff
|
||||
// Flatten all extracted items (they're already strings, not objects)
|
||||
const allProblems = [...new Set(allExtractionOutputs.flatMap(e => e.problems))];
|
||||
const allUsers = [...new Set(allExtractionOutputs.flatMap(e => e.targetUsers))];
|
||||
const allFeatures = [...new Set(allExtractionOutputs.flatMap(e => e.features))];
|
||||
const allConstraints = [...new Set(allExtractionOutputs.flatMap(e => e.constraints))];
|
||||
const allOpportunities = [...new Set(allExtractionOutputs.flatMap(e => e.opportunities))];
|
||||
const allUncertainties = [...new Set(allExtractionOutputs.flatMap(e => e.uncertainties))];
|
||||
const allMissing = [...new Set(allExtractionOutputs.flatMap(e => e.missingInformation))];
|
||||
|
||||
const avgConfidence = allExtractionOutputs.length > 0
|
||||
? allExtractionOutputs.reduce((sum, e) => sum + e.overallConfidence, 0) / allExtractionOutputs.length
|
||||
: 0;
|
||||
|
||||
const readyForNextPhase = allProblems.length > 0 && allFeatures.length > 0 && avgConfidence > 0.5;
|
||||
|
||||
const extractionHandoff: PhaseHandoff = {
|
||||
phase: 'extraction',
|
||||
readyForNextPhase,
|
||||
confidence: avgConfidence,
|
||||
confirmed: {
|
||||
problems: allProblems,
|
||||
targetUsers: allUsers,
|
||||
features: allFeatures,
|
||||
constraints: allConstraints,
|
||||
opportunities: allOpportunities,
|
||||
},
|
||||
uncertain: {},
|
||||
missing: allMissing,
|
||||
questionsForUser: allUncertainties,
|
||||
sourceEvidence: processedKnowledgeItemIds,
|
||||
version: 'extraction_v1',
|
||||
timestamp: new Date().toISOString(),
|
||||
};
|
||||
|
||||
// 7. Persist handoff and update phase
|
||||
await adminDb.collection('projects').doc(projectId).update({
|
||||
'phaseData.phaseHandoffs.extraction': extractionHandoff,
|
||||
currentPhase: 'extraction_review',
|
||||
phaseStatus: 'in_progress',
|
||||
'phaseData.extractionCompletedAt': new Date().toISOString(),
|
||||
updatedAt: new Date().toISOString(),
|
||||
});
|
||||
|
||||
console.log(`[Backend Extractor] ✅ Extraction complete for project ${projectId}`);
|
||||
console.log(`[Backend Extractor] - Problems: ${allProblems.length}`);
|
||||
console.log(`[Backend Extractor] - Users: ${allUsers.length}`);
|
||||
console.log(`[Backend Extractor] - Features: ${allFeatures.length}`);
|
||||
console.log(`[Backend Extractor] - Confidence: ${(avgConfidence * 100).toFixed(1)}%`);
|
||||
console.log(`[Backend Extractor] - Ready for next phase: ${readyForNextPhase}`);
|
||||
|
||||
} catch (error) {
|
||||
console.error(`[Backend Extractor] Fatal error during extraction:`, error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user