import { NextResponse } from 'next/server';
import { getAdminDb } from '@/lib/firebase/admin';
import { runChatExtraction } from '@/lib/ai/chat-extractor';
import { GeminiLlmClient } from '@/lib/ai/gemini-client';
import { createChatExtraction } from '@/lib/server/chat-extraction';
import { FieldValue } from 'firebase-admin/firestore';
import type { ProjectPhaseScores } from '@/lib/types/project-artifacts';
import type { KnowledgeItem } from '@/lib/types/knowledge';

export const maxDuration = 300; // 5 minutes for batch processing

/** Per-item outcome reported back to the caller of the batch endpoint. */
interface BatchExtractionResult {
  knowledgeItemId: string;
  success: boolean;
  error?: string;
}

/** Route context as passed by the framework; `params` may be a Promise or a plain object. */
type RouteContext = {
  params?: Promise<{ projectId?: string }> | { projectId?: string };
};

type AdminDb = ReturnType<typeof getAdminDb>;
type ChatExtraction = Awaited<ReturnType<typeof createChatExtraction>>;

/**
 * Resolves the project id from, in order of precedence: the route params,
 * the URL path segment following `projects`, and the `projectId` query
 * parameter. Returns a trimmed string, which is empty when nothing was found.
 */
async function resolveProjectId(request: Request, context: RouteContext): Promise<string> {
  // `await` accepts both a Promise (Next.js 15+) and a plain object, and unlike
  // an `instanceof Promise` check it also handles thenables.
  const params = await context.params;

  const url = new URL(request.url);
  const pathSegments = url.pathname.split('/');
  const projectsIndex = pathSegments.indexOf('projects');
  const projectIdFromPath = projectsIndex !== -1 ? pathSegments[projectsIndex + 1] : undefined;

  return (
    params?.projectId ??
    projectIdFromPath ??
    url.searchParams.get('projectId') ??
    ''
  ).trim();
}

/**
 * Chunks and embeds a single knowledge item. Best-effort: any failure is
 * logged and swallowed so it never affects the extraction result.
 */
async function chunkAndEmbedItem(knowledgeItem: KnowledgeItem): Promise<void> {
  try {
    const { writeKnowledgeChunksForItem } = await import('@/lib/server/vector-memory');
    await writeKnowledgeChunksForItem({
      id: knowledgeItem.id,
      projectId: knowledgeItem.projectId,
      content: knowledgeItem.content,
      sourceMeta: knowledgeItem.sourceMeta,
    });
  } catch (chunkError) {
    console.error(`[batch-extract] Failed to chunk item ${knowledgeItem.id}:`, chunkError);
  }
}

/**
 * Records a completed `extractor` phase on the project document and moves the
 * project to the `analyzed` phase, using the given extraction's summary scores.
 */
async function markProjectAnalyzed(
  adminDb: AdminDb,
  projectId: string,
  extraction: ChatExtraction,
): Promise<void> {
  const projectRef = adminDb.collection('projects').doc(projectId);
  const snapshot = await projectRef.get();
  const docData = snapshot.data() ?? {};

  const existingScores = (docData.phaseScores ?? {}) as ProjectPhaseScores;
  const phaseHistory = Array.isArray(docData.phaseHistory) ? [...docData.phaseHistory] : [];
  phaseHistory.push({
    phase: 'extractor',
    status: 'completed',
    knowledgeItemId: 'batch_extraction',
    timestamp: new Date().toISOString(),
  });

  // Use the last extraction's scores as representative
  const lastData = extraction.data as {
    summary_scores?: { overall_completion?: number; overall_confidence?: number };
  };
  existingScores.extractor = {
    knowledgeItemId: 'batch_extraction',
    overallCompletion: lastData.summary_scores?.overall_completion ?? 0,
    overallConfidence: lastData.summary_scores?.overall_confidence ?? 0,
    updatedAt: new Date().toISOString(),
  };

  await projectRef.set(
    {
      currentPhase: 'analyzed',
      phaseScores: existingScores,
      phaseStatus: 'in_progress',
      phaseHistory,
      updatedAt: FieldValue.serverTimestamp(),
    },
    { merge: true },
  );
  console.log(`[batch-extract] Updated project phase to 'analyzed' for project ${projectId}`);
}

/**
 * Batch-extracts every knowledge item of a project that has no chat
 * extraction yet.
 *
 * Items are processed sequentially; a failure on one item is recorded in the
 * results and does not stop the batch. After each successful extraction the
 * item is also chunked and embedded on a best-effort basis. If at least one
 * extraction succeeded, the project is moved to the `analyzed` phase.
 *
 * @param request Incoming request; its URL is used as a fallback source for the project id.
 * @param context Route context whose `params` may be a Promise or a plain object.
 * @returns JSON with per-item `results`, `successCount` and `totalProcessed`;
 *   400 when no project id can be resolved; 500 on an unexpected failure.
 */
export async function POST(
  request: Request,
  context: { params?: Promise<{ projectId?: string }> | { projectId?: string } } = {},
) {
  try {
    const projectId = await resolveProjectId(request, context);
    if (!projectId) {
      return NextResponse.json({ error: 'Missing projectId' }, { status: 400 });
    }

    const adminDb = getAdminDb();
    const projectDoc = adminDb.collection('projects').doc(projectId);

    // Get all knowledge_items for this project
    const knowledgeSnapshot = await projectDoc.collection('knowledge_items').get();
    if (knowledgeSnapshot.empty) {
      return NextResponse.json({ message: 'No knowledge items to extract', results: [] });
    }
    const knowledgeItems = knowledgeSnapshot.docs.map(doc => ({
      id: doc.id,
      ...doc.data(),
    })) as KnowledgeItem[];

    // Get existing extractions to avoid re-processing
    const extractionsSnapshot = await projectDoc.collection('chat_extractions').get();
    const processedKnowledgeIds = new Set(
      extractionsSnapshot.docs.map(doc => doc.data().knowledgeItemId),
    );

    // Filter to only unprocessed items
    const itemsToProcess = knowledgeItems.filter(item => !processedKnowledgeIds.has(item.id));
    if (itemsToProcess.length === 0) {
      return NextResponse.json({ message: 'All knowledge items already extracted', results: [] });
    }

    console.log(`[batch-extract] Processing ${itemsToProcess.length} knowledge items for project ${projectId}`);

    const llm = new GeminiLlmClient();
    const results: BatchExtractionResult[] = [];
    const chunkTasks: Promise<void>[] = [];
    let successCount = 0;
    let lastSuccessfulExtraction: ChatExtraction | null = null;

    // Process each item
    for (const knowledgeItem of itemsToProcess) {
      try {
        console.log(`[batch-extract] Extracting from knowledgeItemId=${knowledgeItem.id}`);
        const extractionData = await runChatExtraction(knowledgeItem, llm);
        const overallCompletion = extractionData.summary_scores.overall_completion ?? 0;
        const overallConfidence = extractionData.summary_scores.overall_confidence ?? 0;

        const extraction = await createChatExtraction({
          projectId,
          knowledgeItemId: knowledgeItem.id,
          data: extractionData,
          overallCompletion,
          overallConfidence,
        });

        lastSuccessfulExtraction = extraction;
        successCount++;
        results.push({ knowledgeItemId: knowledgeItem.id, success: true });
        console.log(`[batch-extract] Successfully extracted from knowledgeItemId=${knowledgeItem.id}`);

        // Start chunking/embedding without blocking the next extraction, but
        // keep the promise so it can be awaited before the response is sent.
        chunkTasks.push(chunkAndEmbedItem(knowledgeItem));
      } catch (error) {
        console.error(`[batch-extract] Failed to extract from knowledgeItemId=${knowledgeItem.id}:`, error);
        results.push({
          knowledgeItemId: knowledgeItem.id,
          success: false,
          error: error instanceof Error ? error.message : String(error),
        });
      }
    }

    // A detached promise is not tied to the request lifetime, so the runtime
    // may stop it once the response is returned. Wait for the chunking work
    // here; each task logs and swallows its own errors, so this never throws.
    await Promise.allSettled(chunkTasks);

    // Update project phase if we had any successful extractions
    if (successCount > 0 && lastSuccessfulExtraction) {
      await markProjectAnalyzed(adminDb, projectId, lastSuccessfulExtraction);
    }

    // Every processed item contributes exactly one entry to `results`.
    const failedCount = results.length - successCount;
    return NextResponse.json({
      message: `Processed ${itemsToProcess.length} items: ${successCount} succeeded, ${failedCount} failed`,
      results,
      successCount,
      totalProcessed: itemsToProcess.length,
    });
  } catch (error) {
    console.error('[batch-extract] Batch extraction failed:', error);
    return NextResponse.json(
      {
        error: 'Failed to batch extract knowledge items',
        details: error instanceof Error ? error.message : String(error),
      },
      { status: 500 },
    );
  }
}