// File: vibn-frontend/app/api/projects/[projectId]/knowledge/batch-extract/route.ts
import { NextResponse } from 'next/server';
import { getAdminDb } from '@/lib/firebase/admin';
import { runChatExtraction } from '@/lib/ai/chat-extractor';
import { GeminiLlmClient } from '@/lib/ai/gemini-client';
import { createChatExtraction } from '@/lib/server/chat-extraction';
import { FieldValue } from 'firebase-admin/firestore';
import type { ProjectPhaseScores } from '@/lib/types/project-artifacts';
import type { KnowledgeItem } from '@/lib/types/knowledge';
// Raise the serverless execution ceiling: batch extraction loops over every
// unprocessed knowledge item sequentially and can take minutes.
export const maxDuration = 300; // 5 minutes for batch processing
/** Per-item outcome reported back to the caller of the batch endpoint. */
interface BatchExtractionResult {
// Firestore document id of the knowledge item this result refers to.
knowledgeItemId: string;
// True when extraction ran and the extraction document was persisted.
success: boolean;
// Failure message; present only when `success` is false.
error?: string;
}
/**
 * POST /api/projects/[projectId]/knowledge/batch-extract
 *
 * Runs chat extraction over every knowledge item in the project that does
 * not yet have a corresponding `chat_extractions` document. Each successful
 * extraction also kicks off a fire-and-forget chunk/embed pass. If at least
 * one extraction succeeded, the project's phase metadata is advanced to
 * 'analyzed' using the last successful extraction's scores.
 *
 * @param request - incoming request; its URL is used as a projectId fallback.
 * @param context - Next.js route context; `params` may be a Promise
 *   (Next.js 15+) or a plain object (older versions) — both are handled.
 * @returns 200 with a per-item results summary, 400 when projectId is
 *   missing, 500 on unexpected failure.
 */
export async function POST(
  request: Request,
  context: { params?: Promise<{ projectId?: string }> | { projectId?: string } } = {},
) {
  try {
    // Await params if it's a Promise (Next.js 15+)
    const params = context.params instanceof Promise ? await context.params : context.params;

    // Resolve projectId with fallbacks: route params → URL path → query string.
    const url = new URL(request.url);
    const pathSegments = url.pathname.split('/');
    const projectsIndex = pathSegments.indexOf('projects');
    const projectIdFromPath =
      projectsIndex !== -1 ? pathSegments[projectsIndex + 1] : undefined;
    const projectId =
      (params?.projectId ?? projectIdFromPath ?? url.searchParams.get('projectId') ?? '').trim();
    if (!projectId) {
      return NextResponse.json({ error: 'Missing projectId' }, { status: 400 });
    }

    const adminDb = getAdminDb();

    // Get all knowledge_items for this project
    const knowledgeSnapshot = await adminDb
      .collection('projects')
      .doc(projectId)
      .collection('knowledge_items')
      .get();
    if (knowledgeSnapshot.empty) {
      return NextResponse.json({
        message: 'No knowledge items to extract',
        results: []
      });
    }

    // Spread the stored data FIRST so the Firestore document id always wins,
    // even if the payload happens to contain a stale `id` field of its own.
    const knowledgeItems = knowledgeSnapshot.docs.map(doc => ({
      ...doc.data(),
      id: doc.id
    })) as KnowledgeItem[];

    // Get existing extractions to avoid re-processing
    const extractionsSnapshot = await adminDb
      .collection('projects')
      .doc(projectId)
      .collection('chat_extractions')
      .get();
    const processedKnowledgeIds = new Set(
      extractionsSnapshot.docs.map(doc => doc.data().knowledgeItemId)
    );

    // Filter to only unprocessed items
    const itemsToProcess = knowledgeItems.filter(
      item => !processedKnowledgeIds.has(item.id)
    );
    if (itemsToProcess.length === 0) {
      return NextResponse.json({
        message: 'All knowledge items already extracted',
        results: []
      });
    }

    console.log(`[batch-extract] Processing ${itemsToProcess.length} knowledge items for project ${projectId}`);

    const llm = new GeminiLlmClient();
    const results: BatchExtractionResult[] = [];
    let successCount = 0;
    // Explicitly typed: `let x = null` would otherwise widen and either lose
    // type safety or reject the later assignment under strictNullChecks.
    let lastSuccessfulExtraction: Awaited<ReturnType<typeof createChatExtraction>> | null = null;

    // Process each item sequentially (keeps LLM load and cost predictable).
    for (const knowledgeItem of itemsToProcess) {
      try {
        console.log(`[batch-extract] Extracting from knowledgeItemId=${knowledgeItem.id}`);
        const extractionData = await runChatExtraction(knowledgeItem, llm);
        const overallCompletion = extractionData.summary_scores.overall_completion ?? 0;
        const overallConfidence = extractionData.summary_scores.overall_confidence ?? 0;
        const extraction = await createChatExtraction({
          projectId,
          knowledgeItemId: knowledgeItem.id,
          data: extractionData,
          overallCompletion,
          overallConfidence,
        });
        lastSuccessfulExtraction = extraction;
        successCount++;
        results.push({
          knowledgeItemId: knowledgeItem.id,
          success: true
        });
        console.log(`[batch-extract] Successfully extracted from knowledgeItemId=${knowledgeItem.id}`);

        // Also chunk and embed this item (fire-and-forget). `void` marks the
        // promise as intentionally unawaited; errors are caught inside.
        // NOTE(review): unawaited work may be cut short when the serverless
        // invocation ends — confirm this is acceptable for chunking.
        void (async () => {
          try {
            const { writeKnowledgeChunksForItem } = await import('@/lib/server/vector-memory');
            await writeKnowledgeChunksForItem({
              id: knowledgeItem.id,
              projectId: knowledgeItem.projectId,
              content: knowledgeItem.content,
              sourceMeta: knowledgeItem.sourceMeta,
            });
          } catch (chunkError) {
            console.error(`[batch-extract] Failed to chunk item ${knowledgeItem.id}:`, chunkError);
          }
        })();
      } catch (error) {
        // One bad item must not abort the batch: record and continue.
        console.error(`[batch-extract] Failed to extract from knowledgeItemId=${knowledgeItem.id}:`, error);
        results.push({
          knowledgeItemId: knowledgeItem.id,
          success: false,
          error: error instanceof Error ? error.message : String(error)
        });
      }
    }

    // Update project phase if we had any successful extractions
    if (successCount > 0 && lastSuccessfulExtraction) {
      const projectRef = adminDb.collection('projects').doc(projectId);
      const snapshot = await projectRef.get();
      const docData = snapshot.data() ?? {};
      const existingScores = (docData.phaseScores ?? {}) as ProjectPhaseScores;
      const phaseHistory = Array.isArray(docData.phaseHistory) ? [...docData.phaseHistory] : [];
      phaseHistory.push({
        phase: 'extractor',
        status: 'completed',
        knowledgeItemId: 'batch_extraction',
        timestamp: new Date().toISOString(),
      });

      // Use the last extraction's scores as representative
      const lastData = lastSuccessfulExtraction.data as { summary_scores?: { overall_completion?: number; overall_confidence?: number } };
      existingScores.extractor = {
        knowledgeItemId: 'batch_extraction',
        overallCompletion: lastData.summary_scores?.overall_completion ?? 0,
        overallConfidence: lastData.summary_scores?.overall_confidence ?? 0,
        updatedAt: new Date().toISOString(),
      };
      await projectRef.set(
        {
          currentPhase: 'analyzed',
          phaseScores: existingScores,
          phaseStatus: 'in_progress',
          phaseHistory,
          updatedAt: FieldValue.serverTimestamp(),
        },
        { merge: true },
      );
      console.log(`[batch-extract] Updated project phase to 'analyzed' for project ${projectId}`);
    }

    // Derive the failure count once instead of re-filtering inline.
    const failureCount = results.filter(r => !r.success).length;
    return NextResponse.json({
      message: `Processed ${itemsToProcess.length} items: ${successCount} succeeded, ${failureCount} failed`,
      results,
      successCount,
      totalProcessed: itemsToProcess.length
    });
  } catch (error) {
    console.error('[batch-extract] Batch extraction failed:', error);
    return NextResponse.json(
      {
        error: 'Failed to batch extract knowledge items',
        details: error instanceof Error ? error.message : String(error),
      },
      { status: 500 },
    );
  }
}