import { NextResponse } from 'next/server';
|
|
import { getAdminDb } from '@/lib/firebase/admin';
|
|
import { runChatExtraction } from '@/lib/ai/chat-extractor';
|
|
import { GeminiLlmClient } from '@/lib/ai/gemini-client';
|
|
import { createChatExtraction } from '@/lib/server/chat-extraction';
|
|
import { FieldValue } from 'firebase-admin/firestore';
|
|
import type { ProjectPhaseScores } from '@/lib/types/project-artifacts';
|
|
import type { KnowledgeItem } from '@/lib/types/knowledge';
|
|
// Next.js route segment config: allow up to 5 minutes per invocation,
// since this handler runs one LLM extraction per knowledge item in series.
export const maxDuration = 300; // 5 minutes for batch processing
|
/**
 * Per-item outcome reported back to the caller of the batch endpoint.
 */
interface BatchExtractionResult {
  /** Firestore document id of the knowledge item that was processed. */
  knowledgeItemId: string;
  /** True when extraction + persistence succeeded for this item. */
  success: boolean;
  /** Error message when `success` is false. */
  error?: string;
}
|
|
|
|
export async function POST(
|
|
request: Request,
|
|
context: { params?: Promise<{ projectId?: string }> | { projectId?: string } } = {},
|
|
) {
|
|
try {
|
|
// Await params if it's a Promise (Next.js 15+)
|
|
const params = context.params instanceof Promise ? await context.params : context.params;
|
|
|
|
const url = new URL(request.url);
|
|
const pathSegments = url.pathname.split('/');
|
|
const projectsIndex = pathSegments.indexOf('projects');
|
|
const projectIdFromPath =
|
|
projectsIndex !== -1 ? pathSegments[projectsIndex + 1] : undefined;
|
|
|
|
const projectId =
|
|
(params?.projectId ?? projectIdFromPath ?? url.searchParams.get('projectId') ?? '').trim();
|
|
|
|
if (!projectId) {
|
|
return NextResponse.json({ error: 'Missing projectId' }, { status: 400 });
|
|
}
|
|
|
|
const adminDb = getAdminDb();
|
|
|
|
// Get all knowledge_items for this project
|
|
const knowledgeSnapshot = await adminDb
|
|
.collection('projects')
|
|
.doc(projectId)
|
|
.collection('knowledge_items')
|
|
.get();
|
|
|
|
if (knowledgeSnapshot.empty) {
|
|
return NextResponse.json({
|
|
message: 'No knowledge items to extract',
|
|
results: []
|
|
});
|
|
}
|
|
|
|
const knowledgeItems = knowledgeSnapshot.docs.map(doc => ({
|
|
id: doc.id,
|
|
...doc.data()
|
|
})) as KnowledgeItem[];
|
|
|
|
// Get existing extractions to avoid re-processing
|
|
const extractionsSnapshot = await adminDb
|
|
.collection('projects')
|
|
.doc(projectId)
|
|
.collection('chat_extractions')
|
|
.get();
|
|
|
|
const processedKnowledgeIds = new Set(
|
|
extractionsSnapshot.docs.map(doc => doc.data().knowledgeItemId)
|
|
);
|
|
|
|
// Filter to only unprocessed items
|
|
const itemsToProcess = knowledgeItems.filter(
|
|
item => !processedKnowledgeIds.has(item.id)
|
|
);
|
|
|
|
if (itemsToProcess.length === 0) {
|
|
return NextResponse.json({
|
|
message: 'All knowledge items already extracted',
|
|
results: []
|
|
});
|
|
}
|
|
|
|
console.log(`[batch-extract] Processing ${itemsToProcess.length} knowledge items for project ${projectId}`);
|
|
|
|
const llm = new GeminiLlmClient();
|
|
const results: BatchExtractionResult[] = [];
|
|
let successCount = 0;
|
|
let lastSuccessfulExtraction = null;
|
|
|
|
// Process each item
|
|
for (const knowledgeItem of itemsToProcess) {
|
|
try {
|
|
console.log(`[batch-extract] Extracting from knowledgeItemId=${knowledgeItem.id}`);
|
|
|
|
const extractionData = await runChatExtraction(knowledgeItem, llm);
|
|
const overallCompletion = extractionData.summary_scores.overall_completion ?? 0;
|
|
const overallConfidence = extractionData.summary_scores.overall_confidence ?? 0;
|
|
|
|
const extraction = await createChatExtraction({
|
|
projectId,
|
|
knowledgeItemId: knowledgeItem.id,
|
|
data: extractionData,
|
|
overallCompletion,
|
|
overallConfidence,
|
|
});
|
|
|
|
lastSuccessfulExtraction = extraction;
|
|
successCount++;
|
|
|
|
results.push({
|
|
knowledgeItemId: knowledgeItem.id,
|
|
success: true
|
|
});
|
|
|
|
console.log(`[batch-extract] Successfully extracted from knowledgeItemId=${knowledgeItem.id}`);
|
|
|
|
// Also chunk and embed this item (fire-and-forget)
|
|
(async () => {
|
|
try {
|
|
const { writeKnowledgeChunksForItem } = await import('@/lib/server/vector-memory');
|
|
await writeKnowledgeChunksForItem({
|
|
id: knowledgeItem.id,
|
|
projectId: knowledgeItem.projectId,
|
|
content: knowledgeItem.content,
|
|
sourceMeta: knowledgeItem.sourceMeta,
|
|
});
|
|
} catch (chunkError) {
|
|
console.error(`[batch-extract] Failed to chunk item ${knowledgeItem.id}:`, chunkError);
|
|
}
|
|
})();
|
|
} catch (error) {
|
|
console.error(`[batch-extract] Failed to extract from knowledgeItemId=${knowledgeItem.id}:`, error);
|
|
results.push({
|
|
knowledgeItemId: knowledgeItem.id,
|
|
success: false,
|
|
error: error instanceof Error ? error.message : String(error)
|
|
});
|
|
}
|
|
}
|
|
|
|
// Update project phase if we had any successful extractions
|
|
if (successCount > 0 && lastSuccessfulExtraction) {
|
|
const projectRef = adminDb.collection('projects').doc(projectId);
|
|
const snapshot = await projectRef.get();
|
|
const docData = snapshot.data() ?? {};
|
|
const existingScores = (docData.phaseScores ?? {}) as ProjectPhaseScores;
|
|
const phaseHistory = Array.isArray(docData.phaseHistory) ? [...docData.phaseHistory] : [];
|
|
|
|
phaseHistory.push({
|
|
phase: 'extractor',
|
|
status: 'completed',
|
|
knowledgeItemId: 'batch_extraction',
|
|
timestamp: new Date().toISOString(),
|
|
});
|
|
|
|
// Use the last extraction's scores as representative
|
|
const lastData = lastSuccessfulExtraction.data as { summary_scores?: { overall_completion?: number; overall_confidence?: number } };
|
|
existingScores.extractor = {
|
|
knowledgeItemId: 'batch_extraction',
|
|
overallCompletion: lastData.summary_scores?.overall_completion ?? 0,
|
|
overallConfidence: lastData.summary_scores?.overall_confidence ?? 0,
|
|
updatedAt: new Date().toISOString(),
|
|
};
|
|
|
|
await projectRef.set(
|
|
{
|
|
currentPhase: 'analyzed',
|
|
phaseScores: existingScores,
|
|
phaseStatus: 'in_progress',
|
|
phaseHistory,
|
|
updatedAt: FieldValue.serverTimestamp(),
|
|
},
|
|
{ merge: true },
|
|
);
|
|
|
|
console.log(`[batch-extract] Updated project phase to 'analyzed' for project ${projectId}`);
|
|
}
|
|
|
|
return NextResponse.json({
|
|
message: `Processed ${itemsToProcess.length} items: ${successCount} succeeded, ${results.filter(r => !r.success).length} failed`,
|
|
results,
|
|
successCount,
|
|
totalProcessed: itemsToProcess.length
|
|
});
|
|
} catch (error) {
|
|
console.error('[batch-extract] Batch extraction failed:', error);
|
|
return NextResponse.json(
|
|
{
|
|
error: 'Failed to batch extract knowledge items',
|
|
details: error instanceof Error ? error.message : String(error),
|
|
},
|
|
{ status: 500 },
|
|
);
|
|
}
|
|
}
|
|
|