162 lines
4.5 KiB
TypeScript
Executable File
162 lines
4.5 KiB
TypeScript
Executable File
#!/usr/bin/env tsx
|
||
/**
 * One-time migration script to process existing knowledge_items into AlloyDB
 *
 * This script:
 * 1. Fetches all knowledge_items from Firestore
 * 2. For each item, chunks and embeds it into AlloyDB
 * 3. Shows progress and handles errors gracefully
 *
 * Usage:
 *   npx tsx scripts/migrate-existing-knowledge-to-alloydb.ts [projectId]
 *
 * - If projectId is provided, processes only that project
 * - If omitted, processes ALL projects
 */
|
||
|
||
import { getAdminDb } from '../lib/firebase/admin';
|
||
import { writeKnowledgeChunksForItem, getChunkCountForKnowledgeItem } from '../lib/server/vector-memory';
|
||
|
||
/**
 * In-memory shape of one document read from the `knowledge_items` collection,
 * as handed to the chunk/embed step of the migration.
 */
interface KnowledgeItem {
  /** Firestore document id of the knowledge item. */
  id: string;
  /** Value of the document's `projectId` field. */
  projectId: string;
  /** Value of the document's `content` field (the loader substitutes `''` when it is missing). */
  content: string;
  /** Optional metadata copied verbatim from the document's `sourceMeta` field. */
  sourceMeta?: {
    sourceType?: string;
    // NOTE(review): presumably a relevance ranking consumed by the chunk writer — confirm.
    importance?: 'primary' | 'supporting' | 'irrelevant';
  };
}
|
||
|
||
/**
 * Loads knowledge items from the Firestore `knowledge_items` collection.
 *
 * @param projectId - When provided, only documents whose `projectId` field
 *   equals this value are fetched; when omitted, the whole collection is read.
 * @returns One entry per fetched document: the document id plus its
 *   `projectId`, `content` (replaced by `''` when missing or falsy) and
 *   `sourceMeta` fields.
 */
async function getAllKnowledgeItems(projectId?: string): Promise<KnowledgeItem[]> {
  const collection = getAdminDb().collection('knowledge_items');

  console.log(
    projectId
      ? `[Migration] Fetching knowledge items for project ${projectId}...`
      : '[Migration] Fetching ALL knowledge items...',
  );

  // Both modes share the same mapping; only the query differs.
  const query = projectId ? collection.where('projectId', '==', projectId) : collection;
  const snapshot = await query.get();

  return snapshot.docs.map((doc) => {
    const data = doc.data();
    return {
      id: doc.id,
      projectId: data.projectId,
      content: data.content || '',
      sourceMeta: data.sourceMeta,
    };
  });
}
|
||
|
||
/**
 * Runs the migration end to end and terminates the process when done.
 *
 * For every knowledge item returned by `getAllKnowledgeItems`, the item is
 * skipped when `getChunkCountForKnowledgeItem` already reports chunks for it;
 * otherwise it is passed to `writeKnowledgeChunksForItem`. A failure on one
 * item is logged and counted, and processing continues with the next item.
 *
 * Exit codes: `0` when nothing failed (including when there is nothing to
 * migrate), `1` when at least one item failed or the run aborted.
 *
 * @param projectId - Optional project filter forwarded to `getAllKnowledgeItems`.
 */
async function migrateKnowledgeItems(projectId?: string): Promise<void> {
  const banner = '='.repeat(60);
  const throttleMs = 100;

  console.log('');
  console.log(banner);
  console.log('🚀 AlloyDB Knowledge Migration');
  console.log(banner);
  console.log('');

  try {
    // Fetch all items
    const items = await getAllKnowledgeItems(projectId);
    console.log(`✅ Found ${items.length} knowledge items to process`);
    console.log('');

    if (items.length === 0) {
      console.log('ℹ️ No knowledge items found. Nothing to migrate.');
      // Exit explicitly like every other path, so open database handles
      // cannot keep the process alive after an empty run.
      process.exit(0);
    }

    let successCount = 0;
    let skipCount = 0;
    let errorCount = 0;

    // Process each item
    for (const [index, item] of items.entries()) {
      const progress = `[${index + 1}/${items.length}]`;

      try {
        // Check if already processed (skip if chunks exist)
        const existingChunks = await getChunkCountForKnowledgeItem(item.id);
        if (existingChunks > 0) {
          console.log(`${progress} ⏭️ Skipping ${item.id} (already has ${existingChunks} chunks)`);
          skipCount++;
          continue;
        }

        console.log(`${progress} 🔄 Processing ${item.id}...`);

        // Chunk and embed
        await writeKnowledgeChunksForItem(item);

        const newChunks = await getChunkCountForKnowledgeItem(item.id);
        console.log(`${progress} ✅ Success! Created ${newChunks} chunks`);
        successCount++;
      } catch (error: unknown) {
        console.error(`${progress} ❌ Failed to process ${item.id}:`, error);
        errorCount++;
      }

      // Small delay to avoid rate limiting. It runs after failures as well as
      // successes, so a failing item is not immediately followed by another
      // call; skipped items bypass it via `continue`.
      if (index < items.length - 1) {
        await new Promise<void>((resolve) => setTimeout(resolve, throttleMs));
      }
    }

    // Summary
    console.log('');
    console.log(banner);
    console.log('📊 Migration Complete');
    console.log(banner);
    console.log(`✅ Processed: ${successCount}`);
    console.log(`⏭️ Skipped (already exists): ${skipCount}`);
    console.log(`❌ Errors: ${errorCount}`);
    console.log(`📦 Total: ${items.length}`);
    console.log('');

    if (errorCount > 0) {
      console.log('⚠️ Some items failed. Check logs above for details.');
      process.exit(1);
    } else {
      console.log('🎉 All knowledge items successfully migrated to AlloyDB!');
      process.exit(0);
    }
  } catch (error: unknown) {
    // Reached when fetching the items (or anything outside the per-item
    // try/catch) throws.
    console.error('');
    console.error('❌ Migration failed:', error);
    process.exit(1);
  }
}
|
||
|
||
// Parse command line arguments: the first positional argument, when present,
// restricts the migration to a single project.
const projectId = process.argv[2];

if (projectId) {
  console.log(`ℹ️ Processing single project: ${projectId}`);
} else {
  console.log('ℹ️ No projectId provided - processing ALL projects');
}

// Run migration. The function handles its own errors and exits the process;
// the catch is a safety net so the promise is never left floating.
migrateKnowledgeItems(projectId).catch((error: unknown) => {
  console.error('❌ Migration failed:', error);
  process.exit(1);
});
|
||
|