VIBN Frontend for Coolify deployment
This commit is contained in:
35
lib/utils/api-url.ts
Normal file
35
lib/utils/api-url.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
/**
|
||||
* Get the base URL for internal API calls
|
||||
* Works in both development and production environments
|
||||
*/
|
||||
export function getBaseUrl(request?: Request): string {
|
||||
// In production (Firebase/Vercel), use the request origin
|
||||
if (request && typeof window === 'undefined') {
|
||||
const origin = request.headers.get('origin') || request.headers.get('referer');
|
||||
if (origin) {
|
||||
return new URL(origin).origin;
|
||||
}
|
||||
}
|
||||
|
||||
// Check environment variables
|
||||
if (process.env.NEXT_PUBLIC_APP_URL) {
|
||||
return process.env.NEXT_PUBLIC_APP_URL;
|
||||
}
|
||||
|
||||
if (process.env.VERCEL_URL) {
|
||||
return `https://${process.env.VERCEL_URL}`;
|
||||
}
|
||||
|
||||
// Default to localhost for development
|
||||
return 'http://localhost:3000';
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the full API URL for internal API calls
|
||||
*/
|
||||
export function getApiUrl(path: string, request?: Request): string {
|
||||
const baseUrl = getBaseUrl(request);
|
||||
const cleanPath = path.startsWith('/') ? path : `/${path}`;
|
||||
return `${baseUrl}${cleanPath}`;
|
||||
}
|
||||
|
||||
223
lib/utils/code-chunker.ts
Normal file
223
lib/utils/code-chunker.ts
Normal file
@@ -0,0 +1,223 @@
|
||||
/**
|
||||
* Code-specific chunking for source code files
|
||||
* Intelligently splits code while preserving context
|
||||
*/
|
||||
|
||||
/** A single chunk of a source file plus positioning metadata. */
export interface CodeChunk {
  // The chunk text (continuation chunks may include a copied file header
  // and a "// ... continued from" marker line).
  content: string;
  metadata: {
    chunkIndex: number;   // 0-based position of this chunk in the sequence
    totalChunks: number;  // total chunks produced for the file
    startLine: number;    // 1-based first source line covered by this chunk
    endLine: number;      // 1-based last source line covered by this chunk
    tokenCount: number;   // rough estimate (~4 characters per token)
    filePath: string;     // path of the source file the chunk came from
    language?: string;    // language inferred from the file extension, if known
  };
}
|
||||
|
||||
/** Tuning options for chunkCode. */
export interface CodeChunkOptions {
  maxChunkSize?: number; // characters
  chunkOverlap?: number; // lines
  preserveFunctions?: boolean; // intended: prefer splitting at function starts
  preserveClasses?: boolean;   // intended: prefer splitting at class/type starts
  filePath: string; // used for language detection and chunk metadata
}
|
||||
|
||||
/**
|
||||
* Estimate token count (rough approximation: 1 token ≈ 4 characters)
|
||||
*/
|
||||
function estimateTokens(text: string): number {
|
||||
return Math.ceil(text.length / 4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect language from file path
|
||||
*/
|
||||
function detectLanguage(filePath: string): string | undefined {
|
||||
const ext = filePath.split('.').pop()?.toLowerCase();
|
||||
const langMap: Record<string, string> = {
|
||||
ts: 'typescript',
|
||||
tsx: 'typescript',
|
||||
js: 'javascript',
|
||||
jsx: 'javascript',
|
||||
py: 'python',
|
||||
java: 'java',
|
||||
go: 'go',
|
||||
rs: 'rust',
|
||||
cpp: 'cpp',
|
||||
c: 'c',
|
||||
cs: 'csharp',
|
||||
rb: 'ruby',
|
||||
php: 'php',
|
||||
swift: 'swift',
|
||||
kt: 'kotlin',
|
||||
sql: 'sql',
|
||||
css: 'css',
|
||||
scss: 'scss',
|
||||
html: 'html',
|
||||
json: 'json',
|
||||
yaml: 'yaml',
|
||||
yml: 'yaml',
|
||||
md: 'markdown',
|
||||
};
|
||||
return langMap[ext || ''];
|
||||
}
|
||||
|
||||
/**
|
||||
* Chunk source code file intelligently
|
||||
*/
|
||||
export function chunkCode(
|
||||
content: string,
|
||||
options: CodeChunkOptions
|
||||
): CodeChunk[] {
|
||||
const {
|
||||
maxChunkSize = 3000, // Larger chunks for code context
|
||||
chunkOverlap = 5,
|
||||
preserveFunctions = true,
|
||||
preserveClasses = true,
|
||||
filePath,
|
||||
} = options;
|
||||
|
||||
const language = detectLanguage(filePath);
|
||||
const lines = content.split('\n');
|
||||
|
||||
// For small files, return as single chunk
|
||||
if (content.length <= maxChunkSize) {
|
||||
return [
|
||||
{
|
||||
content,
|
||||
metadata: {
|
||||
chunkIndex: 0,
|
||||
totalChunks: 1,
|
||||
startLine: 1,
|
||||
endLine: lines.length,
|
||||
tokenCount: estimateTokens(content),
|
||||
filePath,
|
||||
language,
|
||||
},
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
// For larger files, split by logical boundaries
|
||||
const chunks: CodeChunk[] = [];
|
||||
let currentChunk: string[] = [];
|
||||
let currentSize = 0;
|
||||
let chunkStartLine = 1;
|
||||
|
||||
// Patterns for detecting logical boundaries
|
||||
const functionPattern = /^\s*(function|def|fn|func|fun|public|private|protected|static|async|export)\s/;
|
||||
const classPattern = /^\s*(class|interface|struct|enum|type)\s/;
|
||||
const importPattern = /^\s*(import|from|require|using|include)\s/;
|
||||
const commentPattern = /^\s*(\/\/|\/\*|\*|#|--|<!--)/;
|
||||
|
||||
// Always include file header (imports, comments at top)
|
||||
let headerLines: string[] = [];
|
||||
for (let i = 0; i < Math.min(20, lines.length); i++) {
|
||||
const line = lines[i];
|
||||
if (importPattern.test(line) || commentPattern.test(line) || line.trim() === '') {
|
||||
headerLines.push(line);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (let i = 0; i < lines.length; i++) {
|
||||
const line = lines[i];
|
||||
const lineSize = line.length + 1; // +1 for newline
|
||||
|
||||
// Check if we should start a new chunk
|
||||
const shouldSplit =
|
||||
currentSize + lineSize > maxChunkSize &&
|
||||
currentChunk.length > 0 &&
|
||||
(functionPattern.test(line) ||
|
||||
classPattern.test(line) ||
|
||||
(line.trim() === '' && currentSize > maxChunkSize * 0.7));
|
||||
|
||||
if (shouldSplit) {
|
||||
// Save current chunk
|
||||
const chunkContent = currentChunk.join('\n');
|
||||
chunks.push({
|
||||
content: chunkContent,
|
||||
metadata: {
|
||||
chunkIndex: chunks.length,
|
||||
totalChunks: 0, // Will update at end
|
||||
startLine: chunkStartLine,
|
||||
endLine: chunkStartLine + currentChunk.length - 1,
|
||||
tokenCount: estimateTokens(chunkContent),
|
||||
filePath,
|
||||
language,
|
||||
},
|
||||
});
|
||||
|
||||
// Start new chunk with overlap and header
|
||||
const overlapStart = Math.max(0, currentChunk.length - chunkOverlap);
|
||||
currentChunk = [
|
||||
...headerLines,
|
||||
'',
|
||||
`// ... continued from line ${chunkStartLine}`,
|
||||
'',
|
||||
...currentChunk.slice(overlapStart),
|
||||
];
|
||||
currentSize = currentChunk.reduce((sum, l) => sum + l.length + 1, 0);
|
||||
chunkStartLine = chunkStartLine + overlapStart;
|
||||
}
|
||||
|
||||
currentChunk.push(line);
|
||||
currentSize += lineSize;
|
||||
}
|
||||
|
||||
// Add final chunk
|
||||
if (currentChunk.length > 0) {
|
||||
const chunkContent = currentChunk.join('\n');
|
||||
chunks.push({
|
||||
content: chunkContent,
|
||||
metadata: {
|
||||
chunkIndex: chunks.length,
|
||||
totalChunks: 0,
|
||||
startLine: chunkStartLine,
|
||||
endLine: lines.length,
|
||||
tokenCount: estimateTokens(chunkContent),
|
||||
filePath,
|
||||
language,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// Update totalChunks for all chunks
|
||||
chunks.forEach((chunk) => {
|
||||
chunk.metadata.totalChunks = chunks.length;
|
||||
});
|
||||
|
||||
return chunks;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a summary header for a code file
|
||||
*/
|
||||
export function generateCodeSummary(
|
||||
filePath: string,
|
||||
content: string,
|
||||
language?: string
|
||||
): string {
|
||||
const lines = content.split('\n');
|
||||
const functions = lines.filter(line => /^\s*(function|def|fn|func|async function|export function)/.test(line));
|
||||
const classes = lines.filter(line => /^\s*(class|interface|struct|enum|type)\s/.test(line));
|
||||
|
||||
let summary = `File: ${filePath}\n`;
|
||||
if (language) {
|
||||
summary += `Language: ${language}\n`;
|
||||
}
|
||||
summary += `Lines: ${lines.length}\n`;
|
||||
|
||||
if (functions.length > 0) {
|
||||
summary += `Functions: ${functions.length}\n`;
|
||||
}
|
||||
if (classes.length > 0) {
|
||||
summary += `Classes/Types: ${classes.length}\n`;
|
||||
}
|
||||
|
||||
return summary;
|
||||
}
|
||||
|
||||
206
lib/utils/document-chunker.ts
Normal file
206
lib/utils/document-chunker.ts
Normal file
@@ -0,0 +1,206 @@
|
||||
/**
|
||||
* Document Chunking Utility
|
||||
*
|
||||
* Splits large documents into manageable chunks for AI processing.
|
||||
* Uses semantic chunking with configurable overlap for better context.
|
||||
*/
|
||||
|
||||
/** Position and size information for one document chunk. */
export interface ChunkMetadata {
  chunkIndex: number;  // 0-based index of this chunk
  totalChunks: number; // total number of chunks produced for the document
  startChar: number;   // offset into the processed (placeholder-substituted) text
  endChar: number;     // exclusive end offset into the processed text
  tokenCount: number;  // rough estimate (~4 characters per token)
}
|
||||
|
||||
/** A chunk of document text together with its metadata. */
export interface DocumentChunk {
  content: string;         // trimmed chunk text (code blocks restored when preserved)
  metadata: ChunkMetadata;
}
|
||||
|
||||
/** Options controlling how a document is split into chunks. */
export interface ChunkingOptions {
  maxChunkSize?: number; // Maximum characters per chunk (default: 2000)
  chunkOverlap?: number; // Overlap between chunks (default: 200)
  // NOTE(review): preserveParagraphs is not currently consulted by
  // chunkDocument — findSplitPoint always prefers paragraph breaks. Verify.
  preserveParagraphs?: boolean; // Try to keep paragraphs intact (default: true)
  preserveCodeBlocks?: boolean; // Keep code blocks together (default: true)
}
|
||||
|
||||
// Defaults applied for any option the chunkDocument caller omits.
const DEFAULT_OPTIONS: Required<ChunkingOptions> = {
  maxChunkSize: 2000,
  chunkOverlap: 200,
  preserveParagraphs: true,
  preserveCodeBlocks: true,
};
|
||||
|
||||
/**
|
||||
* Estimate token count (rough approximation: 1 token ≈ 4 characters)
|
||||
*/
|
||||
function estimateTokens(text: string): number {
|
||||
return Math.ceil(text.length / 4);
|
||||
}
|
||||
|
||||
/**
|
||||
* Find good split points (paragraph breaks, sentence boundaries)
|
||||
*/
|
||||
function findSplitPoint(text: string, idealSplit: number): number {
|
||||
// Try to split at paragraph break first
|
||||
const paragraphBreak = text.lastIndexOf('\n\n', idealSplit);
|
||||
if (paragraphBreak > idealSplit - 500 && paragraphBreak > 0) {
|
||||
return paragraphBreak + 2;
|
||||
}
|
||||
|
||||
// Try sentence boundary
|
||||
const sentenceEnd = text.lastIndexOf('. ', idealSplit);
|
||||
if (sentenceEnd > idealSplit - 300 && sentenceEnd > 0) {
|
||||
return sentenceEnd + 2;
|
||||
}
|
||||
|
||||
// Try any newline
|
||||
const newline = text.lastIndexOf('\n', idealSplit);
|
||||
if (newline > idealSplit - 200 && newline > 0) {
|
||||
return newline + 1;
|
||||
}
|
||||
|
||||
// Last resort: split at space
|
||||
const space = text.lastIndexOf(' ', idealSplit);
|
||||
return space > 0 ? space + 1 : idealSplit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract code blocks to preserve them
|
||||
*/
|
||||
function extractCodeBlocks(text: string): { text: string; codeBlocks: Map<string, string> } {
|
||||
const codeBlocks = new Map<string, string>();
|
||||
let counter = 0;
|
||||
|
||||
const processedText = text.replace(/```[\s\S]*?```/g, (match) => {
|
||||
const placeholder = `__CODE_BLOCK_${counter}__`;
|
||||
codeBlocks.set(placeholder, match);
|
||||
counter++;
|
||||
return placeholder;
|
||||
});
|
||||
|
||||
return { text: processedText, codeBlocks };
|
||||
}
|
||||
|
||||
/**
|
||||
* Restore code blocks
|
||||
*/
|
||||
function restoreCodeBlocks(text: string, codeBlocks: Map<string, string>): string {
|
||||
let result = text;
|
||||
codeBlocks.forEach((code, placeholder) => {
|
||||
result = result.replace(placeholder, code);
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Split a document into semantic chunks
|
||||
*/
|
||||
export function chunkDocument(content: string, options: ChunkingOptions = {}): DocumentChunk[] {
|
||||
const opts = { ...DEFAULT_OPTIONS, ...options };
|
||||
const chunks: DocumentChunk[] = [];
|
||||
|
||||
// Handle empty content
|
||||
if (!content || content.trim().length === 0) {
|
||||
return chunks;
|
||||
}
|
||||
|
||||
// Extract code blocks if preserving them
|
||||
let processedContent = content;
|
||||
let codeBlocks = new Map<string, string>();
|
||||
|
||||
if (opts.preserveCodeBlocks) {
|
||||
const extracted = extractCodeBlocks(content);
|
||||
processedContent = extracted.text;
|
||||
codeBlocks = extracted.codeBlocks;
|
||||
}
|
||||
|
||||
let position = 0;
|
||||
let chunkIndex = 0;
|
||||
|
||||
while (position < processedContent.length) {
|
||||
const remainingLength = processedContent.length - position;
|
||||
|
||||
// If remaining content fits in one chunk, take it all
|
||||
if (remainingLength <= opts.maxChunkSize) {
|
||||
const chunkContent = processedContent.substring(position);
|
||||
const finalContent = opts.preserveCodeBlocks
|
||||
? restoreCodeBlocks(chunkContent, codeBlocks)
|
||||
: chunkContent;
|
||||
|
||||
chunks.push({
|
||||
content: finalContent.trim(),
|
||||
metadata: {
|
||||
chunkIndex,
|
||||
totalChunks: 0, // Will be updated after loop
|
||||
startChar: position,
|
||||
endChar: processedContent.length,
|
||||
tokenCount: estimateTokens(finalContent),
|
||||
},
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
// Find a good split point
|
||||
const idealEnd = position + opts.maxChunkSize;
|
||||
const actualEnd = findSplitPoint(processedContent, idealEnd);
|
||||
|
||||
const chunkContent = processedContent.substring(position, actualEnd);
|
||||
const finalContent = opts.preserveCodeBlocks
|
||||
? restoreCodeBlocks(chunkContent, codeBlocks)
|
||||
: chunkContent;
|
||||
|
||||
chunks.push({
|
||||
content: finalContent.trim(),
|
||||
metadata: {
|
||||
chunkIndex,
|
||||
totalChunks: 0, // Will be updated after loop
|
||||
startChar: position,
|
||||
endChar: actualEnd,
|
||||
tokenCount: estimateTokens(finalContent),
|
||||
},
|
||||
});
|
||||
|
||||
// Move position forward with overlap
|
||||
position = actualEnd - opts.chunkOverlap;
|
||||
chunkIndex++;
|
||||
}
|
||||
|
||||
// Update totalChunks in all metadata
|
||||
const totalChunks = chunks.length;
|
||||
chunks.forEach((chunk) => {
|
||||
chunk.metadata.totalChunks = totalChunks;
|
||||
});
|
||||
|
||||
return chunks;
|
||||
}
|
||||
|
||||
/**
|
||||
* Chunk multiple documents and return with source tracking
|
||||
*/
|
||||
/** A document chunk annotated with the document it came from. */
export interface SourcedChunk extends DocumentChunk {
  sourceFilename: string;  // filename of the originating document
  sourceMimeType?: string; // MIME type of the originating document, if known
}
|
||||
|
||||
export function chunkDocuments(
|
||||
documents: Array<{ filename: string; content: string; mimeType?: string }>,
|
||||
options: ChunkingOptions = {}
|
||||
): SourcedChunk[] {
|
||||
const allChunks: SourcedChunk[] = [];
|
||||
|
||||
documents.forEach((doc) => {
|
||||
const chunks = chunkDocument(doc.content, options);
|
||||
chunks.forEach((chunk) => {
|
||||
allChunks.push({
|
||||
...chunk,
|
||||
sourceFilename: doc.filename,
|
||||
sourceMimeType: doc.mimeType,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
return allChunks;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user