Files
vibn-agent-runner/vibn-frontend/lib/server/parse-seed-document.ts

70 lines
1.9 KiB
TypeScript

/** Upper bound, in characters, on the extracted text kept for a seed document. */
const MAX_TEXT_STORE = 120 * 1000;

/** Upper bound, in bytes, on the decoded size of an uploaded PDF (5 MiB). */
const MAX_PDF_BYTES = 5 * 1024 ** 2;
/**
 * Request payload produced by the Build wizard client (`SeedDocumentUpload`).
 *
 * Which of the optional fields is read depends on `kind`: `text` for
 * markdown uploads, `base64` for PDF uploads.
 */
export type SeedDocumentRequestBody = {
  /** Original name of the uploaded file. */
  fileName: string;
  /** Selects how the document content is carried. */
  kind: "pdf" | "markdown";
  /** Markdown source; read when `kind` is `"markdown"`. */
  text?: string;
  /** Base64-encoded file bytes; read when `kind` is `"pdf"`. */
  base64?: string;
};
/**
 * Shape stored on the project at `kickoff.sourceData.seedDocument`.
 *
 * Only the extracted text is carried; there is no field for the original
 * file bytes.
 */
export type SeedDocumentPersisted = {
  /** Name of the uploaded file. */
  fileName: string;
  /** Format the text was extracted from. */
  kind: "pdf" | "markdown";
  /** Plain text extracted from the document. */
  textExtract: string;
};
/**
 * Validates an untrusted seed-document payload and reduces it to the shape
 * that gets persisted (file name, kind and extracted text only).
 *
 * - Markdown: `text` is trimmed and capped at `MAX_TEXT_STORE` characters.
 * - PDF: `base64` is validated, decoded and passed to `pdf-parse`; the
 *   extracted text is trimmed and capped the same way. A PDF that yields no
 *   text is stored with the placeholder `"[No extractable text in PDF]"`.
 *
 * The file name is trimmed and capped at 240 characters; a missing or blank
 * name falls back to `"document"`.
 *
 * @param raw Untrusted request value. Anything that is not a non-array
 *   object with a recognised `kind` is rejected.
 * @returns The normalised document, or `null` when the payload is malformed
 *   or empty (wrong shape, unknown `kind`, blank markdown, missing or invalid
 *   base64).
 * @throws Error `"PDF exceeds 5MB limit"` when the encoded PDF would decode
 *   to more than `MAX_PDF_BYTES` bytes. Errors raised while loading or
 *   running `pdf-parse` are not caught here and propagate to the caller.
 */
export async function normalizeSeedDocument(
  raw: unknown,
): Promise<SeedDocumentPersisted | null> {
  if (!raw || typeof raw !== "object" || Array.isArray(raw)) return null;
  const o = raw as Record<string, unknown>;

  const fileNameRaw = typeof o.fileName === "string" ? o.fileName.trim() : "";
  const fileName = fileNameRaw.length > 0 ? fileNameRaw.slice(0, 240) : "document";

  if (o.kind === "markdown") {
    const trimmed = typeof o.text === "string" ? o.text.trim() : "";
    if (!trimmed) return null;
    return {
      fileName,
      kind: "markdown",
      textExtract: trimmed.slice(0, MAX_TEXT_STORE),
    };
  }
  if (o.kind !== "pdf") return null;

  // Whitespace (e.g. MIME line wrapping) is not part of the encoding.
  const b64 = typeof o.base64 === "string" ? o.base64.replace(/\s/g, "") : "";
  if (!b64) return null;

  const padding = b64.endsWith("==") ? 2 : b64.endsWith("=") ? 1 : 0;
  const payload = padding > 0 ? b64.slice(0, -padding) : b64;

  // Enforce the size limit BEFORE decoding so an oversized upload is rejected
  // without allocating a buffer for it. Four base64 characters carry three
  // bytes, so this is exact for well-formed input and an upper bound otherwise.
  if (Math.floor((payload.length * 3) / 4) > MAX_PDF_BYTES) {
    throw new Error("PDF exceeds 5MB limit");
  }

  // `Buffer.from(str, "base64")` never throws: it silently drops or truncates
  // at characters it does not understand, so malformed input has to be
  // rejected explicitly. Both the standard and the URL-safe alphabet are
  // accepted (as Node's decoder does), padding is optional, and a length of
  // 4k+1 characters cannot encode a whole number of bytes.
  if (
    payload.length === 0 ||
    payload.length % 4 === 1 ||
    /[^A-Za-z0-9+\/_-]/.test(payload)
  ) {
    return null;
  }

  const buf = Buffer.from(b64, "base64");

  // Loaded lazily so the parser is only pulled in when a PDF is actually sent.
  const pdfParseMod = await import("pdf-parse");
  // Handles both ESM-default and CommonJS-style module shapes.
  const pdfParse = pdfParseMod.default ?? pdfParseMod;
  const parsed = await pdfParse(buf);
  const extracted = typeof parsed?.text === "string" ? parsed.text.trim() : "";

  return {
    fileName,
    kind: "pdf",
    textExtract:
      extracted.length > 0
        ? extracted.slice(0, MAX_TEXT_STORE)
        : "[No extractable text in PDF]",
  };
}