/** Maximum number of characters of extracted text kept per seed document. */
const MAX_TEXT_STORE = 120_000;

/** Maximum decoded size, in bytes, accepted for an uploaded PDF. */
const MAX_PDF_BYTES = 5 * 1024 * 1024;

/** Payload sent from Build wizard client (`SeedDocumentUpload`). */
export type SeedDocumentRequestBody = {
  fileName: string;
  kind: "markdown" | "pdf";
  text?: string;
  base64?: string;
};

/** Persisted on project `kickoff.sourceData.seedDocument` (extracted text only). */
export type SeedDocumentPersisted = {
  fileName: string;
  kind: "markdown" | "pdf";
  textExtract: string;
};

/**
 * Validates an untrusted seed-document payload and reduces it to the
 * persisted shape (file name, kind, and a capped text extract).
 *
 * - Markdown: `text` is trimmed and truncated to `MAX_TEXT_STORE` characters.
 * - PDF: `base64` is decoded, size-checked against `MAX_PDF_BYTES`, and its
 *   text is extracted with `pdf-parse` (loaded lazily, only on this path).
 *
 * @param raw - Untrusted value, expected to look like `SeedDocumentRequestBody`.
 * @returns The normalized document, or `null` when the payload is not an
 *   object, has an unsupported `kind`, or carries no usable content.
 * @throws Error with message "PDF exceeds 5MB limit" when the PDF is too large.
 *   Errors raised by `pdf-parse` while parsing are not caught here.
 */
export async function normalizeSeedDocument(
  raw: unknown,
): Promise<SeedDocumentPersisted | null> {
  if (!raw || typeof raw !== "object" || Array.isArray(raw)) return null;
  const o = raw as Record<string, unknown>;

  const fileNameRaw = typeof o.fileName === "string" ? o.fileName.trim() : "";
  // Cap the stored name at 240 characters; fall back to a generic name when absent.
  const fileName =
    fileNameRaw.length > 0 ? fileNameRaw.slice(0, 240) : "document";

  const kind =
    o.kind === "pdf" ? "pdf" : o.kind === "markdown" ? "markdown" : null;
  if (!kind) return null;

  if (kind === "markdown") {
    const text = typeof o.text === "string" ? o.text : "";
    const trimmed = text.trim();
    if (!trimmed) return null;
    return {
      fileName,
      kind: "markdown",
      textExtract: trimmed.slice(0, MAX_TEXT_STORE),
    };
  }

  // Strip whitespace so line-wrapped base64 is accepted.
  const b64 = typeof o.base64 === "string" ? o.base64.replace(/\s/g, "") : "";
  if (!b64) return null;

  // Reject clearly oversized payloads before allocating the decoded buffer:
  // any base64 string longer than this decodes to more than MAX_PDF_BYTES.
  const maxBase64Chars = Math.ceil(MAX_PDF_BYTES / 3) * 4;
  if (b64.length > maxBase64Chars) {
    throw new Error("PDF exceeds 5MB limit");
  }

  // Buffer.from(..., "base64") never throws; it silently skips invalid
  // characters. Input with no decodable data therefore yields an empty buffer.
  const buf = Buffer.from(b64, "base64");
  if (buf.length === 0) return null;

  // Authoritative check on the actual decoded size.
  if (buf.length > MAX_PDF_BYTES) {
    throw new Error("PDF exceeds 5MB limit");
  }

  const pdfParseMod = await import("pdf-parse");
  // Handle both ESM-default and CommonJS-style module shapes.
  const pdfParse = pdfParseMod.default ?? pdfParseMod;
  const parsed = await pdfParse(buf);
  const extracted = typeof parsed?.text === "string" ? parsed.text.trim() : "";

  return {
    fileName,
    kind: "pdf",
    textExtract:
      extracted.length > 0
        ? extracted.slice(0, MAX_TEXT_STORE)
        : "[No extractable text in PDF]",
  };
}