Closes checklist items F-01..F-06, D-01..D-28, S-01..S-10, C-01..C-07, B-01..B-07, R-01..R-02, O-03.

Security (28 deletions + 10 auth gates):
- Delete 28 unauthenticated debug/cursor/firebase/test routes
- Gate ai/chat, ai/conversation, context/summarize, work-completed with withTenantProject/withAuth
- Add HMAC-SHA256 signature verification to webhooks/coolify
- Switch all admin secret comparisons to timingSafeStringEq

Foundations (lib/server/*):
- api-handler.ts: withAuth, withTenantProject, withWorkspace, withAdminSecret, withRateLimit
- logger.ts: structured request-scoped logging with turnId
- audit-log.ts: writeAuditLog helper + audit_log table
- rate-limit.ts: Postgres sliding window rate limiter
- coolify-webhook.ts: verifyCoolifySignature
- timing-safe.ts: timingSafeStringEq

Chat hardening (chat/route.ts):
- MAX_TOOL_ROUNDS 15 → 8 (C-01)
- Loop detection: hard-break at 3 identical fingerprints (was 5) (C-02)
- Add 6-consecutive-tool-call hard-break (C-02)
- Mode: respond first, act second prompt block (C-03)
- SSE heartbeat every 25s via setInterval (C-04)
- Per-tool 45s timeout via Promise.race (C-05)
- turnId per-turn UUID for log correlation (C-06)
- Recovery fires when roundsSinceText >= 4 (C-07)
- SSE plan event on plan_task_add/edit (B-05)

Beta features:
- invites table + GET/POST /api/invites (P4.8)
- invites/[token] validate + redeem (P4.8)
- fs_project_dev_servers table + lib/server/dev-server-state.ts (P6.B1)
- fs_project_secrets table + CRUD routes (P6.D2)
- lib/integrations/brief-extract.ts (P3.7)

Documentation:
- app/api/ROUTES.md: full route map with auth + tenant
186 lines
5.5 KiB
TypeScript
186 lines
5.5 KiB
TypeScript
/**
 * Project brief extraction.
 * Closes BETA_LAUNCH_PLAN P3.7.
 *
 * When a user uploads a PDF / .md / .docx / .txt / .html brief file, we extract
 * the text here and store it on `fs_projects.data.plan.brief`. The
 * `buildSystemPrompt` function in `app/api/chat/route.ts` then surfaces
 * it in the [PROJECT BRIEF] block.
 *
 * Supports:
 *  - .txt / .md — read as-is
 *  - .pdf — extract text via the optional `pdf-parse` package (no binary install required)
 *  - .docx — extract via unzipper + xml text nodes
 *  - .html / .htm — strip tags
 *
 * 5 MB max, 50 000 chars after extraction (truncated with a note).
 */
import { query } from "@/lib/db-postgres";
|
|
import { log } from "@/lib/server/logger";
|
|
|
|
/** Maximum number of characters of extracted text kept before the brief is truncated. */
export const BRIEF_MAX_CHARS = 50_000;

/** Maximum accepted size of an uploaded brief file, in bytes (5 * 1024 * 1024). */
export const BRIEF_MAX_BYTES = 5 * 1024 * 1024;

/**
 * Outcome of a brief extraction, discriminated on `ok`.
 *
 * - `ok: true`  — `text` is the extracted (possibly truncated) brief,
 *   `truncated` tells whether it was cut, and `chars` is the length of `text`.
 * - `ok: false` — `error` is a human-readable reason the extraction failed.
 */
export type BriefExtractionResult =
  | { ok: true; text: string; truncated: boolean; chars: number }
  | { ok: false; error: string };
/**
 * Extract plain text from an uploaded brief file.
 * Call from `POST /api/projects/[projectId]/documents/upload`.
 *
 * The format is chosen from the filename extension or the MIME type, checked
 * in this order: PDF, DOCX, HTML, and finally plain text (.txt / .md / anything
 * else), which is decoded as UTF-8 as-is.
 *
 * @param buffer   Raw file contents; rejected when larger than `BRIEF_MAX_BYTES`.
 * @param mimeType Content type reported for the upload. Parameters such as
 *                 `; charset=utf-8` and letter case are ignored.
 * @param filename Original filename; only its extension is inspected.
 * @returns `{ ok: true, ... }` with the trimmed text (truncated to
 *          `BRIEF_MAX_CHARS` plus a trailing note when too long; `chars` is the
 *          length of the returned text), or `{ ok: false, error }`. Extraction
 *          errors are reported in the result rather than thrown.
 */
export async function extractBriefText(
  buffer: Buffer,
  mimeType: string,
  filename: string,
): Promise<BriefExtractionResult> {
  if (buffer.byteLength > BRIEF_MAX_BYTES) {
    return { ok: false, error: `File is too large (max 5 MB)` };
  }

  try {
    const lowerName = filename.toLowerCase();
    // Browsers and proxies may send "Text/HTML; charset=utf-8": compare only
    // the bare, lower-cased media type.
    const mime = (String(mimeType ?? "").split(";")[0] ?? "").trim().toLowerCase();

    let text: string;
    if (lowerName.endsWith(".pdf") || mime === "application/pdf") {
      text = await extractPdf(buffer);
    } else if (lowerName.endsWith(".docx") || mime === BRIEF_DOCX_MIME) {
      text = await extractDocx(buffer);
    } else if (
      lowerName.endsWith(".html") ||
      lowerName.endsWith(".htm") ||
      mime === "text/html"
    ) {
      text = briefHtmlToPlainText(buffer.toString("utf8"));
    } else {
      // .txt, .md, plain text
      text = buffer.toString("utf8");
    }

    text = text.trim();
    const truncated = text.length > BRIEF_MAX_CHARS;
    if (truncated) {
      text =
        sliceBriefAtCodePoint(text, BRIEF_MAX_CHARS) +
        `\n\n[Brief truncated at ${BRIEF_MAX_CHARS} chars — upload a shorter document for full coverage]`;
    }

    return { ok: true, text, truncated, chars: text.length };
  } catch (err: unknown) {
    return {
      ok: false,
      error: `Extraction failed: ${err instanceof Error ? err.message : String(err)}`,
    };
  }
}

/** MIME type of Word (.docx) documents. */
const BRIEF_DOCX_MIME =
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document";

/** Named HTML entities decoded by `decodeBriefHtmlEntities`. */
const BRIEF_HTML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", " "],
]);

/** Decode numeric and the most common named HTML entities in a single pass. */
function decodeBriefHtmlEntities(input: string): string {
  // One pass means "&amp;lt;" becomes "&lt;" and is not decoded a second time.
  return input.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (match: string, body: string) => {
    if (body.startsWith("#")) {
      const isHex = body[1] === "x" || body[1] === "X";
      const code = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
      const isSurrogate = code >= 0xd800 && code <= 0xdfff;
      const valid = Number.isFinite(code) && code > 0 && code <= 0x10ffff && !isSurrogate;
      return valid ? String.fromCodePoint(code) : match;
    }
    return BRIEF_HTML_ENTITIES.get(body.toLowerCase()) ?? match;
  });
}

/** Reduce an HTML document to its visible text with collapsed whitespace. */
function briefHtmlToPlainText(html: string): string {
  const withoutMarkup = html
    // Comments and script/style bodies are not document text. An unclosed
    // block runs to the end of the input, as it would in a browser.
    .replace(/<!--[\s\S]*?(?:-->|$)/g, " ")
    .replace(/<(script|style)\b[^>]*>[\s\S]*?(?:<\/\1\s*>|$)/gi, " ")
    .replace(/<[^>]+>/g, " ");
  // Decode after stripping tags so an escaped "&lt;b&gt;" survives as literal text.
  return decodeBriefHtmlEntities(withoutMarkup).replace(/\s+/g, " ").trim();
}

/**
 * Return at most `maxChars` UTF-16 code units of `text`, backing off by one
 * when the cut would otherwise separate a surrogate pair.
 */
function sliceBriefAtCodePoint(text: string, maxChars: number): string {
  let end = Math.min(maxChars, text.length);
  if (end > 0 && end < text.length) {
    const last = text.charCodeAt(end - 1);
    // A high surrogate at the cut means its low half would be dropped.
    if (last >= 0xd800 && last <= 0xdbff) end -= 1;
  }
  return text.slice(0, end);
}
/**
 * Extract the text of a PDF using the optional `pdf-parse` package.
 *
 * The package is loaded lazily with `require` so the module still loads when
 * it is not installed.
 *
 * @param buffer Raw PDF bytes.
 * @returns The text reported by `pdf-parse` (empty string if it reports none).
 * @throws Error with an install hint when `pdf-parse` cannot be loaded; any
 *         error raised while parsing the PDF is propagated unchanged.
 */
async function extractPdf(buffer: Buffer): Promise<string> {
  type PdfParseFn = (b: Buffer) => Promise<{ text?: string }>;

  let loaded: unknown;
  try {
    // pdf-parse is a large optional dep, so it is only loaded on demand.
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    loaded = require("pdf-parse");
  } catch (e: unknown) {
    // Only a failure to LOAD the package gets the install hint; parse errors
    // below must not be mistaken for a missing dependency.
    const code =
      typeof e === "object" && e !== null ? (e as { code?: unknown }).code : undefined;
    if (
      code === "MODULE_NOT_FOUND" ||
      (e instanceof Error && e.message.includes("Cannot find module"))
    ) {
      throw new Error(
        "pdf-parse package not installed. Run `npm install pdf-parse` or upload a .txt/.md file instead.",
      );
    }
    throw e;
  }

  // Some bundlers wrap CommonJS modules as `{ default: fn }`.
  const candidate =
    typeof loaded === "function" ? loaded : (loaded as { default?: unknown } | null)?.default;
  if (typeof candidate !== "function") {
    throw new Error("pdf-parse did not export a parse function");
  }

  const result = await (candidate as PdfParseFn)(buffer);
  return result?.text ?? "";
}
/**
 * Extract the text of a .docx file.
 *
 * Opens the archive with `unzipper`, reads `word/document.xml`, parses it with
 * `@xmldom/xmldom`, and collects the document text paragraph by paragraph.
 *
 * @param buffer Raw .docx (zip) bytes.
 * @returns Paragraph texts joined with newlines.
 * @throws Error with a hint when `unzipper` / `@xmldom/xmldom` cannot be
 *         loaded, when `word/document.xml` is missing, or when its declared
 *         uncompressed size exceeds `DOCX_MAX_XML_BYTES`; archive and XML
 *         errors are propagated unchanged.
 */
async function extractDocx(buffer: Buffer): Promise<string> {
  type XmlParserCtor = new () => { parseFromString(xml: string, type: string): Document };
  type ZipEntry = { path: string; uncompressedSize?: number; buffer(): Promise<Buffer> };
  type UnzipperLib = { Open: { buffer(b: Buffer): Promise<{ files: ZipEntry[] }> } };

  let XmlDomParser: XmlParserCtor;
  let unzipper: UnzipperLib;
  try {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    XmlDomParser = (require("@xmldom/xmldom") as { DOMParser: XmlParserCtor }).DOMParser;
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    unzipper = require("unzipper") as UnzipperLib;
  } catch (e: unknown) {
    // Only a failure to LOAD the optional packages gets the hint.
    const code =
      typeof e === "object" && e !== null ? (e as { code?: unknown }).code : undefined;
    if (
      code === "MODULE_NOT_FOUND" ||
      (e instanceof Error && e.message.includes("Cannot find module"))
    ) {
      throw new Error(
        "unzipper or @xmldom/xmldom not installed. Upload a .txt or .md file instead.",
      );
    }
    throw e;
  }

  const directory = await unzipper.Open.buffer(buffer);
  const wordDoc = directory.files.find((entry) => entry.path === "word/document.xml");
  if (!wordDoc) throw new Error("word/document.xml not found in docx");

  // Cheap guard against decompression bombs: refuse to inflate an entry that
  // declares an unreasonable size (the upload itself is already size-limited).
  if (
    typeof wordDoc.uncompressedSize === "number" &&
    wordDoc.uncompressedSize > DOCX_MAX_XML_BYTES
  ) {
    throw new Error("word/document.xml is too large to extract");
  }

  const xml = (await wordDoc.buffer()).toString("utf8");
  const doc = new XmlDomParser().parseFromString(xml, "text/xml");
  return collectDocxText(doc);
}

/** Upper bound on the declared uncompressed size of `word/document.xml`. */
const DOCX_MAX_XML_BYTES = 64 * 1024 * 1024;

/**
 * Walk a parsed `word/document.xml` tree and return its text.
 *
 * Text is taken from text elements (`t`) and concatenated WITHOUT separators
 * inside a paragraph, because a single word is often split across several
 * runs. Paragraphs (`p`) are joined with newlines; tabs and line breaks inside
 * runs are kept as "\t" and "\n".
 */
function collectDocxText(root: Node): string {
  const ELEMENT_NODE = 1;
  const paragraphs: string[] = [];
  let current = "";

  // Namespace-independent element name ("w:t" -> "t").
  const localNameOf = (node: Node): string =>
    (node as { localName?: string | null }).localName ??
    node.nodeName.replace(/^[^:]*:/, "");

  const visit = (node: Node): void => {
    const isElement = node.nodeType === ELEMENT_NODE;
    const name = isElement ? localNameOf(node) : "";

    if (isElement) {
      if (name === "t") {
        current += node.textContent ?? "";
        return;
      }
      // `tab` also occurs as a tab-stop definition under paragraph
      // properties, so only count tabs/breaks that sit directly inside a run.
      const parent = node.parentNode;
      const inRun = parent != null && localNameOf(parent) === "r";
      if (inRun && name === "tab") {
        current += "\t";
        return;
      }
      if (inRun && (name === "br" || name === "cr")) {
        current += "\n";
        return;
      }
    }

    // Index-based walk: works for any NodeList implementation, including ones
    // that do not provide `forEach`.
    const children = node.childNodes;
    if (children) {
      for (let i = 0; i < children.length; i++) {
        const child = children.item(i);
        if (child) visit(child);
      }
    }

    if (isElement && name === "p") {
      paragraphs.push(current);
      current = "";
    }
  };

  visit(root);
  if (current) paragraphs.push(current);

  // Collapse runs of empty paragraphs to a single blank line.
  return paragraphs.join("\n").replace(/\n{3,}/g, "\n\n");
}
/**
 * Persist the extracted brief text to `fs_projects.data.plan.brief`.
 * Called by the upload route after extraction succeeds.
 *
 * Merges `brief` and `briefMeta` into the existing `data.plan` object (other
 * `plan` keys are kept) for the row whose `id` equals `projectId`.
 *
 * @param projectId Id of the `fs_projects` row to update.
 * @param text      Extracted brief text. NUL characters are removed before
 *                  storing because PostgreSQL rejects them in text/jsonb.
 * @param meta      Upload metadata stored as `briefMeta`, together with an
 *                  `uploadedAt` ISO timestamp taken at call time.
 * @throws Re-throws any error from `query` after logging it.
 */
export async function persistProjectBrief(
  projectId: string,
  text: string,
  meta: { filename: string; chars: number; truncated: boolean },
): Promise<void> {
  // PostgreSQL cannot store U+0000 in `text` or `jsonb`; extracted text
  // (notably from PDFs) may contain it, which would fail the whole UPDATE.
  const stripNul = (value: string): string => value.split("\u0000").join("");

  const briefMeta = JSON.stringify({
    ...meta,
    filename: typeof meta.filename === "string" ? stripNul(meta.filename) : meta.filename,
    uploadedAt: new Date().toISOString(),
  });

  try {
    // NOTE(review): an unknown projectId updates zero rows and is still logged
    // as persisted — confirm whether `query` exposes a row count to check.
    await query(
      `UPDATE fs_projects
          SET data = jsonb_set(
                COALESCE(data, '{}'::jsonb),
                '{plan}',
                COALESCE(data->'plan', '{}'::jsonb)
                  || jsonb_build_object(
                       'brief', $1::text,
                       'briefMeta', $2::jsonb
                     ),
                true
              )
        WHERE id = $3`,
      // COALESCE(data, ...) above: jsonb_set returns NULL for a NULL target,
      // which would otherwise wipe the column instead of storing the brief.
      [stripNul(text), briefMeta, projectId],
    );
    log.info("project brief persisted", { projectId, chars: meta.chars });
  } catch (err: unknown) {
    log.error("brief persist failed", {
      projectId,
      err: err instanceof Error ? err.message : String(err),
    });
    throw err;
  }
}