Files
vibn-agent-runner/vibn-frontend/lib/integrations/brief-extract.ts
mawkone 6b8862ef2b feat(api): comprehensive QA hardening — security gates, chat improvements, beta scaffolds
Closes checklist items F-01..F-06, D-01..D-28, S-01..S-10, C-01..C-07,
B-01..B-07, R-01..R-02, O-03.

Security (28 deletions + 10 auth gates):
- Delete 28 unauthenticated debug/cursor/firebase/test routes
- Gate ai/chat, ai/conversation, context/summarize, work-completed with withTenantProject/withAuth
- Add HMAC-SHA256 signature verification to webhooks/coolify
- Switch all admin secret comparisons to timingSafeStringEq

Foundations (lib/server/*):
- api-handler.ts: withAuth, withTenantProject, withWorkspace, withAdminSecret, withRateLimit
- logger.ts: structured request-scoped logging with turnId
- audit-log.ts: writeAuditLog helper + audit_log table
- rate-limit.ts: Postgres sliding window rate limiter
- coolify-webhook.ts: verifyCoolifySignature
- timing-safe.ts: timingSafeStringEq

Chat hardening (chat/route.ts):
- MAX_TOOL_ROUNDS 15 → 8 (C-01)
- Loop detection: hard-break at 3 identical fingerprints (was 5) (C-02)
- Add 6-consecutive-tool-call hard-break (C-02)
- Mode: respond first, act second prompt block (C-03)
- SSE heartbeat every 25s via setInterval (C-04)
- Per-tool 45s timeout via Promise.race (C-05)
- turnId per-turn UUID for log correlation (C-06)
- Recovery fires when roundsSinceText >= 4 (C-07)
- SSE plan event on plan_task_add/edit (B-05)

Beta features:
- invites table + GET/POST /api/invites (P4.8)
- invites/[token] validate + redeem (P4.8)
- fs_project_dev_servers table + lib/server/dev-server-state.ts (P6.B1)
- fs_project_secrets table + CRUD routes (P6.D2)
- lib/integrations/brief-extract.ts (P3.7)

Documentation:
- app/api/ROUTES.md: full route map with auth + tenant
2026-05-17 19:17:22 -07:00

186 lines
5.5 KiB
TypeScript

/**
* Project brief extraction.
* Closes BETA_LAUNCH_PLAN P3.7.
*
* When a user uploads a PDF / .md / .docx / .txt brief file, we extract
* the text here and store it on `fs_projects.data.plan.brief`. The
* `buildSystemPrompt` function in `app/api/chat/route.ts` then surfaces
* it in the [PROJECT BRIEF] block.
*
* Supports:
* - .txt / .md — read as-is
* - .pdf — extract text via the optional pdf-parse package (built on pdf.js; no binary install required)
* - .docx — extract via unzipper + xml text nodes
* - .html / .htm — strip tags
*
* 5 MB max, 50 000 chars after extraction (truncated with a note).
*/
import { query } from "@/lib/db-postgres";
import { log } from "@/lib/server/logger";
/** Upper bound on the number of characters kept from an extracted brief. */
export const BRIEF_MAX_CHARS = 50_000;

/** Upper bound on the size of an uploaded brief file, in bytes (5 MiB). */
export const BRIEF_MAX_BYTES = 5 * 2 ** 20;

/** Extraction succeeded: the text, whether it was cut short, and its length. */
type BriefExtractionSuccess = {
  ok: true;
  text: string;
  truncated: boolean;
  chars: number;
};

/** Extraction failed: a human-readable reason. */
type BriefExtractionFailure = { ok: false; error: string };

/** Outcome of a brief extraction, discriminated on `ok`. */
export type BriefExtractionResult =
  | BriefExtractionSuccess
  | BriefExtractionFailure;
/**
 * Extract plain text from an uploaded brief file.
 * Call from `POST /api/projects/[projectId]/documents/upload`.
 *
 * The format is picked from the filename extension or the MIME type: PDF and
 * .docx go through their extractors, HTML has its tags stripped and whitespace
 * collapsed, and everything else is decoded as UTF-8 text.
 *
 * @param buffer - Raw bytes of the upload; rejected when larger than `BRIEF_MAX_BYTES`.
 * @param mimeType - Reported content type. Parameters (`; charset=...`) and letter case are ignored.
 * @param filename - Original file name; only its extension is inspected.
 * @returns On success: the trimmed text, whether it was cut at `BRIEF_MAX_CHARS`,
 *   and `chars`, the length of the returned text (which includes the truncation
 *   note when one was appended). Failures are reported as `{ ok: false }`
 *   instead of being thrown.
 */
export async function extractBriefText(
  buffer: Buffer,
  mimeType: string,
  filename: string,
): Promise<BriefExtractionResult> {
  if (buffer.byteLength > BRIEF_MAX_BYTES) {
    return { ok: false, error: `File is too large (max 5 MB)` };
  }
  try {
    const lower = filename.toLowerCase();
    // Compare only the bare media type: drop parameters and normalise case so
    // values like "text/html; charset=utf-8" still match.
    const mime = (mimeType ?? "").split(";", 1)[0]?.trim().toLowerCase() ?? "";

    let text: string;
    if (lower.endsWith(".pdf") || mime === "application/pdf") {
      text = await extractPdf(buffer);
    } else if (
      lower.endsWith(".docx") ||
      mime ===
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    ) {
      text = await extractDocx(buffer);
    } else if (
      lower.endsWith(".html") ||
      lower.endsWith(".htm") ||
      mime === "text/html" ||
      mime === "application/xhtml+xml"
    ) {
      text = buffer
        .toString("utf8")
        .replace(/<[^>]+>/g, " ")
        .replace(/\s+/g, " ")
        .trim();
    } else {
      // .txt, .md, plain text
      text = buffer.toString("utf8");
    }

    // NUL characters (common in PDF text layers) cannot be stored in a
    // PostgreSQL text/jsonb value, so drop them before the brief is persisted.
    text = text.replace(/\u0000/g, "").trim();

    const truncated = text.length > BRIEF_MAX_CHARS;
    if (truncated) {
      // Do not cut between the two halves of a surrogate pair.
      let end = BRIEF_MAX_CHARS;
      const lastUnit = text.charCodeAt(end - 1);
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
      text =
        text.slice(0, end) +
        `\n\n[Brief truncated at ${BRIEF_MAX_CHARS} chars — upload a shorter document for full coverage]`;
    }
    return { ok: true, text, truncated, chars: text.length };
  } catch (err) {
    return {
      ok: false,
      error: `Extraction failed: ${err instanceof Error ? err.message : String(err)}`,
    };
  }
}
/**
 * Extract the text of a PDF using the optional `pdf-parse` package.
 *
 * @param buffer - Raw PDF bytes.
 * @returns The text reported by pdf-parse, not trimmed here.
 * @throws Error with an install hint when `pdf-parse` cannot be loaded. Errors
 *   raised while parsing the document are propagated unchanged.
 */
async function extractPdf(buffer: Buffer): Promise<string> {
  type PdfParse = (b: Buffer) => Promise<{ text: string }>;

  // pdf-parse is a large optional dependency, so it is loaded lazily with
  // require(). Only the load is guarded: a failure while parsing must not be
  // reported as "package not installed".
  let pdfParse: PdfParse;
  try {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    pdfParse = require("pdf-parse") as PdfParse;
  } catch (e: unknown) {
    const code = (e as { code?: unknown } | null)?.code;
    const missing =
      code === "MODULE_NOT_FOUND" ||
      (e instanceof Error && e.message.includes("Cannot find module"));
    if (missing) {
      throw new Error(
        "pdf-parse package not installed. Run `npm install pdf-parse` or upload a .txt/.md file instead.",
      );
    }
    throw e;
  }

  const result = await pdfParse(buffer);
  return result.text;
}
/** Minimal structural view of the XML DOM nodes read by the .docx text walk. */
interface DocxXmlNode {
  readonly nodeType: number;
  readonly nodeName: string;
  readonly localName?: string | null;
  readonly nodeValue: string | null;
  readonly firstChild: DocxXmlNode | null;
  readonly nextSibling: DocxXmlNode | null;
}

/**
 * Collect the visible text under `root`.
 *
 * Only text inside `t` elements is kept, so field instructions and deleted
 * text are skipped. Runs within a paragraph are concatenated with no separator
 * because Word may split a single word across runs. `tab` becomes a tab,
 * `br`/`cr` a newline, and each `p` ends with a newline.
 */
function collectDocxText(root: DocxXmlNode): string {
  const ELEMENT_NODE = 1;
  const TEXT_NODE = 3;
  const CDATA_NODE = 4;
  const pieces: string[] = [];

  const baseName = (node: DocxXmlNode): string => {
    const name = node.localName ?? node.nodeName;
    return name.slice(name.indexOf(":") + 1);
  };

  const visit = (node: DocxXmlNode, insideText: boolean): void => {
    if (node.nodeType === TEXT_NODE || node.nodeType === CDATA_NODE) {
      if (insideText && node.nodeValue) pieces.push(node.nodeValue);
      return;
    }
    let name = "";
    if (node.nodeType === ELEMENT_NODE) {
      name = baseName(node);
      // `tabs` holds tab-stop definitions, not tab characters.
      if (name === "tabs") return;
      if (name === "tab") {
        pieces.push("\t");
        return;
      }
      if (name === "br" || name === "cr") {
        pieces.push("\n");
        return;
      }
    }
    const childInsideText = insideText || name === "t";
    // Walk via firstChild/nextSibling instead of childNodes.forEach, which
    // the NodeList returned by the XML parser is not guaranteed to have.
    for (let child = node.firstChild; child; child = child.nextSibling) {
      visit(child, childInsideText);
    }
    if (name === "p") pieces.push("\n");
  };

  visit(root, false);
  // Collapse runs of empty paragraphs to a single blank line.
  return pieces.join("").replace(/\n{3,}/g, "\n\n").trim();
}

/**
 * Extract the text of a .docx file by reading `word/document.xml` from the
 * zip container and walking its XML.
 *
 * @param buffer - Raw .docx bytes.
 * @returns Paragraphs separated by newlines.
 * @throws Error with a hint when `unzipper` or `@xmldom/xmldom` cannot be
 *   loaded, when the archive has no `word/document.xml`, or when that file
 *   cannot be parsed. Other unzip/parse errors are propagated unchanged.
 */
async function extractDocx(buffer: Buffer): Promise<string> {
  type XmlParserCtor = new () => {
    parseFromString(xml: string, type: string): DocxXmlNode | undefined;
  };
  type ZipEntry = { path: string; buffer(): Promise<Buffer> };
  type Unzipper = {
    Open: { buffer(b: Buffer): Promise<{ files: ZipEntry[] }> };
  };

  // Only the dependency load is guarded, so that unzip/parse failures are not
  // reported as "package not installed".
  let XmlParser: XmlParserCtor;
  let unzipper: Unzipper;
  try {
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    XmlParser = (require("@xmldom/xmldom") as { DOMParser: XmlParserCtor })
      .DOMParser;
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    unzipper = require("unzipper") as Unzipper;
  } catch (e: unknown) {
    const code = (e as { code?: unknown } | null)?.code;
    const missing =
      code === "MODULE_NOT_FOUND" ||
      (e instanceof Error && e.message.includes("Cannot find module"));
    if (missing) {
      throw new Error(
        "unzipper or @xmldom/xmldom not installed. Upload a .txt or .md file instead.",
      );
    }
    throw e;
  }

  const directory = await unzipper.Open.buffer(buffer);
  const wordDoc = directory.files.find((f) => f.path === "word/document.xml");
  if (!wordDoc) throw new Error("word/document.xml not found in docx");

  const xml = (await wordDoc.buffer()).toString("utf8");
  const doc = new XmlParser().parseFromString(xml, "text/xml");
  if (!doc) throw new Error("word/document.xml could not be parsed");
  return collectDocxText(doc);
}
/**
 * Persist the extracted brief text to `fs_projects.data.plan.brief`.
 * Called by the upload route after extraction succeeds.
 *
 * `brief` and `briefMeta` are merged into the existing `plan` object, so other
 * plan keys are kept. A NULL `data` column or a missing/non-object `plan` is
 * treated as an empty object so the write is never silently dropped.
 *
 * @param projectId - Value matched against `fs_projects.id`.
 * @param text - Extracted brief text; NUL characters are removed before storing.
 * @param meta - Stored as `briefMeta` together with an `uploadedAt` ISO timestamp.
 * @throws Re-throws any error from the database call after logging it.
 */
export async function persistProjectBrief(
  projectId: string,
  text: string,
  meta: { filename: string; chars: number; truncated: boolean },
): Promise<void> {
  // PostgreSQL text/jsonb values cannot contain NUL characters.
  const storableText = text.replace(/\u0000/g, "");
  const briefMeta = JSON.stringify({
    ...meta,
    uploadedAt: new Date().toISOString(),
  });
  try {
    await query(
      `UPDATE fs_projects
          SET data = jsonb_set(
                COALESCE(data, '{}'::jsonb),
                '{plan}',
                (CASE WHEN jsonb_typeof(data->'plan') = 'object'
                      THEN data->'plan'
                      ELSE '{}'::jsonb
                 END)
                  || jsonb_build_object(
                       'brief', $1::text,
                       'briefMeta', $2::jsonb
                     ),
                true
              )
        WHERE id = $3`,
      [storableText, briefMeta, projectId],
    );
    log.info("project brief persisted", { projectId, chars: meta.chars });
  } catch (err) {
    log.error("brief persist failed", {
      projectId,
      err: err instanceof Error ? err.message : String(err),
    });
    throw err;
  }
}