feat(api): comprehensive QA hardening — security gates, chat improvements, beta scaffolds

Closes checklist items F-01..F-06, D-01..D-28, S-01..S-10, C-01..C-07, B-01..B-07, R-01..R-02, O-03.

Security (28 deletions + 10 auth gates):
- Delete 28 unauthenticated debug/cursor/firebase/test routes
- Gate ai/chat, ai/conversation, context/summarize, work-completed with withTenantProject/withAuth
- Add HMAC-SHA256 signature verification to webhooks/coolify
- Switch all admin secret comparisons to timingSafeStringEq

Foundations (lib/server/*):
- api-handler.ts: withAuth, withTenantProject, withWorkspace, withAdminSecret, withRateLimit
- logger.ts: structured request-scoped logging with turnId
- audit-log.ts: writeAuditLog helper + audit_log table
- rate-limit.ts: Postgres sliding window rate limiter
- coolify-webhook.ts: verifyCoolifySignature
- timing-safe.ts: timingSafeStringEq

Chat hardening (chat/route.ts):
- MAX_TOOL_ROUNDS 15 → 8 (C-01)
- Loop detection: hard-break at 3 identical fingerprints (was 5) (C-02)
- Add 6-consecutive-tool-call hard-break (C-02)
- Mode: respond first, act second prompt block (C-03)
- SSE heartbeat every 25s via setInterval (C-04)
- Per-tool 45s timeout via Promise.race (C-05)
- turnId per-turn UUID for log correlation (C-06)
- Recovery fires when roundsSinceText >= 4 (C-07)
- SSE plan event on plan_task_add/edit (B-05)

Beta features:
- invites table + GET/POST /api/invites (P4.8)
- invites/[token] validate + redeem (P4.8)
- fs_project_dev_servers table + lib/server/dev-server-state.ts (P6.B1)
- fs_project_secrets table + CRUD routes (P6.D2)
- lib/integrations/brief-extract.ts (P3.7)

Documentation:
- app/api/ROUTES.md: full route map with auth + tenant
2026-05-17 19:17:22 -07:00
parent 955aeed6ce
commit 6b8862ef2b
86 changed files with 6772 additions and 2817 deletions
--- a/vibn-frontend/app/api/context/summarize/route.ts
+++ b/vibn-frontend/app/api/context/summarize/route.ts
@@ -1,52 +1,99 @@
-import { NextRequest, NextResponse } from "next/server";
+/**
+ * POST /api/context/summarize
+ * Body: { content: string, title?: string }
+ *
+ * Generates a short summary via Gemini. Closes S-04: now requires a
+ * signed-in user (rate-limit per user, not per-IP) so we don't burn Gemini
+ * quota on anonymous traffic.
+ */
+import { NextResponse } from "next/server";
+import { withAuth, withRateLimit } from "@/lib/server/api-handler";
+import { log } from "@/lib/server/logger";

-const MODEL = process.env.GEMINI_MODEL || 'gemini-2.0-flash-exp';
-const API_KEY = process.env.GOOGLE_API_KEY || '';
+const MODEL = process.env.GEMINI_MODEL || "gemini-3.1-pro-preview"; // model id used in GEMINI_URL; the GEMINI_MODEL env var overrides this default
+const API_KEY = process.env.GOOGLE_API_KEY || ""; // NOTE(review): becomes "" when GOOGLE_API_KEY is unset and is not checked before the fetch in POST
 const GEMINI_URL = `https://generativelanguage.googleapis.com/v1beta/models/${MODEL}:generateContent`;

-export async function POST(request: NextRequest) {
-  try {
-    const { content, title } = await request.json();
+export const POST = withRateLimit( // withRateLimit is the outermost wrapper; the withAuth-wrapped handler is its first argument
+  withAuth(async (request, _ctx, { user }) => { // third argument carries `user`; withAuth lives in api-handler (not shown here)
+    try {
+      const { content, title } = (await request.json()) as { // compile-time cast only; the runtime check below covers `content` alone
+        content?: string;
+        title?: string; // not type-checked at runtime; the prompt uses "(untitled)" when it is null/undefined
+      };

-    if (!content) {
-      return NextResponse.json({ error: "Content is required" }, { status: 400 });
-    }
+      if (!content || typeof content !== "string") { // missing, empty, or non-string content is answered with 400
+        return NextResponse.json(
+          { error: "content is required" },
+          { status: 400 },
+        );
+      }

-    const maxContentLength = 30000;
-    const truncatedContent = content.length > maxContentLength
-      ? content.substring(0, maxContentLength) + "..."
-      : content;
+      const maxContentLength = 30000; // longer content is cut to this many characters and suffixed with "..."
+      const truncatedContent =
+        content.length > maxContentLength
+          ? content.substring(0, maxContentLength) + "..."
+          : content;

-    const prompt = `Read this document titled "${title}" and provide a concise 1-2 sentence summary that captures the main topic and key points. Be specific and actionable.
+      const prompt = `Read this document titled "${title ?? "(untitled)"}" and provide a concise 1-2 sentence summary that captures the main topic and key points. Be specific and actionable.

 Document content:
 ${truncatedContent}

 Summary:`;

-    const response = await fetch(`${GEMINI_URL}?key=${API_KEY}`, {
-      method: 'POST',
-      headers: { 'Content-Type': 'application/json' },
-      body: JSON.stringify({
-        contents: [{ role: 'user', parts: [{ text: prompt }] }],
-        generationConfig: { temperature: 0.3 },
-      }),
-    });
+      const response = await fetch(`${GEMINI_URL}?key=${API_KEY}`, { // the API key is sent as a URL query parameter
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          contents: [{ role: "user", parts: [{ text: prompt }] }],
+          generationConfig: { temperature: 0.3 },
+        }),
+      });

-    if (!response.ok) {
-      throw new Error(`Gemini API error (${response.status}): ${await response.text()}`);
+      if (!response.ok) { // non-2xx from Gemini: log a warning, then answer 502 instead of throwing
+        const text = await response.text();
+        log.warn("context/summarize gemini error", {
+          route: "api.context.summarize",
+          user: user.email,
+          status: response.status,
+          body: text.slice(0, 500),
+        });
+        return NextResponse.json(
+          {
+            error: `Gemini API error (${response.status})`,
+            details: text.slice(0, 500), // NOTE(review): up to 500 chars of the upstream error body are returned to the caller — confirm this is intended
+          },
+          { status: 502 },
+        );
+      }
+
+      const result = await response.json();
+      const summary =
+        result.candidates?.[0]?.content?.parts?.[0]?.text?.trim() ||
+        "Summary unavailable"; // used when the first candidate has no text or it trims to an empty string
+      return NextResponse.json({ summary });
+    } catch (err) { // also reached when request.json() or fetch rejects, so an unparseable request body yields 500 rather than 400
+      log.error("context/summarize failed", {
+        route: "api.context.summarize",
+        err: err instanceof Error ? err.message : String(err),
+      });
+      return NextResponse.json(
+        {
+          error: "Failed to generate summary",
+          details: err instanceof Error ? err.message : String(err),
+        },
+        { status: 500 },
+      );
    }
-
-    const result = await response.json();
-    const summary = result.candidates?.[0]?.content?.parts?.[0]?.text?.trim() || 'Summary unavailable';
-
-    return NextResponse.json({ summary });
-  } catch (error) {
-    console.error("Error generating summary:", error);
-    return NextResponse.json(
-      { error: "Failed to generate summary", details: error instanceof Error ? error.message : String(error) },
-      { status: 500 }
-    );
-  }
-}
-
+  }),
+  {
+    // 20 summaries / min / user — much higher than chat because they're cheap.
+    limit: 20,
+    windowMs: 60_000,
+    keyFn: (_req, extra) => {
+      const userEmail = (extra as { user?: { email?: string } })?.user?.email;
+      return `context-summarize:${userEmail ?? "anon"}`; // NOTE(review): calls without extra.user.email all share the single "anon" bucket — confirm withRateLimit supplies extra.user here, given that it wraps withAuth
+    },
+  },
+);