// vibn-agent-runner/vibn-frontend/lib/server/rate-limit.ts

/**
 * Postgres-backed sliding-window rate limiter.
 *
 * Designed for "small N, simple shape": a few thousand keys/min across the
 * platform, single primary, no Redis dependency to keep beta infra tight.
 * If we outgrow this, swap the storage backend without changing call sites.
 *
 * Schema (auto-created):
 *   rate_limit_log (key TEXT, ts TIMESTAMPTZ DEFAULT NOW())
 *   index on (key, ts DESC)
 *
 * Algorithm:
 *   1. Delete this key's rows that are older than the window.
 *   2. Count remaining rows in window.
 *   3. If under limit, INSERT a row and return {ok: true, remaining}.
 *   4. Else return {ok: false, retryAfterMs}.
 *
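 * NOT race-free — concurrent calls for the same key (on one node or across
 * nodes) can each pass the count check before any of them inserts, so the
 * limit can be briefly exceeded. That's deliberate for cost. If you need
 * hard quotas (e.g. billing-tier caps), use `lib/quotas.ts` instead.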
 */

import { query } from "@/lib/db-postgres";
import { log } from "@/lib/server/logger";

/** Set once the table and index are known to exist; enables the fast path. */
let tableReady = false;
/** In-flight (or completed) schema setup, shared by all concurrent callers. */
let tableInit: Promise<void> | null = null;

/** Issues the idempotent DDL for `rate_limit_log` and its `(key, ts DESC)` index. */
async function createRateLimitTable(): Promise<void> {
  await query(`
    CREATE TABLE IF NOT EXISTS rate_limit_log (
      key TEXT NOT NULL,
      ts  TIMESTAMPTZ NOT NULL DEFAULT NOW()
    )
  `);
  await query(`CREATE INDEX IF NOT EXISTS rate_limit_log_key_ts_idx ON rate_limit_log (key, ts DESC)`);
  tableReady = true;
}

/**
 * Lazily creates the `rate_limit_log` table and index, at most once per
 * process.
 *
 * Concurrent callers share a single in-flight setup promise instead of each
 * running the DDL: racing `CREATE ... IF NOT EXISTS` statements can fail with
 * a catalog unique violation in Postgres, which would otherwise show up as a
 * spurious rate-limiter error at cold start.
 *
 * @throws Whatever `query` rejects with. The failed attempt is discarded so
 *   the next call retries the setup.
 */
async function ensureTable(): Promise<void> {
  if (tableReady) return;
  const attempt = tableInit ?? (tableInit = createRateLimitTable());
  try {
    await attempt;
  } catch (err) {
    // Only clear the attempt that actually failed; a newer retry may already
    // have replaced it by the time this waiter resumes.
    if (tableInit === attempt) tableInit = null;
    throw err;
  }
}

/**
 * Options for a single `rateLimit` check.
 *
 * Calls that pass the same `key` are counted against the same log rows, so
 * the key defines the bucket being limited.
 */
export interface RateLimitOpts {
  /** Identity key — e.g. `chat:user@x.com`, `mcp:ws=mark:tool=apps_create`. Required. */
  key: string;
  /** Max calls inside the window. Default 60. */
  limit?: number;
  /** Window in ms. Default 60_000 (1 min). */
  windowMs?: number;
}

/** Outcome of a single `rateLimit` check. */
export interface RateLimitResult {
  /**
   * `true` when the call is allowed, `false` when the limit has been reached.
   * Also `true` when the database is unavailable (the limiter fails open).
   */
  ok: boolean;
  /**
   * Calls still available in the current window after this one; `0` when the
   * call was rejected. When the limiter fails open this is the full limit.
   */
  remaining: number;
  /**
   * Only set when `ok` is `false`: milliseconds until the oldest logged call
   * for the key leaves the window. Never negative.
   */
  retryAfterMs?: number;
}

/**
 * Checks one call against the sliding-window limit for `opts.key` and, if it
 * fits, records it.
 *
 * Steps: sweep this key's rows that are older than the window, count the rows
 * still inside the window, then either INSERT a row (allowed) or report how
 * long until the oldest row expires (rejected). The count and insert are not
 * atomic, so concurrent calls may briefly exceed the limit.
 *
 * @param opts - Key plus optional `limit` (default 60) and `windowMs`
 *   (default 60_000). A `NaN` limit or a non-finite window falls back to the
 *   default rather than silently disabling the limiter.
 * @returns `{ ok: true, remaining }` when allowed, or
 *   `{ ok: false, remaining: 0, retryAfterMs }` when the limit is reached.
 *   Never rejects: any database error is logged and treated as allowed
 *   (fail-open) with `remaining` equal to the limit.
 */
export async function rateLimit(opts: RateLimitOpts): Promise<RateLimitResult> {
  // NaN would make `used >= limit` always false, i.e. no limiting at all.
  const limit = opts.limit == null || Number.isNaN(opts.limit) ? 60 : opts.limit;
  // A non-finite window cannot be expressed as a Postgres interval.
  const windowMs =
    opts.windowMs == null || !Number.isFinite(opts.windowMs) ? 60_000 : opts.windowMs;
  try {
    await ensureTable();
    // Millisecond precision: rounding up to whole seconds would keep expired
    // rows around (and counted) for windows that are not a multiple of 1s.
    const windowInterval = `${Math.ceil(windowMs)} milliseconds`;
    // 1. Sweep stale rows for this key (cheap; index is `(key, ts DESC)`).
    await query(
      `DELETE FROM rate_limit_log WHERE key = $1 AND ts < NOW() - $2::interval`,
      [opts.key, windowInterval],
    );
    // 2. Count rows inside the window and, in the same round trip, compute how
    //    long until the oldest one expires. Doing the arithmetic in SQL keeps
    //    it on the database clock, so app/DB clock skew cannot distort it.
    const rows = await query<{ n: string; retry_ms: string | null }>(
      `SELECT COUNT(*)::text AS n,
              (EXTRACT(EPOCH FROM (MIN(ts) + $2::interval - NOW())) * 1000)::text AS retry_ms
         FROM rate_limit_log
        WHERE key = $1 AND ts >= NOW() - $2::interval`,
      [opts.key, windowInterval],
    );
    const used = Number(rows[0]?.n ?? "0");
    if (used >= limit) {
      // No rows in the window (only possible when limit <= 0): there is no
      // oldest row to wait for, so fall back to a full window.
      const rawRetry = rows[0]?.retry_ms;
      const retryMs = rawRetry == null ? windowMs : Number(rawRetry);
      const retryAfterMs = Math.max(0, Math.ceil(Number.isFinite(retryMs) ? retryMs : windowMs));
      return { ok: false, remaining: 0, retryAfterMs };
    }
    // 3. Under the limit: record this call.
    await query(`INSERT INTO rate_limit_log (key) VALUES ($1)`, [opts.key]);
    return { ok: true, remaining: Math.max(0, limit - used - 1) };
  } catch (err) {
    // Fail-open on DB problems — better than locking everyone out of chat
    // when Postgres has a hiccup. The downside (unbounded calls during the
    // outage) is acceptable for beta scale.
    log.warn("rate-limit DB unavailable, failing open", {
      key: opts.key,
      err: err instanceof Error ? err.message : String(err),
    });
    return { ok: true, remaining: limit };
  }
}