Files
vibn-frontend/lib/coolify-exec.ts
mawkone 5364bd8497 feat(api): comprehensive QA hardening — security gates, chat improvements, beta scaffolds
Closes checklist items F-01..F-06, D-01..D-28, S-01..S-10, C-01..C-07,
B-01..B-07, R-01..R-02, O-03.

Security (28 deletions + 10 auth gates):
- Delete 28 unauthenticated debug/cursor/firebase/test routes
- Gate ai/chat, ai/conversation, context/summarize, work-completed with withTenantProject/withAuth
- Add HMAC-SHA256 signature verification to webhooks/coolify
- Switch all admin secret comparisons to timingSafeStringEq

Foundations (lib/server/*):
- api-handler.ts: withAuth, withTenantProject, withWorkspace, withAdminSecret, withRateLimit
- logger.ts: structured request-scoped logging with turnId
- audit-log.ts: writeAuditLog helper + audit_log table
- rate-limit.ts: Postgres sliding window rate limiter
- coolify-webhook.ts: verifyCoolifySignature
- timing-safe.ts: timingSafeStringEq

Chat hardening (chat/route.ts):
- MAX_TOOL_ROUNDS 15 → 8 (C-01)
- Loop detection: hard-break at 3 identical fingerprints (was 5) (C-02)
- Add 6-consecutive-tool-call hard-break (C-02)
- Mode: respond first, act second prompt block (C-03)
- SSE heartbeat every 25s via setInterval (C-04)
- Per-tool 45s timeout via Promise.race (C-05)
- turnId per-turn UUID for log correlation (C-06)
- Recovery fires when roundsSinceText >= 4 (C-07)
- SSE plan event on plan_task_add/edit (B-05)

Beta features:
- invites table + GET/POST /api/invites (P4.8)
- invites/[token] validate + redeem (P4.8)
- fs_project_dev_servers table + lib/server/dev-server-state.ts (P6.B1)
- fs_project_secrets table + CRUD routes (P6.D2)
- lib/integrations/brief-extract.ts (P3.7)

Documentation:
- app/api/ROUTES.md: full route map with auth + tenant
2026-05-17 19:17:22 -07:00

137 lines
4.9 KiB
TypeScript

/**
* Run a one-shot command inside a Coolify-managed app container.
*
* Same SSH backbone as lib/coolify-logs.ts: we connect to the Coolify
* host as the locked-down `vibn-logs` user, resolve the target
* container via listContainersForApp, and execute `docker exec` in
* non-interactive mode. No TTY, no stdin — this is purpose-built for
* "run a command, return the output" operator actions (running
* migrations, sanity checks, one-off CLI invocations).
*
* Tenant safety: every caller must verify the app uuid belongs to the
* calling workspace BEFORE invoking this helper (via
* getApplicationInProject). This file trusts that check has already
* happened.
*/
import { runOnCoolifyHost } from "@/lib/coolify-ssh";
import {
resolveAppTargetContainer,
type ComposeContainer,
} from "@/lib/coolify-containers";
/** Timeout used when the caller does not pass `timeoutMs`: 60 seconds. */
const DEFAULT_TIMEOUT_MS = 60 * 1_000;
/** Largest timeout a caller may request: 10 minutes (enough for migrations / seeds). */
const MAX_TIMEOUT_MS = 10 * 60 * 1_000;
/** Output cap used when the caller does not pass `maxBytes`: 1 MB of combined stdout+stderr. */
const DEFAULT_MAX_BYTES = 1e6;
/** Hard ceiling on the output cap regardless of what the caller asks for: 5 MB. */
const MAX_BYTES_CAP = 5e6;
/** Input accepted by `execInCoolifyApp`. */
export interface ExecInAppOptions {
  /** UUID of the Coolify application whose container should run the command. */
  appUuid: string;
  /**
   * The command to run. It is handed to `sh -lc` as a single quoted
   * argument, so shell syntax (pipes, redirects, `&&`) works.
   */
  command: string;
  /**
   * Compose service name (`server`, `db`, `worker`, …).
   * Required when the app has >1 container.
   */
  service?: string;
  /** Optional `--user` passed to `docker exec`, e.g. `root` or `1000:1000`. */
  user?: string;
  /** Optional `--workdir` passed to `docker exec`, e.g. `/app`. */
  workdir?: string;
  /**
   * Kill the channel after this many ms.
   * Defaults to 60s; clamped to the range 1s – 10 min.
   */
  timeoutMs?: number;
  /**
   * Cap on combined stdout+stderr, in bytes.
   * Defaults to 1 MB; clamped to the range 1 KiB – 5 MB.
   */
  maxBytes?: number;
}
/** Outcome of a single `execInCoolifyApp` invocation. */
export interface ExecInAppResult {
  /** Name of the container the command was executed in. */
  container: string;
  /** Compose service the resolved container belongs to. */
  service: string;
  /** Exit code reported by the SSH helper; may be `null`. */
  code: number | null;
  /** Captured standard output. */
  stdout: string;
  /** Captured standard error. */
  stderr: string;
  /** Truncation flag as reported by the SSH helper for the captured output. */
  truncated: boolean;
  /** Wall-clock time spent in the remote call, in milliseconds. */
  durationMs: number;
  /** Container health at time of exec (parsed from `docker ps`). */
  containerHealth: ComposeContainer["health"];
  /** The command as it was actually executed (post-escape, for logs). */
  executedCommand: string;
}
/**
 * Quote one token for the shell.
 *
 * The value is wrapped in single quotes; each embedded single quote is
 * rewritten as `'\''` (close the quote, emit an escaped quote, reopen).
 */
function sq(s: string): string {
  const escapedQuote = "'\\''";
  const inner = s.split("'").join(escapedQuote);
  return "'" + inner + "'";
}
/**
 * Clamp a caller-supplied numeric limit into `[min, max]`, substituting
 * `fallback` when the value is absent or NaN.
 */
function clampLimit(
  requested: number | undefined,
  fallback: number,
  min: number,
  max: number,
): number {
  const value = Number(requested ?? fallback);
  // Math.max / Math.min propagate NaN, so without this guard a NaN limit
  // would be forwarded to the SSH helper unchanged.
  return Math.min(Math.max(Number.isNaN(value) ? fallback : value, min), max);
}

/**
 * Run a one-shot, non-interactive command inside the container that
 * backs a Coolify app and return its captured output.
 *
 * The target container is resolved from `opts.appUuid` (and
 * `opts.service`), the payload is wrapped as `docker exec … sh -lc '<command>'`,
 * and the resulting line is executed on the Coolify host.
 *
 * This function performs no tenant check of its own: callers must have
 * verified that the app belongs to the calling workspace beforehand.
 *
 * @param opts Target app, command and optional limits / `docker exec` flags.
 * @returns Exit code, stdout/stderr, truncation flag, duration, and the
 *   resolved container's name, service and health, plus the exact command
 *   line that was executed.
 * @throws Error with message `"command is required"` when `opts.command`
 *   is empty or not a string. Rejections from container resolution or the
 *   SSH helper are not caught here and propagate to the caller.
 */
export async function execInCoolifyApp(
  opts: ExecInAppOptions,
): Promise<ExecInAppResult> {
  if (!opts.command || typeof opts.command !== "string") {
    throw new Error("command is required");
  }

  // Timeout: default 60s, never below 1s, never above 10 min.
  const timeoutMs = clampLimit(
    opts.timeoutMs,
    DEFAULT_TIMEOUT_MS,
    1_000,
    MAX_TIMEOUT_MS,
  );
  // Output cap: default 1 MB, never below 1 KiB, never above 5 MB.
  const maxBytes = clampLimit(
    opts.maxBytes,
    DEFAULT_MAX_BYTES,
    1_024,
    MAX_BYTES_CAP,
  );

  const container = await resolveAppTargetContainer(opts.appUuid, opts.service);

  // Build the `docker exec` invocation as discrete tokens and join them
  // with exactly one space. Every caller-controlled value is single-quoted
  // via sq(), and nothing is normalised after quoting: a regex that
  // collapses whitespace over the assembled string would also rewrite runs
  // of spaces INSIDE a quoted --user / --workdir / container name.
  //
  // The payload goes through `sh -lc` so callers can use shell syntax
  // naturally. It is its own token, so newlines in multi-line scripts are
  // preserved and there is always a space between `-lc` and its argument
  // (dash mis-parses the quoted string as flags otherwise).
  const tokens: string[] = ["docker", "exec"];
  if (opts.user) tokens.push("--user", sq(opts.user));
  if (opts.workdir) tokens.push("--workdir", sq(opts.workdir));
  tokens.push(sq(container.name), "sh", "-lc", sq(opts.command));
  const executedCommand = tokens.join(" ");

  const startedAt = Date.now();

  // Audit log: record the command + target, NOT the output (output
  // may contain secrets). Structured so downstream log shipping can
  // parse it.
  console.log(
    "[apps.exec]",
    JSON.stringify({
      app_uuid: opts.appUuid,
      container: container.name,
      service: container.service,
      timeout_ms: timeoutMs,
      command: opts.command,
    }),
  );

  const res = await runOnCoolifyHost(executedCommand, { timeoutMs, maxBytes });
  const durationMs = Date.now() - startedAt;

  // stderr is not redirected into stdout in the command, so the two
  // streams are returned as the SSH helper reported them. The exit code
  // is passed through untouched: docker-exec uses 126/127 for "cannot
  // exec" and >0 for the user's command failing, and callers should see
  // all of them.
  return {
    container: container.name,
    service: container.service,
    code: res.code,
    stdout: res.stdout,
    stderr: res.stderr,
    truncated: res.truncated,
    durationMs,
    containerHealth: container.health,
    executedCommand,
  };
}