/**
 * Per-project AI dev container ("vibn-dev").
 *
 * One Coolify Service per Vibn project, running the `vibn-dev` image.
 * The AI agent drives it via:
 *   - shell.exec   → docker exec into the container (via existing SSH path)
 *   - fs.*         → file ops (implemented as `cat` / `tee` / `rm` etc.
 *                    inside the container, on top of shell.exec)
 *   - dev_server.* → start long-running processes (week 2)
 *   - ship         → git push to Gitea + trigger Coolify deploy (week 2)
 *
 * Lifecycle states:
 *   - Not provisioned → ensureDevContainer() creates the Coolify service
 *   - Suspended       → Coolify-stopped (saves money). resumeDevContainer()
 *                       starts it.
 *   - Running         → docker exec works.
 *
 * Tenant safety: every helper takes a workspace and the caller must have
 * already verified that the projectId belongs to that workspace via
 * fs_projects. The exec primitive ALSO verifies the resolved container
 * UUID is in the workspace's owned Coolify-project set, so a hijacked
 * projectId can't reach unrelated containers.
 *
 * See: AI_PATH_B_EXECUTION_PLAN.md §3.
 */

import { query, queryOne } from "@/lib/db-postgres";
import {
  createDockerComposeApp,
  startService,
  stopService,
  getService,
} from "@/lib/coolify";
import { execInCoolifyApp, type ExecInAppResult } from "@/lib/coolify-exec";
import { isCoolifySshConfigured } from "@/lib/coolify-ssh";
import {
  ensureProjectCoolifyProject,
  getProjectCoolifyUuid,
  linkResourceToProject,
} from "@/lib/projects";
import type { VibnWorkspace } from "@/lib/workspaces";
import { assertDevContainerQuota } from "@/lib/quotas";
import { sortDevPreviewsFrontendFirst } from "@/lib/dev-preview-priority";

// ── Configuration ────────────────────────────────────────────────────

/**
 * Image tag for vibn-dev. Built and pushed from /vibn-dev/Dockerfile.
 * Override per-environment with VIBN_DEV_IMAGE for staging/canary tags.
 */
export const VIBN_DEV_IMAGE = process.env.VIBN_DEV_IMAGE ?? "vibn-dev:latest";

/** Resource caps per dev container. Tweak in env per-tier later.
*/
const DEFAULT_CPU_LIMIT = process.env.VIBN_DEV_CPU_LIMIT ?? "1"; // 1 vCPU
const DEFAULT_MEM_LIMIT = process.env.VIBN_DEV_MEM_LIMIT ?? "1g"; // 1 GiB
// Soft hint only — compose does not enforce a disk limit.
const DEFAULT_DISK_LIMIT = process.env.VIBN_DEV_DISK_LIMIT ?? "10g";

// ── Schema ───────────────────────────────────────────────────────────

/** Per-process memo so the CREATE TABLE round-trip happens at most once. */
let devContainersTableReady = false;

/**
 * Create the `fs_project_dev_containers` table and its indexes if they do
 * not exist yet. Cheap to call repeatedly: after the first successful run
 * in this process it returns immediately.
 */
export async function ensureDevContainersTable(): Promise<void> {
  if (devContainersTableReady) return;
  await query(
    `CREATE TABLE IF NOT EXISTS fs_project_dev_containers (
       project_id      TEXT PRIMARY KEY,
       workspace       TEXT NOT NULL,
       service_uuid    TEXT NOT NULL,
       image           TEXT NOT NULL,
       state           TEXT NOT NULL DEFAULT 'provisioning',
       last_active_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
       suspended_at    TIMESTAMPTZ,
       created_at      TIMESTAMPTZ NOT NULL DEFAULT now()
     );
     CREATE INDEX IF NOT EXISTS fs_project_dev_containers_ws_idx
       ON fs_project_dev_containers (workspace);
     CREATE INDEX IF NOT EXISTS fs_project_dev_containers_active_idx
       ON fs_project_dev_containers (last_active_at);`,
    [],
  );
  devContainersTableReady = true;
}

/** One row of `fs_project_dev_containers` (one dev container per project). */
export interface DevContainerRow {
  project_id: string;
  workspace: string;
  service_uuid: string;
  image: string;
  state: "provisioning" | "running" | "suspended" | "failed";
  last_active_at: Date;
  suspended_at: Date | null;
  created_at: Date;
}

/**
 * Look up the dev-container row for a project.
 *
 * @param projectId Vibn project id (primary key of the table).
 * @returns The row, or the "not found" value `queryOne` yields when the
 *          project has no dev container provisioned.
 */
export async function getDevContainerRow(
  projectId: string,
): Promise<DevContainerRow | null> {
  await ensureDevContainersTable();
  return queryOne<DevContainerRow>(
    `SELECT * FROM fs_project_dev_containers WHERE project_id = $1 LIMIT 1`,
    [projectId],
  );
}

// ── Compose template ─────────────────────────────────────────────────

/**
 * Render the docker-compose.yml that backs a single vibn-dev service.
 *
 * Two named volumes are intentional:
 *   - workspace : everything in /workspace (the user's source tree).
 *                 Persists across suspends. Backed up to Gitea every
 *                 5 min via the auto-push autosave loop (week 2).
 *   - cache     : language-toolchain caches (mise, npm, pip, cargo).
 *                 Persists across suspends; per-project (never shared).
*
 * The container has NO Vibn-internal network access. We rely on the
 * default Coolify-bridge network being isolated from the vibn-postgres
 * / vibn-frontend bridge. (Network policy hardening lands in week 1
 * day 2 alongside the auto-push job.)
 */

/**
 * Pre-allocated preview-port slots. We bake Traefik labels for
 * ports 3000..3000+PREVIEW_PORT_COUNT-1 directly into the compose,
 * so `dev_server.start` doesn't have to mutate the compose at runtime
 * (which would require a Coolify redeploy and ~30s of latency).
 *
 * The first slot is the project's "primary" preview; additional slots
 * cover the few-times-a-session case where the AI runs both a Vite
 * frontend and a separate API. Cap is intentionally low (10) so a
 * single user can't stand up dozens of public URLs.
 *
 * Subdomain shape: preview-{slot}-{projectSlug}-{token}.preview.vibnai.com
 *   - slot is 0..9, used to disambiguate when one project runs >1 server
 *   - token is a per-project suffix (see projectPreviewToken)
 */
export const PREVIEW_BASE_PORT = 3000;
export const PREVIEW_PORT_COUNT = 10;

/**
 * Derive the per-project subdomain suffix used in preview hostnames.
 *
 * The value is a pure function of `projectId` (hex of its leading bytes,
 * truncated to 8 hex chars), so the same subdomains survive container
 * restarts without anything having to be stored.
 *
 * NOTE(review): this is deterministic, not random — two project ids that
 * share their first four bytes get the same token, and anyone who knows a
 * project id can compute it. The derivation is intentionally left as-is
 * because the Host() rules of already-provisioned containers were rendered
 * with it; changing it needs a migration of existing compose files.
 */
function projectPreviewToken(projectId: string): string {
  return Buffer.from(projectId).toString("hex").slice(0, 8);
}

/**
 * Build the docker-compose YAML for one project's vibn-dev service.
 *
 * @param projectSlug Used in router names, hostnames, the bridge-network
 *                    name and the VIBN_PROJECT_SLUG env var.
 * @param projectId   Exposed as VIBN_PROJECT_ID and used to derive the
 *                    preview token.
 * @returns The compose document as a string.
 */
function renderDevCompose(projectSlug: string, projectId: string): string {
  // Image distribution: we build vibn-dev on the Coolify host once
  // (see /vibn-dev/setup-on-coolify.sh) and reference it locally.
  // pull_policy: never tells Docker not to attempt a registry pull.
  //
  // Network isolation: vibn-dev sits on its OWN bridge network
  // (`vibn-dev-net-${slug}`). On Coolify the Traefik proxy ALSO joins
  // this network so it can reach the dev container; vibn-postgres /
  // vibn-frontend do not.
  //
  // Traefik labels: pre-allocated routers for ports 3000..3009. Each
  // router uses a distinct subdomain. Routes only "activate" when a
  // process is actually listening on the port — Traefik does the
  // health check.
  const token = projectPreviewToken(projectId);
  const traefikLabels: string[] = ['"traefik.enable=true"'];
  for (let i = 0; i < PREVIEW_PORT_COUNT; i++) {
    const port = PREVIEW_BASE_PORT + i;
    const router = `vibn-dev-${projectSlug}-${i}`;
    const host = `preview-${i}-${projectSlug}-${token}.${PREVIEW_DOMAIN_BASE_RAW}`;
    traefikLabels.push(
      `"traefik.http.routers.${router}.rule=Host(\`${host}\`)"`,
    );
    traefikLabels.push(`"traefik.http.routers.${router}.entrypoints=https"`);
    traefikLabels.push(`"traefik.http.routers.${router}.tls=true"`);
    traefikLabels.push(
      `"traefik.http.routers.${router}.tls.certresolver=letsencrypt-dns"`,
    );
    traefikLabels.push(
      `"traefik.http.services.${router}.loadbalancer.server.port=${port}"`,
    );
    traefikLabels.push(`"traefik.http.routers.${router}.service=${router}"`);
  }
  // Six spaces: list items nested under `services.vibn-dev.labels`.
  const labelsBlock = traefikLabels.map((l) => `      - ${l}`).join("\n");

  // YAML is indentation-sensitive — keep this template flush-left.
  return `services:
  vibn-dev:
    image: ${VIBN_DEV_IMAGE}
    pull_policy: never
    restart: unless-stopped
    working_dir: /workspace
    volumes:
      - workspace:/workspace
      - cache:/home/vibn/.cache
    environment:
      - VIBN_PROJECT_SLUG=${projectSlug}
      - VIBN_PROJECT_ID=${projectId}
      - VIBN_PREVIEW_TOKEN=${token}
      - VIBN_DEV_CONTAINER=1
    networks:
      - vibn-dev-net
      - coolify
    labels:
${labelsBlock}
    deploy:
      resources:
        limits:
          cpus: '${DEFAULT_CPU_LIMIT}'
          memory: ${DEFAULT_MEM_LIMIT}

networks:
  vibn-dev-net:
    name: vibn-dev-net-${projectSlug}
    driver: bridge
  coolify:
    external: true

volumes:
  workspace:
  cache:
`;
}

/** Base domain under which preview subdomains are served. */
const PREVIEW_DOMAIN_BASE_RAW =
  process.env.VIBN_PREVIEW_DOMAIN_BASE ?? "preview.vibnai.com";

// ── Provisioning ─────────────────────────────────────────────────────

export interface EnsureDevContainerOpts {
  projectId: string;
  projectSlug: string;
  projectName?: string;
  workspace: VibnWorkspace;
  /** Skip the initial start (provision-only). Default: start it. */
  noStart?: boolean;
}

export interface EnsureDevContainerResult {
  serviceUuid: string;
  state: DevContainerRow["state"];
  created: boolean;
}

/**
 * Idempotently ensure a vibn-dev service exists for the given Vibn project.
 *
 *   - Already provisioned → returns the row, optionally resumes if suspended.
 *   - Not provisioned     → ensures the per-project Coolify Project exists,
 *                           creates the docker-compose service, links the
 *                           resource to the Vibn project, persists the row.
 *
 * Safe to call on every chat turn — first call is ~10s, subsequent
 * calls are a single SELECT.
 *
 * @returns The service uuid, the state that was persisted (or `"running"`
 *          after a resume), and whether this call created the service.
 * @throws Error when Coolify SSH is not configured (unless
 *         VIBN_ALLOW_DEV_CONTAINER_WITHOUT_SSH=true) or when no Coolify
 *         project could be provisioned. Errors from
 *         `assertDevContainerQuota` propagate unchanged.
 */
export async function ensureDevContainer(
  opts: EnsureDevContainerOpts,
): Promise<EnsureDevContainerResult> {
  await ensureDevContainersTable();

  const existing = await getDevContainerRow(opts.projectId);
  if (existing) {
    if (existing.state === "suspended" && !opts.noStart) {
      // Resume counts as "starting one more" against the quota, since
      // a suspended container is free but a running one isn't.
      await assertDevContainerQuota(opts.workspace.slug);
      await resumeDevContainer(opts.projectId);
      return {
        serviceUuid: existing.service_uuid,
        state: "running",
        created: false,
      };
    }
    return {
      serviceUuid: existing.service_uuid,
      state: existing.state,
      created: false,
    };
  }

  const allowDevContainerWithoutSsh =
    process.env.VIBN_ALLOW_DEV_CONTAINER_WITHOUT_SSH === "true";
  if (!allowDevContainerWithoutSsh && !isCoolifySshConfigured()) {
    throw new Error(
      "Dev workspace unavailable: Coolify SSH is not configured on this server. " +
        "Set COOLIFY_SSH_HOST and COOLIFY_SSH_PRIVATE_KEY_B64 (see lib/coolify-ssh.ts). " +
        "Verify with GET /api/internal/infra-health using INFRA_HEALTH_SECRET. " +
        "Local-only: set VIBN_ALLOW_DEV_CONTAINER_WITHOUT_SSH=true to skip this check.",
    );
  }

  // Net-new container creation hits the quota (skip if noStart=true,
  // since a never-started container costs nothing). The QuotaExceededError
  // bubbles up to the MCP route which surfaces it as a 402 to the AI;
  // the AI's recovery middleware can offer to suspend an idle one.
  if (!opts.noStart) {
    await assertDevContainerQuota(opts.workspace.slug);
  }

  // Need a Coolify project to land the service in.
  let coolifyProjectUuid = await getProjectCoolifyUuid(
    opts.projectId,
    opts.workspace,
  );
  if (!coolifyProjectUuid) {
    coolifyProjectUuid = await ensureProjectCoolifyProject(
      opts.projectId,
      opts.workspace,
      { projectSlug: opts.projectSlug, projectName: opts.projectName },
    );
  }
  if (!coolifyProjectUuid) {
    throw new Error(
      `Could not provision Coolify project for ${opts.projectId}; dev container creation aborted.`,
    );
  }

  const created = await createDockerComposeApp({
    projectUuid: coolifyProjectUuid,
    name: `vibn-dev-${opts.projectSlug}`,
    description: `AI dev container for project ${opts.projectName ?? opts.projectSlug}`,
    composeRaw: renderDevCompose(opts.projectSlug, opts.projectId),
    instantDeploy: !opts.noStart,
  });

  // A provision-only container is recorded as suspended so a later
  // ensureDevContainer()/execInDevContainer() call takes the resume path.
  const initialState: DevContainerRow["state"] = opts.noStart
    ? "suspended"
    : "provisioning";

  await query(
    `INSERT INTO fs_project_dev_containers
       (project_id, workspace, service_uuid, image, state)
     VALUES ($1, $2, $3, $4, $5)
     ON CONFLICT (project_id) DO UPDATE
       SET service_uuid = EXCLUDED.service_uuid,
           image = EXCLUDED.image,
           state = EXCLUDED.state`,
    [
      opts.projectId,
      opts.workspace.slug,
      created.uuid,
      VIBN_DEV_IMAGE,
      initialState,
    ],
  );

  // Bookkeeping link so apps_list / projects_get see the dev container
  // under the right Vibn project.
  try {
    await linkResourceToProject(
      opts.projectId,
      opts.workspace.slug,
      created.uuid,
      "service",
    );
  } catch (err) {
    // Best-effort: the container is usable without the link, so only log.
    console.warn(
      "[dev-container] linkResourceToProject failed",
      err instanceof Error ? err.message : String(err),
    );
  }

  // Report the state that was actually persisted (previously this always
  // said "provisioning", even for noStart rows stored as "suspended").
  return { serviceUuid: created.uuid, state: initialState, created: true };
}

// ── Lifecycle ────────────────────────────────────────────────────────

/**
 * Stop the project's Coolify service and mark the row suspended.
 * No-op when the project has no container or it is already suspended.
 */
export async function suspendDevContainer(projectId: string): Promise<void> {
  const row = await getDevContainerRow(projectId);
  if (!row) return;
  if (row.state === "suspended") return;
  await stopService(row.service_uuid);
  await query(
    `UPDATE fs_project_dev_containers
        SET state = 'suspended', suspended_at = now()
      WHERE project_id = $1`,
    [projectId],
  );
}

/**
 * Start the project's Coolify service and mark the row running.
 * No-op when the row already says "running".
 *
 * @throws Error when no dev container row exists for `projectId`.
 */
export async function resumeDevContainer(projectId: string): Promise<void> {
  const row = await getDevContainerRow(projectId);
  if (!row) throw new Error(`No dev container provisioned for ${projectId}`);
  if (row.state === "running") return;
  await startService(row.service_uuid);
  await query(
    `UPDATE fs_project_dev_containers
        SET state = 'running', suspended_at = NULL, last_active_at = now()
      WHERE project_id = $1`,
    [projectId],
  );
}

/**
 * Record activity for a project: bumps `last_active_at` and clears
 * `suspended_at`.
 */
async function touchActivity(projectId: string): Promise<void> {
  // Also flips state 'provisioning' → 'running' on first successful exec.
  // We can't rely on Coolify's deploy webhook alone (it fires before the
  // container's actually accepting docker exec), so the first exec that
  // returns is our authoritative liveness signal.
  await query(
    `UPDATE fs_project_dev_containers
        SET last_active_at = now(),
            state = CASE WHEN state IN ('provisioning','suspended') THEN 'running' ELSE state END,
            suspended_at = NULL
      WHERE project_id = $1`,
    [projectId],
  );
}

// ── Exec primitive ───────────────────────────────────────────────────

export interface DevContainerExecOpts {
  projectId: string;
  command: string;
  cwd?: string; // defaults to /workspace
  timeoutMs?: number;
  maxBytes?: number;
  /** Override the user (default: vibn). Use 'root' only when needed. */
  user?: string;
  /** Extra env vars (k=v pairs prepended via the `env` utility). */
  env?: Record<string, string>;
}

/**
 * Run a command inside the project's vibn-dev service.
 * Resumes the container if suspended, then docker-exec's via the
 * existing SSH primitive. Stdout/stderr/exit-code returned synchronously.
 *
 * The caller is responsible for verifying the projectId belongs to the
 * workspace BEFORE calling this. We re-verify the container UUID via
 * the exec primitive's own resolution (it queries `docker ps --filter
 * name={uuid}`), so a mismatched projectId can't reach foreign containers.
 *
 * Note: `opts.command` is passed to the shell verbatim (it is a shell
 * snippet, not an argv). When `opts.env` is given the variables are applied
 * with `env K=V … <command>`, which by shell grammar covers only the first
 * simple command of a compound snippet.
 *
 * @throws Error when Coolify SSH is not configured or the project has no
 *         dev container row.
 */
export async function execInDevContainer(
  opts: DevContainerExecOpts,
): Promise<ExecInAppResult> {
  if (!isCoolifySshConfigured()) {
    throw new Error(
      "shell.exec requires SSH access to the Coolify host; configure COOLIFY_SSH_* envs.",
    );
  }
  const row = await getDevContainerRow(opts.projectId);
  if (!row) {
    throw new Error(
      `No dev container for project ${opts.projectId}. Call ensureDevContainer() first.`,
    );
  }
  if (row.state === "suspended") {
    await resumeDevContainer(opts.projectId);
  }

  const cwd = opts.cwd && opts.cwd.trim() ? opts.cwd.trim() : "/workspace";
  const envPrefix = opts.env
    ? Object.entries(opts.env)
        .map(([k, v]) => `${shellEscape(k)}=${shellEscape(v)}`)
        .join(" ")
    : "";
  const wrapped = envPrefix
    ? `cd ${shellEscape(cwd)} && env ${envPrefix} ${opts.command}`
    : `cd ${shellEscape(cwd)} && ${opts.command}`;

  const result = await execInCoolifyApp({
    appUuid: row.service_uuid,
    service: "vibn-dev",
    command: wrapped,
    user: opts.user ?? "vibn",
    timeoutMs: opts.timeoutMs,
    maxBytes: opts.maxBytes,
  });

  await touchActivity(opts.projectId);
  return result;
}

/** Quote `s` as a single POSIX-shell word (single quotes, `'` → `'\''`). */
function shellEscape(s: string): string {
  return `'${s.replace(/'/g, `'\\''`)}'`;
}

// ── Health ───────────────────────────────────────────────────────────

/**
 * Quick liveness check used by chat startup to decide whether to show
 * a "spinning up your environment…" banner.
*/
export async function getDevContainerStatus(projectId: string): Promise<{
  exists: boolean;
  state: DevContainerRow["state"] | "absent";
  serviceUuid: string | null;
  /** Seconds since the row was created; useful for AI to decide whether to keep polling. */
  ageSeconds?: number;
  /** Set when state was just self-healed by this call. */
  selfHealed?: boolean;
  /** Set when state is stuck in provisioning past the grace window (likely failed). */
  likelyFailed?: boolean;
  /** Immediate blocker — no need to wait for provisioning timeout. */
  blockedReason?: "coolify_ssh_not_configured" | "coolify_deploy_failed";
  blockedHint?: string;
  /** Coolify's own view of the service status (only populated when stuck). */
  coolifyStatus?: string | null;
}> {
  const row = await getDevContainerRow(projectId);
  if (!row) return { exists: false, state: "absent", serviceUuid: null };

  // Wrap in `new Date()` so this also works if the driver hands back an
  // ISO string instead of a Date (other helpers in this file do the same).
  const ageMs = Date.now() - new Date(row.created_at).getTime();
  const ageSeconds = Math.floor(ageMs / 1000);

  // If we already think it's running or suspended, return as-is. The
  // touchActivity() call inside execInDevContainer keeps the row honest.
  if (row.state !== "provisioning") {
    return {
      exists: true,
      state: row.state,
      serviceUuid: row.service_uuid,
      ageSeconds,
    };
  }

  // State is 'provisioning'. The naive read-only return here used to
  // create a deadlock: the AI polls status forever waiting for a flip
  // that only happens via execInDevContainer. So instead, probe with
  // a cheap `true` exec. If it succeeds, mark running and return.
  // Coolify's service status alone isn't enough — Coolify reports
  // 'running:unknown' for any service without a healthcheck/fqdn,
  // which is every dev container. The exec is the source of truth.
  if (!isCoolifySshConfigured()) {
    return {
      exists: true,
      state: row.state,
      serviceUuid: row.service_uuid,
      ageSeconds,
      likelyFailed: true,
      blockedReason: "coolify_ssh_not_configured",
      blockedHint:
        "Server missing COOLIFY_SSH_HOST / COOLIFY_SSH_PRIVATE_KEY_B64 — docker exec cannot run. " +
        "Configure on vibn-frontend; validate with GET /api/internal/infra-health (INFRA_HEALTH_SECRET).",
    };
  }

  try {
    const probe = await execInCoolifyApp({
      appUuid: row.service_uuid,
      service: "vibn-dev",
      command: "true",
      user: "vibn",
      timeoutMs: 5_000,
    });
    if (probe.code === 0) {
      await touchActivity(projectId);
      return {
        exists: true,
        state: "running",
        serviceUuid: row.service_uuid,
        ageSeconds,
        selfHealed: true,
      };
    }
  } catch {
    // Exec failed — container probably not yet up. Fall through
    // to age-based likelyFailed heuristic.
  }

  // If we've been "provisioning" for >120s, the container is almost
  // certainly stuck (image pull failure, scheduling failure, etc.).
  // Surface that distinct from "still booting" so the AI can stop
  // polling and tell the user instead of looping.
  const likelyFailed = ageSeconds > 120;

  let coolifyStatus: string | null = null;
  let blockedReason: "coolify_deploy_failed" | undefined;
  let blockedHint: string | undefined;

  if (likelyFailed) {
    // Pull the actual Coolify service status so the AI can see WHY
    // the deployment is stuck (image pull error, build failure, etc.)
    // instead of just knowing "it's been provisioning for a while."
    try {
      const svc = await getService(row.service_uuid);
      coolifyStatus = svc?.status ?? null;
      if (coolifyStatus && /fail|error/i.test(coolifyStatus)) {
        blockedReason = "coolify_deploy_failed";
        blockedHint = `Coolify reports service status: "${coolifyStatus}". The dev container image may have failed to build or pull. Check the Coolify dashboard for this service (uuid: ${row.service_uuid}) or regenerate the project. Do NOT keep polling — this will not self-heal.`;
      }
    } catch {
      // Best-effort: a failed status lookup leaves coolifyStatus null.
    }
  }

  return {
    exists: true,
    state: row.state,
    serviceUuid: row.service_uuid,
    ageSeconds,
    likelyFailed,
    blockedReason,
    blockedHint,
    coolifyStatus,
  };
}

// Re-export getService so route handlers can pull live Coolify status
// without taking a separate dependency on lib/coolify.
export { getService };

// ── Dev servers ──────────────────────────────────────────────────────
//
// Long-running processes (Vite, Next dev, etc.) launched inside the
// dev container. We don't have a real supervisor; we shell out to
// `nohup`, redirect logs to /var/log/vibn-dev/<id>.log, and remember
// the PID + port in fs_dev_servers so subsequent calls can stop or
// list them.
//
// Preview URLs are exposed via Traefik's "host" router using the
// internal Coolify network (the dev container's primary bridge IP is
// reachable from Traefik). Full Traefik wildcard wiring lands in
// /vibn-dev/PREVIEWS.md and a separate Traefik config commit; this
// module just records the URL we WILL serve at, so the caller can
// hand it back to the chat.

/** Per-process memo so the CREATE TABLE round-trip happens at most once. */
let devServersTableReady = false;

/**
 * Create the `fs_dev_servers` table and its index if they do not exist.
 * Rows reference `fs_project_dev_containers` and are removed with it
 * (ON DELETE CASCADE).
 */
async function ensureDevServersTable(): Promise<void> {
  if (devServersTableReady) return;
  await query(
    `CREATE TABLE IF NOT EXISTS fs_dev_servers (
       id           TEXT PRIMARY KEY,
       project_id   TEXT NOT NULL REFERENCES fs_project_dev_containers(project_id) ON DELETE CASCADE,
       workspace    TEXT NOT NULL,
       name         TEXT NOT NULL,
       command      TEXT NOT NULL,
       port         INTEGER NOT NULL,
       pid          INTEGER,
       preview_url  TEXT NOT NULL,
       state        TEXT NOT NULL DEFAULT 'starting',
       started_at   TIMESTAMPTZ NOT NULL DEFAULT now(),
       stopped_at   TIMESTAMPTZ
     );
     CREATE INDEX IF NOT EXISTS fs_dev_servers_project_idx
       ON fs_dev_servers (project_id, state);`,
    [],
  );
  devServersTableReady = true;
}

/** One row of `fs_dev_servers` (one launched dev-server process). */
export interface DevServerRow {
  id: string;
  project_id: string;
  workspace: string;
  name: string;
  command: string;
  port: number;
  pid: number | null;
  preview_url: string;
  state: "starting" | "running" | "stopped" | "failed";
  started_at: Date;
  stopped_at: Date | null;
}

/**
 * Return `bytes` pseudo-random bytes as a lowercase hex string
 * (2 × `bytes` characters).
 *
 * Uses Math.random, so the result is NOT cryptographically secure — it is
 * only suitable for collision-avoiding identifiers, not for secrets.
 */
function randomToken(bytes = 4): string {
  const buf = Buffer.alloc(bytes);
  for (let i = 0; i < bytes; i++) buf[i] = Math.floor(Math.random() * 256);
  return buf.toString("hex");
}

/**
 * Map (projectSlug, port) → preview URL. Must match the Host() rules
 * baked into the compose labels by renderDevCompose.
* Slot index is derived from `port - PREVIEW_BASE_PORT`.
 *
 * Returns null when the port falls outside the pre-allocated slot range.
 */
function buildPreviewUrl(
  projectId: string,
  projectSlug: string,
  port: number,
): string | null {
  const slot = port - PREVIEW_BASE_PORT;
  const outOfRange = slot < 0 || slot >= PREVIEW_PORT_COUNT;
  if (outOfRange) {
    return null;
  }
  const host = `preview-${slot}-${projectSlug}-${projectPreviewToken(projectId)}.${PREVIEW_DOMAIN_BASE_RAW}`;
  return `https://${host}`;
}

/** Inputs for launching a dev server inside a project's dev container. */
export interface StartDevServerOpts {
  projectId: string;
  projectSlug: string;
  command: string;
  port: number;
  name?: string;
  workspace: VibnWorkspace;
}

/** Raised when something is still listening on the requested preview port. */
export class PortBusyError extends Error {
  public readonly port: number;
  public readonly listenerPid: number | null;
  public readonly listenerCmd: string;

  constructor(port: number, listenerPid: number | null, listenerCmd: string) {
    super(
      `Port ${port} is already in use by pid ${listenerPid ?? "?"} (${listenerCmd}). ` +
        `Stop it first, or pick another port from ${PREVIEW_BASE_PORT}-${PREVIEW_BASE_PORT + PREVIEW_PORT_COUNT - 1}.`,
    );
    this.port = port;
    this.listenerPid = listenerPid;
    this.listenerCmd = listenerCmd;
    this.name = "PortBusyError";
  }
}

/** Raised when the requested port has no pre-allocated preview slot. */
export class PortOutOfRangeError extends Error {
  public readonly port: number;

  constructor(port: number) {
    super(
      `Port ${port} is outside the preview slot range ${PREVIEW_BASE_PORT}-${PREVIEW_BASE_PORT + PREVIEW_PORT_COUNT - 1}. ` +
        `Pick a port in that range so the preview URL is reachable through Traefik.`,
    );
    this.port = port;
    this.name = "PortOutOfRangeError";
  }
}

/**
 * Traefik reaches the dev container on the Docker `coolify` network. If the
 * dev server binds loopback only, the proxy returns 502. Many CLIs ignore
 * HOST= — Next.js needs `-H 0.0.0.0`; Vite honours VITE_DEV_SERVER_HOST.
*/
export function ensurePreviewListenAllInterfaces(command: string): string {
  let cmd = command.trim();
  if (!cmd) return cmd;
  const universalEnv =
    "export HOST=0.0.0.0 HOSTNAME=0.0.0.0 VITE_DEV_SERVER_HOST=0.0.0.0 WEBPACK_DEV_SERVER_HOST=0.0.0.0; ";
  // A leading `\b` never matches in front of `-` after whitespace (both are
  // non-word characters), so anchor on start-of-string / whitespace instead.
  const hasHostFlag = /(?:^|\s)(?:-H|--hostname)(?=[\s=]|$)/.test(cmd);
  if (/\bnext\s+dev\b/.test(cmd) && !hasHostFlag) {
    cmd = cmd.replace(/\bnext\s+dev\b/, "next dev -H 0.0.0.0");
  }
  return universalEnv + cmd;
}

/**
 * Poll localhost inside the container until the dev server answers or time out.
 * Promotes `starting` → `running` / `failed` in fs_dev_servers. Intended to be
 * fired asynchronously after dev_server.start returns so MCP latency stays low.
 *
 * Never throws for probe failures: both a non-zero probe exit and an exec
 * error are logged and recorded as `failed` (rows already `stopped` are left
 * alone).
 *
 * @param projectId Project whose dev container is probed.
 * @param serverId  fs_dev_servers id; also names the log file that is tailed
 *                  on failure, so it is interpolated into the shell command.
 * @param port      Port probed on 127.0.0.1 inside the container.
 */
export async function probeDevServerReadiness(
  projectId: string,
  serverId: string,
  port: number,
): Promise<void> {
  await ensureDevServersTable();
  // Up to ~300s: Next/Vite cold compile + potential npm installs can take a while.
  // We accept any HTTP response (including 404/500) as "listening" — only
  // connection failures stay 000 — because `curl -sf` falsely failed when `/`
  // returned a dev error page before the app was fully ready.
  const probeCmd =
    `last_code=000; ` +
    `for i in $(seq 1 300); do ` +
    `for path in / ''; do ` +
    `code=$(curl -sS -o /dev/null -w '%{http_code}' --max-time 2 --connect-timeout 2 ` +
    `"http://127.0.0.1:${port}$path" 2>/dev/null || printf '000'); ` +
    `last_code=$code; ` +
    `[ "$code" != "000" ] && [ -n "$code" ] && exit 0; ` +
    `done; ` +
    `sleep 1; done; ` +
    `echo "PROBE_FAIL last_code=$last_code port=${port}"; ` +
    `echo "PROBE_FAIL ps=$(ps aux | grep -E 'node|npm|next|vite' | grep -v grep | head -3 | tr '\\n' '|')"; ` +
    `echo "PROBE_FAIL log_tail=$(tail -20 /var/log/vibn-dev/${serverId}.log 2>/dev/null | tr '\\n' '|' | head -c 2000)"; ` +
    `exit 1`;
  const markFailed = (): Promise<unknown> =>
    query(
      `UPDATE fs_dev_servers SET state = 'failed' WHERE id = $1 AND project_id = $2 AND state != 'stopped'`,
      [serverId, projectId],
    );
  try {
    const r = await execInDevContainer({
      projectId,
      command: probeCmd,
      timeoutMs: 310_000,
    });
    // NOTE(review): this reads `exitCode` while getDevContainerStatus and
    // autosaveWorkspace read `code` from the same result type — confirm which
    // field ExecInAppResult actually exposes.
    if (r.exitCode === 0) {
      await query(
        `UPDATE fs_dev_servers SET state = 'running' WHERE id = $1 AND project_id = $2 AND state != 'stopped'`,
        [serverId, projectId],
      );
    } else {
      console.error(
        "[probe] FAILED",
        JSON.stringify({
          projectId,
          serverId,
          port,
          exitCode: r.exitCode,
          stdout: (r.stdout || "").slice(0, 600),
        }),
      );
      await markFailed();
    }
  } catch (err) {
    console.error(
      "[probe] ERROR",
      JSON.stringify({
        projectId,
        serverId,
        port,
        err: err instanceof Error ? err.message : String(err),
      }),
    );
    await markFailed();
  }
}

/**
 * Launch a long-running command inside the project's dev container and
 * record it in fs_dev_servers.
 *
 * Steps: validate the port, stop every tracked server on that port, kill
 * any untracked listener, launch under `nohup`, insert the row.
 *
 * @returns The inserted row in state `starting` (`started_at` is the local
 *          clock at return time, not the DB default).
 * @throws PortOutOfRangeError when `opts.port` has no preview slot.
 * @throws PortBusyError when the port is still occupied after the cleanup.
 */
export async function startDevServer(
  opts: StartDevServerOpts,
): Promise<DevServerRow> {
  await ensureDevServersTable();

  // 1. Validate slot range — outside this range we couldn't expose
  //    the preview through Traefik anyway (no router pre-allocated).
  if (
    opts.port < PREVIEW_BASE_PORT ||
    opts.port >= PREVIEW_BASE_PORT + PREVIEW_PORT_COUNT
  ) {
    throw new PortOutOfRangeError(opts.port);
  }

  // 2. Stop ALL tracked rows for this project on the target port.
  //    Previous runs may have crashed or exited without being marked
  //    stopped, causing stale rows to accumulate (15+ rows seen in
  //    prod). We reap them unconditionally before starting anything
  //    new — the AI's intent is "I want THIS command on THIS port",
  //    so most-recent-write-wins.
  const existingRows = await query<{ id: string; pid: number | null }>(
    `SELECT id, pid FROM fs_dev_servers
      WHERE project_id = $1 AND port = $2 AND state IN ('starting','running','failed')`,
    [opts.projectId, opts.port],
  );
  for (const row of existingRows) {
    if (row.pid) {
      await execInDevContainer({
        projectId: opts.projectId,
        command: `kill ${row.pid} 2>/dev/null || true`,
        timeoutMs: 3_000,
      }).catch(() => {});
    }
    await query(
      `UPDATE fs_dev_servers SET state='stopped', stopped_at=now() WHERE id = $1`,
      [row.id],
    );
  }

  // 3. Detect ANY listener on the requested port (including untracked
  //    processes from earlier manual runs). We use ss (ships in
  //    iproute2, default in Ubuntu base) because lsof isn't installed.
  //    `\\b` — doubled on purpose: inside a template literal a single
  //    `\b` is a backspace character, which made the grep never match.
  const portCheck = await execInDevContainer({
    projectId: opts.projectId,
    command:
      `ss -tlnp 2>/dev/null | grep ':${opts.port}\\b' | head -1; ` +
      `lsof -iTCP:${opts.port} -sTCP:LISTEN -n -P 2>/dev/null | tail -n +2 | head -1 || true`,
    timeoutMs: 5_000,
  });
  const listenerLine = portCheck.stdout.trim();
  if (listenerLine) {
    // ss prints `pid=123`; lsof prints `COMMAND PID …`.
    const pidMatch =
      listenerLine.match(/pid=(\d+)/) || listenerLine.match(/^\S+\s+(\d+)/);
    const listenerPid = pidMatch ? parseInt(pidMatch[1], 10) : null;
    // Force-kill whatever is squatting on the port — we already
    // reaped our tracked rows above, so this is an orphan.
    if (listenerPid) {
      await execInDevContainer({
        projectId: opts.projectId,
        command: `kill ${listenerPid} 2>/dev/null || true; sleep 0.5`,
        timeoutMs: 5_000,
      }).catch(() => {});
    }
    // Double-check the port is actually free now
    const recheck = await execInDevContainer({
      projectId: opts.projectId,
      command: `ss -tlnp 2>/dev/null | grep ':${opts.port}\\b' | head -1`,
      timeoutMs: 3_000,
    });
    if (recheck.stdout.trim()) {
      throw new PortBusyError(
        opts.port,
        listenerPid,
        listenerLine.slice(0, 200),
      );
    }
  }

  // 4. Launch.
  const id = `ds_${randomToken(6)}`;
  const name = opts.name ?? `port-${opts.port}`;
  const previewUrl =
    buildPreviewUrl(opts.projectId, opts.projectSlug, opts.port) ??
    `https://localhost-only:${opts.port}`;
  const logFile = `/var/log/vibn-dev/${id}.log`;

  const listenSafeCommand = ensurePreviewListenAllInterfaces(opts.command);
  // Background the process and print its PID so we can stop it later.
  const launch =
    `mkdir -p /var/log/vibn-dev && ` +
    `cd /workspace && ` +
    `nohup env PORT=${opts.port} VIBN_DEV_SERVER_ID=${id} ` +
    `bash -lc ${shellEscape(listenSafeCommand)} > ${logFile} 2>&1 & ` +
    `echo $!`;
  const result = await execInDevContainer({
    projectId: opts.projectId,
    command: launch,
    timeoutMs: 5_000,
  });
  const parsedPid = parseInt(result.stdout.trim(), 10);
  const pid = Number.isFinite(parsedPid) ? parsedPid : null;

  await query(
    `INSERT INTO fs_dev_servers
       (id, project_id, workspace, name, command, port, pid, preview_url, state)
     VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`,
    [
      id,
      opts.projectId,
      opts.workspace.slug,
      name,
      opts.command,
      opts.port,
      pid,
      previewUrl,
      "starting",
    ],
  );

  return {
    id,
    project_id: opts.projectId,
    workspace: opts.workspace.slug,
    name,
    command: opts.command,
    port: opts.port,
    pid,
    preview_url: previewUrl,
    state: "starting",
    started_at: new Date(),
    stopped_at: null,
  };
}

/**
 * List the project's dev servers that are not `stopped`, ordered by
 * `sortDevPreviewsFrontendFirst`.
 */
export async function listDevServers(
  projectId: string,
): Promise<DevServerRow[]> {
  await ensureDevServersTable();
  const rows = await query<DevServerRow>(
    `SELECT * FROM fs_dev_servers WHERE project_id = $1 AND state != 'stopped'`,
    [projectId],
  );
  return sortDevPreviewsFrontendFirst(rows);
}

/**
 * Kill a tracked dev server (best-effort) and mark its row `stopped`.
 *
 * @throws Error when no row with this `id` exists for `projectId`.
 */
export async function stopDevServer(
  projectId: string,
  id: string,
): Promise<void> {
  await ensureDevServersTable();
  const row = await queryOne<DevServerRow>(
    `SELECT * FROM fs_dev_servers WHERE id = $1 AND project_id = $2 LIMIT 1`,
    [id, projectId],
  );
  if (!row) throw new Error(`Dev server ${id} not found`);
  if (row.pid) {
    try {
      await execInDevContainer({
        projectId,
        command: `kill ${row.pid} 2>/dev/null || true`,
        timeoutMs: 3_000,
      });
    } catch {
      // Best-effort: the row is marked stopped even if the kill can't run.
    }
  }
  await query(
    `UPDATE fs_dev_servers SET state = 'stopped', stopped_at = now() WHERE id = $1`,
    [id],
  );
}

/**
 * Return the last `lines` lines of a dev server's log file.
 *
 * @param lines Clamped to 1..2000 and truncated to an integer; a
 *              non-finite value falls back to the default of 200.
 * @returns The log tail, or `(no log yet)` when the file can't be read.
 */
export async function tailDevServerLog(
  projectId: string,
  id: string,
  lines = 200,
): Promise<string> {
  const requested = Number.isFinite(lines) ? Math.trunc(lines) : 200;
  const count = Math.max(1, Math.min(2000, requested));
  // `id` comes from the caller — quote the path so it can't inject shell.
  const logPath = shellEscape(`/var/log/vibn-dev/${id}.log`);
  const r = await execInDevContainer({
    projectId,
    command: `tail -n ${count} ${logPath} 2>/dev/null || echo '(no log yet)'`,
    timeoutMs: 5_000,
  });
  return r.stdout;
}

// ── Auto-push autosave ───────────────────────────────────────────────
//
// Treats Gitea as the canonical store; the container disk is ephemeral.
// On every chat turn (or every 5 min, whichever comes first) we push
// /workspace to a `vibn-autosave/main` branch in the project's repo.
//
// We don't try to be clever about what changed — just `git add -A`,
// commit when something is staged, and force-push. If the repo doesn't
// exist yet (fresh project, no `git init` done), we skip — the AI is
// responsible for `git init` + first push when it scaffolds.

export interface AutosaveOpts {
  projectId: string;
  projectSlug: string;
  workspace: VibnWorkspace;
  /** Repo name in the workspace's Gitea org. Defaults to projectSlug. */
  repo?: string;
  /** Min interval between autosaves (default 5 min). */
  minIntervalMs?: number;
}

/**
 * Commit and force-push the project's working tree to the
 * `vibn-autosave/main` branch, at most once per `minIntervalMs`.
 *
 * Every attempt that reaches the container (including skips and failures)
 * is recorded in fs_dev_autosaves and therefore counts for throttling.
 *
 * NOTE(review): the script runs `git checkout -B vibn-autosave/main`, which
 * leaves the working tree on the autosave branch afterwards — confirm that
 * is intended. Also, the `| tail` pipes hide git's own exit status unless
 * the exec shell enables pipefail, so a failed push can still exit 0.
 *
 * @returns `ran: false` with a reason when nothing was attempted (no
 *          container, not running, throttled, exec error); otherwise
 *          `ran: true`, with `reason: "pushed"` and `pushedAt` only when
 *          the script exited 0 on a real git repo.
 */
export async function autosaveWorkspace(opts: AutosaveOpts): Promise<{
  ran: boolean;
  reason: string;
  pushedAt?: Date;
}> {
  const row = await getDevContainerRow(opts.projectId);
  if (!row) return { ran: false, reason: "no dev container" };
  if (row.state !== "running")
    return { ran: false, reason: `state=${row.state}` };

  // The throttle query reads fs_dev_autosaves, so make sure it exists first
  // (previously the table was only created after this query had run).
  await ensureAutosavesTable();

  // Throttle: don't autosave more than once per minIntervalMs.
  const minInterval = opts.minIntervalMs ?? 5 * 60_000;
  const last = await queryOne<{ pushed_at: Date }>(
    `SELECT pushed_at FROM fs_dev_autosaves WHERE project_id = $1
      ORDER BY pushed_at DESC LIMIT 1`,
    [opts.projectId],
  ).catch(() => null);
  if (last && Date.now() - new Date(last.pushed_at).getTime() < minInterval) {
    return { ran: false, reason: "throttled" };
  }

  // The git config is idempotent; PAT lives in the container's .netrc.
  // `repo` is only recorded in the audit row — the push goes to `origin`.
  const repo = opts.repo ?? opts.projectSlug;
  const noGitMarker = "(no .git, skipping autosave)";
  const cmd = `set -e
cd ${shellEscape(`/workspace/${opts.projectSlug}`)}
if [ ! -d .git ]; then
  echo '${noGitMarker}'
  exit 0
fi
git config user.email vibn-bot@vibnai.com
git config user.name 'Vibn Autosave'
# Force push to the autosave branch — never collides with main.
git checkout -B vibn-autosave/main 2>&1 | tail -1
git add -A
if git diff --cached --quiet; then
  echo '(no changes)'
else
  git commit -m "autosave $(date -Is)" --quiet
fi
git push -f origin vibn-autosave/main 2>&1 | tail -3`;

  try {
    const r = await execInDevContainer({
      projectId: opts.projectId,
      command: cmd,
      timeoutMs: 30_000,
    });
    await query(
      `INSERT INTO fs_dev_autosaves (project_id, workspace, repo, output, code)
       VALUES ($1, $2, $3, $4, $5)`,
      [
        opts.projectId,
        opts.workspace.slug,
        repo,
        (r.stdout + r.stderr).slice(0, 4000),
        r.code,
      ],
    );
    // Don't claim "pushed" when the script bailed out or failed.
    if (r.code !== 0) {
      return { ran: true, reason: `autosave script exited with code ${r.code}` };
    }
    if ((r.stdout || "").includes(noGitMarker)) {
      return { ran: true, reason: "no .git, skipped" };
    }
    return { ran: true, reason: "pushed", pushedAt: new Date() };
  } catch (err) {
    return {
      ran: false,
      reason: err instanceof Error ? err.message : String(err),
    };
  }
}

/** Per-process memo so the CREATE TABLE round-trip happens at most once. */
let autosavesTableReady = false;

/** Create the `fs_dev_autosaves` audit table and its index if missing. */
async function ensureAutosavesTable(): Promise<void> {
  if (autosavesTableReady) return;
  await query(
    `CREATE TABLE IF NOT EXISTS fs_dev_autosaves (
       id          BIGSERIAL PRIMARY KEY,
       project_id  TEXT NOT NULL,
       workspace   TEXT NOT NULL,
       repo        TEXT NOT NULL,
       output      TEXT,
       code        INTEGER,
       pushed_at   TIMESTAMPTZ NOT NULL DEFAULT now()
     );
     CREATE INDEX IF NOT EXISTS fs_dev_autosaves_project_idx
       ON fs_dev_autosaves (project_id, pushed_at DESC);`,
    [],
  );
  autosavesTableReady = true;
}

// ── Idle suspend ─────────────────────────────────────────────────────

export interface IdleSweepResult {
  scanned: number;
  suspended: Array<{ projectId: string; idleMin: number }>;
  errors: Array<{ projectId: string; error: string }>;
}

/**
 * Suspend any running dev containers that haven't been touched in
 * `idleMinutes` minutes. Intended for a once-per-5-min cron. Idempotent:
 * re-running is a no-op for already-suspended containers.
*
 * Failures are collected per project instead of aborting the sweep.
 *
 * @param idleMinutes Idle threshold in minutes (default 30).
 * @returns How many rows matched, which projects were suspended (with
 *          their idle time in whole minutes) and which ones errored.
 */
export async function suspendIdleContainers(
  idleMinutes = 30,
): Promise<IdleSweepResult> {
  await ensureDevContainersTable();
  const cutoff = new Date(Date.now() - idleMinutes * 60_000);
  const rows = await query<DevContainerRow>(
    `SELECT * FROM fs_project_dev_containers
      WHERE state = 'running' AND last_active_at < $1`,
    [cutoff],
  );
  const result: IdleSweepResult = {
    scanned: rows.length,
    suspended: [],
    errors: [],
  };
  for (const r of rows) {
    try {
      await suspendDevContainer(r.project_id);
      const idleMin = Math.floor(
        (Date.now() - new Date(r.last_active_at).getTime()) / 60_000,
      );
      result.suspended.push({ projectId: r.project_id, idleMin });
    } catch (err) {
      result.errors.push({
        projectId: r.project_id,
        error: err instanceof Error ? err.message : String(err),
      });
    }
  }
  return result;
}