Files
vibn-agent-runner/vibn-frontend/lib/dev-container.ts

1171 lines
40 KiB
TypeScript

/**
* Per-project AI dev container ("vibn-dev").
*
* One Coolify Service per Vibn project, running the `vibn-dev` image.
* The AI agent drives it via:
* - shell.exec → docker exec into the container (via existing SSH path)
* - fs.* → file ops (implemented as `cat` / `tee` / `rm` etc.
* inside the container, on top of shell.exec)
* - dev_server.* → start long-running processes (week 2)
* - ship → git push to Gitea + trigger Coolify deploy (week 2)
*
* Lifecycle states:
* - Not provisioned → ensureDevContainer() creates the Coolify service
* - Suspended → Coolify-stopped (saves money). resume() starts it.
* - Running → docker exec works.
*
* Tenant safety: every helper takes a workspace and the caller must have
* already verified that the projectId belongs to that workspace via
* fs_projects. The exec primitive ALSO verifies the resolved container
* UUID is in the workspace's owned Coolify-project set, so a hijacked
* projectId can't reach unrelated containers.
*
* See: AI_PATH_B_EXECUTION_PLAN.md §3.
*/
import { query, queryOne } from "@/lib/db-postgres";
import {
createDockerComposeApp,
startService,
stopService,
getService,
} from "@/lib/coolify";
import { execInCoolifyApp, type ExecInAppResult } from "@/lib/coolify-exec";
import { isCoolifySshConfigured } from "@/lib/coolify-ssh";
import {
ensureProjectCoolifyProject,
getProjectCoolifyUuid,
linkResourceToProject,
} from "@/lib/projects";
import type { VibnWorkspace } from "@/lib/workspaces";
import { assertDevContainerQuota } from "@/lib/quotas";
import { sortDevPreviewsFrontendFirst } from "@/lib/dev-preview-priority";
// ── Configuration ────────────────────────────────────────────────────
/**
* Image tag for vibn-dev. Built and pushed from /vibn-dev/Dockerfile.
* Override per-environment with VIBN_DEV_IMAGE for staging/canary tags.
*/
// Used both in the rendered compose file (`image:`) and in the persisted
// fs_project_dev_containers row.
export const VIBN_DEV_IMAGE = process.env.VIBN_DEV_IMAGE ?? "vibn-dev:latest";
/** Resource caps per dev container. Tweak in env per-tier later. */
// CPU and memory caps are interpolated into the compose
// `deploy.resources.limits` section by renderDevCompose.
const DEFAULT_CPU_LIMIT = process.env.VIBN_DEV_CPU_LIMIT ?? "1"; // 1 vCPU
const DEFAULT_MEM_LIMIT = process.env.VIBN_DEV_MEM_LIMIT ?? "1g"; // 1 GiB
// NOTE(review): DEFAULT_DISK_LIMIT is not referenced in the visible part of
// this file — confirm whether it is still needed.
const DEFAULT_DISK_LIMIT = process.env.VIBN_DEV_DISK_LIMIT ?? "10g"; // soft hint, not enforced by compose
// ── Schema ───────────────────────────────────────────────────────────
/** Process-local flag: set once the DDL below has completed successfully. */
let devContainersTableReady = false;

/**
 * Create `fs_project_dev_containers` and its two indexes when missing.
 *
 * The DDL is idempotent (`IF NOT EXISTS`) and is skipped entirely once a
 * previous call in this process has completed without throwing.
 */
export async function ensureDevContainersTable(): Promise<void> {
  if (!devContainersTableReady) {
    const ddl = `CREATE TABLE IF NOT EXISTS fs_project_dev_containers (
project_id TEXT PRIMARY KEY,
workspace TEXT NOT NULL,
service_uuid TEXT NOT NULL,
image TEXT NOT NULL,
state TEXT NOT NULL DEFAULT 'provisioning',
last_active_at TIMESTAMPTZ NOT NULL DEFAULT now(),
suspended_at TIMESTAMPTZ,
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE INDEX IF NOT EXISTS fs_project_dev_containers_ws_idx
ON fs_project_dev_containers (workspace);
CREATE INDEX IF NOT EXISTS fs_project_dev_containers_active_idx
ON fs_project_dev_containers (last_active_at);`;
    await query(ddl, []);
    devContainersTableReady = true;
  }
}
/**
 * One row of `fs_project_dev_containers`; field names mirror the column
 * names created by ensureDevContainersTable.
 */
export interface DevContainerRow {
// Primary key: at most one dev container row per project.
project_id: string;
// Workspace slug recorded when the container was provisioned.
workspace: string;
// UUID of the backing Coolify service; passed to start/stop/exec calls.
service_uuid: string;
// Image tag the service was created with.
image: string;
// Lifecycle state; new rows default to 'provisioning' at the SQL level.
state: "provisioning" | "running" | "suspended" | "failed";
// Bumped by touchActivity() and resumeDevContainer().
last_active_at: Date;
// Set by suspendDevContainer(); cleared again on resume / activity.
suspended_at: Date | null;
created_at: Date;
}
/**
 * Look up the dev-container row for a project.
 *
 * Ensures the backing table exists first, then returns whatever `queryOne`
 * yields for the project's row (no row means no container was provisioned).
 */
export async function getDevContainerRow(
  projectId: string,
): Promise<DevContainerRow | null> {
  await ensureDevContainersTable();
  const sql = `SELECT * FROM fs_project_dev_containers WHERE project_id = $1 LIMIT 1`;
  const params = [projectId];
  return queryOne<DevContainerRow>(sql, params);
}
// ── Compose template ─────────────────────────────────────────────────
/**
* Render the docker-compose.yml that backs a single vibn-dev service.
*
* Two named volumes are intentional:
* - workspace : everything in /workspace (the user's source tree).
* Persists across suspends. Backed up to Gitea every
* 5 min via the auto-push autosave loop (week 2).
* - cache : language-toolchain caches (mise, npm, pip, cargo).
* Persists across suspends; per-project (never shared).
*
* The container has NO Vibn-internal network access. We rely on the
* default Coolify-bridge network being isolated from the vibn-postgres
* / vibn-frontend bridge. (Network policy hardening lands in week 1
* day 2 alongside the auto-push job.)
*/
/**
* Pre-allocated preview-port slots. We bake Traefik labels for
* ports 3000..3000+PREVIEW_PORT_COUNT-1 directly into the compose,
* so `dev_server.start` doesn't have to mutate the compose at runtime
* (which would require a Coolify redeploy and ~30s of latency).
*
* The first slot is the project's "primary" preview; additional slots
* cover the few-times-a-session case where the AI runs both a Vite
* frontend and a separate API. Cap is intentionally low (10) so a
* single user can't stand up dozens of public URLs.
*
* Subdomain shape: preview-{slot}-{projectSlug}-{token}.preview.vibnai.com
* - slot is 0..9, used to disambiguate when one project runs >1 server
* - token is a per-project random suffix written at compose-render
* time so URLs aren't enumerable across projects
*/
// First container port that gets a pre-allocated Traefik router (slot 0).
export const PREVIEW_BASE_PORT = 3000;
// Number of consecutive ports (starting at PREVIEW_BASE_PORT) with a router.
export const PREVIEW_PORT_COUNT = 10;
/**
 * Derive the per-project suffix used in preview hostnames.
 *
 * The token is the lowercase hex encoding of the first four UTF-8 bytes of
 * `projectId` (fewer if the id is shorter). It is fully deterministic, so the
 * same project always maps to the same subdomains.
 *
 * NOTE(review): this is not random and nothing in this function persists it;
 * two project ids sharing their first four bytes get the same token.
 */
function projectPreviewToken(projectId: string): string {
  const idBytes = Buffer.from(projectId);
  const leadingBytes = idBytes.subarray(0, 4);
  return leadingBytes.toString("hex");
}
/**
 * Render the docker-compose document for one project's vibn-dev service.
 *
 * @param projectSlug Interpolated into router names, preview hostnames, the
 *                    bridge network name and the VIBN_PROJECT_SLUG env var.
 * @param projectId   Exposed as VIBN_PROJECT_ID and used to derive the
 *                    preview token via projectPreviewToken().
 * @returns The compose file content as a single string.
 */
function renderDevCompose(projectSlug: string, projectId: string): string {
// Image distribution: we build vibn-dev on the Coolify host once
// (see /vibn-dev/setup-on-coolify.sh) and reference it locally.
// pull_policy: never tells Docker not to attempt a registry pull.
//
// Network isolation: vibn-dev sits on its OWN bridge network
// (`vibn-dev-net-${slug}`). On Coolify the Traefik proxy ALSO joins
// this network so it can reach the dev container; vibn-postgres /
// vibn-frontend do not.
//
// Traefik labels: pre-allocated routers for ports 3000..3009. Each
// router uses a distinct subdomain. Routes only "activate" when a
// process is actually listening on the port — Traefik does the
// health check.
const token = projectPreviewToken(projectId);
// Each label is stored already wrapped in double quotes so it can be
// emitted verbatim as a YAML list item below.
const traefikLabels: string[] = [
'"traefik.enable=true"',
'"traefik.docker.network=coolify"',
];
// One router + one service per preview slot; slot i maps to container
// port PREVIEW_BASE_PORT + i.
for (let i = 0; i < PREVIEW_PORT_COUNT; i++) {
const port = PREVIEW_BASE_PORT + i;
const router = `vibn-dev-${projectSlug}-${i}`;
// Must stay in sync with the URL shape produced by buildPreviewUrl().
const host = `preview-${i}-${projectSlug}-${token}.${PREVIEW_DOMAIN_BASE_RAW}`;
traefikLabels.push(
`"traefik.http.routers.${router}.rule=Host(\`${host}\`)"`,
);
traefikLabels.push(`"traefik.http.routers.${router}.entrypoints=https"`);
traefikLabels.push(`"traefik.http.routers.${router}.tls=true"`);
traefikLabels.push(
`"traefik.http.routers.${router}.tls.certresolver=letsencrypt-dns"`,
);
traefikLabels.push(
`"traefik.http.services.${router}.loadbalancer.server.port=${port}"`,
);
// Bind the router explicitly to the service of the same name.
traefikLabels.push(`"traefik.http.routers.${router}.service=${router}"`);
}
// Turn the labels into YAML list items, one per line.
const labelsBlock = traefikLabels.map((l) => ` - ${l}`).join("\n");
return `services:
vibn-dev:
image: ${VIBN_DEV_IMAGE}
pull_policy: never
restart: unless-stopped
working_dir: /workspace
volumes:
- workspace:/workspace
- cache:/home/vibn/.cache
environment:
- VIBN_PROJECT_SLUG=${projectSlug}
- VIBN_PROJECT_ID=${projectId}
- VIBN_PREVIEW_TOKEN=${token}
- VIBN_DEV_CONTAINER=1
networks:
- vibn-dev-net
- coolify
labels:
${labelsBlock}
deploy:
resources:
limits:
cpus: '${DEFAULT_CPU_LIMIT}'
memory: ${DEFAULT_MEM_LIMIT}
networks:
vibn-dev-net:
name: vibn-dev-net-${projectSlug}
driver: bridge
coolify:
external: true
volumes:
workspace:
cache:
`;
}
// Parent domain under which every preview hostname is created. Read by both
// renderDevCompose (Traefik Host rules) and buildPreviewUrl.
const PREVIEW_DOMAIN_BASE_RAW =
process.env.VIBN_PREVIEW_DOMAIN_BASE ?? "preview.vibnai.com";
// ── Provisioning ─────────────────────────────────────────────────────
/** Input for ensureDevContainer(). */
export interface EnsureDevContainerOpts {
// Vibn project id; primary key of the dev-container row.
projectId: string;
// Used for the Coolify service name (`vibn-dev-<slug>`) and the compose file.
projectSlug: string;
// Human-readable name for the service description; falls back to projectSlug.
projectName?: string;
// Owning workspace; its slug is used for quota checks and stored on the row.
workspace: VibnWorkspace;
/** Skip the initial start (provision-only). Default: start it. */
noStart?: boolean;
}
/** Outcome of ensureDevContainer(). */
export interface EnsureDevContainerResult {
// UUID of the Coolify service backing the dev container.
serviceUuid: string;
// Lifecycle state reported to the caller after this call.
state: DevContainerRow["state"];
// True only when this call created a new Coolify service.
created: boolean;
}
/**
 * Idempotently ensure a vibn-dev service exists for the given Vibn project.
 *
 * - Already provisioned → returns the stored row's service UUID and state.
 *   A suspended container is resumed first (after a quota check) unless
 *   `opts.noStart` is set.
 * - Not provisioned → ensures the per-project Coolify project exists, creates
 *   the docker-compose service, persists the row and links the resource to
 *   the Vibn project (best-effort).
 *
 * The returned `state` for a newly created container matches what was
 * persisted: 'provisioning' when it was started, 'suspended' when
 * `opts.noStart` skipped the initial start.
 *
 * @throws Error when Coolify SSH is not configured (unless
 *         VIBN_ALLOW_DEV_CONTAINER_WITHOUT_SSH=true) or when no Coolify
 *         project could be provisioned. Errors from assertDevContainerQuota
 *         propagate unchanged.
 */
export async function ensureDevContainer(
  opts: EnsureDevContainerOpts,
): Promise<EnsureDevContainerResult> {
  await ensureDevContainersTable();
  const existing = await getDevContainerRow(opts.projectId);
  if (existing) {
    if (existing.state === "suspended" && !opts.noStart) {
      // Resume counts as "starting one more" against the quota, since
      // a suspended container is free but a running one isn't.
      await assertDevContainerQuota(opts.workspace.slug);
      await resumeDevContainer(opts.projectId);
      return {
        serviceUuid: existing.service_uuid,
        state: "running",
        created: false,
      };
    }
    return {
      serviceUuid: existing.service_uuid,
      state: existing.state,
      created: false,
    };
  }

  const allowDevContainerWithoutSsh =
    process.env.VIBN_ALLOW_DEV_CONTAINER_WITHOUT_SSH === "true";
  if (!allowDevContainerWithoutSsh && !isCoolifySshConfigured()) {
    throw new Error(
      "Dev workspace unavailable: Coolify SSH is not configured on this server. " +
        "Set COOLIFY_SSH_HOST and COOLIFY_SSH_PRIVATE_KEY_B64 (see lib/coolify-ssh.ts). " +
        "Verify with GET /api/internal/infra-health using INFRA_HEALTH_SECRET. " +
        "Local-only: set VIBN_ALLOW_DEV_CONTAINER_WITHOUT_SSH=true to skip this check.",
    );
  }

  // Net-new container creation hits the quota (skip if noStart=true,
  // since a never-started container costs nothing).
  if (!opts.noStart) {
    await assertDevContainerQuota(opts.workspace.slug);
  }

  // Need a Coolify project to land the service in.
  let coolifyProjectUuid = await getProjectCoolifyUuid(
    opts.projectId,
    opts.workspace,
  );
  if (!coolifyProjectUuid) {
    coolifyProjectUuid = await ensureProjectCoolifyProject(
      opts.projectId,
      opts.workspace,
      { projectSlug: opts.projectSlug, projectName: opts.projectName },
    );
  }
  if (!coolifyProjectUuid) {
    throw new Error(
      `Could not provision Coolify project for ${opts.projectId}; dev container creation aborted.`,
    );
  }

  const created = await createDockerComposeApp({
    projectUuid: coolifyProjectUuid,
    name: `vibn-dev-${opts.projectSlug}`,
    description: `AI dev container for project ${opts.projectName ?? opts.projectSlug}`,
    composeRaw: renderDevCompose(opts.projectSlug, opts.projectId),
    instantDeploy: !opts.noStart,
  });

  // A provision-only container is recorded as suspended so a later call
  // without noStart goes through the normal resume path. The same value is
  // returned to the caller so the result never disagrees with the row.
  const initialState: DevContainerRow["state"] = opts.noStart
    ? "suspended"
    : "provisioning";

  await query(
    `INSERT INTO fs_project_dev_containers
(project_id, workspace, service_uuid, image, state)
VALUES ($1, $2, $3, $4, $5)
ON CONFLICT (project_id) DO UPDATE
SET service_uuid = EXCLUDED.service_uuid,
image = EXCLUDED.image,
state = EXCLUDED.state`,
    [
      opts.projectId,
      opts.workspace.slug,
      created.uuid,
      VIBN_DEV_IMAGE,
      initialState,
    ],
  );

  // Bookkeeping link so apps_list / projects_get see the dev container
  // under the right Vibn project. Best-effort: a failure here must not
  // undo a successful provision, but it is logged so it can be noticed.
  try {
    await linkResourceToProject(
      opts.projectId,
      opts.workspace.slug,
      created.uuid,
      "service",
    );
  } catch (err) {
    console.warn(
      "[dev-container] linkResourceToProject failed",
      JSON.stringify({
        projectId: opts.projectId,
        serviceUuid: created.uuid,
        err: err instanceof Error ? err.message : String(err),
      }),
    );
  }

  return { serviceUuid: created.uuid, state: initialState, created: true };
}
// ── Lifecycle ────────────────────────────────────────────────────────
/**
 * Stop the project's Coolify service and record the row as suspended.
 *
 * A no-op when the project has no dev-container row or the row is already
 * marked suspended.
 */
export async function suspendDevContainer(projectId: string): Promise<void> {
  const container = await getDevContainerRow(projectId);
  if (!container || container.state === "suspended") {
    return;
  }

  await stopService(container.service_uuid);

  const markSuspended = `UPDATE fs_project_dev_containers
SET state = 'suspended', suspended_at = now()
WHERE project_id = $1`;
  await query(markSuspended, [projectId]);
}
/**
 * Start the project's Coolify service and record the row as running.
 *
 * Returns immediately when the row already says 'running'.
 *
 * @throws Error when no dev container row exists for the project.
 */
export async function resumeDevContainer(projectId: string): Promise<void> {
  const container = await getDevContainerRow(projectId);
  if (!container) {
    throw new Error(`No dev container provisioned for ${projectId}`);
  }
  if (container.state !== "running") {
    await startService(container.service_uuid);

    const markRunning = `UPDATE fs_project_dev_containers
SET state = 'running', suspended_at = NULL, last_active_at = now()
WHERE project_id = $1`;
    await query(markRunning, [projectId]);
  }
}
/**
 * Record activity on a project's dev container.
 *
 * Bumps `last_active_at`, clears `suspended_at`, and promotes a row that is
 * still 'provisioning' or 'suspended' to 'running'. Callers invoke this after
 * an exec has returned, which is treated as the liveness signal (Coolify's
 * deploy webhook can fire before the container accepts docker exec).
 */
async function touchActivity(projectId: string): Promise<void> {
  const recordActivity = `UPDATE fs_project_dev_containers
SET last_active_at = now(),
state = CASE WHEN state IN ('provisioning','suspended') THEN 'running' ELSE state END,
suspended_at = NULL
WHERE project_id = $1`;
  const params = [projectId];
  await query(recordActivity, params);
}
// ── Exec primitive ───────────────────────────────────────────────────
/** Input for execInDevContainer(). */
export interface DevContainerExecOpts {
// Project whose dev container the command runs in.
projectId: string;
// Shell command text; it is appended verbatim after a `cd <cwd> &&` prefix.
command: string;
cwd?: string; // defaults to /workspace
// Forwarded unchanged to the underlying exec primitive.
timeoutMs?: number;
// Forwarded unchanged to the underlying exec primitive.
maxBytes?: number;
/** Override the user (default: vibn). Use 'root' only when needed. */
user?: string;
/** Extra env vars for the command; keys and values are shell-escaped. */
env?: Record<string, string>;
}
/**
 * Run a command inside the project's vibn-dev service.
 *
 * Steps, in order:
 *  1. Resume the container when its row says 'suspended'.
 *  2. Best-effort, one-time migration of a legacy nested checkout
 *     (`/workspace/<slug>`) to the root of `/workspace`.
 *  3. Run `opts.command` from `opts.cwd` (default `/workspace`) as
 *     `opts.user` (default `vibn`), with `opts.env` exported first.
 *  4. Record activity on the row once the exec has returned.
 *
 * Environment variables are applied with `export` so they are visible to
 * every part of a compound command (`a && b`, pipelines, builtins). Prefixing
 * with `env K=V` would only affect the first simple command and would fail
 * outright when the command starts with a shell builtin or keyword.
 *
 * The caller is responsible for verifying that the projectId belongs to the
 * workspace BEFORE calling this.
 *
 * @throws Error when Coolify SSH is not configured or no dev container row
 *         exists for the project.
 */
export async function execInDevContainer(
  opts: DevContainerExecOpts,
): Promise<ExecInAppResult> {
  if (!isCoolifySshConfigured()) {
    throw new Error(
      "shell.exec requires SSH access to the Coolify host; configure COOLIFY_SSH_* envs.",
    );
  }
  const row = await getDevContainerRow(opts.projectId);
  if (!row) {
    throw new Error(
      `No dev container for project ${opts.projectId}. Call ensureDevContainer() first.`,
    );
  }
  if (row.state === "suspended") {
    await resumeDevContainer(opts.projectId);
  }

  // Self-healing migration hook: move a legacy nested repository to the
  // root of /workspace. Guarded by a marker file so it runs at most once
  // per container volume. Failures never block the real command.
  try {
    const projectRow = await queryOne<{ slug: string }>(
      `SELECT slug FROM fs_projects WHERE id = $1 LIMIT 1`,
      [opts.projectId],
    );
    if (projectRow?.slug) {
      const slug = projectRow.slug;
      // Every slug-derived path is single-quoted so an unusual slug cannot
      // break out of the shell words it is placed in.
      const nestedDir = shellEscape(`/workspace/${slug}`);
      const nestedDirContents = shellEscape(`/workspace/${slug}/`);
      const legacyPrefix = shellEscape(`/workspace/.legacy-nested-${slug}-`);
      const markerText = shellEscape(
        `Migrated nested repo from /workspace/${slug}`,
      );
      const migrationCmd =
        `if [ ! -f "/workspace/.vibn-migration-root-fix" ] && [ -d ${nestedDir} ] && [ ! -d "/workspace/.git" ]; then ` +
        `rsync -a ${nestedDirContents} "/workspace/" 2>/dev/null; ` +
        `mv ${nestedDir} ${legacyPrefix}"$(date +%s)" 2>/dev/null; ` +
        `echo ${markerText} > /workspace/.vibn-migration-root-fix; ` +
        `fi`;
      await execInCoolifyApp({
        appUuid: row.service_uuid,
        service: "vibn-dev",
        command: migrationCmd,
        user: "vibn",
        timeoutMs: 10000,
      }).catch(() => null);
    }
  } catch {
    // non-fatal best effort
  }

  const cwd = opts.cwd && opts.cwd.trim() ? opts.cwd.trim() : "/workspace";
  const exportSteps = opts.env
    ? Object.entries(opts.env).map(
        ([k, v]) => `export ${shellEscape(k)}=${shellEscape(v)}`,
      )
    : [];
  // cd → exports → user command, chained so a failing step aborts the rest.
  const wrapped = [`cd ${shellEscape(cwd)}`, ...exportSteps, opts.command].join(
    " && ",
  );

  const result = await execInCoolifyApp({
    appUuid: row.service_uuid,
    service: "vibn-dev",
    command: wrapped,
    user: opts.user ?? "vibn",
    timeoutMs: opts.timeoutMs,
    maxBytes: opts.maxBytes,
  });
  await touchActivity(opts.projectId);
  return result;
}
/**
 * Quote a string as a single POSIX-shell word.
 *
 * The value is wrapped in single quotes; each embedded single quote is
 * emitted as `'\''` (close quote, escaped quote, reopen quote).
 */
function shellEscape(s: string): string {
  const escapedQuote = "'\\''";
  const inner = s.split("'").join(escapedQuote);
  return "'" + inner + "'";
}
// ── Health ───────────────────────────────────────────────────────────
/**
 * Report the state of a project's dev container; used by chat startup to
 * decide whether to show a "spinning up your environment…" banner.
 *
 * Rows in any state other than 'provisioning' are returned as stored. A
 * 'provisioning' row is probed with a trivial exec: success promotes it to
 * 'running' (`selfHealed`). Otherwise, once the row is older than 120 s it is
 * flagged `likelyFailed` and Coolify's own service status is attached.
 */
export async function getDevContainerStatus(projectId: string): Promise<{
  exists: boolean;
  state: DevContainerRow["state"] | "absent";
  serviceUuid: string | null;
  /** Seconds since the row was created; useful for AI to decide whether to keep polling. */
  ageSeconds?: number;
  /** Set when state was just self-healed by this call. */
  selfHealed?: boolean;
  /** Set when state is stuck in provisioning past the grace window (likely failed). */
  likelyFailed?: boolean;
  /** Immediate blocker — no need to wait for provisioning timeout. */
  blockedReason?: "coolify_ssh_not_configured" | "coolify_deploy_failed";
  blockedHint?: string;
  /** Coolify's own view of the service status (only populated when stuck). */
  coolifyStatus?: string | null;
}> {
  const row = await getDevContainerRow(projectId);
  if (!row) {
    return { exists: false, state: "absent", serviceUuid: null };
  }

  const serviceUuid = row.service_uuid;
  const ageSeconds = Math.floor((Date.now() - row.created_at.getTime()) / 1000);

  // Anything other than 'provisioning' is trusted as stored; touchActivity()
  // inside execInDevContainer keeps those rows honest.
  if (row.state !== "provisioning") {
    return { exists: true, state: row.state, serviceUuid, ageSeconds };
  }

  // Without SSH there is no way to probe, so report the blocker right away.
  if (!isCoolifySshConfigured()) {
    return {
      exists: true,
      state: row.state,
      serviceUuid,
      ageSeconds,
      likelyFailed: true,
      blockedReason: "coolify_ssh_not_configured",
      blockedHint:
        "Server missing COOLIFY_SSH_HOST / COOLIFY_SSH_PRIVATE_KEY_B64 — docker exec cannot run. Configure on vibn-frontend; validate with GET /api/internal/infra-health (INFRA_HEALTH_SECRET).",
    };
  }

  // A successful `true` exec is the source of truth for "container is up";
  // it also flips the row to 'running' so pollers are not stuck waiting for
  // a transition that otherwise only happens inside execInDevContainer.
  try {
    const probe = await execInCoolifyApp({
      appUuid: serviceUuid,
      service: "vibn-dev",
      command: "true",
      user: "vibn",
      timeoutMs: 5_000,
    });
    if (probe.code === 0) {
      await touchActivity(projectId);
      return {
        exists: true,
        state: "running",
        serviceUuid,
        ageSeconds,
        selfHealed: true,
      };
    }
  } catch {
    // Exec failed — container probably not up yet; use the age heuristic.
  }

  // More than 120 s in 'provisioning' is treated as stuck rather than
  // "still booting", so the caller can stop polling.
  const likelyFailed = ageSeconds > 120;
  if (!likelyFailed) {
    return {
      exists: true,
      state: row.state,
      serviceUuid,
      ageSeconds,
      likelyFailed,
      blockedReason: undefined,
      blockedHint: undefined,
      coolifyStatus: null,
    };
  }

  // Stuck: attach Coolify's own status so the caller can see why.
  let coolifyStatus: string | null = null;
  let blockedReason: "coolify_deploy_failed" | undefined;
  let blockedHint: string | undefined;
  try {
    const svc = await getService(serviceUuid).catch(() => null);
    coolifyStatus = svc?.status ?? null;
    const reportsFailure =
      Boolean(coolifyStatus) && /fail|error/i.test(coolifyStatus as string);
    if (reportsFailure) {
      blockedReason = "coolify_deploy_failed";
      blockedHint = `Coolify reports service status: "${coolifyStatus}". The dev container image may have failed to build or pull. Check the Coolify dashboard for this service (uuid: ${row.service_uuid}) or regenerate the project. Do NOT keep polling — this will not self-heal.`;
    }
  } catch {
    // best-effort
  }

  return {
    exists: true,
    state: row.state,
    serviceUuid,
    ageSeconds,
    likelyFailed,
    blockedReason,
    blockedHint,
    coolifyStatus,
  };
}
// Re-exported so route handlers can read live Coolify service status through
// this module instead of taking a separate dependency on lib/coolify.
export { getService };
// ── Dev servers ──────────────────────────────────────────────────────
//
// Long-running processes (Vite, Next dev, etc.) launched inside the
// dev container. We don't have a real supervisor; we shell out to
// `nohup`, redirect logs to /var/log/vibn-dev/<id>.log, and remember
// the PID + port in fs_dev_servers so subsequent calls can stop or
// list them.
//
// Preview URLs are exposed via Traefik's "host" router using the
// internal Coolify network (the dev container's primary bridge IP is
// reachable from Traefik). Full Traefik wildcard wiring lands in
// /vibn-dev/PREVIEWS.md and a separate Traefik config commit; this
// module just records the URL we WILL serve at, so the caller can
// hand it back to the chat.
/** Process-local flag: set once the DDL below has completed successfully. */
let devServersTableReady = false;

/**
 * Create `fs_dev_servers` and its index when missing.
 *
 * `fs_dev_servers.project_id` has a foreign key to
 * `fs_project_dev_containers`, so that table is ensured first; otherwise the
 * CREATE TABLE below fails on a database where no dev container has ever
 * been provisioned (e.g. a fresh environment calling listDevServers()).
 */
async function ensureDevServersTable(): Promise<void> {
  if (devServersTableReady) return;
  // The referenced table must exist before the REFERENCES clause is parsed.
  await ensureDevContainersTable();
  await query(
    `CREATE TABLE IF NOT EXISTS fs_dev_servers (
id TEXT PRIMARY KEY,
project_id TEXT NOT NULL REFERENCES fs_project_dev_containers(project_id) ON DELETE CASCADE,
workspace TEXT NOT NULL,
name TEXT NOT NULL,
command TEXT NOT NULL,
port INTEGER NOT NULL,
pid INTEGER,
preview_url TEXT NOT NULL,
state TEXT NOT NULL DEFAULT 'starting',
started_at TIMESTAMPTZ NOT NULL DEFAULT now(),
stopped_at TIMESTAMPTZ
);
CREATE INDEX IF NOT EXISTS fs_dev_servers_project_idx ON fs_dev_servers (project_id, state);`,
    [],
  );
  devServersTableReady = true;
}
/**
 * One row of `fs_dev_servers`; field names mirror the column names created
 * by ensureDevServersTable.
 */
export interface DevServerRow {
// Generated by startDevServer as `ds_<hex>`; also names the log file.
id: string;
// Owning project; references fs_project_dev_containers(project_id).
project_id: string;
workspace: string;
// Display name; startDevServer defaults it to `port-<port>`.
name: string;
// The command as supplied by the caller (before any host-binding rewrite).
command: string;
port: number;
// PID echoed by the launch command, or null when it could not be parsed.
pid: number | null;
preview_url: string;
// 'starting' on insert; readiness probing and stop calls update it later.
state: "starting" | "running" | "stopped" | "failed";
started_at: Date;
stopped_at: Date | null;
}
/**
 * Produce `bytes` pseudo-random bytes as a lowercase hex string
 * (two characters per byte; 8 characters by default).
 *
 * Uses Math.random, so the result is suitable for identifiers only, not
 * for secrets.
 */
function randomToken(bytes = 4): string {
  const out = Buffer.alloc(bytes);
  let filled = 0;
  while (filled < bytes) {
    out[filled] = Math.floor(Math.random() * 256);
    filled += 1;
  }
  return out.toString("hex");
}
/**
 * Compute the public preview URL for a port inside a project's dev container.
 *
 * The hostname must match the Host() rules baked into the compose labels by
 * renderDevCompose; the slot index is `port - PREVIEW_BASE_PORT`.
 *
 * @returns The https URL, or null when the port falls outside the
 *          pre-allocated preview slots.
 */
function buildPreviewUrl(
  projectId: string,
  projectSlug: string,
  port: number,
): string | null {
  const slot = port - PREVIEW_BASE_PORT;
  const outsideSlots = slot < 0 || slot >= PREVIEW_PORT_COUNT;
  if (outsideSlots) {
    return null;
  }
  const hostname = `preview-${slot}-${projectSlug}-${projectPreviewToken(projectId)}.${PREVIEW_DOMAIN_BASE_RAW}`;
  return `https://${hostname}`;
}
/** Input for startDevServer(). */
export interface StartDevServerOpts {
projectId: string;
// Used to build the preview URL for the chosen port.
projectSlug: string;
// Shell command that starts the server; launched through `bash -lc`.
command: string;
// Must lie in the preview slot range, otherwise PortOutOfRangeError is thrown.
port: number;
// Optional display name; defaults to `port-<port>`.
name?: string;
// Owning workspace; its slug is stored on the fs_dev_servers row.
workspace: VibnWorkspace;
}
/**
 * Raised when a requested preview port already has a listener.
 * Carries the port plus the PID and command line of the current listener.
 */
export class PortBusyError extends Error {
  public readonly port: number;
  public readonly listenerPid: number | null;
  public readonly listenerCmd: string;

  constructor(port: number, listenerPid: number | null, listenerCmd: string) {
    super(
      [
        `Port ${port} is already in use by pid ${listenerPid ?? "?"} (${listenerCmd}). `,
        `Stop it first, or pick another port from ${PREVIEW_BASE_PORT}-${PREVIEW_BASE_PORT + PREVIEW_PORT_COUNT - 1}.`,
      ].join(""),
    );
    this.port = port;
    this.listenerPid = listenerPid;
    this.listenerCmd = listenerCmd;
    this.name = "PortBusyError";
  }
}
/**
 * Raised when a dev server is requested on a port that has no
 * pre-allocated preview slot (and therefore no Traefik router).
 */
export class PortOutOfRangeError extends Error {
  public readonly port: number;

  constructor(port: number) {
    super(
      [
        `Port ${port} is outside the preview slot range ${PREVIEW_BASE_PORT}-${PREVIEW_BASE_PORT + PREVIEW_PORT_COUNT - 1}. `,
        `Pick a port in that range so the preview URL is reachable through Traefik.`,
      ].join(""),
    );
    this.port = port;
    this.name = "PortOutOfRangeError";
  }
}
/**
 * Rewrite a dev-server command so the server binds all interfaces.
 *
 * Traefik reaches the dev container over the Docker `coolify` network; a
 * server bound to loopback only makes the proxy answer 502. Two measures:
 *  - HOST / HOSTNAME / VITE_DEV_SERVER_HOST / WEBPACK_DEV_SERVER_HOST are
 *    exported as 0.0.0.0 in front of every command.
 *  - `next dev` gets `-H 0.0.0.0` appended, because the Next.js CLI ignores
 *    HOST — but only when the command does not already carry its own
 *    `-H` / `--hostname` flag.
 *
 * @param command Raw command supplied by the caller.
 * @returns The rewritten command, or the empty string for blank input.
 */
export function ensurePreviewListenAllInterfaces(command: string): string {
  const trimmed = command.trim();
  if (!trimmed) return trimmed;
  const universalEnv =
    "export HOST=0.0.0.0 HOSTNAME=0.0.0.0 VITE_DEV_SERVER_HOST=0.0.0.0 WEBPACK_DEV_SERVER_HOST=0.0.0.0; ";

  const nextDevPattern = /\bnext\s+dev\b/;
  // The flag must start a shell word. A `\b` anchor cannot be used in front
  // of `-`: there is no word boundary between whitespace and a dash, so it
  // would never match a normally spaced flag.
  const hostFlagPattern = /(?:^|\s)(?:-H|--hostname(?:[\s=]|$))/;

  const needsHostFlag =
    nextDevPattern.test(trimmed) && !hostFlagPattern.test(trimmed);
  const cmd = needsHostFlag
    ? trimmed.replace(nextDevPattern, "next dev -H 0.0.0.0")
    : trimmed;
  return universalEnv + cmd;
}
/**
 * Poll the dev server from inside the container until it answers HTTP, then
 * promote its `fs_dev_servers` row from 'starting' to 'running'; mark it
 * 'failed' when the probe gives up or errors. Rows already 'stopped' are
 * never touched. Intended to be fired asynchronously after dev_server.start
 * returns so MCP latency stays low.
 *
 * Any HTTP status (including 404/500) counts as "listening"; only a curl
 * result of `000` (no HTTP response at all) or an empty result counts as
 * "not up yet".
 *
 * @param projectId Project whose container is probed.
 * @param serverId  `fs_dev_servers.id`; also names the log file whose tail is
 *                  included in the failure diagnostics.
 * @param port      Container port the server is expected to listen on.
 * @throws Error (after marking the row 'failed') when the probe exits
 *         non-zero or the exec itself fails.
 */
export async function probeDevServerReadiness(
  projectId: string,
  serverId: string,
  port: number,
): Promise<void> {
  await ensureDevServersTable();

  // Prints exactly one status code per invocation. curl writes `000` through
  // --write-out even when the connection fails, so the two attempts must be
  // captured separately: chaining them with `||` inside one command
  // substitution concatenates their outputs ("000000…"), which no longer
  // equals "000" and would be mistaken for a successful response.
  const curlStatus = (host: string): string =>
    `curl -sS -o /dev/null -w '%{http_code}' --max-time 2 --connect-timeout 2 ` +
    `"http://${host}:${port}$path" 2>/dev/null`;

  // Up to ~300 iterations: Next/Vite cold compiles and npm installs can take
  // a while. The exec timeout below is the hard upper bound.
  const probeCmd =
    `last_code=000; ` +
    `for i in $(seq 1 300); do ` +
    `for path in / ''; do ` +
    `code=$(${curlStatus("localhost")}); ` +
    // Fall back to 0.0.0.0 only when localhost produced no HTTP response.
    `case "$code" in ''|000) code=$(${curlStatus("0.0.0.0")});; esac; ` +
    `last_code=$code; ` +
    `case "$code" in ''|000) ;; *) exit 0;; esac; ` +
    `done; ` +
    `sleep 1; done; ` +
    `echo "PROBE_FAIL last_code=$last_code port=${port}"; ` +
    `echo "PROBE_FAIL ps=$(ps aux | grep -E 'node|npm|next|vite' | grep -v grep | head -3 | tr '\\n' '|')"; ` +
    `echo "PROBE_FAIL log_tail=$(tail -20 /var/log/vibn-dev/${serverId}.log 2>/dev/null | tr '\\n' '|' | head -c 2000)"; ` +
    `exit 1`;

  try {
    const r = await execInDevContainer({
      projectId,
      command: probeCmd,
      timeoutMs: 310_000,
    });
    if (r.code !== 0) {
      console.error(
        "[probe] FAILED",
        JSON.stringify({
          projectId,
          serverId,
          port,
          exitCode: r.code,
          stdout: (r.stdout || "").slice(0, 600),
        }),
      );
      // The catch block below performs the single 'failed' update.
      throw new Error(`Probe failed with exit code ${r.code}: ${r.stdout}`);
    }
    await query(
      `UPDATE fs_dev_servers SET state = 'running' WHERE id = $1 AND project_id = $2 AND state != 'stopped'`,
      [serverId, projectId],
    );
  } catch (err) {
    console.error(
      "[probe] ERROR",
      JSON.stringify({
        projectId,
        serverId,
        port,
        err: err instanceof Error ? err.message : String(err),
      }),
    );
    await query(
      `UPDATE fs_dev_servers SET state = 'failed' WHERE id = $1 AND project_id = $2 AND state != 'stopped'`,
      [serverId, projectId],
    );
    throw err;
  }
}
/**
 * Build the in-container `node -e` one-liner that SIGKILLs every process
 * holding a TCP socket whose local port is one of `ports`.
 *
 * It reads both /proc/net/tcp (IPv4) and /proc/net/tcp6 (IPv6, where Node
 * servers listening on `::` appear), maps matching socket inodes to PIDs via
 * /proc/<pid>/fd, and kills those PIDs. When neither socket table can be read
 * it falls back to `fuser -k`, passing one `<port>/tcp` argument per port.
 *
 * The script is wrapped in single quotes, so it must not contain any.
 */
function buildPortReaperCommand(ports: readonly number[]): string {
  const portList = Array.from(new Set(ports)).join(",");
  return (
    `node -e '` +
    `const fs = require("fs"); ` +
    `const portsToKill = [${portList}]; ` +
    `try { ` +
    `const tables = ["/proc/net/tcp", "/proc/net/tcp6"].map(f => { ` +
    `try { return fs.readFileSync(f, "utf8"); } catch (e) { return null; } ` +
    `}).filter(t => t !== null); ` +
    `if (tables.length === 0) { throw new Error("no socket tables readable"); } ` +
    `const inodes = []; ` +
    `tables.join("\\n").split("\\n").forEach(line => { ` +
    `const parts = line.trim().split(/\\s+/); ` +
    `if (parts.length > 9) { ` +
    `const local = parts[1]; ` +
    `for (const p of portsToKill) { ` +
    `const hexPort = p.toString(16).toUpperCase().padStart(4, "0"); ` +
    `if (local.endsWith(":" + hexPort)) { inodes.push(parts[9]); } ` +
    `} ` +
    `} ` +
    `}); ` +
    `if (inodes.length > 0) { ` +
    `fs.readdirSync("/proc").forEach(file => { ` +
    `if (/^\\d+$/.test(file)) { ` +
    `try { ` +
    `const fds = fs.readdirSync("/proc/" + file + "/fd"); ` +
    `for (const fd of fds) { ` +
    `const link = fs.readlinkSync("/proc/" + file + "/fd/" + fd); ` +
    `for (const inode of inodes) { ` +
    `if (link.includes("socket:[" + inode + "]")) { ` +
    `process.kill(parseInt(file, 10), 9); ` +
    `break; ` +
    `} ` +
    `} ` +
    `} ` +
    `} catch (e) {} ` +
    `} ` +
    `}); ` +
    `} ` +
    `} catch (e) { ` +
    `try { require("child_process").execSync("fuser -k -9 " + portsToKill.map(p => p + "/tcp").join(" ") + " 2>/dev/null || true"); } catch (err) {} ` +
    `}'`
  );
}

/**
 * Start a long-running dev server inside the project's dev container.
 *
 * Steps:
 *  1. Validate that the port is an integer inside the preview slot range.
 *  2. Stop every tracked, non-stopped dev server of this project (kill the
 *     recorded PID, mark the row 'stopped').
 *  3. Kill anything still holding a socket on the affected ports, including
 *     untracked orphans.
 *  4. Launch the command with `nohup`, logging to
 *     /var/log/vibn-dev/<id>.log, and record a 'starting' row.
 *
 * @returns The row as inserted (state 'starting'); readiness is determined
 *          separately by probeDevServerReadiness.
 * @throws PortOutOfRangeError when the port is not a whole number in
 *         PREVIEW_BASE_PORT .. PREVIEW_BASE_PORT + PREVIEW_PORT_COUNT - 1.
 */
export async function startDevServer(
  opts: StartDevServerOpts,
): Promise<DevServerRow> {
  await ensureDevServersTable();

  // 1. Validate slot range — outside it there is no pre-allocated router.
  //    NaN and fractional values are rejected explicitly: both comparisons
  //    are false for NaN, and the port is interpolated into shell commands.
  if (
    !Number.isInteger(opts.port) ||
    opts.port < PREVIEW_BASE_PORT ||
    opts.port >= PREVIEW_BASE_PORT + PREVIEW_PORT_COUNT
  ) {
    throw new PortOutOfRangeError(opts.port);
  }

  // 2. Stop ALL tracked rows for this project on ALL preview ports, so only
  //    one preview server per project is alive at a time.
  const existingRows = await query<{
    id: string;
    pid: number | null;
    port: number;
  }>(
    `SELECT id, pid, port FROM fs_dev_servers
WHERE project_id = $1 AND state IN ('starting','running','failed')`,
    [opts.projectId],
  );
  for (const row of existingRows) {
    if (row.pid) {
      await execInDevContainer({
        projectId: opts.projectId,
        command: `kill -9 ${row.pid} 2>/dev/null || true`,
        timeoutMs: 3_000,
      }).catch(() => {});
    }
    await query(
      `UPDATE fs_dev_servers SET state='stopped', stopped_at=now() WHERE id = $1`,
      [row.id],
    );
  }

  // 3. Force-kill ANY process still bound to the previously used ports or
  //    the requested one (untracked orphans, children that outlived the
  //    recorded PID, processes from other runs).
  const portsToFree = existingRows.map((r) => r.port).concat(opts.port);
  await execInDevContainer({
    projectId: opts.projectId,
    command: buildPortReaperCommand(portsToFree),
    timeoutMs: 5_000,
  }).catch(() => {});

  // 4. Launch.
  const id = `ds_${randomToken(6)}`;
  const name = opts.name ?? `port-${opts.port}`;
  const previewUrl =
    buildPreviewUrl(opts.projectId, opts.projectSlug, opts.port) ??
    `https://localhost-only:${opts.port}`;
  const logFile = `/var/log/vibn-dev/${id}.log`;
  const listenSafeCommand = ensurePreviewListenAllInterfaces(opts.command);
  // The braces matter: `a && b && c & echo $!` backgrounds the whole
  // AND-list, so `$!` would be the PID of a wrapper subshell. Grouping only
  // the nohup command makes `$!` the PID of the launched process itself.
  const launch =
    `mkdir -p /var/log/vibn-dev && ` +
    `cd /workspace && ` +
    `{ nohup env PORT=${opts.port} VIBN_DEV_SERVER_ID=${id} ` +
    `bash -lc ${shellEscape(listenSafeCommand)} > ${logFile} 2>&1 & ` +
    `echo $!; }`;
  const result = await execInDevContainer({
    projectId: opts.projectId,
    command: launch,
    timeoutMs: 5_000,
  });
  const parsedPid = parseInt(result.stdout.trim(), 10);
  const pid = Number.isFinite(parsedPid) ? parsedPid : null;

  await query(
    `INSERT INTO fs_dev_servers
(id, project_id, workspace, name, command, port, pid, preview_url, state)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`,
    [
      id,
      opts.projectId,
      opts.workspace.slug,
      name,
      opts.command,
      opts.port,
      pid,
      previewUrl,
      "starting",
    ],
  );
  return {
    id,
    project_id: opts.projectId,
    workspace: opts.workspace.slug,
    name,
    command: opts.command,
    port: opts.port,
    pid,
    preview_url: previewUrl,
    state: "starting",
    started_at: new Date(),
    stopped_at: null,
  };
}
/**
 * List a project's dev servers that are not in the 'stopped' state, ordered
 * by sortDevPreviewsFrontendFirst.
 */
export async function listDevServers(
  projectId: string,
): Promise<DevServerRow[]> {
  await ensureDevServersTable();
  const selectLive = `SELECT * FROM fs_dev_servers WHERE project_id = $1 AND state != 'stopped'`;
  const liveServers = await query<DevServerRow>(selectLive, [projectId]);
  return sortDevPreviewsFrontendFirst(liveServers);
}
/**
 * Stop one dev server: send the default `kill` signal to its recorded PID
 * (best-effort) and mark its row 'stopped'.
 *
 * @throws Error when no row with this id exists for the project.
 */
export async function stopDevServer(
  projectId: string,
  id: string,
): Promise<void> {
  await ensureDevServersTable();
  const server = await queryOne<DevServerRow>(
    `SELECT * FROM fs_dev_servers WHERE id = $1 AND project_id = $2 LIMIT 1`,
    [id, projectId],
  );
  if (!server) {
    throw new Error(`Dev server ${id} not found`);
  }

  if (server.pid) {
    // A failed kill (container gone, process already dead) must not stop
    // the row from being marked 'stopped'.
    await execInDevContainer({
      projectId,
      command: `kill ${server.pid} 2>/dev/null || true`,
      timeoutMs: 3_000,
    }).catch(() => {});
  }

  const markStopped = `UPDATE fs_dev_servers SET state = 'stopped', stopped_at = now() WHERE id = $1`;
  await query(markStopped, [id]);
}
/**
 * Return the last lines of a dev server's log file
 * (/var/log/vibn-dev/<id>.log inside the container).
 *
 * @param projectId Project whose container holds the log.
 * @param id        Dev server id; shell-escaped before use because it may
 *                  originate from tool input.
 * @param lines     Number of lines to return; truncated to an integer and
 *                  clamped to 1..2000. Non-finite values fall back to 200.
 * @returns The log tail, or `(no log yet)` when the file cannot be read.
 */
export async function tailDevServerLog(
  projectId: string,
  id: string,
  lines = 200,
): Promise<string> {
  // `tail -n` needs a whole number; NaN would otherwise survive the clamp.
  const requested = Number.isFinite(lines) ? Math.trunc(lines) : 200;
  const lineCount = Math.max(1, Math.min(2000, requested));
  const logPath = shellEscape(`/var/log/vibn-dev/${id}.log`);
  const r = await execInDevContainer({
    projectId,
    command: `tail -n ${lineCount} ${logPath} 2>/dev/null || echo '(no log yet)'`,
    timeoutMs: 5_000,
  });
  return r.stdout;
}
// ── Auto-push autosave ───────────────────────────────────────────────
//
// Treats Gitea as the canonical store; the container disk is ephemeral.
// On every chat turn (or every 5 min, whichever comes first) we push
// /workspace to a `vibn-autosave/main` branch in the project's repo.
//
// We don't try to be clever about what changed — just `git add -A`,
// commit if anything is staged (`git commit -m "autosave $(date -Is)"`),
// then force-push the autosave branch. If the workspace has no `.git`
// directory yet (fresh project, no `git init` done), we skip silently —
// the AI is responsible for `git init` + first push when it scaffolds.
/** Options accepted by `autosaveWorkspace`. */
export interface AutosaveOpts {
/** Project whose dev container is autosaved; also keys the throttle lookup in `fs_dev_autosaves`. */
projectId: string;
/** Directory name under `/workspace` in the container where the git commands run. */
projectSlug: string;
/** Owning workspace; its `slug` is stored on each autosave row. */
workspace: VibnWorkspace;
/** Repo name in the workspace's Gitea org. Defaults to projectSlug. */
repo?: string;
/** Min interval between autosaves (default 5 min). */
minIntervalMs?: number;
}
/**
 * Commit everything under `/workspace/<projectSlug>` and force-push it to
 * the `vibn-autosave/main` branch of the `origin` remote, at most once per
 * `minIntervalMs`.
 *
 * Every attempt that reaches the container (including skipped and failed
 * ones) is recorded in `fs_dev_autosaves` together with the combined
 * stdout/stderr (first 4000 chars) and the exit code, so the throttle
 * applies to attempts, not only to successful pushes.
 *
 * @param opts - See {@link AutosaveOpts}.
 * @returns `ran: true` with `pushedAt` only when the script exited 0 after
 *   pushing. Otherwise `ran: false` and `reason` says why: no dev container,
 *   container not running, throttled, no git repo, non-zero exit, or the
 *   message of an error thrown by the exec / insert.
 */
export async function autosaveWorkspace(opts: AutosaveOpts): Promise<{
  ran: boolean;
  reason: string;
  pushedAt?: Date;
}> {
  const row = await getDevContainerRow(opts.projectId);
  if (!row) return { ran: false, reason: "no dev container" };
  if (row.state !== "running")
    return { ran: false, reason: `state=${row.state}` };

  // Make sure the table exists before the throttle lookup, so the very
  // first autosave does not depend on a swallowed "relation does not
  // exist" error.
  await ensureAutosavesTable();

  // Throttle: don't autosave more than once per minIntervalMs. The lookup
  // is best-effort — if it fails we proceed as if there were no prior row.
  const minInterval = opts.minIntervalMs ?? 5 * 60_000;
  const last = await queryOne<{ pushed_at: Date }>(
    `SELECT pushed_at FROM fs_dev_autosaves WHERE project_id = $1 ORDER BY pushed_at DESC LIMIT 1`,
    [opts.projectId],
  ).catch(() => null);
  if (last && Date.now() - new Date(last.pushed_at).getTime() < minInterval) {
    return { ran: false, reason: "throttled" };
  }

  const repo = opts.repo ?? opts.projectSlug;

  // Printed by the script when there is nothing to push to; used below to
  // tell a skip apart from a real push.
  const noGitMarker = "(no .git, skipping autosave)";

  // Single-quote the directory so an unusual slug cannot be interpreted by
  // the shell.
  const workdir = `/workspace/${opts.projectSlug}`;
  const quotedWorkdir = `'${workdir.replace(/'/g, "'\\''")}'`;

  // The script only sets the commit identity, switches to the autosave
  // branch, commits staged changes (if any) and force-pushes to `origin`.
  // It does not create the repo or the remote.
  const cmd = `set -e
cd ${quotedWorkdir}
if [ ! -d .git ]; then
echo '${noGitMarker}'
exit 0
fi
git config user.email vibn-bot@vibnai.com
git config user.name 'Vibn Autosave'
# Force push to the autosave branch — never collides with main.
git checkout -B vibn-autosave/main 2>&1 | tail -1
git add -A
if git diff --cached --quiet; then
echo '(no changes)'
else
git commit -m "autosave $(date -Is)" --quiet
fi
# Capture the push's own exit status: piping it straight into tail would
# make the pipeline (and therefore the script) report tail's status.
push_rc=0
push_out=$(git push -f origin vibn-autosave/main 2>&1) || push_rc=$?
echo "$push_out" | tail -3
exit $push_rc`;

  try {
    const r = await execInDevContainer({
      projectId: opts.projectId,
      command: cmd,
      timeoutMs: 30_000,
    });
    await query(
      `INSERT INTO fs_dev_autosaves (project_id, workspace, repo, output, code)
VALUES ($1, $2, $3, $4, $5)`,
      [
        opts.projectId,
        opts.workspace.slug,
        repo,
        (r.stdout + r.stderr).slice(0, 4000),
        r.code,
      ],
    );
    if (r.code !== 0) {
      return { ran: false, reason: `autosave failed (exit code ${r.code})` };
    }
    if (r.stdout.includes(noGitMarker)) {
      return { ran: false, reason: "no git repo" };
    }
    return { ran: true, reason: "pushed", pushedAt: new Date() };
  } catch (err) {
    return {
      ran: false,
      reason: err instanceof Error ? err.message : String(err),
    };
  }
}
/** Set once the autosave DDL has completed successfully in this process. */
let autosavesTableReady = false;

/**
 * Create the `fs_dev_autosaves` table and its `(project_id, pushed_at DESC)`
 * index if they do not exist. After the first successful run in this
 * process, later calls return without touching the database.
 */
async function ensureAutosavesTable(): Promise<void> {
  if (!autosavesTableReady) {
    const ddl = `CREATE TABLE IF NOT EXISTS fs_dev_autosaves (
id BIGSERIAL PRIMARY KEY,
project_id TEXT NOT NULL,
workspace TEXT NOT NULL,
repo TEXT NOT NULL,
output TEXT,
code INTEGER,
pushed_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE INDEX IF NOT EXISTS fs_dev_autosaves_project_idx ON fs_dev_autosaves (project_id, pushed_at DESC);`;
    await query(ddl, []);
    autosavesTableReady = true;
  }
}
// ── Idle suspend ─────────────────────────────────────────────────────
/** Summary returned by `suspendIdleContainers` for one sweep. */
export interface IdleSweepResult {
/** Number of running containers whose `last_active_at` was older than the cutoff. */
scanned: number;
/** Containers suspended in this sweep, with whole minutes since their `last_active_at`. */
suspended: Array<{ projectId: string; idleMin: number }>;
/** Containers whose suspend call threw, with the error message. */
errors: Array<{ projectId: string; error: string }>;
}
/**
 * Sweep for running dev containers whose `last_active_at` is more than
 * `idleMinutes` minutes in the past and suspend each of them, one at a
 * time. Meant to be driven by a cron roughly every 5 minutes. Only rows in
 * state `running` are selected, so containers that are already suspended
 * are left alone on a re-run.
 *
 * A failure to suspend one container is captured in `errors` and does not
 * stop the sweep.
 *
 * @param idleMinutes - Idle threshold in minutes (default 30).
 * @returns Counts and per-project outcomes for this sweep.
 */
export async function suspendIdleContainers(
  idleMinutes = 30,
): Promise<IdleSweepResult> {
  await ensureDevContainersTable();

  const idleBefore = new Date(Date.now() - idleMinutes * 60_000);
  const candidates = await query<DevContainerRow>(
    `SELECT * FROM fs_project_dev_containers
WHERE state = 'running' AND last_active_at < $1`,
    [idleBefore],
  );

  const suspended: IdleSweepResult["suspended"] = [];
  const errors: IdleSweepResult["errors"] = [];

  for (const candidate of candidates) {
    const projectId = candidate.project_id;
    try {
      await suspendDevContainer(projectId);
      // Measured after the suspend call returns, against the activity
      // timestamp read by the SELECT above.
      const idleMs = Date.now() - new Date(candidate.last_active_at).getTime();
      suspended.push({ projectId, idleMin: Math.floor(idleMs / 60_000) });
    } catch (err) {
      const error = err instanceof Error ? err.message : String(err);
      errors.push({ projectId, error });
    }
  }

  return { scanned: candidates.length, suspended, errors };
}