Before this fix, devcontainer.status was a read-only DB query that returned whatever state the row currently held. The state only flips provisioning→running via touchActivity() inside execInDevContainer. That created a deadlock: the AI polls devcontainer.status waiting for 'running'; status will never flip until something else execs. Caught live in smoke test 2026-05-01 (manifest project) — the AI fired devcontainer.status three times in a row, hit the loop guard, and surfaced the dead-end to the user. Two fixes: 1. getDevContainerStatus() now does a cheap 'true' exec probe when the row says 'provisioning'. If the probe lands, it flips the row to 'running' via touchActivity and reports selfHealed=true. If the probe fails AND the row is older than 120s, it reports likelyFailed=true so callers can stop polling and escalate. Also returns ageSeconds for the AI to reason about wait windows. Coolify's own service status is not used because dev containers have no fqdn/healthcheck and Coolify reports running:unknown for any such service forever. 2. New error-recovery rule 'devcontainer-still-provisioning' that fires whenever a status response contains state:'provisioning'. Tells the AI to send one status message, wait 15s, and prefer shell.exec (which lazy-provisions and proves reachability) over another devcontainer.status call. Explicit antipattern: do not poll status in a tight loop. Co-authored-by: Cursor <cursoragent@cursor.com>
901 lines
32 KiB
TypeScript
901 lines
32 KiB
TypeScript
/**
|
|
* Per-project AI dev container ("vibn-dev").
|
|
*
|
|
* One Coolify Service per Vibn project, running the `vibn-dev` image.
|
|
* The AI agent drives it via:
|
|
* - shell.exec → docker exec into the container (via existing SSH path)
|
|
* - fs.* → file ops (implemented as `cat` / `tee` / `rm` etc.
|
|
* inside the container, on top of shell.exec)
|
|
* - dev_server.* → start long-running processes (week 2)
|
|
* - ship → git push to Gitea + trigger Coolify deploy (week 2)
|
|
*
|
|
* Lifecycle states:
|
|
* - Not provisioned → ensureDevContainer() creates the Coolify service
|
|
* - Suspended → Coolify-stopped (saves money). resume() starts it.
|
|
* - Running → docker exec works.
|
|
*
|
|
* Tenant safety: every helper takes a workspace and the caller must have
|
|
* already verified that the projectId belongs to that workspace via
|
|
* fs_projects. The exec primitive ALSO verifies the resolved container
|
|
* UUID is in the workspace's owned Coolify-project set, so a hijacked
|
|
* projectId can't reach unrelated containers.
|
|
*
|
|
* See: AI_PATH_B_EXECUTION_PLAN.md §3.
|
|
*/
|
|
|
|
import { randomBytes } from 'node:crypto';

import { query, queryOne } from '@/lib/db-postgres';
import {
  createDockerComposeApp,
  startService,
  stopService,
  getService,
} from '@/lib/coolify';
import { execInCoolifyApp, type ExecInAppResult } from '@/lib/coolify-exec';
import { isCoolifySshConfigured } from '@/lib/coolify-ssh';
import {
  ensureProjectCoolifyProject,
  getProjectCoolifyUuid,
  linkResourceToProject,
} from '@/lib/projects';
import type { VibnWorkspace } from '@/lib/workspaces';
import { assertDevContainerQuota } from '@/lib/quotas';
|
|
|
|
// ── Configuration ────────────────────────────────────────────────────
|
|
|
|
/**
 * Image tag for vibn-dev. Built and pushed from /vibn-dev/Dockerfile.
 * Override per-environment with VIBN_DEV_IMAGE for staging/canary tags;
 * falls back to 'vibn-dev:latest' when the variable is unset.
 */
export const VIBN_DEV_IMAGE = process.env.VIBN_DEV_IMAGE ?? 'vibn-dev:latest';

/**
 * Resource caps per dev container, each overridable through its env var.
 * Values are passed through as strings. Tweak in env per-tier later.
 */
const DEFAULT_CPU_LIMIT = process.env.VIBN_DEV_CPU_LIMIT ?? '1'; // 1 vCPU
const DEFAULT_MEM_LIMIT = process.env.VIBN_DEV_MEM_LIMIT ?? '1g'; // 1 GiB
// NOTE(review): not referenced anywhere in the visible part of this file.
const DEFAULT_DISK_LIMIT = process.env.VIBN_DEV_DISK_LIMIT ?? '10g'; // soft hint, not enforced by compose
|
|
|
|
// ── Schema ───────────────────────────────────────────────────────────
|
|
|
|
// Process-local memo: flips to true once the DDL below has run successfully.
let devContainersTableReady = false;

/**
 * Lazily create `fs_project_dev_containers` and its two indexes.
 *
 * The DDL uses IF NOT EXISTS throughout, and the flag is only set after the
 * query resolves, so a failed attempt is retried on the next call.
 */
export async function ensureDevContainersTable(): Promise<void> {
  if (!devContainersTableReady) {
    const ddl = `CREATE TABLE IF NOT EXISTS fs_project_dev_containers (
       project_id     TEXT PRIMARY KEY,
       workspace      TEXT NOT NULL,
       service_uuid   TEXT NOT NULL,
       image          TEXT NOT NULL,
       state          TEXT NOT NULL DEFAULT 'provisioning',
       last_active_at TIMESTAMPTZ NOT NULL DEFAULT now(),
       suspended_at   TIMESTAMPTZ,
       created_at     TIMESTAMPTZ NOT NULL DEFAULT now()
     );
     CREATE INDEX IF NOT EXISTS fs_project_dev_containers_ws_idx
       ON fs_project_dev_containers (workspace);
     CREATE INDEX IF NOT EXISTS fs_project_dev_containers_active_idx
       ON fs_project_dev_containers (last_active_at);`;
    await query(ddl, []);
    devContainersTableReady = true;
  }
}
|
|
|
|
/** One row of `fs_project_dev_containers`: the dev container tracked for a project. */
export interface DevContainerRow {
  /** Vibn project id; primary key of the table. */
  project_id: string;
  /** Owning workspace; ensureDevContainer stores the workspace slug here. */
  workspace: string;
  /** UUID of the Coolify service backing the container. */
  service_uuid: string;
  /** Image tag recorded when the service was created. */
  image: string;
  /**
   * Lifecycle state. Rows are inserted as 'provisioning' or 'suspended' and
   * flipped to 'running' by a resume or a successful exec.
   * NOTE(review): nothing in the visible part of this file ever writes 'failed'.
   */
  state: 'provisioning' | 'running' | 'suspended' | 'failed';
  /** Bumped by every exec and resume; drives the idle sweep. */
  last_active_at: Date;
  /** Set when the container is suspended, cleared again on resume/exec. */
  suspended_at: Date | null;
  /** Row creation time (database default `now()`). */
  created_at: Date;
}
|
|
|
|
/**
 * Look up the dev-container row for a project.
 *
 * Ensures the table exists first, then returns whatever `queryOne` yields for
 * the project id (the row, or its "no row" value).
 */
export async function getDevContainerRow(projectId: string): Promise<DevContainerRow | null> {
  await ensureDevContainersTable();
  const selectSql = `SELECT * FROM fs_project_dev_containers WHERE project_id = $1 LIMIT 1`;
  const found = await queryOne<DevContainerRow>(selectSql, [projectId]);
  return found;
}
|
|
|
|
// ── Compose template ─────────────────────────────────────────────────
|
|
|
|
/**
|
|
* Render the docker-compose.yml that backs a single vibn-dev service.
|
|
*
|
|
* Two named volumes are intentional:
|
|
* - workspace : everything in /workspace (the user's source tree).
|
|
* Persists across suspends. Backed up to Gitea every
|
|
* 5 min via the auto-push autosave loop (week 2).
|
|
* - cache : language-toolchain caches (mise, npm, pip, cargo).
|
|
* Persists across suspends; per-project (never shared).
|
|
*
|
|
* The container has NO Vibn-internal network access. We rely on the
|
|
* default Coolify-bridge network being isolated from the vibn-postgres
|
|
* / vibn-frontend bridge. (Network policy hardening lands in week 1
|
|
* day 2 alongside the auto-push job.)
|
|
*/
|
|
/**
|
|
* Pre-allocated preview-port slots. We bake Traefik labels for
|
|
* ports 3000..3000+PREVIEW_PORT_COUNT-1 directly into the compose,
|
|
* so `dev_server.start` doesn't have to mutate the compose at runtime
|
|
* (which would require a Coolify redeploy and ~30s of latency).
|
|
*
|
|
* The first slot is the project's "primary" preview; additional slots
|
|
* cover the few-times-a-session case where the AI runs both a Vite
|
|
* frontend and a separate API. Cap is intentionally low (10) so a
|
|
* single user can't stand up dozens of public URLs.
|
|
*
|
|
* Subdomain shape: preview-{slot}-{projectSlug}-{token}.preview.vibnai.com
|
|
* - slot is 0..9, used to disambiguate when one project runs >1 server
|
|
* - token is a per-project random suffix written at compose-render
|
|
* time so URLs aren't enumerable across projects
|
|
*/
|
|
/** First container port that has a pre-allocated Traefik router (slot 0). */
export const PREVIEW_BASE_PORT = 3000;
/** Number of consecutive preview slots, i.e. ports 3000..3009 with the defaults. */
export const PREVIEW_PORT_COUNT = 10;
|
|
|
|
/**
 * Derive the per-project token embedded in preview subdomains.
 *
 * The token is the first 8 hex characters of the byte encoding of
 * `projectId`, i.e. the hex form of the id's first four bytes. It is
 * deterministic, so renderDevCompose and buildPreviewUrl always agree on it
 * and the same subdomains survive container restarts.
 *
 * NOTE(review): this is an encoding, not a random value. Project ids that
 * share their first four bytes produce the same token, and anyone who knows
 * an id prefix can compute it. Nothing in this function persists the token.
 * Changing the derivation would invalidate the Host() rules already baked
 * into deployed compose files, so any change needs a migration plan.
 */
function projectPreviewToken(projectId: string): string {
  return Buffer.from(projectId).toString('hex').slice(0, 8);
}
|
|
|
|
/**
 * Render the docker-compose YAML that backs a single vibn-dev service.
 *
 * @param projectSlug Interpolated into router names, preview hostnames, the
 *                    network name and the VIBN_PROJECT_SLUG env var.
 * @param projectId   Written to VIBN_PROJECT_ID and used to derive the
 *                    preview token.
 * @returns The compose document as a string.
 *
 * NOTE(review): both arguments are interpolated into YAML without quoting or
 * escaping — presumably they are already restricted to slug-safe characters;
 * confirm at the call sites. The nesting of the template below is
 * whitespace-significant; verify it against a known-good deployed compose.
 */
function renderDevCompose(projectSlug: string, projectId: string): string {
  // Image distribution: we build vibn-dev on the Coolify host once
  // (see /vibn-dev/setup-on-coolify.sh) and reference it locally.
  // pull_policy: never tells Docker not to attempt a registry pull.
  //
  // Network isolation: vibn-dev sits on its OWN bridge network
  // (`vibn-dev-net-${slug}`). On Coolify the Traefik proxy ALSO joins
  // this network so it can reach the dev container; vibn-postgres /
  // vibn-frontend do not.
  //
  // Traefik labels: pre-allocated routers for ports 3000..3009. Each
  // router uses a distinct subdomain. Routes only "activate" when a
  // process is actually listening on the port — Traefik does the
  // health check.
  const token = projectPreviewToken(projectId);
  const traefikLabels: string[] = ['"traefik.enable=true"'];
  // One router + one service per preview slot; slot i maps to port BASE + i.
  for (let i = 0; i < PREVIEW_PORT_COUNT; i++) {
    const port = PREVIEW_BASE_PORT + i;
    const router = `vibn-dev-${projectSlug}-${i}`;
    // Must stay in sync with the URL shape produced by buildPreviewUrl.
    const host = `preview-${i}-${projectSlug}-${token}.${PREVIEW_DOMAIN_BASE_RAW}`;
    traefikLabels.push(`"traefik.http.routers.${router}.rule=Host(\`${host}\`)"`);
    traefikLabels.push(`"traefik.http.routers.${router}.entrypoints=https"`);
    traefikLabels.push(`"traefik.http.routers.${router}.tls=true"`);
    traefikLabels.push(`"traefik.http.routers.${router}.tls.certresolver=letsencrypt-dns"`);
    traefikLabels.push(`"traefik.http.services.${router}.loadbalancer.server.port=${port}"`);
    traefikLabels.push(`"traefik.http.routers.${router}.service=${router}"`);
  }
  // Each label becomes one YAML list item nested under `labels:`.
  const labelsBlock = traefikLabels.map(l => `      - ${l}`).join('\n');

  return `services:
  vibn-dev:
    image: ${VIBN_DEV_IMAGE}
    pull_policy: never
    restart: unless-stopped
    working_dir: /workspace
    volumes:
      - workspace:/workspace
      - cache:/home/vibn/.cache
    environment:
      - VIBN_PROJECT_SLUG=${projectSlug}
      - VIBN_PROJECT_ID=${projectId}
      - VIBN_PREVIEW_TOKEN=${token}
      - VIBN_DEV_CONTAINER=1
    networks:
      - vibn-dev-net
      - coolify
    labels:
${labelsBlock}
    deploy:
      resources:
        limits:
          cpus: '${DEFAULT_CPU_LIMIT}'
          memory: ${DEFAULT_MEM_LIMIT}
networks:
  vibn-dev-net:
    name: vibn-dev-net-${projectSlug}
    driver: bridge
  coolify:
    external: true
volumes:
  workspace:
  cache:
`;
}
|
|
|
|
/**
 * Parent domain under which preview hostnames are minted. Overridable with
 * VIBN_PREVIEW_DOMAIN_BASE; defaults to 'preview.vibnai.com'. Used verbatim
 * by both the compose Host() rules and buildPreviewUrl.
 */
const PREVIEW_DOMAIN_BASE_RAW =
  process.env.VIBN_PREVIEW_DOMAIN_BASE ?? 'preview.vibnai.com';
|
|
|
|
// ── Provisioning ─────────────────────────────────────────────────────
|
|
|
|
/** Inputs for ensureDevContainer. */
export interface EnsureDevContainerOpts {
  /** Vibn project id; key of the dev-container row. */
  projectId: string;
  /** Project slug; used in the Coolify service name and the compose file. */
  projectSlug: string;
  /** Human-readable name for the Coolify description; falls back to the slug. */
  projectName?: string;
  /** Workspace the project belongs to; its slug is used for quota checks and bookkeeping. */
  workspace: VibnWorkspace;
  /** Skip the initial start (provision-only). Default: start it. */
  noStart?: boolean;
}
|
|
|
|
/** Outcome of ensureDevContainer. */
export interface EnsureDevContainerResult {
  /** UUID of the Coolify service backing the dev container. */
  serviceUuid: string;
  /** Container state as reported by ensureDevContainer for this call. */
  state: DevContainerRow['state'];
  /** True only when this call created the Coolify service. */
  created: boolean;
}
|
|
|
|
/**
 * Idempotently ensure a vibn-dev service exists for the given Vibn project.
 *
 *  - Already provisioned → returns the row's state. A suspended container is
 *    resumed first (after a quota check) unless `noStart` is set.
 *  - Not provisioned     → ensures the per-project Coolify Project exists,
 *    creates the docker-compose service, persists the row, and links the
 *    resource to the Vibn project (best-effort).
 *
 * Safe to call on every chat turn — once the row exists, a call that does not
 * need to resume is a single SELECT.
 *
 * @param opts Project identity, workspace and the optional `noStart` flag.
 * @returns The service UUID, the state persisted (or observed) for the row,
 *          and whether this call created the service.
 * @throws Error when no Coolify project could be resolved or created. Errors
 *         from the quota check, resume, Coolify API and database propagate.
 *
 * NOTE(review): there is no lock around the create path. Two concurrent first
 * calls can each create a Coolify service; the upsert keeps whichever UUID is
 * written last.
 */
export async function ensureDevContainer(
  opts: EnsureDevContainerOpts,
): Promise<EnsureDevContainerResult> {
  await ensureDevContainersTable();

  const existing = await getDevContainerRow(opts.projectId);
  if (existing) {
    if (existing.state === 'suspended' && !opts.noStart) {
      // Resume counts as "starting one more" against the quota, since
      // a suspended container is free but a running one isn't.
      await assertDevContainerQuota(opts.workspace.slug);
      await resumeDevContainer(opts.projectId);
      return { serviceUuid: existing.service_uuid, state: 'running', created: false };
    }
    return { serviceUuid: existing.service_uuid, state: existing.state, created: false };
  }

  // Net-new container creation hits the quota (skip if noStart=true,
  // since a never-started container costs nothing). The QuotaExceededError
  // bubbles up to the MCP route which surfaces it as a 402 to the AI;
  // the AI's recovery middleware can offer to suspend an idle one.
  if (!opts.noStart) {
    await assertDevContainerQuota(opts.workspace.slug);
  }

  // Need a Coolify project to land the service in.
  let coolifyProjectUuid = await getProjectCoolifyUuid(opts.projectId, opts.workspace);
  if (!coolifyProjectUuid) {
    coolifyProjectUuid = await ensureProjectCoolifyProject(
      opts.projectId,
      opts.workspace,
      { projectSlug: opts.projectSlug, projectName: opts.projectName },
    );
  }
  if (!coolifyProjectUuid) {
    throw new Error(
      `Could not provision Coolify project for ${opts.projectId}; dev container creation aborted.`,
    );
  }

  const created = await createDockerComposeApp({
    projectUuid: coolifyProjectUuid,
    name: `vibn-dev-${opts.projectSlug}`,
    description: `AI dev container for project ${opts.projectName ?? opts.projectSlug}`,
    composeRaw: renderDevCompose(opts.projectSlug, opts.projectId),
    instantDeploy: !opts.noStart,
  });

  // A provision-only service was never deployed, so it is recorded as
  // 'suspended'; otherwise it stays 'provisioning' until the first exec lands.
  // The same value is persisted AND returned so callers never see a state
  // that disagrees with the row.
  const initialState: DevContainerRow['state'] = opts.noStart ? 'suspended' : 'provisioning';

  await query(
    `INSERT INTO fs_project_dev_containers
       (project_id, workspace, service_uuid, image, state)
     VALUES ($1, $2, $3, $4, $5)
     ON CONFLICT (project_id) DO UPDATE
       SET service_uuid = EXCLUDED.service_uuid,
           image        = EXCLUDED.image,
           state        = EXCLUDED.state`,
    [
      opts.projectId,
      opts.workspace.slug,
      created.uuid,
      VIBN_DEV_IMAGE,
      initialState,
    ],
  );

  // Bookkeeping link so apps_list / projects_get see the dev container
  // under the right Vibn project.
  try {
    await linkResourceToProject(opts.projectId, opts.workspace.slug, created.uuid, 'service');
  } catch {
    // Deliberately best-effort: the container is usable without the link.
  }

  return { serviceUuid: created.uuid, state: initialState, created: true };
}
|
|
|
|
// ── Lifecycle ────────────────────────────────────────────────────────
|
|
|
|
/**
 * Stop a project's dev container and record it as suspended.
 *
 * No-op when the project has no row or the row is already 'suspended'.
 * The Coolify stop happens first, so a failure there leaves the row untouched.
 */
export async function suspendDevContainer(projectId: string): Promise<void> {
  const container = await getDevContainerRow(projectId);
  if (!container || container.state === 'suspended') {
    return;
  }

  await stopService(container.service_uuid);

  const markSuspendedSql = `UPDATE fs_project_dev_containers
        SET state = 'suspended', suspended_at = now()
      WHERE project_id = $1`;
  await query(markSuspendedSql, [projectId]);
}
|
|
|
|
/**
 * Start a project's dev container and mark its row as running.
 *
 * Returns immediately when the row already says 'running'. Otherwise the
 * Coolify service is started and the row is updated (state, cleared
 * suspended_at, refreshed last_active_at).
 *
 * @throws Error when the project has no dev-container row.
 */
export async function resumeDevContainer(projectId: string): Promise<void> {
  const container = await getDevContainerRow(projectId);
  if (!container) {
    throw new Error(`No dev container provisioned for ${projectId}`);
  }

  if (container.state !== 'running') {
    await startService(container.service_uuid);

    const markRunningSql = `UPDATE fs_project_dev_containers
        SET state = 'running', suspended_at = NULL, last_active_at = now()
      WHERE project_id = $1`;
    await query(markRunningSql, [projectId]);
  }
}
|
|
|
|
/**
 * Record activity for a project's dev container.
 *
 * Refreshes last_active_at, clears suspended_at, and promotes a row that is
 * 'provisioning' or 'suspended' to 'running'; other states are left as-is.
 *
 * Coolify's deploy webhook fires before the container actually accepts
 * docker exec, so the first exec that returns is treated as the
 * authoritative liveness signal.
 */
async function touchActivity(projectId: string): Promise<void> {
  const bumpSql = `UPDATE fs_project_dev_containers
        SET last_active_at = now(),
            state = CASE WHEN state IN ('provisioning','suspended') THEN 'running' ELSE state END,
            suspended_at = NULL
      WHERE project_id = $1`;
  await query(bumpSql, [projectId]);
}
|
|
|
|
// ── Exec primitive ───────────────────────────────────────────────────
|
|
|
|
/** Inputs for execInDevContainer. */
export interface DevContainerExecOpts {
  /** Project whose dev container should run the command. */
  projectId: string;
  /** Shell command text; appended verbatim after the `cd` into `cwd`. */
  command: string;
  /** Working directory; blank or omitted defaults to /workspace. */
  cwd?: string; // defaults to /workspace
  /** Forwarded unchanged to the underlying exec primitive. */
  timeoutMs?: number;
  /** Forwarded unchanged to the underlying exec primitive. */
  maxBytes?: number;
  /** Override the user (default: vibn). Use 'root' only when needed. */
  user?: string;
  /** Extra environment variables (name → value) supplied to the command. */
  env?: Record<string, string>;
}
|
|
|
|
/** POSIX-style environment variable name; anything else is rejected before it reaches a shell. */
const ENV_VAR_NAME_PATTERN = /^[A-Za-z_][A-Za-z0-9_]*$/;

/**
 * Run a command inside the project's vibn-dev service.
 *
 * Resumes the container if its row says 'suspended', then docker-exec's via
 * the existing SSH primitive and records activity once the exec returns.
 *
 * Extra env vars are applied with `export` ahead of the command, so they are
 * visible to every part of a compound command (`a && b`, pipelines) and to
 * shell builtins — a plain `env K=V <command>` prefix would only cover the
 * first simple command.
 *
 * The caller is responsible for verifying the projectId belongs to the
 * workspace BEFORE calling this. We re-verify the container UUID via
 * the exec primitive's own resolution (it queries `docker ps --filter
 * name={uuid}`), so a mismatched projectId can't reach foreign containers.
 *
 * @param opts Command, optional cwd/env/user and limits.
 * @returns The exec primitive's result (stdout, stderr, exit status).
 * @throws Error when SSH to the Coolify host is not configured, when the
 *         project has no dev container, or when an env name is not a valid
 *         identifier. Errors from resume and the exec primitive propagate.
 *
 * NOTE(review): unlike ensureDevContainer, the implicit resume here does not
 * run the dev-container quota check.
 */
export async function execInDevContainer(
  opts: DevContainerExecOpts,
): Promise<ExecInAppResult> {
  if (!isCoolifySshConfigured()) {
    throw new Error(
      'shell.exec requires SSH access to the Coolify host; configure COOLIFY_SSH_* envs.',
    );
  }
  const row = await getDevContainerRow(opts.projectId);
  if (!row) {
    throw new Error(
      `No dev container for project ${opts.projectId}. Call ensureDevContainer() first.`,
    );
  }
  if (row.state === 'suspended') {
    await resumeDevContainer(opts.projectId);
  }

  const cwd = opts.cwd?.trim() || '/workspace';

  // Names are validated (not quoted) because `export` needs a bare
  // identifier; values are single-quoted so they reach the process verbatim.
  const assignments = Object.entries(opts.env ?? {}).map(([name, value]) => {
    if (!ENV_VAR_NAME_PATTERN.test(name)) {
      throw new Error(`Invalid environment variable name: ${JSON.stringify(name)}`);
    }
    return `${name}=${shellEscape(value)}`;
  });

  const steps = [`cd ${shellEscape(cwd)}`];
  if (assignments.length > 0) {
    steps.push(`export ${assignments.join(' ')}`);
  }
  steps.push(opts.command);
  const wrapped = steps.join(' && ');

  const result = await execInCoolifyApp({
    appUuid: row.service_uuid,
    service: 'vibn-dev',
    command: wrapped,
    user: opts.user ?? 'vibn',
    timeoutMs: opts.timeoutMs,
    maxBytes: opts.maxBytes,
  });

  // Any exec that returns (whatever its exit status) proves the container is
  // reachable, so it counts as activity and promotes 'provisioning' rows.
  await touchActivity(opts.projectId);
  return result;
}
|
|
|
|
/**
 * Quote a string for safe use as a single POSIX-shell word.
 *
 * The value is wrapped in single quotes; each embedded single quote is
 * rewritten as `'\''` (close quote, escaped quote, reopen quote).
 */
function shellEscape(s: string): string {
  const body = s.split("'").join("'\\''");
  return "'" + body + "'";
}
|
|
|
|
// ── Health ───────────────────────────────────────────────────────────
|
|
|
|
/** Rows still 'provisioning' after this many seconds are reported as likely failed. */
const PROVISIONING_GRACE_SECONDS = 120;
/** Upper bound for the reachability probe so a status call stays cheap. */
const STATUS_PROBE_TIMEOUT_MS = 5_000;

/**
 * Report the state of a project's dev container.
 *
 * This is NOT a pure read. When the row says 'provisioning' and SSH is
 * configured, it runs a cheap `true` exec inside the container. If the probe
 * exits 0 the row is promoted to 'running' (via touchActivity) and the result
 * carries `selfHealed: true`. Otherwise the row's state is returned together
 * with `likelyFailed`, which is true once the row is older than the grace
 * window.
 *
 * @param projectId Project whose container is inspected.
 * @returns `exists: false` / `state: 'absent'` when there is no row. Otherwise
 *          the state, service UUID and `ageSeconds` (seconds since the row
 *          was created), plus `selfHealed` or `likelyFailed` on the
 *          provisioning path.
 */
export async function getDevContainerStatus(projectId: string): Promise<{
  exists: boolean;
  state: DevContainerRow['state'] | 'absent';
  serviceUuid: string | null;
  /** Seconds since the row was created; useful for AI to decide whether to keep polling. */
  ageSeconds?: number;
  /** Set when state was just self-healed by this call. */
  selfHealed?: boolean;
  /** Set when state is stuck in provisioning past the grace window (likely failed). */
  likelyFailed?: boolean;
}> {
  const row = await getDevContainerRow(projectId);
  if (!row) return { exists: false, state: 'absent', serviceUuid: null };

  // Wrap in `new Date(...)` like the other timestamp reads in this file, so a
  // timestamp that arrives as a string cannot throw on `.getTime()`.
  const ageMs = Date.now() - new Date(row.created_at).getTime();
  const ageSeconds = Math.floor(ageMs / 1000);

  // If we already think it's running or suspended, return as-is. The
  // touchActivity() call inside execInDevContainer keeps the row honest.
  if (row.state !== 'provisioning') {
    return { exists: true, state: row.state, serviceUuid: row.service_uuid, ageSeconds };
  }

  // State is 'provisioning'. The naive read-only return here used to
  // create a deadlock: the AI polls status forever waiting for a flip
  // that only happens via execInDevContainer. So instead, probe with
  // a cheap `true` exec. If it succeeds, mark running and return.
  // Coolify's service status alone isn't enough — Coolify reports
  // 'running:unknown' for any service without a healthcheck/fqdn,
  // which is every dev container. The exec is the source of truth.
  if (isCoolifySshConfigured()) {
    try {
      const probe = await execInCoolifyApp({
        appUuid: row.service_uuid,
        service: 'vibn-dev',
        command: 'true',
        user: 'vibn',
        timeoutMs: STATUS_PROBE_TIMEOUT_MS,
      });
      // NOTE(review): autosaveWorkspace in this file reads `code` from the
      // same result type while this reads `exitCode`; confirm which field
      // ExecInAppResult actually defines.
      if (probe.exitCode === 0) {
        await touchActivity(projectId);
        return {
          exists: true,
          state: 'running',
          serviceUuid: row.service_uuid,
          ageSeconds,
          selfHealed: true,
        };
      }
    } catch {
      // Exec failed — container probably not yet up. Fall through
      // to age-based likelyFailed heuristic.
    }
  }

  // If we've been "provisioning" past the grace window, the container is
  // almost certainly stuck (image pull failure, scheduling failure, etc.).
  // Surface that distinct from "still booting" so the AI can stop
  // polling and tell the user instead of looping.
  const likelyFailed = ageSeconds > PROVISIONING_GRACE_SECONDS;

  return {
    exists: true,
    state: row.state,
    serviceUuid: row.service_uuid,
    ageSeconds,
    likelyFailed,
  };
}
|
|
|
|
// Re-export getService so route handlers can pull live Coolify status
|
|
// without taking a separate dependency on lib/coolify.
|
|
export { getService };
|
|
|
|
// ── Dev servers ──────────────────────────────────────────────────────
|
|
//
|
|
// Long-running processes (Vite, Next dev, etc.) launched inside the
|
|
// dev container. We don't have a real supervisor; we shell out to
|
|
// `nohup`, redirect logs to /var/log/vibn-dev/<id>.log, and remember
|
|
// the PID + port in fs_dev_servers so subsequent calls can stop or
|
|
// list them.
|
|
//
|
|
// Preview URLs are exposed via Traefik's "host" router using the
|
|
// internal Coolify network (the dev container's primary bridge IP is
|
|
// reachable from Traefik). Full Traefik wildcard wiring lands in
|
|
// /vibn-dev/PREVIEWS.md and a separate Traefik config commit; this
|
|
// module just records the URL we WILL serve at, so the caller can
|
|
// hand it back to the chat.
|
|
|
|
// Process-local memo: true once the fs_dev_servers DDL has run successfully.
let devServersTableReady = false;

/**
 * Lazily create `fs_dev_servers` and its (project_id, state) index.
 *
 * Rows reference fs_project_dev_containers and are removed with it
 * (ON DELETE CASCADE). The flag is set only after the query resolves.
 */
async function ensureDevServersTable(): Promise<void> {
  if (!devServersTableReady) {
    const ddl = `CREATE TABLE IF NOT EXISTS fs_dev_servers (
       id          TEXT PRIMARY KEY,
       project_id  TEXT NOT NULL REFERENCES fs_project_dev_containers(project_id) ON DELETE CASCADE,
       workspace   TEXT NOT NULL,
       name        TEXT NOT NULL,
       command     TEXT NOT NULL,
       port        INTEGER NOT NULL,
       pid         INTEGER,
       preview_url TEXT NOT NULL,
       state       TEXT NOT NULL DEFAULT 'starting',
       started_at  TIMESTAMPTZ NOT NULL DEFAULT now(),
       stopped_at  TIMESTAMPTZ
     );
     CREATE INDEX IF NOT EXISTS fs_dev_servers_project_idx ON fs_dev_servers (project_id, state);`;
    await query(ddl, []);
    devServersTableReady = true;
  }
}
|
|
|
|
/** One row of `fs_dev_servers`: a long-running process launched in a dev container. */
export interface DevServerRow {
  /** Server id, generated by startDevServer as `ds_` + hex; also names the log file. */
  id: string;
  /** Project whose dev container hosts the process. */
  project_id: string;
  /** Workspace slug recorded at launch. */
  workspace: string;
  /** Display name; startDevServer defaults it to `port-<port>`. */
  name: string;
  /** Command line handed to `bash -lc` at launch. */
  command: string;
  /** Port the process was asked to listen on. */
  port: number;
  /** PID echoed by the launch shell; null when it could not be parsed. */
  pid: number | null;
  /** URL recorded at launch for reaching the server. */
  preview_url: string;
  /**
   * Lifecycle state. Rows are inserted as 'starting' and set to 'stopped'
   * when stopped or replaced.
   * NOTE(review): nothing in the visible part of this file writes 'running' or 'failed'.
   */
  state: 'starting' | 'running' | 'stopped' | 'failed';
  /** Launch time. */
  started_at: Date;
  /** Set when the row is marked stopped. */
  stopped_at: Date | null;
}
|
|
|
|
/**
 * Generate a random lowercase-hex token.
 *
 * Uses the platform CSPRNG rather than a hand-rolled Math.random loop, so
 * generated ids are not predictable from earlier ones.
 *
 * @param bytes Number of random bytes; the result has twice as many characters.
 * @returns `bytes * 2` hex characters.
 */
function randomToken(bytes = 4): string {
  return randomBytes(bytes).toString('hex');
}
|
|
|
|
/**
 * Compute the public preview URL for a port inside a project's dev container.
 *
 * The hostname shape has to match the Host() rules that renderDevCompose
 * bakes into the compose labels. The slot index is the port's offset from
 * PREVIEW_BASE_PORT.
 *
 * @returns The https URL, or null when the port falls outside the
 *          pre-allocated slot range.
 */
function buildPreviewUrl(projectId: string, projectSlug: string, port: number): string | null {
  const slot = port - PREVIEW_BASE_PORT;
  const outOfRange = slot < 0 || slot >= PREVIEW_PORT_COUNT;
  if (outOfRange) {
    return null;
  }
  const hostLabel = `preview-${slot}-${projectSlug}-${projectPreviewToken(projectId)}`;
  return `https://${hostLabel}.${PREVIEW_DOMAIN_BASE_RAW}`;
}
|
|
|
|
/** Inputs for startDevServer. */
export interface StartDevServerOpts {
  /** Project whose dev container runs the server. */
  projectId: string;
  /** Project slug; part of the preview hostname. */
  projectSlug: string;
  /** Command line to launch; run through `bash -lc` from /workspace. */
  command: string;
  /** Port to listen on; must fall inside the preview slot range. */
  port: number;
  /** Optional display name; defaults to `port-<port>`. */
  name?: string;
  /** Workspace the project belongs to; its slug is stored on the row. */
  workspace: VibnWorkspace;
}
|
|
|
|
/**
 * Raised when the requested preview port already has a listener that
 * startDevServer cannot reclaim. Carries the port, the listener's PID (when
 * it could be parsed) and the raw listener description.
 */
export class PortBusyError extends Error {
  public readonly port: number;
  public readonly listenerPid: number | null;
  public readonly listenerCmd: string;

  constructor(port: number, listenerPid: number | null, listenerCmd: string) {
    super(
      `Port ${port} is already in use by pid ${listenerPid ?? '?'} (${listenerCmd}). ` +
        `Stop it first, or pick another port from ${PREVIEW_BASE_PORT}-${PREVIEW_BASE_PORT + PREVIEW_PORT_COUNT - 1}.`,
    );
    this.port = port;
    this.listenerPid = listenerPid;
    this.listenerCmd = listenerCmd;
    this.name = 'PortBusyError';
  }
}
|
|
|
|
/**
 * Raised when a dev server is requested on a port that has no pre-allocated
 * preview slot. Carries the offending port.
 */
export class PortOutOfRangeError extends Error {
  public readonly port: number;

  constructor(port: number) {
    super(
      `Port ${port} is outside the preview slot range ${PREVIEW_BASE_PORT}-${PREVIEW_BASE_PORT + PREVIEW_PORT_COUNT - 1}. ` +
        `Pick a port in that range so the preview URL is reachable through Traefik.`,
    );
    this.port = port;
    this.name = 'PortOutOfRangeError';
  }
}
|
|
|
|
/**
 * Launch a long-running process inside the project's dev container and
 * record it in `fs_dev_servers`.
 *
 * Steps: validate the port, check for an existing listener (reclaiming it
 * when it is this project's own tracked server), launch the command detached
 * with its output redirected to a per-server log file, then insert the row.
 *
 * @param opts Project identity, command, port and optional display name.
 * @returns The row as inserted (state 'starting'); `started_at` is the local
 *          clock at return time, not the database default.
 * @throws PortOutOfRangeError when the port is not an integer inside the
 *         preview slot range.
 * @throws PortBusyError when something other than this project's tracked
 *         server is already listening on the port.
 */
export async function startDevServer(opts: StartDevServerOpts): Promise<DevServerRow> {
  await ensureDevServersTable();

  // 1. Validate slot range — outside this range we couldn't expose
  //    the preview through Traefik anyway (no router pre-allocated).
  //    The integer check matters: NaN, fractions and non-number values make
  //    both range comparisons false, and the port is interpolated into shell
  //    commands below.
  const lastPort = PREVIEW_BASE_PORT + PREVIEW_PORT_COUNT - 1;
  if (
    !Number.isInteger(opts.port) ||
    opts.port < PREVIEW_BASE_PORT ||
    opts.port > lastPort
  ) {
    throw new PortOutOfRangeError(opts.port);
  }

  // 2. Detect listeners on the requested port. ss (iproute2) is the primary
  //    probe; lsof is tried as well and simply contributes nothing when it is
  //    not installed. If a vibn-tracked dev server already owns the port,
  //    mark its row stopped and reuse the slot. If something untracked is
  //    listening, fail loudly so the AI surfaces a real error to the
  //    user instead of silently launching a doomed second process.
  const portCheck = await execInDevContainer({
    projectId: opts.projectId,
    command:
      `ss -tlnpH "sport = :${opts.port}" 2>/dev/null | head -1; ` +
      // also include any process listening (without name resolution) as a fallback
      `lsof -iTCP:${opts.port} -sTCP:LISTEN -n -P 2>/dev/null | tail -n +2 | head -1 || true`,
    timeoutMs: 5_000,
  });
  const listenerLine = portCheck.stdout.trim();
  if (listenerLine) {
    // Try to extract pid from "users:((\"node\",pid=156,fd=...))" or lsof "node 156 vibn ..."
    const pidMatch = listenerLine.match(/pid=(\d+)/) || listenerLine.match(/^\S+\s+(\d+)/);
    const listenerPid = pidMatch ? parseInt(pidMatch[1], 10) : null;

    const tracked = await queryOne<DevServerRow>(
      `SELECT * FROM fs_dev_servers
        WHERE project_id = $1 AND port = $2 AND state IN ('starting','running')
        ORDER BY started_at DESC LIMIT 1`,
      [opts.projectId, opts.port],
    );
    // NOTE(review): the tracked pid is the `$!` of the launch wrapper, while
    // the listener may be a child of it — confirm this equality actually
    // matches for typical dev servers.
    if (tracked && tracked.pid && listenerPid && tracked.pid === listenerPid) {
      // Same project owns the port via a tracked row. Reap it cleanly
      // so the new start has a clean slot. AI's expected behaviour is
      // "I want THIS command on THIS port" — so we honour the
      // most-recent-write-wins intent rather than throwing.
      await execInDevContainer({
        projectId: opts.projectId,
        command: `kill ${tracked.pid} 2>/dev/null || true; sleep 0.3`,
        timeoutMs: 5_000,
      });
      await query(
        `UPDATE fs_dev_servers SET state='stopped', stopped_at=now() WHERE id = $1`,
        [tracked.id],
      );
    } else {
      throw new PortBusyError(opts.port, listenerPid, listenerLine.slice(0, 200));
    }
  }

  // 3. Launch.
  const id = `ds_${randomToken(6)}`;
  const name = opts.name ?? `port-${opts.port}`;
  const previewUrl =
    buildPreviewUrl(opts.projectId, opts.projectSlug, opts.port) ??
    `https://localhost-only:${opts.port}`;
  const logFile = `/var/log/vibn-dev/${id}.log`;

  // Detach with nohup + `&` and echo the background PID so it can be recorded.
  const launch =
    `mkdir -p /var/log/vibn-dev && ` +
    `cd /workspace && ` +
    `nohup env HOST=0.0.0.0 PORT=${opts.port} VIBN_DEV_SERVER_ID=${id} ` +
    `bash -lc ${shellEscape(opts.command)} > ${logFile} 2>&1 & ` +
    `echo $!`;

  const result = await execInDevContainer({
    projectId: opts.projectId,
    command: launch,
    timeoutMs: 5_000,
  });
  // Unparseable output yields NaN, which is stored and returned as null.
  const parsedPid = parseInt(result.stdout.trim(), 10);
  const pid = Number.isFinite(parsedPid) ? parsedPid : null;

  await query(
    `INSERT INTO fs_dev_servers
       (id, project_id, workspace, name, command, port, pid, preview_url, state)
     VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)`,
    [
      id,
      opts.projectId,
      opts.workspace.slug,
      name,
      opts.command,
      opts.port,
      pid,
      previewUrl,
      'starting',
    ],
  );

  return {
    id,
    project_id: opts.projectId,
    workspace: opts.workspace.slug,
    name,
    command: opts.command,
    port: opts.port,
    pid,
    preview_url: previewUrl,
    state: 'starting',
    started_at: new Date(),
    stopped_at: null,
  };
}
|
|
|
|
/**
 * List a project's dev servers that are not marked 'stopped', newest first.
 */
export async function listDevServers(projectId: string): Promise<DevServerRow[]> {
  await ensureDevServersTable();
  const listSql = `SELECT * FROM fs_dev_servers WHERE project_id = $1 AND state != 'stopped' ORDER BY started_at DESC`;
  const servers = await query<DevServerRow>(listSql, [projectId]);
  return servers;
}
|
|
|
|
/**
 * Stop a tracked dev server and mark its row 'stopped'.
 *
 * The lookup is scoped to both the server id and the project id. When a PID
 * was recorded, a `kill` is attempted inside the container; any failure of
 * that exec is ignored and the row is marked stopped regardless.
 *
 * @throws Error when no row matches the id for this project.
 */
export async function stopDevServer(projectId: string, id: string): Promise<void> {
  await ensureDevServersTable();

  const lookupSql = `SELECT * FROM fs_dev_servers WHERE id = $1 AND project_id = $2 LIMIT 1`;
  const server = await queryOne<DevServerRow>(lookupSql, [id, projectId]);
  if (!server) {
    throw new Error(`Dev server ${id} not found`);
  }

  if (server.pid) {
    const killCommand = `kill ${server.pid} 2>/dev/null || true`;
    try {
      await execInDevContainer({ projectId, command: killCommand, timeoutMs: 3_000 });
    } catch {}
  }

  const markStoppedSql = `UPDATE fs_dev_servers SET state = 'stopped', stopped_at = now() WHERE id = $1`;
  await query(markStoppedSql, [id]);
}
|
|
|
|
/**
 * Return the last lines of a dev server's log file from inside the container.
 *
 * @param projectId Project whose dev container holds the log.
 * @param id        Dev server id; must consist of letters, digits, `_` or `-`
 *                  because it is interpolated into a shell command and path.
 * @param lines     Number of lines to return; truncated to an integer and
 *                  clamped to 1..2000. Non-finite values fall back to 200.
 * @returns The exec's stdout: the log tail, or '(no log yet)' when `tail`
 *          fails.
 * @throws Error when `id` contains characters outside the allowed set.
 *         Errors from execInDevContainer propagate.
 */
export async function tailDevServerLog(
  projectId: string,
  id: string,
  lines = 200,
): Promise<string> {
  // Reject anything that could break out of the path or the command line.
  if (!/^[A-Za-z0-9_-]+$/.test(id)) {
    throw new Error(`Invalid dev server id: ${JSON.stringify(id)}`);
  }
  // NaN would survive Math.min/Math.max and end up as `tail -n NaN`.
  const requested = Number.isFinite(lines) ? Math.trunc(lines) : 200;
  const lineCount = Math.max(1, Math.min(2000, requested));

  const r = await execInDevContainer({
    projectId,
    command: `tail -n ${lineCount} /var/log/vibn-dev/${id}.log 2>/dev/null || echo '(no log yet)'`,
    timeoutMs: 5_000,
  });
  return r.stdout;
}
|
|
|
|
// ── Auto-push autosave ───────────────────────────────────────────────
|
|
//
|
|
// Treats Gitea as the canonical store; the container disk is ephemeral.
|
|
// On every chat turn (or every 5 min, whichever comes first) we push
|
|
// /workspace to a `vibn-autosave/main` branch in the project's repo.
|
|
//
|
|
// We don't try to be clever about what changed — just `git add -A`,
// commit when something is staged, and force-push the autosave branch.
// If /workspace has no `.git` directory yet (fresh project, no
// `git init` done), we skip silently — the AI is responsible for
// `git init` + first push when it scaffolds.
|
|
|
|
/** Inputs for autosaveWorkspace. */
export interface AutosaveOpts {
  /** Project whose /workspace is pushed. */
  projectId: string;
  /** Project slug; default for `repo`. */
  projectSlug: string;
  /** Workspace the project belongs to; its slug is stored on the autosave row. */
  workspace: VibnWorkspace;
  /** Repo name in the workspace's Gitea org. Defaults to projectSlug. */
  repo?: string;
  /** Min interval between autosaves (default 5 min). */
  minIntervalMs?: number;
}
|
|
|
|
/**
 * Push the container's /workspace to the `vibn-autosave/main` branch, at most
 * once per `minIntervalMs`.
 *
 * Skips (with `ran: false` and a reason) when there is no dev container, the
 * container is not 'running', or the last recorded autosave is too recent.
 * Otherwise runs the git script in the container and records its output in
 * `fs_dev_autosaves`.
 *
 * @returns `ran: true` with `pushedAt` when the script was executed and
 *          recorded; `ran: false` with the skip reason or the error message
 *          when the exec or the insert threw.
 *
 * NOTE(review): `ran: true, reason: 'pushed'` means the script ran, not that
 * the push succeeded — the push is piped through `tail`, so its exit status
 * is presumably masked, and the "(no .git, skipping autosave)" path also
 * lands here.
 * NOTE(review): `git checkout -B` switches the working tree's current branch
 * to vibn-autosave/main; confirm that is intended for the user's checkout.
 */
export async function autosaveWorkspace(opts: AutosaveOpts): Promise<{
  ran: boolean;
  reason: string;
  pushedAt?: Date;
}> {
  const row = await getDevContainerRow(opts.projectId);
  if (!row) return { ran: false, reason: 'no dev container' };
  if (row.state !== 'running') return { ran: false, reason: `state=${row.state}` };

  // Create the table BEFORE reading from it, so the throttle lookup does not
  // depend on a swallowed "relation does not exist" error on first use.
  await ensureAutosavesTable();

  // Throttle: don't autosave more than once per minIntervalMs. The lookup
  // stays best-effort: if it fails we autosave rather than abort.
  const minInterval = opts.minIntervalMs ?? 5 * 60_000;
  const last = await queryOne<{ pushed_at: Date }>(
    `SELECT pushed_at FROM fs_dev_autosaves WHERE project_id = $1 ORDER BY pushed_at DESC LIMIT 1`,
    [opts.projectId],
  ).catch(() => null);
  if (last && Date.now() - new Date(last.pushed_at).getTime() < minInterval) {
    return { ran: false, reason: 'throttled' };
  }

  // The git identity config is idempotent; the PAT lives in the container's
  // .netrc. The script itself never initialises a repo — it exits early when
  // /workspace has no .git directory. `repo` is only recorded on the row.
  const repo = opts.repo ?? opts.projectSlug;
  const cmd = `set -e
cd /workspace
if [ ! -d .git ]; then
  echo '(no .git, skipping autosave)'
  exit 0
fi
git config user.email vibn-bot@vibnai.com
git config user.name 'Vibn Autosave'
# Force push to the autosave branch — never collides with main.
git checkout -B vibn-autosave/main 2>&1 | tail -1
git add -A
if git diff --cached --quiet; then
  echo '(no changes)'
else
  git commit -m "autosave $(date -Is)" --quiet
fi
git push -f origin vibn-autosave/main 2>&1 | tail -3`;

  try {
    const r = await execInDevContainer({
      projectId: opts.projectId,
      command: cmd,
      timeoutMs: 30_000,
    });
    // NOTE(review): getDevContainerStatus in this file reads `exitCode` from
    // the same result type while this reads `code`; confirm which field
    // ExecInAppResult actually defines.
    await query(
      `INSERT INTO fs_dev_autosaves (project_id, workspace, repo, output, code)
       VALUES ($1, $2, $3, $4, $5)`,
      [opts.projectId, opts.workspace.slug, repo, (r.stdout + r.stderr).slice(0, 4000), r.code],
    );
    return { ran: true, reason: 'pushed', pushedAt: new Date() };
  } catch (err) {
    return { ran: false, reason: err instanceof Error ? err.message : String(err) };
  }
}
|
|
|
|
// Process-local memo: true once the fs_dev_autosaves DDL has run successfully.
let autosavesTableReady = false;

/**
 * Lazily create `fs_dev_autosaves` and its (project_id, pushed_at DESC) index.
 * The flag is set only after the query resolves.
 */
async function ensureAutosavesTable(): Promise<void> {
  if (!autosavesTableReady) {
    const ddl = `CREATE TABLE IF NOT EXISTS fs_dev_autosaves (
       id         BIGSERIAL PRIMARY KEY,
       project_id TEXT NOT NULL,
       workspace  TEXT NOT NULL,
       repo       TEXT NOT NULL,
       output     TEXT,
       code       INTEGER,
       pushed_at  TIMESTAMPTZ NOT NULL DEFAULT now()
     );
     CREATE INDEX IF NOT EXISTS fs_dev_autosaves_project_idx ON fs_dev_autosaves (project_id, pushed_at DESC);`;
    await query(ddl, []);
    autosavesTableReady = true;
  }
}
|
|
|
|
// ── Idle suspend ─────────────────────────────────────────────────────
|
|
|
|
/** Summary returned by suspendIdleContainers. */
export interface IdleSweepResult {
  /** Number of running rows whose last activity was before the cutoff. */
  scanned: number;
  /** Containers suspended by this sweep, with whole minutes since their last activity. */
  suspended: Array<{ projectId: string; idleMin: number }>;
  /** Containers whose suspend threw, with the error message. */
  errors: Array<{ projectId: string; error: string }>;
}
|
|
|
|
/**
 * Sweep for idle dev containers and suspend them.
 *
 * Selects rows that are 'running' with last_active_at older than
 * `idleMinutes` minutes and suspends them one at a time. A failure on one
 * container is recorded in `errors` and does not stop the sweep. Intended
 * for a once-per-5-min cron; already-suspended containers are not selected.
 *
 * @param idleMinutes Idle threshold in minutes (default 30).
 * @returns Counts and per-container outcomes of this sweep.
 */
export async function suspendIdleContainers(idleMinutes = 30): Promise<IdleSweepResult> {
  await ensureDevContainersTable();

  const idleBefore = new Date(Date.now() - idleMinutes * 60_000);
  const candidates = await query<DevContainerRow>(
    `SELECT * FROM fs_project_dev_containers
      WHERE state = 'running' AND last_active_at < $1`,
    [idleBefore],
  );

  const suspended: IdleSweepResult['suspended'] = [];
  const errors: IdleSweepResult['errors'] = [];

  for (const candidate of candidates) {
    const projectId = candidate.project_id;
    try {
      await suspendDevContainer(projectId);
      // Idle time is measured after the suspend call returns.
      const idleMs = Date.now() - new Date(candidate.last_active_at).getTime();
      suspended.push({ projectId, idleMin: Math.floor(idleMs / 60_000) });
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      errors.push({ projectId, error: message });
    }
  }

  return { scanned: candidates.length, suspended, errors };
}
|