diff --git a/vibn-frontend/app/[workspace]/project/[projectId]/(home)/preview/page.tsx b/vibn-frontend/app/[workspace]/project/[projectId]/(home)/preview/page.tsx index cebec62b..9fd30a4b 100644 --- a/vibn-frontend/app/[workspace]/project/[projectId]/(home)/preview/page.tsx +++ b/vibn-frontend/app/[workspace]/project/[projectId]/(home)/preview/page.tsx @@ -51,7 +51,7 @@ export default function PreviewTab() { // Poll every 5s so state transitions (starting→running, build complete, etc.) // surface without a manual refresh. - const { anatomy, loading } = useAnatomy(projectId, { pollMs: 5000 }); + const { anatomy, loading, reload } = useAnatomy(projectId, { pollMs: 5000 }); const previews = anatomy?.hosting.previews ?? []; @@ -112,40 +112,19 @@ export default function PreviewTab() { : `https://${fallbackFqdn}` : null; - // ── Auto-ensure: fire a background restart when the pane loads and finds - // no running dev server, but there's a previous config to restart from. + // ── Auto-ensure: the single entry point that guarantees the preview is live. + // We call it on every mount — even when anatomy already says "running" — + // because a `running` row is only intent; the process may have died + // (idle-stop / OOM / crash / host restart) leaving a dead port behind a + // stale flag. The `ensure` endpoint verifies the port is ACTUALLY answering + // and resurrects it if not, but never bounces a healthy server. That makes + // "open the preview → it loads cleanly" reliable, and keeps the container + // warm (the liveness probe touches activity). const ensureCalledRef = useRef(false); const [ensureStatus, setEnsureStatus] = useState< - "idle" | "calling" | "starting" | "no_history" | "error" + "idle" | "calling" | "starting" | "running" | "no_history" | "error" >("idle"); - useEffect(() => { - // Only trigger once per mount, and only when anatomy has loaded with no running server. 
- if (ensureCalledRef.current) return; - if (loading || !anatomy) return; - if (primaryRunning || primaryStarting) return; // already up or already starting - - ensureCalledRef.current = true; - - fetch(`/api/projects/${projectId}/dev-server/ensure`, { - method: "POST", - credentials: "include", - }) - .then((r) => r.json()) - .then((data: { status?: string }) => { - if (data.status === "no_history" || data.status === "no_container") { - setEnsureStatus("no_history"); - } else if (data.status === "starting" || data.status === "running") { - setEnsureStatus("starting"); - // The 5s anatomy poll will pick up the new 'starting' row and - // transition the pane automatically — no extra work needed here. - } else { - setEnsureStatus("idle"); - } - }) - .catch(() => setEnsureStatus("error")); - }, [loading, anatomy, primaryRunning, primaryStarting, projectId]); - const [iframeSrc, setIframeSrc] = useState(null); const iframeDomRef = useRef(null); const bridge = usePreviewBridge(); @@ -157,21 +136,56 @@ export default function PreviewTab() { const [isForceStarting, setIsForceStarting] = useState(false); - // When the user clicks the manual refresh button in the toolbar, we don't - // just want to reload the iframe — we also want to trigger the same ghost/zombie - // check as the initial mount, in case the server died while they were looking at it. - const prevRefreshKeyRef = useRef(refreshKey); + // Auto-ensure + refresh-heal, in one effect. + // + // On mount (and whenever the refresh button bumps `refreshKey`) we hit the + // `ensure` endpoint, which is the single entry point that guarantees the + // preview is live. We call it even when anatomy already says "running", + // because a `running` row is only intent — the process may have died + // (idle-stop / OOM / crash / host restart) leaving a dead port behind a stale + // flag. 
`ensure` verifies the port is ACTUALLY answering and resurrects it if + // not, but never bounces a healthy server (and the unique index + + // `startDevServer` idempotency mean it can't duplicate one). So "open the + // preview" and "click refresh" both reliably land on a clean, loaded app. + // + // The re-arm is a ref write (not setState), so the effect body stays free of + // synchronous state updates; the only setState calls live in async callbacks. + const lastEnsuredRefreshKeyRef = useRef(refreshKey); useEffect(() => { - if (refreshKey === prevRefreshKeyRef.current) return; - prevRefreshKeyRef.current = refreshKey; - - // We only reset the ensure flag if we aren't currently waiting for a forced start. - // If they hit refresh while it's already booting, don't break the state machine. - if (!isForceStarting) { + if (refreshKey !== lastEnsuredRefreshKeyRef.current && !isForceStarting) { + lastEnsuredRefreshKeyRef.current = refreshKey; ensureCalledRef.current = false; - setEnsureStatus("idle"); } - }, [refreshKey, isForceStarting]); + + if (ensureCalledRef.current) return; + if (loading || !anatomy) return; + ensureCalledRef.current = true; + + fetch(`/api/projects/${projectId}/dev-server/ensure`, { + method: "POST", + credentials: "include", + }) + .then((r) => r.json()) + .then((data: { status?: string }) => { + if (data.status === "no_history" || data.status === "no_container") { + setEnsureStatus("no_history"); + } else if (data.status === "running") { + // Verified live — keep showing the iframe. + setEnsureStatus("running"); + } else if (data.status === "starting") { + // Fresh start or resurrection of a dead server. Flip to warming-up and + // force an immediate anatomy refetch: `ensure` has already marked any + // stale/dead `running` row as stopped, so the refetch drops the + // possibly-502 iframe and shows warming-up without waiting for the 5s + // poll. The readiness probe then carries it to a clean load. 
+ setEnsureStatus("starting"); + reload(); + } else { + setEnsureStatus("idle"); + } + }) + .catch(() => setEnsureStatus("error")); + }, [loading, anatomy, projectId, refreshKey, isForceStarting, reload]); useLayoutEffect(() => { if (!primaryRunning?.url) { diff --git a/vibn-frontend/app/api/projects/[projectId]/dev-server/ensure/route.ts b/vibn-frontend/app/api/projects/[projectId]/dev-server/ensure/route.ts index fac409ca..d0ae4f2f 100644 --- a/vibn-frontend/app/api/projects/[projectId]/dev-server/ensure/route.ts +++ b/vibn-frontend/app/api/projects/[projectId]/dev-server/ensure/route.ts @@ -20,6 +20,7 @@ import { ensureDevContainer, startDevServer, probeDevServerReadiness, + isDevServerListening, } from "@/lib/dev-container"; export async function POST( @@ -55,8 +56,8 @@ export async function POST( const projectSlug = (project.data?.slug as string) || project.id; const projectName = (project.data?.name as string) || "Project"; - // 1. Is a dev server already running or starting on the primary port? - const running = await queryOne<{ + // 1. Is a dev server already active on the primary port? + const active = await queryOne<{ id: string; state: string; preview_url: string; @@ -75,15 +76,39 @@ export async function POST( [projectId], ); - if (running) { + // A `starting` row is mid cold-boot; the readiness probe will promote it to + // `running` once the port answers. Don't disturb it. + if (active?.state === "starting") { return NextResponse.json({ - status: running.state === "running" ? "running" : "starting", - previewUrl: running.preview_url, - command: running.command, - port: running.port, + status: "starting", + previewUrl: active.preview_url, + command: active.command, + port: active.port, }); } + // A `running` row is only a record of intent. Verify the process is ACTUALLY + // listening — it may have died from idle-stop / OOM / crash / host restart, + // which is the #1 cause of "preview was up, now it's a 502". 
Only return + // `running` if the port truly answers; otherwise fall through and resurrect. + if (active?.state === "running") { + const alive = await isDevServerListening(projectId, active.port); + if (alive) { + return NextResponse.json({ + status: "running", + previewUrl: active.preview_url, + command: active.command, + port: active.port, + }); + } + // Dead behind a stale flag. Mark it stopped so the UI stops embedding the + // 502 URL, then fall through to restart it with the same command below. + await query( + `UPDATE fs_dev_servers SET state = 'stopped', stopped_at = now() WHERE id = $1`, + [active.id], + ); + } + // 2. Do we have a previous config to restart from? // (Limit to port 3000 since that's what the preview pane embeds) const last = await queryOne<{ @@ -104,7 +129,13 @@ export async function POST( // If there's no history, we STILL want to auto-start! We just assume it's a standard // Next.js app on port 3000. Forcing the user to hit "Start Preview" on a new project // is unnecessary friction. - const commandToRun = last?.command || "npx next dev -H 0.0.0.0 --webpack"; + // + // Do NOT inject `--webpack`: that overrides the project's own bundler choice + // (Next 16 defaults to Turbopack) and forced the dev server to disagree with + // the project's `package.json` dev script. The default mirrors the script the + // scaffolds actually ship (`next dev -H 0.0.0.0`); a real `last.command` from + // a prior managed start always takes precedence anyway. + const commandToRun = last?.command || "npx next dev -H 0.0.0.0"; const portToRun = last?.port || 3000; const previewUrlToUse = last?.preview_url ?? null; diff --git a/vibn-frontend/lib/dev-container.ts b/vibn-frontend/lib/dev-container.ts index 10ffe490..f3ec452c 100644 --- a/vibn-frontend/lib/dev-container.ts +++ b/vibn-frontend/lib/dev-container.ts @@ -51,7 +51,7 @@ export const VIBN_DEV_IMAGE = process.env.VIBN_DEV_IMAGE ?? "vibn-dev:latest"; /** Resource caps per dev container. 
Tweak in env per-tier later. */ const DEFAULT_CPU_LIMIT = process.env.VIBN_DEV_CPU_LIMIT ?? "1"; // 1 vCPU -const DEFAULT_MEM_LIMIT = process.env.VIBN_DEV_MEM_LIMIT ?? "1g"; // 1 GiB +const DEFAULT_MEM_LIMIT = process.env.VIBN_DEV_MEM_LIMIT ?? "2g"; // 2 GiB — a single Next dev (Turbopack) + npm install OOM-kills at 1 GiB const DEFAULT_DISK_LIMIT = process.env.VIBN_DEV_DISK_LIMIT ?? "10g"; // soft hint, not enforced by compose // ── Schema ─────────────────────────────────────────────────────────── @@ -186,7 +186,7 @@ function renderDevCompose(projectSlug: string, projectId: string): string { image: ${VIBN_DEV_IMAGE} pull_policy: never restart: unless-stopped - command: ["bash", "-c", "echo 'Booting Vibn Container...'; if [ -f /workspace/package.json ]; then echo 'Found package.json, checking deps...'; if [ ! -d /workspace/node_modules ]; then npm install; fi; echo 'Starting dev server...'; npx next dev -H 0.0.0.0 --webpack; else echo 'No package.json found. Standing by...'; sleep infinity; fi"] + command: ["bash", "-c", "echo 'Booting Vibn Container...'; if [ -f /workspace/package.json ] && [ ! -d /workspace/node_modules ]; then echo 'Installing root dependencies...'; npm install; fi; echo 'Container ready — dev server is managed externally via dev_server_start.'; sleep infinity"] working_dir: /workspace volumes: - workspace:/workspace @@ -336,28 +336,14 @@ export async function ensureDevContainer( ], ); - // In Path 2, the dev container natively runs the Next.js server on port 3000. - // We automatically inject the static preview tracking row so the UI sees it instantly. 
- const previewUrl = buildPreviewUrl(opts.projectId, opts.projectSlug, 3000); - if (previewUrl) { - await query( - `INSERT INTO fs_dev_servers - (id, project_id, workspace, name, command, port, preview_url, state) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8) - ON CONFLICT (id) DO UPDATE - SET state = EXCLUDED.state`, - [ - `ds_primary_${opts.projectId.replace(/-/g, "").slice(0, 10)}`, - opts.projectId, - opts.workspace.slug, - "Primary App", - "npx next dev -H 0.0.0.0 --webpack", - 3000, - previewUrl, - "running", - ], - ); - } + // NOTE: We deliberately do NOT seed a `state='running'` dev-server row here. + // The container boots to standby (`sleep infinity`) and the dev server is + // started lazily and exclusively by the managed flow (the preview pane's + // auto-ensure or the AI's `dev_server_start`). Seeding a fake "running" row + // pointed at a server that isn't actually listening produced 502s, and it + // competed with the managed start for port 3000. `startDevServer` + + // `probeDevServerReadiness` now own the row's lifecycle and only mark it + // `running` once the port truly answers. // Bookkeeping link so apps_list / projects_get see the dev container // under the right Vibn project. @@ -805,6 +791,41 @@ export function ensurePreviewListenAllInterfaces(command: string): string { return universalEnv + cmd; } +/** + * Fast one-shot liveness check: is *something* answering HTTP on `port` inside + * the dev container right now? Any HTTP status (even 404/500) counts as alive; + * only a refused/timed-out connection (curl yields `000`) means dead. Worst case + * ~3s. + * + * This exists because a `state='running'` row in fs_dev_servers is only a record + * of intent — the actual process can die out from under it (container idle-stop, + * OOM-kill, crash, host restart) with nothing to update the row. Trusting the + * flag blindly makes the preview embed a dead URL → 502. Callers use this to + * verify-then-resurrect instead. 
/**
 * Fast one-shot liveness check: is anything answering HTTP on `port` inside the
 * project's dev container right now?
 *
 * Any real HTTP status (even 404/500) counts as alive. A refused or timed-out
 * connection makes curl report the pseudo-status `000`, which counts as dead.
 * Two hosts are probed in order (`localhost`, then `0.0.0.0`), each capped at
 * 2 seconds, so the worst case is roughly 4 seconds.
 *
 * @param projectId - Project whose dev container is probed.
 * @param port - TCP port the dev server is expected to listen on.
 * @returns `true` if at least one probe received an HTTP response; `false` if
 *   every probe failed, the port is not a valid TCP port, or the container
 *   command itself threw (so callers take the (re)start path).
 */
export async function isDevServerListening(
  projectId: string,
  port: number,
): Promise<boolean> {
  // Coerce before validating: the value is interpolated into a shell command,
  // so anything that is not a plain TCP port number must never reach it.
  const portNumber = Number(port);
  if (!Number.isInteger(portNumber) || portNumber < 1 || portNumber > 65535) {
    return false;
  }

  // Each probe prints its own status code on its own line. curl writes the
  // `-w` output even when the connection fails (as `000`) and ALSO exits
  // non-zero, so chaining probes with `||` inside a single `$(...)` would
  // concatenate the codes ("000000000") and a dead port would no longer compare
  // equal to "000". Keeping one code per line and deciding in TypeScript avoids
  // that. `&& break` stops after the first host that answers; the trailing
  // `true` keeps the overall exit status zero when every probe fails.
  const command =
    `for host in localhost 0.0.0.0; do ` +
    `curl -s -o /dev/null -w '%{http_code}\\n' --max-time 2 --connect-timeout 2 ` +
    `"http://$host:${portNumber}/" 2>/dev/null && break; ` +
    `done; true`;

  try {
    const r = await execInDevContainer({
      projectId,
      command,
      timeoutMs: 8_000,
    });
    return sawHttpResponse(r.stdout ?? "");
  } catch {
    // Container itself is unreachable (down/provisioning). Report not-listening
    // so the caller takes the (re)start path rather than embedding a dead iframe.
    return false;
  }
}

/** True when `stdout` contains at least one standalone 3-digit HTTP status other than curl's `000`. */
function sawHttpResponse(stdout: string): boolean {
  return stdout
    .split(/\s+/)
    .some((token) => /^\d{3}$/.test(token) && token !== "000");
}