From 07fb3377ad5df51ab59b93396ba00fa3dcc02ca1 Mon Sep 17 00:00:00 2001 From: mawkone Date: Fri, 12 Jun 2026 15:18:34 -0700 Subject: [PATCH] fix(preview): restore resilient zombie-killer logic to auto-restart suspended previews --- .../api/projects/[projectId]/anatomy/route.ts | 76 ++++++++++++++++++- 1 file changed, 75 insertions(+), 1 deletion(-) diff --git a/vibn-frontend/app/api/projects/[projectId]/anatomy/route.ts b/vibn-frontend/app/api/projects/[projectId]/anatomy/route.ts index 57e32c4..917b867 100644 --- a/vibn-frontend/app/api/projects/[projectId]/anatomy/route.ts +++ b/vibn-frontend/app/api/projects/[projectId]/anatomy/route.ts @@ -793,7 +793,81 @@ async function loadPreviews(projectId: string): Promise { [projectId], ); - return sortDevPreviewsFrontendFirst(rows).map((r) => ({ + // Filter out zombies: if a server is marked 'running' but the URL returns a + // 502/503/504/404 or the request fails outright (not a timeout), the process + // died. We mark it stopped and omit it so the UI can trigger an auto-restart. + const activePreviews: typeof rows = []; + + await Promise.all( + rows.map(async (r) => { + if (r.state !== "running") { + activePreviews.push(r); + return; + } + + try { + const controller = new AbortController(); + // We use a short timeout because we don't want to block the anatomy + // response. A slow response doesn't mean it's dead (Next.js might + // just be compiling) — we ONLY want to catch instant 502/503s from Traefik. + const timeout = setTimeout(() => controller.abort(), 2000); + const ping = await fetch(r.preview_url, { + method: "HEAD", + signal: controller.signal, + }); + clearTimeout(timeout); + + // 502/503/504 means Traefik is up but the container isn't answering. + // 404 means Traefik doesn't even know about the route. 
+ if ( + ping.status === 502 || + ping.status === 503 || + ping.status === 504 || + ping.status === 404 + ) { + // GRACE PERIOD: If the server was started less than 60 seconds ago, + // Traefik might return a 502/504 simply because the Node process hasn't + // finished booting and binding to the port yet. Do not kill it! + const ageMs = Date.now() - new Date(r.started_at).getTime(); + if (ageMs < 60_000) { + activePreviews.push(r); // Give it the benefit of the doubt + return; + } + + console.warn( + `[anatomy] Preview zombie detected for ${r.preview_url} (HTTP ${ping.status}, age ${Math.round(ageMs / 1000)}s). Marking stopped.`, + ); + await query( + `UPDATE fs_dev_servers SET state = 'stopped' WHERE id = $1`, + [r.id], + ).catch(() => {}); + } else { + activePreviews.push(r); + } + } catch (e: any) { + // If the fetch aborts due to our 2s timeout, the server is just slow + // (likely doing a cold Webpack compile). DO NOT mark it as a zombie! + // Only kill it if we get a hard DNS/network error that isn't a timeout. + if ( + e.name === "AbortError" || + e.type === "aborted" || + e.message?.includes("timeout") + ) { + activePreviews.push(r); // Benefit of the doubt — it's thinking + } else { + console.warn( + `[anatomy] Preview zombie detected for ${r.preview_url} (${e.message}). Marking stopped.`, + ); + await query( + `UPDATE fs_dev_servers SET state = 'stopped' WHERE id = $1`, + [r.id], + ).catch(() => {}); + } + } + }), + ); + + return sortDevPreviewsFrontendFirst(activePreviews).map((r) => ({ id: r.id, name: r.name, command: r.command ?? undefined,