fix(preview): permanently restore resilient zombie-killer ping to auto-restart suspended containers
This commit is contained in:
@@ -793,7 +793,81 @@ async function loadPreviews(projectId: string): Promise<Preview[]> {
|
||||
[projectId],
|
||||
);
|
||||
|
||||
return sortDevPreviewsFrontendFirst(rows).map((r) => ({
|
||||
// Filter out zombies: if a server is marked 'running' but the URL returns a 50x
|
||||
// Gateway error or a 404, or the request fails outright (timeouts excluded), the process died. We mark it stopped so the
|
||||
// UI can trigger an auto-restart.
|
||||
const activePreviews: typeof rows = [];
|
||||
|
||||
await Promise.all(
|
||||
rows.map(async (r) => {
|
||||
if (r.state !== "running") {
|
||||
activePreviews.push(r);
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const controller = new AbortController();
|
||||
// We use a short timeout because we don't want to block the anatomy
|
||||
// response. A slow response doesn't mean it's dead (Next.js might
|
||||
// just be compiling) — we ONLY want to catch instant 502/503/504s (or a 404) from Traefik.
|
||||
const timeout = setTimeout(() => controller.abort(), 2000);
|
||||
const ping = await fetch(r.preview_url, {
|
||||
method: "HEAD",
|
||||
signal: controller.signal,
|
||||
});
|
||||
clearTimeout(timeout);
|
||||
|
||||
// 502/503/504 means Traefik is up but the container isn't answering.
|
||||
// 404 means Traefik doesn't even know about the route.
|
||||
if (
|
||||
ping.status === 502 ||
|
||||
ping.status === 503 ||
|
||||
ping.status === 504 ||
|
||||
ping.status === 404
|
||||
) {
|
||||
// GRACE PERIOD: If the server was started less than 60 seconds ago,
|
||||
// Traefik might return a 502/504 simply because the Node process hasn't
|
||||
// finished booting and binding to the port yet. Do not kill it!
|
||||
const ageMs = Date.now() - new Date(r.started_at).getTime();
|
||||
if (ageMs < 60_000) {
|
||||
activePreviews.push(r); // Give it the benefit of the doubt
|
||||
return;
|
||||
}
|
||||
|
||||
console.warn(
|
||||
`[anatomy] Preview zombie detected for ${r.preview_url} (HTTP ${ping.status}, age ${Math.round(ageMs / 1000)}s). Marking stopped.`,
|
||||
);
|
||||
await query(
|
||||
`UPDATE fs_dev_servers SET state = 'stopped' WHERE id = $1`,
|
||||
[r.id],
|
||||
).catch(() => {});
|
||||
} else {
|
||||
activePreviews.push(r);
|
||||
}
|
||||
} catch (e: any) {
|
||||
// If the fetch aborts due to our 2s timeout, the server is just slow
|
||||
// (likely doing a cold Webpack compile). DO NOT mark it as a zombie!
|
||||
// Only kill it if we get a hard DNS/network error that isn't a timeout.
|
||||
if (
|
||||
e.name === "AbortError" ||
|
||||
e.type === "aborted" ||
|
||||
e.message?.includes("timeout")
|
||||
) {
|
||||
activePreviews.push(r); // Benefit of the doubt — it's thinking
|
||||
} else {
|
||||
console.warn(
|
||||
`[anatomy] Preview zombie detected for ${r.preview_url} (${e.message}). Marking stopped.`,
|
||||
);
|
||||
await query(
|
||||
`UPDATE fs_dev_servers SET state = 'stopped' WHERE id = $1`,
|
||||
[r.id],
|
||||
).catch(() => {});
|
||||
}
|
||||
}
|
||||
}),
|
||||
);
|
||||
|
||||
return sortDevPreviewsFrontendFirst(activePreviews).map((r) => ({
|
||||
id: r.id,
|
||||
name: r.name,
|
||||
command: r.command ?? undefined,
|
||||
|
||||
Reference in New Issue
Block a user