fix(preview): restore resilient zombie-killer logic to auto-restart suspended previews
This commit is contained in:
@@ -793,7 +793,81 @@ async function loadPreviews(projectId: string): Promise<Preview[]> {
|
|||||||
[projectId],
|
[projectId],
|
||||||
);
|
);
|
||||||
|
|
||||||
return sortDevPreviewsFrontendFirst(rows).map((r) => ({
|
// Filter out zombies: if a server is marked 'running' but the URL returns a 50x
|
||||||
|
// Gateway error or a 404, or the request fails with a hard (non-timeout) network error, the process died. We mark it stopped so the
|
||||||
|
// UI can trigger an auto-restart.
|
||||||
|
const activePreviews: typeof rows = [];
|
||||||
|
|
||||||
|
await Promise.all(
|
||||||
|
rows.map(async (r) => {
|
||||||
|
if (r.state !== "running") {
|
||||||
|
activePreviews.push(r);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const controller = new AbortController();
|
||||||
|
// We use a short timeout because we don't want to block the anatomy
|
||||||
|
// response. A slow response doesn't mean it's dead (Next.js might
|
||||||
|
// just be compiling) — we ONLY want to catch instant 502/503/504 (or 404) responses from Traefik.
|
||||||
|
const timeout = setTimeout(() => controller.abort(), 2000);
|
||||||
|
const ping = await fetch(r.preview_url, {
|
||||||
|
method: "HEAD",
|
||||||
|
signal: controller.signal,
|
||||||
|
});
|
||||||
|
clearTimeout(timeout);
|
||||||
|
|
||||||
|
// 502/503/504 means Traefik is up but the container isn't answering.
|
||||||
|
// 404 means Traefik doesn't even know about the route.
|
||||||
|
if (
|
||||||
|
ping.status === 502 ||
|
||||||
|
ping.status === 503 ||
|
||||||
|
ping.status === 504 ||
|
||||||
|
ping.status === 404
|
||||||
|
) {
|
||||||
|
// GRACE PERIOD: If the server was started less than 60 seconds ago,
|
||||||
|
// Traefik might return a 502/504 simply because the Node process hasn't
|
||||||
|
// finished booting and binding to the port yet. Do not kill it!
|
||||||
|
const ageMs = Date.now() - new Date(r.started_at).getTime();
|
||||||
|
if (ageMs < 60_000) {
|
||||||
|
activePreviews.push(r); // Give it the benefit of the doubt
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
console.warn(
|
||||||
|
`[anatomy] Preview zombie detected for ${r.preview_url} (HTTP ${ping.status}, age ${Math.round(ageMs / 1000)}s). Marking stopped.`,
|
||||||
|
);
|
||||||
|
await query(
|
||||||
|
`UPDATE fs_dev_servers SET state = 'stopped' WHERE id = $1`,
|
||||||
|
[r.id],
|
||||||
|
).catch(() => {});
|
||||||
|
} else {
|
||||||
|
activePreviews.push(r);
|
||||||
|
}
|
||||||
|
} catch (e: any) {
|
||||||
|
// If the fetch aborts due to our 2s timeout, the server is just slow
|
||||||
|
// (likely doing a cold Webpack compile). DO NOT mark it as a zombie!
|
||||||
|
// Only kill it if we get a hard DNS/network error that isn't a timeout.
|
||||||
|
if (
|
||||||
|
e.name === "AbortError" ||
|
||||||
|
e.type === "aborted" ||
|
||||||
|
e.message?.includes("timeout")
|
||||||
|
) {
|
||||||
|
activePreviews.push(r); // Benefit of the doubt — it's thinking
|
||||||
|
} else {
|
||||||
|
console.warn(
|
||||||
|
`[anatomy] Preview zombie detected for ${r.preview_url} (${e.message}). Marking stopped.`,
|
||||||
|
);
|
||||||
|
await query(
|
||||||
|
`UPDATE fs_dev_servers SET state = 'stopped' WHERE id = $1`,
|
||||||
|
[r.id],
|
||||||
|
).catch(() => {});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
return sortDevPreviewsFrontendFirst(activePreviews).map((r) => ({
|
||||||
id: r.id,
|
id: r.id,
|
||||||
name: r.name,
|
name: r.name,
|
||||||
command: r.command ?? undefined,
|
command: r.command ?? undefined,
|
||||||
|
|||||||
Reference in New Issue
Block a user