diff --git a/app/api/mcp/route.ts b/app/api/mcp/route.ts index 79c2935c..426c2767 100644 --- a/app/api/mcp/route.ts +++ b/app/api/mcp/route.ts @@ -92,7 +92,7 @@ const GITEA_API_URL = process.env.GITEA_API_URL ?? 'https://git.vibnai.com'; export async function GET() { return NextResponse.json({ name: 'vibn-mcp', - version: '2.4.5', + version: '2.4.6', authentication: { scheme: 'Bearer', tokenPrefix: 'vibn_sk_', @@ -124,6 +124,7 @@ export async function GET() { 'apps.volumes.wipe', 'apps.containers.up', 'apps.containers.ps', + 'apps.repair', 'apps.templates.list', 'apps.templates.search', 'apps.envs.list', @@ -230,6 +231,8 @@ export async function POST(request: Request) { return await toolAppsContainersUp(principal, params); case 'apps.containers.ps': return await toolAppsContainersPs(principal, params); + case 'apps.repair': + return await toolAppsRepair(principal, params); case 'apps.templates.list': return await toolAppsTemplatesList(params); case 'apps.templates.search': @@ -1179,6 +1182,78 @@ async function toolAppsContainersPs(principal: Principal, params: Record.loadbalancer. + * server.port label into docker-compose.yml. + * 3. Connect coolify-proxy to the service's project network. + * 4. Force-recreate the public-facing app container. + * 5. Restart coolify-proxy so Traefik re-discovers labels. + * + * Params: + * uuid required — service uuid (the resource, not a single container) + * fqdn required — the public hostname (e.g. "crm.mark.vibnai.com") + * publicAppName required — docker-compose service name of the public app + * (usually equals the template slug: "twenty", "n8n", …) + * port optional — internal port (default: derived per template) + * + * Returns the same { ok, steps } shape as the post-deploy block in + * apps.create plus a final reachability probe. + */ +async function toolAppsRepair(_principal: Principal, params: Record) { + const uuid = String(params.uuid ?? '').trim(); + const fqdn = String(params.fqdn ?? '').trim(); + const publicAppName = String(params.publicAppName ?? '').trim(); + const port = params.port != null ? Number(params.port) : undefined; + if (!uuid || !fqdn || !publicAppName) { + return NextResponse.json( + { error: 'apps.repair requires { uuid, fqdn, publicAppName }' }, + { status: 400 } + ); + } + if (!isCoolifySshConfigured()) { + return NextResponse.json( + { error: 'apps.repair requires SSH to the Coolify host (set COOLIFY_SSH_*)' }, + { status: 501 } + ); + } + const postDeploy = await applyCoolifyPostDeployFixes({ uuid, fqdn, publicAppName, port }); + + let reachable = false; + let probeDiag = ''; + try { + const ctrl = new AbortController(); + const t = setTimeout(() => ctrl.abort(), 12_000); + const res = await fetch(`https://${fqdn}`, { signal: ctrl.signal, redirect: 'manual' }); + clearTimeout(t); + reachable = res.status >= 200 && res.status < 400; + probeDiag = `GET https://${fqdn} → ${res.status}`; + } catch (e) { + probeDiag = `probe failed: ${e instanceof Error ? e.message : String(e)}`; + } + + return NextResponse.json({ + result: { + uuid, + fqdn, + publicAppName, + reachable, + postDeploy, + probe: probeDiag, + note: reachable + ? `Repaired and reachable on https://${fqdn}.` + : `Repair steps applied but probe still failed. Check postDeploy.steps for any "ok: false" entries; otherwise wait 30s and retry the probe.`, + }, + }); +} + // ────────────────────────────────────────────────── // apps.templates.* — Coolify one-click catalog browse // ────────────────────────────────────────────────── @@ -1311,8 +1386,15 @@ async function ensureServiceReachable(opts: { // running:healthy. This field is truthful, unlike service.status // which routinely lies as "starting:unknown" while containers are // actually healthy. + // Coolify's queue worker can take 60-120s to dequeue a start + // request, during which time service.applications[*].status still + // reports the stale `exited` state (= "never started"). We only + // treat `exited` as terminal AFTER we've seen evidence of activity + // (`starting:*` or `running:*`) — otherwise it's just queue lag. const startedAt = Date.now(); let appStatus = 'unknown'; + let sawActivity = false; + let lastExitObservedAt = 0; while (Date.now() - startedAt < healthTimeoutMs) { try { const svc = (await getService(uuid)) as unknown as { @@ -1322,12 +1404,18 @@ async function ensureServiceReachable(opts: { const target = apps.find(a => a.name === publicAppName) ?? apps[0]; appStatus = target?.status ?? 'unknown'; if (/^running:healthy/i.test(appStatus)) break; - // Failure modes Coolify reports as terminal: exited (compose - // never ran), restarting (boot loop). We don't want to wait - // the full timeout in those cases. - if (/^exited/i.test(appStatus) && Date.now() - startedAt > 90_000) { - // Give it 90s to transition out of "exited" before declaring failure - break; + if (/^starting|^running/i.test(appStatus)) { + sawActivity = true; + lastExitObservedAt = 0; + } + // Once we've seen activity, an exited status is terminal — + // boot loop or compose failure. Wait 30s of consecutive + // `exited` to be sure it's not a Compose recreate cycle. + if (sawActivity && /^exited/i.test(appStatus)) { + if (lastExitObservedAt === 0) lastExitObservedAt = Date.now(); + if (Date.now() - lastExitObservedAt > 30_000) break; + } else if (!/^exited/i.test(appStatus)) { + lastExitObservedAt = 0; } } catch (e) { console.warn('[ensureServiceReachable] status probe failed', e);