fix(mcp v2.4.2): apps.create reports started=true on partial sidecar failure
Coolify's `compose up -d` returns a non-zero exit code whenever any sidecar container hits a `depends_on: condition: service_healthy` timeout. For slow-booting apps like Twenty (whose worker waits ~3 min for twenty's healthcheck), this caused apps.create to report started=false even when the primary stack was running fine. ensureServiceUp now probes the host with `docker ps` after a non-zero compose exit and returns started=true whenever any container is running, while surfacing the compose stderr tail in startDiag so agents can decide whether to retry apps.containers.up later. Made-with: Cursor
This commit is contained in:
@@ -86,7 +86,7 @@ const GITEA_API_URL = process.env.GITEA_API_URL ?? 'https://git.vibnai.com';
|
||||
export async function GET() {
|
||||
return NextResponse.json({
|
||||
name: 'vibn-mcp',
|
||||
version: '2.4.1',
|
||||
version: '2.4.2',
|
||||
authentication: {
|
||||
scheme: 'Bearer',
|
||||
tokenPrefix: 'vibn_sk_',
|
||||
@@ -893,8 +893,8 @@ async function toolAppsCreate(principal: Principal, params: Record<string, any>)
|
||||
startMethod,
|
||||
...(startDiag ? { startDiag } : {}),
|
||||
note: started
|
||||
? 'Containers are up. First boot may take 1-5 min while images finish pulling and migrations run. Use apps.logs to monitor.'
|
||||
: 'Service created but containers did not start. Call apps.containers.up to retry, or apps.logs to diagnose.',
|
||||
? 'Primary containers are up. First boot may take 1-5 min while images finish pulling and migrations run; use apps.logs to monitor. If startDiag mentions a sidecar dependency timeout (workers, schedulers), call apps.containers.up again once the primary is healthy to bring those up.'
|
||||
: 'Service created but no containers started. Call apps.containers.up to retry; check apps.containers.ps and apps.logs to diagnose.',
|
||||
},
|
||||
});
|
||||
}
|
||||
@@ -1270,17 +1270,39 @@ async function ensureServiceUp(uuid: string): Promise<{
|
||||
}
|
||||
|
||||
// 3. Fallback — run docker compose up -d ourselves
|
||||
let composeDiag = '';
|
||||
try {
|
||||
const r = await composeUp('service', uuid, { timeoutMs: 600_000 });
|
||||
composeDiag = (r.stderr || r.stdout).trim().slice(-400);
|
||||
if (r.code === 0) {
|
||||
return { started: true, startMethod: 'compose-up', diag: '' };
|
||||
}
|
||||
// Non-zero exit but compose ran — capture the tail for diagnosis
|
||||
const tail = (r.stderr || r.stdout).trim().slice(-400);
|
||||
return { started: false, startMethod: 'failed', diag: tail };
|
||||
// Non-zero exit DOES NOT mean nothing started. Compose returns
|
||||
// non-zero whenever any service hits a `depends_on:
|
||||
// condition: service_healthy` timeout — common for sidecar
|
||||
// containers (workers, schedulers) of apps with slow-booting
|
||||
// primary services (Twenty's worker waits on twenty's healthcheck,
|
||||
// which takes 2-5 min). Probe the host to see what's actually
|
||||
// running before declaring failure.
|
||||
} catch (e) {
|
||||
return { started: false, startMethod: 'failed', diag: e instanceof Error ? e.message : String(e) };
|
||||
composeDiag = e instanceof Error ? e.message : String(e);
|
||||
}
|
||||
|
||||
try {
|
||||
const probe = await runOnCoolifyHost(
|
||||
`docker ps --filter name=${uuid} --format '{{.Names}}'`,
|
||||
{ timeoutMs: 8_000 },
|
||||
);
|
||||
if (probe.stdout.trim().length > 0) {
|
||||
// Something IS running — partial success. Surface the diag so
|
||||
// agents see WHY compose returned non-zero (usually a sidecar
|
||||
// depends_on timeout) but report started=true so happy-path
|
||||
// workflows don't panic.
|
||||
return { started: true, startMethod: 'compose-up', diag: composeDiag };
|
||||
}
|
||||
} catch { /* fall through */ }
|
||||
|
||||
return { started: false, startMethod: 'failed', diag: composeDiag };
|
||||
}
|
||||
|
||||
/** Resolve fqdn from params.domain or auto-generate. Returns NextResponse on policy error. */
|
||||
|
||||
Reference in New Issue
Block a user