fix(mcp v2.4.2): apps.create reports started=true on partial sidecar failure
Coolify's `compose up -d` returns non-zero whenever any sidecar container hits a `depends_on: condition: service_healthy` timeout. For slow-booting apps like Twenty (where the worker waits ~3 min for twenty's healthcheck), this caused apps.create to return started=false even though the primary stack was running fine. ensureServiceUp now probes the host with `docker ps` after a non-zero compose exit (or a thrown compose error) and returns started=true whenever any container is running. The tail of compose's stderr (or stdout, if stderr is empty) is surfaced in startDiag so agents can decide whether to retry apps.containers.up later.

Made-with: Cursor
This commit is contained in:
@@ -86,7 +86,7 @@ const GITEA_API_URL = process.env.GITEA_API_URL ?? 'https://git.vibnai.com';
|
|||||||
export async function GET() {
|
export async function GET() {
|
||||||
return NextResponse.json({
|
return NextResponse.json({
|
||||||
name: 'vibn-mcp',
|
name: 'vibn-mcp',
|
||||||
version: '2.4.1',
|
version: '2.4.2',
|
||||||
authentication: {
|
authentication: {
|
||||||
scheme: 'Bearer',
|
scheme: 'Bearer',
|
||||||
tokenPrefix: 'vibn_sk_',
|
tokenPrefix: 'vibn_sk_',
|
||||||
@@ -893,8 +893,8 @@ async function toolAppsCreate(principal: Principal, params: Record<string, any>)
|
|||||||
startMethod,
|
startMethod,
|
||||||
...(startDiag ? { startDiag } : {}),
|
...(startDiag ? { startDiag } : {}),
|
||||||
note: started
|
note: started
|
||||||
? 'Containers are up. First boot may take 1-5 min while images finish pulling and migrations run. Use apps.logs to monitor.'
|
? 'Primary containers are up. First boot may take 1-5 min while images finish pulling and migrations run; use apps.logs to monitor. If startDiag mentions a sidecar dependency timeout (workers, schedulers), call apps.containers.up again once the primary is healthy to bring those up.'
|
||||||
: 'Service created but containers did not start. Call apps.containers.up to retry, or apps.logs to diagnose.',
|
: 'Service created but no containers started. Call apps.containers.up to retry; check apps.containers.ps and apps.logs to diagnose.',
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -1270,17 +1270,39 @@ async function ensureServiceUp(uuid: string): Promise<{
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 3. Fallback — run docker compose up -d ourselves
|
// 3. Fallback — run docker compose up -d ourselves
|
||||||
|
let composeDiag = '';
|
||||||
try {
|
try {
|
||||||
const r = await composeUp('service', uuid, { timeoutMs: 600_000 });
|
const r = await composeUp('service', uuid, { timeoutMs: 600_000 });
|
||||||
|
composeDiag = (r.stderr || r.stdout).trim().slice(-400);
|
||||||
if (r.code === 0) {
|
if (r.code === 0) {
|
||||||
return { started: true, startMethod: 'compose-up', diag: '' };
|
return { started: true, startMethod: 'compose-up', diag: '' };
|
||||||
}
|
}
|
||||||
// Non-zero exit but compose ran — capture the tail for diagnosis
|
// Non-zero exit DOES NOT mean nothing started. Compose returns
|
||||||
const tail = (r.stderr || r.stdout).trim().slice(-400);
|
// non-zero whenever any service hits a `depends_on:
|
||||||
return { started: false, startMethod: 'failed', diag: tail };
|
// condition: service_healthy` timeout — common for sidecar
|
||||||
|
// containers (workers, schedulers) of apps with slow-booting
|
||||||
|
// primary services (Twenty's worker waits on twenty's healthcheck,
|
||||||
|
// which takes 2-5 min). Probe the host to see what's actually
|
||||||
|
// running before declaring failure.
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
return { started: false, startMethod: 'failed', diag: e instanceof Error ? e.message : String(e) };
|
composeDiag = e instanceof Error ? e.message : String(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const probe = await runOnCoolifyHost(
|
||||||
|
`docker ps --filter name=${uuid} --format '{{.Names}}'`,
|
||||||
|
{ timeoutMs: 8_000 },
|
||||||
|
);
|
||||||
|
if (probe.stdout.trim().length > 0) {
|
||||||
|
// Something IS running — partial success. Surface the diag so
|
||||||
|
// agents see WHY compose returned non-zero (usually a sidecar
|
||||||
|
// depends_on timeout) but report started=true so happy-path
|
||||||
|
// workflows don't panic.
|
||||||
|
return { started: true, startMethod: 'compose-up', diag: composeDiag };
|
||||||
|
}
|
||||||
|
} catch { /* fall through */ }
|
||||||
|
|
||||||
|
return { started: false, startMethod: 'failed', diag: composeDiag };
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Resolve fqdn from params.domain or auto-generate. Returns NextResponse on policy error. */
|
/** Resolve fqdn from params.domain or auto-generate. Returns NextResponse on policy error. */
|
||||||
|
|||||||
Reference in New Issue
Block a user