feat(mcp v2.4.1): apps.containers.{up,ps} + auto-fallback for queued-start
Coolify's POST /services/{uuid}/start writes the rendered compose
files but its Laravel queue worker routinely fails to actually
invoke `docker compose up -d`. Until now agents had to SSH to
recover. For an MVP that promises "tell vibn what app you want,
get a URL", that's unacceptable.
- lib/coolify-compose.ts: composeUp/composeDown/composePs over SSH
via a one-shot docker:cli container that bind-mounts the rendered
compose dir (works around vibn-logs being in docker group but not
having read access to /data/coolify/services).
- apps.create (template + composeRaw pathways) now uses
ensureServiceUp which probes whether Coolify's queue actually
spawned containers and falls back to direct docker compose up -d
if not. Result includes startMethod for visibility.
- apps.containers.up / apps.containers.ps exposed as MCP tools for
recovery scenarios and post-env-change recreations.
- Tenant safety: resolveAppOrService validates uuid against the
caller's project before touching anything on the host.
Made-with: Cursor
This commit is contained in:
@@ -29,6 +29,7 @@ import { VIBN_GCS_LOCATION } from '@/lib/gcp/storage';
|
||||
import { getApplicationRuntimeLogs } from '@/lib/coolify-logs';
|
||||
import { execInCoolifyApp } from '@/lib/coolify-exec';
|
||||
import { isCoolifySshConfigured, runOnCoolifyHost } from '@/lib/coolify-ssh';
|
||||
import { composeUp, composePs, type ResourceKind } from '@/lib/coolify-compose';
|
||||
import { listContainersForApp } from '@/lib/coolify-containers';
|
||||
import {
|
||||
deployApplication,
|
||||
@@ -85,7 +86,7 @@ const GITEA_API_URL = process.env.GITEA_API_URL ?? 'https://git.vibnai.com';
|
||||
export async function GET() {
|
||||
return NextResponse.json({
|
||||
name: 'vibn-mcp',
|
||||
version: '2.4.0',
|
||||
version: '2.4.1',
|
||||
authentication: {
|
||||
scheme: 'Bearer',
|
||||
tokenPrefix: 'vibn_sk_',
|
||||
@@ -115,6 +116,8 @@ export async function GET() {
|
||||
'apps.exec',
|
||||
'apps.volumes.list',
|
||||
'apps.volumes.wipe',
|
||||
'apps.containers.up',
|
||||
'apps.containers.ps',
|
||||
'apps.templates.list',
|
||||
'apps.templates.search',
|
||||
'apps.envs.list',
|
||||
@@ -217,6 +220,10 @@ export async function POST(request: Request) {
|
||||
return await toolAppsVolumesList(principal, params);
|
||||
case 'apps.volumes.wipe':
|
||||
return await toolAppsVolumesWipe(principal, params);
|
||||
case 'apps.containers.up':
|
||||
return await toolAppsContainersUp(principal, params);
|
||||
case 'apps.containers.ps':
|
||||
return await toolAppsContainersPs(principal, params);
|
||||
case 'apps.templates.list':
|
||||
return await toolAppsTemplatesList(params);
|
||||
case 'apps.templates.search':
|
||||
@@ -867,13 +874,10 @@ async function toolAppsCreate(principal: Principal, params: Record<string, any>)
|
||||
}
|
||||
|
||||
let started = false;
|
||||
let startMethod: 'coolify-queue' | 'compose-up' | 'failed' = 'failed';
|
||||
let startDiag = '';
|
||||
if (params.instantDeploy !== false) {
|
||||
try {
|
||||
await startService(created.uuid);
|
||||
started = true;
|
||||
} catch (e) {
|
||||
console.warn('[mcp apps.create/template] service start failed', e);
|
||||
}
|
||||
({ started, startMethod, diag: startDiag } = await ensureServiceUp(created.uuid));
|
||||
}
|
||||
|
||||
return NextResponse.json({
|
||||
@@ -886,9 +890,11 @@ async function toolAppsCreate(principal: Principal, params: Record<string, any>)
|
||||
template: templateSlug,
|
||||
urlsApplied,
|
||||
started,
|
||||
startMethod,
|
||||
...(startDiag ? { startDiag } : {}),
|
||||
note: started
|
||||
? 'Service start was queued. First boot may take 1-5 min while Coolify pulls images and runs migrations. Use apps.logs to monitor.'
|
||||
: 'Service created but not yet started. Call apps.deploy to start it.',
|
||||
? 'Containers are up. First boot may take 1-5 min while images finish pulling and migrations run. Use apps.logs to monitor.'
|
||||
: 'Service created but containers did not start. Call apps.containers.up to retry, or apps.logs to diagnose.',
|
||||
},
|
||||
});
|
||||
}
|
||||
@@ -945,15 +951,11 @@ async function toolAppsCreate(principal: Principal, params: Record<string, any>)
|
||||
}
|
||||
}
|
||||
|
||||
// Optionally start the service
|
||||
let started = false;
|
||||
let startMethod: 'coolify-queue' | 'compose-up' | 'failed' = 'failed';
|
||||
let startDiag = '';
|
||||
if (params.instantDeploy !== false) {
|
||||
try {
|
||||
await startService(created.uuid);
|
||||
started = true;
|
||||
} catch (e) {
|
||||
console.warn('[mcp apps.create/composeRaw] service start failed', e);
|
||||
}
|
||||
({ started, startMethod, diag: startDiag } = await ensureServiceUp(created.uuid));
|
||||
}
|
||||
|
||||
return NextResponse.json({
|
||||
@@ -964,7 +966,9 @@ async function toolAppsCreate(principal: Principal, params: Record<string, any>)
|
||||
url: `https://${fqdn}`,
|
||||
resourceType: 'service',
|
||||
started,
|
||||
note: 'Domain routing for compose services must be configured in Coolify after initial startup — set SERVER_URL env to the desired URL.',
|
||||
startMethod,
|
||||
...(startDiag ? { startDiag } : {}),
|
||||
note: 'Domain routing for compose services must be configured after initial startup — set SERVER_URL env to the desired URL, then call apps.containers.up to apply.',
|
||||
},
|
||||
});
|
||||
}
|
||||
@@ -1036,6 +1040,113 @@ async function toolAppsCreate(principal: Principal, params: Record<string, any>)
|
||||
});
|
||||
}
|
||||
|
||||
// ──────────────────────────────────────────────────
|
||||
// apps.containers.* — direct lifecycle for compose stacks
|
||||
// ──────────────────────────────────────────────────
|
||||
//
|
||||
// These bypass Coolify's queued-start worker (which is unreliable for
|
||||
// compose Services) and run `docker compose up -d` / `ps` against the
|
||||
// rendered compose dir on the Coolify host. Used as the recovery
|
||||
// path when Coolify's start API returns "queued" but no containers
|
||||
// materialise.
|
||||
//
|
||||
// Tenant safety: the uuid is resolved via getApplicationInProject /
|
||||
// getServiceInProject, so a workspace can't drive containers it
|
||||
// doesn't own.
|
||||
|
||||
/**
 * Look up `uuid` inside the caller's Coolify project, first as an
 * Application and then as a compose Service, and report which kind it is
 * so the coolify-compose helpers can be called with the right
 * `ResourceKind`.
 *
 * @param principal Authenticated caller; its Coolify project scopes both lookups.
 * @param uuid      Resource uuid supplied by the caller.
 * @returns `{ uuid, kind }` on success, or a ready-to-send `NextResponse`:
 *          whatever `requireCoolifyProject` produced, 403 for a
 *          `TenantError`, or 404 when neither lookup succeeds.
 * @throws Re-throws application-lookup errors that are neither a
 *         `TenantError` nor a "404 / not found" style message.
 */
async function resolveAppOrService(
  principal: Principal,
  uuid: string,
): Promise<{ uuid: string; kind: ResourceKind } | NextResponse> {
  const projectUuid = requireCoolifyProject(principal);
  if (projectUuid instanceof NextResponse) {
    return projectUuid;
  }

  const forbidden = (err: { message: string }) =>
    NextResponse.json({ error: err.message }, { status: 403 });

  // Attempt 1: treat the uuid as an Application.
  try {
    await getApplicationInProject(uuid, projectUuid);
    return { uuid, kind: 'application' };
  } catch (appErr) {
    const looksMissing = appErr instanceof Error && /404|not found/i.test(appErr.message);
    if (!looksMissing) {
      // Anything other than "not found" ends the resolution here:
      // tenant violations become a 403, everything else propagates.
      if (appErr instanceof TenantError) return forbidden(appErr);
      throw appErr;
    }
    // "Not found" as an application — fall through and try it as a service.
  }

  // Attempt 2: treat the uuid as a compose Service.
  try {
    await getServiceInProject(uuid, projectUuid);
    return { uuid, kind: 'service' };
  } catch (svcErr) {
    if (svcErr instanceof TenantError) return forbidden(svcErr);
    // NOTE(review): unlike the application branch, every non-tenant
    // failure here (not only 404s) is reported as "not found" — confirm
    // this asymmetry is intended.
    return NextResponse.json(
      { error: `App or service ${uuid} not found in this workspace` },
      { status: 404 },
    );
  }
}
|
||||
|
||||
/**
 * MCP tool `apps.containers.up`: bring a stack up by calling `composeUp`
 * for the resolved application or service on the Coolify host.
 *
 * Intended for stacks that Coolify's queued start left without running
 * containers, and for re-applying env var / domain edits.
 *
 * @param principal Authenticated caller; the uuid must resolve inside its project.
 * @param params    Tool arguments; reads `uuid`, falling back to `appUuid`.
 * @returns JSON response:
 *          - 400 when no uuid was given,
 *          - 501 when SSH to the Coolify host is not configured,
 *          - the error response from `resolveAppOrService` when resolution fails,
 *          - otherwise `{ result: { ok, code, stdout, stderr, truncated, durationMs } }`
 *            with stdout/stderr limited to their last 4000 characters.
 */
async function toolAppsContainersUp(principal: Principal, params: Record<string, any>) {
  const requested = params.uuid ?? params.appUuid ?? '';
  const uuid = String(requested).trim();
  if (uuid === '') {
    return NextResponse.json({ error: 'Param "uuid" is required' }, { status: 400 });
  }

  const sshAvailable = isCoolifySshConfigured();
  if (!sshAvailable) {
    return NextResponse.json(
      { error: 'apps.containers.up requires SSH to the Coolify host' },
      { status: 501 },
    );
  }

  // Ownership check happens before anything is run on the host.
  const target = await resolveAppOrService(principal, uuid);
  if (target instanceof NextResponse) {
    return target;
  }

  const startedAt = Date.now();
  const { code, stdout, stderr, truncated } = await composeUp(target.kind, target.uuid, {
    timeoutMs: 600_000,
  });
  const durationMs = Date.now() - startedAt;

  return NextResponse.json({
    result: {
      ok: code === 0,
      code,
      // Only the tail is returned so the response stays small.
      stdout: stdout.slice(-4000),
      stderr: stderr.slice(-4000),
      truncated,
      durationMs,
    },
  });
}
|
||||
|
||||
/**
 * MCP tool `apps.containers.ps`: diagnostic container listing obtained
 * from `composePs` for the resolved application or service.
 *
 * Meant for telling apart containers stuck in `Created`, `Exited`, or
 * `Restarting`; the exact output format is whatever `composePs` emits
 * (presumably `docker compose ps -a` — confirm in lib/coolify-compose).
 *
 * @param principal Authenticated caller; the uuid must resolve inside its project.
 * @param params    Tool arguments; reads `uuid`, falling back to `appUuid`.
 * @returns JSON response:
 *          - 400 when no uuid was given,
 *          - 501 when SSH to the Coolify host is not configured,
 *          - the error response from `resolveAppOrService` when resolution fails,
 *          - otherwise `{ result: { ok, stdout, stderr } }` with stdout limited
 *            to its last 4000 characters and stderr to its last 2000.
 */
async function toolAppsContainersPs(principal: Principal, params: Record<string, any>) {
  const requested = params.uuid ?? params.appUuid ?? '';
  const uuid = String(requested).trim();
  if (uuid === '') {
    return NextResponse.json({ error: 'Param "uuid" is required' }, { status: 400 });
  }

  const sshAvailable = isCoolifySshConfigured();
  if (!sshAvailable) {
    return NextResponse.json(
      { error: 'apps.containers.ps requires SSH to the Coolify host' },
      { status: 501 },
    );
  }

  // Ownership check happens before anything is run on the host.
  const target = await resolveAppOrService(principal, uuid);
  if (target instanceof NextResponse) {
    return target;
  }

  const { code, stdout, stderr } = await composePs(target.kind, target.uuid);
  return NextResponse.json({
    result: {
      ok: code === 0,
      stdout: stdout.slice(-4000),
      stderr: stderr.slice(-2000),
    },
  });
}
|
||||
|
||||
// ──────────────────────────────────────────────────
|
||||
// apps.templates.* — Coolify one-click catalog browse
|
||||
// ──────────────────────────────────────────────────
|
||||
@@ -1106,6 +1217,72 @@ async function toolAppsTemplatesSearch(params: Record<string, any>) {
|
||||
return NextResponse.json({ result: { items } });
|
||||
}
|
||||
|
||||
/**
 * Start a Coolify Service and confirm that containers actually exist,
 * with a fallback path for Coolify's flaky queued-start worker.
 *
 * Strategy:
 *   1. Call `startService(uuid)` (POST /services/{uuid}/start) so
 *      Coolify's records show "starting" and any internal hooks fire.
 *      A failure here is logged and remembered, not fatal.
 *   2. If SSH to the Coolify host is not configured, nothing can be
 *      verified or recovered: the result simply mirrors whether the
 *      start request in step 1 was accepted.
 *   3. Otherwise poll the host (4 probes, 3s apart) for a running
 *      container matching the uuid via `docker ps --filter name={uuid}`.
 *   4. If no container materialised, run `composeUp('service', uuid)`
 *      directly, bypassing the unreliable queue.
 *
 * @param uuid Coolify service uuid. It is interpolated unquoted into a
 *             shell command, so it must be a Coolify-issued identifier,
 *             never raw user input.
 * @returns
 *   - `started`     true when a container was observed, compose up
 *                   exited 0, or (without SSH) Coolify accepted the
 *                   start request
 *   - `startMethod` which path got us there, or `'failed'`
 *   - `diag`        human-readable note for failures (error message or
 *                   the last 400 chars of compose output); empty on success
 */
async function ensureServiceUp(uuid: string): Promise<{
  started: boolean;
  startMethod: 'coolify-queue' | 'compose-up' | 'failed';
  diag: string;
}> {
  const PROBE_ATTEMPTS = 4;
  const PROBE_INTERVAL_MS = 3_000;

  // 1. Ask Coolify nicely, remembering whether the request was accepted.
  let queued = false;
  let queueError = '';
  try {
    await startService(uuid);
    queued = true;
  } catch (e) {
    queueError = e instanceof Error ? e.message : String(e);
    console.warn('[ensureServiceUp] startService failed (will fall back)', e);
  }

  // 2. Without SSH we can neither probe nor fall back, so the only
  //    signal is the start request itself. Do not claim success when
  //    that request failed.
  if (!isCoolifySshConfigured()) {
    return queued
      ? { started: true, startMethod: 'coolify-queue', diag: '' }
      : { started: false, startMethod: 'failed', diag: queueError || 'Coolify start request failed' };
  }

  // 3. Probe — has the queue actually started anything?
  //    Allow up to ~12s for the worker to wake up; checking every 3s.
  for (let i = 0; i < PROBE_ATTEMPTS; i++) {
    await new Promise(r => setTimeout(r, PROBE_INTERVAL_MS));
    try {
      const probe = await runOnCoolifyHost(
        `docker ps --filter name=${uuid} --format '{{.Names}}'`,
        { timeoutMs: 8_000 },
      );
      if (probe.stdout.trim().length > 0) {
        return { started: true, startMethod: 'coolify-queue', diag: '' };
      }
    } catch (e) {
      // A failed probe is treated like "nothing running yet"; keep polling.
      console.warn('[ensureServiceUp] probe failed', e);
    }
  }

  // 4. Fallback — run docker compose up -d ourselves
  try {
    const r = await composeUp('service', uuid, { timeoutMs: 600_000 });
    if (r.code === 0) {
      return { started: true, startMethod: 'compose-up', diag: '' };
    }
    // Non-zero exit but compose ran — capture the tail for diagnosis
    const tail = (r.stderr || r.stdout).trim().slice(-400);
    return { started: false, startMethod: 'failed', diag: tail };
  } catch (e) {
    return { started: false, startMethod: 'failed', diag: e instanceof Error ? e.message : String(e) };
  }
}
|
||||
|
||||
/** Resolve fqdn from params.domain or auto-generate. Returns NextResponse on policy error. */
|
||||
function resolveFqdn(domainParam: unknown, slug: string, appName: string): string | NextResponse {
|
||||
const fqdn = String(domainParam ?? '').trim()
|
||||
|
||||
Reference in New Issue
Block a user