feat(mcp v2.4.1): apps.containers.{up,ps} + auto-fallback for queued-start

Coolify's POST /services/{uuid}/start writes the rendered compose
files but its Laravel queue worker routinely fails to actually
invoke `docker compose up -d`. Until now agents had to SSH to
recover. For an MVP that promises "tell vibn what app you want,
get a URL", that's unacceptable.

- lib/coolify-compose.ts: composeUp/composeDown/composePs over SSH
  via a one-shot docker:cli container that bind-mounts the rendered
  compose dir (works around vibn-logs being in docker group but not
  having read access to /data/coolify/services).
- apps.create (template + composeRaw pathways) now uses
  ensureServiceUp which probes whether Coolify's queue actually
  spawned containers and falls back to direct docker compose up -d
  if not. Result includes startMethod for visibility.
- apps.containers.up / apps.containers.ps exposed as MCP tools for
  recovery scenarios and post-env-change recreations.
- Tenant safety: resolveAppOrService validates uuid against the
  caller's project before touching anything on the host.

Made-with: Cursor
This commit is contained in:
2026-04-23 18:41:42 -07:00
parent e453e780cc
commit 62cb77b5a7
2 changed files with 320 additions and 17 deletions

View File

@@ -29,6 +29,7 @@ import { VIBN_GCS_LOCATION } from '@/lib/gcp/storage';
import { getApplicationRuntimeLogs } from '@/lib/coolify-logs';
import { execInCoolifyApp } from '@/lib/coolify-exec';
import { isCoolifySshConfigured, runOnCoolifyHost } from '@/lib/coolify-ssh';
import { composeUp, composePs, type ResourceKind } from '@/lib/coolify-compose';
import { listContainersForApp } from '@/lib/coolify-containers';
import {
deployApplication,
@@ -85,7 +86,7 @@ const GITEA_API_URL = process.env.GITEA_API_URL ?? 'https://git.vibnai.com';
export async function GET() {
return NextResponse.json({
name: 'vibn-mcp',
version: '2.4.0',
version: '2.4.1',
authentication: {
scheme: 'Bearer',
tokenPrefix: 'vibn_sk_',
@@ -115,6 +116,8 @@ export async function GET() {
'apps.exec',
'apps.volumes.list',
'apps.volumes.wipe',
'apps.containers.up',
'apps.containers.ps',
'apps.templates.list',
'apps.templates.search',
'apps.envs.list',
@@ -217,6 +220,10 @@ export async function POST(request: Request) {
return await toolAppsVolumesList(principal, params);
case 'apps.volumes.wipe':
return await toolAppsVolumesWipe(principal, params);
case 'apps.containers.up':
return await toolAppsContainersUp(principal, params);
case 'apps.containers.ps':
return await toolAppsContainersPs(principal, params);
case 'apps.templates.list':
return await toolAppsTemplatesList(params);
case 'apps.templates.search':
@@ -867,13 +874,10 @@ async function toolAppsCreate(principal: Principal, params: Record<string, any>)
}
let started = false;
let startMethod: 'coolify-queue' | 'compose-up' | 'failed' = 'failed';
let startDiag = '';
if (params.instantDeploy !== false) {
try {
await startService(created.uuid);
started = true;
} catch (e) {
console.warn('[mcp apps.create/template] service start failed', e);
}
({ started, startMethod, diag: startDiag } = await ensureServiceUp(created.uuid));
}
return NextResponse.json({
@@ -886,9 +890,11 @@ async function toolAppsCreate(principal: Principal, params: Record<string, any>)
template: templateSlug,
urlsApplied,
started,
startMethod,
...(startDiag ? { startDiag } : {}),
note: started
? 'Service start was queued. First boot may take 1-5 min while Coolify pulls images and runs migrations. Use apps.logs to monitor.'
: 'Service created but not yet started. Call apps.deploy to start it.',
? 'Containers are up. First boot may take 1-5 min while images finish pulling and migrations run. Use apps.logs to monitor.'
: 'Service created but containers did not start. Call apps.containers.up to retry, or apps.logs to diagnose.',
},
});
}
@@ -945,15 +951,11 @@ async function toolAppsCreate(principal: Principal, params: Record<string, any>)
}
}
// Optionally start the service
let started = false;
let startMethod: 'coolify-queue' | 'compose-up' | 'failed' = 'failed';
let startDiag = '';
if (params.instantDeploy !== false) {
try {
await startService(created.uuid);
started = true;
} catch (e) {
console.warn('[mcp apps.create/composeRaw] service start failed', e);
}
({ started, startMethod, diag: startDiag } = await ensureServiceUp(created.uuid));
}
return NextResponse.json({
@@ -964,7 +966,9 @@ async function toolAppsCreate(principal: Principal, params: Record<string, any>)
url: `https://${fqdn}`,
resourceType: 'service',
started,
note: 'Domain routing for compose services must be configured in Coolify after initial startup — set SERVER_URL env to the desired URL.',
startMethod,
...(startDiag ? { startDiag } : {}),
note: 'Domain routing for compose services must be configured after initial startup — set SERVER_URL env to the desired URL, then call apps.containers.up to apply.',
},
});
}
@@ -1036,6 +1040,113 @@ async function toolAppsCreate(principal: Principal, params: Record<string, any>)
});
}
// ──────────────────────────────────────────────────
// apps.containers.* — direct lifecycle for compose stacks
// ──────────────────────────────────────────────────
//
// These bypass Coolify's queued-start worker (which is unreliable for
// compose Services) and run `docker compose up -d` / `ps` against the
// rendered compose dir on the Coolify host. Used as the recovery
// path when Coolify's start API returns "queued" but no containers
// materialise.
//
// Tenant safety: the uuid is resolved via getApplicationInProject /
// getServiceInProject, so a workspace can't drive containers it
// doesn't own.
/**
 * Resolve a uuid to either an Application or a compose Service owned by
 * the caller's project. The returned `kind` is the canonical resource
 * kind expected by the coolify-compose helpers. Returns a NextResponse
 * (403/404) on policy errors or when the uuid matches nothing in the
 * workspace.
 */
async function resolveAppOrService(
  principal: Principal,
  uuid: string,
): Promise<{ uuid: string; kind: ResourceKind } | NextResponse> {
  const projectUuid = requireCoolifyProject(principal);
  if (projectUuid instanceof NextResponse) return projectUuid;

  // First: is it an Application in this project?
  try {
    await getApplicationInProject(uuid, projectUuid);
    return { uuid, kind: 'application' };
  } catch (e) {
    const looksLikeNotFound = e instanceof Error && /404|not found/i.test(e.message);
    if (!looksLikeNotFound) {
      // Tenant violations become 403; anything else unexpected bubbles up.
      if (e instanceof TenantError) {
        return NextResponse.json({ error: e.message }, { status: 403 });
      }
      throw e;
    }
    // Not-found here is fine — fall through to the Service lookup.
  }

  // Second: is it a compose Service in this project?
  try {
    await getServiceInProject(uuid, projectUuid);
    return { uuid, kind: 'service' };
  } catch (e) {
    if (e instanceof TenantError) {
      return NextResponse.json({ error: e.message }, { status: 403 });
    }
    return NextResponse.json({ error: `App or service ${uuid} not found in this workspace` }, { status: 404 });
  }
}
/**
 * apps.containers.up — run `docker compose up -d` against the rendered
 * compose dir on the Coolify host.
 *
 * Use when Coolify's queued-start left the stack in "Created" or
 * "no containers" state, or after editing env vars / domains to
 * apply the changes (compose env file is regenerated; containers
 * need to be recreated to pick it up).
 *
 * Idempotent — already-running containers are no-op'd. Returns
 * `{ ok, code, stdout, stderr, durationMs }` so agents can show the
 * user what happened.
 */
async function toolAppsContainersUp(principal: Principal, params: Record<string, any>) {
  // Accept either param name; trim so whitespace-only is rejected.
  const uuid = String(params.uuid ?? params.appUuid ?? '').trim();
  if (!uuid) {
    return NextResponse.json({ error: 'Param "uuid" is required' }, { status: 400 });
  }
  if (!isCoolifySshConfigured()) {
    return NextResponse.json({ error: 'apps.containers.up requires SSH to the Coolify host' }, { status: 501 });
  }

  // Tenant check: uuid must belong to the caller's project.
  const target = await resolveAppOrService(principal, uuid);
  if (target instanceof NextResponse) return target;

  const startedAt = Date.now();
  // Generous timeout: first `up` may pull images.
  const result = await composeUp(target.kind, target.uuid, { timeoutMs: 600_000 });
  const elapsed = Date.now() - startedAt;

  return NextResponse.json({
    result: {
      ok: result.code === 0,
      code: result.code,
      // Tail-truncate output so responses stay bounded.
      stdout: result.stdout.slice(-4000),
      stderr: result.stderr.slice(-4000),
      truncated: result.truncated,
      durationMs: elapsed,
    },
  });
}
/**
 * apps.containers.ps — `docker compose ps -a` for diagnostics.
 *
 * Returns a one-line-per-container summary including names, image,
 * state, and exit codes. Use to check whether containers are stuck
 * in `Created` (Coolify queued-start failure) vs `Exited` (app crash)
 * vs `Restarting` (boot loop).
 */
async function toolAppsContainersPs(principal: Principal, params: Record<string, any>) {
  const uuid = String(params.uuid ?? params.appUuid ?? '').trim();
  if (!uuid) {
    return NextResponse.json({ error: 'Param "uuid" is required' }, { status: 400 });
  }
  if (!isCoolifySshConfigured()) {
    return NextResponse.json({ error: 'apps.containers.ps requires SSH to the Coolify host' }, { status: 501 });
  }

  // Tenant check: uuid must belong to the caller's project.
  const target = await resolveAppOrService(principal, uuid);
  if (target instanceof NextResponse) return target;

  const psResult = await composePs(target.kind, target.uuid);
  return NextResponse.json({
    result: {
      ok: psResult.code === 0,
      // Tail-truncate so responses stay bounded.
      stdout: psResult.stdout.slice(-4000),
      stderr: psResult.stderr.slice(-2000),
    },
  });
}
// ──────────────────────────────────────────────────
// apps.templates.* — Coolify one-click catalog browse
// ──────────────────────────────────────────────────
@@ -1106,6 +1217,72 @@ async function toolAppsTemplatesSearch(params: Record<string, any>) {
return NextResponse.json({ result: { items } });
}
/**
 * Ensure a Coolify Service is actually running (containers exist and
 * are healthy/starting), with a fallback path for Coolify's flaky
 * queued-start worker.
 *
 * Strategy:
 * 1. Call POST /services/{uuid}/start so Coolify's records show
 *    "starting" and any internal hooks fire.
 * 2. Wait briefly, then probe the host for any container belonging
 *    to this service via `docker ps --filter name={uuid}`.
 * 3. If no containers materialised, run `docker compose up -d`
 *    directly via SSH against the rendered compose dir. This is
 *    the same command Coolify's worker would run; we just bypass
 *    the unreliable queue.
 *
 * Without SSH configured we cannot probe or fall back, so we report
 * `started` only when the start API call itself succeeded — we no
 * longer claim success when even the queueing failed.
 *
 * Returns:
 *   started      true if at least one container is running for this service
 *                (or, without SSH, the start was at least queued)
 *   startMethod  which path got us there
 *   diag         human-readable note for failures (truncated stderr)
 */
async function ensureServiceUp(uuid: string): Promise<{
  started: boolean;
  startMethod: 'coolify-queue' | 'compose-up' | 'failed';
  diag: string;
}> {
  // 1. Ask Coolify nicely. Track whether the queueing itself worked —
  //    if SSH is unavailable this is the only signal we have.
  let queued = true;
  let queueErr = '';
  try {
    await startService(uuid);
  } catch (e) {
    queued = false;
    queueErr = e instanceof Error ? e.message : String(e);
    console.warn('[ensureServiceUp] startService failed (will fall back)', e);
  }
  // 2. Probe — has the queue actually started anything?
  if (!isCoolifySshConfigured()) {
    // No SSH: we cannot verify or recover. Previously this path
    // reported started=true even when startService threw; be honest.
    return queued
      ? { started: true, startMethod: 'coolify-queue', diag: '' }
      : {
          started: false,
          startMethod: 'failed',
          diag: `Coolify start API failed and no SSH fallback is configured: ${queueErr.slice(-400)}`,
        };
  }
  // Allow up to ~12s for the worker to wake up; checking every 3s.
  for (let i = 0; i < 4; i++) {
    await new Promise(r => setTimeout(r, 3_000));
    try {
      // `--filter name=` is a substring match on container names;
      // Coolify embeds the service uuid in them.
      const probe = await runOnCoolifyHost(
        `docker ps --filter name=${uuid} --format '{{.Names}}'`,
        { timeoutMs: 8_000 },
      );
      if (probe.stdout.trim().length > 0) {
        return { started: true, startMethod: 'coolify-queue', diag: '' };
      }
    } catch (e) {
      console.warn('[ensureServiceUp] probe failed', e);
    }
  }
  // 3. Fallback — run docker compose up -d ourselves
  try {
    const r = await composeUp('service', uuid, { timeoutMs: 600_000 });
    if (r.code === 0) {
      return { started: true, startMethod: 'compose-up', diag: '' };
    }
    // Non-zero exit but compose ran — capture the tail for diagnosis
    const tail = (r.stderr || r.stdout).trim().slice(-400);
    return { started: false, startMethod: 'failed', diag: tail };
  } catch (e) {
    return { started: false, startMethod: 'failed', diag: e instanceof Error ? e.message : String(e) };
  }
}
/** Resolve fqdn from params.domain or auto-generate. Returns NextResponse on policy error. */
function resolveFqdn(domainParam: unknown, slug: string, appName: string): string | NextResponse {
const fqdn = String(domainParam ?? '').trim()