Files
vibn-frontend/lib/coolify-compose.ts
Mark Henderson 8b5c876f91 fix(mcp v2.4.3): attach stack containers to coolify proxy network
The Twenty (and any service-template) stack was reachable on its private
project network but invisible to coolify-proxy/Traefik because no
container was joined to the `coolify` network. Public URLs like
crm.mark.vibnai.com returned 503 "no available server" even though the
underlying app was healthy.

Coolify's UI deploy attaches the proxy network as a post-step after the
full stack is up. When a sidecar (e.g. Twenty's worker, which waits ~3
min on twenty's healthcheck) fails its depends_on gate, that post-step
can be skipped and the stack is left isolated.

composeUp now calls attachToCoolifyProxyNetwork() after compose
finishes (best-effort, idempotent), and ensureServiceUp does the same
on the Coolify-queue happy path. A single apps.create call should now
result in a publicly reachable app.

Made-with: Cursor
2026-04-27 12:08:27 -07:00

166 lines
6.3 KiB
TypeScript

/**
* Bring a Coolify Service or compose Application up via raw
* `docker compose up -d`.
*
* Why this exists
* ---------------
* Coolify's `POST /services/{uuid}/start` and `POST /deploy` endpoints
* write the rendered docker-compose.yml + .env to
* `/data/coolify/services/{uuid}/` (or `applications/{uuid}/` for
* compose apps), then enqueue a Laravel job to run
* `docker compose up -d`. In practice that worker queue is unreliable:
* it routinely returns "Service starting request queued" and then
* never actually invokes docker compose. The user's stack just sits
* there with rendered files and no containers.
*
* For a hands-off SaaS we can't ship that experience. This helper
* does the work directly via SSH, so a single MCP `apps.create` call
* really does leave a running app.
*
* Permissions model
* -----------------
* The `vibn-logs` SSH user (created by deploy/setup-coolify-ssh.sh)
* is in the `docker` group but has no shell sudo. It also can't read
* `/data/coolify/services/` directly because Coolify chmods that to
* 700 root. We work around both constraints by running the docker
* CLI inside a one-shot container that bind-mounts the path. The
* docker daemon runs as root so it can read the directory; the
* `vibn-logs` user only needs `docker` socket access.
*/
import { runOnCoolifyHost, type CoolifySshResult } from './coolify-ssh';
/** Which flavour of Coolify resource a rendered compose directory belongs to. */
export type ResourceKind = 'service' | 'application';

/**
 * Resolve the host directory holding the rendered compose files for a
 * resource.
 *
 * @param kind - `'service'` maps to the services root; anything else maps
 *   to the applications root.
 * @param uuid - Coolify resource UUID, used verbatim as the directory name.
 * @returns Absolute path on the Coolify host (no trailing slash).
 */
function composeDir(kind: ResourceKind, uuid: string): string {
  // Coolify v4 path layout — these are stable across the v4 line.
  if (kind === 'service') {
    return `/data/coolify/services/${uuid}`;
  }
  return `/data/coolify/applications/${uuid}`;
}
/**
 * Wrap a value in POSIX single quotes so a shell treats it as exactly one
 * literal argument.
 *
 * @param s - Raw argument text; coerced with `String()` before quoting.
 * @returns The single-quoted form, e.g. `it's` becomes `'it'\''s'`.
 */
function sq(s: string): string {
  // Nothing is special inside single quotes except the quote itself, so each
  // embedded `'` is emitted as: close quote, backslash-escaped quote, reopen.
  const escaped = String(s).split("'").join("'\\''");
  return "'" + escaped + "'";
}
/**
 * Execute a `docker compose` subcommand against a resource's rendered
 * compose directory, using a throwaway `docker:cli` container on the
 * Coolify host.
 *
 * The helper container bind-mounts the compose directory at `/work` (and
 * uses it as the working directory, so compose picks up the compose file
 * and `.env` on its own) plus the host's docker socket, so every container
 * compose creates belongs to the host's Docker engine. `--rm` removes the
 * helper once the subcommand exits.
 *
 * NOTE(review): compose derives its default project name from the working
 * directory, which is `/work` for every resource here. Confirm the rendered
 * compose file or `.env` pins a project name per resource; otherwise
 * separate stacks may end up sharing one compose project.
 *
 * @param kind - Whether the resource is a service or a compose application.
 * @param uuid - Coolify resource UUID.
 * @param args - Arguments placed after `compose`; each one is shell-quoted.
 * @param opts - Optional `timeoutMs` for the SSH call (default 600 000 ms).
 * @returns The raw SSH result of the `docker run` invocation.
 */
async function composeRun(
  kind: ResourceKind,
  uuid: string,
  args: string[],
  opts: { timeoutMs?: number } = {},
): Promise<CoolifySshResult> {
  const hostDir = composeDir(kind, uuid);
  const timeoutMs = opts.timeoutMs ?? 600_000;

  // Fixed prefix: the one-shot CLI container wired to the host daemon.
  const dockerRun = [
    'docker', 'run', '--rm',
    '-v', sq(`${hostDir}:/work`),
    '-w', '/work',
    '-v', '/var/run/docker.sock:/var/run/docker.sock',
    '--network', 'host',
    'docker:cli',
  ];
  const quotedArgs = args.map(arg => sq(arg));
  const commandLine = [...dockerRun, 'compose', ...quotedArgs].join(' ');

  return runOnCoolifyHost(commandLine, { timeoutMs, maxBytes: 2_000_000 });
}
/**
 * `docker compose up -d --remove-orphans` for a Coolify service or compose
 * app, followed by a best-effort attach of the stack's containers to the
 * `coolify` proxy network.
 *
 * Coolify's UI-driven deploy performs that attach as a post-step so Traefik
 * can route public traffic; the rendered compose file only declares the
 * resource-private network. Without it the stack runs on its own bridge
 * but Traefik answers public URLs with "no available server".
 *
 * The attach is attempted regardless of compose's exit status: a sidecar
 * `depends_on` timeout makes compose return non-zero while the primary
 * containers are already running, and those should still be reachable.
 * A failed attach never fails the call; it is logged so an unreachable
 * app can be diagnosed.
 *
 * @param kind - Whether the resource is a service or a compose application.
 * @param uuid - Coolify resource UUID.
 * @param opts - Optional `timeoutMs` forwarded to the compose invocation.
 * @returns The raw SSH result of `compose up`, so callers can surface
 *   diagnostics on failure (most common: image-pull errors, port conflicts).
 */
export async function composeUp(
  kind: ResourceKind,
  uuid: string,
  opts: { timeoutMs?: number } = {},
): Promise<CoolifySshResult> {
  const result = await composeRun(kind, uuid, ['up', '-d', '--remove-orphans'], opts);

  try {
    await attachToCoolifyProxyNetwork(uuid);
  } catch (err: unknown) {
    // Best-effort by design: report the failure but keep the compose result.
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(
      `[coolify-compose] proxy network attach failed for ${kind} ${uuid}: ${reason}`,
    );
  }

  return result;
}
/**
 * Attach every running container on this resource's network to the
 * `coolify` proxy network.
 *
 * Containers are discovered with `docker ps --filter network=<uuid>`; this
 * relies on Coolify naming the resource's bridge network after its UUID.
 * Safe to repeat: `docker network connect` errors when a container is
 * already attached, and that error is deliberately discarded.
 *
 * @param uuid - Coolify resource UUID. A blank value is a no-op.
 * @returns Resolves once the attach commands have been issued; individual
 *   attach failures are not reported.
 */
export async function attachToCoolifyProxyNetwork(
  uuid: string,
): Promise<void> {
  // A blank filter value cannot identify a single stack; do nothing rather
  // than risk touching unrelated containers.
  if (uuid.trim() === '') return;

  // Quote the whole filter expression so the UUID can never be interpreted
  // by the remote shell.
  const listing = await runOnCoolifyHost(
    `docker ps --filter ${sq(`network=${uuid}`)} --format '{{.Names}}'`,
    { timeoutMs: 10_000 },
  );

  const names = listing.stdout
    .split('\n')
    .map(line => line.trim())
    .filter(Boolean);
  if (names.length === 0) return;

  // One connect per container. `|| true` turns "already connected" (and any
  // other per-container failure) into success, and `;` keeps the commands
  // independent of one another.
  const script = names
    .map(name => `docker network connect coolify ${sq(name)} 2>/dev/null || true`)
    .join('; ');
  await runOnCoolifyHost(script, { timeoutMs: 30_000 });
}
/**
 * `docker compose down` for a Coolify service or compose app: stops and
 * removes the stack's containers. No volume-removal flag is passed, so
 * volumes are preserved.
 *
 * @param kind - Whether the resource is a service or a compose application.
 * @param uuid - Coolify resource UUID.
 * @param opts - Optional `timeoutMs` forwarded to the compose invocation.
 * @returns The raw SSH result of the compose call.
 */
export async function composeDown(
  kind: ResourceKind,
  uuid: string,
  opts: { timeoutMs?: number } = {},
): Promise<CoolifySshResult> {
  const downArgs = ['down'];
  return composeRun(kind, uuid, downArgs, opts);
}
/**
 * `docker compose ps -a --format table` for a Coolify service or compose
 * app. Handy when `up` did not produce healthy containers and the stack's
 * state needs inspecting.
 *
 * @param kind - Whether the resource is a service or a compose application.
 * @param uuid - Coolify resource UUID.
 * @returns The raw SSH result; the table is in its stdout.
 */
export async function composePs(
  kind: ResourceKind,
  uuid: string,
): Promise<CoolifySshResult> {
  const psArgs = ['ps', '-a', '--format', 'table'];
  // A listing should come back quickly, so use a 30 s cap instead of the
  // long default used for `up`.
  const limits = { timeoutMs: 30_000 };
  return composeRun(kind, uuid, psArgs, limits);
}
/**
 * Check that Coolify has rendered a `docker-compose.yml` for this resource
 * before trying to run docker compose against it, so callers get a plain
 * `false` instead of an opaque compose failure.
 *
 * The directory cannot be listed directly by the SSH user (permission
 * denied), so the probe runs inside a one-shot container that bind-mounts
 * it. The mount uses `--mount type=bind,...,readonly` rather than `-v`:
 * with `-v` Docker creates a missing host path, which would make the probe
 * itself leave an empty directory behind. With `--mount` a missing path is
 * an error, the container never starts, and nothing is printed to stdout.
 *
 * @param kind - Whether the resource is a service or a compose application.
 * @param uuid - Coolify resource UUID.
 * @returns `true` only when the probe prints `OK`; `false` when the file or
 *   directory is missing or the probe container could not run.
 */
export async function composeDirExists(
  kind: ResourceKind,
  uuid: string,
): Promise<boolean> {
  const dir = composeDir(kind, uuid);
  // Read-only is enough for an existence test and guarantees no writes.
  const mount = sq(`type=bind,source=${dir},target=/w,readonly`);
  const cmd = `docker run --rm --mount ${mount} alpine sh -c 'test -f /w/docker-compose.yml && echo OK || echo MISSING'`;
  const r = await runOnCoolifyHost(cmd, { timeoutMs: 30_000 });
  // Fail closed: anything other than a trailing `OK` counts as missing.
  return r.stdout.trim().endsWith('OK');
}