Files
vibn-frontend/lib/coolify-compose.ts
Mark Henderson d6b8ba4d67 fix(mcp v2.4.4): only attach traefik-enabled containers to coolify proxy net
v2.4.3 attached every stack container to the `coolify` network so
Traefik could reach the public container. But that network also hosts
coolify-db (alias `postgres`) and coolify-redis (alias `redis`).
Docker's embedded DNS resolves unqualified hostnames to the first
container with that name on the network, so once Twenty's
`postgres-<uuid>` joined the coolify network, Twenty's connection
string `postgres://postgres:5432/...` started resolving to coolify-db
and auth-failing in a tight restart loop.

Coolify's own pipeline only attaches the proxied container — filter
by the `traefik.enable=true` label so internal stack members (db,
redis, worker) stay isolated on the project network.

Made-with: Cursor
2026-04-27 12:36:44 -07:00

180 lines
7.0 KiB
TypeScript

/**
* Bring a Coolify Service or compose Application up via raw
* `docker compose up -d`.
*
* Why this exists
* ---------------
* Coolify's `POST /services/{uuid}/start` and `POST /deploy` endpoints
* write the rendered docker-compose.yml + .env to
* `/data/coolify/services/{uuid}/` (or `applications/{uuid}/` for
* compose apps), then enqueue a Laravel job to run
* `docker compose up -d`. In practice that worker queue is unreliable:
* it routinely returns "Service starting request queued" and then
* never actually invokes docker compose. The user's stack just sits
* there with rendered files and no containers.
*
* For a hands-off SaaS we can't ship that experience. This helper
* does the work directly via SSH, so a single MCP `apps.create` call
* really does leave a running app.
*
* Permissions model
* -----------------
* The `vibn-logs` SSH user (created by deploy/setup-coolify-ssh.sh)
* is in the `docker` group but has no shell sudo. It also can't read
* `/data/coolify/services/` directly because Coolify chmods that to
* 700 root. We work around both constraints by running the docker
* CLI inside a one-shot container that bind-mounts the path. The
* docker daemon runs as root so it can read the directory; the
* `vibn-logs` user only needs `docker` socket access.
*/
import { runOnCoolifyHost, type CoolifySshResult } from './coolify-ssh';
/** Which flavour of Coolify resource a rendered compose directory belongs to. */
export type ResourceKind = 'service' | 'application';

/**
 * Absolute host path of the directory where Coolify renders the
 * docker-compose.yml + .env for the given resource.
 *
 * @param kind  `'service'` selects the services tree; anything else the applications tree.
 * @param uuid  Coolify resource uuid, used verbatim as the final path segment.
 */
function composeDir(kind: ResourceKind, uuid: string): string {
  // Coolify v4 path layout — these are stable across the v4 line.
  const subtree = kind === 'service' ? 'services' : 'applications';
  return `/data/coolify/${subtree}/${uuid}`;
}
/**
 * Wrap a single argument in POSIX single quotes so the remote shell
 * treats it as one literal word. Embedded single quotes are emitted
 * as `'\''` (close quote, escaped quote, reopen quote).
 */
function sq(s: string): string {
  const escaped = String(s).split("'").join(`'\\''`);
  return "'" + escaped + "'";
}
/**
 * Run a `docker compose` subcommand against the rendered compose
 * directory using a one-shot `docker:cli` container.
 *
 * The helper container bind-mounts the host docker socket, so the
 * compose CLI inside it drives the host's daemon and the containers it
 * creates belong to the same Docker engine.
 *
 * The compose directory is mounted at the SAME absolute path inside the
 * helper container and used as the working directory. This matters for
 * two reasons:
 *  - Compose derives its default project name from the basename of the
 *    project directory. Mounting every stack at a fixed `/work` would
 *    give all stacks the project name `work` (unless the compose file
 *    or .env sets one), so `up --remove-orphans` for one stack could
 *    treat another stack's containers as its own.
 *  - Relative host paths in the compose file are resolved by the CLI
 *    and then handed to the host daemon, so they must resolve to the
 *    real host location.
 *
 * `--mount` is used instead of `-v` because `-v` silently creates a
 * missing host directory, whereas `--mount` fails with an error.
 *
 * NOTE(review): stacks previously started through this helper under the
 * `/work` mount may carry the old project name — confirm and recreate
 * them once if compose reports container-name conflicts.
 *
 * @param kind  Which Coolify tree the compose directory lives in.
 * @param uuid  Coolify resource uuid.
 * @param args  Arguments placed after `docker compose`; each one is shell-quoted.
 * @param opts  `timeoutMs` overrides the default 600 000 ms SSH timeout.
 * @returns The raw SSH result of the `docker run` invocation.
 */
async function composeRun(
  kind: ResourceKind,
  uuid: string,
  args: string[],
  opts: { timeoutMs?: number } = {},
): Promise<CoolifySshResult> {
  const dir = composeDir(kind, uuid);
  // Working directory == mounted path, so docker compose picks up
  // docker-compose.yml + .env automatically. `--rm` cleans the helper
  // container after each call.
  const cmd = [
    'docker', 'run', '--rm',
    '--mount', sq(`type=bind,source=${dir},target=${dir}`),
    '-w', sq(dir),
    '-v', '/var/run/docker.sock:/var/run/docker.sock',
    '--network', 'host',
    'docker:cli',
    'compose', ...args.map(sq),
  ].join(' ');
  return runOnCoolifyHost(cmd, { timeoutMs: opts.timeoutMs ?? 600_000, maxBytes: 2_000_000 });
}
/**
 * Bring a Coolify service or compose app up with
 * `docker compose up -d --remove-orphans`.
 *
 * The raw SSH result of the compose call is returned so callers can
 * surface diagnostics on failure (most common: image-pull errors,
 * port conflicts).
 *
 * Once compose has returned, the stack's Traefik-enabled containers
 * are additionally connected to the `coolify` proxy network. The
 * rendered compose file only declares the service-private network, so
 * without this step the stack runs on its own bridge but Traefik
 * answers "no available server" for the public hostname.
 *
 * @param kind  Which Coolify tree the compose directory lives in.
 * @param uuid  Coolify resource uuid.
 * @param opts  `timeoutMs` is forwarded to the compose invocation.
 */
export async function composeUp(
  kind: ResourceKind,
  uuid: string,
  opts: { timeoutMs?: number } = {},
): Promise<CoolifySshResult> {
  const upResult = await composeRun(kind, uuid, ['up', '-d', '--remove-orphans'], opts);
  // Best-effort: the attach runs even when compose returned non-zero
  // (sidecar `depends_on` timeouts still leave primary containers
  // running, and we want them reachable). A failure here must never
  // mask the compose result, so it is deliberately discarded.
  try {
    await attachToCoolifyProxyNetwork(uuid);
  } catch {
    /* swallow */
  }
  return upResult;
}
/**
 * Attach the public-facing containers of a Coolify resource to the
 * `coolify` proxy network so Traefik can reach them.
 *
 * IMPORTANT: only containers labelled `traefik.enable=true` are
 * attached. The coolify network is shared across the whole platform
 * (it hosts coolify-db, coolify-redis, etc.) and unqualified hostnames
 * like `postgres` and `redis` can resolve to a different container
 * once a stack's internal members join it. If Twenty's
 * `postgres-<uuid>` container is attached to coolify, Twenty's
 * `postgres://postgres:5432/...` connection string starts resolving to
 * `coolify-db` instead, which fails auth (different password).
 *
 * Each connect command ends in `|| true`, so a container that is
 * already attached does not fail the call.
 *
 * @param uuid  Coolify resource uuid; also the name of the resource's
 *              project network used to find its containers.
 */
export async function attachToCoolifyProxyNetwork(
  uuid: string,
): Promise<void> {
  const enabledSuffix = '|true';
  // List running containers on the resource's project network together
  // with their `traefik.enable` label. The filter is shell-quoted like
  // every other interpolated value in this module, so a uuid containing
  // shell metacharacters cannot alter the command.
  const ls = await runOnCoolifyHost(
    `docker ps --filter ${sq(`network=${uuid}`)} --format '{{.Names}}|{{.Label "traefik.enable"}}'`,
    { timeoutMs: 10_000 },
  );
  const names = ls.stdout
    .split('\n')
    .map(line => line.trim())
    // Keep only `name|true` rows; blank lines and unlabelled
    // containers (`name|`) fall out here.
    .filter(line => line.endsWith(enabledSuffix))
    .map(line => line.slice(0, -enabledSuffix.length))
    .filter(name => name.length > 0);
  if (names.length === 0) return;
  // Attach each one. `|| true` makes every link of the chain exit 0,
  // so the `&&` chain never short-circuits on an already-connected
  // container.
  const attaches = names
    .map(n => `docker network connect coolify ${sq(n)} 2>/dev/null || true`)
    .join(' && ');
  await runOnCoolifyHost(attaches, { timeoutMs: 30_000 });
}
/**
 * Run `docker compose down` for a Coolify service or compose app.
 *
 * Only the bare `down` subcommand is issued (no volume-removal flag),
 * so containers are stopped and removed while volumes are preserved.
 *
 * @param kind  Which Coolify tree the compose directory lives in.
 * @param uuid  Coolify resource uuid.
 * @param opts  `timeoutMs` is forwarded to the compose invocation.
 * @returns The raw SSH result of the compose call.
 */
export async function composeDown(
  kind: ResourceKind,
  uuid: string,
  opts: { timeoutMs?: number } = {},
): Promise<CoolifySshResult> {
  const downArgs = ['down'];
  return composeRun(kind, uuid, downArgs, opts);
}
/**
 * Run `docker compose ps -a --format table` for a Coolify service or
 * compose app — useful for diagnosing why `up` didn't yield healthy
 * containers. Uses a fixed 30 s timeout.
 *
 * @param kind  Which Coolify tree the compose directory lives in.
 * @param uuid  Coolify resource uuid.
 * @returns The raw SSH result; the table is in its stdout.
 */
export async function composePs(
  kind: ResourceKind,
  uuid: string,
): Promise<CoolifySshResult> {
  const psArgs = ['ps', '-a', '--format', 'table'];
  const psOpts = { timeoutMs: 30_000 };
  return composeRun(kind, uuid, psArgs, psOpts);
}
/**
 * Check that the rendered compose directory exists and contains a
 * `docker-compose.yml` before trying to run docker compose against it.
 *
 * @param kind  Which Coolify tree the compose directory lives in.
 * @param uuid  Coolify resource uuid.
 * @returns `true` when `docker-compose.yml` is present in the
 *          directory, `false` otherwise (missing directory, missing
 *          file, or the probe container could not run).
 */
export async function composeDirExists(
  kind: ResourceKind,
  uuid: string,
): Promise<boolean> {
  // We can't `ls` the dir directly (perm denied), so probe it through a
  // one-shot container instead.
  //
  // The mount MUST use `--mount type=bind`, not `-v`: with `-v` Docker
  // creates a missing host path as an empty directory, so merely
  // probing a non-existent uuid would leave a stray directory behind.
  // `--mount` refuses to start the container when the source is
  // missing; stdout then carries no "OK" and we report `false`.
  // The mount is read-only because the probe only needs to stat a file.
  // Assumes runOnCoolifyHost resolves (rather than rejects) on a
  // non-zero exit, as composeUp's handling of failed compose runs
  // implies — TODO confirm.
  const dir = composeDir(kind, uuid);
  const mount = sq(`type=bind,source=${dir},target=/w,readonly`);
  const cmd = `docker run --rm --mount ${mount} alpine sh -c 'test -f /w/docker-compose.yml && echo OK || echo MISSING'`;
  const r = await runOnCoolifyHost(cmd, { timeoutMs: 30_000 });
  return r.stdout.trim().endsWith('OK');
}