feat(path-b): persistent dev containers + shell.exec + fs.* tools

Kicks off Path B (AI_PATH_B_EXECUTION_PLAN.md): each Vibn project gets
its own vibn-dev Coolify service that the AI drives directly via shell
and filesystem tools. Sub-second iteration vs the 5-min Gitea redeploy
loop.

What's in this commit (week 1, slice 1):

- vibn-dev Dockerfile: small Ubuntu base (~500 MB target). git, ripgrep,
  python3, mise. Language toolchains lazy-install on first use.
- lib/dev-container.ts: ensureDevContainer / suspend / resume /
  execInDevContainer. Backed by a new fs_project_dev_containers table.
- lib/feature-flags.ts + /api/admin/path-b/{disable,enable}: kill switch.
  Bearer NEXTAUTH_SECRET flips path_b_disabled, propagates in ~10s.
- New MCP tools wired into /api/mcp: devcontainer.{ensure,status,suspend},
  shell.exec, fs.{read,write,edit,list,delete,glob,grep}. All enforce
  workspace isolation via fs_projects ownership check.
- vibn-tools.ts: 11 new Gemini tool defs (smoke test passes, 63 total).
- chat system prompt: shell-first guidance; gitea_file_* marked
  deprecated for iterative work (still available, removed week 3).

Safety nets baked in:
- pathBGuard() returns 503 from every Path B tool when the kill switch
  flips
- fs.* paths locked to /workspace
- ensureResourceInWorkspaceProjects via fs_project_dev_containers PK
- per-project resource limits (1 vCPU, 1 GiB RAM) on the compose spec

Still pending (queued):
- dev_server.* (preview URLs through Traefik)
- ship tool (push to Gitea + trigger prod deploy)
- auto-push autosave to vibn-autosave/main every 5 min
- idle-suspend cron after 30 min inactivity
- HMR-through-Traefik spike
- eval harness

Made-with: Cursor
This commit is contained in:
2026-04-28 12:53:16 -07:00
parent c8dec7c656
commit 4ba9407534
8 changed files with 1159 additions and 11 deletions

View File

@@ -637,6 +637,156 @@ Auto-domain {name}.{workspace}.vibnai.com is assigned automatically.`,
},
},
// ── Path B: dev container + shell + filesystem (PREFERRED for code authoring) ──
//
// These run inside the per-project vibn-dev container. Dramatically faster
// iteration than gitea_file_* (sub-second feedback vs ~5 min redeploy).
// Use these for ALL code writing/editing/scaffolding work. Keep gitea_*
// for orchestration (creating new repos, listing branches) only.
// Tool declaration: devcontainer_ensure. Takes one required STRING
// argument, projectId.
{
name: 'devcontainer_ensure',
description:
'Ensure a per-project AI dev container exists and is running. Idempotent — first call ~10s (provisions a Coolify service), subsequent calls are instant. ' +
'Call this at the start of any code-authoring session. Returns the dev container service UUID and state.',
parameters: {
type: 'OBJECT',
properties: {
projectId: { type: 'STRING', description: 'The Vibn project ID.' },
},
required: ['projectId'],
},
},
// Tool declaration: devcontainer_status. Same argument shape as
// devcontainer_ensure (a single required projectId).
{
name: 'devcontainer_status',
description: 'Cheap status check for the project dev container. Returns { exists, state, serviceUuid }.',
parameters: {
type: 'OBJECT',
properties: {
projectId: { type: 'STRING', description: 'The Vibn project ID.' },
},
required: ['projectId'],
},
},
// Tool declaration: shell_exec. Only projectId and command are required;
// cwd and timeoutMs are optional and fall back to the defaults named in
// their descriptions.
{
name: 'shell_exec',
description:
'Run a shell command inside the project dev container as the `vibn` user (uid 1000) under /workspace. ' +
'This is your universal escape hatch — install deps (`npm install`), run tests (`npm test`), scaffold code (`npx create-...`), ' +
'inspect output, run migrations. Use this instead of gitea_file_* for any iterative work. ' +
'Output is capped at 1 MB; default timeout 60s, max 600s.',
parameters: {
type: 'OBJECT',
properties: {
projectId: { type: 'STRING', description: 'The Vibn project ID.' },
command: { type: 'STRING', description: 'Shell command (passes through `sh -lc`, so pipes/redirects work).' },
cwd: { type: 'STRING', description: 'Working directory (default /workspace). Must stay under /workspace.' },
timeoutMs: { type: 'NUMBER', description: 'Timeout in ms. Default 60000, max 600000.' },
},
required: ['projectId', 'command'],
},
},
// Tool declaration: fs_read. projectId and path are required; offset and
// limit are optional line-window arguments.
{
name: 'fs_read',
description: 'Read a file inside the project dev container. Returns the full text. Optional offset/limit for windowed reads on big files.',
parameters: {
type: 'OBJECT',
properties: {
projectId: { type: 'STRING', description: 'The Vibn project ID.' },
path: { type: 'STRING', description: 'File path. Relative paths are resolved under /workspace.' },
offset: { type: 'NUMBER', description: 'Optional 0-based starting line.' },
limit: { type: 'NUMBER', description: 'Optional max lines to return.' },
},
required: ['projectId', 'path'],
},
},
// Tool declaration: fs_write. All three arguments (projectId, path,
// content) are required; the description steers the model to fs_edit for
// changes to existing files.
{
name: 'fs_write',
description:
'Create or overwrite a file inside the project dev container. Use to scaffold new files. ' +
'For surgical edits to existing files, prefer fs_edit (less brittle, smaller diffs).',
parameters: {
type: 'OBJECT',
properties: {
projectId: { type: 'STRING', description: 'The Vibn project ID.' },
path: { type: 'STRING', description: 'File path. Relative paths under /workspace. Parent dirs are mkdir -p\'d.' },
content: { type: 'STRING', description: 'Full file content.' },
},
required: ['projectId', 'path', 'content'],
},
},
// Tool declaration: fs_edit. projectId, path, oldString and newString are
// required; replaceAll is the only optional argument.
{
name: 'fs_edit',
description:
'Aider-style search-and-replace edit. Finds `oldString` in the file and replaces it with `newString`. ' +
'Fails (HTTP 404) if oldString is missing, fails (HTTP 409) if oldString matches more than once unless replaceAll=true. ' +
'Always include 2-3 lines of surrounding context in oldString so the match is unique.',
parameters: {
type: 'OBJECT',
properties: {
projectId: { type: 'STRING', description: 'The Vibn project ID.' },
path: { type: 'STRING', description: 'File path under /workspace.' },
oldString: { type: 'STRING', description: 'Exact substring to find (with surrounding context for uniqueness).' },
newString: { type: 'STRING', description: 'Replacement text.' },
replaceAll: { type: 'BOOLEAN', description: 'If true, replace every occurrence. Default false (must be unique).' },
},
required: ['projectId', 'path', 'oldString', 'newString'],
},
},
// Tool declaration: fs_list. Only projectId is required; path is optional.
{
name: 'fs_list',
description: 'List files in a directory inside the project dev container (`ls -lA`). Capped at 200 entries.',
parameters: {
type: 'OBJECT',
properties: {
projectId: { type: 'STRING', description: 'The Vibn project ID.' },
path: { type: 'STRING', description: 'Directory path. Default /workspace.' },
},
required: ['projectId'],
},
},
// Tool declaration: fs_delete. projectId and path are required; recursive
// is an optional BOOLEAN.
{
name: 'fs_delete',
description: 'Delete a file or directory inside the project dev container. Set recursive=true to remove a non-empty directory.',
parameters: {
type: 'OBJECT',
properties: {
projectId: { type: 'STRING', description: 'The Vibn project ID.' },
path: { type: 'STRING', description: 'Path to delete. Cannot be /workspace itself.' },
recursive: { type: 'BOOLEAN', description: 'rm -rf if true. Default false.' },
},
required: ['projectId', 'path'],
},
},
// Tool declaration: fs_glob. projectId and pattern are required; cwd is
// the optional search root.
{
name: 'fs_glob',
description: 'Find files matching a glob pattern (ripgrep-backed, respects .gitignore). Returns up to 500 paths.',
parameters: {
type: 'OBJECT',
properties: {
projectId: { type: 'STRING', description: 'The Vibn project ID.' },
pattern: { type: 'STRING', description: 'Glob, e.g. "**/*.tsx" or "src/**/*.ts".' },
cwd: { type: 'STRING', description: 'Search root (default /workspace).' },
},
required: ['projectId', 'pattern'],
},
},
// Tool declaration: fs_grep. projectId and pattern are required; glob, cwd
// and contextLines are optional filters/tuning arguments.
{
name: 'fs_grep',
description: 'ripgrep-backed code search inside the project dev container. Capped at 50 matches per file, 500 total.',
parameters: {
type: 'OBJECT',
properties: {
projectId: { type: 'STRING', description: 'The Vibn project ID.' },
pattern: { type: 'STRING', description: 'Regex or literal string.' },
glob: { type: 'STRING', description: 'Optional file glob to filter (e.g. "*.ts").' },
cwd: { type: 'STRING', description: 'Search root (default /workspace).' },
contextLines: { type: 'NUMBER', description: 'Lines of context around each match (0-10).' },
},
required: ['projectId', 'pattern'],
},
},
// ── Non-MCP: GitHub & web ─────────────────────────────────────────────────
{

356
lib/dev-container.ts Normal file
View File

@@ -0,0 +1,356 @@
/**
* Per-project AI dev container ("vibn-dev").
*
* One Coolify Service per Vibn project, running the `vibn-dev` image.
* The AI agent drives it via:
* - shell.exec → docker exec into the container (via existing SSH path)
* - fs.* → file ops (implemented as `cat` / `tee` / `rm` etc.
* inside the container, on top of shell.exec)
* - dev_server.* → start long-running processes (week 2)
* - ship → git push to Gitea + trigger Coolify deploy (week 2)
*
* Lifecycle states:
* - Not provisioned → ensureDevContainer() creates the Coolify service
* - Suspended → Coolify-stopped (saves money). resume() starts it.
* - Running → docker exec works.
*
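* Tenant safety: the caller must have already verified that the projectId
* belongs to the workspace via fs_projects. Only ensureDevContainer() takes
* a workspace; suspend / resume / exec / status look the container up by
* projectId alone and perform no ownership check of their own, so they must
* never be handed an unverified projectId. The exec primitive resolves the
* container from the stored service UUID (see execInDevContainer) — that is
* a lookup, not a workspace-ownership check.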
*
* See: AI_PATH_B_EXECUTION_PLAN.md §3.
*/
import { query, queryOne } from '@/lib/db-postgres';
import {
createDockerComposeApp,
startService,
stopService,
getService,
} from '@/lib/coolify';
import { execInCoolifyApp, type ExecInAppResult } from '@/lib/coolify-exec';
import { isCoolifySshConfigured } from '@/lib/coolify-ssh';
import {
ensureProjectCoolifyProject,
getProjectCoolifyUuid,
linkResourceToProject,
} from '@/lib/projects';
import type { VibnWorkspace } from '@/lib/workspaces';
// ── Configuration ────────────────────────────────────────────────────
/**
* Image tag for vibn-dev. Built and pushed from /vibn-dev/Dockerfile.
* Override per-environment with VIBN_DEV_IMAGE for staging/canary tags.
* Falls back to 'vibn-dev:latest' when the env var is unset. Read once,
* when this module is first loaded.
*/
export const VIBN_DEV_IMAGE = process.env.VIBN_DEV_IMAGE ?? 'vibn-dev:latest';
/**
* Resource caps per dev container. Tweak in env per-tier later.
* Like the image tag, these are read from the environment once at module
* load; the CPU and memory values are interpolated into the compose
* template by renderDevCompose().
*/
const DEFAULT_CPU_LIMIT = process.env.VIBN_DEV_CPU_LIMIT ?? '1'; // 1 vCPU
const DEFAULT_MEM_LIMIT = process.env.VIBN_DEV_MEM_LIMIT ?? '1g'; // 1 GiB
// NOTE(review): DEFAULT_DISK_LIMIT is not referenced by the compose template
// or any other code shown in this file — it is currently a placeholder.
const DEFAULT_DISK_LIMIT = process.env.VIBN_DEV_DISK_LIMIT ?? '10g'; // soft hint, not enforced by compose
// ── Schema ───────────────────────────────────────────────────────────
/** Per-process latch: set once the DDL below has completed successfully. */
let devContainersTableReady = false;

/**
 * Create the fs_project_dev_containers table and its two indexes if they
 * do not exist yet.
 *
 * The DDL is sent at most once per process after a success; if the query
 * throws, the latch stays false and the next call retries.
 */
export async function ensureDevContainersTable(): Promise<void> {
  if (!devContainersTableReady) {
    const ddl = `CREATE TABLE IF NOT EXISTS fs_project_dev_containers (
project_id TEXT PRIMARY KEY,
workspace TEXT NOT NULL,
service_uuid TEXT NOT NULL,
image TEXT NOT NULL,
state TEXT NOT NULL DEFAULT 'provisioning',
last_active_at TIMESTAMPTZ NOT NULL DEFAULT now(),
suspended_at TIMESTAMPTZ,
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
CREATE INDEX IF NOT EXISTS fs_project_dev_containers_ws_idx
ON fs_project_dev_containers (workspace);
CREATE INDEX IF NOT EXISTS fs_project_dev_containers_active_idx
ON fs_project_dev_containers (last_active_at);`;
    await query(ddl, []);
    devContainersTableReady = true;
  }
}
/**
* Shape of one row in fs_project_dev_containers, as returned by
* getDevContainerRow(). Field names mirror the table's column names.
*/
export interface DevContainerRow {
/** Vibn project ID; the table's primary key, so at most one row per project. */
project_id: string;
/** Workspace identifier; ensureDevContainer() stores opts.workspace.slug here. */
workspace: string;
/** UUID returned by createDockerComposeApp(); used for start/stop and exec. */
service_uuid: string;
/** Image tag recorded when the row was written (VIBN_DEV_IMAGE at that time). */
image: string;
/**
* Lifecycle state. This file writes 'provisioning' or 'suspended' on insert,
* 'suspended' in suspendDevContainer() and 'running' in resumeDevContainer().
* NOTE(review): nothing in this file writes 'failed' — presumably set
* elsewhere; confirm.
*/
state: 'provisioning' | 'running' | 'suspended' | 'failed';
/** Bumped to now() on resume and after each exec (touchActivity). */
last_active_at: Date;
/** Set to now() by suspendDevContainer(), reset to NULL by resumeDevContainer(). */
suspended_at: Date | null;
/** Insert time (column default now()). */
created_at: Date;
}
/**
 * Fetch the dev-container bookkeeping row for a project.
 *
 * Makes sure the backing table exists before querying.
 *
 * @param projectId Vibn project ID (the table's primary key).
 * @returns The matching row, or null when the project has no row.
 */
export async function getDevContainerRow(projectId: string): Promise<DevContainerRow | null> {
  await ensureDevContainersTable();
  const lookupSql = `SELECT * FROM fs_project_dev_containers WHERE project_id = $1 LIMIT 1`;
  const found = await queryOne<DevContainerRow>(lookupSql, [projectId]);
  return found;
}
// ── Compose template ─────────────────────────────────────────────────
/**
 * Render the docker-compose.yml that backs a single vibn-dev service.
 *
 * Two named volumes are intentional:
 *   - workspace : everything in /workspace (the user's source tree).
 *                 Persists across suspends. Backed up to Gitea every
 *                 5 min via the auto-push autosave loop (week 2).
 *   - cache     : language-toolchain caches (mise, npm, pip, cargo).
 *                 Persists across suspends; per-project (never shared).
 *
 * The container has NO Vibn-internal network access. We rely on the
 * default Coolify-bridge network being isolated from the vibn-postgres
 * / vibn-frontend bridge. (Network policy hardening lands in week 1
 * day 2 alongside the auto-push job.)
 *
 * The document is assembled from explicitly indented lines so the YAML
 * nesting does not depend on how this source file itself is indented.
 *
 * @param projectSlug Slug exposed to the container as VIBN_PROJECT_SLUG.
 * @returns The compose document as a string, terminated by a newline.
 */
function renderDevCompose(projectSlug: string): string {
  // A JSON string literal is also a valid YAML double-quoted scalar, so
  // quoting the whole KEY=value entry keeps characters such as ": ", " #"
  // or a line break inside the slug from altering the document structure.
  const slugEntry = JSON.stringify(`VIBN_PROJECT_SLUG=${projectSlug}`);
  const lines = [
    'services:',
    '  vibn-dev:',
    `    image: ${VIBN_DEV_IMAGE}`,
    '    restart: unless-stopped',
    '    working_dir: /workspace',
    '    volumes:',
    '      - workspace:/workspace',
    '      - cache:/home/vibn/.cache',
    '    environment:',
    `      - ${slugEntry}`,
    '      - VIBN_DEV_CONTAINER=1',
    '    deploy:',
    '      resources:',
    '        limits:',
    `          cpus: '${DEFAULT_CPU_LIMIT}'`,
    `          memory: ${DEFAULT_MEM_LIMIT}`,
    'volumes:',
    '  workspace:',
    '  cache:',
    '', // trailing newline
  ];
  return lines.join('\n');
}
// ── Provisioning ─────────────────────────────────────────────────────
/** Arguments for ensureDevContainer(). */
export interface EnsureDevContainerOpts {
/** Vibn project ID; key of the fs_project_dev_containers row. */
projectId: string;
/**
* Project slug. Used in the Coolify service name (`vibn-dev-<slug>`), passed
* to the compose template, and forwarded when a Coolify project is created.
*/
projectSlug: string;
/** Human-readable name used in the service description; falls back to projectSlug. */
projectName?: string;
/** Workspace the project belongs to; its slug is stored on the row. */
workspace: VibnWorkspace;
/**
* Skip the initial start (provision-only). Default: start it.
* When true the service is created with instantDeploy=false, the row is
* recorded as 'suspended', and an already-suspended container is not resumed.
*/
noStart?: boolean;
}
/** Result of ensureDevContainer(). */
export interface EnsureDevContainerResult {
/** Coolify service UUID of the project's dev container. */
serviceUuid: string;
/** State reported for the container by this call. */
state: DevContainerRow['state'];
/** True only when this call provisioned a new service; false when a row already existed. */
created: boolean;
}
/**
 * Idempotently ensure a vibn-dev service exists for the given Vibn project.
 *
 *  - Already provisioned → returns the row's service UUID and state; a
 *                          suspended container is resumed first unless
 *                          `noStart` is set.
 *  - Not provisioned     → ensures the per-project Coolify Project exists,
 *                          creates the docker-compose service, persists the
 *                          row, and links the resource to the Vibn project
 *                          (best-effort).
 *
 * Safe to call on every chat turn — first call is ~10s, subsequent
 * calls are a single SELECT.
 *
 * The returned `state` for a freshly created container is the state that
 * was persisted: 'suspended' when `noStart` is set, otherwise
 * 'provisioning'.
 *
 * NOTE(review): within this file only resumeDevContainer() ever writes
 * 'running', so a row inserted as 'provisioning' stays that way until
 * something else updates it — confirm where that transition happens.
 *
 * @param opts See EnsureDevContainerOpts.
 * @returns Service UUID, current state, and whether this call created it.
 * @throws Error when no Coolify project could be resolved or provisioned;
 *         errors from Coolify or the database propagate unchanged.
 */
export async function ensureDevContainer(
  opts: EnsureDevContainerOpts,
): Promise<EnsureDevContainerResult> {
  // getDevContainerRow() ensures the table exists, so no separate
  // ensureDevContainersTable() call is needed here.
  const existing = await getDevContainerRow(opts.projectId);
  if (existing) {
    if (existing.state === 'suspended' && !opts.noStart) {
      await resumeDevContainer(opts.projectId);
      return { serviceUuid: existing.service_uuid, state: 'running', created: false };
    }
    return { serviceUuid: existing.service_uuid, state: existing.state, created: false };
  }

  // Need a Coolify project to land the service in.
  let coolifyProjectUuid = await getProjectCoolifyUuid(opts.projectId, opts.workspace);
  if (!coolifyProjectUuid) {
    coolifyProjectUuid = await ensureProjectCoolifyProject(
      opts.projectId,
      opts.workspace,
      { projectSlug: opts.projectSlug, projectName: opts.projectName },
    );
  }
  if (!coolifyProjectUuid) {
    throw new Error(
      `Could not provision Coolify project for ${opts.projectId}; dev container creation aborted.`,
    );
  }

  const created = await createDockerComposeApp({
    projectUuid: coolifyProjectUuid,
    name: `vibn-dev-${opts.projectSlug}`,
    description: `AI dev container for project ${opts.projectName ?? opts.projectSlug}`,
    composeRaw: renderDevCompose(opts.projectSlug),
    instantDeploy: !opts.noStart,
  });

  // Single source of truth for the state that is both persisted and returned.
  const initialState: DevContainerRow['state'] = opts.noStart ? 'suspended' : 'provisioning';

  await query(
    `INSERT INTO fs_project_dev_containers
(project_id, workspace, service_uuid, image, state)
VALUES ($1, $2, $3, $4, $5)
ON CONFLICT (project_id) DO UPDATE
SET service_uuid = EXCLUDED.service_uuid,
image = EXCLUDED.image,
state = EXCLUDED.state`,
    [opts.projectId, opts.workspace.slug, created.uuid, VIBN_DEV_IMAGE, initialState],
  );

  // Bookkeeping link so apps_list / projects_get see the dev container
  // under the right Vibn project. Best-effort: a failure here must not
  // fail provisioning, but it is logged so a missing link is diagnosable.
  try {
    await linkResourceToProject(opts.projectId, opts.workspace.slug, created.uuid, 'service');
  } catch (err: unknown) {
    console.warn(
      `[dev-container] linkResourceToProject failed for project ${opts.projectId}`,
      err,
    );
  }

  return { serviceUuid: created.uuid, state: initialState, created: true };
}
// ── Lifecycle ────────────────────────────────────────────────────────
/**
 * Stop a project's dev container and record it as suspended.
 *
 * A no-op when the project has no row or the row is already 'suspended'.
 * The Coolify service is stopped before the row is updated.
 *
 * @param projectId Vibn project ID.
 */
export async function suspendDevContainer(projectId: string): Promise<void> {
  const container = await getDevContainerRow(projectId);
  if (!container || container.state === 'suspended') {
    return;
  }

  await stopService(container.service_uuid);

  const markSuspendedSql = `UPDATE fs_project_dev_containers
SET state = 'suspended', suspended_at = now()
WHERE project_id = $1`;
  await query(markSuspendedSql, [projectId]);
}
/**
 * Start a project's dev container and record it as running.
 *
 * Does nothing when the row already says 'running'. For any other state
 * the Coolify service is started, then the row is set to 'running',
 * `suspended_at` is cleared and `last_active_at` is bumped.
 *
 * @param projectId Vibn project ID.
 * @throws Error when the project has no dev-container row.
 */
export async function resumeDevContainer(projectId: string): Promise<void> {
  const container = await getDevContainerRow(projectId);
  if (!container) {
    throw new Error(`No dev container provisioned for ${projectId}`);
  }

  if (container.state !== 'running') {
    await startService(container.service_uuid);

    const markRunningSql = `UPDATE fs_project_dev_containers
SET state = 'running', suspended_at = NULL, last_active_at = now()
WHERE project_id = $1`;
    await query(markRunningSql, [projectId]);
  }
}
/**
 * Bump `last_active_at` to now() for the project's dev-container row.
 *
 * @param projectId Vibn project ID.
 */
async function touchActivity(projectId: string): Promise<void> {
  const bumpSql = `UPDATE fs_project_dev_containers SET last_active_at = now() WHERE project_id = $1`;
  await query(bumpSql, [projectId]);
}
// ── Exec primitive ───────────────────────────────────────────────────
/** Arguments for execInDevContainer(). */
export interface DevContainerExecOpts {
/** Vibn project whose dev container should run the command. */
projectId: string;
/**
* Shell command text. It is appended verbatim (not escaped) after a
* `cd <cwd> &&` prefix, so shell operators in it are interpreted.
*/
command: string;
/**
* Working directory; a missing or whitespace-only value falls back to
* /workspace. NOTE(review): execInDevContainer() does not itself check
* that this stays under /workspace — presumably enforced by callers.
*/
cwd?: string; // defaults to /workspace
/** Forwarded unchanged to execInCoolifyApp. */
timeoutMs?: number;
/** Forwarded unchanged to execInCoolifyApp. */
maxBytes?: number;
/** Override the user (default: vibn). Use 'root' only when needed. */
user?: string;
/** Extra environment variables to set for the command; values are shell-escaped. */
env?: Record<string, string>;
}
/**
 * Run a command inside the project's vibn-dev service.
 *
 * Resumes the container if its row says 'suspended', then runs the command
 * through execInCoolifyApp (the existing SSH-backed exec primitive) and
 * returns its result. `last_active_at` is bumped afterwards on a
 * best-effort basis.
 *
 * The command is wrapped as `cd <cwd> && export K=V && … && <command>`.
 * `export` is used rather than an `env K=V <command>` prefix because the
 * prefix form only covers the first simple command of a compound command
 * (`a && b`, pipelines) and cannot launch shell keywords or builtins.
 *
 * Tenant safety: the caller is responsible for verifying that the
 * projectId belongs to the workspace BEFORE calling this. This function
 * passes only the stored service UUID to the exec primitive and performs
 * no workspace-ownership check of its own.
 *
 * @param opts See DevContainerExecOpts.
 * @returns Whatever execInCoolifyApp returns for the wrapped command.
 * @throws Error when SSH access to the Coolify host is not configured,
 *         when the project has no dev-container row, or when an `env` key
 *         is not a valid shell variable name.
 */
export async function execInDevContainer(
  opts: DevContainerExecOpts,
): Promise<ExecInAppResult> {
  if (!isCoolifySshConfigured()) {
    throw new Error(
      'shell.exec requires SSH access to the Coolify host; configure COOLIFY_SSH_* envs.',
    );
  }

  const row = await getDevContainerRow(opts.projectId);
  if (!row) {
    throw new Error(
      `No dev container for project ${opts.projectId}. Call ensureDevContainer() first.`,
    );
  }
  if (row.state === 'suspended') {
    await resumeDevContainer(opts.projectId);
  }

  // `||` on purpose: an empty / whitespace-only cwd falls back too.
  const cwd = opts.cwd?.trim() || '/workspace';

  const validEnvName = /^[A-Za-z_][A-Za-z0-9_]*$/;
  const steps: string[] = [`cd ${shellEscape(cwd)}`];
  for (const [name, value] of Object.entries(opts.env ?? {})) {
    // Names are emitted unquoted, so they must be plain identifiers.
    if (!validEnvName.test(name)) {
      throw new Error(`Invalid environment variable name: ${JSON.stringify(name)}`);
    }
    steps.push(`export ${name}=${shellEscape(value)}`);
  }
  steps.push(opts.command);
  const wrapped = steps.join(' && ');

  const result = await execInCoolifyApp({
    appUuid: row.service_uuid,
    service: 'vibn-dev',
    command: wrapped,
    user: opts.user ?? 'vibn',
    timeoutMs: opts.timeoutMs,
    maxBytes: opts.maxBytes,
  });

  // The command has already run; a bookkeeping failure must not discard
  // its result (the caller could otherwise re-run a non-idempotent command).
  try {
    await touchActivity(opts.projectId);
  } catch (err: unknown) {
    console.warn(
      `[dev-container] failed to update last_active_at for project ${opts.projectId}`,
      err,
    );
  }

  return result;
}
/**
 * Quote a string for use as a single shell word.
 *
 * The value is wrapped in single quotes; every embedded single quote is
 * rewritten as `'\''` (close quote, escaped quote, reopen quote).
 *
 * @param s Raw text.
 * @returns The single-quoted form of `s`.
 */
function shellEscape(s: string): string {
  const pieces = s.split("'");
  const body = pieces.join("'\\''");
  return "'" + body + "'";
}
// ── Health ───────────────────────────────────────────────────────────
/**
 * Quick liveness check used by chat startup to decide whether to show
 * a "spinning up your environment…" banner.
 *
 * Reads only the bookkeeping row; Coolify is not contacted, so the state
 * is whatever was last recorded. Callers that need live status can call
 * getService(uuid) themselves.
 *
 * @param projectId Vibn project ID.
 * @returns `{ exists: false, state: 'absent', serviceUuid: null }` when
 *          there is no row, otherwise the row's state and service UUID.
 */
export async function getDevContainerStatus(projectId: string): Promise<{
  exists: boolean;
  state: DevContainerRow['state'] | 'absent';
  serviceUuid: string | null;
}> {
  const container = await getDevContainerRow(projectId);
  if (container) {
    const { state, service_uuid: serviceUuid } = container;
    return { exists: true, state, serviceUuid };
  }
  return { exists: false, state: 'absent', serviceUuid: null };
}
// Re-export getService so route handlers can pull live Coolify status
// without taking a separate dependency on lib/coolify.
export { getService };

62
lib/feature-flags.ts Normal file
View File

@@ -0,0 +1,62 @@
/**
* Runtime feature flags. Backed by a tiny key/value table (one row per
* flag) so an admin can flip a flag and have every Vibn pod pick it up
* within seconds (no redeploy required).
*
* Currently used for:
* - path_b_disabled : kill switch for the Path B AI dev-container
* architecture. When true, shell.exec / fs.* /
* devcontainer.* tools return 503 and the chat
* system prompt falls back to Path A guidance.
*
* See AI_PATH_B_EXECUTION_PLAN.md §7 for the rollback story.
*/
import { query, queryOne } from '@/lib/db-postgres';
/** Per-process latch: set once the CREATE TABLE below has succeeded. */
let tableReady = false;

/**
 * Create the fs_feature_flags table if it does not exist yet.
 *
 * After one success the statement is not sent again by this process; a
 * failed attempt leaves the latch unset so the next call retries.
 */
async function ensureFlagsTable(): Promise<void> {
  if (!tableReady) {
    const ddl = `CREATE TABLE IF NOT EXISTS fs_feature_flags (
key TEXT PRIMARY KEY,
value JSONB NOT NULL,
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
);`;
    await query(ddl, []);
    tableReady = true;
  }
}
/** How long (ms) a looked-up flag value is served from memory before re-querying. */
const TTL_MS = 10_000;

/**
 * Per-process flag cache. Values are stored as `unknown` because each key
 * may hold a different JSON type; getFlag() casts to the caller's type on
 * the way out.
 */
const cache = new Map<string, { value: unknown; expires: number }>();

/**
 * Read a feature flag, falling back to `defaultValue`.
 *
 * A cached value younger than TTL_MS is returned without touching the
 * database. Otherwise the flag row is queried, and the result (or the
 * default, when the row is missing or its value is null/undefined) is
 * cached for TTL_MS. Database errors propagate and nothing is cached.
 *
 * The stored JSON is NOT validated against `T`; the type parameter is a
 * caller-side assertion only.
 *
 * @param key          Flag name (primary key of fs_feature_flags).
 * @param defaultValue Value to use when the flag has no stored value.
 * @returns The stored value, or `defaultValue`.
 */
export async function getFlag<T = unknown>(key: string, defaultValue: T): Promise<T> {
  const hit = cache.get(key);
  if (hit !== undefined && hit.expires > Date.now()) {
    return hit.value as T;
  }

  await ensureFlagsTable();
  const row = await queryOne<{ value: T }>(
    `SELECT value FROM fs_feature_flags WHERE key = $1 LIMIT 1`,
    [key],
  );
  const value = row?.value ?? defaultValue;
  cache.set(key, { value, expires: Date.now() + TTL_MS });
  return value;
}
/**
 * Insert or overwrite a feature flag.
 *
 * The value is serialised with JSON.stringify and stored as JSONB;
 * `updated_at` is set to now(). Afterwards the key is dropped from this
 * process's cache so the next getFlag() here re-reads it. Other processes
 * are not notified — they keep their cached value until it expires.
 *
 * @param key   Flag name.
 * @param value JSON-serialisable value to store.
 */
export async function setFlag(key: string, value: unknown): Promise<void> {
  await ensureFlagsTable();

  const upsertSql = `INSERT INTO fs_feature_flags (key, value, updated_at)
VALUES ($1, $2::jsonb, now())
ON CONFLICT (key) DO UPDATE
SET value = EXCLUDED.value,
updated_at = now()`;
  const serialised = JSON.stringify(value);
  await query(upsertSql, [key, serialised]);

  cache.delete(key);
}
/**
 * Whether the Path B kill switch is on.
 *
 * Reads the 'path_b_disabled' flag (default false) and coerces whatever
 * is stored to a boolean by truthiness.
 */
export async function isPathBDisabled(): Promise<boolean> {
  const stored = await getFlag<boolean>('path_b_disabled', false);
  return Boolean(stored);
}