From 4ba9407534ddfd14a9aa8bfbbc79727500e8032d Mon Sep 17 00:00:00 2001 From: Mark Henderson Date: Tue, 28 Apr 2026 12:53:16 -0700 Subject: [PATCH] feat(path-b): persistent dev containers + shell.exec + fs.* tools Kicks off Path B (AI_PATH_B_EXECUTION_PLAN.md): each Vibn project gets its own vibn-dev Coolify service that the AI drives directly via shell and filesystem tools. Sub-second iteration vs the 5-min Gitea redeploy loop. What's in this commit (week 1, slice 1): - vibn-dev Dockerfile: small Ubuntu base (~500 MB target). git, ripgrep, python3, mise. Language toolchains lazy-install on first use. - lib/dev-container.ts: ensureDevContainer / suspend / resume / execInDevContainer. Backed by a new fs_project_dev_containers table. - lib/feature-flags.ts + /api/admin/path-b/{disable,enable}: kill switch. Bearer NEXTAUTH_SECRET flips path_b_disabled, propagates in ~10s. - New MCP tools wired into /api/mcp: devcontainer.{ensure,status,suspend}, shell.exec, fs.{read,write,edit,list,delete,glob,grep}. All enforce workspace isolation via fs_projects ownership check. - vibn-tools.ts: 11 new Gemini tool defs (smoke test passes, 63 total). - chat system prompt: shell-first guidance; gitea_file_* marked deprecated for iterative work (still available, removed week 3). 
Safety nets baked in: - pathBGuard() returns 503 from every Path B tool when the kill switch flips - fs.* paths locked to /workspace - ensureResourceInWorkspaceProjects via fs_project_dev_containers PK - per-project resource limits (1 vCPU, 1 GiB RAM) on the compose spec Still pending (queued): - dev_server.* (preview URLs through Traefik) - ship tool (push to Gitea + trigger prod deploy) - auto-push autosave to vibn-autosave/main every 5 min - idle-suspend cron after 30 min inactivity - HMR-through-Traefik spike - eval harness Made-with: Cursor --- app/api/admin/path-b/disable/route.ts | 17 + app/api/admin/path-b/enable/route.ts | 17 + app/api/admin/path-b/route.ts | 38 ++ app/api/chat/route.ts | 42 ++- app/api/mcp/route.ts | 488 ++++++++++++++++++++++++++ lib/ai/vibn-tools.ts | 150 ++++++++ lib/dev-container.ts | 356 +++++++++++++++++++ lib/feature-flags.ts | 62 ++++ 8 files changed, 1159 insertions(+), 11 deletions(-) create mode 100644 app/api/admin/path-b/disable/route.ts create mode 100644 app/api/admin/path-b/enable/route.ts create mode 100644 app/api/admin/path-b/route.ts create mode 100644 lib/dev-container.ts create mode 100644 lib/feature-flags.ts diff --git a/app/api/admin/path-b/disable/route.ts b/app/api/admin/path-b/disable/route.ts new file mode 100644 index 00000000..37ae8cae --- /dev/null +++ b/app/api/admin/path-b/disable/route.ts @@ -0,0 +1,17 @@ +import { NextResponse } from 'next/server'; +import { setFlag } from '@/lib/feature-flags'; + +export async function POST(request: Request) { + const auth = request.headers.get('authorization') ?? ''; + const bearer = auth.toLowerCase().startsWith('bearer ') ? 
auth.slice(7).trim() : ''; + if (!bearer || !process.env.NEXTAUTH_SECRET || bearer !== process.env.NEXTAUTH_SECRET) { + return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }); + } + await setFlag('path_b_disabled', true); + return NextResponse.json({ + ok: true, + flag: 'path_b_disabled', + value: true, + note: 'Path B (AI dev containers) disabled. New chat sessions fall back to Gitea-write tools. Existing dev containers continue until idle-suspend.', + }); +} diff --git a/app/api/admin/path-b/enable/route.ts b/app/api/admin/path-b/enable/route.ts new file mode 100644 index 00000000..8d92bc71 --- /dev/null +++ b/app/api/admin/path-b/enable/route.ts @@ -0,0 +1,17 @@ +import { NextResponse } from 'next/server'; +import { setFlag } from '@/lib/feature-flags'; + +export async function POST(request: Request) { + const auth = request.headers.get('authorization') ?? ''; + const bearer = auth.toLowerCase().startsWith('bearer ') ? auth.slice(7).trim() : ''; + if (!bearer || !process.env.NEXTAUTH_SECRET || bearer !== process.env.NEXTAUTH_SECRET) { + return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }); + } + await setFlag('path_b_disabled', false); + return NextResponse.json({ + ok: true, + flag: 'path_b_disabled', + value: false, + note: 'Path B re-enabled.', + }); +} diff --git a/app/api/admin/path-b/route.ts b/app/api/admin/path-b/route.ts new file mode 100644 index 00000000..33ac3f4e --- /dev/null +++ b/app/api/admin/path-b/route.ts @@ -0,0 +1,38 @@ +/** + * Path B kill switch. + * + * GET /api/admin/path-b → returns { disabled: boolean } + * POST /api/admin/path-b/disable → sets disabled=true (handled below) + * POST /api/admin/path-b/enable → sets disabled=false + * + * Auth: Bearer NEXTAUTH_SECRET (ops bootstrap), same pattern as the + * /api/admin/backfill-isolation endpoint. We deliberately do NOT accept + * workspace API keys here — flipping a global feature flag is a + * platform-level action. 
+ * + * When `path_b_disabled = true`: + * - shell.exec, fs.*, devcontainer.* return 503 from /api/mcp + * - the chat system prompt falls back to Path A (Gitea-write) guidance + * - existing dev containers keep running until they idle-suspend + * (no force-kill — graceful drain) + * + * Reverting is a single POST. Cache TTL is 10s, so the flip propagates + * to every Vibn pod within ~10s of the SQL update. + */ + +import { NextResponse } from 'next/server'; +import { getFlag, setFlag } from '@/lib/feature-flags'; + +function authorized(request: Request): boolean { + const auth = request.headers.get('authorization') ?? ''; + const bearer = auth.toLowerCase().startsWith('bearer ') ? auth.slice(7).trim() : ''; + return Boolean(bearer && process.env.NEXTAUTH_SECRET && bearer === process.env.NEXTAUTH_SECRET); +} + +export async function GET(request: Request) { + if (!authorized(request)) { + return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }); + } + const disabled = await getFlag('path_b_disabled', false); + return NextResponse.json({ disabled }); +} diff --git a/app/api/chat/route.ts b/app/api/chat/route.ts index 0c2528da..98a807d1 100644 --- a/app/api/chat/route.ts +++ b/app/api/chat/route.ts @@ -95,18 +95,38 @@ You are talking to the owner of the "${workspace}" workspace. 2. \`domains_register { domain }\` to buy it (uses workspace billing). 3. \`apps_domains_set { uuid, domains }\` to attach. DNS + Traefik are wired automatically. -## Writing & shipping code (Gitea) -You CAN write code directly — don't tell the user "I can only generate code, you push it." Use these tools to scaffold and edit a project's repo end-to-end: -- \`gitea_repo_create { name }\` — mint a new private repo in the workspace org. -- \`gitea_file_write { repo, path, content, message }\` — commit one file at a time. Call repeatedly to scaffold a project. -- \`gitea_file_read { repo, path }\` — inspect existing code (returns directory listings if path is a folder). 
-- \`gitea_branches_list\` / \`gitea_branch_create\` — branch for risky edits. -- \`gitea_repos_list\` — discover what already exists before creating anything new. +## Writing code (PREFERRED: dev container, shell-first) -End-to-end recipe for "build me X": -1. \`gitea_repo_create { name: 'x' }\`. -2. \`gitea_file_write\` × N — package.json, Dockerfile, src/index.ts, etc. -3. \`apps_create { projectId, repo: 'x', ports, domain }\` — Pathway 1 deploys from the Gitea repo. Coolify auto-redeploys on subsequent file writes. +Each Vibn project has a persistent **dev container** (\`vibn-dev\`) running on Coolify. You write code by \`shell_exec\`-ing inside it and editing files with \`fs_*\` tools. This is dramatically faster than committing to Gitea and waiting for redeploys (sub-second feedback vs ~5 min). + +**Always start a coding session with**: +1. \`devcontainer_ensure { projectId }\` — idempotent. First call ~10s (provisions a Coolify service); subsequent calls return immediately. + +**Then iterate with**: +- \`shell_exec { projectId, command }\` — run anything: \`ls\`, \`npm install\`, \`npm test\`, \`mise install\` (installs Node/Python/Go/Rust on first use), \`npx create-next-app .\`, \`git status\`. Cwd defaults to \`/workspace\`. +- \`fs_read { projectId, path }\` — inspect a file. +- \`fs_write { projectId, path, content }\` — create or overwrite a file. +- \`fs_edit { projectId, path, oldString, newString }\` — surgical search/replace. Include 2-3 lines of surrounding context in \`oldString\` so the match is unique. Fails fast if missing or non-unique. +- \`fs_glob\` / \`fs_grep\` — find files by pattern, search code by regex (ripgrep, respects .gitignore). +- \`fs_list\`, \`fs_delete\` — directory listing, delete. + +**End-to-end recipe for "build me X"**: +1. \`devcontainer_ensure { projectId }\`. +2. \`shell_exec { projectId, command: 'npx create-next-app@latest . 
--yes' }\` (or whichever scaffold fits — search GitHub first if the user wants an OSS starting point). +3. \`shell_exec\` to run \`npm install\`, then iterate with \`fs_edit\` / \`fs_write\` to customize. +4. \`shell_exec { command: 'npm run dev -- --port 3000' }\` to verify locally (preview URLs land in week 2). +5. When the user says "ship it" — for now, \`shell_exec\` a \`git add . && git commit -m "..." && git push\` to push to the Gitea repo, then \`apps_create\` to wire up the production deployment. (A dedicated \`ship\` tool lands soon.) + +**Rules**: +- Stay under \`/workspace\`. The fs_* tools enforce this; for system paths use \`shell_exec\` deliberately. +- The container has no route to internal Vibn services (vibn-postgres, etc.) by design. +- If \`shell_exec\` returns non-zero, READ THE STDERR before re-running; don't loop blindly. + +## Legacy: Gitea-direct tools (orchestration only) +These still exist for repo-level orchestration but DO NOT use them for iterative file editing — use \`fs_*\` instead: +- \`gitea_repos_list\`, \`gitea_repo_get\`, \`gitea_repo_create\` — discover and create repos. +- \`gitea_branches_list\`, \`gitea_branch_create\` — branch management. +- (\`gitea_file_read\` / \`gitea_file_write\` / \`gitea_file_delete\` are deprecated. Prefer \`fs_*\` against the dev container.) ## Troubleshooting - Deploy stuck or "exited (1)" → \`apps_logs { uuid }\` and \`apps_containers_list { uuid }\`. Common causes: missing env var, wrong port, image pull failure. 
diff --git a/app/api/mcp/route.ts b/app/api/mcp/route.ts index 71c78a22..12b28963 100644 --- a/app/api/mcp/route.ts +++ b/app/api/mcp/route.ts @@ -37,6 +37,13 @@ import { VIBN_GCS_LOCATION } from '@/lib/gcp/storage'; import { getApplicationRuntimeLogs } from '@/lib/coolify-logs'; import { execInCoolifyApp } from '@/lib/coolify-exec'; import { isCoolifySshConfigured, runOnCoolifyHost } from '@/lib/coolify-ssh'; +import { + ensureDevContainer, + execInDevContainer, + getDevContainerStatus, + suspendDevContainer, +} from '@/lib/dev-container'; +import { isPathBDisabled } from '@/lib/feature-flags'; import { composeUp, composePs, @@ -173,6 +180,17 @@ export async function GET() { 'gitea.file.delete', 'gitea.branches.list', 'gitea.branch.create', + 'devcontainer.ensure', + 'devcontainer.status', + 'devcontainer.suspend', + 'shell.exec', + 'fs.read', + 'fs.write', + 'fs.edit', + 'fs.list', + 'fs.delete', + 'fs.glob', + 'fs.grep', ], }, }, @@ -318,6 +336,29 @@ export async function POST(request: Request) { case 'gitea.branch.create': return await toolGiteaBranchCreate(principal, params); + case 'devcontainer.ensure': + return await toolDevContainerEnsure(principal, params); + case 'devcontainer.status': + return await toolDevContainerStatus(principal, params); + case 'devcontainer.suspend': + return await toolDevContainerSuspend(principal, params); + case 'shell.exec': + return await toolShellExec(principal, params); + case 'fs.read': + return await toolFsRead(principal, params); + case 'fs.write': + return await toolFsWrite(principal, params); + case 'fs.edit': + return await toolFsEdit(principal, params); + case 'fs.list': + return await toolFsList(principal, params); + case 'fs.delete': + return await toolFsDelete(principal, params); + case 'fs.glob': + return await toolFsGlob(principal, params); + case 'fs.grep': + return await toolFsGrep(principal, params); + default: return NextResponse.json( { error: `Unknown tool "${action}"` }, @@ -2767,3 +2808,450 @@ async 
function toolGiteaBranchCreate(principal: Principal, params: Record { + const rows = await query<{ id: string; data: any; slug: string }>( + `SELECT id, data, slug + FROM fs_projects + WHERE id = $1 + AND (vibn_workspace_id = $2 OR workspace = $3) + LIMIT 1`, + [projectId, principal.workspace.id, principal.workspace.slug], + ); + if (rows.length === 0) return null; + const r = rows[0]; + const d = r.data || {}; + return { + id: r.id, + data: d, + slug: r.slug, + name: d.productName || d.name || d.title || r.slug, + }; +} + +async function pathBGuard(): Promise { + if (await isPathBDisabled()) { + return NextResponse.json( + { + error: + 'Path B (AI dev containers) is currently disabled by an admin. Use the Gitea-based tools instead, or contact support.', + }, + { status: 503 }, + ); + } + return null; +} + +function requireProjectId(params: Record): string | NextResponse { + const id = String(params.projectId ?? params.project_id ?? '').trim(); + if (!id) { + return NextResponse.json({ error: 'Param "projectId" is required' }, { status: 400 }); + } + return id; +} + +async function resolveProjectOr404( + principal: Principal, + params: Record, +): Promise { + const idOrErr = requireProjectId(params); + if (idOrErr instanceof NextResponse) return idOrErr; + const project = await loadProjectForPrincipal(principal, idOrErr); + if (!project) { + return NextResponse.json( + { error: `Project ${idOrErr} not found in this workspace` }, + { status: 404 }, + ); + } + return project; +} + +// ── devcontainer.* ─────────────────────────────────────────────────── + +async function toolDevContainerEnsure(principal: Principal, params: Record) { + const guard = await pathBGuard(); + if (guard) return guard; + const project = await resolveProjectOr404(principal, params); + if (project instanceof NextResponse) return project; + + try { + const r = await ensureDevContainer({ + projectId: project.id, + projectSlug: project.slug, + projectName: project.name, + workspace: 
principal.workspace, + noStart: Boolean(params.noStart), + }); + return NextResponse.json({ result: r }); + } catch (err) { + return NextResponse.json( + { error: err instanceof Error ? err.message : String(err) }, + { status: 500 }, + ); + } +} + +async function toolDevContainerStatus(principal: Principal, params: Record) { + const project = await resolveProjectOr404(principal, params); + if (project instanceof NextResponse) return project; + const status = await getDevContainerStatus(project.id); + return NextResponse.json({ result: status }); +} + +async function toolDevContainerSuspend(principal: Principal, params: Record) { + const project = await resolveProjectOr404(principal, params); + if (project instanceof NextResponse) return project; + await suspendDevContainer(project.id); + return NextResponse.json({ result: { ok: true, projectId: project.id, state: 'suspended' } }); +} + +// ── shell.exec ─────────────────────────────────────────────────────── +// +// Universal escape hatch. Runs an arbitrary shell command inside +// /workspace as the `vibn` user (uid 1000). Output is capped at 1 MB +// and the call times out at 60s by default (max 10 min). + +async function toolShellExec(principal: Principal, params: Record) { + const guard = await pathBGuard(); + if (guard) return guard; + const project = await resolveProjectOr404(principal, params); + if (project instanceof NextResponse) return project; + + const command = typeof params.command === 'string' ? params.command : ''; + if (!command.trim()) { + return NextResponse.json({ error: 'Param "command" is required' }, { status: 400 }); + } + + // Lazy-provision: if there's no dev container yet, create one before + // running the command. The first call is ~10-15s; subsequent calls + // skip this branch entirely. 
+ await ensureDevContainer({ + projectId: project.id, + projectSlug: project.slug, + projectName: project.name, + workspace: principal.workspace, + }); + + try { + const result = await execInDevContainer({ + projectId: project.id, + command, + cwd: typeof params.cwd === 'string' ? params.cwd : undefined, + timeoutMs: Number.isFinite(Number(params.timeoutMs)) + ? Number(params.timeoutMs) + : Number.isFinite(Number(params.timeout_ms)) + ? Number(params.timeout_ms) + : undefined, + maxBytes: Number.isFinite(Number(params.maxBytes)) ? Number(params.maxBytes) : undefined, + env: params.env && typeof params.env === 'object' ? params.env : undefined, + user: typeof params.user === 'string' ? params.user : undefined, + }); + return NextResponse.json({ + result: { + code: result.code, + stdout: result.stdout, + stderr: result.stderr, + truncated: result.truncated, + durationMs: result.durationMs, + }, + }); + } catch (err) { + return NextResponse.json( + { error: err instanceof Error ? err.message : String(err) }, + { status: 400 }, + ); + } +} + +// ── fs.* ───────────────────────────────────────────────────────────── +// +// Implemented on top of shell.exec for now. Each fs.* call shells out +// to a coreutil (`cat`, `tee`, `rm`, etc) inside the dev container. +// This keeps the surface area tiny and ensures the AI's view of the +// filesystem matches what its `shell.exec` calls see. +// +// Path validation: we lock fs.* to /workspace by default. Absolute +// paths outside /workspace are rejected (prevents the AI from +// stomping on /etc, /home/vibn/.bashrc, etc by accident — though the +// `vibn` user has sudo, so a determined `shell.exec` can still go +// anywhere; fs.* just removes the obvious footguns). 
+
+/** Root directory that every fs.* tool path must resolve under. */
+const FS_ROOT = '/workspace';
+
+/**
+ * Quote `s` for use as a single word in a shell command string: the value
+ * is wrapped in single quotes and each embedded `'` is rewritten as `'\''`.
+ */
+function shq(s: string): string {
+  return `'${s.replace(/'/g, `'\\''`)}'`;
+}
+
+/**
+ * Resolve a caller-supplied path to a normalized absolute path under FS_ROOT.
+ *
+ * Relative paths are resolved against FS_ROOT. Empty, "." and ".." segments
+ * are collapsed one segment at a time, so a chain such as "a/../../etc"
+ * cannot climb out of FS_ROOT, and a trailing slash is dropped (so
+ * "/workspace/" normalizes to FS_ROOT itself). The check is purely lexical:
+ * symlinks inside the container are not resolved here.
+ *
+ * @param p Path taken from the tool params (relative or absolute).
+ * @returns The normalized path, or a 400 NextResponse when `p` is empty or
+ *   resolves outside FS_ROOT.
+ */
+function normalizeFsPath(p: string): string | NextResponse {
+  if (!p || typeof p !== 'string') {
+    return NextResponse.json({ error: 'Param "path" is required' }, { status: 400 });
+  }
+  const abs = p.startsWith('/') ? p : `${FS_ROOT}/${p}`;
+  const segments: string[] = [];
+  for (const seg of abs.split('/')) {
+    if (seg === '' || seg === '.') continue;
+    if (seg === '..') {
+      // pop() on an empty stack is a no-op, which clamps the walk at "/".
+      segments.pop();
+    } else {
+      segments.push(seg);
+    }
+  }
+  const norm = `/${segments.join('/')}`;
+  // Compare on a segment boundary so "/workspace-evil" is not accepted as
+  // being inside "/workspace".
+  if (norm !== FS_ROOT && !norm.startsWith(`${FS_ROOT}/`)) {
+    return NextResponse.json(
+      { error: `Path "${p}" is outside ${FS_ROOT}; use shell.exec for system paths.` },
+      { status: 400 },
+    );
+  }
+  return norm;
+}
+
+/**
+ * Run `command` in the project's dev container on behalf of an fs.* tool.
+ * Calls ensureDevContainer before every exec.
+ *
+ * @param timeoutMs Optional timeout forwarded to execInDevContainer.
+ * @returns Exit code, stdout, stderr and the truncation flag from the exec.
+ */
+async function runFsCmd(
+  principal: Principal,
+  project: ProjectForPath,
+  command: string,
+  timeoutMs?: number,
+): Promise<{ code: number | null; stdout: string; stderr: string; truncated: boolean }> {
+  await ensureDevContainer({
+    projectId: project.id,
+    projectSlug: project.slug,
+    projectName: project.name,
+    workspace: principal.workspace,
+  });
+  const r = await execInDevContainer({
+    projectId: project.id,
+    command,
+    timeoutMs,
+  });
+  return { code: r.code, stdout: r.stdout, stderr: r.stderr, truncated: r.truncated };
+}
+
+/** fs.read: return the text of a file under /workspace, optionally windowed by offset/limit. */
+async function toolFsRead(principal: Principal, params: Record) {
+  const guard = await pathBGuard();
+  if (guard) return guard;
+  const project = await resolveProjectOr404(principal, params);
+  if (project instanceof NextResponse) return project;
+  const path = normalizeFsPath(String(params.path ?? ''));
+  if (path instanceof NextResponse) return path;
+
+  const offset = Number.isFinite(Number(params.offset)) ? Math.max(0, Number(params.offset)) : 0;
+  const limit = Number.isFinite(Number(params.limit)) ? Math.max(1, Number(params.limit)) : 0;
+
+  // `test -f`, then read with optional sed window.
+  let cmd: string;
+  // sed addresses must be integers; `offset` is 0-based, sed lines are 1-based.
+  const firstLine = Math.floor(offset) + 1;
+  if (limit > 0) {
+    const lastLine = Math.floor(offset) + Math.floor(limit);
+    cmd = `test -f ${shq(path)} && sed -n ${shq(`${firstLine},${lastLine}p`)} ${shq(path)}`;
+  } else if (offset > 0) {
+    // offset without limit: read from that line through end of file rather
+    // than silently ignoring the offset that is echoed back in the response.
+    cmd = `test -f ${shq(path)} && sed -n ${shq(`${firstLine},$p`)} ${shq(path)}`;
+  } else {
+    cmd = `test -f ${shq(path)} && cat ${shq(path)}`;
+  }
+
+  const r = await runFsCmd(principal, project, cmd);
+  if (r.code !== 0) {
+    return NextResponse.json(
+      { error: `fs.read failed for ${path}: ${r.stderr.trim() || 'not a file or missing'}` },
+      { status: 404 },
+    );
+  }
+  return NextResponse.json({
+    result: { path, content: r.stdout, truncated: r.truncated, offset, limit: limit || null },
+  });
+}
+
+/**
+ * fs.write: create or overwrite a file under /workspace.
+ * Parent directories are created with `mkdir -p`. A non-string `content`
+ * param is treated as the empty string. Responds 500 with the command's
+ * stderr when the write fails.
+ */
+async function toolFsWrite(principal: Principal, params: Record) {
+  const guard = await pathBGuard();
+  if (guard) return guard;
+  const project = await resolveProjectOr404(principal, params);
+  if (project instanceof NextResponse) return project;
+  const path = normalizeFsPath(String(params.path ?? ''));
+  if (path instanceof NextResponse) return path;
+  const content = typeof params.content === 'string' ? params.content : '';
+
+  // Stream content via base64 to avoid shell-quoting headaches with
+  // arbitrary binary / multibyte input.
+  const b64 = Buffer.from(content, 'utf8').toString('base64');
+  const cmd =
+    `mkdir -p ${shq(path.replace(/\/[^/]+$/, '') || FS_ROOT)} && ` +
+    `printf %s ${shq(b64)} | base64 -d > ${shq(path)}`;
+  const r = await runFsCmd(principal, project, cmd);
+  if (r.code !== 0) {
+    return NextResponse.json(
+      { error: `fs.write failed: ${r.stderr.trim() || 'unknown error'}` },
+      { status: 500 },
+    );
+  }
+  return NextResponse.json({
+    result: { path, bytesWritten: Buffer.byteLength(content, 'utf8') },
+  });
+}
+
+/**
+ * fs.edit: exact-substring search/replace inside one file under /workspace.
+ *
+ * The replace runs in an embedded Python script inside the container. Its
+ * exit code is mapped to the HTTP status: 2 (oldString not found) → 404,
+ * 3 (more than one match without replaceAll) → 409, any other non-zero
+ * code → 500. On success the response carries the number of occurrences
+ * the script counted.
+ */
+async function toolFsEdit(principal: Principal, params: Record) {
+  const guard = await pathBGuard();
+  if (guard) return guard;
+  const project = await resolveProjectOr404(principal, params);
+  if (project instanceof NextResponse) return project;
+  const path = normalizeFsPath(String(params.path ?? ''));
+  if (path instanceof NextResponse) return path;
+  const oldString = typeof params.oldString === 'string' ? params.oldString : '';
+  const newString = typeof params.newString === 'string' ? params.newString : '';
+  const replaceAll = Boolean(params.replaceAll);
+  if (!oldString) {
+    return NextResponse.json({ error: 'Param "oldString" is required' }, { status: 400 });
+  }
+
+  // Read → in-memory replace → write back. Done in one shell pipeline
+  // via a small embedded Python (always present on the base image)
+  // because doing this with sed is a quoting nightmare. The script
+  // bails non-zero if oldString is missing or non-unique (Aider-style).
+  const payload = {
+    path,
+    oldString,
+    newString,
+    replaceAll,
+  };
+  const py = `import json,sys
+spec=json.loads(sys.stdin.read())
+with open(spec['path'],'r',encoding='utf-8') as f: src=f.read()
+old=spec['oldString']; new=spec['newString']; ra=spec['replaceAll']
+n=src.count(old)
+if n==0:
+  sys.stderr.write('oldString not found'); sys.exit(2)
+if n>1 and not ra:
+  sys.stderr.write(f'oldString found {n}x; pass replaceAll=true or include more context'); sys.exit(3)
+out=src.replace(old,new) if ra else src.replace(old,new,1)
+with open(spec['path'],'w',encoding='utf-8') as f: f.write(out)
+print(n)`;
+
+  const b64 = Buffer.from(JSON.stringify(payload), 'utf8').toString('base64');
+  const pyB64 = Buffer.from(py, 'utf8').toString('base64');
+  // The JSON spec reaches Python's stdin over a pipe rather than a `<<<`
+  // here-string: here-strings are a bash extension and are a syntax error
+  // when the command is run by a plain POSIX sh (e.g. dash). The pipeline's
+  // exit status is still python3's, so the 2/3 codes below are unaffected.
+  const cmd =
+    `printf %s ${shq(b64)} | base64 -d | ` +
+    `python3 -c "$(printf %s ${shq(pyB64)} | base64 -d)"`;
+
+  const r = await runFsCmd(principal, project, cmd);
+  if (r.code !== 0) {
+    const status = r.code === 2 ? 404 : r.code === 3 ? 409 : 500;
+    return NextResponse.json(
+      {
+        error: `fs.edit failed: ${r.stderr.trim() || 'unknown error'}`,
+        code: r.code,
+      },
+      { status },
+    );
+  }
+  return NextResponse.json({
+    result: { path, replacements: parseInt(r.stdout.trim() || '0', 10) },
+  });
+}
+
+/**
+ * fs.list: `ls -lA` of a directory under /workspace (default /workspace),
+ * capped at 200 lines of output. ls errors are folded into the listing
+ * (2>&1) and the pipeline's exit code is returned as `code`; failures are
+ * not mapped to an error status.
+ */
+async function toolFsList(principal: Principal, params: Record) {
+  const guard = await pathBGuard();
+  if (guard) return guard;
+  const project = await resolveProjectOr404(principal, params);
+  if (project instanceof NextResponse) return project;
+  const path = normalizeFsPath(String(params.path ?? '/workspace'));
+  if (path instanceof NextResponse) return path;
+  const cmd = `cd ${shq(path)} && ls -lA --time-style=long-iso 2>&1 | head -200`;
+  const r = await runFsCmd(principal, project, cmd);
+  return NextResponse.json({ result: { path, listing: r.stdout, code: r.code } });
+}
+
+/** fs.delete: remove a file under /workspace, or a directory tree when recursive=true. */
+async function toolFsDelete(principal: Principal, params: Record) {
+  const guard = await pathBGuard();
+  if (guard) return guard;
+  const project = await resolveProjectOr404(principal, params);
+  if (project instanceof NextResponse) return project;
+  const path = normalizeFsPath(String(params.path ?? ''));
+  if (path instanceof NextResponse) return path;
+  const recursive = Boolean(params.recursive);
+  // Belt-and-suspenders: never let `rm -rf /workspace` itself slip through.
+  if (path === FS_ROOT) {
+    return NextResponse.json(
+      { error: 'Refusing to delete /workspace itself.' },
+      { status: 400 },
+    );
+  }
+  const cmd = `rm ${recursive ?
'-rf' : '-f'} ${shq(path)}`; + const r = await runFsCmd(principal, project, cmd); + if (r.code !== 0) { + return NextResponse.json( + { error: `fs.delete failed: ${r.stderr.trim()}` }, + { status: 500 }, + ); + } + return NextResponse.json({ result: { ok: true, path } }); +} + +async function toolFsGlob(principal: Principal, params: Record) { + const guard = await pathBGuard(); + if (guard) return guard; + const project = await resolveProjectOr404(principal, params); + if (project instanceof NextResponse) return project; + const pattern = String(params.pattern ?? '').trim(); + if (!pattern) { + return NextResponse.json({ error: 'Param "pattern" is required' }, { status: 400 }); + } + const cwd = normalizeFsPath(String(params.cwd ?? '/workspace')); + if (cwd instanceof NextResponse) return cwd; + // ripgrep --files --glob is faster + smarter than `find` and respects .gitignore. + const cmd = `cd ${shq(cwd)} && rg --files --glob ${shq(pattern)} | head -500`; + const r = await runFsCmd(principal, project, cmd); + const files = r.stdout.split('\n').map(s => s.trim()).filter(Boolean); + return NextResponse.json({ result: { pattern, cwd, files, truncated: files.length === 500 } }); +} + +async function toolFsGrep(principal: Principal, params: Record) { + const guard = await pathBGuard(); + if (guard) return guard; + const project = await resolveProjectOr404(principal, params); + if (project instanceof NextResponse) return project; + const pattern = String(params.pattern ?? ''); + if (!pattern) { + return NextResponse.json({ error: 'Param "pattern" is required' }, { status: 400 }); + } + const cwd = normalizeFsPath(String(params.cwd ?? '/workspace')); + if (cwd instanceof NextResponse) return cwd; + const glob = typeof params.glob === 'string' && params.glob.trim() ? params.glob.trim() : null; + const ctx = Number.isFinite(Number(params.contextLines)) + ? 
Math.min(10, Math.max(0, Number(params.contextLines))) + : 0; + const flags = [ + '--no-heading', + '--line-number', + '--max-count', '50', + '--max-columns', '300', + ctx ? `--context ${ctx}` : '', + glob ? `--glob ${shq(glob)}` : '', + ] + .filter(Boolean) + .join(' '); + const cmd = `cd ${shq(cwd)} && rg ${flags} ${shq(pattern)} | head -500`; + const r = await runFsCmd(principal, project, cmd); + return NextResponse.json({ + result: { pattern, cwd, glob, matches: r.stdout, truncated: r.truncated }, + }); +} diff --git a/lib/ai/vibn-tools.ts b/lib/ai/vibn-tools.ts index f56a26b9..7d6cada2 100644 --- a/lib/ai/vibn-tools.ts +++ b/lib/ai/vibn-tools.ts @@ -637,6 +637,156 @@ Auto-domain {name}.{workspace}.vibnai.com is assigned automatically.`, }, }, + // ── Path B: dev container + shell + filesystem (PREFERRED for code authoring) ── + // + // These run inside the per-project vibn-dev container. Dramatically faster + // iteration than gitea_file_* (sub-second feedback vs ~5 min redeploy). + // Use these for ALL code writing/editing/scaffolding work. Keep gitea_* + // for orchestration (creating new repos, listing branches) only. + + { + name: 'devcontainer_ensure', + description: + 'Ensure a per-project AI dev container exists and is running. Idempotent — first call ~10s (provisions a Coolify service), subsequent calls are instant. ' + + 'Call this at the start of any code-authoring session. Returns the dev container service UUID and state.', + parameters: { + type: 'OBJECT', + properties: { + projectId: { type: 'STRING', description: 'The Vibn project ID.' }, + }, + required: ['projectId'], + }, + }, + { + name: 'devcontainer_status', + description: 'Cheap status check for the project dev container. Returns { exists, state, serviceUuid }.', + parameters: { + type: 'OBJECT', + properties: { + projectId: { type: 'STRING', description: 'The Vibn project ID.' 
}, + }, + required: ['projectId'], + }, + }, + { + name: 'shell_exec', + description: + 'Run a shell command inside the project dev container as the `vibn` user (uid 1000) under /workspace. ' + + 'This is your universal escape hatch — install deps (`npm install`), run tests (`npm test`), scaffold code (`npx create-...`), ' + + 'inspect output, run migrations. Use this instead of gitea_file_* for any iterative work. ' + + 'Output is capped at 1 MB; default timeout 60s, max 600s.', + parameters: { + type: 'OBJECT', + properties: { + projectId: { type: 'STRING', description: 'The Vibn project ID.' }, + command: { type: 'STRING', description: 'Shell command (passes through `sh -lc`, so pipes/redirects work).' }, + cwd: { type: 'STRING', description: 'Working directory (default /workspace). Must stay under /workspace.' }, + timeoutMs: { type: 'NUMBER', description: 'Timeout in ms. Default 60000, max 600000.' }, + }, + required: ['projectId', 'command'], + }, + }, + { + name: 'fs_read', + description: 'Read a file inside the project dev container. Returns the full text. Optional offset/limit for windowed reads on big files.', + parameters: { + type: 'OBJECT', + properties: { + projectId: { type: 'STRING', description: 'The Vibn project ID.' }, + path: { type: 'STRING', description: 'File path. Relative paths are resolved under /workspace.' }, + offset: { type: 'NUMBER', description: 'Optional 0-based starting line.' }, + limit: { type: 'NUMBER', description: 'Optional max lines to return.' }, + }, + required: ['projectId', 'path'], + }, + }, + { + name: 'fs_write', + description: + 'Create or overwrite a file inside the project dev container. Use to scaffold new files. ' + + 'For surgical edits to existing files, prefer fs_edit (less brittle, smaller diffs).', + parameters: { + type: 'OBJECT', + properties: { + projectId: { type: 'STRING', description: 'The Vibn project ID.' }, + path: { type: 'STRING', description: 'File path. Relative paths under /workspace. 
Parent dirs are mkdir -p\'d.' }, + content: { type: 'STRING', description: 'Full file content.' }, + }, + required: ['projectId', 'path', 'content'], + }, + }, + { + name: 'fs_edit', + description: + 'Aider-style search-and-replace edit. Finds `oldString` in the file and replaces it with `newString`. ' + + 'Fails (HTTP 404) if oldString is missing, fails (HTTP 409) if oldString matches more than once unless replaceAll=true. ' + + 'Always include 2-3 lines of surrounding context in oldString so the match is unique.', + parameters: { + type: 'OBJECT', + properties: { + projectId: { type: 'STRING', description: 'The Vibn project ID.' }, + path: { type: 'STRING', description: 'File path under /workspace.' }, + oldString: { type: 'STRING', description: 'Exact substring to find (with surrounding context for uniqueness).' }, + newString: { type: 'STRING', description: 'Replacement text.' }, + replaceAll: { type: 'BOOLEAN', description: 'If true, replace every occurrence. Default false (must be unique).' }, + }, + required: ['projectId', 'path', 'oldString', 'newString'], + }, + }, + { + name: 'fs_list', + description: 'List files in a directory inside the project dev container (`ls -lA`). Capped at 200 entries.', + parameters: { + type: 'OBJECT', + properties: { + projectId: { type: 'STRING', description: 'The Vibn project ID.' }, + path: { type: 'STRING', description: 'Directory path. Default /workspace.' }, + }, + required: ['projectId'], + }, + }, + { + name: 'fs_delete', + description: 'Delete a file or directory inside the project dev container. Set recursive=true to remove a non-empty directory.', + parameters: { + type: 'OBJECT', + properties: { + projectId: { type: 'STRING', description: 'The Vibn project ID.' }, + path: { type: 'STRING', description: 'Path to delete. Cannot be /workspace itself.' }, + recursive: { type: 'BOOLEAN', description: 'rm -rf if true. Default false.' 
}, + }, + required: ['projectId', 'path'], + }, + }, + { + name: 'fs_glob', + description: 'Find files matching a glob pattern (ripgrep-backed, respects .gitignore). Returns up to 500 paths.', + parameters: { + type: 'OBJECT', + properties: { + projectId: { type: 'STRING', description: 'The Vibn project ID.' }, + pattern: { type: 'STRING', description: 'Glob, e.g. "**/*.tsx" or "src/**/*.ts".' }, + cwd: { type: 'STRING', description: 'Search root (default /workspace).' }, + }, + required: ['projectId', 'pattern'], + }, + }, + { + name: 'fs_grep', + description: 'ripgrep-backed code search inside the project dev container. Capped at 50 matches per file, 500 total.', + parameters: { + type: 'OBJECT', + properties: { + projectId: { type: 'STRING', description: 'The Vibn project ID.' }, + pattern: { type: 'STRING', description: 'Regex or literal string.' }, + glob: { type: 'STRING', description: 'Optional file glob to filter (e.g. "*.ts").' }, + cwd: { type: 'STRING', description: 'Search root (default /workspace).' }, + contextLines: { type: 'NUMBER', description: 'Lines of context around each match (0-10).' }, + }, + required: ['projectId', 'pattern'], + }, + }, + // ── Non-MCP: GitHub & web ───────────────────────────────────────────────── { diff --git a/lib/dev-container.ts b/lib/dev-container.ts new file mode 100644 index 00000000..eb90061e --- /dev/null +++ b/lib/dev-container.ts @@ -0,0 +1,356 @@ +/** + * Per-project AI dev container ("vibn-dev"). + * + * One Coolify Service per Vibn project, running the `vibn-dev` image. + * The AI agent drives it via: + * - shell.exec → docker exec into the container (via existing SSH path) + * - fs.* → file ops (implemented as `cat` / `tee` / `rm` etc. 
+ *                inside the container, on top of shell.exec)
+ *   - dev_server.* → start long-running processes (week 2)
+ *   - ship         → git push to Gitea + trigger Coolify deploy (week 2)
+ *
+ * Lifecycle states:
+ *   - Not provisioned → ensureDevContainer() creates the Coolify service
+ *   - Suspended       → Coolify-stopped (saves money). resume() starts it.
+ *   - Running         → docker exec works.
+ *
+ * Tenant safety: only ensureDevContainer() receives the workspace; the
+ * lifecycle and exec helpers look the container up by projectId alone,
+ * so the caller must have already verified that the projectId belongs
+ * to the workspace via fs_projects.
+ * NOTE(review): an earlier version of this note said the exec primitive
+ * also checks that the resolved container UUID is in the workspace's
+ * owned Coolify-project set. No such check is visible in this module —
+ * confirm it lives in execInCoolifyApp before relying on it.
+ *
+ * See: AI_PATH_B_EXECUTION_PLAN.md §3.
+ */
+
+import { query, queryOne } from '@/lib/db-postgres';
+import {
+  createDockerComposeApp,
+  startService,
+  stopService,
+  getService,
+} from '@/lib/coolify';
+import { execInCoolifyApp, type ExecInAppResult } from '@/lib/coolify-exec';
+import { isCoolifySshConfigured } from '@/lib/coolify-ssh';
+import {
+  ensureProjectCoolifyProject,
+  getProjectCoolifyUuid,
+  linkResourceToProject,
+} from '@/lib/projects';
+import type { VibnWorkspace } from '@/lib/workspaces';
+
+// ── Configuration ────────────────────────────────────────────────────
+
+/**
+ * Image tag for vibn-dev. Built and pushed from /vibn-dev/Dockerfile.
+ * Override per-environment with VIBN_DEV_IMAGE for staging/canary tags.
+ */
+export const VIBN_DEV_IMAGE = process.env.VIBN_DEV_IMAGE ?? 'vibn-dev:latest';
+
+/** Resource caps per dev container. Tweak in env per-tier later. */
+const DEFAULT_CPU_LIMIT = process.env.VIBN_DEV_CPU_LIMIT ?? '1'; // 1 vCPU
+const DEFAULT_MEM_LIMIT = process.env.VIBN_DEV_MEM_LIMIT ?? '1g'; // 1 GiB
+const DEFAULT_DISK_LIMIT = process.env.VIBN_DEV_DISK_LIMIT ?? '10g'; // soft hint, not enforced by compose
+
+// ── Schema ───────────────────────────────────────────────────────────
+
+/** Set once the DDL below has succeeded in this process, so later calls are no-ops. */
+let devContainersTableReady = false;
+
+/**
+ * Create the fs_project_dev_containers table and its two indexes if they
+ * do not exist yet. Every statement is `IF NOT EXISTS`, so repeating the
+ * call (e.g. from another process) is harmless.
+ */
+export async function ensureDevContainersTable(): Promise<void> {
+  if (devContainersTableReady) return;
+  await query(
+    `CREATE TABLE IF NOT EXISTS fs_project_dev_containers (
+       project_id     TEXT PRIMARY KEY,
+       workspace      TEXT NOT NULL,
+       service_uuid   TEXT NOT NULL,
+       image          TEXT NOT NULL,
+       state          TEXT NOT NULL DEFAULT 'provisioning',
+       last_active_at TIMESTAMPTZ NOT NULL DEFAULT now(),
+       suspended_at   TIMESTAMPTZ,
+       created_at     TIMESTAMPTZ NOT NULL DEFAULT now()
+     );
+     CREATE INDEX IF NOT EXISTS fs_project_dev_containers_ws_idx
+       ON fs_project_dev_containers (workspace);
+     CREATE INDEX IF NOT EXISTS fs_project_dev_containers_active_idx
+       ON fs_project_dev_containers (last_active_at);`,
+    [],
+  );
+  devContainersTableReady = true;
+}
+
+/** One row of fs_project_dev_containers: the dev container bound to a project. */
+export interface DevContainerRow {
+  project_id: string;
+  workspace: string;
+  service_uuid: string;
+  image: string;
+  state: 'provisioning' | 'running' | 'suspended' | 'failed';
+  last_active_at: Date;
+  suspended_at: Date | null;
+  created_at: Date;
+}
+
+/**
+ * Look up the dev-container row for a project (creating the table first
+ * if needed).
+ *
+ * @returns the row, or a nullish value when the project has no container
+ *          (callers test it with `!row`).
+ */
+export async function getDevContainerRow(projectId: string): Promise<DevContainerRow | null> {
+  await ensureDevContainersTable();
+  return queryOne<DevContainerRow>(
+    `SELECT * FROM fs_project_dev_containers WHERE project_id = $1 LIMIT 1`,
+    [projectId],
+  );
+}
+
+// ── Compose template ─────────────────────────────────────────────────
+
+/**
+ * Render the docker-compose.yml that backs a single vibn-dev service.
+ *
+ * Two named volumes are intentional:
+ *   - workspace : everything in /workspace (the user's source tree).
+ *                 Persists across suspends. Backed up to Gitea every
+ *                 5 min via the auto-push autosave loop (week 2).
+ *   - cache     : language-toolchain caches (mise, npm, pip, cargo).
+ *                 Persists across suspends; per-project (never shared).
+ *
+ * The container has NO Vibn-internal network access.
+ * We rely on the
+ * default Coolify-bridge network being isolated from the vibn-postgres
+ * / vibn-frontend bridge. (Network policy hardening lands in week 1
+ * day 2 alongside the auto-push job.)
+ *
+ * The project slug is written as a double-quoted YAML scalar so that an
+ * unexpected character in it (newline, '#', ': ') cannot add or alter
+ * keys in the generated compose file.
+ */
+function renderDevCompose(projectSlug: string): string {
+  // JSON string syntax is also a valid YAML double-quoted scalar, so
+  // JSON.stringify gives us correct quoting and escaping for free.
+  const slugEnvEntry = JSON.stringify(`VIBN_PROJECT_SLUG=${projectSlug}`);
+  return `services:
+  vibn-dev:
+    image: ${VIBN_DEV_IMAGE}
+    restart: unless-stopped
+    working_dir: /workspace
+    volumes:
+      - workspace:/workspace
+      - cache:/home/vibn/.cache
+    environment:
+      - ${slugEnvEntry}
+      - VIBN_DEV_CONTAINER=1
+    deploy:
+      resources:
+        limits:
+          cpus: '${DEFAULT_CPU_LIMIT}'
+          memory: ${DEFAULT_MEM_LIMIT}
+volumes:
+  workspace:
+  cache:
+`;
+}
+
+// ── Provisioning ─────────────────────────────────────────────────────
+
+/** Arguments for ensureDevContainer(). */
+export interface EnsureDevContainerOpts {
+  projectId: string;
+  projectSlug: string;
+  projectName?: string;
+  workspace: VibnWorkspace;
+  /** Skip the initial start (provision-only). Default: start it. */
+  noStart?: boolean;
+}
+
+/** Outcome of ensureDevContainer(); `created` is true only when a new service was made. */
+export interface EnsureDevContainerResult {
+  serviceUuid: string;
+  state: DevContainerRow['state'];
+  created: boolean;
+}
+
+/**
+ * Idempotently ensure a vibn-dev service exists for the given Vibn project.
+ *
+ *   - Already provisioned → returns the row, optionally resumes if suspended.
+ *   - Not provisioned     → ensures the per-project Coolify Project exists,
+ *                           creates the docker-compose service, links the
+ *                           resource to the Vibn project, persists the row.
+ *
+ * Safe to call on every chat turn — first call is ~10s, subsequent
+ * calls are a single SELECT.
+ */
+export async function ensureDevContainer(
+  opts: EnsureDevContainerOpts,
+): Promise<EnsureDevContainerResult> {
+  await ensureDevContainersTable();
+
+  const existing = await getDevContainerRow(opts.projectId);
+  if (existing) {
+    if (existing.state === 'suspended' && !opts.noStart) {
+      await resumeDevContainer(opts.projectId);
+      return { serviceUuid: existing.service_uuid, state: 'running', created: false };
+    }
+    // NOTE(review): rows in 'provisioning' or 'failed' are returned as-is;
+    // nothing in this module promotes 'provisioning' to 'running' on its own.
+    return { serviceUuid: existing.service_uuid, state: existing.state, created: false };
+  }
+
+  // Need a Coolify project to land the service in.
+  let coolifyProjectUuid = await getProjectCoolifyUuid(opts.projectId, opts.workspace);
+  if (!coolifyProjectUuid) {
+    coolifyProjectUuid = await ensureProjectCoolifyProject(
+      opts.projectId,
+      opts.workspace,
+      { projectSlug: opts.projectSlug, projectName: opts.projectName },
+    );
+  }
+  if (!coolifyProjectUuid) {
+    throw new Error(
+      `Could not provision Coolify project for ${opts.projectId}; dev container creation aborted.`,
+    );
+  }
+
+  const created = await createDockerComposeApp({
+    projectUuid: coolifyProjectUuid,
+    name: `vibn-dev-${opts.projectSlug}`,
+    description: `AI dev container for project ${opts.projectName ?? opts.projectSlug}`,
+    composeRaw: renderDevCompose(opts.projectSlug),
+    instantDeploy: !opts.noStart,
+  });
+
+  // The state persisted here is also the state reported to the caller, so a
+  // provision-only call (noStart) is reported as 'suspended', matching its row.
+  const initialState: DevContainerRow['state'] = opts.noStart ? 'suspended' : 'provisioning';
+
+  await query(
+    `INSERT INTO fs_project_dev_containers
+       (project_id, workspace, service_uuid, image, state)
+     VALUES ($1, $2, $3, $4, $5)
+     ON CONFLICT (project_id) DO UPDATE
+       SET service_uuid = EXCLUDED.service_uuid,
+           image = EXCLUDED.image,
+           state = EXCLUDED.state`,
+    [
+      opts.projectId,
+      opts.workspace.slug,
+      created.uuid,
+      VIBN_DEV_IMAGE,
+      initialState,
+    ],
+  );
+
+  // Bookkeeping link so apps_list / projects_get see the dev container
+  // under the right Vibn project.
+  try {
+    await linkResourceToProject(opts.projectId, opts.workspace.slug, created.uuid, 'service');
+  } catch (err: unknown) {
+    // Best-effort: the container is usable without the link, but leave a
+    // trace so a missing link can be diagnosed.
+    console.warn(
+      `[dev-container] could not link service ${created.uuid} to project ${opts.projectId}:`,
+      err,
+    );
+  }
+
+  return { serviceUuid: created.uuid, state: initialState, created: true };
+}
+
+// ── Lifecycle ────────────────────────────────────────────────────────
+
+/**
+ * Stop the project's dev container and mark its row 'suspended'.
+ * No-op when the project has no container or it is already suspended.
+ */
+export async function suspendDevContainer(projectId: string): Promise<void> {
+  const row = await getDevContainerRow(projectId);
+  if (!row) return;
+  if (row.state === 'suspended') return;
+  await stopService(row.service_uuid);
+  await query(
+    `UPDATE fs_project_dev_containers
+       SET state = 'suspended', suspended_at = now()
+     WHERE project_id = $1`,
+    [projectId],
+  );
+}
+
+/**
+ * Start the project's dev container and mark its row 'running'.
+ * No-op when the row already says 'running'.
+ *
+ * @throws Error when no container has been provisioned for the project.
+ */
+export async function resumeDevContainer(projectId: string): Promise<void> {
+  const row = await getDevContainerRow(projectId);
+  if (!row) throw new Error(`No dev container provisioned for ${projectId}`);
+  if (row.state === 'running') return;
+  await startService(row.service_uuid);
+  await query(
+    `UPDATE fs_project_dev_containers
+       SET state = 'running', suspended_at = NULL, last_active_at = now()
+     WHERE project_id = $1`,
+    [projectId],
+  );
+}
+
+/** Bump last_active_at for the project's container row. */
+async function touchActivity(projectId: string): Promise<void> {
+  await query(
+    `UPDATE fs_project_dev_containers SET last_active_at = now() WHERE project_id = $1`,
+    [projectId],
+  );
+}
+
+// ── Exec primitive ───────────────────────────────────────────────────
+
+/** Arguments for execInDevContainer(). */
+export interface DevContainerExecOpts {
+  projectId: string;
+  command: string;
+  cwd?: string; // defaults to /workspace
+  timeoutMs?: number;
+  maxBytes?: number;
+  /** Override the user (default: vibn). Use 'root' only when needed. */
+  user?: string;
+  /** Extra env vars (k=v pairs prepended via the `env` utility). */
+  env?: Record<string, string>;
+}
+
+/**
+ * Run a command inside the project's vibn-dev service.
+ * Resumes the container if suspended, then docker-exec's via the
+ * existing SSH primitive. Stdout/stderr/exit-code returned synchronously.
+ *
+ * The caller is responsible for verifying the projectId belongs to the
+ * workspace BEFORE calling this. We re-verify the container UUID via
+ * the exec primitive's own resolution (it queries `docker ps --filter
+ * name={uuid}`), so a mismatched projectId can't reach foreign containers.
+ *
+ * When `env` is given, the command runs as `env K=V … sh -c '<command>'`
+ * so the variables apply to the whole command — including pipelines,
+ * `&&` chains and shell builtins — not just its first word.
+ *
+ * @throws Error when SSH to the Coolify host is not configured, or when
+ *         the project has no dev container row.
+ */
+export async function execInDevContainer(
+  opts: DevContainerExecOpts,
+): Promise<ExecInAppResult> {
+  if (!isCoolifySshConfigured()) {
+    throw new Error(
+      'shell.exec requires SSH access to the Coolify host; configure COOLIFY_SSH_* envs.',
+    );
+  }
+  const row = await getDevContainerRow(opts.projectId);
+  if (!row) {
+    throw new Error(
+      `No dev container for project ${opts.projectId}. Call ensureDevContainer() first.`,
+    );
+  }
+  if (row.state === 'suspended') {
+    await resumeDevContainer(opts.projectId);
+  }
+
+  const cwd = opts.cwd && opts.cwd.trim() ? opts.cwd.trim() : '/workspace';
+  const envPrefix = opts.env
+    ? Object.entries(opts.env)
+        .map(([k, v]) => `${shellEscape(k)}=${shellEscape(v)}`)
+        .join(' ')
+    : '';
+  // `env K=V <command>` would hand only the first simple command to env;
+  // wrapping the command in its own `sh -c` keeps compound commands intact.
+  const wrapped = envPrefix
+    ? `cd ${shellEscape(cwd)} && env ${envPrefix} sh -c ${shellEscape(opts.command)}`
+    : `cd ${shellEscape(cwd)} && ${opts.command}`;
+
+  const result = await execInCoolifyApp({
+    appUuid: row.service_uuid,
+    service: 'vibn-dev',
+    command: wrapped,
+    user: opts.user ?? 'vibn',
+    timeoutMs: opts.timeoutMs,
+    maxBytes: opts.maxBytes,
+  });
+
+  try {
+    await touchActivity(opts.projectId);
+  } catch (err: unknown) {
+    // The command has already run; a failed bookkeeping write must not
+    // hide its result (a caller might otherwise retry a non-idempotent command).
+    console.warn(`[dev-container] could not record activity for ${opts.projectId}:`, err);
+  }
+  return result;
+}
+
+/** Quote a string for POSIX sh: wrap in single quotes, rewriting each `'` as `'\''`. */
+function shellEscape(s: string): string {
+  return `'${s.replace(/'/g, `'\\''`)}'`;
+}
+
+// ── Health ───────────────────────────────────────────────────────────
+
+/**
+ * Quick liveness check used by chat startup to decide whether to show
+ * a "spinning up your environment…" banner.
+ */
+export async function getDevContainerStatus(projectId: string): Promise<{
+  exists: boolean;
+  state: DevContainerRow['state'] | 'absent';
+  serviceUuid: string | null;
+}> {
+  const container = await getDevContainerRow(projectId);
+  // Optional: poke Coolify for fresh state. Skipped for now to keep this
+  // hot path cheap; consumers that care can call getService(uuid) directly.
+  return container
+    ? { exists: true, state: container.state, serviceUuid: container.service_uuid }
+    : { exists: false, state: 'absent', serviceUuid: null };
+}
+
+// Re-export getService so route handlers can pull live Coolify status
+// without taking a separate dependency on lib/coolify.
+export { getService };
diff --git a/lib/feature-flags.ts b/lib/feature-flags.ts
new file mode 100644
index 00000000..cbe37713
--- /dev/null
+++ b/lib/feature-flags.ts
@@ -0,0 +1,62 @@
+/**
+ * Runtime feature flags. Backed by a small key/value table (one row per
+ * flag) so an admin can flip a flag and have every Vibn pod pick it up
+ * within seconds (no redeploy required).
+ *
+ * Currently used for:
+ *   - path_b_disabled : kill switch for the Path B AI dev-container
+ *                       architecture. When true, shell.exec / fs.* /
+ *                       devcontainer.* tools return 503 and the chat
+ *                       system prompt falls back to Path A guidance.
+ *
+ * See AI_PATH_B_EXECUTION_PLAN.md §7 for the rollback story.
+ */
+
+import { query, queryOne } from '@/lib/db-postgres';
+
+/** Set once the flags table has been created in this process. */
+let tableReady = false;
+
+/** Create fs_feature_flags if it does not exist yet (once per process). */
+async function ensureFlagsTable(): Promise<void> {
+  if (tableReady) return;
+  await query(
+    `CREATE TABLE IF NOT EXISTS fs_feature_flags (
+       key        TEXT PRIMARY KEY,
+       value      JSONB NOT NULL,
+       updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
+     );`,
+    [],
+  );
+  tableReady = true;
+}
+
+/** How long a value read from the database is served from memory. */
+const TTL_MS = 10_000;
+
+/** Per-process read cache, keyed by flag name. */
+const cache = new Map<string, { value: unknown; expires: number }>();
+
+/**
+ * Read a flag, serving from the in-process cache for up to TTL_MS.
+ *
+ * @param key          flag name (primary key in fs_feature_flags)
+ * @param defaultValue returned when the flag has no row or its stored value is JSON null
+ * @returns the stored value; it is NOT validated against T, so callers
+ *          should coerce it (as isPathBDisabled does).
+ */
+export async function getFlag<T>(key: string, defaultValue: T): Promise<T> {
+  const cached = cache.get(key);
+  if (cached && cached.expires > Date.now()) return cached.value as T;
+  await ensureFlagsTable();
+  const row = await queryOne<{ value: T }>(
+    `SELECT value FROM fs_feature_flags WHERE key = $1 LIMIT 1`,
+    [key],
+  );
+  const value = row?.value ?? defaultValue;
+  cache.set(key, { value, expires: Date.now() + TTL_MS });
+  return value;
+}
+
+/**
+ * Upsert a flag. Only this process's cache entry is dropped; other
+ * processes keep serving their cached value until it expires (TTL_MS).
+ */
+export async function setFlag(key: string, value: unknown): Promise<void> {
+  await ensureFlagsTable();
+  await query(
+    `INSERT INTO fs_feature_flags (key, value, updated_at)
+     VALUES ($1, $2::jsonb, now())
+     ON CONFLICT (key) DO UPDATE
+       SET value = EXCLUDED.value,
+           updated_at = now()`,
+    // JSON.stringify(undefined) is undefined, which would bind as SQL NULL
+    // and violate the NOT NULL column; store JSON null instead (getFlag
+    // then falls back to the caller's default).
+    [key, JSON.stringify(value ?? null)],
+  );
+  cache.delete(key);
+}
+
+/** True when the Path B kill switch (`path_b_disabled`) is set to a truthy value. */
+export async function isPathBDisabled(): Promise<boolean> {
+  return Boolean(await getFlag('path_b_disabled', false));
+}