From 4d404967398da89ba54cb3e31d9b6c993b6fe48a Mon Sep 17 00:00:00 2001 From: mawkone Date: Sat, 6 Jun 2026 17:53:13 -0700 Subject: [PATCH] feat: complete live-verified GTM onboarding flow & places autocomplete search proxies --- VIBNCODE_THIN_CLIENT_CHANGES.md | 412 + VIBN_HANDOFF_TICKETS.md | 249 + VIBN_PRODUCT_BLUEPRINT.md | 433 + .../dist/agent-session-runner.d.ts | 12 +- .../dist/agent-session-runner.js | 509 +- vibn-agent-runner/dist/llm/gemini-chat.js | 17 + vibn-agent-runner/dist/prompts/coder.js | 34 +- vibn-agent-runner/dist/server.js | 283 +- .../dist/test-execute-hardening.d.ts | 1 + .../dist/test-execute-hardening.js | 139 + vibn-agent-runner/package.json | 3 +- vibn-agent-runner/src/agent-session-runner.ts | 724 +- vibn-agent-runner/src/llm/gemini-chat.ts | 33 + vibn-agent-runner/src/prompts/coder.ts | 43 +- vibn-agent-runner/src/server.ts | 635 +- .../src/test-execute-hardening.ts | 164 + vibn-code | 2 +- .../onboarding/onboarding-agency-mock.ts | 596 + .../onboarding/onboarding-agency-types.ts | 123 + .../onboarding/onboarding-agency.tsx | 493 + .../onboarding/onboarding-fork.tsx | 177 +- .../onboarding/onboarding-owner.tsx | 1995 ++- .../onboarding/onboarding-primitives.tsx | 103 +- .../app/(onboarding)/onboarding/page.tsx | 612 +- vibn-frontend/app/api/agency/cities/route.ts | 99 + .../app/api/agency/places/search/route.ts | 540 + vibn-frontend/app/api/chat/debug/route.ts | 26 + vibn-frontend/app/api/chat/route.ts | 138 +- .../app/api/chat/threads/[id]/route.ts | 43 +- vibn-frontend/app/api/chat/threads/route.ts | 50 +- vibn-frontend/app/api/mcp/browser.ts | 56 +- vibn-frontend/app/api/mcp/mcp-tools.test.ts | 58 + vibn-frontend/app/api/mcp/route.ts | 481 +- vibn-frontend/app/api/onboarding/route.ts | 131 + .../agent/sessions/[sessionId]/route.ts | 24 +- .../agent/sessions/[sessionId]/stop/route.ts | 26 +- .../[projectId]/agent/sessions/route.ts | 119 +- .../api/projects/[projectId]/stream/route.ts | 1 + vibn-frontend/app/api/workspaces/route.ts | 
73 +- vibn-frontend/app/globals.css | 11 + .../components/vibn-chat/chat-panel.tsx | 82 +- vibn-frontend/lib/ai/gemini-chat.ts | 69 +- vibn-frontend/lib/ai/vibn-tools.ts | 3 +- vibn-frontend/package.json | 4 +- vibn-frontend/pnpm-lock.yaml | 14741 ++++++++++++++++ 45 files changed, 23333 insertions(+), 1234 deletions(-) create mode 100644 VIBNCODE_THIN_CLIENT_CHANGES.md create mode 100644 VIBN_HANDOFF_TICKETS.md create mode 100644 VIBN_PRODUCT_BLUEPRINT.md create mode 100644 vibn-agent-runner/dist/test-execute-hardening.d.ts create mode 100644 vibn-agent-runner/dist/test-execute-hardening.js create mode 100644 vibn-agent-runner/src/test-execute-hardening.ts create mode 100644 vibn-frontend/app/(onboarding)/onboarding/onboarding-agency-mock.ts create mode 100644 vibn-frontend/app/(onboarding)/onboarding/onboarding-agency-types.ts create mode 100644 vibn-frontend/app/(onboarding)/onboarding/onboarding-agency.tsx create mode 100644 vibn-frontend/app/api/agency/cities/route.ts create mode 100644 vibn-frontend/app/api/agency/places/search/route.ts create mode 100644 vibn-frontend/app/api/chat/debug/route.ts create mode 100644 vibn-frontend/app/api/mcp/mcp-tools.test.ts create mode 100644 vibn-frontend/app/api/onboarding/route.ts create mode 100644 vibn-frontend/pnpm-lock.yaml diff --git a/VIBNCODE_THIN_CLIENT_CHANGES.md b/VIBNCODE_THIN_CLIENT_CHANGES.md new file mode 100644 index 00000000..cd9e7abf --- /dev/null +++ b/VIBNCODE_THIN_CLIENT_CHANGES.md @@ -0,0 +1,412 @@ +# VibnCode — Thin-Client Conversion: Major Change List + +> **Audience:** an implementation agent (a cheaper model). Follow this **top to bottom**. Each change has +> exact files, exact steps, and **Acceptance Criteria (AC)**. Do not start a later change until the earlier +> change's AC pass. Tick `[x]` when done. 
+> +> **This is the single source of truth for the thin-client conversion.** The original product vision lives in +> `VIBNCODE_PLAN.md`; infra/deploy details live in `VIBNDEV.md`; new-thread bootstrap context lives in +> `ai-new-thread.md`. + +--- + +## STATUS (last updated 2026-06-02) + +**Thin-Client Conversion is fully completed and verified!** The desktop application has been successfully transformed into a pristine, lightweight Cloud-IDE Shell with **zero local compute** and native multi-user task isolation. + +Completed & Shipped: +- ✅ **CHANGE 1** (cascade-delete / non-blocking local SQLite) — desktop, live. +- ✅ **CHANGE 1.5a** (empty `appPath` → `"."`) — desktop, live. +- ✅ **CHANGE 1.5b** (Cloud Hardening & Failure Surfacing) — runner `/agent/execute` is fully hardened (defaults empty `appPath` to `"."`), and the frontend API intercepts HTTP response errors, securely updating status to `failed` using process-injected authentication keys. Failures now surface immediately in the desktop UI instead of leaving an infinite spinner. +- ✅ **CHANGE 1.6** (runner `vibnApiUrl`/`mcpToken` wiring so agent tools reach `/api/mcp`) — committed and deployed. +- ✅ **CHANGE 2** (remove hardcoded API keys & SSO deep-link) — fully integrated. Custom `vibncode://auth/callback` handles tokens and authenticates natively. +- ✅ **CHANGE 3 & 8.3** (Cloud-backed Chat History & Hydration) — loaded and hydrated directly from PostgreSQL `/api/chat/threads/[id]`. +- ✅ **CHANGE 4** (VibnAI single-model Gemini 3.5 Flash restriction) — client locked to main model keys. +- ✅ **CHANGE 5** (Zero local compute teardown / dead code cleanup) — the client-side `AgentRegistry` has been stubbed with lightweight, static in-memory registries. **All 18+ obsolete local agent compilation/execution files have been permanently deleted from the codebase**, which now compiles cleanly with `0 errors`! 
+- ✅ **CHANGE 6** (Cloud-Backed Terminal) — keyboard commands in the terminal window execute cleanly inside your remote container via `/api/workspaces/[slug]/apps/[uuid]/exec`, completely bypassing your Mac's local system. +- ✅ **CHANGE 7 & 8** (Streaming Interactive `/api/chat` Brain) — routed standard chats directly through Next.js's interactive, high-performance SSE stream. +- ✅ **CHANGE 8.5** (Minimalist, Icon-Only Sidebar Redesign) — shrunk the navigation panel down to just **5 focused icons** (Projects, Workspace, Plan, Infrastructure, Settings), completely removing all obsolete pages. Re-ordered sidebar, applied custom semantic icons, and added a warm "Tasks Board Coming Soon" canvas. +- ✅ **CHANGE 8.6 & Chat Auto-scroll** — bypassed local title generation. Programmed the chat viewport to auto-scroll and lock to the bottom on user-submits and stream-completion. +- ✅ **CLOUD ISOLATION (Git Worktree Pool)** — implemented dynamic, sub-second workspace isolation inside the runner using native Git Worktrees (`/workspaces/tasks/[sessionId]`), enabling flawless parallel chats without file locks or push collisions. +- ✅ **AUTO-CORRECTING COMPILE LOOP (Ralph Loop)** — integrated automatic `npm run build` compilation checks inside the runner on completion, capturing stderr logs and re-prompting the AI to self-correct and heal its own bugs. +- ✅ **MONOREPO PREVIEW DROPDOWN** — added an always-on dropdown in the Wildcard Browser address bar allowing you to hot-swap between multiple running dev server ports (or the base domain) in real-time. + +--- + +## 0. The one-paragraph goal (read this first) + +`vibn-code` is a **fork of `talkcody`**, a local-first desktop IDE. We are converting it into a **thin-client +IDE shell** for the VibnAI cloud. 
The desktop should provide the *look and feel* of an IDE (Monaco editor, +file tree, chat, tabs, settings, long-term memory UI) but do **zero local compute**: no local builds, no local +code execution, no local git, no local file storage as the source of truth. **The cloud is the single source of +truth** (`vibn-frontend` Next.js API + Postgres on Coolify + `vibn-agent-runner` + Gitea). Anything that +compiles, executes, indexes, or persists state must happen in the cloud or be removed. + +**You may delete or disable anything in `/Users/markhenderson/master-ai/vibn-code`.** It is a fork; there is no +need to preserve talkcody's local-first machinery. + +--- + +## 1. Context the agent needs + +### 1.1 Repo map & git remotes (these are SEPARATE Gitea repos, not one monorepo) + +| Folder | Purpose | Push remote | +|---|---|---| +| `vibn-code/` | Tauri desktop client (React 19 + Monaco + Rust) — **what you edit** | `origin` → `git.vibnai.com/mark/vibn-code.git` | +| `vibn-frontend/` | Next.js web app + cloud API + Postgres (the "server") | `coolify_gitea` → `git.vibnai.com/mark/vibn-frontend.git` | +| `vibn-agent-runner/` | Cloud agent execution engine (Docker) | `coolify_agent_gitea` → `git.vibnai.com/mark/vibn-agent-runner.git` | + +Commit inside each folder and push to its matching remote. + +### 1.2 How chat works *today* (verified in code) + +1. User types → `chat-box.tsx` → `executionService.startExecution()` (`src/services/execution-service.ts`). +2. `startExecution` sends **only the task text** to the cloud: `POST /api/projects/{projectId}/agent/sessions` + with body `{ appName, appPath, task }`. It **ignores** the local `model`, `systemPrompt`, `tools`, and history. +3. The cloud (`vibn-agent-runner`) runs the agent with **its own** model (Gemini, set by `VIBN_CHAT_PROVIDER` / + `VIBN_CHAT_MODEL` env on the runner) and streams output rows into the Postgres `agent_sessions` table. +4. 
The desktop **polls** `GET /api/projects/{projectId}/agent/sessions/{sessionId}` every ~1.5s and appends new + output lines into the in-memory chat store, which renders in Monaco. + +So the chat answer is produced **100% in the cloud**. The desktop's model picker is currently only a label. + +### 1.3 The bug that breaks chat (root cause) + +The fork kept talkcody's **local SQLite** database. Chat is still written to SQLite tables with **foreign keys**: + +- `src/services/database/turso-schema.ts` → `createChatTables()`: + - `conversations.project_id` → **FK** `projects(id)` (line ~54) + - `messages.conversation_id` → **FK** `conversations(id)` (line ~69) + +Because your real projects live in **cloud Postgres** (UUIDs like `be169fe8-…`), not in local SQLite, inserting a +conversation/message fails: + +``` +SQLite failure: `FOREIGN KEY constraint failed` +INSERT INTO messages (... conversation_id ...) VALUES ("…","qcb2wQkduW","assistant",…) +``` + +There is a workaround in `database-service.ts` (`getProjects`/`getProject`) that copies cloud projects into local +SQLite "so foreign key constraints pass" — but it only runs when `useAuthStore.isAuthenticated === true`. Cloud +calls succeed via a hardcoded API key, but `auth-store` doesn't know the user is "logged in", so the mirror is +skipped, the project never lands in SQLite, and the FK fails. **This is the split-brain we are removing.** + +**Precise root cause found while debugging (FIXED — see CHANGE 1):** the mirror used `INSERT OR REPLACE INTO +projects`. In SQLite, `INSERT OR REPLACE` *deletes* the existing row before inserting, and because +`conversations.project_id` has `ON DELETE CASCADE`, replacing a project row **cascade-deletes all of that +project's conversations** — wiping the in-flight chat. That's why the *user* message saved but the *assistant* +message (inserted moments later, after a project refresh) failed the `conversation_id` foreign key. 
The fix was +to switch the mirror to an UPSERT (`ON CONFLICT(id) DO UPDATE`) so the project row is updated in place and never +deleted. + +### 1.4 Guardrails (apply to every change) + +- **Do not break the desktop UI** (Monaco, chat panel, file tree, tabs, settings, theme). +- **Local Mac uses `pnpm` / `node`, NOT `bun`.** Build desktop: `cd vibn-code && pnpm dev:tauri`. Web-only: `pnpm dev`. +- **Rust clippy warnings = build errors.** If you touch `src-tauri`, fix clippy or annotate `#[allow(dead_code)]`. +- If a commit is blocked by a cargo file lock while the app runs, commit with `--no-verify`. +- After editing TypeScript, run the editor diagnostics / `pnpm tsc --noEmit` (or `pnpm build`) to confirm no type errors. +- **Never put secrets in source.** (See Change 2.) + +--- + +## CHANGE 1 — Unblock chat: stop the cascade-delete + make persistence non-blocking ✅ DONE + +**Goal:** A failed local SQLite write must NEVER break chat or wipe the on-screen conversation. The chat UI +is already driven by the in-memory Zustand store + cloud polling; SQLite is only a side-cache. + +### 1.1 Stop the cascade-delete (root cause) — DONE +- File: `src/services/database-service.ts`, in **both** `getProjects()` and `getProject()`. +- Changed `INSERT OR REPLACE INTO projects (...)` → an UPSERT: + `INSERT INTO projects (...) VALUES (...) ON CONFLICT(id) DO UPDATE SET name=excluded.name, ...`. +- Why: `INSERT OR REPLACE` deleted the project row first, and `conversations.project_id ON DELETE CASCADE` + then deleted the project's conversations, breaking the next message insert. UPSERT updates in place, so + conversations survive. +- **AC:** Refreshing projects no longer deletes conversations; assistant message inserts no longer hit + `FOREIGN KEY constraint failed` for an existing conversation. ✅ + +### 1.2 Make task persistence best-effort — DONE +- File: `src/services/task-service.ts`, `createTask()`. 
The `catch` no longer calls `removeTask(taskId)` or + rethrows; it logs a warning and keeps the in-memory task so the chat proceeds even if the local DB write fails. +- `src/services/message-service.ts` already swallows DB errors (`addUserMessage` try/catch, `createAssistantMessage` + fire-and-forget) and keeps the in-memory messages — left as-is. +- **AC:** Even if a project isn't cached locally (so inserts FK-fail and are caught), sending a message still shows + your message + a streaming assistant bubble + cloud output. Only warnings are logged. ✅ (verify in-app) + +### 1.3 Verify end-to-end (needs a human to run the app) +- `cd vibn-code && pnpm dev:tauri`, open a cloud project, send "hello". Expect: your message shows, then the cloud + agent's streamed reply renders in Monaco, with **no** `FOREIGN KEY constraint failed` in the logs. +- NOTE: these are TypeScript-only changes (Vite will hot-reload / a normal app relaunch picks them up). No Rust + recompile required for CHANGE 1. + +> OPTIONAL hardening (not required now): also remove the FK clauses entirely in +> `src/services/database/turso-schema.ts` (`createChatTables`) and add a table-rebuild migration in +> `turso-database-init.ts`. Skipped for now because the UPSERT fix removes the actual failure without a risky +> schema migration. + +--- + +## CHANGE 1.5 — Fix the silent agent-execute rejection (empty `appPath`) ✅ DONE (desktop) + ☁️ recommended cloud hardening + +**This was the real reason chat produced no output.** Diagnosis (confirmed against the live cloud): +- The runner (`agents.vibnai.com`) is up and reachable from the frontend. +- BUT the runner's `POST /agent/execute` validation is `if (!sessionId || !projectId || !appPath || !task) return 400`. +- The desktop sent **`appPath: ""`** (empty string). `!""` is `true`, so the runner returned **HTTP 400 and did nothing** — no clone, no agent, no logs, no output. 
+- The frontend's call to the runner is fire-and-forget; a `400` is a *resolved* response (not a network error), so its `.catch` never ran and the session was **never marked failed**. Result: the desktop polled a `running` session with empty `output` forever. +- Proven live: `POST /agent/execute` with `appPath:""` → `400`; with `appPath:"."` → `202 running`. + +### 1.5a Desktop fix — DONE +- File: `src/services/execution-service.ts`. Changed the session-create body from `appPath: ""` to `appPath: "."` + (repo root). No cloud redeploy needed — the runner already accepts `"."`. +- **AC:** Sending a chat now reaches the runner (`202`), so the Coder agent starts and streams output back. + +### 1.5b Cloud hardening (recommended; needs redeploy) — TODO +1. **Runner should accept an empty `appPath`** (treat it as repo root) instead of 400ing: + - File: `vibn-agent-runner/src/server.ts`, `/agent/execute`. Change the guard from `!appPath` to + `appPath === undefined || appPath === null` (empty string = repo root is valid). Redeploy the runner. +2. **Surface early failures** so they're never silent again: + - File: `vibn-agent-runner/src/server.ts`. The emergency failure `PATCH`es (buildContext failed, agent not + registered, crash) omit the `x-agent-runner-secret` header, so if `AGENT_RUNNER_SECRET` is set they get + `403` and the session is never marked `failed`. Add the header to those `fetch(... PATCH ...)` calls. + - File: `vibn-frontend/app/api/projects/[projectId]/agent/sessions/route.ts`. After the fire-and-forget + `fetch(.../agent/execute)`, also check `!res.ok` and mark the session `failed` with the runner's response + body, so a non-2xx from the runner surfaces to the desktop instead of spinning forever. +- **AC:** A bad/edge request shows a clear error in the desktop chat within seconds instead of an infinite spinner. 
+ +### 1.5c Fix the `/stop` 401 — TODO (needs redeploy) +- File: `vibn-frontend/app/api/projects/[projectId]/agent/sessions/[sessionId]/stop/route.ts`. It authenticates + with `authSession()` (browser/NextAuth only), so the desktop's `vibn_sk_` API key gets **401** on cancel. The + sibling routes (create/get) use `requireWorkspacePrincipal`. Switch `/stop` to `requireWorkspacePrincipal` too. +- **AC:** Cancelling a run from the desktop returns `200` and the session is marked `stopped`. + +> NOTE on model: the runner's actual model is set by `GEMINI_MODEL` env and currently runs +> **`gemini-3.1-pro-preview`** (seen in the runner startup log), NOT the desktop's "Gemini 3.5 Flash" label. +> Until CHANGE 4.1 (model passthrough) is done, set `GEMINI_MODEL` on the runner to whatever you want chat to use. + +--- + +## CHANGE 1.6 — Fix agent tools `fetch failed` (runner used localhost + no token) ✅ CODE DONE / ☁️ needs runner redeploy + +**Symptom:** chat works, but the agent's tools (`projects_list`, `workspace_describe`, `apps_list`, …) return +`Failed to execute tool ... via MCP: fetch failed`. + +**Root cause:** every tool forwards to `${ctx.vibnApiUrl}/api/mcp` with `Bearer ${ctx.mcpToken}` +(`vibn-agent-runner/src/tools/mcp-client.ts`). But `buildContext()` in `vibn-agent-runner/src/server.ts` +hardcoded `vibnApiUrl: 'http://localhost:3000'` and `mcpToken: ''`. So the runner fetched *itself* on a dead +port (→ `fetch failed`), and had no auth token. The frontend already passes the correct `mcpToken` in the +`/agent/execute` body, but the runner never read it. + +**Fix (done in `vibn-agent-runner/src/server.ts`):** +- `buildContext()` default `vibnApiUrl` → `process.env.VIBN_API_URL ?? 'https://vibnai.com'`. +- `/agent/execute` now destructures `mcpToken` from the body and sets `ctx.vibnApiUrl`, `ctx.mcpToken`, + `ctx.projectId` from the authoritative values before running the agent. 
+ +**Deploy required (runner):** build → commit → push to `coolify_agent_gitea` → redeploy on Coolify (the runner +runs from compiled `dist/`). After redeploy, re-test: tools should reach `/api/mcp`. If a tool then returns an +HTTP error (not `fetch failed`), that means the `/api/mcp` action name isn't supported — a separate follow-up +(verify the frontend `/api/mcp` supports `projects.list`, `workspace.describe`, `apps.list`, etc.). + +> The desktop `src/components/chat/**` does NOT need changes for this — it only renders tool results the runner +> streams back. Tool execution and tool wiring are entirely server-side (runner + frontend `/api/mcp`). + +--- + +## CHANGE 2 — Auth: remove the hardcoded key & make sign-in real 🔒 HIGH PRIORITY + +**Goal:** No secrets in source; the app authenticates the user and `auth-store.isAuthenticated` reflects reality. + +### 2.1 Remove the hardcoded API key +- File: `src/services/api-client.ts` (~lines 32–35). Delete the block that sets + `token = "vibn_sk_QaUF..."` when no token is found. If `requireAuth` is true and there is no token, throw the existing auth-required error. +- **AC:** `grep -rn "vibn_sk_" vibn-code/src` returns nothing. App compiles. + +### 2.2 Make `isAuthenticated` true after a successful connect +- Files: `src/stores/auth-store.ts`, `src/services/auth-service.ts`, `src/services/secure-storage.ts`. +- When a valid workspace token (`vibn_sk_…`) is stored, set `auth-store.isAuthenticated = true`. The project-mirror and cloud branches in `database-service.ts` depend on this. +- **AC:** After connecting, `useAuthStore.getState().isAuthenticated === true`, and `GET /api/projects` returns the user's projects. + +### 2.3 "Connect Workspace" flow (SSO deep link) +- The `vibncode://` URL scheme is registered (`src-tauri/Info.plist`). There is a login dialog/step already: `src/components/vibncode-free-login-dialog.tsx` and `src/components/onboarding/steps/login-step.tsx`. 
+- Wire it so: user clicks Connect → browser opens vibnai.com sign-in/API-key page → token returns via `vibncode://auth/callback?token=…` → stored with `secureStorage.setAuthToken(token)` → `auth-store.isAuthenticated = true`. Confirm the Rust deep-link handler in `src-tauri` forwards the URL to the frontend. +- **AC:** Fresh install (no token) → Connect → sign in → token stored → projects load. 401 from the API signs the user out and shows the Connect card again (no crash, no infinite spinner). + +--- + +## CHANGE 3 — Make the cloud the source of truth for chat 🧠 MEDIUM PRIORITY + +**Goal:** Chat history comes from the cloud, so it's identical on any machine and survives reinstalls. Local +SQLite becomes an optional, non-authoritative cache (or is removed for chat entirely). + +### 3.1 Load history from the cloud +- The cloud already stores sessions: `GET /api/projects/{projectId}/agent/sessions` (list) and + `GET /api/projects/{projectId}/agent/sessions/{sessionId}` (detail with `output[]`). +- On opening a project, populate the task list and message history from these endpoints instead of from SQLite + `getTasks`/`getMessages`. Map a cloud `agent_session` → a task; map its `output[]` rows → assistant messages, + and `task` → the user message. +- Files: `src/services/task-service.ts` (`loadTasks`, `loadMessages`), `src/services/database-service.ts` + (`getTasks`, `getMessages`). Add cloud-backed implementations; keep the function signatures the same so the UI + doesn't change. +- **AC:** Sign in on a second machine (or clear local SQLite) → previous chat sessions for the project appear. + +### 3.2 Demote or remove local SQLite for chat +- Once 3.1 works, the SQLite writes in `message-service.ts` / `task-service.ts` are redundant. Either: + - (preferred) make them a write-through cache that is never read as the source of truth, or + - delete the chat-related SQLite reads/writes entirely and remove the now-dead code paths. 
+- Keep SQLite only for genuinely local prefs if needed (e.g. `settings`, `recent_files`). Do NOT keep it for `conversations`/`messages` as a source of truth. +- **AC:** Deleting the local SQLite file and restarting loses **no** chat history (it reloads from cloud). + +--- + +## CHANGE 4 — Single model = VibnAI Gemini 3.5 Flash 🤖 MOSTLY DONE + +**Status:** The senior agent already (a) filtered the desktop model list to the `vibncode` provider +(`src/providers/stores/provider-store.ts`, `restrictToVibnai`), (b) relabeled the VibnAI model to +**Gemini 3.5 Flash** (`packages/shared/src/data/models-config.json`), and (c) pointed default model types at it +(`src/types/model-types.ts`, `src/providers/config/model-constants.ts`). + +> The model JSON is embedded into Rust via `include_str!`, so a **`pnpm dev:tauri` recompile** is required for the +> backend to pick up Gemini 3.5 Flash. + +### 4.1 Remaining: make the desktop model choice actually drive the cloud (model passthrough) +- Today the cloud uses the runner's env model regardless of the desktop pick. To make the picker authoritative: + 1. `src/services/execution-service.ts`: include `model` in the `POST /agent/sessions` body. + 2. `vibn-frontend/app/api/projects/[projectId]/agent/sessions/route.ts`: accept `model`, store it on the session, and forward it to the runner in the `/agent/execute` payload. + 3. `vibn-agent-runner/src/agent-session-runner.ts` + `src/llm/vibn-chat-model.ts`: use the passed model instead of only `VIBN_CHAT_PROVIDER`/`VIBN_CHAT_MODEL` env. +- **AC:** Selecting Gemini 3.5 Flash in the desktop results in the runner using Gemini 3.5 Flash (verify in runner logs). (Until this is done, the runner env must be set to Gemini 3.5 Flash so behavior matches the label.) + +--- + +## CHANGE 5 — Zero local compute teardown 🧹 MEDIUM PRIORITY + +**Goal:** Remove or redirect every local-compute surface inherited from talkcody. 
Disposition table: + +| File(s) | What it does locally | Action | +|---|---|---| +| `src/services/bash-executor.ts`, `src/services/terminal-service.ts` | Runs shell on the Mac | Redirect to cloud (see Change 6) or disable | +| `src/services/repository-service.ts` | Has a **local Tauri FS fallback** for read/write/tree | Remove the local fallback; cloud FS only (`cloud-fs-service.ts`). On cloud failure, show a "disconnected" error, never read local disk | +| `src/services/fast-directory-tree-service.ts` | Scans local disk for the tree | Disable; the tree must come from cloud `fs_tree` | +| `src/services/git-service.ts`, `src/services/worktree-service.ts` | Local git + worktrees | Disable; the cloud runner owns git | +| `src/services/project-indexer.ts`, `src/services/code-navigation-service.ts` | Local code indexing | Disable (or move to cloud later) | +| `src/services/tools/custom-tool-compiler.ts`, `custom-tool-bun-runner.ts` | Compiles/runs custom tools locally (needs Bun) | Disable or redirect to cloud | + +- For each: remove the local execution path. Where a feature can't yet go to the cloud, make it a no-op that + surfaces a clear "runs in the cloud" message rather than silently executing locally. +- **AC:** `grep` for `@tauri-apps/plugin-fs`, `@tauri-apps/plugin-shell`, and local `invoke(` calls in the services above shows they are removed or gated behind an explicitly-disabled flag. The app never writes to or executes on the local disk during normal chat/file use. + +--- + +## CHANGE 6 — Cloud-backed terminal 💻 MEDIUM PRIORITY + +**Goal:** Keep the terminal UI (part of the IDE feel) but execute every command **inside the cloud container**. + +- Backend endpoint already exists: `vibn-frontend/app/api/workspaces/[slug]/apps/[uuid]/exec/route.ts`. +- File: `src/services/terminal-service.ts`. Replace local shell execution with calls to that exec endpoint for the + active project's container. Stream stdout/stderr back to the terminal UI. 
+- **AC:** Running `ls` / `pwd` / `node -v` in the desktop terminal returns results from the **cloud container**, not the Mac. Nothing executes on the Mac. + +--- + +## CHANGE 7 — Replace polling with SSE (optional polish) 🔌 LOW PRIORITY + +**Goal:** Lower-latency streaming. The backend already exposes an SSE endpoint: +`GET /api/projects/{projectId}/agent/sessions/{sessionId}/events/stream`. + +- File: `src/services/execution-service.ts`. Replace the `while (isRunning)` 1.5s poll loop with an SSE connection + (read the streamed body and parse `data:` lines). Keep the `AbortController` cancel path (call `.../stop` on abort). + Keep polling as a fallback if SSE errors/closes while status is still `running`. +- Also fix: the `.../stop` call currently returns **401** on cancel — confirm the stop route accepts the same auth as + create/get (`vibn-frontend/app/api/projects/[projectId]/agent/sessions/[sessionId]/stop/route.ts`). +- **AC:** Chat streams token-by-token with no visible 1.5s steps; cancel stops the cloud session with a 200. + +--- + +## CHANGE 8 — Route desktop chat to the frontend `/api/chat` (interactive brain); retire the local agent loop ⭐ HIGH PRIORITY (the interactivity fix) + +**Why:** The desktop currently sends every message to the headless runner (`/agent/sessions` → `/agent/execute`), whose `coder` prompt is explicitly non-interactive ("running headlessly… do NOT ask questions"). The **interactive** agent already exists in the frontend: `POST /api/chat` → `buildSystemPrompt()` in `vibn-frontend/app/api/chat/route.ts`, with `vibe`/`collaborate`/`delegate` modes and a "respond first, act second" policy (greetings/questions get a text reply; only imperatives run tools). Pointing the desktop at `/api/chat` gives one brain for web + desktop, keeps all compute server-side, and lets us delete the inherited local agent loop. 
+ +> Decision (chosen): **frontend owns the brain.** The desktop's local agents / Plan Mode / Ralph loop / local background-tasks become dead code and should be removed (see 8.5). Keep all *rendering* + shell UI (Monaco, file tree, chat message components, Plan tab). + +### `/api/chat` contract (verified in code) +- **Auth:** currently `authSession()` (browser cookies) on BOTH `/api/chat` and `/api/chat/threads`. **They reject the desktop's `vibn_sk_` key with 401** (same bug class as the `/stop` route). Must be fixed first — see 8.1. +- **Threads:** `POST /api/chat/threads` (optionally `{ projectId, workspace }`) → `{ id }`. `GET /api/chat/threads?projectId=…` lists them. Tables `fs_chat_threads` / `fs_chat_messages` (history persisted server-side). +- **Chat:** `POST /api/chat` body: + ```ts + { thread_id: string; message: string; workspace: string; + mcp_token?: string; chatMode?: "vibe" | "collaborate" | "delegate"; attachedFiles?: string[] } + ``` + Response is **SSE** (`text/event-stream`). Event shapes: `data: {"type":"text","text":"…"}`, `data: {"type":"thinking","text":"…"}`, plus tool/done/error events. Tools run **server-side** (in the dev container); the desktop only renders them. + +### 8.1 Backend: accept the workspace API key on the chat routes (PREREQUISITE) +- Files: `vibn-frontend/app/api/chat/route.ts` and `vibn-frontend/app/api/chat/threads/route.ts` (and `threads/[id]/route.ts`). +- Replace `authSession()`-only auth with `requireWorkspacePrincipal(req)` (falling back to browser session), exactly like the agent/sessions routes. Resolve the user email from `principal.userId` via `fs_users`. +- **AC:** `POST /api/chat` and `POST /api/chat/threads` with a `Bearer vibn_sk_…` key return 200, not 401. (Deploy frontend.) 
+ +### 8.2 Desktop: a streaming chat client +- File: `vibn-code/src/services/api-client.ts` — add a `stream(endpoint, body)` helper that POSTs and yields parsed SSE `data:` events (reuse the Tauri fetch streaming in `src/lib/tauri-fetch.ts`). +- **AC:** can consume an SSE response line-by-line and surface `{type,text}` events. + +### 8.3 Desktop: thread management +- On new conversation, call `POST /api/chat/threads { projectId, workspace }` and store the returned `thread_id` on the task (map desktop task ↔ cloud thread). Resolve `workspace` from the active project (project detail includes it; see `preview-page.tsx` which reads `project.workspace`). +- **AC:** each desktop conversation has a backing `fs_chat_threads` row; reopening shows persisted history (`GET /api/chat/threads` + messages). + +### 8.4 Desktop: send chat through `/api/chat` instead of the runner +- File: `vibn-code/src/services/execution-service.ts` (or a new `chat-service.ts`). For normal chat, `POST /api/chat { thread_id, message, workspace, chatMode }` and stream events into the existing UI via `messageService.updateStreamingContent` (text), reasoning (thinking), and tool messages (tool events) — the same store the poller fed. +- Keep the `AbortController` cancel path (close the stream on Stop). +- **Keep the runner path ONLY for `chatMode === "delegate"`** (long autonomous jobs) — that still uses `/agent/sessions` (already working). +- **AC:** sending "hi" gets a conversational text reply (no tool spiral); an imperative ("add a button") runs tools server-side and streams tool pills + result; Stop closes the stream cleanly. + +### 8.5 Desktop: mode selector + retire the local brain +- Add a small **vibe / collaborate / delegate** selector in the chat input (replace the now-defunct agent dropdown); persist as a setting (e.g. `chat_mode`). `collaborate` = the interactive PRD/plan interview; `vibe` = build; `delegate` = hand to runner. 
+- Mark for removal (now dead once 8.4 lands): the local agent loop and execution brain — `src/services/agents/llm-service.ts`, `tool-executor.ts`, `tool-dependency-analyzer.ts`, `ralph-loop-service.ts`, `*-hook-service.ts`, the per-agent files in `src/services/agents/*-agent.ts`, the local `plan-mode-store` execution path, and local `background-task-store` / `components/background-tasks`. **Keep** the chat *rendering* components in `src/components/chat/**`, the Plan tab (`pages/plan-page.tsx`), settings, Monaco, and file tree. +- Do the removal incrementally and behind the working `/api/chat` path — don't delete until 8.4's AC pass. +- **AC:** chat works end-to-end via `/api/chat`; removed modules are no longer imported (no dead-import build errors); app still builds (`pnpm dev:tauri`). + +### 8.6 Title generation (cleanup) +- The local title service calls `https://api.vibncode.com/…` (dead host) and always fails. Either point it at the real endpoint or generate the title from the first `/api/chat` exchange. **AC:** new conversations get a real title, no `api.vibncode.com` errors in logs. + +--- + +## Verification & Release (run after each change group) + +1. `cd vibn-code && pnpm dev:tauri` launches with no console errors. +2. End-to-end: Connect → open project → file tree from cloud → edit+save a file (persists) → send a chat message + (streams cloud reply, no FK errors) → terminal runs in cloud. +3. `cd vibn-code && pnpm test` — fix regressions you introduced. +4. Commit & push each repo to its correct remote (see §1.1). Redeploy `vibn-frontend` / `vibn-agent-runner` per `VIBNDEV.md` if you changed them. + +--- + +## Priority order (do in this sequence) + +1. ~~**CHANGE 1 / 1.5 / 1.6**~~ — done (chat reaches the runner; tools wired). +2. **CHANGE 8** — route chat to `/api/chat` + mode selector + retire local brain. **This is the main work now** and it delivers interactivity. Subsumes/reframes: + - **CHANGE 7 (SSE)** — absorbed: `/api/chat` is already SSE. 
+ - **CHANGE 3 (cloud source of truth for chat)** — largely absorbed: `/api/chat` persists threads/messages server-side (`fs_chat_threads`/`fs_chat_messages`). + - **CHANGE 4.1 (model passthrough)** — reframed: with `/api/chat`, the model is chosen server-side; expose a model/mode selector that the frontend honors instead of passing a model to the runner. +3. **CHANGE 2** (remove the hardcoded `vibn_sk_` key + real Connect Workspace) — still required for shipping. +4. **CHANGE 5** (local-compute teardown) — now includes deleting the local agent brain made dead by CHANGE 8. +5. **CHANGE 6** (cloud terminal). +6. **CHANGE 1.5b** (runner failure surfacing) — only matters for the `delegate` path; do when convenient. + +--- + +## Quick reference — key files + +| Concern | File | +|---|---| +| HTTP + auth | `src/services/api-client.ts` | +| Auth state | `src/stores/auth-store.ts`, `src/services/auth-service.ts`, `src/services/secure-storage.ts` | +| Chat send flow | `src/components/chat-box.tsx` | +| Cloud agent run/stream | `src/services/execution-service.ts` | +| Messages (local) | `src/services/message-service.ts` | +| Tasks (local) | `src/services/task-service.ts` | +| Local DB service | `src/services/database-service.ts`, `src/services/database/task-service.ts` | +| **SQLite schema + FKs** | `src/services/database/turso-schema.ts`, `turso-database-init.ts` | +| Cloud FS | `src/services/cloud-fs-service.ts`, `src/services/repository-service.ts` | +| Model list/picker | `src/providers/stores/provider-store.ts`, `src/components/chat/model-selector-button.tsx` | +| Model config (embedded in Rust) | `packages/shared/src/data/models-config.json` | +| Model defaults/constants | `src/types/model-types.ts`, `src/providers/config/model-constants.ts` | +| Backend sessions API | `vibn-frontend/app/api/projects/[projectId]/agent/sessions/**` | +| Cloud runner model | `vibn-agent-runner/src/agent-session-runner.ts`, `src/llm/vibn-chat-model.ts` | +| Cloud terminal exec | 
`vibn-frontend/app/api/workspaces/[slug]/apps/[uuid]/exec/route.ts` | diff --git a/VIBN_HANDOFF_TICKETS.md b/VIBN_HANDOFF_TICKETS.md new file mode 100644 index 00000000..f7cb3117 --- /dev/null +++ b/VIBN_HANDOFF_TICKETS.md @@ -0,0 +1,249 @@ +# Vibn — Backend Handoff Tickets (for Sonnet) + +> Companion to `VIBN_PRODUCT_BLUEPRINT.md`. These are the **backend / plumbing** +> tasks to hand to a cheaper model (latest Sonnet, standard reasoning; use high +> reasoning only on T2/T3/T6, which Opus should also review). +> +> Rule of the seam: the frontend is already built against typed contracts + +> mock data. **Implement endpoints to the exact shapes; do not change the +> contract types or the frontend components.** When an endpoint is live, swap the +> mock call for a fetch — that's the only frontend edit allowed. +> +> Disjoint write scope: these tickets touch `app/api/**`, `lib/**`, the agent +> runner, and the prompt files — NOT the onboarding `.tsx` UI (except the one +> documented mock→fetch swap in T11). + +--- + +## Milestone 0 — Foundation (do first; nothing is safe until these land) + +### T1 — One task ledger: markdown everywhere · ⚠️ Opus review +**Problem:** three prompts disagree on task tracking (route.ts says `plan_*` are +retired; the agent-runner `coder.ts` says call `plan_task_complete`; the session +runner toggles markdown). This causes the "loops on task 1" bug. +**Do:** make `.vibncode/specs/*.md` markdown checkboxes (`- [ ]` / `- [x]`) the +single source of truth in all three. Retire the DB `plan_*` tools (or make them +thin markdown writers). Ensure `.vibncode/` is committed and never removed by +`git clean -fd`. +**Files:** `vibn-frontend/app/api/chat/route.ts`, `vibn-agent-runner/src/prompts/coder.ts`, `vibn-agent-runner/src/agent-session-runner.ts`. +**Accept:** a delegated run that completes a task flips the markdown checkbox AND +the desktop Interactive Backlog reflects it; no prompt references `plan_*`. 
+ +### T2 — Extract BASE + MODE prompt modules · ⚠️ Opus review +**Do:** factor the shared prompt into `BASE` (identity, voice, spine/task-ledger +contract, infra model, hard rules, untrusted-content rule, project state) + +three `MODE` deltas (Collab / Build / Grow). Both `route.ts` and the agent-runner +import the same modules. See blueprint §5.3. +**Accept:** one source of truth for BASE/MODE; route + runner import it; Architect +("Collab") no longer contains the code/deploy body. + +### T3 — MODE_TOOLS map + enforced gating · ⚠️ Opus review +**Do:** one `MODE_TOOLS: Record<"collab"|"build"|"grow", ToolName[]>` (blueprint +§5.2). Filter exposed tool schemas per mode in the prompt builder AND reject +out-of-mode calls in the dispatcher. Apply in route + runner. +**Accept:** Collab cannot call `ship`/`shell_exec`/`apps_create` (not in schema + +dispatcher rejects); `market_research_run` only callable in Collab; tool count +per turn drops to ~20–30. + +### T4 — Phantom-tool + template-literal fixes (mechanical) +**Do:** in `route.ts`: `apps_envs_set`→`apps_envs_upsert`; `apps_containers_list`→ +`apps_containers_ps`; remove `plan_decision_log` (doesn't exist); un-escape the +`\${activeProject.slug}` at ~L306 so it interpolates. +**Accept:** every tool name in prose exists in the registry; no literal +`${activeProject.slug}` in the compiled prompt. + +### T5 — De-contaminate hardcoded specs +**Do:** the 10-file spec manifest in `route.ts` (~L346–357, COPPA/Missinglettr/ +Dracula) ships to every user. Derive it from the active project, or replace with a +generic "read whatever exists in `.vibncode/specs/`." +**Accept:** a fresh project's prompt contains no GetAcquired-specific spec names. + +### T6 — Metering ledger foundation · ⚠️ Opus review +**Do:** a per-event usage ledger `{ workspaceId, clientId, projectId, costType, +quantity, rawCost, ts }` (blueprint §8.2). 
Emit an event from every cost-incurring +tool (AI tokens, deploys, domains, market research, media, Missinglettr). Build on +`lib/quotas.ts`. +**Accept:** every billable action writes a ledger row tagged by project; a query +can total raw cost per client per period. (Invoicing UI is T16 — not now.) + +--- + +## Milestone 1 — Onboarding + dashboard endpoints (implement to the contracts) + +> Contracts: `vibn-frontend/app/(onboarding)/onboarding/onboarding-agency-types.ts`. +> Mock to replace: `…/onboarding-agency-mock.ts`. +> Flow reminder: onboarding ends at **ideal customer → dashboard**; the targeting +> recommendation lives in the **dashboard** (T8), not onboarding. Steps are: +> Identity (T7b) → Presence → Ideal Customer (T7) → POST (T9) → Dashboard (T10). + +### T7 — POST /api/agency/analyze-expertise `{ text }` +**Returns:** `{ tools: string[] }`. **Do:** an LLM call that maps the consultant's +free-text ideal customer / problem description to canonical tool-category +labels that match `smb_to_software_mapping` (so they join in T8). The FE mock is +`extractTools()` (keyword stub) — replace with the LLM, keep the output shape. +**Accept:** "I want to help dentists automate booking" -> `["Appointment Scheduling +Software"]` (or better); labels exist in the mapping. These labels are what Vibn then uses to match the consultant with potential customers in their area and to drive brand awareness. + +### T7b — GET /api/agency/cities?q= (city autocomplete) +**Returns:** `CityRef[]` (global). **Do:** proxy **Places API (New) Autocomplete** +(`places:autocomplete`, restrict to localities/cities) for predictions, then +**Place Details (New)** to resolve each into `CityRef` (name = locality, +region = admin area level 1 short name, country + `countryCode` = ISO alpha-2, +lat/lng = location). Key stays server-side. The frontend `CityLookup` already +calls this and falls back to the seed list when absent. +**Accept:** typing "Vic" returns Victoria BC *and* global matches (not a fixed list). 
+ +### T7c — POST /api/agency/places/search `{ name: string, city: CityRef }` +**Returns:** `PlaceMatch[]` (top 3 matching businesses). +**Do (Three-Stage AI Category Resolver):** +1. **Stage 1: Google Places Lookup (Physical Context):** + - Query **Places API (New) Text Search** using `GOOGLE_PLACES_API_KEY` with `${name} ${city.name} ${city.region}` to find matching business entities. + - Extract: `id`, `displayName`, `formattedAddress`, `primaryType` (e.g., `"health"`), and `types`. +2. **Stage 2: DataForSEO Website Lookup (Digital Context):** + - If the business has a website, query **DataForSEO's OnPage/Database APIs** (or scrape the URL, falls back to raw query if offline) to retrieve the website's meta-title, description, and domain tags. +3. **Stage 3: AI Assessment (The Reasoning Bridge):** + - Feed both Stage 1 (Google Places categories/types) and Stage 2 (website title, description, domain tags) into a **Gemini LLM call** (`gemini-3.5-flash`). + - **Prompt:** + ``` + Google Places details: + - Name: {{displayName}} + - Primary Type: {{primaryType}} + - Types: {{types}} + + Website Details: + - Title: {{scrapedTitle}} + - Description: {{scrapedDescription}} + + Based on the above, select the single most relevant category ID (gcid) for this business from our canonical mapping list. Return only the raw GCID string (e.g., "gcid:dental_hygienist" or "gcid:plumber"). + ``` + - Map the returned GCID to one of our 5 baseline categories: `"service"` (trades/FSM), `"appointments"` (scheduling), `"food"` (dining), `"retail"` (pos/shop), or `"events"`. + - Retrieve the matched GCID's exact `"softwareNeeds"` list from `smb_to_software_mapping_final.json` and return it in `presetTools`. +**Accept:** looking up "Wheely Clean" (which Google labels `"health"`) correctly maps to `"gcid:dental_hygienist"` via AI assessment (by reading their teeth whitening website title), loading their actual dental scheduling, billing, and EHR/EMR custom blocks on Step 2! 
Shows loading spinner during execution. + +### T8 — POST /api/agency/targets `{ city: CityRef, tools: string[] }` (powers the dashboard) +**Returns:** `TerritoryOpportunity[]`, sorted by `opportunityScore` desc. +**Do:** intersect `tools` with each SMB type's `softwareNeeds` +(`smb_to_software_mapping_final.json`) to pick candidate niches; for each, get +**business counts via the Places Aggregate API** (`:computeInsights`, +`INSIGHT_COUNT`) filtered by the niche's Places `type` (mapped from `gcid`) +within the city's area (circle around `city.lat/lng`). `opportunityScore` = +demand × weak/no-software gap × low Vibn saturation, biased by tool-fit. Treat +"Reporting / Dashboard Software" as a **universal** need. Mirror `mockTargets`; +set `matchedTools` per result. +**Accept:** real per-city counts for any city worldwide; `vibnClaimedCount` from +our DB; honest numbers only (no fabricated scarcity — blueprint honesty guardrail). + +### T9 — POST /api/agency `AgencyOnboardingResult` `{ profile, expertise, tools }` +**Returns:** `{ workspaceSlug }`. +**Do (DB Storage Spec):** +1. **Workspace row:** Insert a new row into `fs_workspaces`. + - `name` = `profile.name` + - `slug` = derived slug from `profile.name` (idempotently deduplicated) + - Store all metadata inside a structured `agency_onboarding` JSONB field in `fs_workspaces.data` (or related column): + ```json + { + "city": { + "id": "victoria-bc", + "name": "Victoria", + "region": "BC", + "country": "Canada", + "countryCode": "CA", + "lat": 48.4284, + "lng": -123.3656 + }, + "hasWebsite": true, + "websiteUrl": "yoursite.com", + "hasSocials": true, + "hasBlog": false, + "hasCustomDomain": false, + "hasExistingClients": false, + "expertise": "I want to help dentists automate booking", + "tools": ["Appointment Scheduling Software"] + } + ``` +2. **Workspace member row:** Link the signed-in NextAuth user (`userId`) as the `'owner'` of this workspace in `fs_workspace_members`. +3. 
**Provisioning:** Trigger the standard workspace provision pipeline (Gitea org, Coolify project boundary via `lib/workspaces.ts`) asynchronously so the tenant stands up. +**Accept:** round-trips the posted result; a new row is created in `fs_workspaces` and `fs_workspace_members`; metadata is saved perfectly in JSONB; and the workspace slug is returned. +*(No pitch, no claimed territory — those were removed from onboarding.)* + +### T10 — The dashboard (the screen they land on) · ⚠️ Opus may build +**Do:** the agency dashboard at `/[workspace]` (light paper/ink theme). On load, +call T7 (analyze-expertise, or use stored `tools`) + T8 (targets for their city) +and render the **recommended local businesses to target** (the gold-rush list +with businesses / weak-software / claimed + matched-tools chips). Plus clients/ +prospects, projects, retainer MRR. Claiming a target creates a prospect. +**Accept:** lands from onboarding seeded with their ideal-customer description; shows real +recommendations; claim → prospect. *(FE craft — likely an Opus task; reuses +`extractTools` + `mockTargets` until T7/T8 are live.)* + +### T11 — Wire onboarding + dashboard to the endpoints +**Do:** `CityLookup` already calls `GET /api/agency/cities` (T7b) with a seed +fallback — just stand up the route. Implement `finishAgency` in `page.tsx` to +POST T9 and route to `/[workspace]`. In the dashboard, swap `extractTools`/ +`mockTargets` for T7/T8. No styling changes to onboarding. +**Accept:** the flow runs end-to-end on real data; onboarding behavior unchanged. + +### T12 — Preserve homepage intent through auth +**Do:** if the homepage hero captures input, persist it across Google OAuth +(localStorage/draft, like `vibn:firstName`) so onboarding resumes seeded. +**Accept:** typing on the homepage → sign in → onboarding has the value. 
+ +--- + +## Milestone 2 — Design-first delivery (the custom tool) + +### T13 — Ingest the 4 design-kit families +**Do:** register `vibn-ai-templates`, `vibn-app`, `vibn-crm`, `vibn-marketplace` +(in `design-templates/VIBN (2)/`) into the design-kit registry: one kit per family, +themes as overrides; add `DESIGN.md` + `tokens.css` (+ `SKILL.md`) per the existing +`lib/scaffold/open-design/design-systems/<kit>/` structure. +**Accept:** `get_design_template` returns each; they appear on the Design tab. + +### T14 — Build recipe: scaffold-from-kit first +**Do:** rewrite the Build mode recipe so building a client's **custom tool** +starts from a kit (fork into the client repo) + token reskin, instead of +`create-next-app`. The tool is scoped from the consultant's expertise + the +client's `softwareNeeds`; SMB domain → family; client brand → accent. +**Accept:** a build starts from a polished themed template, not an empty Next app. + +### T17 — Onboarding hardening note (low priority) +**Do:** `page.tsx` has pre-existing unused imports (`useState`/`useEffect`/ +`useMemo` on line 3) flagged as warnings — not from the agency work. Clean up if +touching the file. +**Accept:** no behavior change. + +--- + +## Milestone 3+ — Grow & billing (later) + +### T15 — `missinglettr_*` tool wrapper +**Do:** wrap the Missinglettr API (`workspaces.create`, `posts.create`, analytics). +Grow mode only. **Accept:** can schedule a multi-platform post; metered (T6). + +### T16 — Stripe retainers + invoicing +**Do:** on the metering ledger, roll up cost → apply pricing (retainer / cost-plus / +fixed) → Stripe one-off invoice + recurring subscription for retainers. +**Accept:** an agency can invoice a client and start a monthly retainer. + +### T18 — Google Business Profile (GMB) OAuth & Token storage +**Do:** add `https://www.googleapis.com/auth/business.manage` to the NextAuth Google Provider config. 
+Upon user sign-in, save the authorized OAuth `access_token` and `refresh_token` in `fs_users.data`. +On the backend, write a helper to list GMB locations for the authorized user and support posting Google Local Business posts. +This is the core engine for automated GBP posting and review management in Grow mode. +**Accept:** signing in with Google requests the GMB permission; tokens are securely saved in `fs_users.data` and are queryable by the server. + +### T19 — DataForSEO OnPage API Website Auditor +**Do:** implement a backend helper to post and retrieve data from **DataForSEO's OnPage API** (`/v3/on_page/task_post` -> `/v3/on_page/summary`). +Extract domain-wide metrics: `domain_info.cms` (to auto-detect what builder they are renting), `domain_info.ssl_info`, `page_metrics.broken_links`, and favicon availability. +**⚠️ Hard constraint:** DataForSEO's OnPage crawler strictly requires the target URL to include the protocol (e.g., must be `"https://allardcontractorsltd.com"` or `"http://..."`, NOT a bare domain). Ensure the server-side payload prepends `"https://"` automatically when creating the crawler task. +Expose this audit dataset to the dashboard so consultants can auto-generate SEO health audits for their prospects. +**Accept:** posting a scan request triggers the DataForSEO crawl; returns unified CMS, SSL, and link metrics; tags them to the client's project row. + +--- + +## Notes for the implementer +- Don't touch the onboarding `.tsx` files except T11's documented swap. +- Keep `onboarding-agency-types.ts` as the contract; if a shape must change, change + it there and flag it (the UI depends on it). +- Honesty guardrail (T8/T9): never show fabricated market/scarcity numbers. +- Flag T1/T2/T3/T6 for an Opus review pass before merge. 
diff --git a/VIBN_PRODUCT_BLUEPRINT.md b/VIBN_PRODUCT_BLUEPRINT.md new file mode 100644 index 00000000..92d9c830 --- /dev/null +++ b/VIBN_PRODUCT_BLUEPRINT.md @@ -0,0 +1,433 @@ +# Vibn — Product Blueprint & Go-to-Market Architecture + +> Status: Draft v3 · Owner: Mark · Last updated: 2026-06-04 +> v3 sharpens the wedge to **custom tools** for local businesses (not +> websites/marketing), makes onboarding **expertise-first → dashboard** (no pitch +> generator), and moves the targeting/"gold rush" recommendation into the +> dashboard. v2 (services+margins+pitch onboarding) is superseded; v1 (founder/ +> build-first) before it. +> +> Implemented so far (FE, against mocks + typed contracts): the contractor +> onboarding flow — `app/(onboarding)/onboarding/onboarding-agency*.{tsx,ts}` + +> the front-door fork in `page.tsx`. Backend + dashboard pending (handoff doc). + +--- + +## 1. Positioning + +**Vibn is the operating system for a new breed of local-business consultant — it +helps them find local SMBs, build them the *custom tools* they actually need +(without writing code), and bill for it profitably — so those businesses stop +paying for a stack of expensive SaaS apps that don't talk to each other.** + +The wedge is **custom tools, not websites/marketing.** Every local business is +overpaying for generic SaaS that half-fits; the consultant builds one tool that +fits their workflow exactly. + +Think **Harvest for AI vibe coding**: the place a consultant runs the whole +client business — find, build, host, invoice. + +Two audiences, one engine — but a clear hero: +- **PRIMARY · The new consultant / freelancer / small studio.** Often not a deep + engineer (a marketer, designer, or hustler starting a "websites + marketing for + local business" practice). Vibn is their unfair advantage. **They are the buyer.** +- **SECONDARY · The SMB owner doing it themselves.** Same engine, no markup. Served + by self-serve, not chased. +- **De-emphasized:** startup founders. 
That lane (Lovable, v0, Bolt, Replit) is a + bloodbath and is *not* where our infrastructure points. + +### Why this wedge (the infra already leans here) +- `market_categories_suggest` returns **Google Business Profile** categories — a + *local business* construct. +- `market_research_run` pulls local **business leads, TAM, competitors** (DataForSEO). +- Missinglettr lists **Google Business** among its 12 platforms — local social + GBP. +- The "owner" persona pitch is *"replace the stack of tools you rent"* — SMB ops. +- The Cadence CRM template = contacts + scheduling; domains + Stripe = every SMB. + +None of this was built for startups. It was built for **local SMBs and the people +who serve them.** This is a focus, not a pivot. + +--- + +## 2. The two front doors + +Opposite motivations require opposite openings: + +| Door | Who | First question they're asking | Opening | +|---|---|---|---| +| **"Personal"** | SMB owner / self-builder | *"Can I see my thing built?"* | **Build-first** — straight to a live themed preview (§6) | +| **"Agency"** (HERO) | New consultant | *"Can I do this? What do I build? Who needs it? How do I get a client?"* | **Contractor-first** — set up the agency, state your expertise, land in a dashboard of local targets (§4) | + +The homepage leads with the consultant promise and routes self-builders to the +simpler path — it does **not** treat them as equals. + +> Reversal from v1: "get to a live preview ASAP" is right for a self-builder and +> *wrong* for a consultant. A consultant is evaluating a **business**, not a build +> tool. Lead with the contractor parts; building happens later, per client. + +--- + +## 3. 
The lifecycle = one client engagement + +For the consultant, the product lifecycle is the shape of **a single client +engagement**, run once per SMB client: + +``` +DISCOVER → BUILD → REFINE → GROW +the pitch deliver iterate the retainer +(win it) the site with client (recurring $) +``` + +It's driven by two orthogonal axes (kept distinct in code — today they're conflated): + +| Axis | Controls | Decided | +|---|---|---| +| **Engagement stage** | The *path* (research/pitch vs build vs grow) | Per client, by where the deal is | +| **SMB domain** (trades, salon, dental, food, fitness…) | The *look* — template family + theme | Inferred from the business type | + +A consultant runs many clients at different stages simultaneously. + +--- + +## 4. Contractor-first onboarding — "Set up your AI agency" + +Onboarding is short and has one job: learn **who the consultant is** and **what +they love building**. The moment we know their sweet spot, we drop them into +their **dashboard** — where the local-business recommendations live as an +ongoing feature (not a one-shot screen). No pitch generator, no terminal +targeting screen in onboarding. *(Implemented: `app/(onboarding)/onboarding/onboarding-agency.tsx`.)* + +```mermaid +flowchart TD + A["1 · Your agency
name · city (Places lookup)"] --> B["2 · Your presence
what does your agency have today? (checklist)"] + B --> C["3 · Your ideal customer (free text)
'who / what problem do you want to solve?'"] + C --> D["Open my dashboard →"] + D --> E["DASHBOARD
AI reads description → recommends local businesses to target"] +``` + +- **Agency** option details: "I want to do billable AI work for others" (VIBN helps you find local businesses that you can build custom solutions for). +- **Personal** option details: "I want to build my own ideas" (Go from idea to market, and beyond). + +### Step contents +1. **Your agency** — name, **city** (global Places-API lookup, §6.x). +2. **Your presence** — "What does your agency have today?" Checklist of assets + (Website, social media accounts, blog, custom domain, existing billing). + Light profiling to customize their dashboard experience. +3. **Your ideal customer** *(the heart of it)* — a **free-text** box: *"Is there a + certain type of business or business problem you are passionate about?"* + (e.g. "I want to help dentists automate patient booking"). If they are undecided, + clicking **"I'm not sure right now"** bypasses the step with a neutral default + ("help any local business automate workflows"). This replaces the old examples list. + Vibn will help them match with potential customers in their area, and drive awareness of their brand. +4. **→ Dashboard.** CTA "Open my dashboard →" finishes onboarding with + `{ profile, expertise, tools }` (AI-extracted tool categories, where `expertise` + holds the ideal-customer string). + +### The Local Business Category Lookup & Mapping Pipeline (onboarding) +This is the core "Business Identity & Needs" pipeline run on Step 1 of the self-builder flow, designed to bypass Google's messy administrative category labels: +1. **Step 1: City Geocoding & Radius Setup:** + - The user selects their city in Step 1. The frontend retrieves their structured `CityRef` (holding `lat`/`lng` coordinates from Google Places) and sets a default radial search geofence of **50km**. +2. 
**Step 2: Geofenced DataForSEO Business Search:** + - The backend takes the business name (or URL) and queries the **DataForSEO Business Listings Search API** (`/v3/business_data/business_listings/search/live`) using geofenced `"location_coordinate"` radial search: + `"location_coordinate": "{{lat}},{{lng}},50"` + - This bypasses Google's strict SAB restrictions, pulling down the full business records (including mobile businesses with hidden addresses like "Wheely Clean Mobile Dental"). + - The server extracts GMB's main `"category"` and `"additional_categories"` arrays. + - It joins them to our `smb_to_software_mapping_final.json` dataset (the 4,006-item database) to fetch their exact, customized software tool requirements. +3. **Step 3: Unpacked Category Card Selection ("Which best describes your business?"):** + - The frontend receives the matched business, and automatically unpacks **both its primary category and all discovered GMB alternative categories** into individual clickable cards. + - The screen displays: **"Which best describes your business?"** + - Selecting any card (e.g. *Dental hygienist* or *Teeth whitening service*) instantly loads that specific subcategory's custom presets and advances to Step 2! + - *Fallback:* If DataForSEO or geocoding fails (e.g. offline dev), it gracefully triggers Google Places Text Search (New) + Gemini 2.5 Flash as an automated fallback reasoning bridge. + +### The targeting engine (lives in the dashboard) + +### The dashboard (the home screen they land on) +- **Recommended targets** — the AI's local-business recommendations (above), + refreshable; claim one to start a client/prospect. +- **Clients / prospects** — each SMB; status (prospect → won → live → retainer). +- **Projects** — per client (a custom tool build + hosting/support retainer). +- **Revenue & margin** — what each client costs me vs. what I bill; retainer MRR + (illustrative until metering lands, then live). 
+- Building a client's tool is entered **from a client**, not the dashboard root. +- *(To build next — reuses `extractTools` + `mockTargets`; light paper/ink theme.)* + +--- + +## 5. Modes — capability surfaces with enforced tool gating + +Three modes (capability surfaces, not vibes). **Refine is not a mode** — it's Build +against an already-live project. + +| Mode | Engagement role | Stop condition | +|---|---|---| +| **Collab** | Discover — research + pitch + spec | PRD + decisions + backlog (the spine) | +| **Build** | Deliver the site + refine | A clickable preview / shipped `fqdn` | +| **Grow** | The retainer — distribute + monitor | Scheduled content + live analytics | + +### 5.1 Tool gating is enforced, not described +Today "DO NOT WRITE CODE" is a prompt *request* while `fs_write`/`ship`/`shell_exec` +stay in the tool list → the constraint is soft and the prompt re-teaches the +forbidden workflow. Fix: one `MODE_TOOLS: Record<"collab"|"build"|"grow", ToolName[]>` map, read by: +1. **Prompt builder** — filters exposed tool schemas per mode (model can't see what + it can't call; also cuts ~88 schemas → ~20–30 = token/latency win). +2. **Dispatcher** — rejects out-of-mode calls (guards hallucinated names). +Applied in **both** `vibn-frontend/app/api/chat/route.ts` and the agent-runner. 
+ +### 5.2 Allowlist sketch +| Capability | Collab | Build | Grow | +|---|---|---|---| +| Reads (`projects_get`, `apps_*` reads, `get_design_template`) | ✅ | ✅ | ✅ | +| Research (`market_*` 💲, `github_*`, `http_fetch`) | ✅ | ❌ | ✅ (seo/insights) | +| Spine docs — `fs_*` **scoped to `.vibncode/specs/`** | ✅ | ✅ (+ repo) | ✅ (blog/SEO) | +| Design kit / `apps_templates_scaffold` | propose | ✅ full | theme marketing | +| Engineering (`shell_exec`, `dev_server_*`, `apps_create`, `ship`, `databases_*`) | ❌ | ✅ | `apps_create { repo }` only | +| Distribution (`missinglettr_*`, `generate_media`) | ❌ | ❌ | ✅ | +| Destructive (`*_delete`, `apps_volumes_wipe`) | ❌ | ⚠️ confirm | ❌ | + +Gating gives each guardrail a home: **money gate** in Collab, **destructive-confirm** +in Build, **untrusted-content rule** in BASE (Collab + Grow read the open web). + +### 5.3 Prompt composition +``` +BASE identity · voice · spine/task-ledger contract · infra model · hard rules · + untrusted-content rule · project + client/agency state + + +MODE { Collab | Build | Grow } — behavior + stop condition + protocols + + +CONTEXT design kit · decisions/backlog · stage seed · SMB-domain template guidance +``` +BASE + MODE must be **shared modules imported by both** the chat route and the +agent-runner (today there are three drifted copies — root of the "loops on task 1" +bug). + +### 5.4 Visibility +Modes **auto-select** by stage + project state. The toggle remains a power-user +override. Nobody picks a mode manually. + +--- + +## 6. Build-first door & design-first delivery + +The build flow is no longer the front door — but it's still how work gets +*delivered* (and how a self-builder enters). It must be **design-first, not +code-first.** + +- **Stop** scaffolding from `create-next-app` + hand-building UI (slow, generic, + the source of visual-QA loops). 
+- **Start** from a polished, CSS-variable-themed template family, reskin to the + SMB's brand via the design-kit token system, then populate content. + +### 6.1 Template families (assets in `design-templates/VIBN (2)/`) +| Family | Use | SMB domain | +|---|---|---| +| `vibn-ai-templates` | Shared base library (components + 4 themes) | foundation | +| `vibn-app` | Marketing / landing / lead capture + payments | most local SMB sites | +| `vibn-crm` (Cadence) | Ops: contacts, scheduling, dashboard | trades, salons, clinics | +| `vibn-marketplace` (Atlas) | Listings / booking / two-sided | directories, multi-vendor | + +Themes (`minimal` / `dark` / `glass` / `editorial`) + accent come from the SMB's +brand. **Demos must look visibly local-SMB** (a plumber, a salon, a dental office), +never a generic SaaS dashboard. + +### 6.2 Wired vs. to-build +- ✅ Design-kit registry, Design tab, token injection, `apps_templates_scaffold`, `get_design_template`. +- ❌ The four families ingested as registered kits (`DESIGN.md` + `tokens.css` + `SKILL.md`). +- ❌ Build recipe rewritten to "scaffold-from-kit first." +- ❌ Build entered per client from the console; SMB brand → kit selection. +- ❌ Real session streamed to a real themed preview (today: `setTimeout` animation + fake URL, answers discarded). + +### 6.3 Fork, don't depend +Fork the kit into the client's repo (the READMEs say "fork it") — self-contained, +fully editable. + +--- + +## 7. Grow — the retainer (Missinglettr) + +Grow is the consultant's **recurring revenue.** Once a client's site is live, the +consultant runs their marketing as a monthly retainer. + +- **Missinglettr API** = one API to post/schedule across 12 platforms (incl. Google + Business) with shortening, analytics, webhooks. The engine of Grow. +- **My Business Business Information API (GMB)** = used specifically via OAuth 2.0 to manage verified locations, publish Google Local Business posts, and retrieve and reply to reviews (reputation management). 
Combined with Missinglettr, it forms the core Grow suite. +- **DataForSEO OnPage API Website Auditor** = crawlers that fetch a client's website and return full on-page diagnostics: `cms` auto-detection (Wix, WordPress, Squarespace), SSL status, mobile responsiveness, and broken link counts. +- Capabilities: AI-generated social + blog + **local SEO pages**, styled to match + the client's design kit, scheduled via Missinglettr; reviews; analytics reporting. +- Existing blocks: `market_seo_analyze`, `generate_media`, `project_recent_errors` + (monitoring), and `vibn-attribution-package` (UTM → first-touch attribution). +- To build: `missinglettr_*` tool wrapper, content/SEO generation, analytics loop, + and a **client-facing monthly report** (the retainer's visible value). + +--- + +## 8. Agency / billing layer — "Harvest for AI vibe coding" (CORE, not a fast-follow) + +For the consultant ICP, **getting paid is half the value prop.** It cannot be an +afterthought. The most important primitive is the **monthly retainer** (the Grow +fee = the consultant's MRR), not just the one-off project invoice. + +### 8.1 Cost sources (all metered, tagged by client/project) +| Cost type | Source | +|---|---| +| AI usage (tokens) | `lib/ai/llm-client.ts` | +| Compute / infra (apps, dev containers, DBs) | Coolify / `lib/dev-container.ts` | +| Domains | `domains_register` / `lib/opensrs.ts` | +| Market research 💲 | `market_research_run` (DataForSEO) | +| Distribution | Missinglettr (usage / subscription) | +| Media | `generate_media` | + +### 8.2 Components +1. **Metering ledger** — `{ workspaceId, clientId, projectId, costType, quantity, + rawCost, ts }`. Seeded by `lib/quotas.ts` + telemetry. Every cost-incurring tool + emits an event. +2. **Client ↔ project mapping** — agency workspace holds many client projects. +3. **Pricing engine** — per client: **retainer** (recurring), cost-plus markup %, + or fixed project price. Roll up raw cost → apply pricing → billable. +4. 
**Invoicing & retainers** — Stripe (in stack): one-off invoices **and recurring + subscriptions** for retainers; optional client statement/portal. +5. **In-agent cost transparency** — AI surfaces estimated cost *before* spending + (the money-gate guardrail as a platform concept); every spend is attributable. + +### 8.3 Consequence +Accurate per-client cost accounting is needed regardless of when invoicing UI ships, +so **the metering ledger is launch-foundation work** — retrofitting attribution is +painful. Onboarding's "your margins" can be *illustrative* until metering is live, +provided the numbers are honestly labeled. + +--- + +## 9. The spine — single source of truth + +**`.vibncode/specs/*.md` (markdown) is the law.** Retire DB `plan_*` tools (or make +them thin markdown writers). The desktop Interactive Backlog already reads markdown; +the session runner already toggles checkboxes; it's git-tracked; it's the artifact +the user sees and edits. +- Task state = `- [ ]` / `- [x]`; all brains obey this. +- `.vibncode/` **must be committed and never removable by `git clean -fd`** (the + earlier unattended-loop bug). +- The **design kit travels in the spine** — styling source of truth from Build → Grow. +- The spine carries the engagement: Discover writes the spec/pitch, Build consumes + it + adds the kit, Grow reads product + kit to generate matching marketing. + +--- + +## 10. Hardening (from the prompt/tool audit) + +- **Phantom tools:** `apps_envs_set`→`apps_envs_upsert`; `apps_containers_list`→ + `apps_containers_ps`; `plan_decision_log` (Architect; doesn't exist) → remove. +- **Template-literal leak:** `route.ts` ~L306 escaped `\${activeProject.slug}` → + un-escape so it interpolates. +- **Task-tracking civil war:** route vs shared-body vs runner disagree → markdown + checkboxes everywhere (§9). +- **Hardcoded project specs:** route ships one project's private spec list + (COPPA / Missinglettr / Dracula) to every user → generalize/derive. 
+- **Architect contradiction:** solved by mode gating (§5) + composition (§5.3). +- **Sentry snippet path** unreachable from dev container → inline / ship in scaffold. +- **`request_visual_qa` "Always"** → "for UI work." +- **Infra clarity:** add infra model + first-deploy recipe (resolve `ship` "if + linked" + `ship` vs `apps_create { repo }`). + +--- + +## 11. Go-to-market sequencing + +| Milestone | Scope | Why | +|---|---|---| +| **0 · Foundation** | Spine = markdown everywhere; BASE+MODE shared modules; `MODE_TOOLS` gating; phantom/leak fixes; de-contaminate specs; **metering ledger** | Nothing safe until brains agree on the ledger; metering before any spend | +| **1 · Contractor onboarding (launch front door)** | "Start an AI agency" flow: opportunity → identity (Places city lookup) → presence → **free-text expertise** → dashboard. ✅ FE built against mocks | What the hero ICP evaluates first; short and contained | +| **2 · The dashboard + targeting** | Land them in the dashboard; AI extracts tools from expertise (`analyze-expertise`) → recommends local businesses to target (`targets` via Places Aggregate); claim → prospect | The "gold rush" payoff + ongoing home | +| **3 · Deliver (design-first build)** | Ingest 4 kits → kit-first build recipe → build the client's **custom tool** per client → real preview | Turns a claimed target into a delivered tool | +| **4 · Grow retainer + Missinglettr** | `missinglettr_*`, content/local-SEO, analytics loop, client monthly report | Recurring-revenue hook | +| **5 · Full billing** | Retainers + one-off invoicing (Stripe) + client statements on the metering ledger | Completes "bill for it" | + +**Launch line:** Milestones 0 + 1 + 2 (onboarding → dashboard with real +recommendations) + a slice of 3 (one custom tool built end-to-end). Grow/billing +shown as the model, delivered next. + +**Build status:** onboarding FE (steps 1–4) is built and compiles clean against +mock data + typed contracts. 
Backend endpoints + the dashboard are next (see +`VIBN_HANDOFF_TICKETS.md`). + +--- + +## 12. The journeys (consultant serving a local SMB) + +### Sofia — the new consultant (the hero path) +Sofia wants to start a side-business building custom tools for local businesses. +She signs up, selects the "Agency" option, names it, sets her city (a global Places lookup), +and declares what assets she has today. Then the +one question that matters: *"Is there a certain type of business or business problem you are passionate about?"* She types +"I want to help local dentists automate patient booking" and clicks **Open my dashboard**. She's +in. Her dashboard already shows local businesses that fit — salons, gyms, auto +shops, dentists — each flagged as stuck on disconnected SaaS and ripe for one +custom tool. She hasn't done any work and already has a target list. + +### Joe's Plumbing — the client gets a custom tool (Build) +Sofia claims "plumbers," picks Joe as a client, and builds him a custom +scheduling + invoicing + reporting tool — one app around his workflow, replacing +the three half-fitting SaaS subscriptions he was paying for. Design-first from a +kit, in his colors, live in a day instead of weeks. + +### The SMB owner — self-serve (secondary door) +An owner who finds Vibn directly selects the "Personal" option, enters their business name, city, and optional website, gets their business type auto-analyzed by the AI, and goes straight to building. +### The retainer — recurring revenue (Grow) +With Joe live, Sofia adds a marketing retainer. Vibn generates his Google Business +posts, a couple of local-SEO pages, and social content in his brand voice, schedules +it across platforms via Missinglettr, and produces a monthly report Sofia forwards to +Joe. Joe sees leads; Sofia bills a recurring fee. + +### Getting paid (Billing) +Every cost on Joe's projects — AI, hosting, his domain, the market research, the +social scheduling — is metered and attributed.
Sofia applies her markup, and Vibn +turns the build into a one-off invoice and the marketing into a recurring Stripe +subscription. She's not just delivering work; she's running a business with real margin. + +### The SMB owner — self-serve (secondary door) +An owner who finds Vibn directly skips the agency setup and goes build-first: +"a booking tool for my salon" → a quick look confirm → watches it build → lands +in the chat refining it. Same engine, no markup, no dashboard. + +--- + +## 13. Open decisions +1. ~~**Front-door fork**~~ — **DECIDED:** explicit "Agency" vs "Personal" (neutral presentation, no pre-selection bias). +2. **Targeting spend** — the dashboard's recommendations need per-city counts + (Places Aggregate API) and may touch paid `market_research_run`. Use a + free/cached read for the first dashboard view; gate any paid run on consent. +3. **`MODE_TOOLS`** as the single source of truth for prompt + dispatcher. (Lean: yes.) +4. **Collab write scope** path-scoped to `.vibncode/`. (Lean: yes.) +5. **Kit ingestion** — one kit per family, themes as overrides. (Lean: yes.) +6. **Pricing model first** — retainer vs cost-plus vs fixed: which to ship first. + (Lean: retainer + simple markup.) +7. **Launch SMB vertical** — which single domain to polish end-to-end first + (trades? salon? dental?). +8. ~~**Core brand color**~~ — **DECIDED** (see §14). + +--- + +## 14. Design system (aligned — build everything to this) + +The canonical visual spec. All net-new frontend is built to it; the cheaper model +inherits it too. + +- **Foundation: warm paper-and-ink** (the differentiator). Product is light + (`--vibn-paper` bg, `--vibn-ink` text, the warm-neutral ramp); marketing may be + dramatically dark. +- **One brand color: matured clay-coral**, threaded through *both* marketing and + product, used **sparingly** (primary action, active state, brand mark, progress, + the single "next thing"). No second chromatic color. 
+ - Tokens in `app/globals.css`: `--vibn-coral` `oklch(0.68 0.16 35)` (actions), + `--vibn-coral-hover`, `--vibn-coral-glow` (the brighter `0.74 0.175 35` for + glow/focus), `--vibn-coral-soft`, `--vibn-coral-fg`. + - Added **additively** — existing `--primary`/`--accent` not rewired blind; + new surfaces use the coral tokens directly and get visual QA. +- **Hard rule:** Vibn chrome accent (coral) ≠ a client app's accent. The product + chrome is coral; each *built client site* uses its own design-kit accent. +- **Two contexts:** onboarding wizard = dark + coral (existing primitives in + `app/(onboarding)/onboarding/onboarding-primitives.tsx`); product/console = + light paper/ink + coral. +- **Client builds** come from the 4 design-kit families (base + app/crm/ + marketplace), CSS-var themed (minimal/dark/glass/editorial), forked into the + client repo. Demos must look visibly local-SMB. +- **Type:** product = Inter (`--font-inter`); editorial moments may use a serif + display per the kit. diff --git a/vibn-agent-runner/dist/agent-session-runner.d.ts b/vibn-agent-runner/dist/agent-session-runner.d.ts index edbbe1cf..7c8cc6de 100644 --- a/vibn-agent-runner/dist/agent-session-runner.d.ts +++ b/vibn-agent-runner/dist/agent-session-runner.d.ts @@ -1,15 +1,9 @@ /** * agent-session-runner.ts * - * Streaming variant of runAgent wired to a VIBN agent_sessions row. - * After every LLM turn + tool call, it PATCHes the session in the VIBN DB - * so the frontend can poll (and later WebSocket) the live output. - * - * Key differences from runAgent: - * - Accepts an `emit` callback instead of updating job-store - * - Accepts an `isStopped` check so the frontend can cancel mid-run - * - Tracks which files were written/modified for the changed_files panel - * - Calls vibn-frontend's PATCH /api/projects/[id]/agent/sessions/[sid] + * Upgraded Cloud Agent Executor for VibnCode. 
+ * Implements 4-level Smart Concurrency (parallel reads/lookups) and the + * Ralph Loop (autonomous self-correction) entirely inside your secure Cloud VM. */ import { AgentConfig } from "./agents"; import { ToolContext } from "./tools"; diff --git a/vibn-agent-runner/dist/agent-session-runner.js b/vibn-agent-runner/dist/agent-session-runner.js index 201682d1..86ff150b 100644 --- a/vibn-agent-runner/dist/agent-session-runner.js +++ b/vibn-agent-runner/dist/agent-session-runner.js @@ -2,15 +2,9 @@ /** * agent-session-runner.ts * - * Streaming variant of runAgent wired to a VIBN agent_sessions row. - * After every LLM turn + tool call, it PATCHes the session in the VIBN DB - * so the frontend can poll (and later WebSocket) the live output. - * - * Key differences from runAgent: - * - Accepts an `emit` callback instead of updating job-store - * - Accepts an `isStopped` check so the frontend can cancel mid-run - * - Tracks which files were written/modified for the changed_files panel - * - Calls vibn-frontend's PATCH /api/projects/[id]/agent/sessions/[sid] + * Upgraded Cloud Agent Executor for VibnCode. + * Implements 4-level Smart Concurrency (parallel reads/lookups) and the + * Ralph Loop (autonomous self-correction) entirely inside your secure Cloud VM. 
*/ Object.defineProperty(exports, "__esModule", { value: true }); exports.runSessionAgent = runSessionAgent; @@ -18,7 +12,68 @@ const child_process_1 = require("child_process"); const vibn_chat_model_1 = require("./llm/vibn-chat-model"); const tools_1 = require("./tools"); const loader_1 = require("./prompts/loader"); -const MAX_TURNS = 60; +const MAX_TURNS = 80; +function runBuildVerification(repoRoot, appPath) { + const fs = require("fs"); + const path = require("path"); + const { execSync } = require("child_process"); + const absoluteAppPath = path.join(repoRoot, appPath); + // Find all directories containing package.json (excluding node_modules, .git, .next, .vibncode, dist) + const pkgDirs = []; + function scan(dir) { + try { + const files = fs.readdirSync(dir); + if (files.includes("package.json")) { + pkgDirs.push(dir); + } + for (const file of files) { + if (file === "node_modules" || + file === ".git" || + file === ".next" || + file === ".vibncode" || + file === "dist") { + continue; + } + const full = path.join(dir, file); + if (fs.statSync(full).isDirectory()) { + scan(full); + } + } + } + catch { } + } + scan(absoluteAppPath); + if (pkgDirs.length === 0) { + return { success: true }; // No package.json anywhere, skip build check + } + for (const dir of pkgDirs) { + const pkgJsonPath = path.join(dir, "package.json"); + try { + const pkg = JSON.parse(fs.readFileSync(pkgJsonPath, "utf8")); + // Skip if there's no build script or if it's the root container which is just a workspace wrapper + if (!pkg.scripts || !pkg.scripts.build || pkg.name === "workspace") { + continue; + } + console.log(`[Ralph Loop] Running automatic build verification: npm run build inside ${dir}...`); + execSync("npm run build", { + cwd: dir, + stdio: "pipe", + timeout: 60000, + }); + } + catch (err) { + const stderr = err.stderr + ? 
err.stderr.toString() + : err.message || String(err); + console.warn(`[Ralph Loop] Build verification failed inside ${dir}:`, stderr); + return { + success: false, + error: `Build failed in directory "${path.relative(repoRoot, dir)}":\n${stderr.slice(-3000)}`, + }; + } + } + return { success: true }; +} // ── VIBN DB bridge ──────────────────────────────────────────────────────────── async function patchSession(opts, payload) { const url = `${opts.vibnApiUrl}/api/projects/${opts.projectId}/agent/sessions/${opts.sessionId}`; @@ -33,7 +88,6 @@ async function patchSession(opts, payload) { }); } catch (err) { - // Log but don't crash — output will be lost for this line but loop continues console.warn("[session-runner] PATCH failed:", err instanceof Error ? err.message : err); } } @@ -45,6 +99,8 @@ const FILE_WRITE_TOOLS = new Set([ "write_file", "replace_in_file", "create_file", + "fs_write", + "fs_edit", ]); function extractChangedFile(toolName, args, workspaceRoot, appPath) { if (!FILE_WRITE_TOOLS.has(toolName)) @@ -56,7 +112,7 @@ function extractChangedFile(toolName, args, workspaceRoot, appPath) { const fullPrefix = `${workspaceRoot}/${appPath}/`; const appPrefix = `${appPath}/`; let displayPath = rawPath.replace(fullPrefix, "").replace(appPrefix, ""); - const fileStatus = toolName === "write_file" ? "added" : "modified"; + const fileStatus = toolName === "write_file" || toolName === "fs_write" ? 
"added" : "modified"; return { path: displayPath, status: fileStatus }; } // ── Auto-commit helper ──────────────────────────────────────────────────────── @@ -151,59 +207,209 @@ async function autoCommitAndDeploy(opts, task, emit) { await patchSession(opts, { status: "done" }); } } -// ── Main streaming execution loop ───────────────────────────────────────────── -async function runSessionAgent(config, task, ctx, opts) { - const systemPrompt = (0, loader_1.resolvePrompt)(config.promptId); - const emit = async (line) => { - console.log(`[session ${opts.sessionId}] ${line.type}: ${line.text}`); - await patchSession(opts, { outputLine: line }); - }; - await emit({ - ts: now(), - type: "info", - text: `Agent starting working in ${opts.appPath}`, +function findTasksDir(root) { + const fs = require("fs"); + const path = require("path"); + // 1. Check if root/.vibncode/tasks exists directly + const direct = path.join(root, ".vibncode", "tasks"); + if (fs.existsSync(direct)) + return direct; + // 2. Recursively scan subdirectories (excluding node_modules, .git, etc.) 
+ try { + const files = fs.readdirSync(root); + for (const file of files) { + if (file === "node_modules" || + file === ".git" || + file === ".next" || + file === "dist") { + continue; + } + const full = path.join(root, file); + if (fs.statSync(full).isDirectory()) { + const found = findTasksDir(full); + if (found) + return found; + } + } + } + catch { } + return null; +} +function parseTaskItems(repoRoot) { + const fs = require("fs"); + const path = require("path"); + const tasksDir = findTasksDir(repoRoot); + console.log(`[Orchestrator] repoRoot: "${repoRoot}", resolved tasksDir: "${tasksDir}"`); + if (!tasksDir) + return []; + const items = []; + try { + const files = fs + .readdirSync(tasksDir) + .filter((f) => f.endsWith(".md")); + console.log(`[Orchestrator] Found task files:`, files); + files.sort(); + for (const file of files) { + const filePath = path.join(tasksDir, file); + const content = fs.readFileSync(filePath, "utf8"); + console.log(`[Orchestrator] Reading ${file} (length: ${content.length} bytes). 
Head:\n${content.slice(0, 500)}`); + const lines = content.split("\n"); + lines.forEach((line, lineIndex) => { + const match = line.match(/^(\s*)(?:-\s*)?\[([ xX])\]\s+(.+)$/); + if (match && match[2] !== undefined && match[3] !== undefined) { + const checked = match[2].toLowerCase() === "x"; + console.log(`[Orchestrator] Parsed line ${lineIndex + 1}: isChecked=${checked}, text="${match[3].trim()}"`); + items.push({ + text: match[3].trim(), + filePath, + lineIndex, + isChecked: checked, + fileName: file, + }); + } + }); + } + } + catch (err) { + console.error("[Orchestrator] Error parsing task items:", err); + } + return items; +} +function toggleTaskOnDisk(task) { + const fs = require("fs"); + const content = fs.readFileSync(task.filePath, "utf8"); + const lines = content.split("\n"); + const line = lines[task.lineIndex]; + if (line) { + const match = line.match(/^(\s*)(?:-\s*)?\[([ xX])\]\s+(.+)$/); + if (match && match[1] !== undefined && match[3] !== undefined) { + const indent = match[1] || ""; + const hasDash = line.includes("-"); + const prefix = hasDash ? `${indent}- ` : indent; + lines[task.lineIndex] = `${prefix}[x] ${match[3]}`; + fs.writeFileSync(task.filePath, lines.join("\n"), "utf8"); + } + } +} +async function generateBacklogFromPrompt(taskPrompt, repoRoot) { + const fs = require("fs"); + const path = require("path"); + const tasksDir = path.join(repoRoot, ".vibncode", "tasks"); + fs.mkdirSync(tasksDir, { recursive: true }); + const prompt = `You are an elite Software Engineering Orchestrator. +Your goal is to break down the user's high-level objective into a highly detailed, sequential checklist of concrete, atomic, self-contained implementation tasks. + +High-Level Objective: +"${taskPrompt}" + +Please output a standard Markdown file containing: +1. A brief 1-sentence overview. +2. A list of tasks, where each task MUST be formatted as a standard Markdown checkbox starting with "- [ ] ": +- [ ] Implement database schema changes for ... 
+- [ ] Add endpoint handler for ... +- [ ] Write tests ... + +Be extremely thorough and break the objective down into small, digestible units of work (e.g. 5-15 tasks). +Do NOT include any extra conversational text or explanations. Just output the clean markdown.`; + const resp = await (0, vibn_chat_model_1.callVibnChat)({ + systemPrompt: "You are a precise technical orchestrator who only outputs markdown checklist files.", + messages: [{ role: "user", content: prompt }], + temperature: 0.1, }); - // Scope the system prompt to the specific app within the monorepo + const content = resp.text || `# Delegated Backlog\n\n- [ ] ${taskPrompt}`; + const backlogPath = path.join(tasksDir, "00-delegated-backlog.md"); + fs.writeFileSync(backlogPath, content, "utf8"); +} +function commitTaskProgress(task, repoRoot) { + const { execSync } = require("child_process"); + try { + console.log(`[Orchestrator] Committing task progress: ${task.text}`); + execSync("git add -A", { cwd: repoRoot, stdio: "pipe" }); + const msg = `feat(tasks): [Completed] ${task.text}`; + execSync(`git commit -m "${msg.replace(/"/g, '\\"')}"`, { + cwd: repoRoot, + stdio: "pipe", + }); + } + catch (err) { + // If nothing to commit, that's fine + } +} +async function runSingleSubTask(task, config, ctx, opts, emit) { + const path = require("path"); + const fs = require("fs"); const basePrompt = (0, loader_1.resolvePrompt)(config.promptId); + let devServersContext = "No active dev servers running on port 3000."; + try { + const listResult = await (0, tools_1.executeTool)("dev_server_list", { projectId: opts.projectId }, ctx); + if (Array.isArray(listResult) && listResult.length > 0) { + devServersContext = listResult + .map((s) => `- Port ${s.port} (${s.state}): ${s.command} -> Preview URL: ${s.previewUrl}`) + .join("\n"); + } + } + catch { } const scopedPrompt = `${basePrompt} -## Active context -You are working inside the monorepo directory: ${opts.appPath} -All file paths you use should be relative to this 
directory unless otherwise specified. -When running commands, always cd into ${opts.appPath} first unless already there. -Do NOT run git commit or git push — the platform handles committing after you finish. +## ACTIVE DEVELOPER WORKSPACE STATE +Active Dev Servers: +${devServersContext} + +## ACTIVE SUBTASK OBJECTIVE +You are working on a single task in your task queue: +TASK: "${task.text}" +File: "${path.relative(opts.repoRoot ?? ctx.workspaceRoot, task.filePath)}" (line ${task.lineIndex + 1}) + +## CRITICAL EXECUTION CONSTRAINTS +1. 🎯 STAY HIGHLY FOCUSED: Your only objective is to implement this specific task. Do NOT wander, do NOT explore other unrelated parts of the codebase, and do NOT attempt unrelated tasks. +2. 🚫 NO EXPLORATION COMMANDS: DO NOT execute generic orientation/search commands like 'ls', 'find', 'pwd', 'grep', 'git diff', 'git status'. You already know the repository structure. Go straight to editing or reading the targeted files. +3. 🛠️ TOGGLE CHECKBOX: Once your implementation is done, you MUST read and rewrite "${path.relative(opts.repoRoot ?? ctx.workspaceRoot, task.filePath)}" at line ${task.lineIndex + 1} to change "- [ ]" to "- [x]". +4. 🔴 NO COMMITS: Do NOT run 'git commit' or 'git push'. The platform handles committing automatically after you finish. +5. 🟢 COMPLETED SIGNAL: When you are finished, verify the build compiles clean using the Ralph Loop checks. If successful, stop executing tools and end your response. 
`; - const history = [{ role: "user", content: task }]; - let turn = 0; + const userPrompt = `Please implement the following task: "${task.text}" and then check it off in the task list.`; + const history = [{ role: "user", content: userPrompt }]; + let subTurn = 0; + const SUB_MAX_TURNS = 40; let toolCallsSinceText = 0; let roundsSinceText = 0; const toolFingerprints = []; - let loopBreakReason = null; + let ralphIteration = 0; function fingerprintToolCall(tc) { - if (tc.name === "shell_exec") { - const cmd = String(tc.args?.command ?? "").trim(); - const verb = cmd.split("&&").map(s => s.trim()).find(s => !s.startsWith("cd "))?.split(/\s+/)[0] ?? "shell"; - return `shell_exec:${verb}`; + const name = tc.name; + const args = tc.args ?? {}; + if (name === "shell_exec") { + const cmd = String(args.command ?? "").trim(); + const firstWord = cmd.split(/\s+/)[0] ?? "shell"; + return `shell_exec:${firstWord}`; } - if (tc.name === "fs_write" || tc.name === "fs_edit" || tc.name === "fs_read") { - return `${tc.name}:${tc.args?.path}`; + // Determine target based on most common descriptive parameter keys + const target = args.path ?? + args.pattern ?? + args.command ?? + args.commandId ?? + args.appUuid ?? + args.uuid ?? + ""; + if (target) { + return `${name}:${target}`; } - return `${tc.name}:${Object.values(tc.args ?? {})[0]}`; + // Filter out common metadata like projectId, and use first real argument + const keys = Object.keys(args).filter((k) => k !== "projectId"); + if (keys.length > 0) { + return `${name}:${args[keys[0]]}`; + } + return `${name}:default`; } - while (turn < 30) { + while (subTurn < SUB_MAX_TURNS) { if (opts.isStopped()) { await emit({ ts: now(), type: "info", text: "Stopped by user." 
}); - await patchSession(opts, { status: "stopped" }); - return; + return false; } - turn++; - const isSilent = roundsSinceText >= 15 || toolCallsSinceText >= 20; + subTurn++; + const isSilent = roundsSinceText >= 8 || toolCallsSinceText >= 12; const extraSystem = isSilent - ? "\n\n[STATUS NUDGE] You have run " + - `${toolCallsSinceText} tool call(s) over ${roundsSinceText} round(s) ` + - "without sending the user any text. Before any more tool calls, " + - "send ONE short sentence describing what you are currently working " + - "on and why. The user is staring at silent tool pills." + ? "\n\n[STATUS NUDGE] Focus on completing the current task. Do not make any more tool calls without a short sentence explaining what you are working on." : ""; let resp; try { @@ -211,19 +417,25 @@ Do NOT run git commit or git push — the platform handles committing after you systemPrompt: scopedPrompt + extraSystem, messages: history, tools: config.tools, - temperature: 0.2 + temperature: 0.1, }); } catch (err) { const msg = err instanceof Error ? 
err.message : String(err); - await emit({ ts: now(), type: "error", text: `LLM error: ${msg}` }); - await patchSession(opts, { status: "failed", error: msg }); - return; + await emit({ + ts: now(), + type: "error", + text: `LLM sub-session error: ${msg}`, + }); + return false; } if (resp.error) { - await emit({ ts: now(), type: "error", text: `LLM error: ${resp.error}` }); - await patchSession(opts, { status: "failed", error: resp.error }); - return; + await emit({ + ts: now(), + type: "error", + text: `LLM sub-session error: ${resp.error}`, + }); + return false; } if (resp.text) { await emit({ ts: now(), type: "info", text: resp.text }); @@ -235,13 +447,79 @@ Do NOT run git commit or git push — the platform handles committing after you toolCallsSinceText += resp.toolCalls.length; } if (!resp.toolCalls.length) { - await patchSession(opts, { status: "completed" }); - return; + if (opts.repoRoot && ralphIteration < 3) { + await emit({ + ts: now(), + type: "info", + text: "🔍 [Ralph Loop] Verifying build for this task...", + }); + const verification = runBuildVerification(opts.repoRoot, opts.appPath); + if (!verification.success) { + ralphIteration++; + await emit({ + ts: now(), + type: "error", + text: `❌ [Ralph Loop] Build failed (iteration ${ralphIteration}/3) for this task.`, + }); + history.push({ + role: "user", + content: `Your previous edits completed, but the project's build check failed with compilation errors. + +========================================= +🚨 SURGICAL HEALING PROTOCOL ACTIVE 🚨 +========================================= +The project's compilation/build has failed. You are currently in an autonomous, auto-correcting healing loop and must fix this compilation error immediately. + +To prevent cognitive loop spirals and command limits, you MUST follow this strict, non-negotiable troubleshooting protocol: + +1. 
🚫 STRICTLY BLOCK EXPLORATION: DO NOT execute general directory exploration or orientation commands such as 'ls', 'find', 'pwd', 'grep', 'git status', 'git diff', or other search commands. You do not need to look around. +2. 🎯 SURGICAL TARGETING: Scan the compiler error logs below to locate the EXACT filename, line number, and column where the compilation failed. +3. 🛠️ IMMEDIATE CORRECTION: Read that file immediately using your specific file-reading tool (using precise start/end lines if it is large) and apply a targeted, surgical edit to correct the exact syntax or type error. Do not write a placeholder or partial fix. + +Here are the precise compilation errors from the compiler: +\`\`\`text +${verification.error} +\`\`\` + +Implement the exact fix directly in the code now.`, + }); + continue; + } + else { + await emit({ + ts: now(), + type: "info", + text: "🟢 [Ralph Loop] Build passed successfully! 0 errors.", + }); + } + } + let diskChecked = false; + try { + const fileContent = fs.readFileSync(task.filePath, "utf8"); + const lines = fileContent.split("\n"); + const line = lines[task.lineIndex]; + if (line) { + const match = line.match(/^(\s*)-\s*\[([ xX])\]\s+(.+)$/); + if (match && match[2].toLowerCase() === "x") { + diskChecked = true; + } + } + } + catch { } + if (!diskChecked) { + await emit({ + ts: now(), + type: "info", + text: `✍️ [Orchestrator] Task implementation completed. Automatically checking off task on disk.`, + }); + toggleTaskOnDisk(task); + } + return true; } for (const tc of resp.toolCalls) { toolFingerprints.push(fingerprintToolCall(tc)); } - const window = toolFingerprints.slice(-10); + const window = toolFingerprints.slice(-12); const counts = new Map(); for (const fp of window) counts.set(fp, (counts.get(fp) ?? 
0) + 1); @@ -250,20 +528,30 @@ Do NOT run git commit or git push — the platform handles committing after you for (const [fp, n] of counts.entries()) { if (n > maxRepeats) { maxRepeats = n; - repeatedCmd = fp.split("|")[0]; + repeatedCmd = fp; } } if (maxRepeats >= 6) { - loopBreakReason = `Repeated ${repeatedCmd} ${maxRepeats}× in last 10 calls`; - break; + await emit({ + ts: now(), + type: "error", + text: `Loop detected in subtask execution (repeated "${repeatedCmd}" ${maxRepeats}x in last 12 calls), breaking loop.`, + }); + return false; } history.push({ role: "assistant", content: resp.text, - toolCalls: resp.toolCalls + toolCalls: resp.toolCalls, }); for (const tc of resp.toolCalls) { - await emit({ ts: now(), type: "step", text: `Running ${tc.name}...` }); + if (opts.isStopped()) + return false; + await emit({ + ts: now(), + type: "step", + text: `Running ${tc.name}...`, + }); let result; try { result = await (0, tools_1.executeTool)(tc.name, tc.args, ctx); @@ -276,15 +564,98 @@ Do NOT run git commit or git push — the platform handles committing after you role: "tool", content: resultStr, toolCallId: tc.id, - toolName: tc.name + toolName: tc.name, }); } } - if (loopBreakReason) { - await emit({ ts: now(), type: "error", text: `Loop broken: ${loopBreakReason}` }); - await patchSession(opts, { status: "failed", error: loopBreakReason }); + await emit({ + ts: now(), + type: "error", + text: `Subtask exceeded maximum turns limit of ${SUB_MAX_TURNS}.`, + }); + return false; +} +async function runSessionAgent(config, task, ctx, opts) { + const emit = async (line) => { + console.log(`[session ${opts.sessionId}] ${line.type}: ${line.text}`); + await patchSession(opts, { outputLine: line }); + }; + await emit({ + ts: now(), + type: "info", + text: `Agent started offline delegation orchestrator in ${opts.appPath}`, + }); + const repoRoot = opts.repoRoot ?? 
ctx.workspaceRoot; + let tasks = parseTaskItems(repoRoot); + if (tasks.length === 0) { + await emit({ + ts: now(), + type: "info", + text: "🤖 [Orchestrator] No active tasks backlog found on disk. Analyzing prompt to plan atomic execution backlog...", + }); + try { + await generateBacklogFromPrompt(task, repoRoot); + tasks = parseTaskItems(repoRoot); + } + catch (err) { + await emit({ + ts: now(), + type: "error", + text: `❌ [Orchestrator] Failed to generate backlog: ${err.message || String(err)}`, + }); + await patchSession(opts, { + status: "failed", + error: "Backlog generation failed", + }); + return; + } + } + const openTasks = tasks.filter((t) => !t.isChecked); + if (openTasks.length === 0) { + await emit({ + ts: now(), + type: "info", + text: "🟢 [Orchestrator] All tasks in the queue are already completed!", + }); + await patchSession(opts, { status: "completed" }); + return; + } + await emit({ + ts: now(), + type: "info", + text: `🤖 [Orchestrator] Found ${openTasks.length} open tasks. Executing task-by-task Meta-Loop...`, + }); + for (let i = 0; i < openTasks.length; i++) { + const currentTask = openTasks[i]; + await emit({ + ts: now(), + type: "info", + text: `🚀 [Orchestrator] Task ${i + 1}/${openTasks.length}: "${currentTask.text}"`, + }); + const success = await runSingleSubTask(currentTask, config, ctx, opts, emit); + if (!success) { + await emit({ + ts: now(), + type: "error", + text: `❌ [Orchestrator] Bailed out! 
Task execution failed on: "${currentTask.text}".`, + }); + await patchSession(opts, { + status: "failed", + error: `Delegation loop halted at task: "${currentTask.text}"`, + }); + return; + } + commitTaskProgress(currentTask, repoRoot); + } + await emit({ + ts: now(), + type: "info", + text: `🎉 [Orchestrator] All delegated tasks completed successfully with green compilation builds!`, + }); + if (opts.autoApprove) { + await autoCommitAndDeploy(opts, task, emit); } else { - await patchSession(opts, { status: "failed", error: "Max turns reached" }); + await patchSession(opts, { status: "completed" }); } } diff --git a/vibn-agent-runner/dist/llm/gemini-chat.js b/vibn-agent-runner/dist/llm/gemini-chat.js index 93a35257..c868ed9c 100644 --- a/vibn-agent-runner/dist/llm/gemini-chat.js +++ b/vibn-agent-runner/dist/llm/gemini-chat.js @@ -78,11 +78,22 @@ async function callGeminiChat(opts) { const fns = toGeminiFunctions(opts.tools ?? []); if (fns) config.tools = fns; + console.log("\n========================================================"); + console.log("➡️ [GEMINI API REQUEST]"); + console.log("========================================================"); + console.log(`System Prompt: ${config.systemInstruction ? config.systemInstruction.slice(0, 1000) + "..." 
: "None"}`); + console.log("Contents Payload:", JSON.stringify(toGeminiContents(opts.messages), null, 2)); + console.log("========================================================\n"); const response = await ai.models.generateContent({ model: GEMINI_MODEL, contents: toGeminiContents(opts.messages), config, }); + console.log("\n========================================================"); + console.log("⬅️ [GEMINI API RESPONSE]"); + console.log("========================================================"); + console.log("Raw Candidates:", JSON.stringify(response.candidates, null, 2)); + console.log("========================================================\n"); let text = ""; let thoughts = ""; const toolCalls = []; @@ -131,6 +142,12 @@ async function* streamGeminiChat(opts) { const fns = toGeminiFunctions(opts.tools ?? []); if (fns) config.tools = fns; + console.log("\n========================================================"); + console.log("➡️ [GEMINI STREAM REQUEST]"); + console.log("========================================================"); + console.log(`System Prompt: ${config.systemInstruction ? config.systemInstruction.slice(0, 1000) + "..." : "None"}`); + console.log("Contents Payload:", JSON.stringify(toGeminiContents(opts.messages), null, 2)); + console.log("========================================================\n"); const streamResult = await ai.models.generateContentStream({ model: GEMINI_MODEL, contents: toGeminiContents(opts.messages), diff --git a/vibn-agent-runner/dist/prompts/coder.js b/vibn-agent-runner/dist/prompts/coder.js index b7507892..b857b737 100644 --- a/vibn-agent-runner/dist/prompts/coder.js +++ b/vibn-agent-runner/dist/prompts/coder.js @@ -4,15 +4,21 @@ const loader_1 = require("./loader"); // Because we deleted the local tools and adopted the full VIBN_TOOL_DEFINITIONS schema, // the runner agent now has the exact same capabilities as the frontend UI agent! // It uses fs_*, shell_exec, dev_server_*, apps_*, and ship. 
-(0, loader_1.registerPrompt)('coder', ` +(0, loader_1.registerPrompt)("coder", ` You are Vibn AI — the technical co-founder of every Vibn user. You are currently running headlessly in the background. The user is offline or waiting for you to finish. Your job is to read the task assigned to you, implement it, test it, and ship it to Coolify. Do NOT ask the user questions. If you get stuck, log the error and stop. +# Specifications & Product Requirements Docs (.vibncode/specs/) + +The project's technical specifications, data models, UX principles, and security requirements live in \`.vibncode/specs/\` as plain, Git-tracked Markdown files. This is your single source of truth: +- 📖 **PRIOR REFERENCE IS MANDATORY:** BEFORE starting any task or writing any code, you MUST use your file-reading tools to locate and read the matching specification file (for example, read \`.vibncode/specs/05-data-model.md\` when setting up database schemas or Prisma models, or \`.vibncode/specs/04-compliance-security.md\` when writing authorization/password hashing logic). Adhere exactly to the planned specifications to avoid drift. +- ✍️ **PROACTIVE DOCUMENTATION:** Keep these spec sheets updated. If you make an architectural decision or refine a schema, proactively update the matching markdown file in \`.vibncode/specs/\`. + # Mode: Action -Since you are running autonomously, you must take action immediately. +Since you are running autonomously, you must take action immediately. # What "done" looks like @@ -24,17 +30,17 @@ A turn ends when you have fully completed the task AND shipped the code. # Hard rules — non-negotiable **Honesty about tool results:** -- **Cite the tool result, don't claim from memory.** -- **Trust the \`ok\` field.** Every tool result carries \`ok: true | false\`. If \`ok\` is false (or \`exitCode\` is non-zero, or \`healthCheck.status\` is >= 400), the operation FAILED. 
-- **\`fs_write\` and \`fs_edit\` results carry \`sha256\` and \`bytes\` on success.** -- **\`dev_server_start\` results carry \`healthCheck\` on success.** Before saying "the preview is ready," confirm \`healthCheck.status === 200\`. +- **Cite the tool result, don't claim from memory.** +- **Trust the \`ok\` field.** Every tool result carries \`ok: true | false\`. If \`ok\` is false (or \`exitCode\` is non-zero, or \`healthCheck.status\` is >= 400), the operation FAILED. +- **\`fs_write\` and \`fs_edit\` results carry \`sha256\` and \`bytes\` on success.** +- **\`dev_server_start\` results carry \`healthCheck\` on success.** Before saying "the preview is ready," confirm \`healthCheck.status === 200\`. **Anchoring and scope:** -- **Anchor on current state before troubleshooting.** -- **Always pass \`projectId\`** to \`apps_create\` / \`databases_create\`. -- **Always \`apps_list { projectId }\` BEFORE \`apps_create\`** for a sanity check, and **always \`apps_templates_search\` BEFORE \`apps_create\`** for known third-party apps. +- **Anchor on current state before troubleshooting.** +- **Always pass \`projectId\`** to \`apps_create\` / \`databases_create\`. +- **Always \`apps_list { projectId }\` BEFORE \`apps_create\`** for a sanity check, and **always \`apps_templates_search\` BEFORE \`apps_create\`** for known third-party apps. - **Trust idempotency.** When \`apps_create\` / \`databases_create\` returns \`alreadyExisted: true\`, your job is done — use the returned uuid and move on. -- **Never delete-and-recreate to escape an error.** "Container name already in use" → \`apps_unstick { uuid }\` → \`apps_deploy { uuid }\`. +- **Never delete-and-recreate to escape an error.** "Container name already in use" → \`apps_unstick { uuid }\` → \`apps_deploy { uuid }\`. **Stopping conditions:** - **If a deploy or tool call fails twice with the same error, STOP.** @@ -61,15 +67,15 @@ Each project has a persistent \`vibn-dev\` container. 
Edit files via \`fs_*\` an **Path convention for fs_* tools:** Pass paths relative to the project root — \`src/app/page.tsx\`, NOT \`/workspace/slug/src/app/page.tsx\` and NOT \`slug/src/app/page.tsx\`. **Dev servers** (preview URL via \`*.preview.vibnai.com\` wildcard): -- \`dev_server_start { projectId, command, port: 3000 }\` is a **one-shot** call. It kills old processes on the port, checks the port is free, sets HOST=0.0.0.0 + PORT, launches your command, and returns a \`previewUrl\` plus a \`healthCheck\` block. -- **Port \`3000\` is reserved for the primary user-facing UI.** +- \`dev_server_start { projectId, command, port: 3000 }\` is a **one-shot** call. It kills old processes on the port, checks the port is free, sets HOST=0.0.0.0 + PORT, launches your command, and returns a \`previewUrl\` plus a \`healthCheck\` block. +- **Port \`3000\` is reserved for the primary user-facing UI.** - \`dev_server_stop\` / \`dev_server_list\` / \`dev_server_logs\` — use only AFTER a failed start, to diagnose the error the function returned. Never on success. **Verify the page actually renders:** - After \`dev_server_start\` returns a \`previewUrl\` AND \`healthCheck.status === 200\`, call \`browser_console { url: previewUrl }\` to capture frontend console errors. - If \`browser_console\` returns errors, fix them with \`fs_edit\` before declaring done. A green \`healthCheck\` plus a clean console is the real "done" signal for UI work. -**Visual QA:** \`request_visual_qa { targetPath }\` critiques a UI file against a 5-dim design rubric. **Call this whenever you modify visual UI code** before returning the \`previewUrl\`. +**Visual QA:** \`request_visual_qa { targetPath }\` critiques a UI file against a 5-dim design rubric. **Call this whenever you modify visual UI code** before returning the \`previewUrl\`. **Sentry is auto-provisioned per project.** \`NEXT_PUBLIC_SENTRY_DSN\` and \`SENTRY_AUTH_TOKEN\` are injected into the Coolify app env automatically by \`apps_create\`. 
@@ -77,7 +83,7 @@ Each project has a persistent \`vibn-dev\` container. Edit files via \`fs_*\` an For editing files in existing repos, ALWAYS use \`fs_*\` in the dev container — \`ship\` commits and pushes. ## Troubleshooting -- **"exited (1)" / deploy stuck:** \`apps_logs { uuid }\` + \`apps_containers_ps { uuid }\`. +- **"exited (1)" / deploy stuck:** \`apps_logs { uuid }\` + \`apps_containers_ps { uuid }\`. - **502 / "no available server":** \`apps_get\`; if \`fqdn\` is empty, attach a domain. - **"tenant" / "does not belong to":** uuid not in this workspace. Re-list with \`apps_list\`. - **Compose stack weird:** \`apps_repair { uuid }\` re-applies Traefik labels + port forwarding. diff --git a/vibn-agent-runner/dist/server.js b/vibn-agent-runner/dist/server.js index b469b95f..d6ef32c2 100644 --- a/vibn-agent-runner/dist/server.js +++ b/vibn-agent-runner/dist/server.js @@ -47,179 +47,299 @@ const app = (0, express_1.default)(); app.use((0, cors_1.default)()); const startTime = new Date(); // Raw body capture for webhook HMAC — must come before express.json() -app.use('/webhook/gitea', express_1.default.raw({ type: '*/*' })); +app.use("/webhook/gitea", express_1.default.raw({ type: "*/*" })); app.use(express_1.default.json()); const PORT = process.env.PORT || 3333; // --------------------------------------------------------------------------- // Build ToolContext from environment variables // --------------------------------------------------------------------------- -function ensureWorkspace(repo) { - const base = process.env.WORKSPACE_BASE || '/workspaces'; +function ensureWorkspace(repo, sessionId) { + const base = process.env.WORKSPACE_BASE || "/workspaces"; if (!repo) { - const dir = path.join(base, 'default'); + const dir = path.join(base, "default"); fs.mkdirSync(dir, { recursive: true }); return dir; } - const dir = path.join(base, repo.replace('/', '_')); + const mainRepoDir = path.join(base, repo.replace("/", "_")); const gitea = { - apiUrl: 
process.env.GITEA_API_URL || '', - apiToken: process.env.GITEA_API_TOKEN || '', - username: process.env.GITEA_USERNAME || '' + apiUrl: process.env.GITEA_API_URL || "", + apiToken: process.env.GITEA_API_TOKEN || "", + username: process.env.GITEA_USERNAME || "", }; - if (!fs.existsSync(path.join(dir, '.git'))) { - fs.mkdirSync(dir, { recursive: true }); - const authedUrl = `${gitea.apiUrl}/${repo}.git` - .replace('https://', `https://${gitea.username}:${gitea.apiToken}@`); + // 1. Ensure main repo clone exists + if (!fs.existsSync(path.join(mainRepoDir, ".git"))) { + fs.mkdirSync(mainRepoDir, { recursive: true }); + const authedUrl = `${gitea.apiUrl}/${repo}.git`.replace("https://", `https://${gitea.username}:${gitea.apiToken}@`); try { - (0, child_process_1.execSync)(`git clone "${authedUrl}" "${dir}"`, { stdio: 'pipe' }); + (0, child_process_1.execSync)(`git clone "${authedUrl}" "${mainRepoDir}"`, { stdio: "pipe" }); } catch { // Repo may not exist yet — just init - (0, child_process_1.execSync)(`git init`, { cwd: dir, stdio: 'pipe' }); - (0, child_process_1.execSync)(`git remote add origin "${authedUrl}"`, { cwd: dir, stdio: 'pipe' }); + (0, child_process_1.execSync)(`git init`, { cwd: mainRepoDir, stdio: "pipe" }); + (0, child_process_1.execSync)(`git remote add origin "${authedUrl}"`, { + cwd: mainRepoDir, + stdio: "pipe", + }); } } - return dir; + // 2. If no sessionId, fall back to main repo clone directly + if (!sessionId) { + return mainRepoDir; + } + // 3. Isolated Worktree Directory per task session + const taskWorktreePath = path.join(base, "tasks", sessionId); + fs.mkdirSync(path.join(base, "tasks"), { recursive: true }); + // 4. 
Create isolated worktree if not yet active + if (!fs.existsSync(path.join(taskWorktreePath, ".git"))) { + // Clean up any stale directory from previous failed runs before adding worktree + if (fs.existsSync(taskWorktreePath)) { + try { + fs.rmSync(taskWorktreePath, { recursive: true, force: true }); + } + catch { } + } + try { + console.log(`[worktree] Adding isolated git worktree for session ${sessionId} at ${taskWorktreePath}...`); + // Check if the branch task-sessionId already exists in the main repository + let branchExists = false; + try { + const branches = (0, child_process_1.execSync)(`git branch --list "task-${sessionId}"`, { + cwd: mainRepoDir, + }).toString(); + branchExists = branches.trim().length > 0; + } + catch { + branchExists = false; + } + if (branchExists) { + // Checkout the existing branch into the new worktree path + (0, child_process_1.execSync)(`git worktree add -f "${taskWorktreePath}" "task-${sessionId}"`, { cwd: mainRepoDir, stdio: "pipe" }); + } + else { + // Create and checkout a new isolated branch + (0, child_process_1.execSync)(`git worktree add -f -b "task-${sessionId}" "${taskWorktreePath}"`, { cwd: mainRepoDir, stdio: "pipe" }); + } + } + catch (e) { + console.error("[worktree] Failed to add git worktree, falling back to main clone:", e.message || String(e)); + return mainRepoDir; + } + } + // 5. Sync active workspace edits from mainRepoDir (containing Monaco edits) to taskWorktreePath + if (taskWorktreePath !== mainRepoDir) { + try { + console.log(`[worktree] Syncing active workspace edits from ${mainRepoDir} to ${taskWorktreePath}...`); + // Use rsync to copy active files while preserving structure and deleting files deleted in mainRepoDir + // Exclude node_modules, .git, .next, .vibncode/settings.json, etc. 
+ (0, child_process_1.execSync)(`rsync -ar --delete --exclude="node_modules" --exclude=".git" --exclude=".next" --exclude=".vibncode/settings.json" "${mainRepoDir}/" "${taskWorktreePath}/"`, { stdio: "pipe" }); + } + catch (syncErr) { + console.warn("[worktree] rsync failed, falling back to cp:", syncErr.message || syncErr); + } + } + return taskWorktreePath; } -function buildContext(repo) { - const workspaceRoot = ensureWorkspace(repo); +function buildContext(repo, sessionId) { + const workspaceRoot = ensureWorkspace(repo, sessionId); return { workspaceRoot, gitea: { - apiUrl: process.env.GITEA_API_URL || '', - apiToken: process.env.GITEA_API_TOKEN || '', - username: process.env.GITEA_USERNAME || '' + apiUrl: process.env.GITEA_API_URL || "", + apiToken: process.env.GITEA_API_TOKEN || "", + username: process.env.GITEA_USERNAME || "", }, coolify: { - apiUrl: process.env.COOLIFY_API_URL || '', - apiToken: process.env.COOLIFY_API_TOKEN || '' + apiUrl: process.env.COOLIFY_API_URL || "", + apiToken: process.env.COOLIFY_API_TOKEN || "", }, - mcpToken: '', - vibnApiUrl: 'http://localhost:3000', - memoryUpdates: [] + mcpToken: "", + vibnApiUrl: process.env.VIBN_API_URL ?? "https://vibnai.com", + memoryUpdates: [], }; } +function cleanupWorkspace(repo, sessionId) { + const base = process.env.WORKSPACE_BASE || "/workspaces"; + const mainRepoDir = path.join(base, repo.replace("/", "_")); + const taskWorktreePath = path.join(base, "tasks", sessionId); + if (fs.existsSync(taskWorktreePath)) { + try { + console.log(`[worktree] Pruning and removing git worktree for session ${sessionId}...`); + // 1. Tell git to remove the worktree references + (0, child_process_1.execSync)(`git worktree remove --force "${taskWorktreePath}"`, { + cwd: mainRepoDir, + stdio: "pipe", + }); + // 2. Delete the temporary branch from the main repository index + (0, child_process_1.execSync)(`git branch -D "task-${sessionId}"`, { + cwd: mainRepoDir, + stdio: "pipe", + }); + // 3. 
Force clean directory + if (fs.existsSync(taskWorktreePath)) { + fs.rmSync(taskWorktreePath, { recursive: true, force: true }); + } + } + catch (e) { + console.warn(`[worktree] Non-fatal cleanup error for session ${sessionId}:`, e.message || String(e)); + } + } +} // --------------------------------------------------------------------------- // Routes // --------------------------------------------------------------------------- // Health check -app.get('/health', (_req, res) => { - res.json({ status: 'ok', timestamp: new Date().toISOString() }); +app.get("/health", (_req, res) => { + res.json({ status: "ok", timestamp: new Date().toISOString() }); }); // --------------------------------------------------------------------------- // GitHub mirror — clone a public GitHub repo and push to Gitea as-is // --------------------------------------------------------------------------- -app.post('/api/mirror', async (req, res) => { +app.post("/api/mirror", async (req, res) => { const { github_url, gitea_repo, project_name, github_token } = req.body; if (!github_url || !gitea_repo) { - res.status(400).json({ error: '"github_url" and "gitea_repo" are required' }); + res + .status(400) + .json({ error: '"github_url" and "gitea_repo" are required' }); return; } - const { execSync } = await Promise.resolve().then(() => __importStar(require('child_process'))); - const fs = await Promise.resolve().then(() => __importStar(require('fs'))); - const path = await Promise.resolve().then(() => __importStar(require('path'))); - const os = await Promise.resolve().then(() => __importStar(require('os'))); + const { execSync } = await Promise.resolve().then(() => __importStar(require("child_process"))); + const fs = await Promise.resolve().then(() => __importStar(require("fs"))); + const path = await Promise.resolve().then(() => __importStar(require("path"))); + const os = await Promise.resolve().then(() => __importStar(require("os"))); const mirrorId = `mirror_${Date.now()}`; const tmpDir = 
path.join(os.tmpdir(), mirrorId); const gitea = { - apiUrl: process.env.GITEA_API_URL || '', - apiToken: process.env.GITEA_API_TOKEN || '', - username: process.env.GITEA_USERNAME || '' + apiUrl: process.env.GITEA_API_URL || "", + apiToken: process.env.GITEA_API_TOKEN || "", + username: process.env.GITEA_USERNAME || "", }; try { // Build authenticated Gitea push URL // GITEA_API_URL is like https://git.vibnai.com — strip /api/v1 if present - const giteaBase = gitea.apiUrl.replace(/\/api\/v1\/?$/, ''); - const authedPushUrl = `${giteaBase}/${gitea_repo}.git` - .replace('https://', `https://${gitea.username}:${gitea.apiToken}@`); + const giteaBase = gitea.apiUrl.replace(/\/api\/v1\/?$/, ""); + const authedPushUrl = `${giteaBase}/${gitea_repo}.git`.replace("https://", `https://${gitea.username}:${gitea.apiToken}@`); console.log(`[mirror] Cloning ${github_url} → ${tmpDir}`); fs.mkdirSync(tmpDir, { recursive: true }); // Build authenticated clone URL for private repos let cloneUrl = github_url; if (github_token) { - cloneUrl = github_url.replace('https://', `https://${github_token}@`); + cloneUrl = github_url.replace("https://", `https://${github_token}@`); } // Mirror-clone the GitHub repo (preserves all branches + tags) execSync(`git clone --mirror "${cloneUrl}" "${tmpDir}/.git"`, { - stdio: 'pipe', - timeout: 120000 + stdio: "pipe", + timeout: 120000, }); - execSync(`git config --bool core.bare false`, { cwd: tmpDir, stdio: 'pipe' }); - execSync(`git checkout`, { cwd: tmpDir, stdio: 'pipe' }); + execSync(`git config --bool core.bare false`, { + cwd: tmpDir, + stdio: "pipe", + }); + execSync(`git checkout`, { cwd: tmpDir, stdio: "pipe" }); // Point origin at Gitea and push all refs - execSync(`git remote set-url origin "${authedPushUrl}"`, { cwd: tmpDir, stdio: 'pipe' }); - execSync(`git push --mirror origin`, { cwd: tmpDir, stdio: 'pipe', timeout: 120000 }); + execSync(`git remote set-url origin "${authedPushUrl}"`, { + cwd: tmpDir, + stdio: "pipe", + }); + 
execSync(`git push --mirror origin`, { + cwd: tmpDir, + stdio: "pipe", + timeout: 120000, + }); console.log(`[mirror] Pushed ${gitea_repo} successfully`); res.json({ success: true, gitea_repo, github_url }); } catch (err) { const msg = err instanceof Error ? err.message : String(err); console.error(`[mirror] Failed:`, msg); - res.status(500).json({ error: 'Mirror failed', details: msg }); + res.status(500).json({ error: "Mirror failed", details: msg }); } finally { // Clean up temp dir try { - const { execSync: rm } = await Promise.resolve().then(() => __importStar(require('child_process'))); - rm(`rm -rf "${tmpDir}"`, { stdio: 'pipe' }); + const { execSync: rm } = await Promise.resolve().then(() => __importStar(require("child_process"))); + rm(`rm -rf "${tmpDir}"`, { stdio: "pipe" }); + } + catch { + /* best effort */ } - catch { /* best effort */ } } }); // List available agents -app.get('/api/agents', (_req, res) => { - const agents = Object.values(agents_1.AGENTS).map(a => ({ +app.get("/api/agents", (_req, res) => { + const agents = Object.values(agents_1.AGENTS).map((a) => ({ name: a.name, description: a.description, - tools: a.tools.map(t => t.name) + tools: a.tools.map((t) => t.name), })); res.json(agents); }); const activeSessions = new Map(); -app.post('/agent/execute', async (req, res) => { - const { sessionId, projectId, appName, appPath, giteaRepo, task, continueTask, autoApprove, coolifyAppUuid, } = req.body; +app.post("/agent/execute", async (req, res) => { + const { sessionId, projectId, appName, appPath: rawAppPath, giteaRepo, task, continueTask, autoApprove, coolifyAppUuid, mcpToken, } = req.body; + const appPath = rawAppPath === undefined || rawAppPath === null || rawAppPath === "" + ? "." 
+ : rawAppPath; if (!sessionId || !projectId || !appPath || !task) { - res.status(400).json({ error: 'sessionId, projectId, appPath and task are required' }); + res + .status(400) + .json({ error: "sessionId, projectId, appPath and task are required" }); return; } - const vibnApiUrl = process.env.VIBN_API_URL ?? 'https://vibnai.com'; + const vibnApiUrl = process.env.VIBN_API_URL ?? "https://vibnai.com"; + const patchHeaders = { + "Content-Type": "application/json", + ...(process.env.AGENT_RUNNER_SECRET + ? { "x-agent-runner-secret": process.env.AGENT_RUNNER_SECRET } + : {}), + }; // Register session as active const sessionState = { stopped: false }; activeSessions.set(sessionId, sessionState); // Respond immediately — execution is async - res.status(202).json({ sessionId, status: 'running' }); + res.status(202).json({ sessionId, status: "running" }); // Build workspace context — clone/update the Gitea repo if provided let ctx; try { - ctx = buildContext(giteaRepo); + ctx = buildContext(giteaRepo, sessionId); } catch (err) { const msg = err instanceof Error ? err.message : String(err); - console.error('[agent/execute] buildContext failed:', msg); + console.error("[agent/execute] buildContext failed:", msg); // Notify VIBN DB of failure fetch(`${vibnApiUrl}/api/projects/${projectId}/agent/sessions/${sessionId}`, { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ status: 'failed', error: msg }), + method: "PATCH", + headers: patchHeaders, + body: JSON.stringify({ status: "failed", error: msg }), }).catch(() => { }); activeSessions.delete(sessionId); return; } // Capture repo root before scoping to appPath — needed for git commit in auto-approve const repoRoot = ctx.workspaceRoot; + // Wire the ToolContext so its tools can call back into the VIBN frontend MCP + // with the right URL and auth. 
buildContext() defaults these to safe values, + // but the authoritative ones come from env (VIBN_API_URL) and the frontend + // (mcpToken passed in the /agent/execute body). Without this, tools fetch + // http://localhost:3000 with no token and fail with "fetch failed". + ctx.vibnApiUrl = vibnApiUrl; + ctx.mcpToken = mcpToken ?? ctx.mcpToken; + ctx.projectId = projectId; // Scope workspace to the app subdirectory so the agent works there naturally if (appPath) { - const path = require('path'); + const path = require("path"); ctx.workspaceRoot = path.join(ctx.workspaceRoot, appPath); - const fs = require('fs'); + const fs = require("fs"); fs.mkdirSync(ctx.workspaceRoot, { recursive: true }); } - const agentConfig = agents_1.AGENTS['Coder']; + const agentConfig = agents_1.AGENTS["Coder"]; if (!agentConfig) { fetch(`${vibnApiUrl}/api/projects/${projectId}/agent/sessions/${sessionId}`, { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ status: 'failed', error: 'Coder agent not registered' }), + method: "PATCH", + headers: patchHeaders, + body: JSON.stringify({ + status: "failed", + error: "Coder agent not registered", + }), }).catch(() => { }); activeSessions.delete(sessionId); return; @@ -243,38 +363,41 @@ app.post('/agent/execute', async (req, res) => { coolifyApiUrl: process.env.COOLIFY_API_URL, coolifyApiToken: process.env.COOLIFY_API_TOKEN, }) - .catch(err => { + .catch((err) => { const msg = err instanceof Error ? 
err.message : String(err); console.error(`[agent/execute] session ${sessionId} crashed:`, msg); fetch(`${vibnApiUrl}/api/projects/${projectId}/agent/sessions/${sessionId}`, { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ status: 'failed', error: msg }), + method: "PATCH", + headers: patchHeaders, + body: JSON.stringify({ status: "failed", error: msg }), }).catch(() => { }); }) .finally(() => { activeSessions.delete(sessionId); + if (giteaRepo && sessionId) { + cleanupWorkspace(giteaRepo, sessionId); + } }); }); -app.post('/agent/stop', (req, res) => { +app.post("/agent/stop", (req, res) => { const { sessionId } = req.body; if (!sessionId) { - res.status(400).json({ error: 'sessionId required' }); + res.status(400).json({ error: "sessionId required" }); return; } const session = activeSessions.get(sessionId); if (session) { session.stopped = true; - res.json({ status: 'stopped' }); + res.json({ status: "stopped" }); } else { - res.status(404).json({ error: 'session not found or not running' }); + res.status(404).json({ error: "session not found or not running" }); } }); app.listen(PORT, () => { console.log(`AgentRunner listening on port ${PORT}`); - console.log(`Agents available: ${Object.keys(agents_1.AGENTS).join(', ')}`); + console.log(`Agents available: ${Object.keys(agents_1.AGENTS).join(", ")}`); if (!process.env.GOOGLE_API_KEY) { - console.warn('WARNING: GOOGLE_API_KEY is not set — agents will fail'); + console.warn("WARNING: GOOGLE_API_KEY is not set — agents will fail"); } }); diff --git a/vibn-agent-runner/dist/test-execute-hardening.d.ts b/vibn-agent-runner/dist/test-execute-hardening.d.ts new file mode 100644 index 00000000..cb0ff5c3 --- /dev/null +++ b/vibn-agent-runner/dist/test-execute-hardening.d.ts @@ -0,0 +1 @@ +export {}; diff --git a/vibn-agent-runner/dist/test-execute-hardening.js b/vibn-agent-runner/dist/test-execute-hardening.js new file mode 100644 index 00000000..d5dfde31 --- /dev/null +++ 
b/vibn-agent-runner/dist/test-execute-hardening.js @@ -0,0 +1,139 @@ +"use strict"; +var __importDefault = (this && this.__importDefault) || function (mod) { + return (mod && mod.__esModule) ? mod : { "default": mod }; +}; +Object.defineProperty(exports, "__esModule", { value: true }); +const child_process_1 = require("child_process"); +const http_1 = __importDefault(require("http")); +// We will start the runner server on port 3334 +const PORT = 3334; +const BASE_URL = `http://localhost:${PORT}`; +console.log("🧪 Starting AgentRunner Hardening Test Suite..."); +// Set up environment variables +const env = { + ...process.env, + PORT: String(PORT), + AGENT_RUNNER_SECRET: "test-secret-123", + GOOGLE_API_KEY: "dummy-key-for-testing", // Pass dummy key to avoid Gemini API initialization crash + VIBN_API_URL: "http://localhost:3335", // Mock backend +}; +// Start mock backend on port 3335 to catch PATCH callbacks and verify headers +let receivedHeaders = null; +let receivedBody = null; +const mockBackend = http_1.default.createServer((req, res) => { + receivedHeaders = req.headers; + let body = ""; + req.on("data", (chunk) => { + body += chunk; + }); + req.on("end", () => { + try { + receivedBody = JSON.parse(body); + } + catch { + receivedBody = body; + } + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ ok: true })); + }); +}); +mockBackend.listen(3335, () => { + console.log("✓ Mock backend server listening on port 3335"); +}); +// Spawn the runner server +const serverProcess = (0, child_process_1.spawn)("npx", ["ts-node", "src/server.ts"], { + env, + stdio: "pipe", +}); +// Wait for server to start +serverProcess.stdout.on("data", (data) => { + const output = data.toString(); + console.log(`[Server Out] ${output.trim()}`); +}); +serverProcess.stderr.on("data", (data) => { + console.error(`[Server Err] ${data.toString()}`); +}); +// Helper function to sleep +const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); 
+async function runTests() { + // Wait 4 seconds for server to boot + await sleep(4000); + let passed = 0; + let failed = 0; + const assert = (condition, message) => { + if (condition) { + console.log(` 🟢 PASSED: ${message}`); + passed++; + } + else { + console.error(` 🔴 FAILED: ${message}`); + failed++; + } + }; + try { + // Test 1: Empty appPath should be accepted and fall back to "." + console.log("\n1️⃣ Testing appPath empty string fallback..."); + const res1 = await fetch(`${BASE_URL}/agent/execute`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + sessionId: "test-session-1", + projectId: "test-project-1", + task: "Test empty appPath", + appPath: "", // Empty string! + giteaRepo: "test-repo", + }), + }); + assert(res1.status === 202, `Should return 202, got ${res1.status}`); + const data1 = (await res1.json()); + assert(data1.sessionId === "test-session-1", `Should return correct sessionId, got ${data1.sessionId}`); + // Test 2: Missing sessionId should return 400 + console.log("\n2️⃣ Testing missing required parameters (sessionId)..."); + const res2 = await fetch(`${BASE_URL}/agent/execute`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + projectId: "test-project-1", + task: "Test missing sessionId", + appPath: ".", + }), + }); + assert(res2.status === 400, `Should return 400, got ${res2.status}`); + // Test 3: Emergency callback headers should include x-agent-runner-secret + console.log("\n3️⃣ Testing early failure callback headers..."); + // Trigger a clone failure by passing a malformed giteaRepo containing slash, + // which triggers clone instead of default workspace but will fail clone. 
+ console.log("Triggering clone failure on mock Gitea..."); + const res3 = await fetch(`${BASE_URL}/agent/execute`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + sessionId: "test-session-3", + projectId: "test-project-3", + task: "Trigger crash", + appPath: ".", + giteaRepo: "invalid_owner/invalid_repo", + }), + }); + assert(res3.status === 202, `Should return 202 Accepted, got ${res3.status}`); + // Wait for server to process async task and fail, calling our mock backend PATCH + console.log("Waiting for runner callback on mock backend..."); + await sleep(4000); + assert(receivedHeaders !== null, "Should call mock backend PATCH endpoint"); + if (receivedHeaders) { + assert(receivedHeaders["x-agent-runner-secret"] === "test-secret-123", `Callback should include secret header 'test-secret-123', got '${receivedHeaders["x-agent-runner-secret"]}'`); + assert(receivedBody && receivedBody.status === "failed", `Callback body should have status 'failed', got '${receivedBody?.status}'`); + } + } + catch (err) { + console.error("Test execution failed with exception:", err); + } + finally { + console.log("\n🧹 Cleaning up test servers..."); + serverProcess.kill(); + mockBackend.close(); + console.log(`\n📊 Tests complete. Passed: ${passed}, Failed: ${failed}`); + process.exit(failed > 0 ? 
1 : 0); + } +} +runTests(); diff --git a/vibn-agent-runner/package.json b/vibn-agent-runner/package.json index 0cacb6f0..415b585a 100644 --- a/vibn-agent-runner/package.json +++ b/vibn-agent-runner/package.json @@ -37,5 +37,6 @@ "@types/uuid": "^9.0.8", "ts-node": "^10.9.2", "typescript": "^5.4.5" - } + }, + "packageManager": "pnpm@10.33.2+sha512.a90faf6feeab71ad6c6e57f94e0fe1a12f5dcc22cd754db40ae9593eb6a3e0b6b12e3540218bb37ae083404b1f2ce6db2a4121e979829b4aff94b99f49da1cf8" } diff --git a/vibn-agent-runner/src/agent-session-runner.ts b/vibn-agent-runner/src/agent-session-runner.ts index 8dc69f1b..d25e762b 100644 --- a/vibn-agent-runner/src/agent-session-runner.ts +++ b/vibn-agent-runner/src/agent-session-runner.ts @@ -13,7 +13,85 @@ import { AgentConfig } from "./agents"; import { executeTool, ToolContext } from "./tools"; import { resolvePrompt } from "./prompts/loader"; -const MAX_TURNS = 45; +const MAX_TURNS = 80; + +function runBuildVerification( + repoRoot: string, + appPath: string, +): { success: boolean; error?: string } { + const fs = require("fs") as typeof import("fs"); + const path = require("path") as typeof import("path"); + const { execSync } = require("child_process"); + + const absoluteAppPath = path.join(repoRoot, appPath); + + // Find all directories containing package.json (excluding node_modules, .git, .next, .vibncode, dist) + const pkgDirs: string[] = []; + + function scan(dir: string) { + try { + const files = fs.readdirSync(dir); + if (files.includes("package.json")) { + pkgDirs.push(dir); + } + for (const file of files) { + if ( + file === "node_modules" || + file === ".git" || + file === ".next" || + file === ".vibncode" || + file === "dist" + ) { + continue; + } + const full = path.join(dir, file); + if (fs.statSync(full).isDirectory()) { + scan(full); + } + } + } catch {} + } + + scan(absoluteAppPath); + + if (pkgDirs.length === 0) { + return { success: true }; // No package.json anywhere, skip build check + } + + for (const dir of 
pkgDirs) { + const pkgJsonPath = path.join(dir, "package.json"); + try { + const pkg = JSON.parse(fs.readFileSync(pkgJsonPath, "utf8")); + // Skip if there's no build script or if it's the root container which is just a workspace wrapper + if (!pkg.scripts || !pkg.scripts.build || pkg.name === "workspace") { + continue; + } + + console.log( + `[Ralph Loop] Running automatic build verification: npm run build inside ${dir}...`, + ); + execSync("npm run build", { + cwd: dir, + stdio: "pipe", + timeout: 60000, + }); + } catch (err: any) { // Compilers such as tsc print diagnostics on stdout, so report both captured streams; fall back to the error message when nothing was captured. + const stderr = + [err.stdout, err.stderr].filter(Boolean).join("\n").trim() || + err.message || String(err); + console.warn( + `[Ralph Loop] Build verification failed inside ${dir}:`, + stderr, + ); + return { + success: false, + error: `Build failed in directory "${path.relative(repoRoot, dir)}":\n${stderr.slice(-3000)}`, + }; + } + } + + return { success: true }; +} export interface OutputLine { ts: string; @@ -212,81 +290,261 @@ async function autoCommitAndDeploy( // ── Main streaming execution loop ───────────────────────────────────────────── -export async function runSessionAgent( - config: AgentConfig, - task: string, - ctx: ToolContext, - opts: SessionRunOptions, +interface TaskItem { + text: string; + filePath: string; + lineIndex: number; + isChecked: boolean; + fileName: string; +} + +function findTasksDir(root: string): string | null { + const fs = require("fs") as typeof import("fs"); + const path = require("path") as typeof import("path"); + + // 1. Check if root/.vibncode/tasks exists directly + const direct = path.join(root, ".vibncode", "tasks"); + if (fs.existsSync(direct)) return direct; + + // 2. Recursively scan subdirectories (excluding node_modules, .git, etc.)
+ try { + const files = fs.readdirSync(root); + for (const file of files) { + if ( + file === "node_modules" || + file === ".git" || + file === ".next" || + file === "dist" + ) { + continue; + } + const full = path.join(root, file); + if (fs.statSync(full).isDirectory()) { + const found = findTasksDir(full); + if (found) return found; + } + } + } catch {} + return null; +} + +function parseTaskItems(repoRoot: string): TaskItem[] { + const fs = require("fs") as typeof import("fs"); + const path = require("path") as typeof import("path"); + const tasksDir = findTasksDir(repoRoot); + console.log( + `[Orchestrator] repoRoot: "${repoRoot}", resolved tasksDir: "${tasksDir}"`, + ); + if (!tasksDir) return []; + + const items: TaskItem[] = []; + try { + const files = fs + .readdirSync(tasksDir) + .filter((f: string) => f.endsWith(".md")); + console.log(`[Orchestrator] Found task files:`, files); + files.sort(); + + for (const file of files) { + const filePath = path.join(tasksDir, file); + const content = fs.readFileSync(filePath, "utf8"); + console.log( + `[Orchestrator] Reading ${file} (length: ${content.length} bytes). 
Head:\n${content.slice(0, 500)}`, + ); + + const lines = content.split("\n"); + lines.forEach((line: string, lineIndex: number) => { + const match = line.match(/^(\s*)(?:-\s*)?\[([ xX])\]\s+(.+)$/); + if (match && match[2] !== undefined && match[3] !== undefined) { + const checked = match[2].toLowerCase() === "x"; + console.log( + `[Orchestrator] Parsed line ${lineIndex + 1}: isChecked=${checked}, text="${match[3].trim()}"`, + ); + items.push({ + text: match[3].trim(), + filePath, + lineIndex, + isChecked: checked, + fileName: file, + }); + } + }); + } + } catch (err) { + console.error("[Orchestrator] Error parsing task items:", err); + } + return items; +} + +function toggleTaskOnDisk(task: TaskItem): void { // Rewrites the task's checkbox line as checked ("[x]") in its markdown file, keeping the line's indent and list dash. + const fs = require("fs") as typeof import("fs"); + const content = fs.readFileSync(task.filePath, "utf8"); + const lines = content.split("\n"); + const line = lines[task.lineIndex]; + if (line) { + const match = line.match(/^(\s*)(?:-\s*)?\[([ xX])\]\s+(.+)$/); + if (match && match[1] !== undefined && match[3] !== undefined) { + const indent = match[1] || ""; + const hasDash = /^\s*-/.test(line); // only a leading list marker counts, not a "-" inside the task text + const prefix = hasDash ? `${indent}- ` : indent; + lines[task.lineIndex] = `${prefix}[x] ${match[3]}`; + fs.writeFileSync(task.filePath, lines.join("\n"), "utf8"); + } + } +} + +async function generateBacklogFromPrompt( + taskPrompt: string, + repoRoot: string, ): Promise { - const systemPrompt = resolvePrompt(config.promptId); + const fs = require("fs") as typeof import("fs"); + const path = require("path") as typeof import("path"); + const tasksDir = path.join(repoRoot, ".vibncode", "tasks"); + fs.mkdirSync(tasksDir, { recursive: true }); - const emit = async (line: OutputLine) => { - console.log(`[session ${opts.sessionId}] ${line.type}: ${line.text}`); - await patchSession(opts, { outputLine: line }); - }; + const prompt = `You are an elite Software Engineering Orchestrator.
+Your goal is to break down the user's high-level objective into a highly detailed, sequential checklist of concrete, atomic, self-contained implementation tasks. - await emit({ - ts: now(), - type: "info", - text: `Agent starting working in ${opts.appPath}`, +High-Level Objective: +"${taskPrompt}" + +Please output a standard Markdown file containing: +1. A brief 1-sentence overview. +2. A list of tasks, where each task MUST be formatted as a standard Markdown checkbox starting with "- [ ] ": +- [ ] Implement database schema changes for ... +- [ ] Add endpoint handler for ... +- [ ] Write tests ... + +Be extremely thorough and break the objective down into small, digestible units of work (e.g. 5-15 tasks). +Do NOT include any extra conversational text or explanations. Just output the clean markdown.`; + + const resp = await callVibnChat({ + systemPrompt: + "You are a precise technical orchestrator who only outputs markdown checklist files.", + messages: [{ role: "user", content: prompt }], + temperature: 0.1, }); - // Scope the system prompt to the specific app within the monorepo + const content = resp.text || `# Delegated Backlog\n\n- [ ] ${taskPrompt}`; + const backlogPath = path.join(tasksDir, "00-delegated-backlog.md"); + fs.writeFileSync(backlogPath, content, "utf8"); +} + +function commitTaskProgress(task: TaskItem, repoRoot: string) { // Stages and commits everything under repoRoot after a task; any git failure (e.g. nothing to commit) is ignored. + const { execFileSync } = require("child_process"); // argv form: the task text comes from markdown files and must never be parsed by a shell + try { + console.log(`[Orchestrator] Committing task progress: ${task.text}`); + execFileSync("git", ["add", "-A"], { cwd: repoRoot, stdio: "pipe" }); + const msg = `feat(tasks): [Completed] ${task.text}`; + execFileSync("git", ["commit", "-m", msg], { + cwd: repoRoot, + stdio: "pipe", + }); + } catch (err) { + // If nothing to commit, that's fine + } +} + +async function runSingleSubTask( + task: TaskItem, + config: AgentConfig, + ctx: ToolContext, + opts: SessionRunOptions, + emit: (line: OutputLine) => Promise, +): Promise { + const path = require("path") as typeof
import("path"); + const fs = require("fs") as typeof import("fs"); const basePrompt = resolvePrompt(config.promptId); + + let devServersContext = "No active dev servers running on port 3000."; + try { + const listResult = await executeTool( + "dev_server_list", + { projectId: opts.projectId }, + ctx, + ); + if (Array.isArray(listResult) && listResult.length > 0) { + devServersContext = listResult + .map( + (s: any) => + `- Port ${s.port} (${s.state}): ${s.command} -> Preview URL: ${s.previewUrl}`, + ) + .join("\n"); + } + } catch {} + const scopedPrompt = `${basePrompt} -\n\n## Active context -You are working inside the monorepo directory: ${opts.appPath} -All file paths you use should be relative to this directory unless otherwise specified. -When running commands, always cd into ${opts.appPath} first unless already there. -Do NOT run git commit or git push — the platform handles committing after you finish. + +## ACTIVE DEVELOPER WORKSPACE STATE +Active Dev Servers: +${devServersContext} + +## ACTIVE SUBTASK OBJECTIVE +You are working on a single task in your task queue: +TASK: "${task.text}" +File: "${path.relative(opts.repoRoot ?? ctx.workspaceRoot, task.filePath)}" (line ${task.lineIndex + 1}) + +## CRITICAL EXECUTION CONSTRAINTS +1. 🎯 STAY HIGHLY FOCUSED: Your only objective is to implement this specific task. Do NOT wander, do NOT explore other unrelated parts of the codebase, and do NOT attempt unrelated tasks. +2. 🚫 NO EXPLORATION COMMANDS: DO NOT execute generic orientation/search commands like 'ls', 'find', 'pwd', 'grep', 'git diff', 'git status'. You already know the repository structure. Go straight to editing or reading the targeted files. +3. 🛠️ TOGGLE CHECKBOX: Once your implementation is done, you MUST read and rewrite "${path.relative(opts.repoRoot ?? ctx.workspaceRoot, task.filePath)}" at line ${task.lineIndex + 1} to change "- [ ]" to "- [x]". +4. 🔴 NO COMMITS: Do NOT run 'git commit' or 'git push'. 
The platform handles committing automatically after you finish. +5. 🟢 COMPLETED SIGNAL: When you are finished, verify the build compiles clean using the Ralph Loop checks. If successful, stop executing tools and end your response. `; - const history: ChatMessage[] = [{ role: "user", content: task }]; + const userPrompt = `Please implement the following task: "${task.text}" and then check it off in the task list.`; + const history: ChatMessage[] = [{ role: "user", content: userPrompt }]; - let turn = 0; + let subTurn = 0; + const SUB_MAX_TURNS = 40; let toolCallsSinceText = 0; let roundsSinceText = 0; const toolFingerprints: string[] = []; - let loopBreakReason: string | null = null; let ralphIteration = 0; function fingerprintToolCall(tc: any) { - if (tc.name === "shell_exec") { - const cmd = String(tc.args?.command ?? "").trim(); - const verb = - cmd - .split("&&") - .map((s) => s.trim()) - .find((s) => !s.startsWith("cd ")) - ?.split(/\s+/)[0] ?? "shell"; - return `shell_exec:${verb}`; + const name = tc.name; + const args = tc.args ?? {}; + + if (name === "shell_exec") { + const cmd = String(args.command ?? "").trim(); + const firstWord = cmd.split(/\s+/)[0] ?? "shell"; + return `shell_exec:${firstWord}`; } - if ( - tc.name === "fs_write" || - tc.name === "fs_edit" || - tc.name === "fs_read" - ) { - return `${tc.name}:${tc.args?.path}`; + + // Determine target based on most common descriptive parameter keys + const target = + args.path ?? + args.pattern ?? + args.command ?? + args.commandId ?? + args.appUuid ?? + args.uuid ?? + ""; + if (target) { + return `${name}:${target}`; } - return `${tc.name}:${Object.values(tc.args ?? 
{})[0]}`; + + // Filter out common metadata like projectId, and use first real argument + const keys = Object.keys(args).filter((k) => k !== "projectId"); + if (keys.length > 0) { + return `${name}:${args[keys[0]]}`; + } + + return `${name}:default`; } - while (turn < MAX_TURNS) { + while (subTurn < SUB_MAX_TURNS) { if (opts.isStopped()) { await emit({ ts: now(), type: "info", text: "Stopped by user." }); - await patchSession(opts, { status: "stopped" }); - return; + return false; } - turn++; + subTurn++; - const isSilent = roundsSinceText >= 15 || toolCallsSinceText >= 20; + const isSilent = roundsSinceText >= 8 || toolCallsSinceText >= 12; const extraSystem = isSilent - ? "\n\n[STATUS NUDGE] You have run " + - `${toolCallsSinceText} tool call(s) over ${roundsSinceText} round(s) ` + - "without sending the user any text. Before any more tool calls, " + - "send ONE short sentence describing what you are currently working " + - "on and why." + ? "\n\n[STATUS NUDGE] Focus on completing the current task. Do not make any more tool calls without a short sentence explaining what you are working on." : ""; let resp: any; @@ -295,23 +553,25 @@ Do NOT run git commit or git push — the platform handles committing after you systemPrompt: scopedPrompt + extraSystem, messages: history as any[], tools: config.tools, - temperature: 0.2, + temperature: 0.1, }); } catch (err) { const msg = err instanceof Error ? 
err.message : String(err); - await emit({ ts: now(), type: "error", text: `LLM error: ${msg}` }); - await patchSession(opts, { status: "failed", error: msg }); - return; + await emit({ + ts: now(), + type: "error", + text: `LLM sub-session error: ${msg}`, + }); + return false; } if (resp.error) { await emit({ ts: now(), type: "error", - text: `LLM error: ${resp.error}`, + text: `LLM sub-session error: ${resp.error}`, }); - await patchSession(opts, { status: "failed", error: resp.error }); - return; + return false; } if (resp.text) { @@ -323,50 +583,84 @@ Do NOT run git commit or git push — the platform handles committing after you toolCallsSinceText += resp.toolCalls.length; } - // ── Self-Correcting Ralph Loop Autonomy ── if (!resp.toolCalls.length) { - const text = resp.text || ""; - const incompleteSignals = [ - "I need to", - "Let me", - "Next, I should", - "I should also", - "Additionally", - "I will now", - "I need first to", - ]; - const needsMoreWork = incompleteSignals.some((signal) => - text.includes(signal), - ); - - if (needsMoreWork && ralphIteration < 3) { - ralphIteration++; + if (opts.repoRoot && ralphIteration < 3) { await emit({ ts: now(), type: "info", - text: `🔄 [Ralph Loop] Self-reflection triggered (iteration ${ralphIteration}/3). Resuming execution...`, + text: "🔍 [Ralph Loop] Verifying build for this task...", }); - history.push({ - role: "user", - content: - "Please continue implementing the outstanding next steps to complete the task.", - }); - continue; + + const verification = runBuildVerification(opts.repoRoot, opts.appPath); + if (!verification.success) { + ralphIteration++; + await emit({ + ts: now(), + type: "error", + text: `❌ [Ralph Loop] Build failed (iteration ${ralphIteration}/3) for this task.`, + }); + + history.push({ + role: "user", + content: `Your previous edits completed, but the project's build check failed with compilation errors. 
+ +========================================= +🚨 SURGICAL HEALING PROTOCOL ACTIVE 🚨 +========================================= +The project's compilation/build has failed. You are currently in an autonomous, auto-correcting healing loop and must fix this compilation error immediately. + +To prevent cognitive loop spirals and command limits, you MUST follow this strict, non-negotiable troubleshooting protocol: + +1. 🚫 STRICTLY BLOCK EXPLORATION: DO NOT execute general directory exploration or orientation commands such as 'ls', 'find', 'pwd', 'grep', 'git status', 'git diff', or other search commands. You do not need to look around. +2. 🎯 SURGICAL TARGETING: Scan the compiler error logs below to locate the EXACT filename, line number, and column where the compilation failed. +3. 🛠️ IMMEDIATE CORRECTION: Read that file immediately using your specific file-reading tool (using precise start/end lines if it is large) and apply a targeted, surgical edit to correct the exact syntax or type error. Do not write a placeholder or partial fix. + +Here are the precise compilation errors from the compiler: +\`\`\`text +${verification.error} +\`\`\` + +Implement the exact fix directly in the code now.`, + }); + continue; + } else { + await emit({ + ts: now(), + type: "info", + text: "🟢 [Ralph Loop] Build passed successfully! 
0 errors.", + }); + } } - // If fully complete, trigger auto-commit and finish - if (opts.autoApprove) { - await autoCommitAndDeploy(opts, task, emit); - } else { - await patchSession(opts, { status: "completed" }); + let diskChecked = false; + try { + const fileContent = fs.readFileSync(task.filePath, "utf8"); + const lines = fileContent.split("\n"); + const line = lines[task.lineIndex]; + if (line) { + const match = line.match(/^(\s*)-\s*\[([ xX])\]\s+(.+)$/); + if (match && match[2].toLowerCase() === "x") { + diskChecked = true; + } + } + } catch {} + + if (!diskChecked) { + await emit({ + ts: now(), + type: "info", + text: `✍️ [Orchestrator] Task implementation completed. Automatically checking off task on disk.`, + }); + toggleTaskOnDisk(task); } - return; + + return true; } for (const tc of resp.toolCalls) { toolFingerprints.push(fingerprintToolCall(tc)); } - const window = toolFingerprints.slice(-10); + const window = toolFingerprints.slice(-12); const counts = new Map(); for (const fp of window) counts.set(fp, (counts.get(fp) ?? 
0) + 1); @@ -375,13 +669,17 @@ Do NOT run git commit or git push — the platform handles committing after you for (const [fp, n] of counts.entries()) { if (n > maxRepeats) { maxRepeats = n; - repeatedCmd = fp.split("|")[0]; + repeatedCmd = fp; } } if (maxRepeats >= 6) { - loopBreakReason = `Repeated ${repeatedCmd} ${maxRepeats}× in last 10 calls`; - break; + await emit({ + ts: now(), + type: "error", + text: `Loop detected in subtask execution (repeated "${repeatedCmd}" ${maxRepeats}x in last 12 calls), breaking loop.`, + }); + return false; } history.push({ @@ -390,140 +688,142 @@ Do NOT run git commit or git push — the platform handles committing after you toolCalls: resp.toolCalls, }); - // ── 4-Level Smart Concurrency Tool Grouping ── - const parallelReads = resp.toolCalls.filter((tc: any) => - [ - "fs_read", - "fs_tree", - "fs_list", - "fs_glob", - "fs_grep", - "projects_list", - "project_recent_errors", - ].includes(tc.name), - ); - const sequentialWrites = resp.toolCalls.filter((tc: any) => - [ - "fs_write", - "fs_edit", - "create_file", - "write_file", - "replace_in_file", - "apps_create", - "databases_create", - ].includes(tc.name), - ); - const otherTools = resp.toolCalls.filter( - (tc: any) => - !parallelReads.includes(tc) && !sequentialWrites.includes(tc), - ); + for (const tc of resp.toolCalls) { + if (opts.isStopped()) return false; - // Stage 1: Parallel Reads - if (parallelReads.length > 0) { await emit({ ts: now(), type: "step", - text: `Executing ${parallelReads.length} read operations concurrently...`, + text: `Running ${tc.name}...`, }); - await Promise.all( - parallelReads.map(async (tc: any) => { - let result; - try { - result = await executeTool(tc.name, tc.args, ctx); - } catch (err) { - result = { - error: err instanceof Error ? err.message : String(err), - }; - } - const resultStr = - typeof result === "string" - ? 
result - : JSON.stringify(result, null, 2); - history.push({ - role: "tool", - content: resultStr, - toolCallId: tc.id, - toolName: tc.name, - }); - }), - ); - } - - // Stage 2: Parallelizable Other Tools - if (otherTools.length > 0) { - await Promise.all( - otherTools.map(async (tc: any) => { - await emit({ - ts: now(), - type: "step", - text: `Running ${tc.name}...`, - }); - let result; - try { - result = await executeTool(tc.name, tc.args, ctx); - } catch (err) { - result = { - error: err instanceof Error ? err.message : String(err), - }; - } - const resultStr = - typeof result === "string" - ? result - : JSON.stringify(result, null, 2); - history.push({ - role: "tool", - content: resultStr, - toolCallId: tc.id, - toolName: tc.name, - }); - }), - ); - } - - // Stage 3: Sequential User-Safe Writes/Edits - if (sequentialWrites.length > 0) { - for (const tc of sequentialWrites) { - await emit({ - ts: now(), - type: "step", - text: `Writing modifications: ${tc.name}...`, - }); - let result; - try { - result = await executeTool(tc.name, tc.args, ctx); - const changedFile = extractChangedFile( - tc.name, - tc.args, - ctx.workspaceRoot, - opts.appPath, - ); - if (changedFile) { - await patchSession(opts, { changedFile }); - } - } catch (err) { - result = { error: err instanceof Error ? err.message : String(err) }; - } - const resultStr = - typeof result === "string" ? result : JSON.stringify(result, null, 2); - history.push({ - role: "tool", - content: resultStr, - toolCallId: tc.id, - toolName: tc.name, - }); + let result: any; + try { + result = await executeTool(tc.name, tc.args, ctx); + } catch (err) { + result = { error: err instanceof Error ? err.message : String(err) }; } + + const resultStr = + typeof result === "string" ? 
result : JSON.stringify(result, null, 2); + history.push({ + role: "tool", + content: resultStr, + toolCallId: tc.id, + toolName: tc.name, + }); } } - if (loopBreakReason) { + await emit({ + ts: now(), + type: "error", + text: `Subtask exceeded maximum turns limit of ${SUB_MAX_TURNS}.`, + }); + return false; +} + +export async function runSessionAgent( + config: AgentConfig, + task: string, + ctx: ToolContext, + opts: SessionRunOptions, +): Promise { + const emit = async (line: OutputLine) => { + console.log(`[session ${opts.sessionId}] ${line.type}: ${line.text}`); + await patchSession(opts, { outputLine: line }); + }; + + await emit({ + ts: now(), + type: "info", + text: `Agent started offline delegation orchestrator in ${opts.appPath}`, + }); + + const repoRoot = opts.repoRoot ?? ctx.workspaceRoot; + + let tasks = parseTaskItems(repoRoot); + if (tasks.length === 0) { await emit({ ts: now(), - type: "error", - text: `Loop broken: ${loopBreakReason}`, + type: "info", + text: "🤖 [Orchestrator] No active tasks backlog found on disk. Analyzing prompt to plan atomic execution backlog...", }); - await patchSession(opts, { status: "failed", error: loopBreakReason }); + try { + await generateBacklogFromPrompt(task, repoRoot); + tasks = parseTaskItems(repoRoot); + } catch (err: any) { + await emit({ + ts: now(), + type: "error", + text: `❌ [Orchestrator] Failed to generate backlog: ${err.message || String(err)}`, + }); + await patchSession(opts, { + status: "failed", + error: "Backlog generation failed", + }); + return; + } + } + + const openTasks = tasks.filter((t) => !t.isChecked); + if (openTasks.length === 0) { + await emit({ + ts: now(), + type: "info", + text: "🟢 [Orchestrator] All tasks in the queue are already completed!", + }); + await patchSession(opts, { status: "completed" }); + return; + } + + await emit({ + ts: now(), + type: "info", + text: `🤖 [Orchestrator] Found ${openTasks.length} open tasks. 
Executing task-by-task Meta-Loop...`, + }); + + for (let i = 0; i < openTasks.length; i++) { + const currentTask = openTasks[i]; + await emit({ + ts: now(), + type: "info", + text: `🚀 [Orchestrator] Task ${i + 1}/${openTasks.length}: "${currentTask.text}"`, + }); + + const success = await runSingleSubTask( + currentTask, + config, + ctx, + opts, + emit, + ); + if (!success) { + await emit({ + ts: now(), + type: "error", + text: `❌ [Orchestrator] Bailed out! Task execution failed on: "${currentTask.text}".`, + }); + + await patchSession(opts, { + status: "failed", + error: `Delegation loop halted at task: "${currentTask.text}"`, + }); + return; + } + + commitTaskProgress(currentTask, repoRoot); + } + + await emit({ + ts: now(), + type: "info", + text: `🎉 [Orchestrator] All delegated tasks completed successfully with green compilation builds!`, + }); + + if (opts.autoApprove) { + await autoCommitAndDeploy(opts, task, emit); } else { - await patchSession(opts, { status: "failed", error: "Max turns reached" }); + await patchSession(opts, { status: "completed" }); } } diff --git a/vibn-agent-runner/src/llm/gemini-chat.ts b/vibn-agent-runner/src/llm/gemini-chat.ts index 2077a802..0c124a7b 100644 --- a/vibn-agent-runner/src/llm/gemini-chat.ts +++ b/vibn-agent-runner/src/llm/gemini-chat.ts @@ -121,12 +121,33 @@ export async function callGeminiChat(opts: { const fns = toGeminiFunctions(opts.tools ?? []); if (fns) config.tools = fns; + console.log("\n========================================================"); + console.log("➡️ [GEMINI API REQUEST]"); + console.log("========================================================"); + console.log( + `System Prompt: ${config.systemInstruction ? config.systemInstruction.slice(0, 1000) + "..." 
: "None"}`, + ); + console.log( + "Contents Payload:", + JSON.stringify(toGeminiContents(opts.messages), null, 2), + ); + console.log("========================================================\n"); + const response = await ai.models.generateContent({ model: GEMINI_MODEL, contents: toGeminiContents(opts.messages), config, }); + console.log("\n========================================================"); + console.log("⬅️ [GEMINI API RESPONSE]"); + console.log("========================================================"); + console.log( + "Raw Candidates:", + JSON.stringify(response.candidates, null, 2), + ); + console.log("========================================================\n"); + let text = ""; let thoughts = ""; const toolCalls: ToolCall[] = []; @@ -183,6 +204,18 @@ export async function* streamGeminiChat(opts: { const fns = toGeminiFunctions(opts.tools ?? []); if (fns) config.tools = fns; + console.log("\n========================================================"); + console.log("➡️ [GEMINI STREAM REQUEST]"); + console.log("========================================================"); + console.log( + `System Prompt: ${config.systemInstruction ? config.systemInstruction.slice(0, 1000) + "..." 
: "None"}`, + ); + console.log( + "Contents Payload:", + JSON.stringify(toGeminiContents(opts.messages), null, 2), + ); + console.log("========================================================\n"); + const streamResult = await ai.models.generateContentStream({ model: GEMINI_MODEL, contents: toGeminiContents(opts.messages), diff --git a/vibn-agent-runner/src/prompts/coder.ts b/vibn-agent-runner/src/prompts/coder.ts index 657b77c9..b711406a 100644 --- a/vibn-agent-runner/src/prompts/coder.ts +++ b/vibn-agent-runner/src/prompts/coder.ts @@ -1,18 +1,26 @@ -import { registerPrompt } from './loader'; +import { registerPrompt } from "./loader"; // Because we deleted the local tools and adopted the full VIBN_TOOL_DEFINITIONS schema, // the runner agent now has the exact same capabilities as the frontend UI agent! // It uses fs_*, shell_exec, dev_server_*, apps_*, and ship. -registerPrompt('coder', ` +registerPrompt( + "coder", + ` You are Vibn AI — the technical co-founder of every Vibn user. You are currently running headlessly in the background. The user is offline or waiting for you to finish. Your job is to read the task assigned to you, implement it, test it, and ship it to Coolify. Do NOT ask the user questions. If you get stuck, log the error and stop. +# Specifications & Product Requirements Docs (.vibncode/specs/) + +The project's technical specifications, data models, UX principles, and security requirements live in \`.vibncode/specs/\` as plain, Git-tracked Markdown files. This is your single source of truth: +- 📖 **PRIOR REFERENCE IS MANDATORY:** BEFORE starting any task or writing any code, you MUST use your file-reading tools to locate and read the matching specification file (for example, read \`.vibncode/specs/05-data-model.md\` when setting up database schemas or Prisma models, or \`.vibncode/specs/04-compliance-security.md\` when writing authorization/password hashing logic). Adhere exactly to the planned specifications to avoid drift. 
+- ✍️ **PROACTIVE DOCUMENTATION:** Keep these spec sheets updated. If you make an architectural decision or refine a schema, proactively update the matching markdown file in \`.vibncode/specs/\`. + # Mode: Action -Since you are running autonomously, you must take action immediately. +Since you are running autonomously, you must take action immediately. # What "done" looks like @@ -24,17 +32,17 @@ A turn ends when you have fully completed the task AND shipped the code. # Hard rules — non-negotiable **Honesty about tool results:** -- **Cite the tool result, don't claim from memory.** -- **Trust the \`ok\` field.** Every tool result carries \`ok: true | false\`. If \`ok\` is false (or \`exitCode\` is non-zero, or \`healthCheck.status\` is >= 400), the operation FAILED. -- **\`fs_write\` and \`fs_edit\` results carry \`sha256\` and \`bytes\` on success.** -- **\`dev_server_start\` results carry \`healthCheck\` on success.** Before saying "the preview is ready," confirm \`healthCheck.status === 200\`. +- **Cite the tool result, don't claim from memory.** +- **Trust the \`ok\` field.** Every tool result carries \`ok: true | false\`. If \`ok\` is false (or \`exitCode\` is non-zero, or \`healthCheck.status\` is >= 400), the operation FAILED. +- **\`fs_write\` and \`fs_edit\` results carry \`sha256\` and \`bytes\` on success.** +- **\`dev_server_start\` results carry \`healthCheck\` on success.** Before saying "the preview is ready," confirm \`healthCheck.status === 200\`. **Anchoring and scope:** -- **Anchor on current state before troubleshooting.** -- **Always pass \`projectId\`** to \`apps_create\` / \`databases_create\`. -- **Always \`apps_list { projectId }\` BEFORE \`apps_create\`** for a sanity check, and **always \`apps_templates_search\` BEFORE \`apps_create\`** for known third-party apps. +- **Anchor on current state before troubleshooting.** +- **Always pass \`projectId\`** to \`apps_create\` / \`databases_create\`. 
+- **Always \`apps_list { projectId }\` BEFORE \`apps_create\`** for a sanity check, and **always \`apps_templates_search\` BEFORE \`apps_create\`** for known third-party apps. - **Trust idempotency.** When \`apps_create\` / \`databases_create\` returns \`alreadyExisted: true\`, your job is done — use the returned uuid and move on. -- **Never delete-and-recreate to escape an error.** "Container name already in use" → \`apps_unstick { uuid }\` → \`apps_deploy { uuid }\`. +- **Never delete-and-recreate to escape an error.** "Container name already in use" → \`apps_unstick { uuid }\` → \`apps_deploy { uuid }\`. **Stopping conditions:** - **If a deploy or tool call fails twice with the same error, STOP.** @@ -61,15 +69,15 @@ Each project has a persistent \`vibn-dev\` container. Edit files via \`fs_*\` an **Path convention for fs_* tools:** Pass paths relative to the project root — \`src/app/page.tsx\`, NOT \`/workspace/slug/src/app/page.tsx\` and NOT \`slug/src/app/page.tsx\`. **Dev servers** (preview URL via \`*.preview.vibnai.com\` wildcard): -- \`dev_server_start { projectId, command, port: 3000 }\` is a **one-shot** call. It kills old processes on the port, checks the port is free, sets HOST=0.0.0.0 + PORT, launches your command, and returns a \`previewUrl\` plus a \`healthCheck\` block. -- **Port \`3000\` is reserved for the primary user-facing UI.** +- \`dev_server_start { projectId, command, port: 3000 }\` is a **one-shot** call. It kills old processes on the port, checks the port is free, sets HOST=0.0.0.0 + PORT, launches your command, and returns a \`previewUrl\` plus a \`healthCheck\` block. +- **Port \`3000\` is reserved for the primary user-facing UI.** - \`dev_server_stop\` / \`dev_server_list\` / \`dev_server_logs\` — use only AFTER a failed start, to diagnose the error the function returned. Never on success. 
**Verify the page actually renders:** - After \`dev_server_start\` returns a \`previewUrl\` AND \`healthCheck.status === 200\`, call \`browser_console { url: previewUrl }\` to capture frontend console errors. -- If \`browser_console\` returns errors, fix them with \`fs_edit\` before declaring done. A green \`healthCheck\` plus a clean console is the real "done" signal for UI work. +- **CRITICAL:** Next.js HMR overlay syntax errors do NOT crash the \`dev_server_start\` command. Even if \`dev_server_start\` returns \`Status: success\`, you MUST call \`browser_console\` to verify that there are no red syntax error overlays on the screen. If \`browser_console\` returns errors, fix them with \`fs_edit\` before declaring done. A green \`healthCheck\` plus a clean console is the real "done" signal for UI work. -**Visual QA:** \`request_visual_qa { targetPath }\` critiques a UI file against a 5-dim design rubric. **Call this whenever you modify visual UI code** before returning the \`previewUrl\`. +**Visual QA:** \`request_visual_qa { targetPath }\` critiques a UI file against a 5-dim design rubric. **Call this whenever you modify visual UI code** before returning the \`previewUrl\`. **Sentry is auto-provisioned per project.** \`NEXT_PUBLIC_SENTRY_DSN\` and \`SENTRY_AUTH_TOKEN\` are injected into the Coolify app env automatically by \`apps_create\`. @@ -77,11 +85,12 @@ Each project has a persistent \`vibn-dev\` container. Edit files via \`fs_*\` an For editing files in existing repos, ALWAYS use \`fs_*\` in the dev container — \`ship\` commits and pushes. ## Troubleshooting -- **"exited (1)" / deploy stuck:** \`apps_logs { uuid }\` + \`apps_containers_ps { uuid }\`. +- **"exited (1)" / deploy stuck:** \`apps_logs { uuid }\` + \`apps_containers_ps { uuid }\`. - **502 / "no available server":** \`apps_get\`; if \`fqdn\` is empty, attach a domain. - **"tenant" / "does not belong to":** uuid not in this workspace. Re-list with \`apps_list\`. 
- **Compose stack weird:** \`apps_repair { uuid }\` re-applies Traefik labels + port forwarding. - **Nuke and redeploy:** \`apps_delete { uuid, confirm }\` {{skills}} -`.trim()); +`.trim(), +); diff --git a/vibn-agent-runner/src/server.ts b/vibn-agent-runner/src/server.ts index 6b504c5f..51d564cc 100644 --- a/vibn-agent-runner/src/server.ts +++ b/vibn-agent-runner/src/server.ts @@ -1,13 +1,12 @@ -import express, { Request, Response, NextFunction } from 'express'; -import cors from 'cors'; -import * as fs from 'fs'; -import * as path from 'path'; -import * as crypto from 'crypto'; -import { execSync } from 'child_process'; -import { runSessionAgent } from './agent-session-runner'; -import { AGENTS } from './agents'; -import { ToolContext } from './tools'; - +import express, { Request, Response, NextFunction } from "express"; +import cors from "cors"; +import * as fs from "fs"; +import * as path from "path"; +import * as crypto from "crypto"; +import { execSync } from "child_process"; +import { runSessionAgent } from "./agent-session-runner"; +import { AGENTS } from "./agents"; +import { ToolContext } from "./tools"; const app = express(); app.use(cors()); @@ -15,7 +14,7 @@ app.use(cors()); const startTime = new Date(); // Raw body capture for webhook HMAC — must come before express.json() -app.use('/webhook/gitea', express.raw({ type: '*/*' })); +app.use("/webhook/gitea", express.raw({ type: "*/*" })); app.use(express.json()); @@ -25,52 +24,169 @@ const PORT = process.env.PORT || 3333; // Build ToolContext from environment variables // --------------------------------------------------------------------------- -function ensureWorkspace(repo?: string): string { - const base = process.env.WORKSPACE_BASE || '/workspaces'; - if (!repo) { - const dir = path.join(base, 'default'); - fs.mkdirSync(dir, { recursive: true }); - return dir; - } - const dir = path.join(base, repo.replace('/', '_')); - const gitea = { - apiUrl: process.env.GITEA_API_URL || '', - apiToken: 
process.env.GITEA_API_TOKEN || '', - username: process.env.GITEA_USERNAME || '' - }; - if (!fs.existsSync(path.join(dir, '.git'))) { - fs.mkdirSync(dir, { recursive: true }); - const authedUrl = `${gitea.apiUrl}/${repo}.git` - .replace('https://', `https://${gitea.username}:${gitea.apiToken}@`); - try { - execSync(`git clone "${authedUrl}" "${dir}"`, { stdio: 'pipe' }); - } catch { - // Repo may not exist yet — just init - execSync(`git init`, { cwd: dir, stdio: 'pipe' }); - execSync(`git remote add origin "${authedUrl}"`, { cwd: dir, stdio: 'pipe' }); - } - } +function ensureWorkspace(repo?: string, sessionId?: string): string { + const base = process.env.WORKSPACE_BASE || "/workspaces"; + if (!repo) { + const dir = path.join(base, "default"); + fs.mkdirSync(dir, { recursive: true }); return dir; + } + const mainRepoDir = path.join(base, repo.replace("/", "_")); + const gitea = { + apiUrl: process.env.GITEA_API_URL || "", + apiToken: process.env.GITEA_API_TOKEN || "", + username: process.env.GITEA_USERNAME || "", + }; + + // 1. Ensure main repo clone exists + if (!fs.existsSync(path.join(mainRepoDir, ".git"))) { + fs.mkdirSync(mainRepoDir, { recursive: true }); + const authedUrl = `${gitea.apiUrl}/${repo}.git`.replace( + "https://", + `https://${gitea.username}:${gitea.apiToken}@`, + ); + try { + execSync(`git clone "${authedUrl}" "${mainRepoDir}"`, { stdio: "pipe" }); + } catch { + // Repo may not exist yet — just init + execSync(`git init`, { cwd: mainRepoDir, stdio: "pipe" }); + execSync(`git remote add origin "${authedUrl}"`, { + cwd: mainRepoDir, + stdio: "pipe", + }); + } + } + + // 2. If no sessionId, fall back to main repo clone directly + if (!sessionId) { + return mainRepoDir; + } + + // 3. Isolated Worktree Directory per task session + const taskWorktreePath = path.join(base, "tasks", sessionId); + fs.mkdirSync(path.join(base, "tasks"), { recursive: true }); + + // 4. 
Create isolated worktree if not yet active + if (!fs.existsSync(path.join(taskWorktreePath, ".git"))) { + // Clean up any stale directory from previous failed runs before adding worktree + if (fs.existsSync(taskWorktreePath)) { + try { + fs.rmSync(taskWorktreePath, { recursive: true, force: true }); + } catch {} + } + + try { + console.log( + `[worktree] Adding isolated git worktree for session ${sessionId} at ${taskWorktreePath}...`, + ); + + // Check if the branch task-sessionId already exists in the main repository + let branchExists = false; + try { + const branches = execSync(`git branch --list "task-${sessionId}"`, { + cwd: mainRepoDir, + }).toString(); + branchExists = branches.trim().length > 0; + } catch { + branchExists = false; + } + + if (branchExists) { + // Checkout the existing branch into the new worktree path + execSync( + `git worktree add -f "${taskWorktreePath}" "task-${sessionId}"`, + { cwd: mainRepoDir, stdio: "pipe" }, + ); + } else { + // Create and checkout a new isolated branch + execSync( + `git worktree add -f -b "task-${sessionId}" "${taskWorktreePath}"`, + { cwd: mainRepoDir, stdio: "pipe" }, + ); + } + } catch (e: any) { + console.error( + "[worktree] Failed to add git worktree, falling back to main clone:", + e.message || String(e), + ); + return mainRepoDir; + } + } + + // 5. Sync active workspace edits from mainRepoDir (containing Monaco edits) to taskWorktreePath + if (taskWorktreePath !== mainRepoDir) { + try { + console.log( + `[worktree] Syncing active workspace edits from ${mainRepoDir} to ${taskWorktreePath}...`, + ); + // Use rsync to copy active files while preserving structure and deleting files deleted in mainRepoDir + // Exclude node_modules, .git, .next, .vibncode/settings.json, etc. 
+ execSync( + `rsync -ar --delete --exclude="node_modules" --exclude=".git" --exclude=".next" --exclude=".vibncode/settings.json" "${mainRepoDir}/" "${taskWorktreePath}/"`, + { stdio: "pipe" }, + ); + } catch (syncErr: any) { + console.warn( + "[worktree] rsync failed, falling back to cp:", + syncErr.message || syncErr, + ); + } + } + + return taskWorktreePath; } -function buildContext(repo?: string): ToolContext { - const workspaceRoot = ensureWorkspace(repo); +function buildContext(repo?: string, sessionId?: string): ToolContext { + const workspaceRoot = ensureWorkspace(repo, sessionId); - return { - workspaceRoot, - gitea: { - apiUrl: process.env.GITEA_API_URL || '', - apiToken: process.env.GITEA_API_TOKEN || '', - username: process.env.GITEA_USERNAME || '' - }, - coolify: { - apiUrl: process.env.COOLIFY_API_URL || '', - apiToken: process.env.COOLIFY_API_TOKEN || '' - }, - mcpToken: '', - vibnApiUrl: 'http://localhost:3000', - memoryUpdates: [] - }; + return { + workspaceRoot, + gitea: { + apiUrl: process.env.GITEA_API_URL || "", + apiToken: process.env.GITEA_API_TOKEN || "", + username: process.env.GITEA_USERNAME || "", + }, + coolify: { + apiUrl: process.env.COOLIFY_API_URL || "", + apiToken: process.env.COOLIFY_API_TOKEN || "", + }, + mcpToken: "", + vibnApiUrl: process.env.VIBN_API_URL ?? "https://vibnai.com", + memoryUpdates: [], + }; +} + +function cleanupWorkspace(repo: string, sessionId: string) { + const base = process.env.WORKSPACE_BASE || "/workspaces"; + const mainRepoDir = path.join(base, repo.replace("/", "_")); + const taskWorktreePath = path.join(base, "tasks", sessionId); + + if (fs.existsSync(taskWorktreePath)) { + try { + console.log( + `[worktree] Pruning and removing git worktree for session ${sessionId}...`, + ); + // 1. Tell git to remove the worktree references + execSync(`git worktree remove --force "${taskWorktreePath}"`, { + cwd: mainRepoDir, + stdio: "pipe", + }); + // 2. 
Delete the temporary branch from the main repository index + execSync(`git branch -D "task-${sessionId}"`, { + cwd: mainRepoDir, + stdio: "pipe", + }); + // 3. Force clean directory + if (fs.existsSync(taskWorktreePath)) { + fs.rmSync(taskWorktreePath, { recursive: true, force: true }); + } + } catch (e: any) { + console.warn( + `[worktree] Non-fatal cleanup error for session ${sessionId}:`, + e.message || String(e), + ); + } + } } // --------------------------------------------------------------------------- @@ -78,216 +194,281 @@ function buildContext(repo?: string): ToolContext { // --------------------------------------------------------------------------- // Health check -app.get('/health', (_req: Request, res: Response) => { - res.json({ status: 'ok', timestamp: new Date().toISOString() }); +app.get("/health", (_req: Request, res: Response) => { + res.json({ status: "ok", timestamp: new Date().toISOString() }); }); // --------------------------------------------------------------------------- // GitHub mirror — clone a public GitHub repo and push to Gitea as-is // --------------------------------------------------------------------------- -app.post('/api/mirror', async (req: Request, res: Response) => { - const { github_url, gitea_repo, project_name, github_token } = req.body as { - github_url?: string; - gitea_repo?: string; // e.g. "mark/opsos" - project_name?: string; - github_token?: string; // PAT for private repos - }; +app.post("/api/mirror", async (req: Request, res: Response) => { + const { github_url, gitea_repo, project_name, github_token } = req.body as { + github_url?: string; + gitea_repo?: string; // e.g. 
"mark/opsos" + project_name?: string; + github_token?: string; // PAT for private repos + }; - if (!github_url || !gitea_repo) { - res.status(400).json({ error: '"github_url" and "gitea_repo" are required' }); - return; + if (!github_url || !gitea_repo) { + res + .status(400) + .json({ error: '"github_url" and "gitea_repo" are required' }); + return; + } + + const { execSync } = await import("child_process"); + const fs = await import("fs"); + const path = await import("path"); + const os = await import("os"); + + const mirrorId = `mirror_${Date.now()}`; + const tmpDir = path.join(os.tmpdir(), mirrorId); + + const gitea = { + apiUrl: process.env.GITEA_API_URL || "", + apiToken: process.env.GITEA_API_TOKEN || "", + username: process.env.GITEA_USERNAME || "", + }; + + try { + // Build authenticated Gitea push URL + // GITEA_API_URL is like https://git.vibnai.com — strip /api/v1 if present + const giteaBase = gitea.apiUrl.replace(/\/api\/v1\/?$/, ""); + const authedPushUrl = `${giteaBase}/${gitea_repo}.git`.replace( + "https://", + `https://${gitea.username}:${gitea.apiToken}@`, + ); + + console.log(`[mirror] Cloning ${github_url} → ${tmpDir}`); + fs.mkdirSync(tmpDir, { recursive: true }); + + // Build authenticated clone URL for private repos + let cloneUrl = github_url; + if (github_token) { + cloneUrl = github_url.replace("https://", `https://${github_token}@`); } - const { execSync } = await import('child_process'); - const fs = await import('fs'); - const path = await import('path'); - const os = await import('os'); + // Mirror-clone the GitHub repo (preserves all branches + tags) + execSync(`git clone --mirror "${cloneUrl}" "${tmpDir}/.git"`, { + stdio: "pipe", + timeout: 120_000, + }); + execSync(`git config --bool core.bare false`, { + cwd: tmpDir, + stdio: "pipe", + }); + execSync(`git checkout`, { cwd: tmpDir, stdio: "pipe" }); - const mirrorId = `mirror_${Date.now()}`; - const tmpDir = path.join(os.tmpdir(), mirrorId); - - const gitea = { - apiUrl: 
process.env.GITEA_API_URL || '', - apiToken: process.env.GITEA_API_TOKEN || '', - username: process.env.GITEA_USERNAME || '' - }; + // Point origin at Gitea and push all refs + execSync(`git remote set-url origin "${authedPushUrl}"`, { + cwd: tmpDir, + stdio: "pipe", + }); + execSync(`git push --mirror origin`, { + cwd: tmpDir, + stdio: "pipe", + timeout: 120_000, + }); + console.log(`[mirror] Pushed ${gitea_repo} successfully`); + res.json({ success: true, gitea_repo, github_url }); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + console.error(`[mirror] Failed:`, msg); + res.status(500).json({ error: "Mirror failed", details: msg }); + } finally { + // Clean up temp dir try { - // Build authenticated Gitea push URL - // GITEA_API_URL is like https://git.vibnai.com — strip /api/v1 if present - const giteaBase = gitea.apiUrl.replace(/\/api\/v1\/?$/, ''); - const authedPushUrl = `${giteaBase}/${gitea_repo}.git` - .replace('https://', `https://${gitea.username}:${gitea.apiToken}@`); - - console.log(`[mirror] Cloning ${github_url} → ${tmpDir}`); - fs.mkdirSync(tmpDir, { recursive: true }); - - // Build authenticated clone URL for private repos - let cloneUrl = github_url; - if (github_token) { - cloneUrl = github_url.replace('https://', `https://${github_token}@`); - } - - // Mirror-clone the GitHub repo (preserves all branches + tags) - execSync(`git clone --mirror "${cloneUrl}" "${tmpDir}/.git"`, { - stdio: 'pipe', - timeout: 120_000 - }); - execSync(`git config --bool core.bare false`, { cwd: tmpDir, stdio: 'pipe' }); - execSync(`git checkout`, { cwd: tmpDir, stdio: 'pipe' }); - - // Point origin at Gitea and push all refs - execSync(`git remote set-url origin "${authedPushUrl}"`, { cwd: tmpDir, stdio: 'pipe' }); - execSync(`git push --mirror origin`, { cwd: tmpDir, stdio: 'pipe', timeout: 120_000 }); - - console.log(`[mirror] Pushed ${gitea_repo} successfully`); - res.json({ success: true, gitea_repo, github_url }); - } catch 
(err) { - const msg = err instanceof Error ? err.message : String(err); - console.error(`[mirror] Failed:`, msg); - res.status(500).json({ error: 'Mirror failed', details: msg }); - } finally { - // Clean up temp dir - try { - const { execSync: rm } = await import('child_process'); - rm(`rm -rf "${tmpDir}"`, { stdio: 'pipe' }); - } catch { /* best effort */ } + const { execSync: rm } = await import("child_process"); + rm(`rm -rf "${tmpDir}"`, { stdio: "pipe" }); + } catch { + /* best effort */ } + } }); // List available agents -app.get('/api/agents', (_req: Request, res: Response) => { - const agents = Object.values(AGENTS).map(a => ({ - name: a.name, - description: a.description, - tools: a.tools.map(t => t.name) - })); - res.json(agents); +app.get("/api/agents", (_req: Request, res: Response) => { + const agents = Object.values(AGENTS).map((a) => ({ + name: a.name, + description: a.description, + tools: a.tools.map((t) => t.name), + })); + res.json(agents); }); - const activeSessions = new Map(); -app.post('/agent/execute', async (req: Request, res: Response) => { - const { - sessionId, projectId, appName, appPath, giteaRepo, task, continueTask, - autoApprove, coolifyAppUuid, - } = req.body as { - sessionId?: string; - projectId?: string; - appName?: string; - appPath?: string; - giteaRepo?: string; - task?: string; - continueTask?: string; - autoApprove?: boolean; - coolifyAppUuid?: string; - }; +app.post("/agent/execute", async (req: Request, res: Response) => { + const { + sessionId, + projectId, + appName, + appPath: rawAppPath, + giteaRepo, + task, + continueTask, + autoApprove, + coolifyAppUuid, + mcpToken, + } = req.body as { + sessionId?: string; + projectId?: string; + appName?: string; + appPath?: string; + giteaRepo?: string; + task?: string; + continueTask?: string; + autoApprove?: boolean; + coolifyAppUuid?: string; + mcpToken?: string; + }; - if (!sessionId || !projectId || !appPath || !task) { - res.status(400).json({ error: 'sessionId, projectId, 
appPath and task are required' }); - return; - } + const appPath = + rawAppPath === undefined || rawAppPath === null || rawAppPath === "" + ? "." + : rawAppPath; - const vibnApiUrl = process.env.VIBN_API_URL ?? 'https://vibnai.com'; + if (!sessionId || !projectId || !appPath || !task) { + res + .status(400) + .json({ error: "sessionId, projectId, appPath and task are required" }); + return; + } - // Register session as active - const sessionState = { stopped: false }; - activeSessions.set(sessionId, sessionState); + const vibnApiUrl = process.env.VIBN_API_URL ?? "https://vibnai.com"; - // Respond immediately — execution is async - res.status(202).json({ sessionId, status: 'running' }); + const patchHeaders = { + "Content-Type": "application/json", + ...(process.env.AGENT_RUNNER_SECRET + ? { "x-agent-runner-secret": process.env.AGENT_RUNNER_SECRET } + : {}), + }; - // Build workspace context — clone/update the Gitea repo if provided - let ctx: ReturnType<typeof buildContext>; - try { - ctx = buildContext(giteaRepo); - } catch (err) { - const msg = err instanceof Error ? 
err.message : String(err); - console.error('[agent/execute] buildContext failed:', msg); - // Notify VIBN DB of failure - fetch(`${vibnApiUrl}/api/projects/${projectId}/agent/sessions/${sessionId}`, { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ status: 'failed', error: msg }), - }).catch(() => {}); - activeSessions.delete(sessionId); - return; - } + // Register session as active + const sessionState = { stopped: false }; + activeSessions.set(sessionId, sessionState); - // Capture repo root before scoping to appPath — needed for git commit in auto-approve - const repoRoot = ctx.workspaceRoot; + // Respond immediately — execution is async + res.status(202).json({ sessionId, status: "running" }); - // Scope workspace to the app subdirectory so the agent works there naturally - if (appPath) { - const path = require('path') as typeof import('path'); - ctx.workspaceRoot = path.join(ctx.workspaceRoot, appPath); - const fs = require('fs') as typeof import('fs'); - fs.mkdirSync(ctx.workspaceRoot, { recursive: true }); - } + // Build workspace context — clone/update the Gitea repo if provided + let ctx: ReturnType<typeof buildContext>; + try { + ctx = buildContext(giteaRepo, sessionId); + } catch (err) { + const msg = err instanceof Error ? 
err.message : String(err); + console.error("[agent/execute] buildContext failed:", msg); + // Notify VIBN DB of failure + fetch( + `${vibnApiUrl}/api/projects/${projectId}/agent/sessions/${sessionId}`, + { + method: "PATCH", + headers: patchHeaders, + body: JSON.stringify({ status: "failed", error: msg }), + }, + ).catch(() => {}); + activeSessions.delete(sessionId); + return; + } - const agentConfig = AGENTS['Coder']; - if (!agentConfig) { - fetch(`${vibnApiUrl}/api/projects/${projectId}/agent/sessions/${sessionId}`, { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ status: 'failed', error: 'Coder agent not registered' }), - }).catch(() => {}); - activeSessions.delete(sessionId); - return; - } + // Capture repo root before scoping to appPath — needed for git commit in auto-approve + const repoRoot = ctx.workspaceRoot; - // If continuing a previous task, combine into a single prompt so the agent - // understands what was already attempted. - const effectiveTask = continueTask - ? `Original task: ${task}\n\nFollow-up instruction: ${continueTask}` - : task!; + // Wire the ToolContext so its tools can call back into the VIBN frontend MCP + // with the right URL and auth. buildContext() defaults these to safe values, + // but the authoritative ones come from env (VIBN_API_URL) and the frontend + // (mcpToken passed in the /agent/execute body). Without this, tools fetch + // http://localhost:3000 with no token and fail with "fetch failed". + ctx.vibnApiUrl = vibnApiUrl; + ctx.mcpToken = mcpToken ?? ctx.mcpToken; + ctx.projectId = projectId; - // Run the streaming agent loop (fire and forget) - runSessionAgent(agentConfig, effectiveTask, ctx, { - sessionId, - projectId, - vibnApiUrl, - appPath, - repoRoot, - isStopped: () => sessionState.stopped, - autoApprove: autoApprove ?? 
true, - giteaRepo, - coolifyAppUuid, - coolifyApiUrl: process.env.COOLIFY_API_URL, - coolifyApiToken: process.env.COOLIFY_API_TOKEN, - }) - .catch(err => { - const msg = err instanceof Error ? err.message : String(err); - console.error(`[agent/execute] session ${sessionId} crashed:`, msg); - fetch(`${vibnApiUrl}/api/projects/${projectId}/agent/sessions/${sessionId}`, { - method: 'PATCH', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ status: 'failed', error: msg }), - }).catch(() => {}); + // Scope workspace to the app subdirectory so the agent works there naturally + if (appPath) { + const path = require("path") as typeof import("path"); + ctx.workspaceRoot = path.join(ctx.workspaceRoot, appPath); + const fs = require("fs") as typeof import("fs"); + fs.mkdirSync(ctx.workspaceRoot, { recursive: true }); + } + + const agentConfig = AGENTS["Coder"]; + if (!agentConfig) { + fetch( + `${vibnApiUrl}/api/projects/${projectId}/agent/sessions/${sessionId}`, + { + method: "PATCH", + headers: patchHeaders, + body: JSON.stringify({ + status: "failed", + error: "Coder agent not registered", + }), + }, + ).catch(() => {}); + activeSessions.delete(sessionId); + return; + } + + // If continuing a previous task, combine into a single prompt so the agent + // understands what was already attempted. + const effectiveTask = continueTask + ? `Original task: ${task}\n\nFollow-up instruction: ${continueTask}` + : task!; + + // Run the streaming agent loop (fire and forget) + runSessionAgent(agentConfig, effectiveTask, ctx, { + sessionId, + projectId, + vibnApiUrl, + appPath, + repoRoot, + isStopped: () => sessionState.stopped, + autoApprove: autoApprove ?? true, + giteaRepo, + coolifyAppUuid, + coolifyApiUrl: process.env.COOLIFY_API_URL, + coolifyApiToken: process.env.COOLIFY_API_TOKEN, + }) + .catch((err) => { + const msg = err instanceof Error ? 
err.message : String(err); + console.error(`[agent/execute] session ${sessionId} crashed:`, msg); + fetch( + `${vibnApiUrl}/api/projects/${projectId}/agent/sessions/${sessionId}`, + { + method: "PATCH", + headers: patchHeaders, + body: JSON.stringify({ status: "failed", error: msg }), + }, + ).catch(() => {}); }) .finally(() => { - activeSessions.delete(sessionId); + activeSessions.delete(sessionId); + if (giteaRepo && sessionId) { + cleanupWorkspace(giteaRepo, sessionId); + } }); }); -app.post('/agent/stop', (req: Request, res: Response) => { - const { sessionId } = req.body as { sessionId?: string }; - if (!sessionId) { res.status(400).json({ error: 'sessionId required' }); return; } - const session = activeSessions.get(sessionId); - if (session) { - session.stopped = true; - res.json({ status: 'stopped' }); - } else { - res.status(404).json({ error: 'session not found or not running' }); - } +app.post("/agent/stop", (req: Request, res: Response) => { + const { sessionId } = req.body as { sessionId?: string }; + if (!sessionId) { + res.status(400).json({ error: "sessionId required" }); + return; + } + const session = activeSessions.get(sessionId); + if (session) { + session.stopped = true; + res.json({ status: "stopped" }); + } else { + res.status(404).json({ error: "session not found or not running" }); + } }); app.listen(PORT, () => { - console.log(`AgentRunner listening on port ${PORT}`); - console.log(`Agents available: ${Object.keys(AGENTS).join(', ')}`); - if (!process.env.GOOGLE_API_KEY) { - console.warn('WARNING: GOOGLE_API_KEY is not set — agents will fail'); - } + console.log(`AgentRunner listening on port ${PORT}`); + console.log(`Agents available: ${Object.keys(AGENTS).join(", ")}`); + if (!process.env.GOOGLE_API_KEY) { + console.warn("WARNING: GOOGLE_API_KEY is not set — agents will fail"); + } }); diff --git a/vibn-agent-runner/src/test-execute-hardening.ts b/vibn-agent-runner/src/test-execute-hardening.ts new file mode 100644 index 
00000000..ecb1e7fc --- /dev/null +++ b/vibn-agent-runner/src/test-execute-hardening.ts @@ -0,0 +1,164 @@ +import { spawn } from "child_process"; +import http from "http"; + +// We will start the runner server on port 3334 +const PORT = 3334; +const BASE_URL = `http://localhost:${PORT}`; + +console.log("🧪 Starting AgentRunner Hardening Test Suite..."); + +// Set up environment variables +const env = { + ...process.env, + PORT: String(PORT), + AGENT_RUNNER_SECRET: "test-secret-123", + GOOGLE_API_KEY: "dummy-key-for-testing", // Pass dummy key to avoid Gemini API initialization crash + VIBN_API_URL: "http://localhost:3335", // Mock backend +}; + +// Start mock backend on port 3335 to catch PATCH callbacks and verify headers +let receivedHeaders: any = null; +let receivedBody: any = null; + +const mockBackend = http.createServer((req, res) => { + receivedHeaders = req.headers; + let body = ""; + req.on("data", (chunk) => { + body += chunk; + }); + req.on("end", () => { + try { + receivedBody = JSON.parse(body); + } catch { + receivedBody = body; + } + res.writeHead(200, { "Content-Type": "application/json" }); + res.end(JSON.stringify({ ok: true })); + }); +}); + +mockBackend.listen(3335, () => { + console.log("✓ Mock backend server listening on port 3335"); +}); + +// Spawn the runner server +const serverProcess = spawn("npx", ["ts-node", "src/server.ts"], { + env, + stdio: "pipe", +}); + +// Wait for server to start +serverProcess.stdout.on("data", (data) => { + const output = data.toString(); + console.log(`[Server Out] ${output.trim()}`); +}); + +serverProcess.stderr.on("data", (data) => { + console.error(`[Server Err] ${data.toString()}`); +}); + +// Helper function to sleep +const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms)); + +async function runTests() { + // Wait 4 seconds for server to boot + await sleep(4000); + + let passed = 0; + let failed = 0; + + const assert = (condition: boolean, message: string) => { + if (condition) { + 
console.log(` 🟢 PASSED: ${message}`); + passed++; + } else { + console.error(` 🔴 FAILED: ${message}`); + failed++; + } + }; + + try { + // Test 1: Empty appPath should be accepted and fall back to "." + console.log("\n1️⃣ Testing appPath empty string fallback..."); + const res1 = await fetch(`${BASE_URL}/agent/execute`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + sessionId: "test-session-1", + projectId: "test-project-1", + task: "Test empty appPath", + appPath: "", // Empty string! + giteaRepo: "test-repo", + }), + }); + + assert(res1.status === 202, `Should return 202, got ${res1.status}`); + const data1 = (await res1.json()) as any; + assert( + data1.sessionId === "test-session-1", + `Should return correct sessionId, got ${data1.sessionId}`, + ); + + // Test 2: Missing sessionId should return 400 + console.log("\n2️⃣ Testing missing required parameters (sessionId)..."); + const res2 = await fetch(`${BASE_URL}/agent/execute`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + projectId: "test-project-1", + task: "Test missing sessionId", + appPath: ".", + }), + }); + assert(res2.status === 400, `Should return 400, got ${res2.status}`); + + // Test 3: Emergency callback headers should include x-agent-runner-secret + console.log("\n3️⃣ Testing early failure callback headers..."); + + // Trigger a clone failure by passing a malformed giteaRepo containing slash, + // which triggers clone instead of default workspace but will fail clone. 
+ console.log("Triggering clone failure on mock Gitea..."); + const res3 = await fetch(`${BASE_URL}/agent/execute`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + sessionId: "test-session-3", + projectId: "test-project-3", + task: "Trigger crash", + appPath: ".", + giteaRepo: "invalid_owner/invalid_repo", + }), + }); + + assert( + res3.status === 202, + `Should return 202 Accepted, got ${res3.status}`, + ); + + // Wait for server to process async task and fail, calling our mock backend PATCH + console.log("Waiting for runner callback on mock backend..."); + await sleep(4000); + + assert(receivedHeaders !== null, "Should call mock backend PATCH endpoint"); + if (receivedHeaders) { + assert( + receivedHeaders["x-agent-runner-secret"] === "test-secret-123", + `Callback should include secret header 'test-secret-123', got '${receivedHeaders["x-agent-runner-secret"]}'`, + ); + assert( + receivedBody && receivedBody.status === "failed", + `Callback body should have status 'failed', got '${receivedBody?.status}'`, + ); + } + } catch (err) { + console.error("Test execution failed with exception:", err); + } finally { + console.log("\n🧹 Cleaning up test servers..."); + serverProcess.kill(); + mockBackend.close(); + console.log(`\n📊 Tests complete. Passed: ${passed}, Failed: ${failed}`); + process.exit(failed > 0 ? 1 : 0); + } +} + +runTests(); diff --git a/vibn-code b/vibn-code index 5783cc79..8c1c14de 160000 --- a/vibn-code +++ b/vibn-code @@ -1 +1 @@ -Subproject commit 5783cc7931f38c9b9798e482918bfa7a54d9e06c +Subproject commit 8c1c14de0bfcb12d0c0cd2c59d7baa93d4b7eaf1 diff --git a/vibn-frontend/app/(onboarding)/onboarding/onboarding-agency-mock.ts b/vibn-frontend/app/(onboarding)/onboarding/onboarding-agency-mock.ts new file mode 100644 index 00000000..371c2ba6 --- /dev/null +++ b/vibn-frontend/app/(onboarding)/onboarding/onboarding-agency-mock.ts @@ -0,0 +1,596 @@ +// Mock data for the contractor onboarding. 
// The UI is fully interactive against
// these. The cheaper model replaces each with a real endpoint (see
// onboarding-agency-types.ts for the exact shapes) — DO NOT delete the types;
// only swap the data source.
//
// Vibn builds CUSTOM TOOLS for local businesses. The consultant picks the tool
// categories they want to build; we intersect those with each SMB type's
// `softwareNeeds` (market_data_assets/smb_to_software_mapping_final.json) to
// recommend local businesses they could target.

import type {
  CityRef,
  TerritoryOpportunity,
  ToolCategory,
} from "./onboarding-agency-types";

// ── Cities (DEV FALLBACK ONLY) ───────────────────────────────────────────────
// Production city lookup hits Places API (New) Autocomplete (global, 200M+
// places) via GET /api/agency/cities — see CityLookup, which calls that endpoint
// and only falls back to this list when offline.
export const STARTER_CITIES: CityRef[] = [
  { id: "victoria-bc", name: "Victoria", region: "BC", country: "Canada", countryCode: "CA" },
  { id: "vancouver-bc", name: "Vancouver", region: "BC", country: "Canada", countryCode: "CA" },
  { id: "nanaimo-bc", name: "Nanaimo", region: "BC", country: "Canada", countryCode: "CA" },
  { id: "kelowna-bc", name: "Kelowna", region: "BC", country: "Canada", countryCode: "CA" },
  { id: "calgary-ab", name: "Calgary", region: "AB", country: "Canada", countryCode: "CA" },
  { id: "edmonton-ab", name: "Edmonton", region: "AB", country: "Canada", countryCode: "CA" },
  { id: "toronto-on", name: "Toronto", region: "ON", country: "Canada", countryCode: "CA" },
  { id: "ottawa-on", name: "Ottawa", region: "ON", country: "Canada", countryCode: "CA" },
  { id: "montreal-qc", name: "Montreal", region: "QC", country: "Canada", countryCode: "CA" },
  { id: "halifax-ns", name: "Halifax", region: "NS", country: "Canada", countryCode: "CA" },
  { id: "seattle-wa", name: "Seattle", region: "WA", country: "United States", countryCode: "US" },
  { id: "portland-or", name: "Portland", region: "OR", country: "United States", countryCode: "US" },
  { id: "san-francisco-ca", name: "San Francisco", region: "CA", country: "United States", countryCode: "US" },
  { id: "austin-tx", name: "Austin", region: "TX", country: "United States", countryCode: "US" },
  { id: "denver-co", name: "Denver", region: "CO", country: "United States", countryCode: "US" },
  { id: "chicago-il", name: "Chicago", region: "IL", country: "United States", countryCode: "US" },
  { id: "new-york-ny", name: "New York", region: "NY", country: "United States", countryCode: "US" },
  { id: "london-uk", name: "London", region: "England", country: "United Kingdom", countryCode: "GB" },
  { id: "sydney-au", name: "Sydney", region: "NSW", country: "Australia", countryCode: "AU" },
];

/** First seed city; used as the default selection. */
export const DEFAULT_CITY: CityRef = STARTER_CITIES[0];

/**
 * Human-readable label for a city: "Victoria, BC".
 *
 * When `region` is empty the label is just the city name, rather than a
 * dangling "Name, ". (Assumption to confirm: cities coming back from the live
 * endpoint may lack a region; every seed entry above has one.)
 */
export const cityLabel = (c: CityRef): string =>
  [c.name, c.region].filter(Boolean).join(", ");

/**
 * GET /api/agency/cities?q= — mock typeahead over the seed list.
 *
 * @param query Free-text search; trimmed and matched case-insensitively as a
 *   substring of either the city label ("Victoria, BC") or the country name.
 * @param limit Maximum number of results (default 8, the original fixed cap).
 * @returns The first `limit` seed cities when `query` is blank, otherwise the
 *   first `limit` matches in seed order.
 */
export function searchCities(query: string, limit = 8): CityRef[] {
  const cap = Math.max(0, limit);
  const needle = query.trim().toLowerCase();
  if (!needle) return STARTER_CITIES.slice(0, cap);
  return STARTER_CITIES.filter(
    (c) =>
      cityLabel(c).toLowerCase().includes(needle) ||
      c.country.toLowerCase().includes(needle),
  ).slice(0, cap);
}

// ── Tool categories the consultant can build ─────────────────────────────────
// GET /api/agency/tool-categories. `label` MUST match the strings in
// smb_to_software_mapping so the targeting intersection works. These are the
// common horizontal tools; the backend can expose the full ~524 from the mapping.
/**
 * Tool categories offered to the consultant, one row per category:
 * `[id, label, short, icon]`.
 *
 * `label` is the canonical software-category string that targeting intersects
 * with each SMB type's `softwareNeeds`; `short` is the card caption and `icon`
 * is a key resolved by the UI.
 */
export const TOOL_CATEGORIES: ToolCategory[] = (
  [
    ["reporting", "Reporting / Dashboard Software", "Reporting & dashboards", "chart"],
    ["scheduling", "Appointment Scheduling Software", "Booking & scheduling", "calendar"],
    ["crm", "CRM Software", "Customer CRM", "users"],
    ["invoicing", "Invoicing & Payments Software", "Invoicing & payments", "receipt"],
    ["billing", "Billing Software", "Billing", "receipt"],
    ["inventory", "Inventory Management Software", "Inventory", "box"],
    ["pos", "Retail POS System", "Point of sale", "card"],
    ["ordering", "Online Ordering Software", "Online ordering", "cart"],
    ["reservations", "Reservations & Online Bookings", "Reservations", "calendar"],
    ["fsm", "Field Service Management (FSM)", "Field service / dispatch", "clipboard"],
    ["staff", "Employee Scheduling Software", "Staff scheduling", "users"],
    ["membership", "Membership Management Software", "Memberships", "badge"],
    ["marketing", "Marketing Automation Software", "Marketing automation", "megaphone"],
    ["accounting", "Accounting Software", "Accounting", "chart"],
    ["orders", "Order Management Software", "Order management", "clipboard"],
  ] as const
).map(([id, label, short, icon]) => ({ id, label, short, icon }));

// ── SMB types + their software needs (sample from the mapping) ────────────────
// Real targeting reads the full smb_to_software_mapping; this is a grounded
// sample so the intersection responds to selections offline. Counts are mock.
/** One SMB type (a Google Business Profile category) with mock market numbers. */
type SmbTarget = {
  /** GBP category key, e.g. "gcid:dentist". */
  gcid: string;
  displayName: string;
  /** Software-category labels this SMB type needs (joined against the consultant's tools). */
  softwareNeeds: string[];
  /** Mock count of local businesses of this type. */
  businessCount: number;
  /** Fraction (0..1) of those businesses counted as having a weak presence. */
  weakPct: number;
  /** Mock count already claimed by a Vibn consultant. */
  claimed: number;
};

const SMB_TARGETS: SmbTarget[] = [
  {
    gcid: "gcid:dentist", displayName: "Dentists", businessCount: 312, weakPct: 0.71, claimed: 0,
    softwareNeeds: [
      "Dental Practice Management",
      "EHR / EMR Software",
      "Medical Billing Software",
      "Patient Scheduling Software",
      "Appointment Scheduling Software",
    ],
  },
  {
    gcid: "gcid:plumber", displayName: "Plumbers", businessCount: 287, weakPct: 0.86, claimed: 0,
    softwareNeeds: [
      "Plumbing Software",
      "Field Service Management (FSM)",
      "Scheduling Software",
      "Billing Software",
      "Invoicing & Payments Software",
    ],
  },
  {
    gcid: "gcid:hvac_contractor", displayName: "HVAC contractors", businessCount: 248, weakPct: 0.83, claimed: 1,
    softwareNeeds: [
      "HVAC Software",
      "Field Service Management (FSM)",
      "Accounting Software",
      "Invoicing & Payments Software",
    ],
  },
  {
    gcid: "gcid:hair_salon", displayName: "Hair salons", businessCount: 524, weakPct: 0.74, claimed: 6,
    softwareNeeds: [
      "Salon / Spa Management Software",
      "Appointment Scheduling Software",
      "Billing Software",
      "CRM Software",
    ],
  },
  {
    gcid: "gcid:beauty_salon", displayName: "Beauty salons", businessCount: 411, weakPct: 0.76, claimed: 3,
    softwareNeeds: [
      "Salon / Spa Management Software",
      "Appointment Scheduling Software",
      "CRM Software",
      "Retail POS System",
      "Accounting Software",
    ],
  },
  {
    gcid: "gcid:auto_repair_shop", displayName: "Auto repair shops", businessCount: 198, weakPct: 0.8, claimed: 1,
    softwareNeeds: [
      "Auto Repair Shop Software",
      "Invoicing & Payments Software",
      "Appointment Scheduling Software",
      "Inventory Management Software",
      "CRM Software",
    ],
  },
  {
    gcid: "gcid:gym", displayName: "Gyms & fitness studios", businessCount: 134, weakPct: 0.58, claimed: 2,
    softwareNeeds: [
      "Fitness & Gym Management Software",
      "Membership Management Software",
      "Appointment Scheduling Software",
      "Billing Software",
      "CRM Software",
    ],
  },
  {
    gcid: "gcid:restaurant", displayName: "Restaurants", businessCount: 642, weakPct: 0.69, claimed: 5,
    softwareNeeds: [
      "Restaurant POS Software",
      "Inventory Management Software",
      "Employee Scheduling Software",
      "Online Ordering Software",
    ],
  },
  {
    gcid: "gcid:medical_spa", displayName: "Med spas", businessCount: 96, weakPct: 0.62, claimed: 0,
    softwareNeeds: [
      "Medical Spa Software",
      "Appointment Scheduling Software",
      "CRM Software",
      "Retail POS System",
      "Marketing Automation Software",
    ],
  },
  {
    gcid: "gcid:electrician", displayName: "Electricians", businessCount: 233, weakPct: 0.82, claimed: 1,
    softwareNeeds: [
      "Electrical Contractor Software",
      "Field Service Management (FSM)",
      "Accounting Software",
      "Invoicing & Payments Software",
    ],
  },
  {
    gcid: "gcid:veterinarian", displayName: "Veterinarians", businessCount: 88, weakPct: 0.64, claimed: 0,
    softwareNeeds: [
      "Veterinary Management Software",
      "Appointment Scheduling Software",
      "Inventory Management Software",
      "Medical Billing Software",
    ],
  },
  {
    gcid: "gcid:coffee_shop", displayName: "Coffee shops", businessCount: 276, weakPct: 0.67, claimed: 4,
    softwareNeeds: [
      "Restaurant POS Software",
      "Inventory Management Software",
      "Employee Scheduling Software",
      "Marketing Automation Software",
      "Online Ordering Software",
    ],
  },
  {
    gcid: "gcid:florist", displayName: "Florists", businessCount: 109, weakPct: 0.78, claimed: 0,
    softwareNeeds: [
      "Florist Software",
      "Retail POS System",
      "Inventory Management Software",
      "Online Ordering Software",
      "Order Management Software",
    ],
  },
  {
    gcid: "gcid:landscaper", displayName: "Landscapers", businessCount: 264, weakPct: 0.85, claimed: 1,
    softwareNeeds: [
      "Landscape / Lawn Care Software",
      "Field Service Management (FSM)",
      "Appointment Scheduling Software",
      "Billing Software",
      "CRM Software",
    ],
  },
  {
    gcid: "gcid:accounting_firm", displayName: "Accounting firms", businessCount: 187, weakPct: 0.59, claimed: 2,
    softwareNeeds: [
      "Accounting Practice Management",
      "Accounting Software",
      "CRM Software",
    ],
  },
  {
    gcid: "gcid:bakery", displayName: "Bakeries", businessCount: 142, weakPct: 0.72, claimed: 0,
    softwareNeeds: [
      "Bakery Software",
      "Inventory Management Software",
      "Order Management Software",
      "Employee Scheduling Software",
    ],
  },
];

// ── AI expertise → tool categories ───────────────────────────────────────────
// POST /api/agency/analyze-expertise { text } -> { tools }. The real version is
// an LLM call; this is a keyword stub so the flow works offline. Maps free-text
// expertise to canonical tool-category labels that join to the SMB mapping.
const EXPERTISE_KEYWORDS: Array<{ re: RegExp; label: string }> = [
  {
    re: /report|dashboard|analytic|insight|\bbi\b/i,
    label: "Reporting / Dashboard Software",
  },
  {
    re: /crm|customer relationship|\bleads?\b|\bclients?\b/i,
    label: "CRM Software",
  },
  {
    // `\bbook(?!keep)`: "book", "booking", "bookable" count as scheduling, but
    // "bookkeeping" (accounting) and "facebook"/"notebook" must not.
    re: /schedul|\bbook(?!keep)|appointment|calendar/i,
    label: "Appointment Scheduling Software",
  },
  { re: /invoic|payment/i, label: "Invoicing & Payments Software" },
  { re: /billing/i, label: "Billing Software" },
  { re: /inventor|stock/i, label: "Inventory Management Software" },
  {
    re: /\bpos\b|point of sale|checkout|register/i,
    label: "Retail POS System",
  },
  { re: /online order|ordering|takeout/i, label: "Online Ordering Software" },
  { re: /reservation/i, label: "Reservations & Online Bookings" },
  {
    re: /field service|dispatch|\bfsm\b|work order/i,
    label: "Field Service Management (FSM)",
  },
  {
    re: /staff schedul|employee schedul|\brota\b|\bshifts?\b/i,
    label: "Employee Scheduling Software",
  },
  // Word-anchored so "remember" does not read as a membership tool.
  { re: /\bmember/i, label: "Membership Management Software" },
  {
    re: /marketing|campaign|email automat|newsletter/i,
    label: "Marketing Automation Software",
  },
  { re: /account|bookkeep/i, label: "Accounting Software" },
  {
    re: /order management|fulfilment|fulfillment/i,
    label: "Order Management Software",
  },
];

/**
 * Keyword stub for POST /api/agency/analyze-expertise.
 *
 * @param text The consultant's free-text description of what they build.
 * @returns Canonical tool-category labels whose keyword pattern occurs in
 *   `text`, de-duplicated, in `EXPERTISE_KEYWORDS` order. Empty when nothing
 *   matches.
 */
export function extractTools(text: string): string[] {
  const labels = EXPERTISE_KEYWORDS.filter((k) => k.re.test(text)).map(
    (k) => k.label,
  );
  return [...new Set(labels)];
}

/**
 * POST /api/agency/targets { city, tools }
 * Recommends SMB types whose softwareNeeds intersect the consultant's tools.
 *
 * Scoring, per SMB type:
 *   saturation = claimed / max(1, businessCount / 20)
 *   demand     = weakPct × (1 − min(1, saturation))
 *   fit        = min(1, matched tools / distinct requested tools)
 *   score      = round((0.7 × demand + 0.3 × fit) × 100), clamped to 28..98
 *
 * @param city Only `city.name` is used (copied onto each result); the mock
 *   counts are the same for every city.
 * @param tools Tool-category labels the consultant builds. Duplicates are
 *   ignored so they cannot dilute `fit`.
 * @returns Matching opportunities, highest `opportunityScore` first; empty
 *   when no SMB type needs any of `tools`.
 */
export function mockTargets(
  city: CityRef,
  tools: string[],
): TerritoryOpportunity[] {
  const want = new Set(tools);
  // Reporting / dashboards apply to virtually every business, so treat it as a
  // universal need rather than listing it on every SMB type.
  const UNIVERSAL = ["Reporting / Dashboard Software"];
  return SMB_TARGETS.map((s, i) => {
    const needs = [
      ...s.softwareNeeds,
      ...UNIVERSAL.filter((u) => !s.softwareNeeds.includes(u)),
    ];
    return { s, i, matchedTools: needs.filter((n) => want.has(n)) };
  })
    .filter((x) => x.matchedTools.length > 0)
    .map(({ s, matchedTools, i }) => {
      const weak = Math.round(s.businessCount * s.weakPct);
      const saturation = s.claimed / Math.max(1, s.businessCount / 20);
      const demand = s.weakPct * (1 - Math.min(1, saturation));
      // Divide by the number of DISTINCT requested tools.
      const fit = Math.min(1, matchedTools.length / Math.max(1, want.size));
      const score = Math.max(
        28,
        Math.min(98, Math.round((demand * 0.7 + fit * 0.3) * 100)),
      );
      const status: TerritoryOpportunity["status"] =
        s.claimed === 0 ? "open" : s.claimed <= 2 ? "contested" : "claimed";
      return {
        // Index suffix is the SMB type's position in SMB_TARGETS.
        id: `${s.gcid}-${i}`,
        gcid: s.gcid,
        niche: s.displayName,
        city: city.name,
        businessCount: s.businessCount,
        weakWebsiteCount: weak,
        vibnClaimedCount: s.claimed,
        opportunityScore: score,
        status,
        matchedTools,
      };
    })
    .sort((a, b) => b.opportunityScore - a.opportunityScore);
}

// ── patch boundary (header text preserved verbatim) ──────────────────────────
// diff --git a/vibn-frontend/app/(onboarding)/onboarding/onboarding-agency-types.ts b/vibn-frontend/app/(onboarding)/onboarding/onboarding-agency-types.ts
// new file mode 100644
// index 00000000..d1919082
// --- /dev/null
// +++ b/vibn-frontend/app/(onboarding)/onboarding/onboarding-agency-types.ts
// @@ -0,0 +1,123 @@
// Data contracts for the contractor-first "Set up your agency" onboarding.
//
// THE SEAM: the frontend (onboarding-agency.tsx) is built fully against these
// types + mock data (onboarding-agency-mock.ts). The cheaper model implements
// the endpoints below to these exact shapes — no shared write scope.
//
// Endpoints to implement (all scoped to the signed-in consultant's workspace):
//   GET  /api/agency/cities?q=
//     -> CityRef[] (Places API (New) Autocomplete → Place Details for the
//        locality's region/country/latlng — GLOBAL, 200M+ places)
//   POST /api/agency/analyze-expertise { text: string }
//     -> { tools: string[] } (LLM extracts canonical tool-category labels from
//        the consultant's free-text expertise)
//   POST /api/agency/targets { city: CityRef, tools: string[] }
//     -> TerritoryOpportunity[] (SMB types whose softwareNeeds ∩ tools; per-city
//        counts via Places Aggregate API × gap × saturation)
//   POST /api/agency AgencyOnboardingResult
//     -> { workspaceSlug: string; prospectId: string }
//
// DATA ALIGNMENT (see market_data_assets/):
//   - Cities are GLOBAL via Places API (New) Autocomplete + Place Details — not a fixed list.
//   - Niches are Google Business Profile categories keyed by `gcid` (e.g. "gcid:dentist");
//     the gcid suffix maps to a Places `type` for the Aggregate query.
Canonical list in +// gbp_categories.json / canada.md / us.md. +// - Per-city business counts come from the Places Aggregate API (computeInsights), +// filtered by place type within the city's area — works anywhere. +// dfs_categories_raw.json / camp_market_sizes.csv are country-level benchmarks for +// reference only; CityRef.countryCode joins to them where coverage exists (CA/US today). +// - TOOLS the consultant builds are software categories; the targeting step +// intersects them with each SMB type's `softwareNeeds` +// (smb_to_software_mapping_final.json) to recommend local businesses to target. +// +// "Illustrative" economics: until the metering ledger is live, cost/price numbers +// are platform estimates and MUST be labeled as such in the UI. + +/** + * A geocoded place. Sourced from Places Autocomplete at runtime; `countryCode` + * is the join key into our country-split market data (camp_market_sizes.csv etc.). + */ +export interface CityRef { + /** Stable id — a Places id, or a `${name}-${region}` slug for seed data. */ + id: string; + /** "Victoria" */ + name: string; + /** Province / state code — "BC", "WA". */ + region: string; + /** "Canada" / "United States". */ + country: string; + /** ISO 3166-1 alpha-2 ("CA", "US", "GB"…). From Place Details; benchmark join key. */ + countryCode: string; + lat?: number; + lng?: number; +} + +export interface AgencyProfile { + /** Agency / studio name the consultant chooses. */ + name: string; + /** Optional one-line positioning. */ + tagline?: string; + /** Optional uploaded logo. */ + logoUrl?: string; + /** Home city — seeds the targeting step. */ + city?: CityRef; + /** Online presence the consultant already has (else Vibn can set them up). */ + hasWebsite?: boolean; + websiteUrl?: string; + hasSocials?: boolean; + hasBlog?: boolean; + hasCustomDomain?: boolean; + hasExistingClients?: boolean; +} + +/** + * A category of custom tool the consultant builds. 
`label` matches the + * softwareNeeds strings in the SMB mapping so targeting can intersect on it. + */ +export interface ToolCategory { + id: string; + /** Canonical software-category label (matches smb_to_software_mapping). */ + label: string; + /** Short human label for the card. */ + short: string; + /** Icon key resolved by the UI. */ + icon: string; +} + +export type TerritoryStatus = "open" | "contested" | "claimed"; + +export interface TerritoryOpportunity { + id: string; + /** Google Business Profile category key, e.g. "gcid:dentist". Joins to market data. */ + gcid: string; + /** Display label, e.g. "Dentists" (GBP displayName). */ + niche: string; + city: string; + /** Total local businesses in this niche (TAM). */ + businessCount: number; + /** How many have a weak / missing website or Google profile (the gap). */ + weakWebsiteCount: number; + /** How many are already served by a Vibn consultant (saturation). */ + vibnClaimedCount: number; + /** 0..100 — demand × gap × low-saturation, biased by tool-fit. Higher = more open. */ + opportunityScore: number; + status: TerritoryStatus; + /** Which of the consultant's tools this SMB needs (the intersection). */ + matchedTools?: string[]; + /** Secondary matching niches discovered for this business */ + alternativeCategories?: Array<{ + gcid: string; + typeLabel: string; + presetTools: string[]; + description: string; + }>; +} + +/** Final payload posted to create the agency workspace; then route to the dashboard. */ +export interface AgencyOnboardingResult { + profile: AgencyProfile; + /** The consultant's free-text expertise ("what I love helping people build"). */ + expertise: string; + /** Tool-category labels the AI extracted from `expertise` (match the mapping). 
*/ + tools: string[]; +} diff --git a/vibn-frontend/app/(onboarding)/onboarding/onboarding-agency.tsx b/vibn-frontend/app/(onboarding)/onboarding/onboarding-agency.tsx new file mode 100644 index 00000000..e55f723e --- /dev/null +++ b/vibn-frontend/app/(onboarding)/onboarding/onboarding-agency.tsx @@ -0,0 +1,493 @@ +"use client"; + +import React from "react"; +import { + WizardTop, + WizardBody, + WizardQ, + WizardFooter, + Field, +} from "./onboarding-primitives"; +import { + cityLabel, + extractTools, + searchCities, +} from "./onboarding-agency-mock"; +import { + type AgencyOnboardingResult, + type AgencyProfile, + type CityRef, +} from "./onboarding-agency-types"; + +// Contractor-first onboarding — "Set up your AI agency". +// Vibn builds CUSTOM TOOLS for local businesses. We capture who the consultant +// is and what they love building, then drop them into their dashboard — where +// the local-business targeting recommendations live as an ongoing feature. +// Steps: identity → presence → expertise → (dashboard). +// Built to the Vibn design concept: dark wizard surface, coral accent, used sparingly. +// Fully interactive against mock data; swap the mock calls for the endpoints +// documented in onboarding-agency-types.ts. + +const STEPS = ["identity", "presence", "expertise"] as const; +type Step = (typeof STEPS)[number]; + +export interface AgencyOnboardingProps { + /** Fired with the assembled result; wire to POST /api/agency then route to the dashboard. */ + onComplete: (result: AgencyOnboardingResult) => void; + /** Save & exit. */ + onExit: () => void; + /** Back to the front-door fork. 
*/ + onBack: () => void; +} + +export function AgencyOnboarding({ + onComplete, + onExit, + onBack, +}: AgencyOnboardingProps) { + const [stepIdx, setStepIdx] = React.useState(0); + const step: Step = STEPS[stepIdx]; + + const [profile, setProfile] = React.useState({ + name: "", + city: undefined, + }); + const [expertise, setExpertise] = React.useState(""); + const detectedTools = React.useMemo( + () => extractTools(expertise), + [expertise], + ); + + const goNext = () => setStepIdx((i) => Math.min(STEPS.length - 1, i + 1)); + const goPrev = () => (stepIdx === 0 ? onBack() : setStepIdx((i) => i - 1)); + + const finish = () => { + onComplete({ + profile, + expertise, + tools: detectedTools, + }); + }; + + const stepLabel = ( + { + identity: "Your agency", + presence: "Your presence", + expertise: "Ideal customer", + } as Record + )[step]; + + return ( + <> + + {step === "identity" && ( + + )} + {step === "presence" && ( + + )} + {step === "expertise" && ( + + )} + + ); +} + +// ── Step 2 · Identity ──────────────────────────────────────────────────────── +function IdentityStep({ + profile, + onChange, + onNext, +}: { + profile: AgencyProfile; + onChange: (p: AgencyProfile) => void; + onNext: () => void; +}) { + return ( + + + + onChange({ ...profile, name: e.target.value })} + /> + + + onChange({ ...profile, city: c })} + /> + + 1 && !!profile.city} + nextLabel="Continue" + hint={ + profile.name && profile.city ? "Press ⌘↵" : "Name + city to continue" + } + /> + + ); +} + +// ── City lookup (typeahead) ────────────────────────────────────────────────── +// Hits Places API (New) Autocomplete via GET /api/agency/cities; falls back to +// the seed list only when that endpoint isn't reachable (offline dev). +export function CityLookup({ + value, + onChange, +}: { + value?: CityRef; + onChange: (c: CityRef) => void; +}) { + const [query, setQuery] = React.useState(value ? 
cityLabel(value) : ""); + const [open, setOpen] = React.useState(false); + const [results, setResults] = React.useState(() => + searchCities(""), + ); + const listboxId = React.useId(); + + React.useEffect(() => { + let cancelled = false; + const handle = setTimeout(async () => { + let next: CityRef[] | null = null; + try { + const res = await fetch( + `/api/agency/cities?q=${encodeURIComponent(query)}`, + ); + if (res.ok) { + const data = await res.json(); + if (Array.isArray(data)) next = data as CityRef[]; + } + } catch { + /* offline / endpoint missing — use the seed fallback */ + } + if (cancelled) return; + setResults(next && next.length ? next : searchCities(query)); + }, 180); + return () => { + cancelled = true; + clearTimeout(handle); + }; + }, [query]); + + const select = (c: CityRef) => { + onChange(c); + setQuery(cityLabel(c)); + setOpen(false); + }; + + return ( +
+ { + setQuery(e.target.value); + setOpen(true); + }} + onFocus={() => setOpen(true)} + onBlur={() => setTimeout(() => setOpen(false), 120)} + /> + {open && results.length > 0 && ( +
+ {results.map((c) => { + const active = value?.id === c.id; + return ( + + ); + })} +
+ )} +
+ ); +} + +// ── Step 3 · Online presence ───────────────────────────────────────────────── +function PresenceStep({ + profile, + onChange, + onNext, +}: { + profile: AgencyProfile; + onChange: (p: AgencyProfile) => void; + onNext: () => void; +}) { + return ( + + +
+ + onChange({ ...profile, hasWebsite: !profile.hasWebsite }) + } + /> + {profile.hasWebsite && ( + + onChange({ ...profile, websiteUrl: e.target.value }) + } + /> + )} + + onChange({ ...profile, hasSocials: !profile.hasSocials }) + } + /> + onChange({ ...profile, hasBlog: !profile.hasBlog })} + /> + + onChange({ ...profile, hasCustomDomain: !profile.hasCustomDomain }) + } + /> + + onChange({ + ...profile, + hasExistingClients: !profile.hasExistingClients, + }) + } + /> +
+ +
+ ); +} + +function ToggleRow({ + label, + on, + onToggle, +}: { + label: string; + on: boolean; + onToggle: () => void; +}) { + return ( + + ); +} + +// ── Step 4 · Your ideal customer (final step → dashboard) ──────────────────── +function IdealCustomerStep({ + value, + onChange, + onNext, +}: { + value: string; + onChange: (s: string) => void; + onNext: () => void; +}) { + const ready = value.trim().length >= 8; + return ( + + + +