fix(ai): implement Phase 2 and 3 prompt recommendations from review
This commit is contained in:
48
fix_mcp_fs.js
Normal file
48
fix_mcp_fs.js
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
const fs = require('fs');
|
||||||
|
|
||||||
|
const file = 'vibn-frontend/app/api/mcp/route.ts';
|
||||||
|
let code = fs.readFileSync(file, 'utf8');
|
||||||
|
|
||||||
|
// 1. Fix toolFsWrite
|
||||||
|
const oldFsWriteReturn = ` return NextResponse.json({
|
||||||
|
result: { path, bytesWritten: Buffer.byteLength(content, "utf8") },
|
||||||
|
});`;
|
||||||
|
|
||||||
|
const newFsWriteReturn = ` const { createHash } = require('crypto');
|
||||||
|
const bytes = Buffer.byteLength(content, "utf8");
|
||||||
|
const sha256 = createHash("sha256").update(content, "utf8").digest("hex");
|
||||||
|
return NextResponse.json({
|
||||||
|
result: { ok: true, path, bytes, sha256 },
|
||||||
|
});`;
|
||||||
|
|
||||||
|
code = code.replace(oldFsWriteReturn, newFsWriteReturn);
|
||||||
|
|
||||||
|
|
||||||
|
// 2. Fix toolFsEdit
|
||||||
|
const oldFsEditCmd = `const cmd = \`python3 -c "$(printf %s \${shq(pyB64)} | base64 -d)" <<< "$(printf %s \${shq(b64)} | base64 -d)"\`;`;
|
||||||
|
const newFsEditCmd = `const cmd = \`python3 -c "$(printf %s \${shq(pyB64)} | base64 -d)" <<< "$(printf %s \${shq(b64)} | base64 -d)" && echo "---" && sha256sum \${shq(path)} | cut -d' ' -f1 && wc -c < \${shq(path)}\`;`;
|
||||||
|
|
||||||
|
code = code.replace(oldFsEditCmd, newFsEditCmd);
|
||||||
|
|
||||||
|
const oldFsEditReturn = ` return NextResponse.json({
|
||||||
|
result: { path, replacements: parseInt(r.stdout.trim() || "0", 10) },
|
||||||
|
});`;
|
||||||
|
|
||||||
|
const newFsEditReturn = ` const stdoutParts = r.stdout.split('---');
|
||||||
|
const replacementsStr = stdoutParts[0].trim();
|
||||||
|
const hashAndSize = stdoutParts[1] ? stdoutParts[1].trim().split('\\n') : [];
|
||||||
|
|
||||||
|
return NextResponse.json({
|
||||||
|
result: {
|
||||||
|
ok: true,
|
||||||
|
path,
|
||||||
|
replacements: parseInt(replacementsStr || "0", 10),
|
||||||
|
sha256: hashAndSize[0] ? hashAndSize[0].trim() : undefined,
|
||||||
|
bytes: hashAndSize[1] ? parseInt(hashAndSize[1].trim(), 10) : undefined
|
||||||
|
},
|
||||||
|
});`;
|
||||||
|
|
||||||
|
code = code.replace(oldFsEditReturn, newFsEditReturn);
|
||||||
|
|
||||||
|
fs.writeFileSync(file, code);
|
||||||
|
console.log("Patched toolFsWrite and toolFsEdit");
|
||||||
82
fix_mcp_fs_normalize.js
Normal file
82
fix_mcp_fs_normalize.js
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
const fs = require('fs');
|
||||||
|
|
||||||
|
const file = 'vibn-frontend/app/api/mcp/route.ts';
|
||||||
|
let code = fs.readFileSync(file, 'utf8');
|
||||||
|
|
||||||
|
const oldNormalize = `function normalizeFsPath(p: string): string | NextResponse {
|
||||||
|
if (!p || typeof p !== "string") {
|
||||||
|
return NextResponse.json(
|
||||||
|
{ error: 'Param "path" is required' },
|
||||||
|
{ status: 400 },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let abs: string;
|
||||||
|
if (p.startsWith("/")) {
|
||||||
|
abs = p;
|
||||||
|
} else {
|
||||||
|
abs = \`\${FS_ROOT}/\${p}\`.replace(/\\/+/g, "/");
|
||||||
|
}
|
||||||
|
// Disallow .. traversal that escapes /workspace.
|
||||||
|
const norm = abs.replace(/\\/[^/]+\\/\\.\\.(?=\\/|$)/g, "").replace(/\\/+/g, "/");
|
||||||
|
if (!norm.startsWith(FS_ROOT) && norm !== FS_ROOT) {
|
||||||
|
return NextResponse.json(
|
||||||
|
{
|
||||||
|
error: \`Path "\${p}" is outside \${FS_ROOT}; use shell.exec for system paths.\`,
|
||||||
|
},
|
||||||
|
{ status: 400 },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
return norm;
|
||||||
|
}`;
|
||||||
|
|
||||||
|
const newNormalize = `function normalizeFsPath(
|
||||||
|
p: string,
|
||||||
|
projectSlug?: string,
|
||||||
|
): string | NextResponse {
|
||||||
|
if (!p || typeof p !== "string") {
|
||||||
|
return NextResponse.json(
|
||||||
|
{ error: 'Param "path" is required' },
|
||||||
|
{ status: 400 },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
const projectRoot = projectSlug ? \`\${FS_ROOT}/\${projectSlug}\` : FS_ROOT;
|
||||||
|
let abs: string;
|
||||||
|
if (p.startsWith("/")) {
|
||||||
|
abs = p;
|
||||||
|
} else {
|
||||||
|
abs = \`\${projectRoot}/\${p}\`.replace(/\\/+/g, "/");
|
||||||
|
}
|
||||||
|
const norm = abs.replace(/\\/[^/]+\\/\\.\\.(?=\\/|$)/g, "").replace(/\\/+/g, "/");
|
||||||
|
|
||||||
|
// When projectSlug is set, REJECT paths outside the project root.
|
||||||
|
if (projectSlug) {
|
||||||
|
if (!norm.startsWith(projectRoot) && norm !== projectRoot) {
|
||||||
|
return NextResponse.json(
|
||||||
|
{
|
||||||
|
ok: false,
|
||||||
|
error: \`PATH_OUTSIDE_PROJECT: path "\${p}" resolves to "\${norm}" which is outside the active project at "\${projectRoot}". Did you mean "\${projectRoot}/\${p.replace(/^\\/+/, "")}"?\`,
|
||||||
|
},
|
||||||
|
{ status: 400 },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Workspace-level fallback (legacy behaviour)
|
||||||
|
if (!norm.startsWith(FS_ROOT) && norm !== FS_ROOT) {
|
||||||
|
return NextResponse.json(
|
||||||
|
{ error: \`Path "\${p}" is outside \${FS_ROOT}; use shell.exec for system paths.\` },
|
||||||
|
{ status: 400 },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return norm;
|
||||||
|
}`;
|
||||||
|
|
||||||
|
code = code.replace(oldNormalize, newNormalize);
|
||||||
|
|
||||||
|
code = code.replace(/const path = normalizeFsPath\(String\(params\.path \?\? ""\)\);/g, 'const path = normalizeFsPath(String(params.path ?? ""), project.slug);');
|
||||||
|
code = code.replace(/const path = normalizeFsPath\(String\(params\.path \?\? "\/workspace"\)\);/g, 'const path = normalizeFsPath(String(params.path ?? "/workspace"), project.slug);');
|
||||||
|
code = code.replace(/const cwd = normalizeFsPath\(String\(params\.cwd \?\? "\/workspace"\)\);/g, 'const cwd = normalizeFsPath(String(params.cwd ?? "/workspace"), project.slug);');
|
||||||
|
code = code.replace(/const targetPath = normalizeFsPath\(String\(params\.targetPath \?\? ""\)\);/g, 'const targetPath = normalizeFsPath(String(params.targetPath ?? ""), project.slug);');
|
||||||
|
|
||||||
|
fs.writeFileSync(file, code);
|
||||||
|
console.log("Patched normalizeFsPath with projectSlug scoping");
|
||||||
601
may-19-ai-review.md
Normal file
601
may-19-ai-review.md
Normal file
@@ -0,0 +1,601 @@
|
|||||||
|
# Vibn Chat Harness — Fix Checklist
|
||||||
|
|
||||||
|
Work through items in order. Each fix has a clear **What**, **Where**,
|
||||||
|
**How**, and **Verify** section. Don't skip the verify step — many of
|
||||||
|
these fixes interact with each other and silent failures will
|
||||||
|
compound.
|
||||||
|
|
||||||
|
Mark `[x]` as you complete each item. If you can't complete an item,
|
||||||
|
add a short note under it explaining why and move on.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 1 — Backend fixes (highest leverage; do these first)
|
||||||
|
|
||||||
|
These three fix the failure modes the prompt currently promises but
|
||||||
|
the backend doesn't deliver. Until they're done, the prompt's hard
|
||||||
|
rules are partly fiction.
|
||||||
|
|
||||||
|
### [ ] 1. Add `sha256` and `bytes` to `fs.write` and `fs.edit` responses
|
||||||
|
|
||||||
|
**What:** The prompt's hard rules tell the model to cite `sha256` and
|
||||||
|
`bytes` as evidence of file changes. The tools don't return those
|
||||||
|
fields today, so the model is looking for evidence that doesn't exist.
|
||||||
|
|
||||||
|
**Where:** `app/api/mcp/route.ts` — functions `toolFsWrite` and
|
||||||
|
`toolFsEdit`.
|
||||||
|
|
||||||
|
**How:**
|
||||||
|
- In `toolFsWrite`, after the `runFsCmd` success branch, before
|
||||||
|
returning, compute the sha256 of `content` and return it alongside
|
||||||
|
`bytesWritten` renamed to `bytes`:
|
||||||
|
```ts
|
||||||
|
import { createHash } from "crypto";
|
||||||
|
// ...
|
||||||
|
const bytes = Buffer.byteLength(content, "utf8");
|
||||||
|
const sha256 = createHash("sha256").update(content, "utf8").digest("hex");
|
||||||
|
return NextResponse.json({
|
||||||
|
result: { ok: true, path, bytes, sha256 },
|
||||||
|
});
|
||||||
|
```
|
||||||
|
- In `toolFsEdit`, you don't have the final content in memory. Add a
|
||||||
|
second command that prints the sha + bytes after the edit:
|
||||||
|
```ts
|
||||||
|
const cmd = `python3 -c "$(printf %s ${shq(pyB64)} | base64 -d)" <<< "$(printf %s ${shq(b64)} | base64 -d)" && echo "---" && sha256sum ${shq(path)} | cut -d' ' -f1 && wc -c < ${shq(path)}`;
|
||||||
|
```
|
||||||
|
Then parse the trailing two lines after `---` for sha and bytes.
|
||||||
|
- Update the response shape:
|
||||||
|
```ts
|
||||||
|
return NextResponse.json({
|
||||||
|
result: { ok: true, path, replacements, bytes, sha256 },
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
- [ ] Call `fs_write` with `{ path: "test.txt", content: "hello" }`.
|
||||||
|
Confirm response contains `sha256` (64 hex chars) and `bytes: 5`.
|
||||||
|
- [ ] Call `fs_edit` to change the same file. Confirm response
|
||||||
|
contains a new `sha256` and updated `bytes`.
|
||||||
|
- [ ] Replay a turn that does `fs_write` followed by `fs_read` of the
|
||||||
|
same file in chat. The model should now produce text like
|
||||||
|
"Updated `test.txt` (sha=a3f5c2…, 5b)" instead of a bare claim.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### [ ] 2. Add project-slug scoping to `normalizeFsPath`
|
||||||
|
|
||||||
|
**What:** The prompt tells the model to use paths like `src/app/page.tsx`
|
||||||
|
and claims the tool layer rejects writes outside the project root.
|
||||||
|
The tool layer does NOT do this today. It resolves all relative paths
|
||||||
|
under `/workspace` (workspace-level), so `fs_write { path: "src/app/page.tsx" }`
|
||||||
|
ends up at `/workspace/src/app/page.tsx` — the ghost file from the
|
||||||
|
failing session. Five different path conventions were used for the
|
||||||
|
same file in one session because nothing enforces the rule.
|
||||||
|
|
||||||
|
**Where:** `app/api/mcp/route.ts` — function `normalizeFsPath` and
|
||||||
|
every caller in `toolFsRead`, `toolFsWrite`, `toolFsEdit`,
|
||||||
|
`toolFsList`, `toolFsDelete`, `toolFsGlob`, `toolFsGrep`, `toolFsTree`,
|
||||||
|
`toolRequestVisualQA`, `toolGenerateMedia`.
|
||||||
|
|
||||||
|
**How:**
|
||||||
|
|
||||||
|
- Change `normalizeFsPath` to accept an optional `projectSlug`:
|
||||||
|
```ts
|
||||||
|
function normalizeFsPath(
|
||||||
|
p: string,
|
||||||
|
projectSlug?: string,
|
||||||
|
): string | NextResponse {
|
||||||
|
if (!p || typeof p !== "string") {
|
||||||
|
return NextResponse.json(
|
||||||
|
{ error: 'Param "path" is required' },
|
||||||
|
{ status: 400 },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
const projectRoot = projectSlug ? `${FS_ROOT}/${projectSlug}` : FS_ROOT;
|
||||||
|
let abs: string;
|
||||||
|
if (p.startsWith("/")) {
|
||||||
|
abs = p;
|
||||||
|
} else {
|
||||||
|
abs = `${projectRoot}/${p}`.replace(/\/+/g, "/");
|
||||||
|
}
|
||||||
|
const norm = abs.replace(/\/[^/]+\/\.\.(?=\/|$)/g, "").replace(/\/+/g, "/");
|
||||||
|
|
||||||
|
// When projectSlug is set, REJECT paths outside the project root.
|
||||||
|
if (projectSlug) {
|
||||||
|
if (!norm.startsWith(projectRoot) && norm !== projectRoot) {
|
||||||
|
return NextResponse.json(
|
||||||
|
{
|
||||||
|
ok: false,
|
||||||
|
error: `PATH_OUTSIDE_PROJECT: path "${p}" resolves to "${norm}" which is outside the active project at "${projectRoot}". Did you mean "${projectRoot}/${p.replace(/^\/+/, "")}"?`,
|
||||||
|
},
|
||||||
|
{ status: 400 },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Workspace-level fallback (legacy behaviour)
|
||||||
|
if (!norm.startsWith(FS_ROOT) && norm !== FS_ROOT) {
|
||||||
|
return NextResponse.json(
|
||||||
|
{ error: `Path "${p}" is outside ${FS_ROOT}; use shell.exec for system paths.` },
|
||||||
|
{ status: 400 },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return norm;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
- In every fs_* tool that already calls `resolveProjectOr404`, pass
|
||||||
|
the slug:
|
||||||
|
```ts
|
||||||
|
const path = normalizeFsPath(String(params.path ?? ""), project.slug);
|
||||||
|
```
|
||||||
|
- `toolFsRead`, `toolFsWrite`, `toolFsEdit`, `toolFsDelete`,
|
||||||
|
`toolRequestVisualQA`, `toolGenerateMedia` all already have
|
||||||
|
`project` in scope — pass `project.slug`.
|
||||||
|
- `toolFsList`, `toolFsGlob`, `toolFsGrep`, `toolFsTree` use
|
||||||
|
`params.path` or `params.cwd` — same treatment, pass `project.slug`.
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
- [ ] From a project-scoped thread, call `fs_write { path: "/workspace/src/app/page.tsx", content: "x" }`.
|
||||||
|
Expect `PATH_OUTSIDE_PROJECT` error.
|
||||||
|
- [ ] From the same thread, call `fs_write { path: "src/app/page.tsx", content: "x" }`.
|
||||||
|
Expect success at `/workspace/<slug>/src/app/page.tsx`.
|
||||||
|
- [ ] Confirm `dev_server_start` with `command: "npm run dev"` runs
|
||||||
|
from the project root, not `/workspace`. (This is mostly already
|
||||||
|
true via dev-container logic; just confirm.)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### [ ] 3. Fix the broken `plan-extract` block
|
||||||
|
|
||||||
|
**What:** The fire-and-forget `plan-extract` block in
|
||||||
|
`app/api/chat/route.ts` has a syntax error — the `try` block builds a
|
||||||
|
transcript and then hits `}` followed by `catch` with no actual call
|
||||||
|
to `autoExtractPlanUpdates`. The body of the try is missing. Either
|
||||||
|
the auto-extraction was intentionally removed (in which case the
|
||||||
|
dead transcript-building code should also be deleted) or it was
|
||||||
|
accidentally truncated (in which case the call needs to be restored).
|
||||||
|
|
||||||
|
**Where:** `app/api/chat/route.ts`, around the second fire-and-forget
|
||||||
|
block (after the title/summary block, before `emit({ type: "done" })`).
|
||||||
|
|
||||||
|
**How:**
|
||||||
|
|
||||||
|
- Decide first: do we want auto-extraction to run? If YES, restore
|
||||||
|
the call:
|
||||||
|
```ts
|
||||||
|
(async () => {
|
||||||
|
try {
|
||||||
|
if (!threadProjectId) return;
|
||||||
|
const allMessages = [...history, finalMsg];
|
||||||
|
if (allMessages.length < 2) return;
|
||||||
|
const transcript = allMessages
|
||||||
|
.map((m) => {
|
||||||
|
const text =
|
||||||
|
typeof m.content === "string"
|
||||||
|
? m.content
|
||||||
|
: JSON.stringify(m.content);
|
||||||
|
return `${m.role.toUpperCase()}: ${text.slice(0, 1200)}`;
|
||||||
|
})
|
||||||
|
.join("\n\n");
|
||||||
|
const result = await autoExtractPlanUpdates(
|
||||||
|
threadProjectId,
|
||||||
|
transcript,
|
||||||
|
);
|
||||||
|
if (result) {
|
||||||
|
console.log(
|
||||||
|
"[chat] plan-extract:",
|
||||||
|
`${result.tasks} tasks, ${result.decisions} decisions, vision=${result.vision}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
console.warn("[chat] plan-extract failed (non-fatal):", err);
|
||||||
|
}
|
||||||
|
})().catch(() => {});
|
||||||
|
```
|
||||||
|
And re-add the `import { autoExtractPlanUpdates } from "@/lib/ai/plan-extract";`
|
||||||
|
at the top of the file.
|
||||||
|
- If NO (you removed it intentionally), delete the entire IIFE
|
||||||
|
including the transcript-building so the file compiles cleanly.
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
- [ ] Run `tsc --noEmit` on the file. Confirm no syntax errors.
|
||||||
|
- [ ] If auto-extraction restored: have a chat that mentions a
|
||||||
|
decision (e.g. "let's use Postgres"). Confirm a new entry appears
|
||||||
|
in the project's `plan.decisions` with `confidence: "auto"`.
|
||||||
|
- [ ] Tail prod logs for `[chat] plan-extract:` — should fire on
|
||||||
|
every turn with content.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 2 — Prompt fixes (now that the backend matches)
|
||||||
|
|
||||||
|
These bring the prompt into line with what the tools actually do.
|
||||||
|
|
||||||
|
### [ ] 4. Fix the `apps_containers_list` typo in the prompt
|
||||||
|
|
||||||
|
**What:** The troubleshooting section references `apps_containers_list`
|
||||||
|
but the actual tool is `apps_containers_ps`. The model will call a
|
||||||
|
tool that doesn't exist.
|
||||||
|
|
||||||
|
**Where:** `app/api/chat/route.ts`, inside `buildSystemPrompt`, in the
|
||||||
|
"## Troubleshooting" section.
|
||||||
|
|
||||||
|
**How:**
|
||||||
|
- Find: `apps_logs { uuid } + apps_containers_list { uuid }`
|
||||||
|
- Replace: `apps_logs { uuid } + apps_containers_ps { uuid }`
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
- [ ] Grep the prompt for `apps_containers_list` — no matches.
|
||||||
|
- [ ] Grep for `apps_containers_ps` — should appear in
|
||||||
|
troubleshooting and at least once in the apps section.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### [ ] 5. Soften the `ok` field rule
|
||||||
|
|
||||||
|
**What:** The current rule says "If `ok` is false (or absent, or
|
||||||
|
`exitCode` is non-zero, or `healthCheck.status` is >= 400) the
|
||||||
|
operation FAILED." The "or absent" clause is wrong — many tools
|
||||||
|
return data without an `ok` field (e.g. `projects_get`, `apps_list`,
|
||||||
|
`databases_get`). The model will treat every read as a failure.
|
||||||
|
|
||||||
|
**Where:** `buildSystemPrompt`, "Hard rules" section, the "Trust the
|
||||||
|
`ok` field" bullet.
|
||||||
|
|
||||||
|
**How:**
|
||||||
|
|
||||||
|
Replace the current rule with:
|
||||||
|
|
||||||
|
```
|
||||||
|
- **Read tool results carefully.** A tool FAILED when ANY of these
|
||||||
|
signals are present: `ok: false`, `error: "..."`, a non-zero
|
||||||
|
`exitCode`, or a `healthCheck.status` >= 400. If NONE of those
|
||||||
|
signals are present, look at the actual content of the response
|
||||||
|
to decide whether the operation succeeded. Many read-only tools
|
||||||
|
return data directly without an `ok` field — that's not a failure.
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
- [ ] Pick a recent thread where the agent called `projects_get` or
|
||||||
|
`apps_list`. Confirm the agent didn't treat the response as a
|
||||||
|
failure (look at its post-tool text — should be a normal summary,
|
||||||
|
not "the operation failed").
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### [ ] 6. Tighten the status-nudge threshold
|
||||||
|
|
||||||
|
**What:** Current thresholds are `roundsSinceText >= 8 ||
|
||||||
|
toolCallsSinceText >= 12`. With `MAX_TOOL_ROUNDS = 8`, the round-based
|
||||||
|
nudge can never fire (loop ends first). The tool-call threshold of 12
|
||||||
|
is also too lenient — users typed "test" / "hello" by round 4-5 of
|
||||||
|
silence in the failing session.
|
||||||
|
|
||||||
|
**Where:** `app/api/chat/route.ts`, near the top of the main while
|
||||||
|
loop, the `isSilent` constant.
|
||||||
|
|
||||||
|
**How:**
|
||||||
|
|
||||||
|
```ts
|
||||||
|
const isSilent = roundsSinceText >= 3 || toolCallsSinceText >= 6;
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
- [ ] Replay a chat that triggers 6+ tool calls without text. Confirm
|
||||||
|
the `[STATUS NUDGE]` system addendum is injected before the next
|
||||||
|
round.
|
||||||
|
- [ ] Confirm the model produces a one-line status sentence in
|
||||||
|
response to the nudge.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### [ ] 7. Update the path-convention guidance in the prompt
|
||||||
|
|
||||||
|
**What:** After Fix 2 ships, the path convention is now enforced. The
|
||||||
|
prompt should state this plainly without the "cd into your project"
|
||||||
|
workaround.
|
||||||
|
|
||||||
|
**Where:** `buildSystemPrompt`, inside `activeBlock`, the path
|
||||||
|
guidance section. Also inside "Dev servers" → "Directory" bullet.
|
||||||
|
|
||||||
|
**How:**
|
||||||
|
|
||||||
|
Replace the "Directory" bullet under "Dev servers":
|
||||||
|
|
||||||
|
```
|
||||||
|
- **Directory:** Tool paths are scoped to your project root
|
||||||
|
automatically. Pass `command: "npm run dev"` directly — no `cd`
|
||||||
|
prefix needed. The tool rejects any `fs_*` write outside
|
||||||
|
`/workspace/<slug>/`.
|
||||||
|
```
|
||||||
|
|
||||||
|
And after the "Project repo is auto-cloned" paragraph in
|
||||||
|
`activeBlock`, add:
|
||||||
|
|
||||||
|
```
|
||||||
|
**Path convention for fs_* tools:** Pass paths relative to the
|
||||||
|
project root — `src/app/page.tsx`, NOT `/workspace/<slug>/src/app/page.tsx`
|
||||||
|
and NOT `<slug>/src/app/page.tsx`. The tool layer rejects writes
|
||||||
|
outside the project root with a `PATH_OUTSIDE_PROJECT` error
|
||||||
|
suggesting the corrected path.
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
- [ ] In a fresh chat, ask the agent to "edit the homepage". Confirm
|
||||||
|
the first `fs_read` call uses `src/app/page.tsx` (no slug prefix,
|
||||||
|
no `/workspace/` prefix).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 3 — Polish and safety nets
|
||||||
|
|
||||||
|
These are lower-priority but each removes a small foot-gun.
|
||||||
|
|
||||||
|
### [ ] 8. Add `fs_tree` recommendation to first-turn behavior
|
||||||
|
|
||||||
|
**What:** The agent ran `fs_tree` 5 times and `fs_glob` 9+ times in
|
||||||
|
the failing session, re-discovering paths it should have learned once.
|
||||||
|
The tool description already says "ALWAYS call this first" but the
|
||||||
|
prompt doesn't reinforce it.
|
||||||
|
|
||||||
|
**Where:** `buildSystemPrompt`, in the "Writing code" section.
|
||||||
|
|
||||||
|
**How:**
|
||||||
|
|
||||||
|
Add this near the top of "Writing code — dev container is the default":
|
||||||
|
|
||||||
|
```
|
||||||
|
**Orient yourself once.** On the first code-modifying turn of a
|
||||||
|
chat, call `fs_tree` once to learn the repo layout. Don't re-run it
|
||||||
|
on every turn — the layout doesn't change between user messages.
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
- [ ] Manual review of the next 5 sessions: confirm `fs_tree` is
|
||||||
|
called at most once per chat (not per turn).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### [ ] 9. Add `browser_navigate` and `browser_console` as verification primitives
|
||||||
|
|
||||||
|
**What:** The backend has `browser_navigate` and `browser_console`
|
||||||
|
tools that headlessly render a page and capture console errors. The
|
||||||
|
prompt never mentions them. These are the missing post-deploy
|
||||||
|
verification step that the `healthCheck` field gestures at.
|
||||||
|
|
||||||
|
**Where:** `buildSystemPrompt`, in the "Dev servers" subsection or as
|
||||||
|
its own section after "Visual QA".
|
||||||
|
|
||||||
|
**How:**
|
||||||
|
|
||||||
|
Add a new bullet right after "Visual QA" guidance:
|
||||||
|
|
||||||
|
```
|
||||||
|
**Verify the page actually renders:**
|
||||||
|
- After `dev_server_start` returns a `previewUrl` AND `healthCheck.status === 200`,
|
||||||
|
for any UI-facing turn, call `browser_console { url: previewUrl }` to
|
||||||
|
capture frontend console errors. Hydration errors, missing assets,
|
||||||
|
and uncaught exceptions show up here even when the server is
|
||||||
|
technically "running".
|
||||||
|
- If `browser_console` returns errors, fix them with `fs_edit`
|
||||||
|
before declaring done. A green `healthCheck` plus a clean console
|
||||||
|
is the real "done" signal for UI work.
|
||||||
|
- Skip this for backend / SQL / config-only changes.
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
- [ ] On the next UI-modifying chat, confirm `browser_console` is
|
||||||
|
called once after `dev_server_start`.
|
||||||
|
- [ ] Confirm any errors it returns get acknowledged in the agent's
|
||||||
|
reply.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### [ ] 10. Add the market research stack to the prompt
|
||||||
|
|
||||||
|
**What:** The backend exposes six market research tools
|
||||||
|
(`market_categories_suggest`, `market_research_run`, `market_seo_analyze`,
|
||||||
|
`tech_stack_analyze`, `market_competitor_research`,
|
||||||
|
`market_aggregate_insights`). The prompt never mentions them. For a
|
||||||
|
non-technical-founder product, these are some of the highest-leverage
|
||||||
|
tools — they answer "should I build for dentists or summer camps?"
|
||||||
|
with real TAM counts.
|
||||||
|
|
||||||
|
**Where:** `buildSystemPrompt`, add a new section after "Common
|
||||||
|
questions → tools" or before "How to deploy".
|
||||||
|
|
||||||
|
**How:**
|
||||||
|
|
||||||
|
Add this section:
|
||||||
|
|
||||||
|
```
|
||||||
|
## Helping the user pick what to build
|
||||||
|
|
||||||
|
Vibn has a market-research toolkit for non-technical founders who
|
||||||
|
need data on their target niche. Use it when the user is undecided,
|
||||||
|
validating an idea, or comparing markets:
|
||||||
|
|
||||||
|
- **"How big is the market for X in <location>?"** → `market_categories_suggest { niche }` to
|
||||||
|
propose Google Business categories, then `market_research_run` after
|
||||||
|
the user approves. Returns TAM count, sample domains, and review
|
||||||
|
data. NOTE: `market_research_run` costs real money — always confirm
|
||||||
|
with the user and pass `user_explicitly_approved: true`.
|
||||||
|
- **"What are competitors spending on Google Ads?"** → `market_seo_analyze { domain }`.
|
||||||
|
Returns organic traffic, paid traffic, ad spend, and top paid
|
||||||
|
keywords. Use to tell the user how aggressive a market is.
|
||||||
|
- **"What software do these businesses already use?"** → `tech_stack_analyze { urls, software_category_id }`.
|
||||||
|
Detects WordPress, Shopify, named competitors, and any custom
|
||||||
|
domains/scripts you pass. Use to find "X businesses use WordPress
|
||||||
|
but lack Y" market gaps.
|
||||||
|
- **"What are customers complaining about?"** → `market_aggregate_insights { category, location }`.
|
||||||
|
Returns top review topics — use the actual words customers use as
|
||||||
|
marketing copy and value-prop seeds.
|
||||||
|
- **"Who are the players in this niche?"** → `market_competitor_research { niche }`.
|
||||||
|
Returns proprietary competitors with pricing AND open-source
|
||||||
|
alternatives that could be forked.
|
||||||
|
|
||||||
|
These are conversational research tools — they don't build anything.
|
||||||
|
Use them BEFORE scaffolding when the user is exploring direction;
|
||||||
|
SKIP them once the user has committed to building.
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
- [ ] In a fresh chat, ask the agent "should I build for dentists or
|
||||||
|
summer camps?". Confirm it proposes using `market_categories_suggest`
|
||||||
|
or `market_aggregate_insights` rather than guessing.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### [ ] 11. Surface `apps_exec`, `auth_create`, `generate_media`, `storage_*` briefly
|
||||||
|
|
||||||
|
**What:** Several capabilities the agent has are completely absent
|
||||||
|
from the prompt. The agent doesn't know it can run commands inside
|
||||||
|
production containers, deploy real auth servers, generate images, or
|
||||||
|
wire S3 storage. One-line mentions are enough.
|
||||||
|
|
||||||
|
**Where:** `buildSystemPrompt`, scattered across existing sections.
|
||||||
|
|
||||||
|
**How:**
|
||||||
|
|
||||||
|
- Under "Common questions → tools", add:
|
||||||
|
```
|
||||||
|
- "Run a migration / psql in prod" → `apps_exec { uuid, command }`.
|
||||||
|
- "Generate a hero image / illustration" → `generate_media { prompt, type, outputPath }`.
|
||||||
|
- "Wire up file storage / uploads" → `storage_provision` (if not already), then `storage_inject_env { uuid }`.
|
||||||
|
```
|
||||||
|
|
||||||
|
- In the "Decision defaults" section, augment the auth bullet:
|
||||||
|
```
|
||||||
|
- **Auth:** NextAuth with email magic-link for in-app auth.
|
||||||
|
Deploy a separate Pocketbase / Authentik / Keycloak service via
|
||||||
|
`auth_create { provider }` only if the user needs SSO, multi-app
|
||||||
|
SSO, or admin user management.
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
- [ ] Ask the agent "can you run a SQL migration on prod?". Confirm
|
||||||
|
it references `apps_exec`.
|
||||||
|
- [ ] Ask "I need a hero image for the landing page". Confirm it
|
||||||
|
references `generate_media`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### [ ] 12. Flip the `fs_edit` preference to match the tool's documentation
|
||||||
|
|
||||||
|
**What:** The tool description says `startLine`/`endLine` is
|
||||||
|
preferred and `oldString` is the fallback. The prompt currently says
|
||||||
|
the opposite ("prefer `oldString` for small replacements"). The tool
|
||||||
|
is authoritative — match it.
|
||||||
|
|
||||||
|
**Where:** `buildSystemPrompt`, in the "Iterate" bullet under "Writing
|
||||||
|
code", the `fs_edit` guidance.
|
||||||
|
|
||||||
|
**How:**
|
||||||
|
|
||||||
|
Replace the current `fs_edit` guidance with:
|
||||||
|
|
||||||
|
```
|
||||||
|
- `fs_read` / `fs_write` / `fs_edit { path, oldString, newString, startLine, endLine }`.
|
||||||
|
**For `fs_edit`:** prefer `startLine`/`endLine` (deterministic; never
|
||||||
|
fails on duplicate strings). Use `oldString` only when you cannot
|
||||||
|
read the file first to get line numbers — and when you do, include
|
||||||
|
2-3 lines of surrounding context for uniqueness. If `fs_edit` keeps
|
||||||
|
failing, do NOT escape to `shell_exec` with patch scripts — read
|
||||||
|
the file fresh with `fs_read`, get the line numbers, and try again.
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
- [ ] Confirm next 3 `fs_edit` calls in the wild use `startLine`/`endLine`,
|
||||||
|
not `oldString`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase 4 — Monitoring (after Phases 1-3 land)
|
||||||
|
|
||||||
|
Once the changes are in production, watch these for a week. Tune if
|
||||||
|
the numbers don't move.
|
||||||
|
|
||||||
|
### [ ] 13. Track the recovery-summary fire rate
|
||||||
|
|
||||||
|
**What:** The `needsRecovery` path runs when a turn ends badly (hit
|
||||||
|
the round cap, hit a loop, or last tool returned failure). It should
|
||||||
|
fire on <10% of turns. If it fires more often, the cap is too low or
|
||||||
|
the model is hitting real bugs.
|
||||||
|
|
||||||
|
**How:** Add a metric. In the `needsRecovery` block, before calling
|
||||||
|
`callVibnChat`, emit:
|
||||||
|
|
||||||
|
```ts
|
||||||
|
console.log("[chat] recovery_fired", {
|
||||||
|
turnId,
|
||||||
|
reason: loopBreakReason ? "loop"
|
||||||
|
: round >= MAX_TOOL_ROUNDS ? "round_cap"
|
||||||
|
: assistantText.trim().length === 0 ? "no_text"
|
||||||
|
: "tool_failure",
|
||||||
|
toolCalls: assistantToolCalls.length,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
- [ ] Aggregate over 1 week. If `round_cap` is the dominant reason
|
||||||
|
and the turns look legitimate, raise `MAX_TOOL_ROUNDS` to 10 or 12.
|
||||||
|
- [ ] If `loop` is dominant, the fingerprinter may need tuning.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### [ ] 14. Track conversational-guard fire rate
|
||||||
|
|
||||||
|
**What:** The first-turn conversational guard (regex match on user
|
||||||
|
message → no tools on round 1) is the biggest single behavioral
|
||||||
|
change in this revision. We want to know how often it fires and
|
||||||
|
whether it ever causes a problem.
|
||||||
|
|
||||||
|
**How:** Before the loop, after computing `firstMessageIsConversational`:
|
||||||
|
|
||||||
|
```ts
|
||||||
|
console.log("[chat] turn_start", {
|
||||||
|
turnId,
|
||||||
|
firstMessageIsConversational,
|
||||||
|
messagePreview: message.trim().slice(0, 80),
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
- [ ] Aggregate over 1 week. Should fire on ~30-40% of first messages
|
||||||
|
in a chat.
|
||||||
|
- [ ] Spot-check 10 cases where it fired: confirm none were
|
||||||
|
legitimate "build me X" requests being miscategorized.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### [ ] 15. Track `PATH_OUTSIDE_PROJECT` rejections
|
||||||
|
|
||||||
|
**What:** After Fix 2 ships, this error tells you how often the model
|
||||||
|
was about to write to the wrong path. Should taper toward zero as the
|
||||||
|
prompt guidance + enforcement settles in.
|
||||||
|
|
||||||
|
**How:** Server-side log in `normalizeFsPath` when the project-scoped
|
||||||
|
rejection fires.
|
||||||
|
|
||||||
|
**Verify:**
|
||||||
|
- [ ] Week 1: count rejections per day.
|
||||||
|
- [ ] Week 2: count should be lower (model has internalized the rule
|
||||||
|
via the error messages it's seen).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## What this checklist deliberately doesn't include
|
||||||
|
|
||||||
|
A few things from earlier reviews that I'm intentionally leaving off:
|
||||||
|
|
||||||
|
- **Phase-aware behavior.** Phases were removed from the product; the
|
||||||
|
prompt no longer references them. No work needed.
|
||||||
|
- **Codebase summary auto-generation.** Lower priority once Fix 2
|
||||||
|
ships (paths can no longer drift) and Fix 8 lands (one `fs_tree`
|
||||||
|
per chat instead of nine).
|
||||||
|
- **Tool history reconstruction for DeepSeek compatibility.** Already
|
||||||
|
shipped in the current code via the `_rawToolResults` compact
|
||||||
|
summary. No additional work.
|
||||||
|
|
||||||
|
Don't add work for these unless a clear failure mode appears in
|
||||||
|
production after Phases 1-3 land.
|
||||||
73
patch_prompts.js
Normal file
73
patch_prompts.js
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
const fs = require('fs');
|
||||||
|
|
||||||
|
const file = 'vibn-frontend/app/api/chat/route.ts';
|
||||||
|
let code = fs.readFileSync(file, 'utf8');
|
||||||
|
|
||||||
|
// Fix 4: apps_containers_list -> apps_containers_ps
|
||||||
|
code = code.replace(/apps_containers_list \{ uuid \}/g, 'apps_containers_ps { uuid }');
|
||||||
|
|
||||||
|
// Fix 5: Soften ok field rule
|
||||||
|
const oldOkRule = `- **Trust the \\\`ok\\\` field.** Every tool result carries \\\`ok: true | false\\\`. If \\\`ok\\\` is false (or \\\`exitCode\\\` is non-zero, or \\\`healthCheck.status\\\` is >= 400), the operation FAILED. Do not describe a failed operation as successful. Surface the error verbatim and propose a next step.`;
|
||||||
|
const newOkRule = `- **Read tool results carefully.** A tool FAILED when ANY of these signals are present: \\\`ok: false\\\`, \\\`error: "..."\\\`, a non-zero \\\`exitCode\\\`, or a \\\`healthCheck.status\\\` >= 400. If NONE of those signals are present, look at the actual content of the response to decide whether the operation succeeded. Many read-only tools return data directly without an \\\`ok\\\` field — that's not a failure.`;
|
||||||
|
code = code.replace(oldOkRule, newOkRule);
|
||||||
|
|
||||||
|
// Fix 7: Path conventions
|
||||||
|
const oldDirRule = `- **Directory:** the tool resolves paths relative to the active project root — you can pass \\\`command: "npm run dev"\\\` directly. (If you need to manually \\\`cd\\\`, use the project slug.)`;
|
||||||
|
const newDirRule = `- **Directory:** Tool paths are scoped to your project root automatically. Pass \\\`command: "npm run dev"\\\` directly — no \\\`cd\\\` prefix needed. The tool rejects any \\\`fs_*\\\` write outside \\\`/workspace/<slug>/\\\`.`;
|
||||||
|
code = code.replace(oldDirRule, newDirRule);
|
||||||
|
|
||||||
|
const oldPathConv = `**fs_* path convention for this project:** Pass paths relative to \\\`/workspace/\${activeProject.slug ?? "<slug>"}/\\\` — e.g. \\\`src/app/page.tsx\\\`, not \\\`/workspace/\${activeProject.slug ?? "<slug>"}/src/app/page.tsx\\\` and not \\\`getacquired-style/src/app/page.tsx\\\`. The tool layer rejects paths outside the project root.`;
|
||||||
|
const newPathConv = `**Path convention for fs_* tools:** Pass paths relative to the project root — \\\`src/app/page.tsx\\\`, NOT \\\`/workspace/\${activeProject.slug ?? "<slug>"}/src/app/page.tsx\\\` and NOT \\\`\${activeProject.slug ?? "<slug>"}/src/app/page.tsx\\\`. The tool layer rejects writes outside the project root with a \\\`PATH_OUTSIDE_PROJECT\\\` error suggesting the corrected path.`;
|
||||||
|
code = code.replace(oldPathConv, newPathConv);
|
||||||
|
|
||||||
|
// Fix 8: fs_tree recommendation
|
||||||
|
const devContainerStart = `**Start a coding session:** \\\`devcontainer_ensure { projectId }\\\` (idempotent; first call ~10s, then instant).`;
|
||||||
|
const devContainerStartNew = `**Start a coding session:** \\\`devcontainer_ensure { projectId }\\\` (idempotent; first call ~10s, then instant).
|
||||||
|
|
||||||
|
**Orient yourself once.** On the first code-modifying turn of a chat, call \\\`fs_tree\\\` once to learn the repo layout. Don't re-run it on every turn — the layout doesn't change between user messages.`;
|
||||||
|
code = code.replace(devContainerStart, devContainerStartNew);
|
||||||
|
|
||||||
|
// Fix 9: browser_console
|
||||||
|
const visualQaBlock = `**Visual QA:** \\\`request_visual_qa { targetPath }\\\` critiques a UI file against a 5-dim design rubric. **Call this whenever you modify visual UI code** before returning the \\\`previewUrl\\\`. If it returns actionable issues, fix them with \\\`fs_edit\\\` before ending the turn. Skip for backend / SQL / config / non-visual changes.`;
|
||||||
|
const visualQaBlockNew = `**Visual QA:** \\\`request_visual_qa { targetPath }\\\` critiques a UI file against a 5-dim design rubric. **Call this whenever you modify visual UI code** before returning the \\\`previewUrl\\\`. If it returns actionable issues, fix them with \\\`fs_edit\\\` before ending the turn. Skip for backend / SQL / config / non-visual changes.
|
||||||
|
|
||||||
|
**Verify the page actually renders:**
|
||||||
|
- After \\\`dev_server_start\\\` returns a \\\`previewUrl\\\` AND \\\`healthCheck.status === 200\\\`, for any UI-facing turn, call \\\`browser_console { url: previewUrl }\\\` to capture frontend console errors. Hydration errors, missing assets, and uncaught exceptions show up here even when the server is technically "running".
|
||||||
|
- If \\\`browser_console\\\` returns errors, fix them with \\\`fs_edit\\\` before declaring done. A green \\\`healthCheck\\\` plus a clean console is the real "done" signal for UI work.
|
||||||
|
- Skip this for backend / SQL / config-only changes.`;
|
||||||
|
code = code.replace(visualQaBlock, visualQaBlockNew);
|
||||||
|
|
||||||
|
// Fix 10: Market research stack
|
||||||
|
const commonQuestionsBlock = `## Common questions → tools
|
||||||
|
- "What is project X?" → \\\`projects_get { projectId }\\\`
|
||||||
|
- "What's running / has a domain?" → \\\`apps_list { projectId }\\\` (or workspace-wide without projectId)
|
||||||
|
- "Show logs / containers / env" → \\\`apps_list\\\` to resolve uuid, then \\\`apps_logs\\\` / \\\`apps_containers_list\\\` / \\\`apps_envs_list\\\`
|
||||||
|
- "Find an OSS X" → \\\`github_search\\\` (include \\\`license:mit\\\` by default), then \\\`github_file\\\` to read README / docker-compose
|
||||||
|
- "What do the docs say about Y?" → \\\`http_fetch\\\``;
|
||||||
|
|
||||||
|
// Oops, we changed apps_containers_list to apps_containers_ps in Fix 4, so let's match the updated one:
|
||||||
|
const commonQuestionsBlockMatched = `## Common questions → tools
|
||||||
|
- "What is project X?" → \\\`projects_get { projectId }\\\`
|
||||||
|
- "What's running / has a domain?" → \\\`apps_list { projectId }\\\` (or workspace-wide without projectId)
|
||||||
|
- "Show logs / containers / env" → \\\`apps_list\\\` to resolve uuid, then \\\`apps_logs\\\` / \\\`apps_containers_ps\\\` / \\\`apps_envs_list\\\`
|
||||||
|
- "Find an OSS X" → \\\`github_search\\\` (include \\\`license:mit\\\` by default), then \\\`github_file\\\` to read README / docker-compose
|
||||||
|
- "What do the docs say about Y?" → \\\`http_fetch\\\``;
|
||||||
|
|
||||||
|
const marketResearchBlock = `
|
||||||
|
|
||||||
|
## Helping the user pick what to build
|
||||||
|
|
||||||
|
Vibn has a market-research toolkit for non-technical founders who need data on their target niche. Use it when the user is undecided, validating an idea, or comparing markets:
|
||||||
|
|
||||||
|
- **"How big is the market for X in <location>?"** → \\\`market_categories_suggest { niche }\\\` to propose Google Business categories, then \\\`market_research_run\\\` after the user approves. Returns TAM count, sample domains, and review data. NOTE: \\\`market_research_run\\\` costs real money — always confirm with the user and pass \\\`user_explicitly_approved: true\\\`.
|
||||||
|
- **"What are competitors spending on Google Ads?"** → \\\`market_seo_analyze { domain }\\\`. Returns organic traffic, paid traffic, ad spend, and top paid keywords. Use to tell the user how aggressive a market is.
|
||||||
|
- **"What software do these businesses already use?"** → \\\`tech_stack_analyze { urls, software_category_id }\\\`. Detects WordPress, Shopify, named competitors, and any custom domains/scripts you pass. Use to find "X businesses use WordPress but lack Y" market gaps.
|
||||||
|
- **"What are customers complaining about?"** → \\\`market_aggregate_insights { category, location }\\\`. Returns top review topics — use the actual words customers use as marketing copy and value-prop seeds.
|
||||||
|
- **"Who are the players in this niche?"** → \\\`market_competitor_research { niche }\\\`. Returns proprietary competitors with pricing AND open-source alternatives that could be forked.
|
||||||
|
|
||||||
|
These are conversational research tools — they don't build anything. Use them BEFORE scaffolding when the user is exploring direction; SKIP them once the user has committed to building.`;
|
||||||
|
|
||||||
|
code = code.replace(commonQuestionsBlockMatched, commonQuestionsBlockMatched + marketResearchBlock);
|
||||||
|
|
||||||
|
fs.writeFileSync(file, code);
|
||||||
|
console.log("Applied Phase 2 and 3 prompt fixes");
|
||||||
@@ -215,6 +215,8 @@ Each project has a persistent \`vibn-dev\` container. Edit files via \`fs_*\` an
|
|||||||
|
|
||||||
**Start a coding session:** \`devcontainer_ensure { projectId }\` (idempotent; first call ~10s, then instant).
|
**Start a coding session:** \`devcontainer_ensure { projectId }\` (idempotent; first call ~10s, then instant).
|
||||||
|
|
||||||
|
**Orient yourself once.** On the first code-modifying turn of a chat, call \`fs_tree\` once to learn the repo layout. Don't re-run it on every turn — the layout doesn't change between user messages.
|
||||||
|
|
||||||
**Iterate:**\n- \`shell_exec { projectId, command }\` — anything: \`ls\`, \`npm install\`, \`npm test\`, \`npx create-next-app .\`, \`git status\`. Cwd defaults to \`/workspace\`. Node (LTS), Python 3.12, and Go 1.23 are pre-installed — no setup needed.\n- \`fs_read\` / \`fs_write\` / \`fs_edit { path, oldString, newString, startLine, endLine }\`. IMPORTANT: For fs_edit, ALWAYS prefer using \`oldString\` for small replacements if you are confident. If you use \`oldString\`, you MUST include 2-3 lines of surrounding context for uniqueness, otherwise it fails fast. If you are replacing large blocks, use \`startLine\` and \`endLine\` instead.\n- \`fs_glob\` / \`fs_grep\` (ripgrep, respects .gitignore) / \`fs_list\` / \`fs_delete\`.\n
|
**Iterate:**\n- \`shell_exec { projectId, command }\` — anything: \`ls\`, \`npm install\`, \`npm test\`, \`npx create-next-app .\`, \`git status\`. Cwd defaults to \`/workspace\`. Node (LTS), Python 3.12, and Go 1.23 are pre-installed — no setup needed.\n- \`fs_read\` / \`fs_write\` / \`fs_edit { path, oldString, newString, startLine, endLine }\`. IMPORTANT: For fs_edit, ALWAYS prefer using \`oldString\` for small replacements if you are confident. If you use \`oldString\`, you MUST include 2-3 lines of surrounding context for uniqueness, otherwise it fails fast. If you are replacing large blocks, use \`startLine\` and \`endLine\` instead.\n- \`fs_glob\` / \`fs_grep\` (ripgrep, respects .gitignore) / \`fs_list\` / \`fs_delete\`.\n
|
||||||
|
|
||||||
**Dev servers (preview URL via \`*.preview.vibnai.com\` wildcard):**
|
**Dev servers (preview URL via \`*.preview.vibnai.com\` wildcard):**
|
||||||
@@ -250,7 +252,7 @@ For NEW repos / branches: \`gitea_repos_list\`, \`gitea_repo_get\`, \`gitea_repo
|
|||||||
|
|
||||||
## Troubleshooting
|
## Troubleshooting
|
||||||
- **Dev container stuck provisioning (>120s)**: \`devcontainer_status\` returns \`likelyFailed: true\` and a \`coolifyStatus\` field with Coolify's view. If \`blockedReason\` is set, TELL THE USER the specific reason ("SSH not configured", "Coolify deploy failed: image pull error") instead of continuing to poll. Do NOT loop on \`devcontainer_status\` — a stuck container will NOT self-heal. If the status says "failed" or "error", advise the user to check their Coolify dashboard or regenerate the project.
|
- **Dev container stuck provisioning (>120s)**: \`devcontainer_status\` returns \`likelyFailed: true\` and a \`coolifyStatus\` field with Coolify's view. If \`blockedReason\` is set, TELL THE USER the specific reason ("SSH not configured", "Coolify deploy failed: image pull error") instead of continuing to poll. Do NOT loop on \`devcontainer_status\` — a stuck container will NOT self-heal. If the status says "failed" or "error", advise the user to check their Coolify dashboard or regenerate the project.
|
||||||
- "exited (1)" / deploy stuck → \`apps_logs { uuid }\` + \`apps_containers_list { uuid }\`. Usual: missing env, wrong port, image pull fail.
|
- "exited (1)" / deploy stuck → \`apps_logs { uuid }\` + \`apps_containers_ps { uuid }\`. Usual: missing env, wrong port, image pull fail.
|
||||||
- 502 / "no available server" → \`apps_get\`; if \`fqdn\` is empty, attach a domain.
|
- 502 / "no available server" → \`apps_get\`; if \`fqdn\` is empty, attach a domain.
|
||||||
- "tenant" / "does not belong to" → uuid not in this workspace. Re-list with \`apps_list\`.
|
- "tenant" / "does not belong to" → uuid not in this workspace. Re-list with \`apps_list\`.
|
||||||
- Compose stack weird → \`apps_repair { uuid }\` re-applies Traefik labels + port forwarding.
|
- Compose stack weird → \`apps_repair { uuid }\` re-applies Traefik labels + port forwarding.
|
||||||
|
|||||||
@@ -4491,28 +4491,44 @@ function shq(s: string): string {
|
|||||||
return `'${s.replace(/'/g, `'\\''`)}'`;
|
return `'${s.replace(/'/g, `'\\''`)}'`;
|
||||||
}
|
}
|
||||||
|
|
||||||
function normalizeFsPath(p: string): string | NextResponse {
|
function normalizeFsPath(
|
||||||
|
p: string,
|
||||||
|
projectSlug?: string,
|
||||||
|
): string | NextResponse {
|
||||||
if (!p || typeof p !== "string") {
|
if (!p || typeof p !== "string") {
|
||||||
return NextResponse.json(
|
return NextResponse.json(
|
||||||
{ error: 'Param "path" is required' },
|
{ error: 'Param "path" is required' },
|
||||||
{ status: 400 },
|
{ status: 400 },
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
const projectRoot = projectSlug ? `${FS_ROOT}/${projectSlug}` : FS_ROOT;
|
||||||
let abs: string;
|
let abs: string;
|
||||||
if (p.startsWith("/")) {
|
if (p.startsWith("/")) {
|
||||||
abs = p;
|
abs = p;
|
||||||
} else {
|
} else {
|
||||||
abs = `${FS_ROOT}/${p}`.replace(/\/+/g, "/");
|
abs = `${projectRoot}/${p}`.replace(/\/+/g, "/");
|
||||||
}
|
}
|
||||||
// Disallow .. traversal that escapes /workspace.
|
|
||||||
const norm = abs.replace(/\/[^/]+\/\.\.(?=\/|$)/g, "").replace(/\/+/g, "/");
|
const norm = abs.replace(/\/[^/]+\/\.\.(?=\/|$)/g, "").replace(/\/+/g, "/");
|
||||||
if (!norm.startsWith(FS_ROOT) && norm !== FS_ROOT) {
|
|
||||||
return NextResponse.json(
|
// When projectSlug is set, REJECT paths outside the project root.
|
||||||
{
|
if (projectSlug) {
|
||||||
error: `Path "${p}" is outside ${FS_ROOT}; use shell.exec for system paths.`,
|
if (!norm.startsWith(projectRoot) && norm !== projectRoot) {
|
||||||
},
|
return NextResponse.json(
|
||||||
{ status: 400 },
|
{
|
||||||
);
|
ok: false,
|
||||||
|
error: `PATH_OUTSIDE_PROJECT: path "${p}" resolves to "${norm}" which is outside the active project at "${projectRoot}". Did you mean "${projectRoot}/${p.replace(/^\/+/, "")}"?`,
|
||||||
|
},
|
||||||
|
{ status: 400 },
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Workspace-level fallback (legacy behaviour)
|
||||||
|
if (!norm.startsWith(FS_ROOT) && norm !== FS_ROOT) {
|
||||||
|
return NextResponse.json(
|
||||||
|
{ error: `Path "${p}" is outside ${FS_ROOT}; use shell.exec for system paths.` },
|
||||||
|
{ status: 400 },
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return norm;
|
return norm;
|
||||||
}
|
}
|
||||||
@@ -4627,7 +4643,7 @@ async function toolFsRead(principal: Principal, params: Record<string, any>) {
|
|||||||
if (guard) return guard;
|
if (guard) return guard;
|
||||||
const project = await resolveProjectOr404(principal, params);
|
const project = await resolveProjectOr404(principal, params);
|
||||||
if (project instanceof NextResponse) return project;
|
if (project instanceof NextResponse) return project;
|
||||||
const path = normalizeFsPath(String(params.path ?? ""));
|
const path = normalizeFsPath(String(params.path ?? ""), project.slug);
|
||||||
if (path instanceof NextResponse) return path;
|
if (path instanceof NextResponse) return path;
|
||||||
|
|
||||||
const offset = Number.isFinite(Number(params.offset))
|
const offset = Number.isFinite(Number(params.offset))
|
||||||
@@ -4873,7 +4889,7 @@ async function toolFsWrite(principal: Principal, params: Record<string, any>) {
|
|||||||
if (guard) return guard;
|
if (guard) return guard;
|
||||||
const project = await resolveProjectOr404(principal, params);
|
const project = await resolveProjectOr404(principal, params);
|
||||||
if (project instanceof NextResponse) return project;
|
if (project instanceof NextResponse) return project;
|
||||||
const path = normalizeFsPath(String(params.path ?? ""));
|
const path = normalizeFsPath(String(params.path ?? ""), project.slug);
|
||||||
if (path instanceof NextResponse) return path;
|
if (path instanceof NextResponse) return path;
|
||||||
const content = typeof params.content === "string" ? params.content : "";
|
const content = typeof params.content === "string" ? params.content : "";
|
||||||
|
|
||||||
@@ -4890,8 +4906,11 @@ async function toolFsWrite(principal: Principal, params: Record<string, any>) {
|
|||||||
{ status: 500 },
|
{ status: 500 },
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
const { createHash } = require('crypto');
|
||||||
|
const bytes = Buffer.byteLength(content, "utf8");
|
||||||
|
const sha256 = createHash("sha256").update(content, "utf8").digest("hex");
|
||||||
return NextResponse.json({
|
return NextResponse.json({
|
||||||
result: { path, bytesWritten: Buffer.byteLength(content, "utf8") },
|
result: { ok: true, path, bytes, sha256 },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4900,7 +4919,7 @@ async function toolFsEdit(principal: Principal, params: Record<string, any>) {
|
|||||||
if (guard) return guard;
|
if (guard) return guard;
|
||||||
const project = await resolveProjectOr404(principal, params);
|
const project = await resolveProjectOr404(principal, params);
|
||||||
if (project instanceof NextResponse) return project;
|
if (project instanceof NextResponse) return project;
|
||||||
const path = normalizeFsPath(String(params.path ?? ""));
|
const path = normalizeFsPath(String(params.path ?? ""), project.slug);
|
||||||
if (path instanceof NextResponse) return path;
|
if (path instanceof NextResponse) return path;
|
||||||
|
|
||||||
const newString =
|
const newString =
|
||||||
@@ -4966,7 +4985,7 @@ print(n)`;
|
|||||||
|
|
||||||
const b64 = Buffer.from(JSON.stringify(payload), "utf8").toString("base64");
|
const b64 = Buffer.from(JSON.stringify(payload), "utf8").toString("base64");
|
||||||
const pyB64 = Buffer.from(py, "utf8").toString("base64");
|
const pyB64 = Buffer.from(py, "utf8").toString("base64");
|
||||||
const cmd = `python3 -c "$(printf %s ${shq(pyB64)} | base64 -d)" <<< "$(printf %s ${shq(b64)} | base64 -d)"`;
|
const cmd = `python3 -c "$(printf %s ${shq(pyB64)} | base64 -d)" <<< "$(printf %s ${shq(b64)} | base64 -d)" && echo "---" && sha256sum ${shq(path)} | cut -d' ' -f1 && wc -c < ${shq(path)}`;
|
||||||
|
|
||||||
const r = await runFsCmd(principal, project, cmd);
|
const r = await runFsCmd(principal, project, cmd);
|
||||||
if (r.code !== 0) {
|
if (r.code !== 0) {
|
||||||
@@ -4979,8 +4998,18 @@ print(n)`;
|
|||||||
{ status },
|
{ status },
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
const stdoutParts = r.stdout.split('---');
|
||||||
|
const replacementsStr = stdoutParts[0].trim();
|
||||||
|
const hashAndSize = stdoutParts[1] ? stdoutParts[1].trim().split('\n') : [];
|
||||||
|
|
||||||
return NextResponse.json({
|
return NextResponse.json({
|
||||||
result: { path, replacements: parseInt(r.stdout.trim() || "0", 10) },
|
result: {
|
||||||
|
ok: true,
|
||||||
|
path,
|
||||||
|
replacements: parseInt(replacementsStr || "0", 10),
|
||||||
|
sha256: hashAndSize[0] ? hashAndSize[0].trim() : undefined,
|
||||||
|
bytes: hashAndSize[1] ? parseInt(hashAndSize[1].trim(), 10) : undefined
|
||||||
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -4989,7 +5018,7 @@ async function toolFsList(principal: Principal, params: Record<string, any>) {
|
|||||||
if (guard) return guard;
|
if (guard) return guard;
|
||||||
const project = await resolveProjectOr404(principal, params);
|
const project = await resolveProjectOr404(principal, params);
|
||||||
if (project instanceof NextResponse) return project;
|
if (project instanceof NextResponse) return project;
|
||||||
const path = normalizeFsPath(String(params.path ?? "/workspace"));
|
const path = normalizeFsPath(String(params.path ?? "/workspace"), project.slug);
|
||||||
if (path instanceof NextResponse) return path;
|
if (path instanceof NextResponse) return path;
|
||||||
const cmd = `cd ${shq(path)} && ls -lA --time-style=long-iso 2>&1 | head -200`;
|
const cmd = `cd ${shq(path)} && ls -lA --time-style=long-iso 2>&1 | head -200`;
|
||||||
const r = await runFsCmd(principal, project, cmd);
|
const r = await runFsCmd(principal, project, cmd);
|
||||||
@@ -5003,7 +5032,7 @@ async function toolFsDelete(principal: Principal, params: Record<string, any>) {
|
|||||||
if (guard) return guard;
|
if (guard) return guard;
|
||||||
const project = await resolveProjectOr404(principal, params);
|
const project = await resolveProjectOr404(principal, params);
|
||||||
if (project instanceof NextResponse) return project;
|
if (project instanceof NextResponse) return project;
|
||||||
const path = normalizeFsPath(String(params.path ?? ""));
|
const path = normalizeFsPath(String(params.path ?? ""), project.slug);
|
||||||
if (path instanceof NextResponse) return path;
|
if (path instanceof NextResponse) return path;
|
||||||
const recursive = Boolean(params.recursive);
|
const recursive = Boolean(params.recursive);
|
||||||
// Belt-and-suspenders: never let `rm -rf /workspace` itself slip through.
|
// Belt-and-suspenders: never let `rm -rf /workspace` itself slip through.
|
||||||
@@ -5036,7 +5065,7 @@ async function toolFsGlob(principal: Principal, params: Record<string, any>) {
|
|||||||
{ status: 400 },
|
{ status: 400 },
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
const cwd = normalizeFsPath(String(params.cwd ?? "/workspace"));
|
const cwd = normalizeFsPath(String(params.cwd ?? "/workspace"), project.slug);
|
||||||
if (cwd instanceof NextResponse) return cwd;
|
if (cwd instanceof NextResponse) return cwd;
|
||||||
// ripgrep --files --glob is faster + smarter than `find` and respects .gitignore.
|
// ripgrep --files --glob is faster + smarter than `find` and respects .gitignore.
|
||||||
const cmd = `cd ${shq(cwd)} && rg --files --glob ${shq(pattern)} | head -500`;
|
const cmd = `cd ${shq(cwd)} && rg --files --glob ${shq(pattern)} | head -500`;
|
||||||
@@ -5062,7 +5091,7 @@ async function toolFsGrep(principal: Principal, params: Record<string, any>) {
|
|||||||
{ status: 400 },
|
{ status: 400 },
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
const cwd = normalizeFsPath(String(params.cwd ?? "/workspace"));
|
const cwd = normalizeFsPath(String(params.cwd ?? "/workspace"), project.slug);
|
||||||
if (cwd instanceof NextResponse) return cwd;
|
if (cwd instanceof NextResponse) return cwd;
|
||||||
const glob =
|
const glob =
|
||||||
typeof params.glob === "string" && params.glob.trim()
|
typeof params.glob === "string" && params.glob.trim()
|
||||||
@@ -6005,7 +6034,7 @@ async function toolFsTree(principal: Principal, params: Record<string, any>) {
|
|||||||
if (guard) return guard;
|
if (guard) return guard;
|
||||||
const project = await resolveProjectOr404(principal, params);
|
const project = await resolveProjectOr404(principal, params);
|
||||||
if (project instanceof NextResponse) return project;
|
if (project instanceof NextResponse) return project;
|
||||||
const path = normalizeFsPath(String(params.path ?? "/workspace"));
|
const path = normalizeFsPath(String(params.path ?? "/workspace"), project.slug);
|
||||||
if (path instanceof NextResponse) return path;
|
if (path instanceof NextResponse) return path;
|
||||||
|
|
||||||
// Use find to generate a tree structure, ignoring node_modules and .git
|
// Use find to generate a tree structure, ignoring node_modules and .git
|
||||||
|
|||||||
Reference in New Issue
Block a user