From ba2eaa55f26ad8d29cc5250461e959b3c1856d73 Mon Sep 17 00:00:00 2001 From: mawkone Date: Wed, 3 Jun 2026 13:46:49 -0700 Subject: [PATCH] feat(runner): implement state-of-the-art task-by-task meta-loop for offline delegation --- .../dist/agent-session-runner.js | 467 ++++++++------- vibn-agent-runner/src/agent-session-runner.ts | 539 ++++++++++-------- 2 files changed, 583 insertions(+), 423 deletions(-) diff --git a/vibn-agent-runner/dist/agent-session-runner.js b/vibn-agent-runner/dist/agent-session-runner.js index 91c0ae3..8b91ae1 100644 --- a/vibn-agent-runner/dist/agent-session-runner.js +++ b/vibn-agent-runner/dist/agent-session-runner.js @@ -181,33 +181,123 @@ async function autoCommitAndDeploy(opts, task, emit) { await patchSession(opts, { status: "done" }); } } -// ── Main streaming execution loop ───────────────────────────────────────────── -async function runSessionAgent(config, task, ctx, opts) { - const systemPrompt = (0, loader_1.resolvePrompt)(config.promptId); - const emit = async (line) => { - console.log(`[session ${opts.sessionId}] ${line.type}: ${line.text}`); - await patchSession(opts, { outputLine: line }); - }; - await emit({ - ts: now(), - type: "info", - text: `Agent starting working in ${opts.appPath}`, +function parseTaskItems(repoRoot) { + const fs = require("fs"); + const path = require("path"); + const tasksDir = path.join(repoRoot, ".vibncode", "tasks"); + if (!fs.existsSync(tasksDir)) + return []; + const items = []; + try { + const files = fs + .readdirSync(tasksDir) + .filter((f) => f.endsWith(".md")); + files.sort(); + for (const file of files) { + const filePath = path.join(tasksDir, file); + const content = fs.readFileSync(filePath, "utf8"); + const lines = content.split("\n"); + lines.forEach((line, lineIndex) => { + const match = line.match(/^(\s*)-\s*\[([ xX])\]\s+(.+)$/); + if (match && match[2] !== undefined && match[3] !== undefined) { + items.push({ + text: match[3].trim(), + filePath, + lineIndex, + isChecked: match[2].toLowerCase() === "x", + fileName: file, + }); + } + }); + } + } + catch (err) { + console.error("[Orchestrator] Error parsing task items:", err); + } + return items; +} +function toggleTaskOnDisk(task) { + const fs = require("fs"); + const content = fs.readFileSync(task.filePath, "utf8"); + const lines = content.split("\n"); + const line = lines[task.lineIndex]; + if (line) { + const match = line.match(/^(\s*)-\s*\[([ xX])\]\s+(.+)$/); + if (match && match[1] !== undefined && match[3] !== undefined) { + lines[task.lineIndex] = `${match[1]}- [x] ${match[3]}`; + fs.writeFileSync(task.filePath, lines.join("\n"), "utf8"); + } + } +} +async function generateBacklogFromPrompt(taskPrompt, repoRoot) { + const fs = require("fs"); + const path = require("path"); + const tasksDir = path.join(repoRoot, ".vibncode", "tasks"); + fs.mkdirSync(tasksDir, { recursive: true }); + const prompt = `You are an elite Software Engineering Orchestrator. +Your goal is to break down the user's high-level objective into a highly detailed, sequential checklist of concrete, atomic, self-contained implementation tasks. + +High-Level Objective: +"${taskPrompt}" + +Please output a standard Markdown file containing: +1. A brief 1-sentence overview. +2. A list of tasks, where each task MUST be formatted as a standard Markdown checkbox starting with "- [ ] ": +- [ ] Implement database schema changes for ... +- [ ] Add endpoint handler for ... +- [ ] Write tests ... + +Be extremely thorough and break the objective down into small, digestible units of work (e.g. 5-15 tasks). +Do NOT include any extra conversational text or explanations. Just output the clean markdown.`; + const resp = await (0, vibn_chat_model_1.callVibnChat)({ + systemPrompt: "You are a precise technical orchestrator who only outputs markdown checklist files.", + messages: [{ role: "user", content: prompt }], + temperature: 0.1, }); - // Scope the system prompt to the specific app within the monorepo + const content = resp.text || `# Delegated Backlog\n\n- [ ] ${taskPrompt}`; + const backlogPath = path.join(tasksDir, "00-delegated-backlog.md"); + fs.writeFileSync(backlogPath, content, "utf8"); +} +function commitTaskProgress(task, repoRoot) { + const { execSync } = require("child_process"); + try { + console.log(`[Orchestrator] Committing task progress: ${task.text}`); + execSync("git add -A", { cwd: repoRoot, stdio: "pipe" }); + const msg = `feat(tasks): [Completed] ${task.text}`; + execSync(`git commit -m "${msg.replace(/"/g, '\\"')}"`, { + cwd: repoRoot, + stdio: "pipe", + }); + } + catch (err) { + // If nothing to commit, that's fine + } +} +async function runSingleSubTask(task, config, ctx, opts, emit) { + const path = require("path"); + const fs = require("fs"); const basePrompt = (0, loader_1.resolvePrompt)(config.promptId); const scopedPrompt = `${basePrompt} -\n\n## Active context -You are working inside the monorepo directory: ${opts.appPath} -All file paths you use should be relative to this directory unless otherwise specified. -When running commands, always cd into ${opts.appPath} first unless already there. -Do NOT run git commit or git push — the platform handles committing after you finish. + +## ACTIVE SUBTASK OBJECTIVE +You are working on a single task in your task queue: +TASK: "${task.text}" +File: "${path.relative(opts.repoRoot ?? ctx.workspaceRoot, task.filePath)}" (line ${task.lineIndex + 1}) + +## CRITICAL EXECUTION CONSTRAINTS +1. 🎯 STAY HIGHLY FOCUSED: Your only objective is to implement this specific task. Do NOT wander, do NOT explore other unrelated parts of the codebase, and do NOT attempt unrelated tasks. +2. 🚫 NO EXPLORATION COMMANDS: DO NOT execute generic orientation/search commands like 'ls', 'find', 'pwd', 'grep', 'git diff', 'git status'. You already know the repository structure. Go straight to editing or reading the targeted files. +3. 🛠️ TOGGLE CHECKBOX: Once your implementation is done, you MUST read and rewrite "${path.relative(opts.repoRoot ?? ctx.workspaceRoot, task.filePath)}" at line ${task.lineIndex + 1} to change "- [ ]" to "- [x]". +4. 🔴 NO COMMITS: Do NOT run 'git commit' or 'git push'. The platform handles committing automatically after you finish. +5. 🟢 COMPLETED SIGNAL: When you are finished, verify the build compiles clean using the Ralph Loop checks. If successful, stop executing tools and end your response. `; - const history = [{ role: "user", content: task }]; - let turn = 0; + const userPrompt = `Please implement the following task: "${task.text}" and then check it off in the task list.`; + const history = [{ role: "user", content: userPrompt }]; + let subTurn = 0; + const SUB_MAX_TURNS = 18; let toolCallsSinceText = 0; let roundsSinceText = 0; const toolFingerprints = []; - let loopBreakReason = null; let ralphIteration = 0; function fingerprintToolCall(tc) { if (tc.name === "shell_exec") { @@ -226,20 +316,15 @@ Do NOT run git commit or git push — the platform handles committing after you } return `${tc.name}:${Object.values(tc.args ?? {})[0]}`; } - while (turn < MAX_TURNS) { + while (subTurn < SUB_MAX_TURNS) { if (opts.isStopped()) { await emit({ ts: now(), type: "info", text: "Stopped by user." }); - await patchSession(opts, { status: "stopped" }); - return; + return false; } - turn++; - const isSilent = roundsSinceText >= 15 || toolCallsSinceText >= 20; + subTurn++; + const isSilent = roundsSinceText >= 8 || toolCallsSinceText >= 12; const extraSystem = isSilent - ? "\n\n[STATUS NUDGE] You have run " + - `${toolCallsSinceText} tool call(s) over ${roundsSinceText} round(s) ` + - "without sending the user any text. Before any more tool calls, " + - "send ONE short sentence describing what you are currently working " + - "on and why." + ? "\n\n[STATUS NUDGE] Focus on completing the current task. Do not make any more tool calls without a short sentence explaining what you are working on." : ""; let resp; try { @@ -247,23 +332,25 @@ Do NOT run git commit or git push — the platform handles committing after you systemPrompt: scopedPrompt + extraSystem, messages: history, tools: config.tools, - temperature: 0.2, + temperature: 0.1, }); } catch (err) { const msg = err instanceof Error ? err.message : String(err); - await emit({ ts: now(), type: "error", text: `LLM error: ${msg}` }); - await patchSession(opts, { status: "failed", error: msg }); - return; + await emit({ + ts: now(), + type: "error", + text: `LLM sub-session error: ${msg}`, + }); + return false; } if (resp.error) { await emit({ ts: now(), type: "error", - text: `LLM error: ${resp.error}`, + text: `LLM sub-session error: ${resp.error}`, }); - await patchSession(opts, { status: "failed", error: resp.error }); - return; + return false; } if (resp.text) { await emit({ ts: now(), type: "info", text: resp.text }); @@ -274,38 +361,12 @@ Do NOT run git commit or git push — the platform handles committing after you roundsSinceText++; toolCallsSinceText += resp.toolCalls.length; } - // ── Self-Correcting Ralph Loop Autonomy ── if (!resp.toolCalls.length) { - const text = resp.text || ""; - const incompleteSignals = [ - "I need to", - "Let me", - "Next, I should", - "I should also", - "Additionally", - "I will now", - "I need first to", - ]; - const needsMoreWork = incompleteSignals.some((signal) => text.includes(signal)); - if (needsMoreWork && ralphIteration < 3) { - ralphIteration++; - await emit({ - ts: now(), - type: "info", - text: `🔄 [Ralph Loop] Self-reflection triggered (iteration ${ralphIteration}/3). Resuming execution...`, - }); - history.push({ - role: "user", - content: "Please continue implementing the outstanding next steps to complete the task.", - }); - continue; - } - // ── Cloud Build Verification (Ralph Loop integration) ── if (opts.repoRoot && ralphIteration < 3) { await emit({ ts: now(), type: "info", - text: "🔍 [Ralph Loop] Initiating automatic build verification...", + text: "🔍 [Ralph Loop] Verifying build for this task...", }); const verification = runBuildVerification(opts.repoRoot, opts.appPath); if (!verification.success) { @@ -313,29 +374,16 @@ Do NOT run git commit or git push — the platform handles committing after you await emit({ ts: now(), type: "error", - text: `❌ [Ralph Loop] Build verification failed (iteration ${ralphIteration}/3). Feeding compilation errors back to the model...`, + text: `❌ [Ralph Loop] Build failed (iteration ${ralphIteration}/3) for this task.`, }); history.push({ role: "user", content: `Your previous edits completed, but the project's build check failed with compilation errors. +Please fix these errors immediately so the build compiles clean: -========================================= -🚨 SURGICAL HEALING PROTOCOL ACTIVE 🚨 -========================================= -The project's compilation/build has failed. You are currently in an autonomous, auto-correcting healing loop and must fix this compilation error immediately. - -To prevent cognitive loop spirals and command limits, you MUST follow this strict, non-negotiable troubleshooting protocol: - -1. 🚫 STRICTLY BLOCK EXPLORATION: DO NOT execute general directory exploration or orientation commands such as 'ls', 'find', 'pwd', 'grep', 'git status', 'git diff', or other search commands. You do not need to look around. -2. 🎯 SURGICAL TARGETING: Scan the compiler error logs below to locate the EXACT filename, line number, and column where the compilation failed. -3. 🛠️ IMMEDIATE CORRECTION: Read that file immediately using your specific file-reading tool (using precise start/end lines if it is large) and apply a targeted, surgical edit to correct the exact syntax or type error. Do not write a placeholder or partial fix. - -Here are the precise compilation errors from the compiler: \`\`\`text ${verification.error} -\`\`\` - -Implement the exact fix directly in the code now.`, +\`\`\``, }); continue; } @@ -343,157 +391,180 @@ Implement the exact fix directly in the code now.`, await emit({ ts: now(), type: "info", - text: "🟢 [Ralph Loop] Build verification passed successfully! 0 errors.", + text: "🟢 [Ralph Loop] Build passed successfully! 0 errors.", }); } } - // If fully complete, trigger auto-commit and finish - if (opts.autoApprove) { - await autoCommitAndDeploy(opts, task, emit); + let diskChecked = false; + try { + const fileContent = fs.readFileSync(task.filePath, "utf8"); + const lines = fileContent.split("\n"); + const line = lines[task.lineIndex]; + if (line) { + const match = line.match(/^(\s*)-\s*\[([ xX])\]\s+(.+)$/); + if (match && match[2].toLowerCase() === "x") { + diskChecked = true; + } + } } - else { - await patchSession(opts, { status: "completed" }); + catch { } + if (!diskChecked) { + await emit({ + ts: now(), + type: "info", + text: `✍️ [Orchestrator] Task implementation completed. Automatically checking off task on disk.`, + }); + toggleTaskOnDisk(task); } - return; + return true; } for (const tc of resp.toolCalls) { toolFingerprints.push(fingerprintToolCall(tc)); } - const window = toolFingerprints.slice(-10); + const window = toolFingerprints.slice(-6); const counts = new Map(); for (const fp of window) counts.set(fp, (counts.get(fp) ?? 0) + 1); let maxRepeats = 0; - let repeatedCmd = ""; for (const [fp, n] of counts.entries()) { - if (n > maxRepeats) { + if (n > maxRepeats) maxRepeats = n; - repeatedCmd = fp.split("|")[0]; - } } - if (maxRepeats >= 6) { - loopBreakReason = `Repeated ${repeatedCmd} ${maxRepeats}× in last 10 calls`; - break; + if (maxRepeats >= 4) { + await emit({ + ts: now(), + type: "error", + text: `Loop detected in subtask execution, breaking loop.`, + }); + return false; } history.push({ role: "assistant", content: resp.text, toolCalls: resp.toolCalls, }); - // ── 4-Level Smart Concurrency Tool Grouping ── - const parallelReads = resp.toolCalls.filter((tc) => [ - "fs_read", - "fs_tree", - "fs_list", - "fs_glob", - "fs_grep", - "projects_list", - "project_recent_errors", - ].includes(tc.name)); - const sequentialWrites = resp.toolCalls.filter((tc) => [ - "fs_write", - "fs_edit", - "create_file", - "write_file", - "replace_in_file", - "apps_create", - "databases_create", - ].includes(tc.name)); - const otherTools = resp.toolCalls.filter((tc) => !parallelReads.includes(tc) && !sequentialWrites.includes(tc)); - // Stage 1: Parallel Reads - if (parallelReads.length > 0) { + for (const tc of resp.toolCalls) { + if (opts.isStopped()) + return false; await emit({ ts: now(), type: "step", - text: `Executing ${parallelReads.length} read operations concurrently...`, + text: `Running ${tc.name}...`, }); - await Promise.all(parallelReads.map(async (tc) => { - let result; - try { - result = await (0, tools_1.executeTool)(tc.name, tc.args, ctx); - } - catch (err) { - result = { - error: err instanceof Error ? err.message : String(err), - }; - } - const resultStr = typeof result === "string" - ? result - : JSON.stringify(result, null, 2); - history.push({ - role: "tool", - content: resultStr, - toolCallId: tc.id, - toolName: tc.name, - }); - })); - } - // Stage 2: Parallelizable Other Tools - if (otherTools.length > 0) { - await Promise.all(otherTools.map(async (tc) => { - await emit({ - ts: now(), - type: "step", - text: `Running ${tc.name}...`, - }); - let result; - try { - result = await (0, tools_1.executeTool)(tc.name, tc.args, ctx); - } - catch (err) { - result = { - error: err instanceof Error ? err.message : String(err), - }; - } - const resultStr = typeof result === "string" - ? result - : JSON.stringify(result, null, 2); - history.push({ - role: "tool", - content: resultStr, - toolCallId: tc.id, - toolName: tc.name, - }); - })); - } - // Stage 3: Sequential User-Safe Writes/Edits - if (sequentialWrites.length > 0) { - for (const tc of sequentialWrites) { - await emit({ - ts: now(), - type: "step", - text: `Writing modifications: ${tc.name}...`, - }); - let result; - try { - result = await (0, tools_1.executeTool)(tc.name, tc.args, ctx); - const changedFile = extractChangedFile(tc.name, tc.args, ctx.workspaceRoot, opts.appPath); - if (changedFile) { - await patchSession(opts, { changedFile }); - } - } - catch (err) { - result = { error: err instanceof Error ? err.message : String(err) }; - } - const resultStr = typeof result === "string" ? result : JSON.stringify(result, null, 2); - history.push({ - role: "tool", - content: resultStr, - toolCallId: tc.id, - toolName: tc.name, - }); + let result; + try { + result = await (0, tools_1.executeTool)(tc.name, tc.args, ctx); } + catch (err) { + result = { error: err instanceof Error ? err.message : String(err) }; + } + const resultStr = typeof result === "string" ? result : JSON.stringify(result, null, 2); + history.push({ + role: "tool", + content: resultStr, + toolCallId: tc.id, + toolName: tc.name, + }); } } - if (loopBreakReason) { + await emit({ + ts: now(), + type: "error", + text: `Subtask exceeded maximum turns limit of ${SUB_MAX_TURNS}.`, + }); + return false; +} +async function runSessionAgent(config, task, ctx, opts) { + const emit = async (line) => { + console.log(`[session ${opts.sessionId}] ${line.type}: ${line.text}`); + await patchSession(opts, { outputLine: line }); + }; + await emit({ + ts: now(), + type: "info", + text: `Agent started offline delegation orchestrator in ${opts.appPath}`, + }); + const repoRoot = opts.repoRoot ?? ctx.workspaceRoot; + let tasks = parseTaskItems(repoRoot); + if (tasks.length === 0) { await emit({ ts: now(), - type: "error", - text: `Loop broken: ${loopBreakReason}`, + type: "info", + text: "🤖 [Orchestrator] No active tasks backlog found on disk. Analyzing prompt to plan atomic execution backlog...", }); - await patchSession(opts, { status: "failed", error: loopBreakReason }); + try { + await generateBacklogFromPrompt(task, repoRoot); + tasks = parseTaskItems(repoRoot); + } + catch (err) { + await emit({ + ts: now(), + type: "error", + text: `❌ [Orchestrator] Failed to generate backlog: ${err.message || String(err)}`, + }); + await patchSession(opts, { + status: "failed", + error: "Backlog generation failed", + }); + return; + } + } + const openTasks = tasks.filter((t) => !t.isChecked); + if (openTasks.length === 0) { + await emit({ + ts: now(), + type: "info", + text: "🟢 [Orchestrator] All tasks in the queue are already completed!", + }); + await patchSession(opts, { status: "completed" }); + return; + } + await emit({ + ts: now(), + type: "info", + text: `🤖 [Orchestrator] Found ${openTasks.length} open tasks. Executing task-by-task Meta-Loop...`, + }); + for (let i = 0; i < openTasks.length; i++) { + const currentTask = openTasks[i]; + await emit({ + ts: now(), + type: "info", + text: `🚀 [Orchestrator] Task ${i + 1}/${openTasks.length}: "${currentTask.text}"`, + }); + const success = await runSingleSubTask(currentTask, config, ctx, opts, emit); + if (!success) { + await emit({ + ts: now(), + type: "error", + text: `❌ [Orchestrator] Bailed out! Task execution failed on: "${currentTask.text}". Rolling back modifications for this task to keep the repository green...`, + }); + try { + const { execSync } = require("child_process"); + execSync("git checkout -- . && git clean -fd", { + cwd: repoRoot, + stdio: "pipe", + }); + } + catch (rollbackErr) { + console.error("Rollback failed:", rollbackErr.message || rollbackErr); + } + await patchSession(opts, { + status: "failed", + error: `Delegation loop halted at task: "${currentTask.text}"`, + }); + return; + } + commitTaskProgress(currentTask, repoRoot); + } + await emit({ + ts: now(), + type: "info", + text: `🎉 [Orchestrator] All delegated tasks completed successfully with green compilation builds!`, + }); + if (opts.autoApprove) { + await autoCommitAndDeploy(opts, task, emit); } else { - await patchSession(opts, { status: "failed", error: "Max turns reached" }); + await patchSession(opts, { status: "completed" }); } } diff --git a/vibn-agent-runner/src/agent-session-runner.ts b/vibn-agent-runner/src/agent-session-runner.ts index 5a51a28..4f8e145 100644 --- a/vibn-agent-runner/src/agent-session-runner.ts +++ b/vibn-agent-runner/src/agent-session-runner.ts @@ -256,42 +256,150 @@ async function autoCommitAndDeploy( // ── Main streaming execution loop ───────────────────────────────────────────── -export async function runSessionAgent( - config: AgentConfig, - task: string, - ctx: ToolContext, - opts: SessionRunOptions, +interface TaskItem { + text: string; + filePath: string; + lineIndex: number; + isChecked: boolean; + fileName: string; +} + +function parseTaskItems(repoRoot: string): TaskItem[] { + const fs = require("fs") as typeof import("fs"); + const path = require("path") as typeof import("path"); + const tasksDir = path.join(repoRoot, ".vibncode", "tasks"); + if (!fs.existsSync(tasksDir)) return []; + + const items: TaskItem[] = []; + try { + const files = fs + .readdirSync(tasksDir) + .filter((f: string) => f.endsWith(".md")); + files.sort(); + + for (const file of files) { + const filePath = path.join(tasksDir, file); + const content = fs.readFileSync(filePath, "utf8"); + const lines = content.split("\n"); + lines.forEach((line: string, lineIndex: number) => { + const match = line.match(/^(\s*)-\s*\[([ xX])\]\s+(.+)$/); + if (match && match[2] !== undefined && match[3] !== undefined) { + items.push({ + text: match[3].trim(), + filePath, + lineIndex, + isChecked: match[2].toLowerCase() === "x", + fileName: file, + }); + } + }); + } + } catch (err) { + console.error("[Orchestrator] Error parsing task items:", err); + } + return items; +} + +function toggleTaskOnDisk(task: TaskItem): void { + const fs = require("fs") as typeof import("fs"); + const content = fs.readFileSync(task.filePath, "utf8"); + const lines = content.split("\n"); + const line = lines[task.lineIndex]; + if (line) { + const match = line.match(/^(\s*)-\s*\[([ xX])\]\s+(.+)$/); + if (match && match[1] !== undefined && match[3] !== undefined) { + lines[task.lineIndex] = `${match[1]}- [x] ${match[3]}`; + fs.writeFileSync(task.filePath, lines.join("\n"), "utf8"); + } + } +} + +async function generateBacklogFromPrompt( + taskPrompt: string, + repoRoot: string, ): Promise { - const systemPrompt = resolvePrompt(config.promptId); + const fs = require("fs") as typeof import("fs"); + const path = require("path") as typeof import("path"); + const tasksDir = path.join(repoRoot, ".vibncode", "tasks"); + fs.mkdirSync(tasksDir, { recursive: true }); - const emit = async (line: OutputLine) => { - console.log(`[session ${opts.sessionId}] ${line.type}: ${line.text}`); - await patchSession(opts, { outputLine: line }); - }; + const prompt = `You are an elite Software Engineering Orchestrator. +Your goal is to break down the user's high-level objective into a highly detailed, sequential checklist of concrete, atomic, self-contained implementation tasks. - await emit({ - ts: now(), - type: "info", - text: `Agent starting working in ${opts.appPath}`, +High-Level Objective: +"${taskPrompt}" + +Please output a standard Markdown file containing: +1. A brief 1-sentence overview. +2. A list of tasks, where each task MUST be formatted as a standard Markdown checkbox starting with "- [ ] ": +- [ ] Implement database schema changes for ... +- [ ] Add endpoint handler for ... +- [ ] Write tests ... + +Be extremely thorough and break the objective down into small, digestible units of work (e.g. 5-15 tasks). +Do NOT include any extra conversational text or explanations. Just output the clean markdown.`; + + const resp = await callVibnChat({ + systemPrompt: + "You are a precise technical orchestrator who only outputs markdown checklist files.", + messages: [{ role: "user", content: prompt }], + temperature: 0.1, }); - // Scope the system prompt to the specific app within the monorepo + const content = resp.text || `# Delegated Backlog\n\n- [ ] ${taskPrompt}`; + const backlogPath = path.join(tasksDir, "00-delegated-backlog.md"); + fs.writeFileSync(backlogPath, content, "utf8"); +} + +function commitTaskProgress(task: TaskItem, repoRoot: string) { + const { execSync } = require("child_process"); + try { + console.log(`[Orchestrator] Committing task progress: ${task.text}`); + execSync("git add -A", { cwd: repoRoot, stdio: "pipe" }); + const msg = `feat(tasks): [Completed] ${task.text}`; + execSync(`git commit -m "${msg.replace(/"/g, '\\"')}"`, { + cwd: repoRoot, + stdio: "pipe", + }); + } catch (err) { + // If nothing to commit, that's fine + } +} + +async function runSingleSubTask( + task: TaskItem, + config: AgentConfig, + ctx: ToolContext, + opts: SessionRunOptions, + emit: (line: OutputLine) => Promise, +): Promise { + const path = require("path") as typeof import("path"); + const fs = require("fs") as typeof import("fs"); const basePrompt = resolvePrompt(config.promptId); + const scopedPrompt = `${basePrompt} -\n\n## Active context -You are working inside the monorepo directory: ${opts.appPath} -All file paths you use should be relative to this directory unless otherwise specified. -When running commands, always cd into ${opts.appPath} first unless already there. -Do NOT run git commit or git push — the platform handles committing after you finish. + +## ACTIVE SUBTASK OBJECTIVE +You are working on a single task in your task queue: +TASK: "${task.text}" +File: "${path.relative(opts.repoRoot ?? ctx.workspaceRoot, task.filePath)}" (line ${task.lineIndex + 1}) + +## CRITICAL EXECUTION CONSTRAINTS +1. 🎯 STAY HIGHLY FOCUSED: Your only objective is to implement this specific task. Do NOT wander, do NOT explore other unrelated parts of the codebase, and do NOT attempt unrelated tasks. +2. 🚫 NO EXPLORATION COMMANDS: DO NOT execute generic orientation/search commands like 'ls', 'find', 'pwd', 'grep', 'git diff', 'git status'. You already know the repository structure. Go straight to editing or reading the targeted files. +3. 🛠️ TOGGLE CHECKBOX: Once your implementation is done, you MUST read and rewrite "${path.relative(opts.repoRoot ?? ctx.workspaceRoot, task.filePath)}" at line ${task.lineIndex + 1} to change "- [ ]" to "- [x]". +4. 🔴 NO COMMITS: Do NOT run 'git commit' or 'git push'. The platform handles committing automatically after you finish. +5. 🟢 COMPLETED SIGNAL: When you are finished, verify the build compiles clean using the Ralph Loop checks. If successful, stop executing tools and end your response. `; - const history: ChatMessage[] = [{ role: "user", content: task }]; + const userPrompt = `Please implement the following task: "${task.text}" and then check it off in the task list.`; + const history: ChatMessage[] = [{ role: "user", content: userPrompt }]; - let turn = 0; + let subTurn = 0; + const SUB_MAX_TURNS = 18; let toolCallsSinceText = 0; let roundsSinceText = 0; const toolFingerprints: string[] = []; - let loopBreakReason: string | null = null; let ralphIteration = 0; function fingerprintToolCall(tc: any) { @@ -315,22 +423,17 @@ Do NOT run git commit or git push — the platform handles committing after you return `${tc.name}:${Object.values(tc.args ?? {})[0]}`; } - while (turn < MAX_TURNS) { + while (subTurn < SUB_MAX_TURNS) { if (opts.isStopped()) { await emit({ ts: now(), type: "info", text: "Stopped by user." }); - await patchSession(opts, { status: "stopped" }); - return; + return false; } - turn++; + subTurn++; - const isSilent = roundsSinceText >= 15 || toolCallsSinceText >= 20; + const isSilent = roundsSinceText >= 8 || toolCallsSinceText >= 12; const extraSystem = isSilent - ? "\n\n[STATUS NUDGE] You have run " + - `${toolCallsSinceText} tool call(s) over ${roundsSinceText} round(s) ` + - "without sending the user any text. Before any more tool calls, " + - "send ONE short sentence describing what you are currently working " + - "on and why." + ? "\n\n[STATUS NUDGE] Focus on completing the current task. Do not make any more tool calls without a short sentence explaining what you are working on." : ""; let resp: any; @@ -339,23 +442,25 @@ Do NOT run git commit or git push — the platform handles committing after you systemPrompt: scopedPrompt + extraSystem, messages: history as any[], tools: config.tools, - temperature: 0.2, + temperature: 0.1, }); } catch (err) { const msg = err instanceof Error ? err.message : String(err); - await emit({ ts: now(), type: "error", text: `LLM error: ${msg}` }); - await patchSession(opts, { status: "failed", error: msg }); - return; + await emit({ + ts: now(), + type: "error", + text: `LLM sub-session error: ${msg}`, + }); + return false; } if (resp.error) { await emit({ ts: now(), type: "error", - text: `LLM error: ${resp.error}`, + text: `LLM sub-session error: ${resp.error}`, }); - await patchSession(opts, { status: "failed", error: resp.error }); - return; + return false; } if (resp.text) { @@ -367,43 +472,12 @@ Do NOT run git commit or git push — the platform handles committing after you toolCallsSinceText += resp.toolCalls.length; } - // ── Self-Correcting Ralph Loop Autonomy ── if (!resp.toolCalls.length) { - const text = resp.text || ""; - const incompleteSignals = [ - "I need to", - "Let me", - "Next, I should", - "I should also", - "Additionally", - "I will now", - "I need first to", - ]; - const needsMoreWork = incompleteSignals.some((signal) => - text.includes(signal), - ); - - if (needsMoreWork && ralphIteration < 3) { - ralphIteration++; - await emit({ - ts: now(), - type: "info", - text: `🔄 [Ralph Loop] Self-reflection triggered (iteration ${ralphIteration}/3). Resuming execution...`, - }); - history.push({ - role: "user", - content: - "Please continue implementing the outstanding next steps to complete the task.", - }); - continue; - } - - // ── Cloud Build Verification (Ralph Loop integration) ── if (opts.repoRoot && ralphIteration < 3) { await emit({ ts: now(), type: "info", - text: "🔍 [Ralph Loop] Initiating automatic build verification...", + text: "🔍 [Ralph Loop] Verifying build for this task...", }); const verification = runBuildVerification(opts.repoRoot, opts.appPath); @@ -412,69 +486,72 @@ Do NOT run git commit or git push — the platform handles committing after you await emit({ ts: now(), type: "error", - text: `❌ [Ralph Loop] Build verification failed (iteration ${ralphIteration}/3). Feeding compilation errors back to the model...`, + text: `❌ [Ralph Loop] Build failed (iteration ${ralphIteration}/3) for this task.`, }); history.push({ role: "user", content: `Your previous edits completed, but the project's build check failed with compilation errors. +Please fix these errors immediately so the build compiles clean: -========================================= -🚨 SURGICAL HEALING PROTOCOL ACTIVE 🚨 -========================================= -The project's compilation/build has failed. You are currently in an autonomous, auto-correcting healing loop and must fix this compilation error immediately. - -To prevent cognitive loop spirals and command limits, you MUST follow this strict, non-negotiable troubleshooting protocol: - -1. 🚫 STRICTLY BLOCK EXPLORATION: DO NOT execute general directory exploration or orientation commands such as 'ls', 'find', 'pwd', 'grep', 'git status', 'git diff', or other search commands. You do not need to look around. -2. 🎯 SURGICAL TARGETING: Scan the compiler error logs below to locate the EXACT filename, line number, and column where the compilation failed. -3. 🛠️ IMMEDIATE CORRECTION: Read that file immediately using your specific file-reading tool (using precise start/end lines if it is large) and apply a targeted, surgical edit to correct the exact syntax or type error. Do not write a placeholder or partial fix. - -Here are the precise compilation errors from the compiler: \`\`\`text ${verification.error} -\`\`\` - -Implement the exact fix directly in the code now.`, +\`\`\``, }); continue; } else { await emit({ ts: now(), type: "info", - text: "🟢 [Ralph Loop] Build verification passed successfully! 0 errors.", + text: "🟢 [Ralph Loop] Build passed successfully! 0 errors.", }); } } - // If fully complete, trigger auto-commit and finish - if (opts.autoApprove) { - await autoCommitAndDeploy(opts, task, emit); - } else { - await patchSession(opts, { status: "completed" }); + let diskChecked = false; + try { + const fileContent = fs.readFileSync(task.filePath, "utf8"); + const lines = fileContent.split("\n"); + const line = lines[task.lineIndex]; + if (line) { + const match = line.match(/^(\s*)-\s*\[([ xX])\]\s+(.+)$/); + if (match && match[2].toLowerCase() === "x") { + diskChecked = true; + } + } + } catch {} + + if (!diskChecked) { + await emit({ + ts: now(), + type: "info", + text: `✍️ [Orchestrator] Task implementation completed. Automatically checking off task on disk.`, + }); + toggleTaskOnDisk(task); } - return; + + return true; } for (const tc of resp.toolCalls) { toolFingerprints.push(fingerprintToolCall(tc)); } - const window = toolFingerprints.slice(-10); + const window = toolFingerprints.slice(-6); const counts = new Map(); for (const fp of window) counts.set(fp, (counts.get(fp) ?? 0) + 1); let maxRepeats = 0; - let repeatedCmd = ""; for (const [fp, n] of counts.entries()) { - if (n > maxRepeats) { - maxRepeats = n; - repeatedCmd = fp.split("|")[0]; - } + if (n > maxRepeats) maxRepeats = n; } - if (maxRepeats >= 6) { - loopBreakReason = `Repeated ${repeatedCmd} ${maxRepeats}× in last 10 calls`; - break; + if (maxRepeats >= 4) { + await emit({ + ts: now(), + type: "error", + text: `Loop detected in subtask execution, breaking loop.`, + }); + return false; } history.push({ @@ -483,140 +560,152 @@ Implement the exact fix directly in the code now.`, toolCalls: resp.toolCalls, }); - // ── 4-Level Smart Concurrency Tool Grouping ── - const parallelReads = resp.toolCalls.filter((tc: any) => - [ - "fs_read", - "fs_tree", - "fs_list", - "fs_glob", - "fs_grep", - "projects_list", - "project_recent_errors", - ].includes(tc.name), - ); - const sequentialWrites = resp.toolCalls.filter((tc: any) => - [ - "fs_write", - "fs_edit", - "create_file", - "write_file", - "replace_in_file", - "apps_create", - "databases_create", - ].includes(tc.name), - ); - const otherTools = resp.toolCalls.filter( - (tc: any) => - !parallelReads.includes(tc) && !sequentialWrites.includes(tc), - ); + for (const tc of resp.toolCalls) { + if (opts.isStopped()) return false; - // Stage 1: Parallel Reads - if (parallelReads.length > 0) { await emit({ ts: now(), type: "step", - text: `Executing ${parallelReads.length} read operations concurrently...`, + text: `Running ${tc.name}...`, }); - await Promise.all( - parallelReads.map(async (tc: any) => { - let result; - try { - result = await executeTool(tc.name, tc.args, ctx); - } catch (err) { - result = { - error: err instanceof Error ? err.message : String(err), - }; - } - const resultStr = - typeof result === "string" - ? result - : JSON.stringify(result, null, 2); - history.push({ - role: "tool", - content: resultStr, - toolCallId: tc.id, - toolName: tc.name, - }); - }), - ); - } - - // Stage 2: Parallelizable Other Tools - if (otherTools.length > 0) { - await Promise.all( - otherTools.map(async (tc: any) => { - await emit({ - ts: now(), - type: "step", - text: `Running ${tc.name}...`, - }); - let result; - try { - result = await executeTool(tc.name, tc.args, ctx); - } catch (err) { - result = { - error: err instanceof Error ? err.message : String(err), - }; - } - const resultStr = - typeof result === "string" - ? result - : JSON.stringify(result, null, 2); - history.push({ - role: "tool", - content: resultStr, - toolCallId: tc.id, - toolName: tc.name, - }); - }), - ); - } - - // Stage 3: Sequential User-Safe Writes/Edits - if (sequentialWrites.length > 0) { - for (const tc of sequentialWrites) { - await emit({ - ts: now(), - type: "step", - text: `Writing modifications: ${tc.name}...`, - }); - let result; - try { - result = await executeTool(tc.name, tc.args, ctx); - const changedFile = extractChangedFile( - tc.name, - tc.args, - ctx.workspaceRoot, - opts.appPath, - ); - if (changedFile) { - await patchSession(opts, { changedFile }); - } - } catch (err) { - result = { error: err instanceof Error ? err.message : String(err) }; - } - const resultStr = - typeof result === "string" ? result : JSON.stringify(result, null, 2); - history.push({ - role: "tool", - content: resultStr, - toolCallId: tc.id, - toolName: tc.name, - }); + let result: any; + try { + result = await executeTool(tc.name, tc.args, ctx); + } catch (err) { + result = { error: err instanceof Error ? err.message : String(err) }; } + + const resultStr = + typeof result === "string" ? result : JSON.stringify(result, null, 2); + history.push({ + role: "tool", + content: resultStr, + toolCallId: tc.id, + toolName: tc.name, + }); } } - if (loopBreakReason) { + await emit({ + ts: now(), + type: "error", + text: `Subtask exceeded maximum turns limit of ${SUB_MAX_TURNS}.`, + }); + return false; +} + +export async function runSessionAgent( + config: AgentConfig, + task: string, + ctx: ToolContext, + opts: SessionRunOptions, +): Promise { + const emit = async (line: OutputLine) => { + console.log(`[session ${opts.sessionId}] ${line.type}: ${line.text}`); + await patchSession(opts, { outputLine: line }); + }; + + await emit({ + ts: now(), + type: "info", + text: `Agent started offline delegation orchestrator in ${opts.appPath}`, + }); + + const repoRoot = opts.repoRoot ?? ctx.workspaceRoot; + + let tasks = parseTaskItems(repoRoot); + if (tasks.length === 0) { await emit({ ts: now(), - type: "error", - text: `Loop broken: ${loopBreakReason}`, + type: "info", + text: "🤖 [Orchestrator] No active tasks backlog found on disk. Analyzing prompt to plan atomic execution backlog...", }); - await patchSession(opts, { status: "failed", error: loopBreakReason }); + try { + await generateBacklogFromPrompt(task, repoRoot); + tasks = parseTaskItems(repoRoot); + } catch (err: any) { + await emit({ + ts: now(), + type: "error", + text: `❌ [Orchestrator] Failed to generate backlog: ${err.message || String(err)}`, + }); + await patchSession(opts, { + status: "failed", + error: "Backlog generation failed", + }); + return; + } + } + + const openTasks = tasks.filter((t) => !t.isChecked); + if (openTasks.length === 0) { + await emit({ + ts: now(), + type: "info", + text: "🟢 [Orchestrator] All tasks in the queue are already completed!", + }); + await patchSession(opts, { status: "completed" }); + return; + } + + await emit({ + ts: now(), + type: "info", + text: `🤖 [Orchestrator] Found ${openTasks.length} open tasks. Executing task-by-task Meta-Loop...`, + }); + + for (let i = 0; i < openTasks.length; i++) { + const currentTask = openTasks[i]; + await emit({ + ts: now(), + type: "info", + text: `🚀 [Orchestrator] Task ${i + 1}/${openTasks.length}: "${currentTask.text}"`, + }); + + const success = await runSingleSubTask( + currentTask, + config, + ctx, + opts, + emit, + ); + if (!success) { + await emit({ + ts: now(), + type: "error", + text: `❌ [Orchestrator] Bailed out! Task execution failed on: "${currentTask.text}". Rolling back modifications for this task to keep the repository green...`, + }); + + try { + const { execSync } = require("child_process"); + execSync("git checkout -- . && git clean -fd", { + cwd: repoRoot, + stdio: "pipe", + }); + } catch (rollbackErr: any) { + console.error("Rollback failed:", rollbackErr.message || rollbackErr); + } + + await patchSession(opts, { + status: "failed", + error: `Delegation loop halted at task: "${currentTask.text}"`, + }); + return; + } + + commitTaskProgress(currentTask, repoRoot); + } + + await emit({ + ts: now(), + type: "info", + text: `🎉 [Orchestrator] All delegated tasks completed successfully with green compilation builds!`, + }); + + if (opts.autoApprove) { + await autoCommitAndDeploy(opts, task, emit); } else { - await patchSession(opts, { status: "failed", error: "Max turns reached" }); + await patchSession(opts, { status: "completed" }); } }