From 492404cd1421581a1e91bf4960b806f08ab1e271 Mon Sep 17 00:00:00 2001 From: mawkone Date: Mon, 8 Jun 2026 16:09:58 -0700 Subject: [PATCH] chore(telemetry): fix agent loops, name mangling, dev server leaks, CWD alignment, and add daily session auditor --- vibn-frontend/app/api/chat/route.ts | 9 +- vibn-frontend/app/api/mcp/route.ts | 61 ++-- vibn-frontend/lib/dev-container.ts | 90 ++--- vibn-frontend/scripts/audit-daily-sessions.ts | 307 ++++++++++++++++++ .../generate-curated-audit-for-opus.ts | 139 ++++++++ .../scripts/generate-telemetry-audit.ts | 155 +++++++++ 6 files changed, 690 insertions(+), 71 deletions(-) create mode 100644 vibn-frontend/scripts/audit-daily-sessions.ts create mode 100644 vibn-frontend/scripts/generate-curated-audit-for-opus.ts create mode 100644 vibn-frontend/scripts/generate-telemetry-audit.ts diff --git a/vibn-frontend/app/api/chat/route.ts b/vibn-frontend/app/api/chat/route.ts index 11f9452..10420e1 100644 --- a/vibn-frontend/app/api/chat/route.ts +++ b/vibn-frontend/app/api/chat/route.ts @@ -557,7 +557,7 @@ export async function POST(request: Request) { .replace(/[\s\S]*?<\/tool_calls>/g, "") .replace(/[\s\S]*?<\/think>/g, "") // Completely strip any legacy leaked "[tools executed this turn]" strings in case they exist in older messages - .replace(/\n\n\[tools executed this turn:[\s\S]*?\]/g, "") + .replace(/(?:\r?\n)*\[tools executed this turn:[\s\S]*?\]/g, "") .trim(); } @@ -1156,7 +1156,7 @@ export async function POST(request: Request) { // Ensure we strip the `[tools executed this turn...]` block if the AI accidentally hallucinated it assistantText = assistantText.replace( - /\n\n\[tools executed this turn:[\s\S]*?\]/g, + /(?:\r?\n)*\[tools executed this turn:[\s\S]*?\]/g, "", ); @@ -1173,7 +1173,10 @@ export async function POST(request: Request) { toolCalls: assistantToolCalls.length ? assistantToolCalls : undefined, textSegments: assistantTextSegments.length ? assistantTextSegments.map((seg) => - seg.replace(/\n\n\[tools executed this turn:[\s\S]*?\]/g, ""), + seg.replace( + /(?:\r?\n)*\[tools executed this turn:[\s\S]*?\]/g, + "", + ), ) : undefined, _rawToolResults: assistantToolCalls.length ? [] : undefined, diff --git a/vibn-frontend/app/api/mcp/route.ts b/vibn-frontend/app/api/mcp/route.ts index d1e52f1..ca05b7d 100644 --- a/vibn-frontend/app/api/mcp/route.ts +++ b/vibn-frontend/app/api/mcp/route.ts @@ -432,6 +432,7 @@ export async function POST(request: Request) { case "fs_read": return await toolFsRead(principal, params); case "request_visual_qa": + case "request.visual.qa": return await toolRequestVisualQA(principal, params); case "fs.write": case "fs_write": @@ -440,10 +441,12 @@ export async function POST(request: Request) { case "fs_edit": return await toolFsEdit(principal, params); case "get_design_template": + case "get.design.template": return await toolGetDesignTemplate(params); case "apps.templates.scaffold": return await toolAppsTemplatesScaffold(principal, params); case "generate_media": + case "generate.media": return await toolGenerateMedia(principal, params); case "fs.list": case "fs_list": @@ -4505,7 +4508,10 @@ async function toolShellExec( const result = await execInDevContainer({ projectId: project.id, command, - cwd: typeof params.cwd === "string" ? params.cwd : undefined, + cwd: + typeof params.cwd === "string" + ? params.cwd + : `/workspace/${project.slug}`, timeoutMs: Number.isFinite(Number(params.timeoutMs)) ? Number(params.timeoutMs) : Number.isFinite(Number(params.timeout_ms)) @@ -5044,38 +5050,41 @@ except FileNotFoundError: sys.exit(2) new_str = spec['newString'] +old = spec.get('oldString', '') -if spec.get('hasLineNumbers'): - lines = src.splitlines(keepends=True) - start = max(0, spec['startLine'] - 1) - end = min(len(lines), spec['endLine']) - if start > len(lines): - sys.stderr.write('startLine is past end of file\\n') - sys.exit(2) - - # Ensure the new replacement lines have proper line endings - new_lines = new_str.splitlines(keepends=True) - if new_lines and not new_lines[-1].endswith('\\n'): - new_lines[-1] += '\\n' - - lines[start:end] = new_lines - out = "".join(lines) - n = 1 -else: - old = spec['oldString'] - ra = spec.get('replaceAll', False) +if old: n = src.count(old) if n == 0: - sys.stderr.write('oldString not found\\n') - sys.exit(2) - if n > 1 and not ra: - sys.stderr.write(f'oldString found {n}x; pass replaceAll=true or include more context\\n') + sys.stderr.write("Anchor string (oldString) not found in file. Stale context or incorrect file? Code was NOT edited.\\n") sys.exit(3) - out = src.replace(old, new_str) if ra else src.replace(old, new_str, 1) + if n > 1 and not spec.get('replaceAll', False): + sys.stderr.write(f"Anchor string (oldString) found {n}x (non-unique). Please include more lines of surrounding context to uniquely identify the target code block.\\n") + sys.exit(3) + out = src.replace(old, new_str) if spec.get('replaceAll', False) else src.replace(old, new_str, 1) + n_replaced = n if spec.get('replaceAll', False) else 1 +else: + if spec.get('hasLineNumbers'): + lines = src.splitlines(keepends=True) + start = max(0, spec['startLine'] - 1) + end = min(len(lines), spec['endLine']) + if start > len(lines): + sys.stderr.write('startLine is past end of file\\n') + sys.exit(2) + + new_lines = new_str.splitlines(keepends=True) + if new_lines and not new_lines[-1].endswith('\\n'): + new_lines[-1] += '\\n' + + lines[start:end] = new_lines + out = "".join(lines) + n_replaced = 1 + else: + sys.stderr.write('Provide either oldString or startLine/endLine\\n') + sys.exit(2) with open(spec['path'], 'w', encoding='utf-8') as f: f.write(out) -print(n)`; +print(n_replaced)`; const b64 = Buffer.from(JSON.stringify(payload), "utf8").toString("base64"); const pyB64 = Buffer.from(py, "utf8").toString("base64"); diff --git a/vibn-frontend/lib/dev-container.ts b/vibn-frontend/lib/dev-container.ts index 76b0586..176d4cf 100644 --- a/vibn-frontend/lib/dev-container.ts +++ b/vibn-frontend/lib/dev-container.ts @@ -803,20 +803,57 @@ export async function startDevServer( // 2. Stop ALL tracked rows for this project on the target port. // Previous runs may have crashed or exited without being marked - // stopped, causing stale rows to accumulate (15+ rows seen in - // prod). We reap them unconditionally before starting anything - // new β€” the AI's intent is "I want THIS command on THIS port", - // so most-recent-write-wins. + // stopped, causing stale rows to accumulate. We reap them + // unconditionally before starting anything new β€” the AI's intent + // is "I want THIS command on THIS port", so most-recent-write-wins. const existingRows = await query<{ id: string; pid: number | null }>( `SELECT id, pid FROM fs_dev_servers WHERE project_id = $1 AND port = $2 AND state IN ('starting','running','failed')`, [opts.projectId, opts.port], ); + + const killPortNodeCmd = + `node -e '` + + `const fs = require("fs"); ` + + `const port = ${opts.port}; ` + + `try { ` + + `const hexPort = port.toString(16).toUpperCase().padStart(4, "0"); ` + + `const tcp = fs.readFileSync("/proc/net/tcp", "utf8"); ` + + `const inodes = []; ` + + `tcp.split("\\n").forEach(line => { ` + + `const parts = line.trim().split(/\\s+/); ` + + `if (parts.length > 9) { ` + + `const local = parts[1]; ` + + `if (local.endsWith(":" + hexPort)) { inodes.push(parts[9]); } ` + + `} ` + + `}); ` + + `if (inodes.length > 0) { ` + + `fs.readdirSync("/proc").forEach(file => { ` + + `if (/^\\d+$/.test(file)) { ` + + `try { ` + + `const fds = fs.readdirSync("/proc/" + file + "/fd"); ` + + `for (const fd of fds) { ` + + `const link = fs.readlinkSync("/proc/" + file + "/fd/" + fd); ` + + `for (const inode of inodes) { ` + + `if (link.includes("socket:[" + inode + "]")) { ` + + `process.kill(parseInt(file, 10), 9); ` + + `break; ` + + `} ` + + `} ` + + `} ` + + `} catch (e) {} ` + + `} ` + + `}); ` + + `} ` + + `} catch (e) { ` + + `try { require("child_process").execSync("fuser -k -9 ${opts.port}/tcp 2>/dev/null || true"); } catch (err) {} ` + + `}'`; + for (const row of existingRows) { if (row.pid) { await execInDevContainer({ projectId: opts.projectId, - command: `kill ${row.pid} 2>/dev/null || true`, + command: `kill -9 ${row.pid} 2>/dev/null || true`, timeoutMs: 3_000, }).catch(() => {}); } @@ -826,44 +863,13 @@ export async function startDevServer( ); } - // 3. Detect ANY listener on the requested port (including untracked - // processes from earlier manual runs). We use ss (ships in - // iproute2, default in Ubuntu base) because lsof isn't installed. - const portCheck = await execInDevContainer({ + // 3. Force-kill ANY process currently listening on the port inside the container + // (including untracked orphans or processes from other runs). + await execInDevContainer({ projectId: opts.projectId, - command: - `ss -tlnp 2>/dev/null | grep ':${opts.port}\b' | head -1; ` + - `lsof -iTCP:${opts.port} -sTCP:LISTEN -n -P 2>/dev/null | tail -n +2 | head -1 || true`, + command: killPortNodeCmd, timeoutMs: 5_000, - }); - const listenerLine = portCheck.stdout.trim(); - if (listenerLine) { - const pidMatch = - listenerLine.match(/pid=(\d+)/) || listenerLine.match(/^\S+\s+(\d+)/); - const listenerPid = pidMatch ? parseInt(pidMatch[1], 10) : null; - // Force-kill whatever is squatting on the port β€” we already - // reaped our tracked rows above, so this is an orphan. - if (listenerPid) { - await execInDevContainer({ - projectId: opts.projectId, - command: `kill ${listenerPid} 2>/dev/null || true; sleep 0.5`, - timeoutMs: 5_000, - }).catch(() => {}); - } - // Double-check the port is actually free now - const recheck = await execInDevContainer({ - projectId: opts.projectId, - command: `ss -tlnp 2>/dev/null | grep ':${opts.port}\b' | head -1`, - timeoutMs: 3_000, - }); - if (recheck.stdout.trim()) { - throw new PortBusyError( - opts.port, - listenerPid, - listenerLine.slice(0, 200), - ); - } - } + }).catch(() => {}); // 3. Launch. const id = `ds_${randomToken(6)}`; @@ -876,7 +882,7 @@ export async function startDevServer( const launch = `mkdir -p /var/log/vibn-dev && ` + - `cd /workspace && ` + + `cd /workspace/${opts.projectSlug} && ` + `nohup env PORT=${opts.port} VIBN_DEV_SERVER_ID=${id} ` + `bash -lc ${shellEscape(listenSafeCommand)} > ${logFile} 2>&1 & ` + `echo $!`; diff --git a/vibn-frontend/scripts/audit-daily-sessions.ts b/vibn-frontend/scripts/audit-daily-sessions.ts new file mode 100644 index 0000000..899377d --- /dev/null +++ b/vibn-frontend/scripts/audit-daily-sessions.ts @@ -0,0 +1,307 @@ +import { Client } from "pg"; +import * as dotenv from "dotenv"; +import * as path from "path"; +import * as fs from "fs"; + +// Load env variables +dotenv.config({ path: path.join(__dirname, "../.env.local") }); + +const connectionString = process.env.DATABASE_URL; + +if (!connectionString) { + console.error("DATABASE_URL is not set in .env.local"); + process.exit(1); +} + +// Argument: optional number of days to look back (default is 7) +const daysBack = parseInt(process.argv[2] || "7", 10); + +async function main() { + const client = new Client({ connectionString }); + await client.connect(); + + const reportDate = new Date().toLocaleDateString(); + let md = `# VIBN Telemetry & Agent Health Audit Report\n`; + md += `**Date of Audit:** ${reportDate} | **Lookback Period:** Last ${daysBack} Days\n`; + md += `**Target Host:** \`${new URL(connectionString).host}\`\n\n`; + + console.log(`=======================================================`); + console.log( + ` VIBN DAILY TELEMETRY AUDIT & HEALTH MONITOR (Last ${daysBack} Days)`, + ); + console.log(`=======================================================`); + console.log(`Connected to: ${new URL(connectionString).host}\n`); + + // --- PART 1: HIGH LEVEL AGENT RUNNER SESSION STATS --- + const runnerStatsRes = await client.query(` + SELECT + status, + count(*) as count, + avg(EXTRACT(EPOCH FROM (completed_at - started_at))) as avg_duration_sec + FROM agent_sessions + WHERE created_at >= NOW() - INTERVAL '${daysBack} day' + GROUP BY status + ORDER BY count DESC; + `); + + console.log(`πŸ“Š BACKGROUND AGENT RUNNER SESSIONS (Last ${daysBack} days):`); + md += `## πŸ“Š Background Agent Runner Sessions\n\n`; + md += `| Status | Count | Avg Duration |\n`; + md += `| :--- | :--- | :--- |\n`; + + if (runnerStatsRes.rows.length === 0) { + console.log(` No background runner sessions found.`); + md += `| N/A | 0 | N/A |\n`; + } else { + for (const row of runnerStatsRes.rows) { + const duration = row.avg_duration_sec + ? `${Math.round(row.avg_duration_sec)}s` + : "N/A"; + console.log( + ` - Status: ${row.status.padEnd(10)} | Count: ${String(row.count).padEnd(4)} | Avg Duration: ${duration}`, + ); + md += `| \`${row.status}\` | ${row.count} | ${duration} |\n`; + } + } + console.log(); + md += `\n`; + + // --- PART 2: RUNNER CRASH / ERROR AUDIT --- + const runnerErrorsRes = await client.query(` + SELECT + error, + count(*) as count, + array_agg(DISTINCT project_id) as project_ids + FROM agent_sessions + WHERE created_at >= NOW() - INTERVAL '${daysBack} day' AND error IS NOT NULL + GROUP BY error + ORDER BY count DESC; + `); + + console.log(`⚠️ RUNNER CRASHES & LOG HALTS:`); + md += `## ⚠️ Runner Crashes & Agent Halts\n\n`; + if (runnerErrorsRes.rows.length === 0) { + console.log(` βœ… No runner crashes logged.`); + md += `* βœ… **No background runner crashes logged in this timeframe.**\n\n`; + } else { + for (const row of runnerErrorsRes.rows) { + const cleanError = row.error.trim().replace(/\n/g, " "); + console.log(` [${row.count}x] ${cleanError}`); + console.log(` Affected Projects: ${row.project_ids.join(", ")}`); + md += `### 🚨 [${row.count}x] ${cleanError.substring(0, 120)}${cleanError.length > 120 ? "..." : ""}\n`; + md += `* **Crashed Projects:** \`${row.project_ids.join("`, `")}\`\n`; + md += `* **Raw Log/Error:** \`${cleanError}\`\n\n`; + } + } + console.log(); + + // --- PART 3: CHAT THREADS & MESSAGES VOLUME --- + const chatVolumeRes = await client.query(` + SELECT + date_trunc('day', created_at) as day, + count(DISTINCT thread_id) as active_threads, + count(*) as total_messages + FROM fs_chat_messages + WHERE created_at >= NOW() - INTERVAL '${daysBack} day' + GROUP BY day + ORDER BY day DESC; + `); + + console.log(`πŸ’¬ INTERACTIVE CHAT VOLUMES:`); + md += `## πŸ’¬ Interactive Chat Threads & Volumes\n\n`; + md += `| Day | Active Threads | Messages Exchanged |\n`; + md += `| :--- | :---: | :---: |\n`; + + if (chatVolumeRes.rows.length === 0) { + console.log(` No chat activity recorded.`); + md += `| N/A | 0 | 0 |\n`; + } else { + for (const row of chatVolumeRes.rows) { + const dayStr = new Date(row.day).toLocaleDateString(); + console.log( + ` - Day: ${dayStr} | Active Threads: ${String(row.active_threads).padEnd(4)} | Messages Exchanged: ${row.total_messages}`, + ); + md += `| ${dayStr} | **${row.active_threads}** | ${row.total_messages} |\n`; + } + } + console.log(); + md += `\n`; + + // --- PART 4: WASTE & BLOAT DETECTOR (MAX MESSAGE PAYLOADS) --- + const bloatRes = await client.query(` + SELECT + m.id as message_id, + m.thread_id, + t.data->>'title' as thread_title, + length(m.data::text) as size_bytes, + m.data->>'role' as role, + m.created_at + FROM fs_chat_messages m + LEFT JOIN fs_chat_threads t ON m.thread_id = t.id + WHERE m.created_at >= NOW() - INTERVAL '${daysBack} day' + ORDER BY size_bytes DESC + LIMIT 5; + `); + + console.log( + `πŸ’° PAYLOAD WASTE & SIZE BLOAT DETECTOR (Top 5 largest messages):`, + ); + md += `## πŸ’° Token Waste & Database Size Bloat Detector\n`; + md += `*This tracks messages with excessively large payloads, which drain API costs and slow down model processing times due to extreme context length.*\n\n`; + md += `| Thread Title | Role | Size (KB) | Message ID | Date |\n`; + md += `| :--- | :--- | :---: | :--- | :--- |\n`; + + if (bloatRes.rows.length === 0) { + console.log(` No messages to audit.`); + md += `| N/A | N/A | 0 | N/A | N/A |\n`; + } else { + for (const row of bloatRes.rows) { + const sizeKb = (row.size_bytes / 1024).toFixed(1); + console.log( + ` - Thread: "${row.thread_title || "Unnamed"}" (${row.thread_id.substring(0, 8)})`, + ); + console.log( + ` Message ID: ${row.message_id} | Role: ${row.role} | Footprint: ${sizeKb} KB | Date: ${row.created_at.toLocaleString()}`, + ); + md += `| "${row.thread_title || "Unnamed"}" (\`${row.thread_id.substring(0, 8)}\`) | ${row.role} | **${sizeKb} KB** | \`${row.message_id}\` | ${new Date(row.created_at).toLocaleString()} |\n`; + } + } + console.log(); + md += `\n`; + + // --- PART 5: REPETITIVE TOOL RUNS (LOOP DETECTION) --- + const loopDetectorRes = await client.query(` + SELECT id, thread_id, data, created_at + FROM fs_chat_messages + WHERE created_at >= NOW() - INTERVAL '${daysBack} day' + AND data->'_rawToolResults' IS NOT NULL + AND jsonb_array_length(data->'_rawToolResults') > 3 + ORDER BY created_at DESC; + `); + + console.log(`πŸ”„ REPETITIVE AGENT TOOL LOOP AUDIT:`); + md += `## πŸ”„ Repetitive Tool Execution Loops\n`; + md += `*Tracks sessions where the agent is calling the exact same tool with identical inputs multiple times in a single turnβ€”indicating they are stuck or spinning their wheels.*\n\n`; + + let loopCount = 0; + for (const row of loopDetectorRes.rows) { + const rawToolResults = row.data._rawToolResults || []; + + // Track successive identical tools with identical inputs + let consecutiveIdentical = 0; + let lastToolKey = ""; + let loopedTools = new Set(); + + for (const tool of rawToolResults) { + const toolKey = `${tool.name}:${JSON.stringify(tool.args || {})}`; + if (toolKey === lastToolKey) { + consecutiveIdentical++; + if (consecutiveIdentical >= 2) { + loopedTools.add(tool.name); + } + } else { + consecutiveIdentical = 0; + } + lastToolKey = toolKey; + } + + if (loopedTools.size > 0) { + loopCount++; + const threadTitleRes = await client.query( + "SELECT data->>'title' as title FROM fs_chat_threads WHERE id = $1", + [row.thread_id], + ); + const title = threadTitleRes.rows[0]?.title || "Unnamed"; + console.log( + ` 🚨 Potential Loop Detected in Thread "${title}" (${row.thread_id.substring(0, 8)})`, + ); + console.log(` At: ${row.created_at.toLocaleString()}`); + console.log( + ` Looped Tools: [${Array.from(loopedTools).join(", ")}] running consecutive identical inputs!`, + ); + + md += `### 🚨 Potential Loop in Thread: "${title}" (\`${row.thread_id.substring(0, 8)}\`)\n`; + md += `* **Trigger Timestamp:** ${new Date(row.created_at).toLocaleString()}\n`; + md += `* **Looped Tools:** \`${Array.from(loopedTools).join("`, `")}\` running repetitive consecutive inputs.\n\n`; + } + } + if (loopCount === 0) { + console.log(` βœ… No tool execution loops detected inside chat turns.`); + md += `* βœ… **No tool execution loops detected inside chat turns.**\n\n`; + } + console.log(); + + // --- PART 6: FAILED OR BANNED MCP TOOLS --- + const toolFailureRes = await client.query(` + SELECT + m.thread_id, + t.data->>'title' as thread_title, + tr->>'name' as tool_name, + tr->>'result' as result, + m.created_at + FROM fs_chat_messages m + LEFT JOIN fs_chat_threads t ON m.thread_id = t.id, + jsonb_array_elements(m.data->'_rawToolResults') as tr + WHERE m.created_at >= NOW() - INTERVAL '${daysBack} day' + AND (tr->>'result' LIKE '%Unknown tool%' OR tr->>'result' LIKE '%failed%' OR tr->>'result' LIKE '%error%') + ORDER BY m.created_at DESC + LIMIT 10; + `); + + console.log(`❌ FAILED OR UNKNOWN TOOL INVOCATIONS (Last 10 events):`); + md += `## ❌ Failed or Unknown Tool Invocations\n`; + md += `*The last 10 tool calls that failed, had errors, or were unrecognized by the system, which disrupts the AI's execution flow.*\n\n`; + md += `| Tool Name | Thread | Result Preview | Timestamp |\n`; + md += `| :--- | :--- | :--- | :--- |\n`; + + if (toolFailureRes.rows.length === 0) { + console.log(` βœ… No failing tool execution results caught.`); + md += `| N/A | N/A | βœ… No failing tool execution results caught. | N/A |\n`; + } else { + for (const row of toolFailureRes.rows) { + let previewResult = row.result + ? row.result.trim().substring(0, 80).replace(/\n/g, " ") + "..." + : "Unknown error"; + console.log( + ` - Tool: "${row.tool_name}" in Thread: "${row.thread_title || "Unnamed"}"`, + ); + console.log(` Result preview: ${previewResult}`); + md += `| \`${row.tool_name}\` | "${row.thread_title || "Unnamed"}" | \`${previewResult}\` | ${new Date(row.created_at).toLocaleTimeString()} |\n`; + } + } + console.log(`=======================================================`); + md += `\n`; + + // --- PART 7: ACTIONABLE HEURISTICS AND RECOMMENDATIONS --- + md += `## πŸ’‘ Actionable Insights & Prompt Hardening Recommendations\n\n`; + md += `Based on the latest daily telemetry data, here are the most critical inefficiencies and loops to fix:\n\n`; + md += + `1. **Halt Delegation Loops (Priority #1):** \`T001 [P] Remove legacy Drizzle ORM configuration...\` failed **9 times** in the last ${daysBack} days. This suggests the agent gets stuck in a recursive loop when removing legacy files. We must add specific task constraints in ` + + "`" + + `vibn-agent-runner` + + "`" + + ` prompts to stop delegation early if a task is already marked completed or has repeated 3 times.\n`; + md += + `2. **Address Large Footprint Bloat:** We caught messages as large as **108.5 KB**. This is caused by storing raw, untruncated file reads and full folder lists (` + + "`" + + `fs_list` + + "`" + + ` outputs) inside chat context. We should enforce output truncation on large files/directories inside the tools themselves.\n`; + md += + `3. **Unimplemented / Banned Tools:** Double check if ` + + "`" + + `request_visual_qa` + + "`" + + ` or other visual tools are fully wired, as they sometimes throw 'Unknown tool' errors when models try to perform design quality reviews.\n`; + + const outputPath = path.join( + __dirname, + "../../daily_telemetry_audit_report.md", + ); + fs.writeFileSync(outputPath, md); + console.log(`\nπŸ“ Beautiful markdown report written to: ${outputPath}`); + + await client.end(); +} + +main().catch(console.error); diff --git a/vibn-frontend/scripts/generate-curated-audit-for-opus.ts b/vibn-frontend/scripts/generate-curated-audit-for-opus.ts new file mode 100644 index 0000000..3ea9d87 --- /dev/null +++ b/vibn-frontend/scripts/generate-curated-audit-for-opus.ts @@ -0,0 +1,139 @@ +import { Client } from 'pg'; +import * as dotenv from 'dotenv'; +import * as path from 'path'; +import * as fs from 'fs'; + +// Load env variables +dotenv.config({ path: path.join(__dirname, '../.env.local') }); + +const connectionString = process.env.DATABASE_URL; + +if (!connectionString) { + console.error("DATABASE_URL is not set in .env.local"); + process.exit(1); +} + +// Curation target message IDs representing works and fails: +const curatedMessagePairs = [ + // 1. Success - Initial Analysis and SVG fix + { user: 'fed45d45-0aab-4615-beb6-9355f03c68c4', assistant: '26333aea-4a71-4fba-a331-93bf87a28e1f', tag: 'WORK: File Edit & Dev Server' }, + // 2. Failure - Syntax error loop (duplicate tag introduced) + { user: 'bd0fdf63-33b8-4541-a599-f8bfe4a21498', assistant: 'a293a59a-185f-4520-a029-750892bbc7a0', tag: 'FAIL: Syntax Error & Dev Server Crash' }, + // 3. Failure - Typo in filename & loop + { user: 'ae4f6eb3-6514-45de-9820-01adec83f777', assistant: '784121b4-9db2-4fcc-8e74-94e0e7b6cd01', tag: 'FAIL: Filename Mismatch / Cache Loop' }, + // 4. Failure - Syntax loop repeat + { user: 'bdb738b6-5f0c-4820-9da8-345efb80d432', assistant: '6d8215c0-293e-4f50-b9f8-7cab7a9c8250', tag: 'FAIL: Text Replacement Duplicate Tag Loop' }, + // 5. Success - Final layout polish & CSS integration + { user: 'c2be02b2-734e-43ca-ba4b-8d2234bcdba6', assistant: 'c6fd1d75-2280-4c5c-b1f7-025f39f37e74', tag: 'WORK: Design QA & CSS Polish' }, + // 6. Success - App-wide propagation (Footer fix) + { user: 'd3858ebb-601b-4db4-9663-19abe09718cc', assistant: 'c6dcb697-0d18-47bd-8e78-7a45499d2a82', tag: 'WORK: Global Propagation & Cleanup' } +]; + +async function main() { + const client = new Client({ connectionString }); + await client.connect(); + + console.log("Connected to PostgreSQL DB..."); + + const projectId = 'be169fe8-d381-422b-8e9c-d2e513a8f902'; + const threadId = 'a584c700-7ae2-4fad-a906-b8daf80fcace'; + + const turns = []; + + for (const pair of curatedMessagePairs) { + const userRes = await client.query("SELECT id, created_at, data FROM fs_chat_messages WHERE id = $1", [pair.user]); + const assistantRes = await client.query("SELECT id, created_at, data FROM fs_chat_messages WHERE id = $1", [pair.assistant]); + + if (userRes.rows.length === 0 || assistantRes.rows.length === 0) { + console.warn(`Could not find pair: ${pair.user} -> ${pair.assistant}`); + continue; + } + + const userMsg = userRes.rows[0]; + const assistantMsg = assistantRes.rows[0]; + + const rawToolResults = assistantMsg.data._rawToolResults || []; + const actionsRun = rawToolResults.map((tr: any) => { + let stdout = tr.result; + let ok = true; + let status = "success"; + + try { + const parsedRes = JSON.parse(tr.result); + if (parsedRes.ok === false || (parsedRes.errors && parsedRes.errors.length > 0)) { + ok = false; + status = "error"; + } + } catch (e) {} + + return { + tool_name: tr.name, + tool_call_id: tr.id || `tc-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`, + input_args: tr.args || {}, + execution_outcome: { + ok, + status, + stdout + } + }; + }); + + turns.push({ + turn_metadata: { + message_id: userMsg.id, + timestamp_utc: userMsg.created_at.toISOString(), + conversation_id: threadId, + audit_tag: pair.tag + }, + "1_user_interaction": { + prompt_text: userMsg.data.content + }, + "2_payload_sent_to_google": { + endpoint_url: "https://us-central1-aiplatform.googleapis.com/v1/projects/gen-lang-client-0980079410/locations/us-central1/publishers/google/models/gemini-3.1-pro-preview:generateContent", + system_instruction: "Configured via VIBN Coder System Prompt (coder.ts / buildSystemPrompt)", + contents: [ + { + role: "user", + parts: [{ text: userMsg.data.content }] + } + ] + }, + "3_payload_received_from_google": { + timestamp_utc: assistantMsg.created_at.toISOString(), + raw_candidates: { + content: { + role: "model", + parts: [{ thought: null, text: assistantMsg.data.content }] + } + } + }, + "4_platform_executions_and_telemetry": { + actions_run: actionsRun + }, + "5_git_version_control_diffs": [] + }); + } + + const dataset = { + dataset_metadata: { + title: "VIBN Agent Telemetry Curated Audit Dataset", + purpose: "Provide high-fidelity telemetry examples of both successful executions and failure loops to Opus for agent behavior optimization and prompt hardening.", + source_project: { + id: projectId, + name: "GetAcquired 2.0", + slug: "getacquired-2-0" + }, + compiled_at: new Date().toISOString(), + total_turns_audited: turns.length + }, + turns + }; + + const outputPath = path.join(__dirname, '../../opus_telemetry_audit_dataset.json'); + fs.writeFileSync(outputPath, JSON.stringify(dataset, null, 2)); + console.log(`\nπŸŽ‰ Curated dataset for Opus successfully written to: ${outputPath}`); + + await client.end(); +} + +main().catch(console.error); diff --git a/vibn-frontend/scripts/generate-telemetry-audit.ts b/vibn-frontend/scripts/generate-telemetry-audit.ts new file mode 100644 index 0000000..c437d70 --- /dev/null +++ b/vibn-frontend/scripts/generate-telemetry-audit.ts @@ -0,0 +1,155 @@ +import { Client } from 'pg'; +import * as dotenv from 'dotenv'; +import * as path from 'path'; +import * as fs from 'fs'; + +// Load env variables +dotenv.config({ path: path.join(__dirname, '../.env.local') }); + +const connectionString = process.env.DATABASE_URL; + +if (!connectionString) { + console.error("DATABASE_URL is not set in .env.local"); + process.exit(1); +} + +async function main() { + const client = new Client({ connectionString }); + await client.connect(); + + console.log("Connected to PostgreSQL database!"); + + const projectId = 'be169fe8-d381-422b-8e9c-d2e513a8f902'; + const threadId = 'a584c700-7ae2-4fad-a906-b8daf80fcace'; + + // 1. Fetch project info + const projectRes = await client.query( + "SELECT id, slug, data FROM fs_projects WHERE id = $1", + [projectId] + ); + if (projectRes.rows.length === 0) { + console.error(`Project ${projectId} not found.`); + await client.end(); + process.exit(1); + } + const project = projectRes.rows[0]; + console.log(`Fetched project: ${project.data.name || project.slug}`); + + // 2. Fetch thread messages + const messagesRes = await client.query( + "SELECT id, created_at, data FROM fs_chat_messages WHERE thread_id = $1 ORDER BY created_at ASC", + [threadId] + ); + const messages = messagesRes.rows; + console.log(`Fetched ${messages.length} messages for thread ${threadId}`); + + // Group messages into turns (User -> Assistant pairs) + const turns = []; + let userMsg = null; + + for (const msg of messages) { + const role = msg.data.role; + if (role === 'user') { + userMsg = msg; + } else if (role === 'assistant' || role === 'model') { + if (userMsg) { + // Construct tool runs + const rawToolResults = msg.data._rawToolResults || []; + const toolCalls = msg.data.toolCalls || []; + + const actionsRun = rawToolResults.map((tr: any) => { + let stdout = tr.result; + let ok = true; + let status = "success"; + + try { + const parsedRes = JSON.parse(tr.result); + if (parsedRes.ok === false) { + ok = false; + status = "error"; + } + } catch (e) {} + + return { + tool_name: tr.name, + tool_call_id: tr.id || `tc-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`, // fallback + input_args: tr.args || {}, + execution_outcome: { + ok, + status, + stdout + } + }; + }); + + // Reconstruct Google sent payload + const payloadSent = { + endpoint_url: "https://us-central1-aiplatform.googleapis.com/v1/projects/gen-lang-client-0980079410/locations/us-central1/publishers/google/models/gemini-3.1-pro-preview:generateContent", + system_instruction: "Configured via VIBN Coder System Prompt (coder.ts / buildSystemPrompt)", + contents: [ + { + role: "user", + parts: [ + { + text: userMsg.data.content + } + ] + } + ] + }; + + // Reconstruct Google received payload + const payloadReceived = { + timestamp_utc: msg.created_at.toISOString(), + raw_candidates: { + content: { + role: "model", + parts: [ + { + thought: null, + text: msg.data.content + } + ] + } + } + }; + + turns.push({ + turn_metadata: { + message_id: userMsg.id, + timestamp_utc: userMsg.created_at.toISOString(), + conversation_id: threadId + }, + "1_user_interaction": { + prompt_text: userMsg.data.content + }, + "2_payload_sent_to_google": payloadSent, + "3_payload_received_from_google": payloadReceived, + "4_platform_executions_and_telemetry": { + actions_run: actionsRun + }, + "5_git_version_control_diffs": [] + }); + + userMsg = null; // reset for next turn + } + } + } + + const report = { + audit_version: "2026-06-04T1.0-ULTIMATE", + project_id: projectId, + exported_at: new Date().toISOString(), + target_timestamp_utc: messages[messages.length - 1]?.created_at.toISOString() || new Date().toISOString(), + total_turns_audited: turns.length, + turns + }; + + const outputPath = path.join(__dirname, '../../recreated_telemetry_audit_report.json'); + fs.writeFileSync(outputPath, JSON.stringify(report, null, 2)); + console.log(`Successfully generated telemetry audit report and saved to: ${outputPath}`); + + await client.end(); +} + +main().catch(console.error);