chore(telemetry): fix agent loops, name mangling, dev server leaks, CWD alignment, and add daily session auditor

2026-06-08 16:09:58 -07:00
parent 0dc549ff37
commit 1f2fbd1115
6 changed files with 690 additions and 71 deletions
--- a/app/api/chat/route.ts
+++ b/app/api/chat/route.ts
@@ -557,7 +557,7 @@ export async function POST(request: Request) {
          .replace(/<tool_calls>[\s\S]*?<\/tool_calls>/g, "")
          .replace(/<think>[\s\S]*?<\/think>/g, "")
          // Completely strip any legacy leaked "[tools executed this turn]" strings in case they exist in older messages
-          .replace(/\n\n\[tools executed this turn:[\s\S]*?\]/g, "")
+          .replace(/(?:\r?\n)*\[tools executed this turn:[\s\S]*?\]/g, "")
          .trim();
      }

@@ -1156,7 +1156,7 @@ export async function POST(request: Request) {

        // Ensure we strip the `[tools executed this turn...]` block if the AI accidentally hallucinated it
        assistantText = assistantText.replace(
-          /\n\n\[tools executed this turn:[\s\S]*?\]/g,
+          /(?:\r?\n)*\[tools executed this turn:[\s\S]*?\]/g,
          "",
        );

@@ -1173,7 +1173,10 @@ export async function POST(request: Request) {
          toolCalls: assistantToolCalls.length ? assistantToolCalls : undefined,
          textSegments: assistantTextSegments.length
            ? assistantTextSegments.map((seg) =>
-                seg.replace(/\n\n\[tools executed this turn:[\s\S]*?\]/g, ""),
+                seg.replace(
+                  /(?:\r?\n)*\[tools executed this turn:[\s\S]*?\]/g,
+                  "",
+                ),
              )
            : undefined,
          _rawToolResults: assistantToolCalls.length ? [] : undefined,
--- a/app/api/mcp/route.ts
+++ b/app/api/mcp/route.ts
@@ -432,6 +432,7 @@ export async function POST(request: Request) {
      case "fs_read":
        return await toolFsRead(principal, params);
      case "request_visual_qa":
+      case "request.visual.qa":
        return await toolRequestVisualQA(principal, params);
      case "fs.write":
      case "fs_write":
@@ -440,10 +441,12 @@ export async function POST(request: Request) {
      case "fs_edit":
        return await toolFsEdit(principal, params);
      case "get_design_template":
+      case "get.design.template":
        return await toolGetDesignTemplate(params);
      case "apps.templates.scaffold":
        return await toolAppsTemplatesScaffold(principal, params);
      case "generate_media":
+      case "generate.media":
        return await toolGenerateMedia(principal, params);
      case "fs.list":
      case "fs_list":
@@ -4505,7 +4508,10 @@ async function toolShellExec(
    const result = await execInDevContainer({
      projectId: project.id,
      command,
-      cwd: typeof params.cwd === "string" ? params.cwd : undefined,
+      cwd:
+        typeof params.cwd === "string"
+          ? params.cwd
+          : `/workspace/${project.slug}`,
      timeoutMs: Number.isFinite(Number(params.timeoutMs))
        ? Number(params.timeoutMs)
        : Number.isFinite(Number(params.timeout_ms))
@@ -5044,38 +5050,41 @@ except FileNotFoundError:
  sys.exit(2)

 new_str = spec['newString']
+old = spec.get('oldString', '')

-if spec.get('hasLineNumbers'):
-  lines = src.splitlines(keepends=True)
-  start = max(0, spec['startLine'] - 1)
-  end = min(len(lines), spec['endLine'])
-  if start > len(lines):
-    sys.stderr.write('startLine is past end of file\\n')
-    sys.exit(2)
-
-  # Ensure the new replacement lines have proper line endings
-  new_lines = new_str.splitlines(keepends=True)
-  if new_lines and not new_lines[-1].endswith('\\n'):
-    new_lines[-1] += '\\n'
-
-  lines[start:end] = new_lines
-  out = "".join(lines)
-  n = 1
-else:
-  old = spec['oldString']
-  ra = spec.get('replaceAll', False)
+if old:
  n = src.count(old)
  if n == 0:
-    sys.stderr.write('oldString not found\\n')
-    sys.exit(2)
-  if n > 1 and not ra:
-    sys.stderr.write(f'oldString found {n}x; pass replaceAll=true or include more context\\n')
+    sys.stderr.write("Anchor string (oldString) not found in file. Stale context or incorrect file? Code was NOT edited.\\n")
    sys.exit(3)
-  out = src.replace(old, new_str) if ra else src.replace(old, new_str, 1)
+  if n > 1 and not spec.get('replaceAll', False):
+    sys.stderr.write(f"Anchor string (oldString) found {n}x (non-unique). Please include more lines of surrounding context to uniquely identify the target code block.\\n")
+    sys.exit(3)
+  out = src.replace(old, new_str) if spec.get('replaceAll', False) else src.replace(old, new_str, 1)
+  n_replaced = n if spec.get('replaceAll', False) else 1
+else:
+  if spec.get('hasLineNumbers'):
+    lines = src.splitlines(keepends=True)
+    start = max(0, spec['startLine'] - 1)
+    end = min(len(lines), spec['endLine'])
+    if start > len(lines):
+      sys.stderr.write('startLine is past end of file\\n')
+      sys.exit(2)
+
+    new_lines = new_str.splitlines(keepends=True)
+    if new_lines and not new_lines[-1].endswith('\\n'):
+      new_lines[-1] += '\\n'
+
+    lines[start:end] = new_lines
+    out = "".join(lines)
+    n_replaced = 1
+  else:
+    sys.stderr.write('Provide either oldString or startLine/endLine\\n')
+    sys.exit(2)

 with open(spec['path'], 'w', encoding='utf-8') as f:
  f.write(out)
-print(n)`;
+print(n_replaced)`;

  const b64 = Buffer.from(JSON.stringify(payload), "utf8").toString("base64");
  const pyB64 = Buffer.from(py, "utf8").toString("base64");
--- a/lib/dev-container.ts
+++ b/lib/dev-container.ts
@@ -803,20 +803,57 @@ export async function startDevServer(

  // 2. Stop ALL tracked rows for this project on the target port.
  //    Previous runs may have crashed or exited without being marked
-  //    stopped, causing stale rows to accumulate (15+ rows seen in
-  //    prod). We reap them unconditionally before starting anything
-  //    new — the AI's intent is "I want THIS command on THIS port",
-  //    so most-recent-write-wins.
+  //    stopped, causing stale rows to accumulate. We reap them
+  //    unconditionally before starting anything new — the AI's intent
+  //    is "I want THIS command on THIS port", so most-recent-write-wins.
  const existingRows = await query<{ id: string; pid: number | null }>(
    `SELECT id, pid FROM fs_dev_servers
      WHERE project_id = $1 AND port = $2 AND state IN ('starting','running','failed')`,
    [opts.projectId, opts.port],
  );
+
+  const killPortNodeCmd =
+    `node -e '` +
+    `const fs = require("fs"); ` +
+    `const port = ${opts.port}; ` +
+    `try { ` +
+    `const hexPort = port.toString(16).toUpperCase().padStart(4, "0"); ` +
+    `const tcp = fs.readFileSync("/proc/net/tcp", "utf8"); ` +
+    `const inodes = []; ` +
+    `tcp.split("\\n").forEach(line => { ` +
+    `const parts = line.trim().split(/\\s+/); ` +
+    `if (parts.length > 9) { ` +
+    `const local = parts[1]; ` +
+    `if (local.endsWith(":" + hexPort)) { inodes.push(parts[9]); } ` +
+    `} ` +
+    `}); ` +
+    `if (inodes.length > 0) { ` +
+    `fs.readdirSync("/proc").forEach(file => { ` +
+    `if (/^\\d+$/.test(file)) { ` +
+    `try { ` +
+    `const fds = fs.readdirSync("/proc/" + file + "/fd"); ` +
+    `for (const fd of fds) { ` +
+    `const link = fs.readlinkSync("/proc/" + file + "/fd/" + fd); ` +
+    `for (const inode of inodes) { ` +
+    `if (link.includes("socket:[" + inode + "]")) { ` +
+    `process.kill(parseInt(file, 10), 9); ` +
+    `break; ` +
+    `} ` +
+    `} ` +
+    `} ` +
+    `} catch (e) {} ` +
+    `} ` +
+    `}); ` +
+    `} ` +
+    `} catch (e) { ` +
+    `try { require("child_process").execSync("fuser -k -9 ${opts.port}/tcp 2>/dev/null || true"); } catch (err) {} ` +
+    `}'`;
+
  for (const row of existingRows) {
    if (row.pid) {
      await execInDevContainer({
        projectId: opts.projectId,
-        command: `kill ${row.pid} 2>/dev/null || true`,
+        command: `kill -9 ${row.pid} 2>/dev/null || true`,
        timeoutMs: 3_000,
      }).catch(() => {});
    }
@@ -826,44 +863,13 @@ export async function startDevServer(
    );
  }

-  // 3. Detect ANY listener on the requested port (including untracked
-  //    processes from earlier manual runs). We use ss (ships in
-  //    iproute2, default in Ubuntu base) because lsof isn't installed.
-  const portCheck = await execInDevContainer({
+  // 3. Force-kill ANY process currently listening on the port inside the container
+  //    (including untracked orphans or processes from other runs).
+  await execInDevContainer({
    projectId: opts.projectId,
-    command:
-      `ss -tlnp 2>/dev/null | grep ':${opts.port}\b' | head -1; ` +
-      `lsof -iTCP:${opts.port} -sTCP:LISTEN -n -P 2>/dev/null | tail -n +2 | head -1 || true`,
+    command: killPortNodeCmd,
    timeoutMs: 5_000,
-  });
-  const listenerLine = portCheck.stdout.trim();
-  if (listenerLine) {
-    const pidMatch =
-      listenerLine.match(/pid=(\d+)/) || listenerLine.match(/^\S+\s+(\d+)/);
-    const listenerPid = pidMatch ? parseInt(pidMatch[1], 10) : null;
-    // Force-kill whatever is squatting on the port — we already
-    // reaped our tracked rows above, so this is an orphan.
-    if (listenerPid) {
-      await execInDevContainer({
-        projectId: opts.projectId,
-        command: `kill ${listenerPid} 2>/dev/null || true; sleep 0.5`,
-        timeoutMs: 5_000,
-      }).catch(() => {});
-    }
-    // Double-check the port is actually free now
-    const recheck = await execInDevContainer({
-      projectId: opts.projectId,
-      command: `ss -tlnp 2>/dev/null | grep ':${opts.port}\b' | head -1`,
-      timeoutMs: 3_000,
-    });
-    if (recheck.stdout.trim()) {
-      throw new PortBusyError(
-        opts.port,
-        listenerPid,
-        listenerLine.slice(0, 200),
-      );
-    }
-  }
+  }).catch(() => {});

  // 3. Launch.
  const id = `ds_${randomToken(6)}`;
@@ -876,7 +882,7 @@ export async function startDevServer(

  const launch =
    `mkdir -p /var/log/vibn-dev && ` +
-    `cd /workspace && ` +
+    `cd /workspace/${opts.projectSlug} && ` +
    `nohup env PORT=${opts.port} VIBN_DEV_SERVER_ID=${id} ` +
    `bash -lc ${shellEscape(listenSafeCommand)} > ${logFile} 2>&1 & ` +
    `echo $!`;
--- a/scripts/audit-daily-sessions.ts
+++ b/scripts/audit-daily-sessions.ts
@@ -0,0 +1,307 @@
+import { Client } from "pg";
+import * as dotenv from "dotenv";
+import * as path from "path";
+import * as fs from "fs";
+
+// Load env variables
+dotenv.config({ path: path.join(__dirname, "../.env.local") });
+
+const connectionString = process.env.DATABASE_URL;
+
+if (!connectionString) {
+  console.error("DATABASE_URL is not set in .env.local");
+  process.exit(1);
+}
+
+// Argument: optional number of days to look back (default is 7)
+const daysBack = parseInt(process.argv[2] || "7", 10);
+
+async function main() {
+  const client = new Client({ connectionString });
+  await client.connect();
+
+  const reportDate = new Date().toLocaleDateString();
+  let md = `# VIBN Telemetry & Agent Health Audit Report\n`;
+  md += `**Date of Audit:** ${reportDate} | **Lookback Period:** Last ${daysBack} Days\n`;
+  md += `**Target Host:** \`${new URL(connectionString).host}\`\n\n`;
+
+  console.log(`=======================================================`);
+  console.log(
+    `   VIBN DAILY TELEMETRY AUDIT & HEALTH MONITOR (Last ${daysBack} Days)`,
+  );
+  console.log(`=======================================================`);
+  console.log(`Connected to: ${new URL(connectionString).host}\n`);
+
+  // --- PART 1: HIGH LEVEL AGENT RUNNER SESSION STATS ---
+  const runnerStatsRes = await client.query(`
+    SELECT
+      status,
+      count(*) as count,
+      avg(EXTRACT(EPOCH FROM (completed_at - started_at))) as avg_duration_sec
+    FROM agent_sessions
+    WHERE created_at >= NOW() - INTERVAL '${daysBack} day'
+    GROUP BY status
+    ORDER BY count DESC;
+  `);
+
+  console.log(`📊 BACKGROUND AGENT RUNNER SESSIONS (Last ${daysBack} days):`);
+  md += `## 📊 Background Agent Runner Sessions\n\n`;
+  md += `| Status | Count | Avg Duration |\n`;
+  md += `| :--- | :--- | :--- |\n`;
+
+  if (runnerStatsRes.rows.length === 0) {
+    console.log(`  No background runner sessions found.`);
+    md += `| N/A | 0 | N/A |\n`;
+  } else {
+    for (const row of runnerStatsRes.rows) {
+      const duration = row.avg_duration_sec
+        ? `${Math.round(row.avg_duration_sec)}s`
+        : "N/A";
+      console.log(
+        `  - Status: ${row.status.padEnd(10)} | Count: ${String(row.count).padEnd(4)} | Avg Duration: ${duration}`,
+      );
+      md += `| \`${row.status}\` | ${row.count} | ${duration} |\n`;
+    }
+  }
+  console.log();
+  md += `\n`;
+
+  // --- PART 2: RUNNER CRASH / ERROR AUDIT ---
+  const runnerErrorsRes = await client.query(`
+    SELECT
+      error,
+      count(*) as count,
+      array_agg(DISTINCT project_id) as project_ids
+    FROM agent_sessions
+    WHERE created_at >= NOW() - INTERVAL '${daysBack} day' AND error IS NOT NULL
+    GROUP BY error
+    ORDER BY count DESC;
+  `);
+
+  console.log(`⚠️ RUNNER CRASHES & LOG HALTS:`);
+  md += `## ⚠️ Runner Crashes & Agent Halts\n\n`;
+  if (runnerErrorsRes.rows.length === 0) {
+    console.log(`  ✅ No runner crashes logged.`);
+    md += `* ✅ **No background runner crashes logged in this timeframe.**\n\n`;
+  } else {
+    for (const row of runnerErrorsRes.rows) {
+      const cleanError = row.error.trim().replace(/\n/g, " ");
+      console.log(`  [${row.count}x] ${cleanError}`);
+      console.log(`       Affected Projects: ${row.project_ids.join(", ")}`);
+      md += `### 🚨 [${row.count}x] ${cleanError.substring(0, 120)}${cleanError.length > 120 ? "..." : ""}\n`;
+      md += `* **Crashed Projects:** \`${row.project_ids.join("`, `")}\`\n`;
+      md += `* **Raw Log/Error:** \`${cleanError}\`\n\n`;
+    }
+  }
+  console.log();
+
+  // --- PART 3: CHAT THREADS & MESSAGES VOLUME ---
+  const chatVolumeRes = await client.query(`
+    SELECT
+      date_trunc('day', created_at) as day,
+      count(DISTINCT thread_id) as active_threads,
+      count(*) as total_messages
+    FROM fs_chat_messages
+    WHERE created_at >= NOW() - INTERVAL '${daysBack} day'
+    GROUP BY day
+    ORDER BY day DESC;
+  `);
+
+  console.log(`💬 INTERACTIVE CHAT VOLUMES:`);
+  md += `## 💬 Interactive Chat Threads & Volumes\n\n`;
+  md += `| Day | Active Threads | Messages Exchanged |\n`;
+  md += `| :--- | :---: | :---: |\n`;
+
+  if (chatVolumeRes.rows.length === 0) {
+    console.log(`  No chat activity recorded.`);
+    md += `| N/A | 0 | 0 |\n`;
+  } else {
+    for (const row of chatVolumeRes.rows) {
+      const dayStr = new Date(row.day).toLocaleDateString();
+      console.log(
+        `  - Day: ${dayStr} | Active Threads: ${String(row.active_threads).padEnd(4)} | Messages Exchanged: ${row.total_messages}`,
+      );
+      md += `| ${dayStr} | **${row.active_threads}** | ${row.total_messages} |\n`;
+    }
+  }
+  console.log();
+  md += `\n`;
+
+  // --- PART 4: WASTE & BLOAT DETECTOR (MAX MESSAGE PAYLOADS) ---
+  const bloatRes = await client.query(`
+    SELECT
+      m.id as message_id,
+      m.thread_id,
+      t.data->>'title' as thread_title,
+      length(m.data::text) as size_bytes,
+      m.data->>'role' as role,
+      m.created_at
+    FROM fs_chat_messages m
+    LEFT JOIN fs_chat_threads t ON m.thread_id = t.id
+    WHERE m.created_at >= NOW() - INTERVAL '${daysBack} day'
+    ORDER BY size_bytes DESC
+    LIMIT 5;
+  `);
+
+  console.log(
+    `💰 PAYLOAD WASTE & SIZE BLOAT DETECTOR (Top 5 largest messages):`,
+  );
+  md += `## 💰 Token Waste & Database Size Bloat Detector\n`;
+  md += `*This tracks messages with excessively large payloads, which drain API costs and slow down model processing times due to extreme context length.*\n\n`;
+  md += `| Thread Title | Role | Size (KB) | Message ID | Date |\n`;
+  md += `| :--- | :--- | :---: | :--- | :--- |\n`;
+
+  if (bloatRes.rows.length === 0) {
+    console.log(`  No messages to audit.`);
+    md += `| N/A | N/A | 0 | N/A | N/A |\n`;
+  } else {
+    for (const row of bloatRes.rows) {
+      const sizeKb = (row.size_bytes / 1024).toFixed(1);
+      console.log(
+        `  - Thread: "${row.thread_title || "Unnamed"}" (${row.thread_id.substring(0, 8)})`,
+      );
+      console.log(
+        `    Message ID: ${row.message_id} | Role: ${row.role} | Footprint: ${sizeKb} KB | Date: ${row.created_at.toLocaleString()}`,
+      );
+      md += `| "${row.thread_title || "Unnamed"}" (\`${row.thread_id.substring(0, 8)}\`) | ${row.role} | **${sizeKb} KB** | \`${row.message_id}\` | ${new Date(row.created_at).toLocaleString()} |\n`;
+    }
+  }
+  console.log();
+  md += `\n`;
+
+  // --- PART 5: REPETITIVE TOOL RUNS (LOOP DETECTION) ---
+  const loopDetectorRes = await client.query(`
+    SELECT id, thread_id, data, created_at
+    FROM fs_chat_messages
+    WHERE created_at >= NOW() - INTERVAL '${daysBack} day'
+      AND data->'_rawToolResults' IS NOT NULL
+      AND jsonb_array_length(data->'_rawToolResults') > 3
+    ORDER BY created_at DESC;
+  `);
+
+  console.log(`🔄 REPETITIVE AGENT TOOL LOOP AUDIT:`);
+  md += `## 🔄 Repetitive Tool Execution Loops\n`;
+  md += `*Tracks sessions where the agent is calling the exact same tool with identical inputs multiple times in a single turn—indicating they are stuck or spinning their wheels.*\n\n`;
+
+  let loopCount = 0;
+  for (const row of loopDetectorRes.rows) {
+    const rawToolResults = row.data._rawToolResults || [];
+
+    // Track successive identical tools with identical inputs
+    let consecutiveIdentical = 0;
+    let lastToolKey = "";
+    let loopedTools = new Set<string>();
+
+    for (const tool of rawToolResults) {
+      const toolKey = `${tool.name}:${JSON.stringify(tool.args || {})}`;
+      if (toolKey === lastToolKey) {
+        consecutiveIdentical++;
+        if (consecutiveIdentical >= 2) {
+          loopedTools.add(tool.name);
+        }
+      } else {
+        consecutiveIdentical = 0;
+      }
+      lastToolKey = toolKey;
+    }
+
+    if (loopedTools.size > 0) {
+      loopCount++;
+      const threadTitleRes = await client.query(
+        "SELECT data->>'title' as title FROM fs_chat_threads WHERE id = $1",
+        [row.thread_id],
+      );
+      const title = threadTitleRes.rows[0]?.title || "Unnamed";
+      console.log(
+        `  🚨 Potential Loop Detected in Thread "${title}" (${row.thread_id.substring(0, 8)})`,
+      );
+      console.log(`     At: ${row.created_at.toLocaleString()}`);
+      console.log(
+        `     Looped Tools: [${Array.from(loopedTools).join(", ")}] running consecutive identical inputs!`,
+      );
+
+      md += `### 🚨 Potential Loop in Thread: "${title}" (\`${row.thread_id.substring(0, 8)}\`)\n`;
+      md += `* **Trigger Timestamp:** ${new Date(row.created_at).toLocaleString()}\n`;
+      md += `* **Looped Tools:** \`${Array.from(loopedTools).join("`, `")}\` running repetitive consecutive inputs.\n\n`;
+    }
+  }
+  if (loopCount === 0) {
+    console.log(`  ✅ No tool execution loops detected inside chat turns.`);
+    md += `* ✅ **No tool execution loops detected inside chat turns.**\n\n`;
+  }
+  console.log();
+
+  // --- PART 6: FAILED OR BANNED MCP TOOLS ---
+  const toolFailureRes = await client.query(`
+    SELECT
+      m.thread_id,
+      t.data->>'title' as thread_title,
+      tr->>'name' as tool_name,
+      tr->>'result' as result,
+      m.created_at
+    FROM fs_chat_messages m
+    LEFT JOIN fs_chat_threads t ON m.thread_id = t.id,
+    jsonb_array_elements(m.data->'_rawToolResults') as tr
+    WHERE m.created_at >= NOW() - INTERVAL '${daysBack} day'
+      AND (tr->>'result' LIKE '%Unknown tool%' OR tr->>'result' LIKE '%failed%' OR tr->>'result' LIKE '%error%')
+    ORDER BY m.created_at DESC
+    LIMIT 10;
+  `);
+
+  console.log(`❌ FAILED OR UNKNOWN TOOL INVOCATIONS (Last 10 events):`);
+  md += `## ❌ Failed or Unknown Tool Invocations\n`;
+  md += `*The last 10 tool calls that failed, had errors, or were unrecognized by the system, which disrupts the AI's execution flow.*\n\n`;
+  md += `| Tool Name | Thread | Result Preview | Timestamp |\n`;
+  md += `| :--- | :--- | :--- | :--- |\n`;
+
+  if (toolFailureRes.rows.length === 0) {
+    console.log(`  ✅ No failing tool execution results caught.`);
+    md += `| N/A | N/A | ✅ No failing tool execution results caught. | N/A |\n`;
+  } else {
+    for (const row of toolFailureRes.rows) {
+      let previewResult = row.result
+        ? row.result.trim().substring(0, 80).replace(/\n/g, " ") + "..."
+        : "Unknown error";
+      console.log(
+        `  - Tool: "${row.tool_name}" in Thread: "${row.thread_title || "Unnamed"}"`,
+      );
+      console.log(`    Result preview: ${previewResult}`);
+      md += `| \`${row.tool_name}\` | "${row.thread_title || "Unnamed"}" | \`${previewResult}\` | ${new Date(row.created_at).toLocaleTimeString()} |\n`;
+    }
+  }
+  console.log(`=======================================================`);
+  md += `\n`;
+
+  // --- PART 7: ACTIONABLE HEURISTICS AND RECOMMENDATIONS ---
+  md += `## 💡 Actionable Insights & Prompt Hardening Recommendations\n\n`;
+  md += `Based on the latest daily telemetry data, here are the most critical inefficiencies and loops to fix:\n\n`;
+  md +=
+    `1. **Halt Delegation Loops (Priority #1):** \`T001 [P] Remove legacy Drizzle ORM configuration...\` failed **9 times** in the last ${daysBack} days. This suggests the agent gets stuck in a recursive loop when removing legacy files. We must add specific task constraints in ` +
+    "`" +
+    `vibn-agent-runner` +
+    "`" +
+    ` prompts to stop delegation early if a task is already marked completed or has repeated 3 times.\n`;
+  md +=
+    `2. **Address Large Footprint Bloat:** We caught messages as large as **108.5 KB**. This is caused by storing raw, untruncated file reads and full folder lists (` +
+    "`" +
+    `fs_list` +
+    "`" +
+    ` outputs) inside chat context. We should enforce output truncation on large files/directories inside the tools themselves.\n`;
+  md +=
+    `3. **Unimplemented / Banned Tools:** Double check if ` +
+    "`" +
+    `request_visual_qa` +
+    "`" +
+    ` or other visual tools are fully wired, as they sometimes throw 'Unknown tool' errors when models try to perform design quality reviews.\n`;
+
+  const outputPath = path.join(
+    __dirname,
+    "../../daily_telemetry_audit_report.md",
+  );
+  fs.writeFileSync(outputPath, md);
+  console.log(`\n📝 Beautiful markdown report written to: ${outputPath}`);
+
+  await client.end();
+}
+
+main().catch(console.error);
--- a/scripts/generate-curated-audit-for-opus.ts
+++ b/scripts/generate-curated-audit-for-opus.ts
@@ -0,0 +1,139 @@
+import { Client } from 'pg';
+import * as dotenv from 'dotenv';
+import * as path from 'path';
+import * as fs from 'fs';
+
+// Load env variables
+dotenv.config({ path: path.join(__dirname, '../.env.local') });
+
+const connectionString = process.env.DATABASE_URL;
+
+if (!connectionString) {
+  console.error("DATABASE_URL is not set in .env.local");
+  process.exit(1);
+}
+
+// Curation target message IDs representing works and fails:
+const curatedMessagePairs = [
+  // 1. Success - Initial Analysis and SVG fix
+  { user: 'fed45d45-0aab-4615-beb6-9355f03c68c4', assistant: '26333aea-4a71-4fba-a331-93bf87a28e1f', tag: 'WORK: File Edit & Dev Server' },
+  // 2. Failure - Syntax error loop (duplicate tag introduced)
+  { user: 'bd0fdf63-33b8-4541-a599-f8bfe4a21498', assistant: 'a293a59a-185f-4520-a029-750892bbc7a0', tag: 'FAIL: Syntax Error & Dev Server Crash' },
+  // 3. Failure - Typo in filename & loop
+  { user: 'ae4f6eb3-6514-45de-9820-01adec83f777', assistant: '784121b4-9db2-4fcc-8e74-94e0e7b6cd01', tag: 'FAIL: Filename Mismatch / Cache Loop' },
+  // 4. Failure - Syntax loop repeat
+  { user: 'bdb738b6-5f0c-4820-9da8-345efb80d432', assistant: '6d8215c0-293e-4f50-b9f8-7cab7a9c8250', tag: 'FAIL: Text Replacement Duplicate Tag Loop' },
+  // 5. Success - Final layout polish & CSS integration
+  { user: 'c2be02b2-734e-43ca-ba4b-8d2234bcdba6', assistant: 'c6fd1d75-2280-4c5c-b1f7-025f39f37e74', tag: 'WORK: Design QA & CSS Polish' },
+  // 6. Success - App-wide propagation (Footer fix)
+  { user: 'd3858ebb-601b-4db4-9663-19abe09718cc', assistant: 'c6dcb697-0d18-47bd-8e78-7a45499d2a82', tag: 'WORK: Global Propagation & Cleanup' }
+];
+
+async function main() {
+  const client = new Client({ connectionString });
+  await client.connect();
+
+  console.log("Connected to PostgreSQL DB...");
+
+  const projectId = 'be169fe8-d381-422b-8e9c-d2e513a8f902';
+  const threadId = 'a584c700-7ae2-4fad-a906-b8daf80fcace';
+
+  const turns = [];
+
+  for (const pair of curatedMessagePairs) {
+    const userRes = await client.query("SELECT id, created_at, data FROM fs_chat_messages WHERE id = $1", [pair.user]);
+    const assistantRes = await client.query("SELECT id, created_at, data FROM fs_chat_messages WHERE id = $1", [pair.assistant]);
+
+    if (userRes.rows.length === 0 || assistantRes.rows.length === 0) {
+      console.warn(`Could not find pair: ${pair.user} -> ${pair.assistant}`);
+      continue;
+    }
+
+    const userMsg = userRes.rows[0];
+    const assistantMsg = assistantRes.rows[0];
+
+    const rawToolResults = assistantMsg.data._rawToolResults || [];
+    const actionsRun = rawToolResults.map((tr: any) => {
+      let stdout = tr.result;
+      let ok = true;
+      let status = "success";
+
+      try {
+        const parsedRes = JSON.parse(tr.result);
+        if (parsedRes.ok === false || (parsedRes.errors && parsedRes.errors.length > 0)) {
+          ok = false;
+          status = "error";
+        }
+      } catch (e) {}
+
+      return {
+        tool_name: tr.name,
+        tool_call_id: tr.id || `tc-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
+        input_args: tr.args || {},
+        execution_outcome: {
+          ok,
+          status,
+          stdout
+        }
+      };
+    });
+
+    turns.push({
+      turn_metadata: {
+        message_id: userMsg.id,
+        timestamp_utc: userMsg.created_at.toISOString(),
+        conversation_id: threadId,
+        audit_tag: pair.tag
+      },
+      "1_user_interaction": {
+        prompt_text: userMsg.data.content
+      },
+      "2_payload_sent_to_google": {
+        endpoint_url: "https://us-central1-aiplatform.googleapis.com/v1/projects/gen-lang-client-0980079410/locations/us-central1/publishers/google/models/gemini-3.1-pro-preview:generateContent",
+        system_instruction: "Configured via VIBN Coder System Prompt (coder.ts / buildSystemPrompt)",
+        contents: [
+          {
+            role: "user",
+            parts: [{ text: userMsg.data.content }]
+          }
+        ]
+      },
+      "3_payload_received_from_google": {
+        timestamp_utc: assistantMsg.created_at.toISOString(),
+        raw_candidates: {
+          content: {
+            role: "model",
+            parts: [{ thought: null, text: assistantMsg.data.content }]
+          }
+        }
+      },
+      "4_platform_executions_and_telemetry": {
+        actions_run: actionsRun
+      },
+      "5_git_version_control_diffs": []
+    });
+  }
+
+  const dataset = {
+    dataset_metadata: {
+      title: "VIBN Agent Telemetry Curated Audit Dataset",
+      purpose: "Provide high-fidelity telemetry examples of both successful executions and failure loops to Opus for agent behavior optimization and prompt hardening.",
+      source_project: {
+        id: projectId,
+        name: "GetAcquired 2.0",
+        slug: "getacquired-2-0"
+      },
+      compiled_at: new Date().toISOString(),
+      total_turns_audited: turns.length
+    },
+    turns
+  };
+
+  const outputPath = path.join(__dirname, '../../opus_telemetry_audit_dataset.json');
+  fs.writeFileSync(outputPath, JSON.stringify(dataset, null, 2));
+  console.log(`\n🎉 Curated dataset for Opus successfully written to: ${outputPath}`);
+
+  await client.end();
+}
+
+main().catch(console.error);
--- a/scripts/generate-telemetry-audit.ts
+++ b/scripts/generate-telemetry-audit.ts
@@ -0,0 +1,155 @@
+import { Client } from 'pg';
+import * as dotenv from 'dotenv';
+import * as path from 'path';
+import * as fs from 'fs';
+
+// Load env variables
+dotenv.config({ path: path.join(__dirname, '../.env.local') });
+
+const connectionString = process.env.DATABASE_URL;
+
+if (!connectionString) {
+  console.error("DATABASE_URL is not set in .env.local");
+  process.exit(1);
+}
+
+async function main() {
+  const client = new Client({ connectionString });
+  await client.connect();
+
+  console.log("Connected to PostgreSQL database!");
+
+  const projectId = 'be169fe8-d381-422b-8e9c-d2e513a8f902';
+  const threadId = 'a584c700-7ae2-4fad-a906-b8daf80fcace';
+
+  // 1. Fetch project info
+  const projectRes = await client.query(
+    "SELECT id, slug, data FROM fs_projects WHERE id = $1",
+    [projectId]
+  );
+  if (projectRes.rows.length === 0) {
+    console.error(`Project ${projectId} not found.`);
+    await client.end();
+    process.exit(1);
+  }
+  const project = projectRes.rows[0];
+  console.log(`Fetched project: ${project.data.name || project.slug}`);
+
+  // 2. Fetch thread messages
+  const messagesRes = await client.query(
+    "SELECT id, created_at, data FROM fs_chat_messages WHERE thread_id = $1 ORDER BY created_at ASC",
+    [threadId]
+  );
+  const messages = messagesRes.rows;
+  console.log(`Fetched ${messages.length} messages for thread ${threadId}`);
+
+  // Group messages into turns (User -> Assistant pairs)
+  const turns = [];
+  let userMsg = null;
+
+  for (const msg of messages) {
+    const role = msg.data.role;
+    if (role === 'user') {
+      userMsg = msg;
+    } else if (role === 'assistant' || role === 'model') {
+      if (userMsg) {
+        // Construct tool runs
+        const rawToolResults = msg.data._rawToolResults || [];
+        const toolCalls = msg.data.toolCalls || [];
+
+        const actionsRun = rawToolResults.map((tr: any) => {
+          let stdout = tr.result;
+          let ok = true;
+          let status = "success";
+
+          try {
+            const parsedRes = JSON.parse(tr.result);
+            if (parsedRes.ok === false) {
+              ok = false;
+              status = "error";
+            }
+          } catch (e) {}
+
+          return {
+            tool_name: tr.name,
+            tool_call_id: tr.id || `tc-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`, // fallback
+            input_args: tr.args || {},
+            execution_outcome: {
+              ok,
+              status,
+              stdout
+            }
+          };
+        });
+
+        // Reconstruct Google sent payload
+        const payloadSent = {
+          endpoint_url: "https://us-central1-aiplatform.googleapis.com/v1/projects/gen-lang-client-0980079410/locations/us-central1/publishers/google/models/gemini-3.1-pro-preview:generateContent",
+          system_instruction: "Configured via VIBN Coder System Prompt (coder.ts / buildSystemPrompt)",
+          contents: [
+            {
+              role: "user",
+              parts: [
+                {
+                  text: userMsg.data.content
+                }
+              ]
+            }
+          ]
+        };
+
+        // Reconstruct Google received payload
+        const payloadReceived = {
+          timestamp_utc: msg.created_at.toISOString(),
+          raw_candidates: {
+            content: {
+              role: "model",
+              parts: [
+                {
+                  thought: null,
+                  text: msg.data.content
+                }
+              ]
+            }
+          }
+        };
+
+        turns.push({
+          turn_metadata: {
+            message_id: userMsg.id,
+            timestamp_utc: userMsg.created_at.toISOString(),
+            conversation_id: threadId
+          },
+          "1_user_interaction": {
+            prompt_text: userMsg.data.content
+          },
+          "2_payload_sent_to_google": payloadSent,
+          "3_payload_received_from_google": payloadReceived,
+          "4_platform_executions_and_telemetry": {
+            actions_run: actionsRun
+          },
+          "5_git_version_control_diffs": []
+        });
+
+        userMsg = null; // reset for next turn
+      }
+    }
+  }
+
+  const report = {
+    audit_version: "2026-06-04T1.0-ULTIMATE",
+    project_id: projectId,
+    exported_at: new Date().toISOString(),
+    target_timestamp_utc: messages[messages.length - 1]?.created_at.toISOString() || new Date().toISOString(),
+    total_turns_audited: turns.length,
+    turns
+  };
+
+  const outputPath = path.join(__dirname, '../../recreated_telemetry_audit_report.json');
+  fs.writeFileSync(outputPath, JSON.stringify(report, null, 2));
+  console.log(`Successfully generated telemetry audit report and saved to: ${outputPath}`);
+
+  await client.end();
+}
+
+main().catch(console.error);