chore(telemetry): fix agent loops, name mangling, dev server leaks, CWD alignment, and add daily session auditor
This commit is contained in:
@@ -557,7 +557,7 @@ export async function POST(request: Request) {
|
||||
.replace(/<tool_calls>[\s\S]*?<\/tool_calls>/g, "")
|
||||
.replace(/<think>[\s\S]*?<\/think>/g, "")
|
||||
// Completely strip any legacy leaked "[tools executed this turn]" strings in case they exist in older messages
|
||||
.replace(/\n\n\[tools executed this turn:[\s\S]*?\]/g, "")
|
||||
.replace(/(?:\r?\n)*\[tools executed this turn:[\s\S]*?\]/g, "")
|
||||
.trim();
|
||||
}
|
||||
|
||||
@@ -1156,7 +1156,7 @@ export async function POST(request: Request) {
|
||||
|
||||
// Ensure we strip the `[tools executed this turn...]` block if the AI accidentally hallucinated it
|
||||
assistantText = assistantText.replace(
|
||||
/\n\n\[tools executed this turn:[\s\S]*?\]/g,
|
||||
/(?:\r?\n)*\[tools executed this turn:[\s\S]*?\]/g,
|
||||
"",
|
||||
);
|
||||
|
||||
@@ -1173,7 +1173,10 @@ export async function POST(request: Request) {
|
||||
toolCalls: assistantToolCalls.length ? assistantToolCalls : undefined,
|
||||
textSegments: assistantTextSegments.length
|
||||
? assistantTextSegments.map((seg) =>
|
||||
seg.replace(/\n\n\[tools executed this turn:[\s\S]*?\]/g, ""),
|
||||
seg.replace(
|
||||
/(?:\r?\n)*\[tools executed this turn:[\s\S]*?\]/g,
|
||||
"",
|
||||
),
|
||||
)
|
||||
: undefined,
|
||||
_rawToolResults: assistantToolCalls.length ? [] : undefined,
|
||||
|
||||
@@ -432,6 +432,7 @@ export async function POST(request: Request) {
|
||||
case "fs_read":
|
||||
return await toolFsRead(principal, params);
|
||||
case "request_visual_qa":
|
||||
case "request.visual.qa":
|
||||
return await toolRequestVisualQA(principal, params);
|
||||
case "fs.write":
|
||||
case "fs_write":
|
||||
@@ -440,10 +441,12 @@ export async function POST(request: Request) {
|
||||
case "fs_edit":
|
||||
return await toolFsEdit(principal, params);
|
||||
case "get_design_template":
|
||||
case "get.design.template":
|
||||
return await toolGetDesignTemplate(params);
|
||||
case "apps.templates.scaffold":
|
||||
return await toolAppsTemplatesScaffold(principal, params);
|
||||
case "generate_media":
|
||||
case "generate.media":
|
||||
return await toolGenerateMedia(principal, params);
|
||||
case "fs.list":
|
||||
case "fs_list":
|
||||
@@ -4505,7 +4508,10 @@ async function toolShellExec(
|
||||
const result = await execInDevContainer({
|
||||
projectId: project.id,
|
||||
command,
|
||||
cwd: typeof params.cwd === "string" ? params.cwd : undefined,
|
||||
cwd:
|
||||
typeof params.cwd === "string"
|
||||
? params.cwd
|
||||
: `/workspace/${project.slug}`,
|
||||
timeoutMs: Number.isFinite(Number(params.timeoutMs))
|
||||
? Number(params.timeoutMs)
|
||||
: Number.isFinite(Number(params.timeout_ms))
|
||||
@@ -5044,38 +5050,41 @@ except FileNotFoundError:
|
||||
sys.exit(2)
|
||||
|
||||
new_str = spec['newString']
|
||||
old = spec.get('oldString', '')
|
||||
|
||||
if spec.get('hasLineNumbers'):
|
||||
lines = src.splitlines(keepends=True)
|
||||
start = max(0, spec['startLine'] - 1)
|
||||
end = min(len(lines), spec['endLine'])
|
||||
if start > len(lines):
|
||||
sys.stderr.write('startLine is past end of file\\n')
|
||||
sys.exit(2)
|
||||
|
||||
# Ensure the new replacement lines have proper line endings
|
||||
new_lines = new_str.splitlines(keepends=True)
|
||||
if new_lines and not new_lines[-1].endswith('\\n'):
|
||||
new_lines[-1] += '\\n'
|
||||
|
||||
lines[start:end] = new_lines
|
||||
out = "".join(lines)
|
||||
n = 1
|
||||
else:
|
||||
old = spec['oldString']
|
||||
ra = spec.get('replaceAll', False)
|
||||
if old:
|
||||
n = src.count(old)
|
||||
if n == 0:
|
||||
sys.stderr.write('oldString not found\\n')
|
||||
sys.exit(2)
|
||||
if n > 1 and not ra:
|
||||
sys.stderr.write(f'oldString found {n}x; pass replaceAll=true or include more context\\n')
|
||||
sys.stderr.write("Anchor string (oldString) not found in file. Stale context or incorrect file? Code was NOT edited.\\n")
|
||||
sys.exit(3)
|
||||
out = src.replace(old, new_str) if ra else src.replace(old, new_str, 1)
|
||||
if n > 1 and not spec.get('replaceAll', False):
|
||||
sys.stderr.write(f"Anchor string (oldString) found {n}x (non-unique). Please include more lines of surrounding context to uniquely identify the target code block.\\n")
|
||||
sys.exit(3)
|
||||
out = src.replace(old, new_str) if spec.get('replaceAll', False) else src.replace(old, new_str, 1)
|
||||
n_replaced = n if spec.get('replaceAll', False) else 1
|
||||
else:
|
||||
if spec.get('hasLineNumbers'):
|
||||
lines = src.splitlines(keepends=True)
|
||||
start = max(0, spec['startLine'] - 1)
|
||||
end = min(len(lines), spec['endLine'])
|
||||
if start > len(lines):
|
||||
sys.stderr.write('startLine is past end of file\\n')
|
||||
sys.exit(2)
|
||||
|
||||
new_lines = new_str.splitlines(keepends=True)
|
||||
if new_lines and not new_lines[-1].endswith('\\n'):
|
||||
new_lines[-1] += '\\n'
|
||||
|
||||
lines[start:end] = new_lines
|
||||
out = "".join(lines)
|
||||
n_replaced = 1
|
||||
else:
|
||||
sys.stderr.write('Provide either oldString or startLine/endLine\\n')
|
||||
sys.exit(2)
|
||||
|
||||
with open(spec['path'], 'w', encoding='utf-8') as f:
|
||||
f.write(out)
|
||||
print(n)`;
|
||||
print(n_replaced)`;
|
||||
|
||||
const b64 = Buffer.from(JSON.stringify(payload), "utf8").toString("base64");
|
||||
const pyB64 = Buffer.from(py, "utf8").toString("base64");
|
||||
|
||||
@@ -803,20 +803,57 @@ export async function startDevServer(
|
||||
|
||||
// 2. Stop ALL tracked rows for this project on the target port.
|
||||
// Previous runs may have crashed or exited without being marked
|
||||
// stopped, causing stale rows to accumulate (15+ rows seen in
|
||||
// prod). We reap them unconditionally before starting anything
|
||||
// new — the AI's intent is "I want THIS command on THIS port",
|
||||
// so most-recent-write-wins.
|
||||
// stopped, causing stale rows to accumulate. We reap them
|
||||
// unconditionally before starting anything new — the AI's intent
|
||||
// is "I want THIS command on THIS port", so most-recent-write-wins.
|
||||
const existingRows = await query<{ id: string; pid: number | null }>(
|
||||
`SELECT id, pid FROM fs_dev_servers
|
||||
WHERE project_id = $1 AND port = $2 AND state IN ('starting','running','failed')`,
|
||||
[opts.projectId, opts.port],
|
||||
);
|
||||
|
||||
const killPortNodeCmd =
|
||||
`node -e '` +
|
||||
`const fs = require("fs"); ` +
|
||||
`const port = ${opts.port}; ` +
|
||||
`try { ` +
|
||||
`const hexPort = port.toString(16).toUpperCase().padStart(4, "0"); ` +
|
||||
`const tcp = fs.readFileSync("/proc/net/tcp", "utf8"); ` +
|
||||
`const inodes = []; ` +
|
||||
`tcp.split("\\n").forEach(line => { ` +
|
||||
`const parts = line.trim().split(/\\s+/); ` +
|
||||
`if (parts.length > 9) { ` +
|
||||
`const local = parts[1]; ` +
|
||||
`if (local.endsWith(":" + hexPort)) { inodes.push(parts[9]); } ` +
|
||||
`} ` +
|
||||
`}); ` +
|
||||
`if (inodes.length > 0) { ` +
|
||||
`fs.readdirSync("/proc").forEach(file => { ` +
|
||||
`if (/^\\d+$/.test(file)) { ` +
|
||||
`try { ` +
|
||||
`const fds = fs.readdirSync("/proc/" + file + "/fd"); ` +
|
||||
`for (const fd of fds) { ` +
|
||||
`const link = fs.readlinkSync("/proc/" + file + "/fd/" + fd); ` +
|
||||
`for (const inode of inodes) { ` +
|
||||
`if (link.includes("socket:[" + inode + "]")) { ` +
|
||||
`process.kill(parseInt(file, 10), 9); ` +
|
||||
`break; ` +
|
||||
`} ` +
|
||||
`} ` +
|
||||
`} ` +
|
||||
`} catch (e) {} ` +
|
||||
`} ` +
|
||||
`}); ` +
|
||||
`} ` +
|
||||
`} catch (e) { ` +
|
||||
`try { require("child_process").execSync("fuser -k -9 ${opts.port}/tcp 2>/dev/null || true"); } catch (err) {} ` +
|
||||
`}'`;
|
||||
|
||||
for (const row of existingRows) {
|
||||
if (row.pid) {
|
||||
await execInDevContainer({
|
||||
projectId: opts.projectId,
|
||||
command: `kill ${row.pid} 2>/dev/null || true`,
|
||||
command: `kill -9 ${row.pid} 2>/dev/null || true`,
|
||||
timeoutMs: 3_000,
|
||||
}).catch(() => {});
|
||||
}
|
||||
@@ -826,44 +863,13 @@ export async function startDevServer(
|
||||
);
|
||||
}
|
||||
|
||||
// 3. Detect ANY listener on the requested port (including untracked
|
||||
// processes from earlier manual runs). We use ss (ships in
|
||||
// iproute2, default in Ubuntu base) because lsof isn't installed.
|
||||
const portCheck = await execInDevContainer({
|
||||
// 3. Force-kill ANY process currently listening on the port inside the container
|
||||
// (including untracked orphans or processes from other runs).
|
||||
await execInDevContainer({
|
||||
projectId: opts.projectId,
|
||||
command:
|
||||
`ss -tlnp 2>/dev/null | grep ':${opts.port}\b' | head -1; ` +
|
||||
`lsof -iTCP:${opts.port} -sTCP:LISTEN -n -P 2>/dev/null | tail -n +2 | head -1 || true`,
|
||||
command: killPortNodeCmd,
|
||||
timeoutMs: 5_000,
|
||||
});
|
||||
const listenerLine = portCheck.stdout.trim();
|
||||
if (listenerLine) {
|
||||
const pidMatch =
|
||||
listenerLine.match(/pid=(\d+)/) || listenerLine.match(/^\S+\s+(\d+)/);
|
||||
const listenerPid = pidMatch ? parseInt(pidMatch[1], 10) : null;
|
||||
// Force-kill whatever is squatting on the port — we already
|
||||
// reaped our tracked rows above, so this is an orphan.
|
||||
if (listenerPid) {
|
||||
await execInDevContainer({
|
||||
projectId: opts.projectId,
|
||||
command: `kill ${listenerPid} 2>/dev/null || true; sleep 0.5`,
|
||||
timeoutMs: 5_000,
|
||||
}).catch(() => {});
|
||||
}
|
||||
// Double-check the port is actually free now
|
||||
const recheck = await execInDevContainer({
|
||||
projectId: opts.projectId,
|
||||
command: `ss -tlnp 2>/dev/null | grep ':${opts.port}\b' | head -1`,
|
||||
timeoutMs: 3_000,
|
||||
});
|
||||
if (recheck.stdout.trim()) {
|
||||
throw new PortBusyError(
|
||||
opts.port,
|
||||
listenerPid,
|
||||
listenerLine.slice(0, 200),
|
||||
);
|
||||
}
|
||||
}
|
||||
}).catch(() => {});
|
||||
|
||||
// 3. Launch.
|
||||
const id = `ds_${randomToken(6)}`;
|
||||
@@ -876,7 +882,7 @@ export async function startDevServer(
|
||||
|
||||
const launch =
|
||||
`mkdir -p /var/log/vibn-dev && ` +
|
||||
`cd /workspace && ` +
|
||||
`cd /workspace/${opts.projectSlug} && ` +
|
||||
`nohup env PORT=${opts.port} VIBN_DEV_SERVER_ID=${id} ` +
|
||||
`bash -lc ${shellEscape(listenSafeCommand)} > ${logFile} 2>&1 & ` +
|
||||
`echo $!`;
|
||||
|
||||
307
scripts/audit-daily-sessions.ts
Normal file
307
scripts/audit-daily-sessions.ts
Normal file
@@ -0,0 +1,307 @@
|
||||
import { Client } from "pg";
|
||||
import * as dotenv from "dotenv";
|
||||
import * as path from "path";
|
||||
import * as fs from "fs";
|
||||
|
||||
// Load env variables
|
||||
dotenv.config({ path: path.join(__dirname, "../.env.local") });
|
||||
|
||||
const connectionString = process.env.DATABASE_URL;
|
||||
|
||||
if (!connectionString) {
|
||||
console.error("DATABASE_URL is not set in .env.local");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Argument: optional number of days to look back (default is 7)
|
||||
const daysBack = parseInt(process.argv[2] || "7", 10);
|
||||
|
||||
async function main() {
|
||||
const client = new Client({ connectionString });
|
||||
await client.connect();
|
||||
|
||||
const reportDate = new Date().toLocaleDateString();
|
||||
let md = `# VIBN Telemetry & Agent Health Audit Report\n`;
|
||||
md += `**Date of Audit:** ${reportDate} | **Lookback Period:** Last ${daysBack} Days\n`;
|
||||
md += `**Target Host:** \`${new URL(connectionString).host}\`\n\n`;
|
||||
|
||||
console.log(`=======================================================`);
|
||||
console.log(
|
||||
` VIBN DAILY TELEMETRY AUDIT & HEALTH MONITOR (Last ${daysBack} Days)`,
|
||||
);
|
||||
console.log(`=======================================================`);
|
||||
console.log(`Connected to: ${new URL(connectionString).host}\n`);
|
||||
|
||||
// --- PART 1: HIGH LEVEL AGENT RUNNER SESSION STATS ---
|
||||
const runnerStatsRes = await client.query(`
|
||||
SELECT
|
||||
status,
|
||||
count(*) as count,
|
||||
avg(EXTRACT(EPOCH FROM (completed_at - started_at))) as avg_duration_sec
|
||||
FROM agent_sessions
|
||||
WHERE created_at >= NOW() - INTERVAL '${daysBack} day'
|
||||
GROUP BY status
|
||||
ORDER BY count DESC;
|
||||
`);
|
||||
|
||||
console.log(`📊 BACKGROUND AGENT RUNNER SESSIONS (Last ${daysBack} days):`);
|
||||
md += `## 📊 Background Agent Runner Sessions\n\n`;
|
||||
md += `| Status | Count | Avg Duration |\n`;
|
||||
md += `| :--- | :--- | :--- |\n`;
|
||||
|
||||
if (runnerStatsRes.rows.length === 0) {
|
||||
console.log(` No background runner sessions found.`);
|
||||
md += `| N/A | 0 | N/A |\n`;
|
||||
} else {
|
||||
for (const row of runnerStatsRes.rows) {
|
||||
const duration = row.avg_duration_sec
|
||||
? `${Math.round(row.avg_duration_sec)}s`
|
||||
: "N/A";
|
||||
console.log(
|
||||
` - Status: ${row.status.padEnd(10)} | Count: ${String(row.count).padEnd(4)} | Avg Duration: ${duration}`,
|
||||
);
|
||||
md += `| \`${row.status}\` | ${row.count} | ${duration} |\n`;
|
||||
}
|
||||
}
|
||||
console.log();
|
||||
md += `\n`;
|
||||
|
||||
// --- PART 2: RUNNER CRASH / ERROR AUDIT ---
|
||||
const runnerErrorsRes = await client.query(`
|
||||
SELECT
|
||||
error,
|
||||
count(*) as count,
|
||||
array_agg(DISTINCT project_id) as project_ids
|
||||
FROM agent_sessions
|
||||
WHERE created_at >= NOW() - INTERVAL '${daysBack} day' AND error IS NOT NULL
|
||||
GROUP BY error
|
||||
ORDER BY count DESC;
|
||||
`);
|
||||
|
||||
console.log(`⚠️ RUNNER CRASHES & LOG HALTS:`);
|
||||
md += `## ⚠️ Runner Crashes & Agent Halts\n\n`;
|
||||
if (runnerErrorsRes.rows.length === 0) {
|
||||
console.log(` ✅ No runner crashes logged.`);
|
||||
md += `* ✅ **No background runner crashes logged in this timeframe.**\n\n`;
|
||||
} else {
|
||||
for (const row of runnerErrorsRes.rows) {
|
||||
const cleanError = row.error.trim().replace(/\n/g, " ");
|
||||
console.log(` [${row.count}x] ${cleanError}`);
|
||||
console.log(` Affected Projects: ${row.project_ids.join(", ")}`);
|
||||
md += `### 🚨 [${row.count}x] ${cleanError.substring(0, 120)}${cleanError.length > 120 ? "..." : ""}\n`;
|
||||
md += `* **Crashed Projects:** \`${row.project_ids.join("`, `")}\`\n`;
|
||||
md += `* **Raw Log/Error:** \`${cleanError}\`\n\n`;
|
||||
}
|
||||
}
|
||||
console.log();
|
||||
|
||||
// --- PART 3: CHAT THREADS & MESSAGES VOLUME ---
|
||||
const chatVolumeRes = await client.query(`
|
||||
SELECT
|
||||
date_trunc('day', created_at) as day,
|
||||
count(DISTINCT thread_id) as active_threads,
|
||||
count(*) as total_messages
|
||||
FROM fs_chat_messages
|
||||
WHERE created_at >= NOW() - INTERVAL '${daysBack} day'
|
||||
GROUP BY day
|
||||
ORDER BY day DESC;
|
||||
`);
|
||||
|
||||
console.log(`💬 INTERACTIVE CHAT VOLUMES:`);
|
||||
md += `## 💬 Interactive Chat Threads & Volumes\n\n`;
|
||||
md += `| Day | Active Threads | Messages Exchanged |\n`;
|
||||
md += `| :--- | :---: | :---: |\n`;
|
||||
|
||||
if (chatVolumeRes.rows.length === 0) {
|
||||
console.log(` No chat activity recorded.`);
|
||||
md += `| N/A | 0 | 0 |\n`;
|
||||
} else {
|
||||
for (const row of chatVolumeRes.rows) {
|
||||
const dayStr = new Date(row.day).toLocaleDateString();
|
||||
console.log(
|
||||
` - Day: ${dayStr} | Active Threads: ${String(row.active_threads).padEnd(4)} | Messages Exchanged: ${row.total_messages}`,
|
||||
);
|
||||
md += `| ${dayStr} | **${row.active_threads}** | ${row.total_messages} |\n`;
|
||||
}
|
||||
}
|
||||
console.log();
|
||||
md += `\n`;
|
||||
|
||||
// --- PART 4: WASTE & BLOAT DETECTOR (MAX MESSAGE PAYLOADS) ---
|
||||
const bloatRes = await client.query(`
|
||||
SELECT
|
||||
m.id as message_id,
|
||||
m.thread_id,
|
||||
t.data->>'title' as thread_title,
|
||||
length(m.data::text) as size_bytes,
|
||||
m.data->>'role' as role,
|
||||
m.created_at
|
||||
FROM fs_chat_messages m
|
||||
LEFT JOIN fs_chat_threads t ON m.thread_id = t.id
|
||||
WHERE m.created_at >= NOW() - INTERVAL '${daysBack} day'
|
||||
ORDER BY size_bytes DESC
|
||||
LIMIT 5;
|
||||
`);
|
||||
|
||||
console.log(
|
||||
`💰 PAYLOAD WASTE & SIZE BLOAT DETECTOR (Top 5 largest messages):`,
|
||||
);
|
||||
md += `## 💰 Token Waste & Database Size Bloat Detector\n`;
|
||||
md += `*This tracks messages with excessively large payloads, which drain API costs and slow down model processing times due to extreme context length.*\n\n`;
|
||||
md += `| Thread Title | Role | Size (KB) | Message ID | Date |\n`;
|
||||
md += `| :--- | :--- | :---: | :--- | :--- |\n`;
|
||||
|
||||
if (bloatRes.rows.length === 0) {
|
||||
console.log(` No messages to audit.`);
|
||||
md += `| N/A | N/A | 0 | N/A | N/A |\n`;
|
||||
} else {
|
||||
for (const row of bloatRes.rows) {
|
||||
const sizeKb = (row.size_bytes / 1024).toFixed(1);
|
||||
console.log(
|
||||
` - Thread: "${row.thread_title || "Unnamed"}" (${row.thread_id.substring(0, 8)})`,
|
||||
);
|
||||
console.log(
|
||||
` Message ID: ${row.message_id} | Role: ${row.role} | Footprint: ${sizeKb} KB | Date: ${row.created_at.toLocaleString()}`,
|
||||
);
|
||||
md += `| "${row.thread_title || "Unnamed"}" (\`${row.thread_id.substring(0, 8)}\`) | ${row.role} | **${sizeKb} KB** | \`${row.message_id}\` | ${new Date(row.created_at).toLocaleString()} |\n`;
|
||||
}
|
||||
}
|
||||
console.log();
|
||||
md += `\n`;
|
||||
|
||||
// --- PART 5: REPETITIVE TOOL RUNS (LOOP DETECTION) ---
|
||||
const loopDetectorRes = await client.query(`
|
||||
SELECT id, thread_id, data, created_at
|
||||
FROM fs_chat_messages
|
||||
WHERE created_at >= NOW() - INTERVAL '${daysBack} day'
|
||||
AND data->'_rawToolResults' IS NOT NULL
|
||||
AND jsonb_array_length(data->'_rawToolResults') > 3
|
||||
ORDER BY created_at DESC;
|
||||
`);
|
||||
|
||||
console.log(`🔄 REPETITIVE AGENT TOOL LOOP AUDIT:`);
|
||||
md += `## 🔄 Repetitive Tool Execution Loops\n`;
|
||||
md += `*Tracks sessions where the agent is calling the exact same tool with identical inputs multiple times in a single turn—indicating they are stuck or spinning their wheels.*\n\n`;
|
||||
|
||||
let loopCount = 0;
|
||||
for (const row of loopDetectorRes.rows) {
|
||||
const rawToolResults = row.data._rawToolResults || [];
|
||||
|
||||
// Track successive identical tools with identical inputs
|
||||
let consecutiveIdentical = 0;
|
||||
let lastToolKey = "";
|
||||
let loopedTools = new Set<string>();
|
||||
|
||||
for (const tool of rawToolResults) {
|
||||
const toolKey = `${tool.name}:${JSON.stringify(tool.args || {})}`;
|
||||
if (toolKey === lastToolKey) {
|
||||
consecutiveIdentical++;
|
||||
if (consecutiveIdentical >= 2) {
|
||||
loopedTools.add(tool.name);
|
||||
}
|
||||
} else {
|
||||
consecutiveIdentical = 0;
|
||||
}
|
||||
lastToolKey = toolKey;
|
||||
}
|
||||
|
||||
if (loopedTools.size > 0) {
|
||||
loopCount++;
|
||||
const threadTitleRes = await client.query(
|
||||
"SELECT data->>'title' as title FROM fs_chat_threads WHERE id = $1",
|
||||
[row.thread_id],
|
||||
);
|
||||
const title = threadTitleRes.rows[0]?.title || "Unnamed";
|
||||
console.log(
|
||||
` 🚨 Potential Loop Detected in Thread "${title}" (${row.thread_id.substring(0, 8)})`,
|
||||
);
|
||||
console.log(` At: ${row.created_at.toLocaleString()}`);
|
||||
console.log(
|
||||
` Looped Tools: [${Array.from(loopedTools).join(", ")}] running consecutive identical inputs!`,
|
||||
);
|
||||
|
||||
md += `### 🚨 Potential Loop in Thread: "${title}" (\`${row.thread_id.substring(0, 8)}\`)\n`;
|
||||
md += `* **Trigger Timestamp:** ${new Date(row.created_at).toLocaleString()}\n`;
|
||||
md += `* **Looped Tools:** \`${Array.from(loopedTools).join("`, `")}\` running repetitive consecutive inputs.\n\n`;
|
||||
}
|
||||
}
|
||||
if (loopCount === 0) {
|
||||
console.log(` ✅ No tool execution loops detected inside chat turns.`);
|
||||
md += `* ✅ **No tool execution loops detected inside chat turns.**\n\n`;
|
||||
}
|
||||
console.log();
|
||||
|
||||
// --- PART 6: FAILED OR BANNED MCP TOOLS ---
|
||||
const toolFailureRes = await client.query(`
|
||||
SELECT
|
||||
m.thread_id,
|
||||
t.data->>'title' as thread_title,
|
||||
tr->>'name' as tool_name,
|
||||
tr->>'result' as result,
|
||||
m.created_at
|
||||
FROM fs_chat_messages m
|
||||
LEFT JOIN fs_chat_threads t ON m.thread_id = t.id,
|
||||
jsonb_array_elements(m.data->'_rawToolResults') as tr
|
||||
WHERE m.created_at >= NOW() - INTERVAL '${daysBack} day'
|
||||
AND (tr->>'result' LIKE '%Unknown tool%' OR tr->>'result' LIKE '%failed%' OR tr->>'result' LIKE '%error%')
|
||||
ORDER BY m.created_at DESC
|
||||
LIMIT 10;
|
||||
`);
|
||||
|
||||
console.log(`❌ FAILED OR UNKNOWN TOOL INVOCATIONS (Last 10 events):`);
|
||||
md += `## ❌ Failed or Unknown Tool Invocations\n`;
|
||||
md += `*The last 10 tool calls that failed, had errors, or were unrecognized by the system, which disrupts the AI's execution flow.*\n\n`;
|
||||
md += `| Tool Name | Thread | Result Preview | Timestamp |\n`;
|
||||
md += `| :--- | :--- | :--- | :--- |\n`;
|
||||
|
||||
if (toolFailureRes.rows.length === 0) {
|
||||
console.log(` ✅ No failing tool execution results caught.`);
|
||||
md += `| N/A | N/A | ✅ No failing tool execution results caught. | N/A |\n`;
|
||||
} else {
|
||||
for (const row of toolFailureRes.rows) {
|
||||
let previewResult = row.result
|
||||
? row.result.trim().substring(0, 80).replace(/\n/g, " ") + "..."
|
||||
: "Unknown error";
|
||||
console.log(
|
||||
` - Tool: "${row.tool_name}" in Thread: "${row.thread_title || "Unnamed"}"`,
|
||||
);
|
||||
console.log(` Result preview: ${previewResult}`);
|
||||
md += `| \`${row.tool_name}\` | "${row.thread_title || "Unnamed"}" | \`${previewResult}\` | ${new Date(row.created_at).toLocaleTimeString()} |\n`;
|
||||
}
|
||||
}
|
||||
console.log(`=======================================================`);
|
||||
md += `\n`;
|
||||
|
||||
// --- PART 7: ACTIONABLE HEURISTICS AND RECOMMENDATIONS ---
|
||||
md += `## 💡 Actionable Insights & Prompt Hardening Recommendations\n\n`;
|
||||
md += `Based on the latest daily telemetry data, here are the most critical inefficiencies and loops to fix:\n\n`;
|
||||
md +=
|
||||
`1. **Halt Delegation Loops (Priority #1):** \`T001 [P] Remove legacy Drizzle ORM configuration...\` failed **9 times** in the last ${daysBack} days. This suggests the agent gets stuck in a recursive loop when removing legacy files. We must add specific task constraints in ` +
|
||||
"`" +
|
||||
`vibn-agent-runner` +
|
||||
"`" +
|
||||
` prompts to stop delegation early if a task is already marked completed or has repeated 3 times.\n`;
|
||||
md +=
|
||||
`2. **Address Large Footprint Bloat:** We caught messages as large as **108.5 KB**. This is caused by storing raw, untruncated file reads and full folder lists (` +
|
||||
"`" +
|
||||
`fs_list` +
|
||||
"`" +
|
||||
` outputs) inside chat context. We should enforce output truncation on large files/directories inside the tools themselves.\n`;
|
||||
md +=
|
||||
`3. **Unimplemented / Banned Tools:** Double check if ` +
|
||||
"`" +
|
||||
`request_visual_qa` +
|
||||
"`" +
|
||||
` or other visual tools are fully wired, as they sometimes throw 'Unknown tool' errors when models try to perform design quality reviews.\n`;
|
||||
|
||||
const outputPath = path.join(
|
||||
__dirname,
|
||||
"../../daily_telemetry_audit_report.md",
|
||||
);
|
||||
fs.writeFileSync(outputPath, md);
|
||||
console.log(`\n📝 Beautiful markdown report written to: ${outputPath}`);
|
||||
|
||||
await client.end();
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
139
scripts/generate-curated-audit-for-opus.ts
Normal file
139
scripts/generate-curated-audit-for-opus.ts
Normal file
@@ -0,0 +1,139 @@
|
||||
import { Client } from 'pg';
|
||||
import * as dotenv from 'dotenv';
|
||||
import * as path from 'path';
|
||||
import * as fs from 'fs';
|
||||
|
||||
// Load env variables
|
||||
dotenv.config({ path: path.join(__dirname, '../.env.local') });
|
||||
|
||||
const connectionString = process.env.DATABASE_URL;
|
||||
|
||||
if (!connectionString) {
|
||||
console.error("DATABASE_URL is not set in .env.local");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Curation target message IDs representing works and fails:
|
||||
const curatedMessagePairs = [
|
||||
// 1. Success - Initial Analysis and SVG fix
|
||||
{ user: 'fed45d45-0aab-4615-beb6-9355f03c68c4', assistant: '26333aea-4a71-4fba-a331-93bf87a28e1f', tag: 'WORK: File Edit & Dev Server' },
|
||||
// 2. Failure - Syntax error loop (duplicate tag introduced)
|
||||
{ user: 'bd0fdf63-33b8-4541-a599-f8bfe4a21498', assistant: 'a293a59a-185f-4520-a029-750892bbc7a0', tag: 'FAIL: Syntax Error & Dev Server Crash' },
|
||||
// 3. Failure - Typo in filename & loop
|
||||
{ user: 'ae4f6eb3-6514-45de-9820-01adec83f777', assistant: '784121b4-9db2-4fcc-8e74-94e0e7b6cd01', tag: 'FAIL: Filename Mismatch / Cache Loop' },
|
||||
// 4. Failure - Syntax loop repeat
|
||||
{ user: 'bdb738b6-5f0c-4820-9da8-345efb80d432', assistant: '6d8215c0-293e-4f50-b9f8-7cab7a9c8250', tag: 'FAIL: Text Replacement Duplicate Tag Loop' },
|
||||
// 5. Success - Final layout polish & CSS integration
|
||||
{ user: 'c2be02b2-734e-43ca-ba4b-8d2234bcdba6', assistant: 'c6fd1d75-2280-4c5c-b1f7-025f39f37e74', tag: 'WORK: Design QA & CSS Polish' },
|
||||
// 6. Success - App-wide propagation (Footer fix)
|
||||
{ user: 'd3858ebb-601b-4db4-9663-19abe09718cc', assistant: 'c6dcb697-0d18-47bd-8e78-7a45499d2a82', tag: 'WORK: Global Propagation & Cleanup' }
|
||||
];
|
||||
|
||||
async function main() {
|
||||
const client = new Client({ connectionString });
|
||||
await client.connect();
|
||||
|
||||
console.log("Connected to PostgreSQL DB...");
|
||||
|
||||
const projectId = 'be169fe8-d381-422b-8e9c-d2e513a8f902';
|
||||
const threadId = 'a584c700-7ae2-4fad-a906-b8daf80fcace';
|
||||
|
||||
const turns = [];
|
||||
|
||||
for (const pair of curatedMessagePairs) {
|
||||
const userRes = await client.query("SELECT id, created_at, data FROM fs_chat_messages WHERE id = $1", [pair.user]);
|
||||
const assistantRes = await client.query("SELECT id, created_at, data FROM fs_chat_messages WHERE id = $1", [pair.assistant]);
|
||||
|
||||
if (userRes.rows.length === 0 || assistantRes.rows.length === 0) {
|
||||
console.warn(`Could not find pair: ${pair.user} -> ${pair.assistant}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
const userMsg = userRes.rows[0];
|
||||
const assistantMsg = assistantRes.rows[0];
|
||||
|
||||
const rawToolResults = assistantMsg.data._rawToolResults || [];
|
||||
const actionsRun = rawToolResults.map((tr: any) => {
|
||||
let stdout = tr.result;
|
||||
let ok = true;
|
||||
let status = "success";
|
||||
|
||||
try {
|
||||
const parsedRes = JSON.parse(tr.result);
|
||||
if (parsedRes.ok === false || (parsedRes.errors && parsedRes.errors.length > 0)) {
|
||||
ok = false;
|
||||
status = "error";
|
||||
}
|
||||
} catch (e) {}
|
||||
|
||||
return {
|
||||
tool_name: tr.name,
|
||||
tool_call_id: tr.id || `tc-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`,
|
||||
input_args: tr.args || {},
|
||||
execution_outcome: {
|
||||
ok,
|
||||
status,
|
||||
stdout
|
||||
}
|
||||
};
|
||||
});
|
||||
|
||||
turns.push({
|
||||
turn_metadata: {
|
||||
message_id: userMsg.id,
|
||||
timestamp_utc: userMsg.created_at.toISOString(),
|
||||
conversation_id: threadId,
|
||||
audit_tag: pair.tag
|
||||
},
|
||||
"1_user_interaction": {
|
||||
prompt_text: userMsg.data.content
|
||||
},
|
||||
"2_payload_sent_to_google": {
|
||||
endpoint_url: "https://us-central1-aiplatform.googleapis.com/v1/projects/gen-lang-client-0980079410/locations/us-central1/publishers/google/models/gemini-3.1-pro-preview:generateContent",
|
||||
system_instruction: "Configured via VIBN Coder System Prompt (coder.ts / buildSystemPrompt)",
|
||||
contents: [
|
||||
{
|
||||
role: "user",
|
||||
parts: [{ text: userMsg.data.content }]
|
||||
}
|
||||
]
|
||||
},
|
||||
"3_payload_received_from_google": {
|
||||
timestamp_utc: assistantMsg.created_at.toISOString(),
|
||||
raw_candidates: {
|
||||
content: {
|
||||
role: "model",
|
||||
parts: [{ thought: null, text: assistantMsg.data.content }]
|
||||
}
|
||||
}
|
||||
},
|
||||
"4_platform_executions_and_telemetry": {
|
||||
actions_run: actionsRun
|
||||
},
|
||||
"5_git_version_control_diffs": []
|
||||
});
|
||||
}
|
||||
|
||||
const dataset = {
|
||||
dataset_metadata: {
|
||||
title: "VIBN Agent Telemetry Curated Audit Dataset",
|
||||
purpose: "Provide high-fidelity telemetry examples of both successful executions and failure loops to Opus for agent behavior optimization and prompt hardening.",
|
||||
source_project: {
|
||||
id: projectId,
|
||||
name: "GetAcquired 2.0",
|
||||
slug: "getacquired-2-0"
|
||||
},
|
||||
compiled_at: new Date().toISOString(),
|
||||
total_turns_audited: turns.length
|
||||
},
|
||||
turns
|
||||
};
|
||||
|
||||
const outputPath = path.join(__dirname, '../../opus_telemetry_audit_dataset.json');
|
||||
fs.writeFileSync(outputPath, JSON.stringify(dataset, null, 2));
|
||||
console.log(`\n🎉 Curated dataset for Opus successfully written to: ${outputPath}`);
|
||||
|
||||
await client.end();
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
155
scripts/generate-telemetry-audit.ts
Normal file
155
scripts/generate-telemetry-audit.ts
Normal file
@@ -0,0 +1,155 @@
|
||||
import { Client } from 'pg';
|
||||
import * as dotenv from 'dotenv';
|
||||
import * as path from 'path';
|
||||
import * as fs from 'fs';
|
||||
|
||||
// Load env variables
|
||||
dotenv.config({ path: path.join(__dirname, '../.env.local') });
|
||||
|
||||
const connectionString = process.env.DATABASE_URL;
|
||||
|
||||
if (!connectionString) {
|
||||
console.error("DATABASE_URL is not set in .env.local");
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const client = new Client({ connectionString });
|
||||
await client.connect();
|
||||
|
||||
console.log("Connected to PostgreSQL database!");
|
||||
|
||||
const projectId = 'be169fe8-d381-422b-8e9c-d2e513a8f902';
|
||||
const threadId = 'a584c700-7ae2-4fad-a906-b8daf80fcace';
|
||||
|
||||
// 1. Fetch project info
|
||||
const projectRes = await client.query(
|
||||
"SELECT id, slug, data FROM fs_projects WHERE id = $1",
|
||||
[projectId]
|
||||
);
|
||||
if (projectRes.rows.length === 0) {
|
||||
console.error(`Project ${projectId} not found.`);
|
||||
await client.end();
|
||||
process.exit(1);
|
||||
}
|
||||
const project = projectRes.rows[0];
|
||||
console.log(`Fetched project: ${project.data.name || project.slug}`);
|
||||
|
||||
// 2. Fetch thread messages
|
||||
const messagesRes = await client.query(
|
||||
"SELECT id, created_at, data FROM fs_chat_messages WHERE thread_id = $1 ORDER BY created_at ASC",
|
||||
[threadId]
|
||||
);
|
||||
const messages = messagesRes.rows;
|
||||
console.log(`Fetched ${messages.length} messages for thread ${threadId}`);
|
||||
|
||||
// Group messages into turns (User -> Assistant pairs)
|
||||
const turns = [];
|
||||
let userMsg = null;
|
||||
|
||||
for (const msg of messages) {
|
||||
const role = msg.data.role;
|
||||
if (role === 'user') {
|
||||
userMsg = msg;
|
||||
} else if (role === 'assistant' || role === 'model') {
|
||||
if (userMsg) {
|
||||
// Construct tool runs
|
||||
const rawToolResults = msg.data._rawToolResults || [];
|
||||
const toolCalls = msg.data.toolCalls || [];
|
||||
|
||||
const actionsRun = rawToolResults.map((tr: any) => {
|
||||
let stdout = tr.result;
|
||||
let ok = true;
|
||||
let status = "success";
|
||||
|
||||
try {
|
||||
const parsedRes = JSON.parse(tr.result);
|
||||
if (parsedRes.ok === false) {
|
||||
ok = false;
|
||||
status = "error";
|
||||
}
|
||||
} catch (e) {}
|
||||
|
||||
return {
|
||||
tool_name: tr.name,
|
||||
tool_call_id: tr.id || `tc-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`, // fallback
|
||||
input_args: tr.args || {},
|
||||
execution_outcome: {
|
||||
ok,
|
||||
status,
|
||||
stdout
|
||||
}
|
||||
};
|
||||
});
|
||||
|
||||
// Reconstruct Google sent payload
|
||||
const payloadSent = {
|
||||
endpoint_url: "https://us-central1-aiplatform.googleapis.com/v1/projects/gen-lang-client-0980079410/locations/us-central1/publishers/google/models/gemini-3.1-pro-preview:generateContent",
|
||||
system_instruction: "Configured via VIBN Coder System Prompt (coder.ts / buildSystemPrompt)",
|
||||
contents: [
|
||||
{
|
||||
role: "user",
|
||||
parts: [
|
||||
{
|
||||
text: userMsg.data.content
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
};
|
||||
|
||||
// Reconstruct Google received payload
|
||||
const payloadReceived = {
|
||||
timestamp_utc: msg.created_at.toISOString(),
|
||||
raw_candidates: {
|
||||
content: {
|
||||
role: "model",
|
||||
parts: [
|
||||
{
|
||||
thought: null,
|
||||
text: msg.data.content
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
turns.push({
|
||||
turn_metadata: {
|
||||
message_id: userMsg.id,
|
||||
timestamp_utc: userMsg.created_at.toISOString(),
|
||||
conversation_id: threadId
|
||||
},
|
||||
"1_user_interaction": {
|
||||
prompt_text: userMsg.data.content
|
||||
},
|
||||
"2_payload_sent_to_google": payloadSent,
|
||||
"3_payload_received_from_google": payloadReceived,
|
||||
"4_platform_executions_and_telemetry": {
|
||||
actions_run: actionsRun
|
||||
},
|
||||
"5_git_version_control_diffs": []
|
||||
});
|
||||
|
||||
userMsg = null; // reset for next turn
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const report = {
|
||||
audit_version: "2026-06-04T1.0-ULTIMATE",
|
||||
project_id: projectId,
|
||||
exported_at: new Date().toISOString(),
|
||||
target_timestamp_utc: messages[messages.length - 1]?.created_at.toISOString() || new Date().toISOString(),
|
||||
total_turns_audited: turns.length,
|
||||
turns
|
||||
};
|
||||
|
||||
const outputPath = path.join(__dirname, '../../recreated_telemetry_audit_report.json');
|
||||
fs.writeFileSync(outputPath, JSON.stringify(report, null, 2));
|
||||
console.log(`Successfully generated telemetry audit report and saved to: ${outputPath}`);
|
||||
|
||||
await client.end();
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
Reference in New Issue
Block a user