308 lines
12 KiB
TypeScript
308 lines
12 KiB
TypeScript
import { Client } from "pg";
|
|
import * as dotenv from "dotenv";
|
|
import * as path from "path";
|
|
import * as fs from "fs";
|
|
|
|
// Load environment variables from the .env.local file one directory above this script.
dotenv.config({ path: path.join(__dirname, "../.env.local") });

const connectionString = process.env.DATABASE_URL;

if (!connectionString) {
  console.error("DATABASE_URL is not set in .env.local");
  process.exit(1);
}

// Argument: optional number of days to look back (default is 7).
// This value is interpolated into the SQL `INTERVAL '<n> day'` literals used by
// the audit queries, so anything that is not a positive whole number is rejected
// here rather than reaching the database as `NaN day` or a negative window.
const daysBack = Number.parseInt(process.argv[2] || "7", 10);

if (!Number.isInteger(daysBack) || daysBack <= 0) {
  console.error(
    `Invalid lookback "${process.argv[2]}": expected a positive whole number of days.`,
  );
  process.exit(1);
}
|
|
|
|
/**
 * Runs the telemetry audit end to end.
 *
 * Connects to the database named by `connectionString`, runs the audit queries
 * over the last `daysBack` days (runner session stats, runner errors, chat
 * volume, largest message payloads, repeated tool calls, failed tool calls),
 * prints a summary to the console and writes the same findings as markdown to
 * `../../daily_telemetry_audit_report.md` relative to this script's directory.
 *
 * The database connection is closed even when a query or the file write fails;
 * the failure itself still rejects the returned promise.
 */
async function main() {
  const client = new Client({ connectionString });
  await client.connect();

  try {
    const targetHost = new URL(connectionString).host;
    const reportDate = new Date().toLocaleDateString();

    let md = `# VIBN Telemetry & Agent Health Audit Report\n`;
    md += `**Date of Audit:** ${reportDate} | **Lookback Period:** Last ${daysBack} Days\n`;
    md += `**Target Host:** \`${targetHost}\`\n\n`;

    console.log(`=======================================================`);
    console.log(
      ` VIBN DAILY TELEMETRY AUDIT & HEALTH MONITOR (Last ${daysBack} Days)`,
    );
    console.log(`=======================================================`);
    console.log(`Connected to: ${targetHost}\n`);

    // --- PART 1: HIGH LEVEL AGENT RUNNER SESSION STATS ---
    const runnerStatsRes = await client.query(`
      SELECT
        status,
        count(*) as count,
        avg(EXTRACT(EPOCH FROM (completed_at - started_at))) as avg_duration_sec
      FROM agent_sessions
      WHERE created_at >= NOW() - INTERVAL '${daysBack} day'
      GROUP BY status
      ORDER BY count DESC;
    `);

    console.log(`📊 BACKGROUND AGENT RUNNER SESSIONS (Last ${daysBack} days):`);
    md += `## 📊 Background Agent Runner Sessions\n\n`;
    md += `| Status | Count | Avg Duration |\n`;
    md += `| :--- | :--- | :--- |\n`;

    if (runnerStatsRes.rows.length === 0) {
      console.log(` No background runner sessions found.`);
      md += `| N/A | 0 | N/A |\n`;
    } else {
      for (const row of runnerStatsRes.rows) {
        // GROUP BY yields a NULL group when sessions have no status; label it
        // instead of throwing on `padEnd`.
        const status = String(row.status ?? "unknown");
        // Only a missing average means "no data"; an average of 0 seconds is
        // a real measurement and must not be reported as N/A.
        const duration =
          row.avg_duration_sec == null
            ? "N/A"
            : `${Math.round(Number(row.avg_duration_sec))}s`;
        console.log(
          ` - Status: ${status.padEnd(10)} | Count: ${String(row.count).padEnd(4)} | Avg Duration: ${duration}`,
        );
        md += `| \`${status}\` | ${row.count} | ${duration} |\n`;
      }
    }
    console.log();
    md += `\n`;

    // --- PART 2: RUNNER CRASH / ERROR AUDIT ---
    const runnerErrorsRes = await client.query(`
      SELECT
        error,
        count(*) as count,
        array_agg(DISTINCT project_id) as project_ids
      FROM agent_sessions
      WHERE created_at >= NOW() - INTERVAL '${daysBack} day' AND error IS NOT NULL
      GROUP BY error
      ORDER BY count DESC;
    `);

    console.log(`⚠️ RUNNER CRASHES & LOG HALTS:`);
    md += `## ⚠️ Runner Crashes & Agent Halts\n\n`;
    if (runnerErrorsRes.rows.length === 0) {
      console.log(` ✅ No runner crashes logged.`);
      md += `* ✅ **No background runner crashes logged in this timeframe.**\n\n`;
    } else {
      for (const row of runnerErrorsRes.rows) {
        // Collapse multi-line errors so each one fits on a single report line.
        const cleanError = row.error.trim().replace(/\n/g, " ");
        const headline =
          cleanError.length > 120
            ? `${cleanError.substring(0, 120)}...`
            : cleanError;
        const projectList = row.project_ids.join("`, `");

        console.log(` [${row.count}x] ${cleanError}`);
        console.log(` Affected Projects: ${row.project_ids.join(", ")}`);
        md += `### 🚨 [${row.count}x] ${headline}\n`;
        md += `* **Crashed Projects:** \`${projectList}\`\n`;
        md += `* **Raw Log/Error:** \`${cleanError}\`\n\n`;
      }
    }
    console.log();

    // --- PART 3: CHAT THREADS & MESSAGES VOLUME ---
    const chatVolumeRes = await client.query(`
      SELECT
        date_trunc('day', created_at) as day,
        count(DISTINCT thread_id) as active_threads,
        count(*) as total_messages
      FROM fs_chat_messages
      WHERE created_at >= NOW() - INTERVAL '${daysBack} day'
      GROUP BY day
      ORDER BY day DESC;
    `);

    console.log(`💬 INTERACTIVE CHAT VOLUMES:`);
    md += `## 💬 Interactive Chat Threads & Volumes\n\n`;
    md += `| Day | Active Threads | Messages Exchanged |\n`;
    md += `| :--- | :---: | :---: |\n`;

    if (chatVolumeRes.rows.length === 0) {
      console.log(` No chat activity recorded.`);
      md += `| N/A | 0 | 0 |\n`;
    } else {
      for (const row of chatVolumeRes.rows) {
        const dayStr = new Date(row.day).toLocaleDateString();
        console.log(
          ` - Day: ${dayStr} | Active Threads: ${String(row.active_threads).padEnd(4)} | Messages Exchanged: ${row.total_messages}`,
        );
        md += `| ${dayStr} | **${row.active_threads}** | ${row.total_messages} |\n`;
      }
    }
    console.log();
    md += `\n`;

    // --- PART 4: WASTE & BLOAT DETECTOR (MAX MESSAGE PAYLOADS) ---
    const bloatRes = await client.query(`
      SELECT
        m.id as message_id,
        m.thread_id,
        t.data->>'title' as thread_title,
        length(m.data::text) as size_bytes,
        m.data->>'role' as role,
        m.created_at
      FROM fs_chat_messages m
      LEFT JOIN fs_chat_threads t ON m.thread_id = t.id
      WHERE m.created_at >= NOW() - INTERVAL '${daysBack} day'
      ORDER BY size_bytes DESC
      LIMIT 5;
    `);

    console.log(
      `💰 PAYLOAD WASTE & SIZE BLOAT DETECTOR (Top 5 largest messages):`,
    );
    md += `## 💰 Token Waste & Database Size Bloat Detector\n`;
    md += `*This tracks messages with excessively large payloads, which drain API costs and slow down model processing times due to extreme context length.*\n\n`;
    md += `| Thread Title | Role | Size (KB) | Message ID | Date |\n`;
    md += `| :--- | :--- | :---: | :--- | :--- |\n`;

    if (bloatRes.rows.length === 0) {
      console.log(` No messages to audit.`);
      md += `| N/A | N/A | 0 | N/A | N/A |\n`;
    } else {
      for (const row of bloatRes.rows) {
        const sizeKb = (row.size_bytes / 1024).toFixed(1);
        const threadTitle = row.thread_title || "Unnamed";
        const shortThreadId = row.thread_id.substring(0, 8);

        console.log(` - Thread: "${threadTitle}" (${shortThreadId})`);
        console.log(
          ` Message ID: ${row.message_id} | Role: ${row.role} | Footprint: ${sizeKb} KB | Date: ${row.created_at.toLocaleString()}`,
        );
        md += `| "${threadTitle}" (\`${shortThreadId}\`) | ${row.role} | **${sizeKb} KB** | \`${row.message_id}\` | ${new Date(row.created_at).toLocaleString()} |\n`;
      }
    }
    console.log();
    md += `\n`;

    // --- PART 5: REPETITIVE TOOL RUNS (LOOP DETECTION) ---
    const loopDetectorRes = await client.query(`
      SELECT id, thread_id, data, created_at
      FROM fs_chat_messages
      WHERE created_at >= NOW() - INTERVAL '${daysBack} day'
        AND data->'_rawToolResults' IS NOT NULL
        AND jsonb_array_length(data->'_rawToolResults') > 3
      ORDER BY created_at DESC;
    `);

    console.log(`🔄 REPETITIVE AGENT TOOL LOOP AUDIT:`);
    md += `## 🔄 Repetitive Tool Execution Loops\n`;
    md += `*Tracks sessions where the agent is calling the exact same tool with identical inputs multiple times in a single turn—indicating they are stuck or spinning their wheels.*\n\n`;

    let loopCount = 0;
    for (const row of loopDetectorRes.rows) {
      const rawToolResults = row.data._rawToolResults || [];

      // Track successive identical tools with identical inputs. A tool is
      // flagged once the same name+args key repeats twice in a row after its
      // first occurrence (i.e. three identical calls back to back).
      let consecutiveIdentical = 0;
      let lastToolKey = "";
      const loopedTools = new Set<string>();

      for (const tool of rawToolResults) {
        const toolKey = `${tool.name}:${JSON.stringify(tool.args || {})}`;
        if (toolKey === lastToolKey) {
          consecutiveIdentical++;
          if (consecutiveIdentical >= 2) {
            loopedTools.add(tool.name);
          }
        } else {
          consecutiveIdentical = 0;
        }
        lastToolKey = toolKey;
      }

      if (loopedTools.size === 0) {
        continue;
      }

      loopCount++;
      const threadTitleRes = await client.query(
        "SELECT data->>'title' as title FROM fs_chat_threads WHERE id = $1",
        [row.thread_id],
      );
      const title = threadTitleRes.rows[0]?.title || "Unnamed";
      const shortThreadId = row.thread_id.substring(0, 8);
      const toolNames = Array.from(loopedTools);

      console.log(
        ` 🚨 Potential Loop Detected in Thread "${title}" (${shortThreadId})`,
      );
      console.log(` At: ${row.created_at.toLocaleString()}`);
      console.log(
        ` Looped Tools: [${toolNames.join(", ")}] running consecutive identical inputs!`,
      );

      md += `### 🚨 Potential Loop in Thread: "${title}" (\`${shortThreadId}\`)\n`;
      md += `* **Trigger Timestamp:** ${new Date(row.created_at).toLocaleString()}\n`;
      md += `* **Looped Tools:** \`${toolNames.join("`, `")}\` running repetitive consecutive inputs.\n\n`;
    }
    if (loopCount === 0) {
      console.log(` ✅ No tool execution loops detected inside chat turns.`);
      md += `* ✅ **No tool execution loops detected inside chat turns.**\n\n`;
    }
    console.log();

    // --- PART 6: FAILED OR BANNED MCP TOOLS ---
    const toolFailureRes = await client.query(`
      SELECT
        m.thread_id,
        t.data->>'title' as thread_title,
        tr->>'name' as tool_name,
        tr->>'result' as result,
        m.created_at
      FROM fs_chat_messages m
      LEFT JOIN fs_chat_threads t ON m.thread_id = t.id,
      jsonb_array_elements(m.data->'_rawToolResults') as tr
      WHERE m.created_at >= NOW() - INTERVAL '${daysBack} day'
        AND (tr->>'result' LIKE '%Unknown tool%' OR tr->>'result' LIKE '%failed%' OR tr->>'result' LIKE '%error%')
      ORDER BY m.created_at DESC
      LIMIT 10;
    `);

    console.log(`❌ FAILED OR UNKNOWN TOOL INVOCATIONS (Last 10 events):`);
    md += `## ❌ Failed or Unknown Tool Invocations\n`;
    md += `*The last 10 tool calls that failed, had errors, or were unrecognized by the system, which disrupts the AI's execution flow.*\n\n`;
    md += `| Tool Name | Thread | Result Preview | Timestamp |\n`;
    md += `| :--- | :--- | :--- | :--- |\n`;

    if (toolFailureRes.rows.length === 0) {
      console.log(` ✅ No failing tool execution results caught.`);
      md += `| N/A | N/A | ✅ No failing tool execution results caught. | N/A |\n`;
    } else {
      for (const row of toolFailureRes.rows) {
        let previewResult = "Unknown error";
        if (row.result) {
          const flattened = row.result.trim().replace(/\n/g, " ");
          // Only mark the preview as truncated when text was actually cut.
          previewResult =
            flattened.length > 80
              ? `${flattened.substring(0, 80)}...`
              : flattened;
        }
        const threadTitle = row.thread_title || "Unnamed";

        console.log(
          ` - Tool: "${row.tool_name}" in Thread: "${threadTitle}"`,
        );
        console.log(` Result preview: ${previewResult}`);
        md += `| \`${row.tool_name}\` | "${threadTitle}" | \`${previewResult}\` | ${new Date(row.created_at).toLocaleTimeString()} |\n`;
      }
    }
    console.log(`=======================================================`);
    md += `\n`;

    // --- PART 7: ACTIONABLE HEURISTICS AND RECOMMENDATIONS ---
    // NOTE(review): the task name, the "9 times" count and the "108.5 KB"
    // figure below are hard-coded text, not values taken from the queries
    // above, so they will not track the data in this run — confirm whether
    // they should be derived from runnerErrorsRes / bloatRes instead.
    md += `## 💡 Actionable Insights & Prompt Hardening Recommendations\n\n`;
    md += `Based on the latest daily telemetry data, here are the most critical inefficiencies and loops to fix:\n\n`;
    md += `1. **Halt Delegation Loops (Priority #1):** \`T001 [P] Remove legacy Drizzle ORM configuration...\` failed **9 times** in the last ${daysBack} days. This suggests the agent gets stuck in a recursive loop when removing legacy files. We must add specific task constraints in \`vibn-agent-runner\` prompts to stop delegation early if a task is already marked completed or has repeated 3 times.\n`;
    md += `2. **Address Large Footprint Bloat:** We caught messages as large as **108.5 KB**. This is caused by storing raw, untruncated file reads and full folder lists (\`fs_list\` outputs) inside chat context. We should enforce output truncation on large files/directories inside the tools themselves.\n`;
    md += `3. **Unimplemented / Banned Tools:** Double check if \`request_visual_qa\` or other visual tools are fully wired, as they sometimes throw 'Unknown tool' errors when models try to perform design quality reviews.\n`;

    const outputPath = path.join(
      __dirname,
      "../../daily_telemetry_audit_report.md",
    );
    fs.writeFileSync(outputPath, md);
    console.log(`\n📝 Beautiful markdown report written to: ${outputPath}`);
  } finally {
    // Always release the connection; an open client would otherwise keep the
    // process alive after a failed query or file write.
    await client.end();
  }
}
|
|
|
|
// Log any failure and mark the process as failed so that schedulers or CI
// running this audit can tell it did not complete.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|