From 3679ccf913bac63c7fb817ce6ee5ab3f081102d2 Mon Sep 17 00:00:00 2001 From: mawkone Date: Tue, 9 Jun 2026 12:23:20 -0700 Subject: [PATCH] chore(telemetry): optimize state-based loop stall detector by tracking tool input signatures and clean up unused helper functions --- vibn-frontend/app/api/chat/route.ts | 65 ++------ .../scripts/generate-ajay-audit-for-opus.ts | 143 +++++++++++++++++ .../scripts/generate-qa-audit-for-opus.ts | 147 ++++++++++++++++++ 3 files changed, 301 insertions(+), 54 deletions(-) create mode 100644 vibn-frontend/scripts/generate-ajay-audit-for-opus.ts create mode 100644 vibn-frontend/scripts/generate-qa-audit-for-opus.ts diff --git a/vibn-frontend/app/api/chat/route.ts b/vibn-frontend/app/api/chat/route.ts index 54b02419..b99d7cd9 100644 --- a/vibn-frontend/app/api/chat/route.ts +++ b/vibn-frontend/app/api/chat/route.ts @@ -861,7 +861,7 @@ export async function POST(request: Request) { isConversational(message.trim()); let lastVerifySig: string | null = null; - let fileHashes = new Map(); + let lastRoundToolSig: string | null = null; let stallRounds = 0; try { @@ -1069,12 +1069,13 @@ export async function POST(request: Request) { // 1. Compute verify signature const verifySig = getRoundVerifySignature(currentRoundResults); - // 2. Check for stall/progress - const { progressed, nextHashes } = checkRoundProgress( - currentRoundResults, - fileHashes, - ); - fileHashes = nextHashes; + // 2. 
Check for stall/progress by comparing tool call signatures (names + inputs) + const currentRoundToolSig = resp.toolCalls + .map((tc) => `${tc.name}:${JSON.stringify(tc.args || {})}`) + .sort() + .join(";;"); + + const progressed = !lastVerifySig || verifySig !== lastVerifySig; if ( verifySig && @@ -1085,7 +1086,7 @@ export async function POST(request: Request) { loopBreakReason = `Blocked on persistent error: ${verifySig.split(";;")[0]}`; } - if (!progressed) { + if (lastRoundToolSig && currentRoundToolSig === lastRoundToolSig) { stallRounds++; } else { stallRounds = 0; @@ -1093,10 +1094,11 @@ export async function POST(request: Request) { if (stallRounds >= 2) { loopBreakReason = - "Stalled (No state changes or progress made for 2 rounds)"; + "Stalled (Repeated the exact same tool calls twice without advancing)"; } lastVerifySig = verifySig; + lastRoundToolSig = currentRoundToolSig; if (loopBreakReason) break; } @@ -1518,48 +1520,3 @@ function getRoundVerifySignature(roundResults: any[]): string | null { if (errors.length === 0) return null; return errors.sort().join(";;"); } - -function checkRoundProgress( - roundResults: any[], - lastHashes: Map, -): { progressed: boolean; nextHashes: Map } { - let progressed = false; - const nextHashes = new Map(lastHashes); - - for (const tr of roundResults) { - if (!tr.content) continue; - try { - const parsed = JSON.parse(tr.content); - const result = parsed.result || parsed; - - // If a file write or edit succeeded, check if the sha256 is new or changed - if (result.ok && result.sha256 && result.path) { - const lastHash = lastHashes.get(result.path); - if (lastHash !== result.sha256) { - progressed = true; - nextHashes.set(result.path, result.sha256); - } - } - - // If any other action completed with ok: true (excluding read-only lookup tools) - if ( - result.ok && - ![ - "fs_read", - "fs_list", - "fs_tree", - "fs_glob", - "fs_grep", - "dev_server_list", - "browser_console", - ].includes(tr.toolName) - ) { - progressed = 
true; - } - } catch (e) { - // skip - } - } - - return { progressed, nextHashes }; -} diff --git a/vibn-frontend/scripts/generate-ajay-audit-for-opus.ts b/vibn-frontend/scripts/generate-ajay-audit-for-opus.ts new file mode 100644 index 00000000..a9e3da9f --- /dev/null +++ b/vibn-frontend/scripts/generate-ajay-audit-for-opus.ts @@ -0,0 +1,143 @@ +import { Client } from 'pg'; +import * as dotenv from 'dotenv'; +import * as path from 'path'; +import * as fs from 'fs'; + +// Load env variables +dotenv.config({ path: path.join(__dirname, '../.env.local') }); + +const connectionString = process.env.DATABASE_URL; + +if (!connectionString) { + console.error("DATABASE_URL is not set in .env.local"); + process.exit(1); +} + +async function main() { + const client = new Client({ connectionString }); + await client.connect(); + + console.log("Connected to PostgreSQL DB..."); + + const projectId = '013f032c-ee82-42e5-9a89-b396c982bbf5'; + const threadId = '70983a8b-ec26-4241-91c2-d9a4a9ead973'; + + // 1. Fetch project info + const projectRes = await client.query( + "SELECT id, slug, data FROM fs_projects WHERE id = $1", + [projectId] + ); + if (projectRes.rows.length === 0) { + console.error(`Project ${projectId} not found.`); + await client.end(); + process.exit(1); + } + const project = projectRes.rows[0]; + + // 2. 
Fetch thread messages + const messagesRes = await client.query( + "SELECT id, created_at, data FROM fs_chat_messages WHERE thread_id = $1 ORDER BY created_at ASC", + [threadId] + ); + const messages = messagesRes.rows; + console.log(`Fetched ${messages.length} messages for Ajay's thread ${threadId}`); + + const turns = []; + let userMsg = null; + + for (const msg of messages) { + const role = msg.data.role; + if (role === 'user') { + userMsg = msg; + } else if (role === 'assistant' || role === 'model') { + if (userMsg) { + const rawToolResults = msg.data._rawToolResults || []; + const actionsRun = rawToolResults.map((tr: any) => { + let stdout = tr.result; + let ok = true; + let status = "success"; + + try { + const parsedRes = JSON.parse(tr.result); + if (parsedRes.ok === false || parsedRes.error || (parsedRes.errors && parsedRes.errors.length > 0)) { + ok = false; + status = "error"; + } + } catch (e) {} + + return { + tool_name: tr.name, + tool_call_id: tr.id || `tc-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`, + input_args: tr.args || {}, + execution_outcome: { + ok, + status, + stdout + } + }; + }); + + turns.push({ + turn_metadata: { + message_id: userMsg.id, + timestamp_utc: userMsg.created_at.toISOString(), + conversation_id: threadId, + audit_tag: msg.data.content.includes("Unauthorized") ? 
'FAIL: Workspace Session Auth Blockout' : 'WORK: Conversational Planning' + }, + "1_user_interaction": { + prompt_text: userMsg.data.content + }, + "2_payload_sent_to_google": { + endpoint_url: "https://us-central1-aiplatform.googleapis.com/v1/projects/gen-lang-client-0980079410/locations/us-central1/publishers/google/models/gemini-3.1-pro-preview:generateContent", + system_instruction: "Configured via VIBN Coder System Prompt (coder.ts / buildSystemPrompt)", + contents: [ + { + role: "user", + parts: [{ text: userMsg.data.content }] + } + ] + }, + "3_payload_received_from_google": { + timestamp_utc: msg.created_at.toISOString(), + raw_candidates: { + content: { + role: "model", + parts: [{ thought: null, text: msg.data.content }] + } + } + }, + "4_platform_executions_and_telemetry": { + actions_run: actionsRun + }, + "5_git_version_control_diffs": [] + }); + + userMsg = null; // reset + } + } + } + + const dataset = { + dataset_metadata: { + title: "VIBN Telemetry Dataset — ThathaPaati Workspace Lockout", + purpose: "Telemetry report capturing the exact server-side tool execution results and 401 Unauthorized lockout loop experienced by Ajay Sridharan.", + source_project: { + id: projectId, + name: project.data.name || "ThathaPaati", + slug: project.slug + }, + compiled_at: new Date().toISOString(), + total_turns_audited: turns.length + }, + turns + }; + + const currentDate = new Date().toISOString().split('T')[0]; // YYYY-MM-DD + const outputPath = path.join(__dirname, `../../opus_telemetry_audit_dataset_ajay_${currentDate}.json`); + fs.writeFileSync(outputPath, JSON.stringify(dataset, null, 2)); + console.log(`\n🎉 Ajay's QA dataset successfully written to: ${outputPath}`); + + await client.end(); +} + +main().catch(console.error); diff --git a/vibn-frontend/scripts/generate-qa-audit-for-opus.ts b/vibn-frontend/scripts/generate-qa-audit-for-opus.ts new file mode 100644 index 00000000..603281af --- /dev/null +++
b/vibn-frontend/scripts/generate-qa-audit-for-opus.ts @@ -0,0 +1,147 @@ +import { Client } from 'pg'; +import * as dotenv from 'dotenv'; +import * as path from 'path'; +import * as fs from 'fs'; + +// Load env variables +dotenv.config({ path: path.join(__dirname, '../.env.local') }); + +const connectionString = process.env.DATABASE_URL; + +if (!connectionString) { + console.error("DATABASE_URL is not set in .env.local"); + process.exit(1); +} + +// Curation target message IDs from the live tests: +const curatedMessagePairs = [ + // 1. Success - Hardened fs_edit (Navbar title change) + { + user: 'acc489f6-808c-4a1c-9686-7f6145f2fa48', + assistant: '0c71e628-f6f3-4bce-9ccf-7a1631e7537a', + tag: 'WORK: Hardened fs_edit (Navbar Title Change)' + }, + // 2. Success - Port-reaper dev server restart + { + user: 'b304d55c-7d8a-4e1a-9823-58d521b1796d', + assistant: 'a244bc69-b0e7-4977-89fb-d39949a7e7ed', + tag: 'WORK: Socket-Inode Port Reaper dev_server_start' + }, + // 3. Failure - Prisma error and AI Success Hallucination + { + user: '6c98dd1f-ba1a-4cd8-a654-8e6da4eecace', + assistant: '46acdaad-3ea7-4fd1-9fde-6aff1d33e896', + tag: 'FAIL: Prisma DB Error and AI Success Hallucination' + } +]; + +async function main() { + const client = new Client({ connectionString }); + await client.connect(); + + console.log("Connected to PostgreSQL DB..."); + + const projectId = 'be169fe8-d381-422b-8e9c-d2e513a8f902'; + const threadId = 'a584c700-7ae2-4fad-a906-b8daf80fcace'; + + const turns = []; + + for (const pair of curatedMessagePairs) { + const userRes = await client.query("SELECT id, created_at, data FROM fs_chat_messages WHERE id = $1", [pair.user]); + const assistantRes = await client.query("SELECT id, created_at, data FROM fs_chat_messages WHERE id = $1", [pair.assistant]); + + if (userRes.rows.length === 0 || assistantRes.rows.length === 0) { + console.warn(`Could not find pair: ${pair.user} -> ${pair.assistant}`); + continue; + } + + const userMsg = userRes.rows[0]; + const 
assistantMsg = assistantRes.rows[0]; + + const rawToolResults = assistantMsg.data._rawToolResults || []; + const actionsRun = rawToolResults.map((tr: any) => { + let stdout = tr.result; + let ok = true; + let status = "success"; + + try { + const parsedRes = JSON.parse(tr.result); + if (parsedRes.ok === false || (parsedRes.errors && parsedRes.errors.length > 0) || parsedRes.error) { + ok = false; + status = "error"; + } + } catch (e) {} + + return { + tool_name: tr.name, + tool_call_id: tr.id || `tc-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`, + input_args: tr.args || {}, + execution_outcome: { + ok, + status, + stdout + } + }; + }); + + turns.push({ + turn_metadata: { + message_id: userMsg.id, + timestamp_utc: userMsg.created_at.toISOString(), + conversation_id: threadId, + audit_tag: pair.tag + }, + "1_user_interaction": { + prompt_text: userMsg.data.content + }, + "2_payload_sent_to_google": { + endpoint_url: "https://us-central1-aiplatform.googleapis.com/v1/projects/gen-lang-client-0980079410/locations/us-central1/publishers/google/models/gemini-3.1-pro-preview:generateContent", + system_instruction: "Configured via VIBN Coder System Prompt (coder.ts / buildSystemPrompt)", + contents: [ + { + role: "user", + parts: [{ text: userMsg.data.content }] + } + ] + }, + "3_payload_received_from_google": { + timestamp_utc: assistantMsg.created_at.toISOString(), + raw_candidates: { + content: { + role: "model", + parts: [{ thought: null, text: assistantMsg.data.content }] + } + } + }, + "4_platform_executions_and_telemetry": { + actions_run: actionsRun + }, + "5_git_version_control_diffs": [] + }); + } + + const dataset = { + dataset_metadata: { + title: "VIBN Production QA Telemetry Dataset (Hardening Validation)", + purpose: "Telemetry audit tracking the exact performance of Task-1, Task-3, and Task-4 fixes, plus isolating the database-related AI success-hallucination error.", + source_project: { + id: projectId, + name: "GetAcquired 2.0", + slug:
"getacquired-2-0" + }, + compiled_at: new Date().toISOString(), + total_turns_audited: turns.length + }, + turns + }; + + // We date the file dynamically to track your QA cycles + const currentDate = new Date().toISOString().split('T')[0]; // YYYY-MM-DD + const outputPath = path.join(__dirname, `../../opus_telemetry_audit_dataset_${currentDate}.json`); + fs.writeFileSync(outputPath, JSON.stringify(dataset, null, 2)); + console.log(`\n🎉 New dated QA telemetry dataset written to: ${outputPath}`); + + await client.end(); +} + +main().catch(console.error);