chore(telemetry): optimize state-based loop stall detector by tracking tool input signatures and clean up unused helper functions
This commit is contained in:
@@ -861,7 +861,7 @@ export async function POST(request: Request) {
|
||||
isConversational(message.trim());
|
||||
|
||||
let lastVerifySig: string | null = null;
|
||||
let fileHashes = new Map<string, string>();
|
||||
let lastRoundToolSig: string | null = null;
|
||||
let stallRounds = 0;
|
||||
|
||||
try {
|
||||
@@ -1069,12 +1069,13 @@ export async function POST(request: Request) {
|
||||
// 1. Compute verify signature
|
||||
const verifySig = getRoundVerifySignature(currentRoundResults);
|
||||
|
||||
// 2. Check for stall/progress
|
||||
const { progressed, nextHashes } = checkRoundProgress(
|
||||
currentRoundResults,
|
||||
fileHashes,
|
||||
);
|
||||
fileHashes = nextHashes;
|
||||
// 2. Check for stall/progress by comparing tool call signatures (names + inputs)
|
||||
const currentRoundToolSig = resp.toolCalls
|
||||
.map((tc) => `${tc.name}:${JSON.stringify(tc.args || {})}`)
|
||||
.sort()
|
||||
.join(";;");
|
||||
|
||||
const progressed = !lastVerifySig || verifySig !== lastVerifySig;
|
||||
|
||||
if (
|
||||
verifySig &&
|
||||
@@ -1085,7 +1086,7 @@ export async function POST(request: Request) {
|
||||
loopBreakReason = `Blocked on persistent error: ${verifySig.split(";;")[0]}`;
|
||||
}
|
||||
|
||||
if (!progressed) {
|
||||
if (lastRoundToolSig && currentRoundToolSig === lastRoundToolSig) {
|
||||
stallRounds++;
|
||||
} else {
|
||||
stallRounds = 0;
|
||||
@@ -1093,10 +1094,11 @@ export async function POST(request: Request) {
|
||||
|
||||
if (stallRounds >= 2) {
|
||||
loopBreakReason =
|
||||
"Stalled (No state changes or progress made for 2 rounds)";
|
||||
"Stalled (Repeated the exact same tool calls twice without advancing)";
|
||||
}
|
||||
|
||||
lastVerifySig = verifySig;
|
||||
lastRoundToolSig = currentRoundToolSig;
|
||||
|
||||
if (loopBreakReason) break;
|
||||
}
|
||||
@@ -1518,48 +1520,3 @@ function getRoundVerifySignature(roundResults: any[]): string | null {
|
||||
if (errors.length === 0) return null;
|
||||
return errors.sort().join(";;");
|
||||
}
|
||||
|
||||
/**
 * Tools that only inspect state. A successful call to one of these never
 * counts as progress on its own.
 */
const READ_ONLY_TOOL_NAMES: ReadonlySet<string> = new Set([
  "fs_read",
  "fs_list",
  "fs_tree",
  "fs_glob",
  "fs_grep",
  "dev_server_list",
  "browser_console",
]);

/**
 * Decides whether a round of tool results changed anything compared with the
 * previous round.
 *
 * Each entry's `content` is parsed as JSON; the payload is `parsed.result`
 * when present, otherwise the parsed value itself. Entries with empty or
 * unparseable content are ignored.
 *
 * A round has progressed when either:
 *  - a successful result carrying both `sha256` and `path` reports a hash that
 *    differs from the one recorded for that path in `lastHashes`, or
 *  - any other successful result comes from a tool that is not read-only.
 *
 * @param roundResults Tool results for the round (`toolName` and `content` are read).
 * @param lastHashes   Path -> sha256 as recorded after the previous round. Not mutated.
 * @returns `progressed`, plus `nextHashes`: a copy of `lastHashes` updated with
 *          the latest hash reported for every path seen in this round.
 */
function checkRoundProgress(
  roundResults: any[],
  lastHashes: Map<string, string>,
): { progressed: boolean; nextHashes: Map<string, string> } {
  let progressed = false;
  const nextHashes = new Map(lastHashes);

  for (const tr of roundResults) {
    if (!tr.content) continue;

    let result: any;
    try {
      const parsed = JSON.parse(tr.content);
      result = parsed.result || parsed;
    } catch {
      // Not JSON (or JSON `null`): nothing to inspect for this entry.
      continue;
    }

    if (!result.ok) continue;

    if (result.sha256 && result.path) {
      // File write/edit: only a hash that differs from the previous round is
      // progress. Re-writing identical content must not reset stall detection.
      if (lastHashes.get(result.path) !== result.sha256) {
        progressed = true;
      }
      // Always record the latest hash so that several writes to the same path
      // within one round leave the map describing the file's final state.
      nextHashes.set(result.path, result.sha256);
      continue;
    }

    // Any other successful action counts unless the tool is read-only.
    if (!READ_ONLY_TOOL_NAMES.has(tr.toolName)) {
      progressed = true;
    }
  }

  return { progressed, nextHashes };
}
|
||||
|
||||
vibn-frontend/scripts/generate-ajay-audit-for-opus.ts (new file, 143 lines)
@@ -0,0 +1,143 @@
|
||||
import { Client } from 'pg';
|
||||
import * as dotenv from 'dotenv';
|
||||
import * as path from 'path';
|
||||
import * as fs from 'fs';
|
||||
|
||||
// Read environment variables from ../.env.local (resolved relative to this script).
const envFilePath = path.join(__dirname, '../.env.local');
dotenv.config({ path: envFilePath });

const connectionString = process.env.DATABASE_URL;

// The script cannot do anything without a database URL, so stop right away.
if (!connectionString) {
  console.error("DATABASE_URL is not set in .env.local");
  process.exit(1);
}
|
||||
|
||||
/**
 * Converts one entry of a message's `_rawToolResults` into an `actions_run`
 * record for the audit dataset.
 *
 * The entry's `result` is parsed as JSON; the action is marked as an error when
 * the parsed value has `ok === false`, a truthy `error`, or a non-empty
 * `errors` array. The raw `result` is always passed through as `stdout`.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- entries are untyped JSON read from the database
function toActionRecord(tr: any) {
  let ok = true;
  let status = "success";

  try {
    const parsedRes = JSON.parse(tr.result);
    if (parsedRes.ok === false || parsedRes.error || (parsedRes.errors && parsedRes.errors.length > 0)) {
      ok = false;
      status = "error";
    }
  } catch {
    // Deliberate best effort: a result that cannot be inspected as JSON is
    // reported as a success and kept verbatim in `stdout`.
  }

  return {
    tool_name: tr.name,
    // `slice(2, 11)` yields the same (up to 9) characters as the deprecated `substr(2, 9)`.
    tool_call_id: tr.id || `tc-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
    input_args: tr.args || {},
    execution_outcome: {
      ok,
      status,
      stdout: tr.result
    }
  };
}

/**
 * Exports one chat thread as a dated telemetry audit dataset (JSON file).
 *
 * Steps: connect to PostgreSQL, load the project row and all messages of the
 * thread in chronological order, pair every user message with the next
 * assistant/model message, and write the resulting turns to
 * `../../opus_telemetry_audit_dataset_ajay_<YYYY-MM-DD>.json` (relative to
 * this script).
 *
 * If the project row is missing, the error is logged and the process exit code
 * is set to 1. The database client is closed on every path, including when a
 * query or the file write throws.
 */
async function main() {
  const client = new Client({ connectionString });
  await client.connect();

  try {
    console.log("Connected to PostgreSQL DB...");

    const projectId = '013f032c-ee82-42e5-9a89-b396c982bbf5';
    const threadId = '70983a8b-ec26-4241-91c2-d9a4a9ead973';

    // 1. Fetch project info
    const projectRes = await client.query(
      "SELECT id, slug, data FROM fs_projects WHERE id = $1",
      [projectId]
    );
    if (projectRes.rows.length === 0) {
      console.error(`Project ${projectId} not found.`);
      // Set the exit code and return so the `finally` below still closes the client.
      process.exitCode = 1;
      return;
    }
    const project = projectRes.rows[0];

    // 2. Fetch thread messages
    const messagesRes = await client.query(
      "SELECT id, created_at, data FROM fs_chat_messages WHERE thread_id = $1 ORDER BY created_at ASC",
      [threadId]
    );
    const messages = messagesRes.rows;
    console.log(`Fetched ${messages.length} messages for Ajay's thread ${threadId}`);

    const turns: Record<string, unknown>[] = [];
    // Most recent user message that has not yet been paired with a reply.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- untyped DB row
    let pendingUserMsg: any = null;

    for (const msg of messages) {
      const role = msg.data.role;

      if (role === 'user') {
        // A later user message replaces an earlier one that never got a reply.
        pendingUserMsg = msg;
        continue;
      }
      if ((role !== 'assistant' && role !== 'model') || !pendingUserMsg) {
        continue;
      }

      const rawToolResults = msg.data._rawToolResults || [];
      const actionsRun = rawToolResults.map(toActionRecord);

      // Replies without text content (non-string `content`) must not crash the export.
      const replyText = msg.data.content;
      const mentionsUnauthorized =
        typeof replyText === 'string' && replyText.includes("Unauthorized");

      turns.push({
        turn_metadata: {
          message_id: pendingUserMsg.id,
          timestamp_utc: pendingUserMsg.created_at.toISOString(),
          conversation_id: threadId,
          audit_tag: mentionsUnauthorized ? 'FAIL: Workspace Session Auth Blockout' : 'WORK: Conversational Planning'
        },
        "1_user_interaction": {
          prompt_text: pendingUserMsg.data.content
        },
        // Endpoint and system instruction are fixed descriptive values written
        // by this script, not values read back from the database.
        "2_payload_sent_to_google": {
          endpoint_url: "https://us-central1-aiplatform.googleapis.com/v1/projects/gen-lang-client-0980079410/locations/us-central1/publishers/google/models/gemini-3.1-pro-preview:generateContent",
          system_instruction: "Configured via VIBN Coder System Prompt (coder.ts / buildSystemPrompt)",
          contents: [
            {
              role: "user",
              parts: [{ text: pendingUserMsg.data.content }]
            }
          ]
        },
        "3_payload_received_from_google": {
          timestamp_utc: msg.created_at.toISOString(),
          raw_candidates: {
            content: {
              role: "model",
              parts: [{ thought: null, text: msg.data.content }]
            }
          }
        },
        "4_platform_executions_and_telemetry": {
          actions_run: actionsRun
        },
        "5_git_version_control_diffs": []
      });

      pendingUserMsg = null; // reset: each user message is paired at most once
    }

    const dataset = {
      dataset_metadata: {
        title: "VIBN Telemetry Dataset — ThathaPaati Workspace Lockout",
        purpose: "Telemetry report capturing the exact server-side tool execution results and 401 Unauthorized lockout loop experienced by Ajay Sridharan.",
        source_project: {
          id: projectId,
          // `data` may be null/absent on the row; fall back to the known name.
          name: project.data?.name || "ThathaPaati",
          slug: project.slug
        },
        compiled_at: new Date().toISOString(),
        total_turns_audited: turns.length
      },
      turns
    };

    const currentDate = new Date().toISOString().split('T')[0]; // YYYY-MM-DD
    const outputPath = path.join(__dirname, `../../opus_telemetry_audit_dataset_ajay_${currentDate}.json`);
    fs.writeFileSync(outputPath, JSON.stringify(dataset, null, 2));
    console.log(`\n🎉 Ajay's QA dataset successfully written to: ${outputPath}`);
  } finally {
    await client.end();
  }
}
|
||||
|
||||
// Run the export. On failure, log the error and mark the process as failed so
// shells and CI do not see a zero exit code for an incomplete export.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
vibn-frontend/scripts/generate-qa-audit-for-opus.ts (new file, 147 lines)
@@ -0,0 +1,147 @@
|
||||
import { Client } from 'pg';
|
||||
import * as dotenv from 'dotenv';
|
||||
import * as path from 'path';
|
||||
import * as fs from 'fs';
|
||||
|
||||
// Read environment variables from ../.env.local (resolved relative to this script).
const envFilePath = path.join(__dirname, '../.env.local');
dotenv.config({ path: envFilePath });

const connectionString = process.env.DATABASE_URL;

// The script cannot do anything without a database URL, so stop right away.
if (!connectionString) {
  console.error("DATABASE_URL is not set in .env.local");
  process.exit(1);
}
|
||||
|
||||
/** One hand-picked user/assistant exchange to include in the audit dataset. */
interface CuratedMessagePair {
  /** `fs_chat_messages.id` of the user prompt. */
  user: string;
  /** `fs_chat_messages.id` of the assistant reply. */
  assistant: string;
  /** Label written to the turn's `audit_tag`. */
  tag: string;
}

// Message IDs curated from the live tests, in the order they are exported.
const curatedMessagePairs: CuratedMessagePair[] = [
  // 1. Success - Hardened fs_edit (Navbar title change)
  {
    user: 'acc489f6-808c-4a1c-9686-7f6145f2fa48',
    assistant: '0c71e628-f6f3-4bce-9ccf-7a1631e7537a',
    tag: 'WORK: Hardened fs_edit (Navbar Title Change)',
  },
  // 2. Success - Port-reaper dev server restart
  {
    user: 'b304d55c-7d8a-4e1a-9823-58d521b1796d',
    assistant: 'a244bc69-b0e7-4977-89fb-d39949a7e7ed',
    tag: 'WORK: Socket-Inode Port Reaper dev_server_start',
  },
  // 3. Failure - Prisma error and AI Success Hallucination
  {
    user: '6c98dd1f-ba1a-4cd8-a654-8e6da4eecace',
    assistant: '46acdaad-3ea7-4fd1-9fde-6aff1d33e896',
    tag: 'FAIL: Prisma DB Error and AI Success Hallucination',
  },
];
|
||||
|
||||
/**
 * Converts one entry of a message's `_rawToolResults` into an `actions_run`
 * record for the audit dataset.
 *
 * The entry's `result` is parsed as JSON; the action is marked as an error when
 * the parsed value has `ok === false`, a non-empty `errors` array, or a truthy
 * `error`. The raw `result` is always passed through as `stdout`.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- entries are untyped JSON read from the database
function toActionRecord(tr: any) {
  let ok = true;
  let status = "success";

  try {
    const parsedRes = JSON.parse(tr.result);
    if (parsedRes.ok === false || (parsedRes.errors && parsedRes.errors.length > 0) || parsedRes.error) {
      ok = false;
      status = "error";
    }
  } catch {
    // Deliberate best effort: a result that cannot be inspected as JSON is
    // reported as a success and kept verbatim in `stdout`.
  }

  return {
    tool_name: tr.name,
    // `slice(2, 11)` yields the same (up to 9) characters as the deprecated `substr(2, 9)`.
    tool_call_id: tr.id || `tc-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`,
    input_args: tr.args || {},
    execution_outcome: {
      ok,
      status,
      stdout: tr.result
    }
  };
}

/**
 * Exports the curated user/assistant message pairs as a dated QA telemetry
 * dataset (JSON file).
 *
 * For every entry of `curatedMessagePairs` both messages are looked up by id;
 * a pair with a missing message is reported with a warning and skipped. The
 * dataset is written to `../../opus_telemetry_audit_dataset_<YYYY-MM-DD>.json`
 * (relative to this script).
 *
 * The database client is closed on every path, including when a query or the
 * file write throws.
 */
async function main() {
  const client = new Client({ connectionString });
  await client.connect();

  try {
    console.log("Connected to PostgreSQL DB...");

    const projectId = 'be169fe8-d381-422b-8e9c-d2e513a8f902';
    const threadId = 'a584c700-7ae2-4fad-a906-b8daf80fcace';

    const messageByIdSql = "SELECT id, created_at, data FROM fs_chat_messages WHERE id = $1";
    const turns: Record<string, unknown>[] = [];

    for (const pair of curatedMessagePairs) {
      const userRes = await client.query(messageByIdSql, [pair.user]);
      const assistantRes = await client.query(messageByIdSql, [pair.assistant]);

      if (userRes.rows.length === 0 || assistantRes.rows.length === 0) {
        console.warn(`Could not find pair: ${pair.user} -> ${pair.assistant}`);
        continue;
      }

      const userMsg = userRes.rows[0];
      const assistantMsg = assistantRes.rows[0];

      const rawToolResults = assistantMsg.data._rawToolResults || [];
      const actionsRun = rawToolResults.map(toActionRecord);

      turns.push({
        turn_metadata: {
          message_id: userMsg.id,
          timestamp_utc: userMsg.created_at.toISOString(),
          conversation_id: threadId,
          audit_tag: pair.tag
        },
        "1_user_interaction": {
          prompt_text: userMsg.data.content
        },
        // Endpoint and system instruction are fixed descriptive values written
        // by this script, not values read back from the database.
        "2_payload_sent_to_google": {
          endpoint_url: "https://us-central1-aiplatform.googleapis.com/v1/projects/gen-lang-client-0980079410/locations/us-central1/publishers/google/models/gemini-3.1-pro-preview:generateContent",
          system_instruction: "Configured via VIBN Coder System Prompt (coder.ts / buildSystemPrompt)",
          contents: [
            {
              role: "user",
              parts: [{ text: userMsg.data.content }]
            }
          ]
        },
        "3_payload_received_from_google": {
          timestamp_utc: assistantMsg.created_at.toISOString(),
          raw_candidates: {
            content: {
              role: "model",
              parts: [{ thought: null, text: assistantMsg.data.content }]
            }
          }
        },
        "4_platform_executions_and_telemetry": {
          actions_run: actionsRun
        },
        "5_git_version_control_diffs": []
      });
    }

    const dataset = {
      dataset_metadata: {
        title: "VIBN Production QA Telemetry Dataset (Hardening Validation)",
        purpose: "Telemetry audit tracking the exact performance of Task-1, Task-3, and Task-4 fixes, plus isolating the database-related AI success-hallucination error.",
        source_project: {
          id: projectId,
          name: "GetAcquired 2.0",
          slug: "getacquired-2-0"
        },
        compiled_at: new Date().toISOString(),
        total_turns_audited: turns.length
      },
      turns
    };

    // The file name carries the current date so each QA cycle gets its own file.
    const currentDate = new Date().toISOString().split('T')[0]; // YYYY-MM-DD
    const outputPath = path.join(__dirname, `../../opus_telemetry_audit_dataset_${currentDate}.json`);
    fs.writeFileSync(outputPath, JSON.stringify(dataset, null, 2));
    console.log(`\n🎉 New dated QA telemetry dataset written to: ${outputPath}`);
  } finally {
    await client.end();
  }
}
|
||||
|
||||
// Run the export. On failure, log the error and mark the process as failed so
// shells and CI do not see a zero exit code for an incomplete export.
main().catch((err: unknown) => {
  console.error(err);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user