feat(telemetry): emit per-turn governor summary (stop_reason, rounds, tool_results) for orchestration diagnostics
This commit is contained in:
@@ -109,6 +109,7 @@ import { buildDesignKitPromptSection } from "@/lib/design-kits/for-ai";
|
|||||||
import { buildCodebaseSummary } from "@/lib/ai/codebase-summary";
|
import { buildCodebaseSummary } from "@/lib/ai/codebase-summary";
|
||||||
import { execInDevContainer } from "@/lib/dev-container";
|
import { execInDevContainer } from "@/lib/dev-container";
|
||||||
import type { ChatMessage, ToolCall } from "@/lib/ai/gemini-chat";
|
import type { ChatMessage, ToolCall } from "@/lib/ai/gemini-chat";
|
||||||
|
import { logTurnSummary } from "@/lib/ai/telemetry-db";
|
||||||
|
|
||||||
// C-01: Raised to 150. Provides a virtually unlimited, elite engineering runway
|
// C-01: Raised to 150. Provides a virtually unlimited, elite engineering runway
|
||||||
// for complex custom application building, while the State-Based
|
// for complex custom application building, while the State-Based
|
||||||
@@ -1456,6 +1457,46 @@ export async function POST(request: Request) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---- Orchestration telemetry: one turn_summary per user turn ----
|
||||||
|
// Records WHY the agent loop ended so we can diagnose and tune the
|
||||||
|
// governor (premature stops, loop cut-offs). Fire-and-forget.
|
||||||
|
try {
|
||||||
|
const stopReason = aborted
|
||||||
|
? "user_aborted"
|
||||||
|
: loopBreakReason
|
||||||
|
? `loop_detected:${String(loopBreakReason).slice(0, 160)}`
|
||||||
|
: maxToolRounds > 0 && round >= maxToolRounds
|
||||||
|
? "round_cap"
|
||||||
|
: lastToolResultsHadFailure(messages)
|
||||||
|
? "tool_failure"
|
||||||
|
: roundsSinceText >= 30
|
||||||
|
? "silent_rounds"
|
||||||
|
: assistantToolCalls.length === 0 &&
|
||||||
|
assistantText.trim().length === 0
|
||||||
|
? "empty_no_tools"
|
||||||
|
: "completed";
|
||||||
|
|
||||||
|
logTurnSummary({
|
||||||
|
projectId: activeProject?.id,
|
||||||
|
sessionId: thread_id,
|
||||||
|
userMessage: message,
|
||||||
|
model: process.env.VIBN_CHAT_MODEL || "gemini-3.1-pro-preview",
|
||||||
|
response: {
|
||||||
|
text: assistantText,
|
||||||
|
thoughts: "",
|
||||||
|
toolCalls: assistantToolCalls,
|
||||||
|
},
|
||||||
|
toolResults: finalMsg._rawToolResults ?? [],
|
||||||
|
stopReason,
|
||||||
|
rounds: round,
|
||||||
|
toolCallCount: assistantToolCalls.length,
|
||||||
|
turnIntent,
|
||||||
|
chatMode,
|
||||||
|
});
|
||||||
|
} catch {
|
||||||
|
// never let telemetry interfere with the turn
|
||||||
|
}
|
||||||
|
|
||||||
await query(
|
await query(
|
||||||
`INSERT INTO fs_chat_messages (thread_id, user_id, data) VALUES ($1, $2, $3)`,
|
`INSERT INTO fs_chat_messages (thread_id, user_id, data) VALUES ($1, $2, $3)`,
|
||||||
[thread_id, email, JSON.stringify(finalMsg)],
|
[thread_id, email, JSON.stringify(finalMsg)],
|
||||||
|
|||||||
@@ -16,31 +16,51 @@ export interface TelemetryPayload {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fire and forget function to send telemetry to our Coolify Microservice
|
// Turn-level governor summary: emitted once per user turn so we can
|
||||||
export function logTrainingTelemetryDb(data: TelemetryPayload) {
|
// diagnose orchestration problems (premature stops, loop cut-offs).
|
||||||
|
// `stopReason` is the key field — it records WHY the agent loop ended.
|
||||||
|
export interface TurnSummaryPayload {
|
||||||
|
recordType: "turn_summary";
|
||||||
|
projectId?: string;
|
||||||
|
sessionId?: string;
|
||||||
|
userMessage?: string;
|
||||||
|
model?: string;
|
||||||
|
response?: { text: string; thoughts: string; toolCalls: any[] };
|
||||||
|
toolResults?: any[];
|
||||||
|
stopReason?: string;
|
||||||
|
rounds?: number;
|
||||||
|
toolCallCount?: number;
|
||||||
|
turnIntent?: string;
|
||||||
|
chatMode?: string;
|
||||||
|
metrics?: { durationMs: number };
|
||||||
|
}
|
||||||
|
|
||||||
|
function postTelemetry(body: unknown) {
|
||||||
setTimeout(async () => {
|
setTimeout(async () => {
|
||||||
try {
|
try {
|
||||||
const telemetryUrl = process.env.TELEMETRY_SERVICE_URL;
|
const telemetryUrl = process.env.TELEMETRY_SERVICE_URL;
|
||||||
|
if (!telemetryUrl) return; // silently skip when unconfigured
|
||||||
|
|
||||||
if (!telemetryUrl) {
|
await fetch(`${telemetryUrl.replace(/\/$/, "")}/ingest`, {
|
||||||
console.warn(
|
|
||||||
"[Telemetry] TELEMETRY_SERVICE_URL is not set. Skipping log.",
|
|
||||||
);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
await fetch(`${telemetryUrl}/ingest`, {
|
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: {
|
headers: { "Content-Type": "application/json" },
|
||||||
"Content-Type": "application/json",
|
body: JSON.stringify(body),
|
||||||
},
|
|
||||||
body: JSON.stringify(data),
|
|
||||||
});
|
});
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error(
|
console.error(
|
||||||
"[Telemetry] Failed to send training data to microservice:",
|
"[Telemetry] Failed to send data to microservice:",
|
||||||
error,
|
error instanceof Error ? error.message : String(error),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}, 0);
|
}, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fire and forget: one row per LLM call (training data).
|
||||||
|
export function logTrainingTelemetryDb(data: TelemetryPayload) {
|
||||||
|
postTelemetry(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fire and forget: one row per user turn (orchestration diagnostics).
|
||||||
|
export function logTurnSummary(data: Omit<TurnSummaryPayload, "recordType">) {
|
||||||
|
postTelemetry({ recordType: "turn_summary", ...data });
|
||||||
|
}
|
||||||
|
|||||||
@@ -13,13 +13,25 @@
|
|||||||
CREATE TABLE IF NOT EXISTS agent_telemetry (
|
CREATE TABLE IF NOT EXISTS agent_telemetry (
|
||||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||||
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
-- 'llm_call' = one row per model call (training data)
|
||||||
|
-- 'turn_summary' = one row per user turn (orchestration diagnostics)
|
||||||
|
record_type TEXT NOT NULL DEFAULT 'llm_call',
|
||||||
project_id VARCHAR(255),
|
project_id VARCHAR(255),
|
||||||
|
session_id TEXT, -- chat thread id; groups all calls of a conversation
|
||||||
|
user_message TEXT, -- the user message that triggered the turn
|
||||||
model_used VARCHAR(255) NOT NULL,
|
model_used VARCHAR(255) NOT NULL,
|
||||||
system_prompt TEXT NOT NULL,
|
system_prompt TEXT NOT NULL,
|
||||||
chat_history JSONB NOT NULL,
|
chat_history JSONB NOT NULL,
|
||||||
response_text TEXT,
|
response_text TEXT,
|
||||||
response_thoughts TEXT,
|
response_thoughts TEXT,
|
||||||
tool_calls JSONB,
|
tool_calls JSONB,
|
||||||
|
tool_results JSONB, -- tool outputs for the turn (turn_summary); NOTE(review): sender passes _rawToolResults, so confirm redaction happens before storage
|
||||||
|
-- Orchestration / governor diagnostics (turn_summary rows)
|
||||||
|
stop_reason TEXT, -- completed | round_cap | loop_detected:* | tool_failure | silent_rounds | user_aborted | empty_no_tools
|
||||||
|
rounds INTEGER, -- how many tool-loop rounds the turn ran
|
||||||
|
tool_call_count INTEGER, -- total tool calls executed in the turn
|
||||||
|
turn_intent TEXT, -- conversational vs action intent classification
|
||||||
|
chat_mode TEXT, -- vibe | collaborate | delegate
|
||||||
prompt_tokens INTEGER,
|
prompt_tokens INTEGER,
|
||||||
completion_tokens INTEGER,
|
completion_tokens INTEGER,
|
||||||
total_tokens INTEGER,
|
total_tokens INTEGER,
|
||||||
@@ -30,3 +42,26 @@ CREATE TABLE IF NOT EXISTS agent_telemetry (
|
|||||||
CREATE INDEX IF NOT EXISTS idx_agent_telemetry_project ON agent_telemetry(project_id);
|
CREATE INDEX IF NOT EXISTS idx_agent_telemetry_project ON agent_telemetry(project_id);
|
||||||
-- Index for chronological sorting
|
-- Index for chronological sorting
|
||||||
CREATE INDEX IF NOT EXISTS idx_agent_telemetry_created_at ON agent_telemetry(created_at DESC);
|
CREATE INDEX IF NOT EXISTS idx_agent_telemetry_created_at ON agent_telemetry(created_at DESC);
|
||||||
|
-- Diagnostic indexes
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_agent_telemetry_session ON agent_telemetry(session_id);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_agent_telemetry_stop ON agent_telemetry(stop_reason);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_agent_telemetry_record_type ON agent_telemetry(record_type);
|
||||||
|
|
||||||
|
-- =====================================================================
|
||||||
|
-- Example diagnostic queries (the whole point of this instrumentation)
|
||||||
|
-- =====================================================================
|
||||||
|
--
|
||||||
|
-- Distribution of how turns end (find premature-stop problems):
|
||||||
|
-- SELECT stop_reason, COUNT(*), ROUND(AVG(rounds),1) AS avg_rounds
|
||||||
|
-- FROM agent_telemetry WHERE record_type='turn_summary'
|
||||||
|
-- GROUP BY stop_reason ORDER BY 2 DESC;
|
||||||
|
--
|
||||||
|
-- Turns that ended early for a reason other than 'completed' (rounds < 3; add "AND tool_call_count > 0" to require tool activity):
|
||||||
|
-- SELECT created_at, project_id, user_message, stop_reason, rounds, tool_call_count
|
||||||
|
-- FROM agent_telemetry
|
||||||
|
-- WHERE record_type='turn_summary' AND stop_reason <> 'completed' AND rounds < 3
|
||||||
|
-- ORDER BY created_at DESC;
|
||||||
|
--
|
||||||
|
-- Replay the full trajectory of one conversation (every row recorded for a chat thread):
|
||||||
|
-- SELECT created_at, record_type, response_text, tool_calls
|
||||||
|
-- FROM agent_telemetry WHERE session_id = '<thread_id>' ORDER BY created_at ASC;
|
||||||
|
|||||||
Reference in New Issue
Block a user