// LLM Performance Metrics Extension
// Captures and displays LLM inference performance metrics

import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { appendFileSync, mkdirSync } from "node:fs";
import { dirname, join } from "node:path";

// Core functions and types from the shared metrics module
import {
  calculateTurnMetrics,
  aggregatePromptMetrics,
  formatMetricsForDisplay,
  toLogEntry,
  type TurnMetrics,
  type PromptMetrics,
  type MetricLogEntry,
} from "./llm-metrics-core.ts";

// ============================================================================
// Extension Event Handlers (imperative shell)
// ============================================================================

// State tracking (module-level, shared by all handlers registered below)
let promptStartMs: number | undefined; // Set on agent_start; used as the "a prompt is being tracked" guard
let currentTurnStartMs: number | undefined;
let currentTurnId: string | undefined;
let turnMetrics: TurnMetrics[] = [];
let currentTurnFirstTokenMs: number | undefined; // Per-turn TTFT
let provider: string | undefined;
let model: string | undefined;

/**
 * Registers the LLM metrics handlers on the extension API.
 *
 * Per prompt (agent_start .. agent_end) it records one TurnMetrics entry for
 * every assistant turn, then aggregates them, shows the result in the UI and
 * appends one JSON line to `.pi/llm-metrics.log` under the current working
 * directory.
 *
 * @param pi Extension API used to subscribe to agent and turn events.
 */
export default function (pi: ExtensionAPI): void {
  // Resolved once at registration time, relative to the process working directory.
  const logFile = join(process.cwd(), ".pi", "llm-metrics.log");

  // Start tracking a new prompt. Without a model nothing is tracked.
  pi.on("agent_start", async (_event, ctx) => {
    if (!ctx.model) return;

    promptStartMs = Date.now();
    turnMetrics = [];
    currentTurnFirstTokenMs = undefined;
    provider = ctx.model.provider;
    model = ctx.model.id;
  });

  // Remember when the turn started and which turn it is.
  pi.on("turn_start", async (event, _ctx) => {
    currentTurnStartMs = Date.now();
    currentTurnId = `turn-${event.turnIndex}`;
    currentTurnFirstTokenMs = undefined; // Reset TTFT for this turn
  });

  // Capture per-turn TTFT on the first text delta of the turn.
  // NOTE(review): only "text_delta" events count as a first token here; if the
  // API emits other delta kinds first, TTFT measures time to first *text* — confirm.
  pi.on("message_update", async (event, _ctx) => {
    if (
      currentTurnFirstTokenMs === undefined &&
      event.assistantMessageEvent?.type === "text_delta"
    ) {
      currentTurnFirstTokenMs = Date.now();
    }
  });

  // Convert the finished assistant turn into a TurnMetrics entry.
  pi.on("turn_end", async (event, _ctx) => {
    if (event.message.role !== "assistant") return;

    const inputTokens = event.message.usage?.input ?? 0;
    const outputTokens = event.message.usage?.output ?? 0;

    // Explicit undefined checks: these are timestamps, not flags.
    const durationMs =
      currentTurnStartMs !== undefined ? Date.now() - currentTurnStartMs : 0;
    const ttftMs =
      currentTurnFirstTokenMs !== undefined && currentTurnStartMs !== undefined
        ? currentTurnFirstTokenMs - currentTurnStartMs
        : undefined;

    const turnMetric = calculateTurnMetrics({
      // Fall back to a positional id if turn_end arrives without a turn_start.
      turnId: currentTurnId ?? `turn-${turnMetrics.length}`,
      inputTokens,
      outputTokens,
      durationMs,
      timeToFirstTokenMs: ttftMs,
    });
    turnMetrics.push(turnMetric);
  });

  // Aggregate, display and log the metrics of the finished prompt.
  pi.on("agent_end", async (_event, ctx) => {
    if (!provider || !model || promptStartMs === undefined) return;

    try {
      const promptMetrics = aggregatePromptMetrics({
        provider,
        model,
        turnMetrics,
      });

      // Display in TUI
      const display = formatMetricsForDisplay(promptMetrics);
      ctx.ui.notify(display, "info");
      ctx.ui.setStatus(
        "metrics",
        `📊 ${promptMetrics.combinedTokensPerSec.toFixed(1)} tok/s`,
      );

      // Log to JSONL file
      const logEntry = toLogEntry(promptMetrics);
      try {
        mkdirSync(dirname(logFile), { recursive: true });
        appendFileSync(logFile, JSON.stringify(logEntry) + "\n", "utf8");
      } catch (error: unknown) {
        // Logging is secondary: report the failure instead of letting it
        // escape the event handler.
        const reason = error instanceof Error ? error.message : String(error);
        ctx.ui.notify(
          `LLM metrics: could not write ${logFile}: ${reason}`,
          "warning",
        );
      }
    } finally {
      // Reset state even when aggregation/display throws, so the turns of
      // this prompt can never be counted again for a later one.
      promptStartMs = undefined;
      turnMetrics = [];
      currentTurnFirstTokenMs = undefined;
    }
  });
}