Willem van den Ende 98e18643c5 pi-performance: Make Time to first token more accurate.
Summary of changes:

 ┌──────┬──────────────────────────────────────────────────────────────────┬──────────┐
 │ Step │ Change                                                           │ Result   │
 ├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
 │ 1    │ Removed duplicate llm-performance-metrics.test.ts                │ 14 tests │
 ├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
 │ 2    │ Added rawTimestamps assertions to toLogEntry test                │ 14 tests │
 ├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
 │ 3    │ Added rawTimestamps assertions to single-turn aggregate test     │ 14 tests │
 ├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
 │ 4    │ Added rawTimestamps assertions to multi-turn aggregate test      │ 14 tests │
 ├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
 │ 5    │ Added negative TTFT filtering test                               │ 15 tests │
 ├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
 │ 6    │ Added "first turn missing TTFT, later turns have it" test        │ 16 tests │
 ├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
 │ 7    │ Added sanity check tests (warn on >500 tok/s, no warn otherwise) │ 18 tests │
 └──────┴──────────────────────────────────────────────────────────────────┴──────────┘

This is what it looks like now when I run `pi`:
 📊 Performance: llama.cpp/Qwen3.6-35B-A3B-MXFP4_MOE.gguf
   Prefill: 15,460 tokens @ 20104.0 tok/s
   Generation: 12,179 tokens @ 52.6 tok/s
   Combined: 27,639 tokens @ 118.9 tok/s (3.9m total)
   TTFT: 769ms
   Turns: 36
2026-04-28 10:52:00 +01:00

101 lines
3.2 KiB
TypeScript

// LLM Performance Metrics Extension
// Captures and displays LLM inference performance metrics
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { appendFileSync, mkdirSync } from "node:fs";
import { dirname, join } from "node:path";
// Re-export core functions from the shared metrics module
import {
calculateTurnMetrics,
aggregatePromptMetrics,
formatMetricsForDisplay,
toLogEntry,
type TurnMetrics,
type PromptMetrics,
type MetricLogEntry,
} from "./llm-metrics-core.ts";
// ============================================================================
// Extension Event Handlers (imperative shell)
// ============================================================================
// Module-level state shared by the event handlers below. One prompt spans
// agent_start → (turn_start → message_update* → turn_end)* → agent_end.
let promptStartMs: number | undefined; // Date.now() at agent_start; also guards agent_end
let currentTurnStartMs: number | undefined; // Date.now() at turn_start
let currentTurnId: string | undefined; // "turn-<index>", set at turn_start
let turnMetrics: TurnMetrics[] = []; // accumulated per-turn metrics for the current prompt
let currentTurnFirstTokenMs: number | undefined; // Per-turn TTFT: Date.now() at first text_delta
let provider: string | undefined; // ctx.model.provider, captured at agent_start
let model: string | undefined; // ctx.model.id, captured at agent_start
/**
 * LLM performance-metrics extension entry point.
 *
 * Wires four agent lifecycle events into the pure metrics core
 * (`llm-metrics-core.ts`): timestamps are captured here (imperative shell),
 * all calculation/aggregation/formatting happens in the core module.
 * On agent_end the aggregate is shown in the TUI and appended as one JSONL
 * line to `.pi/llm-metrics.log` under the current working directory.
 *
 * @param pi - Extension API used to subscribe to agent lifecycle events.
 */
export default function (pi: ExtensionAPI) {
  const logFile = join(process.cwd(), ".pi", "llm-metrics.log");

  pi.on("agent_start", async (_event, ctx) => {
    if (!ctx.model) return; // nothing to measure without a model
    promptStartMs = Date.now();
    turnMetrics = [];
    currentTurnFirstTokenMs = undefined;
    provider = ctx.model.provider;
    model = ctx.model.id;
  });

  pi.on("turn_start", async (event, _ctx) => {
    currentTurnStartMs = Date.now();
    currentTurnId = `turn-${event.turnIndex}`;
    currentTurnFirstTokenMs = undefined; // reset TTFT for this turn
  });

  pi.on("message_update", async (event, _ctx) => {
    // Capture per-turn TTFT on the first streamed text token only.
    if (currentTurnFirstTokenMs === undefined && event.assistantMessageEvent?.type === "text_delta") {
      currentTurnFirstTokenMs = Date.now();
    }
  });

  pi.on("turn_end", async (event, _ctx) => {
    if (event.message.role !== "assistant") return;
    const inputTokens = event.message.usage?.input ?? 0;
    const outputTokens = event.message.usage?.output ?? 0;
    // Explicit `!== undefined` checks (matching the agent_end guard style)
    // instead of relying on timestamp truthiness.
    const durationMs = currentTurnStartMs !== undefined ? Date.now() - currentTurnStartMs : 0;
    const ttftMs =
      currentTurnFirstTokenMs !== undefined && currentTurnStartMs !== undefined
        ? currentTurnFirstTokenMs - currentTurnStartMs
        : undefined;
    const turnMetric = calculateTurnMetrics({
      // Fall back to an index-based id rather than asserting non-null:
      // a turn_end without a preceding turn_start would otherwise smuggle
      // `undefined` through the `currentTurnId!` assertion.
      turnId: currentTurnId ?? `turn-${turnMetrics.length}`,
      inputTokens,
      outputTokens,
      durationMs,
      timeToFirstTokenMs: ttftMs,
    });
    turnMetrics.push(turnMetric);
  });

  pi.on("agent_end", async (_event, ctx) => {
    if (!provider || !model || promptStartMs === undefined) return;
    const promptMetrics = aggregatePromptMetrics({
      provider,
      model,
      turnMetrics,
    });
    // Display in TUI
    const display = formatMetricsForDisplay(promptMetrics);
    ctx.ui.notify(display, "info");
    ctx.ui.setStatus("metrics", `📊 ${promptMetrics.combinedTokensPerSec.toFixed(1)} tok/s`);
    // Log to JSONL file (one JSON object per line)
    const logEntry = toLogEntry(promptMetrics);
    mkdirSync(dirname(logFile), { recursive: true });
    appendFileSync(logFile, JSON.stringify(logEntry) + "\n", "utf8");
    // Reset ALL per-prompt state; the original left the turn-level
    // fields (currentTurnStartMs, currentTurnId) stale across prompts.
    promptStartMs = undefined;
    turnMetrics = [];
    currentTurnFirstTokenMs = undefined;
    currentTurnStartMs = undefined;
    currentTurnId = undefined;
  });
}