Willem van den Ende 98e18643c5 pi-performance: Make Time to first token more accurate.
Summary of changes:

 ┌──────┬──────────────────────────────────────────────────────────────────┬──────────┐
 │ Step │ Change                                                           │ Result   │
 ├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
 │ 1    │ Removed duplicate llm-performance-metrics.test.ts                │ 14 tests │
 ├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
 │ 2    │ Added rawTimestamps assertions to toLogEntry test                │ 14 tests │
 ├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
 │ 3    │ Added rawTimestamps assertions to single-turn aggregate test     │ 14 tests │
 ├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
 │ 4    │ Added rawTimestamps assertions to multi-turn aggregate test      │ 14 tests │
 ├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
 │ 5    │ Added negative TTFT filtering test                               │ 15 tests │
 ├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
 │ 6    │ Added "first turn missing TTFT, later turns have it" test        │ 16 tests │
 ├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
 │ 7    │ Added sanity check tests (warn on >500 tok/s, no warn otherwise) │ 18 tests │
 └──────┴──────────────────────────────────────────────────────────────────┴──────────┘

This is what it looks like now when I run `pi`:
 📊 Performance: llama.cpp/Qwen3.6-35B-A3B-MXFP4_MOE.gguf
   Prefill: 15,460 tokens @ 20104.0 tok/s
   Generation: 12,179 tokens @ 52.6 tok/s
   Combined: 27,639 tokens @ 118.9 tok/s (3.9m total)
   TTFT: 769ms
   Turns: 36
2026-04-28 10:52:00 +01:00

101 lines
3.2 KiB
TypeScript

// LLM Performance Metrics Extension
// Captures and displays LLM inference performance metrics
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { appendFileSync, mkdirSync } from "node:fs";
import { dirname, join } from "node:path";
// Re-export core functions from the shared metrics module
import {
calculateTurnMetrics,
aggregatePromptMetrics,
formatMetricsForDisplay,
toLogEntry,
type TurnMetrics,
type PromptMetrics,
type MetricLogEntry,
} from "./llm-metrics-core.ts";
// ============================================================================
// Extension Event Handlers (imperative shell)
// ============================================================================
// Module-level state shared by the event handlers below. One prompt spans
// agent_start → (turn_start → message_update* → turn_end)* → agent_end.
let promptStartMs: number | undefined; // Date.now() at agent_start; also guards agent_end
let currentTurnStartMs: number | undefined; // Date.now() at turn_start
let currentTurnId: string | undefined; // "turn-<index>", set at turn_start
let turnMetrics: TurnMetrics[] = []; // accumulated per-turn metrics for the current prompt
let currentTurnFirstTokenMs: number | undefined; // Per-turn TTFT: Date.now() at first text_delta
let provider: string | undefined; // ctx.model.provider, captured at agent_start
let model: string | undefined; // ctx.model.id, captured at agent_start
/**
 * LLM performance-metrics extension entry point.
 *
 * Wires four agent lifecycle events into the pure metrics core
 * (`llm-metrics-core.ts`): timestamps are captured here (imperative shell),
 * all calculation/aggregation/formatting happens in the core module.
 * On agent_end the aggregate is shown in the TUI and appended as one JSONL
 * line to `.pi/llm-metrics.log` under the current working directory.
 *
 * @param pi - Extension API used to subscribe to agent lifecycle events.
 */
export default function (pi: ExtensionAPI) {
  const logFile = join(process.cwd(), ".pi", "llm-metrics.log");

  pi.on("agent_start", async (_event, ctx) => {
    if (!ctx.model) return; // nothing to measure without a model
    promptStartMs = Date.now();
    turnMetrics = [];
    currentTurnFirstTokenMs = undefined;
    provider = ctx.model.provider;
    model = ctx.model.id;
  });

  pi.on("turn_start", async (event, _ctx) => {
    currentTurnStartMs = Date.now();
    currentTurnId = `turn-${event.turnIndex}`;
    currentTurnFirstTokenMs = undefined; // reset TTFT for this turn
  });

  pi.on("message_update", async (event, _ctx) => {
    // Capture per-turn TTFT on the first streamed text token only.
    if (currentTurnFirstTokenMs === undefined && event.assistantMessageEvent?.type === "text_delta") {
      currentTurnFirstTokenMs = Date.now();
    }
  });

  pi.on("turn_end", async (event, _ctx) => {
    if (event.message.role !== "assistant") return;
    const inputTokens = event.message.usage?.input ?? 0;
    const outputTokens = event.message.usage?.output ?? 0;
    // Explicit `!== undefined` checks (matching the agent_end guard style)
    // instead of relying on timestamp truthiness.
    const durationMs = currentTurnStartMs !== undefined ? Date.now() - currentTurnStartMs : 0;
    const ttftMs =
      currentTurnFirstTokenMs !== undefined && currentTurnStartMs !== undefined
        ? currentTurnFirstTokenMs - currentTurnStartMs
        : undefined;
    const turnMetric = calculateTurnMetrics({
      // Fall back to an index-based id rather than asserting non-null:
      // a turn_end without a preceding turn_start would otherwise smuggle
      // `undefined` through the `currentTurnId!` assertion.
      turnId: currentTurnId ?? `turn-${turnMetrics.length}`,
      inputTokens,
      outputTokens,
      durationMs,
      timeToFirstTokenMs: ttftMs,
    });
    turnMetrics.push(turnMetric);
  });

  pi.on("agent_end", async (_event, ctx) => {
    if (!provider || !model || promptStartMs === undefined) return;
    const promptMetrics = aggregatePromptMetrics({
      provider,
      model,
      turnMetrics,
    });
    // Display in TUI
    const display = formatMetricsForDisplay(promptMetrics);
    ctx.ui.notify(display, "info");
    ctx.ui.setStatus("metrics", `📊 ${promptMetrics.combinedTokensPerSec.toFixed(1)} tok/s`);
    // Log to JSONL file (one JSON object per line)
    const logEntry = toLogEntry(promptMetrics);
    mkdirSync(dirname(logFile), { recursive: true });
    appendFileSync(logFile, JSON.stringify(logEntry) + "\n", "utf8");
    // Reset ALL per-prompt state; the original left the turn-level
    // fields (currentTurnStartMs, currentTurnId) stale across prompts.
    promptStartMs = undefined;
    turnMetrics = [];
    currentTurnFirstTokenMs = undefined;
    currentTurnStartMs = undefined;
    currentTurnId = undefined;
  });
}