Summary of changes:
┌──────┬──────────────────────────────────────────────────────────────────┬──────────┐
│ Step │ Change │ Result │
├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
│ 1 │ Removed duplicate llm-performance-metrics.test.ts │ 14 tests │
├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
│ 2 │ Added rawTimestamps assertions to toLogEntry test │ 14 tests │
├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
│ 3 │ Added rawTimestamps assertions to single-turn aggregate test │ 14 tests │
├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
│ 4 │ Added rawTimestamps assertions to multi-turn aggregate test │ 14 tests │
├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
│ 5 │ Added negative TTFT filtering test │ 15 tests │
├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
│ 6 │ Added "first turn missing TTFT, later turns have it" test │ 16 tests │
├──────┼──────────────────────────────────────────────────────────────────┼──────────┤
│ 7 │ Added sanity check tests (warn on >500 tok/s, no warn otherwise) │ 18 tests │
└──────┴──────────────────────────────────────────────────────────────────┴──────────┘
This is what it looks like now when I run `pi`:
📊 Performance: llama.cpp/Qwen3.6-35B-A3B-MXFP4_MOE.gguf
Prefill: 15,460 tokens @ 20104.0 tok/s
Generation: 12,179 tokens @ 52.6 tok/s
Combined: 27,639 tokens @ 118.9 tok/s (3.9m total)
TTFT: 769ms
Turns: 36
101 lines
3.2 KiB
TypeScript
// LLM Performance Metrics Extension
|
|
// Captures and displays LLM inference performance metrics
|
|
|
|
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
import { appendFileSync, mkdirSync } from "node:fs";
|
|
import { dirname, join } from "node:path";
|
|
|
|
// Re-export core functions from the shared metrics module
|
|
import {
|
|
calculateTurnMetrics,
|
|
aggregatePromptMetrics,
|
|
formatMetricsForDisplay,
|
|
toLogEntry,
|
|
type TurnMetrics,
|
|
type PromptMetrics,
|
|
type MetricLogEntry,
|
|
} from "./llm-metrics-core.ts";
|
|
|
|
// ============================================================================
// Extension Event Handlers (imperative shell)
// ============================================================================

// State tracking
// Module-level mutable state shared by the event handlers below. One prompt
// (agent_start → agent_end) is assumed to be in flight at a time; the handlers
// reset these fields at the appropriate lifecycle points.
let promptStartMs: number | undefined; // Date.now() at agent_start; also serves as the "run active" flag
let currentTurnStartMs: number | undefined; // Date.now() at turn_start of the in-flight turn
let currentTurnId: string | undefined; // "turn-<index>" label for the in-flight turn
let turnMetrics: TurnMetrics[] = []; // per-turn metrics accumulated over the prompt
let currentTurnFirstTokenMs: number | undefined; // Per-turn TTFT: Date.now() at the first text_delta
let provider: string | undefined; // model provider captured at agent_start
let model: string | undefined; // model id captured at agent_start
export default function (pi: ExtensionAPI) {
|
|
const logFile = join(process.cwd(), ".pi", "llm-metrics.log");
|
|
|
|
pi.on("agent_start", async (_event, ctx) => {
|
|
if (!ctx.model) return;
|
|
promptStartMs = Date.now();
|
|
turnMetrics = [];
|
|
currentTurnFirstTokenMs = undefined;
|
|
provider = ctx.model.provider;
|
|
model = ctx.model.id;
|
|
});
|
|
|
|
pi.on("turn_start", async (event, _ctx) => {
|
|
currentTurnStartMs = Date.now();
|
|
currentTurnId = `turn-${event.turnIndex}`;
|
|
currentTurnFirstTokenMs = undefined; // Reset TTFT for this turn
|
|
});
|
|
|
|
pi.on("message_update", async (event, _ctx) => {
|
|
// Capture per-turn TTFT on first token
|
|
if (currentTurnFirstTokenMs === undefined && event.assistantMessageEvent?.type === "text_delta") {
|
|
currentTurnFirstTokenMs = Date.now();
|
|
}
|
|
});
|
|
|
|
pi.on("turn_end", async (event, _ctx) => {
|
|
if (event.message.role !== "assistant") return;
|
|
const inputTokens = event.message.usage?.input ?? 0;
|
|
const outputTokens = event.message.usage?.output ?? 0;
|
|
const durationMs = currentTurnStartMs ? Date.now() - currentTurnStartMs : 0;
|
|
const ttftMs = currentTurnFirstTokenMs && currentTurnStartMs
|
|
? currentTurnFirstTokenMs - currentTurnStartMs
|
|
: undefined;
|
|
|
|
const turnMetric = calculateTurnMetrics({
|
|
turnId: currentTurnId!,
|
|
inputTokens,
|
|
outputTokens,
|
|
durationMs,
|
|
timeToFirstTokenMs: ttftMs,
|
|
});
|
|
|
|
turnMetrics.push(turnMetric);
|
|
});
|
|
|
|
pi.on("agent_end", async (_event, ctx) => {
|
|
if (!provider || !model || promptStartMs === undefined) return;
|
|
|
|
const promptMetrics = aggregatePromptMetrics({
|
|
provider,
|
|
model,
|
|
turnMetrics,
|
|
});
|
|
|
|
// Display in TUI
|
|
const display = formatMetricsForDisplay(promptMetrics);
|
|
ctx.ui.notify(display, "info");
|
|
ctx.ui.setStatus("metrics", `📊 ${promptMetrics.combinedTokensPerSec.toFixed(1)} tok/s`);
|
|
|
|
// Log to JSONL file
|
|
const logEntry = toLogEntry(promptMetrics);
|
|
mkdirSync(dirname(logFile), { recursive: true });
|
|
appendFileSync(logFile, JSON.stringify(logEntry) + "\n", "utf8");
|
|
|
|
// Reset state
|
|
promptStartMs = undefined;
|
|
turnMetrics = [];
|
|
currentTurnFirstTokenMs = undefined;
|
|
});
|
|
} |