move pi-llm-performance to this repo

This commit is contained in:
Willem van den Ende 2026-04-28 10:00:45 +01:00
parent c62eb432bf
commit 0cf13ed54e
7 changed files with 1162 additions and 0 deletions

View File

@ -0,0 +1,30 @@
# pi-llm-performance
LLM performance metrics extension
## How to install
Add to your global pi settings:
```bash
pi install /path/to/custom-coding-agent/packages/pi-llm-performance
```
Or add manually to `~/.pi/agent/settings.json`:
```
"packages": [
"/Users/willem/dev/spikes/llm/custom-coding-agent/packages/pi-llm-performance",
...
]
```
Then reload pi:
```bash
/reload
```
## License
MIT

18
packages/pi-llm-performance/deno.lock generated Normal file
View File

@ -0,0 +1,18 @@
{
"version": "5",
"specifiers": {
"jsr:@std/assert@*": "1.0.19",
"jsr:@std/internal@^1.0.12": "1.0.12"
},
"jsr": {
"@std/assert@1.0.19": {
"integrity": "eaada96ee120cb980bc47e040f82814d786fe8162ecc53c91d8df60b8755991e",
"dependencies": [
"jsr:@std/internal"
]
},
"@std/internal@1.0.12": {
"integrity": "972a634fd5bc34b242024402972cd5143eac68d8dffaca5eaa4dba30ce17b027"
}
}
}

View File

@ -0,0 +1,17 @@
{
"name": "pi-llm-performance",
"version": "0.1.0",
"description": "LLM performance metrics extension",
"type": "module",
"exports": {
".": "./src/llm-performance-metrics.ts"
},
"keywords": ["pi-package"],
"pi": {
"extensions": ["src/llm-performance-metrics.ts"]
},
"peerDependencies": {
"@mariozechner/pi-coding-agent": "*"
},
"license": "MIT"
}

View File

@ -0,0 +1,398 @@
import {
calculateTurnMetrics,
aggregatePromptMetrics,
formatMetricsForDisplay,
toLogEntry,
type TurnMetrics,
type PromptMetrics,
} from "./llm-metrics-core.ts";
import { assertEquals, assertGreaterOrEqual, assertLessOrEqual } from "jsr:@std/assert";
Deno.test("calculateTurnMetrics - creates turn metrics object", () => {
const result = calculateTurnMetrics({
turnId: "turn-1",
inputTokens: 100,
outputTokens: 50,
durationMs: 2000,
timeToFirstTokenMs: 500,
});
assertEquals(result.turnId, "turn-1");
assertEquals(result.inputTokens, 100);
assertEquals(result.outputTokens, 50);
assertEquals(result.durationMs, 2000);
assertEquals(result.timeToFirstTokenMs, 500);
});
Deno.test("calculateTurnMetrics - handles missing timeToFirstToken", () => {
const result = calculateTurnMetrics({
turnId: "turn-1",
inputTokens: 100,
outputTokens: 50,
durationMs: 2000,
});
assertEquals(result.timeToFirstTokenMs, undefined);
});
Deno.test("aggregatePromptMetrics - aggregates single turn", () => {
const turnMetrics: TurnMetrics[] = [
{
turnId: "turn-1",
inputTokens: 1000,
outputTokens: 200,
durationMs: 5000,
timeToFirstTokenMs: 800,
},
];
const result = aggregatePromptMetrics({
provider: "anthropic",
model: "claude-sonnet-4",
turnMetrics,
});
assertEquals(result.provider, "anthropic");
assertEquals(result.model, "claude-sonnet-4");
assertEquals(result.turnCount, 1);
assertEquals(result.inputTokens, 1000);
assertEquals(result.outputTokens, 200);
assertEquals(result.totalTokens, 1200);
assertEquals(result.totalDurationMs, 5000);
assertEquals(result.timeToFirstTokenMs, 800);
// Tokens per second calculations
// prefill: 1000 input tokens / 0.8s TTFT = 1250 tok/s
assertEquals(result.prefillTokensPerSec, 1250);
// generation: 200 output tokens / 4.2s (5s - 0.8s) = 47.62 tok/s
assertGreaterOrEqual(result.generationTokensPerSec, 47.6);
assertLessOrEqual(result.generationTokensPerSec, 47.7);
// combined: 1200 total tokens / 5s = 240 tok/s
assertEquals(result.combinedTokensPerSec, 240);
});
Deno.test("aggregatePromptMetrics - aggregates multiple turns", () => {
const turnMetrics: TurnMetrics[] = [
{
turnId: "turn-1",
inputTokens: 1000,
outputTokens: 200,
durationMs: 3000,
timeToFirstTokenMs: 800,
},
{
turnId: "turn-2",
inputTokens: 500,
outputTokens: 150,
durationMs: 2000,
},
{
turnId: "turn-3",
inputTokens: 300,
outputTokens: 100,
durationMs: 1500,
},
];
const result = aggregatePromptMetrics({
provider: "openai",
model: "gpt-4o",
turnMetrics,
});
assertEquals(result.turnCount, 3);
assertEquals(result.inputTokens, 1800); // 1000 + 500 + 300
assertEquals(result.outputTokens, 450); // 200 + 150 + 100
assertEquals(result.totalTokens, 2250);
assertEquals(result.totalDurationMs, 6500); // 3000 + 2000 + 1500
assertEquals(result.timeToFirstTokenMs, 800); // From first turn only
// Tokens per second: prefill uses TTFT (0.8s), generation uses (total - TTFT) = 5.7s
// prefill: 1800 / 0.8 = 2250 tok/s
assertEquals(result.prefillTokensPerSec, 2250);
// generation: 450 / 5.7 = 78.95 tok/s
assertGreaterOrEqual(result.generationTokensPerSec, 78.9);
assertLessOrEqual(result.generationTokensPerSec, 79.0);
// combined: 2250 / 6.5 = 346.15 tok/s
assertGreaterOrEqual(result.combinedTokensPerSec, 346.1);
assertLessOrEqual(result.combinedTokensPerSec, 346.2);
});
Deno.test("aggregatePromptMetrics - handles empty turn list", () => {
const result = aggregatePromptMetrics({
provider: "anthropic",
model: "claude-sonnet-4",
turnMetrics: [],
});
assertEquals(result.turnCount, 0);
assertEquals(result.inputTokens, 0);
assertEquals(result.outputTokens, 0);
assertEquals(result.totalTokens, 0);
assertEquals(result.prefillTokensPerSec, 0);
assertEquals(result.generationTokensPerSec, 0);
assertEquals(result.combinedTokensPerSec, 0);
assertEquals(result.totalDurationMs, 0);
assertEquals(result.timeToFirstTokenMs, undefined);
});
Deno.test("formatMetricsForDisplay - formats single turn metrics", () => {
const metrics: PromptMetrics = {
provider: "anthropic",
model: "claude-sonnet-4",
turnCount: 1,
inputTokens: 1250,
outputTokens: 342,
totalTokens: 1592,
prefillTokensPerSec: 482.1,
generationTokensPerSec: 18.3,
combinedTokensPerSec: 38.0,
totalDurationMs: 21600,
timeToFirstTokenMs: 850,
turns: [],
};
const display = formatMetricsForDisplay(metrics);
assertEquals(display.includes("anthropic/claude-sonnet-4"), true);
assertEquals(display.includes("1,250 tokens"), true);
assertEquals(display.includes("482.1 tok/s"), true);
assertEquals(display.includes("342 tokens"), true);
assertEquals(display.includes("18.3 tok/s"), true);
assertEquals(display.includes("1,592 tokens"), true);
assertEquals(display.includes("38.0 tok/s"), true);
assertEquals(display.includes("21.6s"), true);
assertEquals(display.includes("TTFT: 850ms"), true);
});
Deno.test("formatMetricsForDisplay - formats duration as minutes when over 60s", () => {
const metrics: PromptMetrics = {
provider: "openai",
model: "gpt-4o",
turnCount: 1,
inputTokens: 5000,
outputTokens: 1000,
totalTokens: 6000,
prefillTokensPerSec: 50,
generationTokensPerSec: 10,
combinedTokensPerSec: 60,
totalDurationMs: 120000, // 2 minutes
timeToFirstTokenMs: 1500,
turns: [],
};
const display = formatMetricsForDisplay(metrics);
assertEquals(display.includes("2.0m"), true);
});
Deno.test("formatMetricsForDisplay - omits turn count when single turn", () => {
const metrics: PromptMetrics = {
provider: "anthropic",
model: "claude-sonnet-4",
turnCount: 1,
inputTokens: 100,
outputTokens: 50,
totalTokens: 150,
prefillTokensPerSec: 20,
generationTokensPerSec: 10,
combinedTokensPerSec: 30,
totalDurationMs: 5000,
timeToFirstTokenMs: 500,
turns: [],
};
const display = formatMetricsForDisplay(metrics);
assertEquals(display.includes("Turns: 1"), false);
});
Deno.test("formatMetricsForDisplay - omits prefill/generation when TTFT is unavailable", () => {
const metrics: PromptMetrics = {
provider: "openai",
model: "gpt-4o",
turnCount: 1,
inputTokens: 1000,
outputTokens: 200,
totalTokens: 1200,
prefillTokensPerSec: 0,
generationTokensPerSec: 0,
combinedTokensPerSec: 240,
totalDurationMs: 5000,
timeToFirstTokenMs: undefined,
turns: [],
};
const display = formatMetricsForDisplay(metrics);
assertEquals(display.includes("Prefill:"), false);
assertEquals(display.includes("Generation:"), false);
assertEquals(display.includes("1,200 tokens"), true);
assertEquals(display.includes("240.0 tok/s"), true);
});
Deno.test("formatMetricsForDisplay - shows turn count when multiple turns", () => {
const metrics: PromptMetrics = {
provider: "anthropic",
model: "claude-sonnet-4",
turnCount: 3,
inputTokens: 100,
outputTokens: 50,
totalTokens: 150,
prefillTokensPerSec: 20,
generationTokensPerSec: 10,
combinedTokensPerSec: 30,
totalDurationMs: 5000,
timeToFirstTokenMs: 500,
turns: [],
};
const display = formatMetricsForDisplay(metrics);
assertEquals(display.includes("Turns: 3"), true);
});
Deno.test("toLogEntry - creates JSON-serializable log entry", () => {
const metrics: PromptMetrics = {
provider: "anthropic",
model: "claude-sonnet-4",
turnCount: 2,
inputTokens: 1250,
outputTokens: 342,
totalTokens: 1592,
prefillTokensPerSec: 482.12345,
generationTokensPerSec: 18.34567,
combinedTokensPerSec: 38.09876,
totalDurationMs: 21600,
timeToFirstTokenMs: 850,
turns: [],
};
const logEntry = toLogEntry(metrics);
assertEquals(logEntry.provider, "anthropic");
assertEquals(logEntry.model, "claude-sonnet-4");
assertEquals(logEntry.turnCount, 2);
assertEquals(logEntry.inputTokens, 1250);
assertEquals(logEntry.outputTokens, 342);
assertEquals(logEntry.totalTokens, 1592);
// Rounded to 2 decimal places
assertEquals(logEntry.prefillTokensPerSec, 482.12);
assertEquals(logEntry.generationTokensPerSec, 18.35);
assertEquals(logEntry.combinedTokensPerSec, 38.1);
assertEquals(logEntry.totalDurationMs, 21600);
assertEquals(logEntry.timeToFirstTokenMs, 850);
// Should have ISO timestamp
assertEquals(logEntry.timestamp.includes("T"), true);
assertEquals(logEntry.timestamp.includes("Z"), true);
// Should be JSON serializable
const json = JSON.stringify(logEntry);
assertEquals(json.length > 0, true);
const parsed = JSON.parse(json);
assertEquals(parsed.provider, "anthropic");
});
Deno.test("aggregatePromptMetrics - uses full duration when TTFT is undefined", () => {
const turnMetrics: TurnMetrics[] = [
{
turnId: "turn-1",
inputTokens: 1000,
outputTokens: 200,
durationMs: 5000,
// No timeToFirstTokenMs
},
];
const result = aggregatePromptMetrics({
provider: "openai",
model: "gpt-4o",
turnMetrics,
});
assertEquals(result.turnCount, 1);
assertEquals(result.inputTokens, 1000);
assertEquals(result.outputTokens, 200);
// Without TTFT, prefill and generation rates are 0 (cannot separate phases)
// Only combined rate is meaningful
assertEquals(result.prefillTokensPerSec, 0);
assertEquals(result.generationTokensPerSec, 0);
assertEquals(result.combinedTokensPerSec, 240);
});
Deno.test("toLogEntry - handles missing timeToFirstToken", () => {
const metrics: PromptMetrics = {
provider: "anthropic",
model: "claude-sonnet-4",
turnCount: 1,
inputTokens: 100,
outputTokens: 50,
totalTokens: 150,
prefillTokensPerSec: 20,
generationTokensPerSec: 10,
combinedTokensPerSec: 30,
totalDurationMs: 5000,
timeToFirstTokenMs: undefined,
turns: [],
};
const logEntry = toLogEntry(metrics);
assertEquals(logEntry.timeToFirstTokenMs, undefined);
});
Deno.test("Integration - full flow from turns to log entry", () => {
// Simulate a real scenario with multiple turns
const turn1 = calculateTurnMetrics({
turnId: "turn-1",
inputTokens: 2000,
outputTokens: 500,
durationMs: 8000,
timeToFirstTokenMs: 1200,
});
const turn2 = calculateTurnMetrics({
turnId: "turn-2",
inputTokens: 800,
outputTokens: 200,
durationMs: 3000,
});
const promptMetrics = aggregatePromptMetrics({
provider: "groq",
model: "llama-3.1-70b",
turnMetrics: [turn1, turn2],
});
const display = formatMetricsForDisplay(promptMetrics);
const logEntry = toLogEntry(promptMetrics);
// Verify aggregation
assertEquals(promptMetrics.turnCount, 2);
assertEquals(promptMetrics.inputTokens, 2800);
assertEquals(promptMetrics.outputTokens, 700);
assertEquals(promptMetrics.totalTokens, 3500);
assertEquals(promptMetrics.totalDurationMs, 11000);
assertEquals(promptMetrics.timeToFirstTokenMs, 1200);
// Verify corrected rate calculations
// prefill: 2800 / 1.2 = 2333.33 tok/s
assertGreaterOrEqual(promptMetrics.prefillTokensPerSec, 2333.3);
assertLessOrEqual(promptMetrics.prefillTokensPerSec, 2333.4);
// generation: 700 / 9.8 = 71.43 tok/s
assertGreaterOrEqual(promptMetrics.generationTokensPerSec, 71.4);
assertLessOrEqual(promptMetrics.generationTokensPerSec, 71.5);
// combined: 3500 / 11 = 318.18 tok/s
assertGreaterOrEqual(promptMetrics.combinedTokensPerSec, 318.1);
assertLessOrEqual(promptMetrics.combinedTokensPerSec, 318.2);
// Verify display contains key info
assertEquals(display.includes("groq/llama-3.1-70b"), true);
assertEquals(display.includes("TTFT: 1200ms"), true);
// Verify log entry
assertEquals(logEntry.provider, "groq");
assertEquals(logEntry.model, "llama-3.1-70b");
assertEquals(logEntry.turnCount, 2);
});

View File

@ -0,0 +1,201 @@
// Functional core for LLM performance metrics calculation
// Pure data shapes and (below) pure functions — no I/O, so everything here
// is unit-testable in isolation.

/** Raw measurements captured for a single agent turn. */
export interface TurnMetrics {
  // Identifier of the turn, e.g. "turn-0".
  turnId: string;
  // Prompt-side token count for this turn.
  inputTokens: number;
  // Completion-side token count for this turn.
  outputTokens: number;
  // Wall-clock duration of the whole turn in milliseconds.
  durationMs: number;
  // Milliseconds from turn start to the first streamed token;
  // absent when streaming timing was not captured.
  timeToFirstTokenMs?: number;
}

/** Metrics aggregated across all turns of a single prompt. */
export interface PromptMetrics {
  provider: string;
  model: string;
  // Number of turns aggregated.
  turnCount: number;
  // Sum of input tokens over all turns.
  inputTokens: number;
  // Sum of output tokens over all turns.
  outputTokens: number;
  // inputTokens + outputTokens.
  totalTokens: number;
  // Input tokens / TTFT; 0 when TTFT is unavailable.
  prefillTokensPerSec: number;
  // Output tokens / (total duration - TTFT); 0 when TTFT is unavailable.
  generationTokensPerSec: number;
  // Total tokens / total duration.
  combinedTokensPerSec: number;
  // Sum of turn durations in milliseconds.
  totalDurationMs: number;
  // TTFT taken from the first turn only; absent when not recorded.
  timeToFirstTokenMs?: number;
  // The per-turn metrics this aggregate was built from.
  turns: TurnMetrics[];
}

/** JSON-serializable shape written as one line of the JSONL metrics log. */
export interface MetricLogEntry {
  // ISO-8601 timestamp of when the entry was created.
  timestamp: string;
  provider: string;
  model: string;
  turnCount: number;
  inputTokens: number;
  outputTokens: number;
  totalTokens: number;
  // The three rates below are rounded to two decimal places.
  prefillTokensPerSec: number;
  generationTokensPerSec: number;
  combinedTokensPerSec: number;
  totalDurationMs: number;
  timeToFirstTokenMs?: number;
}
/**
 * Calculate metrics for a single turn.
 *
 * Currently a structural copy of the inputs; kept as a function so the
 * turn-metric shape is constructed in exactly one place.
 */
export function calculateTurnMetrics(params: {
  turnId: string;
  inputTokens: number;
  outputTokens: number;
  durationMs: number;
  timeToFirstTokenMs?: number;
}): TurnMetrics {
  const { turnId, inputTokens, outputTokens, durationMs, timeToFirstTokenMs } = params;
  return { turnId, inputTokens, outputTokens, durationMs, timeToFirstTokenMs };
}
/**
 * Aggregate multiple turn metrics into prompt-level metrics.
 *
 * Token totals and duration are summed over all turns, while the
 * time-to-first-token is taken from the first turn only. Rates:
 *  - prefill    = total input tokens  / TTFT
 *  - generation = total output tokens / (total duration - TTFT)
 *  - combined   = total tokens        / total duration
 * When TTFT is unavailable the prefill/generation phases cannot be
 * separated, so those two rates are reported as 0.
 */
export function aggregatePromptMetrics(params: {
  provider: string;
  model: string;
  turnMetrics: TurnMetrics[];
}): PromptMetrics {
  const { provider, model, turnMetrics } = params;

  // No turns: every numeric metric is zero and TTFT stays absent.
  if (turnMetrics.length === 0) {
    return {
      provider,
      model,
      turnCount: 0,
      inputTokens: 0,
      outputTokens: 0,
      totalTokens: 0,
      prefillTokensPerSec: 0,
      generationTokensPerSec: 0,
      combinedTokensPerSec: 0,
      totalDurationMs: 0,
      turns: [],
    };
  }

  // Accumulate the three totals in a single pass.
  let inputTokens = 0;
  let outputTokens = 0;
  let totalDurationMs = 0;
  for (const turn of turnMetrics) {
    inputTokens += turn.inputTokens;
    outputTokens += turn.outputTokens;
    totalDurationMs += turn.durationMs;
  }
  const totalTokens = inputTokens + outputTokens;
  const totalDurationSec = totalDurationMs / 1000;

  // TTFT comes from the first turn only.
  const timeToFirstTokenMs = turnMetrics[0]?.timeToFirstTokenMs;
  const hasTtft = timeToFirstTokenMs !== undefined;
  const ttftSec = hasTtft ? timeToFirstTokenMs / 1000 : 0;
  const generationSec = hasTtft ? (totalDurationMs - timeToFirstTokenMs) / 1000 : 0;

  // Guard against zero/negative denominators (also covers the missing-TTFT case).
  const safeRate = (tokens: number, seconds: number): number =>
    seconds > 0 ? tokens / seconds : 0;

  return {
    provider,
    model,
    turnCount: turnMetrics.length,
    inputTokens,
    outputTokens,
    totalTokens,
    prefillTokensPerSec: safeRate(inputTokens, ttftSec),
    generationTokensPerSec: safeRate(outputTokens, generationSec),
    combinedTokensPerSec: safeRate(totalTokens, totalDurationSec),
    totalDurationMs,
    timeToFirstTokenMs,
    turns: turnMetrics,
  };
}
/**
 * Format metrics for TUI display.
 *
 * Builds a small multi-line summary. The prefill and generation lines only
 * appear when their rates are non-zero (i.e. TTFT was available); the turn
 * count only appears for multi-turn prompts; durations >= 60s switch to a
 * minutes display.
 */
export function formatMetricsForDisplay(metrics: PromptMetrics): string {
  const header = `📊 Performance: ${metrics.provider}/${metrics.model}`;
  if (metrics.turnCount === 0) {
    return `${header}\n No turns recorded`;
  }

  const out: string[] = [header];

  // Phase-specific rates are only shown when TTFT allowed them to be computed.
  if (metrics.prefillTokensPerSec > 0) {
    out.push(
      ` Prefill: ${metrics.inputTokens.toLocaleString()} tokens @ ${metrics.prefillTokensPerSec.toFixed(1)} tok/s`
    );
  }
  if (metrics.generationTokensPerSec > 0) {
    out.push(
      ` Generation: ${metrics.outputTokens.toLocaleString()} tokens @ ${metrics.generationTokensPerSec.toFixed(1)} tok/s`
    );
  }

  // Duration rendered as seconds below one minute, minutes otherwise.
  const durationSec = metrics.totalDurationMs / 1000;
  const durationDisplay =
    durationSec >= 60 ? `${(durationSec / 60).toFixed(1)}m` : `${durationSec.toFixed(1)}s`;
  out.push(
    ` Combined: ${metrics.totalTokens.toLocaleString()} tokens @ ${metrics.combinedTokensPerSec.toFixed(1)} tok/s (${durationDisplay} total)`
  );

  if (metrics.timeToFirstTokenMs !== undefined) {
    out.push(` TTFT: ${metrics.timeToFirstTokenMs.toFixed(0)}ms`);
  }
  if (metrics.turnCount > 1) {
    out.push(` Turns: ${metrics.turnCount}`);
  }
  return out.join("\n");
}
/**
 * Convert PromptMetrics to a JSONL log entry.
 *
 * Adds an ISO-8601 creation timestamp and rounds the three rate fields to
 * two decimal places; token counts and durations are copied through
 * unchanged. The `turns` array is intentionally dropped from the entry.
 */
export function toLogEntry(metrics: PromptMetrics): MetricLogEntry {
  const round2 = (value: number): number => Math.round(value * 100) / 100;
  const {
    provider,
    model,
    turnCount,
    inputTokens,
    outputTokens,
    totalTokens,
    totalDurationMs,
    timeToFirstTokenMs,
  } = metrics;
  return {
    timestamp: new Date().toISOString(),
    provider,
    model,
    turnCount,
    inputTokens,
    outputTokens,
    totalTokens,
    prefillTokensPerSec: round2(metrics.prefillTokensPerSec),
    generationTokensPerSec: round2(metrics.generationTokensPerSec),
    combinedTokensPerSec: round2(metrics.combinedTokensPerSec),
    totalDurationMs,
    timeToFirstTokenMs,
  };
}

View File

@ -0,0 +1,398 @@
import {
calculateTurnMetrics,
aggregatePromptMetrics,
formatMetricsForDisplay,
toLogEntry,
type TurnMetrics,
type PromptMetrics,
} from "./llm-metrics-core.ts";
import { assertEquals, assertGreaterOrEqual, assertLessOrEqual } from "jsr:@std/assert";
Deno.test("calculateTurnMetrics - creates turn metrics object", () => {
const result = calculateTurnMetrics({
turnId: "turn-1",
inputTokens: 100,
outputTokens: 50,
durationMs: 2000,
timeToFirstTokenMs: 500,
});
assertEquals(result.turnId, "turn-1");
assertEquals(result.inputTokens, 100);
assertEquals(result.outputTokens, 50);
assertEquals(result.durationMs, 2000);
assertEquals(result.timeToFirstTokenMs, 500);
});
Deno.test("calculateTurnMetrics - handles missing timeToFirstToken", () => {
const result = calculateTurnMetrics({
turnId: "turn-1",
inputTokens: 100,
outputTokens: 50,
durationMs: 2000,
});
assertEquals(result.timeToFirstTokenMs, undefined);
});
Deno.test("aggregatePromptMetrics - aggregates single turn", () => {
const turnMetrics: TurnMetrics[] = [
{
turnId: "turn-1",
inputTokens: 1000,
outputTokens: 200,
durationMs: 5000,
timeToFirstTokenMs: 800,
},
];
const result = aggregatePromptMetrics({
provider: "anthropic",
model: "claude-sonnet-4",
turnMetrics,
});
assertEquals(result.provider, "anthropic");
assertEquals(result.model, "claude-sonnet-4");
assertEquals(result.turnCount, 1);
assertEquals(result.inputTokens, 1000);
assertEquals(result.outputTokens, 200);
assertEquals(result.totalTokens, 1200);
assertEquals(result.totalDurationMs, 5000);
assertEquals(result.timeToFirstTokenMs, 800);
// Tokens per second calculations
// prefill: 1000 input tokens / 0.8s TTFT = 1250 tok/s
assertEquals(result.prefillTokensPerSec, 1250);
// generation: 200 output tokens / 4.2s (5s - 0.8s) = 47.62 tok/s
assertGreaterOrEqual(result.generationTokensPerSec, 47.6);
assertLessOrEqual(result.generationTokensPerSec, 47.7);
// combined: 1200 total tokens / 5s = 240 tok/s
assertEquals(result.combinedTokensPerSec, 240);
});
Deno.test("aggregatePromptMetrics - aggregates multiple turns", () => {
const turnMetrics: TurnMetrics[] = [
{
turnId: "turn-1",
inputTokens: 1000,
outputTokens: 200,
durationMs: 3000,
timeToFirstTokenMs: 800,
},
{
turnId: "turn-2",
inputTokens: 500,
outputTokens: 150,
durationMs: 2000,
},
{
turnId: "turn-3",
inputTokens: 300,
outputTokens: 100,
durationMs: 1500,
},
];
const result = aggregatePromptMetrics({
provider: "openai",
model: "gpt-4o",
turnMetrics,
});
assertEquals(result.turnCount, 3);
assertEquals(result.inputTokens, 1800); // 1000 + 500 + 300
assertEquals(result.outputTokens, 450); // 200 + 150 + 100
assertEquals(result.totalTokens, 2250);
assertEquals(result.totalDurationMs, 6500); // 3000 + 2000 + 1500
assertEquals(result.timeToFirstTokenMs, 800); // From first turn only
// Tokens per second: prefill uses TTFT (0.8s), generation uses (total - TTFT) = 5.7s
// prefill: 1800 / 0.8 = 2250 tok/s
assertEquals(result.prefillTokensPerSec, 2250);
// generation: 450 / 5.7 = 78.95 tok/s
assertGreaterOrEqual(result.generationTokensPerSec, 78.9);
assertLessOrEqual(result.generationTokensPerSec, 79.0);
// combined: 2250 / 6.5 = 346.15 tok/s
assertGreaterOrEqual(result.combinedTokensPerSec, 346.1);
assertLessOrEqual(result.combinedTokensPerSec, 346.2);
});
Deno.test("aggregatePromptMetrics - handles empty turn list", () => {
const result = aggregatePromptMetrics({
provider: "anthropic",
model: "claude-sonnet-4",
turnMetrics: [],
});
assertEquals(result.turnCount, 0);
assertEquals(result.inputTokens, 0);
assertEquals(result.outputTokens, 0);
assertEquals(result.totalTokens, 0);
assertEquals(result.prefillTokensPerSec, 0);
assertEquals(result.generationTokensPerSec, 0);
assertEquals(result.combinedTokensPerSec, 0);
assertEquals(result.totalDurationMs, 0);
assertEquals(result.timeToFirstTokenMs, undefined);
});
Deno.test("formatMetricsForDisplay - formats single turn metrics", () => {
const metrics: PromptMetrics = {
provider: "anthropic",
model: "claude-sonnet-4",
turnCount: 1,
inputTokens: 1250,
outputTokens: 342,
totalTokens: 1592,
prefillTokensPerSec: 482.1,
generationTokensPerSec: 18.3,
combinedTokensPerSec: 38.0,
totalDurationMs: 21600,
timeToFirstTokenMs: 850,
turns: [],
};
const display = formatMetricsForDisplay(metrics);
assertEquals(display.includes("anthropic/claude-sonnet-4"), true);
assertEquals(display.includes("1,250 tokens"), true);
assertEquals(display.includes("482.1 tok/s"), true);
assertEquals(display.includes("342 tokens"), true);
assertEquals(display.includes("18.3 tok/s"), true);
assertEquals(display.includes("1,592 tokens"), true);
assertEquals(display.includes("38.0 tok/s"), true);
assertEquals(display.includes("21.6s"), true);
assertEquals(display.includes("TTFT: 850ms"), true);
});
Deno.test("formatMetricsForDisplay - formats duration as minutes when over 60s", () => {
const metrics: PromptMetrics = {
provider: "openai",
model: "gpt-4o",
turnCount: 1,
inputTokens: 5000,
outputTokens: 1000,
totalTokens: 6000,
prefillTokensPerSec: 50,
generationTokensPerSec: 10,
combinedTokensPerSec: 60,
totalDurationMs: 120000, // 2 minutes
timeToFirstTokenMs: 1500,
turns: [],
};
const display = formatMetricsForDisplay(metrics);
assertEquals(display.includes("2.0m"), true);
});
Deno.test("formatMetricsForDisplay - omits turn count when single turn", () => {
const metrics: PromptMetrics = {
provider: "anthropic",
model: "claude-sonnet-4",
turnCount: 1,
inputTokens: 100,
outputTokens: 50,
totalTokens: 150,
prefillTokensPerSec: 20,
generationTokensPerSec: 10,
combinedTokensPerSec: 30,
totalDurationMs: 5000,
timeToFirstTokenMs: 500,
turns: [],
};
const display = formatMetricsForDisplay(metrics);
assertEquals(display.includes("Turns: 1"), false);
});
Deno.test("formatMetricsForDisplay - omits prefill/generation when TTFT is unavailable", () => {
const metrics: PromptMetrics = {
provider: "openai",
model: "gpt-4o",
turnCount: 1,
inputTokens: 1000,
outputTokens: 200,
totalTokens: 1200,
prefillTokensPerSec: 0,
generationTokensPerSec: 0,
combinedTokensPerSec: 240,
totalDurationMs: 5000,
timeToFirstTokenMs: undefined,
turns: [],
};
const display = formatMetricsForDisplay(metrics);
assertEquals(display.includes("Prefill:"), false);
assertEquals(display.includes("Generation:"), false);
assertEquals(display.includes("1,200 tokens"), true);
assertEquals(display.includes("240.0 tok/s"), true);
});
Deno.test("formatMetricsForDisplay - shows turn count when multiple turns", () => {
const metrics: PromptMetrics = {
provider: "anthropic",
model: "claude-sonnet-4",
turnCount: 3,
inputTokens: 100,
outputTokens: 50,
totalTokens: 150,
prefillTokensPerSec: 20,
generationTokensPerSec: 10,
combinedTokensPerSec: 30,
totalDurationMs: 5000,
timeToFirstTokenMs: 500,
turns: [],
};
const display = formatMetricsForDisplay(metrics);
assertEquals(display.includes("Turns: 3"), true);
});
Deno.test("toLogEntry - creates JSON-serializable log entry", () => {
const metrics: PromptMetrics = {
provider: "anthropic",
model: "claude-sonnet-4",
turnCount: 2,
inputTokens: 1250,
outputTokens: 342,
totalTokens: 1592,
prefillTokensPerSec: 482.12345,
generationTokensPerSec: 18.34567,
combinedTokensPerSec: 38.09876,
totalDurationMs: 21600,
timeToFirstTokenMs: 850,
turns: [],
};
const logEntry = toLogEntry(metrics);
assertEquals(logEntry.provider, "anthropic");
assertEquals(logEntry.model, "claude-sonnet-4");
assertEquals(logEntry.turnCount, 2);
assertEquals(logEntry.inputTokens, 1250);
assertEquals(logEntry.outputTokens, 342);
assertEquals(logEntry.totalTokens, 1592);
// Rounded to 2 decimal places
assertEquals(logEntry.prefillTokensPerSec, 482.12);
assertEquals(logEntry.generationTokensPerSec, 18.35);
assertEquals(logEntry.combinedTokensPerSec, 38.1);
assertEquals(logEntry.totalDurationMs, 21600);
assertEquals(logEntry.timeToFirstTokenMs, 850);
// Should have ISO timestamp
assertEquals(logEntry.timestamp.includes("T"), true);
assertEquals(logEntry.timestamp.includes("Z"), true);
// Should be JSON serializable
const json = JSON.stringify(logEntry);
assertEquals(json.length > 0, true);
const parsed = JSON.parse(json);
assertEquals(parsed.provider, "anthropic");
});
Deno.test("aggregatePromptMetrics - uses full duration when TTFT is undefined", () => {
const turnMetrics: TurnMetrics[] = [
{
turnId: "turn-1",
inputTokens: 1000,
outputTokens: 200,
durationMs: 5000,
// No timeToFirstTokenMs
},
];
const result = aggregatePromptMetrics({
provider: "openai",
model: "gpt-4o",
turnMetrics,
});
assertEquals(result.turnCount, 1);
assertEquals(result.inputTokens, 1000);
assertEquals(result.outputTokens, 200);
// Without TTFT, prefill and generation rates are 0 (cannot separate phases)
// Only combined rate is meaningful
assertEquals(result.prefillTokensPerSec, 0);
assertEquals(result.generationTokensPerSec, 0);
assertEquals(result.combinedTokensPerSec, 240);
});
Deno.test("toLogEntry - handles missing timeToFirstToken", () => {
const metrics: PromptMetrics = {
provider: "anthropic",
model: "claude-sonnet-4",
turnCount: 1,
inputTokens: 100,
outputTokens: 50,
totalTokens: 150,
prefillTokensPerSec: 20,
generationTokensPerSec: 10,
combinedTokensPerSec: 30,
totalDurationMs: 5000,
timeToFirstTokenMs: undefined,
turns: [],
};
const logEntry = toLogEntry(metrics);
assertEquals(logEntry.timeToFirstTokenMs, undefined);
});
Deno.test("Integration - full flow from turns to log entry", () => {
// Simulate a real scenario with multiple turns
const turn1 = calculateTurnMetrics({
turnId: "turn-1",
inputTokens: 2000,
outputTokens: 500,
durationMs: 8000,
timeToFirstTokenMs: 1200,
});
const turn2 = calculateTurnMetrics({
turnId: "turn-2",
inputTokens: 800,
outputTokens: 200,
durationMs: 3000,
});
const promptMetrics = aggregatePromptMetrics({
provider: "groq",
model: "llama-3.1-70b",
turnMetrics: [turn1, turn2],
});
const display = formatMetricsForDisplay(promptMetrics);
const logEntry = toLogEntry(promptMetrics);
// Verify aggregation
assertEquals(promptMetrics.turnCount, 2);
assertEquals(promptMetrics.inputTokens, 2800);
assertEquals(promptMetrics.outputTokens, 700);
assertEquals(promptMetrics.totalTokens, 3500);
assertEquals(promptMetrics.totalDurationMs, 11000);
assertEquals(promptMetrics.timeToFirstTokenMs, 1200);
// Verify corrected rate calculations
// prefill: 2800 / 1.2 = 2333.33 tok/s
assertGreaterOrEqual(promptMetrics.prefillTokensPerSec, 2333.3);
assertLessOrEqual(promptMetrics.prefillTokensPerSec, 2333.4);
// generation: 700 / 9.8 = 71.43 tok/s
assertGreaterOrEqual(promptMetrics.generationTokensPerSec, 71.4);
assertLessOrEqual(promptMetrics.generationTokensPerSec, 71.5);
// combined: 3500 / 11 = 318.18 tok/s
assertGreaterOrEqual(promptMetrics.combinedTokensPerSec, 318.1);
assertLessOrEqual(promptMetrics.combinedTokensPerSec, 318.2);
// Verify display contains key info
assertEquals(display.includes("groq/llama-3.1-70b"), true);
assertEquals(display.includes("TTFT: 1200ms"), true);
// Verify log entry
assertEquals(logEntry.provider, "groq");
assertEquals(logEntry.model, "llama-3.1-70b");
assertEquals(logEntry.turnCount, 2);
});

View File

@ -0,0 +1,100 @@
// LLM Performance Metrics Extension
// Captures and displays LLM inference performance metrics
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
import { appendFileSync, mkdirSync } from "node:fs";
import { dirname, join } from "node:path";
// Re-export core functions from the shared metrics module
import {
calculateTurnMetrics,
aggregatePromptMetrics,
formatMetricsForDisplay,
toLogEntry,
type TurnMetrics,
type PromptMetrics,
type MetricLogEntry,
} from "./llm-metrics-core.ts";
// ============================================================================
// Extension Event Handlers (imperative shell)
// ============================================================================
// State tracking
// Module-level mutable state for the prompt currently being measured.
// NOTE(review): assumes a single active prompt per extension instance —
// confirm the host never interleaves prompts concurrently.
let promptStartMs: number | undefined;      // Date.now() captured at agent_start
let currentTurnStartMs: number | undefined; // Date.now() captured at turn_start
let currentTurnId: string | undefined;      // e.g. "turn-0"; set on turn_start
let turnMetrics: TurnMetrics[] = [];        // per-turn metrics collected this prompt
let firstTokenTimeMs: number | undefined;   // Date.now() of the first streamed text token
let provider: string | undefined;           // model provider captured at agent_start
let model: string | undefined;              // model id captured at agent_start
/**
 * Wire the performance-metrics extension into the pi event bus.
 *
 * Per prompt: captures start time and model info on agent_start, per-turn
 * timing on turn_start/turn_end, time-to-first-token on the first streamed
 * text delta, then on agent_end displays a summary in the TUI and appends a
 * JSON line to .pi/llm-metrics.log.
 *
 * Fix: firstTokenTimeMs is now reset on every turn_start. Previously it was
 * only cleared on agent_start, so for every turn after the first the TTFT
 * was computed from the FIRST turn's token timestamp minus a LATER turn's
 * start time, producing negative per-turn TTFT values in the logged turns.
 */
export default function (pi: ExtensionAPI) {
  // JSONL log destination, relative to the current working directory.
  const logFile = join(process.cwd(), ".pi", "llm-metrics.log");

  pi.on("agent_start", async (_event, ctx) => {
    // Without a model there is nothing meaningful to measure.
    if (!ctx.model) return;
    promptStartMs = Date.now();
    turnMetrics = [];
    firstTokenTimeMs = undefined;
    provider = ctx.model.provider;
    model = ctx.model.id;
  });

  pi.on("turn_start", async (event, _ctx) => {
    currentTurnStartMs = Date.now();
    currentTurnId = `turn-${event.turnIndex}`;
    // Reset per turn so each turn measures its own time-to-first-token
    // (a stale value from a previous turn would yield a negative TTFT).
    firstTokenTimeMs = undefined;
  });

  pi.on("message_update", async (event, _ctx) => {
    // Capture TTFT on the first streamed text token of the current turn.
    if (firstTokenTimeMs === undefined && event.assistantMessageEvent?.type === "text_delta") {
      firstTokenTimeMs = Date.now();
    }
  });

  pi.on("turn_end", async (event, _ctx) => {
    // Only assistant messages carry usage we want to record.
    if (event.message.role !== "assistant") return;
    const inputTokens = event.message.usage?.input ?? 0;
    const outputTokens = event.message.usage?.output ?? 0;
    const durationMs = currentTurnStartMs !== undefined ? Date.now() - currentTurnStartMs : 0;
    // TTFT is only valid when a first token was observed after this turn started.
    const ttftMs =
      firstTokenTimeMs !== undefined &&
      currentTurnStartMs !== undefined &&
      firstTokenTimeMs >= currentTurnStartMs
        ? firstTokenTimeMs - currentTurnStartMs
        : undefined;
    turnMetrics.push(
      calculateTurnMetrics({
        // Fall back to the event's own index instead of asserting non-null,
        // in case turn_end arrives without a preceding turn_start.
        turnId: currentTurnId ?? `turn-${event.turnIndex}`,
        inputTokens,
        outputTokens,
        durationMs,
        timeToFirstTokenMs: ttftMs,
      })
    );
  });

  pi.on("agent_end", async (_event, ctx) => {
    // Skip if agent_start never initialized a measurable prompt.
    if (!provider || !model || promptStartMs === undefined) return;
    const promptMetrics = aggregatePromptMetrics({
      provider,
      model,
      turnMetrics,
    });

    // Display in the TUI.
    const display = formatMetricsForDisplay(promptMetrics);
    ctx.ui.notify(display, "info");
    ctx.ui.setStatus("metrics", `📊 ${promptMetrics.combinedTokensPerSec.toFixed(1)} tok/s`);

    // Append one JSON line per prompt to the metrics log.
    const logEntry = toLogEntry(promptMetrics);
    mkdirSync(dirname(logFile), { recursive: true });
    appendFileSync(logFile, JSON.stringify(logEntry) + "\n", "utf8");

    // Reset prompt-level state for the next run.
    promptStartMs = undefined;
    turnMetrics = [];
    firstTokenTimeMs = undefined;
  });
}