move pi-llm-performance to this repo
This commit is contained in:
parent
c62eb432bf
commit
0cf13ed54e
30
packages/pi-llm-performance/README.md
Normal file
30
packages/pi-llm-performance/README.md
Normal file
@ -0,0 +1,30 @@
|
||||
# pi-llm-performance
|
||||
|
||||
LLM performance metrics extension
|
||||
|
||||
## How to install
|
||||
|
||||
Add to your global pi settings:
|
||||
|
||||
```bash
|
||||
pi install <path-to-your-clone>/packages/pi-llm-performance
|
||||
```
|
||||
|
||||
Or add manually to `~/.pi/agent/settings.json`:
|
||||
|
||||
```
|
||||
"packages": [
|
||||
"/Users/willem/dev/spikes/llm/custom-coding-agent/packages/pi-llm-performance",
|
||||
...
|
||||
]
|
||||
```
|
||||
|
||||
Then reload pi:
|
||||
|
||||
```bash
|
||||
/reload
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
18
packages/pi-llm-performance/deno.lock
generated
Normal file
18
packages/pi-llm-performance/deno.lock
generated
Normal file
@ -0,0 +1,18 @@
|
||||
{
|
||||
"version": "5",
|
||||
"specifiers": {
|
||||
"jsr:@std/assert@*": "1.0.19",
|
||||
"jsr:@std/internal@^1.0.12": "1.0.12"
|
||||
},
|
||||
"jsr": {
|
||||
"@std/assert@1.0.19": {
|
||||
"integrity": "eaada96ee120cb980bc47e040f82814d786fe8162ecc53c91d8df60b8755991e",
|
||||
"dependencies": [
|
||||
"jsr:@std/internal"
|
||||
]
|
||||
},
|
||||
"@std/internal@1.0.12": {
|
||||
"integrity": "972a634fd5bc34b242024402972cd5143eac68d8dffaca5eaa4dba30ce17b027"
|
||||
}
|
||||
}
|
||||
}
|
||||
17
packages/pi-llm-performance/package.json
Normal file
17
packages/pi-llm-performance/package.json
Normal file
@ -0,0 +1,17 @@
|
||||
{
|
||||
"name": "pi-llm-performance",
|
||||
"version": "0.1.0",
|
||||
"description": "LLM performance metrics extension",
|
||||
"type": "module",
|
||||
"exports": {
|
||||
".": "./src/llm-performance-metrics.ts"
|
||||
},
|
||||
"keywords": ["pi-package"],
|
||||
"pi": {
|
||||
"extensions": ["src/llm-performance-metrics.ts"]
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@mariozechner/pi-coding-agent": "*"
|
||||
},
|
||||
"license": "MIT"
|
||||
}
|
||||
398
packages/pi-llm-performance/src/llm-metrics-core.test.ts
Normal file
398
packages/pi-llm-performance/src/llm-metrics-core.test.ts
Normal file
@ -0,0 +1,398 @@
|
||||
import {
|
||||
calculateTurnMetrics,
|
||||
aggregatePromptMetrics,
|
||||
formatMetricsForDisplay,
|
||||
toLogEntry,
|
||||
type TurnMetrics,
|
||||
type PromptMetrics,
|
||||
} from "./llm-metrics-core.ts";
|
||||
import { assertEquals, assertGreaterOrEqual, assertLessOrEqual } from "jsr:@std/assert";
|
||||
|
||||
Deno.test("calculateTurnMetrics - creates turn metrics object", () => {
|
||||
const result = calculateTurnMetrics({
|
||||
turnId: "turn-1",
|
||||
inputTokens: 100,
|
||||
outputTokens: 50,
|
||||
durationMs: 2000,
|
||||
timeToFirstTokenMs: 500,
|
||||
});
|
||||
|
||||
assertEquals(result.turnId, "turn-1");
|
||||
assertEquals(result.inputTokens, 100);
|
||||
assertEquals(result.outputTokens, 50);
|
||||
assertEquals(result.durationMs, 2000);
|
||||
assertEquals(result.timeToFirstTokenMs, 500);
|
||||
});
|
||||
|
||||
Deno.test("calculateTurnMetrics - handles missing timeToFirstToken", () => {
|
||||
const result = calculateTurnMetrics({
|
||||
turnId: "turn-1",
|
||||
inputTokens: 100,
|
||||
outputTokens: 50,
|
||||
durationMs: 2000,
|
||||
});
|
||||
|
||||
assertEquals(result.timeToFirstTokenMs, undefined);
|
||||
});
|
||||
|
||||
Deno.test("aggregatePromptMetrics - aggregates single turn", () => {
|
||||
const turnMetrics: TurnMetrics[] = [
|
||||
{
|
||||
turnId: "turn-1",
|
||||
inputTokens: 1000,
|
||||
outputTokens: 200,
|
||||
durationMs: 5000,
|
||||
timeToFirstTokenMs: 800,
|
||||
},
|
||||
];
|
||||
|
||||
const result = aggregatePromptMetrics({
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4",
|
||||
turnMetrics,
|
||||
});
|
||||
|
||||
assertEquals(result.provider, "anthropic");
|
||||
assertEquals(result.model, "claude-sonnet-4");
|
||||
assertEquals(result.turnCount, 1);
|
||||
assertEquals(result.inputTokens, 1000);
|
||||
assertEquals(result.outputTokens, 200);
|
||||
assertEquals(result.totalTokens, 1200);
|
||||
assertEquals(result.totalDurationMs, 5000);
|
||||
assertEquals(result.timeToFirstTokenMs, 800);
|
||||
|
||||
// Tokens per second calculations
|
||||
// prefill: 1000 input tokens / 0.8s TTFT = 1250 tok/s
|
||||
assertEquals(result.prefillTokensPerSec, 1250);
|
||||
// generation: 200 output tokens / 4.2s (5s - 0.8s) = 47.62 tok/s
|
||||
assertGreaterOrEqual(result.generationTokensPerSec, 47.6);
|
||||
assertLessOrEqual(result.generationTokensPerSec, 47.7);
|
||||
// combined: 1200 total tokens / 5s = 240 tok/s
|
||||
assertEquals(result.combinedTokensPerSec, 240);
|
||||
});
|
||||
|
||||
Deno.test("aggregatePromptMetrics - aggregates multiple turns", () => {
|
||||
const turnMetrics: TurnMetrics[] = [
|
||||
{
|
||||
turnId: "turn-1",
|
||||
inputTokens: 1000,
|
||||
outputTokens: 200,
|
||||
durationMs: 3000,
|
||||
timeToFirstTokenMs: 800,
|
||||
},
|
||||
{
|
||||
turnId: "turn-2",
|
||||
inputTokens: 500,
|
||||
outputTokens: 150,
|
||||
durationMs: 2000,
|
||||
},
|
||||
{
|
||||
turnId: "turn-3",
|
||||
inputTokens: 300,
|
||||
outputTokens: 100,
|
||||
durationMs: 1500,
|
||||
},
|
||||
];
|
||||
|
||||
const result = aggregatePromptMetrics({
|
||||
provider: "openai",
|
||||
model: "gpt-4o",
|
||||
turnMetrics,
|
||||
});
|
||||
|
||||
assertEquals(result.turnCount, 3);
|
||||
assertEquals(result.inputTokens, 1800); // 1000 + 500 + 300
|
||||
assertEquals(result.outputTokens, 450); // 200 + 150 + 100
|
||||
assertEquals(result.totalTokens, 2250);
|
||||
assertEquals(result.totalDurationMs, 6500); // 3000 + 2000 + 1500
|
||||
assertEquals(result.timeToFirstTokenMs, 800); // From first turn only
|
||||
|
||||
// Tokens per second: prefill uses TTFT (0.8s), generation uses (total - TTFT) = 5.7s
|
||||
// prefill: 1800 / 0.8 = 2250 tok/s
|
||||
assertEquals(result.prefillTokensPerSec, 2250);
|
||||
// generation: 450 / 5.7 = 78.95 tok/s
|
||||
assertGreaterOrEqual(result.generationTokensPerSec, 78.9);
|
||||
assertLessOrEqual(result.generationTokensPerSec, 79.0);
|
||||
// combined: 2250 / 6.5 = 346.15 tok/s
|
||||
assertGreaterOrEqual(result.combinedTokensPerSec, 346.1);
|
||||
assertLessOrEqual(result.combinedTokensPerSec, 346.2);
|
||||
});
|
||||
|
||||
Deno.test("aggregatePromptMetrics - handles empty turn list", () => {
|
||||
const result = aggregatePromptMetrics({
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4",
|
||||
turnMetrics: [],
|
||||
});
|
||||
|
||||
assertEquals(result.turnCount, 0);
|
||||
assertEquals(result.inputTokens, 0);
|
||||
assertEquals(result.outputTokens, 0);
|
||||
assertEquals(result.totalTokens, 0);
|
||||
assertEquals(result.prefillTokensPerSec, 0);
|
||||
assertEquals(result.generationTokensPerSec, 0);
|
||||
assertEquals(result.combinedTokensPerSec, 0);
|
||||
assertEquals(result.totalDurationMs, 0);
|
||||
assertEquals(result.timeToFirstTokenMs, undefined);
|
||||
});
|
||||
|
||||
Deno.test("formatMetricsForDisplay - formats single turn metrics", () => {
|
||||
const metrics: PromptMetrics = {
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4",
|
||||
turnCount: 1,
|
||||
inputTokens: 1250,
|
||||
outputTokens: 342,
|
||||
totalTokens: 1592,
|
||||
prefillTokensPerSec: 482.1,
|
||||
generationTokensPerSec: 18.3,
|
||||
combinedTokensPerSec: 38.0,
|
||||
totalDurationMs: 21600,
|
||||
timeToFirstTokenMs: 850,
|
||||
turns: [],
|
||||
};
|
||||
|
||||
const display = formatMetricsForDisplay(metrics);
|
||||
|
||||
assertEquals(display.includes("anthropic/claude-sonnet-4"), true);
|
||||
assertEquals(display.includes("1,250 tokens"), true);
|
||||
assertEquals(display.includes("482.1 tok/s"), true);
|
||||
assertEquals(display.includes("342 tokens"), true);
|
||||
assertEquals(display.includes("18.3 tok/s"), true);
|
||||
assertEquals(display.includes("1,592 tokens"), true);
|
||||
assertEquals(display.includes("38.0 tok/s"), true);
|
||||
assertEquals(display.includes("21.6s"), true);
|
||||
assertEquals(display.includes("TTFT: 850ms"), true);
|
||||
});
|
||||
|
||||
Deno.test("formatMetricsForDisplay - formats duration as minutes when over 60s", () => {
|
||||
const metrics: PromptMetrics = {
|
||||
provider: "openai",
|
||||
model: "gpt-4o",
|
||||
turnCount: 1,
|
||||
inputTokens: 5000,
|
||||
outputTokens: 1000,
|
||||
totalTokens: 6000,
|
||||
prefillTokensPerSec: 50,
|
||||
generationTokensPerSec: 10,
|
||||
combinedTokensPerSec: 60,
|
||||
totalDurationMs: 120000, // 2 minutes
|
||||
timeToFirstTokenMs: 1500,
|
||||
turns: [],
|
||||
};
|
||||
|
||||
const display = formatMetricsForDisplay(metrics);
|
||||
|
||||
assertEquals(display.includes("2.0m"), true);
|
||||
});
|
||||
|
||||
Deno.test("formatMetricsForDisplay - omits turn count when single turn", () => {
|
||||
const metrics: PromptMetrics = {
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4",
|
||||
turnCount: 1,
|
||||
inputTokens: 100,
|
||||
outputTokens: 50,
|
||||
totalTokens: 150,
|
||||
prefillTokensPerSec: 20,
|
||||
generationTokensPerSec: 10,
|
||||
combinedTokensPerSec: 30,
|
||||
totalDurationMs: 5000,
|
||||
timeToFirstTokenMs: 500,
|
||||
turns: [],
|
||||
};
|
||||
|
||||
const display = formatMetricsForDisplay(metrics);
|
||||
|
||||
assertEquals(display.includes("Turns: 1"), false);
|
||||
});
|
||||
|
||||
Deno.test("formatMetricsForDisplay - omits prefill/generation when TTFT is unavailable", () => {
|
||||
const metrics: PromptMetrics = {
|
||||
provider: "openai",
|
||||
model: "gpt-4o",
|
||||
turnCount: 1,
|
||||
inputTokens: 1000,
|
||||
outputTokens: 200,
|
||||
totalTokens: 1200,
|
||||
prefillTokensPerSec: 0,
|
||||
generationTokensPerSec: 0,
|
||||
combinedTokensPerSec: 240,
|
||||
totalDurationMs: 5000,
|
||||
timeToFirstTokenMs: undefined,
|
||||
turns: [],
|
||||
};
|
||||
|
||||
const display = formatMetricsForDisplay(metrics);
|
||||
|
||||
assertEquals(display.includes("Prefill:"), false);
|
||||
assertEquals(display.includes("Generation:"), false);
|
||||
assertEquals(display.includes("1,200 tokens"), true);
|
||||
assertEquals(display.includes("240.0 tok/s"), true);
|
||||
});
|
||||
|
||||
Deno.test("formatMetricsForDisplay - shows turn count when multiple turns", () => {
|
||||
const metrics: PromptMetrics = {
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4",
|
||||
turnCount: 3,
|
||||
inputTokens: 100,
|
||||
outputTokens: 50,
|
||||
totalTokens: 150,
|
||||
prefillTokensPerSec: 20,
|
||||
generationTokensPerSec: 10,
|
||||
combinedTokensPerSec: 30,
|
||||
totalDurationMs: 5000,
|
||||
timeToFirstTokenMs: 500,
|
||||
turns: [],
|
||||
};
|
||||
|
||||
const display = formatMetricsForDisplay(metrics);
|
||||
|
||||
assertEquals(display.includes("Turns: 3"), true);
|
||||
});
|
||||
|
||||
Deno.test("toLogEntry - creates JSON-serializable log entry", () => {
|
||||
const metrics: PromptMetrics = {
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4",
|
||||
turnCount: 2,
|
||||
inputTokens: 1250,
|
||||
outputTokens: 342,
|
||||
totalTokens: 1592,
|
||||
prefillTokensPerSec: 482.12345,
|
||||
generationTokensPerSec: 18.34567,
|
||||
combinedTokensPerSec: 38.09876,
|
||||
totalDurationMs: 21600,
|
||||
timeToFirstTokenMs: 850,
|
||||
turns: [],
|
||||
};
|
||||
|
||||
const logEntry = toLogEntry(metrics);
|
||||
|
||||
assertEquals(logEntry.provider, "anthropic");
|
||||
assertEquals(logEntry.model, "claude-sonnet-4");
|
||||
assertEquals(logEntry.turnCount, 2);
|
||||
assertEquals(logEntry.inputTokens, 1250);
|
||||
assertEquals(logEntry.outputTokens, 342);
|
||||
assertEquals(logEntry.totalTokens, 1592);
|
||||
// Rounded to 2 decimal places
|
||||
assertEquals(logEntry.prefillTokensPerSec, 482.12);
|
||||
assertEquals(logEntry.generationTokensPerSec, 18.35);
|
||||
assertEquals(logEntry.combinedTokensPerSec, 38.1);
|
||||
assertEquals(logEntry.totalDurationMs, 21600);
|
||||
assertEquals(logEntry.timeToFirstTokenMs, 850);
|
||||
|
||||
// Should have ISO timestamp
|
||||
assertEquals(logEntry.timestamp.includes("T"), true);
|
||||
assertEquals(logEntry.timestamp.includes("Z"), true);
|
||||
|
||||
// Should be JSON serializable
|
||||
const json = JSON.stringify(logEntry);
|
||||
assertEquals(json.length > 0, true);
|
||||
const parsed = JSON.parse(json);
|
||||
assertEquals(parsed.provider, "anthropic");
|
||||
});
|
||||
|
||||
Deno.test("aggregatePromptMetrics - uses full duration when TTFT is undefined", () => {
|
||||
const turnMetrics: TurnMetrics[] = [
|
||||
{
|
||||
turnId: "turn-1",
|
||||
inputTokens: 1000,
|
||||
outputTokens: 200,
|
||||
durationMs: 5000,
|
||||
// No timeToFirstTokenMs
|
||||
},
|
||||
];
|
||||
|
||||
const result = aggregatePromptMetrics({
|
||||
provider: "openai",
|
||||
model: "gpt-4o",
|
||||
turnMetrics,
|
||||
});
|
||||
|
||||
assertEquals(result.turnCount, 1);
|
||||
assertEquals(result.inputTokens, 1000);
|
||||
assertEquals(result.outputTokens, 200);
|
||||
// Without TTFT, prefill and generation rates are 0 (cannot separate phases)
|
||||
// Only combined rate is meaningful
|
||||
assertEquals(result.prefillTokensPerSec, 0);
|
||||
assertEquals(result.generationTokensPerSec, 0);
|
||||
assertEquals(result.combinedTokensPerSec, 240);
|
||||
});
|
||||
|
||||
Deno.test("toLogEntry - handles missing timeToFirstToken", () => {
|
||||
const metrics: PromptMetrics = {
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4",
|
||||
turnCount: 1,
|
||||
inputTokens: 100,
|
||||
outputTokens: 50,
|
||||
totalTokens: 150,
|
||||
prefillTokensPerSec: 20,
|
||||
generationTokensPerSec: 10,
|
||||
combinedTokensPerSec: 30,
|
||||
totalDurationMs: 5000,
|
||||
timeToFirstTokenMs: undefined,
|
||||
turns: [],
|
||||
};
|
||||
|
||||
const logEntry = toLogEntry(metrics);
|
||||
|
||||
assertEquals(logEntry.timeToFirstTokenMs, undefined);
|
||||
});
|
||||
|
||||
Deno.test("Integration - full flow from turns to log entry", () => {
|
||||
// Simulate a real scenario with multiple turns
|
||||
const turn1 = calculateTurnMetrics({
|
||||
turnId: "turn-1",
|
||||
inputTokens: 2000,
|
||||
outputTokens: 500,
|
||||
durationMs: 8000,
|
||||
timeToFirstTokenMs: 1200,
|
||||
});
|
||||
|
||||
const turn2 = calculateTurnMetrics({
|
||||
turnId: "turn-2",
|
||||
inputTokens: 800,
|
||||
outputTokens: 200,
|
||||
durationMs: 3000,
|
||||
});
|
||||
|
||||
const promptMetrics = aggregatePromptMetrics({
|
||||
provider: "groq",
|
||||
model: "llama-3.1-70b",
|
||||
turnMetrics: [turn1, turn2],
|
||||
});
|
||||
|
||||
const display = formatMetricsForDisplay(promptMetrics);
|
||||
const logEntry = toLogEntry(promptMetrics);
|
||||
|
||||
// Verify aggregation
|
||||
assertEquals(promptMetrics.turnCount, 2);
|
||||
assertEquals(promptMetrics.inputTokens, 2800);
|
||||
assertEquals(promptMetrics.outputTokens, 700);
|
||||
assertEquals(promptMetrics.totalTokens, 3500);
|
||||
assertEquals(promptMetrics.totalDurationMs, 11000);
|
||||
assertEquals(promptMetrics.timeToFirstTokenMs, 1200);
|
||||
|
||||
// Verify corrected rate calculations
|
||||
// prefill: 2800 / 1.2 = 2333.33 tok/s
|
||||
assertGreaterOrEqual(promptMetrics.prefillTokensPerSec, 2333.3);
|
||||
assertLessOrEqual(promptMetrics.prefillTokensPerSec, 2333.4);
|
||||
// generation: 700 / 9.8 = 71.43 tok/s
|
||||
assertGreaterOrEqual(promptMetrics.generationTokensPerSec, 71.4);
|
||||
assertLessOrEqual(promptMetrics.generationTokensPerSec, 71.5);
|
||||
// combined: 3500 / 11 = 318.18 tok/s
|
||||
assertGreaterOrEqual(promptMetrics.combinedTokensPerSec, 318.1);
|
||||
assertLessOrEqual(promptMetrics.combinedTokensPerSec, 318.2);
|
||||
|
||||
// Verify display contains key info
|
||||
assertEquals(display.includes("groq/llama-3.1-70b"), true);
|
||||
assertEquals(display.includes("TTFT: 1200ms"), true);
|
||||
|
||||
// Verify log entry
|
||||
assertEquals(logEntry.provider, "groq");
|
||||
assertEquals(logEntry.model, "llama-3.1-70b");
|
||||
assertEquals(logEntry.turnCount, 2);
|
||||
});
|
||||
201
packages/pi-llm-performance/src/llm-metrics-core.ts
Normal file
201
packages/pi-llm-performance/src/llm-metrics-core.ts
Normal file
@ -0,0 +1,201 @@
|
||||
// Functional core for LLM performance metrics calculation
|
||||
|
||||
/** Raw measurements captured for a single LLM turn (one request/response cycle). */
export interface TurnMetrics {
  turnId: string;
  inputTokens: number;
  outputTokens: number;
  // Wall-clock duration of the whole turn.
  durationMs: number;
  // Time until the first output token arrived; optional because not every
  // provider reports it.
  timeToFirstTokenMs?: number;
}

/** Aggregated metrics for a whole prompt (one or more turns). */
export interface PromptMetrics {
  provider: string;
  model: string;
  turnCount: number;
  inputTokens: number;
  outputTokens: number;
  totalTokens: number;
  // Throughput rates; aggregation reports 0 when a phase cannot be measured
  // (e.g. prefill/generation when TTFT is unavailable).
  prefillTokensPerSec: number;
  generationTokensPerSec: number;
  combinedTokensPerSec: number;
  totalDurationMs: number;
  // TTFT of the first turn, when available.
  timeToFirstTokenMs?: number;
  // The per-turn records this aggregate was computed from.
  turns: TurnMetrics[];
}

/** Flat, JSON-serializable record written to the metrics log (see toLogEntry). */
export interface MetricLogEntry {
  // ISO-8601 timestamp of when the entry was created.
  timestamp: string;
  provider: string;
  model: string;
  turnCount: number;
  inputTokens: number;
  outputTokens: number;
  totalTokens: number;
  // Rates rounded to two decimal places by toLogEntry.
  prefillTokensPerSec: number;
  generationTokensPerSec: number;
  combinedTokensPerSec: number;
  totalDurationMs: number;
  timeToFirstTokenMs?: number;
}
|
||||
|
||||
/**
|
||||
* Calculate metrics for a single turn
|
||||
*/
|
||||
export function calculateTurnMetrics(params: {
|
||||
turnId: string;
|
||||
inputTokens: number;
|
||||
outputTokens: number;
|
||||
durationMs: number;
|
||||
timeToFirstTokenMs?: number;
|
||||
}): TurnMetrics {
|
||||
return {
|
||||
turnId: params.turnId,
|
||||
inputTokens: params.inputTokens,
|
||||
outputTokens: params.outputTokens,
|
||||
durationMs: params.durationMs,
|
||||
timeToFirstTokenMs: params.timeToFirstTokenMs,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Aggregate multiple turn metrics into prompt-level metrics
|
||||
*/
|
||||
export function aggregatePromptMetrics(params: {
|
||||
provider: string;
|
||||
model: string;
|
||||
turnMetrics: TurnMetrics[];
|
||||
}): PromptMetrics {
|
||||
const { provider, model, turnMetrics } = params;
|
||||
|
||||
if (turnMetrics.length === 0) {
|
||||
return {
|
||||
provider,
|
||||
model,
|
||||
turnCount: 0,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
totalTokens: 0,
|
||||
prefillTokensPerSec: 0,
|
||||
generationTokensPerSec: 0,
|
||||
combinedTokensPerSec: 0,
|
||||
totalDurationMs: 0,
|
||||
turns: [],
|
||||
};
|
||||
}
|
||||
|
||||
// Sum tokens across all turns
|
||||
const inputTokens = turnMetrics.reduce((sum, t) => sum + t.inputTokens, 0);
|
||||
const outputTokens = turnMetrics.reduce((sum, t) => sum + t.outputTokens, 0);
|
||||
const totalTokens = inputTokens + outputTokens;
|
||||
|
||||
// Sum duration across all turns
|
||||
const totalDurationMs = turnMetrics.reduce((sum, t) => sum + t.durationMs, 0);
|
||||
const totalDurationSec = totalDurationMs / 1000;
|
||||
|
||||
// Time to first token is from the first turn
|
||||
const timeToFirstTokenMs = turnMetrics[0]?.timeToFirstTokenMs;
|
||||
|
||||
// Calculate tokens per second
|
||||
// Prefill: input tokens / TTFT duration (prefill phase)
|
||||
// Generation: output tokens / (totalDuration - TTFT) (generation phase)
|
||||
// Combined: total tokens / total duration
|
||||
// When TTFT is unavailable, prefill and generation phases cannot be separated,
|
||||
// so we set them to 0 and only report combined.
|
||||
const ttftSec = timeToFirstTokenMs !== undefined ? timeToFirstTokenMs / 1000 : undefined;
|
||||
const generationDurationSec = timeToFirstTokenMs !== undefined
|
||||
? (totalDurationMs - timeToFirstTokenMs) / 1000
|
||||
: undefined;
|
||||
|
||||
const prefillTokensPerSec = (ttftSec && ttftSec > 0) ? inputTokens / ttftSec : 0;
|
||||
const generationTokensPerSec = (generationDurationSec !== undefined && generationDurationSec > 0)
|
||||
? outputTokens / generationDurationSec
|
||||
: 0;
|
||||
const combinedTokensPerSec = totalDurationSec > 0 ? totalTokens / totalDurationSec : 0;
|
||||
|
||||
return {
|
||||
provider,
|
||||
model,
|
||||
turnCount: turnMetrics.length,
|
||||
inputTokens,
|
||||
outputTokens,
|
||||
totalTokens,
|
||||
prefillTokensPerSec,
|
||||
generationTokensPerSec,
|
||||
combinedTokensPerSec,
|
||||
totalDurationMs,
|
||||
timeToFirstTokenMs,
|
||||
turns: turnMetrics,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Format metrics for TUI display
|
||||
*/
|
||||
export function formatMetricsForDisplay(metrics: PromptMetrics): string {
|
||||
const lines: string[] = [];
|
||||
|
||||
// Header with provider/model
|
||||
lines.push(`📊 Performance: ${metrics.provider}/${metrics.model}`);
|
||||
|
||||
if (metrics.turnCount === 0) {
|
||||
lines.push(" No turns recorded");
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
// Format duration display
|
||||
const durationSec = metrics.totalDurationMs / 1000;
|
||||
const durationDisplay = durationSec >= 60
|
||||
? `${(durationSec / 60).toFixed(1)}m`
|
||||
: `${durationSec.toFixed(1)}s`;
|
||||
|
||||
// Prefill metrics (only when TTFT was available)
|
||||
if (metrics.prefillTokensPerSec > 0) {
|
||||
lines.push(
|
||||
` Prefill: ${metrics.inputTokens.toLocaleString()} tokens @ ${metrics.prefillTokensPerSec.toFixed(1)} tok/s`
|
||||
);
|
||||
}
|
||||
|
||||
// Generation metrics (only when TTFT was available)
|
||||
if (metrics.generationTokensPerSec > 0) {
|
||||
lines.push(
|
||||
` Generation: ${metrics.outputTokens.toLocaleString()} tokens @ ${metrics.generationTokensPerSec.toFixed(1)} tok/s`
|
||||
);
|
||||
}
|
||||
|
||||
// Combined metrics
|
||||
lines.push(
|
||||
` Combined: ${metrics.totalTokens.toLocaleString()} tokens @ ${metrics.combinedTokensPerSec.toFixed(1)} tok/s (${durationDisplay} total)`
|
||||
);
|
||||
|
||||
// Time to first token
|
||||
if (metrics.timeToFirstTokenMs !== undefined) {
|
||||
lines.push(` TTFT: ${metrics.timeToFirstTokenMs.toFixed(0)}ms`);
|
||||
}
|
||||
|
||||
// Turn count
|
||||
if (metrics.turnCount > 1) {
|
||||
lines.push(` Turns: ${metrics.turnCount}`);
|
||||
}
|
||||
|
||||
return lines.join("\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert PromptMetrics to JSONL log entry
|
||||
*/
|
||||
export function toLogEntry(metrics: PromptMetrics): MetricLogEntry {
|
||||
return {
|
||||
timestamp: new Date().toISOString(),
|
||||
provider: metrics.provider,
|
||||
model: metrics.model,
|
||||
turnCount: metrics.turnCount,
|
||||
inputTokens: metrics.inputTokens,
|
||||
outputTokens: metrics.outputTokens,
|
||||
totalTokens: metrics.totalTokens,
|
||||
prefillTokensPerSec: Math.round(metrics.prefillTokensPerSec * 100) / 100,
|
||||
generationTokensPerSec: Math.round(metrics.generationTokensPerSec * 100) / 100,
|
||||
combinedTokensPerSec: Math.round(metrics.combinedTokensPerSec * 100) / 100,
|
||||
totalDurationMs: metrics.totalDurationMs,
|
||||
timeToFirstTokenMs: metrics.timeToFirstTokenMs,
|
||||
};
|
||||
}
|
||||
398
packages/pi-llm-performance/src/llm-performance-metrics.test.ts
Normal file
398
packages/pi-llm-performance/src/llm-performance-metrics.test.ts
Normal file
@ -0,0 +1,398 @@
|
||||
import {
|
||||
calculateTurnMetrics,
|
||||
aggregatePromptMetrics,
|
||||
formatMetricsForDisplay,
|
||||
toLogEntry,
|
||||
type TurnMetrics,
|
||||
type PromptMetrics,
|
||||
} from "./llm-metrics-core.ts";
|
||||
import { assertEquals, assertGreaterOrEqual, assertLessOrEqual } from "jsr:@std/assert";
|
||||
|
||||
Deno.test("calculateTurnMetrics - creates turn metrics object", () => {
|
||||
const result = calculateTurnMetrics({
|
||||
turnId: "turn-1",
|
||||
inputTokens: 100,
|
||||
outputTokens: 50,
|
||||
durationMs: 2000,
|
||||
timeToFirstTokenMs: 500,
|
||||
});
|
||||
|
||||
assertEquals(result.turnId, "turn-1");
|
||||
assertEquals(result.inputTokens, 100);
|
||||
assertEquals(result.outputTokens, 50);
|
||||
assertEquals(result.durationMs, 2000);
|
||||
assertEquals(result.timeToFirstTokenMs, 500);
|
||||
});
|
||||
|
||||
Deno.test("calculateTurnMetrics - handles missing timeToFirstToken", () => {
|
||||
const result = calculateTurnMetrics({
|
||||
turnId: "turn-1",
|
||||
inputTokens: 100,
|
||||
outputTokens: 50,
|
||||
durationMs: 2000,
|
||||
});
|
||||
|
||||
assertEquals(result.timeToFirstTokenMs, undefined);
|
||||
});
|
||||
|
||||
Deno.test("aggregatePromptMetrics - aggregates single turn", () => {
|
||||
const turnMetrics: TurnMetrics[] = [
|
||||
{
|
||||
turnId: "turn-1",
|
||||
inputTokens: 1000,
|
||||
outputTokens: 200,
|
||||
durationMs: 5000,
|
||||
timeToFirstTokenMs: 800,
|
||||
},
|
||||
];
|
||||
|
||||
const result = aggregatePromptMetrics({
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4",
|
||||
turnMetrics,
|
||||
});
|
||||
|
||||
assertEquals(result.provider, "anthropic");
|
||||
assertEquals(result.model, "claude-sonnet-4");
|
||||
assertEquals(result.turnCount, 1);
|
||||
assertEquals(result.inputTokens, 1000);
|
||||
assertEquals(result.outputTokens, 200);
|
||||
assertEquals(result.totalTokens, 1200);
|
||||
assertEquals(result.totalDurationMs, 5000);
|
||||
assertEquals(result.timeToFirstTokenMs, 800);
|
||||
|
||||
// Tokens per second calculations
|
||||
// prefill: 1000 input tokens / 0.8s TTFT = 1250 tok/s
|
||||
assertEquals(result.prefillTokensPerSec, 1250);
|
||||
// generation: 200 output tokens / 4.2s (5s - 0.8s) = 47.62 tok/s
|
||||
assertGreaterOrEqual(result.generationTokensPerSec, 47.6);
|
||||
assertLessOrEqual(result.generationTokensPerSec, 47.7);
|
||||
// combined: 1200 total tokens / 5s = 240 tok/s
|
||||
assertEquals(result.combinedTokensPerSec, 240);
|
||||
});
|
||||
|
||||
Deno.test("aggregatePromptMetrics - aggregates multiple turns", () => {
|
||||
const turnMetrics: TurnMetrics[] = [
|
||||
{
|
||||
turnId: "turn-1",
|
||||
inputTokens: 1000,
|
||||
outputTokens: 200,
|
||||
durationMs: 3000,
|
||||
timeToFirstTokenMs: 800,
|
||||
},
|
||||
{
|
||||
turnId: "turn-2",
|
||||
inputTokens: 500,
|
||||
outputTokens: 150,
|
||||
durationMs: 2000,
|
||||
},
|
||||
{
|
||||
turnId: "turn-3",
|
||||
inputTokens: 300,
|
||||
outputTokens: 100,
|
||||
durationMs: 1500,
|
||||
},
|
||||
];
|
||||
|
||||
const result = aggregatePromptMetrics({
|
||||
provider: "openai",
|
||||
model: "gpt-4o",
|
||||
turnMetrics,
|
||||
});
|
||||
|
||||
assertEquals(result.turnCount, 3);
|
||||
assertEquals(result.inputTokens, 1800); // 1000 + 500 + 300
|
||||
assertEquals(result.outputTokens, 450); // 200 + 150 + 100
|
||||
assertEquals(result.totalTokens, 2250);
|
||||
assertEquals(result.totalDurationMs, 6500); // 3000 + 2000 + 1500
|
||||
assertEquals(result.timeToFirstTokenMs, 800); // From first turn only
|
||||
|
||||
// Tokens per second: prefill uses TTFT (0.8s), generation uses (total - TTFT) = 5.7s
|
||||
// prefill: 1800 / 0.8 = 2250 tok/s
|
||||
assertEquals(result.prefillTokensPerSec, 2250);
|
||||
// generation: 450 / 5.7 = 78.95 tok/s
|
||||
assertGreaterOrEqual(result.generationTokensPerSec, 78.9);
|
||||
assertLessOrEqual(result.generationTokensPerSec, 79.0);
|
||||
// combined: 2250 / 6.5 = 346.15 tok/s
|
||||
assertGreaterOrEqual(result.combinedTokensPerSec, 346.1);
|
||||
assertLessOrEqual(result.combinedTokensPerSec, 346.2);
|
||||
});
|
||||
|
||||
Deno.test("aggregatePromptMetrics - handles empty turn list", () => {
|
||||
const result = aggregatePromptMetrics({
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4",
|
||||
turnMetrics: [],
|
||||
});
|
||||
|
||||
assertEquals(result.turnCount, 0);
|
||||
assertEquals(result.inputTokens, 0);
|
||||
assertEquals(result.outputTokens, 0);
|
||||
assertEquals(result.totalTokens, 0);
|
||||
assertEquals(result.prefillTokensPerSec, 0);
|
||||
assertEquals(result.generationTokensPerSec, 0);
|
||||
assertEquals(result.combinedTokensPerSec, 0);
|
||||
assertEquals(result.totalDurationMs, 0);
|
||||
assertEquals(result.timeToFirstTokenMs, undefined);
|
||||
});
|
||||
|
||||
Deno.test("formatMetricsForDisplay - formats single turn metrics", () => {
|
||||
const metrics: PromptMetrics = {
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4",
|
||||
turnCount: 1,
|
||||
inputTokens: 1250,
|
||||
outputTokens: 342,
|
||||
totalTokens: 1592,
|
||||
prefillTokensPerSec: 482.1,
|
||||
generationTokensPerSec: 18.3,
|
||||
combinedTokensPerSec: 38.0,
|
||||
totalDurationMs: 21600,
|
||||
timeToFirstTokenMs: 850,
|
||||
turns: [],
|
||||
};
|
||||
|
||||
const display = formatMetricsForDisplay(metrics);
|
||||
|
||||
assertEquals(display.includes("anthropic/claude-sonnet-4"), true);
|
||||
assertEquals(display.includes("1,250 tokens"), true);
|
||||
assertEquals(display.includes("482.1 tok/s"), true);
|
||||
assertEquals(display.includes("342 tokens"), true);
|
||||
assertEquals(display.includes("18.3 tok/s"), true);
|
||||
assertEquals(display.includes("1,592 tokens"), true);
|
||||
assertEquals(display.includes("38.0 tok/s"), true);
|
||||
assertEquals(display.includes("21.6s"), true);
|
||||
assertEquals(display.includes("TTFT: 850ms"), true);
|
||||
});
|
||||
|
||||
Deno.test("formatMetricsForDisplay - formats duration as minutes when over 60s", () => {
|
||||
const metrics: PromptMetrics = {
|
||||
provider: "openai",
|
||||
model: "gpt-4o",
|
||||
turnCount: 1,
|
||||
inputTokens: 5000,
|
||||
outputTokens: 1000,
|
||||
totalTokens: 6000,
|
||||
prefillTokensPerSec: 50,
|
||||
generationTokensPerSec: 10,
|
||||
combinedTokensPerSec: 60,
|
||||
totalDurationMs: 120000, // 2 minutes
|
||||
timeToFirstTokenMs: 1500,
|
||||
turns: [],
|
||||
};
|
||||
|
||||
const display = formatMetricsForDisplay(metrics);
|
||||
|
||||
assertEquals(display.includes("2.0m"), true);
|
||||
});
|
||||
|
||||
Deno.test("formatMetricsForDisplay - omits turn count when single turn", () => {
|
||||
const metrics: PromptMetrics = {
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4",
|
||||
turnCount: 1,
|
||||
inputTokens: 100,
|
||||
outputTokens: 50,
|
||||
totalTokens: 150,
|
||||
prefillTokensPerSec: 20,
|
||||
generationTokensPerSec: 10,
|
||||
combinedTokensPerSec: 30,
|
||||
totalDurationMs: 5000,
|
||||
timeToFirstTokenMs: 500,
|
||||
turns: [],
|
||||
};
|
||||
|
||||
const display = formatMetricsForDisplay(metrics);
|
||||
|
||||
assertEquals(display.includes("Turns: 1"), false);
|
||||
});
|
||||
|
||||
Deno.test("formatMetricsForDisplay - omits prefill/generation when TTFT is unavailable", () => {
|
||||
const metrics: PromptMetrics = {
|
||||
provider: "openai",
|
||||
model: "gpt-4o",
|
||||
turnCount: 1,
|
||||
inputTokens: 1000,
|
||||
outputTokens: 200,
|
||||
totalTokens: 1200,
|
||||
prefillTokensPerSec: 0,
|
||||
generationTokensPerSec: 0,
|
||||
combinedTokensPerSec: 240,
|
||||
totalDurationMs: 5000,
|
||||
timeToFirstTokenMs: undefined,
|
||||
turns: [],
|
||||
};
|
||||
|
||||
const display = formatMetricsForDisplay(metrics);
|
||||
|
||||
assertEquals(display.includes("Prefill:"), false);
|
||||
assertEquals(display.includes("Generation:"), false);
|
||||
assertEquals(display.includes("1,200 tokens"), true);
|
||||
assertEquals(display.includes("240.0 tok/s"), true);
|
||||
});
|
||||
|
||||
Deno.test("formatMetricsForDisplay - shows turn count when multiple turns", () => {
|
||||
const metrics: PromptMetrics = {
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4",
|
||||
turnCount: 3,
|
||||
inputTokens: 100,
|
||||
outputTokens: 50,
|
||||
totalTokens: 150,
|
||||
prefillTokensPerSec: 20,
|
||||
generationTokensPerSec: 10,
|
||||
combinedTokensPerSec: 30,
|
||||
totalDurationMs: 5000,
|
||||
timeToFirstTokenMs: 500,
|
||||
turns: [],
|
||||
};
|
||||
|
||||
const display = formatMetricsForDisplay(metrics);
|
||||
|
||||
assertEquals(display.includes("Turns: 3"), true);
|
||||
});
|
||||
|
||||
Deno.test("toLogEntry - creates JSON-serializable log entry", () => {
|
||||
const metrics: PromptMetrics = {
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4",
|
||||
turnCount: 2,
|
||||
inputTokens: 1250,
|
||||
outputTokens: 342,
|
||||
totalTokens: 1592,
|
||||
prefillTokensPerSec: 482.12345,
|
||||
generationTokensPerSec: 18.34567,
|
||||
combinedTokensPerSec: 38.09876,
|
||||
totalDurationMs: 21600,
|
||||
timeToFirstTokenMs: 850,
|
||||
turns: [],
|
||||
};
|
||||
|
||||
const logEntry = toLogEntry(metrics);
|
||||
|
||||
assertEquals(logEntry.provider, "anthropic");
|
||||
assertEquals(logEntry.model, "claude-sonnet-4");
|
||||
assertEquals(logEntry.turnCount, 2);
|
||||
assertEquals(logEntry.inputTokens, 1250);
|
||||
assertEquals(logEntry.outputTokens, 342);
|
||||
assertEquals(logEntry.totalTokens, 1592);
|
||||
// Rounded to 2 decimal places
|
||||
assertEquals(logEntry.prefillTokensPerSec, 482.12);
|
||||
assertEquals(logEntry.generationTokensPerSec, 18.35);
|
||||
assertEquals(logEntry.combinedTokensPerSec, 38.1);
|
||||
assertEquals(logEntry.totalDurationMs, 21600);
|
||||
assertEquals(logEntry.timeToFirstTokenMs, 850);
|
||||
|
||||
// Should have ISO timestamp
|
||||
assertEquals(logEntry.timestamp.includes("T"), true);
|
||||
assertEquals(logEntry.timestamp.includes("Z"), true);
|
||||
|
||||
// Should be JSON serializable
|
||||
const json = JSON.stringify(logEntry);
|
||||
assertEquals(json.length > 0, true);
|
||||
const parsed = JSON.parse(json);
|
||||
assertEquals(parsed.provider, "anthropic");
|
||||
});
|
||||
|
||||
Deno.test("aggregatePromptMetrics - uses full duration when TTFT is undefined", () => {
|
||||
const turnMetrics: TurnMetrics[] = [
|
||||
{
|
||||
turnId: "turn-1",
|
||||
inputTokens: 1000,
|
||||
outputTokens: 200,
|
||||
durationMs: 5000,
|
||||
// No timeToFirstTokenMs
|
||||
},
|
||||
];
|
||||
|
||||
const result = aggregatePromptMetrics({
|
||||
provider: "openai",
|
||||
model: "gpt-4o",
|
||||
turnMetrics,
|
||||
});
|
||||
|
||||
assertEquals(result.turnCount, 1);
|
||||
assertEquals(result.inputTokens, 1000);
|
||||
assertEquals(result.outputTokens, 200);
|
||||
// Without TTFT, prefill and generation rates are 0 (cannot separate phases)
|
||||
// Only combined rate is meaningful
|
||||
assertEquals(result.prefillTokensPerSec, 0);
|
||||
assertEquals(result.generationTokensPerSec, 0);
|
||||
assertEquals(result.combinedTokensPerSec, 240);
|
||||
});
|
||||
|
||||
Deno.test("toLogEntry - handles missing timeToFirstToken", () => {
|
||||
const metrics: PromptMetrics = {
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4",
|
||||
turnCount: 1,
|
||||
inputTokens: 100,
|
||||
outputTokens: 50,
|
||||
totalTokens: 150,
|
||||
prefillTokensPerSec: 20,
|
||||
generationTokensPerSec: 10,
|
||||
combinedTokensPerSec: 30,
|
||||
totalDurationMs: 5000,
|
||||
timeToFirstTokenMs: undefined,
|
||||
turns: [],
|
||||
};
|
||||
|
||||
const logEntry = toLogEntry(metrics);
|
||||
|
||||
assertEquals(logEntry.timeToFirstTokenMs, undefined);
|
||||
});
|
||||
|
||||
Deno.test("Integration - full flow from turns to log entry", () => {
|
||||
// Simulate a real scenario with multiple turns
|
||||
const turn1 = calculateTurnMetrics({
|
||||
turnId: "turn-1",
|
||||
inputTokens: 2000,
|
||||
outputTokens: 500,
|
||||
durationMs: 8000,
|
||||
timeToFirstTokenMs: 1200,
|
||||
});
|
||||
|
||||
const turn2 = calculateTurnMetrics({
|
||||
turnId: "turn-2",
|
||||
inputTokens: 800,
|
||||
outputTokens: 200,
|
||||
durationMs: 3000,
|
||||
});
|
||||
|
||||
const promptMetrics = aggregatePromptMetrics({
|
||||
provider: "groq",
|
||||
model: "llama-3.1-70b",
|
||||
turnMetrics: [turn1, turn2],
|
||||
});
|
||||
|
||||
const display = formatMetricsForDisplay(promptMetrics);
|
||||
const logEntry = toLogEntry(promptMetrics);
|
||||
|
||||
// Verify aggregation
|
||||
assertEquals(promptMetrics.turnCount, 2);
|
||||
assertEquals(promptMetrics.inputTokens, 2800);
|
||||
assertEquals(promptMetrics.outputTokens, 700);
|
||||
assertEquals(promptMetrics.totalTokens, 3500);
|
||||
assertEquals(promptMetrics.totalDurationMs, 11000);
|
||||
assertEquals(promptMetrics.timeToFirstTokenMs, 1200);
|
||||
|
||||
// Verify corrected rate calculations
|
||||
// prefill: 2800 / 1.2 = 2333.33 tok/s
|
||||
assertGreaterOrEqual(promptMetrics.prefillTokensPerSec, 2333.3);
|
||||
assertLessOrEqual(promptMetrics.prefillTokensPerSec, 2333.4);
|
||||
// generation: 700 / 9.8 = 71.43 tok/s
|
||||
assertGreaterOrEqual(promptMetrics.generationTokensPerSec, 71.4);
|
||||
assertLessOrEqual(promptMetrics.generationTokensPerSec, 71.5);
|
||||
// combined: 3500 / 11 = 318.18 tok/s
|
||||
assertGreaterOrEqual(promptMetrics.combinedTokensPerSec, 318.1);
|
||||
assertLessOrEqual(promptMetrics.combinedTokensPerSec, 318.2);
|
||||
|
||||
// Verify display contains key info
|
||||
assertEquals(display.includes("groq/llama-3.1-70b"), true);
|
||||
assertEquals(display.includes("TTFT: 1200ms"), true);
|
||||
|
||||
// Verify log entry
|
||||
assertEquals(logEntry.provider, "groq");
|
||||
assertEquals(logEntry.model, "llama-3.1-70b");
|
||||
assertEquals(logEntry.turnCount, 2);
|
||||
});
|
||||
100
packages/pi-llm-performance/src/llm-performance-metrics.ts
Normal file
100
packages/pi-llm-performance/src/llm-performance-metrics.ts
Normal file
@ -0,0 +1,100 @@
|
||||
// LLM Performance Metrics Extension
|
||||
// Captures and displays LLM inference performance metrics
|
||||
|
||||
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
||||
import { appendFileSync, mkdirSync } from "node:fs";
|
||||
import { dirname, join } from "node:path";
|
||||
|
||||
// Re-export core functions from the shared metrics module
|
||||
import {
|
||||
calculateTurnMetrics,
|
||||
aggregatePromptMetrics,
|
||||
formatMetricsForDisplay,
|
||||
toLogEntry,
|
||||
type TurnMetrics,
|
||||
type PromptMetrics,
|
||||
type MetricLogEntry,
|
||||
} from "./llm-metrics-core.ts";
|
||||
|
||||
// ============================================================================
|
||||
// Extension Event Handlers (imperative shell)
|
||||
// ============================================================================
|
||||
|
||||
// State tracking — module-level mutable state shared by the event handlers
// below. One prompt is in flight at a time, so plain module variables suffice.
let promptStartMs: number | undefined; // Date.now() when the prompt began (set on agent_start)
let currentTurnStartMs: number | undefined; // Date.now() when the current turn began (set on turn_start)
let currentTurnId: string | undefined; // id of the turn in flight, e.g. "turn-0"
let turnMetrics: TurnMetrics[] = []; // per-turn metrics accumulated for the current prompt
let firstTokenTimeMs: number | undefined; // Date.now() of the first text_delta seen (drives TTFT)
let provider: string | undefined; // provider id copied from ctx.model on agent_start
let model: string | undefined; // model id copied from ctx.model on agent_start
|
||||
|
||||
export default function (pi: ExtensionAPI) {
|
||||
const logFile = join(process.cwd(), ".pi", "llm-metrics.log");
|
||||
|
||||
pi.on("agent_start", async (_event, ctx) => {
|
||||
if (!ctx.model) return;
|
||||
promptStartMs = Date.now();
|
||||
turnMetrics = [];
|
||||
firstTokenTimeMs = undefined;
|
||||
provider = ctx.model.provider;
|
||||
model = ctx.model.id;
|
||||
});
|
||||
|
||||
pi.on("turn_start", async (event, _ctx) => {
|
||||
currentTurnStartMs = Date.now();
|
||||
currentTurnId = `turn-${event.turnIndex}`;
|
||||
});
|
||||
|
||||
pi.on("message_update", async (event, _ctx) => {
|
||||
// Capture TTFT on first token
|
||||
if (firstTokenTimeMs === undefined && event.assistantMessageEvent?.type === "text_delta") {
|
||||
firstTokenTimeMs = Date.now();
|
||||
}
|
||||
});
|
||||
|
||||
pi.on("turn_end", async (event, _ctx) => {
|
||||
if (event.message.role !== "assistant") return;
|
||||
const inputTokens = event.message.usage?.input ?? 0;
|
||||
const outputTokens = event.message.usage?.output ?? 0;
|
||||
const durationMs = currentTurnStartMs ? Date.now() - currentTurnStartMs : 0;
|
||||
const ttftMs = currentTurnId === `turn-${event.turnIndex}` && firstTokenTimeMs && currentTurnStartMs
|
||||
? firstTokenTimeMs - currentTurnStartMs
|
||||
: undefined;
|
||||
|
||||
const turnMetric = calculateTurnMetrics({
|
||||
turnId: currentTurnId!,
|
||||
inputTokens,
|
||||
outputTokens,
|
||||
durationMs,
|
||||
timeToFirstTokenMs: ttftMs,
|
||||
});
|
||||
|
||||
turnMetrics.push(turnMetric);
|
||||
});
|
||||
|
||||
pi.on("agent_end", async (_event, ctx) => {
|
||||
if (!provider || !model || promptStartMs === undefined) return;
|
||||
|
||||
const promptMetrics = aggregatePromptMetrics({
|
||||
provider,
|
||||
model,
|
||||
turnMetrics,
|
||||
});
|
||||
|
||||
// Display in TUI
|
||||
const display = formatMetricsForDisplay(promptMetrics);
|
||||
ctx.ui.notify(display, "info");
|
||||
ctx.ui.setStatus("metrics", `📊 ${promptMetrics.combinedTokensPerSec.toFixed(1)} tok/s`);
|
||||
|
||||
// Log to JSONL file
|
||||
const logEntry = toLogEntry(promptMetrics);
|
||||
mkdirSync(dirname(logFile), { recursive: true });
|
||||
appendFileSync(logFile, JSON.stringify(logEntry) + "\n", "utf8");
|
||||
|
||||
// Reset state
|
||||
promptStartMs = undefined;
|
||||
turnMetrics = [];
|
||||
firstTokenTimeMs = undefined;
|
||||
});
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user