201 lines
5.7 KiB
TypeScript

// Functional core for LLM performance metrics calculation
/**
 * Raw measurements recorded for a single turn.
 */
export interface TurnMetrics {
/** Identifier of the turn these measurements belong to. */
turnId: string;
/** Number of input tokens for the turn. */
inputTokens: number;
/** Number of output tokens for the turn. */
outputTokens: number;
/** Duration of the turn in milliseconds. */
durationMs: number;
/** Time to first token in milliseconds; omitted when it was not measured. */
timeToFirstTokenMs?: number;
}
/**
 * Prompt-level metrics aggregated over one or more turns
 * (the return shape of aggregatePromptMetrics).
 */
export interface PromptMetrics {
/** Provider name, passed through unchanged. */
provider: string;
/** Model name, passed through unchanged. */
model: string;
/** Number of turns that were aggregated. */
turnCount: number;
/** Input tokens summed across all turns. */
inputTokens: number;
/** Output tokens summed across all turns. */
outputTokens: number;
/** inputTokens + outputTokens. */
totalTokens: number;
/** Input tokens per second for the prefill phase; 0 when it cannot be computed. */
prefillTokensPerSec: number;
/** Output tokens per second for the generation phase; 0 when it cannot be computed. */
generationTokensPerSec: number;
/** Total tokens per second over the total duration; 0 when the duration is not positive. */
combinedTokensPerSec: number;
/** Turn durations summed, in milliseconds. */
totalDurationMs: number;
/** Time to first token of the first turn, in milliseconds, when it was recorded. */
timeToFirstTokenMs?: number;
/** The per-turn metrics that were aggregated. */
turns: TurnMetrics[];
}
/**
 * Flat log record derived from PromptMetrics (see toLogEntry).
 * Carries the same summary fields plus a timestamp, without the per-turn list.
 */
export interface MetricLogEntry {
/** Creation time of the entry as an ISO 8601 string (Date#toISOString). */
timestamp: string;
/** Provider name. */
provider: string;
/** Model name. */
model: string;
/** Number of turns that were aggregated. */
turnCount: number;
/** Total input tokens. */
inputTokens: number;
/** Total output tokens. */
outputTokens: number;
/** Input plus output tokens. */
totalTokens: number;
/** Prefill rate in tokens per second; toLogEntry rounds it to two decimals. */
prefillTokensPerSec: number;
/** Generation rate in tokens per second; toLogEntry rounds it to two decimals. */
generationTokensPerSec: number;
/** Combined rate in tokens per second; toLogEntry rounds it to two decimals. */
combinedTokensPerSec: number;
/** Total duration in milliseconds. */
totalDurationMs: number;
/** Time to first token in milliseconds, when available. */
timeToFirstTokenMs?: number;
}
/**
 * Build the metrics record for one turn from its raw measurements.
 *
 * The values are copied through unchanged; `timeToFirstTokenMs` is always
 * present as a key on the result, holding `undefined` when it was not supplied.
 *
 * @param params - Raw measurements for the turn.
 * @returns A new `TurnMetrics` object carrying the same values.
 */
export function calculateTurnMetrics(params: {
  turnId: string;
  inputTokens: number;
  outputTokens: number;
  durationMs: number;
  timeToFirstTokenMs?: number;
}): TurnMetrics {
  const { turnId, inputTokens, outputTokens, durationMs, timeToFirstTokenMs } = params;
  const turn: TurnMetrics = {
    turnId,
    inputTokens,
    outputTokens,
    durationMs,
    timeToFirstTokenMs,
  };
  return turn;
}
/**
 * Aggregate multiple turn metrics into prompt-level metrics.
 *
 * Token counts and durations are summed over every turn. Throughput is
 * reported as three rates:
 *
 * - Prefill: input tokens / time spent waiting for the first token.
 * - Generation: output tokens / time spent after the first token.
 * - Combined: total tokens / total duration.
 *
 * The prefill and generation phases can only be separated for turns that
 * report a time to first token (TTFT), so those two rates are computed from
 * exactly those turns, pairing each turn's tokens with that same turn's
 * timings. When no turn reports a TTFT both rates are 0 and only the combined
 * rate is meaningful.
 *
 * The reported `timeToFirstTokenMs` is the first turn's TTFT.
 *
 * @param params - Provider and model labels plus the per-turn metrics.
 * @returns Prompt-level totals and rates; all zeros with an empty `turns`
 *   list when `turnMetrics` is empty.
 */
export function aggregatePromptMetrics(params: {
  provider: string;
  model: string;
  turnMetrics: TurnMetrics[];
}): PromptMetrics {
  const { provider, model, turnMetrics } = params;

  if (turnMetrics.length === 0) {
    return {
      provider,
      model,
      turnCount: 0,
      inputTokens: 0,
      outputTokens: 0,
      totalTokens: 0,
      prefillTokensPerSec: 0,
      generationTokensPerSec: 0,
      combinedTokensPerSec: 0,
      totalDurationMs: 0,
      turns: [],
    };
  }

  // Totals over every turn.
  let inputTokens = 0;
  let outputTokens = 0;
  let totalDurationMs = 0;

  // Totals restricted to turns that reported a TTFT. Dividing tokens from all
  // turns by the first turn's TTFT alone would overstate the prefill rate
  // (and distort the generation rate) as soon as there is more than one turn.
  let prefillTokens = 0;
  let prefillMs = 0;
  let generationTokens = 0;
  let generationMs = 0;

  for (const turn of turnMetrics) {
    inputTokens += turn.inputTokens;
    outputTokens += turn.outputTokens;
    totalDurationMs += turn.durationMs;

    const turnTtftMs = turn.timeToFirstTokenMs;
    if (turnTtftMs !== undefined) {
      prefillTokens += turn.inputTokens;
      prefillMs += turnTtftMs;
      generationTokens += turn.outputTokens;
      generationMs += turn.durationMs - turnTtftMs;
    }
  }

  const totalTokens = inputTokens + outputTokens;

  // Time to first token as perceived for the whole prompt: the first turn's.
  const timeToFirstTokenMs = turnMetrics[0]?.timeToFirstTokenMs;

  // A rate is only defined over a strictly positive time span; otherwise 0.
  const prefillSec = prefillMs / 1000;
  const generationSec = generationMs / 1000;
  const totalDurationSec = totalDurationMs / 1000;

  const prefillTokensPerSec = prefillSec > 0 ? prefillTokens / prefillSec : 0;
  const generationTokensPerSec = generationSec > 0 ? generationTokens / generationSec : 0;
  const combinedTokensPerSec = totalDurationSec > 0 ? totalTokens / totalDurationSec : 0;

  return {
    provider,
    model,
    turnCount: turnMetrics.length,
    inputTokens,
    outputTokens,
    totalTokens,
    prefillTokensPerSec,
    generationTokensPerSec,
    combinedTokensPerSec,
    totalDurationMs,
    timeToFirstTokenMs,
    turns: turnMetrics,
  };
}
/**
 * Render prompt metrics as a multi-line string for the TUI.
 *
 * The first line names the provider and model. When no turns were recorded a
 * single placeholder line follows. Otherwise the output lists, in order:
 * the prefill and generation rates (each only when greater than 0), the
 * combined rate with the total duration (seconds below one minute, minutes
 * from 60 seconds up), the time to first token when defined, and the turn
 * count when there is more than one turn.
 *
 * @param metrics - Aggregated prompt metrics to display.
 * @returns The lines joined with "\n".
 */
export function formatMetricsForDisplay(metrics: PromptMetrics): string {
  const header = `📊 Performance: ${metrics.provider}/${metrics.model}`;

  if (metrics.turnCount === 0) {
    return [header, " No turns recorded"].join("\n");
  }

  // Total duration: minutes from 60s upwards, seconds otherwise.
  const elapsedSec = metrics.totalDurationMs / 1000;
  let elapsed: string;
  if (elapsedSec >= 60) {
    elapsed = `${(elapsedSec / 60).toFixed(1)}m`;
  } else {
    elapsed = `${elapsedSec.toFixed(1)}s`;
  }

  // Shared "<label>: <tokens> tokens @ <rate> tok/s" fragment.
  const rateLine = (label: string, tokens: number, rate: number): string =>
    ` ${label}: ${tokens.toLocaleString()} tokens @ ${rate.toFixed(1)} tok/s`;

  const output: string[] = [header];

  // Phase rates are shown only when they are positive.
  if (metrics.prefillTokensPerSec > 0) {
    output.push(rateLine("Prefill", metrics.inputTokens, metrics.prefillTokensPerSec));
  }
  if (metrics.generationTokensPerSec > 0) {
    output.push(rateLine("Generation", metrics.outputTokens, metrics.generationTokensPerSec));
  }

  // The combined rate is always shown, with the total duration appended.
  output.push(
    `${rateLine("Combined", metrics.totalTokens, metrics.combinedTokensPerSec)} (${elapsed} total)`
  );

  const ttftMs = metrics.timeToFirstTokenMs;
  if (ttftMs !== undefined) {
    output.push(` TTFT: ${ttftMs.toFixed(0)}ms`);
  }

  if (metrics.turnCount > 1) {
    output.push(` Turns: ${metrics.turnCount}`);
  }

  return output.join("\n");
}
/**
 * Convert prompt metrics into a flat entry suitable for a JSONL log.
 *
 * The entry is stamped with the current time as an ISO 8601 string, copies
 * the summary fields, rounds the three tokens-per-second rates to two decimal
 * places, and leaves out the per-turn list.
 *
 * @param metrics - Aggregated prompt metrics.
 * @returns A new log entry; `timeToFirstTokenMs` is carried over as-is and
 *   may be `undefined`.
 */
export function toLogEntry(metrics: PromptMetrics): MetricLogEntry {
  // Round a rate to two decimal places.
  const toTwoDecimals = (rate: number): number => Math.round(rate * 100) / 100;

  const timestamp = new Date().toISOString();
  const {
    provider,
    model,
    turnCount,
    inputTokens,
    outputTokens,
    totalTokens,
    totalDurationMs,
    timeToFirstTokenMs,
  } = metrics;

  const entry: MetricLogEntry = {
    timestamp,
    provider,
    model,
    turnCount,
    inputTokens,
    outputTokens,
    totalTokens,
    prefillTokensPerSec: toTwoDecimals(metrics.prefillTokensPerSec),
    generationTokensPerSec: toTwoDecimals(metrics.generationTokensPerSec),
    combinedTokensPerSec: toTwoDecimals(metrics.combinedTokensPerSec),
    totalDurationMs,
    timeToFirstTokenMs,
  };
  return entry;
}