201 lines
5.7 KiB
TypeScript
201 lines
5.7 KiB
TypeScript
// Functional core for LLM performance metrics calculation
|
|
|
|
/**
 * Raw measurements recorded for a single turn.
 */
export interface TurnMetrics {
  /** Identifier of the turn these measurements belong to. */
  turnId: string;
  /** Number of input tokens counted for the turn. */
  inputTokens: number;
  /** Number of output tokens counted for the turn. */
  outputTokens: number;
  /** Duration of the turn, in milliseconds. */
  durationMs: number;
  /** Time to first token, in milliseconds; omitted when it was not measured. */
  timeToFirstTokenMs?: number;
}
|
|
|
|
/**
 * Prompt-level metrics aggregated over one or more turns.
 */
export interface PromptMetrics {
  /** Provider name, used as a label in display and log output. */
  provider: string;
  /** Model name, used as a label in display and log output. */
  model: string;
  /** Number of turns that were aggregated. */
  turnCount: number;
  /** Input tokens summed over all turns. */
  inputTokens: number;
  /** Output tokens summed over all turns. */
  outputTokens: number;
  /** Sum of `inputTokens` and `outputTokens`. */
  totalTokens: number;
  /** Input-token throughput of the prefill phase; 0 when it cannot be computed. */
  prefillTokensPerSec: number;
  /** Output-token throughput of the generation phase; 0 when it cannot be computed. */
  generationTokensPerSec: number;
  /** Total tokens divided by total duration; 0 when the duration is not positive. */
  combinedTokensPerSec: number;
  /** Turn durations summed over all turns, in milliseconds. */
  totalDurationMs: number;
  /** Time to first token, in milliseconds; omitted when it was not measured. */
  timeToFirstTokenMs?: number;
  /** The per-turn measurements the aggregate was computed from. */
  turns: TurnMetrics[];
}
|
|
|
|
/**
 * Flat record written to the metrics log: the prompt-level numbers plus a
 * timestamp, without the per-turn breakdown.
 */
export interface MetricLogEntry {
  /** Time the entry was created, as an ISO 8601 string. */
  timestamp: string;
  /** Provider name. */
  provider: string;
  /** Model name. */
  model: string;
  /** Number of turns that were aggregated. */
  turnCount: number;
  /** Input tokens summed over all turns. */
  inputTokens: number;
  /** Output tokens summed over all turns. */
  outputTokens: number;
  /** Sum of input and output tokens. */
  totalTokens: number;
  /** Prefill throughput in tokens per second. */
  prefillTokensPerSec: number;
  /** Generation throughput in tokens per second. */
  generationTokensPerSec: number;
  /** Overall throughput in tokens per second. */
  combinedTokensPerSec: number;
  /** Total duration in milliseconds. */
  totalDurationMs: number;
  /** Time to first token, in milliseconds; omitted when it was not measured. */
  timeToFirstTokenMs?: number;
}
|
|
|
|
/**
 * Build the metrics record for a single turn.
 *
 * Copies exactly the five known fields from `params`; any additional
 * properties on the argument are not carried over. The values are passed
 * through unchanged (no validation or derived values).
 *
 * @param params - Raw measurements for the turn.
 * @returns A new {@link TurnMetrics} object holding the same values.
 */
export function calculateTurnMetrics(params: {
  turnId: string;
  inputTokens: number;
  outputTokens: number;
  durationMs: number;
  timeToFirstTokenMs?: number;
}): TurnMetrics {
  const { turnId, inputTokens, outputTokens, durationMs, timeToFirstTokenMs } = params;

  // `timeToFirstTokenMs` is always written as a key, with value `undefined`
  // when it was not supplied.
  return { turnId, inputTokens, outputTokens, durationMs, timeToFirstTokenMs };
}
|
|
|
|
/**
 * Aggregate multiple turn metrics into prompt-level metrics.
 *
 * Token counts and durations are summed over all turns. Throughput is
 * reported three ways:
 *
 * - Prefill: input tokens / time spent before the first token.
 * - Generation: output tokens / time spent after the first token.
 * - Combined: total tokens / total duration.
 *
 * The prefill/generation split is accumulated per turn, using only turns
 * that report a time to first token (TTFT). Dividing tokens summed over all
 * turns by the first turn's TTFT alone would overstate the prefill rate and
 * understate the generation rate for multi-turn prompts. When no turn
 * reports a TTFT the phases cannot be separated, so both rates are 0 and
 * only the combined rate is meaningful.
 *
 * @param params - Provider/model labels and the per-turn measurements.
 * @returns The aggregated metrics. `timeToFirstTokenMs` is the first turn's
 *   TTFT, and `turns` is the `turnMetrics` array that was passed in (a fresh
 *   empty array when there were no turns).
 */
export function aggregatePromptMetrics(params: {
  provider: string;
  model: string;
  turnMetrics: TurnMetrics[];
}): PromptMetrics {
  const { provider, model, turnMetrics } = params;

  if (turnMetrics.length === 0) {
    return {
      provider,
      model,
      turnCount: 0,
      inputTokens: 0,
      outputTokens: 0,
      totalTokens: 0,
      prefillTokensPerSec: 0,
      generationTokensPerSec: 0,
      combinedTokensPerSec: 0,
      totalDurationMs: 0,
      turns: [],
    };
  }

  // Totals over every turn.
  let inputTokens = 0;
  let outputTokens = 0;
  let totalDurationMs = 0;

  // Phase totals over the turns whose TTFT is known; tokens and time are
  // accumulated together so each rate's numerator and denominator cover the
  // same set of turns.
  let timedInputTokens = 0;
  let timedOutputTokens = 0;
  let prefillMs = 0;
  let generationMs = 0;

  for (const turn of turnMetrics) {
    inputTokens += turn.inputTokens;
    outputTokens += turn.outputTokens;
    totalDurationMs += turn.durationMs;

    const ttftMs = turn.timeToFirstTokenMs;
    if (ttftMs !== undefined) {
      timedInputTokens += turn.inputTokens;
      timedOutputTokens += turn.outputTokens;
      prefillMs += ttftMs;
      generationMs += turn.durationMs - ttftMs;
    }
  }

  const totalTokens = inputTokens + outputTokens;

  // The reported time to first token is the first turn's.
  const timeToFirstTokenMs = turnMetrics[0]?.timeToFirstTokenMs;

  // A non-positive (or NaN) phase duration yields a rate of 0 rather than
  // Infinity, NaN, or a negative number.
  const prefillTokensPerSec = prefillMs > 0 ? timedInputTokens / (prefillMs / 1000) : 0;
  const generationTokensPerSec =
    generationMs > 0 ? timedOutputTokens / (generationMs / 1000) : 0;
  const combinedTokensPerSec =
    totalDurationMs > 0 ? totalTokens / (totalDurationMs / 1000) : 0;

  return {
    provider,
    model,
    turnCount: turnMetrics.length,
    inputTokens,
    outputTokens,
    totalTokens,
    prefillTokensPerSec,
    generationTokensPerSec,
    combinedTokensPerSec,
    totalDurationMs,
    timeToFirstTokenMs,
    turns: turnMetrics,
  };
}
|
|
|
|
/**
 * Format metrics for TUI display.
 *
 * Produces a header line followed by one line per available metric, joined
 * with newlines:
 *
 * - Prefill and Generation lines appear only when their rate is above 0.
 * - The Combined line is always present once at least one turn was recorded.
 * - The TTFT line appears only when `timeToFirstTokenMs` is defined.
 * - The Turns line appears only when more than one turn was recorded.
 *
 * Token counts are rendered with `toLocaleString()`, so digit grouping
 * follows the runtime's default locale.
 *
 * @param metrics - Aggregated prompt metrics to render.
 * @returns The multi-line display string (no trailing newline).
 */
export function formatMetricsForDisplay(metrics: PromptMetrics): string {
  const lines: string[] = [`📊 Performance: ${metrics.provider}/${metrics.model}`];

  if (metrics.turnCount === 0) {
    lines.push(" No turns recorded");
    return lines.join("\n");
  }

  // Pick the unit from the value as it will be printed: a duration just
  // under a minute (e.g. 59.96s) rounds to "60.0" and must be shown in
  // minutes rather than as "60.0s".
  const durationSec = metrics.totalDurationMs / 1000;
  const secondsText = durationSec.toFixed(1);
  const durationDisplay =
    Number(secondsText) >= 60 ? `${(durationSec / 60).toFixed(1)}m` : `${secondsText}s`;

  // Prefill metrics (a rate of 0 means the phase could not be measured).
  if (metrics.prefillTokensPerSec > 0) {
    lines.push(
      ` Prefill: ${metrics.inputTokens.toLocaleString()} tokens @ ${metrics.prefillTokensPerSec.toFixed(1)} tok/s`
    );
  }

  // Generation metrics (a rate of 0 means the phase could not be measured).
  if (metrics.generationTokensPerSec > 0) {
    lines.push(
      ` Generation: ${metrics.outputTokens.toLocaleString()} tokens @ ${metrics.generationTokensPerSec.toFixed(1)} tok/s`
    );
  }

  // Combined metrics.
  lines.push(
    ` Combined: ${metrics.totalTokens.toLocaleString()} tokens @ ${metrics.combinedTokensPerSec.toFixed(1)} tok/s (${durationDisplay} total)`
  );

  // Time to first token.
  if (metrics.timeToFirstTokenMs !== undefined) {
    lines.push(` TTFT: ${metrics.timeToFirstTokenMs.toFixed(0)}ms`);
  }

  // Turn count is only worth showing for multi-turn prompts.
  if (metrics.turnCount > 1) {
    lines.push(` Turns: ${metrics.turnCount}`);
  }

  return lines.join("\n");
}
|
|
|
|
/**
 * Convert PromptMetrics to a JSONL log entry.
 *
 * The entry is stamped with the current time as an ISO 8601 string, the
 * three throughput values are rounded to two decimal places, and the
 * per-turn breakdown (`turns`) is left out.
 *
 * @param metrics - Aggregated prompt metrics to log.
 * @returns A flat {@link MetricLogEntry}.
 */
export function toLogEntry(metrics: PromptMetrics): MetricLogEntry {
  // Round to two decimals to keep the log lines compact.
  const roundTo2 = (value: number): number => Math.round(value * 100) / 100;

  return {
    timestamp: new Date().toISOString(),
    provider: metrics.provider,
    model: metrics.model,
    turnCount: metrics.turnCount,
    inputTokens: metrics.inputTokens,
    outputTokens: metrics.outputTokens,
    totalTokens: metrics.totalTokens,
    prefillTokensPerSec: roundTo2(metrics.prefillTokensPerSec),
    generationTokensPerSec: roundTo2(metrics.generationTokensPerSec),
    combinedTokensPerSec: roundTo2(metrics.combinedTokensPerSec),
    totalDurationMs: metrics.totalDurationMs,
    timeToFirstTokenMs: metrics.timeToFirstTokenMs,
  };
}