// Functional core for LLM performance metrics calculation

/** Raw measurements for a single request/response turn. */
export interface TurnMetrics {
  turnId: string;
  inputTokens: number;
  outputTokens: number;
  /** Duration of the turn, in milliseconds. */
  durationMs: number;
  /** Time until the first output token arrived, in milliseconds, when measured. */
  timeToFirstTokenMs?: number;
}

/** Metrics aggregated over every turn of one prompt. */
export interface PromptMetrics {
  provider: string;
  model: string;
  turnCount: number;
  inputTokens: number;
  outputTokens: number;
  totalTokens: number;
  /** Input tokens per second during prefill; 0 when it cannot be computed. */
  prefillTokensPerSec: number;
  /** Output tokens per second during generation; 0 when it cannot be computed. */
  generationTokensPerSec: number;
  /** Total tokens per second over the total duration; 0 when the duration is not positive. */
  combinedTokensPerSec: number;
  totalDurationMs: number;
  /** Time to first token of the first turn, when that turn reported one. */
  timeToFirstTokenMs?: number;
  turns: TurnMetrics[];
}

/** Flat record written to the metrics log (PromptMetrics without per-turn detail). */
export interface MetricLogEntry {
  /** ISO-8601 timestamp of when the entry was created. */
  timestamp: string;
  provider: string;
  model: string;
  turnCount: number;
  inputTokens: number;
  outputTokens: number;
  totalTokens: number;
  prefillTokensPerSec: number;
  generationTokensPerSec: number;
  combinedTokensPerSec: number;
  totalDurationMs: number;
  timeToFirstTokenMs?: number;
}

/**
 * Convert a token count and a duration in milliseconds into tokens per second.
 * Returns 0 when the duration is zero, negative or NaN so callers never see
 * Infinity/NaN rates.
 */
function tokensPerSecond(tokens: number, durationMs: number): number {
  return durationMs > 0 ? tokens / (durationMs / 1000) : 0;
}

/**
 * Calculate metrics for a single turn.
 *
 * @param params - Raw measurements for the turn.
 * @returns A new TurnMetrics object carrying the same values.
 */
export function calculateTurnMetrics(params: {
  turnId: string;
  inputTokens: number;
  outputTokens: number;
  durationMs: number;
  timeToFirstTokenMs?: number;
}): TurnMetrics {
  return {
    turnId: params.turnId,
    inputTokens: params.inputTokens,
    outputTokens: params.outputTokens,
    durationMs: params.durationMs,
    timeToFirstTokenMs: params.timeToFirstTokenMs,
  };
}

/**
 * Aggregate multiple turn metrics into prompt-level metrics.
 *
 * Token counts and durations are summed over all turns. Throughput is
 * reported as:
 * - Prefill: input tokens / time-to-first-token
 * - Generation: output tokens / (duration - time-to-first-token)
 * - Combined: total tokens / total duration
 *
 * The prefill and generation rates are accumulated per turn and only over
 * turns that report a TTFT, so the token counts and the time they are divided
 * by always describe the same turns. When no turn reports a TTFT the two
 * phases cannot be separated and both rates are 0; only the combined rate is
 * meaningful.
 *
 * @param params.provider - Provider name, copied to the result.
 * @param params.model - Model name, copied to the result.
 * @param params.turnMetrics - Turns to aggregate; not mutated.
 * @returns Aggregated metrics. `timeToFirstTokenMs` is the first turn's TTFT
 *   and `turns` is a shallow copy of the input array.
 */
export function aggregatePromptMetrics(params: {
  provider: string;
  model: string;
  turnMetrics: readonly TurnMetrics[];
}): PromptMetrics {
  const { provider, model, turnMetrics } = params;

  if (turnMetrics.length === 0) {
    return {
      provider,
      model,
      turnCount: 0,
      inputTokens: 0,
      outputTokens: 0,
      totalTokens: 0,
      prefillTokensPerSec: 0,
      generationTokensPerSec: 0,
      combinedTokensPerSec: 0,
      totalDurationMs: 0,
      turns: [],
    };
  }

  // Totals across all turns.
  let inputTokens = 0;
  let outputTokens = 0;
  let totalDurationMs = 0;

  // Phase totals, restricted to turns whose TTFT is known.
  let prefillInputTokens = 0;
  let generationOutputTokens = 0;
  let prefillMs = 0;
  let generationMs = 0;

  for (const turn of turnMetrics) {
    inputTokens += turn.inputTokens;
    outputTokens += turn.outputTokens;
    totalDurationMs += turn.durationMs;

    const ttftMs = turn.timeToFirstTokenMs;
    if (ttftMs === undefined) {
      // Without a TTFT this turn's time cannot be split into phases.
      continue;
    }
    prefillInputTokens += turn.inputTokens;
    generationOutputTokens += turn.outputTokens;
    prefillMs += ttftMs;
    generationMs += turn.durationMs - ttftMs;
  }

  const totalTokens = inputTokens + outputTokens;

  // The reported time to first token is the one the user experienced: the first turn's.
  const timeToFirstTokenMs = turnMetrics[0]?.timeToFirstTokenMs;

  return {
    provider,
    model,
    turnCount: turnMetrics.length,
    inputTokens,
    outputTokens,
    totalTokens,
    prefillTokensPerSec: tokensPerSecond(prefillInputTokens, prefillMs),
    generationTokensPerSec: tokensPerSecond(generationOutputTokens, generationMs),
    combinedTokensPerSec: tokensPerSecond(totalTokens, totalDurationMs),
    totalDurationMs,
    timeToFirstTokenMs,
    // Copy so later mutation of the caller's array cannot change the result.
    turns: [...turnMetrics],
  };
}

/**
 * Format metrics for TUI display.
 *
 * @param metrics - Aggregated prompt metrics.
 * @returns A newline-joined summary. The prefill and generation lines are
 *   emitted only when their rate is greater than 0, the TTFT line only when a
 *   TTFT is present, and the turn count only when there is more than one turn.
 */
export function formatMetricsForDisplay(metrics: PromptMetrics): string {
  const lines: string[] = [];

  // Header with provider/model
  lines.push(`📊 Performance: ${metrics.provider}/${metrics.model}`);

  if (metrics.turnCount === 0) {
    lines.push(" No turns recorded");
    return lines.join("\n");
  }

  // Durations of a minute or more are shown in minutes, shorter ones in seconds.
  const durationSec = metrics.totalDurationMs / 1000;
  const durationDisplay =
    durationSec >= 60
      ? `${(durationSec / 60).toFixed(1)}m`
      : `${durationSec.toFixed(1)}s`;

  // Prefill metrics (only when TTFT was available)
  if (metrics.prefillTokensPerSec > 0) {
    lines.push(
      ` Prefill: ${metrics.inputTokens.toLocaleString()} tokens @ ${metrics.prefillTokensPerSec.toFixed(1)} tok/s`
    );
  }

  // Generation metrics (only when TTFT was available)
  if (metrics.generationTokensPerSec > 0) {
    lines.push(
      ` Generation: ${metrics.outputTokens.toLocaleString()} tokens @ ${metrics.generationTokensPerSec.toFixed(1)} tok/s`
    );
  }

  // Combined metrics
  lines.push(
    ` Combined: ${metrics.totalTokens.toLocaleString()} tokens @ ${metrics.combinedTokensPerSec.toFixed(1)} tok/s (${durationDisplay} total)`
  );

  // Time to first token
  if (metrics.timeToFirstTokenMs !== undefined) {
    lines.push(` TTFT: ${metrics.timeToFirstTokenMs.toFixed(0)}ms`);
  }

  // Turn count
  if (metrics.turnCount > 1) {
    lines.push(` Turns: ${metrics.turnCount}`);
  }

  return lines.join("\n");
}

/**
 * Convert PromptMetrics to a JSONL log entry.
 *
 * Rates are rounded to two decimal places; per-turn detail is dropped.
 *
 * @param metrics - Aggregated prompt metrics.
 * @param now - Instant to stamp the entry with. Defaults to the current time;
 *   pass an explicit value to get a deterministic result.
 * @returns The log entry.
 * @throws RangeError if `now` is an invalid Date (raised by `toISOString`).
 */
export function toLogEntry(
  metrics: PromptMetrics,
  now: Date = new Date()
): MetricLogEntry {
  const roundToHundredths = (value: number): number =>
    Math.round(value * 100) / 100;

  return {
    timestamp: now.toISOString(),
    provider: metrics.provider,
    model: metrics.model,
    turnCount: metrics.turnCount,
    inputTokens: metrics.inputTokens,
    outputTokens: metrics.outputTokens,
    totalTokens: metrics.totalTokens,
    prefillTokensPerSec: roundToHundredths(metrics.prefillTokensPerSec),
    generationTokensPerSec: roundToHundredths(metrics.generationTokensPerSec),
    combinedTokensPerSec: roundToHundredths(metrics.combinedTokensPerSec),
    totalDurationMs: metrics.totalDurationMs,
    timeToFirstTokenMs: metrics.timeToFirstTokenMs,
  };
}