#!/usr/bin/env bash
set -euo pipefail

# ─── score.sh ────────────────────────────────────────────────────────────────
# Score a single diagram output against 6 binary evals.
# Usage: ./scripts/score.sh <output_file>
# Prints a JSON line with pass/fail for each eval and total score.
#
# Contract: exactly one JSON line is written to stdout. A missing or
# non-existent file yields {"error": "file not found", "score": 0} with exit
# status 0, so callers can always parse the output.
# ─────────────────────────────────────────────────────────────────────────────

# Directory holding this script; the optional sidetrack blocklist lives here.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

#######################################
# Case-insensitive literal substring test.
# Uses a here-string rather than `echo | grep -q`: under `pipefail`, grep -q
# exiting early can SIGPIPE the writer on large inputs and turn a match into
# a failure.
# Arguments: $1 - haystack, $2 - needle (taken literally)
# Returns:   0 if needle occurs in haystack, non-zero otherwise
#######################################
contains_ci() {
  grep -qiF -- "$2" <<<"$1"
}

#######################################
# Print the body of the first fenced mermaid block: the lines after a line
# starting with ```mermaid, up to (not including) the next line that is
# exactly ```.
# Arguments: $1 - full output text
# Outputs:   block body on stdout (empty if no block is found)
#######################################
extract_mermaid_block() {
  awk '/^```mermaid/{found=1;next} found && /^```$/{exit} found{print}' <<<"$1"
}

#######################################
# Validate a diagram by rendering it with the mermaid CLI (mmdc).
# Runs in a subshell so the EXIT trap removes the private temp directory on
# every exit path without touching the caller's traps.
# NOTE(review): mermaid-cli picks the output format from the extension of the
# -o path, so the render target is a real .svg file rather than /dev/null —
# confirm against the installed mmdc version.
# Arguments: $1 - mermaid diagram source
# Returns:   0 if mmdc rendered the diagram, non-zero otherwise
#######################################
render_with_mmdc() {
  local diagram=$1
  (
    workdir=$(mktemp -d) || exit 1
    trap 'rm -rf -- "$workdir"' EXIT
    printf '%s\n' "$diagram" >"$workdir/diagram.mmd" || exit 1
    rc=0
    # Silence stdout as well as stderr: anything mmdc prints on stdout would
    # corrupt the single JSON line this script emits.
    mmdc -i "$workdir/diagram.mmd" -o "$workdir/diagram.svg" \
      >/dev/null 2>&1 || rc=$?
    exit "$rc"
  )
}

#######################################
# Score one output file and print the JSON result line.
# Globals:   SCRIPT_DIR (read)
# Arguments: $1 - path to the file containing the diagram output
# Outputs:   one JSON line on stdout; usage hint on stderr if $1 is missing
# Returns:   0, including when the file does not exist
#######################################
main() {
  if (( $# < 1 )); then
    printf 'Usage: %s <output_file>\n' "${0##*/}" >&2
  fi
  # ${1:-} keeps `set -u` from aborting before any JSON is printed; the
  # empty path then falls through to the "file not found" result.
  local output_file="${1:-}"

  if [[ ! -f "$output_file" ]]; then
    echo '{"error": "file not found", "score": 0}'
    return 0
  fi

  local content
  content=$(<"$output_file")
  local char_count=${#content}

  # ─── Eval 1: has_diagram ───────────────────────────────────────────────────
  # Output contains a mermaid fenced block with sequenceDiagram
  local has_diagram=0
  if [[ "$content" == *'```mermaid'* && "$content" == *sequenceDiagram* ]]; then
    has_diagram=1
  fi

  # ─── Eval 2: diagram_parseable ─────────────────────────────────────────────
  # Extract the mermaid block and check basic syntax
  local diagram_parseable=0
  local mermaid_block=""
  if (( has_diagram == 1 )); then
    mermaid_block=$(extract_mermaid_block "$content")

    # Basic syntax checks:
    #   - Has "sequenceDiagram" keyword
    #   - Has at least one "participant" line
    #   - Has at least one "->>", "-->>", or "->" message line
    #     (every one of those arrows contains "->", so one test covers them)
    if [[ -n "$mermaid_block" \
      && "$mermaid_block" == *sequenceDiagram* \
      && "$mermaid_block" == *participant* \
      && "$mermaid_block" == *'->'* ]]; then
      diagram_parseable=1
    fi

    # If mmdc (mermaid CLI) is available, use it for real validation
    if (( diagram_parseable == 1 )) && command -v mmdc >/dev/null 2>&1; then
      if ! render_with_mmdc "$mermaid_block"; then
        diagram_parseable=0
      fi
    fi
  fi

  # ─── Eval 3: uses_real_modules ─────────────────────────────────────────────
  # Diagram mentions at least 2 real modules from the Firehose codebase
  local uses_real_modules=0
  local module_count=0
  local module
  for module in BlogController EngineeringBlog ReleaseNotes Blogex Router \
    PageController Layouts; do
    if contains_ci "$content" "$module"; then
      module_count=$((module_count + 1))
    fi
  done
  if (( module_count >= 2 )); then
    uses_real_modules=1
  fi

  # ─── Eval 4: uses_real_functions ───────────────────────────────────────────
  # Diagram mentions at least 1 real function from the codebase
  local uses_real_functions=0
  local func
  for func in posts_by_tag get_post all_posts paginate resolve_blog render \
    recent_posts; do
    if contains_ci "$content" "$func"; then
      uses_real_functions=1
      break
    fi
  done

  # ─── Eval 5: no_sidetracking ──────────────────────────────────────────────
  # Output does NOT contain code review / critique language
  local no_sidetracking=1
  local blocklist="${SCRIPT_DIR}/sidetrack_blocklist.txt"
  local line phrase
  local -a words
  if [[ -f "$blocklist" ]]; then
    # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      # Trim and collapse whitespace without xargs: xargs interprets quotes
      # and backslashes, so a phrase such as "I'd suggest" made it fail and
      # aborted the whole script under `set -e`.
      words=()
      read -r -a words <<<"$line" || true
      phrase="${words[*]-}"
      # Phrases are still matched as case-insensitive grep patterns; -e keeps
      # a phrase that starts with "-" from being parsed as an option.
      if [[ -n "$phrase" ]] && grep -qi -e "$phrase" <<<"$content"; then
        no_sidetracking=0
        break
      fi
    done <"$blocklist"
  fi

  # ─── Eval 6: concise ──────────────────────────────────────────────────────
  # Total output under 3000 characters
  local concise=0
  if (( char_count < 3000 )); then
    concise=1
  fi

  # ─── Total ─────────────────────────────────────────────────────────────────
  local score=$((has_diagram + diagram_parseable + uses_real_modules \
    + uses_real_functions + no_sidetracking + concise))

  printf '{"score":%d,"has_diagram":%d,"diagram_parseable":%d,"uses_real_modules":%d,"uses_real_functions":%d,"no_sidetracking":%d,"concise":%d,"char_count":%d}\n' \
    "$score" "$has_diagram" "$diagram_parseable" "$uses_real_modules" \
    "$uses_real_functions" "$no_sidetracking" "$concise" "$char_count"
}

main "$@"