110 lines
5.1 KiB
Bash
Executable File

#!/usr/bin/env bash
set -euo pipefail
# ─── score.sh ────────────────────────────────────────────────────────────────
# Score a single diagram output against 6 binary evals.
# Usage: ./scripts/score.sh <output_file>
# Prints a JSON line with pass/fail for each eval and total score.
#
# The script always exits 0 and reports problems (missing argument, missing
# file) as a JSON object with an "error" key and "score": 0, so callers only
# ever have to parse stdout.
#
# Content is fed to grep/awk through here-strings rather than `echo | grep -q`:
# under `pipefail`, grep -q exiting early can SIGPIPE the writer and turn a
# real match into a failed pipeline on large inputs.
# ─────────────────────────────────────────────────────────────────────────────
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR

# Names an output must mention for evals 3 and 4 (matched case-insensitively).
readonly -a REAL_MODULES=(
  BlogController EngineeringBlog ReleaseNotes Blogex Router PageController Layouts
)
readonly -a REAL_FUNCTIONS=(
  posts_by_tag get_post all_posts paginate resolve_blog render recent_posts
)

# Outputs at or above this many characters fail the "concise" eval.
readonly MAX_CHARS=3000

#######################################
# Eval 1 helper: does the text contain a mermaid fence and "sequenceDiagram"?
# Arguments: $1 - full output text
# Returns:   0 if both markers appear somewhere in the text, 1 otherwise
#######################################
check_has_diagram() {
  local content=$1
  grep -qF -e '```mermaid' <<<"$content" \
    && grep -qF -e 'sequenceDiagram' <<<"$content"
}

#######################################
# Print the body of the FIRST ```mermaid fenced block, without the fence lines.
# Later blocks are ignored; an unterminated block runs to the end of the text.
# Arguments: $1 - full output text
# Outputs:   block body on stdout (empty if there is no mermaid fence)
#######################################
extract_mermaid_block() {
  awk '
    !inside && /```mermaid/ { inside = 1; next }
    inside && /```/         { exit }
    inside                  { print }
  ' <<<"$1"
}

#######################################
# Eval 2 heuristic: cheap syntax sanity check on a mermaid block.
# Requires the "sequenceDiagram" keyword, at least one "participant", and at
# least one arrow. Every arrow form this check accepts ("->", "->>", "-->>")
# contains "->", so a single fixed-string search covers them all.
# Arguments: $1 - mermaid block body
# Returns:   0 if all three markers are present, 1 otherwise
#######################################
passes_basic_syntax() {
  local block=$1
  grep -qF -e 'sequenceDiagram' <<<"$block" \
    && grep -qF -e 'participant' <<<"$block" \
    && grep -qF -e '->' <<<"$block"
}

#######################################
# Eval 2 real validation: render the block with the mermaid CLI.
# mmdc rejects output paths without a known extension (so /dev/null does not
# work); render to a scratch .svg instead and throw it away. mmdc's stdout is
# silenced so it cannot pollute the JSON line this script prints.
# Arguments: $1 - mermaid block body
# Returns:   mmdc's exit status; 0 if no scratch directory could be created
#            (the heuristic verdict is then left to stand)
#######################################
validate_with_mmdc() {
  local block=$1 workdir rc=0
  workdir=$(mktemp -d) || return 0
  printf '%s\n' "$block" > "${workdir}/diagram.mmd"
  mmdc -i "${workdir}/diagram.mmd" -o "${workdir}/diagram.svg" \
    > /dev/null 2>&1 || rc=$?
  rm -rf -- "${workdir:?}"
  return "$rc"
}

#######################################
# Count how many of the given names occur in the text (case-insensitive,
# literal substring match).
# Arguments: $1 - text to search; $2.. - names to look for
# Outputs:   number of distinct names found, on stdout
#######################################
count_mentions() {
  local content=$1 needle hits=0
  shift
  for needle in "$@"; do
    if grep -qiF -e "$needle" <<<"$content"; then
      hits=$((hits + 1))
    fi
  done
  printf '%d\n' "$hits"
}

#######################################
# Eval 5 helper: is the text free of every phrase in the blocklist file?
# Each non-blank line of the file is one phrase. Surrounding whitespace is
# trimmed with parameter expansion (xargs would choke on apostrophes and
# quotes). Phrases are matched case-insensitively as grep basic regular
# expressions. A final line without a trailing newline is still honoured.
# Arguments: $1 - text to search; $2 - path to blocklist file
# Returns:   0 if no phrase matches or the file does not exist, 1 otherwise
#######################################
check_no_sidetracking() {
  local content=$1 blocklist=$2 phrase
  [[ -f "$blocklist" ]] || return 0
  while IFS= read -r phrase || [[ -n "$phrase" ]]; do
    phrase="${phrase#"${phrase%%[![:space:]]*}"}" # strip leading whitespace
    phrase="${phrase%"${phrase##*[![:space:]]}"}" # strip trailing whitespace
    [[ -n "$phrase" ]] || continue
    if grep -qi -e "$phrase" <<<"$content"; then
      return 1
    fi
  done < "$blocklist"
  return 0
}

#######################################
# Entry point: run all six evals on one file and print the JSON result line.
# Globals:   SCRIPT_DIR, REAL_MODULES, REAL_FUNCTIONS, MAX_CHARS (read)
# Arguments: $1 - path to the output file to score
# Outputs:   one JSON line on stdout
# Returns:   0 (errors are reported inside the JSON)
#######################################
main() {
  if (( $# < 1 )); then
    echo '{"error": "usage: score.sh <output_file>", "score": 0}'
    return 0
  fi

  local output_file=$1
  if [[ ! -f "$output_file" ]]; then
    echo '{"error": "file not found", "score": 0}'
    return 0
  fi

  local content char_count
  content=$(<"$output_file")
  char_count=${#content}

  # ─── Eval 1: has_diagram ───────────────────────────────────────────────────
  local has_diagram=0
  if check_has_diagram "$content"; then
    has_diagram=1
  fi

  # ─── Eval 2: diagram_parseable ─────────────────────────────────────────────
  # Heuristic first; if mmdc (mermaid CLI) is installed it gets the final say.
  local diagram_parseable=0 mermaid_block
  if (( has_diagram == 1 )); then
    mermaid_block=$(extract_mermaid_block "$content")
    if [[ -n "$mermaid_block" ]] && passes_basic_syntax "$mermaid_block"; then
      diagram_parseable=1
      if command -v mmdc > /dev/null 2>&1 \
        && ! validate_with_mmdc "$mermaid_block"; then
        diagram_parseable=0
      fi
    fi
  fi

  # ─── Eval 3: uses_real_modules ─────────────────────────────────────────────
  # At least 2 real modules from the Firehose codebase are mentioned.
  local uses_real_modules=0 module_count
  module_count=$(count_mentions "$content" "${REAL_MODULES[@]}")
  if (( module_count >= 2 )); then
    uses_real_modules=1
  fi

  # ─── Eval 4: uses_real_functions ───────────────────────────────────────────
  # At least 1 real function from the codebase is mentioned.
  local uses_real_functions=0 function_count
  function_count=$(count_mentions "$content" "${REAL_FUNCTIONS[@]}")
  if (( function_count >= 1 )); then
    uses_real_functions=1
  fi

  # ─── Eval 5: no_sidetracking ───────────────────────────────────────────────
  # Output does NOT contain code review / critique language.
  local no_sidetracking=0
  if check_no_sidetracking "$content" "${SCRIPT_DIR}/sidetrack_blocklist.txt"; then
    no_sidetracking=1
  fi

  # ─── Eval 6: concise ───────────────────────────────────────────────────────
  local concise=0
  if (( char_count < MAX_CHARS )); then
    concise=1
  fi

  # ─── Total ─────────────────────────────────────────────────────────────────
  local score=$((has_diagram + diagram_parseable + uses_real_modules \
    + uses_real_functions + no_sidetracking + concise))

  printf '{"score":%d,"has_diagram":%d,"diagram_parseable":%d,"uses_real_modules":%d,"uses_real_functions":%d,"no_sidetracking":%d,"concise":%d,"char_count":%d}\n' \
    "$score" "$has_diagram" "$diagram_parseable" "$uses_real_modules" \
    "$uses_real_functions" "$no_sidetracking" "$concise" "$char_count"
}

main "$@"