#!/usr/bin/env bash
set -euo pipefail

# ─── score.sh ────────────────────────────────────────────────────────────────
# Score a single diagram output against 6 binary evals.
# Usage: ./scripts/score.sh <output_file>
# Prints a JSON line with pass/fail for each eval and total score.
# ─────────────────────────────────────────────────────────────────────────────

# Directory holding this script; the eval-5 blocklist is looked up next to it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# With `set -u`, a bare "$1" aborts with an opaque "unbound variable" message
# when the argument is omitted; default it and report a usage error instead.
OUTPUT_FILE="${1:-}"
if [[ -z "$OUTPUT_FILE" ]]; then
  printf 'Usage: %s <output_file>\n' "${0##*/}" >&2
  exit 2
fi

# A missing file is reported as a zero score with exit status 0, so the caller
# still receives one JSON line on stdout.
if [[ ! -f "$OUTPUT_FILE" ]]; then
  echo '{"error": "file not found", "score": 0}'
  exit 0
fi

# Command substitution strips trailing newlines, so CHAR_COUNT excludes them.
CONTENT=$(<"$OUTPUT_FILE")
CHAR_COUNT=${#CONTENT}

# ─── Eval 1: has_diagram ─────────────────────────────────────────────────────
# Output contains a mermaid fenced block with sequenceDiagram

#######################################
# Checks that the text contains both a ```mermaid fence and the
# sequenceDiagram keyword (case-sensitive, anywhere in the text).
# Arguments: $1 - full text of the output being scored
# Outputs:   "1" on stdout when both markers are present, otherwise "0"
#######################################
eval_has_diagram() {
  local text=${1-}
  # Substring tests in the shell instead of `echo | grep -q`: with pipefail,
  # grep -q exiting at the first match can kill the writer with SIGPIPE on
  # large inputs, which turns a successful match into a failed pipeline.
  if [[ "$text" == *'```mermaid'* && "$text" == *'sequenceDiagram'* ]]; then
    echo 1
  else
    echo 0
  fi
}

has_diagram=$(eval_has_diagram "$CONTENT")

# ─── Eval 2: diagram_parseable ───────────────────────────────────────────────
# Extract the mermaid block and check basic syntax

#######################################
# Prints the body of the first ```mermaid fenced block (fence lines excluded).
# The opening fence must start at the beginning of a line. The closing fence
# may carry trailing spaces, tabs or a CR, so CRLF files still terminate the
# block instead of running on to end of input.
# Arguments: $1 - full text of the output being scored
# Outputs:   block body on stdout (empty when no block is found)
#######################################
extract_mermaid_block() {
  awk '
    /^```mermaid/            { inside = 1; next }
    inside && /^```[ \t\r]*$/ { exit }
    inside                   { print }
  ' <<<"${1-}"
}

#######################################
# Decides whether a mermaid block looks like a valid sequence diagram.
# Heuristic (always applied):
#   - Has "sequenceDiagram" keyword
#   - Has at least one "participant" line
#   - Has at least one message arrow; "->" is matched, which also covers
#     "->>" and "-->>"
# If mmdc (mermaid CLI) is available and the heuristic passed, mmdc gets the
# final word.
# Arguments: $1 - extracted mermaid block (may be empty)
# Outputs:   "1" when the block is considered parseable, otherwise "0"
#######################################
eval_diagram_parseable() {
  local block=${1-}
  local verdict=0
  local workdir

  if [[ -n "$block" \
    && "$block" == *sequenceDiagram* \
    && "$block" == *participant* \
    && "$block" == *'->'* ]]; then
    verdict=1
  fi

  if ((verdict == 1)) && command -v mmdc >/dev/null 2>&1; then
    # Private temp dir: portable across GNU/BSD mktemp (X's at the end of the
    # template). If it cannot be created, keep the heuristic verdict.
    workdir=$(mktemp -d "${TMPDIR:-/tmp}/mermaid_XXXXXX") || {
      echo "$verdict"
      return 0
    }
    printf '%s\n' "$block" >"$workdir/diagram.mmd"
    # mmdc picks the output format from the file extension, so render to a
    # throwaway .svg rather than /dev/null. Both streams are silenced so that
    # mmdc progress messages cannot leak into this function's result.
    if ! mmdc -i "$workdir/diagram.mmd" -o "$workdir/diagram.svg" >/dev/null 2>&1; then
      verdict=0
    fi
    rm -rf -- "$workdir"
  fi

  echo "$verdict"
}

diagram_parseable=0
MERMAID_BLOCK=""
if ((has_diagram == 1)); then
  MERMAID_BLOCK=$(extract_mermaid_block "$CONTENT")
  diagram_parseable=$(eval_diagram_parseable "$MERMAID_BLOCK")
fi

# ─── Eval 3: uses_real_modules ───────────────────────────────────────────────
# Diagram mentions at least 2 real modules from the Firehose codebase

#######################################
# Counts how many distinct known module names occur in the text.
# Matching is a case-insensitive substring search over the whole text, and
# each module counts at most once no matter how often it appears.
# Arguments: $1 - full text of the output being scored
# Outputs:   number of distinct modules found, on stdout
#######################################
count_real_modules() {
  local text=${1-}
  local name
  local hits=0
  local -a modules=(
    BlogController EngineeringBlog ReleaseNotes Blogex
    Router PageController Layouts
  )

  for name in "${modules[@]}"; do
    # Here-string rather than `echo | grep -q`: under pipefail an early grep
    # exit can SIGPIPE the writer and make a real match look like a miss.
    if grep -qiF -- "$name" <<<"$text"; then
      hits=$((hits + 1))
    fi
  done

  echo "$hits"
}

uses_real_modules=0
module_count=$(count_real_modules "$CONTENT")
if ((module_count >= 2)); then
  uses_real_modules=1
fi

# ─── Eval 4: uses_real_functions ─────────────────────────────────────────────
# Diagram mentions at least 1 real function from the codebase

#######################################
# Checks whether any known function name occurs in the text.
# Matching is a case-insensitive substring search over the whole text, so
# e.g. "Rendering" satisfies the "render" entry.
# Arguments: $1 - full text of the output being scored
# Outputs:   "1" when at least one function name is found, otherwise "0"
#######################################
eval_uses_real_functions() {
  local text=${1-}
  local fn

  for fn in posts_by_tag get_post all_posts paginate resolve_blog render recent_posts; do
    # Here-string rather than `echo | grep -q`, which can fail spuriously
    # under pipefail when grep exits before the writer has finished.
    if grep -qiF -- "$fn" <<<"$text"; then
      echo 1
      return 0
    fi
  done

  echo 0
}

uses_real_functions=$(eval_uses_real_functions "$CONTENT")

# ─── Eval 5: no_sidetracking ────────────────────────────────────────────────
# Output does NOT contain code review / critique language

#######################################
# Checks the text against a blocklist file, one phrase per line.
# Each phrase is trimmed and has internal whitespace runs collapsed, blank
# lines are skipped, and the phrase is then matched case-insensitively as a
# grep basic regular expression anywhere in the text.
# A missing blocklist file counts as a pass.
# Arguments: $1 - full text of the output being scored
#            $2 - path to the blocklist file
# Outputs:   "0" when any phrase matches, otherwise "1"
#######################################
eval_no_sidetracking() {
  local text=${1-}
  local blocklist=${2-}
  local line phrase
  local -a words=()

  if [[ ! -f "$blocklist" ]]; then
    echo 1
    return 0
  fi

  # `|| [[ -n "$line" ]]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    # Normalise whitespace by word-splitting in the shell. Piping through
    # xargs would also interpret quotes, so a phrase such as "shouldn't"
    # makes xargs fail and, under `set -e` + pipefail, aborts the script.
    IFS=$' \t\r\n' read -r -a words <<<"$line"
    if ((${#words[@]} == 0)); then
      continue
    fi
    phrase="${words[*]}"

    # -e keeps a phrase starting with "-" from being parsed as an option.
    if grep -qi -e "$phrase" <<<"$text"; then
      echo 0
      return 0
    fi
  done <"$blocklist"

  echo 1
}

BLOCKLIST="${SCRIPT_DIR}/sidetrack_blocklist.txt"
no_sidetracking=$(eval_no_sidetracking "$CONTENT" "$BLOCKLIST")

# ─── Eval 6: concise ────────────────────────────────────────────────────────
# Total output under 3000 characters

#######################################
# Checks that a character count is strictly below a limit.
# Arguments: $1 - character count of the output
#            $2 - exclusive upper limit (optional, default 3000)
# Outputs:   "1" when count < limit, otherwise "0"
#######################################
eval_concise() {
  local count=${1:-0}
  local limit=${2:-3000}

  if ((count < limit)); then
    echo 1
  else
    echo 0
  fi
}

concise=$(eval_concise "$CHAR_COUNT")

# ─── Total ───────────────────────────────────────────────────────────────────

#######################################
# Sums the six binary eval results and prints the one-line JSON report.
# Arguments: $1 - has_diagram          (0 or 1)
#            $2 - diagram_parseable    (0 or 1)
#            $3 - uses_real_modules    (0 or 1)
#            $4 - uses_real_functions  (0 or 1)
#            $5 - no_sidetracking      (0 or 1)
#            $6 - concise              (0 or 1)
#            $7 - character count of the scored output
# Outputs:   a single JSON object on stdout; "score" is the sum of $1..$6
#######################################
emit_score_json() {
  local diagram=${1:-0} parseable=${2:-0} modules=${3:-0}
  local functions=${4:-0} focused=${5:-0} brief=${6:-0} chars=${7:-0}
  local total=$((diagram + parseable + modules + functions + focused + brief))

  # Fixed format string with %s slots keeps the JSON keys and ordering in one
  # place and avoids a wall of escaped quotes.
  printf '{"score":%s,"has_diagram":%s,"diagram_parseable":%s,"uses_real_modules":%s,"uses_real_functions":%s,"no_sidetracking":%s,"concise":%s,"char_count":%s}\n' \
    "$total" "$diagram" "$parseable" "$modules" "$functions" "$focused" "$brief" "$chars"
}

score=$((has_diagram + diagram_parseable + uses_real_modules + uses_real_functions + no_sidetracking + concise))

emit_score_json "$has_diagram" "$diagram_parseable" "$uses_real_modules" \
  "$uses_real_functions" "$no_sidetracking" "$concise" "$CHAR_COUNT"