Add compile-time link validation for blog post internal links

- Add Blogex.LinkValidator module to validate /blog/{id}/{slug} semantics
- Add Blogex.LinkError exception with actionable error messages
- Integrate validation into Blogex.Blog via @before_compile callback
- Add unit tests (34) and integration tests (4) for link validation
- Add test fixtures (valid/invalid posts) in blogex/priv/blog/test/

Closes: validate-internal-link-semantics-in-blog-post-markdown-bodies-at-compile-time-h3hb
Closes: define-link-semantic-validation-logic-in-blogex-7syv
Closes: write-tests-for-link-semantic-validation-y30h
Closes: integrate-link-validation-into-blogexblog-compile-time-macro-1205
This commit is contained in:
Firehose Bot 2026-05-07 11:56:54 +01:00
parent c535e63b70
commit ab7a520e9e
7 changed files with 679 additions and 0 deletions

View File

@ -53,6 +53,9 @@ defmodule Blogex.Blog do
|> Enum.map(&Map.put(&1, :blog, unquote(blog_id)))
|> Enum.sort_by(& &1.date, {:desc, Date})
# Validate internal link semantics at compile time via before_compile
@before_compile Blogex.Blog
# Collect all unique tags
@tags @posts |> Enum.flat_map(& &1.tags) |> Enum.uniq() |> Enum.sort()
@ -135,4 +138,32 @@ defmodule Blogex.Blog do
end
end
end
@doc false
# Compile hook: expands, inside the module being compiled, to a call that
# validates that module's `@posts`. The blog id is read from the module's
# `:blog_id` attribute at expansion time and embedded in the generated call.
defmacro __before_compile__(%Macro.Env{module: blog_module}) do
  id = Module.get_attribute(blog_module, :blog_id)
  validator = __MODULE__

  quote do
    unquote(validator)._validate_links(@posts, unquote(id))
  end
end
@doc false
# Checks the body of every post with `Blogex.LinkValidator.validate_body/3`.
# Raises `Blogex.LinkError` for the first post that has link errors; returns
# `:ok` when every post passes.
@spec _validate_links([Blogex.Post.t()], atom()) :: :ok
def _validate_links(posts, blog_id) do
  Enum.each(posts, fn post ->
    post.body
    |> Blogex.LinkValidator.validate_body(blog_id, post_id: post.id)
    |> raise_on_link_errors(blog_id, post.id)
  end)

  :ok
end

# Turns a validator result into either `:ok` or a raised `Blogex.LinkError`.
defp raise_on_link_errors(:ok, _blog_id, _post_id), do: :ok

defp raise_on_link_errors({:error, link_errors}, blog_id, post_id) do
  raise Blogex.LinkError,
    blog: blog_id,
    post_id: post_id,
    errors: link_errors
end
end

View File

@ -0,0 +1,53 @@
defmodule Blogex.LinkError do
  @moduledoc """
  Exception raised when a blog post contains invalid internal links.

  Raised at compile time by `Blogex.Blog` when `Blogex.LinkValidator` finds
  semantic errors in post body links.

  ## Fields

    * `:blog` - the blog identifier atom (e.g., `:engineering`)
    * `:post_id` - the post slug/id that contains the invalid link
    * `:errors` - list of `{line, link, reason, meta}` tuples, where `line`
      is the number reported by the validator, `link` is the offending path,
      `reason` is a human-readable explanation and `meta` is a keyword list
      such as `[post_id: "my-post"]`

  ## Example

      raise Blogex.LinkError,
        blog: :engineering,
        post_id: "my-post",
        errors: [
          {1, "/blog/unknown/broken", "unknown blog ID: unknown", post_id: "my-post"}
        ]
  """

  defexception blog: nil, post_id: nil, errors: []

  @type t :: %__MODULE__{
          blog: atom(),
          post_id: String.t() | nil,
          errors: [{integer(), String.t(), String.t(), keyword()}]
        }

  @doc """
  Builds the exception message: a header naming the blog (and the post, when
  `post_id` is set) followed by one `line N: link - reason` entry per error.
  """
  @impl true
  def message(%__MODULE__{blog: blog, post_id: post_id, errors: errors}) do
    post_label = if is_nil(post_id), do: "", else: " (post: #{post_id})"

    # The link and the reason are separated explicitly; without a separator
    # the two run together (e.g. "/blog/unknown/brokenunknown blog ID: ...").
    errors_list =
      Enum.map_join(errors, "\n", fn {line, link, reason, _meta} ->
        " line #{line}: #{link} - #{reason}"
      end)

    """
    invalid internal blog links in #{blog}#{post_label}
    #{errors_list}
    """
  end
end

View File

@ -0,0 +1,249 @@
defmodule Blogex.LinkValidator do
  @moduledoc """
  Validates internal blog link semantics in markdown post bodies.

  Checks that internal links follow the pattern `/blog/{blog_id}/{slug}`
  where `blog_id` is a valid blog identifier and `slug` matches the
  expected format (lowercase alphanumeric with hyphens).

  This module is pure and does not depend on any stored posts - it validates
  link format and semantics only. Post existence checking is handled separately.

  ## Valid blog IDs

    * `:engineering` - maps to `/blog/engineering/`
    * `:release_notes` - maps to `/blog/releases/`

  ## Valid slug format

    * Lowercase alphanumeric characters and hyphens only
    * Must not start or end with a hyphen
    * Must not contain consecutive hyphens
    * Query strings and anchor fragments are allowed after the slug

  ## Usage

      # Validate a single link
      LinkValidator.validate_link("/blog/engineering/hello-world")
      # => :ok

      # Validate multiple links (errors carry the 1-based list position)
      LinkValidator.validate_links(["/blog/engineering/a", "/blog/bad/b"])
      # => {:error, [{2, "/blog/bad/b", "unknown blog ID: bad"}]}

      # Validate all links in a post body
      LinkValidator.validate_body(body, :engineering, post_id: "my-post")
      # => :ok or {:error, [...]}
  """

  # URL path segment => blog identifier atom. Only the keys are consulted here.
  @valid_blog_ids %{
    "engineering" => :engineering,
    "releases" => :release_notes
  }

  @slug_regex ~r/^[a-z0-9]+(-[a-z0-9]+)*$/

  @doc """
  Extracts internal blog links from a post body.

  Returns the link targets (strings) shaped like `/blog/{blog_id}/{rest}`,
  in order of appearance (markdown-style links first, then HTML anchors) and
  without duplicates. The blog ID is *not* checked here, so links to unknown
  blogs are returned and can be reported by `validate_link/1`. External links
  and non-blog internal links are ignored.

  Handles both markdown link syntax `[text](url)` and HTML `<a ... href="url">`
  anchors, regardless of where `href` appears among the tag's attributes.

  ## Examples

      iex> extract_links("<p>[link](/blog/engineering/post)</p>")
      ["/blog/engineering/post"]

      iex> extract_links("<p><a href='/blog/engineering/post'>link</a></p>")
      ["/blog/engineering/post"]

      iex> extract_links("<p>See [GitHub](https://github.com)</p>")
      []
  """
  @spec extract_links(String.t()) :: [String.t()]
  def extract_links(body) when is_binary(body) do
    markdown_links =
      ~r/\[([^\]]+)\]\(([^)]+)\)/
      |> Regex.scan(body)
      |> Enum.map(fn [_match, _text, target] -> target end)

    # `href` may follow other attributes (`<a class="x" href="...">`). The
    # optional group must end in whitespace so `data-href` is not mistaken
    # for `href`.
    html_links =
      ~r/<a\s+(?:[^>]*?\s)?href\s*=\s*["']([^"']*)["']/i
      |> Regex.scan(body)
      |> Enum.map(fn [_match, target] -> target end)

    (markdown_links ++ html_links)
    |> Enum.uniq()
    |> Enum.filter(&internal_blog_link?/1)
  end

  @doc """
  Validates a single link path.

  Returns `:ok` if the link has valid semantics, or `{:error, reason}` otherwise.
  Any query string or fragment after the slug is ignored.

  ## Examples

      iex> validate_link("/blog/engineering/hello-world")
      :ok

      iex> validate_link("/blog/unknown/post")
      {:error, "unknown blog ID: unknown"}

      iex> validate_link("/blog/engineering/My-Post")
      {:error, "slug must be lowercase alphanumeric with hyphens: My-Post"}
  """
  @spec validate_link(String.t()) :: :ok | {:error, String.t()}
  def validate_link(link) when is_binary(link) do
    case parse_blog_link(link) do
      nil ->
        {:error, "not a blog link: #{link}"}

      {blog_id, slug, _query, _fragment} ->
        if Map.has_key?(@valid_blog_ids, blog_id) do
          validate_slug(slug)
        else
          {:error, "unknown blog ID: #{blog_id}"}
        end
    end
  end

  @doc """
  Validates a list of links, returning errors tagged with their position.

  Returns `:ok` if all links are valid, or `{:error, errors}` where each
  error is `{position, link, reason}`. `position` is the 1-based index of the
  link within `links`; it is not a line number in any source file.

  ## Examples

      iex> validate_links(["/blog/engineering/a", "/blog/bad/b"])
      {:error, [{2, "/blog/bad/b", "unknown blog ID: bad"}]}
  """
  @spec validate_links([String.t()]) :: :ok | {:error, [{integer(), String.t(), String.t()}]}
  def validate_links(links) when is_list(links) do
    # Valid links return `:ok`, which does not match the second generator's
    # pattern and is therefore filtered out.
    errors =
      for {link, position} <- Enum.with_index(links, 1),
          {:error, reason} <- [validate_link(link)] do
        {position, link, reason}
      end

    case errors do
      [] -> :ok
      _ -> {:error, errors}
    end
  end

  @doc """
  Validates all internal blog links in a post body.

  Extracts links from the body with `extract_links/1`, validates each one,
  and returns the result with optional post context. The `blog` argument
  must be an atom; it is not otherwise used by the validation.

  ## Options

    * `:post_id` - the post slug/id, included in error tuples for debugging

  ## Examples

      iex> validate_body("<p>[link](/blog/engineering/post)</p>", :engineering)
      :ok

      iex> validate_body("<p>[link](/blog/unknown/post)</p>", :engineering)
      {:error, [{1, "/blog/unknown/post", "unknown blog ID: unknown", post_id: nil}]}

      iex> validate_body("<p>[link](/blog/unknown/post)</p>", :engineering, post_id: "my-post")
      {:error, [{1, "/blog/unknown/post", "unknown blog ID: unknown", post_id: "my-post"}]}
  """
  @spec validate_body(String.t(), atom(), keyword()) ::
          :ok | {:error, [{integer(), String.t(), String.t(), keyword()}]}
  def validate_body(body, blog, opts \\ []) when is_binary(body) and is_atom(blog) do
    post_id = Keyword.get(opts, :post_id)

    # `:ok` does not match the clause and is returned unchanged.
    with {:error, errors} <- body |> extract_links() |> validate_links() do
      enriched =
        Enum.map(errors, fn {position, link, reason} ->
          {position, link, reason, [post_id: post_id]}
        end)

      {:error, enriched}
    end
  end

  @doc false
  # Splits `/blog/{blog_id}/{slug}[?query][#fragment]` into its parts. Returns
  # `nil` when the path is not of that shape. The blog ID is returned as-is,
  # whether or not it is a known blog.
  @spec parse_blog_link(String.t()) ::
          {String.t(), String.t(), String.t() | nil, String.t() | nil} | nil
  def parse_blog_link(path) do
    case String.split(path, "/", parts: 4) do
      ["", "blog", blog_id, rest] ->
        {slug, suffix} = split_slug(rest)
        {query, fragment} = parse_query_fragment(suffix)
        {blog_id, slug, query, fragment}

      _other ->
        nil
    end
  end

  # --- Private helpers ---

  @spec internal_blog_link?(String.t()) :: boolean()
  defp internal_blog_link?(path), do: parse_blog_link(path) != nil

  # Splits at the first `?` or `#`. The delimiter stays at the head of the
  # suffix so `parse_query_fragment/1` can tell a query from a fragment.
  @spec split_slug(String.t()) :: {String.t(), String.t()}
  defp split_slug(rest) do
    case :binary.match(rest, ["?", "#"]) do
      {pos, _len} ->
        <<slug::binary-size(pos), suffix::binary>> = rest
        {slug, suffix}

      :nomatch ->
        {rest, ""}
    end
  end

  # The suffix produced by `split_slug/1` is either empty or starts with
  # `?` or `#`, so these three clauses are exhaustive.
  @spec parse_query_fragment(String.t()) :: {String.t() | nil, String.t() | nil}
  defp parse_query_fragment(""), do: {nil, nil}
  defp parse_query_fragment("#" <> fragment), do: {nil, fragment}

  defp parse_query_fragment("?" <> query) do
    case String.split(query, "#", parts: 2) do
      [q, f] -> {q, f}
      [q] -> {q, nil}
    end
  end

  @spec validate_slug(String.t()) :: :ok | {:error, String.t()}
  defp validate_slug(""), do: {:error, "empty slug"}

  defp validate_slug(slug) do
    if Regex.match?(@slug_regex, slug) do
      :ok
    else
      {:error, "slug must be lowercase alphanumeric with hyphens: #{slug}"}
    end
  end
end

View File

@ -0,0 +1,7 @@
%{
title: "Valid Post",
author: "Test Author",
description: "A post with valid links"
}
---
This post has [valid links](/blog/engineering/hello-world) and [release notes](/blog/releases/v1-0-0).

View File

@ -0,0 +1,7 @@
%{
title: "Invalid Post",
author: "Test Author",
description: "A post with invalid links"
}
---
This post has [invalid links](/blog/unknown/broken) and [bad slug](/blog/engineering/My-Post).

View File

@ -0,0 +1,96 @@
defmodule Blogex.BlogIntegrationTest do
  # Not async: each test compiles a module from a file in the shared tmp dir.
  use ExUnit.Case

  @valid_fixture "priv/blog/test/2026/01-01-valid-post.md"
  @invalid_fixture "priv/blog/test/2026/01-02-invalid-post.md"

  describe "compile-time link validation" do
    test "raises LinkError for invalid blog ID in link" do
      assert_raise Blogex.LinkError, ~r/unknown blog ID: unknown/, fn ->
        compile_blog("test_blog_invalid.ex", "TestBlogInvalidBlogId", @invalid_fixture)
      end
    end

    test "raises LinkError for invalid slug in link" do
      assert_raise Blogex.LinkError,
                   ~r/slug must be lowercase alphanumeric with hyphens: My-Post/,
                   fn ->
                     compile_blog(
                       "test_blog_invalid_slug.ex",
                       "TestBlogInvalidSlug",
                       @invalid_fixture
                     )
                   end
    end

    test "compiles successfully with valid links" do
      assert [{TestBlogValid, _bytecode}] =
               compile_blog("test_blog_valid.ex", "TestBlogValid", @valid_fixture)

      assert TestBlogValid.title() == "Test Blog"
    end

    test "LinkError message includes post context" do
      exception =
        assert_raise Blogex.LinkError, fn ->
          compile_blog("test_blog_msg.ex", "TestBlogErrorMsg", @invalid_fixture)
        end

      msg = Exception.message(exception)
      assert msg =~ "invalid internal blog links"
      assert msg =~ "invalid-post"
      assert msg =~ "/blog/unknown/broken"
      assert msg =~ "/blog/engineering/My-Post"
    end
  end

  # Writes a module that does `use Blogex.Blog` on `fixture` into a temp file
  # and compiles it, returning what `Code.compile_file/2` returns. The temp
  # file is removed in `after`, so it is cleaned up even when compilation
  # raises (which is the expected outcome for the invalid fixture).
  defp compile_blog(file_name, module_name, fixture) do
    tmp_file = Path.join(System.tmp_dir!(), file_name)

    File.write!(tmp_file, """
    defmodule #{module_name} do
      use Blogex.Blog,
        blog_id: :test,
        app: :blogex,
        from: "#{fixture}",
        title: "Test Blog",
        base_path: "/blog/test"
    end
    """)

    try do
      Code.compile_file(tmp_file, __ENV__.file)
    after
      # Best-effort cleanup: a failed delete must not mask the test result.
      File.rm(tmp_file)
    end
  end
end

View File

@ -0,0 +1,236 @@
defmodule Blogex.LinkValidatorTest do
  # The functions under test are pure, so these tests can run concurrently.
  use ExUnit.Case, async: true

  alias Blogex.LinkValidator

  describe "extract_links/1" do
    test "extracts internal blog links from markdown body" do
      body =
        "<p>Check out [hello world](/blog/engineering/hello-world) and [release v1](/blog/releases/v1-0-0).</p>"

      assert LinkValidator.extract_links(body) == [
               "/blog/engineering/hello-world",
               "/blog/releases/v1-0-0"
             ]
    end

    test "ignores external links" do
      body = "<p>See [GitHub](https://github.com) and [internal](/blog/engineering/post).</p>"
      assert LinkValidator.extract_links(body) == ["/blog/engineering/post"]
    end

    test "ignores non-blog internal links" do
      body = "<p>See [/about](/about) and [/blog/engineering/post](/blog/engineering/post).</p>"
      assert LinkValidator.extract_links(body) == ["/blog/engineering/post"]
    end

    test "returns empty list when no internal blog links" do
      body = "<p>Just external links: [GitHub](https://github.com).</p>"
      assert LinkValidator.extract_links(body) == []
    end

    test "handles multiple links on one line" do
      body = "<p>[a](/blog/engineering/a) [b](/blog/releases/b) [c](/blog/engineering/c)</p>"

      assert LinkValidator.extract_links(body) == [
               "/blog/engineering/a",
               "/blog/releases/b",
               "/blog/engineering/c"
             ]
    end

    test "handles links with query strings" do
      body = "<p>[link](/blog/engineering/post?foo=bar)</p>"
      assert LinkValidator.extract_links(body) == ["/blog/engineering/post?foo=bar"]
    end

    test "handles links with anchor fragments" do
      body = "<p>[link](/blog/engineering/post#section)</p>"
      assert LinkValidator.extract_links(body) == ["/blog/engineering/post#section"]
    end

    test "handles empty body" do
      assert LinkValidator.extract_links("") == []
    end
  end

  describe "validate_link/1" do
    test "validates correct engineering link" do
      assert LinkValidator.validate_link("/blog/engineering/my-post") == :ok
    end

    test "validates correct releases link" do
      assert LinkValidator.validate_link("/blog/releases/v1-0-0") == :ok
    end

    test "rejects unknown blog ID" do
      assert LinkValidator.validate_link("/blog/unknown/post") ==
               {:error, "unknown blog ID: unknown"}
    end

    test "rejects uppercase blog ID" do
      assert LinkValidator.validate_link("/blog/Engineering/post") ==
               {:error, "unknown blog ID: Engineering"}
    end

    test "rejects empty slug" do
      assert LinkValidator.validate_link("/blog/engineering/") ==
               {:error, "empty slug"}
    end

    test "rejects slug with uppercase letters" do
      assert LinkValidator.validate_link("/blog/engineering/My-Post") ==
               {:error, "slug must be lowercase alphanumeric with hyphens: My-Post"}
    end

    test "rejects slug with special characters" do
      assert LinkValidator.validate_link("/blog/engineering/hello@world") ==
               {:error, "slug must be lowercase alphanumeric with hyphens: hello@world"}
    end

    test "rejects slug with spaces" do
      assert LinkValidator.validate_link("/blog/engineering/hello world") ==
               {:error, "slug must be lowercase alphanumeric with hyphens: hello world"}
    end

    test "allows single-word slug" do
      assert LinkValidator.validate_link("/blog/engineering/hello") == :ok
    end

    test "allows hyphenated slug" do
      assert LinkValidator.validate_link("/blog/engineering/my-cool-post") == :ok
    end

    test "allows slug with numbers" do
      assert LinkValidator.validate_link("/blog/releases/v1-2-3") == :ok
    end

    test "rejects slug starting with hyphen" do
      assert LinkValidator.validate_link("/blog/engineering/-post") ==
               {:error, "slug must be lowercase alphanumeric with hyphens: -post"}
    end

    test "rejects slug ending with hyphen" do
      assert LinkValidator.validate_link("/blog/engineering/post-") ==
               {:error, "slug must be lowercase alphanumeric with hyphens: post-"}
    end

    test "rejects consecutive hyphens" do
      assert LinkValidator.validate_link("/blog/engineering/post--name") ==
               {:error, "slug must be lowercase alphanumeric with hyphens: post--name"}
    end

    test "returns :ok for link with query string and valid slug" do
      assert LinkValidator.validate_link("/blog/engineering/post?foo=bar") == :ok
    end

    test "returns :ok for link with anchor fragment and valid slug" do
      assert LinkValidator.validate_link("/blog/engineering/post#section") == :ok
    end

    test "rejects non-blog path" do
      assert LinkValidator.validate_link("/about") ==
               {:error, "not a blog link: /about"}
    end

    test "rejects malformed link" do
      assert LinkValidator.validate_link("not-a-url") ==
               {:error, "not a blog link: not-a-url"}
    end
  end

  describe "validate_links/1" do
    test "returns :ok when all links are valid" do
      links = [
        "/blog/engineering/hello-world",
        "/blog/releases/v1-0-0"
      ]

      assert LinkValidator.validate_links(links) == :ok
    end

    test "returns errors for invalid links" do
      links = [
        "/blog/engineering/hello-world",
        "/blog/unknown/post",
        "/blog/releases/My-Post"
      ]

      assert LinkValidator.validate_links(links) == {
               :error,
               [
                 {2, "/blog/unknown/post", "unknown blog ID: unknown"},
                 {3, "/blog/releases/My-Post",
                  "slug must be lowercase alphanumeric with hyphens: My-Post"}
               ]
             }
    end

    test "returns :ok for empty list" do
      assert LinkValidator.validate_links([]) == :ok
    end

    # The reported number is the 1-based position of the link in the list.
    test "reports line numbers correctly" do
      links = [
        "/blog/engineering/ok",
        "/blog/bad/slug",
        "/blog/releases/ok"
      ]

      assert LinkValidator.validate_links(links) == {
               :error,
               [{2, "/blog/bad/slug", "unknown blog ID: bad"}]
             }
    end
  end

  describe "validate_body/3" do
    test "returns :ok when body has no internal blog links" do
      body = "<p>Just text, no links.</p>"
      assert LinkValidator.validate_body(body, :engineering) == :ok
    end

    test "returns :ok when all links are valid" do
      body = "<p>[link](/blog/engineering/post)</p>"
      assert LinkValidator.validate_body(body, :engineering) == :ok
    end

    test "returns errors with post context" do
      body = "<p>[link](/blog/unknown/post)</p>"

      assert LinkValidator.validate_body(body, :engineering) == {
               :error,
               [
                 {
                   1,
                   "/blog/unknown/post",
                   "unknown blog ID: unknown",
                   post_id: nil
                 }
               ]
             }
    end

    test "includes post_id in error tuples when provided" do
      body = "<p>[link](/blog/unknown/post)</p>"

      assert LinkValidator.validate_body(body, :engineering, post_id: "test-post") == {
               :error,
               [
                 {
                   1,
                   "/blog/unknown/post",
                   "unknown blog ID: unknown",
                   post_id: "test-post"
                 }
               ]
             }
    end
  end
end