diff --git a/blogex/lib/blogex/blog.ex b/blogex/lib/blogex/blog.ex
index 01669b0..7926dbc 100644
--- a/blogex/lib/blogex/blog.ex
+++ b/blogex/lib/blogex/blog.ex
@@ -53,6 +53,9 @@ defmodule Blogex.Blog do
              |> Enum.map(&Map.put(&1, :blog, unquote(blog_id)))
              |> Enum.sort_by(& &1.date, {:desc, Date})
 
+      # Validate internal link semantics at compile time via before_compile
+      @before_compile Blogex.Blog
+
       # Collect all unique tags
       @tags @posts |> Enum.flat_map(& &1.tags) |> Enum.uniq() |> Enum.sort()
 
@@ -135,4 +138,32 @@ defmodule Blogex.Blog do
       end
     end
   end
+
+  @doc false
+  defmacro __before_compile__(env) do
+    blog_id = Module.get_attribute(env.module, :blog_id)
+
+    quote do
+      unquote(__MODULE__)._validate_links(@posts, unquote(blog_id))
+    end
+  end
+
+  @doc false
+  @spec _validate_links([Blogex.Post.t()], atom()) :: :ok
+  def _validate_links(posts, blog_id) do
+    Enum.each(posts, fn post ->
+      case Blogex.LinkValidator.validate_body(post.body, blog_id, post_id: post.id) do
+        :ok ->
+          :ok
+
+        {:error, errors} ->
+          raise Blogex.LinkError,
+            blog: blog_id,
+            post_id: post.id,
+            errors: errors
+      end
+    end)
+
+    :ok
+  end
 end
diff --git a/blogex/lib/blogex/link_error.ex b/blogex/lib/blogex/link_error.ex
new file mode 100644
index 0000000..8ba9ecb
--- /dev/null
+++ b/blogex/lib/blogex/link_error.ex
@@ -0,0 +1,56 @@
+defmodule Blogex.LinkError do
+  @moduledoc """
+  Exception raised when a blog post contains invalid internal links.
+
+  Raised at compile time by `Blogex.Blog` when `Blogex.LinkValidator` finds
+  semantic errors in post body links.
+
+  ## Fields
+
+    * `:blog` — the blog identifier atom (e.g., `:engineering`)
+    * `:post_id` — the post slug/id that contains the invalid link
+    * `:errors` — list of `{position, link, reason, meta}` tuples; `position`
+      is the 1-based index of the link among the links extracted from the
+      post (not a line number in the body) and `meta` is a keyword list
+      such as `[post_id: "my-post"]`
+
+  ## Example
+
+      raise Blogex.LinkError,
+        blog: :engineering,
+        post_id: "my-post",
+        errors: [
+          {1, "/blog/unknown/broken", "unknown blog ID: unknown", post_id: "my-post"}
+        ]
+  """
+
+  defexception blog: nil, post_id: nil, errors: []
+
+  @type t :: %__MODULE__{
+          blog: atom(),
+          post_id: String.t() | nil,
+          errors: [{integer(), String.t(), String.t(), keyword()}]
+        }
+
+  @impl true
+  def message(%__MODULE__{blog: blog, post_id: post_id, errors: errors}) do
+    post_label =
+      case post_id do
+        nil -> ""
+        id -> " (post: #{id})"
+      end
+
+    # The leading integer is the link's position in the extracted list, not a
+    # line of the post body, so it is labelled "link" rather than "line".
+    errors_list =
+      Enum.map_join(errors, "\n", fn {position, link, reason, _meta} ->
+        "  link #{position}: #{link} — #{reason}"
+      end)
+
+    """
+    invalid internal blog links in #{blog}#{post_label}
+
+    #{errors_list}
+    """
+  end
+end
diff --git a/blogex/lib/blogex/link_validator.ex b/blogex/lib/blogex/link_validator.ex
new file mode 100644
index 0000000..bb84416
--- /dev/null
+++ b/blogex/lib/blogex/link_validator.ex
@@ -0,0 +1,242 @@
+defmodule Blogex.LinkValidator do
+  @moduledoc """
+  Validates internal blog link semantics in markdown post bodies.
+
+  Checks that internal links follow the pattern `/blog/{blog_id}/{slug}`
+  where `blog_id` is a valid blog identifier and `slug` matches the
+  expected format (lowercase alphanumeric with hyphens).
+
+  This module is pure and does not depend on any stored posts — it validates
+  link format and semantics only. Post existence checking is handled separately.
+
+  ## Valid blog IDs
+
+    * `:engineering` — maps to `/blog/engineering/`
+    * `:release_notes` — maps to `/blog/releases/`
+
+  ## Valid slug format
+
+    * Lowercase alphanumeric characters and hyphens only
+    * Must not start or end with a hyphen
+    * Must not contain consecutive hyphens
+    * Query strings and anchor fragments are allowed after the slug
+
+  ## Error positions
+
+  The integer that leads every error tuple is the 1-based position of the
+  link in the list that was validated. For `validate_body/3` that list is
+  the de-duplicated result of `extract_links/1`, so the number is not a
+  line number in the post body.
+
+  ## Usage
+
+      # Validate a single link
+      LinkValidator.validate_link("/blog/engineering/hello-world")
+      # => :ok
+
+      # Validate multiple links; errors carry each link's position
+      LinkValidator.validate_links(["/blog/engineering/a", "/blog/bad/b"])
+      # => {:error, [{2, "/blog/bad/b", "unknown blog ID: bad"}]}
+
+      # Validate all links in a post body
+      LinkValidator.validate_body(body, :engineering, post_id: "my-post")
+      # => :ok or {:error, [...]}
+  """
+
+  @valid_blog_ids %{
+    "engineering" => :engineering,
+    "releases" => :release_notes
+  }
+
+  # \A and \z anchor the whole string; `$` would also accept a trailing newline.
+  @slug_regex ~r/\A[a-z0-9]+(?:-[a-z0-9]+)*\z/
+
+  @doc """
+  Extracts internal blog links from a markdown body.
+
+  Returns the de-duplicated link paths (markdown links first, then HTML
+  anchors) that have the shape `/blog/{blog_id}/{slug}`. The blog ID is not
+  checked here, so links to unknown blogs are returned and can be reported
+  by `validate_link/1`. External links and non-blog internal links are
+  ignored.
+
+  Handles both markdown link syntax `[text](url)`, with or without a quoted
+  title, and HTML `<a href="url">` anchors.
+
+  ## Examples
+
+      iex> extract_links("[link](/blog/engineering/post)")
+      ["/blog/engineering/post"]
+
+      iex> extract_links(~s(<a href="/blog/engineering/post">link</a>))
+      ["/blog/engineering/post"]
+
+      iex> extract_links(~s|[link](/blog/engineering/post "Title")|)
+      ["/blog/engineering/post"]
+
+      iex> extract_links("See [GitHub](https://github.com)")
+      []
+  """
+  @spec extract_links(String.t()) :: [String.t()]
+  def extract_links(body) when is_binary(body) do
+    markdown_links =
+      ~r/\[[^\]]+\]\(([^)]+)\)/
+      |> Regex.scan(body, capture: :all_but_first)
+      |> Enum.map(fn [destination] -> strip_title(destination) end)
+
+    html_links =
+      ~r/<a\s[^>]*?href\s*=\s*["']([^"']+)["']/i
+      |> Regex.scan(body, capture: :all_but_first)
+      |> Enum.map(fn [href] -> href end)
+
+    (markdown_links ++ html_links)
+    |> Enum.uniq()
+    |> Enum.filter(&internal_blog_link?/1)
+  end
+
+  # Drops an optional quoted markdown title (`/path "Title"` -> `/path`) so the
+  # title is not validated as part of the slug.
+  defp strip_title(destination) do
+    Regex.replace(~r/\s+(?:"[^"]*"|'[^']*')\s*\z/, destination, "")
+  end
+
+  defp internal_blog_link?(path), do: parse_blog_link(path) != nil
+
+  @doc """
+  Validates a single link path.
+
+  Returns `:ok` if the link has valid semantics, or `{:error, reason}` otherwise.
+
+  ## Examples
+
+      iex> validate_link("/blog/engineering/hello-world")
+      :ok
+
+      iex> validate_link("/blog/unknown/post")
+      {:error, "unknown blog ID: unknown"}
+
+      iex> validate_link("/blog/engineering/My-Post")
+      {:error, "slug must be lowercase alphanumeric with hyphens: My-Post"}
+  """
+  @spec validate_link(String.t()) :: :ok | {:error, String.t()}
+  def validate_link(link) when is_binary(link) do
+    case parse_blog_link(link) do
+      nil ->
+        {:error, "not a blog link: #{link}"}
+
+      {blog_id, slug, _query, _fragment} ->
+        if Map.has_key?(@valid_blog_ids, blog_id) do
+          validate_slug(slug)
+        else
+          {:error, "unknown blog ID: #{blog_id}"}
+        end
+    end
+  end
+
+  @doc """
+  Validates a list of links, returning errors tagged with their position.
+
+  Returns `:ok` if all links are valid, or `{:error, errors}` where each
+  error is `{position, link, reason}` and `position` is the 1-based index of
+  the link in `links`.
+
+  ## Examples
+
+      iex> validate_links(["/blog/engineering/a", "/blog/bad/b"])
+      {:error, [{2, "/blog/bad/b", "unknown blog ID: bad"}]}
+  """
+  @spec validate_links([String.t()]) :: :ok | {:error, [{integer(), String.t(), String.t()}]}
+  def validate_links(links) when is_list(links) do
+    errors =
+      for {link, position} <- Enum.with_index(links, 1),
+          {:error, reason} <- [validate_link(link)] do
+        {position, link, reason}
+      end
+
+    case errors do
+      [] -> :ok
+      _ -> {:error, errors}
+    end
+  end
+
+  @doc """
+  Validates all internal blog links in a post body.
+
+  Extracts links from the body, validates each one, and returns the result
+  with optional post context. `blog` identifies the blog that owns the post;
+  it must be an atom and does not affect which links are accepted.
+
+  ## Options
+
+    * `:post_id` — the post slug/id, included in error tuples for debugging
+
+  ## Examples
+
+      iex> validate_body("[link](/blog/engineering/post)", :engineering)
+      :ok
+
+      iex> validate_body("[link](/blog/unknown/post)", :engineering)
+      {:error, [{1, "/blog/unknown/post", "unknown blog ID: unknown", post_id: nil}]}
+
+      iex> validate_body("[link](/blog/unknown/post)", :engineering, post_id: "my-post")
+      {:error, [{1, "/blog/unknown/post", "unknown blog ID: unknown", post_id: "my-post"}]}
+  """
+  @spec validate_body(String.t(), atom(), keyword()) ::
+          :ok | {:error, [{integer(), String.t(), String.t(), keyword()}]}
+  def validate_body(body, blog, opts \\ []) when is_binary(body) and is_atom(blog) do
+    post_id = Keyword.get(opts, :post_id)
+
+    with {:error, errors} <- body |> extract_links() |> validate_links() do
+      {:error,
+       Enum.map(errors, fn {position, link, reason} ->
+         {position, link, reason, [post_id: post_id]}
+       end)}
+    end
+  end
+
+  # --- Internal helpers ---
+
+  @doc false
+  @spec parse_blog_link(String.t()) ::
+          {String.t(), String.t(), String.t() | nil, String.t() | nil} | nil
+  def parse_blog_link(path) do
+    # Expect exactly /blog/{id}/{rest}; `rest` keeps any further "/" so that
+    # nested paths are rejected by slug validation instead of being truncated.
+    case String.split(path, "/", parts: 4) do
+      ["", "blog", blog_id, rest] ->
+        {slug, query, fragment} = split_slug(rest)
+        {blog_id, slug, query, fragment}
+
+      _ ->
+        nil
+    end
+  end
+
+  # Splits `slug?query#fragment`. The fragment is cut off first because a "?"
+  # that appears after "#" belongs to the fragment.
+  @spec split_slug(String.t()) :: {String.t(), String.t() | nil, String.t() | nil}
+  defp split_slug(rest) do
+    {before_fragment, fragment} = split_once(rest, "#")
+    {slug, query} = split_once(before_fragment, "?")
+    {slug, query, fragment}
+  end
+
+  @spec split_once(String.t(), String.t()) :: {String.t(), String.t() | nil}
+  defp split_once(string, separator) do
+    case String.split(string, separator, parts: 2) do
+      [head, tail] -> {head, tail}
+      [head] -> {head, nil}
+    end
+  end
+
+  @spec validate_slug(String.t()) :: :ok | {:error, String.t()}
+  defp validate_slug(""), do: {:error, "empty slug"}
+
+  defp validate_slug(slug) do
+    if Regex.match?(@slug_regex, slug) do
+      :ok
+    else
+      {:error, "slug must be lowercase alphanumeric with hyphens: #{slug}"}
+    end
+  end
+end
diff --git a/blogex/priv/blog/test/2026/01-01-valid-post.md b/blogex/priv/blog/test/2026/01-01-valid-post.md
new file mode 100644
index 0000000..c892966
--- /dev/null
+++ b/blogex/priv/blog/test/2026/01-01-valid-post.md
@@ -0,0 +1,7 @@
+%{
+  title: "Valid Post",
+  author: "Test Author",
+  description: "A post with valid links"
+}
+---
+This post has [valid links](/blog/engineering/hello-world) and [release notes](/blog/releases/v1-0-0).
diff --git a/blogex/priv/blog/test/2026/01-02-invalid-post.md b/blogex/priv/blog/test/2026/01-02-invalid-post.md
new file mode 100644
index 0000000..ff3b2d8
--- /dev/null
+++ b/blogex/priv/blog/test/2026/01-02-invalid-post.md
@@ -0,0 +1,7 @@
+%{
+  title: "Invalid Post",
+  author: "Test Author",
+  description: "A post with invalid links"
+}
+---
+This post has [invalid links](/blog/unknown/broken) and [bad slug](/blog/engineering/My-Post).
diff --git a/blogex/test/blogex/blog_integration_test.exs b/blogex/test/blogex/blog_integration_test.exs
new file mode 100644
index 0000000..5e6ee61
--- /dev/null
+++ b/blogex/test/blogex/blog_integration_test.exs
@@ -0,0 +1,74 @@
+defmodule Blogex.BlogIntegrationTest do
+  use ExUnit.Case
+
+  @valid_fixture "priv/blog/test/2026/01-01-valid-post.md"
+  @invalid_fixture "priv/blog/test/2026/01-02-invalid-post.md"
+
+  describe "compile-time link validation" do
+    test "raises LinkError for invalid blog ID in link" do
+      exception =
+        assert_raise Blogex.LinkError, fn ->
+          compile_blog("TestBlogInvalidBlogId", @invalid_fixture)
+        end
+
+      assert Exception.message(exception) =~ "unknown blog ID: unknown"
+    end
+
+    test "raises LinkError for invalid slug in link" do
+      exception =
+        assert_raise Blogex.LinkError, fn ->
+          compile_blog("TestBlogInvalidSlug", @invalid_fixture)
+        end
+
+      assert Exception.message(exception) =~
+               "slug must be lowercase alphanumeric with hyphens: My-Post"
+    end
+
+    test "compiles successfully with valid links" do
+      assert [{TestBlogValid, _bytecode}] = compile_blog("TestBlogValid", @valid_fixture)
+      assert TestBlogValid.title() == "Test Blog"
+    end
+
+    test "LinkError message includes post context" do
+      exception =
+        assert_raise Blogex.LinkError, fn ->
+          compile_blog("TestBlogErrorMsg", @invalid_fixture)
+        end
+
+      msg = Exception.message(exception)
+
+      assert msg =~ "invalid internal blog links"
+      assert msg =~ "invalid-post"
+      assert msg =~ "/blog/unknown/broken"
+      assert msg =~ "/blog/engineering/My-Post"
+    end
+  end
+
+  # Writes a one-off blog module that reads `fixture`, compiles it and returns
+  # the result of `Code.compile_file/1`. The temporary file is removed even
+  # when compilation raises, so a failing test leaves nothing behind.
+  defp compile_blog(module_name, fixture) do
+    tmp_file =
+      Path.join(
+        System.tmp_dir!(),
+        "blogex_#{module_name}_#{System.unique_integer([:positive])}.ex"
+      )
+
+    File.write!(tmp_file, """
+    defmodule #{module_name} do
+      use Blogex.Blog,
+        blog_id: :test,
+        app: :blogex,
+        from: "#{fixture}",
+        title: "Test Blog",
+        base_path: "/blog/test"
+    end
+    """)
+
+    try do
+      Code.compile_file(tmp_file)
+    after
+      File.rm(tmp_file)
+    end
+  end
+end
diff --git a/blogex/test/blogex/link_validator_test.exs b/blogex/test/blogex/link_validator_test.exs
new file mode 100644
index 0000000..5b72413
--- /dev/null
+++ b/blogex/test/blogex/link_validator_test.exs
@@ -0,0 +1,242 @@
+defmodule Blogex.LinkValidatorTest do
+  use ExUnit.Case, async: true
+  alias Blogex.LinkValidator
+
+  describe "extract_links/1" do
+    test "extracts internal blog links from markdown body" do
+      body =
+        "\n\nCheck out [hello world](/blog/engineering/hello-world) and [release v1](/blog/releases/v1-0-0).\n\n"
+
+      assert LinkValidator.extract_links(body) == [
+               "/blog/engineering/hello-world",
+               "/blog/releases/v1-0-0"
+             ]
+    end
+
+    test "ignores external links" do
+      body = "\n\nSee [GitHub](https://github.com) and [internal](/blog/engineering/post).\n\n"
+
+      assert LinkValidator.extract_links(body) == ["/blog/engineering/post"]
+    end
+
+    test "ignores non-blog internal links" do
+      body = "\n\nSee [/about](/about) and [/blog/engineering/post](/blog/engineering/post).\n\n"
+
+      assert LinkValidator.extract_links(body) == ["/blog/engineering/post"]
+    end
+
+    test "returns empty list when no internal blog links" do
+      body = "\n\nJust external links: [GitHub](https://github.com).\n\n"
+
+      assert LinkValidator.extract_links(body) == []
+    end
+
+    test "handles multiple links on one line" do
+      body = "\n\n[a](/blog/engineering/a) [b](/blog/releases/b) [c](/blog/engineering/c)\n\n"
+
+      assert LinkValidator.extract_links(body) == [
+               "/blog/engineering/a",
+               "/blog/releases/b",
+               "/blog/engineering/c"
+             ]
+    end
+
+    test "handles links with query strings" do
+      body = "\n\n[link](/blog/engineering/post?foo=bar)\n\n"
+
+      assert LinkValidator.extract_links(body) == ["/blog/engineering/post?foo=bar"]
+    end
+
+    test "handles links with anchor fragments" do
+      body = "\n\n[link](/blog/engineering/post#section)\n\n"
+
+      assert LinkValidator.extract_links(body) == ["/blog/engineering/post#section"]
+    end
+
+    test "extracts links from HTML anchors" do
+      body = ~s(<a href="/blog/engineering/post">link</a>)
+
+      assert LinkValidator.extract_links(body) == ["/blog/engineering/post"]
+    end
+
+    test "handles empty body" do
+      assert LinkValidator.extract_links("") == []
+    end
+  end
+
+  describe "validate_link/1" do
+    test "validates correct engineering link" do
+      assert LinkValidator.validate_link("/blog/engineering/my-post") == :ok
+    end
+
+    test "validates correct releases link" do
+      assert LinkValidator.validate_link("/blog/releases/v1-0-0") == :ok
+    end
+
+    test "rejects unknown blog ID" do
+      assert LinkValidator.validate_link("/blog/unknown/post") ==
+               {:error, "unknown blog ID: unknown"}
+    end
+
+    test "rejects uppercase blog ID" do
+      assert LinkValidator.validate_link("/blog/Engineering/post") ==
+               {:error, "unknown blog ID: Engineering"}
+    end
+
+    test "rejects empty slug" do
+      assert LinkValidator.validate_link("/blog/engineering/") ==
+               {:error, "empty slug"}
+    end
+
+    test "rejects slug with uppercase letters" do
+      assert LinkValidator.validate_link("/blog/engineering/My-Post") ==
+               {:error, "slug must be lowercase alphanumeric with hyphens: My-Post"}
+    end
+
+    test "rejects slug with special characters" do
+      assert LinkValidator.validate_link("/blog/engineering/hello@world") ==
+               {:error, "slug must be lowercase alphanumeric with hyphens: hello@world"}
+    end
+
+    test "rejects slug with spaces" do
+      assert LinkValidator.validate_link("/blog/engineering/hello world") ==
+               {:error, "slug must be lowercase alphanumeric with hyphens: hello world"}
+    end
+
+    test "allows single-word slug" do
+      assert LinkValidator.validate_link("/blog/engineering/hello") == :ok
+    end
+
+    test "allows hyphenated slug" do
+      assert LinkValidator.validate_link("/blog/engineering/my-cool-post") == :ok
+    end
+
+    test "allows slug with numbers" do
+      assert LinkValidator.validate_link("/blog/releases/v1-2-3") == :ok
+    end
+
+    test "rejects slug starting with hyphen" do
+      assert LinkValidator.validate_link("/blog/engineering/-post") ==
+               {:error, "slug must be lowercase alphanumeric with hyphens: -post"}
+    end
+
+    test "rejects slug ending with hyphen" do
+      assert LinkValidator.validate_link("/blog/engineering/post-") ==
+               {:error, "slug must be lowercase alphanumeric with hyphens: post-"}
+    end
+
+    test "rejects consecutive hyphens" do
+      assert LinkValidator.validate_link("/blog/engineering/post--name") ==
+               {:error, "slug must be lowercase alphanumeric with hyphens: post--name"}
+    end
+
+    test "returns :ok for link with query string and valid slug" do
+      assert LinkValidator.validate_link("/blog/engineering/post?foo=bar") == :ok
+    end
+
+    test "returns :ok for link with anchor fragment and valid slug" do
+      assert LinkValidator.validate_link("/blog/engineering/post#section") == :ok
+    end
+
+    test "rejects non-blog path" do
+      assert LinkValidator.validate_link("/about") ==
+               {:error, "not a blog link: /about"}
+    end
+
+    test "rejects malformed link" do
+      assert LinkValidator.validate_link("not-a-url") ==
+               {:error, "not a blog link: not-a-url"}
+    end
+  end
+
+  describe "validate_links/1" do
+    test "returns :ok when all links are valid" do
+      links = [
+        "/blog/engineering/hello-world",
+        "/blog/releases/v1-0-0"
+      ]
+
+      assert LinkValidator.validate_links(links) == :ok
+    end
+
+    test "returns errors for invalid links" do
+      links = [
+        "/blog/engineering/hello-world",
+        "/blog/unknown/post",
+        "/blog/releases/My-Post"
+      ]
+
+      assert LinkValidator.validate_links(links) == {
+               :error,
+               [
+                 {2, "/blog/unknown/post", "unknown blog ID: unknown"},
+                 {3, "/blog/releases/My-Post",
+                  "slug must be lowercase alphanumeric with hyphens: My-Post"}
+               ]
+             }
+    end
+
+    test "returns :ok for empty list" do
+      assert LinkValidator.validate_links([]) == :ok
+    end
+
+    test "reports link positions correctly" do
+      links = [
+        "/blog/engineering/ok",
+        "/blog/bad/slug",
+        "/blog/releases/ok"
+      ]
+
+      assert LinkValidator.validate_links(links) == {
+               :error,
+               [{2, "/blog/bad/slug", "unknown blog ID: bad"}]
+             }
+    end
+  end
+
+  describe "validate_body/3" do
+    test "returns :ok when body has no internal blog links" do
+      body = "\n\nJust text, no links.\n\n"
+
+      assert LinkValidator.validate_body(body, :engineering) == :ok
+    end
+
+    test "returns :ok when all links are valid" do
+      body = "\n\n[link](/blog/engineering/post)\n\n"
+
+      assert LinkValidator.validate_body(body, :engineering) == :ok
+    end
+
+    test "returns errors with post context" do
+      body = "\n\n[link](/blog/unknown/post)\n\n"
+
+      assert LinkValidator.validate_body(body, :engineering) == {
+               :error,
+               [
+                 {
+                   1,
+                   "/blog/unknown/post",
+                   "unknown blog ID: unknown",
+                   post_id: nil
+                 }
+               ]
+             }
+    end
+
+    test "includes post_id in error tuples when provided" do
+      body = "\n\n[link](/blog/unknown/post)\n\n"
+
+      assert LinkValidator.validate_body(body, :engineering, post_id: "test-post") == {
+               :error,
+               [
+                 {
+                   1,
+                   "/blog/unknown/post",
+                   "unknown blog ID: unknown",
+                   post_id: "test-post"
+                 }
+               ]
+             }
+    end
+  end
+end