firehose/blogex/lib/blogex/link_validator.ex
Firehose Bot a83634da36 refactor(Blogex.LinkValidator): simplify dead code and naming
- Drop redundant :ok return in _validate_links/2 (blog.ex)
- Remove dead HTML link regex from extract_links/1 (body is raw markdown)
- Rename slug_slug_end/1 to slug_end/1
- Simplify parse_blog_link/1 to return {blog_id, slug}, removing
  parse_query_fragment/1 and dead case branches
2026-05-07 13:24:11 +01:00

214 lines
6.0 KiB
Elixir

defmodule Blogex.LinkValidator do
  @moduledoc """
  Validates internal blog link semantics in markdown post bodies.

  Checks that internal links follow the pattern `/blog/{blog_id}/{slug}`
  where `blog_id` is a valid blog identifier and `slug` matches the
  expected format (lowercase alphanumeric with hyphens).

  This module is pure and does not depend on any stored posts — it validates
  link format and semantics only. Post existence checking is handled separately.

  ## Valid blog IDs

    * `:engineering` — maps to `/blog/engineering/`
    * `:release_notes` — maps to `/blog/releases/`

  ## Valid slug format

    * Lowercase alphanumeric characters and hyphens only
    * Must not start or end with a hyphen
    * Must not contain consecutive hyphens
    * Query strings and anchor fragments are allowed after the slug

  ## Usage

      # Validate a single link
      LinkValidator.validate_link("/blog/engineering/hello-world")
      # => :ok

      # Validate multiple links; errors carry the 1-based position of the
      # offending link within the given list
      LinkValidator.validate_links(["/blog/engineering/a", "/blog/bad/b"])
      # => {:error, [{2, "/blog/bad/b", "unknown blog ID: bad"}]}

      # Validate all links in a post body
      LinkValidator.validate_body(body, :engineering, post_id: "my-post")
      # => :ok or {:error, [...]}
  """

  # URL path segment => blog identifier. Only the keys are consulted here.
  @valid_blog_ids %{
    "engineering" => :engineering,
    "releases" => :release_notes
  }

  # Anchored with \A and \z rather than ^ and $: in PCRE, `$` also matches
  # just before a trailing newline, which would let "post\n" pass as a slug.
  @slug_regex ~r/\A[a-z0-9]+(?:-[a-z0-9]+)*\z/

  @doc """
  Extracts internal blog links from a markdown body.

  Returns the destinations of all markdown links (`[text](url)`) that are
  shaped like `/blog/{blog_id}/{rest}`, in order of appearance. The blog ID
  is *not* checked here, so links to unknown blogs are returned as well and
  can be reported by `validate_link/1`. External links and internal links
  outside `/blog/` are ignored.

  An optional markdown link title (`[text](url "Title")`) is dropped; only
  the destination itself is returned.

  ## Examples

      iex> extract_links("[link](/blog/engineering/post)")
      ["/blog/engineering/post"]

      iex> extract_links("See [GitHub](https://github.com)")
      []
  """
  @spec extract_links(String.t()) :: [String.t()]
  def extract_links(body) when is_binary(body) do
    for [_text, raw] <- Regex.scan(~r/\[([^\]]+)\]\(([^)]+)\)/, body, capture: :all_but_first),
        destination = link_destination(raw),
        internal_blog_link?(destination) do
      destination
    end
  end

  # Returns the destination part of the text between a markdown link's
  # parentheses: surrounding whitespace and any trailing title are removed.
  defp link_destination(raw) do
    case String.split(raw) do
      [destination | _title] -> destination
      [] -> ""
    end
  end

  # True when `path` has the `/blog/{blog_id}/{rest}` shape.
  defp internal_blog_link?(path), do: parse_blog_link(path) != nil

  @doc """
  Validates a single link path.

  Returns `:ok` if the link has valid semantics, or `{:error, reason}` otherwise.
  Possible failures are: the path is not a `/blog/{blog_id}/{slug}` link, the
  blog ID is unknown, the slug is empty, or the slug is malformed.

  ## Examples

      iex> validate_link("/blog/engineering/hello-world")
      :ok

      iex> validate_link("/blog/unknown/post")
      {:error, "unknown blog ID: unknown"}

      iex> validate_link("/blog/engineering/My-Post")
      {:error, "slug must be lowercase alphanumeric with hyphens: My-Post"}
  """
  @spec validate_link(String.t()) :: :ok | {:error, String.t()}
  def validate_link(link) when is_binary(link) do
    case parse_blog_link(link) do
      nil ->
        {:error, "not a blog link: #{link}"}

      {blog_id, slug} ->
        if Map.has_key?(@valid_blog_ids, blog_id) do
          validate_slug(slug)
        else
          {:error, "unknown blog ID: #{blog_id}"}
        end
    end
  end

  @doc """
  Validates a list of links, returning errors with their positions.

  Returns `:ok` if all links are valid, or `{:error, errors}` where each
  error is `{position, link, reason}`. `position` is the 1-based index of
  the link within `links` (not a line number in any source document).

  ## Examples

      iex> validate_links(["/blog/engineering/a", "/blog/bad/b"])
      {:error, [{2, "/blog/bad/b", "unknown blog ID: bad"}]}
  """
  @spec validate_links([String.t()]) :: :ok | {:error, [{integer(), String.t(), String.t()}]}
  def validate_links(links) when is_list(links) do
    errors =
      for {link, position} <- Enum.with_index(links, 1),
          {:error, reason} <- [validate_link(link)] do
        {position, link, reason}
      end

    case errors do
      [] -> :ok
      _ -> {:error, errors}
    end
  end

  @doc """
  Validates all internal blog links in a post body.

  Extracts links from the body with `extract_links/1`, validates them with
  `validate_links/1`, and appends the post context to every error tuple.
  The first element of each error is the 1-based position of the link among
  the extracted blog links.

  The `blog` argument must be an atom; it is currently not used beyond that
  type check.

  ## Options

    * `:post_id` — the post slug/id, included in error tuples for debugging
      (`nil` when not given)

  ## Examples

      iex> validate_body("<p>[link](/blog/engineering/post)</p>", :engineering)
      :ok

      iex> validate_body("<p>[link](/blog/unknown/post)</p>", :engineering)
      {:error, [{1, "/blog/unknown/post", "unknown blog ID: unknown", post_id: nil}]}

      iex> validate_body("<p>[link](/blog/unknown/post)</p>", :engineering, post_id: "my-post")
      {:error, [{1, "/blog/unknown/post", "unknown blog ID: unknown", post_id: "my-post"}]}
  """
  @spec validate_body(String.t(), atom(), keyword()) ::
          :ok | {:error, [{integer(), String.t(), String.t(), keyword()}]}
  def validate_body(body, blog, opts \\ []) when is_binary(body) and is_atom(blog) do
    post_id = Keyword.get(opts, :post_id)

    # `:ok` does not match the clause and is returned unchanged.
    with {:error, errors} <- body |> extract_links() |> validate_links() do
      enriched =
        Enum.map(errors, fn {position, link, reason} ->
          {position, link, reason, [post_id: post_id]}
        end)

      {:error, enriched}
    end
  end

  # --- Helpers ---

  # Splits a path into `{blog_id, slug}` when it has the shape
  # `/blog/{blog_id}/{rest}`; returns `nil` otherwise. The slug is `rest` with
  # any query string or fragment removed. Neither part is validated here.
  # Public but hidden from the generated docs.
  @doc false
  @spec parse_blog_link(String.t()) :: {String.t(), String.t()} | nil
  def parse_blog_link(path) do
    case String.split(path, "/", parts: 4) do
      ["", "blog", blog_id, rest] ->
        # String.split/3 always returns at least one element, so the head is
        # everything before the first "?" or "#".
        [slug | _query_or_fragment] = String.split(rest, ["?", "#"], parts: 2)
        {blog_id, slug}

      _other ->
        nil
    end
  end

  # Checks a slug (query string and fragment already removed) against the
  # slug format, distinguishing the empty slug for a clearer message.
  @spec validate_slug(String.t()) :: :ok | {:error, String.t()}
  defp validate_slug(""), do: {:error, "empty slug"}

  defp validate_slug(slug) do
    if Regex.match?(@slug_regex, slug) do
      :ok
    else
      {:error, "slug must be lowercase alphanumeric with hyphens: #{slug}"}
    end
  end
end