docster/app/Main.hs
Willem van den Ende 7f1f1aeeb0 Fix PDF diagram text rendering with format-optimized generation
Problem: SVG diagrams had missing text in PDFs due to Inkscape conversion
issues with foreignObject elements used by Mermaid for HTML text rendering.

Solution: Implement format-specific optimal generation:
- HTML: SVG files for perfect vector scaling and text rendering
- PDF: High-resolution PNG files (3x scale) for sharp images with readable text

Changes:
- Generate SVG for HTML output using standard mmdc command
- Generate PNG for PDF output using mmdc with --scale 3 for high resolution
- Remove SVG LaTeX package dependency and Inkscape requirement
- Update documentation to reflect the dual-format approach

Results:
- HTML: Crisp, scalable vector diagrams with perfect text
- PDF: Sharp, high-resolution raster diagrams with clear text
- No external dependencies beyond mermaid-cli

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-29 19:20:13 +02:00

224 lines
9.6 KiB
Haskell

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Main (main) where
import Text.Pandoc
import Text.Pandoc.Class (runIOorExplode)
import Text.Pandoc.PDF (makePDF)
import Text.Pandoc.Walk (walkM)
import Text.Pandoc.Extensions (getDefaultExtensions)
import System.Environment (getArgs)
import System.FilePath (replaceExtension, takeDirectory, takeFileName, takeExtension, (</>))
import System.Process (callProcess)
import System.Directory (removeFile)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import Data.Hashable (hash)
import Control.Monad (void)
import Control.Exception (Exception, throwIO, bracket, catch, SomeException)
import qualified Data.ByteString.Lazy as BL
-- | Custom error types for better error handling
data DocsterError
= InvalidUsage Text
| FileError Text
| PDFGenerationError Text
| ProcessError Text
deriving (Show)
instance Exception DocsterError
-- | Type-safe wrappers for better domain modeling
newtype SourceDir = SourceDir FilePath deriving (Show, Eq)
newtype OutputPath = OutputPath FilePath deriving (Show, Eq)
newtype DiagramId = DiagramId Text deriving (Show, Eq)
-- | Constants for the application
mermaidCommand :: String
mermaidCommand = "mmdc"
diagramPrefix :: String
diagramPrefix = "diagram-"
successEmoji :: String
successEmoji = ""
-- | Generate a diagram ID from content hash or explicit ID
generateDiagramId :: Text -> Text -> DiagramId
generateDiagramId explicitId contents
| T.null explicitId = DiagramId $ T.pack $ diagramPrefix <> take 6 (show (abs (hash (T.unpack contents))))
| otherwise = DiagramId explicitId
-- | Transform Mermaid code blocks into image embeds with resource cleanup
processMermaidBlock :: SourceDir -> OutputPath -> Block -> IO Block
processMermaidBlock (SourceDir sourceDir) (OutputPath outputPath) (CodeBlock (id', classes, _) contents)
| "mermaid" `elem` classes = do
let DiagramId diagId = generateDiagramId id' contents
diagIdStr = T.unpack diagId
mmdFile = sourceDir </> diagIdStr <> ".mmd"
-- Use SVG for HTML (scalable), high-res PNG for PDF (text compatibility)
(outputFile, imagePath) = if isHTMLOutput outputPath
then let svgFile = sourceDir </> diagIdStr <> ".svg"
in (svgFile, takeFileName svgFile)
else let pngFile = sourceDir </> diagIdStr <> ".png"
in (pngFile, pngFile)
-- Use bracket to ensure cleanup of temporary mermaid file
bracket
(TIO.writeFile mmdFile contents >> return mmdFile)
(\file -> removeFile file `catch` \(_ :: SomeException) -> return ())
(\_ -> do
-- Generate with appropriate format and quality for output type
if isHTMLOutput outputPath
then void $ callProcess mermaidCommand ["-i", mmdFile, "-o", outputFile]
else void $ callProcess mermaidCommand ["-i", mmdFile, "-o", outputFile, "--scale", "3"]
putStrLn $ successEmoji <> " Generated " <> outputFile
return $ Para [Image nullAttr [] (T.pack imagePath, "Mermaid diagram")])
processMermaidBlock _ _ block = return block
-- | Check if output is HTML format based on file extension
isHTMLOutput :: FilePath -> Bool
isHTMLOutput path = takeExtension path == ".html"
-- | Walk the Pandoc AST and process blocks using walkM
transformDocument :: SourceDir -> OutputPath -> Pandoc -> IO Pandoc
transformDocument sourceDir outputPath = walkM (processMermaidBlock sourceDir outputPath)
-- | LaTeX template with comprehensive package support
latexTemplate :: Text -> Text
latexTemplate bodyContent = T.unlines
[ "\\documentclass{article}"
, "\\usepackage[utf8]{inputenc}"
, "\\usepackage{fontspec}"
, "\\usepackage{graphicx}"
, "\\usepackage{geometry}"
, "\\geometry{margin=1in}"
, "\\usepackage{hyperref}"
, "\\usepackage{enumitem}"
, "\\usepackage{amsmath}"
, "\\usepackage{amssymb}"
, "\\usepackage{fancyvrb}"
, "\\usepackage{color}"
, "\\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\\\\{\\}}"
, "\\newenvironment{Shaded}{}{}"
, syntaxHighlightingCommands
, "\\providecommand{\\tightlist}{%"
, " \\setlength{\\itemsep}{0pt}\\setlength{\\parskip}{0pt}}"
, "\\begin{document}"
, bodyContent
, "\\end{document}"
]
-- | Syntax highlighting commands for LaTeX
syntaxHighlightingCommands :: Text
syntaxHighlightingCommands = T.unlines
[ "\\newcommand{\\AlertTok}[1]{\\textcolor[rgb]{1.00,0.00,0.00}{\\textbf{#1}}}"
, "\\newcommand{\\AnnotationTok}[1]{\\textcolor[rgb]{0.38,0.63,0.69}{\\textbf{\\textit{#1}}}}"
, "\\newcommand{\\AttributeTok}[1]{\\textcolor[rgb]{0.49,0.56,0.16}{#1}}"
, "\\newcommand{\\BaseNTok}[1]{\\textcolor[rgb]{0.25,0.63,0.44}{#1}}"
, "\\newcommand{\\BuiltInTok}[1]{#1}"
, "\\newcommand{\\CharTok}[1]{\\textcolor[rgb]{0.25,0.44,0.63}{#1}}"
, "\\newcommand{\\CommentTok}[1]{\\textcolor[rgb]{0.38,0.63,0.69}{\\textit{#1}}}"
, "\\newcommand{\\CommentVarTok}[1]{\\textcolor[rgb]{0.38,0.63,0.69}{\\textbf{\\textit{#1}}}}"
, "\\newcommand{\\ConstantTok}[1]{\\textcolor[rgb]{0.53,0.00,0.00}{#1}}"
, "\\newcommand{\\ControlFlowTok}[1]{\\textcolor[rgb]{0.00,0.44,0.13}{\\textbf{#1}}}"
, "\\newcommand{\\DataTypeTok}[1]{\\textcolor[rgb]{0.56,0.13,0.00}{#1}}"
, "\\newcommand{\\DecValTok}[1]{\\textcolor[rgb]{0.25,0.63,0.44}{#1}}"
, "\\newcommand{\\DocumentationTok}[1]{\\textcolor[rgb]{0.73,0.13,0.13}{\\textit{#1}}}"
, "\\newcommand{\\ErrorTok}[1]{\\textcolor[rgb]{1.00,0.00,0.00}{\\textbf{#1}}}"
, "\\newcommand{\\ExtensionTok}[1]{#1}"
, "\\newcommand{\\FloatTok}[1]{\\textcolor[rgb]{0.25,0.63,0.44}{#1}}"
, "\\newcommand{\\FunctionTok}[1]{\\textcolor[rgb]{0.02,0.16,0.49}{#1}}"
, "\\newcommand{\\ImportTok}[1]{#1}"
, "\\newcommand{\\InformationTok}[1]{\\textcolor[rgb]{0.38,0.63,0.69}{\\textbf{\\textit{#1}}}}"
, "\\newcommand{\\KeywordTok}[1]{\\textcolor[rgb]{0.00,0.44,0.13}{\\textbf{#1}}}"
, "\\newcommand{\\NormalTok}[1]{#1}"
, "\\newcommand{\\OperatorTok}[1]{\\textcolor[rgb]{0.40,0.40,0.40}{#1}}"
, "\\newcommand{\\OtherTok}[1]{\\textcolor[rgb]{0.00,0.44,0.13}{#1}}"
, "\\newcommand{\\PreprocessorTok}[1]{\\textcolor[rgb]{0.74,0.48,0.00}{#1}}"
, "\\newcommand{\\RegionMarkerTok}[1]{#1}"
, "\\newcommand{\\SpecialCharTok}[1]{\\textcolor[rgb]{0.25,0.44,0.63}{#1}}"
, "\\newcommand{\\SpecialStringTok}[1]{\\textcolor[rgb]{0.73,0.40,0.53}{#1}}"
, "\\newcommand{\\StringTok}[1]{\\textcolor[rgb]{0.25,0.44,0.63}{#1}}"
, "\\newcommand{\\VariableTok}[1]{\\textcolor[rgb]{0.10,0.09,0.49}{#1}}"
, "\\newcommand{\\VerbatimStringTok}[1]{\\textcolor[rgb]{0.25,0.44,0.63}{#1}}"
, "\\newcommand{\\WarningTok}[1]{\\textcolor[rgb]{0.38,0.63,0.69}{\\textbf{\\textit{#1}}}}"
]
-- | Parse command line arguments
parseArgs :: [String] -> Either DocsterError (IO ())
parseArgs ["-pdf", path] = Right (compileToPDF path)
parseArgs ["-html", path] = Right (compileToHTML path)
parseArgs _ = Left $ InvalidUsage "Usage: docster -pdf|-html <file.md>"
-- | Compile markdown to PDF using XeLaTeX
compileToPDF :: FilePath -> IO ()
compileToPDF path = do
let sourceDir = SourceDir $ takeDirectory path
outputPath = OutputPath $ replaceExtension path "pdf"
result <- compileToPDFSafe sourceDir (OutputPath path) outputPath
case result of
Left err -> throwIO err
Right _ -> return ()
-- | Safe PDF compilation with proper error handling
compileToPDFSafe :: SourceDir -> OutputPath -> OutputPath -> IO (Either DocsterError ())
compileToPDFSafe sourceDir (OutputPath inputPath) outputPath@(OutputPath outputPathStr) = do
content <- TIO.readFile inputPath
let readerOptions = def { readerExtensions = getDefaultExtensions "markdown" }
pandoc <- runIOorExplode $ readMarkdown readerOptions content
transformed <- transformDocument sourceDir outputPath pandoc
-- Generate LaTeX with proper template
latexOutput <- runIOorExplode $ writeLaTeX def transformed
let completeLatex = latexTemplate latexOutput
result <- runIOorExplode $ makePDF "xelatex" [] (\_ _ -> return completeLatex) def transformed
case result of
Left err -> return $ Left $ PDFGenerationError $ T.pack $ show err
Right bs -> do
BL.writeFile outputPathStr bs
putStrLn $ successEmoji <> " PDF written to " <> outputPathStr
return $ Right ()
-- | Compile markdown to HTML
compileToHTML :: FilePath -> IO ()
compileToHTML path = do
let sourceDir = SourceDir $ takeDirectory path
outputPath = OutputPath $ replaceExtension path "html"
result <- compileToHTMLSafe sourceDir (OutputPath path) outputPath
case result of
Left err -> throwIO err
Right _ -> return ()
-- | Safe HTML compilation with proper error handling
compileToHTMLSafe :: SourceDir -> OutputPath -> OutputPath -> IO (Either DocsterError ())
compileToHTMLSafe sourceDir (OutputPath inputPath) outputPath@(OutputPath outputPathStr) = do
content <- TIO.readFile inputPath
let readerOptions = def { readerExtensions = getDefaultExtensions "markdown" }
pandoc <- runIOorExplode $ readMarkdown readerOptions content
transformed <- transformDocument sourceDir outputPath pandoc
html <- runIOorExplode $ writeHtml5String def transformed
TIO.writeFile outputPathStr html
putStrLn $ successEmoji <> " HTML written to " <> outputPathStr
-- Open the generated HTML file in browser
putStrLn $ "🌐 Opening " <> outputPathStr <> " in browser for error checking..."
void $ callProcess "open" [outputPathStr]
return $ Right ()
-- | Main entry point
main :: IO ()
main = do
args <- getArgs
case parseArgs args of
Left err -> throwIO err
Right action -> action