docster/src/Docster/Compiler.hs
Your Name 7de2bc811a Add output directory structure and heading-based image naming
- Output files now go to output/<document-name>/ relative to input
- Images named after nearest heading (e.g., file_flow.svg)
- Multiple images under same heading get suffixes: _1, _2, etc.
- Images before any heading use document name as prefix
- Add StateT-based AST traversal for heading tracking
- Add HSpec test suite with 21 tests

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-05 17:47:49 +00:00

307 lines
12 KiB
Haskell

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}
-- | Document compilation functionality for PDF and HTML output
module Docster.Compiler
( -- * Compilation Functions
compileToPDF
, compileToHTML
) where
import Docster.Types
( DocsterError(..), OutputFormat(..), SourceDir(..), OutputDir(..), OutputPath(..)
, DiagramConfig(..), computeOutputDir, ensureOutputDir
)
import Docster.Transform (transformDocument)
import Docster.LaTeX (latexTemplate)
import Text.Pandoc
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import System.FilePath (takeDirectory, takeBaseName, replaceExtension, (</>), (<.>))
import System.Process (callProcess, readProcessWithExitCode)
import System.IO.Temp (withSystemTempDirectory)
import System.Directory (copyFile, doesFileExist)
import System.Exit (ExitCode(..))
import Control.Exception (throwIO)
import Control.Monad (void)
import Control.Monad.Trans.Except (ExceptT, runExceptT, throwE)
import Control.Monad.Trans.Reader (ReaderT, runReaderT, asks)
import Control.Monad.Trans.Class (lift)
import Control.Monad.IO.Class (liftIO)
import Data.Maybe (mapMaybe)
-- | Success indicator for user feedback
successEmoji :: Text
successEmoji = ""
-- | Compilation context for pipeline operations
data CompilationContext = CompilationContext
{ ccStrategy :: CompilationStrategy
, ccSourceDir :: SourceDir
, ccOutputDir :: OutputDir
, ccInputPath :: FilePath
, ccOutputPath :: FilePath
, ccDocName :: Text
, ccReaderOptions :: ReaderOptions
, ccConfig :: DiagramConfig
}
-- | Monad stack for compilation pipeline
type CompilationM = ReaderT CompilationContext (ExceptT DocsterError IO)
-- | Strategy pattern: Record of format-specific operations
data CompilationStrategy = CompilationStrategy
{ -- | Format for diagram configuration
csOutputFormat :: OutputFormat
-- | Pandoc writer function
, csWriter :: WriterOptions -> Pandoc -> PandocIO Text
-- | Post-processing function for the generated content
, csProcessOutput :: String -> Text -> IO (Either DocsterError ())
-- | Success message formatter
, csSuccessMessage :: String -> Text
}
-- | PDF compilation strategy
pdfStrategy :: CompilationStrategy
pdfStrategy = CompilationStrategy
{ csOutputFormat = PDF
, csWriter = writeLaTeX
, csProcessOutput = processPDFOutput
, csSuccessMessage = \path -> successEmoji <> " PDF written to " <> T.pack path
}
-- | HTML compilation strategy
htmlStrategy :: CompilationStrategy
htmlStrategy = CompilationStrategy
{ csOutputFormat = HTML
, csWriter = writeHtml5String
, csProcessOutput = processHTMLOutput
, csSuccessMessage = \path -> successEmoji <> " HTML written to " <> T.pack path
}
-- | Parse LaTeX log content to extract meaningful error messages
parseLatexErrors :: Text -> Text
parseLatexErrors logContent =
let logLines = T.lines logContent
missingChars = extractMissingChars logLines
overfullBoxes = extractOverfullBoxes logLines
undefinedCommands = extractUndefinedCommands logLines
fatalErrors = extractFatalErrors logLines
errorCount = length missingChars + length overfullBoxes + length undefinedCommands + length fatalErrors
summary = if errorCount == 0
then "Unknown LaTeX error occurred."
else T.unlines $ filter (not . T.null) [
if not (null fatalErrors) then "Fatal errors:\n" <> T.unlines (map ("" <>) fatalErrors) else "",
if not (null undefinedCommands) then "Undefined commands:\n" <> T.unlines (map ("" <>) undefinedCommands) else "",
if not (null missingChars) then "Missing Unicode characters:\n" <> T.unlines (map ("" <>) missingChars) else "",
if not (null overfullBoxes) then T.pack (show (length overfullBoxes)) <> " overfull boxes (layout warnings)" else ""
]
in summary
-- | Extract missing character warnings from LaTeX log
extractMissingChars :: [Text] -> [Text]
extractMissingChars = mapMaybe extractChar
where
extractChar line
| "Missing character:" `T.isInfixOf` line =
case T.splitOn "(U+" line of
[_, rest] -> case T.splitOn ")" rest of
(unicode:_) -> Just $ "U+" <> unicode <> " " <> extractCharContext line
_ -> Nothing
_ -> Nothing
| otherwise = Nothing
extractCharContext line =
case T.splitOn " in font " line of
[_, rest] -> "in " <> T.takeWhile (/= ':') rest
_ -> ""
-- | Extract overfull box warnings
extractOverfullBoxes :: [Text] -> [Text]
extractOverfullBoxes = mapMaybe extractBox
where
extractBox line
| "Overfull \\hbox" `T.isInfixOf` line = Just $ T.takeWhile (/= '\n') line
| otherwise = Nothing
-- | Extract undefined command errors
extractUndefinedCommands :: [Text] -> [Text]
extractUndefinedCommands = mapMaybe extractUndef
where
extractUndef line
| "Undefined control sequence" `T.isInfixOf` line = Just line
| otherwise = Nothing
-- | Extract fatal LaTeX errors
extractFatalErrors :: [Text] -> [Text]
extractFatalErrors = mapMaybe extractFatal
where
extractFatal line
| "! " `T.isPrefixOf` line && not ("Missing character:" `T.isInfixOf` line) = Just $ T.drop 2 line
| otherwise = Nothing
-- | Process PDF output: LaTeX template application and direct XeLaTeX compilation
processPDFOutput :: String -> Text -> IO (Either DocsterError ())
processPDFOutput outputPath latexOutput = do
let completeLatex = latexTemplate latexOutput
logOutputPath = replaceExtension outputPath "log"
-- Use temporary directory for LaTeX compilation
withSystemTempDirectory "docster-latex" $ \tempDir -> do
let texFile = tempDir </> "document.tex"
pdfFile = tempDir </> "document.pdf"
logFile = tempDir </> "document.log"
-- Write LaTeX content to temporary file
TIO.writeFile texFile completeLatex
-- Run XeLaTeX compilation
(exitCode, _stdout, stderr) <- readProcessWithExitCode "xelatex"
[ "-output-directory=" <> tempDir
, "-interaction=nonstopmode" -- Don't stop on errors
, texFile
] ""
-- Always copy log file to output location for debugging
logExists <- doesFileExist logFile
logContent <- if logExists
then TIO.readFile logFile
else return (T.pack stderr)
TIO.writeFile logOutputPath logContent
case exitCode of
ExitSuccess -> do
-- Check if PDF was actually generated
pdfExists <- doesFileExist pdfFile
if pdfExists
then do
-- Copy the generated PDF to the final location
copyFile pdfFile outputPath
return $ Right ()
else do
return $ Left $ PDFGenerationError $
"PDF file not generated despite successful exit code.\n" <>
"Full LaTeX log written to: " <> T.pack logOutputPath
ExitFailure code -> do
-- LaTeX compilation failed - parse log for meaningful errors
let errorSummary = parseLatexErrors logContent
return $ Left $ PDFGenerationError $
"❌ LaTeX compilation failed (exit code " <> T.pack (show code) <> "):\n" <>
errorSummary <> "\n\n" <>
"Full LaTeX log written to: " <> T.pack logOutputPath
-- | Process HTML output: file writing and browser opening
processHTMLOutput :: String -> Text -> IO (Either DocsterError ())
processHTMLOutput outputPath html = do
TIO.writeFile outputPath html
-- Open the generated HTML file in browser for verification
putStrLn $ "🌐 Opening " <> outputPath <> " in browser for error checking..."
void $ callProcess "open" [outputPath]
return $ Right ()
-- | Helper function to lift IO (Either DocsterError a) into CompilationM
liftEitherM :: IO (Either DocsterError a) -> CompilationM a
liftEitherM action = do
result <- liftIO action
case result of
Left err -> lift $ throwE err
Right value -> return value
-- | Pipeline step: Read content from input file
readContent :: CompilationM Text
readContent = do
inputPath <- asks ccInputPath
liftIO $ TIO.readFile inputPath
-- | Pipeline step: Parse markdown content into Pandoc AST
parseDocument :: Text -> CompilationM Pandoc
parseDocument content = do
readerOptions <- asks ccReaderOptions
liftEitherM $ parseMarkdown readerOptions content
-- | Pipeline step: Transform document (process Mermaid diagrams)
transformDocumentM :: Pandoc -> CompilationM Pandoc
transformDocumentM pandoc = do
config <- asks ccConfig
docName <- asks ccDocName
liftEitherM $ transformDocument config docName pandoc
-- | Pipeline step: Generate output using format-specific writer
generateOutputM :: Pandoc -> CompilationM Text
generateOutputM pandoc = do
strategy <- asks ccStrategy
liftEitherM $ generateOutput strategy pandoc
-- | Pipeline step: Process output and write to file
processOutput :: Text -> CompilationM ()
processOutput output = do
strategy <- asks ccStrategy
outputPath <- asks ccOutputPath
liftEitherM $ csProcessOutput strategy outputPath output
-- | Pipeline step: Print success message
printSuccess :: CompilationM ()
printSuccess = do
strategy <- asks ccStrategy
outputPath <- asks ccOutputPath
liftIO $ putStrLn $ T.unpack $ csSuccessMessage strategy outputPath
-- | Higher-order compilation function that takes a strategy and executes the pipeline
compileWithStrategy :: CompilationStrategy -> SourceDir -> OutputDir -> Text -> OutputPath -> OutputPath -> IO (Either DocsterError ())
compileWithStrategy strategy sourceDir outputDir docName (OutputPath inputPath) (OutputPath outputPath) = do
let readerOptions = def { readerExtensions = getDefaultExtensions "markdown" }
config = DiagramConfig sourceDir outputDir (csOutputFormat strategy)
context = CompilationContext strategy sourceDir outputDir inputPath outputPath docName readerOptions config
pipeline = readContent >>= parseDocument >>= transformDocumentM >>= generateOutputM >>= processOutput >> printSuccess
runExceptT $ runReaderT pipeline context
-- | Parse markdown with error handling
parseMarkdown :: ReaderOptions -> Text -> IO (Either DocsterError Pandoc)
parseMarkdown readerOptions content = do
pandocResult <- runIO $ readMarkdown readerOptions content
return $ case pandocResult of
Left err -> Left $ FileError $ "Failed to parse markdown: " <> T.pack (show err)
Right pandoc -> Right pandoc
-- | Generate output using the strategy's writer with error handling
generateOutput :: CompilationStrategy -> Pandoc -> IO (Either DocsterError Text)
generateOutput strategy transformed = do
result <- runIO $ csWriter strategy def transformed
return $ case result of
Left err -> Left $ case csOutputFormat strategy of
PDF -> PDFGenerationError $ "LaTeX generation failed: " <> T.pack (show err)
HTML -> FileError $ "HTML generation failed: " <> T.pack (show err)
Right output -> Right output
-- | Compile markdown to PDF using XeLaTeX
compileToPDF :: FilePath -> IO ()
compileToPDF = compileWithFormat pdfStrategy "pdf"
-- | Compile markdown to HTML
compileToHTML :: FilePath -> IO ()
compileToHTML = compileWithFormat htmlStrategy "html"
-- | Higher-order function to compile with any format strategy
compileWithFormat :: CompilationStrategy -> String -> FilePath -> IO ()
compileWithFormat strategy extension path = do
let sourceDir = SourceDir $ takeDirectory path
outputDir = computeOutputDir path
OutputDir outDirPath = outputDir
baseName = takeBaseName path
docName = T.pack baseName
outputPath = OutputPath $ outDirPath </> baseName <.> extension
-- Ensure output directory exists before compilation
ensureOutputDir outputDir
result <- compileWithStrategy strategy sourceDir outputDir docName (OutputPath path) outputPath
case result of
Left err -> throwIO err
Right _ -> return ()