- Output files now go to output/<document-name>/ relative to input - Images named after nearest heading (e.g., file_flow.svg) - Multiple images under same heading get suffixes: _1, _2, etc. - Images before any heading use document name as prefix - Add StateT-based AST traversal for heading tracking - Add HSpec test suite with 21 tests 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
307 lines
12 KiB
Haskell
307 lines
12 KiB
Haskell
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE LambdaCase #-}
|
|
|
|
-- | Document compilation functionality for PDF and HTML output
|
|
module Docster.Compiler
|
|
( -- * Compilation Functions
|
|
compileToPDF
|
|
, compileToHTML
|
|
) where
|
|
|
|
import Docster.Types
|
|
( DocsterError(..), OutputFormat(..), SourceDir(..), OutputDir(..), OutputPath(..)
|
|
, DiagramConfig(..), computeOutputDir, ensureOutputDir
|
|
)
|
|
import Docster.Transform (transformDocument)
|
|
import Docster.LaTeX (latexTemplate)
|
|
import Text.Pandoc
|
|
import Data.Text (Text)
|
|
import qualified Data.Text as T
|
|
import qualified Data.Text.IO as TIO
|
|
import System.FilePath (takeDirectory, takeBaseName, replaceExtension, (</>), (<.>))
|
|
import System.Process (callProcess, readProcessWithExitCode)
|
|
import System.IO.Temp (withSystemTempDirectory)
|
|
import System.Directory (copyFile, doesFileExist)
|
|
import System.Exit (ExitCode(..))
|
|
import Control.Exception (throwIO)
|
|
import Control.Monad (void)
|
|
import Control.Monad.Trans.Except (ExceptT, runExceptT, throwE)
|
|
import Control.Monad.Trans.Reader (ReaderT, runReaderT, asks)
|
|
import Control.Monad.Trans.Class (lift)
|
|
import Control.Monad.IO.Class (liftIO)
|
|
import Data.Maybe (mapMaybe)
|
|
|
|
-- | Success indicator for user feedback
|
|
successEmoji :: Text
|
|
successEmoji = "✅"
|
|
|
|
-- | Compilation context for pipeline operations
|
|
data CompilationContext = CompilationContext
|
|
{ ccStrategy :: CompilationStrategy
|
|
, ccSourceDir :: SourceDir
|
|
, ccOutputDir :: OutputDir
|
|
, ccInputPath :: FilePath
|
|
, ccOutputPath :: FilePath
|
|
, ccDocName :: Text
|
|
, ccReaderOptions :: ReaderOptions
|
|
, ccConfig :: DiagramConfig
|
|
}
|
|
|
|
-- | Monad stack for compilation pipeline
|
|
type CompilationM = ReaderT CompilationContext (ExceptT DocsterError IO)
|
|
|
|
-- | Strategy pattern: Record of format-specific operations
|
|
data CompilationStrategy = CompilationStrategy
|
|
{ -- | Format for diagram configuration
|
|
csOutputFormat :: OutputFormat
|
|
-- | Pandoc writer function
|
|
, csWriter :: WriterOptions -> Pandoc -> PandocIO Text
|
|
-- | Post-processing function for the generated content
|
|
, csProcessOutput :: String -> Text -> IO (Either DocsterError ())
|
|
-- | Success message formatter
|
|
, csSuccessMessage :: String -> Text
|
|
}
|
|
|
|
-- | PDF compilation strategy
|
|
pdfStrategy :: CompilationStrategy
|
|
pdfStrategy = CompilationStrategy
|
|
{ csOutputFormat = PDF
|
|
, csWriter = writeLaTeX
|
|
, csProcessOutput = processPDFOutput
|
|
, csSuccessMessage = \path -> successEmoji <> " PDF written to " <> T.pack path
|
|
}
|
|
|
|
-- | HTML compilation strategy
|
|
htmlStrategy :: CompilationStrategy
|
|
htmlStrategy = CompilationStrategy
|
|
{ csOutputFormat = HTML
|
|
, csWriter = writeHtml5String
|
|
, csProcessOutput = processHTMLOutput
|
|
, csSuccessMessage = \path -> successEmoji <> " HTML written to " <> T.pack path
|
|
}
|
|
|
|
-- | Parse LaTeX log content to extract meaningful error messages
|
|
parseLatexErrors :: Text -> Text
|
|
parseLatexErrors logContent =
|
|
let logLines = T.lines logContent
|
|
missingChars = extractMissingChars logLines
|
|
overfullBoxes = extractOverfullBoxes logLines
|
|
undefinedCommands = extractUndefinedCommands logLines
|
|
fatalErrors = extractFatalErrors logLines
|
|
|
|
errorCount = length missingChars + length overfullBoxes + length undefinedCommands + length fatalErrors
|
|
|
|
summary = if errorCount == 0
|
|
then "Unknown LaTeX error occurred."
|
|
else T.unlines $ filter (not . T.null) [
|
|
if not (null fatalErrors) then "Fatal errors:\n" <> T.unlines (map (" • " <>) fatalErrors) else "",
|
|
if not (null undefinedCommands) then "Undefined commands:\n" <> T.unlines (map (" • " <>) undefinedCommands) else "",
|
|
if not (null missingChars) then "Missing Unicode characters:\n" <> T.unlines (map (" • " <>) missingChars) else "",
|
|
if not (null overfullBoxes) then T.pack (show (length overfullBoxes)) <> " overfull boxes (layout warnings)" else ""
|
|
]
|
|
in summary
|
|
|
|
-- | Extract missing character warnings from LaTeX log
|
|
extractMissingChars :: [Text] -> [Text]
|
|
extractMissingChars = mapMaybe extractChar
|
|
where
|
|
extractChar line
|
|
| "Missing character:" `T.isInfixOf` line =
|
|
case T.splitOn "(U+" line of
|
|
[_, rest] -> case T.splitOn ")" rest of
|
|
(unicode:_) -> Just $ "U+" <> unicode <> " " <> extractCharContext line
|
|
_ -> Nothing
|
|
_ -> Nothing
|
|
| otherwise = Nothing
|
|
|
|
extractCharContext line =
|
|
case T.splitOn " in font " line of
|
|
[_, rest] -> "in " <> T.takeWhile (/= ':') rest
|
|
_ -> ""
|
|
|
|
-- | Extract overfull box warnings
|
|
extractOverfullBoxes :: [Text] -> [Text]
|
|
extractOverfullBoxes = mapMaybe extractBox
|
|
where
|
|
extractBox line
|
|
| "Overfull \\hbox" `T.isInfixOf` line = Just $ T.takeWhile (/= '\n') line
|
|
| otherwise = Nothing
|
|
|
|
-- | Extract undefined command errors
|
|
extractUndefinedCommands :: [Text] -> [Text]
|
|
extractUndefinedCommands = mapMaybe extractUndef
|
|
where
|
|
extractUndef line
|
|
| "Undefined control sequence" `T.isInfixOf` line = Just line
|
|
| otherwise = Nothing
|
|
|
|
-- | Extract fatal LaTeX errors
|
|
extractFatalErrors :: [Text] -> [Text]
|
|
extractFatalErrors = mapMaybe extractFatal
|
|
where
|
|
extractFatal line
|
|
| "! " `T.isPrefixOf` line && not ("Missing character:" `T.isInfixOf` line) = Just $ T.drop 2 line
|
|
| otherwise = Nothing
|
|
|
|
-- | Process PDF output: LaTeX template application and direct XeLaTeX compilation
|
|
processPDFOutput :: String -> Text -> IO (Either DocsterError ())
|
|
processPDFOutput outputPath latexOutput = do
|
|
let completeLatex = latexTemplate latexOutput
|
|
logOutputPath = replaceExtension outputPath "log"
|
|
|
|
-- Use temporary directory for LaTeX compilation
|
|
withSystemTempDirectory "docster-latex" $ \tempDir -> do
|
|
let texFile = tempDir </> "document.tex"
|
|
pdfFile = tempDir </> "document.pdf"
|
|
logFile = tempDir </> "document.log"
|
|
|
|
-- Write LaTeX content to temporary file
|
|
TIO.writeFile texFile completeLatex
|
|
|
|
-- Run XeLaTeX compilation
|
|
(exitCode, _stdout, stderr) <- readProcessWithExitCode "xelatex"
|
|
[ "-output-directory=" <> tempDir
|
|
, "-interaction=nonstopmode" -- Don't stop on errors
|
|
, texFile
|
|
] ""
|
|
|
|
-- Always copy log file to output location for debugging
|
|
logExists <- doesFileExist logFile
|
|
logContent <- if logExists
|
|
then TIO.readFile logFile
|
|
else return (T.pack stderr)
|
|
TIO.writeFile logOutputPath logContent
|
|
|
|
case exitCode of
|
|
ExitSuccess -> do
|
|
-- Check if PDF was actually generated
|
|
pdfExists <- doesFileExist pdfFile
|
|
if pdfExists
|
|
then do
|
|
-- Copy the generated PDF to the final location
|
|
copyFile pdfFile outputPath
|
|
return $ Right ()
|
|
else do
|
|
return $ Left $ PDFGenerationError $
|
|
"PDF file not generated despite successful exit code.\n" <>
|
|
"Full LaTeX log written to: " <> T.pack logOutputPath
|
|
ExitFailure code -> do
|
|
-- LaTeX compilation failed - parse log for meaningful errors
|
|
let errorSummary = parseLatexErrors logContent
|
|
return $ Left $ PDFGenerationError $
|
|
"❌ LaTeX compilation failed (exit code " <> T.pack (show code) <> "):\n" <>
|
|
errorSummary <> "\n\n" <>
|
|
"Full LaTeX log written to: " <> T.pack logOutputPath
|
|
|
|
-- | Process HTML output: file writing and browser opening
|
|
processHTMLOutput :: String -> Text -> IO (Either DocsterError ())
|
|
processHTMLOutput outputPath html = do
|
|
TIO.writeFile outputPath html
|
|
|
|
-- Open the generated HTML file in browser for verification
|
|
putStrLn $ "🌐 Opening " <> outputPath <> " in browser for error checking..."
|
|
void $ callProcess "open" [outputPath]
|
|
|
|
return $ Right ()
|
|
|
|
-- | Helper function to lift IO (Either DocsterError a) into CompilationM
|
|
liftEitherM :: IO (Either DocsterError a) -> CompilationM a
|
|
liftEitherM action = do
|
|
result <- liftIO action
|
|
case result of
|
|
Left err -> lift $ throwE err
|
|
Right value -> return value
|
|
|
|
-- | Pipeline step: Read content from input file
|
|
readContent :: CompilationM Text
|
|
readContent = do
|
|
inputPath <- asks ccInputPath
|
|
liftIO $ TIO.readFile inputPath
|
|
|
|
-- | Pipeline step: Parse markdown content into Pandoc AST
|
|
parseDocument :: Text -> CompilationM Pandoc
|
|
parseDocument content = do
|
|
readerOptions <- asks ccReaderOptions
|
|
liftEitherM $ parseMarkdown readerOptions content
|
|
|
|
-- | Pipeline step: Transform document (process Mermaid diagrams)
|
|
transformDocumentM :: Pandoc -> CompilationM Pandoc
|
|
transformDocumentM pandoc = do
|
|
config <- asks ccConfig
|
|
docName <- asks ccDocName
|
|
liftEitherM $ transformDocument config docName pandoc
|
|
|
|
-- | Pipeline step: Generate output using format-specific writer
|
|
generateOutputM :: Pandoc -> CompilationM Text
|
|
generateOutputM pandoc = do
|
|
strategy <- asks ccStrategy
|
|
liftEitherM $ generateOutput strategy pandoc
|
|
|
|
-- | Pipeline step: Process output and write to file
|
|
processOutput :: Text -> CompilationM ()
|
|
processOutput output = do
|
|
strategy <- asks ccStrategy
|
|
outputPath <- asks ccOutputPath
|
|
liftEitherM $ csProcessOutput strategy outputPath output
|
|
|
|
-- | Pipeline step: Print success message
|
|
printSuccess :: CompilationM ()
|
|
printSuccess = do
|
|
strategy <- asks ccStrategy
|
|
outputPath <- asks ccOutputPath
|
|
liftIO $ putStrLn $ T.unpack $ csSuccessMessage strategy outputPath
|
|
|
|
-- | Higher-order compilation function that takes a strategy and executes the pipeline
|
|
compileWithStrategy :: CompilationStrategy -> SourceDir -> OutputDir -> Text -> OutputPath -> OutputPath -> IO (Either DocsterError ())
|
|
compileWithStrategy strategy sourceDir outputDir docName (OutputPath inputPath) (OutputPath outputPath) = do
|
|
let readerOptions = def { readerExtensions = getDefaultExtensions "markdown" }
|
|
config = DiagramConfig sourceDir outputDir (csOutputFormat strategy)
|
|
context = CompilationContext strategy sourceDir outputDir inputPath outputPath docName readerOptions config
|
|
pipeline = readContent >>= parseDocument >>= transformDocumentM >>= generateOutputM >>= processOutput >> printSuccess
|
|
|
|
runExceptT $ runReaderT pipeline context
|
|
|
|
-- | Parse markdown with error handling
|
|
parseMarkdown :: ReaderOptions -> Text -> IO (Either DocsterError Pandoc)
|
|
parseMarkdown readerOptions content = do
|
|
pandocResult <- runIO $ readMarkdown readerOptions content
|
|
return $ case pandocResult of
|
|
Left err -> Left $ FileError $ "Failed to parse markdown: " <> T.pack (show err)
|
|
Right pandoc -> Right pandoc
|
|
|
|
-- | Generate output using the strategy's writer with error handling
|
|
generateOutput :: CompilationStrategy -> Pandoc -> IO (Either DocsterError Text)
|
|
generateOutput strategy transformed = do
|
|
result <- runIO $ csWriter strategy def transformed
|
|
return $ case result of
|
|
Left err -> Left $ case csOutputFormat strategy of
|
|
PDF -> PDFGenerationError $ "LaTeX generation failed: " <> T.pack (show err)
|
|
HTML -> FileError $ "HTML generation failed: " <> T.pack (show err)
|
|
Right output -> Right output
|
|
|
|
-- | Compile markdown to PDF using XeLaTeX
|
|
compileToPDF :: FilePath -> IO ()
|
|
compileToPDF = compileWithFormat pdfStrategy "pdf"
|
|
|
|
-- | Compile markdown to HTML
|
|
compileToHTML :: FilePath -> IO ()
|
|
compileToHTML = compileWithFormat htmlStrategy "html"
|
|
|
|
-- | Higher-order function to compile with any format strategy
|
|
compileWithFormat :: CompilationStrategy -> String -> FilePath -> IO ()
|
|
compileWithFormat strategy extension path = do
|
|
let sourceDir = SourceDir $ takeDirectory path
|
|
outputDir = computeOutputDir path
|
|
OutputDir outDirPath = outputDir
|
|
baseName = takeBaseName path
|
|
docName = T.pack baseName
|
|
outputPath = OutputPath $ outDirPath </> baseName <.> extension
|
|
|
|
-- Ensure output directory exists before compilation
|
|
ensureOutputDir outputDir
|
|
|
|
result <- compileWithStrategy strategy sourceDir outputDir docName (OutputPath path) outputPath
|
|
case result of
|
|
Left err -> throwIO err
|
|
Right _ -> return ()
|