{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE LambdaCase #-} -- | Document compilation functionality for PDF and HTML output module Docster.Compiler ( -- * Compilation Functions compileToPDF , compileToHTML ) where import Docster.Types ( DocsterError(..), OutputFormat(..), SourceDir(..), OutputDir(..), OutputPath(..) , DiagramConfig(..), computeOutputDir, ensureOutputDir ) import Docster.Transform (transformDocument) import Docster.LaTeX (latexTemplate) import Text.Pandoc import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.IO as TIO import System.FilePath (takeDirectory, takeBaseName, replaceExtension, (), (<.>)) import System.Process (callProcess, readProcessWithExitCode) import System.IO.Temp (withSystemTempDirectory) import System.Directory (copyFile, doesFileExist) import System.Exit (ExitCode(..)) import Control.Exception (throwIO) import Control.Monad (void) import Control.Monad.Trans.Except (ExceptT, runExceptT, throwE) import Control.Monad.Trans.Reader (ReaderT, runReaderT, asks) import Control.Monad.Trans.Class (lift) import Control.Monad.IO.Class (liftIO) import Data.Maybe (mapMaybe) -- | Success indicator for user feedback successEmoji :: Text successEmoji = "✅" -- | Compilation context for pipeline operations data CompilationContext = CompilationContext { ccStrategy :: CompilationStrategy , ccSourceDir :: SourceDir , ccOutputDir :: OutputDir , ccInputPath :: FilePath , ccOutputPath :: FilePath , ccDocName :: Text , ccReaderOptions :: ReaderOptions , ccConfig :: DiagramConfig } -- | Monad stack for compilation pipeline type CompilationM = ReaderT CompilationContext (ExceptT DocsterError IO) -- | Strategy pattern: Record of format-specific operations data CompilationStrategy = CompilationStrategy { -- | Format for diagram configuration csOutputFormat :: OutputFormat -- | Pandoc writer function , csWriter :: WriterOptions -> Pandoc -> PandocIO Text -- | Post-processing function for the generated content , csProcessOutput :: String -> Text -> IO (Either DocsterError ()) -- | Success message formatter , csSuccessMessage :: String -> Text } -- | PDF compilation strategy pdfStrategy :: CompilationStrategy pdfStrategy = CompilationStrategy { csOutputFormat = PDF , csWriter = writeLaTeX , csProcessOutput = processPDFOutput , csSuccessMessage = \path -> successEmoji <> " PDF written to " <> T.pack path } -- | HTML compilation strategy htmlStrategy :: CompilationStrategy htmlStrategy = CompilationStrategy { csOutputFormat = HTML , csWriter = writeHtml5String , csProcessOutput = processHTMLOutput , csSuccessMessage = \path -> successEmoji <> " HTML written to " <> T.pack path } -- | Parse LaTeX log content to extract meaningful error messages parseLatexErrors :: Text -> Text parseLatexErrors logContent = let logLines = T.lines logContent missingChars = extractMissingChars logLines overfullBoxes = extractOverfullBoxes logLines undefinedCommands = extractUndefinedCommands logLines fatalErrors = extractFatalErrors logLines errorCount = length missingChars + length overfullBoxes + length undefinedCommands + length fatalErrors summary = if errorCount == 0 then "Unknown LaTeX error occurred." else T.unlines $ filter (not . T.null) [ if not (null fatalErrors) then "Fatal errors:\n" <> T.unlines (map (" • " <>) fatalErrors) else "", if not (null undefinedCommands) then "Undefined commands:\n" <> T.unlines (map (" • " <>) undefinedCommands) else "", if not (null missingChars) then "Missing Unicode characters:\n" <> T.unlines (map (" • " <>) missingChars) else "", if not (null overfullBoxes) then T.pack (show (length overfullBoxes)) <> " overfull boxes (layout warnings)" else "" ] in summary -- | Extract missing character warnings from LaTeX log extractMissingChars :: [Text] -> [Text] extractMissingChars = mapMaybe extractChar where extractChar line | "Missing character:" `T.isInfixOf` line = case T.splitOn "(U+" line of [_, rest] -> case T.splitOn ")" rest of (unicode:_) -> Just $ "U+" <> unicode <> " " <> extractCharContext line _ -> Nothing _ -> Nothing | otherwise = Nothing extractCharContext line = case T.splitOn " in font " line of [_, rest] -> "in " <> T.takeWhile (/= ':') rest _ -> "" -- | Extract overfull box warnings extractOverfullBoxes :: [Text] -> [Text] extractOverfullBoxes = mapMaybe extractBox where extractBox line | "Overfull \\hbox" `T.isInfixOf` line = Just $ T.takeWhile (/= '\n') line | otherwise = Nothing -- | Extract undefined command errors extractUndefinedCommands :: [Text] -> [Text] extractUndefinedCommands = mapMaybe extractUndef where extractUndef line | "Undefined control sequence" `T.isInfixOf` line = Just line | otherwise = Nothing -- | Extract fatal LaTeX errors extractFatalErrors :: [Text] -> [Text] extractFatalErrors = mapMaybe extractFatal where extractFatal line | "! " `T.isPrefixOf` line && not ("Missing character:" `T.isInfixOf` line) = Just $ T.drop 2 line | otherwise = Nothing -- | Process PDF output: LaTeX template application and direct XeLaTeX compilation processPDFOutput :: String -> Text -> IO (Either DocsterError ()) processPDFOutput outputPath latexOutput = do let completeLatex = latexTemplate latexOutput logOutputPath = replaceExtension outputPath "log" -- Use temporary directory for LaTeX compilation withSystemTempDirectory "docster-latex" $ \tempDir -> do let texFile = tempDir "document.tex" pdfFile = tempDir "document.pdf" logFile = tempDir "document.log" -- Write LaTeX content to temporary file TIO.writeFile texFile completeLatex -- Run XeLaTeX compilation (exitCode, _stdout, stderr) <- readProcessWithExitCode "xelatex" [ "-output-directory=" <> tempDir , "-interaction=nonstopmode" -- Don't stop on errors , texFile ] "" -- Always copy log file to output location for debugging logExists <- doesFileExist logFile logContent <- if logExists then TIO.readFile logFile else return (T.pack stderr) TIO.writeFile logOutputPath logContent case exitCode of ExitSuccess -> do -- Check if PDF was actually generated pdfExists <- doesFileExist pdfFile if pdfExists then do -- Copy the generated PDF to the final location copyFile pdfFile outputPath return $ Right () else do return $ Left $ PDFGenerationError $ "PDF file not generated despite successful exit code.\n" <> "Full LaTeX log written to: " <> T.pack logOutputPath ExitFailure code -> do -- LaTeX compilation failed - parse log for meaningful errors let errorSummary = parseLatexErrors logContent return $ Left $ PDFGenerationError $ "❌ LaTeX compilation failed (exit code " <> T.pack (show code) <> "):\n" <> errorSummary <> "\n\n" <> "Full LaTeX log written to: " <> T.pack logOutputPath -- | Process HTML output: file writing and browser opening processHTMLOutput :: String -> Text -> IO (Either DocsterError ()) processHTMLOutput outputPath html = do TIO.writeFile outputPath html -- Open the generated HTML file in browser for verification putStrLn $ "🌐 Opening " <> outputPath <> " in browser for error checking..." void $ callProcess "open" [outputPath] return $ Right () -- | Helper function to lift IO (Either DocsterError a) into CompilationM liftEitherM :: IO (Either DocsterError a) -> CompilationM a liftEitherM action = do result <- liftIO action case result of Left err -> lift $ throwE err Right value -> return value -- | Pipeline step: Read content from input file readContent :: CompilationM Text readContent = do inputPath <- asks ccInputPath liftIO $ TIO.readFile inputPath -- | Pipeline step: Parse markdown content into Pandoc AST parseDocument :: Text -> CompilationM Pandoc parseDocument content = do readerOptions <- asks ccReaderOptions liftEitherM $ parseMarkdown readerOptions content -- | Pipeline step: Transform document (process Mermaid diagrams) transformDocumentM :: Pandoc -> CompilationM Pandoc transformDocumentM pandoc = do config <- asks ccConfig docName <- asks ccDocName liftEitherM $ transformDocument config docName pandoc -- | Pipeline step: Generate output using format-specific writer generateOutputM :: Pandoc -> CompilationM Text generateOutputM pandoc = do strategy <- asks ccStrategy liftEitherM $ generateOutput strategy pandoc -- | Pipeline step: Process output and write to file processOutput :: Text -> CompilationM () processOutput output = do strategy <- asks ccStrategy outputPath <- asks ccOutputPath liftEitherM $ csProcessOutput strategy outputPath output -- | Pipeline step: Print success message printSuccess :: CompilationM () printSuccess = do strategy <- asks ccStrategy outputPath <- asks ccOutputPath liftIO $ putStrLn $ T.unpack $ csSuccessMessage strategy outputPath -- | Higher-order compilation function that takes a strategy and executes the pipeline compileWithStrategy :: CompilationStrategy -> SourceDir -> OutputDir -> Text -> OutputPath -> OutputPath -> IO (Either DocsterError ()) compileWithStrategy strategy sourceDir outputDir docName (OutputPath inputPath) (OutputPath outputPath) = do let readerOptions = def { readerExtensions = getDefaultExtensions "markdown" } config = DiagramConfig sourceDir outputDir (csOutputFormat strategy) context = CompilationContext strategy sourceDir outputDir inputPath outputPath docName readerOptions config pipeline = readContent >>= parseDocument >>= transformDocumentM >>= generateOutputM >>= processOutput >> printSuccess runExceptT $ runReaderT pipeline context -- | Parse markdown with error handling parseMarkdown :: ReaderOptions -> Text -> IO (Either DocsterError Pandoc) parseMarkdown readerOptions content = do pandocResult <- runIO $ readMarkdown readerOptions content return $ case pandocResult of Left err -> Left $ FileError $ "Failed to parse markdown: " <> T.pack (show err) Right pandoc -> Right pandoc -- | Generate output using the strategy's writer with error handling generateOutput :: CompilationStrategy -> Pandoc -> IO (Either DocsterError Text) generateOutput strategy transformed = do result <- runIO $ csWriter strategy def transformed return $ case result of Left err -> Left $ case csOutputFormat strategy of PDF -> PDFGenerationError $ "LaTeX generation failed: " <> T.pack (show err) HTML -> FileError $ "HTML generation failed: " <> T.pack (show err) Right output -> Right output -- | Compile markdown to PDF using XeLaTeX compileToPDF :: FilePath -> IO () compileToPDF = compileWithFormat pdfStrategy "pdf" -- | Compile markdown to HTML compileToHTML :: FilePath -> IO () compileToHTML = compileWithFormat htmlStrategy "html" -- | Higher-order function to compile with any format strategy compileWithFormat :: CompilationStrategy -> String -> FilePath -> IO () compileWithFormat strategy extension path = do let sourceDir = SourceDir $ takeDirectory path outputDir = computeOutputDir path OutputDir outDirPath = outputDir baseName = takeBaseName path docName = T.pack baseName outputPath = OutputPath $ outDirPath baseName <.> extension -- Ensure output directory exists before compilation ensureOutputDir outputDir result <- compileWithStrategy strategy sourceDir outputDir docName (OutputPath path) outputPath case result of Left err -> throwIO err Right _ -> return ()