{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE LambdaCase #-} -- | Document compilation functionality for PDF and HTML output module Docster.Compiler ( -- * Compilation Functions compileToPDF , compileToHTML , compileToDOCX ) where import Docster.Types ( DocsterError(..), OutputFormat(..), SourceDir(..), OutputDir(..), OutputPath(..) , DiagramConfig(..), computeOutputDir, ensureOutputDir ) import Text.Pandoc.Writers () import qualified Data.ByteString.Lazy as BSL import Docster.Transform (transformDocument) import Docster.LaTeX (latexTemplate) import Text.Pandoc import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.IO as TIO import System.FilePath (takeDirectory, takeBaseName, replaceExtension, (), (<.>)) import System.Process (callProcess, readProcessWithExitCode) import System.IO.Temp (withSystemTempDirectory) import System.Directory (copyFile, doesFileExist) import System.Exit (ExitCode(..)) import Control.Exception (throwIO) import Control.Monad (void) import Control.Monad.Trans.Except (ExceptT, runExceptT, throwE) import Control.Monad.Trans.Reader (ReaderT, runReaderT, asks) import Control.Monad.Trans.Class (lift) import Control.Monad.IO.Class (liftIO) import Data.Maybe (mapMaybe) import Data.Char (ord) -- | Success indicator for user feedback successEmoji :: Text successEmoji = "✅" -- | Compilation context for pipeline operations data CompilationContext = CompilationContext { ccStrategy :: CompilationStrategy , ccInputPath :: FilePath , ccOutputPath :: FilePath , ccDocName :: Text , ccReaderOptions :: ReaderOptions , ccConfig :: DiagramConfig } -- | Monad stack for compilation pipeline type CompilationM = ReaderT CompilationContext (ExceptT DocsterError IO) -- | Strategy pattern: Record of format-specific operations data CompilationStrategy = CompilationStrategy { -- | Format for diagram configuration csOutputFormat :: OutputFormat -- | Pandoc writer: writes output directly to the given file path , csWriter :: WriterOptions -> Pandoc -> FilePath -> IO (Either DocsterError ()) -- | Post-processing after write (PDF→xelatex, HTML→open browser, DOCX→noop) , csPostProcess :: String -> IO (Either DocsterError ()) -- | Success message formatter , csSuccessMessage :: String -> Text } -- | PDF compilation strategy pdfStrategy :: CompilationStrategy pdfStrategy = CompilationStrategy { csOutputFormat = PDF , csWriter = \opts doc path -> do result <- runIO (writeLaTeX opts doc) case result of Left err -> return $ Left $ FileError $ "LaTeX write failed: " <> T.pack (show err) Right latex -> do TIO.writeFile path (latexTemplate latex) return $ Right () , csPostProcess = processPDFOutput , csSuccessMessage = \path -> successEmoji <> " PDF written to " <> T.pack path } -- | HTML compilation strategy htmlStrategy :: CompilationStrategy htmlStrategy = CompilationStrategy { csOutputFormat = HTML , csWriter = \opts doc path -> do result <- runIO (writeHtml5String opts doc) case result of Left err -> return $ Left $ FileError $ "HTML write failed: " <> T.pack (show err) Right html -> do TIO.writeFile path html return $ Right () , csPostProcess = processHTMLOutput , csSuccessMessage = \path -> successEmoji <> " HTML written to " <> T.pack path } -- | DOCX compilation strategy (Pandoc writes file directly) docxStrategy :: CompilationStrategy docxStrategy = CompilationStrategy { csOutputFormat = DOCX , csWriter = \opts doc path -> do result <- runIO (writeDocx opts doc) case result of Left err -> return $ Left $ FileError $ "DOCX generation failed: " <> T.pack (show err) Right docxBS -> do BSL.writeFile path docxBS return $ Right () , csPostProcess = \_ -> return $ Right () -- no post-processing needed , csSuccessMessage = \path -> successEmoji <> " DOCX written to " <> T.pack path } -- | Parse LaTeX log content to extract meaningful error messages parseLatexErrors :: Text -> Text parseLatexErrors logContent = let logLines = T.lines logContent missingChars = extractMissingChars logLines overfullBoxes = extractOverfullBoxes logLines undefinedCommands = extractUndefinedCommands logLines fatalErrors = extractFatalErrors logLines errorCount = length missingChars + length overfullBoxes + length undefinedCommands + length fatalErrors summary = if errorCount == 0 then "Unknown LaTeX error occurred." else T.unlines $ filter (not . T.null) [ if not (null fatalErrors) then "Fatal errors:\n" <> T.unlines (map (" • " <>) fatalErrors) else "", if not (null undefinedCommands) then "Undefined commands:\n" <> T.unlines (map (" • " <>) undefinedCommands) else "", if not (null missingChars) then "Missing Unicode characters:\n" <> T.unlines (map (" • " <>) missingChars) else "", if not (null overfullBoxes) then T.pack (show (length overfullBoxes)) <> " overfull boxes (layout warnings)" else "" ] in summary -- | Extract missing character warnings from LaTeX log extractMissingChars :: [Text] -> [Text] extractMissingChars = mapMaybe extractChar where extractChar line | "Missing character:" `T.isInfixOf` line = case T.splitOn "(U+" line of [_, rest] -> case T.splitOn ")" rest of (unicode:_) -> Just $ "U+" <> unicode <> " " <> extractCharContext line _ -> Nothing _ -> Nothing | otherwise = Nothing extractCharContext line = case T.splitOn " in font " line of [_, rest] -> "in " <> T.takeWhile (/= ':') rest _ -> "" -- | Extract overfull box warnings extractOverfullBoxes :: [Text] -> [Text] extractOverfullBoxes = mapMaybe extractBox where extractBox line | "Overfull \\hbox" `T.isInfixOf` line = Just $ T.takeWhile (/= '\n') line | otherwise = Nothing -- | Extract undefined command errors extractUndefinedCommands :: [Text] -> [Text] extractUndefinedCommands = mapMaybe extractUndef where extractUndef line | "Undefined control sequence" `T.isInfixOf` line = Just line | otherwise = Nothing -- | Extract fatal LaTeX errors extractFatalErrors :: [Text] -> [Text] extractFatalErrors = mapMaybe extractFatal where extractFatal line | "! " `T.isPrefixOf` line && not ("Missing character:" `T.isInfixOf` line) = Just $ T.drop 2 line | otherwise = Nothing -- | Process PDF output: direct XeLaTeX compilation (LaTeX already written by csWriter) processPDFOutput :: String -> IO (Either DocsterError ()) processPDFOutput outputPath = do let logOutputPath = replaceExtension outputPath "log" -- Use temporary directory for LaTeX compilation withSystemTempDirectory "docster-latex" $ \tempDir -> do let texFile = tempDir "document.tex" pdfFile = tempDir "document.pdf" logFile = tempDir "document.log" -- Run XeLaTeX compilation (exitCode, _stdout, stderr) <- readProcessWithExitCode "xelatex" [ "-output-directory=" <> tempDir , "-interaction=nonstopmode" -- Don't stop on errors , texFile ] "" -- Always copy log file to output location for debugging logExists <- doesFileExist logFile logContent <- if logExists then TIO.readFile logFile else return (T.pack stderr) TIO.writeFile logOutputPath logContent case exitCode of ExitSuccess -> do -- Check if PDF was actually generated pdfExists <- doesFileExist pdfFile if pdfExists then do -- Copy the generated PDF to the final location copyFile pdfFile outputPath return $ Right () else do return $ Left $ PDFGenerationError $ "PDF file not generated despite successful exit code.\n" <> "Full LaTeX log written to: " <> T.pack logOutputPath ExitFailure code -> do -- LaTeX compilation failed - parse log for meaningful errors let errorSummary = parseLatexErrors logContent return $ Left $ PDFGenerationError $ "❌ LaTeX compilation failed (exit code " <> T.pack (show code) <> "):\n" <> errorSummary <> "\n\n" <> "Full LaTeX log written to: " <> T.pack logOutputPath -- | Process HTML output: open browser (HTML already written by csWriter) processHTMLOutput :: String -> IO (Either DocsterError ()) processHTMLOutput outputPath = do -- Open the generated HTML file in browser for verification putStrLn $ "🌐 Opening " <> outputPath <> " in browser for error checking..." void $ callProcess "open" [outputPath] return $ Right () -- | Helper function to lift IO (Either DocsterError a) into CompilationM liftEitherM :: IO (Either DocsterError a) -> CompilationM a liftEitherM action = do result <- liftIO action case result of Left err -> lift $ throwE err Right value -> return value -- | Strip ANSI escape sequences (CSI codes like color/style) from text. -- These appear in copy-pasted terminal output and break LaTeX compilation. stripAnsiCodes :: Text -> Text stripAnsiCodes input = case T.break (== '\x1b') input of (before, rest) | T.null rest -> before | otherwise -> before <> stripAnsiCodes (skipEscape (T.tail rest)) where -- Skip an ESC sequence: ESC [ skipEscape t | T.null t = t | T.head t == '[' = skipCSIParams (T.tail t) | otherwise = T.tail t -- non-CSI escape: skip one char after ESC -- Skip CSI parameter/intermediate bytes until final byte (0x40-0x7E) skipCSIParams t | T.null t = t | let c = ord (T.head t), c >= 0x40 && c <= 0x7E = T.tail t -- final byte, consume it | otherwise = skipCSIParams (T.tail t) -- | Pipeline step: Read content from input file readContent :: CompilationM Text readContent = do inputPath <- asks ccInputPath liftIO $ stripAnsiCodes <$> TIO.readFile inputPath -- | Pipeline step: Parse markdown content into Pandoc AST parseDocument :: Text -> CompilationM Pandoc parseDocument content = do readerOptions <- asks ccReaderOptions liftEitherM $ parseMarkdown readerOptions content -- | Pipeline step: Transform document (process Mermaid diagrams) transformDocumentM :: Pandoc -> CompilationM Pandoc transformDocumentM pandoc = do config <- asks ccConfig docName <- asks ccDocName liftEitherM $ transformDocument config docName pandoc -- | Pipeline step: Write output and post-process (format-specific) writeAndProcessOutput :: Pandoc -> CompilationM () writeAndProcessOutput pandoc = do strategy <- asks ccStrategy outputPath <- asks ccOutputPath liftEitherM $ csWriter strategy def pandoc outputPath liftEitherM $ (csPostProcess strategy) outputPath -- | Pipeline step: Print success message printSuccess :: CompilationM () printSuccess = do strategy <- asks ccStrategy outputPath <- asks ccOutputPath liftIO $ putStrLn $ T.unpack $ csSuccessMessage strategy outputPath -- | Higher-order compilation function that takes a strategy and executes the pipeline compileWithStrategy :: CompilationStrategy -> SourceDir -> OutputDir -> Text -> OutputPath -> OutputPath -> IO (Either DocsterError ()) compileWithStrategy strategy sourceDir outputDir docName (OutputPath inputPath) (OutputPath outputPath) = do let readerOptions = def { readerExtensions = getDefaultExtensions "markdown" } config = DiagramConfig sourceDir outputDir (csOutputFormat strategy) context = CompilationContext strategy inputPath outputPath docName readerOptions config pipeline = readContent >>= parseDocument >>= transformDocumentM >>= writeAndProcessOutput >> printSuccess runExceptT $ runReaderT pipeline context -- | Parse markdown with error handling parseMarkdown :: ReaderOptions -> Text -> IO (Either DocsterError Pandoc) parseMarkdown readerOptions content = do pandocResult <- runIO $ readMarkdown readerOptions content return $ case pandocResult of Left err -> Left $ FileError $ "Failed to parse markdown: " <> T.pack (show err) Right pandoc -> Right pandoc -- | Compile markdown to PDF using XeLaTeX compileToPDF :: FilePath -> IO () compileToPDF = compileWithFormat pdfStrategy "pdf" -- | Compile markdown to HTML compileToHTML :: FilePath -> IO () compileToHTML = compileWithFormat htmlStrategy "html" -- | Compile markdown to DOCX compileToDOCX :: FilePath -> IO () compileToDOCX = compileWithFormat docxStrategy "docx" -- | Higher-order function to compile with any format strategy compileWithFormat :: CompilationStrategy -> String -> FilePath -> IO () compileWithFormat strategy extension path = do let sourceDir = SourceDir $ takeDirectory path outputDir = computeOutputDir path OutputDir outDirPath = outputDir baseName = takeBaseName path docName = T.pack baseName outputPath = OutputPath $ outDirPath baseName <.> extension -- Ensure output directory exists before compilation ensureOutputDir outputDir result <- compileWithStrategy strategy sourceDir outputDir docName (OutputPath path) outputPath case result of Left err -> throwIO err Right _ -> return ()