- csWriter now writes files directly (WriterOptions -> Pandoc -> FilePath -> IO (Either DocsterError ())) - csPostProcess no longer takes text content (String -> IO (Either DocsterError ())) - Each strategy owns its complete output logic (PDF/HTML/DOCX) - Remove generateOutputFile (eliminated unused CompilationStrategy parameter) - Pipeline: generateOutputM >>= processOutput => writeAndProcessOutput (1 step) - Pandoc 3.7 compatibility: writeDocx returns ByteString instead of () .gitignore: exclude stack/cabal config and generated files
340 lines
13 KiB
Haskell
340 lines
13 KiB
Haskell
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE LambdaCase #-}
|
|
|
|
-- | Document compilation functionality for PDF and HTML output
|
|
module Docster.Compiler
|
|
( -- * Compilation Functions
|
|
compileToPDF
|
|
, compileToHTML
|
|
, compileToDOCX
|
|
) where
|
|
|
|
import Docster.Types
|
|
( DocsterError(..), OutputFormat(..), SourceDir(..), OutputDir(..), OutputPath(..)
|
|
, DiagramConfig(..), computeOutputDir, ensureOutputDir
|
|
)
|
|
import Text.Pandoc.Writers ()
|
|
import qualified Data.ByteString.Lazy as BSL
|
|
import Docster.Transform (transformDocument)
|
|
import Docster.LaTeX (latexTemplate)
|
|
import Text.Pandoc
|
|
import Data.Text (Text)
|
|
import qualified Data.Text as T
|
|
import qualified Data.Text.IO as TIO
|
|
import System.FilePath (takeDirectory, takeBaseName, replaceExtension, (</>), (<.>))
|
|
import System.Process (callProcess, readProcessWithExitCode)
|
|
import System.IO.Temp (withSystemTempDirectory)
|
|
import System.Directory (copyFile, doesFileExist)
|
|
import System.Exit (ExitCode(..))
|
|
import Control.Exception (throwIO)
|
|
import Control.Monad (void)
|
|
import Control.Monad.Trans.Except (ExceptT, runExceptT, throwE)
|
|
import Control.Monad.Trans.Reader (ReaderT, runReaderT, asks)
|
|
import Control.Monad.Trans.Class (lift)
|
|
import Control.Monad.IO.Class (liftIO)
|
|
import Data.Maybe (mapMaybe)
|
|
import Data.Char (ord)
|
|
|
|
-- | Success indicator for user feedback
|
|
successEmoji :: Text
|
|
successEmoji = "✅"
|
|
|
|
-- | Compilation context for pipeline operations
|
|
data CompilationContext = CompilationContext
|
|
{ ccStrategy :: CompilationStrategy
|
|
, ccInputPath :: FilePath
|
|
, ccOutputPath :: FilePath
|
|
, ccDocName :: Text
|
|
, ccReaderOptions :: ReaderOptions
|
|
, ccConfig :: DiagramConfig
|
|
}
|
|
|
|
-- | Monad stack for compilation pipeline
|
|
type CompilationM = ReaderT CompilationContext (ExceptT DocsterError IO)
|
|
|
|
-- | Strategy pattern: Record of format-specific operations
|
|
data CompilationStrategy = CompilationStrategy
|
|
{ -- | Format for diagram configuration
|
|
csOutputFormat :: OutputFormat
|
|
-- | Pandoc writer: writes output directly to the given file path
|
|
, csWriter :: WriterOptions -> Pandoc -> FilePath -> IO (Either DocsterError ())
|
|
-- | Post-processing after write (PDF→xelatex, HTML→open browser, DOCX→noop)
|
|
, csPostProcess :: String -> IO (Either DocsterError ())
|
|
-- | Success message formatter
|
|
, csSuccessMessage :: String -> Text
|
|
}
|
|
|
|
-- | PDF compilation strategy
|
|
pdfStrategy :: CompilationStrategy
|
|
pdfStrategy = CompilationStrategy
|
|
{ csOutputFormat = PDF
|
|
, csWriter = \opts doc path -> do
|
|
result <- runIO (writeLaTeX opts doc)
|
|
case result of
|
|
Left err -> return $ Left $ FileError $ "LaTeX write failed: " <> T.pack (show err)
|
|
Right latex -> do
|
|
TIO.writeFile path (latexTemplate latex)
|
|
return $ Right ()
|
|
, csPostProcess = processPDFOutput
|
|
, csSuccessMessage = \path -> successEmoji <> " PDF written to " <> T.pack path
|
|
}
|
|
|
|
-- | HTML compilation strategy
|
|
htmlStrategy :: CompilationStrategy
|
|
htmlStrategy = CompilationStrategy
|
|
{ csOutputFormat = HTML
|
|
, csWriter = \opts doc path -> do
|
|
result <- runIO (writeHtml5String opts doc)
|
|
case result of
|
|
Left err -> return $ Left $ FileError $ "HTML write failed: " <> T.pack (show err)
|
|
Right html -> do
|
|
TIO.writeFile path html
|
|
return $ Right ()
|
|
, csPostProcess = processHTMLOutput
|
|
, csSuccessMessage = \path -> successEmoji <> " HTML written to " <> T.pack path
|
|
}
|
|
|
|
-- | DOCX compilation strategy (Pandoc writes file directly)
|
|
docxStrategy :: CompilationStrategy
|
|
docxStrategy = CompilationStrategy
|
|
{ csOutputFormat = DOCX
|
|
, csWriter = \opts doc path -> do
|
|
result <- runIO (writeDocx opts doc)
|
|
case result of
|
|
Left err -> return $ Left $ FileError $ "DOCX generation failed: " <> T.pack (show err)
|
|
Right docxBS -> do
|
|
BSL.writeFile path docxBS
|
|
return $ Right ()
|
|
, csPostProcess = \_ -> return $ Right () -- no post-processing needed
|
|
, csSuccessMessage = \path -> successEmoji <> " DOCX written to " <> T.pack path
|
|
}
|
|
|
|
-- | Parse LaTeX log content to extract meaningful error messages
|
|
parseLatexErrors :: Text -> Text
|
|
parseLatexErrors logContent =
|
|
let logLines = T.lines logContent
|
|
missingChars = extractMissingChars logLines
|
|
overfullBoxes = extractOverfullBoxes logLines
|
|
undefinedCommands = extractUndefinedCommands logLines
|
|
fatalErrors = extractFatalErrors logLines
|
|
|
|
errorCount = length missingChars + length overfullBoxes + length undefinedCommands + length fatalErrors
|
|
|
|
summary = if errorCount == 0
|
|
then "Unknown LaTeX error occurred."
|
|
else T.unlines $ filter (not . T.null) [
|
|
if not (null fatalErrors) then "Fatal errors:\n" <> T.unlines (map (" • " <>) fatalErrors) else "",
|
|
if not (null undefinedCommands) then "Undefined commands:\n" <> T.unlines (map (" • " <>) undefinedCommands) else "",
|
|
if not (null missingChars) then "Missing Unicode characters:\n" <> T.unlines (map (" • " <>) missingChars) else "",
|
|
if not (null overfullBoxes) then T.pack (show (length overfullBoxes)) <> " overfull boxes (layout warnings)" else ""
|
|
]
|
|
in summary
|
|
|
|
-- | Extract missing character warnings from LaTeX log
|
|
extractMissingChars :: [Text] -> [Text]
|
|
extractMissingChars = mapMaybe extractChar
|
|
where
|
|
extractChar line
|
|
| "Missing character:" `T.isInfixOf` line =
|
|
case T.splitOn "(U+" line of
|
|
[_, rest] -> case T.splitOn ")" rest of
|
|
(unicode:_) -> Just $ "U+" <> unicode <> " " <> extractCharContext line
|
|
_ -> Nothing
|
|
_ -> Nothing
|
|
| otherwise = Nothing
|
|
|
|
extractCharContext line =
|
|
case T.splitOn " in font " line of
|
|
[_, rest] -> "in " <> T.takeWhile (/= ':') rest
|
|
_ -> ""
|
|
|
|
-- | Extract overfull box warnings
|
|
extractOverfullBoxes :: [Text] -> [Text]
|
|
extractOverfullBoxes = mapMaybe extractBox
|
|
where
|
|
extractBox line
|
|
| "Overfull \\hbox" `T.isInfixOf` line = Just $ T.takeWhile (/= '\n') line
|
|
| otherwise = Nothing
|
|
|
|
-- | Extract undefined command errors
|
|
extractUndefinedCommands :: [Text] -> [Text]
|
|
extractUndefinedCommands = mapMaybe extractUndef
|
|
where
|
|
extractUndef line
|
|
| "Undefined control sequence" `T.isInfixOf` line = Just line
|
|
| otherwise = Nothing
|
|
|
|
-- | Extract fatal LaTeX errors
|
|
extractFatalErrors :: [Text] -> [Text]
|
|
extractFatalErrors = mapMaybe extractFatal
|
|
where
|
|
extractFatal line
|
|
| "! " `T.isPrefixOf` line && not ("Missing character:" `T.isInfixOf` line) = Just $ T.drop 2 line
|
|
| otherwise = Nothing
|
|
|
|
-- | Process PDF output: direct XeLaTeX compilation (LaTeX already written by csWriter)
|
|
processPDFOutput :: String -> IO (Either DocsterError ())
|
|
processPDFOutput outputPath = do
|
|
let logOutputPath = replaceExtension outputPath "log"
|
|
|
|
-- Use temporary directory for LaTeX compilation
|
|
withSystemTempDirectory "docster-latex" $ \tempDir -> do
|
|
let texFile = tempDir </> "document.tex"
|
|
pdfFile = tempDir </> "document.pdf"
|
|
logFile = tempDir </> "document.log"
|
|
|
|
-- Run XeLaTeX compilation
|
|
(exitCode, _stdout, stderr) <- readProcessWithExitCode "xelatex"
|
|
[ "-output-directory=" <> tempDir
|
|
, "-interaction=nonstopmode" -- Don't stop on errors
|
|
, texFile
|
|
] ""
|
|
|
|
-- Always copy log file to output location for debugging
|
|
logExists <- doesFileExist logFile
|
|
logContent <- if logExists
|
|
then TIO.readFile logFile
|
|
else return (T.pack stderr)
|
|
TIO.writeFile logOutputPath logContent
|
|
|
|
case exitCode of
|
|
ExitSuccess -> do
|
|
-- Check if PDF was actually generated
|
|
pdfExists <- doesFileExist pdfFile
|
|
if pdfExists
|
|
then do
|
|
-- Copy the generated PDF to the final location
|
|
copyFile pdfFile outputPath
|
|
return $ Right ()
|
|
else do
|
|
return $ Left $ PDFGenerationError $
|
|
"PDF file not generated despite successful exit code.\n" <>
|
|
"Full LaTeX log written to: " <> T.pack logOutputPath
|
|
ExitFailure code -> do
|
|
-- LaTeX compilation failed - parse log for meaningful errors
|
|
let errorSummary = parseLatexErrors logContent
|
|
return $ Left $ PDFGenerationError $
|
|
"❌ LaTeX compilation failed (exit code " <> T.pack (show code) <> "):\n" <>
|
|
errorSummary <> "\n\n" <>
|
|
"Full LaTeX log written to: " <> T.pack logOutputPath
|
|
|
|
-- | Process HTML output: open browser (HTML already written by csWriter)
|
|
processHTMLOutput :: String -> IO (Either DocsterError ())
|
|
processHTMLOutput outputPath = do
|
|
-- Open the generated HTML file in browser for verification
|
|
putStrLn $ "🌐 Opening " <> outputPath <> " in browser for error checking..."
|
|
void $ callProcess "open" [outputPath]
|
|
|
|
return $ Right ()
|
|
|
|
-- | Helper function to lift IO (Either DocsterError a) into CompilationM
|
|
liftEitherM :: IO (Either DocsterError a) -> CompilationM a
|
|
liftEitherM action = do
|
|
result <- liftIO action
|
|
case result of
|
|
Left err -> lift $ throwE err
|
|
Right value -> return value
|
|
|
|
-- | Strip ANSI escape sequences (CSI codes like color/style) from text.
|
|
-- These appear in copy-pasted terminal output and break LaTeX compilation.
|
|
stripAnsiCodes :: Text -> Text
|
|
stripAnsiCodes input = case T.break (== '\x1b') input of
|
|
(before, rest)
|
|
| T.null rest -> before
|
|
| otherwise -> before <> stripAnsiCodes (skipEscape (T.tail rest))
|
|
where
|
|
-- Skip an ESC sequence: ESC [ <params> <final byte>
|
|
skipEscape t
|
|
| T.null t = t
|
|
| T.head t == '[' = skipCSIParams (T.tail t)
|
|
| otherwise = T.tail t -- non-CSI escape: skip one char after ESC
|
|
-- Skip CSI parameter/intermediate bytes until final byte (0x40-0x7E)
|
|
skipCSIParams t
|
|
| T.null t = t
|
|
| let c = ord (T.head t), c >= 0x40 && c <= 0x7E = T.tail t -- final byte, consume it
|
|
| otherwise = skipCSIParams (T.tail t)
|
|
|
|
-- | Pipeline step: Read content from input file
|
|
readContent :: CompilationM Text
|
|
readContent = do
|
|
inputPath <- asks ccInputPath
|
|
liftIO $ stripAnsiCodes <$> TIO.readFile inputPath
|
|
|
|
-- | Pipeline step: Parse markdown content into Pandoc AST
|
|
parseDocument :: Text -> CompilationM Pandoc
|
|
parseDocument content = do
|
|
readerOptions <- asks ccReaderOptions
|
|
liftEitherM $ parseMarkdown readerOptions content
|
|
|
|
-- | Pipeline step: Transform document (process Mermaid diagrams)
|
|
transformDocumentM :: Pandoc -> CompilationM Pandoc
|
|
transformDocumentM pandoc = do
|
|
config <- asks ccConfig
|
|
docName <- asks ccDocName
|
|
liftEitherM $ transformDocument config docName pandoc
|
|
|
|
-- | Pipeline step: Write output and post-process (format-specific)
|
|
writeAndProcessOutput :: Pandoc -> CompilationM ()
|
|
writeAndProcessOutput pandoc = do
|
|
strategy <- asks ccStrategy
|
|
outputPath <- asks ccOutputPath
|
|
liftEitherM $ csWriter strategy def pandoc outputPath
|
|
liftEitherM $ (csPostProcess strategy) outputPath
|
|
|
|
-- | Pipeline step: Print success message
|
|
printSuccess :: CompilationM ()
|
|
printSuccess = do
|
|
strategy <- asks ccStrategy
|
|
outputPath <- asks ccOutputPath
|
|
liftIO $ putStrLn $ T.unpack $ csSuccessMessage strategy outputPath
|
|
|
|
-- | Higher-order compilation function that takes a strategy and executes the pipeline
|
|
compileWithStrategy :: CompilationStrategy -> SourceDir -> OutputDir -> Text -> OutputPath -> OutputPath -> IO (Either DocsterError ())
|
|
compileWithStrategy strategy sourceDir outputDir docName (OutputPath inputPath) (OutputPath outputPath) = do
|
|
let readerOptions = def { readerExtensions = getDefaultExtensions "markdown" }
|
|
config = DiagramConfig sourceDir outputDir (csOutputFormat strategy)
|
|
context = CompilationContext strategy inputPath outputPath docName readerOptions config
|
|
pipeline = readContent >>= parseDocument >>= transformDocumentM >>= writeAndProcessOutput >> printSuccess
|
|
|
|
runExceptT $ runReaderT pipeline context
|
|
|
|
-- | Parse markdown with error handling
|
|
parseMarkdown :: ReaderOptions -> Text -> IO (Either DocsterError Pandoc)
|
|
parseMarkdown readerOptions content = do
|
|
pandocResult <- runIO $ readMarkdown readerOptions content
|
|
return $ case pandocResult of
|
|
Left err -> Left $ FileError $ "Failed to parse markdown: " <> T.pack (show err)
|
|
Right pandoc -> Right pandoc
|
|
|
|
|
|
|
|
-- | Compile markdown to PDF using XeLaTeX
|
|
compileToPDF :: FilePath -> IO ()
|
|
compileToPDF = compileWithFormat pdfStrategy "pdf"
|
|
|
|
-- | Compile markdown to HTML
|
|
compileToHTML :: FilePath -> IO ()
|
|
compileToHTML = compileWithFormat htmlStrategy "html"
|
|
|
|
-- | Compile markdown to DOCX
|
|
compileToDOCX :: FilePath -> IO ()
|
|
compileToDOCX = compileWithFormat docxStrategy "docx"
|
|
|
|
-- | Higher-order function to compile with any format strategy
|
|
compileWithFormat :: CompilationStrategy -> String -> FilePath -> IO ()
|
|
compileWithFormat strategy extension path = do
|
|
let sourceDir = SourceDir $ takeDirectory path
|
|
outputDir = computeOutputDir path
|
|
OutputDir outDirPath = outputDir
|
|
baseName = takeBaseName path
|
|
docName = T.pack baseName
|
|
outputPath = OutputPath $ outDirPath </> baseName <.> extension
|
|
|
|
-- Ensure output directory exists before compilation
|
|
ensureOutputDir outputDir
|
|
|
|
result <- compileWithStrategy strategy sourceDir outputDir docName (OutputPath path) outputPath
|
|
case result of
|
|
Left err -> throwIO err
|
|
Right _ -> return ()
|