Replace makePDF with direct XeLaTeX processing to eliminate API mismatch

- Remove Text.Pandoc.PDF dependency and dummy Pandoc document
- Add direct xelatex invocation with temporary file handling
- Improve error reporting with LaTeX log file parsing
- Add temporary package dependency for proper temp file cleanup
- Maintain same external API while cleaning internal architecture
- Eliminate architectural mismatch between our pipeline and Pandoc's expectations

The previous code used makePDF in a way that fought against its intended usage,
requiring a dummy Pandoc document. Now we directly call xelatex after our
custom LaTeX template processing, creating a cleaner separation of concerns.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Willem van den Ende 2025-07-29 23:22:40 +02:00
parent 303f347243
commit bf6f82abe5
2 changed files with 51 additions and 16 deletions

View File

@ -42,7 +42,8 @@ library
hashable >=1.4 && <1.6, hashable >=1.4 && <1.6,
pandoc >=3.0 && <3.2, pandoc >=3.0 && <3.2,
pandoc-types >=1.23 && <1.25, pandoc-types >=1.23 && <1.25,
bytestring >=0.11 && <0.13 bytestring >=0.11 && <0.13,
temporary >=1.3 && <1.4
default-language: Haskell2010 default-language: Haskell2010
executable docster executable docster

View File

@ -12,13 +12,14 @@ import Docster.Types
import Docster.Transform (transformDocument) import Docster.Transform (transformDocument)
import Docster.LaTeX (latexTemplate) import Docster.LaTeX (latexTemplate)
import Text.Pandoc import Text.Pandoc
import Text.Pandoc.PDF (makePDF)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Text.IO as TIO import qualified Data.Text.IO as TIO
import qualified Data.ByteString.Lazy as BL import System.FilePath (takeDirectory, replaceExtension, (</>))
import System.FilePath (takeDirectory, replaceExtension) import System.Process (callProcess, readProcessWithExitCode)
import System.Process (callProcess) import System.IO.Temp (withSystemTempDirectory)
import System.Directory (copyFile, doesFileExist)
import System.Exit (ExitCode(..))
import Control.Exception (throwIO) import Control.Exception (throwIO)
import Control.Monad (void) import Control.Monad (void)
@ -56,20 +57,53 @@ htmlStrategy = CompilationStrategy
, csSuccessMessage = \path -> successEmoji <> " HTML written to " <> T.pack path , csSuccessMessage = \path -> successEmoji <> " HTML written to " <> T.pack path
} }
-- | Process PDF output: LaTeX template application and PDF generation -- | Process PDF output: LaTeX template application and direct XeLaTeX compilation
processPDFOutput :: Text -> String -> IO (Either DocsterError ()) processPDFOutput :: Text -> String -> IO (Either DocsterError ())
processPDFOutput latexOutput outputPath = do processPDFOutput latexOutput outputPath = do
let completeLatex = latexTemplate latexOutput let completeLatex = latexTemplate latexOutput
-- We need a Pandoc document for makePDF, but it's not used in the template function
-- Create a minimal document for the API -- Use temporary directory for LaTeX compilation
let dummyDoc = Pandoc nullMeta [] withSystemTempDirectory "docster-latex" $ \tempDir -> do
pdfResult <- runIO $ makePDF "xelatex" [] (\_ _ -> return completeLatex) def dummyDoc let texFile = tempDir </> "document.tex"
case pdfResult of pdfFile = tempDir </> "document.pdf"
Left err -> return $ Left $ PDFGenerationError $ T.pack $ show err logFile = tempDir </> "document.log"
Right (Left err) -> return $ Left $ PDFGenerationError $ T.pack $ show err
Right (Right bs) -> do -- Write LaTeX content to temporary file
BL.writeFile outputPath bs TIO.writeFile texFile completeLatex
return $ Right ()
-- Run XeLaTeX compilation
(exitCode, _stdout, stderr) <- readProcessWithExitCode "xelatex"
[ "-output-directory=" <> tempDir
, "-interaction=nonstopmode" -- Don't stop on errors
, texFile
] ""
case exitCode of
ExitSuccess -> do
-- Check if PDF was actually generated
pdfExists <- doesFileExist pdfFile
if pdfExists
then do
-- Copy the generated PDF to the final location
copyFile pdfFile outputPath
return $ Right ()
else do
-- PDF generation failed, read log for details
logExists <- doesFileExist logFile
logContent <- if logExists
then TIO.readFile logFile
else return "No log file generated"
return $ Left $ PDFGenerationError $
"PDF file not generated. LaTeX log:\n" <> logContent
ExitFailure code -> do
-- LaTeX compilation failed, read log for details
logExists <- doesFileExist logFile
logContent <- if logExists
then TIO.readFile logFile
else return (T.pack stderr)
return $ Left $ PDFGenerationError $
"XeLaTeX compilation failed (exit code " <> T.pack (show code) <> "):\n" <>
T.pack stderr <> "\n\nLaTeX log:\n" <> logContent
-- | Process HTML output: file writing and browser opening -- | Process HTML output: file writing and browser opening
processHTMLOutput :: Text -> String -> IO (Either DocsterError ()) processHTMLOutput :: Text -> String -> IO (Either DocsterError ())