docster/src/Docster/Compiler.hs

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}

-- | Document compilation functionality for PDF and HTML output
module Docster.Compiler
  ( -- * Compilation Functions
    compileToPDF
  , compileToHTML
  ) where

import Docster.Types
  ( DocsterError(..), OutputFormat(..), SourceDir(..), OutputDir(..), OutputPath(..)
  , DiagramConfig(..), computeOutputDir, ensureOutputDir
  )
import Docster.Transform (transformDocument)
import Docster.LaTeX (latexTemplate)
import Text.Pandoc
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import System.FilePath (takeDirectory, takeBaseName, replaceExtension, (</>), (<.>))
import System.Process (callProcess, readProcessWithExitCode)
import System.IO.Temp (withSystemTempDirectory)
import System.Directory (copyFile, doesFileExist)
import System.Exit (ExitCode(..))
import Control.Exception (throwIO)
import Control.Monad (void)
import Control.Monad.Trans.Except (ExceptT, runExceptT, throwE)
import Control.Monad.Trans.Reader (ReaderT, runReaderT, asks)
import Control.Monad.Trans.Class (lift)
import Control.Monad.IO.Class (liftIO)
import Data.Maybe (mapMaybe)

-- | Success indicator for user feedback
successEmoji :: Text
successEmoji = "✅"

-- | Compilation context for pipeline operations
data CompilationContext = CompilationContext
  { ccStrategy :: CompilationStrategy
  , ccSourceDir :: SourceDir
  , ccOutputDir :: OutputDir
  , ccInputPath :: FilePath
  , ccOutputPath :: FilePath
  , ccDocName :: Text
  , ccReaderOptions :: ReaderOptions
  , ccConfig :: DiagramConfig
  }

-- | Monad stack for compilation pipeline
type CompilationM = ReaderT CompilationContext (ExceptT DocsterError IO)

-- | Strategy pattern: Record of format-specific operations
data CompilationStrategy = CompilationStrategy
  { -- | Format for diagram configuration
    csOutputFormat :: OutputFormat
    -- | Pandoc writer function
  , csWriter :: WriterOptions -> Pandoc -> PandocIO Text
    -- | Post-processing function for the generated content
  , csProcessOutput :: String -> Text -> IO (Either DocsterError ())
    -- | Success message formatter
  , csSuccessMessage :: String -> Text
  }

-- | PDF compilation strategy
pdfStrategy :: CompilationStrategy
pdfStrategy = CompilationStrategy
  { csOutputFormat = PDF
  , csWriter = writeLaTeX
  , csProcessOutput = processPDFOutput
  , csSuccessMessage = \path -> successEmoji <> " PDF written to " <> T.pack path
  }

-- | HTML compilation strategy
htmlStrategy :: CompilationStrategy
htmlStrategy = CompilationStrategy
  { csOutputFormat = HTML
  , csWriter = writeHtml5String
  , csProcessOutput = processHTMLOutput
  , csSuccessMessage = \path -> successEmoji <> " HTML written to " <> T.pack path
  }

-- | Parse LaTeX log content to extract meaningful error messages
parseLatexErrors :: Text -> Text
parseLatexErrors logContent =
  let logLines = T.lines logContent
      missingChars = extractMissingChars logLines
      overfullBoxes = extractOverfullBoxes logLines
      undefinedCommands = extractUndefinedCommands logLines
      fatalErrors = extractFatalErrors logLines

      errorCount = length missingChars + length overfullBoxes + length undefinedCommands + length fatalErrors

      summary = if errorCount == 0
                then "Unknown LaTeX error occurred."
                else T.unlines $ filter (not . T.null) [
                  if not (null fatalErrors) then "Fatal errors:\n" <> T.unlines (map ("  • " <>) fatalErrors) else "",
                  if not (null undefinedCommands) then "Undefined commands:\n" <> T.unlines (map ("  • " <>) undefinedCommands) else "",
                  if not (null missingChars) then "Missing Unicode characters:\n" <> T.unlines (map ("  • " <>) missingChars) else "",
                  if not (null overfullBoxes) then T.pack (show (length overfullBoxes)) <> " overfull boxes (layout warnings)" else ""
                ]
  in summary

-- | Extract missing character warnings from LaTeX log
extractMissingChars :: [Text] -> [Text]
extractMissingChars = mapMaybe extractChar
  where
    extractChar line
      | "Missing character:" `T.isInfixOf` line =
          case T.splitOn "(U+" line of
            [_, rest] -> case T.splitOn ")" rest of
              (unicode:_) -> Just $ "U+" <> unicode <> " " <> extractCharContext line
              _ -> Nothing
            _ -> Nothing
      | otherwise = Nothing

    extractCharContext line =
      case T.splitOn " in font " line of
        [_, rest] -> "in " <> T.takeWhile (/= ':') rest
        _ -> ""

-- | Extract overfull box warnings
extractOverfullBoxes :: [Text] -> [Text]
extractOverfullBoxes = mapMaybe extractBox
  where
    extractBox line
      | "Overfull \\hbox" `T.isInfixOf` line = Just $ T.takeWhile (/= '\n') line
      | otherwise = Nothing

-- | Extract undefined command errors
extractUndefinedCommands :: [Text] -> [Text]
extractUndefinedCommands = mapMaybe extractUndef
  where
    extractUndef line
      | "Undefined control sequence" `T.isInfixOf` line = Just line
      | otherwise = Nothing

-- | Extract fatal LaTeX errors
extractFatalErrors :: [Text] -> [Text]
extractFatalErrors = mapMaybe extractFatal
  where
    extractFatal line
      | "! " `T.isPrefixOf` line && not ("Missing character:" `T.isInfixOf` line) = Just $ T.drop 2 line
      | otherwise = Nothing

-- | Process PDF output: LaTeX template application and direct XeLaTeX compilation
processPDFOutput :: String -> Text -> IO (Either DocsterError ())
processPDFOutput outputPath latexOutput = do
  let completeLatex = latexTemplate latexOutput
      logOutputPath = replaceExtension outputPath "log"

  -- Use temporary directory for LaTeX compilation
  withSystemTempDirectory "docster-latex" $ \tempDir -> do
    let texFile = tempDir </> "document.tex"
        pdfFile = tempDir </> "document.pdf"
        logFile = tempDir </> "document.log"

    -- Write LaTeX content to temporary file
    TIO.writeFile texFile completeLatex

    -- Run XeLaTeX compilation
    (exitCode, _stdout, stderr) <- readProcessWithExitCode "xelatex"
      [ "-output-directory=" <> tempDir
      , "-interaction=nonstopmode"  -- Don't stop on errors
      , texFile
      ] ""

    -- Always copy log file to output location for debugging
    logExists <- doesFileExist logFile
    logContent <- if logExists
      then TIO.readFile logFile
      else return (T.pack stderr)
    TIO.writeFile logOutputPath logContent

    case exitCode of
      ExitSuccess -> do
        -- Check if PDF was actually generated
        pdfExists <- doesFileExist pdfFile
        if pdfExists
          then do
            -- Copy the generated PDF to the final location
            copyFile pdfFile outputPath
            return $ Right ()
          else do
            return $ Left $ PDFGenerationError $
              "PDF file not generated despite successful exit code.\n" <>
              "Full LaTeX log written to: " <> T.pack logOutputPath
      ExitFailure code -> do
        -- LaTeX compilation failed - parse log for meaningful errors
        let errorSummary = parseLatexErrors logContent
        return $ Left $ PDFGenerationError $
          "❌ LaTeX compilation failed (exit code " <> T.pack (show code) <> "):\n" <>
          errorSummary <> "\n\n" <>
          "Full LaTeX log written to: " <> T.pack logOutputPath

-- | Process HTML output: file writing and browser opening
processHTMLOutput :: String -> Text -> IO (Either DocsterError ())
processHTMLOutput outputPath html = do
  TIO.writeFile outputPath html

  -- Open the generated HTML file in browser for verification
  putStrLn $ "🌐 Opening " <> outputPath <> " in browser for error checking..."
  void $ callProcess "open" [outputPath]

  return $ Right ()

-- | Helper function to lift IO (Either DocsterError a) into CompilationM
liftEitherM :: IO (Either DocsterError a) -> CompilationM a
liftEitherM action = do
  result <- liftIO action
  case result of
    Left err -> lift $ throwE err
    Right value -> return value

-- | Pipeline step: Read content from input file
readContent :: CompilationM Text
readContent = do
  inputPath <- asks ccInputPath
  liftIO $ TIO.readFile inputPath

-- | Pipeline step: Parse markdown content into Pandoc AST
parseDocument :: Text -> CompilationM Pandoc
parseDocument content = do
  readerOptions <- asks ccReaderOptions
  liftEitherM $ parseMarkdown readerOptions content

-- | Pipeline step: Transform document (process Mermaid diagrams)
transformDocumentM :: Pandoc -> CompilationM Pandoc
transformDocumentM pandoc = do
  config <- asks ccConfig
  docName <- asks ccDocName
  liftEitherM $ transformDocument config docName pandoc

-- | Pipeline step: Generate output using format-specific writer
generateOutputM :: Pandoc -> CompilationM Text
generateOutputM pandoc = do
  strategy <- asks ccStrategy
  liftEitherM $ generateOutput strategy pandoc

-- | Pipeline step: Process output and write to file
processOutput :: Text -> CompilationM ()
processOutput output = do
  strategy <- asks ccStrategy
  outputPath <- asks ccOutputPath
  liftEitherM $ csProcessOutput strategy outputPath output

-- | Pipeline step: Print success message
printSuccess :: CompilationM ()
printSuccess = do
  strategy <- asks ccStrategy
  outputPath <- asks ccOutputPath
  liftIO $ putStrLn $ T.unpack $ csSuccessMessage strategy outputPath

-- | Higher-order compilation function that takes a strategy and executes the pipeline
compileWithStrategy :: CompilationStrategy -> SourceDir -> OutputDir -> Text -> OutputPath -> OutputPath -> IO (Either DocsterError ())
compileWithStrategy strategy sourceDir outputDir docName (OutputPath inputPath) (OutputPath outputPath) = do
  let readerOptions = def { readerExtensions = getDefaultExtensions "markdown" }
      config = DiagramConfig sourceDir outputDir (csOutputFormat strategy)
      context = CompilationContext strategy sourceDir outputDir inputPath outputPath docName readerOptions config
      pipeline = readContent >>= parseDocument >>= transformDocumentM >>= generateOutputM >>= processOutput >> printSuccess

  runExceptT $ runReaderT pipeline context

-- | Parse markdown with error handling
parseMarkdown :: ReaderOptions -> Text -> IO (Either DocsterError Pandoc)
parseMarkdown readerOptions content = do
  pandocResult <- runIO $ readMarkdown readerOptions content
  return $ case pandocResult of
    Left err -> Left $ FileError $ "Failed to parse markdown: " <> T.pack (show err)
    Right pandoc -> Right pandoc

-- | Generate output using the strategy's writer with error handling
generateOutput :: CompilationStrategy -> Pandoc -> IO (Either DocsterError Text)
generateOutput strategy transformed = do
  result <- runIO $ csWriter strategy def transformed
  return $ case result of
    Left err -> Left $ case csOutputFormat strategy of
      PDF -> PDFGenerationError $ "LaTeX generation failed: " <> T.pack (show err)
      HTML -> FileError $ "HTML generation failed: " <> T.pack (show err)
    Right output -> Right output

-- | Compile markdown to PDF using XeLaTeX
compileToPDF :: FilePath -> IO ()
compileToPDF = compileWithFormat pdfStrategy "pdf"

-- | Compile markdown to HTML
compileToHTML :: FilePath -> IO ()
compileToHTML = compileWithFormat htmlStrategy "html"

-- | Higher-order function to compile with any format strategy
compileWithFormat :: CompilationStrategy -> String -> FilePath -> IO ()
compileWithFormat strategy extension path = do
  let sourceDir = SourceDir $ takeDirectory path
      outputDir = computeOutputDir path
      OutputDir outDirPath = outputDir
      baseName = takeBaseName path
      docName = T.pack baseName
      outputPath = OutputPath $ outDirPath </> baseName <.> extension

  -- Ensure output directory exists before compilation
  ensureOutputDir outputDir

  result <- compileWithStrategy strategy sourceDir outputDir docName (OutputPath path) outputPath
  case result of
    Left err -> throwIO err
    Right _ -> return ()