Add docx export

This commit is contained in:
Willem van den Ende 2026-04-30 17:27:59 +01:00
parent fa850d5017
commit 9dd9313829
5 changed files with 110 additions and 20 deletions

View File

@ -4,7 +4,7 @@
module Main (main) where module Main (main) where
import Docster.Types (DocsterError(..)) import Docster.Types (DocsterError(..))
import Docster.Compiler (compileToPDF, compileToHTML) import Docster.Compiler (compileToPDF, compileToHTML, compileToDOCX)
import System.Environment (getArgs) import System.Environment (getArgs)
import Control.Exception (throwIO) import Control.Exception (throwIO)
@ -12,7 +12,8 @@ import Control.Exception (throwIO)
parseArgs :: [String] -> Either DocsterError (IO ()) parseArgs :: [String] -> Either DocsterError (IO ())
parseArgs ["-pdf", path] = Right (compileToPDF path) parseArgs ["-pdf", path] = Right (compileToPDF path)
parseArgs ["-html", path] = Right (compileToHTML path) parseArgs ["-html", path] = Right (compileToHTML path)
parseArgs _ = Left $ InvalidUsage "Usage: docster -pdf|-html <file.md>" parseArgs ["-docx", path] = Right (compileToDOCX path)
parseArgs _ = Left $ InvalidUsage "Usage: docster -pdf|-html|-docx <file.md>"
-- | Main entry point - parse arguments and execute appropriate action -- | Main entry point - parse arguments and execute appropriate action
main :: IO () main :: IO ()

View File

@ -6,12 +6,15 @@ module Docster.Compiler
( -- * Compilation Functions ( -- * Compilation Functions
compileToPDF compileToPDF
, compileToHTML , compileToHTML
, compileToDOCX
) where ) where
import Docster.Types import Docster.Types
( DocsterError(..), OutputFormat(..), SourceDir(..), OutputDir(..), OutputPath(..) ( DocsterError(..), OutputFormat(..), SourceDir(..), OutputDir(..), OutputPath(..)
, DiagramConfig(..), computeOutputDir, ensureOutputDir , DiagramConfig(..), computeOutputDir, ensureOutputDir
) )
import Text.Pandoc.Writers (writeDocx)
import qualified Data.ByteString.Lazy as BSL
import Docster.Transform (transformDocument) import Docster.Transform (transformDocument)
import Docster.LaTeX (latexTemplate) import Docster.LaTeX (latexTemplate)
import Text.Pandoc import Text.Pandoc
@ -30,6 +33,7 @@ import Control.Monad.Trans.Reader (ReaderT, runReaderT, asks)
import Control.Monad.Trans.Class (lift) import Control.Monad.Trans.Class (lift)
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
import Data.Maybe (mapMaybe) import Data.Maybe (mapMaybe)
import Data.Char (ord)
-- | Success indicator for user feedback -- | Success indicator for user feedback
successEmoji :: Text successEmoji :: Text
@ -38,13 +42,12 @@ successEmoji = "✅"
-- | Compilation context for pipeline operations -- | Compilation context for pipeline operations
data CompilationContext = CompilationContext data CompilationContext = CompilationContext
{ ccStrategy :: CompilationStrategy { ccStrategy :: CompilationStrategy
, ccSourceDir :: SourceDir
, ccOutputDir :: OutputDir
, ccInputPath :: FilePath , ccInputPath :: FilePath
, ccOutputPath :: FilePath , ccOutputPath :: FilePath
, ccDocName :: Text , ccDocName :: Text
, ccReaderOptions :: ReaderOptions , ccReaderOptions :: ReaderOptions
, ccConfig :: DiagramConfig , ccConfig :: DiagramConfig
, ccWritesFile :: Bool
} }
-- | Monad stack for compilation pipeline -- | Monad stack for compilation pipeline
@ -54,12 +57,14 @@ type CompilationM = ReaderT CompilationContext (ExceptT DocsterError IO)
data CompilationStrategy = CompilationStrategy data CompilationStrategy = CompilationStrategy
{ -- | Format for diagram configuration { -- | Format for diagram configuration
csOutputFormat :: OutputFormat csOutputFormat :: OutputFormat
-- | Pandoc writer function -- | Pandoc writer function (returns Text for HTML/PDF, unused for DOCX)
, csWriter :: WriterOptions -> Pandoc -> PandocIO Text , csWriter :: WriterOptions -> Pandoc -> PandocIO Text
-- | Post-processing function for the generated content -- | Post-processing function for the generated content
, csProcessOutput :: String -> Text -> IO (Either DocsterError ()) , csProcessOutput :: String -> Text -> IO (Either DocsterError ())
-- | Success message formatter -- | Success message formatter
, csSuccessMessage :: String -> Text , csSuccessMessage :: String -> Text
-- | True for formats where writer writes a file directly (DOCX)
, csWritesFile :: Bool
} }
-- | PDF compilation strategy -- | PDF compilation strategy
@ -69,6 +74,7 @@ pdfStrategy = CompilationStrategy
, csWriter = writeLaTeX , csWriter = writeLaTeX
, csProcessOutput = processPDFOutput , csProcessOutput = processPDFOutput
, csSuccessMessage = \path -> successEmoji <> " PDF written to " <> T.pack path , csSuccessMessage = \path -> successEmoji <> " PDF written to " <> T.pack path
, csWritesFile = False
} }
-- | HTML compilation strategy -- | HTML compilation strategy
@ -78,6 +84,17 @@ htmlStrategy = CompilationStrategy
, csWriter = writeHtml5String , csWriter = writeHtml5String
, csProcessOutput = processHTMLOutput , csProcessOutput = processHTMLOutput
, csSuccessMessage = \path -> successEmoji <> " HTML written to " <> T.pack path , csSuccessMessage = \path -> successEmoji <> " HTML written to " <> T.pack path
, csWritesFile = False
}
-- | DOCX compilation strategy (Pandoc writes file directly)
docxStrategy :: CompilationStrategy
docxStrategy = CompilationStrategy
{ csOutputFormat = DOCX
, csWriter = \_ _ -> return "" -- unused: writeDocx writes file directly
, csProcessOutput = \_ _ -> return $ Right () -- no post-processing needed
, csSuccessMessage = \path -> successEmoji <> " DOCX written to " <> T.pack path
, csWritesFile = True
} }
-- | Parse LaTeX log content to extract meaningful error messages -- | Parse LaTeX log content to extract meaningful error messages
@ -212,11 +229,30 @@ liftEitherM action = do
Left err -> lift $ throwE err Left err -> lift $ throwE err
Right value -> return value Right value -> return value
-- | Strip ANSI escape sequences (CSI codes like color/style) from text.
-- These appear in copy-pasted terminal output and break LaTeX compilation.
stripAnsiCodes :: Text -> Text
stripAnsiCodes input = case T.break (== '\x1b') input of
(before, rest)
| T.null rest -> before
| otherwise -> before <> stripAnsiCodes (skipEscape (T.tail rest))
where
-- Skip an ESC sequence: ESC [ <params> <final byte>
skipEscape t
| T.null t = t
| T.head t == '[' = skipCSIParams (T.tail t)
| otherwise = T.tail t -- non-CSI escape: skip one char after ESC
-- Skip CSI parameter/intermediate bytes until final byte (0x40-0x7E)
skipCSIParams t
| T.null t = t
| let c = ord (T.head t), c >= 0x40 && c <= 0x7E = T.tail t -- final byte, consume it
| otherwise = skipCSIParams (T.tail t)
-- | Pipeline step: Read content from input file -- | Pipeline step: Read content from input file
readContent :: CompilationM Text readContent :: CompilationM Text
readContent = do readContent = do
inputPath <- asks ccInputPath inputPath <- asks ccInputPath
liftIO $ TIO.readFile inputPath liftIO $ stripAnsiCodes <$> TIO.readFile inputPath
-- | Pipeline step: Parse markdown content into Pandoc AST -- | Pipeline step: Parse markdown content into Pandoc AST
parseDocument :: Text -> CompilationM Pandoc parseDocument :: Text -> CompilationM Pandoc
@ -235,12 +271,22 @@ transformDocumentM pandoc = do
generateOutputM :: Pandoc -> CompilationM Text generateOutputM :: Pandoc -> CompilationM Text
generateOutputM pandoc = do generateOutputM pandoc = do
strategy <- asks ccStrategy strategy <- asks ccStrategy
liftEitherM $ generateOutput strategy pandoc writesFile <- asks ccWritesFile
if writesFile
then do
outputPath <- asks ccOutputPath
liftIO $ generateOutputFile strategy outputPath pandoc
return "" -- placeholder, won't be used
else liftEitherM $ generateOutput strategy pandoc
-- | Pipeline step: Process output and write to file -- | Pipeline step: Process output and write to file
processOutput :: Text -> CompilationM () processOutput :: Text -> CompilationM ()
processOutput output = do processOutput output = do
strategy <- asks ccStrategy strategy <- asks ccStrategy
writesFile <- asks ccWritesFile
if writesFile
then return () -- file already written by writer
else do
outputPath <- asks ccOutputPath outputPath <- asks ccOutputPath
liftEitherM $ csProcessOutput strategy outputPath output liftEitherM $ csProcessOutput strategy outputPath output
@ -256,7 +302,7 @@ compileWithStrategy :: CompilationStrategy -> SourceDir -> OutputDir -> Text ->
compileWithStrategy strategy sourceDir outputDir docName (OutputPath inputPath) (OutputPath outputPath) = do compileWithStrategy strategy sourceDir outputDir docName (OutputPath inputPath) (OutputPath outputPath) = do
let readerOptions = def { readerExtensions = getDefaultExtensions "markdown" } let readerOptions = def { readerExtensions = getDefaultExtensions "markdown" }
config = DiagramConfig sourceDir outputDir (csOutputFormat strategy) config = DiagramConfig sourceDir outputDir (csOutputFormat strategy)
context = CompilationContext strategy sourceDir outputDir inputPath outputPath docName readerOptions config context = CompilationContext strategy inputPath outputPath docName readerOptions config (csWritesFile strategy)
pipeline = readContent >>= parseDocument >>= transformDocumentM >>= generateOutputM >>= processOutput >> printSuccess pipeline = readContent >>= parseDocument >>= transformDocumentM >>= generateOutputM >>= processOutput >> printSuccess
runExceptT $ runReaderT pipeline context runExceptT $ runReaderT pipeline context
@ -277,8 +323,19 @@ generateOutput strategy transformed = do
Left err -> Left $ case csOutputFormat strategy of Left err -> Left $ case csOutputFormat strategy of
PDF -> PDFGenerationError $ "LaTeX generation failed: " <> T.pack (show err) PDF -> PDFGenerationError $ "LaTeX generation failed: " <> T.pack (show err)
HTML -> FileError $ "HTML generation failed: " <> T.pack (show err) HTML -> FileError $ "HTML generation failed: " <> T.pack (show err)
DOCX -> FileError $ "DOCX generation failed: " <> T.pack (show err)
Right output -> Right output Right output -> Right output
-- | Generate output file directly (for DOCX which writes to file)
generateOutputFile :: CompilationStrategy -> FilePath -> Pandoc -> IO (Either DocsterError ())
generateOutputFile strategy outputPath pandoc = do
result <- runIO $ writeDocx def pandoc
case result of
Left err -> return $ Left $ FileError $ "DOCX generation failed: " <> T.pack (show err)
Right docxBS -> do
BSL.writeFile outputPath docxBS
return $ Right ()
-- | Compile markdown to PDF using XeLaTeX -- | Compile markdown to PDF using XeLaTeX
compileToPDF :: FilePath -> IO () compileToPDF :: FilePath -> IO ()
compileToPDF = compileWithFormat pdfStrategy "pdf" compileToPDF = compileWithFormat pdfStrategy "pdf"
@ -287,6 +344,10 @@ compileToPDF = compileWithFormat pdfStrategy "pdf"
compileToHTML :: FilePath -> IO () compileToHTML :: FilePath -> IO ()
compileToHTML = compileWithFormat htmlStrategy "html" compileToHTML = compileWithFormat htmlStrategy "html"
-- | Compile markdown to DOCX
compileToDOCX :: FilePath -> IO ()
compileToDOCX = compileWithFormat docxStrategy "docx"
-- | Higher-order function to compile with any format strategy -- | Higher-order function to compile with any format strategy
compileWithFormat :: CompilationStrategy -> String -> FilePath -> IO () compileWithFormat :: CompilationStrategy -> String -> FilePath -> IO ()
compileWithFormat strategy extension path = do compileWithFormat strategy extension path = do

View File

@ -9,15 +9,12 @@ module Docster.LaTeX
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
-- | LaTeX template with comprehensive package support for PDF generation -- | LaTeX template with modern corporate styling for PDF generation
latexTemplate :: Text -> Text latexTemplate :: Text -> Text
latexTemplate bodyContent = T.unlines latexTemplate bodyContent = T.unlines
[ "\\documentclass{article}" [ "\\documentclass{article}"
, "\\usepackage[utf8]{inputenc}" -- Packages
, "\\usepackage{fontspec}" , "\\usepackage{fontspec}"
, "\\setmainfont{DejaVu Serif}[Scale=1.0]"
, "\\setsansfont{DejaVu Sans}[Scale=1.0]"
, "\\setmonofont{DejaVu Sans Mono}[Scale=0.85]"
, "\\usepackage{graphicx}" , "\\usepackage{graphicx}"
, "\\usepackage{adjustbox}" , "\\usepackage{adjustbox}"
, "\\usepackage{geometry}" , "\\usepackage{geometry}"
@ -25,16 +22,46 @@ latexTemplate bodyContent = T.unlines
, "\\usepackage{booktabs}" , "\\usepackage{booktabs}"
, "\\usepackage{array}" , "\\usepackage{array}"
, "\\usepackage{calc}" , "\\usepackage{calc}"
, "\\geometry{margin=1in}"
, "\\usepackage{hyperref}"
, "\\usepackage{enumitem}" , "\\usepackage{enumitem}"
, "\\usepackage{amsmath}" , "\\usepackage{amsmath}"
, "\\usepackage{amssymb}" , "\\usepackage{amssymb}"
, "\\usepackage{fancyvrb}" , "\\usepackage{fancyvrb}"
, "\\usepackage{color}" , "\\usepackage[dvipsnames,svgnames,x11names]{xcolor}"
, "\\usepackage{titlesec}"
, "\\usepackage{fancyhdr}"
, "\\usepackage{framed}"
-- Typography: Helvetica Neue + Menlo, sans-serif default
, "\\setmainfont{Helvetica Neue}"
, "\\setsansfont{Helvetica Neue}"
, "\\setmonofont{Menlo}[Scale=0.85]"
, "\\renewcommand{\\familydefault}{\\sfdefault}"
-- Layout: wider margins, block paragraphs
, "\\geometry{left=0.9in,right=0.9in,top=1in,bottom=1in}"
, "\\setlength{\\parindent}{0pt}"
, "\\setlength{\\parskip}{0.5em}"
-- Color scheme
, "\\definecolor{accent}{HTML}{1A365D}"
, "\\definecolor{codebg}{HTML}{F5F5F5}"
-- Hyperlinks: accent-colored, no boxes
, "\\usepackage[colorlinks=true,linkcolor=accent,urlcolor=accent,citecolor=accent]{hyperref}"
-- Heading styles
, "\\titleformat{\\section}{\\Large\\bfseries\\color{accent}}{\\thesection}{1em}{}[\\vspace{2pt}\\titlerule]"
, "\\titleformat{\\subsection}{\\large\\bfseries\\color{accent}}{\\thesubsection}{1em}{}"
, "\\titleformat{\\subsubsection}{\\normalsize\\bfseries\\color{accent}}{\\thesubsubsection}{1em}{}"
, "\\titlespacing*{\\section}{0pt}{1.5em}{0.8em}"
, "\\titlespacing*{\\subsection}{0pt}{1.2em}{0.5em}"
, "\\titlespacing*{\\subsubsection}{0pt}{1em}{0.4em}"
-- Page header/footer: minimal centered page number
, "\\pagestyle{fancy}"
, "\\fancyhf{}"
, "\\renewcommand{\\headrulewidth}{0pt}"
, "\\fancyfoot[C]{\\small\\thepage}"
-- Code blocks: light gray background
, "\\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\\\\{\\}}" , "\\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\\\\{\\}}"
, "\\newenvironment{Shaded}{}{}" , "\\newenvironment{Shaded}{\\begin{snugshade}}{\\end{snugshade}}"
, "\\definecolor{shadecolor}{HTML}{F5F5F5}"
, syntaxHighlightingCommands , syntaxHighlightingCommands
-- Pandoc helpers
, "\\providecommand{\\tightlist}{%" , "\\providecommand{\\tightlist}{%"
, " \\setlength{\\itemsep}{0pt}\\setlength{\\parskip}{0pt}}" , " \\setlength{\\itemsep}{0pt}\\setlength{\\parskip}{0pt}}"
, "\\newcommand{\\real}[1]{#1}" , "\\newcommand{\\real}[1]{#1}"
@ -47,6 +74,7 @@ latexTemplate bodyContent = T.unlines
, "\\def\\maxheight{\\ifdim\\Gin@nat@height>\\textheight\\textheight\\else\\Gin@nat@height\\fi}" , "\\def\\maxheight{\\ifdim\\Gin@nat@height>\\textheight\\textheight\\else\\Gin@nat@height\\fi}"
, "\\makeatother" , "\\makeatother"
, "\\setkeys{Gin}{width=\\maxwidth,height=\\maxheight,keepaspectratio}" , "\\setkeys{Gin}{width=\\maxwidth,height=\\maxheight,keepaspectratio}"
, "\\providecommand{\\pandocbounded}[1]{#1}"
, "\\begin{document}" , "\\begin{document}"
, bodyContent , bodyContent
, "\\end{document}" , "\\end{document}"

BIN
src/Docster/Types.hi Normal file

Binary file not shown.

View File

@ -45,7 +45,7 @@ data DocsterError
instance Exception DocsterError instance Exception DocsterError
-- | Output format for document generation -- | Output format for document generation
data OutputFormat = PDF | HTML data OutputFormat = PDF | HTML | DOCX
deriving (Show, Eq) deriving (Show, Eq)
-- | Type-safe wrapper for source directory paths -- | Type-safe wrapper for source directory paths