Add docx export

This commit is contained in:
Willem van den Ende 2026-04-30 17:27:59 +01:00
parent fa850d5017
commit 9dd9313829
5 changed files with 110 additions and 20 deletions

View File

@ -4,7 +4,7 @@
module Main (main) where
import Docster.Types (DocsterError(..))
import Docster.Compiler (compileToPDF, compileToHTML)
import Docster.Compiler (compileToPDF, compileToHTML, compileToDOCX)
import System.Environment (getArgs)
import Control.Exception (throwIO)
@ -12,7 +12,8 @@ import Control.Exception (throwIO)
parseArgs :: [String] -> Either DocsterError (IO ())
parseArgs ["-pdf", path] = Right (compileToPDF path)
parseArgs ["-html", path] = Right (compileToHTML path)
parseArgs _ = Left $ InvalidUsage "Usage: docster -pdf|-html <file.md>"
parseArgs ["-docx", path] = Right (compileToDOCX path)
parseArgs _ = Left $ InvalidUsage "Usage: docster -pdf|-html|-docx <file.md>"
-- | Main entry point - parse arguments and execute appropriate action
main :: IO ()

View File

@ -6,12 +6,15 @@ module Docster.Compiler
( -- * Compilation Functions
compileToPDF
, compileToHTML
, compileToDOCX
) where
import Docster.Types
( DocsterError(..), OutputFormat(..), SourceDir(..), OutputDir(..), OutputPath(..)
, DiagramConfig(..), computeOutputDir, ensureOutputDir
)
import Text.Pandoc.Writers (writeDocx)
import qualified Data.ByteString.Lazy as BSL
import Docster.Transform (transformDocument)
import Docster.LaTeX (latexTemplate)
import Text.Pandoc
@ -30,6 +33,7 @@ import Control.Monad.Trans.Reader (ReaderT, runReaderT, asks)
import Control.Monad.Trans.Class (lift)
import Control.Monad.IO.Class (liftIO)
import Data.Maybe (mapMaybe)
import Data.Char (ord)
-- | Success indicator for user feedback
successEmoji :: Text
@ -38,13 +42,12 @@ successEmoji = "✅"
-- | Compilation context for pipeline operations
data CompilationContext = CompilationContext
{ ccStrategy :: CompilationStrategy
, ccSourceDir :: SourceDir
, ccOutputDir :: OutputDir
, ccInputPath :: FilePath
, ccOutputPath :: FilePath
, ccDocName :: Text
, ccReaderOptions :: ReaderOptions
, ccConfig :: DiagramConfig
, ccWritesFile :: Bool
}
-- | Monad stack for compilation pipeline
@ -54,12 +57,14 @@ type CompilationM = ReaderT CompilationContext (ExceptT DocsterError IO)
data CompilationStrategy = CompilationStrategy
{ -- | Format for diagram configuration
csOutputFormat :: OutputFormat
-- | Pandoc writer function
-- | Pandoc writer function (returns Text for HTML/PDF, unused for DOCX)
, csWriter :: WriterOptions -> Pandoc -> PandocIO Text
-- | Post-processing function for the generated content
, csProcessOutput :: String -> Text -> IO (Either DocsterError ())
-- | Success message formatter
, csSuccessMessage :: String -> Text
-- | True for formats where writer writes a file directly (DOCX)
, csWritesFile :: Bool
}
-- | PDF compilation strategy
@ -69,6 +74,7 @@ pdfStrategy = CompilationStrategy
, csWriter = writeLaTeX
, csProcessOutput = processPDFOutput
, csSuccessMessage = \path -> successEmoji <> " PDF written to " <> T.pack path
, csWritesFile = False
}
-- | HTML compilation strategy
@ -78,6 +84,17 @@ htmlStrategy = CompilationStrategy
, csWriter = writeHtml5String
, csProcessOutput = processHTMLOutput
, csSuccessMessage = \path -> successEmoji <> " HTML written to " <> T.pack path
, csWritesFile = False
}
-- | DOCX compilation strategy (Pandoc writes file directly)
docxStrategy :: CompilationStrategy
docxStrategy = CompilationStrategy
{ csOutputFormat = DOCX
, csWriter = \_ _ -> return "" -- unused: writeDocx writes file directly
, csProcessOutput = \_ _ -> return $ Right () -- no post-processing needed
, csSuccessMessage = \path -> successEmoji <> " DOCX written to " <> T.pack path
, csWritesFile = True
}
-- | Parse LaTeX log content to extract meaningful error messages
@ -212,11 +229,30 @@ liftEitherM action = do
Left err -> lift $ throwE err
Right value -> return value
-- | Strip ANSI escape sequences (CSI codes like color/style) from text.
-- These appear in copy-pasted terminal output and break LaTeX compilation.
stripAnsiCodes :: Text -> Text
stripAnsiCodes input = case T.break (== '\x1b') input of
(before, rest)
| T.null rest -> before
| otherwise -> before <> stripAnsiCodes (skipEscape (T.tail rest))
where
-- Skip an ESC sequence: ESC [ <params> <final byte>
skipEscape t
| T.null t = t
| T.head t == '[' = skipCSIParams (T.tail t)
| otherwise = T.tail t -- non-CSI escape: skip one char after ESC
-- Skip CSI parameter/intermediate bytes until final byte (0x40-0x7E)
skipCSIParams t
| T.null t = t
| let c = ord (T.head t), c >= 0x40 && c <= 0x7E = T.tail t -- final byte, consume it
| otherwise = skipCSIParams (T.tail t)
-- | Pipeline step: Read content from input file
readContent :: CompilationM Text
readContent = do
inputPath <- asks ccInputPath
liftIO $ TIO.readFile inputPath
liftIO $ stripAnsiCodes <$> TIO.readFile inputPath
-- | Pipeline step: Parse markdown content into Pandoc AST
parseDocument :: Text -> CompilationM Pandoc
@ -235,12 +271,22 @@ transformDocumentM pandoc = do
generateOutputM :: Pandoc -> CompilationM Text
generateOutputM pandoc = do
strategy <- asks ccStrategy
liftEitherM $ generateOutput strategy pandoc
writesFile <- asks ccWritesFile
if writesFile
then do
outputPath <- asks ccOutputPath
liftIO $ generateOutputFile strategy outputPath pandoc
return "" -- placeholder, won't be used
else liftEitherM $ generateOutput strategy pandoc
-- | Pipeline step: Process output and write to file
processOutput :: Text -> CompilationM ()
processOutput output = do
strategy <- asks ccStrategy
writesFile <- asks ccWritesFile
if writesFile
then return () -- file already written by writer
else do
outputPath <- asks ccOutputPath
liftEitherM $ csProcessOutput strategy outputPath output
@ -256,7 +302,7 @@ compileWithStrategy :: CompilationStrategy -> SourceDir -> OutputDir -> Text ->
compileWithStrategy strategy sourceDir outputDir docName (OutputPath inputPath) (OutputPath outputPath) = do
let readerOptions = def { readerExtensions = getDefaultExtensions "markdown" }
config = DiagramConfig sourceDir outputDir (csOutputFormat strategy)
context = CompilationContext strategy sourceDir outputDir inputPath outputPath docName readerOptions config
context = CompilationContext strategy inputPath outputPath docName readerOptions config (csWritesFile strategy)
pipeline = readContent >>= parseDocument >>= transformDocumentM >>= generateOutputM >>= processOutput >> printSuccess
runExceptT $ runReaderT pipeline context
@ -277,8 +323,19 @@ generateOutput strategy transformed = do
Left err -> Left $ case csOutputFormat strategy of
PDF -> PDFGenerationError $ "LaTeX generation failed: " <> T.pack (show err)
HTML -> FileError $ "HTML generation failed: " <> T.pack (show err)
DOCX -> FileError $ "DOCX generation failed: " <> T.pack (show err)
Right output -> Right output
-- | Generate output file directly (for DOCX which writes to file)
generateOutputFile :: CompilationStrategy -> FilePath -> Pandoc -> IO (Either DocsterError ())
generateOutputFile strategy outputPath pandoc = do
result <- runIO $ writeDocx def pandoc
case result of
Left err -> return $ Left $ FileError $ "DOCX generation failed: " <> T.pack (show err)
Right docxBS -> do
BSL.writeFile outputPath docxBS
return $ Right ()
-- | Compile markdown to PDF using XeLaTeX
compileToPDF :: FilePath -> IO ()
compileToPDF = compileWithFormat pdfStrategy "pdf"
@ -287,6 +344,10 @@ compileToPDF = compileWithFormat pdfStrategy "pdf"
compileToHTML :: FilePath -> IO ()
compileToHTML = compileWithFormat htmlStrategy "html"
-- | Compile markdown to DOCX
compileToDOCX :: FilePath -> IO ()
compileToDOCX = compileWithFormat docxStrategy "docx"
-- | Higher-order function to compile with any format strategy
compileWithFormat :: CompilationStrategy -> String -> FilePath -> IO ()
compileWithFormat strategy extension path = do

View File

@ -9,15 +9,12 @@ module Docster.LaTeX
import Data.Text (Text)
import qualified Data.Text as T
-- | LaTeX template with comprehensive package support for PDF generation
-- | LaTeX template with modern corporate styling for PDF generation
latexTemplate :: Text -> Text
latexTemplate bodyContent = T.unlines
[ "\\documentclass{article}"
, "\\usepackage[utf8]{inputenc}"
-- Packages
, "\\usepackage{fontspec}"
, "\\setmainfont{DejaVu Serif}[Scale=1.0]"
, "\\setsansfont{DejaVu Sans}[Scale=1.0]"
, "\\setmonofont{DejaVu Sans Mono}[Scale=0.85]"
, "\\usepackage{graphicx}"
, "\\usepackage{adjustbox}"
, "\\usepackage{geometry}"
@ -25,16 +22,46 @@ latexTemplate bodyContent = T.unlines
, "\\usepackage{booktabs}"
, "\\usepackage{array}"
, "\\usepackage{calc}"
, "\\geometry{margin=1in}"
, "\\usepackage{hyperref}"
, "\\usepackage{enumitem}"
, "\\usepackage{amsmath}"
, "\\usepackage{amssymb}"
, "\\usepackage{fancyvrb}"
, "\\usepackage{color}"
, "\\usepackage[dvipsnames,svgnames,x11names]{xcolor}"
, "\\usepackage{titlesec}"
, "\\usepackage{fancyhdr}"
, "\\usepackage{framed}"
-- Typography: Helvetica Neue + Menlo, sans-serif default
, "\\setmainfont{Helvetica Neue}"
, "\\setsansfont{Helvetica Neue}"
, "\\setmonofont{Menlo}[Scale=0.85]"
, "\\renewcommand{\\familydefault}{\\sfdefault}"
-- Layout: wider margins, block paragraphs
, "\\geometry{left=0.9in,right=0.9in,top=1in,bottom=1in}"
, "\\setlength{\\parindent}{0pt}"
, "\\setlength{\\parskip}{0.5em}"
-- Color scheme
, "\\definecolor{accent}{HTML}{1A365D}"
, "\\definecolor{codebg}{HTML}{F5F5F5}"
-- Hyperlinks: accent-colored, no boxes
, "\\usepackage[colorlinks=true,linkcolor=accent,urlcolor=accent,citecolor=accent]{hyperref}"
-- Heading styles
, "\\titleformat{\\section}{\\Large\\bfseries\\color{accent}}{\\thesection}{1em}{}[\\vspace{2pt}\\titlerule]"
, "\\titleformat{\\subsection}{\\large\\bfseries\\color{accent}}{\\thesubsection}{1em}{}"
, "\\titleformat{\\subsubsection}{\\normalsize\\bfseries\\color{accent}}{\\thesubsubsection}{1em}{}"
, "\\titlespacing*{\\section}{0pt}{1.5em}{0.8em}"
, "\\titlespacing*{\\subsection}{0pt}{1.2em}{0.5em}"
, "\\titlespacing*{\\subsubsection}{0pt}{1em}{0.4em}"
-- Page header/footer: minimal centered page number
, "\\pagestyle{fancy}"
, "\\fancyhf{}"
, "\\renewcommand{\\headrulewidth}{0pt}"
, "\\fancyfoot[C]{\\small\\thepage}"
-- Code blocks: light gray background
, "\\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\\\\{\\}}"
, "\\newenvironment{Shaded}{}{}"
, "\\newenvironment{Shaded}{\\begin{snugshade}}{\\end{snugshade}}"
, "\\definecolor{shadecolor}{HTML}{F5F5F5}"
, syntaxHighlightingCommands
-- Pandoc helpers
, "\\providecommand{\\tightlist}{%"
, " \\setlength{\\itemsep}{0pt}\\setlength{\\parskip}{0pt}}"
, "\\newcommand{\\real}[1]{#1}"
@ -47,6 +74,7 @@ latexTemplate bodyContent = T.unlines
, "\\def\\maxheight{\\ifdim\\Gin@nat@height>\\textheight\\textheight\\else\\Gin@nat@height\\fi}"
, "\\makeatother"
, "\\setkeys{Gin}{width=\\maxwidth,height=\\maxheight,keepaspectratio}"
, "\\providecommand{\\pandocbounded}[1]{#1}"
, "\\begin{document}"
, bodyContent
, "\\end{document}"

BIN
src/Docster/Types.hi Normal file

Binary file not shown.

View File

@ -45,7 +45,7 @@ data DocsterError
instance Exception DocsterError
-- | Output format for document generation
data OutputFormat = PDF | HTML
data OutputFormat = PDF | HTML | DOCX
deriving (Show, Eq)
-- | Type-safe wrapper for source directory paths