From 9dd93138293fe1478a80ab40481964493f674968 Mon Sep 17 00:00:00 2001 From: Willem van den Ende Date: Thu, 30 Apr 2026 17:27:59 +0100 Subject: [PATCH] Add docx export --- app/Main.hs | 5 +-- src/Docster/Compiler.hs | 77 +++++++++++++++++++++++++++++++++++----- src/Docster/LaTeX.hs | 46 +++++++++++++++++++----- src/Docster/Types.hi | Bin 0 -> 7520 bytes src/Docster/Types.hs | 2 +- 5 files changed, 110 insertions(+), 20 deletions(-) create mode 100644 src/Docster/Types.hi diff --git a/app/Main.hs b/app/Main.hs index 32c5d2e..54e029a 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -4,7 +4,7 @@ module Main (main) where import Docster.Types (DocsterError(..)) -import Docster.Compiler (compileToPDF, compileToHTML) +import Docster.Compiler (compileToPDF, compileToHTML, compileToDOCX) import System.Environment (getArgs) import Control.Exception (throwIO) @@ -12,7 +12,8 @@ import Control.Exception (throwIO) parseArgs :: [String] -> Either DocsterError (IO ()) parseArgs ["-pdf", path] = Right (compileToPDF path) parseArgs ["-html", path] = Right (compileToHTML path) -parseArgs _ = Left $ InvalidUsage "Usage: docster -pdf|-html " +parseArgs ["-docx", path] = Right (compileToDOCX path) +parseArgs _ = Left $ InvalidUsage "Usage: docster -pdf|-html|-docx " -- | Main entry point - parse arguments and execute appropriate action main :: IO () diff --git a/src/Docster/Compiler.hs b/src/Docster/Compiler.hs index a757345..e7220e5 100644 --- a/src/Docster/Compiler.hs +++ b/src/Docster/Compiler.hs @@ -6,12 +6,15 @@ module Docster.Compiler ( -- * Compilation Functions compileToPDF , compileToHTML + , compileToDOCX ) where import Docster.Types ( DocsterError(..), OutputFormat(..), SourceDir(..), OutputDir(..), OutputPath(..) , DiagramConfig(..), computeOutputDir, ensureOutputDir ) +import Text.Pandoc.Writers (writeDocx) +import qualified Data.ByteString.Lazy as BSL import Docster.Transform (transformDocument) import Docster.LaTeX (latexTemplate) import Text.Pandoc @@ -30,6 +33,7 @@ import Control.Monad.Trans.Reader (ReaderT, runReaderT, asks) import Control.Monad.Trans.Class (lift) import Control.Monad.IO.Class (liftIO) import Data.Maybe (mapMaybe) +import Data.Char (ord) -- | Success indicator for user feedback successEmoji :: Text @@ -38,13 +42,12 @@ successEmoji = "✅" -- | Compilation context for pipeline operations data CompilationContext = CompilationContext { ccStrategy :: CompilationStrategy - , ccSourceDir :: SourceDir - , ccOutputDir :: OutputDir , ccInputPath :: FilePath , ccOutputPath :: FilePath , ccDocName :: Text , ccReaderOptions :: ReaderOptions , ccConfig :: DiagramConfig + , ccWritesFile :: Bool } -- | Monad stack for compilation pipeline @@ -54,12 +57,14 @@ type CompilationM = ReaderT CompilationContext (ExceptT DocsterError IO) data CompilationStrategy = CompilationStrategy { -- | Format for diagram configuration csOutputFormat :: OutputFormat - -- | Pandoc writer function + -- | Pandoc writer function (returns Text for HTML/PDF, unused for DOCX) , csWriter :: WriterOptions -> Pandoc -> PandocIO Text -- | Post-processing function for the generated content , csProcessOutput :: String -> Text -> IO (Either DocsterError ()) -- | Success message formatter , csSuccessMessage :: String -> Text + -- | True for formats where writer writes a file directly (DOCX) + , csWritesFile :: Bool } -- | PDF compilation strategy @@ -69,6 +74,7 @@ pdfStrategy = CompilationStrategy , csWriter = writeLaTeX , csProcessOutput = processPDFOutput , csSuccessMessage = \path -> successEmoji <> " PDF written to " <> T.pack path + , csWritesFile = False } -- | HTML compilation strategy @@ -78,6 +84,17 @@ htmlStrategy = CompilationStrategy , csWriter = writeHtml5String , csProcessOutput = processHTMLOutput , csSuccessMessage = \path -> successEmoji <> " HTML written to " <> T.pack path + , csWritesFile = False + } + +-- | DOCX compilation strategy (Pandoc writes file directly) +docxStrategy :: CompilationStrategy +docxStrategy = CompilationStrategy + { csOutputFormat = DOCX + , csWriter = \_ _ -> return "" -- unused: writeDocx writes file directly + , csProcessOutput = \_ _ -> return $ Right () -- no post-processing needed + , csSuccessMessage = \path -> successEmoji <> " DOCX written to " <> T.pack path + , csWritesFile = True } -- | Parse LaTeX log content to extract meaningful error messages @@ -212,11 +229,30 @@ liftEitherM action = do Left err -> lift $ throwE err Right value -> return value +-- | Strip ANSI escape sequences (CSI codes like color/style) from text. +-- These appear in copy-pasted terminal output and break LaTeX compilation. +stripAnsiCodes :: Text -> Text +stripAnsiCodes input = case T.break (== '\x1b') input of + (before, rest) + | T.null rest -> before + | otherwise -> before <> stripAnsiCodes (skipEscape (T.tail rest)) + where + -- Skip an ESC sequence: ESC [ + skipEscape t + | T.null t = t + | T.head t == '[' = skipCSIParams (T.tail t) + | otherwise = T.tail t -- non-CSI escape: skip one char after ESC + -- Skip CSI parameter/intermediate bytes until final byte (0x40-0x7E) + skipCSIParams t + | T.null t = t + | let c = ord (T.head t), c >= 0x40 && c <= 0x7E = T.tail t -- final byte, consume it + | otherwise = skipCSIParams (T.tail t) + -- | Pipeline step: Read content from input file readContent :: CompilationM Text readContent = do inputPath <- asks ccInputPath - liftIO $ TIO.readFile inputPath + liftIO $ stripAnsiCodes <$> TIO.readFile inputPath -- | Pipeline step: Parse markdown content into Pandoc AST parseDocument :: Text -> CompilationM Pandoc @@ -235,14 +271,24 @@ transformDocumentM pandoc = do generateOutputM :: Pandoc -> CompilationM Text generateOutputM pandoc = do strategy <- asks ccStrategy - liftEitherM $ generateOutput strategy pandoc + writesFile <- asks ccWritesFile + if writesFile + then do + outputPath <- asks ccOutputPath + liftIO $ generateOutputFile strategy outputPath pandoc + return "" -- placeholder, won't be used + else liftEitherM $ generateOutput strategy pandoc -- | Pipeline step: Process output and write to file processOutput :: Text -> CompilationM () processOutput output = do strategy <- asks ccStrategy - outputPath <- asks ccOutputPath - liftEitherM $ csProcessOutput strategy outputPath output + writesFile <- asks ccWritesFile + if writesFile + then return () -- file already written by writer + else do + outputPath <- asks ccOutputPath + liftEitherM $ csProcessOutput strategy outputPath output -- | Pipeline step: Print success message printSuccess :: CompilationM () @@ -256,7 +302,7 @@ compileWithStrategy :: CompilationStrategy -> SourceDir -> OutputDir -> Text -> compileWithStrategy strategy sourceDir outputDir docName (OutputPath inputPath) (OutputPath outputPath) = do let readerOptions = def { readerExtensions = getDefaultExtensions "markdown" } config = DiagramConfig sourceDir outputDir (csOutputFormat strategy) - context = CompilationContext strategy sourceDir outputDir inputPath outputPath docName readerOptions config + context = CompilationContext strategy inputPath outputPath docName readerOptions config (csWritesFile strategy) pipeline = readContent >>= parseDocument >>= transformDocumentM >>= generateOutputM >>= processOutput >> printSuccess runExceptT $ runReaderT pipeline context @@ -277,8 +323,19 @@ generateOutput strategy transformed = do Left err -> Left $ case csOutputFormat strategy of PDF -> PDFGenerationError $ "LaTeX generation failed: " <> T.pack (show err) HTML -> FileError $ "HTML generation failed: " <> T.pack (show err) + DOCX -> FileError $ "DOCX generation failed: " <> T.pack (show err) Right output -> Right output +-- | Generate output file directly (for DOCX which writes to file) +generateOutputFile :: CompilationStrategy -> FilePath -> Pandoc -> IO (Either DocsterError ()) +generateOutputFile strategy outputPath pandoc = do + result <- runIO $ writeDocx def pandoc + case result of + Left err -> return $ Left $ FileError $ "DOCX generation failed: " <> T.pack (show err) + Right docxBS -> do + BSL.writeFile outputPath docxBS + return $ Right () + -- | Compile markdown to PDF using XeLaTeX compileToPDF :: FilePath -> IO () compileToPDF = compileWithFormat pdfStrategy "pdf" @@ -287,6 +344,10 @@ compileToPDF = compileWithFormat pdfStrategy "pdf" compileToHTML :: FilePath -> IO () compileToHTML = compileWithFormat htmlStrategy "html" +-- | Compile markdown to DOCX +compileToDOCX :: FilePath -> IO () +compileToDOCX = compileWithFormat docxStrategy "docx" + -- | Higher-order function to compile with any format strategy compileWithFormat :: CompilationStrategy -> String -> FilePath -> IO () compileWithFormat strategy extension path = do diff --git a/src/Docster/LaTeX.hs b/src/Docster/LaTeX.hs index b1cca27..d9a44ce 100644 --- a/src/Docster/LaTeX.hs +++ b/src/Docster/LaTeX.hs @@ -9,15 +9,12 @@ module Docster.LaTeX import Data.Text (Text) import qualified Data.Text as T --- | LaTeX template with comprehensive package support for PDF generation +-- | LaTeX template with modern corporate styling for PDF generation latexTemplate :: Text -> Text latexTemplate bodyContent = T.unlines [ "\\documentclass{article}" - , "\\usepackage[utf8]{inputenc}" + -- Packages , "\\usepackage{fontspec}" - , "\\setmainfont{DejaVu Serif}[Scale=1.0]" - , "\\setsansfont{DejaVu Sans}[Scale=1.0]" - , "\\setmonofont{DejaVu Sans Mono}[Scale=0.85]" , "\\usepackage{graphicx}" , "\\usepackage{adjustbox}" , "\\usepackage{geometry}" @@ -25,16 +22,46 @@ latexTemplate bodyContent = T.unlines , "\\usepackage{booktabs}" , "\\usepackage{array}" , "\\usepackage{calc}" - , "\\geometry{margin=1in}" - , "\\usepackage{hyperref}" , "\\usepackage{enumitem}" , "\\usepackage{amsmath}" , "\\usepackage{amssymb}" , "\\usepackage{fancyvrb}" - , "\\usepackage{color}" + , "\\usepackage[dvipsnames,svgnames,x11names]{xcolor}" + , "\\usepackage{titlesec}" + , "\\usepackage{fancyhdr}" + , "\\usepackage{framed}" + -- Typography: Helvetica Neue + Menlo, sans-serif default + , "\\setmainfont{Helvetica Neue}" + , "\\setsansfont{Helvetica Neue}" + , "\\setmonofont{Menlo}[Scale=0.85]" + , "\\renewcommand{\\familydefault}{\\sfdefault}" + -- Layout: wider margins, block paragraphs + , "\\geometry{left=0.9in,right=0.9in,top=1in,bottom=1in}" + , "\\setlength{\\parindent}{0pt}" + , "\\setlength{\\parskip}{0.5em}" + -- Color scheme + , "\\definecolor{accent}{HTML}{1A365D}" + , "\\definecolor{codebg}{HTML}{F5F5F5}" + -- Hyperlinks: accent-colored, no boxes + , "\\usepackage[colorlinks=true,linkcolor=accent,urlcolor=accent,citecolor=accent]{hyperref}" + -- Heading styles + , "\\titleformat{\\section}{\\Large\\bfseries\\color{accent}}{\\thesection}{1em}{}[\\vspace{2pt}\\titlerule]" + , "\\titleformat{\\subsection}{\\large\\bfseries\\color{accent}}{\\thesubsection}{1em}{}" + , "\\titleformat{\\subsubsection}{\\normalsize\\bfseries\\color{accent}}{\\thesubsubsection}{1em}{}" + , "\\titlespacing*{\\section}{0pt}{1.5em}{0.8em}" + , "\\titlespacing*{\\subsection}{0pt}{1.2em}{0.5em}" + , "\\titlespacing*{\\subsubsection}{0pt}{1em}{0.4em}" + -- Page header/footer: minimal centered page number + , "\\pagestyle{fancy}" + , "\\fancyhf{}" + , "\\renewcommand{\\headrulewidth}{0pt}" + , "\\fancyfoot[C]{\\small\\thepage}" + -- Code blocks: light gray background , "\\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\\\\{\\}}" - , "\\newenvironment{Shaded}{}{}" + , "\\newenvironment{Shaded}{\\begin{snugshade}}{\\end{snugshade}}" + , "\\definecolor{shadecolor}{HTML}{F5F5F5}" , syntaxHighlightingCommands + -- Pandoc helpers , "\\providecommand{\\tightlist}{%" , " \\setlength{\\itemsep}{0pt}\\setlength{\\parskip}{0pt}}" , "\\newcommand{\\real}[1]{#1}" @@ -47,6 +74,7 @@ latexTemplate bodyContent = T.unlines , "\\def\\maxheight{\\ifdim\\Gin@nat@height>\\textheight\\textheight\\else\\Gin@nat@height\\fi}" , "\\makeatother" , "\\setkeys{Gin}{width=\\maxwidth,height=\\maxheight,keepaspectratio}" + , "\\providecommand{\\pandocbounded}[1]{#1}" , "\\begin{document}" , bodyContent , "\\end{document}" diff --git a/src/Docster/Types.hi b/src/Docster/Types.hi new file mode 100644 index 0000000000000000000000000000000000000000..340a0e38110909e2b1571fc3c24cf5bd8ce930cb GIT binary patch literal 7520 zcmaJ^30#y%vagVU z5IN;i0(fyBD&B$!YQTVi2cm*_a3~@X6j2lvMBM8BW`-H%<@^1p>8}63tE;Q4s~a*< z>us^vWzHPP&W*o%@ZR8YZT!s0M`EkKvm)Xn_WevyOZij z@u>%;TL)rpwve4ab>|+s{baC!gdTX3yDvC2{M$Rv5(g80exA1KSl_Pv?y#7)3uGv@ zGw5-}*1ca`*g8<1p4PrTnnactJ!mb=Xxc;`p13@;K5^sobEM{ERpFkH_~<<(yzFd5 z^zOVZ=mdVfAQo{J;!(712cs>)>J5-sgTw}G?ZAEvjBP5|jd zkWTtNh$e$*3W%nH=uHq!1JPR`nhsWPgVhYMnh93#0QW9%zXEO+aK8rbJ>Y%=&N7g^ z50cp+aRJF3kjw?iJdn%>$pVlp1j!iY%zwwG6xQ9k$gjIiv1 zTj^0{foVV1RhGqd>^?yC7GE`1R~G(!u<4P!I<_wN$CrH@zDKqA9|?8IDc4g&Yu9c$ z+MjnfDB7f2M=wU zO4a{)e$U;K$NLJ}W)9xZZ@8Y7bHtOX<=yEApVwt&KRZvt27(7SH=RleqrOO1AH5iH z?^JT_&zieUN6U+EXOz89)r#=elzq=rtFm-WkykUjJNtJjsoFcVIVWYHC@y4mP*H5$ z!<@Mulw?|NZsvSwJmpFJg9gN)uS9v+<0`pGCp-rIN6dpkdxDxbTX9kN3A)x^TxB& zwXs|7kc6cA>Y?N1Ls0~*5h5T0j#!8>dq-OmE0NV3L?V`0+t}I>dy)MZ;$Y$6C>=|j zL{8(#c+vO?WTJTDq{&mJRz1v$3Qvg3>LfL{((0>PkDRHZGXabie#EfwRCHZuUB$`n zulm{!p2)m*D0mBjH<4kf01kHVFRtsoAGiKE*`6LBzx~I}o9tj30|^JudK;ti>z}5_ zl8y^KyCWWF+Jp3k%GEi)#ZJa=V!q?3>?it%FxN=joUiE&{i>|DW>-iFB z23RVrIk@_GLqb?bXUGSAr7`;>N*gPm5cmxPZ8+F}GIXdpALGL+LuV2%MSOTV@A@|A(e2PC$xqv>jH(6fF5^kA?#yEi0m32ll^*+3H0 zi=P&s3VZx9%rk(Ag(c;Y{r4ZVlRXX9oPq zfbGn_a4mFW%7AZtM{IReZN-Tk0v{N_)a_widvn&G*hHd2kG3{dJlbRd9~!_P%W`6y zcZA$}e`9xW#NMvZYgY)+8Yfa;8$H)4uH zPJOWFT2p9a_0{NlBC-I*7{mz?qr{I>I4B(PPl|tIIUtrmMghSSh>(bV05^t0gC88f zoxPq^X6*|*e&j{VRwA+z3~_HTGeB^>6IMPV5&=|f%TMZ0+;&}hv?@Qc>U3o3B#}fv zuZ7KPd|EQ`GqFJL7;_DthgUQJEk&J$CEJdtB$L>x>&Z7acRwj1B741CbdmT`qvH&2 z9gHt}e0^)om5D#ap2&J$-q0oyIq2Qu#tM8YrNd3Uk%zzLh{uEN4atpL$)Q_MDiUM5 zD=-XB`uWBQJhS=Qvm2T>TgLt*>$D0{oCK$@v?02 zYIyVknP{TkKQc++)9o6Me-mi9al0l1nF?fz*>>$1OsKeUxb3djq0Gq4*zE_mUKOqn ztVW87ig5~D-{E6BH>4J9iFK{0s%$UVaBbgN0`#Rdj!`Blqy#MRn#cH!g6%hBs&>xnVWE#qGII(u(QLYM?kNrU0Bj z99PwTDI%wh><&&k6udsa;xNIRaG2u+@UlMq;gO#P`y5*~UEE&WAHMey0kp1P>=ea# z0uuo+z!=TJ+KhAU+jD9sTzpo4yDBOv_*1}Hj8rEhbqeg?+1#_|L4DWa4aYatU5PuM zdWisi?g%lQqL^UhQY}7WLxY0aEeWv#c4HGx33?44)oDl|+YMU{AsHCVh55P5SKYn)K;|$aFkA#U?o2QLq-fQQOmK+g{9k zqon@B$^F=md?VD_j6GlzaQ;vK>$m|l%t0^d-fp+ZZA4h1G4-au4X1Ozhq zEtB7gC#3QSyVt)*fe72i|wFBF)G*rEb65L^FE#s5(8zf|W(TU21~VT%fybA(V~ zq53~2gD{HN38p^~!X$mx6IjnA{YQkr1}4Lqj9@a7Nqpxc8qFmAH;lkWCS#e5V=|t} z1SS)i+{EN&CX<-_fyrbh@tJ{kQkld@J+`+pxsA!~OzvPZjmdN-GnmX|GKWJ z7IF#xm&N5_ZC}IXV|z8X6RYDaZWq~2Ed1q4m5rNPq1Evknb*1iUhA-0uH~KQ$>zAo zX3J(fFP^^$p9X4`4vD-*>pWYw(Aj6v;ss9LN)4~jsWt1IU1W1*i;+5ifnwfRAEh53 zAlI!y?R?ohYGCo~1$H`qtO#;;zd!#wK{vP&L91qk1KO=UL^PN z8RLo}(5U@n9%_}`TjuHIVZ~hV!iCuyTQbiz>hElIN`D@g?=sKXX8|vtETE{9xyt=` zmA70Y`$)t4sx?Zv>aAgLnOfs7*U9uh?kcTLu2S&Y@vkx5Jy$MTFx$oHHDHxoqvfZK ztj0L#4Zbr>SfN(P{k*7W#wZz8zfAj5t>V4uNJm3lJ`4!(Q!3;-<+r>Yo8d!^M!rtA z1h>eV^X1B8{* z3CwfAxR@!-fC<64N}A}ep)2IyuQQIF`s!HRS&hyJ3l5__4|%}rN>6^6IffZBr3oA7 z$$t~b;|3c?3q;eQ`V7`>vkboTLCy~xfGdN%~ooRe8{(`B#J;E|YU zrZ$Yh-BEw{rWs@wZscio)Bk_m-J4hGl)81}U#H*+Q7gS?ybc(5&bVfC-v7 zgxAM^hV@6OR!uTP@VxL!UzMjuu|lr$ootTjv+PS{zzU^SH`WZ)ml&=2MD8o`_$&AT z9ZuwE#&_3J8|ERSQwGX?d8`<`iu$HC-vqDaGS78bI@e0oT4#M7!xDxiboHEtim9IK zuTsZe|K5~_6d zlWVoS){3@0KQHBlmarwptme3kcNV0Tf<=74r^MVX1G6AsZ#2c zc;d_~+Nh0%nqp)GibkkK(Kx*g07Yj@SRXu$hZEtXP;Rc^~z zX~w67S|#B4Ai$T9mUS%LR=WPpV!4;c3J#g}I<0GFVD6{o44vX#8ptuMA>@^i z1vVxccW>4+ZWJ3vLvK$>rn|;?Mn7#SJnYhF+QrRs(*M4)z08>D7BgeU1vt=A=I+xG zX0fy$R=9iH(T+(JDQI+!f{dg