Add output directory structure and heading-based image naming

- Output files now go to output/<document-name>/ relative to input
- Images named after nearest heading (e.g., file_flow.svg)
- Multiple images under same heading get suffixes: _1, _2, etc.
- Images before any heading use document name as prefix
- Add StateT-based AST traversal for heading tracking
- Add HSpec test suite with 21 tests

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Your Name 2026-01-05 17:47:49 +00:00
parent 7d2b407908
commit 7de2bc811a
8 changed files with 384 additions and 46 deletions

View File

@ -40,6 +40,7 @@ library
directory >=1.3 && <1.4,
process >=1.6 && <1.7,
hashable >=1.4 && <1.6,
containers >=0.6 && <0.8,
pandoc >=3.0 && <3.2,
pandoc-types >=1.23 && <1.25,
bytestring >=0.11 && <0.13,
@ -59,3 +60,23 @@ executable docster
ghc-options: -threaded
-rtsopts
-with-rtsopts=-N
test-suite docster-test
import: warnings
type: exitcode-stdio-1.0
main-is: Spec.hs
hs-source-dirs: test
other-modules: Docster.TransformSpec
build-depends:
base >=4.18 && <5,
text >=2.0 && <2.2,
filepath >=1.4 && <1.6,
containers >=0.6 && <0.8,
hspec >=2.10 && <2.12,
pandoc-types >=1.23 && <1.25,
docster
default-language: Haskell2010
ghc-options: -threaded
-rtsopts
-with-rtsopts=-N
build-tool-depends: hspec-discover:hspec-discover

2
install.sh Normal file
View File

@ -0,0 +1,2 @@
cabal install --installdir=$HOME/.local/bin --overwrite-policy=always

View File

@ -9,13 +9,16 @@ module Docster.Compiler
) where
import Docster.Types
( DocsterError(..), OutputFormat(..), SourceDir(..), OutputDir(..), OutputPath(..)
, DiagramConfig(..), computeOutputDir, ensureOutputDir
)
import Docster.Transform (transformDocument)
import Docster.LaTeX (latexTemplate)
import Text.Pandoc
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import System.FilePath (takeDirectory, replaceExtension, (</>))
import System.FilePath (takeDirectory, takeBaseName, replaceExtension, (</>), (<.>))
import System.Process (callProcess, readProcessWithExitCode)
import System.IO.Temp (withSystemTempDirectory)
import System.Directory (copyFile, doesFileExist)
@ -26,6 +29,7 @@ import Control.Monad.Trans.Except (ExceptT, runExceptT, throwE)
import Control.Monad.Trans.Reader (ReaderT, runReaderT, asks)
import Control.Monad.Trans.Class (lift)
import Control.Monad.IO.Class (liftIO)
import Data.Maybe (mapMaybe)
-- | Success indicator for user feedback
successEmoji :: Text
@ -35,8 +39,10 @@ successEmoji = "✅"
data CompilationContext = CompilationContext
{ ccStrategy :: CompilationStrategy
, ccSourceDir :: SourceDir
, ccOutputDir :: OutputDir
, ccInputPath :: FilePath
, ccOutputPath :: FilePath
, ccDocName :: Text
, ccReaderOptions :: ReaderOptions
, ccConfig :: DiagramConfig
}
@ -74,10 +80,74 @@ htmlStrategy = CompilationStrategy
, csSuccessMessage = \path -> successEmoji <> " HTML written to " <> T.pack path
}
-- | Parse LaTeX log content to extract meaningful error messages
parseLatexErrors :: Text -> Text
parseLatexErrors logContent =
let logLines = T.lines logContent
missingChars = extractMissingChars logLines
overfullBoxes = extractOverfullBoxes logLines
undefinedCommands = extractUndefinedCommands logLines
fatalErrors = extractFatalErrors logLines
errorCount = length missingChars + length overfullBoxes + length undefinedCommands + length fatalErrors
summary = if errorCount == 0
then "Unknown LaTeX error occurred."
else T.unlines $ filter (not . T.null) [
if not (null fatalErrors) then "Fatal errors:\n" <> T.unlines (map ("" <>) fatalErrors) else "",
if not (null undefinedCommands) then "Undefined commands:\n" <> T.unlines (map ("" <>) undefinedCommands) else "",
if not (null missingChars) then "Missing Unicode characters:\n" <> T.unlines (map ("" <>) missingChars) else "",
if not (null overfullBoxes) then T.pack (show (length overfullBoxes)) <> " overfull boxes (layout warnings)" else ""
]
in summary
-- | Extract missing character warnings from LaTeX log
extractMissingChars :: [Text] -> [Text]
extractMissingChars = mapMaybe extractChar
where
extractChar line
| "Missing character:" `T.isInfixOf` line =
case T.splitOn "(U+" line of
[_, rest] -> case T.splitOn ")" rest of
(unicode:_) -> Just $ "U+" <> unicode <> " " <> extractCharContext line
_ -> Nothing
_ -> Nothing
| otherwise = Nothing
extractCharContext line =
case T.splitOn " in font " line of
[_, rest] -> "in " <> T.takeWhile (/= ':') rest
_ -> ""
-- | Extract overfull box warnings
extractOverfullBoxes :: [Text] -> [Text]
extractOverfullBoxes = mapMaybe extractBox
where
extractBox line
| "Overfull \\hbox" `T.isInfixOf` line = Just $ T.takeWhile (/= '\n') line
| otherwise = Nothing
-- | Extract undefined command errors
extractUndefinedCommands :: [Text] -> [Text]
extractUndefinedCommands = mapMaybe extractUndef
where
extractUndef line
| "Undefined control sequence" `T.isInfixOf` line = Just line
| otherwise = Nothing
-- | Extract fatal LaTeX errors
extractFatalErrors :: [Text] -> [Text]
extractFatalErrors = mapMaybe extractFatal
where
extractFatal line
| "! " `T.isPrefixOf` line && not ("Missing character:" `T.isInfixOf` line) = Just $ T.drop 2 line
| otherwise = Nothing
-- | Process PDF output: LaTeX template application and direct XeLaTeX compilation
processPDFOutput :: String -> Text -> IO (Either DocsterError ())
processPDFOutput outputPath latexOutput = do
let completeLatex = latexTemplate latexOutput
logOutputPath = replaceExtension outputPath "log"
-- Use temporary directory for LaTeX compilation
withSystemTempDirectory "docster-latex" $ \tempDir -> do
@ -95,6 +165,13 @@ processPDFOutput outputPath latexOutput = do
, texFile
] ""
-- Always copy log file to output location for debugging
logExists <- doesFileExist logFile
logContent <- if logExists
then TIO.readFile logFile
else return (T.pack stderr)
TIO.writeFile logOutputPath logContent
case exitCode of
ExitSuccess -> do
-- Check if PDF was actually generated
@ -105,22 +182,16 @@ processPDFOutput outputPath latexOutput = do
copyFile pdfFile outputPath
return $ Right ()
else do
-- PDF generation failed, read log for details
logExists <- doesFileExist logFile
logContent <- if logExists
then TIO.readFile logFile
else return "No log file generated"
return $ Left $ PDFGenerationError $
"PDF file not generated. LaTeX log:\n" <> logContent
"PDF file not generated despite successful exit code.\n" <>
"Full LaTeX log written to: " <> T.pack logOutputPath
ExitFailure code -> do
-- LaTeX compilation failed, read log for details
logExists <- doesFileExist logFile
logContent <- if logExists
then TIO.readFile logFile
else return (T.pack stderr)
-- LaTeX compilation failed - parse log for meaningful errors
let errorSummary = parseLatexErrors logContent
return $ Left $ PDFGenerationError $
"XeLaTeX compilation failed (exit code " <> T.pack (show code) <> "):\n" <>
T.pack stderr <> "\n\nLaTeX log:\n" <> logContent
"❌ LaTeX compilation failed (exit code " <> T.pack (show code) <> "):\n" <>
errorSummary <> "\n\n" <>
"Full LaTeX log written to: " <> T.pack logOutputPath
-- | Process HTML output: file writing and browser opening
processHTMLOutput :: String -> Text -> IO (Either DocsterError ())
@ -153,11 +224,12 @@ parseDocument content = do
readerOptions <- asks ccReaderOptions
liftEitherM $ parseMarkdown readerOptions content
-- | Pipeline step: Transform document (process Mermaid diagrams)
-- | Pipeline step: Transform document (process Mermaid diagrams)
transformDocumentM :: Pandoc -> CompilationM Pandoc
transformDocumentM pandoc = do
config <- asks ccConfig
liftEitherM $ transformDocument config pandoc
docName <- asks ccDocName
liftEitherM $ transformDocument config docName pandoc
-- | Pipeline step: Generate output using format-specific writer
generateOutputM :: Pandoc -> CompilationM Text
@ -180,13 +252,13 @@ printSuccess = do
liftIO $ putStrLn $ T.unpack $ csSuccessMessage strategy outputPath
-- | Higher-order compilation function that takes a strategy and executes the pipeline
compileWithStrategy :: CompilationStrategy -> SourceDir -> OutputPath -> OutputPath -> IO (Either DocsterError ())
compileWithStrategy strategy sourceDir (OutputPath inputPath) (OutputPath outputPath) = do
compileWithStrategy :: CompilationStrategy -> SourceDir -> OutputDir -> Text -> OutputPath -> OutputPath -> IO (Either DocsterError ())
compileWithStrategy strategy sourceDir outputDir docName (OutputPath inputPath) (OutputPath outputPath) = do
let readerOptions = def { readerExtensions = getDefaultExtensions "markdown" }
config = DiagramConfig sourceDir (csOutputFormat strategy)
context = CompilationContext strategy sourceDir inputPath outputPath readerOptions config
config = DiagramConfig sourceDir outputDir (csOutputFormat strategy)
context = CompilationContext strategy sourceDir outputDir inputPath outputPath docName readerOptions config
pipeline = readContent >>= parseDocument >>= transformDocumentM >>= generateOutputM >>= processOutput >> printSuccess
runExceptT $ runReaderT pipeline context
-- | Parse markdown with error handling
@ -219,9 +291,16 @@ compileToHTML = compileWithFormat htmlStrategy "html"
compileWithFormat :: CompilationStrategy -> String -> FilePath -> IO ()
compileWithFormat strategy extension path = do
let sourceDir = SourceDir $ takeDirectory path
outputPath = OutputPath $ replaceExtension path extension
result <- compileWithStrategy strategy sourceDir (OutputPath path) outputPath
outputDir = computeOutputDir path
OutputDir outDirPath = outputDir
baseName = takeBaseName path
docName = T.pack baseName
outputPath = OutputPath $ outDirPath </> baseName <.> extension
-- Ensure output directory exists before compilation
ensureOutputDir outputDir
result <- compileWithStrategy strategy sourceDir outputDir docName (OutputPath path) outputPath
case result of
Left err -> throwIO err
Right _ -> return ()

View File

@ -7,9 +7,10 @@ module Docster.Mermaid
processMermaidBlock
, renderMermaidDiagram
, generateDiagramId
, createImageBlock
) where
import Docster.Types
import Docster.Types (DiagramConfig(..), DiagramId(..), OutputDir(..), OutputFormat(..), DocsterError(..))
import Text.Pandoc.Definition (Block(..), Inline(..), nullAttr)
import Data.Text (Text)
import qualified Data.Text as T
@ -51,9 +52,9 @@ processMermaidBlock _ block = return $ Right block
-- | Render Mermaid diagram to appropriate format with resource cleanup
renderMermaidDiagram :: DiagramConfig -> DiagramId -> Text -> IO (Either DocsterError Text)
renderMermaidDiagram config@(DiagramConfig (SourceDir sourceDir) format) diagId contents = do
renderMermaidDiagram config@(DiagramConfig _ (OutputDir outDir) format) diagId contents = do
let diagIdStr = T.unpack $ (\(DiagramId d) -> d) diagId
mmdFile = sourceDir </> diagIdStr <> ".mmd"
mmdFile = outDir </> diagIdStr <> ".mmd"
(outputFile, imagePath) = generateDiagramPaths config diagId
-- Use bracket to ensure cleanup of temporary mermaid file
@ -71,12 +72,12 @@ renderMermaidDiagram config@(DiagramConfig (SourceDir sourceDir) format) diagId
-- | Generate file paths for diagram based on format
generateDiagramPaths :: DiagramConfig -> DiagramId -> (FilePath, Text)
generateDiagramPaths (DiagramConfig (SourceDir sourceDir) format) (DiagramId diagId) =
generateDiagramPaths (DiagramConfig _ (OutputDir outDir) format) (DiagramId diagId) =
let diagIdStr = T.unpack diagId
in case format of
HTML -> let svgFile = sourceDir </> diagIdStr <> ".svg"
HTML -> let svgFile = outDir </> diagIdStr <> ".svg"
in (svgFile, T.pack $ takeFileName svgFile)
PDF -> let pngFile = sourceDir </> diagIdStr <> ".png"
PDF -> let pngFile = outDir </> diagIdStr <> ".png"
in (pngFile, T.pack pngFile)
-- | Puppeteer configuration content for disabling sandbox

View File

@ -4,20 +4,95 @@
module Docster.Transform
( -- * Document Transformation
transformDocument
-- * Utilities (exported for testing)
, inlinesToText
) where
import Docster.Types
import Docster.Mermaid (processMermaidBlock)
import Text.Pandoc.Definition (Pandoc(..), Block)
( DocsterError(..), OutputFormat(..), DiagramConfig(..), DiagramId(..)
, TraversalState(..), initialTraversalState, normalizeHeading
)
import Docster.Mermaid (renderMermaidDiagram, createImageBlock)
import Text.Pandoc.Definition (Pandoc(..), Block(..), Inline(..))
import Text.Pandoc.Walk (walk)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Map.Strict as Map
import Data.Maybe (fromMaybe)
import Control.Monad.Trans.State.Strict (StateT, runStateT, get, modify)
import Control.Monad.Trans.Except (ExceptT, runExceptT, throwE)
import Control.Monad.Trans.Class (lift)
import Control.Monad.IO.Class (liftIO)
-- | Walk the Pandoc AST and process blocks with error handling
transformDocument :: DiagramConfig -> Pandoc -> IO (Either DocsterError Pandoc)
transformDocument config doc = walkMEither (processMermaidBlock config) doc
-- | Monad stack for stateful block transformation with error handling
type TransformM = StateT TraversalState (ExceptT DocsterError IO)
-- | Walk with error handling - transforms Either into IO Either
walkMEither :: Monad m => (Block -> m (Either e Block)) -> Pandoc -> m (Either e Pandoc)
walkMEither f (Pandoc meta blocks) = do
results <- mapM f blocks
case sequence results of
-- | Walk the Pandoc AST and process blocks with heading tracking
transformDocument :: DiagramConfig -> Text -> Pandoc -> IO (Either DocsterError Pandoc)
transformDocument config docName (Pandoc meta blocks) = do
let initialState = initialTraversalState docName
result <- runExceptT $ runStateT (mapM (processBlockStateful config) blocks) initialState
case result of
Left err -> return $ Left err
Right newBlocks -> return $ Right $ Pandoc meta newBlocks
Right (newBlocks, _finalState) ->
case dcOutputFormat config of
PDF -> return $ Right $ substituteUnicodeSymbols (Pandoc meta newBlocks)
HTML -> return $ Right $ Pandoc meta newBlocks
-- | Process a single block with heading tracking state
processBlockStateful :: DiagramConfig -> Block -> TransformM Block
processBlockStateful config block = case block of
-- Update current heading on any heading level
Header _ _ inlines -> do
let headingText = normalizeHeading $ inlinesToText inlines
modify $ \s -> s { tsCurrentHeading = Just headingText }
return block
-- Process mermaid blocks with heading context
CodeBlock (_, classes, _) contents
| "mermaid" `elem` classes -> do
state <- get
let baseName = fromMaybe (tsDocumentName state) (tsCurrentHeading state)
counter = Map.findWithDefault 0 baseName (tsHeadingCounters state)
diagName = if counter == 0
then baseName
else baseName <> "_" <> T.pack (show counter)
-- Increment counter for this heading
modify $ \s -> s { tsHeadingCounters = Map.insertWith (+) baseName 1 (tsHeadingCounters s) }
-- Render diagram with semantic name
let diagId = DiagramId diagName
result <- liftIO $ renderMermaidDiagram config diagId contents
case result of
Left err -> lift $ throwE err
Right imagePath -> return $ createImageBlock imagePath
-- Pass through all other blocks unchanged
_ -> return block
-- | Extract text content from inline elements
inlinesToText :: [Inline] -> Text
inlinesToText = T.concat . map inlineToText
where
inlineToText :: Inline -> Text
inlineToText (Str t) = t
inlineToText Space = " "
inlineToText SoftBreak = " "
inlineToText (Code _ t) = t
inlineToText (Emph inlines) = inlinesToText inlines
inlineToText (Strong inlines) = inlinesToText inlines
inlineToText (Strikeout inlines) = inlinesToText inlines
inlineToText (Quoted _ inlines) = inlinesToText inlines
inlineToText (Link _ inlines _) = inlinesToText inlines
inlineToText _ = ""
-- | Substitute Unicode symbols with LaTeX equivalents for PDF output
substituteUnicodeSymbols :: Pandoc -> Pandoc
substituteUnicodeSymbols = walk substituteInline
where
substituteInline :: Inline -> Inline
substituteInline (Str text) = Str (substituteSymbols text)
substituteInline other = other
substituteSymbols :: T.Text -> T.Text
substituteSymbols = T.replace "" "\\checkmark"
. T.replace "" "\\times"

View File

@ -4,19 +4,35 @@
module Docster.Types
( -- * Error Types
DocsterError(..)
-- * Output Format
, OutputFormat(..)
-- * Domain Types
, SourceDir(..)
, OutputDir(..)
, OutputPath(..)
, DiagramId(..)
, DiagramConfig(..)
-- * Traversal State
, TraversalState(..)
, initialTraversalState
, normalizeHeading
-- * Path Utilities
, computeOutputDir
, ensureOutputDir
) where
import Data.Text (Text)
import qualified Data.Text as T
import Data.Char (isAlphaNum, isSpace)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Control.Exception (Exception)
import System.FilePath (takeDirectory, takeBaseName, (</>))
import System.Directory (createDirectoryIfMissing)
-- | Custom error types for comprehensive error handling
data DocsterError
@ -33,11 +49,15 @@ data OutputFormat = PDF | HTML
deriving (Show, Eq)
-- | Type-safe wrapper for source directory paths
newtype SourceDir = SourceDir FilePath
newtype SourceDir = SourceDir FilePath
deriving (Show, Eq)
-- | Type-safe wrapper for output directory paths
newtype OutputDir = OutputDir FilePath
deriving (Show, Eq)
-- | Type-safe wrapper for output file paths
newtype OutputPath = OutputPath FilePath
newtype OutputPath = OutputPath FilePath
deriving (Show, Eq)
-- | Type-safe wrapper for diagram identifiers
@ -47,5 +67,43 @@ newtype DiagramId = DiagramId Text
-- | Configuration for diagram generation
data DiagramConfig = DiagramConfig
{ dcSourceDir :: SourceDir
, dcOutputDir :: OutputDir
, dcOutputFormat :: OutputFormat
} deriving (Show)
} deriving (Show)
-- | Compute output directory from input file path
-- "docs/readme.md" -> "docs/output/readme"
computeOutputDir :: FilePath -> OutputDir
computeOutputDir inputPath =
let dir = takeDirectory inputPath
baseName = takeBaseName inputPath
in OutputDir $ if null dir || dir == "."
then "output" </> baseName
else dir </> "output" </> baseName
-- | Ensure output directory exists
ensureOutputDir :: OutputDir -> IO ()
ensureOutputDir (OutputDir dir) = createDirectoryIfMissing True dir
-- | State for heading-aware diagram naming during AST traversal
data TraversalState = TraversalState
{ tsCurrentHeading :: Maybe Text -- ^ Current heading text (normalized)
, tsHeadingCounters :: Map Text Int -- ^ Counter for diagrams per heading
, tsDocumentName :: Text -- ^ Fallback name when no heading
} deriving (Show, Eq)
-- | Create initial traversal state with document name as fallback
initialTraversalState :: Text -> TraversalState
initialTraversalState docName = TraversalState
{ tsCurrentHeading = Nothing
, tsHeadingCounters = Map.empty
, tsDocumentName = docName
}
-- | Normalize heading text for use as a filename
-- "File Flow Diagram!" -> "file_flow_diagram"
normalizeHeading :: Text -> Text
normalizeHeading = T.intercalate "_"
. T.words
. T.filter (\c -> isAlphaNum c || isSpace c)
. T.toLower

View File

@ -0,0 +1,101 @@
{-# LANGUAGE OverloadedStrings #-}
module Docster.TransformSpec (spec) where
import Test.Hspec
import qualified Data.Map.Strict as Map
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Definition (Inline(..))
import Docster.Types
( OutputDir(..)
, TraversalState(..)
, computeOutputDir
, normalizeHeading
, initialTraversalState
)
import Docster.Transform (inlinesToText)
spec :: Spec
spec = do
describe "normalizeHeading" $ do
it "lowercases and replaces spaces with underscores" $
normalizeHeading "File Flow" `shouldBe` "file_flow"
it "strips non-alphanumeric characters" $
normalizeHeading "API (v2.0)!" `shouldBe` "api_v20"
it "handles multiple spaces" $
normalizeHeading "Hello World" `shouldBe` "hello_world"
it "handles unicode letters" $
normalizeHeading "Diagrama de Flujo" `shouldBe` "diagrama_de_flujo"
it "handles empty string" $
normalizeHeading "" `shouldBe` ""
it "handles heading with only symbols" $
normalizeHeading "!@#$%^" `shouldBe` ""
describe "computeOutputDir" $ do
it "creates output subdir from input path" $
computeOutputDir "docs/readme.md" `shouldBe` OutputDir "docs/output/readme"
it "handles nested paths" $
computeOutputDir "a/b/c/file.md" `shouldBe` OutputDir "a/b/c/output/file"
it "handles current directory (no path)" $
computeOutputDir "readme.md" `shouldBe` OutputDir "output/readme"
it "handles dot prefix path" $
computeOutputDir "./readme.md" `shouldBe` OutputDir "output/readme"
describe "initialTraversalState" $ do
it "starts with no current heading" $
tsCurrentHeading (initialTraversalState "doc") `shouldBe` Nothing
it "starts with empty counters" $
tsHeadingCounters (initialTraversalState "doc") `shouldBe` Map.empty
it "stores document name" $
tsDocumentName (initialTraversalState "myfile") `shouldBe` "myfile"
describe "diagram naming logic" $ do
it "first diagram under heading has no suffix" $
let baseName = "file_flow"
counter = Map.findWithDefault 0 baseName Map.empty
diagName = if counter == 0 then baseName else baseName <> "_" <> T.pack (show counter)
in diagName `shouldBe` "file_flow"
it "second diagram gets _1 suffix" $
let baseName = "file_flow"
counters = Map.singleton "file_flow" 1
counter = Map.findWithDefault 0 baseName counters
diagName = if counter == 0 then baseName else baseName <> "_" <> T.pack (show counter)
in diagName `shouldBe` "file_flow_1"
it "third diagram gets _2 suffix" $
let baseName = "file_flow"
counters = Map.singleton "file_flow" 2
counter = Map.findWithDefault 0 baseName counters
diagName = if counter == 0 then baseName else baseName <> "_" <> T.pack (show counter)
in diagName `shouldBe` "file_flow_2"
it "uses document name when no heading" $
let state = initialTraversalState "readme"
baseName = maybe (tsDocumentName state) id (tsCurrentHeading state)
in baseName `shouldBe` "readme"
describe "inlinesToText" $ do
it "extracts text from Str inline" $
inlinesToText [Str "hello"] `shouldBe` "hello"
it "handles Space" $
inlinesToText [Str "hello", Space, Str "world"] `shouldBe` "hello world"
it "handles nested emphasis" $
inlinesToText [Emph [Str "important"]] `shouldBe` "important"
it "handles Code inline" $
inlinesToText [Code ("", [], []) "code"] `shouldBe` "code"

1
test/Spec.hs Normal file
View File

@ -0,0 +1 @@
{-# OPTIONS_GHC -F -pgmF hspec-discover #-}