{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeFamilies #-}
{- |
   Module      : Text.Pandoc.Readers.Mdoc.Lex
   Copyright   : Copyright (C) 2024 Evan Silberman
   License     : GNU GPL, version 2 or above

   Maintainer  : Evan Silberman <evan@jklol.net>
   Stability   : WIP
   Portability : portable

Tokenizer for mdoc
-}
module Text.Pandoc.Readers.Mdoc.Lex
  ( MdocToken(..)
  , MdocTokens(..)
  , DelimSide(..)
  , lexMdoc
  , toString
  )
where

import Control.Monad (void, guard, when)
import Control.Monad.Except (throwError)
import Text.Pandoc.Class.PandocMonad (PandocMonad(..))
import Data.Char (isAlphaNum)
import Data.Maybe (isJust)
import qualified Data.Text as T
import Text.Pandoc.Options
import Text.Pandoc.Parsing
import Text.Pandoc.Readers.Roff.Escape
import Text.Pandoc.Readers.Mdoc.Macros
import qualified Data.Sequence as Seq

-- As a higher level language with a wealth of semantic macros, mdoc
-- discourages authors from falling back to low-level roff features like font
-- selection, custom macros, defined strings, etc. Pandoc's mdoc reader is
-- accordingly implemented as a high-level interpreter of mdoc's semantic macros
-- and almost no raw roff requests are supported.
--
-- tbl(7) and eqn(7) macros are rare but not completely unseen in mdoc manuals.
-- they are not yet implemented. most use of tbl macros in mdoc could probably
-- be replaced with .Bl -column

data DelimSide = Open | Middle | Close deriving (Int -> DelimSide -> ShowS
[DelimSide] -> ShowS
DelimSide -> String
(Int -> DelimSide -> ShowS)
-> (DelimSide -> String)
-> ([DelimSide] -> ShowS)
-> Show DelimSide
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> DelimSide -> ShowS
showsPrec :: Int -> DelimSide -> ShowS
$cshow :: DelimSide -> String
show :: DelimSide -> String
$cshowList :: [DelimSide] -> ShowS
showList :: [DelimSide] -> ShowS
Show, DelimSide -> DelimSide -> Bool
(DelimSide -> DelimSide -> Bool)
-> (DelimSide -> DelimSide -> Bool) -> Eq DelimSide
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: DelimSide -> DelimSide -> Bool
== :: DelimSide -> DelimSide -> Bool
$c/= :: DelimSide -> DelimSide -> Bool
/= :: DelimSide -> DelimSide -> Bool
Eq)

-- | Tokens for Mdoc documents
data MdocToken = Str T.Text SourcePos -- ^ The contents of a text line
               | Macro T.Text SourcePos  -- ^ A macro to be processed
               | Lit T.Text SourcePos  -- ^ Literal text on a control line
               | Blank SourcePos  -- ^ A blank line
               | Delim DelimSide T.Text SourcePos  -- ^ A delimiter character
               | Eol  -- ^ The end of a control line
               deriving Int -> MdocToken -> ShowS
[MdocToken] -> ShowS
MdocToken -> String
(Int -> MdocToken -> ShowS)
-> (MdocToken -> String)
-> ([MdocToken] -> ShowS)
-> Show MdocToken
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> MdocToken -> ShowS
showsPrec :: Int -> MdocToken -> ShowS
$cshow :: MdocToken -> String
show :: MdocToken -> String
$cshowList :: [MdocToken] -> ShowS
showList :: [MdocToken] -> ShowS
Show

toString :: MdocToken -> T.Text
toString :: MdocToken -> Text
toString (Str Text
x SourcePos
_) = Text
x
toString (Macro Text
x SourcePos
_) = Text
x
toString (Lit Text
x SourcePos
_) = Text
x
toString (Delim DelimSide
_ Text
x SourcePos
_) = Text
x
toString Blank{} = Text
forall a. Monoid a => a
mempty
toString MdocToken
Eol = Text
forall a. Monoid a => a
mempty

newtype MdocTokens = MdocTokens { MdocTokens -> Seq MdocToken
unMdocTokens :: Seq.Seq MdocToken }
        deriving (Int -> MdocTokens -> ShowS
[MdocTokens] -> ShowS
MdocTokens -> String
(Int -> MdocTokens -> ShowS)
-> (MdocTokens -> String)
-> ([MdocTokens] -> ShowS)
-> Show MdocTokens
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> MdocTokens -> ShowS
showsPrec :: Int -> MdocTokens -> ShowS
$cshow :: MdocTokens -> String
show :: MdocTokens -> String
$cshowList :: [MdocTokens] -> ShowS
showList :: [MdocTokens] -> ShowS
Show, NonEmpty MdocTokens -> MdocTokens
MdocTokens -> MdocTokens -> MdocTokens
(MdocTokens -> MdocTokens -> MdocTokens)
-> (NonEmpty MdocTokens -> MdocTokens)
-> (forall b. Integral b => b -> MdocTokens -> MdocTokens)
-> Semigroup MdocTokens
forall b. Integral b => b -> MdocTokens -> MdocTokens
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
$c<> :: MdocTokens -> MdocTokens -> MdocTokens
<> :: MdocTokens -> MdocTokens -> MdocTokens
$csconcat :: NonEmpty MdocTokens -> MdocTokens
sconcat :: NonEmpty MdocTokens -> MdocTokens
$cstimes :: forall b. Integral b => b -> MdocTokens -> MdocTokens
stimes :: forall b. Integral b => b -> MdocTokens -> MdocTokens
Semigroup, Semigroup MdocTokens
MdocTokens
Semigroup MdocTokens =>
MdocTokens
-> (MdocTokens -> MdocTokens -> MdocTokens)
-> ([MdocTokens] -> MdocTokens)
-> Monoid MdocTokens
[MdocTokens] -> MdocTokens
MdocTokens -> MdocTokens -> MdocTokens
forall a.
Semigroup a =>
a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
$cmempty :: MdocTokens
mempty :: MdocTokens
$cmappend :: MdocTokens -> MdocTokens -> MdocTokens
mappend :: MdocTokens -> MdocTokens -> MdocTokens
$cmconcat :: [MdocTokens] -> MdocTokens
mconcat :: [MdocTokens] -> MdocTokens
Monoid)

singleTok :: MdocToken -> MdocTokens
singleTok :: MdocToken -> MdocTokens
singleTok MdocToken
t = Seq MdocToken -> MdocTokens
MdocTokens (MdocToken -> Seq MdocToken
forall a. a -> Seq a
Seq.singleton MdocToken
t)

type Lexer m = ParsecT Sources () m

instance RoffLikeLexer MdocTokens where
  -- This is a bit confusing. We're lexing to MdocTokens, but for escaping
  -- purposes we just want Texts.
  type Token MdocTokens = T.Text
  -- We don't need a state
  type State MdocTokens = ()
  -- We don't support predefined string expansion
  expandString :: forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens ()
expandString = () -> ParsecT Sources () m ()
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return ()
  escString :: forall (m :: * -> *).
PandocMonad m =>
Lexer m MdocTokens (Token MdocTokens)
escString = Text -> ParsecT Sources () m Text
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return Text
forall a. Monoid a => a
mempty
  -- what token type the unescaped text gets wrapped in is decided by other
  -- parts of the lexer.
  emit :: Text -> Token MdocTokens
emit = Text -> Text
Text -> Token MdocTokens
forall a. a -> a
id
  -- All escapes are resolved in the lexer and we never need to emit anything,
  -- vs. the roff lexer which has to push the backlashes to the output while
  -- in copy mode.
  backslash :: forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens ()
backslash = (ParsecT Sources () m ()
forall a. Monoid a => a
mempty ParsecT Sources () m ()
-> ParsecT Sources () m Char -> ParsecT Sources () m ()
forall a b.
ParsecT Sources () m a
-> ParsecT Sources () m b -> ParsecT Sources () m a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
'\\') ParsecT Sources () m ()
-> ParsecT Sources () m () -> ParsecT Sources () m ()
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (ParsecT Sources () m ()
forall a. Monoid a => a
mempty ParsecT Sources () m ()
-> ParsecT Sources () m String -> ParsecT Sources () m ()
forall a b.
ParsecT Sources () m a
-> ParsecT Sources () m b -> ParsecT Sources () m a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* String -> ParsecT Sources () m String
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m String
string String
"\\E")
  -- We don't support macro definition and we don't output anything for \A
  checkDefined :: forall (m :: * -> *).
PandocMonad m =>
Text -> Lexer m MdocTokens (Token MdocTokens)
checkDefined = ParsecT Sources () m Text -> Text -> ParsecT Sources () m Text
forall a b. a -> b -> a
const ParsecT Sources () m Text
forall a. Monoid a => a
mempty
  -- We don't support copy mode and \E is treated as backslash
  escE :: forall (m :: * -> *).
PandocMonad m =>
Lexer m MdocTokens (Token MdocTokens)
escE = Text -> ParsecT Sources () m Text
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return Text
forall a. Monoid a => a
mempty
  -- We don't support low-level font selection
  escFont :: forall (m :: * -> *).
PandocMonad m =>
Lexer m MdocTokens (Token MdocTokens)
escFont = Char
-> [Lexer m MdocTokens Text]
-> ParsecT Sources (State MdocTokens) m (Token MdocTokens)
forall (m :: * -> *) x.
(PandocMonad m, RoffLikeLexer x) =>
Char -> [Lexer m x Text] -> Lexer m x (Token x)
escIgnore Char
'f' [Lexer m MdocTokens Text
forall (m :: * -> *) x.
(PandocMonad m, RoffLikeLexer x) =>
Lexer m x Text
escapeArg, Int -> ParsecT Sources () m Char -> ParsecT Sources () m Text
forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char, Monad m) =>
Int -> ParsecT s st m Char -> ParsecT s st m Text
countChar Int
1 ((Char -> Bool) -> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=Char
'\n'))]

eofline :: (Stream s m Char, UpdateSourcePos s Char) => ParsecT s u m MdocToken
eofline :: forall s (m :: * -> *) u.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m MdocToken
eofline = do
  ParsecT s u m Char -> ParsecT s u m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void ParsecT s u m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m Char
newline ParsecT s u m () -> ParsecT s u m () -> ParsecT s u m ()
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT s u m ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof
  MdocToken -> ParsecT s u m MdocToken
forall a. a -> ParsecT s u m a
forall (m :: * -> *) a. Monad m => a -> m a
return MdocToken
Eol

lexComment :: PandocMonad m => Lexer m MdocTokens
lexComment :: forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
lexComment = do
  ParsecT Sources () m String -> ParsecT Sources () m String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (ParsecT Sources () m String -> ParsecT Sources () m String)
-> ParsecT Sources () m String -> ParsecT Sources () m String
forall a b. (a -> b) -> a -> b
$ String -> ParsecT Sources () m String
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m String
string String
".\\\""
  ParsecT Sources () m Char -> ParsecT Sources () m ()
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany (ParsecT Sources () m Char -> ParsecT Sources () m ())
-> ParsecT Sources () m Char -> ParsecT Sources () m ()
forall a b. (a -> b) -> a -> b
$ String -> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m Char
noneOf String
"\n"
  ParsecT Sources () m MdocToken
forall s (m :: * -> *) u.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m MdocToken
eofline
  MdocTokens -> ParsecT Sources () m MdocTokens
forall a. a -> ParsecT Sources () m a
forall (m :: * -> *) a. Monad m => a -> m a
return MdocTokens
forall a. Monoid a => a
mempty

argText :: PandocMonad m => Lexer m T.Text
argText :: forall (m :: * -> *). PandocMonad m => Lexer m Text
argText = do
  beg <- ParsecT Sources () m Text
Lexer m MdocTokens (Token MdocTokens)
forall (m :: * -> *) x.
(PandocMonad m, RoffLikeLexer x) =>
Lexer m x (Token x)
escape ParsecT Sources () m Text
-> ParsecT Sources () m Text -> ParsecT Sources () m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
regularText
  end <- mconcat <$> many (escape <|> regularText <|> quoteChar)
  return $ beg <> end

spaceTabChar :: PandocMonad m => Lexer m T.Text
spaceTabChar :: forall (m :: * -> *). PandocMonad m => Lexer m Text
spaceTabChar = Char -> Text
T.singleton (Char -> Text)
-> ParsecT Sources () m Char -> ParsecT Sources () m Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Sources () m Char
forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m Char
spaceChar

quotedArg :: PandocMonad m => Lexer m T.Text
quotedArg :: forall (m :: * -> *). PandocMonad m => Lexer m Text
quotedArg = do
  Lexer m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
quoteChar
  t <- [Text] -> Text
forall a. Monoid a => [a] -> a
mconcat ([Text] -> Text) -> ParsecT Sources () m [Text] -> Lexer m Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Lexer m Text -> ParsecT Sources () m [Text]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (Lexer m Text -> Lexer m Text
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try Lexer m Text
forall {u}. ParsecT Sources u m Text
innerQuote Lexer m Text -> Lexer m Text -> Lexer m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Lexer m Text
Lexer m MdocTokens (Token MdocTokens)
forall (m :: * -> *) x.
(PandocMonad m, RoffLikeLexer x) =>
Lexer m x (Token x)
escape Lexer m Text -> Lexer m Text -> Lexer m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Lexer m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
regularText Lexer m Text -> Lexer m Text -> Lexer m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Lexer m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
spaceTabChar)
  quoteChar
  notFollowedBy quoteChar
  return t
  where
    innerQuote :: ParsecT Sources u m Text
innerQuote = do
      String -> ParsecT Sources u m String
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m String
string String
"\"\""
      Text -> ParsecT Sources u m Text
forall a. a -> ParsecT Sources u m a
forall (m :: * -> *) a. Monad m => a -> m a
return Text
"\""

anyText :: PandocMonad m => Lexer m T.Text
anyText :: forall (m :: * -> *). PandocMonad m => Lexer m Text
anyText = ParsecT Sources () m Text
Lexer m MdocTokens (Token MdocTokens)
forall (m :: * -> *) x.
(PandocMonad m, RoffLikeLexer x) =>
Lexer m x (Token x)
escape ParsecT Sources () m Text
-> ParsecT Sources () m Text -> ParsecT Sources () m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
regularText ParsecT Sources () m Text
-> ParsecT Sources () m Text -> ParsecT Sources () m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
quoteChar ParsecT Sources () m Text
-> ParsecT Sources () m Text -> ParsecT Sources () m Text
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m Text
forall (m :: * -> *). PandocMonad m => Lexer m Text
spaceTabChar

regularText :: PandocMonad m => Lexer m T.Text
regularText :: forall (m :: * -> *). PandocMonad m => Lexer m Text
regularText = ParsecT Sources () m Char -> ParsecT Sources () m Text
forall s (m :: * -> *) t st.
Stream s m t =>
ParsecT s st m Char -> ParsecT s st m Text
many1Char (ParsecT Sources () m Char -> ParsecT Sources () m Text)
-> ParsecT Sources () m Char -> ParsecT Sources () m Text
forall a b. (a -> b) -> a -> b
$ String -> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m Char
noneOf String
"\n\r\t \\\""

quoteChar :: PandocMonad m => Lexer m T.Text
quoteChar :: forall (m :: * -> *). PandocMonad m => Lexer m Text
quoteChar = Char -> Text
T.singleton (Char -> Text)
-> ParsecT Sources () m Char -> ParsecT Sources () m Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
'"'

mdocToken :: PandocMonad m => Lexer m MdocTokens
mdocToken :: forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
mdocToken = Lexer m MdocTokens
forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
lexComment Lexer m MdocTokens -> Lexer m MdocTokens -> Lexer m MdocTokens
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Lexer m MdocTokens
forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
lexControlLine Lexer m MdocTokens -> Lexer m MdocTokens -> Lexer m MdocTokens
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Lexer m MdocTokens
forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
lexTextLine

lexMacroName :: PandocMonad m => Lexer m T.Text
lexMacroName :: forall (m :: * -> *). PandocMonad m => Lexer m Text
lexMacroName = ParsecT Sources () m Char -> ParsecT Sources () m Text
forall s (m :: * -> *) t st.
Stream s m t =>
ParsecT s st m Char -> ParsecT s st m Text
many1Char ((Char -> Bool) -> ParsecT Sources () m Char
forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy Char -> Bool
isMacroChar)
  where
    isMacroChar :: Char -> Bool
isMacroChar Char
'%' = Bool
True
    isMacroChar Char
x = Char -> Bool
isAlphaNum Char
x

lexMacro :: PandocMonad m => Lexer m MdocToken
lexMacro :: forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexMacro = do
  pos <- ParsecT Sources () m SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
  name <- lexMacroName
  eof <|> void (lookAhead (spaceChar <|> newline))
  skipSpaces
  return $ Macro name pos

lexCallableMacro :: PandocMonad m => Lexer m MdocToken
lexCallableMacro :: forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexCallableMacro = do
  pos <- ParsecT Sources () m SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
  q <- optionMaybe quoteChar
  name <- lexMacroName
  when (isJust q) (void quoteChar)
  eof <|> void (lookAhead (spaceChar <|> newline))
  skipSpaces
  guard $ isCallableMacro name
  return $ Macro name pos

lexDelim :: (PandocMonad m) => Lexer m MdocToken
lexDelim :: forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexDelim = do
  pos <- ParsecT Sources () m SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
  q <- optionMaybe quoteChar
  t <-
    Delim Open <$> oneOfStrings ["(", "["]
      <|> Delim Close <$> oneOfStrings [".", ",", ":", ";", ")", "]", "?", "!"]
      <|> Delim Middle <$> textStr "|"
  when (isJust q) (void quoteChar)
  eof <|> void (lookAhead (spaceChar <|> newline))
  skipSpaces
  return $ t pos

lexLit :: PandocMonad m => Lexer m MdocToken
lexLit :: forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexLit = do
  pos <- ParsecT Sources () m SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
  t <- argText <|> quotedArg
  skipSpaces
  return $ Lit t pos

lexTextLine :: PandocMonad m => Lexer m MdocTokens
lexTextLine :: forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
lexTextLine = do
  pos <- ParsecT Sources () m SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
  guard $ sourceColumn pos == 1
  t <- mconcat <$> many anyText
  eofline
  if T.null $ T.strip t
     then return $ singleTok $ Blank pos
     else return $ singleTok $ Str t pos

lexControlLine :: PandocMonad m => Lexer m MdocTokens
lexControlLine :: forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
lexControlLine = do
  pos <- ParsecT Sources () m SourcePos
forall (m :: * -> *) s u. Monad m => ParsecT s u m SourcePos
getPosition
  guard $ sourceColumn pos == 1
  char '.'
  eofline *> mempty <|> do
    m@(Macro name _) <- lexMacro
    -- .Ns macros at the start of a line are ignored. We'd have to look behind
    -- to keep track of the "start of the line" in the parser, so we'll drop
    -- those macros in lexing.
    let start | Text
name Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"Ns" = []
              | Bool
otherwise = [MdocToken
m]
    let parsed = Text -> Bool
isParsedMacro Text
name
    (wds, e) <- manyUntil (l parsed) eofline
    return $ MdocTokens $ Seq.fromList $ start <> wds <> [e]
      where
        l :: Bool -> ParsecT Sources () m MdocToken
l Bool
True = ParsecT Sources () m MdocToken -> ParsecT Sources () m MdocToken
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try ParsecT Sources () m MdocToken
forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexDelim ParsecT Sources () m MdocToken
-> ParsecT Sources () m MdocToken -> ParsecT Sources () m MdocToken
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m MdocToken -> ParsecT Sources () m MdocToken
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try ParsecT Sources () m MdocToken
forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexCallableMacro ParsecT Sources () m MdocToken
-> ParsecT Sources () m MdocToken -> ParsecT Sources () m MdocToken
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m MdocToken
forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexLit
        l Bool
False = ParsecT Sources () m MdocToken -> ParsecT Sources () m MdocToken
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try ParsecT Sources () m MdocToken
forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexDelim ParsecT Sources () m MdocToken
-> ParsecT Sources () m MdocToken -> ParsecT Sources () m MdocToken
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> ParsecT Sources () m MdocToken
forall (m :: * -> *). PandocMonad m => Lexer m MdocToken
lexLit

-- | Tokenize a string as a sequence of mdoc tokens.
lexMdoc :: PandocMonad m => SourcePos -> T.Text -> m MdocTokens
lexMdoc :: forall (m :: * -> *).
PandocMonad m =>
SourcePos -> Text -> m MdocTokens
lexMdoc SourcePos
pos Text
txt = do
  eithertokens <- ParsecT Sources () m MdocTokens
-> () -> Text -> m (Either PandocError MdocTokens)
forall (m :: * -> *) t st a.
(Monad m, ToSources t) =>
ParsecT Sources st m a -> st -> t -> m (Either PandocError a)
readWithM (do SourcePos -> ParsecT Sources () m ()
forall (m :: * -> *) s u. Monad m => SourcePos -> ParsecT s u m ()
setPosition SourcePos
pos
                                [MdocTokens] -> MdocTokens
forall a. Monoid a => [a] -> a
mconcat ([MdocTokens] -> MdocTokens)
-> ParsecT Sources () m [MdocTokens]
-> ParsecT Sources () m MdocTokens
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Sources () m MdocTokens
-> ParsecT Sources () m () -> ParsecT Sources () m [MdocTokens]
forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill ParsecT Sources () m MdocTokens
forall (m :: * -> *). PandocMonad m => Lexer m MdocTokens
mdocToken ParsecT Sources () m ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof) ()
forall a. Default a => a
def Text
txt
  case eithertokens of
    Left PandocError
e       -> PandocError -> m MdocTokens
forall a. PandocError -> m a
forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError PandocError
e
    Right MdocTokens
tokenz -> MdocTokens -> m MdocTokens
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return MdocTokens
tokenz