sparsec

simple Haskell parser combinator library
git clone git://git.rr3.xyz/sparsec
Log | Files | Refs | README | LICENSE

commit 05ba9e85076778176007539d54f84623b476e128
parent 59aa556e7760664f7e18e8a837fc1a23377a7c79
Author: Robert Russell <robert@rr3.xyz>
Date:   Sun, 14 Dec 2025 11:56:18 -0800

Reimplement core combinators in terms of new continue combinator, fix branch, and rename some stuff

Diffstat:
MSparsec.hs | 113+++++++++++++++++++++++++++----------------------------------------------------
Mexample/Main.hs | 94++++++++++++++++++++++++++++++++++++++++---------------------------------------
2 files changed, 87 insertions(+), 120 deletions(-)

diff --git a/Sparsec.hs b/Sparsec.hs @@ -53,7 +53,7 @@ module Sparsec ( save, load, fail, - notp, + not, err, catch, -- Derived combinators @@ -103,7 +103,7 @@ import Data.Foldable import Data.Functor import Numeric.Natural import Text.Printf -import Prelude hiding (fail, read) +import Prelude hiding (fail, not, read) -------------------------------------------------------------------------------- -- Char @@ -208,21 +208,21 @@ instance (Show e, Show a) => Show (Result e a) where show = \case Failure -> "failure" Error e -> "error: " ++ show e - Success a rest _ -> - if BS.null rest + Success a i _ -> + if BS.null i then printf "ok: %s" (show a) else printf "ok (%d bytes remaining): %s" - (BS.length rest) + (BS.length i) (show a) instance Functor (Result e) where fmap f = \case Failure -> Failure Error e -> Error e - Success a rest loc -> Success (f a) rest loc + Success a i l -> Success (f a) i l -------------------------------------------------------------------------------- -- Parse monad and core combinators @@ -241,121 +241,86 @@ mapParseT :: (m (Result e a) -> m' (Result e' a')) -> ParseT e m a -> ParseT e' m' a' -mapParseT f p = ParseT \input loc -> f $ runParseT p input loc +mapParseT f p = ParseT \i l -> f $ runParseT p i l type Parse e a = ParseT e Identity a runParse :: Parse e a -> ByteString -> Loc -> Result e a -runParse (ParseT f) input loc = runIdentity $ f input loc +runParse (ParseT f) i l = runIdentity $ f i l mapParse :: (Result e a -> Result e' a') -> Parse e a -> Parse e' a' mapParse f = mapParseT (Identity . f . runIdentity) --- handle generalizes the following: --- (<|>): provide a continuation for failure --- catch: provide a continuation for error --- (>>=): provide a continuation for ok --- handle let's you specify a continuation for all three cases. --- handle :: (Monad m) => ParseT e' m a' -> (e -> ParseT e' m a') -> (a -> ParseT e' m a') -> ParseT e m a -> ParseT e' m a' --- handle hf he ho p = ParseT \input loc -> --- runParseT p input loc >>= \case --- Failure -> runParseT hf input loc --- Error e -> runParseT (he e) input loc --- Success a input' loc' -> runParseT (ho a) input' loc' --- bind2 :: (Monad m) => (a -> ParseT e m a') -> ParseT e m a -> ParseT e m a' --- bind2 h = handle fail err h --- catch2 :: (Monad m) => (e -> ParseT e' m a) -> ParseT e m a -> ParseT e' m a --- catch2 h = handle fail h pure --- alt2 :: (Monad m) => ParseT e m a -> ParseT e m a -> ParseT e m a --- alt2 h = handle h err pure --- branch2 :: (Monad m) => ParseT e m a' -> (a -> ParseT e m a') -> ParseT e m a -> ParseT e m a' --- branch2 hf ho = handle hf err ho --- succeed :: (Monad m) => a -> ParseT e m a --- succeed = pure +fail :: (Monad m) => ParseT e m a +fail = ParseT \_ _ -> pure Failure + +err :: (Monad m) => e -> ParseT e m a +err e = ParseT \_ _ -> pure $ Error e + +succeed :: (Monad m) => a -> ParseT e m a +succeed a = ParseT \i l -> pure $ Success a i l + +continue :: (Monad m) => ParseT e' m a' -> (e -> ParseT e' m a') -> (a -> ParseT e' m a') -> ParseT e m a -> ParseT e' m a' +continue kf ke ks p = ParseT \i l -> + runParseT p i l >>= \case + Failure -> runParseT kf i l + Error e -> runParseT (ke e) i l + Success a i' l' -> runParseT (ks a) i' l' instance (Monad m) => Functor (ParseT e m) where fmap = liftM instance (Monad m) => Applicative (ParseT e m) where - pure a = ParseT \input loc -> pure $ Success a input loc - + pure = succeed (<*>) = ap instance (Monad m) => Monad (ParseT e m) where - p >>= k = ParseT \input loc -> - runParseT p input loc >>= \case - Failure -> pure Failure - Error e -> pure $ Error e - Success a input' loc' -> runParseT (k a) input' loc' + p >>= k = continue fail err k p instance (Monad m) => Alternative (ParseT e m) where - empty = ParseT \_ _ -> pure Failure - - p <|> q = ParseT \input loc -> - runParseT p input loc >>= \case - Failure -> runParseT q input loc - Error e -> pure $ Error e - Success a input' loc' -> pure $ Success a input' loc' + empty = fail + p <|> q = continue q err succeed p instance MonadTrans (ParseT e) where - lift m = ParseT \input loc -> (\a -> Success a input loc) <$> m + lift m = ParseT \i l -> (\a -> Success a i l) <$> m instance (MonadReader r m) => MonadReader r (ParseT e m) where ask = lift ask - - local f p = ParseT \input loc -> local f $ runParseT p input loc + local f p = ParseT \i l -> local f $ runParseT p i l instance (MonadState s m) => MonadState s (ParseT e m) where get = lift get - put = lift . put --- TODO: Separate a MTL-like MonadParse typeclass with (a subset of) the --- following methods? - -- | Read the entire input without consuming it. read :: (Monad m) => ParseT e m ByteString -read = ParseT \input loc -> pure $ Success input input loc +read = ParseT \i l -> pure $ Success i i l -- | Replace the entire input without affecting the current location. write :: (Monad m) => ByteString -> ParseT e m () -write input = ParseT \_ loc -> pure $ Success () input loc +write i = ParseT \_ l -> pure $ Success () i l getLoc :: (Monad m) => ParseT e m Loc -getLoc = ParseT \input loc -> pure $ Success loc input loc +getLoc = ParseT \i l -> pure $ Success l i l putLoc :: (Monad m) => Loc -> ParseT e m () -putLoc loc = ParseT \input _ -> pure $ Success () input loc +putLoc l = ParseT \i _ -> pure $ Success () i l -- | Save parsing state. This is effectively a combination of read and getLoc. save :: (Monad m) => ParseT e m (ByteString, Loc) -save = ParseT \input loc -> pure $ Success (input, loc) input loc +save = ParseT \i l -> pure $ Success (i, l) i l -- | Load parsing state. This is effectively a combination of write and putLoc. load :: (Monad m) => (ByteString, Loc) -> ParseT e m () -load (input, loc) = ParseT \_ _ -> pure $ Success () input loc - -fail :: (Monad m) => ParseT e m a -fail = empty +load (i, l) = ParseT \_ _ -> pure $ Success () i l -- | Turn a failure into a success and vice versa. -notp :: (Monad m) => ParseT e m a -> ParseT e m () -notp p = ParseT \input loc -> - runParseT p input loc >>= \case - Failure -> pure $ Success () input loc - Error e -> pure $ Error e - Success{} -> pure Failure - -err :: (Monad m) => e -> ParseT e m a -err e = ParseT \_ _ -> pure $ Error e +not :: (Monad m) => ParseT e m a -> ParseT e m () +not = continue (succeed ()) err (const fail) -- | Catch an error. catch :: (Monad m) => ParseT e m a -> (e -> ParseT e' m a) -> ParseT e' m a -p `catch` h = ParseT \input loc -> - runParseT p input loc >>= \case - Failure -> pure Failure - Error e -> runParseT (h e) input loc - Success a input' loc' -> pure $ Success a input' loc' +p `catch` h = continue fail h succeed p -------------------------------------------------------------------------------- -- General combinators @@ -414,7 +379,7 @@ chainr f pa pb = f <$> pa <*> chainr f pa pb <|> pb -- | `branch pc pt pf` runs pc; if it succeeds, it continues with pt, and -- otherwise it continues with pf. branch :: (Monad m) => ParseT e m a -> ParseT e m b -> ParseT e m b -> ParseT e m b -branch pc pt pf = pc *> pt <|> pf -- TODO: This is bugged. pt can backtrack into pf. +branch p kt kf = continue kf err (const kt) p choice :: (Monad m, Foldable t) => t (ParseT e m a) -> ParseT e m a choice = asum diff --git a/example/Main.hs b/example/Main.hs @@ -1,10 +1,11 @@ module Main where + import Control.Applicative import Control.Monad import Data.ByteString (ByteString) import Data.Char -import Sparsec +import Sparsec qualified as P -------------------------------------------------------------------------------- -- Surface syntax @@ -14,49 +15,49 @@ newtype TypeName = TypeName ByteString deriving (Show, Eq, Ord) newtype TermName = TermName ByteString deriving (Show, Eq, Ord) newtype StmtName = StmtName ByteString deriving (Show, Eq, Ord) -data PosNeg = Pos | Neg deriving Show -data PrdCns = Prd | Cns deriving Show +data PosNeg = Pos | Neg deriving (Show) +data PrdCns = Prd | Cns deriving (Show) -data DeclParam = DeclParam TypeName Kind deriving Show -data DeclField = DeclField PrdCns Type deriving Show -data DeclXtor = DeclXtor XtorName [DeclParam] [DeclField] deriving Show -data Decl = Decl PosNeg TypeName [DeclParam] [DeclXtor] deriving Show +data DeclParam = DeclParam TypeName Kind deriving (Show) +data DeclField = DeclField PrdCns Type deriving (Show) +data DeclXtor = DeclXtor XtorName [DeclParam] [DeclField] deriving (Show) +data Decl = Decl PosNeg TypeName [DeclParam] [DeclXtor] deriving (Show) data Kind = KindType PosNeg | KindArrow Kind Kind - deriving Show + deriving (Show) data Type = TypeVar TypeName | TypeLam TypeName (Maybe Kind) Type | TypeApp Type Type - deriving Show + deriving (Show) -data Case = Case XtorName [TermName] Stmt deriving Show +data Case = Case XtorName [TermName] Stmt deriving (Show) data Term = TermVar TermName | TermMu TermName (Maybe Type) Stmt | TermXtor XtorName [Type] [Term] | TermMatch [Case] - deriving Show + deriving (Show) data Stmt = StmtCut Term Type Term -- TODO: Should Cut be a special case of Cmd? | StmtCmd StmtName [Type] [Term] - deriving Show + deriving (Show) -data Prog = Prog [Decl] Stmt deriving Show +data Prog = Prog [Decl] Stmt deriving (Show) -------------------------------------------------------------------------------- -- Parser -data Err = ErrUtf8 Loc | Err deriving Show -- TODO +data Err = ErrUtf8 P.Loc | Err deriving (Show) -- TODO -instance Utf8Error Err where +instance P.Utf8Error Err where utf8Error = ErrUtf8 -type P a = Parse Err a +type P a = P.Parse Err a isLetterOrDigit c = isLetter c || isDigit c isWordStart c = isLetter c || c == '_' @@ -64,10 +65,10 @@ isWordCont c = isLetterOrDigit c || c == '_' || c == '\'' isKw s = s == "data" || s == "prd" || s == "cns" -pWs = charWhile isSpace +pWs = P.charWhile isSpace pToken p = p <* pWs -pSym = pToken . string -pWord = snd <$> (bytesOf $ charIf isWordStart *> charWhile isWordCont) +pSym = pToken . P.string +pWord = snd <$> (P.bytesOf $ P.charIf isWordStart *> P.charWhile isWordCont) pKw s = do s' <- pToken pWord guard (s' == s) @@ -75,21 +76,21 @@ pIdent = do x <- pToken pWord guard $ not $ isKw x pure x -pDelim l p r = pSym l *> (p <* pSym r) `cut` Err -pManySepByWithTrailer p sep = p `someSepBy` sep <* optional sep <|> pure [] +pDelim l p r = pSym l *> (p <* pSym r) `P.cut` Err +pManySepByWithTrailer p sep = p `P.someSepBy` sep <* optional sep <|> pure [] pList p = p `pManySepByWithTrailer` pSym "," -pXtorName = XtorName <$> pToken (char '#' *> pWord `cut` Err) +pXtorName = XtorName <$> pToken (P.char '#' *> pWord `P.cut` Err) pTypeName = TypeName <$> pIdent pTermName = TermName <$> pIdent -pStmtName = StmtName <$> pToken (char '@' *> pWord `cut` Err) +pStmtName = StmtName <$> pToken (P.char '@' *> pWord `P.cut` Err) pPosNeg = pSym "+" *> pure Pos <|> pSym "-" *> pure Neg pPrdCns = pKw "prd" *> pure Prd <|> pKw "cns" *> pure Cns pDeclParam :: Int -> P DeclParam pDeclParam = \case - 0 -> DeclParam <$> pTypeName <*> branch (pSym ":") (pKind 0 `cut` Err) (pure $ KindType Pos) + 0 -> DeclParam <$> pTypeName <*> P.branch (pSym ":") (pKind 0 `P.cut` Err) (pure $ KindType Pos) 1 -> DeclParam <$> pTypeName <*> pure (KindType Pos) <|> pDelim "(" (pDeclParam 0) ")" _ -> error "pDeclParam: invalid precedence" @@ -109,11 +110,11 @@ pDecl = do pKw "data" x <- pTypeName params <- many (pDeclParam 1) - posneg <- branch (pSym ":") (pPosNeg `cut` Err) (pure Pos) + posneg <- P.branch (pSym ":") (pPosNeg `P.cut` Err) (pure Pos) xtors <- pDelim "{" (pList pDeclXtor) "}" pure $ Decl posneg x params xtors -pKindArrow dom = pSym "->" *> (KindArrow dom <$> pKind 0) `cut` Err +pKindArrow dom = pSym "->" *> (KindArrow dom <$> pKind 0) `P.cut` Err pKind :: Int -> P Kind pKind = \case @@ -125,10 +126,10 @@ pKind = \case pTypeLam = do pSym "\\" - x <- pTypeName `cut` Err - mk <- optional (pSym ":" *> pKind 0 `cut` Err) - pSym "=>" `cut` Err - body <- pType 0 `cut` Err + x <- pTypeName `P.cut` Err + mk <- optional (pSym ":" *> pKind 0 `P.cut` Err) + pSym "=>" `P.cut` Err + body <- pType 0 `P.cut` Err pure $ TypeLam x mk body pTypeAtom = TypeVar <$> pTypeName @@ -136,23 +137,23 @@ pTypeAtom = TypeVar <$> pTypeName pType :: Int -> P Type pType = \case 0 -> pTypeLam <|> pType 1 - 1 -> pType 2 >>= iter \a -> TypeApp a <$> pType 2 + 1 -> pType 2 >>= P.iter \a -> TypeApp a <$> pType 2 2 -> pTypeAtom <|> pDelim "(" (pType 0) ")" _ -> error "pType: invalid precedence" pCase = do x <- pXtorName vars <- many pTermName - pSym "=>" `cut` Err - body <- pStmt `cut` Err + pSym "=>" `P.cut` Err + body <- pStmt `P.cut` Err pure $ Case x vars body pTermMu = do pSym "\\" - x <- pTermName `cut` Err - ma <- optional (pSym ":" *> pType 1 `cut` Err) - pSym "=>" `cut` Err - body <- pStmt `cut` Err + x <- pTermName `P.cut` Err + ma <- optional (pSym ":" *> pType 1 `P.cut` Err) + pSym "=>" `P.cut` Err + body <- pStmt `P.cut` Err pure $ TermMu x ma body pTermArg = pTerm 2 <|> TermXtor <$> pXtorName <*> pure [] <*> pure [] @@ -171,9 +172,10 @@ pTerm :: Int -> P Term pTerm = \case 0 -> pTermMu <|> pTerm 1 1 -> pTermXtor <|> pTerm 2 - 2 -> TermVar <$> pTermName - <|> TermMatch <$> pDelim "{" (pList pCase) "}" - <|> pDelim "(" (pTerm 0) ")" + 2 -> + TermVar <$> pTermName + <|> TermMatch <$> pDelim "{" (pList pCase) "}" + <|> pDelim "(" (pTerm 0) ")" _ -> error "pTerm: invalid precedence" pStmtCmd = do @@ -183,10 +185,10 @@ pStmtCmd = do pStmtCut = do prd <- pTerm 0 - pSym ":" `cut` Err - typ <- pType 0 `cut` Err - pSym ":" `cut` Err - cns <- pTerm 0 `cut` Err + pSym ":" `P.cut` Err + typ <- pType 0 `P.cut` Err + pSym ":" `P.cut` Err + cns <- pTerm 0 `P.cut` Err pure $ StmtCut prd typ cns pStmt = pStmtCmd <|> pStmtCut @@ -195,7 +197,7 @@ pProg :: P Prog pProg = Prog <$> many pDecl <*> pStmt pSrc :: P Prog -pSrc = pWs *> pProg <* eof +pSrc = pWs *> pProg <* P.eof -------------------------------------------------------------------------------- @@ -203,4 +205,4 @@ ex :: ByteString ex = "data Bool : + { #true, #false } data Any { #any [A] A, } #true :Bool: \\x=>@print" main :: IO () -main = print $ runParse pSrc ex locZero +main = print $ P.runParse pSrc ex P.locZero