commit 05ba9e85076778176007539d54f84623b476e128
parent 59aa556e7760664f7e18e8a837fc1a23377a7c79
Author: Robert Russell <robert@rr3.xyz>
Date: Sun, 14 Dec 2025 11:56:18 -0800
Reimplement core combinators in terms of new continue combinator, fix branch, and rename some stuff
Diffstat:
| M | Sparsec.hs | | | 113 | +++++++++++++++++++++++++++---------------------------------------------------- |
| M | example/Main.hs | | | 94 | ++++++++++++++++++++++++++++++++++++++++--------------------------------------- |
2 files changed, 87 insertions(+), 120 deletions(-)
diff --git a/Sparsec.hs b/Sparsec.hs
@@ -53,7 +53,7 @@ module Sparsec (
save,
load,
fail,
- notp,
+ not,
err,
catch,
-- Derived combinators
@@ -103,7 +103,7 @@ import Data.Foldable
import Data.Functor
import Numeric.Natural
import Text.Printf
-import Prelude hiding (fail, read)
+import Prelude hiding (fail, not, read)
--------------------------------------------------------------------------------
-- Char
@@ -208,21 +208,21 @@ instance (Show e, Show a) => Show (Result e a) where
show = \case
Failure -> "failure"
Error e -> "error: " ++ show e
- Success a rest _ ->
- if BS.null rest
+ Success a i _ ->
+ if BS.null i
then
printf "ok: %s" (show a)
else
printf
"ok (%d bytes remaining): %s"
- (BS.length rest)
+ (BS.length i)
(show a)
instance Functor (Result e) where
fmap f = \case
Failure -> Failure
Error e -> Error e
- Success a rest loc -> Success (f a) rest loc
+ Success a i l -> Success (f a) i l
--------------------------------------------------------------------------------
-- Parse monad and core combinators
@@ -241,121 +241,86 @@ mapParseT ::
(m (Result e a) -> m' (Result e' a')) ->
ParseT e m a ->
ParseT e' m' a'
-mapParseT f p = ParseT \input loc -> f $ runParseT p input loc
+mapParseT f p = ParseT \i l -> f $ runParseT p i l
type Parse e a = ParseT e Identity a
runParse :: Parse e a -> ByteString -> Loc -> Result e a
-runParse (ParseT f) input loc = runIdentity $ f input loc
+runParse (ParseT f) i l = runIdentity $ f i l
mapParse :: (Result e a -> Result e' a') -> Parse e a -> Parse e' a'
mapParse f = mapParseT (Identity . f . runIdentity)
--- handle generalizes the following:
--- (<|>): provide a continuation for failure
--- catch: provide a continuation for error
--- (>>=): provide a continuation for ok
--- handle let's you specify a continuation for all three cases.
--- handle :: (Monad m) => ParseT e' m a' -> (e -> ParseT e' m a') -> (a -> ParseT e' m a') -> ParseT e m a -> ParseT e' m a'
--- handle hf he ho p = ParseT \input loc ->
--- runParseT p input loc >>= \case
--- Failure -> runParseT hf input loc
--- Error e -> runParseT (he e) input loc
--- Success a input' loc' -> runParseT (ho a) input' loc'
--- bind2 :: (Monad m) => (a -> ParseT e m a') -> ParseT e m a -> ParseT e m a'
--- bind2 h = handle fail err h
--- catch2 :: (Monad m) => (e -> ParseT e' m a) -> ParseT e m a -> ParseT e' m a
--- catch2 h = handle fail h pure
--- alt2 :: (Monad m) => ParseT e m a -> ParseT e m a -> ParseT e m a
--- alt2 h = handle h err pure
--- branch2 :: (Monad m) => ParseT e m a' -> (a -> ParseT e m a') -> ParseT e m a -> ParseT e m a'
--- branch2 hf ho = handle hf err ho
--- succeed :: (Monad m) => a -> ParseT e m a
--- succeed = pure
+fail :: (Monad m) => ParseT e m a
+fail = ParseT \_ _ -> pure Failure
+
+err :: (Monad m) => e -> ParseT e m a
+err e = ParseT \_ _ -> pure $ Error e
+
+succeed :: (Monad m) => a -> ParseT e m a
+succeed a = ParseT \i l -> pure $ Success a i l
+
+continue :: (Monad m) => ParseT e' m a' -> (e -> ParseT e' m a') -> (a -> ParseT e' m a') -> ParseT e m a -> ParseT e' m a'
+continue kf ke ks p = ParseT \i l ->
+ runParseT p i l >>= \case
+ Failure -> runParseT kf i l
+ Error e -> runParseT (ke e) i l
+ Success a i' l' -> runParseT (ks a) i' l'
instance (Monad m) => Functor (ParseT e m) where
fmap = liftM
instance (Monad m) => Applicative (ParseT e m) where
- pure a = ParseT \input loc -> pure $ Success a input loc
-
+ pure = succeed
(<*>) = ap
instance (Monad m) => Monad (ParseT e m) where
- p >>= k = ParseT \input loc ->
- runParseT p input loc >>= \case
- Failure -> pure Failure
- Error e -> pure $ Error e
- Success a input' loc' -> runParseT (k a) input' loc'
+ p >>= k = continue fail err k p
instance (Monad m) => Alternative (ParseT e m) where
- empty = ParseT \_ _ -> pure Failure
-
- p <|> q = ParseT \input loc ->
- runParseT p input loc >>= \case
- Failure -> runParseT q input loc
- Error e -> pure $ Error e
- Success a input' loc' -> pure $ Success a input' loc'
+ empty = fail
+ p <|> q = continue q err succeed p
instance MonadTrans (ParseT e) where
- lift m = ParseT \input loc -> (\a -> Success a input loc) <$> m
+ lift m = ParseT \i l -> (\a -> Success a i l) <$> m
instance (MonadReader r m) => MonadReader r (ParseT e m) where
ask = lift ask
-
- local f p = ParseT \input loc -> local f $ runParseT p input loc
+ local f p = ParseT \i l -> local f $ runParseT p i l
instance (MonadState s m) => MonadState s (ParseT e m) where
get = lift get
-
put = lift . put
--- TODO: Separate a MTL-like MonadParse typeclass with (a subset of) the
--- following methods?
-
-- | Read the entire input without consuming it.
read :: (Monad m) => ParseT e m ByteString
-read = ParseT \input loc -> pure $ Success input input loc
+read = ParseT \i l -> pure $ Success i i l
-- | Replace the entire input without affecting the current location.
write :: (Monad m) => ByteString -> ParseT e m ()
-write input = ParseT \_ loc -> pure $ Success () input loc
+write i = ParseT \_ l -> pure $ Success () i l
getLoc :: (Monad m) => ParseT e m Loc
-getLoc = ParseT \input loc -> pure $ Success loc input loc
+getLoc = ParseT \i l -> pure $ Success l i l
putLoc :: (Monad m) => Loc -> ParseT e m ()
-putLoc loc = ParseT \input _ -> pure $ Success () input loc
+putLoc l = ParseT \i _ -> pure $ Success () i l
-- | Save parsing state. This is effectively a combination of read and getLoc.
save :: (Monad m) => ParseT e m (ByteString, Loc)
-save = ParseT \input loc -> pure $ Success (input, loc) input loc
+save = ParseT \i l -> pure $ Success (i, l) i l
-- | Load parsing state. This is effectively a combination of write and putLoc.
load :: (Monad m) => (ByteString, Loc) -> ParseT e m ()
-load (input, loc) = ParseT \_ _ -> pure $ Success () input loc
-
-fail :: (Monad m) => ParseT e m a
-fail = empty
+load (i, l) = ParseT \_ _ -> pure $ Success () i l
-- | Turn a failure into a success and vice versa.
-notp :: (Monad m) => ParseT e m a -> ParseT e m ()
-notp p = ParseT \input loc ->
- runParseT p input loc >>= \case
- Failure -> pure $ Success () input loc
- Error e -> pure $ Error e
- Success{} -> pure Failure
-
-err :: (Monad m) => e -> ParseT e m a
-err e = ParseT \_ _ -> pure $ Error e
+not :: (Monad m) => ParseT e m a -> ParseT e m ()
+not = continue (succeed ()) err (const fail)
-- | Catch an error.
catch :: (Monad m) => ParseT e m a -> (e -> ParseT e' m a) -> ParseT e' m a
-p `catch` h = ParseT \input loc ->
- runParseT p input loc >>= \case
- Failure -> pure Failure
- Error e -> runParseT (h e) input loc
- Success a input' loc' -> pure $ Success a input' loc'
+p `catch` h = continue fail h succeed p
--------------------------------------------------------------------------------
-- General combinators
@@ -414,7 +379,7 @@ chainr f pa pb = f <$> pa <*> chainr f pa pb <|> pb
-- | `branch pc pt pf` runs pc; if it succeeds, it continues with pt, and
-- otherwise it continues with pf.
branch :: (Monad m) => ParseT e m a -> ParseT e m b -> ParseT e m b -> ParseT e m b
-branch pc pt pf = pc *> pt <|> pf -- TODO: This is bugged. pt can backtrack into pf.
+branch p kt kf = continue kf err (const kt) p
choice :: (Monad m, Foldable t) => t (ParseT e m a) -> ParseT e m a
choice = asum
diff --git a/example/Main.hs b/example/Main.hs
@@ -1,10 +1,11 @@
module Main where
+
import Control.Applicative
import Control.Monad
import Data.ByteString (ByteString)
import Data.Char
-import Sparsec
+import Sparsec qualified as P
--------------------------------------------------------------------------------
-- Surface syntax
@@ -14,49 +15,49 @@ newtype TypeName = TypeName ByteString deriving (Show, Eq, Ord)
newtype TermName = TermName ByteString deriving (Show, Eq, Ord)
newtype StmtName = StmtName ByteString deriving (Show, Eq, Ord)
-data PosNeg = Pos | Neg deriving Show
-data PrdCns = Prd | Cns deriving Show
+data PosNeg = Pos | Neg deriving (Show)
+data PrdCns = Prd | Cns deriving (Show)
-data DeclParam = DeclParam TypeName Kind deriving Show
-data DeclField = DeclField PrdCns Type deriving Show
-data DeclXtor = DeclXtor XtorName [DeclParam] [DeclField] deriving Show
-data Decl = Decl PosNeg TypeName [DeclParam] [DeclXtor] deriving Show
+data DeclParam = DeclParam TypeName Kind deriving (Show)
+data DeclField = DeclField PrdCns Type deriving (Show)
+data DeclXtor = DeclXtor XtorName [DeclParam] [DeclField] deriving (Show)
+data Decl = Decl PosNeg TypeName [DeclParam] [DeclXtor] deriving (Show)
data Kind
= KindType PosNeg
| KindArrow Kind Kind
- deriving Show
+ deriving (Show)
data Type
= TypeVar TypeName
| TypeLam TypeName (Maybe Kind) Type
| TypeApp Type Type
- deriving Show
+ deriving (Show)
-data Case = Case XtorName [TermName] Stmt deriving Show
+data Case = Case XtorName [TermName] Stmt deriving (Show)
data Term
= TermVar TermName
| TermMu TermName (Maybe Type) Stmt
| TermXtor XtorName [Type] [Term]
| TermMatch [Case]
- deriving Show
+ deriving (Show)
data Stmt
= StmtCut Term Type Term -- TODO: Should Cut be a special case of Cmd?
| StmtCmd StmtName [Type] [Term]
- deriving Show
+ deriving (Show)
-data Prog = Prog [Decl] Stmt deriving Show
+data Prog = Prog [Decl] Stmt deriving (Show)
--------------------------------------------------------------------------------
-- Parser
-data Err = ErrUtf8 Loc | Err deriving Show -- TODO
+data Err = ErrUtf8 P.Loc | Err deriving (Show) -- TODO
-instance Utf8Error Err where
+instance P.Utf8Error Err where
utf8Error = ErrUtf8
-type P a = Parse Err a
+type P a = P.Parse Err a
isLetterOrDigit c = isLetter c || isDigit c
isWordStart c = isLetter c || c == '_'
@@ -64,10 +65,10 @@ isWordCont c = isLetterOrDigit c || c == '_' || c == '\''
isKw s = s == "data" || s == "prd" || s == "cns"
-pWs = charWhile isSpace
+pWs = P.charWhile isSpace
pToken p = p <* pWs
-pSym = pToken . string
-pWord = snd <$> (bytesOf $ charIf isWordStart *> charWhile isWordCont)
+pSym = pToken . P.string
+pWord = snd <$> (P.bytesOf $ P.charIf isWordStart *> P.charWhile isWordCont)
pKw s = do
s' <- pToken pWord
guard (s' == s)
@@ -75,21 +76,21 @@ pIdent = do
x <- pToken pWord
guard $ not $ isKw x
pure x
-pDelim l p r = pSym l *> (p <* pSym r) `cut` Err
-pManySepByWithTrailer p sep = p `someSepBy` sep <* optional sep <|> pure []
+pDelim l p r = pSym l *> (p <* pSym r) `P.cut` Err
+pManySepByWithTrailer p sep = p `P.someSepBy` sep <* optional sep <|> pure []
pList p = p `pManySepByWithTrailer` pSym ","
-pXtorName = XtorName <$> pToken (char '#' *> pWord `cut` Err)
+pXtorName = XtorName <$> pToken (P.char '#' *> pWord `P.cut` Err)
pTypeName = TypeName <$> pIdent
pTermName = TermName <$> pIdent
-pStmtName = StmtName <$> pToken (char '@' *> pWord `cut` Err)
+pStmtName = StmtName <$> pToken (P.char '@' *> pWord `P.cut` Err)
pPosNeg = pSym "+" *> pure Pos <|> pSym "-" *> pure Neg
pPrdCns = pKw "prd" *> pure Prd <|> pKw "cns" *> pure Cns
pDeclParam :: Int -> P DeclParam
pDeclParam = \case
- 0 -> DeclParam <$> pTypeName <*> branch (pSym ":") (pKind 0 `cut` Err) (pure $ KindType Pos)
+ 0 -> DeclParam <$> pTypeName <*> P.branch (pSym ":") (pKind 0 `P.cut` Err) (pure $ KindType Pos)
1 -> DeclParam <$> pTypeName <*> pure (KindType Pos) <|> pDelim "(" (pDeclParam 0) ")"
_ -> error "pDeclParam: invalid precedence"
@@ -109,11 +110,11 @@ pDecl = do
pKw "data"
x <- pTypeName
params <- many (pDeclParam 1)
- posneg <- branch (pSym ":") (pPosNeg `cut` Err) (pure Pos)
+ posneg <- P.branch (pSym ":") (pPosNeg `P.cut` Err) (pure Pos)
xtors <- pDelim "{" (pList pDeclXtor) "}"
pure $ Decl posneg x params xtors
-pKindArrow dom = pSym "->" *> (KindArrow dom <$> pKind 0) `cut` Err
+pKindArrow dom = pSym "->" *> (KindArrow dom <$> pKind 0) `P.cut` Err
pKind :: Int -> P Kind
pKind = \case
@@ -125,10 +126,10 @@ pKind = \case
pTypeLam = do
pSym "\\"
- x <- pTypeName `cut` Err
- mk <- optional (pSym ":" *> pKind 0 `cut` Err)
- pSym "=>" `cut` Err
- body <- pType 0 `cut` Err
+ x <- pTypeName `P.cut` Err
+ mk <- optional (pSym ":" *> pKind 0 `P.cut` Err)
+ pSym "=>" `P.cut` Err
+ body <- pType 0 `P.cut` Err
pure $ TypeLam x mk body
pTypeAtom = TypeVar <$> pTypeName
@@ -136,23 +137,23 @@ pTypeAtom = TypeVar <$> pTypeName
pType :: Int -> P Type
pType = \case
0 -> pTypeLam <|> pType 1
- 1 -> pType 2 >>= iter \a -> TypeApp a <$> pType 2
+ 1 -> pType 2 >>= P.iter \a -> TypeApp a <$> pType 2
2 -> pTypeAtom <|> pDelim "(" (pType 0) ")"
_ -> error "pType: invalid precedence"
pCase = do
x <- pXtorName
vars <- many pTermName
- pSym "=>" `cut` Err
- body <- pStmt `cut` Err
+ pSym "=>" `P.cut` Err
+ body <- pStmt `P.cut` Err
pure $ Case x vars body
pTermMu = do
pSym "\\"
- x <- pTermName `cut` Err
- ma <- optional (pSym ":" *> pType 1 `cut` Err)
- pSym "=>" `cut` Err
- body <- pStmt `cut` Err
+ x <- pTermName `P.cut` Err
+ ma <- optional (pSym ":" *> pType 1 `P.cut` Err)
+ pSym "=>" `P.cut` Err
+ body <- pStmt `P.cut` Err
pure $ TermMu x ma body
pTermArg = pTerm 2 <|> TermXtor <$> pXtorName <*> pure [] <*> pure []
@@ -171,9 +172,10 @@ pTerm :: Int -> P Term
pTerm = \case
0 -> pTermMu <|> pTerm 1
1 -> pTermXtor <|> pTerm 2
- 2 -> TermVar <$> pTermName
- <|> TermMatch <$> pDelim "{" (pList pCase) "}"
- <|> pDelim "(" (pTerm 0) ")"
+ 2 ->
+ TermVar <$> pTermName
+ <|> TermMatch <$> pDelim "{" (pList pCase) "}"
+ <|> pDelim "(" (pTerm 0) ")"
_ -> error "pTerm: invalid precedence"
pStmtCmd = do
@@ -183,10 +185,10 @@ pStmtCmd = do
pStmtCut = do
prd <- pTerm 0
- pSym ":" `cut` Err
- typ <- pType 0 `cut` Err
- pSym ":" `cut` Err
- cns <- pTerm 0 `cut` Err
+ pSym ":" `P.cut` Err
+ typ <- pType 0 `P.cut` Err
+ pSym ":" `P.cut` Err
+ cns <- pTerm 0 `P.cut` Err
pure $ StmtCut prd typ cns
pStmt = pStmtCmd <|> pStmtCut
@@ -195,7 +197,7 @@ pProg :: P Prog
pProg = Prog <$> many pDecl <*> pStmt
pSrc :: P Prog
-pSrc = pWs *> pProg <* eof
+pSrc = pWs *> pProg <* P.eof
--------------------------------------------------------------------------------
@@ -203,4 +205,4 @@ ex :: ByteString
ex = "data Bool : + { #true, #false } data Any { #any [A] A, } #true :Bool: \\x=>@print"
main :: IO ()
-main = print $ runParse pSrc ex locZero
+main = print $ P.runParse pSrc ex P.locZero