sparsec

simple Haskell parser combinator library
git clone git://git.rr3.xyz/sparsec
Log | Files | Refs | README | LICENSE

commit 82860641fa0a8b7b36ac23669c91568f531b9b8f
parent c70b98f89abeafebb14dcaa31e86519f3c9d6973
Author: Robert Russell <robert@rr3.xyz>
Date:   Sun, 21 Dec 2025 22:14:35 -0800

Inline everything

This might not be a good idea, but I'm currently benchmarking
against some other parsec libraries, and I want to see the
effect.

Diffstat:
Msrc/Sparsec.hs | 39+++++++++++++++++++++++++++++++++++++++
Msrc/Sparsec/Char.hs | 42++++++++++++++++++++++++++++++++++++++----
Msrc/Sparsec/Loc.hs | 13++++++++++++-
Msrc/Sparsec/Span.hs | 8+++++++-
4 files changed, 96 insertions(+), 6 deletions(-)

diff --git a/src/Sparsec.hs b/src/Sparsec.hs @@ -92,12 +92,15 @@ pattern State# i l = State_# (# i, l #) stateInput# :: State# -> ByteString stateInput# (State# i _) = i +{-# INLINE stateInput# #-} stateLoc# :: State# -> Loc# stateLoc# (State# _ l) = l +{-# INLINE stateLoc# #-} stateEq# :: State# -> State# -> Bool stateEq# (State# i0 l0) (State# i1 l1) = i0 == i1 && l0 `locEq#` l1 +{-# INLINE stateEq# #-} data State = StateBox {stateUnbox :: State#} @@ -157,9 +160,11 @@ newtype ParseT e m a = ParseT (State# -> m (Result# e a)) runParseT# :: (Monad m) => ParseT e m a -> State# -> m (Result# e a) runParseT# (ParseT f) = f +{-# INLINE runParseT# #-} runParseT :: (Monad m) => ParseT e m a -> ByteString -> Loc -> m (Result# e a) runParseT p i l = p `runParseT#` State# i (locUnbox l) +{-# INLINE runParseT #-} mapParseT :: (Monad m, Monad m') => @@ -167,6 +172,7 @@ mapParseT :: ParseT e m a -> ParseT e' m' a' mapParseT f p = ParseT \s -> f $ p `runParseT#` s +{-# INLINE mapParseT #-} -------------------------------------------------------------------------------- -- Parse @@ -175,24 +181,30 @@ type Parse e a = ParseT e Identity a runParse# :: Parse e a -> State# -> Result# e a runParse# p s = runIdentity $ p `runParseT#` s +{-# INLINE runParse# #-} runParse :: Parse e a -> ByteString -> Loc -> Result# e a runParse p i l = p `runParse#` State# i (locUnbox l) +{-# INLINE runParse #-} mapParse :: (Result# e a -> Result# e' a') -> Parse e a -> Parse e' a' mapParse f = mapParseT (Identity . f . runIdentity) +{-# INLINE mapParse #-} -------------------------------------------------------------------------------- -- Core combinators fail :: (Monad m) => ParseT e m a fail = ParseT \_ -> pure Failure# +{-# INLINE fail #-} err :: (Monad m) => e -> ParseT e m a err e = ParseT \_ -> pure $ Error# e +{-# INLINE err #-} succeed :: (Monad m) => a -> ParseT e m a succeed a = ParseT \s -> pure $ Success# a s +{-# INLINE succeed #-} continue :: (Monad m) => ParseT f m b -> (e -> ParseT f m b) -> (a -> ParseT f m b) -> ParseT e m a -> ParseT f m b continue kf ke ks p = ParseT \s -> @@ -201,12 +213,15 @@ continue kf ke ks p = ParseT \s -> Failure# -> kf `runParseT#` s Error# e -> ke e `runParseT#` s Success# a s' -> ks a `runParseT#` s' +{-# INLINE continue #-} get# :: (Monad m) => (State# -> ParseT e m a) -> ParseT e m a get# f = ParseT \s -> f s `runParseT#` s +{-# INLINE get# #-} put# :: (Monad m) => State# -> ParseT e m a -> ParseT e m a put# s p = ParseT \_ -> p `runParseT#` s +{-# INLINE put# #-} -- | Read the entire input without consuming it. -- getInput :: (Monad m) => ParseT e m ByteString @@ -275,27 +290,33 @@ instance (MonadState s m) => MonadState s (ParseT e m) where -- | Turn a failure into a success and vice versa. not :: (Monad m) => ParseT e m a -> ParseT e m () not = continue (succeed ()) err (const fail) +{-# INLINE not #-} -- | Convert an error into a failure. try :: (Monad m) => ParseT e m a -> ParseT e m a try = continue fail (const fail) succeed +{-# INLINE try #-} -- | Convert a failure into an error. cut :: (Monad m) => ParseT e m a -> e -> ParseT e m a p `cut` e = continue (err e) err succeed p +{-# INLINE cut #-} -- | Catch an error. catch :: (Monad m) => ParseT e m a -> (e -> ParseT f m a) -> ParseT f m a p `catch` h = continue fail h succeed p +{-# INLINE catch #-} -- | `branch p ks kf` runs p; if it succeeds, it continues with ks; if it fails, -- it continues with kf. branch :: (Monad m) => ParseT e m a -> (a -> ParseT e m b) -> ParseT e m b -> ParseT e m b branch p ks kf = continue kf err ks p +{-# INLINE branch #-} -- | Run a parser without changing the parser state. lookahead :: (Monad m) => ParseT e m a -> ParseT e m a lookahead p = get# \s -> p <* put# s (pure ()) +{-# INLINE lookahead #-} withSpan :: (Monad m) => (a -> Span# -> ParseT e m b) -> ParseT e m a -> ParseT e m b withSpan k p = @@ -303,6 +324,7 @@ withSpan k p = p >>= \a -> get# \(State# _ end) -> k a (Span# beg end) +{-# INLINE withSpan #-} withConsumed :: (Monad m) => (a -> ByteString -> ParseT e m b) -> ParseT e m a -> ParseT e m b withConsumed k p = @@ -310,37 +332,46 @@ withConsumed k p = p >>= \a -> get# \(State# _ (Loc# b1 _ _)) -> k a (BS.take (I# (b1 -# b0)) i) +{-# INLINE withConsumed #-} -- | Iterate a parsing function until it fails. I.e., -- @iter f a = pure a >>= f >>= f >>= ...@ -- where the chain of binds is as long as possible without failure. iter :: (Monad m) => (a -> ParseT e m a) -> a -> ParseT e m a iter f a = (f a >>= iter f) <|> pure a +{-# INLINE iter #-} -- | Parse a `b` and then zero or more `a`s, and then combine in a left-nested -- fashion. chainl :: (Monad m) => (b -> a -> b) -> ParseT e m b -> ParseT e m a -> ParseT e m b chainl f pb pa = pb >>= iter \b -> f b <$> pa +{-# INLINE chainl #-} -- | Parse zero or more `a`s (greedily) and then a `b`, and then combine in a -- right-nested fashion. chainr :: (Monad m) => (a -> b -> b) -> ParseT e m a -> ParseT e m b -> ParseT e m b chainr f pa pb = f <$> pa <*> chainr f pa pb <|> pb +{-# INLINE chainr #-} many_ :: (Monad m) => ParseT e m a -> ParseT e m () many_ p = some_ p <|> pure () +{-# INLINE many_ #-} some_ :: (Monad m) => ParseT e m a -> ParseT e m () some_ p = p *> many_ p +{-# INLINE some_ #-} manySepBy :: (Monad m) => ParseT e m a -> ParseT e m b -> ParseT e m [a] manySepBy p sep = someSepBy p sep <|> pure [] +{-# INLINE manySepBy #-} someSepBy :: (Monad m) => ParseT e m a -> ParseT e m b -> ParseT e m [a] someSepBy p sep = (:) <$> p <*> many (sep *> p) +{-# INLINE someSepBy #-} choose :: (Monad m, Foldable t) => t (ParseT e m a) -> ParseT e m a choose = asum +{-# INLINE choose #-} -- | `match scrut cases` binds scrut to the first non-failing case and returns -- the result. @@ -350,6 +381,7 @@ match :: t (a -> ParseT e m b) -> ParseT e m b match scrut cases = choose $ (scrut >>=) <$> cases +{-# INLINE match #-} -------------------------------------------------------------------------------- -- Utf8 combinators @@ -364,21 +396,27 @@ next ke kc = ParseT \s@(State# i l) -> | c == '\xFFFD' -> pure $ Utf8Error# l | otherwise -> kc c `runParseT#` State# (BS.drop w i) (locAdvance# c w l) Nothing -> ke `runParseT#` s +{-# INLINE next #-} eof :: (Monad m) => ParseT e m () eof = get# \(State# i _) -> guard (BS.null i) +{-# INLINE eof #-} anyChar :: (Monad m) => ParseT e m Char anyChar = next fail succeed +{-# INLINE anyChar #-} charIf :: (Monad m) => (Char -> Bool) -> ParseT e m Char charIf want = next fail (\c -> guard (want c) $> c) +{-# INLINE charIf #-} char :: (Monad m) => Char -> ParseT e m () char c = void $ charIf (== c) +{-# INLINE char #-} string :: (Monad m) => String -> ParseT e m () string = traverse_ char +{-# INLINE string #-} natural :: (Monad m) => Int -> ParseT e m Natural natural = \case @@ -409,3 +447,4 @@ natural = \case underscores = many (char '_') in numeral >>= iter (\n -> (nat b * n +) <$> (underscores *> numeral)) _ -> error "natural: invalid base" +{-# INLINE natural #-} diff --git a/src/Sparsec/Char.hs b/src/Sparsec/Char.hs @@ -15,24 +15,58 @@ module Sparsec.Char ( charIsNum16, ) where -charIsUpperLatin, charIsLowerLatin, charIsLatin :: Char -> Bool +charIsUpperLatin :: Char -> Bool charIsUpperLatin c = 'A' <= c && c <= 'Z' +{-# INLINE charIsUpperLatin #-} + +charIsLowerLatin :: Char -> Bool charIsLowerLatin c = 'a' <= c && c <= 'z' +{-# INLINE charIsLowerLatin #-} + +charIsLatin :: Char -> Bool charIsLatin c = charIsUpperLatin c || charIsLowerLatin c +{-# INLINE charIsLatin #-} -charIsUpperGreek, charIsLowerGreek, charIsGreek :: Char -> Bool +charIsUpperGreek :: Char -> Bool charIsUpperGreek c = 'Α' <= c && c <= 'Ω' +{-# INLINE charIsUpperGreek #-} + +charIsLowerGreek :: Char -> Bool charIsLowerGreek c = 'α' <= c && c <= 'ω' +{-# INLINE charIsLowerGreek #-} + +charIsGreek :: Char -> Bool charIsGreek c = charIsUpperGreek c || charIsLowerGreek c +{-# INLINE charIsGreek #-} -charIsSafeUpperGreek, charIsSafeLowerGreek, charIsSafeGreek :: Char -> Bool +charIsSafeUpperGreek :: Char -> Bool charIsSafeUpperGreek c = c == 'Γ' || c == 'Δ' || c == 'Θ' || c == 'Λ' || c == 'Ξ' || c == 'Π' || c == 'Σ' || c == 'Φ' || c == 'Ψ' || c == 'Ω' +{-# INLINE charIsSafeUpperGreek #-} + +charIsSafeLowerGreek :: Char -> Bool charIsSafeLowerGreek c = charIsLowerGreek c && c /= 'ο' +{-# INLINE charIsSafeLowerGreek #-} + +charIsSafeGreek :: Char -> Bool charIsSafeGreek c = charIsSafeUpperGreek c || charIsSafeLowerGreek c +{-# INLINE charIsSafeGreek #-} -charIsNum2, charIsNum4, charIsNum8, charIsNum10, charIsNum16 :: Char -> Bool +charIsNum2 :: Char -> Bool charIsNum2 c = '0' <= c && c <= '1' +{-# INLINE charIsNum2 #-} + +charIsNum4 :: Char -> Bool charIsNum4 c = '0' <= c && c <= '3' +{-# INLINE charIsNum4 #-} + +charIsNum8 :: Char -> Bool charIsNum8 c = '0' <= c && c <= '7' +{-# INLINE charIsNum8 #-} + +charIsNum10 :: Char -> Bool charIsNum10 c = '0' <= c && c <= '9' +{-# INLINE charIsNum10 #-} + +charIsNum16 :: Char -> Bool charIsNum16 c = '0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' +{-# INLINE charIsNum16 #-} diff --git a/src/Sparsec/Loc.hs b/src/Sparsec/Loc.hs @@ -25,22 +25,32 @@ pattern Loc# :: Int# -> Int# -> Int# -> Loc# pattern Loc# b r c = Loc_# (# b, r, c #) {-# COMPLETE Loc# #-} -locByte#, locRow#, locCol# :: Loc# -> Int# +locByte# :: Loc# -> Int# locByte# (Loc# b _ _) = b +{-# INLINE locByte# #-} + +locRow# :: Loc# -> Int# locRow# (Loc# _ r _) = r +{-# INLINE locRow# #-} + +locCol# :: Loc# -> Int# locCol# (Loc# _ _ c) = c +{-# INLINE locCol# #-} locEq# :: Loc# -> Loc# -> Bool locEq# (Loc# b0 r0 c0) (Loc# b1 r1 c1) = b0 `intEq#` b1 && r0 `intEq#` r1 && c0 `intEq#` c1 +{-# INLINE locEq# #-} locShow# :: Loc# -> String locShow# (Loc# b r c) = show (I# b) ++ "@" ++ show (I# r) ++ ":" ++ show (I# c) +{-# INLINE locShow# #-} locAdvance# :: Char -> Int -> Loc# -> Loc# locAdvance# x (I# w) (Loc# b r c) = case x of '\n' -> Loc# (b +# w) (r +# 1#) 0# _ -> Loc# (b +# w) r (c +# 1#) +{-# INLINE locAdvance# #-} -- | Loc is a source code location, tracking byte offset, row (number of '\n' -- characters seen so far), and column (number of characters into the current @@ -54,6 +64,7 @@ pattern Loc b r c = LocBox (Loc# b r c) locZero :: Loc locZero = Loc 0# 0# 0# +{-# INLINE locZero #-} instance Eq Loc where LocBox l0 == LocBox l1 = l0 `locEq#` l1 diff --git a/src/Sparsec/Span.hs b/src/Sparsec/Span.hs @@ -17,16 +17,22 @@ pattern Span# :: Loc# -> Loc# -> Span# pattern Span# beg end = Span_# (# beg, end #) {-# COMPLETE Span# #-} -spanBeg#, spanEnd# :: Span# -> Loc# +spanBeg# :: Span# -> Loc# spanBeg# (Span# beg _) = beg +{-# INLINE spanBeg# #-} + +spanEnd# :: Span# -> Loc# spanEnd# (Span# _ end) = end +{-# INLINE spanEnd# #-} spanEq# :: Span# -> Span# -> Bool spanEq# (Span# beg0 end0) (Span# beg1 end1) = beg0 `locEq#` beg1 && end0 `locEq#` end1 +{-# INLINE spanEq# #-} spanShow# :: Span# -> String spanShow# (Span# beg end) = locShow# beg ++ "--" ++ locShow# end +{-# INLINE spanShow# #-} -- | Span is a pair of Loc's specifying a range in the input. The beginning/left -- Loc is inclusive and the ending/right Loc is exclusive.