commit 82860641fa0a8b7b36ac23669c91568f531b9b8f
parent c70b98f89abeafebb14dcaa31e86519f3c9d6973
Author: Robert Russell <robert@rr3.xyz>
Date: Sun, 21 Dec 2025 22:14:35 -0800
Inline everything
This might not be a good idea, but I'm currently benchmarking
against some other parsec libraries, and I want to see the
effect.
Diffstat:
4 files changed, 96 insertions(+), 6 deletions(-)
diff --git a/src/Sparsec.hs b/src/Sparsec.hs
@@ -92,12 +92,15 @@ pattern State# i l = State_# (# i, l #)
stateInput# :: State# -> ByteString
stateInput# (State# i _) = i
+{-# INLINE stateInput# #-}
stateLoc# :: State# -> Loc#
stateLoc# (State# _ l) = l
+{-# INLINE stateLoc# #-}
stateEq# :: State# -> State# -> Bool
stateEq# (State# i0 l0) (State# i1 l1) = i0 == i1 && l0 `locEq#` l1
+{-# INLINE stateEq# #-}
data State = StateBox {stateUnbox :: State#}
@@ -157,9 +160,11 @@ newtype ParseT e m a = ParseT (State# -> m (Result# e a))
runParseT# :: (Monad m) => ParseT e m a -> State# -> m (Result# e a)
runParseT# (ParseT f) = f
+{-# INLINE runParseT# #-}
runParseT :: (Monad m) => ParseT e m a -> ByteString -> Loc -> m (Result# e a)
runParseT p i l = p `runParseT#` State# i (locUnbox l)
+{-# INLINE runParseT #-}
mapParseT ::
(Monad m, Monad m') =>
@@ -167,6 +172,7 @@ mapParseT ::
ParseT e m a ->
ParseT e' m' a'
mapParseT f p = ParseT \s -> f $ p `runParseT#` s
+{-# INLINE mapParseT #-}
--------------------------------------------------------------------------------
-- Parse
@@ -175,24 +181,30 @@ type Parse e a = ParseT e Identity a
runParse# :: Parse e a -> State# -> Result# e a
runParse# p s = runIdentity $ p `runParseT#` s
+{-# INLINE runParse# #-}
runParse :: Parse e a -> ByteString -> Loc -> Result# e a
runParse p i l = p `runParse#` State# i (locUnbox l)
+{-# INLINE runParse #-}
mapParse :: (Result# e a -> Result# e' a') -> Parse e a -> Parse e' a'
mapParse f = mapParseT (Identity . f . runIdentity)
+{-# INLINE mapParse #-}
--------------------------------------------------------------------------------
-- Core combinators
fail :: (Monad m) => ParseT e m a
fail = ParseT \_ -> pure Failure#
+{-# INLINE fail #-}
err :: (Monad m) => e -> ParseT e m a
err e = ParseT \_ -> pure $ Error# e
+{-# INLINE err #-}
succeed :: (Monad m) => a -> ParseT e m a
succeed a = ParseT \s -> pure $ Success# a s
+{-# INLINE succeed #-}
continue :: (Monad m) => ParseT f m b -> (e -> ParseT f m b) -> (a -> ParseT f m b) -> ParseT e m a -> ParseT f m b
continue kf ke ks p = ParseT \s ->
@@ -201,12 +213,15 @@ continue kf ke ks p = ParseT \s ->
Failure# -> kf `runParseT#` s
Error# e -> ke e `runParseT#` s
Success# a s' -> ks a `runParseT#` s'
+{-# INLINE continue #-}
get# :: (Monad m) => (State# -> ParseT e m a) -> ParseT e m a
get# f = ParseT \s -> f s `runParseT#` s
+{-# INLINE get# #-}
put# :: (Monad m) => State# -> ParseT e m a -> ParseT e m a
put# s p = ParseT \_ -> p `runParseT#` s
+{-# INLINE put# #-}
-- | Read the entire input without consuming it.
-- getInput :: (Monad m) => ParseT e m ByteString
@@ -275,27 +290,33 @@ instance (MonadState s m) => MonadState s (ParseT e m) where
-- | Turn a failure into a success and vice versa.
not :: (Monad m) => ParseT e m a -> ParseT e m ()
not = continue (succeed ()) err (const fail)
+{-# INLINE not #-}
-- | Convert an error into a failure.
try :: (Monad m) => ParseT e m a -> ParseT e m a
try = continue fail (const fail) succeed
+{-# INLINE try #-}
-- | Convert a failure into an error.
cut :: (Monad m) => ParseT e m a -> e -> ParseT e m a
p `cut` e = continue (err e) err succeed p
+{-# INLINE cut #-}
-- | Catch an error.
catch :: (Monad m) => ParseT e m a -> (e -> ParseT f m a) -> ParseT f m a
p `catch` h = continue fail h succeed p
+{-# INLINE catch #-}
-- | `branch p ks kf` runs p; if it succeeds, it continues with ks; if it fails,
-- it continues with kf.
branch :: (Monad m) => ParseT e m a -> (a -> ParseT e m b) -> ParseT e m b -> ParseT e m b
branch p ks kf = continue kf err ks p
+{-# INLINE branch #-}
-- | Run a parser without changing the parser state.
lookahead :: (Monad m) => ParseT e m a -> ParseT e m a
lookahead p = get# \s -> p <* put# s (pure ())
+{-# INLINE lookahead #-}
withSpan :: (Monad m) => (a -> Span# -> ParseT e m b) -> ParseT e m a -> ParseT e m b
withSpan k p =
@@ -303,6 +324,7 @@ withSpan k p =
p >>= \a ->
get# \(State# _ end) ->
k a (Span# beg end)
+{-# INLINE withSpan #-}
withConsumed :: (Monad m) => (a -> ByteString -> ParseT e m b) -> ParseT e m a -> ParseT e m b
withConsumed k p =
@@ -310,37 +332,46 @@ withConsumed k p =
p >>= \a ->
get# \(State# _ (Loc# b1 _ _)) ->
k a (BS.take (I# (b1 -# b0)) i)
+{-# INLINE withConsumed #-}
-- | Iterate a parsing function until it fails. I.e.,
-- @iter f a = pure a >>= f >>= f >>= ...@
-- where the chain of binds is as long as possible without failure.
iter :: (Monad m) => (a -> ParseT e m a) -> a -> ParseT e m a
iter f a = (f a >>= iter f) <|> pure a
+{-# INLINE iter #-}
-- | Parse a `b` and then zero or more `a`s, and then combine in a left-nested
-- fashion.
chainl :: (Monad m) => (b -> a -> b) -> ParseT e m b -> ParseT e m a -> ParseT e m b
chainl f pb pa = pb >>= iter \b -> f b <$> pa
+{-# INLINE chainl #-}
-- | Parse zero or more `a`s (greedily) and then a `b`, and then combine in a
-- right-nested fashion.
chainr :: (Monad m) => (a -> b -> b) -> ParseT e m a -> ParseT e m b -> ParseT e m b
chainr f pa pb = f <$> pa <*> chainr f pa pb <|> pb
+{-# INLINE chainr #-}
many_ :: (Monad m) => ParseT e m a -> ParseT e m ()
many_ p = some_ p <|> pure ()
+{-# INLINE many_ #-}
some_ :: (Monad m) => ParseT e m a -> ParseT e m ()
some_ p = p *> many_ p
+{-# INLINE some_ #-}
manySepBy :: (Monad m) => ParseT e m a -> ParseT e m b -> ParseT e m [a]
manySepBy p sep = someSepBy p sep <|> pure []
+{-# INLINE manySepBy #-}
someSepBy :: (Monad m) => ParseT e m a -> ParseT e m b -> ParseT e m [a]
someSepBy p sep = (:) <$> p <*> many (sep *> p)
+{-# INLINE someSepBy #-}
choose :: (Monad m, Foldable t) => t (ParseT e m a) -> ParseT e m a
choose = asum
+{-# INLINE choose #-}
-- | `match scrut cases` binds scrut to the first non-failing case and returns
-- the result.
@@ -350,6 +381,7 @@ match ::
t (a -> ParseT e m b) ->
ParseT e m b
match scrut cases = choose $ (scrut >>=) <$> cases
+{-# INLINE match #-}
--------------------------------------------------------------------------------
-- Utf8 combinators
@@ -364,21 +396,27 @@ next ke kc = ParseT \s@(State# i l) ->
| c == '\xFFFD' -> pure $ Utf8Error# l
| otherwise -> kc c `runParseT#` State# (BS.drop w i) (locAdvance# c w l)
Nothing -> ke `runParseT#` s
+{-# INLINE next #-}
eof :: (Monad m) => ParseT e m ()
eof = get# \(State# i _) -> guard (BS.null i)
+{-# INLINE eof #-}
anyChar :: (Monad m) => ParseT e m Char
anyChar = next fail succeed
+{-# INLINE anyChar #-}
charIf :: (Monad m) => (Char -> Bool) -> ParseT e m Char
charIf want = next fail (\c -> guard (want c) $> c)
+{-# INLINE charIf #-}
char :: (Monad m) => Char -> ParseT e m ()
char c = void $ charIf (== c)
+{-# INLINE char #-}
string :: (Monad m) => String -> ParseT e m ()
string = traverse_ char
+{-# INLINE string #-}
natural :: (Monad m) => Int -> ParseT e m Natural
natural = \case
@@ -409,3 +447,4 @@ natural = \case
underscores = many (char '_')
in numeral >>= iter (\n -> (nat b * n +) <$> (underscores *> numeral))
_ -> error "natural: invalid base"
+{-# INLINE natural #-}
diff --git a/src/Sparsec/Char.hs b/src/Sparsec/Char.hs
@@ -15,24 +15,58 @@ module Sparsec.Char (
charIsNum16,
) where
-charIsUpperLatin, charIsLowerLatin, charIsLatin :: Char -> Bool
+charIsUpperLatin :: Char -> Bool
charIsUpperLatin c = 'A' <= c && c <= 'Z'
+{-# INLINE charIsUpperLatin #-}
+
+charIsLowerLatin :: Char -> Bool
charIsLowerLatin c = 'a' <= c && c <= 'z'
+{-# INLINE charIsLowerLatin #-}
+
+charIsLatin :: Char -> Bool
charIsLatin c = charIsUpperLatin c || charIsLowerLatin c
+{-# INLINE charIsLatin #-}
-charIsUpperGreek, charIsLowerGreek, charIsGreek :: Char -> Bool
+charIsUpperGreek :: Char -> Bool
charIsUpperGreek c = 'Α' <= c && c <= 'Ω'
+{-# INLINE charIsUpperGreek #-}
+
+charIsLowerGreek :: Char -> Bool
charIsLowerGreek c = 'α' <= c && c <= 'ω'
+{-# INLINE charIsLowerGreek #-}
+
+charIsGreek :: Char -> Bool
charIsGreek c = charIsUpperGreek c || charIsLowerGreek c
+{-# INLINE charIsGreek #-}
-charIsSafeUpperGreek, charIsSafeLowerGreek, charIsSafeGreek :: Char -> Bool
+charIsSafeUpperGreek :: Char -> Bool
charIsSafeUpperGreek c = c == 'Γ' || c == 'Δ' || c == 'Θ' || c == 'Λ' || c == 'Ξ' || c == 'Π' || c == 'Σ' || c == 'Φ' || c == 'Ψ' || c == 'Ω'
+{-# INLINE charIsSafeUpperGreek #-}
+
+charIsSafeLowerGreek :: Char -> Bool
charIsSafeLowerGreek c = charIsLowerGreek c && c /= 'ο'
+{-# INLINE charIsSafeLowerGreek #-}
+
+charIsSafeGreek :: Char -> Bool
charIsSafeGreek c = charIsSafeUpperGreek c || charIsSafeLowerGreek c
+{-# INLINE charIsSafeGreek #-}
-charIsNum2, charIsNum4, charIsNum8, charIsNum10, charIsNum16 :: Char -> Bool
+charIsNum2 :: Char -> Bool
charIsNum2 c = '0' <= c && c <= '1'
+{-# INLINE charIsNum2 #-}
+
+charIsNum4 :: Char -> Bool
charIsNum4 c = '0' <= c && c <= '3'
+{-# INLINE charIsNum4 #-}
+
+charIsNum8 :: Char -> Bool
charIsNum8 c = '0' <= c && c <= '7'
+{-# INLINE charIsNum8 #-}
+
+charIsNum10 :: Char -> Bool
charIsNum10 c = '0' <= c && c <= '9'
+{-# INLINE charIsNum10 #-}
+
+charIsNum16 :: Char -> Bool
charIsNum16 c = '0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f'
+{-# INLINE charIsNum16 #-}
diff --git a/src/Sparsec/Loc.hs b/src/Sparsec/Loc.hs
@@ -25,22 +25,32 @@ pattern Loc# :: Int# -> Int# -> Int# -> Loc#
pattern Loc# b r c = Loc_# (# b, r, c #)
{-# COMPLETE Loc# #-}
-locByte#, locRow#, locCol# :: Loc# -> Int#
+locByte# :: Loc# -> Int#
locByte# (Loc# b _ _) = b
+{-# INLINE locByte# #-}
+
+locRow# :: Loc# -> Int#
locRow# (Loc# _ r _) = r
+{-# INLINE locRow# #-}
+
+locCol# :: Loc# -> Int#
locCol# (Loc# _ _ c) = c
+{-# INLINE locCol# #-}
locEq# :: Loc# -> Loc# -> Bool
locEq# (Loc# b0 r0 c0) (Loc# b1 r1 c1) =
b0 `intEq#` b1 && r0 `intEq#` r1 && c0 `intEq#` c1
+{-# INLINE locEq# #-}
locShow# :: Loc# -> String
locShow# (Loc# b r c) = show (I# b) ++ "@" ++ show (I# r) ++ ":" ++ show (I# c)
+{-# INLINE locShow# #-}
locAdvance# :: Char -> Int -> Loc# -> Loc#
locAdvance# x (I# w) (Loc# b r c) = case x of
'\n' -> Loc# (b +# w) (r +# 1#) 0#
_ -> Loc# (b +# w) r (c +# 1#)
+{-# INLINE locAdvance# #-}
-- | Loc is a source code location, tracking byte offset, row (number of '\n'
-- characters seen so far), and column (number of characters into the current
@@ -54,6 +64,7 @@ pattern Loc b r c = LocBox (Loc# b r c)
locZero :: Loc
locZero = Loc 0# 0# 0#
+{-# INLINE locZero #-}
instance Eq Loc where
LocBox l0 == LocBox l1 = l0 `locEq#` l1
diff --git a/src/Sparsec/Span.hs b/src/Sparsec/Span.hs
@@ -17,16 +17,22 @@ pattern Span# :: Loc# -> Loc# -> Span#
pattern Span# beg end = Span_# (# beg, end #)
{-# COMPLETE Span# #-}
-spanBeg#, spanEnd# :: Span# -> Loc#
+spanBeg# :: Span# -> Loc#
spanBeg# (Span# beg _) = beg
+{-# INLINE spanBeg# #-}
+
+spanEnd# :: Span# -> Loc#
spanEnd# (Span# _ end) = end
+{-# INLINE spanEnd# #-}
spanEq# :: Span# -> Span# -> Bool
spanEq# (Span# beg0 end0) (Span# beg1 end1) =
beg0 `locEq#` beg1 && end0 `locEq#` end1
+{-# INLINE spanEq# #-}
spanShow# :: Span# -> String
spanShow# (Span# beg end) = locShow# beg ++ "--" ++ locShow# end
+{-# INLINE spanShow# #-}
-- | Span is a pair of Loc's specifying a range in the input. The beginning/left
-- Loc is inclusive and the ending/right Loc is exclusive.