commit d4de0fa6982ad9b4be8024e371e1b25dcb99275a
parent 0ef3f55b8e3aaf39074c713feedc96a04b48a43c
Author: Robert Russell <robert@rr3.xyz>
Date: Sun, 14 Dec 2025 13:56:53 -0800
Unbox parser state
Diffstat:
| M | Sparsec.hs | | | 255 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------- |
| M | package.yaml | | | 4 | ++++ |
2 files changed, 191 insertions(+), 68 deletions(-)
diff --git a/Sparsec.hs b/Sparsec.hs
@@ -32,31 +32,59 @@ module Sparsec (
runeIsNum10,
runeIsNum16,
-- Loc
+ Loc#,
+ pattern Loc#,
+ locByte#,
+ locRow#,
+ locCol#,
+ locEq#,
Loc (..),
locZero,
+ locBox,
+ locUnbox,
-- Span
+ Span#,
+ pattern Span#,
+ spanBeg#,
+ spanEnd#,
+ spanEq#,
Span (..),
+ spanBox,
+ spanUnbox,
+ -- State
+ State#,
+ pattern State#,
+ stateInput#,
+ stateLoc#,
+ stateEq#,
+ State (..),
+ stateBox,
+ stateUnbox,
-- Result
- Result (..),
+ Result# (..),
-- Parse monad
ParseT (..),
+ runParseT#,
runParseT,
mapParseT,
Parse,
+ runParse#,
runParse,
mapParse,
-- Core combinators
+ fail,
+ err,
+ succeed,
+ continue,
read,
write,
getLoc,
putLoc,
save,
load,
- fail,
+ -- Derived combinators
not,
- err,
catch,
- -- Derived combinators
eof,
lookahead,
spanOf,
@@ -92,20 +120,30 @@ module Sparsec (
import Control.Applicative
import Control.Monad hiding (fail)
-import Control.Monad.Identity
-import Control.Monad.Reader
-import Control.Monad.State
+import Control.Monad.Identity (Identity (..))
+import Control.Monad.Reader (MonadReader (..))
+import Control.Monad.State (MonadState (..))
+import Control.Monad.Trans (MonadTrans (..))
import Data.ByteString (ByteString)
import Data.ByteString qualified as BS
import Data.ByteString.UTF8 qualified as UTF8
import Data.Char
import Data.Foldable
import Data.Functor
+import GHC.Exts (Int (..), Int#, (==#))
import Numeric.Natural
import Text.Printf
import Prelude hiding (fail, not, read)
--------------------------------------------------------------------------------
+-- Util
+
+intEq# :: Int# -> Int# -> Bool
+intEq# x y = case x ==# y of
+ 0# -> False
+ _ -> True
+
+--------------------------------------------------------------------------------
-- Char
charIsUpperLatin, charIsLowerLatin, charIsLatin :: Char -> Bool
@@ -171,6 +209,20 @@ runeIsNum16 = runeElim False charIsNum16
--------------------------------------------------------------------------------
-- Loc
+newtype Loc# = Loc_# (# Int#, Int#, Int# #)
+pattern Loc# :: Int# -> Int# -> Int# -> Loc#
+pattern Loc# b r c = Loc_# (# b, r, c #)
+{-# COMPLETE Loc# #-}
+
+locByte#, locRow#, locCol# :: Loc# -> Int#
+locByte# (Loc# b _ _) = b
+locRow# (Loc# _ r _) = r
+locCol# (Loc# _ _ c) = c
+
+locEq# :: Loc# -> Loc# -> Bool
+locEq# (Loc# b0 r0 c0) (Loc# b1 r1 c1) =
+ intEq# b0 b1 && intEq# r0 r1 && intEq# c0 c1
+
-- | Loc is a source code location, tracking byte offset, row (number of '\n'
-- characters seen so far), and column (number of characters into the current
-- row). All three of these quantities are 0-based. The column tracking is
@@ -184,88 +236,177 @@ locZero = Loc 0 0 0
instance Show Loc where
show (Loc b r c) = printf "%d:%d#%d" r c b
+locBox :: Loc# -> Loc
+locBox (Loc# b r c) = Loc (I# b) (I# r) (I# c)
+
+locUnbox :: Loc -> Loc#
+locUnbox (Loc (I# b) (I# r) (I# c)) = Loc# b r c
+
--------------------------------------------------------------------------------
-- Span
+newtype Span# = Span_# (# Loc#, Loc# #)
+pattern Span# :: Loc# -> Loc# -> Span#
+pattern Span# beg end = Span_# (# beg, end #)
+{-# COMPLETE Span# #-}
+
+spanBeg#, spanEnd# :: Span# -> Loc#
+spanBeg# (Span# beg _) = beg
+spanEnd# (Span# _ end) = end
+
+spanEq# :: Span# -> Span# -> Bool
+spanEq# (Span# beg0 end0) (Span# beg1 end1) =
+ locEq# beg0 beg1 && locEq# end0 end1
+
-- | Span is a pair of Loc's specifying a range in the input. The beginning/left
-- Loc is inclusive and the ending/right Loc is exclusive.
-data Span = Span Loc Loc deriving (Eq)
+data Span = Span {spanBeg :: Loc, spanEnd :: Loc} deriving (Eq)
-- Weird Show instance intended for debugging
instance Show Span where
show (Span beg end) = printf "%s--%s" (show beg) (show end)
+spanBox :: Span# -> Span
+spanBox (Span# beg end) = Span (locBox beg) (locBox end)
+
+spanUnbox :: Span -> Span#
+spanUnbox (Span beg end) = Span# (locUnbox beg) (locUnbox end)
+
+--------------------------------------------------------------------------------
+-- State
+
+newtype State# = State_# (# ByteString, Loc# #)
+pattern State# :: ByteString -> Loc# -> State#
+pattern State# i l = State_# (# i, l #)
+{-# COMPLETE State# #-}
+
+stateInput# :: State# -> ByteString
+stateInput# (State# i _) = i
+
+stateLoc# :: State# -> Loc#
+stateLoc# (State# _ l) = l
+
+stateEq# :: State# -> State# -> Bool
+stateEq# (State# i0 l0) (State# i1 l1) = i0 == i1 && locEq# l0 l1
+
+data State = State {stateInput :: ByteString, stateLoc :: Loc} deriving (Eq)
+
+stateBox :: State# -> State
+stateBox (State# i l) = State i (locBox l)
+
+stateUnbox :: State -> State#
+stateUnbox (State i l) = State# i (locUnbox l)
+
--------------------------------------------------------------------------------
-- Result
-data Result e a
- = Failure
- | Error e
- | Success a ByteString Loc
- deriving (Eq)
+-- TODO: Result (no hash)
+
+data Result# e a = Failure# | Error# e | Success# a State#
-instance (Show e, Show a) => Show (Result e a) where
+instance (Eq e, Eq a) => Eq (Result# e a) where
+ Failure# == Failure# = True
+ Error# e0 == Error# e1 = e0 == e1
+ Success# a0 s0 == Success# a1 s1 = a0 == a1 && stateEq# s0 s1
+ _ == _ = False
+
+instance (Show e, Show a) => Show (Result# e a) where
show = \case
- Failure -> "failure"
- Error e -> "error: " ++ show e
- Success a i _ ->
+ Failure# -> "failure"
+ Error# e -> "error: " ++ show e
+ Success# a (State# i _) ->
if BS.null i
then
- printf "ok: %s" (show a)
+ printf "success: %s" (show a)
else
printf
- "ok (%d bytes remaining): %s"
+ "success (%d bytes remaining): %s"
(BS.length i)
(show a)
-instance Functor (Result e) where
+instance Functor (Result# e) where
fmap f = \case
- Failure -> Failure
- Error e -> Error e
- Success a i l -> Success (f a) i l
+ Failure# -> Failure#
+ Error# e -> Error# e
+ Success# a s -> Success# (f a) s
--------------------------------------------------------------------------------
--- Parse monad and core combinators
+-- ParseT
-- | ParseT is a monad transformer for parsing. It effectively has a ByteString
-- and Loc state effect for the input, and an error effect (of some type e) for
-- parsing errors. Parsing *errors* are distinct from parsing *failures* in
-- that only the latter trigger backtracking in the Alternative instance.
-newtype ParseT e m a = ParseT (ByteString -> Loc -> m (Result e a))
+newtype ParseT e m a = ParseT (State# -> m (Result# e a))
+
+runParseT# :: (Monad m) => ParseT e m a -> State# -> m (Result# e a)
+runParseT# (ParseT f) = f
-runParseT :: (Monad m) => ParseT e m a -> ByteString -> Loc -> m (Result e a)
-runParseT (ParseT f) = f
+runParseT :: (Monad m) => ParseT e m a -> ByteString -> Loc -> m (Result# e a)
+runParseT p i l = p `runParseT#` State# i (locUnbox l)
mapParseT ::
(Monad m, Monad m') =>
- (m (Result e a) -> m' (Result e' a')) ->
+ (m (Result# e a) -> m' (Result# e' a')) ->
ParseT e m a ->
ParseT e' m' a'
-mapParseT f p = ParseT \i l -> f $ runParseT p i l
+mapParseT f p = ParseT \s -> f $ p `runParseT#` s
+
+--------------------------------------------------------------------------------
+-- Parse
type Parse e a = ParseT e Identity a
-runParse :: Parse e a -> ByteString -> Loc -> Result e a
-runParse (ParseT f) i l = runIdentity $ f i l
+runParse# :: Parse e a -> State# -> Result# e a
+runParse# p s = runIdentity $ p `runParseT#` s
-mapParse :: (Result e a -> Result e' a') -> Parse e a -> Parse e' a'
+runParse :: Parse e a -> ByteString -> Loc -> Result# e a
+runParse p i l = p `runParse#` State# i (locUnbox l)
+
+mapParse :: (Result# e a -> Result# e' a') -> Parse e a -> Parse e' a'
mapParse f = mapParseT (Identity . f . runIdentity)
+--------------------------------------------------------------------------------
+-- Core combinators
+
fail :: (Monad m) => ParseT e m a
-fail = ParseT \_ _ -> pure Failure
+fail = ParseT \_ -> pure Failure#
err :: (Monad m) => e -> ParseT e m a
-err e = ParseT \_ _ -> pure $ Error e
+err e = ParseT \_ -> pure $ Error# e
succeed :: (Monad m) => a -> ParseT e m a
-succeed a = ParseT \i l -> pure $ Success a i l
+succeed a = ParseT \s -> pure $ Success# a s
continue :: (Monad m) => ParseT e' m a' -> (e -> ParseT e' m a') -> (a -> ParseT e' m a') -> ParseT e m a -> ParseT e' m a'
-continue kf ke ks p = ParseT \i l ->
- runParseT p i l >>= \case
- Failure -> runParseT kf i l
- Error e -> runParseT (ke e) i l
- Success a i' l' -> runParseT (ks a) i' l'
+continue kf ke ks p = ParseT \s ->
+ p `runParseT#` s >>= \case
+ Failure# -> kf `runParseT#` s
+ Error# e -> ke e `runParseT#` s
+ Success# a s' -> ks a `runParseT#` s'
+
+-- | Read the entire input without consuming it.
+read :: (Monad m) => ParseT e m ByteString
+read = ParseT \s -> pure $ Success# (stateInput# s) s
+
+-- | Replace the entire input without affecting the current location.
+write :: (Monad m) => ByteString -> ParseT e m ()
+write i = ParseT \(State# _ l) -> pure $ Success# () (State# i l)
+
+getLoc :: (Monad m) => ParseT e m Loc
+getLoc = ParseT \s -> pure $ Success# (locBox $ stateLoc# s) s
+
+putLoc :: (Monad m) => Loc -> ParseT e m ()
+putLoc l = ParseT \(State# i _) -> pure $ Success# () (State# i (locUnbox l))
+
+save :: (Monad m) => ParseT e m State
+save = ParseT \s -> pure $ Success# (stateBox s) s
+
+load :: (Monad m) => State -> ParseT e m ()
+load s = ParseT \_ -> pure $ Success# () (stateUnbox s)
+
+--------------------------------------------------------------------------------
+-- Instances
instance (Monad m) => Functor (ParseT e m) where
fmap = liftM
@@ -282,37 +423,18 @@ instance (Monad m) => Alternative (ParseT e m) where
p <|> q = continue q err succeed p
instance MonadTrans (ParseT e) where
- lift m = ParseT \i l -> (\a -> Success a i l) <$> m
+ lift m = ParseT \s -> (`Success#` s) <$> m
instance (MonadReader r m) => MonadReader r (ParseT e m) where
ask = lift ask
- local f p = ParseT \i l -> local f $ runParseT p i l
+ local f p = ParseT \s -> local f $ p `runParseT#` s
instance (MonadState s m) => MonadState s (ParseT e m) where
get = lift get
put = lift . put
--- | Read the entire input without consuming it.
-read :: (Monad m) => ParseT e m ByteString
-read = ParseT \i l -> pure $ Success i i l
-
--- | Replace the entire input without affecting the current location.
-write :: (Monad m) => ByteString -> ParseT e m ()
-write i = ParseT \_ l -> pure $ Success () i l
-
-getLoc :: (Monad m) => ParseT e m Loc
-getLoc = ParseT \i l -> pure $ Success l i l
-
-putLoc :: (Monad m) => Loc -> ParseT e m ()
-putLoc l = ParseT \i _ -> pure $ Success () i l
-
--- | Save parsing state. This is effectively a combination of read and getLoc.
-save :: (Monad m) => ParseT e m (ByteString, Loc)
-save = ParseT \i l -> pure $ Success (i, l) i l
-
--- | Load parsing state. This is effectively a combination of write and putLoc.
-load :: (Monad m) => (ByteString, Loc) -> ParseT e m ()
-load (i, l) = ParseT \_ _ -> pure $ Success () i l
+--------------------------------------------------------------------------------
+-- General combinators
-- | Turn a failure into a success and vice versa.
not :: (Monad m) => ParseT e m a -> ParseT e m ()
@@ -322,9 +444,6 @@ not = continue (succeed ()) err (const fail)
catch :: (Monad m) => ParseT e m a -> (e -> ParseT e' m a) -> ParseT e' m a
p `catch` h = continue fail h succeed p
---------------------------------------------------------------------------------
--- General combinators
-
eof :: (Monad m) => ParseT e m ()
eof = guard . BS.null =<< read
@@ -345,10 +464,10 @@ spanOf p = do
bytesOf :: (Monad m) => ParseT e m a -> ParseT e m (a, ByteString)
bytesOf p = do
- (input, beg) <- save
+ State i beg <- save
a <- p
end <- getLoc
- pure (a, BS.take (locByte end - locByte beg) input)
+ pure (a, BS.take (locByte end - locByte beg) i)
-- | Convert a failure into an error.
--
diff --git a/package.yaml b/package.yaml
@@ -13,9 +13,13 @@ language: GHC2024
default-extensions:
- BlockArguments
+ - MagicHash
- MultiWayIf
- OverloadedStrings
+ - PatternSynonyms
+ - UnboxedTuples
- UndecidableInstances
+ - UnliftedNewtypes
dependencies:
- base >= 4.20 && < 5