commit ceefed6f32eac1424a789568be5c950d4f207798
parent e89fbd9aaa39e3916ad86d6362d377d5b18b3c59
Author: Robert Russell <robert@rr3.xyz>
Date: Sat, 13 Dec 2025 22:20:38 -0800
Update to GHC 9.10 and language version GHC2024, and clean up
Diffstat:
3 files changed, 177 insertions(+), 146 deletions(-)
diff --git a/Sparsec.hs b/Sparsec.hs
@@ -1,50 +1,52 @@
module Sparsec where
+
import Control.Applicative
import Control.Monad hiding (fail)
import Control.Monad.Identity
import Control.Monad.Reader
import Control.Monad.State
import Data.ByteString (ByteString)
-import qualified Data.ByteString as ByteString
-import qualified Data.ByteString.UTF8 as UTF8
+import Data.ByteString qualified as ByteString
+import Data.ByteString.UTF8 qualified as UTF8
import Data.Char
import Data.Foldable
+import Data.Functor
import Numeric.Natural
-import Prelude hiding (fail, read)
import Text.Printf
+import Prelude hiding (fail, read)
--------------------------------------------------------------------------------
-- Rune
-data Rune = RuneEof | RuneChar Char deriving Eq
+data Rune = RuneEof | RuneChar Char deriving (Eq)
instance Show Rune where
show = \case
- RuneEof -> "EOF"
+ RuneEof -> "EOF"
RuneChar c -> show c
-runeCase :: a -> (Char -> a) -> Rune -> a
-runeCase eof chr = \case
- RuneEof -> eof
+runeElim :: a -> (Char -> a) -> Rune -> a
+runeElim eof chr = \case
+ RuneEof -> eof
RuneChar c -> chr c
runeIsSpace :: Rune -> Bool
-runeIsSpace = runeCase False isSpace
+runeIsSpace = runeElim False isSpace
runeIsLetter :: Rune -> Bool
-runeIsLetter = runeCase False isLetter
+runeIsLetter = runeElim False isLetter
runeIsDigit :: Rune -> Bool
-runeIsDigit = runeCase False isDigit
+runeIsDigit = runeElim False isDigit
--------------------------------------------------------------------------------
--- Locations
+-- Loc
--- Loc is a source code location, tracking byte offset, row (number of '\n'
+-- | Loc is a source code location, tracking byte offset, row (number of '\n'
-- characters seen so far), and column (number of characters into the current
-- row). All three of these quantities are 0-based. The column tracking is
-- somewhat stupid in that it treats each Unicode codepoint as one column.
-data Loc = Loc { byte :: Int, row :: Int, col :: Int } deriving Eq
+data Loc = Loc {byte :: Int, row :: Int, col :: Int} deriving (Eq)
locZero :: Loc
locZero = Loc 0 0 0
@@ -54,11 +56,11 @@ instance Show Loc where
show (Loc b r c) = printf "%d:%d#%d" r c b
--------------------------------------------------------------------------------
--- Spans
+-- Span
--- Span is a pair of Loc's specifying a range in the input. The beginning/left
+-- | Span is a pair of Loc's specifying a range in the input. The beginning/left
-- Loc is inclusive and the ending/right Loc is exclusive.
-data Span = Span Loc Loc deriving Eq
+data Span = Span Loc Loc deriving (Eq)
-- Weird Show instance intended for debugging
instance Show Span where
@@ -68,44 +70,48 @@ instance Show Span where
-- Result
data Result e a
- = Fail
+ = Failure
| Error e
| Ok a ByteString Loc
- deriving Eq
+ deriving (Eq)
instance (Show e, Show a) => Show (Result e a) where
show = \case
- Fail -> "fail"
+ Failure -> "failure"
Error e -> "error: " ++ show e
Ok a rest _ ->
- if ByteString.null rest then
- printf "ok: %s" (show a)
- else
- printf "ok (%d bytes remaining): %s"
- (ByteString.length rest) (show a)
+ if ByteString.null rest
+ then
+ printf "ok: %s" (show a)
+ else
+ printf
+ "ok (%d bytes remaining): %s"
+ (ByteString.length rest)
+ (show a)
instance Functor (Result e) where
fmap f = \case
- Fail -> Fail
+ Failure -> Failure
Error e -> Error e
Ok a rest loc -> Ok (f a) rest loc
--------------------------------------------------------------------------------
--- Parse monad
+-- Parse monad and core combinators
--- ParseT is a monad transformer for parsing. It effectively has a ByteString
+-- | ParseT is a monad transformer for parsing. It effectively has a ByteString
-- and Loc state effect for the input, and an error effect (of some type e) for
-- parsing errors. Parsing *errors* are distinct from parsing *failures* in
-- that only the latter trigger backtracking in the Alternative instance.
newtype ParseT e m a = ParseT (ByteString -> Loc -> m (Result e a))
-runParseT :: Monad m => ParseT e m a -> ByteString -> Loc -> m (Result e a)
+runParseT :: (Monad m) => ParseT e m a -> ByteString -> Loc -> m (Result e a)
runParseT (ParseT f) = f
-mapParseT :: (Monad m, Monad m')
- => (m (Result e a) -> m' (Result e' a'))
- -> ParseT e m a
- -> ParseT e' m' a'
+mapParseT ::
+ (Monad m, Monad m') =>
+ (m (Result e a) -> m' (Result e' a')) ->
+ ParseT e m a ->
+ ParseT e' m' a'
mapParseT f p = ParseT \input loc -> f $ runParseT p input loc
type Parse e a = ParseT e Identity a
@@ -116,41 +122,39 @@ runParse (ParseT f) input loc = runIdentity $ f input loc
mapParse :: (Result e a -> Result e' a') -> Parse e a -> Parse e' a'
mapParse f = mapParseT (Identity . f . runIdentity)
-instance Monad m => Functor (ParseT e m) where
+instance (Monad m) => Functor (ParseT e m) where
fmap = liftM
-instance Monad m => Applicative (ParseT e m) where
+instance (Monad m) => Applicative (ParseT e m) where
pure a = ParseT \input loc -> pure $ Ok a input loc
(<*>) = ap
-instance Monad m => Monad (ParseT e m) where
+instance (Monad m) => Monad (ParseT e m) where
p >>= k = ParseT \input loc ->
runParseT p input loc >>= \case
- Fail -> pure $ Fail
- Error e -> pure $ Error e
+ Failure -> pure Failure
+ Error e -> pure $ Error e
Ok a input' loc' -> runParseT (k a) input' loc'
-instance Monad m => Alternative (ParseT e m) where
- empty = ParseT \_ _ -> pure Fail
+instance (Monad m) => Alternative (ParseT e m) where
+ empty = ParseT \_ _ -> pure Failure
p <|> q = ParseT \input loc ->
runParseT p input loc >>= \case
- Fail -> runParseT q input loc
- Error e -> pure $ Error e
+ Failure -> runParseT q input loc
+ Error e -> pure $ Error e
Ok a input' loc' -> pure $ Ok a input' loc'
instance MonadTrans (ParseT e) where
- lift m = ParseT \input loc -> do
- a <- m
- pure $ Ok a input loc
+ lift m = ParseT \input loc -> (\a -> Ok a input loc) <$> m
-instance MonadReader r m => MonadReader r (ParseT e m) where
+instance (MonadReader r m) => MonadReader r (ParseT e m) where
ask = lift ask
local f p = ParseT \input loc -> local f $ runParseT p input loc
-instance MonadState s m => MonadState s (ParseT e m) where
+instance (MonadState s m) => MonadState s (ParseT e m) where
get = lift get
put = lift . put
@@ -158,123 +162,148 @@ instance MonadState s m => MonadState s (ParseT e m) where
-- TODO: Separate a MTL-like MonadParse typeclass with (a subset of) the
-- following methods?
--- Read the entire input without consuming it.
-read :: Monad m => ParseT e m ByteString
+-- | Read the entire input without consuming it.
+read :: (Monad m) => ParseT e m ByteString
read = ParseT \input loc -> pure $ Ok input input loc
--- Replace the entire input without affecting the current location.
-write :: Monad m => ByteString -> ParseT e m ()
+-- | Replace the entire input without affecting the current location.
+write :: (Monad m) => ByteString -> ParseT e m ()
write input = ParseT \_ loc -> pure $ Ok () input loc
-getLoc :: Monad m => ParseT e m Loc
+getLoc :: (Monad m) => ParseT e m Loc
getLoc = ParseT \input loc -> pure $ Ok loc input loc
-putLoc :: Monad m => Loc -> ParseT e m ()
+putLoc :: (Monad m) => Loc -> ParseT e m ()
putLoc loc = ParseT \input _ -> pure $ Ok () input loc
--- Save parsing state. This is effectively a combination of read and getLoc.
-save :: Monad m => ParseT e m (ByteString, Loc)
+-- | Save parsing state. This is effectively a combination of read and getLoc.
+save :: (Monad m) => ParseT e m (ByteString, Loc)
save = ParseT \input loc -> pure $ Ok (input, loc) input loc
--- Load parsing state. This is effectively a combination of write and putLoc.
-load :: Monad m => (ByteString, Loc) -> ParseT e m ()
+-- | Load parsing state. This is effectively a combination of write and putLoc.
+load :: (Monad m) => (ByteString, Loc) -> ParseT e m ()
load (input, loc) = ParseT \_ _ -> pure $ Ok () input loc
-fail :: Monad m => ParseT e m a
+fail :: (Monad m) => ParseT e m a
fail = empty
--- Turn a failure into a success and vice versa.
-notp :: Monad m => ParseT e m a -> ParseT e m ()
+-- | Turn a failure into a success and vice versa.
+notp :: (Monad m) => ParseT e m a -> ParseT e m ()
notp p = ParseT \input loc ->
runParseT p input loc >>= \case
- Fail -> pure $ Ok () input loc
- Error e -> pure $ Error e
- Ok _ _ _ -> pure Fail
+ Failure -> pure $ Ok () input loc
+ Error e -> pure $ Error e
+ Ok{} -> pure Failure
-err :: Monad m => e -> ParseT e m a
+err :: (Monad m) => e -> ParseT e m a
err e = ParseT \_ _ -> pure $ Error e
--- Catch an error.
-catch :: Monad m => ParseT e m a -> (e -> ParseT e m a) -> ParseT e m a
+-- | Catch an error.
+catch :: (Monad m) => ParseT e m a -> (e -> ParseT e' m a) -> ParseT e' m a
p `catch` h = ParseT \input loc ->
runParseT p input loc >>= \case
- Fail -> pure Fail
- Error e -> runParseT (h e) input loc
+ Failure -> pure Failure
+ Error e -> runParseT (h e) input loc
Ok a input' loc' -> pure $ Ok a input' loc'
+-- handle generalizes the following:
+-- (<|>): provide a continuation for failure
+-- catch: provide a continuation for error
+-- (>>=): provide a continuation for ok
+-- handle let's you specify a continuation for all three cases.
+-- handle :: (Monad m) => ParseT e' m a' -> (e -> ParseT e' m a') -> (a -> ParseT e' m a') -> ParseT e m a -> ParseT e' m a'
+-- handle hf he ho p = ParseT \input loc ->
+-- runParseT p input loc >>= \case
+-- Failure -> runParseT hf input loc
+-- Error e -> runParseT (he e) input loc
+-- Ok a input' loc' -> runParseT (ho a) input' loc'
+-- bind2 :: (Monad m) => (a -> ParseT e m a') -> ParseT e m a -> ParseT e m a'
+-- bind2 h = handle fail err h
+-- catch2 :: (Monad m) => (e -> ParseT e' m a) -> ParseT e m a -> ParseT e' m a
+-- catch2 h = handle fail h pure
+-- alt2 :: (Monad m) => ParseT e m a -> ParseT e m a -> ParseT e m a
+-- alt2 h = handle h err pure
+-- branch2 :: (Monad m) => ParseT e m a' -> (a -> ParseT e m a') -> ParseT e m a -> ParseT e m a'
+-- branch2 hf ho = handle hf err ho
+-- -- TODO: Rename Ok to Success
+-- succeed :: (Monad m) => a -> ParseT e m a
+-- succeed = pure
+
--------------------------------------------------------------------------------
--- General parse combinators
+-- General combinators
-eof :: Monad m => ParseT e m ()
-eof = (ByteString.null <$> read) >>= guard
+eof :: (Monad m) => ParseT e m ()
+eof = guard . ByteString.null =<< read
--- Run a parser without changing the parser state.
-lookahead :: Monad m => ParseT e m a -> ParseT e m a
+-- | Run a parser without changing the parser state.
+lookahead :: (Monad m) => ParseT e m a -> ParseT e m a
lookahead p = do
s <- save
a <- p
load s
pure a
-spanOf :: Monad m => ParseT e m a -> ParseT e m (a, Span)
+spanOf :: (Monad m) => ParseT e m a -> ParseT e m (a, Span)
spanOf p = do
beg <- getLoc
a <- p
end <- getLoc
pure (a, Span beg end)
-bytesOf :: Monad m => ParseT e m a -> ParseT e m (a, ByteString)
+bytesOf :: (Monad m) => ParseT e m a -> ParseT e m (a, ByteString)
bytesOf p = do
(input, beg) <- save
a <- p
end <- getLoc
pure (a, ByteString.take (end.byte - beg.byte) input)
--- Convert a failure into an error.
+-- | Convert a failure into an error.
+--
-- TODO: Why do we call this "cut"?
-cut :: Monad m => ParseT e m a -> e -> ParseT e m a
+cut :: (Monad m) => ParseT e m a -> e -> ParseT e m a
p `cut` e = p <|> err e
--- Convert an error into a failure.
-try :: Monad m => ParseT e m a -> ParseT e m a
-try p = p `catch` \_ -> fail
+-- | Convert an error into a failure.
+try :: (Monad m) => ParseT e m a -> ParseT e m a
+try p = p `catch` const fail
--- Iterate a parsing function until it fails. I.e.,
--- iter f a = pure a >>= f >>= f >>= ...
+-- | Iterate a parsing function until it fails. I.e.,
+-- @iter f a = pure a >>= f >>= f >>= ...@
-- where the chain of binds is as long as possible without failure.
-iter :: Monad m => (a -> ParseT e m a) -> a -> ParseT e m a
+iter :: (Monad m) => (a -> ParseT e m a) -> a -> ParseT e m a
iter f a = (f a >>= iter f) <|> pure a
--- Parse a `b` and then zero or more `a`s, and then combine in a left-nested
+-- | Parse a `b` and then zero or more `a`s, and then combine in a left-nested
-- fashion.
-chainl :: Monad m => (b -> a -> b) -> ParseT e m b -> ParseT e m a -> ParseT e m b
-chainl f pb pa = pb >>= iter \b -> (f b <$> pa)
+chainl :: (Monad m) => (b -> a -> b) -> ParseT e m b -> ParseT e m a -> ParseT e m b
+chainl f pb pa = pb >>= iter \b -> f b <$> pa
--- Parse zero or more `a`s (greedily) and then a `b`, and then combine in a
+-- | Parse zero or more `a`s (greedily) and then a `b`, and then combine in a
-- right-nested fashion.
-chainr :: Monad m => (a -> b -> b) -> ParseT e m a -> ParseT e m b -> ParseT e m b
+chainr :: (Monad m) => (a -> b -> b) -> ParseT e m a -> ParseT e m b -> ParseT e m b
chainr f pa pb = f <$> pa <*> chainr f pa pb <|> pb
--- `branch pc pt pf` runs pc; if it succeeds, it continues with pt, and
+-- | `branch pc pt pf` runs pc; if it succeeds, it continues with pt, and
-- otherwise it continues with pf.
-branch :: Monad m => ParseT e m a -> ParseT e m b -> ParseT e m b -> ParseT e m b
-branch pc pt pf = pc *> pt <|> pf
+branch :: (Monad m) => ParseT e m a -> ParseT e m b -> ParseT e m b -> ParseT e m b
+branch pc pt pf = pc *> pt <|> pf -- TODO: This is bugged. pt can backtrack into pf.
choice :: (Monad m, Foldable t) => t (ParseT e m a) -> ParseT e m a
choice = asum
--- `match scrut cases` binds scrut to the first non-failing case and returns
+-- | `match scrut cases` binds scrut to the first non-failing case and returns
-- the result.
-match :: (Monad m, Functor t, Foldable t)
- => ParseT e m a
- -> t (a -> ParseT e m b)
- -> ParseT e m b
+match ::
+ (Monad m, Functor t, Foldable t) =>
+ ParseT e m a ->
+ t (a -> ParseT e m b) ->
+ ParseT e m b
match scrut cases = choice $ (scrut >>=) <$> cases
-someSepBy :: Monad m => ParseT e m a -> ParseT e m b -> ParseT e m [a]
+someSepBy :: (Monad m) => ParseT e m a -> ParseT e m b -> ParseT e m [a]
someSepBy p sep = (:) <$> p <*> many (sep *> p)
-manySepBy :: Monad m => ParseT e m a -> ParseT e m b -> ParseT e m [a]
+manySepBy :: (Monad m) => ParseT e m a -> ParseT e m b -> ParseT e m [a]
manySepBy p sep = someSepBy p sep <|> pure []
--------------------------------------------------------------------------------
@@ -292,30 +321,33 @@ class Utf8Error e where
utf8Error :: Loc -> e
nextRune :: forall e m. (Utf8Error e, Monad m) => ParseT e m Rune
-nextRune =
- (UTF8.decode <$> read) >>= \case
+nextRune = toRune . UTF8.decode =<< read
+ where
+ toRune :: Maybe (Char, Int) -> ParseT e m Rune
+ toRune = \case
Just (c, w)
- | c == UTF8.replacement_char -> getLoc >>= (err . utf8Error)
- | otherwise -> updateState c w *> pure (RuneChar c)
+ | c == UTF8.replacement_char -> err . utf8Error =<< getLoc
+ | otherwise -> updateState c w $> RuneChar c
Nothing -> pure RuneEof
- where
- updateState :: Char -> Int -> ParseT e m ()
- updateState c w = do
- (ByteString.drop w <$> read) >>= write
- loc <- getLoc
- putLoc case c of
- '\n' -> loc { byte = loc.byte + w, row = loc.row + 1, col = 0 }
- _ -> loc { byte = loc.byte + w, col = loc.col + 1 }
+
+ updateState :: Char -> Int -> ParseT e m ()
+ updateState c w = do
+ write . ByteString.drop w =<< read
+ loc <- getLoc
+ putLoc case c of
+ '\n' -> loc{byte = loc.byte + w, row = loc.row + 1, col = 0}
+ _ -> loc{byte = loc.byte + w, col = loc.col + 1}
nextChar :: (Utf8Error e, Monad m) => ParseT e m Char
-nextChar = nextRune >>= \case
- RuneEof -> fail
- RuneChar c -> pure c
+nextChar =
+ nextRune >>= \case
+ RuneEof -> fail
+ RuneChar c -> pure c
runeIfM :: (Utf8Error e, Monad m) => (Rune -> m Bool) -> ParseT e m Rune
runeIfM want = do
r <- nextRune
- (lift $ want r) >>= guard
+ lift (want r) >>= guard
pure r
runeIf :: (Utf8Error e, Monad m) => (Rune -> Bool) -> ParseT e m Rune
@@ -324,14 +356,14 @@ runeIf want = runeIfM (pure . want)
charIfM :: (Utf8Error e, Monad m) => (Char -> m Bool) -> ParseT e m Char
charIfM want = do
c <- nextChar
- (lift $ want c) >>= guard
+ lift (want c) >>= guard
pure c
charIf :: (Utf8Error e, Monad m) => (Char -> Bool) -> ParseT e m Char
charIf want = charIfM (pure . want)
runeWhileM :: (Utf8Error e, Monad m) => (Rune -> m Bool) -> ParseT e m ByteString
-runeWhileM want = snd <$> (bytesOf $ many (runeIfM want))
+runeWhileM want = snd <$> bytesOf (many (runeIfM want))
runeWhile :: (Utf8Error e, Monad m) => (Rune -> Bool) -> ParseT e m ByteString
runeWhile want = runeWhileM (pure . want)
@@ -345,40 +377,40 @@ charWhile :: (Utf8Error e, Monad m) => (Char -> Bool) -> ParseT e m ByteString
charWhile want = charWhileM (pure . want)
rune :: (Utf8Error e, Monad m) => Rune -> ParseT e m ()
-rune r = runeIf (== r) *> pure ()
+rune r = void $ runeIf (== r)
char :: (Utf8Error e, Monad m) => Char -> ParseT e m ()
-char c = charIf (== c) *> pure ()
+char c = void $ charIf (== c)
string :: (Utf8Error e, Monad m) => String -> ParseT e m ()
string = traverse_ char
natural :: (Utf8Error e, Monad m) => Int -> ParseT e m Natural
natural = \case
- 0 ->
- let prefix l u = char '0' *> (char l <|> char u) in
- prefix 'b' 'B' *> natural 2
+ 0 ->
+ let prefix l u = char '0' *> (char l <|> char u)
+ in prefix 'b' 'B' *> natural 2
<|> prefix 'q' 'Q' *> natural 4
<|> prefix 'o' 'O' *> natural 8
<|> prefix 'x' 'X' *> natural 16
<|> natural 10
-
- b | 0 < b && b <= 36 ->
- let nat :: Int -> Natural
- nat = fromIntegral
-
- maxn = min (chr $ ord '0' + b) (succ '9')
- maxu = min (chr $ ord 'A' + b - 10) (succ 'Z')
- maxl = min (chr $ ord 'a' + b - 10) (succ 'z')
-
- numeral = nextChar >>= \c -> if
- | '0' <= c && c < maxn -> pure $ nat (ord c - ord '0')
- | 'A' <= c && c < maxu -> pure $ nat (ord c - ord 'A' + 10)
- | 'a' <= c && c < maxl -> pure $ nat (ord c - ord 'a' + 10)
- | otherwise -> fail
-
- underscores = many (char '_') in
-
- numeral >>= iter (\n -> (nat b * n +) <$> (underscores *> numeral))
-
- _ -> error "natural: invalid base"
+ b
+ | 0 < b && b <= 36 ->
+ let nat :: Int -> Natural
+ nat = fromIntegral
+
+ maxn = min (chr $ ord '0' + b) (succ '9')
+ maxu = min (chr $ ord 'A' + b - 10) (succ 'Z')
+ maxl = min (chr $ ord 'a' + b - 10) (succ 'z')
+
+ numeral =
+ nextChar >>= \c ->
+ if
+ | '0' <= c && c < maxn -> pure $ nat (ord c - ord '0')
+ | 'A' <= c && c < maxu -> pure $ nat (ord c - ord 'A' + 10)
+ | 'a' <= c && c < maxl -> pure $ nat (ord c - ord 'a' + 10)
+ | otherwise -> fail
+
+ underscores = many (char '_')
+ in numeral >>= iter (\n -> (nat b * n +) <$> (underscores *> numeral))
+ _ -> error "natural: invalid base"
diff --git a/package.yaml b/package.yaml
@@ -1,5 +1,5 @@
name: sparsec
-version: 0.1.0.0
+version: 0.1.1.0
author: "Robert Russell"
license: ISC
@@ -8,12 +8,11 @@ ghc-options:
- -Wno-name-shadowing
- -Wno-missing-signatures
-language: GHC2021
+language: GHC2024
default-extensions:
- BlockArguments
- DuplicateRecordFields
- - LambdaCase
- MultiWayIf
- NoFieldSelectors
- OverloadedRecordDot
@@ -21,7 +20,7 @@ default-extensions:
- UndecidableInstances
dependencies:
- - base >= 4.18 && < 5
+ - base >= 4.20 && < 5
- bytestring
library:
diff --git a/stack.yaml b/stack.yaml
@@ -1 +1 @@
-resolver: lts-23.23
+resolver: lts-24.23