commit e2dbc607501d98772fa74792d822547c74d8f619
parent 9709acfc587cdcb130e970f6728d396c0f85a0bf
Author: Robert Russell <robertrussell.72001@gmail.com>
Date: Wed, 3 Jan 2024 17:32:36 -0800
Update parser
Diffstat:
| M | Makefile | | | 1 | - |
| M | common.ml | | | 24 | ++++++++++++++++++++---- |
| M | concrete.ml | | | 88 | +++++++++++++++++++++---------------------------------------------------------- |
| D | field.ml | | | 7 | ------- |
| M | lexer.mll | | | 43 | ++++++++++++------------------------------- |
| M | parser.mly | | | 222 | ++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------- |
6 files changed, 197 insertions(+), 188 deletions(-)
diff --git a/Makefile b/Makefile
@@ -2,7 +2,6 @@ SRC = \
loc.ml \
node.ml \
common.ml \
- field.ml \
concrete.ml \
parser.ml \
lexer.ml \
diff --git a/common.ml b/common.ml
@@ -1,8 +1,24 @@
-type ident = string
-type index = int
-type level = int
+type name = Name of string
-module IdentMap = Map.Make(String)
+module NameMap = Map.Make(struct
+ type t = name
+ let compare (Name x) (Name y) = String.compare x y
+end)
+
+type tag = Tag of string
+
+module TagMap = Map.Make(struct
+ type t = tag
+ let compare (Tag x) (Tag y) = String.compare x y
+end)
+
+type index = Index of int
+
+type plicity =
+| Implicit
+| Explicit
+
+let impossible () = failwith "impossible"
(* Short-circuiting AND fold_left. *)
let rec all2 f xs ys = match xs, ys with
diff --git a/concrete.ml b/concrete.ml
@@ -1,10 +1,8 @@
open Common
-exception InvalidPattern
-
type term = term' Node.t
and term' =
-| Let of patt * term * term
+| Let of defn list * term
| Seq of term * term
| Match of term * case list
| If of term * term * term
@@ -12,87 +10,47 @@ and term' =
| Annot of term * term
| Add of term * term
| Mul of term * term
-| Or of term * term
-| And of term * term
| App of term * term list
-| Fn of patt list * term
-| FnType of annot list * term
-| Variant of ident * term list
+| Fn of param list * term
+| FnType of domain list * term
+| Variant of tag * term list
| VariantType of ctor list
| Tuple of term list
-| TupleType of annot list
-| Record of term Field.t list
-| RecordType of annot Field.t list
+| TupleType of domain list
| Unit
| Nat of Z.t
-| Underscore
-| IdentIndex of ident * int
-| Ident of ident
+| Var of name * int option
| Paren of term
-(*
-| Access of term * ident
-| Index of term * term
-| Deref of term
-*)
+
+and defn = defn' Node.t
+and defn' =
+| Defn of patt * term
+| RecDefn of patt * term
and case = case' Node.t
and case' = Case of patt * term
+and param = param' Node.t
+and param' = Param of plicity * patt
+
+and domain = domain' Node.t
+and domain' =
+| DExplicit of term
+| DImplicitType of patt
+| DAnnot of plicity * patt * term
+
and ctor = ctor' Node.t
-and ctor' = Ctor of ident * annot list
+and ctor' = Ctor of tag * domain list
and patt = patt' Node.t
and patt' =
| PWild
-| PBind of ident
+| PBind of name
| PAnnot of patt * term
-| PVariant of ident * patt list
+| PVariant of tag * patt list
| PTuple of patt list
-| PRecord of patt Field.t list
| PUnit
| PNat of Z.t
| PAnd of patt * patt
| POr of patt * patt
| PParen of patt
-(*
-| PAssign of term
-*)
-
-and annot = annot' Node.t
-and annot' =
-| AType of term
-| APatt of patt * term
-| AParen of annot
-
-let rec patt p = Node.map patt' p
-and patt' = function
-| Let _ -> raise InvalidPattern
-| Seq _ -> raise InvalidPattern
-| Match _ -> raise InvalidPattern
-| If _ -> raise InvalidPattern
-| Iff _ -> raise InvalidPattern
-| Annot (p, t) -> PAnnot (patt p, t)
-| Add _ -> raise InvalidPattern (* TODO: n + k patterns? *)
-| Mul _ -> raise InvalidPattern
-| Or (p, q) -> POr (patt p, patt q)
-| And (p, q) -> PAnd (patt p, patt q)
-| App _ -> raise InvalidPattern
-| Fn _ -> raise InvalidPattern
-| FnType _ -> raise InvalidPattern
-| Variant (x, ts) -> PVariant (x, List.map patt ts)
-| VariantType _ -> raise InvalidPattern
-| Tuple ts -> PTuple (List.map patt ts)
-| TupleType _ -> raise InvalidPattern
-| Record tfs -> PRecord (List.map (Field.map patt) tfs)
-| RecordType _ -> raise InvalidPattern
-| Unit -> PUnit
-| Nat n -> PNat n
-| Underscore -> PWild
-| IdentIndex _ -> raise InvalidPattern
-| Ident x -> PBind x
-| Paren p -> PParen (patt p)
-
-let rec annot = function
-| Node.Of (loc, Annot (p, t)) -> loc @$ APatt (patt p, t)
-| Node.Of (loc, Paren a) -> loc @$ AParen (annot a)
-| Node.Of (loc, _) as n -> loc @$ AType n
diff --git a/field.ml b/field.ml
@@ -1,7 +0,0 @@
-open Common
-
-type 'a t = 'a t' Node.t
-and 'a t' = Of of ident * 'a
-
-let map' f (Of (id, x)) = Of (id, f x)
-let map f = Node.map (map' f)
diff --git a/lexer.mll b/lexer.mll
@@ -27,7 +27,8 @@ let greek = ugreek | lgreek | lgreekalt
let ident_start = latin | greek | '_'
let ident_cont = latin | greek | '_' | dnumeral | '\'' | '"'
-let ident = ident_start ident_cont*
+let name = ident_start ident_cont*
+let tag = ident_cont+
(* This format is accepted by Z.of_string. *)
let dnumber = dnumeral (dnumeral | '_')*
@@ -48,10 +49,8 @@ rule token = parse
| ";" { SEMICOLON }
| ":" { COLON }
| "," { COMMA }
-| "." { DOT }
-| "!" { EXCLAM }
-| "@" { AT }
| ":=" { COLON_EQ }
+| "?" { QUESTION }
| "->" { SINGLE_ARROW }
| "=>" { DOUBLE_ARROW }
@@ -59,7 +58,6 @@ rule token = parse
| "(" { LPAREN }
| ")" { RPAREN }
-| "#[" { HASH_LBRACK }
| "[" { LBRACK }
| "]" { RBRACK }
@@ -67,14 +65,14 @@ rule token = parse
| "{" { LBRACE }
| "}" { RBRACE }
-| "=" { EQ }
-
| "+" { PLUS }
| "*" { ASTERISK }
| "|" { PIPE }
| "&" { AMPERSAND }
+| "rec" { REC }
+
| "match" { MATCH }
| "if" { IF }
@@ -86,16 +84,19 @@ rule token = parse
| "fn" { FN }
+| "@" (tag as t) { TAG t }
+
| "_" { UNDERSCORE }
-| (ident as x) "#" (number as i) {
+
+| (name as x) "#" (number as i) {
let z = Z.of_string i in
(* TODO: Standardize this limit. *)
if Z.fits_int z then
- IDENT_INDEX (x, Z.to_int z)
+ INDEXED_NAME (x, Z.to_int z)
else
- error lexbuf "identifier index too big"
+ error lexbuf "name index too big"
}
-| ident as x { IDENT x }
+| name as x { NAME x }
| number as n { NAT (Z.of_string n) }
| number invalid_number_terminator
@@ -103,25 +104,5 @@ rule token = parse
| _ { error lexbuf "invalid byte" }
-(*
-| "<" { LT }
-| "<=" { LE }
-| "=" { EQ }
-| ">=" { GE }
-| ">" { GT }
-| "<>" { NE }
-| "-" { MINUS }
-| "/" { SLASH }
-| "%" { PERCENT }
-| "^" { CIRCUM }
-| "~" { TILDE }
-| "&" { AMP }
-| "|" { PIPE }
-| "not" { NOT }
-| "and" { AND }
-| "or" { OR }
-| "imp" { IMP }
-*)
-
{
}
diff --git a/parser.mly b/parser.mly
@@ -1,68 +1,141 @@
%{
open Common
-open Concrete
-
-let locate i x = Loc.of_position (Parsing.rhs_start_pos i) @$ x
-let (@$) = locate
+module C = Concrete
+
+(* To avoid reduce-reduce conflicts, we need to distinguish between raw
+ * expressions (which are generally of unknown syntactic category) and concrete
+ * terms/patternss/etc. In a hand-written parser, we'd have more control over
+ * when to decide the syntactic category of the top of the token stack, so
+ * we wouldn't need to make this distinction. *)
+type expr = expr' Node.t
+and expr' =
+| Let of C.defn list * expr
+| Seq of expr * expr
+| Match of expr * C.case list
+| If of expr * expr * expr
+| Iff of expr * expr
+| Annot of expr * expr
+| Add of expr * expr
+| Mul of expr * expr
+| Or of expr * expr
+| And of expr * expr
+| App of expr * expr list
+| Fn of C.param list * expr
+| FnType of C.domain list * expr
+| Variant of tag * expr list
+| VariantType of C.ctor list
+| Tuple of expr list
+| TupleType of C.domain list
+| Unit
+| Nat of Z.t
+| Underscore
+| IndexedVar of name * int
+| Var of name
+| Paren of expr
+
+exception Error of Loc.t * string
+let error loc msg = raise (Error (loc, msg))
+
+let rec term (Node.Of (loc, e)) = loc @$ match e with
+| Let (defns, body) -> C.Let (defns, term body)
+| Seq (a, b) -> C.Seq (term a, term b)
+| Match (scrutinee, cases) -> C.Match (term scrutinee, cases)
+| If (c, t, f) -> C.If (term c, term t, term f)
+| Iff (c, t) -> C.Iff (term c, term t)
+| Annot (a, b) -> C.Iff (term a, term b)
+| Add (a, b) -> C.Add (term a, term b)
+| Mul (a, b) -> C.Mul (term a, term b)
+| Or _ -> error loc "'|' can not appear in terms (yet)"
+| And _ -> error loc "'&' can not appear in terms (yet)"
+| App (f, args) -> C.App (term f, List.map term args)
+| Fn (params, body) -> C.Fn (params, term body)
+| FnType (doms, cod) -> C.FnType (doms, term cod)
+| Variant (tag, fields) -> C.Variant (tag, List.map term fields)
+| VariantType ctors -> C.VariantType ctors
+| Tuple fields -> C.Tuple (List.map term fields)
+| TupleType doms -> C.TupleType doms
+| Unit -> C.Unit
+| Nat n -> C.Nat n
+| Underscore -> error loc "'_' can not appear in terms (yet)"
+| IndexedVar (x, i) -> C.Var (x, Some i)
+| Var x -> C.Var (x, None)
+| Paren e -> C.Paren (term e)
+
+let rec patt (Node.Of (loc, e)) = loc @$ match e with
+| Annot (p, t) -> C.PAnnot (patt p, term t)
+| Or (p, q) -> C.POr (patt p, patt q)
+| And (p, q) -> C.PAnd (patt p, patt q)
+| Variant (tag, ps) -> C.PVariant (tag, List.map patt ps)
+| Tuple ps -> C.PTuple (List.map patt ps)
+| Unit -> C.PUnit
+| Nat n -> C.PNat n
+| Underscore -> C.PWild
+| Var x -> C.PBind x
+| Paren p -> C.PParen (patt p)
+| Let _ | Seq _ | Match _ | If _ | Iff _ | Add _ | Mul _
+| App _ | Fn _ | FnType _ | VariantType _ | TupleType _ ->
+ error loc "invalid pattern"
+| IndexedVar _ ->
+ error loc "only unindexed variables can appear in patterns"
+
+let loc_of i = Loc.of_position (Parsing.rhs_start_pos i)
+let (@$) i x = loc_of i @$ x
%}
%token EOF
-%token SEMICOLON COLON COMMA DOT EXCLAM AT COLON_EQ
+%token SEMICOLON COLON COMMA COLON_EQ QUESTION
%token SINGLE_ARROW DOUBLE_ARROW
%token HASH_LPAREN LPAREN RPAREN
-%token HASH_LBRACK LBRACK RBRACK
+%token LBRACK RBRACK
%token HASH_LBRACE LBRACE RBRACE
-%token EQ
%token PLUS ASTERISK
%token PIPE AMPERSAND
+%token REC
%token MATCH
%token IF THEN ELSE
%token IFF DO
%token FN
%token UNDERSCORE
-%token <Common.ident * int> IDENT_INDEX
-%token <Common.ident> IDENT
+%token <string> TAG
+%token <string * int> INDEXED_NAME
+%token <string> NAME
%token <Z.t> NAT
%start start
%type <Concrete.term> start
-%nonassoc PREC_TUPLE
-%nonassoc SINGLE_ARROW
-
%%
-(* Syntax idea for mutual recursion:
- * Anonymous functions (no recursion support):
- * fn x -> ...
- * Named functions ((mutual) recursion support)):
- * f := func x -> ...,
- * h := func y -> ...,
- * g := func z -> ...;
- * (Think of ":= func" as being one symbol).
- * We can make a similar distinction for types with "ty" vs "type". *)
+start: expr0 EOF { term $1 }
-start: expr0 EOF { $1 }
-
-expr0: (* Sequencing *)
- | expr2 COLON_EQ expr1 SEMICOLON expr0 { 2 @$ Let (patt $1, $3, $5) }
+expr0: (* Definitions and sequencing *)
+ | defn_list SEMICOLON expr0 { 2 @$ Let ($1, $3) }
| expr1 SEMICOLON expr0 { 2 @$ Seq ($1, $3) }
| expr1 { $1 }
expr1: (* Control flow *)
+ (* TODO: Probably we should change the match syntax when staging is added,
+ * so that we can write something like `$match x ...` instead of
+ * `$(x match ...)` for conditional compilation. *)
| expr2 MATCH LBRACE case_list RBRACE { 2 @$ Match($1, $4) }
- (* if let: "if p := x then t else f"; and similarly for iff *)
+ (* TODO: if let: "if p := x then t else f"; and similarly for iff *)
| IF expr1 THEN expr1 ELSE expr1 { 1 @$ If ($2, $4, $6) }
| IFF expr1 DO expr1 { 1 @$ Iff ($2, $4) }
- | FN expr2_list DOUBLE_ARROW expr1 { 1 @$ Fn (List.map patt $2, $4) }
+ | FN param_list DOUBLE_ARROW expr1 { 1 @$ Fn ($2, $4) }
| expr2 { $1 }
expr2: (* Type annotations (right-associative) *)
| expr3 COLON expr2 { 2 @$ Annot ($1, $3) }
| expr3 { $1 }
expr3: (* Function arrow (right-associative) *)
- | expr6 SINGLE_ARROW expr3 { 2 @$ FnType ([annot $1], $3) }
- | paren_expr_list SINGLE_ARROW expr3 { 2 @$ FnType (List.map annot $1, $3) }
+ | expr6 SINGLE_ARROW expr3 { 2 @$ FnType ([1 @$ C.DExplicit (term $1)], $3) }
+ | LBRACK domain_list RBRACK SINGLE_ARROW expr3 {
+ if List.length $2 = 0 then
+ error (loc_of 2) "empty domain"
+ else
+ 4 @$ FnType ($2, $5)
+ }
| expr4 { $1 }
-expr4: (* Additive binary operators (left-associative) *)
+expr4: (* Additive binary operators (left-associative;
+ * pairwise incomparable precedence) *)
| expr4add PLUS expr5 { 2 @$ Add ($1, $3) }
| expr4or PIPE expr5 { 2 @$ Or ($1, $3) }
| expr5 { $1 }
@@ -72,7 +145,8 @@ expr4add:
expr4or:
| expr4or PIPE expr5 { 2 @$ Or ($1, $3) }
| expr5 { $1 }
-expr5: (* Multiplicative binary operators (left-associative) *)
+expr5: (* Multiplicative binary operators (left-associative;
+ * pairwise incomparable precedence) *)
| expr5mul ASTERISK expr6 { 2 @$ Mul ($1, $3) }
| expr5and AMPERSAND expr6 { 2 @$ And ($1, $3) }
| expr6 { $1 }
@@ -83,78 +157,66 @@ expr5and:
| expr5and AMPERSAND expr6 { 2 @$ And ($1, $3) }
| expr6 { $1 }
expr6:
- | expr7 expr7 expr7_apposition { 1 @$ App ($1, $2 :: $3) }
- | AT IDENT expr7_apposition { 1 @$ Variant ($2, $3) }
+ | expr7 expr7 expr7_apposition_list { 1 @$ App ($1, $2 :: $3) }
+ | TAG expr7_apposition_list { 1 @$ Variant (Tag $1, $2) }
| expr7 { $1 }
expr7:
- (*
- | expr7 DOT IDENT { 2 @$ Access ($1, $3) }
- | expr7 DOT LPAREN expr0 RPAREN { 2 @$ Index ($1, $4) }
- | expr7 EXCLAM { 2 @$ Deref $1 }
- *)
- | expr8 { $1 }
-expr8:
| UNDERSCORE { 1 @$ Underscore }
- | IDENT_INDEX { 1 @$ let (x, i) = $1 in IdentIndex (x, i) }
- | IDENT { 1 @$ Ident $1 }
+ | INDEXED_NAME { 1 @$ let (x, i) = $1 in IndexedVar (Name x, i) }
+ | NAME { 1 @$ Var (Name $1) }
| NAT { 1 @$ Nat $1 }
- | HASH_LBRACK ctor_list RBRACK { 1 @$ VariantType $2 }
- | HASH_LPAREN expr2_list RPAREN { 1 @$ TupleType (List.map annot $2) }
- | HASH_LBRACE annot_field_list RBRACE { 1 @$ RecordType $2 }
- | paren_expr_list %prec PREC_TUPLE { 1 @$ Tuple $1 }
- | LBRACE term_field_list RBRACE { 1 @$ Record $2 }
+ | HASH_LBRACE ctor_list RBRACE { 1 @$ VariantType $2 }
+ | HASH_LPAREN domain_list RPAREN { 1 @$ TupleType $2 }
+ | LPAREN expr1 COMMA expr1_comma_list RPAREN { 1 @$ Tuple ($2 :: $4) }
| LPAREN RPAREN { 1 @$ Unit }
| LPAREN expr0 RPAREN { 1 @$ Paren $2 }
-expr1_list:
- | expr1 COMMA expr1_list { $1 :: $3 }
+expr1_comma_list:
+ | expr1 COMMA expr1_comma_list { $1 :: $3 }
| expr1 { [$1] }
| { [] }
-expr2_list:
- | expr2 COMMA expr2_list { $1 :: $3 }
- | expr2 { [$1] }
+expr7_apposition_list:
+ | expr7 expr7_apposition_list { $1 :: $2 }
| { [] }
-expr7_apposition:
- | expr7 expr7_apposition { $1 :: $2 }
- | { [] }
-
-paren_expr_list: LPAREN expr1 COMMA expr1_list RPAREN { $2 :: $4 }
+defn:
+ | expr2 COLON_EQ expr1 { 2 @$ C.Defn (patt $1, term $3) }
+ | expr2 COLON_EQ REC expr1 { 2 @$ C.RecDefn (patt $1, term $4) }
+defn_list:
+ | defn COMMA defn_list { $1 :: $3 }
+ | defn { [$1] }
case:
- | expr2 DOUBLE_ARROW expr1 { 2 @$ Case (patt $1, $3) }
+ | expr2 DOUBLE_ARROW expr1 { 2 @$ C.Case (patt $1, term $3) }
case_list:
| case COMMA case_list { $1 :: $3 }
| case { [$1] }
| { [] }
-ctor:
- | AT IDENT expr2_list { 1 @$ Ctor ($2, List.map annot $3) }
-ctor_list:
- | ctor SEMICOLON ctor_list { $1 :: $3 }
- | ctor { [$1] }
+param:
+ | expr2 { 1 @$ C.Param (Explicit, patt $1) }
+ | QUESTION expr2 { 1 @$ C.Param (Implicit, patt $2) }
+param_list:
+ | param COMMA param_list { $1 :: $3 }
+ | param { [$1] }
| { [] }
-annot_field:
- | IDENT COLON expr2 {
- let ann = 2 @$ APatt (1 @$ PBind $1, $3) in
- 2 @$ Field.Of ($1, ann)
- }
- | IDENT EQ expr3 COLON expr2 {
- let ann = 4 @$ APatt (2 @$ PAnd (1 @$ PBind $1, patt $3), $5) in
- 4 @$ Field.Of ($1, ann)
- }
-annot_field_list:
- | annot_field COMMA annot_field_list { $1 :: $3 }
- | annot_field { [$1] }
+domain:
+ | expr3 { 1 @$ C.DExplicit (term $1) }
+ | QUESTION expr3 { 1 @$ C.DImplicitType (patt $2) }
+ | expr3 COLON expr2 { 2 @$ C.DAnnot (Explicit, patt $1, term $3) }
+ | QUESTION expr3 COLON expr2 { 3 @$ C.DAnnot (Implicit, patt $2, term $4) }
+domain_list:
+ | domain COMMA domain_list { $1 :: $3 }
+ | domain { [$1] }
| { [] }
-term_field:
- | IDENT COLON_EQ expr1 { 2 @$ Field.Of ($1, $3) }
-term_field_list:
- | term_field COMMA term_field_list { $1 :: $3 }
- | term_field { [$1] }
+ctor:
+ | TAG domain_list { 1 @$ C.Ctor (Tag $1, $2) }
+ctor_list:
+ | ctor SEMICOLON ctor_list { $1 :: $3 }
+ | ctor { [$1] }
| { [] }
%%