Update parser - dtlc - dependently-typed lambda calculus toy

commit e2dbc607501d98772fa74792d822547c74d8f619
parent 9709acfc587cdcb130e970f6728d396c0f85a0bf
Author: Robert Russell <robertrussell.72001@gmail.com>
Date:   Wed,  3 Jan 2024 17:32:36 -0800

Update parser

Diffstat:
M Makefile  | 1 -
M common.ml  | 24 ++++++++++++++++++++----
M concrete.ml  | 88 +++++++++++++++++++++----------------------------------------------------------
D field.ml  | 7 -------
M lexer.mll  | 43 ++++++++++++-------------------------------
M parser.mly  | 222 ++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------

6 files changed, 197 insertions(+), 188 deletions(-)
diff --git a/Makefile b/Makefile
@@ -2,7 +2,6 @@ SRC = \
 	loc.ml \
 	node.ml \
 	common.ml \
-	field.ml \
 	concrete.ml \
 	parser.ml \
 	lexer.ml \
diff --git a/common.ml b/common.ml
@@ -1,8 +1,24 @@
-type ident = string
-type index = int
-type level = int
+type name = Name of string
 
-module IdentMap = Map.Make(String)
+module NameMap = Map.Make(struct
+	type t = name
+	let compare (Name x) (Name y) = String.compare x y
+end)
+
+type tag = Tag of string
+
+module TagMap = Map.Make(struct
+	type t = tag
+	let compare (Tag x) (Tag y) = String.compare x y
+end)
+
+type index = Index of int
+
+type plicity =
+| Implicit
+| Explicit
+
+let impossible () = failwith "impossible"
 
 (* Short-circuiting AND fold_left. *)
 let rec all2 f xs ys = match xs, ys with
diff --git a/concrete.ml b/concrete.ml
@@ -1,10 +1,8 @@
 open Common
 
-exception InvalidPattern
-
 type term = term' Node.t
 and term' =
-| Let of patt * term * term
+| Let of defn list * term
 | Seq of term * term
 | Match of term * case list
 | If of term * term * term
@@ -12,87 +10,47 @@ and term' =
 | Annot of term * term
 | Add of term * term
 | Mul of term * term
-| Or of term * term
-| And of term * term
 | App of term * term list
-| Fn of patt list * term
-| FnType of annot list * term
-| Variant of ident * term list
+| Fn of param list * term
+| FnType of domain list * term
+| Variant of tag * term list
 | VariantType of ctor list
 | Tuple of term list
-| TupleType of annot list
-| Record of term Field.t list
-| RecordType of annot Field.t list
+| TupleType of domain list
 | Unit
 | Nat of Z.t
-| Underscore
-| IdentIndex of ident * int
-| Ident of ident
+| Var of name * int option
 | Paren of term
-(*
-| Access of term * ident
-| Index of term * term
-| Deref of term
-*)
+
+and defn = defn' Node.t
+and defn' =
+| Defn of patt * term
+| RecDefn of patt * term
 
 and case = case' Node.t
 and case' = Case of patt * term
 
+and param = param' Node.t
+and param' = Param of plicity * patt
+
+and domain = domain' Node.t
+and domain' =
+| DExplicit of term
+| DImplicitType of patt
+| DAnnot of plicity * patt * term
+
 and ctor = ctor' Node.t
-and ctor' = Ctor of ident * annot list
+and ctor' = Ctor of tag * domain list
 
 and patt = patt' Node.t
 and patt' =
 | PWild
-| PBind of ident
+| PBind of name
 | PAnnot of patt * term
-| PVariant of ident * patt list
+| PVariant of tag * patt list
 | PTuple of patt list
-| PRecord of patt Field.t list
 | PUnit
 | PNat of Z.t
 | PAnd of patt * patt
 | POr of patt * patt
 | PParen of patt
-(*
-| PAssign of term
-*)
-
-and annot = annot' Node.t
-and annot' =
-| AType of term
-| APatt of patt * term
-| AParen of annot
-
-let rec patt p = Node.map patt' p
-and patt' = function
-| Let _ -> raise InvalidPattern
-| Seq _ -> raise InvalidPattern
-| Match _ -> raise InvalidPattern
-| If _ -> raise InvalidPattern
-| Iff _ -> raise InvalidPattern
-| Annot (p, t) -> PAnnot (patt p, t)
-| Add _ -> raise InvalidPattern (* TODO: n + k patterns? *)
-| Mul _ -> raise InvalidPattern
-| Or (p, q) -> POr (patt p, patt q)
-| And (p, q) -> PAnd (patt p, patt q)
-| App _ -> raise InvalidPattern
-| Fn _ -> raise InvalidPattern
-| FnType _ -> raise InvalidPattern
-| Variant (x, ts) -> PVariant (x, List.map patt ts)
-| VariantType _ -> raise InvalidPattern
-| Tuple ts -> PTuple (List.map patt ts)
-| TupleType _ -> raise InvalidPattern
-| Record tfs -> PRecord (List.map (Field.map patt) tfs)
-| RecordType _ -> raise InvalidPattern
-| Unit -> PUnit
-| Nat n -> PNat n
-| Underscore -> PWild
-| IdentIndex _ -> raise InvalidPattern
-| Ident x -> PBind x
-| Paren p -> PParen (patt p)
-
-let rec annot = function
-| Node.Of (loc, Annot (p, t)) -> loc @$ APatt (patt p, t)
-| Node.Of (loc, Paren a) -> loc @$ AParen (annot a)
-| Node.Of (loc, _) as n -> loc @$ AType n
diff --git a/field.ml b/field.ml
@@ -1,7 +0,0 @@
-open Common
-
-type 'a t = 'a t' Node.t
-and 'a t' = Of of ident * 'a
-
-let map' f (Of (id, x)) = Of (id, f x)
-let map f = Node.map (map' f)
diff --git a/lexer.mll b/lexer.mll
@@ -27,7 +27,8 @@ let greek = ugreek | lgreek | lgreekalt
 
 let ident_start = latin | greek | '_'
 let ident_cont  = latin | greek | '_' | dnumeral | '\'' | '"'
-let ident = ident_start ident_cont*
+let name = ident_start ident_cont*
+let tag = ident_cont+
 
 (* This format is accepted by Z.of_string. *)
 let dnumber = dnumeral (dnumeral | '_')*
@@ -48,10 +49,8 @@ rule token = parse
 | ";"  { SEMICOLON }
 | ":"  { COLON }
 | ","  { COMMA }
-| "."  { DOT }
-| "!"  { EXCLAM }
-| "@"  { AT }
 | ":=" { COLON_EQ }
+| "?"  { QUESTION }
 | "->" { SINGLE_ARROW }
 | "=>" { DOUBLE_ARROW }
 
@@ -59,7 +58,6 @@ rule token = parse
 | "("  { LPAREN }
 | ")"  { RPAREN }
 
-| "#[" { HASH_LBRACK }
 | "["  { LBRACK }
 | "]"  { RBRACK }
 
@@ -67,14 +65,14 @@ rule token = parse
 | "{"  { LBRACE }
 | "}"  { RBRACE }
 
-| "=" { EQ }
-
 | "+" { PLUS }
 | "*" { ASTERISK }
 
 | "|" { PIPE }
 | "&" { AMPERSAND }
 
+| "rec" { REC }
+
 | "match" { MATCH }
 
 | "if"   { IF }
@@ -86,16 +84,19 @@ rule token = parse
 
 | "fn" { FN }
 
+| "@" (tag as t) { TAG t }
+
 | "_" { UNDERSCORE }
-| (ident as x) "#" (number as i) {
+
+| (name as x) "#" (number as i) {
 	let z = Z.of_string i in
 	(* TODO: Standardize this limit. *)
 	if Z.fits_int z then
-		IDENT_INDEX (x, Z.to_int z)
+		INDEXED_NAME (x, Z.to_int z)
 	else
-		error lexbuf "identifier index too big"
+		error lexbuf "name index too big"
 }
-| ident as x { IDENT x }
+| name as x { NAME x }
 
 | number as n { NAT (Z.of_string n) }
 | number invalid_number_terminator
@@ -103,25 +104,5 @@ rule token = parse
 
 | _ { error lexbuf "invalid byte" }
 
-(*
-| "<"  { LT }
-| "<=" { LE }
-| "="  { EQ }
-| ">=" { GE }
-| ">"  { GT }
-| "<>" { NE }
-| "-" { MINUS }
-| "/" { SLASH }
-| "%" { PERCENT }
-| "^" { CIRCUM }
-| "~" { TILDE }
-| "&" { AMP }
-| "|" { PIPE }
-| "not" { NOT }
-| "and" { AND }
-| "or"  { OR }
-| "imp" { IMP }
-*)
-
 {
 }
diff --git a/parser.mly b/parser.mly
@@ -1,68 +1,141 @@
 %{
 open Common
-open Concrete
-
-let locate i x = Loc.of_position (Parsing.rhs_start_pos i) @$ x
-let (@$) = locate
+module C = Concrete
+
+(* To avoid reduce-reduce conflicts, we need to distinguish between raw
+ * expressions (which are generally of unknown syntactic category) and concrete
+ * terms/patternss/etc. In a hand-written parser, we'd have more control over
+ * when to decide the syntactic category of the top of the token stack, so
+ * we wouldn't need to make this distinction. *)
+type expr = expr' Node.t
+and expr' =
+| Let of C.defn list * expr
+| Seq of expr * expr
+| Match of expr * C.case list
+| If of expr * expr * expr
+| Iff of expr * expr
+| Annot of expr * expr
+| Add of expr * expr
+| Mul of expr * expr
+| Or of expr * expr
+| And of expr * expr
+| App of expr * expr list
+| Fn of C.param list * expr
+| FnType of C.domain list * expr
+| Variant of tag * expr list
+| VariantType of C.ctor list
+| Tuple of expr list
+| TupleType of C.domain list
+| Unit
+| Nat of Z.t
+| Underscore
+| IndexedVar of name * int
+| Var of name
+| Paren of expr
+
+exception Error of Loc.t * string
+let error loc msg = raise (Error (loc, msg))
+
+let rec term (Node.Of (loc, e)) = loc @$ match e with
+| Let (defns, body) -> C.Let (defns, term body)
+| Seq (a, b) -> C.Seq (term a, term b)
+| Match (scrutinee, cases) -> C.Match (term scrutinee, cases)
+| If (c, t, f) -> C.If (term c, term t, term f)
+| Iff (c, t) -> C.Iff (term c, term t)
+| Annot (a, b) -> C.Iff (term a, term b)
+| Add (a, b) -> C.Add (term a, term b)
+| Mul (a, b) -> C.Mul (term a, term b)
+| Or _ -> error loc "'|' can not appear in terms (yet)"
+| And _ -> error loc "'&' can not appear in terms (yet)"
+| App (f, args) -> C.App (term f, List.map term args)
+| Fn (params, body) -> C.Fn (params, term body)
+| FnType (doms, cod) -> C.FnType (doms, term cod)
+| Variant (tag, fields) -> C.Variant (tag, List.map term fields)
+| VariantType ctors -> C.VariantType ctors
+| Tuple fields -> C.Tuple (List.map term fields)
+| TupleType doms -> C.TupleType doms
+| Unit -> C.Unit
+| Nat n -> C.Nat n
+| Underscore -> error loc "'_' can not appear in terms (yet)"
+| IndexedVar (x, i) -> C.Var (x, Some i)
+| Var x -> C.Var (x, None)
+| Paren e -> C.Paren (term e)
+
+let rec patt (Node.Of (loc, e)) = loc @$ match e with
+| Annot (p, t) -> C.PAnnot (patt p, term t)
+| Or (p, q) -> C.POr (patt p, patt q)
+| And (p, q) -> C.PAnd (patt p, patt q)
+| Variant (tag, ps) -> C.PVariant (tag, List.map patt ps)
+| Tuple ps -> C.PTuple (List.map patt ps)
+| Unit -> C.PUnit
+| Nat n -> C.PNat n
+| Underscore -> C.PWild
+| Var x -> C.PBind x
+| Paren p -> C.PParen (patt p)
+| Let _ | Seq _ | Match _ | If _ | Iff _ | Add _ | Mul _
+| App _ | Fn _ | FnType _ | VariantType _ | TupleType _ ->
+	error loc "invalid pattern"
+| IndexedVar _ ->
+	error loc "only unindexed variables can appear in patterns"
+
+let loc_of i = Loc.of_position (Parsing.rhs_start_pos i)
+let (@$) i x = loc_of i @$ x
 %}
 
 %token EOF
-%token SEMICOLON COLON COMMA DOT EXCLAM AT COLON_EQ 
+%token SEMICOLON COLON COMMA COLON_EQ QUESTION
 %token SINGLE_ARROW DOUBLE_ARROW
 %token HASH_LPAREN LPAREN RPAREN
-%token HASH_LBRACK LBRACK RBRACK
+%token LBRACK RBRACK
 %token HASH_LBRACE LBRACE RBRACE
-%token EQ
 %token PLUS ASTERISK
 %token PIPE AMPERSAND
+%token REC
 %token MATCH
 %token IF THEN ELSE
 %token IFF DO
 %token FN
 %token UNDERSCORE
-%token <Common.ident * int> IDENT_INDEX
-%token <Common.ident> IDENT
+%token <string> TAG
+%token <string * int> INDEXED_NAME
+%token <string> NAME
 %token <Z.t> NAT
 
 %start start
 %type <Concrete.term> start
 
-%nonassoc PREC_TUPLE
-%nonassoc SINGLE_ARROW
-
 %%
 
-(* Syntax idea for mutual recursion:
- *     Anonymous functions (no recursion support):
- *         fn x -> ...
- *     Named functions ((mutual) recursion support)):
- *         f := func x -> ...,
- *         h := func y -> ...,
- *         g := func z -> ...;
- *     (Think of ":= func" as being one symbol).
- * We can make a similar distinction for types with "ty" vs "type". *)
+start: expr0 EOF { term $1 }
 
-start: expr0 EOF { $1 }
-
-expr0: (* Sequencing *)
-	| expr2 COLON_EQ expr1 SEMICOLON expr0 { 2 @$ Let (patt $1, $3, $5) }
+expr0: (* Definitions and sequencing *)
+	| defn_list SEMICOLON expr0 { 2 @$ Let ($1, $3) }
 	| expr1 SEMICOLON expr0 { 2 @$ Seq ($1, $3) }
 	| expr1 { $1 }
 expr1: (* Control flow *)
+	(* TODO: Probably we should change the match syntax when staging is added,
+	 * so that we can write something like `$match x ...` instead of
+	 * `$(x match ...)` for conditional compilation. *)
 	| expr2 MATCH LBRACE case_list RBRACE { 2 @$ Match($1, $4) }
-	(* if let: "if p := x then t else f"; and similarly for iff *)
+	(* TODO: if let: "if p := x then t else f"; and similarly for iff *)
 	| IF expr1 THEN expr1 ELSE expr1 { 1 @$ If ($2, $4, $6) }
 	| IFF expr1 DO expr1 { 1 @$ Iff ($2, $4) }
-	| FN expr2_list DOUBLE_ARROW expr1 { 1 @$ Fn (List.map patt $2, $4) }
+	| FN param_list DOUBLE_ARROW expr1 { 1 @$ Fn ($2, $4) }
 	| expr2 { $1 }
 expr2: (* Type annotations (right-associative) *)
 	| expr3 COLON expr2 { 2 @$ Annot ($1, $3) }
 	| expr3 { $1 }
 expr3: (* Function arrow (right-associative) *)
-	| expr6 SINGLE_ARROW expr3 { 2 @$ FnType ([annot $1], $3) }
-	| paren_expr_list SINGLE_ARROW expr3 { 2 @$ FnType (List.map annot $1, $3) }
+	| expr6 SINGLE_ARROW expr3 { 2 @$ FnType ([1 @$ C.DExplicit (term $1)], $3) }
+	| LBRACK domain_list RBRACK SINGLE_ARROW expr3 {
+		if List.length $2 = 0 then
+			error (loc_of 2) "empty domain"
+		else
+			4 @$ FnType ($2, $5)
+	}
 	| expr4 { $1 }
-expr4: (* Additive binary operators (left-associative) *)
+expr4: (* Additive binary operators (left-associative;
+        * pairwise incomparable precedence) *)
 	| expr4add PLUS expr5 { 2 @$ Add ($1, $3) }
 	| expr4or PIPE expr5 { 2 @$ Or ($1, $3) }
 	| expr5 { $1 }
@@ -72,7 +145,8 @@ expr4add:
 expr4or:
 	| expr4or PIPE expr5 { 2 @$ Or ($1, $3) }
 	| expr5 { $1 }
-expr5: (* Multiplicative binary operators (left-associative) *)
+expr5: (* Multiplicative binary operators (left-associative;
+        * pairwise incomparable precedence) *)
 	| expr5mul ASTERISK expr6 { 2 @$ Mul ($1, $3) }
 	| expr5and AMPERSAND expr6 { 2 @$ And ($1, $3) }
 	| expr6 { $1 }
@@ -83,78 +157,66 @@ expr5and:
 	| expr5and AMPERSAND expr6 { 2 @$ And ($1, $3) }
 	| expr6 { $1 }
 expr6:
-	| expr7 expr7 expr7_apposition { 1 @$ App ($1, $2 :: $3) }
-	| AT IDENT expr7_apposition { 1 @$ Variant ($2, $3) }
+	| expr7 expr7 expr7_apposition_list { 1 @$ App ($1, $2 :: $3) }
+	| TAG expr7_apposition_list { 1 @$ Variant (Tag $1, $2) }
 	| expr7 { $1 }
 expr7:
-	(*
-	| expr7 DOT IDENT { 2 @$ Access ($1, $3) }
-	| expr7 DOT LPAREN expr0 RPAREN { 2 @$ Index ($1, $4) }
-	| expr7 EXCLAM { 2 @$ Deref $1 }
-	*)
-	| expr8 { $1 }
-expr8:
 	| UNDERSCORE { 1 @$ Underscore }
-	| IDENT_INDEX { 1 @$ let (x, i) = $1 in IdentIndex (x, i) }
-	| IDENT { 1 @$ Ident $1 }
+	| INDEXED_NAME { 1 @$ let (x, i) = $1 in IndexedVar (Name x, i) }
+	| NAME { 1 @$ Var (Name $1) }
 	| NAT { 1 @$ Nat $1 }
-	| HASH_LBRACK ctor_list RBRACK { 1 @$ VariantType $2 }
-	| HASH_LPAREN expr2_list RPAREN { 1 @$ TupleType (List.map annot $2) }
-	| HASH_LBRACE annot_field_list RBRACE { 1 @$ RecordType $2 }
-	| paren_expr_list %prec PREC_TUPLE { 1 @$ Tuple $1 }
-	| LBRACE term_field_list RBRACE { 1 @$ Record $2 }
+	| HASH_LBRACE ctor_list RBRACE { 1 @$ VariantType $2 }
+	| HASH_LPAREN domain_list RPAREN { 1 @$ TupleType $2 }
+	| LPAREN expr1 COMMA expr1_comma_list RPAREN { 1 @$ Tuple ($2 :: $4) }
 	| LPAREN RPAREN { 1 @$ Unit }
 	| LPAREN expr0 RPAREN { 1 @$ Paren $2 }
 
-expr1_list:
-	| expr1 COMMA expr1_list { $1 :: $3 }
+expr1_comma_list:
+	| expr1 COMMA expr1_comma_list { $1 :: $3 }
 	| expr1 { [$1] }
 	| { [] }
 
-expr2_list:
-	| expr2 COMMA expr2_list { $1 :: $3 }
-	| expr2 { [$1] }
+expr7_apposition_list:
+	| expr7 expr7_apposition_list { $1 :: $2 }
 	| { [] }
 
-expr7_apposition:
-	| expr7 expr7_apposition { $1 :: $2 }
-	| { [] }
-
-paren_expr_list: LPAREN expr1 COMMA expr1_list RPAREN { $2 :: $4 }
+defn:
+	| expr2 COLON_EQ expr1 { 2 @$ C.Defn (patt $1, term $3) }
+	| expr2 COLON_EQ REC expr1 { 2 @$ C.RecDefn (patt $1, term $4) }
+defn_list:
+	| defn COMMA defn_list { $1 :: $3 }
+	| defn { [$1] }
 
 case:
-	| expr2 DOUBLE_ARROW expr1 { 2 @$ Case (patt $1, $3) }
+	| expr2 DOUBLE_ARROW expr1 { 2 @$ C.Case (patt $1, term $3) }
 case_list:
 	| case COMMA case_list { $1 :: $3 }
 	| case { [$1] }
 	| { [] }
 
-ctor:
-	| AT IDENT expr2_list { 1 @$ Ctor ($2, List.map annot $3) }
-ctor_list:
-	| ctor SEMICOLON ctor_list { $1 :: $3 }
-	| ctor { [$1] }
+param:
+	| expr2 { 1 @$ C.Param (Explicit, patt $1) }
+	| QUESTION expr2 { 1 @$ C.Param (Implicit, patt $2) }
+param_list:
+	| param COMMA param_list { $1 :: $3 }
+	| param { [$1] }
 	| { [] }
 
-annot_field:
-	| IDENT COLON expr2 {
-		let ann = 2 @$ APatt (1 @$ PBind $1, $3) in
-		2 @$ Field.Of ($1, ann)
-	}
-	| IDENT EQ expr3 COLON expr2 {
-		let ann = 4 @$ APatt (2 @$ PAnd (1 @$ PBind $1, patt $3), $5) in
-		4 @$ Field.Of ($1, ann)
-	}
-annot_field_list:
-	| annot_field COMMA annot_field_list { $1 :: $3 }
-	| annot_field { [$1] }
+domain:
+	| expr3 { 1 @$ C.DExplicit (term $1) }
+	| QUESTION expr3 { 1 @$ C.DImplicitType (patt $2) }
+	| expr3 COLON expr2 { 2 @$ C.DAnnot (Explicit, patt $1, term $3) }
+	| QUESTION expr3 COLON expr2 { 3 @$ C.DAnnot (Implicit, patt $2, term $4) }
+domain_list:
+	| domain COMMA domain_list { $1 :: $3 }
+	| domain { [$1] }
 	| { [] }
 
-term_field:
-	| IDENT COLON_EQ expr1 { 2 @$ Field.Of ($1, $3) }
-term_field_list:
-	| term_field COMMA term_field_list { $1 :: $3 }
-	| term_field { [$1] }
+ctor:
+	| TAG domain_list { 1 @$ C.Ctor (Tag $1, $2) }
+ctor_list:
+	| ctor SEMICOLON ctor_list { $1 :: $3 }
+	| ctor { [$1] }
 	| { [] }
 
 %%

	dtlc dependently-typed lambda calculus toy
	git clone git://git.rr3.xyz/dtlc
	Log \| Files \| Refs \| README \| LICENSE

M	Makefile	\|	1	-
M	common.ml	\|	24	++++++++++++++++++++----
M	concrete.ml	\|	88	+++++++++++++++++++++----------------------------------------------------------
D	field.ml	\|	7	-------
M	lexer.mll	\|	43	++++++++++++-------------------------------
M	parser.mly	\|	222	++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------