dtlc

dependently-typed lambda calculus toy
git clone git://git.rr3.xyz/dtlc
Log | Files | Refs | README | LICENSE

commit e2dbc607501d98772fa74792d822547c74d8f619
parent 9709acfc587cdcb130e970f6728d396c0f85a0bf
Author: Robert Russell <robertrussell.72001@gmail.com>
Date:   Wed,  3 Jan 2024 17:32:36 -0800

Update parser

Diffstat:
MMakefile | 1-
Mcommon.ml | 24++++++++++++++++++++----
Mconcrete.ml | 88+++++++++++++++++++++----------------------------------------------------------
Dfield.ml | 7-------
Mlexer.mll | 43++++++++++++-------------------------------
Mparser.mly | 222++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------------
6 files changed, 197 insertions(+), 188 deletions(-)

diff --git a/Makefile b/Makefile @@ -2,7 +2,6 @@ SRC = \ loc.ml \ node.ml \ common.ml \ - field.ml \ concrete.ml \ parser.ml \ lexer.ml \ diff --git a/common.ml b/common.ml @@ -1,8 +1,24 @@ -type ident = string -type index = int -type level = int +type name = Name of string -module IdentMap = Map.Make(String) +module NameMap = Map.Make(struct + type t = name + let compare (Name x) (Name y) = String.compare x y +end) + +type tag = Tag of string + +module TagMap = Map.Make(struct + type t = tag + let compare (Tag x) (Tag y) = String.compare x y +end) + +type index = Index of int + +type plicity = +| Implicit +| Explicit + +let impossible () = failwith "impossible" (* Short-circuiting AND fold_left. *) let rec all2 f xs ys = match xs, ys with diff --git a/concrete.ml b/concrete.ml @@ -1,10 +1,8 @@ open Common -exception InvalidPattern - type term = term' Node.t and term' = -| Let of patt * term * term +| Let of defn list * term | Seq of term * term | Match of term * case list | If of term * term * term @@ -12,87 +10,47 @@ and term' = | Annot of term * term | Add of term * term | Mul of term * term -| Or of term * term -| And of term * term | App of term * term list -| Fn of patt list * term -| FnType of annot list * term -| Variant of ident * term list +| Fn of param list * term +| FnType of domain list * term +| Variant of tag * term list | VariantType of ctor list | Tuple of term list -| TupleType of annot list -| Record of term Field.t list -| RecordType of annot Field.t list +| TupleType of domain list | Unit | Nat of Z.t -| Underscore -| IdentIndex of ident * int -| Ident of ident +| Var of name * int option | Paren of term -(* -| Access of term * ident -| Index of term * term -| Deref of term -*) + +and defn = defn' Node.t +and defn' = +| Defn of patt * term +| RecDefn of patt * term and case = case' Node.t and case' = Case of patt * term +and param = param' Node.t +and param' = Param of plicity * patt + +and domain = domain' Node.t +and domain' = +| DExplicit of term +| DImplicitType of patt +| DAnnot of plicity * patt * term + and ctor = ctor' Node.t -and ctor' = Ctor of ident * annot list +and ctor' = Ctor of tag * domain list and patt = patt' Node.t and patt' = | PWild -| PBind of ident +| PBind of name | PAnnot of patt * term -| PVariant of ident * patt list +| PVariant of tag * patt list | PTuple of patt list -| PRecord of patt Field.t list | PUnit | PNat of Z.t | PAnd of patt * patt | POr of patt * patt | PParen of patt -(* -| PAssign of term -*) - -and annot = annot' Node.t -and annot' = -| AType of term -| APatt of patt * term -| AParen of annot - -let rec patt p = Node.map patt' p -and patt' = function -| Let _ -> raise InvalidPattern -| Seq _ -> raise InvalidPattern -| Match _ -> raise InvalidPattern -| If _ -> raise InvalidPattern -| Iff _ -> raise InvalidPattern -| Annot (p, t) -> PAnnot (patt p, t) -| Add _ -> raise InvalidPattern (* TODO: n + k patterns? *) -| Mul _ -> raise InvalidPattern -| Or (p, q) -> POr (patt p, patt q) -| And (p, q) -> PAnd (patt p, patt q) -| App _ -> raise InvalidPattern -| Fn _ -> raise InvalidPattern -| FnType _ -> raise InvalidPattern -| Variant (x, ts) -> PVariant (x, List.map patt ts) -| VariantType _ -> raise InvalidPattern -| Tuple ts -> PTuple (List.map patt ts) -| TupleType _ -> raise InvalidPattern -| Record tfs -> PRecord (List.map (Field.map patt) tfs) -| RecordType _ -> raise InvalidPattern -| Unit -> PUnit -| Nat n -> PNat n -| Underscore -> PWild -| IdentIndex _ -> raise InvalidPattern -| Ident x -> PBind x -| Paren p -> PParen (patt p) - -let rec annot = function -| Node.Of (loc, Annot (p, t)) -> loc @$ APatt (patt p, t) -| Node.Of (loc, Paren a) -> loc @$ AParen (annot a) -| Node.Of (loc, _) as n -> loc @$ AType n diff --git a/field.ml b/field.ml @@ -1,7 +0,0 @@ -open Common - -type 'a t = 'a t' Node.t -and 'a t' = Of of ident * 'a - -let map' f (Of (id, x)) = Of (id, f x) -let map f = Node.map (map' f) diff --git a/lexer.mll b/lexer.mll @@ -27,7 +27,8 @@ let greek = ugreek | lgreek | lgreekalt let ident_start = latin | greek | '_' let ident_cont = latin | greek | '_' | dnumeral | '\'' | '"' -let ident = ident_start ident_cont* +let name = ident_start ident_cont* +let tag = ident_cont+ (* This format is accepted by Z.of_string. *) let dnumber = dnumeral (dnumeral | '_')* @@ -48,10 +49,8 @@ rule token = parse | ";" { SEMICOLON } | ":" { COLON } | "," { COMMA } -| "." { DOT } -| "!" { EXCLAM } -| "@" { AT } | ":=" { COLON_EQ } +| "?" { QUESTION } | "->" { SINGLE_ARROW } | "=>" { DOUBLE_ARROW } @@ -59,7 +58,6 @@ rule token = parse | "(" { LPAREN } | ")" { RPAREN } -| "#[" { HASH_LBRACK } | "[" { LBRACK } | "]" { RBRACK } @@ -67,14 +65,14 @@ rule token = parse | "{" { LBRACE } | "}" { RBRACE } -| "=" { EQ } - | "+" { PLUS } | "*" { ASTERISK } | "|" { PIPE } | "&" { AMPERSAND } +| "rec" { REC } + | "match" { MATCH } | "if" { IF } @@ -86,16 +84,19 @@ rule token = parse | "fn" { FN } +| "@" (tag as t) { TAG t } + | "_" { UNDERSCORE } -| (ident as x) "#" (number as i) { + +| (name as x) "#" (number as i) { let z = Z.of_string i in (* TODO: Standardize this limit. *) if Z.fits_int z then - IDENT_INDEX (x, Z.to_int z) + INDEXED_NAME (x, Z.to_int z) else - error lexbuf "identifier index too big" + error lexbuf "name index too big" } -| ident as x { IDENT x } +| name as x { NAME x } | number as n { NAT (Z.of_string n) } | number invalid_number_terminator @@ -103,25 +104,5 @@ rule token = parse | _ { error lexbuf "invalid byte" } -(* -| "<" { LT } -| "<=" { LE } -| "=" { EQ } -| ">=" { GE } -| ">" { GT } -| "<>" { NE } -| "-" { MINUS } -| "/" { SLASH } -| "%" { PERCENT } -| "^" { CIRCUM } -| "~" { TILDE } -| "&" { AMP } -| "|" { PIPE } -| "not" { NOT } -| "and" { AND } -| "or" { OR } -| "imp" { IMP } -*) - { } diff --git a/parser.mly b/parser.mly @@ -1,68 +1,141 @@ %{ open Common -open Concrete - -let locate i x = Loc.of_position (Parsing.rhs_start_pos i) @$ x -let (@$) = locate +module C = Concrete + +(* To avoid reduce-reduce conflicts, we need to distinguish between raw + * expressions (which are generally of unknown syntactic category) and concrete + * terms/patternss/etc. In a hand-written parser, we'd have more control over + * when to decide the syntactic category of the top of the token stack, so + * we wouldn't need to make this distinction. *) +type expr = expr' Node.t +and expr' = +| Let of C.defn list * expr +| Seq of expr * expr +| Match of expr * C.case list +| If of expr * expr * expr +| Iff of expr * expr +| Annot of expr * expr +| Add of expr * expr +| Mul of expr * expr +| Or of expr * expr +| And of expr * expr +| App of expr * expr list +| Fn of C.param list * expr +| FnType of C.domain list * expr +| Variant of tag * expr list +| VariantType of C.ctor list +| Tuple of expr list +| TupleType of C.domain list +| Unit +| Nat of Z.t +| Underscore +| IndexedVar of name * int +| Var of name +| Paren of expr + +exception Error of Loc.t * string +let error loc msg = raise (Error (loc, msg)) + +let rec term (Node.Of (loc, e)) = loc @$ match e with +| Let (defns, body) -> C.Let (defns, term body) +| Seq (a, b) -> C.Seq (term a, term b) +| Match (scrutinee, cases) -> C.Match (term scrutinee, cases) +| If (c, t, f) -> C.If (term c, term t, term f) +| Iff (c, t) -> C.Iff (term c, term t) +| Annot (a, b) -> C.Iff (term a, term b) +| Add (a, b) -> C.Add (term a, term b) +| Mul (a, b) -> C.Mul (term a, term b) +| Or _ -> error loc "'|' can not appear in terms (yet)" +| And _ -> error loc "'&' can not appear in terms (yet)" +| App (f, args) -> C.App (term f, List.map term args) +| Fn (params, body) -> C.Fn (params, term body) +| FnType (doms, cod) -> C.FnType (doms, term cod) +| Variant (tag, fields) -> C.Variant (tag, List.map term fields) +| VariantType ctors -> C.VariantType ctors +| Tuple fields -> C.Tuple (List.map term fields) +| TupleType doms -> C.TupleType doms +| Unit -> C.Unit +| Nat n -> C.Nat n +| Underscore -> error loc "'_' can not appear in terms (yet)" +| IndexedVar (x, i) -> C.Var (x, Some i) +| Var x -> C.Var (x, None) +| Paren e -> C.Paren (term e) + +let rec patt (Node.Of (loc, e)) = loc @$ match e with +| Annot (p, t) -> C.PAnnot (patt p, term t) +| Or (p, q) -> C.POr (patt p, patt q) +| And (p, q) -> C.PAnd (patt p, patt q) +| Variant (tag, ps) -> C.PVariant (tag, List.map patt ps) +| Tuple ps -> C.PTuple (List.map patt ps) +| Unit -> C.PUnit +| Nat n -> C.PNat n +| Underscore -> C.PWild +| Var x -> C.PBind x +| Paren p -> C.PParen (patt p) +| Let _ | Seq _ | Match _ | If _ | Iff _ | Add _ | Mul _ +| App _ | Fn _ | FnType _ | VariantType _ | TupleType _ -> + error loc "invalid pattern" +| IndexedVar _ -> + error loc "only unindexed variables can appear in patterns" + +let loc_of i = Loc.of_position (Parsing.rhs_start_pos i) +let (@$) i x = loc_of i @$ x %} %token EOF -%token SEMICOLON COLON COMMA DOT EXCLAM AT COLON_EQ +%token SEMICOLON COLON COMMA COLON_EQ QUESTION %token SINGLE_ARROW DOUBLE_ARROW %token HASH_LPAREN LPAREN RPAREN -%token HASH_LBRACK LBRACK RBRACK +%token LBRACK RBRACK %token HASH_LBRACE LBRACE RBRACE -%token EQ %token PLUS ASTERISK %token PIPE AMPERSAND +%token REC %token MATCH %token IF THEN ELSE %token IFF DO %token FN %token UNDERSCORE -%token <Common.ident * int> IDENT_INDEX -%token <Common.ident> IDENT +%token <string> TAG +%token <string * int> INDEXED_NAME +%token <string> NAME %token <Z.t> NAT %start start %type <Concrete.term> start -%nonassoc PREC_TUPLE -%nonassoc SINGLE_ARROW - %% -(* Syntax idea for mutual recursion: - * Anonymous functions (no recursion support): - * fn x -> ... - * Named functions ((mutual) recursion support)): - * f := func x -> ..., - * h := func y -> ..., - * g := func z -> ...; - * (Think of ":= func" as being one symbol). - * We can make a similar distinction for types with "ty" vs "type". *) +start: expr0 EOF { term $1 } -start: expr0 EOF { $1 } - -expr0: (* Sequencing *) - | expr2 COLON_EQ expr1 SEMICOLON expr0 { 2 @$ Let (patt $1, $3, $5) } +expr0: (* Definitions and sequencing *) + | defn_list SEMICOLON expr0 { 2 @$ Let ($1, $3) } | expr1 SEMICOLON expr0 { 2 @$ Seq ($1, $3) } | expr1 { $1 } expr1: (* Control flow *) + (* TODO: Probably we should change the match syntax when staging is added, + * so that we can write something like `$match x ...` instead of + * `$(x match ...)` for conditional compilation. *) | expr2 MATCH LBRACE case_list RBRACE { 2 @$ Match($1, $4) } - (* if let: "if p := x then t else f"; and similarly for iff *) + (* TODO: if let: "if p := x then t else f"; and similarly for iff *) | IF expr1 THEN expr1 ELSE expr1 { 1 @$ If ($2, $4, $6) } | IFF expr1 DO expr1 { 1 @$ Iff ($2, $4) } - | FN expr2_list DOUBLE_ARROW expr1 { 1 @$ Fn (List.map patt $2, $4) } + | FN param_list DOUBLE_ARROW expr1 { 1 @$ Fn ($2, $4) } | expr2 { $1 } expr2: (* Type annotations (right-associative) *) | expr3 COLON expr2 { 2 @$ Annot ($1, $3) } | expr3 { $1 } expr3: (* Function arrow (right-associative) *) - | expr6 SINGLE_ARROW expr3 { 2 @$ FnType ([annot $1], $3) } - | paren_expr_list SINGLE_ARROW expr3 { 2 @$ FnType (List.map annot $1, $3) } + | expr6 SINGLE_ARROW expr3 { 2 @$ FnType ([1 @$ C.DExplicit (term $1)], $3) } + | LBRACK domain_list RBRACK SINGLE_ARROW expr3 { + if List.length $2 = 0 then + error (loc_of 2) "empty domain" + else + 4 @$ FnType ($2, $5) + } | expr4 { $1 } -expr4: (* Additive binary operators (left-associative) *) +expr4: (* Additive binary operators (left-associative; + * pairwise incomparable precedence) *) | expr4add PLUS expr5 { 2 @$ Add ($1, $3) } | expr4or PIPE expr5 { 2 @$ Or ($1, $3) } | expr5 { $1 } @@ -72,7 +145,8 @@ expr4add: expr4or: | expr4or PIPE expr5 { 2 @$ Or ($1, $3) } | expr5 { $1 } -expr5: (* Multiplicative binary operators (left-associative) *) +expr5: (* Multiplicative binary operators (left-associative; + * pairwise incomparable precedence) *) | expr5mul ASTERISK expr6 { 2 @$ Mul ($1, $3) } | expr5and AMPERSAND expr6 { 2 @$ And ($1, $3) } | expr6 { $1 } @@ -83,78 +157,66 @@ expr5and: | expr5and AMPERSAND expr6 { 2 @$ And ($1, $3) } | expr6 { $1 } expr6: - | expr7 expr7 expr7_apposition { 1 @$ App ($1, $2 :: $3) } - | AT IDENT expr7_apposition { 1 @$ Variant ($2, $3) } + | expr7 expr7 expr7_apposition_list { 1 @$ App ($1, $2 :: $3) } + | TAG expr7_apposition_list { 1 @$ Variant (Tag $1, $2) } | expr7 { $1 } expr7: - (* - | expr7 DOT IDENT { 2 @$ Access ($1, $3) } - | expr7 DOT LPAREN expr0 RPAREN { 2 @$ Index ($1, $4) } - | expr7 EXCLAM { 2 @$ Deref $1 } - *) - | expr8 { $1 } -expr8: | UNDERSCORE { 1 @$ Underscore } - | IDENT_INDEX { 1 @$ let (x, i) = $1 in IdentIndex (x, i) } - | IDENT { 1 @$ Ident $1 } + | INDEXED_NAME { 1 @$ let (x, i) = $1 in IndexedVar (Name x, i) } + | NAME { 1 @$ Var (Name $1) } | NAT { 1 @$ Nat $1 } - | HASH_LBRACK ctor_list RBRACK { 1 @$ VariantType $2 } - | HASH_LPAREN expr2_list RPAREN { 1 @$ TupleType (List.map annot $2) } - | HASH_LBRACE annot_field_list RBRACE { 1 @$ RecordType $2 } - | paren_expr_list %prec PREC_TUPLE { 1 @$ Tuple $1 } - | LBRACE term_field_list RBRACE { 1 @$ Record $2 } + | HASH_LBRACE ctor_list RBRACE { 1 @$ VariantType $2 } + | HASH_LPAREN domain_list RPAREN { 1 @$ TupleType $2 } + | LPAREN expr1 COMMA expr1_comma_list RPAREN { 1 @$ Tuple ($2 :: $4) } | LPAREN RPAREN { 1 @$ Unit } | LPAREN expr0 RPAREN { 1 @$ Paren $2 } -expr1_list: - | expr1 COMMA expr1_list { $1 :: $3 } +expr1_comma_list: + | expr1 COMMA expr1_comma_list { $1 :: $3 } | expr1 { [$1] } | { [] } -expr2_list: - | expr2 COMMA expr2_list { $1 :: $3 } - | expr2 { [$1] } +expr7_apposition_list: + | expr7 expr7_apposition_list { $1 :: $2 } | { [] } -expr7_apposition: - | expr7 expr7_apposition { $1 :: $2 } - | { [] } - -paren_expr_list: LPAREN expr1 COMMA expr1_list RPAREN { $2 :: $4 } +defn: + | expr2 COLON_EQ expr1 { 2 @$ C.Defn (patt $1, term $3) } + | expr2 COLON_EQ REC expr1 { 2 @$ C.RecDefn (patt $1, term $4) } +defn_list: + | defn COMMA defn_list { $1 :: $3 } + | defn { [$1] } case: - | expr2 DOUBLE_ARROW expr1 { 2 @$ Case (patt $1, $3) } + | expr2 DOUBLE_ARROW expr1 { 2 @$ C.Case (patt $1, term $3) } case_list: | case COMMA case_list { $1 :: $3 } | case { [$1] } | { [] } -ctor: - | AT IDENT expr2_list { 1 @$ Ctor ($2, List.map annot $3) } -ctor_list: - | ctor SEMICOLON ctor_list { $1 :: $3 } - | ctor { [$1] } +param: + | expr2 { 1 @$ C.Param (Explicit, patt $1) } + | QUESTION expr2 { 1 @$ C.Param (Implicit, patt $2) } +param_list: + | param COMMA param_list { $1 :: $3 } + | param { [$1] } | { [] } -annot_field: - | IDENT COLON expr2 { - let ann = 2 @$ APatt (1 @$ PBind $1, $3) in - 2 @$ Field.Of ($1, ann) - } - | IDENT EQ expr3 COLON expr2 { - let ann = 4 @$ APatt (2 @$ PAnd (1 @$ PBind $1, patt $3), $5) in - 4 @$ Field.Of ($1, ann) - } -annot_field_list: - | annot_field COMMA annot_field_list { $1 :: $3 } - | annot_field { [$1] } +domain: + | expr3 { 1 @$ C.DExplicit (term $1) } + | QUESTION expr3 { 1 @$ C.DImplicitType (patt $2) } + | expr3 COLON expr2 { 2 @$ C.DAnnot (Explicit, patt $1, term $3) } + | QUESTION expr3 COLON expr2 { 3 @$ C.DAnnot (Implicit, patt $2, term $4) } +domain_list: + | domain COMMA domain_list { $1 :: $3 } + | domain { [$1] } | { [] } -term_field: - | IDENT COLON_EQ expr1 { 2 @$ Field.Of ($1, $3) } -term_field_list: - | term_field COMMA term_field_list { $1 :: $3 } - | term_field { [$1] } +ctor: + | TAG domain_list { 1 @$ C.Ctor (Tag $1, $2) } +ctor_list: + | ctor SEMICOLON ctor_list { $1 :: $3 } + | ctor { [$1] } | { [] } %%