commit 098e731afa4bd512cf09ad7358204a0d26e6e118
parent 98a4af8e5798785f07f05471de0c031beb111085
Author: Robert Russell <robertrussell.72001@gmail.com>
Date: Wed, 20 Dec 2023 14:35:10 -0800
Add de Bruijn indices to surface language
Diffstat:
7 files changed, 43 insertions(+), 17 deletions(-)
diff --git a/abstract.ml b/abstract.ml
@@ -17,7 +17,7 @@ and term' =
| Data of ident * term Field.t list
| DataType of cons list
| Nat of Z.t
-| Ident of ident
+| Var of ident * int
(*
| Access of term * ident
| Index of term * term
diff --git a/cnctoabs.ml b/cnctoabs.ml
@@ -26,10 +26,14 @@ and term' = function
Loc.None @$ A.Case (Loc.None @$ A.PWild, Loc.None @$ A.Data ("", []));
])
| C.Annot (t, u) -> A.Annot (term t, term u)
-| C.Add (t, u) -> A.App (Loc.None @$ A.Ident "`+", [term t; term u])
-| C.Mul (t, u) -> A.App (Loc.None @$ A.Ident "`*", [term t; term u])
-| C.Or (t, u) -> A.App (Loc.None @$ A.Ident "`|", [term t; term u])
-| C.And (t, u) -> A.App (Loc.None @$ A.Ident "`&", [term t; term u])
+(* TODO: Idea: Apply variables in abstract syntax to have a level instead of
+ * an index (encoded as a negative index) and desugar binary operators to
+ * level 0 (i.e., index -1), so that they always refer to the top-level/
+ * built-in operations. *)
+| C.Add (t, u) -> A.App (Loc.None @$ A.Var ("`+", 0), [term t; term u])
+| C.Mul (t, u) -> A.App (Loc.None @$ A.Var ("`*", 0), [term t; term u])
+| C.Or (t, u) -> A.App (Loc.None @$ A.Var ("`|", 0), [term t; term u])
+| C.And (t, u) -> A.App (Loc.None @$ A.Var ("`&", 0), [term t; term u])
| C.App (f, args) -> A.App (term f, List.map term args)
| C.Fn (ps, body) -> A.Fn (List.map patt ps, term body)
| C.FnType (anns, cod) -> A.FnType (List.map annot anns, term cod)
@@ -42,7 +46,8 @@ and term' = function
| C.Unit -> A.Data ("", [])
| C.Nat n -> A.Nat n
| C.Underscore -> failwith "no impl: holes"
-| C.Ident x -> A.Ident x
+| C.IdentIndex (x, i) -> A.Var (x, i)
+| C.Ident x -> A.Var (x, 0)
| C.Paren _ -> failwith "impossible"
and case c = Node.map case' c
diff --git a/concrete.ml b/concrete.ml
@@ -26,6 +26,7 @@ and term' =
| Unit
| Nat of Z.t
| Underscore
+| IdentIndex of ident * int
| Ident of ident
| Paren of term
(*
@@ -87,6 +88,7 @@ and patt' = function
| Unit -> PUnit
| Nat n -> PNat n
| Underscore -> PWild
+| IdentIndex _ -> raise InvalidPattern
| Ident x -> PBind x
| Paren p -> PParen (patt p)
diff --git a/elab.ml b/elab.ml
@@ -127,13 +127,20 @@ let rec infer ctx (Node.Of (loc, t)) = match t with
| A.Data _ -> failwith "no impl: data"
| A.DataType _ -> failwith "no impl: data type"
| A.Nat n -> (I.Nat n, V.Intrin I.NatType)
-| A.Ident x ->
- let rec go i ys ts = match ys, ts with
- | Some y :: ys, t :: ts -> if x = y then (I.Var i, t) else go (i + 1) ys ts
- | None :: ys, _ :: ts -> go (i + 1) ys ts
- | [], [] -> error loc ("unbound identifier: " ^ x)
- | _, _ -> failwith "impossible" in
- go 0 ctx.names ctx.types
+| A.Var (x, i) ->
+ let rec go j i' ys ts = match i', ys, ts with
+ | 0, Some y :: ys, t :: ts ->
+ if x = y then (I.Var j, t) else go (j + 1) 0 ys ts
+ | i', Some y :: ys, t :: ts ->
+ go (j + 1) (if x = y then i' - 1 else i') ys ts
+ | i', None :: ys, t :: ts ->
+ go (j + 1) i' ys ts
+ | _, [], [] ->
+ (* TODO: We should print x ^ "#0" if the programmer wrote x ^ "#0". *)
+ let x = if i = 0 then x else x ^ "#" ^ string_of_int i in
+ error loc ("unbound identifier: " ^ x)
+ | _, _, _ -> failwith "impossible" in
+ go 0 i ctx.names ctx.types
and check ctx (Node.Of (loc, t') as t) va = match t', va with
(* TODO: Match, Data *)
diff --git a/lexer.mll b/lexer.mll
@@ -27,6 +27,7 @@ let greek = ugreek | lgreek | lgreekalt
let ident_start = latin | greek | '_'
let ident_cont = latin | greek | '_' | dnumeral | '\'' | '"'
+let ident = ident_start ident_cont*
(* This format is accepted by Z.of_string. *)
let dnumber = dnumeral (dnumeral | '_')*
@@ -84,9 +85,17 @@ rule token = parse
| "fn" { FN }
| "_" { UNDERSCORE }
-| ident_start ident_cont* { IDENT (Lexing.lexeme lexbuf) }
+| (ident as x) "#" (number as i) {
+ let z = Z.of_string i in
+ (* TODO: Standardize this limit. *)
+ if Z.fits_int z then
+ IDENT_INDEX (x, Z.to_int z)
+ else
+ error lexbuf "identifier index too big"
+}
+| ident as x { IDENT x }
-| number { NAT (Z.of_string (Lexing.lexeme lexbuf)) }
+| number as n { NAT (Z.of_string n) }
| number invalid_number_terminator
{ error lexbuf "invalid terminator after integer literal" }
diff --git a/main.ml b/main.ml
@@ -1,5 +1,4 @@
-(*let buf = Lexing.from_string "(fn A: Nat -> Type, f: (A 0, A 1) -> A 2, x: A 0, y: A 1 => f x y) (fn _: Nat => Nat) (fn x: Nat, y: Nat => x + y) 3 4 : Nat"*)
-(*let buf = Lexing.from_string "N := (A: Type, A, A -> A) -> A; ((fn m: N, n: N => fn A: Type, z: A, s: A -> A => m A (n A z s) s) (fn A: Type, z: A, s: A -> A => s (s z)) (fn A: Type, z: A, s: A -> A => s (s (s z)))) Nat 0 (fn n: Nat => n + 1)"*)
+(*
let buf = Lexing.from_string "\
N := (A: Type, A, A -> A) -> A;\n\
add: (N, N) -> N := fn m, n => fn A, z, s => m A (n A z s) s;\n\
@@ -8,6 +7,8 @@ let buf = Lexing.from_string "\
five := add two three;\n\
five Nat 0 (fn n => n + 1)\n\
"
+*)
+let buf = Lexing.from_string "Type := 3; Type := 4; Type#2"
let concrete =
try Parser.start Lexer.token buf with
diff --git a/parser.mly b/parser.mly
@@ -19,6 +19,7 @@ let (@$) = locate
%token IFF DO
%token FN
%token UNDERSCORE
+%token <Common.ident * int> IDENT_INDEX
%token <Common.ident> IDENT
%token <Z.t> NAT
@@ -93,6 +94,7 @@ expr7:
| expr8 { $1 }
expr8:
| UNDERSCORE { 1 @$ Underscore }
+ | IDENT_INDEX { 1 @$ let (x, i) = $1 in IdentIndex (x, i) }
| IDENT { 1 @$ Ident $1 }
| NAT { 1 @$ Nat $1 }
| HASH_LBRACK cons_list RBRACK { 1 @$ VariantType $2 }