Add de Bruijn indices to surface language - dtlc - dependently-typed lambda calculus toy

commit 098e731afa4bd512cf09ad7358204a0d26e6e118
parent 98a4af8e5798785f07f05471de0c031beb111085
Author: Robert Russell <robertrussell.72001@gmail.com>
Date:   Wed, 20 Dec 2023 14:35:10 -0800

Add de Bruijn indices to surface language

Diffstat:
M abstract.ml  | 2 +-
M cnctoabs.ml  | 15 ++++++++++-----
M concrete.ml  | 2 ++
M elab.ml  | 21 ++++++++++++++-------
M lexer.mll  | 13 +++++++++++--
M main.ml  | 5 +++--
M parser.mly  | 2 ++

7 files changed, 43 insertions(+), 17 deletions(-)
diff --git a/abstract.ml b/abstract.ml
@@ -17,7 +17,7 @@ and term' =
 | Data of ident * term Field.t list
 | DataType of cons list
 | Nat of Z.t
-| Ident of ident
+| Var of ident * int
 (*
 | Access of term * ident
 | Index of term * term
diff --git a/cnctoabs.ml b/cnctoabs.ml
@@ -26,10 +26,14 @@ and term' = function
 		Loc.None @$ A.Case (Loc.None @$ A.PWild, Loc.None @$ A.Data ("", []));
 	])
 | C.Annot (t, u) -> A.Annot (term t, term u)
-| C.Add (t, u) -> A.App (Loc.None @$ A.Ident "`+", [term t; term u])
-| C.Mul (t, u) -> A.App (Loc.None @$ A.Ident "`*", [term t; term u])
-| C.Or (t, u) -> A.App (Loc.None @$ A.Ident "`|", [term t; term u])
-| C.And (t, u) -> A.App (Loc.None @$ A.Ident "`&", [term t; term u])
+(* TODO: Idea: Apply variables in abstract syntax to have a level instead of
+ * an index (encoded as a negative index) and desugar binary operators to
+ * level 0 (i.e., index -1), so that they always refer to the top-level/
+ * built-in operations. *)
+| C.Add (t, u) -> A.App (Loc.None @$ A.Var ("`+", 0), [term t; term u])
+| C.Mul (t, u) -> A.App (Loc.None @$ A.Var ("`*", 0), [term t; term u])
+| C.Or  (t, u) -> A.App (Loc.None @$ A.Var ("`|", 0), [term t; term u])
+| C.And (t, u) -> A.App (Loc.None @$ A.Var ("`&", 0), [term t; term u])
 | C.App (f, args) -> A.App (term f, List.map term args)
 | C.Fn (ps, body) -> A.Fn (List.map patt ps, term body)
 | C.FnType (anns, cod) -> A.FnType (List.map annot anns, term cod)
@@ -42,7 +46,8 @@ and term' = function
 | C.Unit -> A.Data ("", [])
 | C.Nat n -> A.Nat n
 | C.Underscore -> failwith "no impl: holes"
-| C.Ident x -> A.Ident x
+| C.IdentIndex (x, i) -> A.Var (x, i)
+| C.Ident x -> A.Var (x, 0)
 | C.Paren _ -> failwith "impossible"
 
 and case c = Node.map case' c
diff --git a/concrete.ml b/concrete.ml
@@ -26,6 +26,7 @@ and term' =
 | Unit
 | Nat of Z.t
 | Underscore
+| IdentIndex of ident * int
 | Ident of ident
 | Paren of term
 (*
@@ -87,6 +88,7 @@ and patt' = function
 | Unit -> PUnit
 | Nat n -> PNat n
 | Underscore -> PWild
+| IdentIndex _ -> raise InvalidPattern
 | Ident x -> PBind x
 | Paren p -> PParen (patt p)
 
diff --git a/elab.ml b/elab.ml
@@ -127,13 +127,20 @@ let rec infer ctx (Node.Of (loc, t)) = match t with
 | A.Data _ -> failwith "no impl: data"
 | A.DataType _ -> failwith "no impl: data type"
 | A.Nat n -> (I.Nat n, V.Intrin I.NatType)
-| A.Ident x ->
-	let rec go i ys ts = match ys, ts with
-	| Some y :: ys, t :: ts -> if x = y then (I.Var i, t) else go (i + 1) ys ts
-	| None :: ys, _ :: ts -> go (i + 1) ys ts
-	| [], [] -> error loc ("unbound identifier: " ^ x)
-	| _, _ -> failwith "impossible" in
-	go 0 ctx.names ctx.types
+| A.Var (x, i) ->
+	let rec go j i' ys ts = match i', ys, ts with
+	| 0, Some y :: ys, t :: ts ->
+		if x = y then (I.Var j, t) else go (j + 1) 0 ys ts
+	| i', Some y :: ys, t :: ts ->
+		go (j + 1) (if x = y then i' - 1 else i') ys ts
+	| i', None :: ys, t :: ts ->
+		go (j + 1) i' ys ts
+	| _, [], [] ->
+		(* TODO: We should print x ^ "#0" if the programmer wrote x ^ "#0". *)
+		let x = if i = 0 then x else x ^ "#" ^ string_of_int i in
+		error loc ("unbound identifier: " ^ x)
+	| _, _, _ -> failwith "impossible" in
+	go 0 i ctx.names ctx.types
 
 and check ctx (Node.Of (loc, t') as t) va = match t', va with
 (* TODO: Match, Data *)
diff --git a/lexer.mll b/lexer.mll
@@ -27,6 +27,7 @@ let greek = ugreek | lgreek | lgreekalt
 
 let ident_start = latin | greek | '_'
 let ident_cont  = latin | greek | '_' | dnumeral | '\'' | '"'
+let ident = ident_start ident_cont*
 
 (* This format is accepted by Z.of_string. *)
 let dnumber = dnumeral (dnumeral | '_')*
@@ -84,9 +85,17 @@ rule token = parse
 | "fn" { FN }
 
 | "_" { UNDERSCORE }
-| ident_start ident_cont* { IDENT (Lexing.lexeme lexbuf) }
+| (ident as x) "#" (number as i) {
+	let z = Z.of_string i in
+	(* TODO: Standardize this limit. *)
+	if Z.fits_int z then
+		IDENT_INDEX (x, Z.to_int z)
+	else
+		error lexbuf "identifier index too big"
+}
+| ident as x { IDENT x }
 
-| number { NAT (Z.of_string (Lexing.lexeme lexbuf)) }
+| number as n { NAT (Z.of_string n) }
 | number invalid_number_terminator
 	{ error lexbuf "invalid terminator after integer literal" }
 
diff --git a/main.ml b/main.ml
@@ -1,5 +1,4 @@
-(*let buf = Lexing.from_string "(fn A: Nat -> Type, f: (A 0, A 1) -> A 2, x: A 0, y: A 1 => f x y) (fn _: Nat => Nat) (fn x: Nat, y: Nat => x + y) 3 4 : Nat"*)
-(*let buf = Lexing.from_string "N := (A: Type, A, A -> A) -> A; ((fn m: N, n: N => fn A: Type, z: A, s: A -> A => m A (n A z s) s) (fn A: Type, z: A, s: A -> A => s (s z)) (fn A: Type, z: A, s: A -> A => s (s (s z)))) Nat 0 (fn n: Nat => n + 1)"*)
+(*
 let buf = Lexing.from_string "\
 	N := (A: Type, A, A -> A) -> A;\n\
 	add: (N, N) -> N := fn m, n => fn A, z, s => m A (n A z s) s;\n\
@@ -8,6 +7,8 @@ let buf = Lexing.from_string "\
 	five := add two three;\n\
 	five Nat 0 (fn n => n + 1)\n\
 "
+*)
+let buf = Lexing.from_string "Type := 3; Type := 4; Type#2"
 
 let concrete =
 	try Parser.start Lexer.token buf with
diff --git a/parser.mly b/parser.mly
@@ -19,6 +19,7 @@ let (@$) = locate
 %token IFF DO
 %token FN
 %token UNDERSCORE
+%token <Common.ident * int> IDENT_INDEX
 %token <Common.ident> IDENT
 %token <Z.t> NAT
 
@@ -93,6 +94,7 @@ expr7:
 	| expr8 { $1 }
 expr8:
 	| UNDERSCORE { 1 @$ Underscore }
+	| IDENT_INDEX { 1 @$ let (x, i) = $1 in IdentIndex (x, i) }
 	| IDENT { 1 @$ Ident $1 }
 	| NAT { 1 @$ Nat $1 }
 	| HASH_LBRACK cons_list RBRACK { 1 @$ VariantType $2 }

	dtlc dependently-typed lambda calculus toy
	git clone git://git.rr3.xyz/dtlc
	Log \| Files \| Refs \| README \| LICENSE

M	abstract.ml	\|	2	+-
M	cnctoabs.ml	\|	15	++++++++++-----
M	concrete.ml	\|	2	++
M	elab.ml	\|	21	++++++++++++++-------
M	lexer.mll	\|	13	+++++++++++--
M	main.ml	\|	5	+++--
M	parser.mly	\|	2	++