commit 008e2cfcb987935ecfd7e9ba10446aa38b529807
parent e28dea8dbba00607a1670757d0f37b84b9b8a6e9
Author: Robert Russell <robertrussell.72001@gmail.com>
Date: Sun, 3 Dec 2023 21:12:59 -0800
Start nice version
Diffstat:
| A | Makefile | | | 14 | ++++++++++++++ |
| R | dep.ml -> dep/dep.ml | | | 0 | |
| A | lam/lam.ml | | | 59 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | lexer.mll | | | 90 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | main.ml | | | 4 | ++++ |
| A | parser.mly | | | 54 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | surf.ml | | | 40 | ++++++++++++++++++++++++++++++++++++++++ |
7 files changed, 261 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
@@ -0,0 +1,13 @@
+main: surf.ml parser.ml lexer.ml main.ml
+ ocamlfind ocamlopt -o $@ -g -linkpkg -package zarith $^
+
+lexer.ml: lexer.mll
+ ocamllex lexer.mll
+
+parser.ml: parser.mly
+ ocamlyacc parser.mly
+ rm parser.mli # Hack
+
+.PHONY: clean
+clean:
+ rm -f main *.o *.cm* lexer.ml parser.ml
+\ No newline at end of file
diff --git a/dep.ml b/dep/dep.ml
diff --git a/lam/lam.ml b/lam/lam.ml
@@ -0,0 +1,59 @@
+type idx = int
+
+type term =
+ | TVar of idx
+ | TLam of term
+ | TApp of term * term
+ | TLet of term * term
+ | TInt of int
+ | TAdd of term * term
+ | TMul of term * term
+
+let rec string_of_term = function
+ | TVar i -> "$" ^ string_of_int i
+ | TLam t -> "(λ" ^ string_of_term t ^ ")"
+ | TApp (t, u) -> "(" ^ string_of_term t ^ ") (" ^ string_of_term u ^ ")"
+ | TLet (t, u) -> "(" ^ string_of_term t ^ "; " ^ string_of_term u ^ ")"
+ | TInt i -> string_of_int i
+ | TAdd (t, u) -> "(" ^ string_of_term t ^ " + " ^ string_of_term u ^ ")"
+ | TMul (t, u) -> "(" ^ string_of_term t ^ " * " ^ string_of_term u ^ ")"
+
+type value =
+ | VLam of env * term
+ | VInt of int
+and env = value list
+
+let rec string_of_value = function
+ | VLam (e, t) -> String.concat ", " (List.map string_of_value e) ^ " ⊢ " ^ string_of_term t
+ | VInt i -> string_of_int i
+
+let rec lookup e i =
+ match e, i with
+ | v :: _, 0 -> v
+ | _ :: vs, j -> lookup vs j
+ | [], _ -> failwith "variable not in scope"
+
+let rec eval e = function
+ | TVar i -> lookup e i
+ | TLam t -> VLam (e, t)
+ | TApp (t, u) -> (
+ match eval e t, eval e u with
+ | VLam (c, b), v -> eval (v :: c) b
+ | _, _ -> failwith "can only apply functions"
+ )
+ | TLet (t, u) -> eval (eval e t :: e) u
+ | TInt i -> VInt i
+ | TAdd (t, u) -> (
+ match eval e t, eval e u with
+ | VInt i, VInt j -> VInt (i + j)
+ | _, _ -> failwith "can only add ints"
+ )
+ | TMul (t, u) -> (
+ match eval e t, eval e u with
+ | VInt i, VInt j -> VInt (i * j)
+ | _, _ -> failwith "can only multiply ints"
+ )
+
+let t = TLet (TLam (TAdd (TInt 3, TVar 0)), TApp (TVar 0, TInt 4))
+let () = print_endline (string_of_term t)
+let () = print_endline (string_of_value (eval [] t))
diff --git a/lexer.mll b/lexer.mll
@@ -0,0 +1,90 @@
+{
+open Parser
+exception LexerError of Lexing.position * string
+}
+
+let dnumeral = ['0'-'9']
+let bnumeral = ['0'-'1']
+let onumeral = ['0'-'7']
+let xnumeral = ['0'-'9' 'a'-'f' 'A'-'F']
+
+let ulatin = ['A'-'Z']
+let llatin = ['a'-'z']
+let latin = ulatin | llatin
+
+(* We allow only those Greek letters that can't be confused with Latin letters
+ * in most fonts. *)
+let ugreek = "Γ" | "Δ" | "Θ" | "Λ" | "Ξ" | "Π" | "Σ" | "Φ" | "Ψ" | "Ω"
+let lgreek = "α" | "β" | "γ" | "δ" | "ε" | "ζ" | "η" | "θ" | "ι" | "κ" | "λ" | "μ" | "ν" | "ξ" | "π" | "ρ" | "σ" | "τ" | "υ" | "φ" | "χ" | "ψ" | "ω"
+let lgreekalt = "ϵ" | "ϑ" | "ϰ" | "ϖ" | "ϱ" | "ς" | "ϕ"
+let greek = ugreek | lgreek | lgreekalt
+
+let ident_start = latin | greek | '_'
+let ident_cont = latin | greek | '_' | dnumeral | '\'' | '"'
+
+(* This format is accepted by Z.of_string. *)
+let dnumber = dnumeral (dnumeral | '_')*
+let bnumber = '0' ['b' 'B'] bnumeral (bnumeral | '_')*
+let onumber = '0' ['o' 'O'] onumeral (onumeral | '_')*
+let xnumber = '0' ['x' 'X'] xnumeral (xnumeral | '_')*
+let number = dnumber | bnumber | onumber | xnumber
+
+(* This regex is used to prevent input like "0b02" from successfully lexing as
+ * the two tokens "0b0" and "2". *)
+let invalid_number_terminator = ident_cont
+
+rule token = parse
+| [' ' '\t' '\r'] { token lexbuf }
+| '\n' { Lexing.new_line lexbuf; token lexbuf }
+| eof { EOF }
+
+| ":" { COLON }
+
+| "=" { EQ }
+| ";" { SEMICOLON }
+
+| "->" { SINGLE_ARROW }
+| "=>" { DOUBLE_ARROW }
+
+| "@(" { AT_LPAREN }
+| "#(" { HASH_LPAREN }
+
+| "(" { LPAREN }
+| ")" { RPAREN }
+| "[" { LBRACK }
+| "]" { RBRACK }
+| "{" { LBRACE }
+| "}" { RBRACE }
+
+| "+" { PLUS }
+| "*" { ASTERISK }
+
+| "let" { LET }
+
+| "if" { IF }
+| "then" { THEN }
+| "else" { ELSE }
+
+| ident_start ident_cont* { IDENT (Lexing.lexeme lexbuf) }
+
+| number invalid_number_terminator { raise (LexerError (Lexing.lexeme_start_p lexbuf, "invalid terminator after integer literal")) }
+| number { INT (Z.of_string (Lexing.lexeme lexbuf)) }
+
+(*
+| "<" { LT }
+| "<=" { LE }
+| "=" { EQ }
+| ">=" { GE }
+| ">" { GT }
+| "<>" { NE }
+| "-" { MINUS }
+| "/" { SLASH }
+| "%" { PERCENT }
+| "not" { NOT }
+| "and" { AND }
+| "or" { OR }
+| "imp" { IMP }
+*)
+
+{
+}
diff --git a/main.ml b/main.ml
@@ -0,0 +1,3 @@
+let buf = Lexing.from_string "let f = @(n) -> 2 * n : Nat; f 7"
+let e = Parser.expr0 Lexer.token buf
+let () = print_endline (Surf.string_of_expr 0 e)
+\ No newline at end of file
diff --git a/parser.mly b/parser.mly
@@ -0,0 +1,54 @@
+%{
+open Surf
+%}
+
+%token EOF
+%token COLON
+%token EQ SEMICOLON
+%token SINGLE_ARROW DOUBLE_ARROW
+%token AT_LPAREN HASH_LPAREN
+%token LPAREN RPAREN
+%token LBRACK RBRACK
+%token LBRACE RBRACE
+%token PLUS ASTERISK
+%token LET
+%token IF THEN ELSE
+%token <Surf.ident> IDENT
+%token <Z.t> INT
+
+%start expr0
+%type <Surf.expr> expr0
+
+%%
+
+expr0:
+ | LET patt0 EQ expr1 SEMICOLON expr0 { Let ($2, $4, $6) }
+ | IF expr1 THEN expr1 ELSE expr1 { If ($2, $4, $6) }
+ | expr1 { $1 }
+expr1:
+ | AT_LPAREN patt0 RPAREN SINGLE_ARROW expr1 { Fn ($2, $5) }
+ | HASH_LPAREN patt0 RPAREN SINGLE_ARROW expr1 { Pi ($2, $5) }
+ | expr2 COLON expr1 { Hint ($1, $3) }
+ | expr2 { $1 }
+expr2:
+ | expr2 PLUS expr3 { Plus ($1, $3) }
+ | expr3 { $1 }
+expr3:
+ | expr3 ASTERISK expr4 { Asterisk ($1, $3) }
+ | expr4 { $1 }
+expr4:
+ | expr4 expr5 { Apply ($1, $2) }
+ | expr5 { $1 }
+expr5:
+ | IDENT { Var $1 }
+ | INT { Int $1 }
+ | LPAREN expr0 RPAREN { $2 }
+
+patt0:
+ | patt1 COLON expr1 { Annot ($1, $3) }
+ | patt1 { $1 }
+patt1:
+ | IDENT { Wild $1 }
+ | LPAREN patt0 RPAREN { $2 }
+
+%%
diff --git a/surf.ml b/surf.ml
@@ -0,0 +1,40 @@
+type ident = string
+
+type expr =
+| Let of patt * expr * expr
+| If of expr * expr * expr
+| Fn of patt * expr
+| Pi of patt * expr
+| Hint of expr * expr
+| Plus of expr * expr
+| Asterisk of expr * expr
+| Apply of expr * expr
+| Var of ident
+| Int of Z.t
+and patt =
+| Annot of patt * expr
+| Wild of ident
+
+let rec string_of_expr m e =
+ let sp = string_of_patt in
+ let se = string_of_expr in
+ let (c, s) = match e with
+ | Let (p, a, b) -> (0, "let " ^ sp 0 p ^ " = " ^ se 1 a ^ "; " ^ se 0 b)
+ | If (c, t, f) -> (0, "if " ^ se 1 c ^ " then " ^ se 1 t ^ " else " ^ se 1 f)
+ | Fn (p, e) -> (1, "@(" ^ sp 0 p ^ ") -> " ^ se 1 e)
+ | Pi (p, e) -> (1, "#(" ^ sp 0 p ^ ") -> " ^ se 1 e)
+ | Hint (a, b) -> (1, se 2 a ^ " : " ^ se 1 b)
+ | Plus (a, b) -> (2, se 2 a ^ " + " ^ se 3 b)
+ | Asterisk (a, b) -> (3, se 3 a ^ " * " ^ se 4 b)
+ | Apply (a, b) -> (4, se 4 a ^ " " ^ se 5 b)
+ | Var x -> (5, x)
+ | Int z -> (5, Z.to_string z) in
+ if c < m then "(" ^ s ^ ")" else s
+
+and string_of_patt m p =
+ let sp = string_of_patt in
+ let se = string_of_expr in
+ let (c, s) = match p with
+ | Annot (p, e) -> (0, sp 1 p ^ " : " ^ se 1 e)
+ | Wild x -> (1, x) in
+ if c < m then "(" ^ s ^ ")" else s