haskell.lua (5522B)
1 local l = require("lexer") 2 local lex = l.new(...) 3 local P, R, S = lpeg.P, lpeg.R, lpeg.S 4 local T = function(name, patt) return lex:tag(name, patt) end 5 6 local function I(s) -- Case-insensitive string match 7 local p = P(true) 8 for i = 1, #s do 9 local c = s:sub(i, i) 10 p = p * (P(c:lower()) + P(c:upper())) 11 end 12 return p 13 end 14 15 -- Variables beginning with "hs_" are (subsets of) the official nonterminals in 16 -- the Haskell 2010 grammar of the same name. 17 local hs_special = S"(),;[]`{}" 18 local hs_whitechar = S" \t\n\r" 19 local hs_small = R"az" + P"_" 20 + P"Γ" + P"Δ" + P"Θ" + P"Λ" + P"Ξ" + P"Π" + P"Σ" + P"Φ" + P"Ψ" + P"Ω" 21 + P"α" + P"β" + P"γ" + P"δ" + P"ε" + P"ζ" + P"η" + P"θ" + P"ι" + P"κ" 22 + P"λ" + P"μ" + P"ν" + P"ξ" + P"ο" + P"π" + P"ρ" + P"σ" + P"τ" + P"υ" 23 + P"φ" + P"χ" + P"ψ" + P"ω" + P"ϵ" + P"ϑ" + P"ϰ" + P"ϖ" + P"ϱ" + P"ς" 24 + P"ϕ" 25 local hs_large = R"AZ" 26 local hs_symbol = S"!#$%&*+./<=>?@\\^|-~:" 27 + P"⋅" 28 + P"→" + P"↑" + P"←" + P"↓" 29 + P"⇒" + P"⇑" + P"⇐" + P"⇓" 30 + P"⊢" + P"⊥" + P"⊣" + P"⊤" 31 + P"─" + P"│" + P"┌" + P"┐" + P"└" + P"┘" + P"├" + P"┤" + P"┬" + P"┴" + P"┼" + P"╭" + P"╮" + P"╯" + P"╰" + P"╴" + P"╵" + P"╶" + P"╷" 32 local hs_digit = R"09" 33 local hs_octit = R"07" 34 local hs_hexit = R("09", "AF", "af") 35 local hs_graphic = hs_small + hs_large + hs_symbol + hs_digit + hs_special + S"\"'" 36 local hs_any = hs_graphic + S" \t" 37 local hs_ANY = hs_graphic + hs_whitechar 38 local letter = hs_small + hs_large + hs_digit + P"'" 39 local hs_reservedid = ( 40 P"case" + P"class" + P"data" + P"default" + P"deriving" + P"do" + P"else" 41 + P"foreign" + P"if" + P"import"+ P"infixl" + P"infixr" + P"infix" 42 + P"instance" + P"in" + P"let" + P"module" + P"newtype" + P"of" 43 + P"then" + P"type" + P"where" + P"_" 44 ) * #-letter 45 local hs_varid = hs_small * letter^0 - hs_reservedid 46 local hs_conid = hs_large * letter^0 47 local hs_reservedop = 48 (P".." + P":" + P"::" + P"=" + P"\\" + P"<-" + P"->" + P"@" + P"~" + P"=>") * #-hs_symbol 49 local hs_varsym = (hs_symbol - P":") * hs_symbol^0 - hs_reservedop 50 local hs_consym = P":" * hs_symbol^0 - hs_reservedop 51 local ghc_keywords = (P"forall" + P"pattern" + P"family") * #-letter 52 local import_keywords = (P"qualified" + P"as" + P"hiding") * #-letter 53 local pragma = ( 54 P"INLINE" + P"NOINLINE" + P"SPECIALIZE" + P"SPECIALISE" + P"LANGUAGE" 55 + P"OPTIONS_GHC" + P"INCLUDE" + P"WARNING" + P"DEPRECATED" 56 + P"MINIMAL" + P"INLINABLE" + P"OPAQUE" + P"LINE" + P"COLUMN" 57 + P"RULES" + P"UNPACK" + P"NOUNPACK" + P"SOURCE" + P"COMPLETE" 58 + P"OVERLAPPING" + P"OVERLAPPABLE" + P"OVERLAPS" + P"INCOHERENT" 59 ) * #-letter 60 61 local whitespace = T("whitespace", hs_whitechar^1) 62 63 local comment_keyword = T("comment_keyword", (I"todo" + I"xxx" + I"fixme") * #(hs_ANY - R("09", "AZ", "az") - S"_'")) 64 local line_comment_text = T("comment_text", (hs_any - comment_keyword)^1) 65 local line_comment = T("comment_text", P"-"^2 * #(hs_ANY - hs_symbol)) * (line_comment_text + comment_keyword)^0 * (T("whitespace", P"\n") + P"") 66 local block_comment_text = T("comment_text", (hs_ANY - comment_keyword - P"{-" - P"-}")^1) 67 local block_comment = P{T("comment_text", P"{-" - P"{-#" * hs_whitechar^0 * pragma) * (block_comment_text + comment_keyword)^0 * T("comment_text", P"-}")^-1} 68 local comment = line_comment + block_comment 69 70 local intlit_dec = hs_digit^1 71 local intlit_oct = P"0" * S"oO" * hs_octit^1 72 local intlit_hex = P"0" * S"xX" * hs_hexit^1 73 local intlit = intlit_dec + intlit_oct + intlit_hex 74 75 local fltlit_exp = S"eE" * S"+-"^-1 * hs_digit^1 76 local fltlit = hs_digit^1 * (P"." * hs_digit^1 * fltlit_exp^-1 + fltlit_exp) 77 78 local numlit = T("numlit", intlit + fltlit) 79 80 local escape_char = S"abfnrtv\\\"'&" 81 local escape_ascii = P"^" * (R"AZ" + S"@[\\]^_") 82 -- It's important here that "SOH" precedes "SO"; it's the only ambiguous case. 83 + P"NUL" + P"SOH" + P"STX" + P"ETX" + P"EOT" + P"ENQ" + P"ACK" 84 + P"BEL" + P"BS" + P"HT" + P"LF" + P"VT" + P"FF" + P"CR" + P"SO" + P"SI" + P"DLE" 85 + P"DC1" + P"DC2" + P"DC3" + P"DC4" + P"NAK" + P"SYN" + P"ETB" + P"CAN" 86 + P"EM" + P"SUB" + P"ESC" + P"FS" + P"GS" + P"RS" + P"US" + P"SP" + P"DEL" 87 local escape_number = hs_digit^1 + P"o" * hs_octit^1 + P"x" * hs_hexit^1 88 local escape = T("escape", P"\\" * (escape_char + escape_ascii + escape_number)) 89 local gap = T("escape", P"\\" * hs_whitechar^1 * P"\\") 90 91 local chrlit_delim = T("chrlit_delim", P"'") 92 local chrlit_text = T("chrlit_text", hs_graphic - S"'\\" + P" ") 93 local chrlit = chrlit_delim * (chrlit_text + escape) * chrlit_delim 94 95 local strlit_delim = T("strlit_delim", P"\"") 96 local strlit_text = T("strlit_text", (hs_graphic - S"\"\\" + P" ")^1) 97 local strlit = strlit_delim * (strlit_text + escape + gap)^0 * strlit_delim 98 99 local delimiter = T("delimiter", P"{-#" + P"#-}" + hs_special) 100 local keyword = T("keyword", hs_reservedid + hs_reservedop + ghc_keywords + import_keywords + pragma) 101 102 local qualifier = T("qualifier", (hs_conid * P".")^0) 103 local operator = qualifier * T("operator", hs_varsym + hs_consym) 104 local type_ = qualifier * T("type", hs_conid) 105 local identifier = qualifier * T("identifier", hs_varid) 106 107 lex:add_rule("whitespace", whitespace) 108 lex:add_rule("comment", comment) 109 lex:add_rule("numlit", numlit) 110 lex:add_rule("chrlit", chrlit) 111 lex:add_rule("strlit", strlit) 112 lex:add_rule("delimiter", delimiter) 113 lex:add_rule("keyword", keyword) 114 lex:add_rule("operator", operator) 115 lex:add_rule("type", type_) 116 lex:add_rule("identifier", identifier) 117 118 lex:add_rule("error", T("error", P(1))) -- TODO: TEMP 119 120 return lex