lexer.ml
1.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
(*===----------------------------------------------------------------------===
* Lexer
*===----------------------------------------------------------------------===*)
(* Entry point of the lexer: consumes a character stream and produces a
   stream of Token values. Each case looks at one leading character and
   either skips it, hands over to a specialised sub-lexer, or emits it as a
   Token.Kwd. *)
let rec lex = parser
  (* Blanks, tabs and line breaks separate tokens and are dropped. *)
  | [< ' (' ' | '\n' | '\r' | '\t'); rest >] -> lex rest

  (* identifier: [a-zA-Z][a-zA-Z0-9]* *)
  | [< ' ('A' .. 'Z' | 'a' .. 'z' as first); rest >] ->
      let ident = Buffer.create 1 in
      Buffer.add_char ident first;
      lex_ident ident rest

  (* number: [0-9.]+, always starting with a digit *)
  | [< ' ('0' .. '9' as first); rest >] ->
      let digits = Buffer.create 1 in
      Buffer.add_char digits first;
      lex_number digits rest

  (* A # starts a comment that lex_comment discards. *)
  | [< ' ('#'); rest >] ->
      lex_comment rest

  (* Any other character is passed through unchanged as a Token.Kwd. *)
  | [< 'ch; rest >] ->
      [< 'Token.Kwd ch; lex rest >]

  (* No input left: the token stream ends as well. *)
  | [< >] -> [< >]
(* [lex_number buffer] continues scanning a numeric literal whose first digit
   is already stored in [buffer]. Digits and dots are appended to [buffer];
   at the first other character the accumulated text is converted to a float,
   emitted as a Token.Number, and followed by the tokens of the remaining
   input.

   Raises Failure when the accumulated text is not a valid float. *)
and lex_number buffer = parser
  | [< ' ('0' .. '9' | '.' as c); stream >] ->
      Buffer.add_char buffer c;
      lex_number buffer stream
  | [< stream=lex >] ->
      (* The case above accepts any mix of digits and dots, so malformed text
         such as 1.2.3 can reach this point. Keep raising Failure, but name
         the offending lexeme instead of the bare float_of_string message.
         The conversion stays inside the token expression, exactly where the
         float_of_string call used to be, so when it runs is unchanged. *)
      let to_float () =
        let text = Buffer.contents buffer in
        try float_of_string text
        with Failure _ -> failwith ("invalid number literal: " ^ text)
      in
      [< 'Token.Number (to_float ()); stream >]
(* [lex_ident buffer] continues scanning an identifier whose first letter is
   already stored in [buffer]. Letters and digits are appended to [buffer];
   at the first other character the accumulated word is mapped to its keyword
   token, or to Token.Ident when it is not a keyword, and followed by the
   tokens of the remaining input. *)
and lex_ident buffer = parser
  | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as ch); rest >] ->
      Buffer.add_char buffer ch;
      lex_ident buffer rest
  | [< rest=lex >] ->
      (* Pick the token first, then build the result stream in one place. *)
      let token =
        match Buffer.contents buffer with
        | "def" -> Token.Def
        | "extern" -> Token.Extern
        | "if" -> Token.If
        | "then" -> Token.Then
        | "else" -> Token.Else
        | "for" -> Token.For
        | "in" -> Token.In
        | "binary" -> Token.Binary
        | "unary" -> Token.Unary
        | name -> Token.Ident name
      in
      [< 'token; rest >]
(* Discards the rest of a comment. Characters are dropped one at a time up
   to and including the next newline, after which normal lexing resumes.
   If the input ends inside the comment, the token stream ends too. *)
and lex_comment = parser
  | [< ' ('\n'); after_comment=lex >] -> after_comment
  | [< '_ch; remainder=lex_comment >] -> remainder
  | [< >] -> [< >]