1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
type lex_token =
| PartialToken of string * Errors.Lexing_l.t
| Token of Parser.token * string
let rec buf n acc =
if n = 0 then lex_rec buf acc
else
match%sedlex buf with
| "(*" -> eat_comment buf (n + 1) (acc ^ (Sedlexing.Utf8.lexeme buf))
| "*)" -> eat_comment buf (n - 1) (acc ^ (Sedlexing.Utf8.lexeme buf))
| eof -> PartialToken (acc, Errors.Lexing_l.UnterminatedComment)
| Star (Sub (any, '*'))
| '*' -> eat_comment buf n (acc ^ (Sedlexing.Utf8.lexeme buf))
| _ -> assert false
and lex_rec buf acc =
try
match%sedlex buf with
| ' ' | '\t' | '\n' | '\013' -> lex_rec buf (acc ^ (Sedlexing.Utf8.lexeme buf))
| "(*" -> eat_comment buf 1 (acc ^ (Sedlexing.Utf8.lexeme buf))
| "*)" -> let loc = Sedlexing.lexing_positions buf in
Errors.(LexingErrors.emit Lexing_l.UnstartedComment ~loc)
| '|' -> Token (Parser.PIPE, acc ^ (Sedlexing.Utf8.lexeme buf))
| ';' -> Token (Parser.SEMI, acc ^ (Sedlexing.Utf8.lexeme buf))
| ":=" -> Token (Parser.COLON_EQUAL, acc ^ (Sedlexing.Utf8.lexeme buf))
| '=' -> Token (Parser.EQUAL, acc ^ (Sedlexing.Utf8.lexeme buf))
| '<' -> Token (Parser.INF, acc ^ (Sedlexing.Utf8.lexeme buf))
| '+' -> Token (Parser.PLUS, acc ^ (Sedlexing.Utf8.lexeme buf))
| '-' -> Token (Parser.MINUS, acc ^ (Sedlexing.Utf8.lexeme buf))
| ',' -> Token (Parser.COMMA, acc ^ (Sedlexing.Utf8.lexeme buf))
| "let" -> Token (Parser.LET, acc ^ (Sedlexing.Utf8.lexeme buf))
| '"', Star (Sub (any, '"')), '"' ->
let t = Sedlexing.Utf8.lexeme buf in
Token (Parser.DATA (String.sub t 1 (String.length t - 2)), acc ^ (Sedlexing.Utf8.lexeme buf))
| Star ('0' .. '9') -> Token (Parser.INT (int_of_string (Sedlexing.Utf8.lexeme buf)), acc ^ (Sedlexing.Utf8.lexeme buf))
| (id_continue | '.' | '/'), Star (id_continue | '-' | '.' | '/') ->
Token (Parser.ID (Sedlexing.Utf8.lexeme buf), acc ^ (Sedlexing.Utf8.lexeme buf))
| '"', Star (Sub (any, '"')), eof ->
PartialToken (Sedlexing.Utf8.lexeme buf, (Errors.Lexing_l.Unclosed "\""))
| eof -> Token (Parser.EOI, (acc ^ (Sedlexing.Utf8.lexeme buf)))
| any -> let loc = Sedlexing.lexing_positions buf in
Errors.(LexingErrors.emit (Lexing_l.BadChar (Sedlexing.Utf8.lexeme buf)) ~loc)
| _ -> assert false
with
| Sedlexing.MalFormed ->
let loc = Sedlexing.lexing_positions buf in
Errors.(LexingErrors.emit Lexing_l.Malformed ~loc)
let lex buf =
lex_rec buf ""