Source file lex_buffer.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
(** A custom lexbuffer that automatically keeps track of the source location.
This module is a thin wrapper arounds sedlexing's default buffer, which does
not provide this functionality. *)
(** the lex buffer type *)
type t =
{ buf : Sedlexing.lexbuf
; mutable pos : Lexing.position
; mutable pos_mark : Lexing.position
; mutable last_char : int option
; mutable last_char_mark : int option
}
let of_sedlex ?(file = "<n/a>") ?pos buf =
let pos =
match pos with
| None ->
{ Lexing.pos_fname = file
; pos_lnum = 1
;
pos_bol = 0
;
pos_cnum = 0
}
| Some p -> p
in
{ buf; pos; pos_mark = pos; last_char = None; last_char_mark = None }
;;
let of_ascii_string ?pos s = of_sedlex ?pos (Sedlexing.Latin1.from_string s)
let of_ascii_file file =
let chan = open_in file in
of_sedlex ~file (Sedlexing.Latin1.from_channel chan)
;;
(** The next four functions are used by sedlex internally.
See https://www.lexifi.com/sedlex/libdoc/Sedlexing.html. *)
let mark lexbuf p =
lexbuf.pos_mark <- lexbuf.pos;
lexbuf.last_char_mark <- lexbuf.last_char;
Sedlexing.mark lexbuf.buf p
;;
let backtrack lexbuf =
lexbuf.pos <- lexbuf.pos_mark;
lexbuf.last_char <- lexbuf.last_char_mark;
Sedlexing.backtrack lexbuf.buf
;;
let start lexbuf =
lexbuf.pos_mark <- lexbuf.pos;
lexbuf.last_char_mark <- lexbuf.last_char;
Sedlexing.start lexbuf.buf
;;
(** location of next character *)
let next_loc lexbuf = { lexbuf.pos with pos_cnum = lexbuf.pos.pos_cnum + 1 }
let cr = Char.code '\r'
(** next character *)
let next lexbuf =
let c = Sedlexing.next lexbuf.buf in
let pos = next_loc lexbuf in
let ch =
match c with
| None -> None
| Some c ->
(try Some (Uchar.to_char c) with
| Invalid_argument _ -> None)
in
(match ch with
| Some '\r' ->
lexbuf.pos <- { pos with pos_bol = pos.pos_cnum - 1; pos_lnum = pos.pos_lnum + 1 }
| Some '\n' when not (lexbuf.last_char = Some cr) ->
lexbuf.pos <- { pos with pos_bol = pos.pos_cnum - 1; pos_lnum = pos.pos_lnum + 1 }
| Some '\n' -> ()
| _ -> lexbuf.pos <- pos);
(match c with
| None -> lexbuf.last_char <- None
| Some c -> lexbuf.last_char <- Some (Uchar.to_int c));
c
;;
let __private__next_int lexbuf =
match next lexbuf with
| None -> -1
| Some x -> Uchar.to_int x
;;
let raw lexbuf = Sedlexing.lexeme lexbuf.buf
let latin1 ?(skip = 0) ?(drop = 0) lexbuf =
let len = Sedlexing.lexeme_length lexbuf.buf - skip - drop in
Sedlexing.Latin1.sub_lexeme lexbuf.buf skip len
;;
let utf8 ?(skip = 0) ?(drop = 0) lexbuf =
let len = Sedlexing.lexeme_length lexbuf.buf - skip - drop in
Sedlexing.Utf8.sub_lexeme lexbuf.buf skip len
;;
let container_lnum_ref = ref 0
let fix_loc loc =
let fix_pos pos =
let pos_cnum =
if pos.Lexing.pos_lnum > !container_lnum_ref
then pos.Lexing.pos_cnum - 2
else pos.Lexing.pos_cnum
in
{ pos with Lexing.pos_cnum }
in
let loc_start = fix_pos loc.Location.loc_start in
let loc_end = fix_pos loc.Location.loc_end in
{ loc with Location.loc_start; loc_end }
;;
let make_loc ?(loc_ghost = false) start_pos end_pos : Location.t =
{ Location.loc_start = start_pos; loc_end = end_pos; loc_ghost }
;;
let make_loc_and_fix ?(loc_ghost = false) start_pos end_pos : Location.t =
make_loc ~loc_ghost start_pos end_pos |> fix_loc
;;