1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
module Err_msg = struct
let pf = Format.fprintf
let pp_sp = Format.pp_print_space
let pp_nop _ () = ()
let pp_any fmt ppf _ = pf ppf fmt
let pp_op_enum op ?(empty = pp_nop) pp_v ppf = function
| [] -> empty ppf ()
| [v] -> pp_v ppf v
| _ as vs ->
let rec loop ppf = function
| [v0; v1] -> pf ppf "%a@ %s@ %a" pp_v v0 op pp_v v1
| v :: vs -> pf ppf "%a,@ " pp_v v; loop ppf vs
| [] -> assert false
in
loop ppf vs
let pp_and_enum ?empty pp_v ppf vs = pp_op_enum "and" ?empty pp_v ppf vs
let pp_or_enum ?empty pp_v ppf vs = pp_op_enum "or" ?empty pp_v ppf vs
let pp_did_you_mean pp_v ppf = function
| [] -> () | vs -> pf ppf "Did@ you@ mean %a ?" (pp_or_enum pp_v) vs
let pp_must_be pp_v ppf = function
| [] -> () | vs -> pf ppf "Must be %a." (pp_or_enum pp_v) vs
let pp_unknown ~kind pp_v ppf v = pf ppf "Unknown %a %a." kind () pp_v v
let pp_unknown' ~kind pp_v ~hint ppf (v, hints) = match hints with
| [] -> pp_unknown ~kind pp_v ppf v
| hints -> pp_unknown ~kind pp_v ppf v; pp_sp ppf (); (hint pp_v) ppf hints
let min_by f a b = if f a <= f b then a else b
let max_by f a b = if f a <= f b then b else a
let edit_distance s0 s1 =
let minimum a b c = min a (min b c) in
let s0 = min_by String.length s0 s1
and s1 = max_by String.length s0 s1 in
let m = String.length s0 and n = String.length s1 in
let rec rows row0 row i =
if i > n then row0.(m) else begin
row.(0) <- i;
for j = 1 to m do
if s0.[j - 1] = s1.[i - 1] then row.(j) <- row0.(j - 1) else
row.(j) <-minimum (row0.(j - 1) + 1) (row0.(j) + 1) (row.(j - 1) + 1)
done;
rows row row0 (i + 1)
end in
rows (Array.init (m + 1) (fun x -> x)) (Array.make (m + 1) 0) 1
let suggest ?(dist = 2) candidates s =
let add (min, acc) name =
let d = edit_distance s name in
if d = min then min, (name :: acc) else
if d < min then d, [name] else
min, acc
in
let d, suggs = List.fold_left add (max_int, []) candidates in
if d <= dist then List.rev suggs else []
end
module Tloc = struct
type fpath = string
let pp_path = Format.pp_print_string
type pos = int
type line = int
type line_pos = line * pos
let l v = v
type t =
{ file : fpath;
sbyte : pos; ebyte : pos;
sline : pos * line; eline : pos * line }
let no_file = "-"
let v ~file ~sbyte ~ebyte ~sline ~eline = { file; sbyte; ebyte; sline; eline }
let file l = l.file
let sbyte l = l.sbyte
let ebyte l = l.ebyte
let sline l = l.sline
let eline l = l.eline
let nil =
let pnil = -1 in
let lnil = (-1, pnil) in
v ~file:no_file ~sbyte:pnil ~ebyte:pnil ~sline:lnil ~eline:lnil
let merge l0 l1 =
let sbyte, sline =
if l0.sbyte < l1.sbyte then l0.sbyte, l0.sline else l1.sbyte, l1.sline
in
let ebyte, eline =
if l0.ebyte < l1.ebyte then l1.ebyte, l1.eline else l0.ebyte, l0.eline
in
v ~file:l0.file ~sbyte ~ebyte ~sline ~eline
let to_start l =
v ~file:l.file ~sbyte:l.sbyte ~ebyte:l.sbyte ~sline:l.sline ~eline:l.sline
let to_end l =
v ~file:l.file ~sbyte:l.ebyte ~ebyte:l.ebyte ~sline:l.eline ~eline:l.eline
let restart ~at:s e =
v ~file:e.file ~sbyte:s.sbyte ~ebyte:e.ebyte ~sline:s.sline ~eline:e.eline
let pf = Format.fprintf
let pp_ocaml ppf l = match l.ebyte < 0 with
| true -> pf ppf "File \"%a\", line n/a, characters n/a" pp_path l.file
| false ->
let pp_lines ppf l = match fst l.sline = fst l.eline with
| true -> pf ppf "line %d" (fst l.sline)
| false -> pf ppf "lines %d-%d" (fst l.sline) (fst l.eline)
in
let pos_s = l.sbyte - snd l.sline in
let pos_e = l.ebyte - snd l.eline + 1 in
pf ppf "File \"%a\", %a, characters %d-%d"
pp_path l.file pp_lines l pos_s pos_e
let pp_gnu ppf l = match l.ebyte < 0 with
| true -> pf ppf "%a:" pp_path l.file
| false ->
let pp_lines ppf l =
let col_s = l.sbyte - snd l.sline + 1 in
let col_e = l.ebyte - snd l.eline + 1 in
match fst l.sline = fst l.eline with
| true -> pf ppf "%d.%d-%d" (fst l.sline) col_s col_e
| false ->
pf ppf "%d.%d-%d.%d" (fst l.sline) col_s (fst l.eline) col_e
in
pf ppf "%a:%a" pp_path l.file pp_lines l
let pp_dump ppf l =
pf ppf "[bytes %d;%d][lines %d;%d][lbytes %d;%d]"
l.sbyte l.ebyte (fst l.sline) (fst l.eline) (snd l.sline) (snd l.eline)
let pp = pp_gnu
let string_subrange ?(first = 0) ?last s =
let max = String.length s - 1 in
let last = match last with
| None -> max
| Some l when l > max -> max
| Some l -> l
in
let first = if first < 0 then 0 else first in
if first > last then "" else
String.sub s first (last - first + 1)
let string_replace ~start ~stop ~rep s =
let len = String.length s in
if stop < start || start < 0 || start > len || stop < 0 || stop > len
then invalid_arg (Printf.sprintf "invalid start:%d stop:%d" start stop) else
let b = String.sub s 0 start in
let a = String.sub s stop (len - stop) in
String.concat "" [b; rep; a]
end
module Utf_8 = struct
type case =
| L1 | L2 | L3_E0 | L3_E1_EC_or_EE_EF | L3_ED | L4_F0 | L4_F1_F3 | L4_F4 | E
let case =
[|
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1; L1;
E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
E; E; E; E; E; E; E; E; E; E; E; E; E; E; E; E;
E; E; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2;
L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2; L2;
L3_E0; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
L3_E1_EC_or_EE_EF; L3_ED;L3_E1_EC_or_EE_EF; L3_E1_EC_or_EE_EF;
L4_F0; L4_F1_F3; L4_F1_F3; L4_F1_F3; L4_F4; E; E; E; E; E; E; E; E; E; E; E;
|]
end
module Tdec = struct
type 'a fmt = Format.formatter -> 'a -> unit
let pp_did_you_mean = Err_msg.pp_did_you_mean
let pp_and_enum = Err_msg.pp_and_enum
let pp_or_enum = Err_msg.pp_or_enum
let pp_did_you_mean = Err_msg.pp_did_you_mean
let pp_must_be = Err_msg.pp_must_be
let pp_unknown = Err_msg.pp_unknown
let pp_unknown' = Err_msg.pp_unknown'
type t =
{ file : Tloc.fpath; i : string; tok : Buffer.t;
mutable pos : int; mutable line : int; mutable line_pos : int; }
let create ?(file = Tloc.no_file) i =
{ file; i; tok = Buffer.create 255; pos = 0; line = 1; line_pos = 0 }
let file d = d.file
let pos d = d.pos
let line d = d.line, d.line_pos
let loc d ~sbyte ~ebyte ~sline ~eline =
Tloc.v ~file:d.file ~sbyte ~ebyte ~sline ~eline
let loc_to_here d ~sbyte ~sline =
loc d ~sbyte ~ebyte:d.pos ~sline ~eline:(d.line, d.line_pos)
let loc_here d = loc_to_here d ~sbyte:d.pos ~sline:(d.line, d.line_pos)
exception Err of Tloc.t * string
let err loc msg = raise_notrace (Err (loc, msg))
let err_to_here d ~sbyte ~sline fmt =
Format.kasprintf (err (loc_to_here d ~sbyte ~sline)) fmt
let err_here d fmt = Format.kasprintf (err (loc_here d)) fmt
let err_suggest = Err_msg.suggest
let incr_line d = match d.i.[d.pos] with
| '\r' -> d.line <- d.line + 1; d.line_pos <- d.pos + 1
| '\n' ->
(if d.pos = 0 || d.i.[d.pos - 1] <> '\r' then d.line <- d.line + 1);
d.line_pos <- d.pos + 1;
| _ -> ()
[@@ ocaml.inline]
let eoi d = d.pos >= String.length d.i [@@ ocaml.inline]
let byte d = if eoi d then 0xFFFF else Char.code d.i.[d.pos] [@@ ocaml.inline]
let accept_byte d = incr_line d; d.pos <- d.pos + 1
[@@ ocaml.inline]
let accept_utf_8 accept d =
let err d = match byte d with
| 0xFFFF -> err_here d "UTF-8 decoding error: unexpected end of input"
| b -> err_here d "UTF-8 decoding error: byte %02x illegal here" b
in
let accept_tail d = if (byte d lsr 6 = 0b10) then accept d else err d in
match byte d with
| 0xFFFF -> err d
| b ->
match Utf_8.case.(b) with
| L1 -> accept d
| L2 -> accept d; accept_tail d
| L3_E0 ->
accept d;
if (byte d - 0xA0 < 0xBF - 0xA0) then accept d else err d;
accept_tail d
| L3_E1_EC_or_EE_EF -> accept d; accept_tail d; accept_tail d
| L3_ED ->
accept d;
if (byte d - 0x80 < 0x9F - 0x80) then accept d else err d;
accept_tail d
| L4_F0 ->
accept d;
if (byte d - 0x90 < 0xBF - 0x90) then accept d else err d;
accept_tail d; accept_tail d
| L4_F1_F3 ->
accept d;
accept_tail d; accept_tail d; accept_tail d;
| L4_F4 ->
accept d;
if (byte d - 0x80 < 0x8F - 0x80) then accept d else err d;
| E -> err d
let accept_uchar d = accept_utf_8 accept_byte d
let tok_reset d = Buffer.reset d.tok [@@ ocaml.inline]
let tok_pop d = let t = Buffer.contents d.tok in tok_reset d; t
[@@ ocaml.inline]
let tok_accept_byte d =
Buffer.add_char d.tok d.i.[d.pos]; accept_byte d; [@@ ocaml.inline]
let tok_accept_uchar d = accept_utf_8 tok_accept_byte d [@@ ocaml.inline]
let tok_add_byte d b = Buffer.add_char d.tok (Char.chr b) [@@ ocaml.inline]
let tok_add_bytes d s = Buffer.add_string d.tok s [@@ ocaml.inline]
let tok_add_char d c = Buffer.add_char d.tok c [@@ ocaml.inline]
let buffer_add_uchar b u = match Uchar.to_int u with
| u when u < 0 -> assert false
| u when u <= 0x007F ->
Buffer.add_char b (Char.unsafe_chr u)
| u when u <= 0x07FF ->
Buffer.add_char b (Char.unsafe_chr (0xC0 lor (u lsr 6)));
Buffer.add_char b (Char.unsafe_chr (0x80 lor (u land 0x3F)));
| u when u <= 0xFFFF ->
Buffer.add_char b (Char.unsafe_chr (0xE0 lor (u lsr 12)));
Buffer.add_char b (Char.unsafe_chr (0x80 lor ((u lsr 6) land 0x3F)));
Buffer.add_char b (Char.unsafe_chr (0x80 lor (u land 0x3F)));
| u when u <= 0x10FFFF ->
Buffer.add_char b (Char.unsafe_chr (0xF0 lor (u lsr 18)));
Buffer.add_char b (Char.unsafe_chr (0x80 lor ((u lsr 12) land 0x3F)));
Buffer.add_char b (Char.unsafe_chr (0x80 lor ((u lsr 6) land 0x3F)));
Buffer.add_char b (Char.unsafe_chr (0x80 lor (u land 0x3F)))
| _ -> assert false
let tok_add_uchar d u = buffer_add_uchar d.tok u
end