1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
type error_strategy = Stop | PopFirst
module P (T : sig
type value_parsed
end) =
struct
module type parser = sig
type token
exception Error
module MenhirInterpreter : sig
include
MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE with type token = token
type 'a terminal
type _ nonterminal
include
MenhirLib.IncrementalEngine.INSPECTION
with type 'a lr1state := 'a lr1state
with type production := production
with type 'a terminal := 'a terminal
with type 'a nonterminal := 'a nonterminal
with type 'a env := 'a env
end
module Incremental : sig
val main : Lexing.position -> T.value_parsed MenhirInterpreter.checkpoint
end
end
module type parser_logger = sig
val state_to_lr0_list : int -> string list
val parse_to_derivation :
?strategy:error_strategy ->
string ->
Lexing.lexbuf ->
T.value_parsed option
* ParserLog.configuration list
* (string * string * string) list
val parse :
?strategy:error_strategy ->
?interactive:bool ->
?log_file:string ->
?error_file:string ->
string ->
Lexing.lexbuf ->
T.value_parsed option
val parse_string :
?strategy:error_strategy ->
?interactive:bool ->
?log_file:string ->
?error_file:string ->
string ->
T.value_parsed option
val parse_file :
?strategy:error_strategy ->
?interactive:bool ->
?log_file:string ->
?error_file:string ->
string ->
T.value_parsed option
end
end
module type parser_messages = sig
val message : int -> string
end
module Make
(T : sig
type value_parsed
end)
(Parser : P(T).parser)
(Lexer : sig
val token : Lexing.lexbuf -> Parser.token
end)
(ParserMessages : parser_messages)
(Grammar : MenhirSdk.Cmly_api.GRAMMAR) =
struct
module MI = Parser.MenhirInterpreter
module StateMap = Map.Make (Int)
module G = Grammar
let show text positions =
MenhirLib.ErrorReports.extract text positions
|> MenhirLib.ErrorReports.sanitize |> MenhirLib.ErrorReports.compress
|> MenhirLib.ErrorReports.shorten 20
let find_attribute label attributes =
try
Some
(List.hd
(List.filter_map
(fun attr ->
if G.Attribute.has_label label attr then
Some (G.Attribute.payload attr)
else None)
attributes))
with _ -> None
let find_short_attribute attributes = find_attribute "short" attributes
let string_of_gsymbol = function
| G.N a -> (
match find_short_attribute (G.Nonterminal.attributes a) with
| Some str -> str
| None -> G.Nonterminal.name a)
| G.T a -> (
match find_short_attribute (G.Terminal.attributes a) with
| Some str -> str
| None -> G.Terminal.name a)
let string_arrays_of_gprod g_prod =
let lhs = string_of_gsymbol (G.N (G.Production.lhs g_prod)) in
let rhs =
Array.map (fun (a, _, _) -> string_of_gsymbol a) (G.Production.rhs g_prod)
in
(lhs, rhs)
let string_of_gproduction g_prod =
let lhs, rhs = string_arrays_of_gprod g_prod in
let rhs = Array.fold_left (fun acc s -> acc ^ " " ^ s) "" rhs in
lhs ^ " ->" ^ rhs
let string_of_production production =
string_of_gproduction (G.Production.of_int (MI.production_index production))
let string_of_gitem (prod, pos) =
let lhs, rhs = string_arrays_of_gprod prod in
lhs ^ " ->"
^ Array.fold_left ( ^ ) ""
(Array.mapi (fun i symb -> (if i = pos then " _ " else " ") ^ symb) rhs)
^ if pos = Array.length rhs then " _" else ""
let get_env_text text env i =
match Parser.MenhirInterpreter.get i env with
| Some (Parser.MenhirInterpreter.Element (_, _, pos1, pos2)) ->
show text (pos1, pos2)
| None -> "???"
let get_env checkpoint =
match checkpoint with
| MI.InputNeeded env
| MI.Shifting (env, _, _)
| MI.AboutToReduce (env, _)
| MI.HandlingError env ->
env
| Rejected -> failwith "reject"
| Accepted _ -> failwith "accepted"
let get_accepting_state axiom =
G.Lr0.fold
(fun state acc ->
let incoming_sym = Option.map G.symbol_name (G.Lr0.incoming state) in
let is_accepting =
List.fold_left
(fun acc (prod, _) -> acc || G.Production.kind prod = `START)
false (G.Lr0.items state)
in
if incoming_sym = Some axiom && is_accepting then G.Lr0.to_int state
else acc)
0
let rec stepParsingDerivations strategy checkpoint supplier text buffer
derivations errors =
match checkpoint with
| MI.Rejected ->
( None,
List.rev
(ParserLog.apply_action (List.hd derivations) ParserLog.Reject
:: derivations),
errors )
| MI.Accepted value ->
( Some value,
List.rev
(ParserLog.apply_action (List.hd derivations) ParserLog.Accept
:: derivations),
errors )
| MI.InputNeeded _ ->
let t, n1, n2 = supplier () in
stepParsingDerivations strategy
(MI.offer checkpoint (t, n1, n2))
supplier text buffer
(ParserLog.apply_action (List.hd derivations)
(ParserLog.Input (show text (n1, n2)))
:: derivations)
errors
| MI.Shifting (_, env2, notEnd) ->
let t, n1, n2 = supplier () in
let tk =
try
string_of_gsymbol
(match
G.Lr0.incoming
(G.Lr1.lr0 (G.Lr1.of_int (MI.current_state_number env2)))
with
| Some s -> s
| None -> failwith "")
with _ -> ""
in
stepParsingDerivations strategy
(MI.offer (MI.input_needed env2) (t, n1, n2))
supplier text buffer
(ParserLog.apply_action (List.hd derivations)
(ParserLog.ShiftRead
( MI.current_state_number env2,
tk,
if notEnd then
let s = show text (n1, n2) in
if s = "" then "EOF" else s
else "END OF FILE" ))
:: derivations)
errors
| MI.AboutToReduce (_, prod) ->
let new_checkpoint = MI.resume checkpoint in
let prod_str = string_of_production prod in
let lhs_str = List.hd (String.split_on_char ' ' prod_str) in
stepParsingDerivations strategy new_checkpoint supplier text buffer
(ParserLog.apply_action (List.hd derivations)
(ParserLog.Reduce
( lhs_str,
prod_str,
(try MI.current_state_number (get_env new_checkpoint) with
| Failure s when s = "accepted" -> get_accepting_state lhs_str
| _ ->
Format.eprintf "Neither accepted nor able to get a state";
G.Lr0.count - 1),
List.length (MI.rhs prod) ))
:: derivations)
errors
| MI.HandlingError env -> (
let location =
List.hd
(String.split_on_char '\n'
(MenhirLib.LexerUtil.range (MI.positions env)))
in
let indication = MenhirLib.ErrorReports.show (show text) buffer in
let message =
try
List.hd
(String.split_on_char '\n'
(ParserMessages.message (MI.current_state_number env)))
with _ -> "Not Found"
in
let message =
MenhirLib.ErrorReports.expand (get_env_text text env) message
in
match strategy with
| Stop ->
( None,
List.rev
(ParserLog.apply_action (List.hd derivations)
(ParserLog.Error (location, indication, message))
:: derivations),
[ (location, indication, message) ] )
| PopFirst ->
let deriv = List.hd derivations in
let new_deriv, new_env =
let rec pop_st deriv env =
match Parser.MenhirInterpreter.top env with
| None -> (deriv, env)
| Some (Element (s, _, _, _)) -> (
let gsymb =
G.Lr0.incoming (G.Lr1.lr0 (G.Lr1.of_int (MI.number s)))
in
if gsymb = None then (deriv, env)
else
let gsymb = Option.get gsymb in
if
List.fold_left
(fun acc attr ->
acc || G.Attribute.has_label "backtrack" attr)
false
(match gsymb with
| G.N n -> G.Nonterminal.attributes n
| G.T t -> G.Terminal.attributes t)
then (deriv, env)
else
match MI.pop env with
| Some b -> pop_st (ParserLog.pop_configuration deriv) b
| None -> (deriv, env))
in
pop_st deriv env
in
let checkpoint = MI.input_needed new_env in
let next_lookahead = ref (supplier ()) in
let cond = ref true in
let get_token (tk, _, _) = tk
and get_fst_pos (_, p, _) = p
and get_snd_pos (_, _, p) = p in
while
!cond
&& not
(MI.acceptable checkpoint
(get_token !next_lookahead)
(get_fst_pos !next_lookahead))
do
next_lookahead := supplier ();
if get_fst_pos !next_lookahead = get_snd_pos !next_lookahead then
cond := false
done;
if !cond then
let new_checkpoint = MI.offer checkpoint !next_lookahead in
let n1, n2 =
(get_fst_pos !next_lookahead, get_snd_pos !next_lookahead)
in
stepParsingDerivations strategy new_checkpoint supplier text
buffer
(ParserLog.apply_action new_deriv
(ParserLog.Input (show text (n1, n2)))
:: ParserLog.apply_action (List.hd derivations)
(ParserLog.Error (location, indication, message))
:: derivations)
((location, indication, message) :: errors)
else
( None,
List.rev
(ParserLog.apply_action (List.hd derivations)
(ParserLog.Error (location, indication, message))
:: derivations),
(location, indication, message) :: errors ))
let state_to_lr0_list num =
let lr0 = G.Lr1.lr0 (G.Lr1.of_int num) in
let item_list = G.Lr0.items lr0 in
List.map string_of_gitem item_list
let parse_to_derivation ?(strategy = Stop) text lexbuf =
let supplier = MI.lexer_lexbuf_to_supplier Lexer.token lexbuf in
let buffer, supplier = MenhirLib.ErrorReports.wrap_supplier supplier in
let checkpoint = Parser.Incremental.main lexbuf.lex_curr_p in
stepParsingDerivations strategy checkpoint supplier text buffer
[ ParserLog.initial_configuration ]
[]
let interactive_or_log interactive log_file error_file value derivations
errors =
(match log_file with
| Some name ->
let channel = open_out name in
let ch = Format.formatter_of_out_channel channel in
Format.fprintf ch "@[<v 0> %a @]@," ParserLog.print_configuration_list
(List.tl derivations);
close_out channel
| None -> ());
(if errors != [] then
let error_log =
Format.sprintf "@[<v 0>"
^ List.fold_left
(fun acc (location, indication, message) ->
acc
^ Format.sprintf "@[<v 2>%s %s@,%s@]@," location indication
message)
"" errors
^ Format.sprintf "@]@,"
in
match error_file with
| None -> Format.eprintf "\027[38;5;9m%s\027[0m" error_log
| Some name ->
let channel = open_out name in
let ch = Format.formatter_of_out_channel channel in
Format.fprintf ch "%s" error_log;
close_out channel);
if interactive then
ParserLog.derivations_explorer derivations state_to_lr0_list;
value
let parse ?strategy ?(interactive = true) ?log_file ?error_file text lexbuf =
let value, derivations, errors =
parse_to_derivation ?strategy text lexbuf
in
interactive_or_log interactive log_file error_file value derivations errors
let parse_string ?strategy ?interactive ?log_file ?error_file string =
let lexbuf = Lexing.from_string string in
parse ?strategy ?interactive ?log_file ?error_file string lexbuf
let parse_file ?strategy ?interactive ?log_file ?error_file file =
let text, lexbuf = MenhirLib.LexerUtil.read file in
parse ?strategy ?interactive ?log_file ?error_file text lexbuf
end
module MakeWithDefaultMessage
(T : sig
type value_parsed
end)
(Parser : P(T).parser)
(Lexer : sig
val token : Lexing.lexbuf -> Parser.token
end)
(Grammar : MenhirSdk.Cmly_api.GRAMMAR) =
Make (T) (Parser) (Lexer)
(struct
let message x = "Error on state " ^ string_of_int x ^ "."
end)
(Grammar)