1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
(** Utilities for parsing. *)
open Angstrom
open T
(** [is_prefix ~s ~pref] is [true] if and only if the string [s] starts
    with the string [pref]. The empty string is a prefix of every string. *)
let is_prefix ~s ~pref =
  let n = String.length pref in
  if n > String.length s then false
  else String.equal (String.sub s 0 n) pref
(** [remove_options l] drops every [None] from [l] and unwraps every
    [Some x] into [x], preserving the order of the remaining elements. *)
let remove_options : 'a option list -> 'a list = fun l ->
  (* [List.filter_map] keeps exactly the [Some] payloads; using the
     identity as the mapping function makes it a pure "unwrap" pass. *)
  List.filter_map Fun.id l
(** [with_loc ctx p] runs [p] and pairs its result with a location
    [(start, stop)], where [start] and [stop] are the values produced by
    [ctx.get_pos] right before and right after [p] is run. *)
let with_loc : ctx -> 'a Angstrom.t -> ('a * loc) Angstrom.t =
  fun ctx p ->
    let locate start v stop = (v, (start, stop)) in
    (* [lift3] runs its three parsers from left to right. *)
    lift3 locate ctx.get_pos p ctx.get_pos
(** [is_hex c] is [true] if and only if [c] is an ASCII hexadecimal digit
    (a decimal digit, or a letter from a to f in either case). *)
let is_hex c =
  match c with
  | '0' .. '9' -> true
  | 'a' .. 'f' -> true
  | 'A' .. 'F' -> true
  | _ -> false
(** [is_digit c] is [true] if and only if [c] is an ASCII decimal digit. *)
let is_digit c = c >= '0' && c <= '9'
(** [fail_at ctx msg] builds a [Parse_error] from [msg] and passes it to
    {!T.error}, attaching a position when one can be obtained.

    The position is computed by starting [ctx.last_pos] as an unbuffered
    parser without feeding it any input: when it is immediately done, its
    result is used as the position; when it asks for more input or fails,
    the error carries no position. *)
let fail_at ctx msg =
  let pos_opt =
    match Angstrom.Unbuffered.parse ctx.last_pos with
    | Angstrom.Unbuffered.Done (_, pos) -> Some pos
    | Angstrom.Unbuffered.Partial _ | Angstrom.Unbuffered.Fail _ -> None
  in
  T.error (Parse_error (pos_opt, msg))
(** [parse_error_at ctx e] is a parser which reads the position given by
    [ctx.last_pos] and raises a {!T.Error} with the parse error [e] at
    that position. *)
let parse_error_at ctx e =
  let raise_at pos = T.error (Parse_error (Some pos, e)) in
  ctx.last_pos >>= raise_at
(**/**)
include Log
(**/**)
(** [string_of_char_list l] is the string made of the characters of [l],
    in the same order. *)
let string_of_char_list l = String.of_seq (List.to_seq l)
(** {2 Util parsers} *)
(** [opt_ p] is a parser that never fails on its own: it returns [Some r]
    when [p] succeeds with result [r], and [None] when [p] fails. *)
let opt_ p = (p >>| fun r -> Some r) <|> return None
(** [take_char] is a parser returning [None] if there is no more character;
    otherwise it accepts any character [c], advances by one character
    (see {!Angstrom.advance}) and returns [Some c]. *)
let take_char =
  peek_char >>= function
  | Some c -> advance 1 *> return (Some c)
  | None -> return None
(** [take_while_upto pred n] accepts characters as long as [pred] returns
    [true] on them, but no more than [n] characters. It returns the accepted
    characters as a string, which is empty when the next character does not
    satisfy [pred], when the input is exhausted, or when [n <= 0]. *)
let take_while_upto pred n =
  (* Characters are accumulated most recent first; [List.rev_map] restores
     the input order while turning each character into a string. *)
  let to_string rev_chars =
    String.concat "" (List.rev_map (String.make 1) rev_chars)
  in
  let rec loop rev_chars remaining =
    if remaining <= 0 then return (to_string rev_chars)
    else
      peek_char >>= function
      | Some c when pred c ->
        advance 1 >>= fun () -> loop (c :: rev_chars) (remaining - 1)
      | Some _ | None -> return (to_string rev_chars)
  in
  loop [] n
(** [take_while1_upto pred n] fails (with an empty message) if no character
    is available. Otherwise it accepts the next character, then behaves as
    {!take_while_upto}[ pred n] and returns the first character followed by
    the characters accepted by that second step.

    NOTE(review): the first character is accepted without being tested
    against [pred], and the result may contain up to [n + 1] characters.
    Confirm against callers that this is what is intended. *)
let take_while1_upto pred n =
  take_char >>= function
  | Some first ->
    take_while_upto pred n >>| fun rest -> String.make 1 first ^ rest
  | None -> fail ""
(** [comment ctx] accepts a comment delimited by [/*] and [*/] and returns
    the text found between the two delimiters.

    The position given by [ctx.get_pos] is read before the opening
    delimiter. If the opening delimiter is accepted but no closing delimiter
    can be found, a [Parse_error] with [Unterminated_comment] and that
    starting position is passed to {!T.error}. *)
let comment ctx =
  ctx.get_pos >>= fun start_pos ->
  Angstrom.(
    string "/*" *>
    choice [
      (many_till any_char (string "*/") >>| string_of_char_list) ;
      (* Reached only when the closing delimiter was not found: the result
         of [peek_char] is ignored, the error is reported in every case. *)
      (peek_char >>= fun _ ->
       T.(error (Parse_error (Some start_pos, Unterminated_comment))))
    ]
  ) <?> "comment"
(** [is_ws c] is [true] if and only if [c] is a space, a horizontal tab,
    a line feed or a carriage return. *)
let is_ws c =
  match c with
  | ' ' | '\t' | '\n' | '\r' -> true
  | _ -> false
(** [ws ctx] accepts any number (possibly zero) of runs of white spaces and
    of comments, and returns the concatenation of the strings returned for
    each of them. *)
let ws ctx : string Angstrom.t =
  let blanks = take_while1 is_ws in
  let piece = blanks <|> comment ctx in
  many piece >>| fun pieces -> String.concat "" pieces
(** {2 Parser combinators} *)
(** [map_fst parser] is like [parser], a function from a context to a
    parser returning a pair, but the resulting parser only returns the
    first component of that pair. *)
let map_fst parser ctx = Angstrom.lift fst (parser ctx)
(** [p1 &&& p2] is a parser succeeding when both [p1] and [p2] succeed, in
    any order: [p1] followed by [p2] is tried first, then [p2] followed by
    [p1]. The result is always the pair (result of [p1], result of [p2]). *)
let (&&&) p1 p2 =
  let p1_then_p2 = lift2 (fun v1 v2 -> (v1, v2)) p1 p2 in
  let p2_then_p1 = lift2 (fun v2 v1 -> (v1, v2)) p2 p1 in
  choice [ p1_then_p2 ; p2_then_p1 ]
(** [p1 ||| p2] is a parser accepting a value for [p1], for [p2], or for
    both in any order. Each component of the result is [Some v] when the
    corresponding parser succeeded with [v], and [None] otherwise. The
    parser fails when neither [p1] nor [p2] can be accepted first. *)
let (|||) : 'a Angstrom.t -> 'b Angstrom.t -> ('a option * 'b option) Angstrom.t =
  fun p1 p2 ->
    (* [p1] first, optionally followed by [p2]. *)
    let starting_with_p1 =
      p1 >>= fun v1 ->
      option (Some v1, None) (p2 >>| fun v2 -> (Some v1, Some v2))
    in
    (* [p2] first, optionally followed by [p1]. *)
    let starting_with_p2 =
      p2 >>= fun v2 ->
      option (None, Some v2) (p1 >>| fun v1 -> (Some v1, Some v2))
    in
    choice [ starting_with_p1 ; starting_with_p2 ]
(** [alt_1_2 def1 p1 def2 p2] is the same as {!(|||)} applied to [p1] and
    [p2], except that the result is a plain pair: [def1] replaces the first
    component when [p1] was not accepted, and [def2] replaces the second
    component when [p2] was not accepted. *)
let alt_1_2 : 'a -> 'a Angstrom.t -> 'b -> 'b Angstrom.t -> ('a * 'b) Angstrom.t =
  fun def1 p1 def2 p2 ->
    let with_defaults (r1, r2) =
      let v1 = match r1 with Some v -> v | None -> def1 in
      let v2 = match r2 with Some v -> v | None -> def2 in
      (v1, v2)
    in
    (p1 ||| p2) >>| with_defaults
(** [handle_end parser ctx] runs [parser ctx], skips trailing white spaces
    and comments, and returns the result of [parser ctx] if the end of input
    is reached. Otherwise the next character is consumed and reported
    through {!parse_error_at} as an unexpected character. *)
let handle_end parser ctx =
  (parser ctx <* ws ctx) >>= fun v ->
  at_end_of_input >>= fun at_end ->
  if at_end then return v
  else
    take 1 >>= fun c ->
    parse_error_at ctx (Other (Printf.sprintf "Unexpected character %s" c))
(** {2 Predefined character parsers}
All these parsers accept a character after optional white spaces or comments.
*)
(** [lchar c ctx] skips optional white spaces and comments (see {!ws}),
    then accepts the character [c] and returns it. *)
let lchar c ctx = ws ctx >>= fun _ -> char c
(* Each parser below takes a context and accepts one given character after
   optional white spaces or comments, by way of [lchar]. *)

(* Brackets, braces and parentheses. *)
let lbracket ctx = lchar '[' ctx
let rbracket ctx = lchar ']' ctx
let lbrace ctx = lchar '{' ctx
let rbrace ctx = lchar '}' ctx
let lpar ctx = lchar '(' ctx
let rpar ctx = lchar ')' ctx

(* Punctuation. *)
let dot ctx = lchar '.' ctx
let colon ctx = lchar ':' ctx
let semicolon ctx = lchar ';' ctx
let ampersand ctx = lchar '&' ctx
let comma ctx = lchar ',' ctx
let dquote ctx = lchar '"' ctx
let quote ctx = lchar '\'' ctx

(* Operators and other symbols. *)
let plus ctx = lchar '+' ctx
let minus ctx = lchar '-' ctx
let pipe ctx = lchar '|' ctx
let sharp ctx = lchar '#' ctx
let slash ctx = lchar '/' ctx

(* Comparison signs. The two-character forms accept the first character
   after optional white spaces or comments, require the equal sign to
   follow immediately, and return that equal sign. *)
let lt ctx = lchar '<' ctx
let lte ctx = lt ctx *> char '='
let gt ctx = lchar '>' ctx
let gte ctx = gt ctx *> char '='

let tilde ctx = lchar '~' ctx
(** {2 Parsing integers and sign} *)
(** [sign ctx] accepts a minus sign or a plus sign (tried in that order),
    each after optional white spaces or comments, and returns the accepted
    character. *)
let sign ctx =
  let candidates = [ minus ; plus ] in
  choice (List.map (fun p -> p ctx) candidates)
(** [integer ctx] accepts, after optional white spaces or comments, an
    optional sign (see {!sign}; a plus sign is assumed when absent)
    followed by one or more decimal digits, and returns the corresponding
    integer.

    The parser fails, instead of letting [int_of_string] raise [Failure],
    when the digits do not denote a value representable as an [int]. *)
let integer ctx =
  (ws ctx *> option '+' (sign ctx) >>= fun sign_char ->
   take_while1 is_digit >>= fun digits ->
   (* [int_of_string_opt] returns [None] on overflow, which is the only way
      a sign followed by decimal digits can be rejected here. *)
   match int_of_string_opt (Printf.sprintf "%c%s" sign_char digits) with
   | Some n -> return n
   | None -> fail "integer out of range")
  <?> "integer"
(** {2 Strings} *)
(** [quote_string str] returns [str] enclosed in double-quote characters,
    where each double-quote character occurring in [str] is preceded by a
    backslash. No other character is escaped. *)
let quote_string str =
  (* Splitting on the double-quote character and joining the pieces with
     the escaped form rewrites every occurrence, and nothing else. *)
  let pieces = String.split_on_char '"' str in
  let escaped = String.concat "\\\"" pieces in
  String.concat "" [ "\"" ; escaped ; "\"" ]