1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
module Re = Core
exception Parse_error
type enclosed =
| Char of char
| Range of char * char
type piece =
| Exactly of char
| Any_of of enclosed list
| Any_but of enclosed list
| One
| Many
| ManyMany
type t = piece list
let of_string ~double_asterisk s : t =
let buf = Parse_buffer.create s in
let eos () = Parse_buffer.eos buf in
let read c = Parse_buffer.accept buf c in
let char () =
ignore (read '\\' : bool);
if eos () then raise Parse_error;
Parse_buffer.get buf
in
let enclosed () : enclosed list =
let rec loop s =
if s <> [] && read ']'
then s
else (
let c = char () in
if not (read '-')
then loop (Char c :: s)
else if read ']'
then Char c :: Char '-' :: s
else (
let c' = char () in
loop (Range (c, c') :: s)))
in
loop []
in
let piece acc =
if double_asterisk && Parse_buffer.accept_s buf "/**"
then ManyMany :: (if eos () then Exactly '/' :: acc else acc)
else if read '*'
then (if double_asterisk && read '*' then ManyMany else Many) :: acc
else if read '?'
then One :: acc
else if not (read '[')
then Exactly (char ()) :: acc
else if read '^' || read '!'
then Any_but (enclosed ()) :: acc
else Any_of (enclosed ()) :: acc
in
let rec loop pieces = if eos () then List.rev pieces else loop (piece pieces) in
loop []
;;
let mul l l' = List.flatten (List.map (fun s -> List.map (fun s' -> s ^ s') l') l)
let explode str =
let l = String.length str in
let rec expl inner s i acc beg =
if i >= l
then (
if inner then raise Parse_error;
mul beg [ String.sub str s (i - s) ], i)
else (
match str.[i] with
| '\\' -> expl inner s (i + 2) acc beg
| '{' ->
let t, i' = expl true (i + 1) (i + 1) [] [ "" ] in
expl inner i' i' acc (mul beg (mul [ String.sub str s (i - s) ] t))
| ',' when inner ->
expl inner (i + 1) (i + 1) (mul beg [ String.sub str s (i - s) ] @ acc) [ "" ]
| '}' when inner -> mul beg [ String.sub str s (i - s) ] @ acc, i + 1
| _ -> expl inner s (i + 1) acc beg)
in
List.rev (fst (expl false 0 0 [] [ "" ]))
;;
module State = struct
type t =
{ re_pieces : Re.t list
; remaining : piece list
; am_at_start_of_pattern : bool
; am_at_start_of_component : bool
; pathname : bool
; match_backslashes : bool
; period : bool
}
let create ~period ~pathname ~match_backslashes remaining =
{ re_pieces = []
; am_at_start_of_pattern = true
; am_at_start_of_component = true
; pathname
; match_backslashes
; period
; remaining
}
;;
let explicit_period t =
t.period && (t.am_at_start_of_pattern || (t.am_at_start_of_component && t.pathname))
;;
let explicit_slash t = t.pathname
let slashes t = if t.match_backslashes then [ '/'; '\\' ] else [ '/' ]
let append ?(am_at_start_of_component = false) t piece =
{ t with
re_pieces = piece :: t.re_pieces
; am_at_start_of_pattern = false
; am_at_start_of_component
}
;;
let to_re t = Re.seq (List.rev t.re_pieces)
let next t =
match t.remaining with
| [] -> None
| piece :: remaining -> Some (piece, { t with remaining })
;;
end
let one ~explicit_slash ~slashes ~explicit_period =
Re.compl
(List.concat
[ (if explicit_slash then List.map Re.char slashes else [])
; (if explicit_period then [ Re.char '.' ] else [])
])
;;
let enclosed enclosed =
match enclosed with
| Char c -> Re.char c
| Range (low, high) -> Re.rg low high
;;
let enclosed_set ~explicit_slash ~slashes ~explicit_period kind set =
let set = List.map enclosed set in
let enclosure =
match kind with
| `Any_of -> Re.alt set
| `Any_but -> Re.compl set
in
Re.inter [ enclosure; one ~explicit_slash ~slashes ~explicit_period ]
;;
let exactly state c =
let slashes = State.slashes state in
let am_at_start_of_component = List.mem c slashes in
let chars = if am_at_start_of_component then slashes else [ c ] in
State.append state (Re.alt (List.map Re.char chars)) ~am_at_start_of_component
;;
let many_many state =
let explicit_period = state.State.period && state.State.pathname in
let first_explicit_period = State.explicit_period state in
let slashes = State.slashes state in
let match_component ~explicit_period =
Re.seq
[ one ~explicit_slash:true ~slashes ~explicit_period
; Re.rep (one ~explicit_slash:true ~slashes ~explicit_period:false)
]
in
State.append
state
(Re.seq
[ Re.opt (match_component ~explicit_period:first_explicit_period)
; Re.rep
(Re.seq
[ Re.alt (List.map Re.char slashes)
; Re.opt (match_component ~explicit_period)
])
])
;;
let many (state : State.t) =
let explicit_slash = State.explicit_slash state in
let explicit_period = State.explicit_period state in
let slashes = State.slashes state in
if not explicit_period
then State.append state (Re.rep (one ~explicit_slash ~slashes ~explicit_period))
else if not explicit_slash
then
State.append
state
(Re.opt
(Re.seq
[ one ~explicit_slash:false ~slashes ~explicit_period
; Re.rep (one ~explicit_slash:false ~slashes ~explicit_period:false)
]))
else (
let not_empty =
Re.seq
[ one ~explicit_slash:true ~slashes ~explicit_period:true
; Re.rep (one ~explicit_slash:true ~slashes ~explicit_period:false)
]
in
let maybe_empty = Re.opt not_empty in
let enclosed_set state kind set =
State.append
state
(Re.alt
[ enclosed_set kind set ~explicit_slash:true ~slashes ~explicit_period:true
; Re.seq
[ not_empty
;
enclosed_set
kind
set
~explicit_slash:true
~slashes
~explicit_period:false
]
])
in
let rec lookahead state =
match State.next state with
| None -> State.append state maybe_empty
| Some (Many, state) -> lookahead state
| Some (Exactly c, state) ->
let state = State.append state (if c = '.' then not_empty else maybe_empty) in
exactly state c
| Some (One, state) -> State.append state not_empty
| Some (Any_of enclosed, state) -> enclosed_set state `Any_of enclosed
| Some (Any_but enclosed, state) -> enclosed_set state `Any_but enclosed
| Some (ManyMany, state) -> many_many state
in
lookahead state)
;;
let piece state piece =
let explicit_slash = State.explicit_slash state in
let explicit_period = State.explicit_period state in
let slashes = State.slashes state in
match piece with
| One -> State.append state (one ~explicit_slash ~slashes ~explicit_period)
| Many -> many state
| Any_of enclosed ->
State.append
state
(enclosed_set `Any_of ~explicit_slash ~slashes ~explicit_period enclosed)
| Any_but enclosed ->
State.append
state
(enclosed_set `Any_but ~explicit_slash ~slashes ~explicit_period enclosed)
| Exactly c -> exactly state c
| ManyMany -> many_many state
;;
let glob ~pathname ~match_backslashes ~period glob =
let rec loop state =
match State.next state with
| None -> State.to_re state
| Some (p, state) -> loop (piece state p)
in
loop (State.create ~pathname ~match_backslashes ~period glob)
;;
let glob
?(anchored = false)
?(pathname = true)
?(match_backslashes = false)
?(period = true)
?(expand_braces = false)
?(double_asterisk = true)
s
=
let to_re s =
let re = glob ~pathname ~match_backslashes ~period (of_string ~double_asterisk s) in
if anchored then Re.whole_string re else re
in
if expand_braces then Re.alt (List.map to_re (explode s)) else to_re s
;;
let glob' ?anchored period s = glob ?anchored ~period s
let globx ?anchored s = glob ?anchored ~expand_braces:true s
let globx' ?anchored period s = glob ?anchored ~expand_braces:true ~period s