1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
(* Local alias: the rest of this file refers to the [Core] module as [Re]. *)
module Re = Core
(* Re-export the exceptions declared in [Perl] under this module's names. *)
exception Parse_error = Perl.Parse_error
exception Not_supported = Perl.Not_supported
(* Regular expression type used by the [~rex] arguments below; an alias of
   [Re.re], which is what [Re.compile] produces in [regexp]. *)
type regexp = Re.re
(* Compilation flags accepted by [re] and [regexp]. [re] translates each of
   them into the corresponding [`Caseless], [`Multiline], [`Anchored] or
   [`Dotall] option before calling [Perl.re]. *)
type flag =
[ `CASELESS
| `MULTILINE
| `ANCHORED
| `DOTALL
]
(* One element of the list returned by [full_split]. *)
type split_result =
| Text of string (* a [`Text] piece from [Re.split_full], or the whole input when [max = 1] *)
| Delim of string (* group 0 of a [`Delim] piece, i.e. the full delimiter match *)
| Group of int * string (* index and contents of a capture group of the delimiter *)
| NoGroup (* stands in for a capture group whose offsets are (-1, -1) *)
(* Match groups as handled by [get_substring] and [get_substring_ofs];
   an alias of [Core.Group.t] ([Core] is the module aliased as [Re] here). *)
type groups = Core.Group.t
(* [re ?flags pat] parses the pattern [pat] with [Perl.re], after translating
   every flag in [flags] (default: no flags) into the matching [Perl] option.
   The result is the expression that [regexp] later hands to [Re.compile]. *)
let re ?(flags = []) pat =
  let perl_opt_of_flag flag =
    match flag with
    | `CASELESS -> `Caseless
    | `MULTILINE -> `Multiline
    | `ANCHORED -> `Anchored
    | `DOTALL -> `Dotall
  in
  Perl.re ~opts:(List.map perl_opt_of_flag flags) pat
;;
(* [regexp ?flags pat] builds the expression for [pat] with [re] and compiles
   it with [Re.compile]. *)
let regexp ?flags pat = re ?flags pat |> Re.compile
(* [extract ~rex s] runs [rex] on [s] with [Re.exec] and returns every group
   of the resulting match as produced by [Re.Group.all].
   Any exception raised by [Re.exec] is propagated unchanged (presumably
   [Not_found] when [s] contains no match -- confirm against the Re docs).

   NOTE(review): the binding had lost its identifier ([let ~rex s = ...] does
   not parse); [extract] is the name this function carries in the library's
   public Pcre interface. *)
let extract ~rex s = Re.Group.all (Re.exec rex s)
(* [exec ~rex ?pos s] is [Re.exec] with the regular expression passed as a
   labelled argument; [pos], when given, is forwarded as is. *)
let exec ~rex ?pos s = Re.exec ?pos rex s
(* [get_substring groups idx] returns group number [idx] of a match result;
   a thin wrapper around [Re.Group.get]. *)
let get_substring groups idx = Re.Group.get groups idx
(* [names rex] returns, as an array, the first component (the name) of every
   pair listed by [Re.group_names rex], in the same order. *)
let names rex = Array.of_list (List.map fst (Re.group_names rex))
(* [get_named_substring rex name s] returns the substring of the match [s]
   captured by a group of [rex] called [name].

   The pairs from [Re.group_names rex] are scanned in order; when an entry
   carries the requested name but [get_substring] raises [Not_found] for its
   index, the scan continues with the remaining entries.

   Raises [Not_found] when no entry yields a substring. *)
let get_named_substring rex name s =
  let rec search candidates =
    match candidates with
    | [] -> raise Not_found
    | (group_name, index) :: rest ->
      if group_name = name
      then (
        try get_substring s index with
        | Not_found -> search rest)
      else search rest
  in
  search (Re.group_names rex)
;;
(* [get_substring_ofs groups idx] returns the offsets of group number [idx]
   in a match result; a thin wrapper around [Re.Group.offset]. *)
let get_substring_ofs groups idx = Re.Group.offset groups idx
(* [pmatch ~rex subject] tells whether [rex] matches [subject], as decided by
   [Re.execp]. *)
let pmatch ~rex subject = Re.execp rex subject
(* [substitute ~rex ~subst str] returns a copy of [str] in which every match
   of [rex] is replaced by [subst m], where [m] is the matched text (group 0).
   Text outside the matches is copied verbatim.

   The scan goes left to right:
   - an empty match found exactly where the previous non-empty match ended is
     not substituted; the next character is copied and the scan resumes
     after it;
   - after any other empty match the replacement is emitted, then one
     character is copied so that the scan always makes progress.

   The regular expression is run once per step: [Re.exec] raises [Not_found]
   when nothing matches from [pos] on, which replaces the former
   [Re.execp] + [Re.exec] pair that matched the same position twice. *)
let substitute ~rex ~subst str =
  let len = String.length str in
  let b = Buffer.create 1024 in
  let rec loop pos on_match =
    match Re.exec ~pos rex str with
    | exception Not_found ->
      (* No further match: flush the remaining tail. *)
      Buffer.add_substring b str pos (len - pos)
    | ss ->
      let start, fin = Re.Group.offset ss 0 in
      if on_match && start = pos && start = fin
      then (
        (* Empty match right after a match: skip it. *)
        if pos < len
        then (
          Buffer.add_char b str.[pos];
          loop (pos + 1) false))
      else (
        let pat = Re.Group.get ss 0 in
        Buffer.add_substring b str pos (start - pos);
        Buffer.add_string b (subst pat);
        if start = fin
        then (
          (* Empty match: advance by one character by hand. *)
          if fin < len
          then (
            Buffer.add_char b str.[fin];
            loop (fin + 1) false))
        else loop fin true)
  in
  loop 0 false;
  Buffer.contents b
;;
(* [split ~rex str] cuts [str] at every match of [rex] and returns the pieces
   found between the matches, in order. The piece that follows the last match
   is always included, even when it is empty.

   [last] is the start of the piece being accumulated and [pos] the position
   the next search starts from. An empty match found exactly where the
   previous non-empty match ended is ignored; after any other empty match the
   search restarts one character further so that the scan always progresses.

   The regular expression is run once per step: [Re.exec] raises [Not_found]
   when nothing matches from [pos] on, which replaces the former
   [Re.execp] + [Re.exec] pair that matched the same position twice. *)
let split ~rex str =
  let len = String.length str in
  (* Append the trailing piece starting at [last] and restore the order. *)
  let finish last accu = List.rev (String.sub str last (len - last) :: accu) in
  let rec loop accu last pos on_match =
    match Re.exec ~pos rex str with
    | exception Not_found -> finish last accu
    | ss ->
      let start, fin = Re.Group.offset ss 0 in
      if on_match && start = pos && start = fin
      then (
        (* Empty match right after a match: skip it. *)
        if pos = len then finish last accu else loop accu last (pos + 1) false)
      else (
        let accu = String.sub str last (start - last) :: accu in
        if start = fin
        then (
          (* Empty match: advance by one character by hand. *)
          if fin = len then finish fin accu else loop accu fin (fin + 1) false)
        else loop accu fin fin true)
  in
  loop [] 0 0 false
;;
(* [string_unsafe_sub s ofs len] returns, as a fresh string, the [len] bytes
   of [s] that start at offset [ofs].

   Implemented with [Bytes.sub_string], so an invalid range raises
   [Invalid_argument] instead of reading outside [s] as the previous
   unchecked blit could. The historical name is kept for the callers in this
   file. *)
let string_unsafe_sub s ofs len = Bytes.sub_string s ofs len
;;
(* [quote s] returns a copy of [s] in which each of the characters
   \ ^ $ . [ | ( ) ? * + { is preceded by a backslash; every other character
   is copied unchanged. *)
let quote s =
  let n = String.length s in
  (* Worst case: every character gets escaped, doubling the length. *)
  let out = Bytes.create (n lsl 1) in
  let used = ref 0 in
  let emit c =
    Bytes.unsafe_set out !used c;
    incr used
  in
  String.iter
    (fun c ->
      (match c with
       | '\\' | '^' | '$' | '.' | '[' | '|' | '(' | ')' | '?' | '*' | '+' | '{' ->
         emit '\\'
       | _ -> ());
      emit c)
    s;
  (* Only the first [!used] bytes of [out] have been written. *)
  string_unsafe_sub out 0 !used
;;
(* [full_split ?max ~rex s] splits [s] with [Re.split_full] and flattens the
   result into a list of [split_result]:
   - each [`Text t] piece becomes [Text t];
   - each [`Delim d] piece becomes [Delim] of group 0, followed by one entry
     per capture group 1..n in increasing order: [Group (i, text)], or
     [NoGroup] when the offsets of group [i] are (-1, -1).

   An empty [s] gives [[]]. [max] (default 0) is only consulted for the value
   1, in which case the result is [[Text s]]; any other value leaves the
   number of pieces unlimited. *)
let full_split ?(max = 0) ~rex s =
  let expand_delim d =
    let offsets = Re.Group.all_offset d in
    (* Walk the groups from the last one down to 1 so that consing yields
       them in increasing order. *)
    let rec collect i acc =
      if i < 1
      then acc
      else (
        let item =
          if offsets.(i) = (-1, -1) then NoGroup else Group (i, Re.Group.get d i)
        in
        collect (i - 1) (item :: acc))
    in
    Delim (Re.Group.get d 0) :: collect (Array.length offsets - 1) []
  in
  let expand = function
    | `Text t -> [ Text t ]
    | `Delim d -> expand_delim d
  in
  if String.length s = 0
  then []
  else if max = 1
  then [ Text s ]
  else Re.split_full rex s |> List.map expand |> List.concat
;;
(* Additional name for the match-group type [Group.t].
   NOTE(review): presumably the same type as [groups] ([Core.Group.t]) --
   confirm that [Core.Group] and [Group] are the same module. *)
type substrings = Group.t