1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
(* Local alias: every [Re.]-qualified name in this file resolves to [Core]. *)
module Re = Core
(** Regular-expression type used by this module; an alias of [Re.re]. *)
type regexp = Re.re
(** Upper-case flags accepted by [re] and [regexp]. [re] maps each one to the
    lower-case option of the same name ([`Caseless], [`Multiline],
    [`Anchored]) before handing the list to [Perl.re]. *)
type flag = [ `CASELESS | `MULTILINE | `ANCHORED ]
(** One element of the list built by [full_split]. *)
type split_result =
| Text of string
(** A piece of the subject reported as text by [Re.split_full]. *)
| Delim of string
(** The whole delimiter match (group 0). *)
| Group of int * string
(** [Group (i, s)]: group number [i] of a delimiter match and its text [s]. *)
| NoGroup
(** Placeholder emitted for a group whose recorded offset is [(-1, -1)]. *)
(** Alias of [Core.Group.t].
    NOTE(review): presumably the value returned by [exec] and consumed by
    [get_substring] / [get_substring_ofs]; confirm against the interface. *)
type groups = Core.Group.t
(** [re ?flags pat] builds a regular expression from the pattern [pat] with
    [Perl.re]. Every element of [flags] (default: none) is translated to the
    matching lower-case option and the resulting list is passed as [~opts]. *)
let re ?(flags = []) pat =
  let to_perl_option = function
    | `CASELESS -> `Caseless
    | `MULTILINE -> `Multiline
    | `ANCHORED -> `Anchored
  in
  Perl.re ~opts:(List.map to_perl_option flags) pat
(** [regexp ?flags pat] is [re ?flags pat] passed through [Re.compile]. *)
let regexp ?flags pat =
  re ?flags pat |> Re.compile
(* NOTE(review): the original line read [let ~rex s =], which has no binding
   name and does not parse. The name [extract] is restored here because that
   is what the PCRE-OCaml API calls this operation; confirm against the
   interface file of this module. *)

(** [extract ~rex s] runs [rex] on [s] with [Re.exec] and returns what
    [Re.Group.all] yields for the resulting match. Whatever [Re.exec] raises
    when [s] does not match is propagated unchanged. *)
let extract ~rex s =
  Re.Group.all (Re.exec rex s)
(** [exec ~rex ?pos s] runs [rex] on [s] through [Re.exec], forwarding the
    optional start position [pos] untouched, and returns its result. *)
let exec ~rex ?pos s =
  Re.exec ?pos rex s
(** [get_substring groups n] is [Re.Group.get groups n]: the text of group
    number [n] in the match result [groups]. *)
let get_substring groups n =
  Re.Group.get groups n
(** [get_substring_ofs groups n] is [Re.Group.offset groups n]: the offsets
    recorded for group number [n] in the match result [groups]. *)
let get_substring_ofs groups n =
  Re.Group.offset groups n
(** [pmatch ~rex subject] is the boolean returned by [Re.execp rex subject],
    i.e. whether [rex] matches [subject]. *)
let pmatch ~rex subject =
  Re.execp rex subject
(** [substitute ~rex ~subst str] returns a copy of [str] in which every match
    of [rex], found by scanning left to right, is replaced by [subst m] where
    [m] is the matched text. Text between matches is copied verbatim.

    Matching is only attempted from positions strictly inside [str], so the
    empty string is returned unchanged.

    An empty match is replaced like any other; the character that follows it
    is then copied verbatim and the scan resumes one position further on.
    Without that step the scan position would never advance and the function
    would not terminate. *)
let substitute ~rex ~subst str =
  let len = String.length str in
  let b = Buffer.create 1024 in
  let rec loop pos =
    if pos < len then begin
      (* A single [Re.exec] call both tests for and retrieves the match;
         it signals the absence of a match with [Not_found]. *)
      match Re.exec ~pos rex str with
      | exception Not_found ->
        Buffer.add_substring b str pos (len - pos)
      | ss ->
        let start, fin = Re.Group.offset ss 0 in
        let pat = Re.Group.get ss 0 in
        Buffer.add_substring b str pos (start - pos);
        Buffer.add_string b (subst pat);
        if fin > start then loop fin
        else if fin < len then begin
          (* Empty match: step over one character to guarantee progress. *)
          Buffer.add_char b str.[fin];
          loop (fin + 1)
        end
    end
  in
  loop 0;
  Buffer.contents b
(** [split ~rex str] cuts [str] at every match of [rex], scanning left to
    right, and returns the pieces between the matches in order.

    A delimiter at the very start yields a leading empty piece; no piece is
    produced after a delimiter that ends exactly at the end of [str]. The
    empty string yields the empty list.

    Empty matches never stall the scan: an empty match located at the start
    of the piece currently being collected is ignored, any other empty match
    cuts the string at that point, and in both cases the search resumes one
    character further on. *)
let split ~rex str =
  let len = String.length str in
  (* Close the result with the text from [from] up to the end of [str]. *)
  let finish_with_rest from accu =
    List.rev (String.sub str from (len - from) :: accu)
  in
  (* [last] is where the piece being collected starts; [pos] is where the
     next search starts. They only differ after an empty match. *)
  let rec loop accu last pos =
    if pos >= len then begin
      if last >= len then List.rev accu else finish_with_rest last accu
    end else begin
      (* A single [Re.exec] call both tests for and retrieves the match;
         it signals the absence of a match with [Not_found]. *)
      match Re.exec ~pos rex str with
      | exception Not_found -> finish_with_rest last accu
      | ss ->
        let start, fin = Re.Group.offset ss 0 in
        if fin > start then
          loop (String.sub str last (start - last) :: accu) fin fin
        else if start = last then
          (* Empty match with nothing collected yet: no cut, just advance. *)
          loop accu last (pos + 1)
        else
          (* Empty match further on: cut here, then step past one character
             so the same empty match is not found again. *)
          loop (String.sub str last (start - last) :: accu) fin (fin + 1)
    end
  in
  loop [] 0 0
(** [string_unsafe_sub s ofs len] copies [len] bytes of the byte sequence [s],
    starting at offset [ofs], into a fresh string. The copy uses
    [Bytes.unsafe_blit], so no bounds check is performed: the caller must
    pass a valid range. *)
let string_unsafe_sub s ofs len =
  let copy = Bytes.create len in
  let () = Bytes.unsafe_blit s ofs copy 0 len in
  (* [copy] is never touched again, so viewing it as a string is sound. *)
  Bytes.unsafe_to_string copy
(** [quote s] returns a copy of [s] in which each character recognised by
    [needs_escape] below is preceded by a backslash; all other characters are
    copied as they are. *)
let quote s =
  let len = String.length s in
  (* Worst case: every character gets a backslash in front of it. *)
  let buf = Bytes.create (2 * len) in
  let needs_escape = function
    | '\\' | '^' | '$' | '.' | '[' | '|'
    | '(' | ')' | '?' | '*' | '+' | '{' -> true
    | _ -> false
  in
  (* [fill i out] writes characters [i .. len - 1] of [s] at offset [out] of
     [buf] and returns the offset just past the last byte written. *)
  let rec fill i out =
    if i >= len then out
    else begin
      let c = String.unsafe_get s i in
      let out =
        if needs_escape c then begin
          Bytes.unsafe_set buf out '\\';
          out + 1
        end else out
      in
      Bytes.unsafe_set buf out c;
      fill (i + 1) (out + 1)
    end
  in
  let used = fill 0 0 in
  string_unsafe_sub buf 0 used
(** [full_split ?max ~rex s] splits [s] with [Re.split_full rex] and flattens
    the outcome into a list of [split_result]:
    - each text item becomes [Text];
    - each delimiter becomes [Delim] carrying group 0, followed by one entry
      per group from 1 upwards: [Group (i, text)], or [NoGroup] when the
      offset recorded for that group is [(-1, -1)].

    The empty string gives [[]]. When [max] is [1] the result is [[Text s]]
    and no matching is done; any other value of [max] (default [0]) has no
    effect in this implementation. *)
let full_split ?(max=0) ~rex s =
  (* Entries for groups 1 .. n-1 of the delimiter match [d], in order. *)
  let group_entries d =
    let offsets = Re.Group.all_offset d in
    let rec collect i acc =
      if i < 1 then acc
      else begin
        let entry =
          if offsets.(i) = (-1, -1) then NoGroup
          else Group (i, Re.Group.get d i)
        in
        collect (i - 1) (entry :: acc)
      end
    in
    collect (Array.length offsets - 1) []
  in
  let expand = function
    | `Text t -> [Text t]
    | `Delim d ->
      let whole = Re.Group.get d 0 in
      Delim whole :: group_entries d
  in
  if String.length s = 0 then []
  else if max = 1 then [Text s]
  else List.concat (List.map expand (Re.split_full rex s))
(** Alias of [Group.t].
    NOTE(review): [groups] earlier in this file is declared as
    [Core.Group.t]; presumably both name the same type, and this alias is
    kept for callers using the PCRE-style name. Confirm that the unqualified
    [Group] module resolves as intended. *)
type substrings = Group.t