1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
open! Core
open! Import
open Rx_intf
module Q = struct
include Q
let alnum = "alnum" |> Symbol.intern
let alpha = "alpha" |> Symbol.intern
let any = "any" |> Symbol.intern
let anything = "anything" |> Symbol.intern
let digit = "digit" |> Symbol.intern
let line_end = "line-end" |> Symbol.intern
let line_start = "line-start" |> Symbol.intern
let lower = "lower" |> Symbol.intern
let not_ = "not" |> Symbol.intern
let one_or_more = "one-or-more" |> Symbol.intern
let or_ = "or" |> Symbol.intern
let point = "point" |> Symbol.intern
let seq = "seq" |> Symbol.intern
let space = "space" |> Symbol.intern
let submatch = "submatch" |> Symbol.intern
let submatch_n = "submatch-n" |> Symbol.intern
let sym_eq = "=" |> Symbol.intern
let sym_ge = ">=" |> Symbol.intern
let sym_star_star = "**" |> Symbol.intern
let upper = "upper" |> Symbol.intern
let word = "word" |> Symbol.intern
let xdigit = "xdigit" |> Symbol.intern
let zero_or_more = "zero-or-more" |> Symbol.intern
let zero_or_one = "zero-or-one" |> Symbol.intern
end
module Named_char_class = struct
include Named_char_class
let to_form t =
match (t : t) with
| Alphabetic -> Form.symbol Q.alpha
| Alphanumeric -> Form.symbol Q.alnum
| Digit -> Form.symbol Q.digit
| Hex_digit -> Form.symbol Q.xdigit
| Lower -> Form.symbol Q.lower
| Space -> Form.symbol Q.space
| Upper -> Form.symbol Q.upper
| Word -> Form.symbol Q.word
;;
end
module Char_class = struct
include Char_class
let to_form t =
match (t : t) with
| Chars_in string -> Form.string string
| Named named -> Named_char_class.to_form named
| Range (c1, c2) -> Form.string (sprintf "%c-%c" c1 c2)
;;
let dash_char_set = Char.Set.singleton '-'
let to_forms ts =
let chars, other_ts =
List.partition_map ts ~f:(function
| Chars_in s -> First s
| (Named _ | Range _) as t -> Second t)
in
let chars =
List.fold chars ~init:Char.Set.empty ~f:(fun init s ->
String.fold s ~init ~f:Set.add)
in
let ts =
let chars_in chars =
match Set.is_empty chars with
| true -> []
| false ->
let chars = chars |> Set.to_list |> String.of_char_list in
[ Chars_in chars ]
in
List.concat
[ chars_in (Set.inter chars dash_char_set)
; chars_in (Set.diff chars dash_char_set)
; other_ts
]
in
List.map ts ~f:to_form
;;
end
module Start_or_end = Start_or_end
include T
let label symbol args = Form.list (Form.symbol symbol :: args)
let rec to_forms ts = List.map ts ~f:to_form
and to_form t =
match (t : t) with
| Any_char -> Form.symbol Q.anything
| Any_in char_classes -> label Q.any (Char_class.to_forms char_classes)
| Exactly string -> Form.string string
| Line End -> Form.symbol Q.line_end
| Line Start -> Form.symbol Q.line_start
| None_in char_classes -> label Q.not_ [ to_form (Any_in char_classes) ]
| One_or_more t -> to_form (Repeat { min = 1; max = None; t })
| Or ts -> label Q.or_ (to_forms ts)
| Pattern pattern -> label Q.regexp [ Form.string pattern ]
| Point -> Form.symbol Q.point
| Repeat { min; max; t } ->
(match min, max with
| 0, None -> label Q.zero_or_more [ to_form t ]
| 0, Some 1 -> label Q.zero_or_one [ to_form t ]
| 1, None -> label Q.one_or_more [ to_form t ]
| n, None -> label Q.sym_ge [ Form.int n; to_form t ]
| n, Some m ->
(match Int.( = ) n m with
| true -> label Q.sym_eq [ Form.int n; to_form t ]
| false -> label Q.sym_star_star [ Form.int n; Form.int m; to_form t ]))
| Seq ts -> label Q.seq (to_forms ts)
| Submatch t -> label Q.submatch [ to_form t ]
| Submatch_n { index; t } -> label Q.submatch_n [ Form.int index; to_form t ]
| Zero_or_more t -> to_form (Repeat { min = 0; max = None; t })
| Zero_or_one t -> to_form (Repeat { min = 0; max = Some 1; t })
;;
let rx_to_string = Funcall.Wrap.("rx-to-string" <: Form.t @-> return string)
let pattern t = rx_to_string (to_form t)