Source file specification.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
open Core_kernel
open Vangstrom
type t =
{ match_template : string
; rule : Rule.t option
; rewrite_template : string option
}
[@@deriving sexp]
let create ?rewrite_template ?rule ~match_template () =
{ match_template; rule; rewrite_template }
let identifier_parser () =
many (Omega_parser_helper.alphanum <|> char '_')
>>| String.of_char_list
let single_hole_parser () =
string ":[[" *> identifier_parser () <* string "]]" >>| fun _ -> Some {|(\w+)|}
let everything_hole_parser () =
string ":[" *> identifier_parser () <* string "]" >>| fun _ -> Some {|(\n|.)*?|}
let expression_hole_parser () =
string ":[" *> identifier_parser () <* string ":e" <* string "]" >>| fun _ -> Some {|(\n|.)*?|}
let non_space_hole_parser () =
string ":[" *> identifier_parser () <* string ".]" >>| fun _ -> Some {|([^ \t\s\r\n])+|}
let line_hole_parser () =
string ":[" *> identifier_parser () <* string "\\n]" >>| fun _ -> Some {|(\n|.)*?|}
let blank_hole_parser () =
string ":["
*> many1 Omega_parser_helper.blank
*> identifier_parser ()
<* string "]"
>>| fun _ -> Some {|(\ |\t|\s|\r|\n)+|}
let any_char_except ~reserved =
List.fold reserved
~init:(return `OK)
~f:(fun acc reserved_sequence ->
option `End_of_input
(peek_string (String.length reserved_sequence)
>>= fun s ->
if String.equal s reserved_sequence then
return `Reserved_sequence
else
acc))
>>= function
| `OK -> any_char
| `End_of_input -> any_char
| `Reserved_sequence -> fail "reserved sequence hit"
let regex_body () =
fix (fun expr ->
(choice
[ ((char '[' *> (many1 expr) <* char ']')
>>| fun char_class -> Format.sprintf "[%s]" @@ String.concat char_class)
; (char '\\' *> any_char >>| fun c -> (Format.sprintf "\\%c" c))
; ((any_char_except ~reserved:["]"])) >>| Char.to_string
]
))
let regex_hole_parser () =
string ":["
*> identifier_parser ()
*> char '~'
*> (many1 @@ regex_body ()) >>= fun regex ->
string "]" >>= fun _ -> return (Some (String.concat regex))
let up_to p =
many1 (not_followed_by p *> any_char)
let : extracted list Vangstrom.t =
let hole =
choice
[ single_hole_parser ()
; everything_hole_parser ()
; expression_hole_parser ()
; non_space_hole_parser ()
; line_hole_parser ()
; blank_hole_parser ()
; regex_hole_parser ()
]
in
many @@ choice
[ (hole >>| fun v -> Option.map v ~f:(fun v -> Regex v))
; (Omega_parser_helper.spaces1 >>| fun s -> Some (Contiguous_whitespace s))
; (lift
(fun v -> Some (Non_space (String.of_char_list v)))
(up_to (choice [hole *> return (); Omega_parser_helper.spaces1 *> return ()])))
]
>>| fun result -> List.filter_opt result
let escape s =
let rec aux chars =
match chars with
| [] -> []
| x :: xs ->
match x with
| '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{' | '}' | '^' | '$' as c ->
'\\' :: c :: (aux xs)
| c -> c :: (aux xs)
in
aux (String.to_list s)
|> String.of_char_list
let to_regex { match_template; _ } =
let = parse_string ~consume:All extract match_template |> Result.ok_or_failwith in
let b = Buffer.create 10 in
Buffer.add_string b "(";
List.iter extracted ~f:(function
| Regex s -> Buffer.add_string b s
| Non_space s -> Buffer.add_string b (escape s)
| Contiguous_whitespace _ -> Buffer.add_string b {|\s+|});
Buffer.add_string b ")";
Buffer.contents b