Source file parse_everything.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
open Core
let read_of_next_char
: next_char:(unit -> char option) -> (unit -> [ `Ok of string | `Eof ]) Staged.t
=
fun ~next_char ->
let should_be_quoted c =
match c with
| ';' | '|' | '#' | ')' -> true
| _ -> false
in
let escape = unstage (String.Escaping.escape ~escapeworthy:[ '"' ] ~escape_char:'\\') in
let maybe_quote_not_inside_string_atom s =
if String.exists s ~f:should_be_quoted then "\"" ^ escape s ^ "\"" else s
in
let terminates_atom c ~paren_depth =
match c with
| '(' | '"' | ' ' | '\t' | '\012' | '\n' | '\r' -> true
| ')' when Int.( > ) !paren_depth 0 -> true
| _ -> false
in
let paren_depth = ref 0 in
let inside_string = ref false in
let follows_escape_in_string = ref false in
let atom_so_far = Buffer.create 32 in
let all_done = ref false in
let read () =
if !all_done
then `Eof
else (
match next_char () with
| Some c ->
if
!inside_string
then (
let followed_escape_in_string = !follows_escape_in_string in
follows_escape_in_string := false;
if followed_escape_in_string
then (
Buffer.add_char atom_so_far c;
`Ok "" )
else (
match c with
| '"' ->
Buffer.add_char atom_so_far c;
let s = Buffer.contents atom_so_far in
Buffer.clear atom_so_far;
inside_string := false;
`Ok s
| c ->
if Char.equal c '\\' then follows_escape_in_string := true;
Buffer.add_char atom_so_far c;
`Ok "" ))
else if
not (terminates_atom c ~paren_depth)
then (
Buffer.add_char atom_so_far c;
`Ok "" )
else (
let ret = Buffer.contents atom_so_far in
Buffer.clear atom_so_far;
let ret = maybe_quote_not_inside_string_atom ret in
match c with
| '(' ->
incr paren_depth;
`Ok (ret ^ String.of_char c)
| ')' ->
decr paren_depth;
`Ok (ret ^ String.of_char c)
| ' ' | '\t' | '\012' | '\n' | '\r' -> `Ok (ret ^ String.of_char c)
| '"' ->
inside_string := true;
Buffer.add_char atom_so_far c;
`Ok ret
| _ -> assert false)
| None ->
let ret =
if !inside_string
then (
if !follows_escape_in_string then Buffer.add_char atom_so_far '\\';
Buffer.add_char atom_so_far '"';
let ret = Buffer.contents atom_so_far in
Buffer.clear atom_so_far;
ret
)
else (
let ret = Buffer.contents atom_so_far in
Buffer.clear atom_so_far;
maybe_quote_not_inside_string_atom ret)
in
while !paren_depth > 0 do
Buffer.add_char atom_so_far ')';
decr paren_depth
done;
all_done := true;
`Ok (ret ^ Buffer.contents atom_so_far))
in
let rec read_until () =
match read () with
| `Ok "" -> read_until ()
| `Ok s -> `Ok s
| `Eof -> `Eof
in
stage read_until
;;
let lexbuf_of_channel chan =
let next_char () = In_channel.input_char chan in
let read = unstage (read_of_next_char ~next_char) in
let leftover = ref ("", 0) in
let lex_fun bytes n =
let result =
if String.length (fst !leftover) - snd !leftover > 0
then (
let s = !leftover in
leftover := "", 0;
`Ok s)
else (
match read () with
| `Eof -> `Eof
| `Ok s -> `Ok (s, 0))
in
match result with
| `Eof -> 0
| `Ok (s, used) ->
if String.length s - used > n
then (
Bytes.From_string.blit ~src_pos:used ~dst_pos:0 ~src:s ~dst:bytes ~len:n;
leftover := s, used + n;
n)
else (
Bytes.From_string.blit
~src_pos:used
~dst_pos:0
~src:s
~dst:bytes
~len:(String.length s - used);
String.length s - used)
in
Lexing.from_function lex_fun
;;
let transform_string s =
let pos = ref 0 in
let next_char () =
if !pos >= String.length s
then None
else (
let c = s.[!pos] in
incr pos;
Some c)
in
let read = unstage (read_of_next_char ~next_char) in
let buf = Buffer.create (String.length s) in
let rec loop () =
match read () with
| `Eof -> Buffer.contents buf
| `Ok s ->
Buffer.add_string buf s;
loop ()
in
loop ()
;;
open String.Replace_polymorphic_compare
let unchanged s = transform_string s = s
let%test _ = unchanged ""
let%test _ = unchanged "abc"
let%test _ = unchanged "()"
let%test _ = unchanged "bf((a)d((c\"eg\")))"
let%test _ = unchanged " d ( ef) \n (\r\t ) \\ \\m x \") \b\r (\""
let%test _ = unchanged "%!@&*^:'?/,.~`[}]{-+=_-"
let%test _ = unchanged "\"foo\\\"d\""
let%test "completes unmatched parens" = transform_string "(" = "()"
let%test "completes unmatched parens" = transform_string "(a)(b(()(c" = "(a)(b(()(c)))"
let%test "completes unmatched quotes" = transform_string "\"" = "\"\""
let%test "completes unmatched quotes" = transform_string "\"\\\"" = "\"\\\"\""
let%test "completes unmatched quotes" = transform_string "((\"ab" = "((\"ab\"))"
let%test "completes unmatched escape in string" = transform_string "\"\\" = "\"\\\\\""
let%test "stringifies extra close parens" = transform_string ")" = "\")\""
let%test "stringifies extra close parens" =
transform_string ")(())))())" = "\")\"(())\"))\"()\")\""
;;
let%test "turns sexp special chars to strings" = transform_string "#" = "\"#\""
let%test "turns sexp special chars to strings" = transform_string ";" = "\";\""
let%test "turns sexp special chars to strings" = transform_string "|" = "\"|\""
let%test "turns sexp special chars to strings" =
transform_string "## |#| (#a;) ;a\"bc\"|\n;#)|"
= "\"##\" \"|#|\" (\"#a;\") \";a\"\"bc\"\"|\"\n\";#)|\""
;;