1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
let esc = Char.chr 0x1b
let br_paste_start = "\x1b[200~"
let br_paste_end = "\x1b[201~"
let br_paste_start_len = String.length br_paste_start
let br_paste_end_len = String.length br_paste_end
let max_paste_len = 1_048_576
let max_sequence_len = 4_096
let br_paste_end_failure =
let len = br_paste_end_len in
let fail = Array.make len 0 in
let j = ref 0 in
for i = 1 to len - 1 do
while !j > 0 && br_paste_end.[!j] <> br_paste_end.[i] do
j := fail.(!j - 1)
done;
if br_paste_end.[!j] = br_paste_end.[i] then incr j;
fail.(i) <- !j
done;
fail
type token = Sequence of string | Text of string | Paste of string
type parser = {
buffer : Buffer.t;
mutable paste_buffer : bytes;
mutable paste_len : int;
mutable paste_match : int;
mutable flush_deadline : float option;
mutable mode : [ `Normal | `Paste ];
}
let ambiguity_timeout = 0.050
let incomplete_seq_timeout = 0.100
let schedule_flush t now =
if t.mode = `Paste || Buffer.length t.buffer = 0 then t.flush_deadline <- None
else
let len = Buffer.length t.buffer in
let delay =
if len = 1 && Buffer.nth t.buffer 0 = esc then
ambiguity_timeout
else if len >= 2 && Buffer.nth t.buffer 0 = esc then
incomplete_seq_timeout
else
ambiguity_timeout
in
t.flush_deadline <- Some (now +. delay)
let create () =
{
buffer = Buffer.create 128;
paste_buffer = Bytes.create 128;
paste_len = 0;
paste_match = 0;
flush_deadline = None;
mode = `Normal;
}
let pending t = Bytes.of_string (Buffer.contents t.buffer)
let reset t =
Buffer.clear t.buffer;
t.paste_len <- 0;
t.paste_match <- 0;
t.mode <- `Normal;
t.flush_deadline <- None
let push_tokens acc tokens =
List.rev_append tokens acc
let add_paste_tokens acc payload =
let acc = if payload = "" then acc else Paste payload :: acc in
Sequence br_paste_end :: acc
let has_substring_at s ~sub ~pos =
let sub_len = String.length sub in
let limit = String.length s - sub_len in
if pos < 0 || pos > limit then false
else
let rec loop i =
if i = sub_len then true
else if s.[pos + i] <> sub.[i] then false
else loop (i + 1)
in
loop 0
let find_substring_from s sub start =
let sub_len = String.length sub in
let len = String.length s in
let limit = len - sub_len in
let rec scan i =
if i > limit then -1
else if has_substring_at s ~sub ~pos:i then i
else scan (i + 1)
in
if sub_len = 0 || start > limit then -1 else scan start
let ensure_paste_capacity t needed =
let required = t.paste_len + needed in
if required > Bytes.length t.paste_buffer then (
let new_cap = max required (Bytes.length t.paste_buffer * 2) in
let buf = Bytes.create new_cap in
Bytes.blit t.paste_buffer 0 buf 0 t.paste_len;
t.paste_buffer <- buf)
let reset_paste_state t =
t.paste_len <- 0;
t.paste_match <- 0
let complete_paste t =
let payload_len = t.paste_len - br_paste_end_len in
let payload =
if payload_len <= 0 then ""
else Bytes.sub_string t.paste_buffer 0 payload_len
in
reset_paste_state t;
t.mode <- `Normal;
payload
let rec advance_paste_match current c =
if c = br_paste_end.[current] then current + 1
else if current = 0 then 0
else advance_paste_match br_paste_end_failure.(current - 1) c
let add_paste_char t c =
if t.paste_len < max_paste_len then (
ensure_paste_capacity t 1;
Bytes.unsafe_set t.paste_buffer t.paste_len c;
t.paste_len <- t.paste_len + 1);
t.paste_match <- advance_paste_match t.paste_match c;
t.paste_match = br_paste_end_len
let is_csi_final c =
let code = Char.code c in
(code >= 0x40 && code <= 0x7e) || code = 0x24 || code = 0x5e
let rec find_st s i len =
if i + 1 >= len then None
else if s.[i] = esc && s.[i + 1] = '\\' then Some (i + 2)
else find_st s (i + 1) len
let find_sequence_end s start len =
if start + 1 >= len then None
else
match s.[start + 1] with
| '[' ->
if start + 2 < len && s.[start + 2] = 'M' then
let expected = start + 6 in
if expected <= len then Some expected else None
else
let rec loop i =
if i >= len then None
else if is_csi_final s.[i] then
if (s.[i] = '$' || s.[i] = '^') && i + 1 < len then loop (i + 1)
else Some (i + 1)
else loop (i + 1)
in
loop (start + 2)
| ']' ->
let rec loop i =
if i >= len then None
else
let c = s.[i] in
if c = '\x07' then Some (i + 1)
else if c = esc && i + 1 < len && s.[i + 1] = '\\' then Some (i + 2)
else loop (i + 1)
in
loop (start + 2)
| 'P' | '_' ->
find_st s (start + 2) len
| 'O' ->
if start + 2 < len then Some (start + 3) else None
| _ ->
Some (start + 2)
let s =
let len = String.length s in
let rec loop pos acc =
if pos >= len then (List.rev acc, "")
else
let c = s.[pos] in
if c = esc then
match find_sequence_end s pos len with
| None ->
(List.rev acc, String.sub s pos (len - pos))
| Some end_pos ->
let seq = String.sub s pos (end_pos - pos) in
loop end_pos (Sequence seq :: acc)
else
let rec find_esc i =
if i >= len then len else if s.[i] = esc then i else find_esc (i + 1)
in
let stop = find_esc (pos + 1) in
let txt = String.sub s pos (stop - pos) in
loop stop (Text txt :: acc)
in
loop 0 []
let consume_paste_from_string t s start stop acc =
if start >= stop then (acc, None)
else
let rec loop i acc =
if i >= stop then (acc, None)
else
let matched = add_paste_char t s.[i] in
if matched then
let payload = complete_paste t in
let acc = add_paste_tokens acc payload in
(acc, Some (i + 1))
else loop (i + 1) acc
in
loop start acc
let consume_paste_from_bytes t bytes start stop acc =
if start >= stop then (acc, None)
else
let rec loop i acc =
if i >= stop then (acc, None)
else
let matched = add_paste_char t (Bytes.unsafe_get bytes i) in
if matched then
let payload = complete_paste t in
let acc = add_paste_tokens acc payload in
(acc, Some (i + 1))
else loop (i + 1) acc
in
loop start acc
let rec process t now acc =
if t.mode = `Paste then List.rev acc
else if Buffer.length t.buffer = 0 then List.rev acc
else
let buf_str = Buffer.contents t.buffer in
Buffer.clear t.buffer;
let len = String.length buf_str in
let start_idx = find_substring_from buf_str br_paste_start 0 in
if start_idx < 0 then
let seqs, rem = extract_sequences_from buf_str in
if rem <> "" then
if String.length rem > max_sequence_len then (
t.flush_deadline <- None;
let acc = push_tokens acc seqs in
List.rev (Text rem :: acc))
else (
Buffer.add_string t.buffer rem;
schedule_flush t now;
let acc = push_tokens acc seqs in
List.rev acc)
else (
t.flush_deadline <- None;
let acc = push_tokens acc seqs in
List.rev acc)
else
let before = String.sub buf_str 0 start_idx in
let after_start = start_idx + br_paste_start_len in
let after_len = len - after_start in
let after =
if after_len > 0 then String.sub buf_str after_start after_len else ""
in
let seqs, rem = extract_sequences_from before in
reset_paste_state t;
t.mode <- `Paste;
t.flush_deadline <- None;
let acc = push_tokens acc seqs in
let acc = Sequence br_paste_start :: acc in
let acc, rem_stop =
if rem = "" then (acc, None)
else consume_paste_from_string t rem 0 (String.length rem) acc
in
if t.mode = `Normal then (
(match rem_stop with
| Some idx when idx < String.length rem ->
Buffer.add_substring t.buffer rem idx (String.length rem - idx)
| _ -> ());
if after <> "" then Buffer.add_string t.buffer after;
t.flush_deadline <- None;
process t now acc)
else
let acc, after_stop =
if after = "" then (acc, None)
else consume_paste_from_string t after 0 (String.length after) acc
in
if t.mode = `Normal then (
(match after_stop with
| Some idx when idx < String.length after ->
Buffer.add_substring t.buffer after idx (String.length after - idx)
| _ -> ());
t.flush_deadline <- None;
process t now acc)
else List.rev acc
let feed t bytes off len ~now =
if off < 0 || len < 0 || off + len > Bytes.length bytes then
invalid_arg "Input_tokenizer.feed: out of bounds";
if t.mode = `Paste then (
let acc, stop_opt = consume_paste_from_bytes t bytes off (off + len) [] in
match stop_opt with
| None -> List.rev acc
| Some stop ->
let remaining = off + len - stop in
if remaining > 0 then Buffer.add_subbytes t.buffer bytes stop remaining;
t.flush_deadline <- None;
process t now acc)
else (
Buffer.add_subbytes t.buffer bytes off len;
t.flush_deadline <- None;
process t now [])
let deadline t = t.flush_deadline
let flush_expired t now =
match t.flush_deadline with
| Some expiry when now >= expiry && t.mode = `Normal ->
t.flush_deadline <- None;
if Buffer.length t.buffer = 0 then []
else
let leftover = Buffer.contents t.buffer in
Buffer.clear t.buffer;
if leftover = "" then [] else [ Sequence leftover ]
| _ -> []