Source file segment_int_array.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
open Utils
(* Size limits, all derived from [Limit.max_hash_postfix_bytes].
   NOTE(review): [max_length] and [max_short_segment_length] look like
   lengths counted in sides (bits) -- confirm against callers. *)
let max_length = (8 * Limit.max_hash_postfix_bytes) - 1

(* 27 bytes worth of bits, minus one. *)
let max_short_segment_length = (8 * 27) - 1

(* Upper bound, in bytes, used for the bounded binary encoding below. *)
let max_serialization_bytes = Limit.max_hash_postfix_bytes
(** One step of a path: go left or go right. *)
type side = Left | Right

(** [string_of_side side] is ["L"] for [Left] and ["R"] for [Right]. *)
let string_of_side side =
  match side with
  | Left -> "L"
  | Right -> "R"

(** [string_of_sides sides] concatenates the one-letter rendering of every
    side, in order, with no separator. *)
let string_of_sides sides =
  sides |> List.map string_of_side |> String.concat ""
(** Helpers that treat a native [int] as a row of 63 bits, most significant
    bit first, where a cleared bit is [Left] and a set bit is [Right]. *)
module Int63 = struct
  (** [of_sides sides] packs at most 63 leading sides into one int.
      Returns [(packed, remaining_sides, number_of_sides_packed)]; when fewer
      than 63 sides are available the packed bits are left-aligned. *)
  let of_sides sides =
    let rec pack room bits rest =
      match rest with
      | [] -> (bits lsl room, [], 63 - room)
      | _ :: _ when room = 0 -> (bits, rest, 63)
      | side :: tail ->
          let low = match side with Left -> 0 | Right -> 1 in
          pack (room - 1) ((bits lsl 1) + low) tail
    in
    pack 63 0 sides

  (** [to_sides i nsides] decodes the [nsides] most significant bits of [i]
      back into a list of sides, first side first. *)
  let to_sides i nsides =
    (* Bring the wanted bits down to the low end, then peel them off from
       the least significant one while consing, which restores the order. *)
    let rec unpack acc remaining bits =
      if remaining = 0 then acc
      else
        let side = if bits land 1 = 0 then Left else Right in
        unpack (side :: acc) (remaining - 1) (bits lsr 1)
    in
    unpack [] nsides (i lsr (63 - nsides))

  (* Multiplier used to hash a power of two into a 6-bit table index. *)
  let db_sequence =
    0b0000001000011000101000111001001011001101001111010101110110111111

  (** [db_map n] is the table slot associated with [n]. *)
  let db_map n = (n * db_sequence) lsr 57

  (* Lookup table: slot of [1 lsl bit] -> [63 - bit].  Built once at module
     initialisation; a slot collision aborts with [assert false]. *)
  let db_table_for_common_prefix =
    let unset = -255 in
    let tbl = Array.make 64 unset in
    for bit = 0 to 63 do
      let slot = db_map (1 lsl bit) in
      if tbl.(slot) <> unset then assert false;
      tbl.(slot) <- 63 - bit
    done;
    tbl

  (** [common_prefix i1 i2] is the number of leading bits (out of 63) on
      which [i1] and [i2] agree; [63] when they are equal. *)
  let common_prefix i1 i2 =
    match i1 lxor i2 with
    | 0 -> 63
    | diff ->
        (* Smear the highest differing bit over every lower position so the
           value becomes [2^k - 1]; adding one yields a single set bit. *)
        let spread shift n = n lor (n lsr shift) in
        let filled =
          diff |> spread 1 |> spread 2 |> spread 4 |> spread 8 |> spread 16
          |> spread 32
        in
        Array.unsafe_get db_table_for_common_prefix (db_map (filled + 1))
end
(* Live-object counters.  [nts] is bumped by [Bits.Type.mk] and [nints] by
   [Vector.mk]; both are only maintained when [count_segments] is set. *)
let nts = ref 0

let nints = ref 0

(** [stat ()] returns the current values of the two counters as
    [(nts, nints)]. *)
let stat () =
  let segments = !nts and ints = !nints in
  (segments, ints)

(* Switch read from [Envconf]; turns the counting above on or off. *)
let count_segments = Envconf.count_segments
(** Bit storage: an [int array] in which every element carries 63 bits. *)
module Vector : sig
  type t = private int array
  val mk : int array -> t
  val unsafe_get_side : t -> int -> side
  val safe_get_int : t -> int -> int
end = struct
  type t = int array

  (** [mk a] exposes [a] as a vector.  When [count_segments] is on and [a]
      is not empty, [nints] is increased by [length a + 1] and a finaliser
      is attached that subtracts the same amount again. *)
  let mk a =
    let words = Array.length a in
    if words <> 0 && count_segments then begin
      nints := !nints + words + 1;
      Stdlib.Gc.finalise
        (fun dead -> nints := !nints - Array.length dead - 1)
        a
    end;
    a

  (** [unsafe_get_side vec pos] reads bit number [pos] (bit 0 is the most
      significant bit of element 0).  No bounds check is performed. *)
  let unsafe_get_side vec pos =
    let word = Array.unsafe_get vec (pos / 63) in
    match (word lsr (62 - (pos mod 63))) land 1 with
    | 0 -> Left
    | _ -> Right

  (** [safe_get_int a i] is element [i] of [a], or [0] when [i] is out of
      range. *)
  let safe_get_int a i =
    if 0 <= i && i < Array.length a then Array.unsafe_get a i else 0
end
(* Bit strings stored as a window [off, off+len) over a shared [Vector.t].
   Several values may share the same vector with different windows, so bits
   of the vector outside the window must never be relied upon. *)
module Bits = struct
(* The record is private: values can only be built through [mk], which also
   maintains the [nts] counter when [count_segments] is enabled. *)
module Type : sig
type t = private { off : int; len : int; vec : Vector.t }
val mk : off:int -> len:int -> Vector.t -> t
end = struct
type t = { off : int; len : int; vec : Vector.t }
let mk ~off ~len vec =
let t = { off; len; vec } in
if count_segments then begin
incr nts;
(* The finaliser undoes the increment once the record is collected. *)
Stdlib.Gc.finalise (fun _ -> decr nts) t;
t
end else t
end
include Type
(* [of_sides sides] packs the sides 63 per int with [Int63.of_sides] and
   returns a value with offset 0 whose length is the number of sides. *)
let of_sides sides =
let rec bits_of_sides_aux acc curlen sides =
let i63, sides_left, nsides_used = Int63.of_sides sides in
let acc = i63::acc in
let curlen = curlen + nsides_used in
if sides_left = [] then
mk ~off:0 ~len:curlen @@ Vector.mk @@ Array.of_list (List.rev acc)
else
bits_of_sides_aux acc curlen sides_left
in
bits_of_sides_aux [] 0 sides
(* [to_sides bits] lists the sides inside the window, first side first.
   NOTE(review): for [empty] the vector is [||] and the [Array.unsafe_get]
   below still reads index 0; the value read is discarded because zero
   sides are requested, but the access itself is out of bounds -- confirm
   this is acceptable. *)
let to_sides bits =
let vec = bits.vec in
let rec bits_to_sides_aux acc off len i =
if off >= 63 then
(* Skip whole vector elements that lie before the window. *)
let n63 = off / 63 in
bits_to_sides_aux acc (off - 63 * n63) len (i+n63)
else
let n = Array.unsafe_get (vec :> int array) i in
(* Left-align the wanted bits, then decode at most what remains. *)
let sides = Int63.to_sides (n lsl off) (Int.min len (63 - off)) in
let acc = sides :: acc in
let len = len - (63 - off) in
if len <= 0 then acc
else bits_to_sides_aux acc 0 len (i+1)
in
List.concat (List.rev (bits_to_sides_aux [] bits.off bits.len 0))
(* [unsafe_get_side bits pos] reads the side at window position [pos]
   without checking [pos] against [len]. *)
let unsafe_get_side bits pos = Vector.unsafe_get_side bits.vec (bits.off + pos)
(* The zero-length value, backed by an empty vector. *)
let empty = mk ~off:0 ~len:0 (Vector.mk [||])
let is_empty { len; _ } = len = 0
(* [cut bits] splits off the first side: [None] when empty, otherwise the
   first side and the remainder (which shares the vector, or is [empty]). *)
let cut { off; len; vec } =
match len with
| 0 -> None
| 1 -> Some (Vector.unsafe_get_side vec off, empty)
| _ -> Some (Vector.unsafe_get_side vec off, mk ~off:(off+1) ~len:(len-1) vec)
(* [get_side bits i] is the side at position [i], or [None] when [i] is
   outside [0, len). *)
let get_side { off; len; vec } i =
if i < 0 || len <= i then None
else Some (Vector.unsafe_get_side vec (off + i))
(* [drop n bits] removes the first [n] sides; [empty] when [n >= len]. *)
let drop n { off; len; vec } =
if n >= len then empty
else mk ~off:(off + n) ~len:(len - n) vec
let length { len; _ } = len
(* [get_i63 bs off] returns the 63 bits of [bs.vec] starting at absolute
   bit position [off] (which may be negative or not a multiple of 63), with
   every bit that falls outside the window [bs.off, bs.off + bs.len)
   cleared. *)
let get_i63 bs off =
(* Number of leading bits of the result that precede the window. *)
let head_reset = Int.max 0 (bs.off - off)
in
if head_reset >= 63 then 0
else
(* Number of trailing bits of the result that follow the window. *)
let tail_reset = Int.max 0 (off + 63 - (bs.off + bs.len))
in
if tail_reset >= 63 then 0
else
(* Floor division, so that [shift] below is non-negative even for a
   negative [off]. *)
let off_div_63 = if off >= 0 then off / 63 else - ((-off + 62) / 63) in
let shift = off - off_div_63 * 63 in
assert (shift >= 0);
let nleft0 = Vector.safe_get_int bs.vec (off_div_63) in
let nleft = nleft0 lsl shift in
let nright =
if shift = 0 then 0
else
let nright0 = Vector.safe_get_int bs.vec (off_div_63+1) in
nright0 lsr (63 - shift)
in
(* The two halves occupy disjoint bit positions. *)
let n = nleft + nright in
let n = if head_reset = 0 then n else (n lsl head_reset) lsr head_reset in
let n = if tail_reset = 0 then n else (n lsr tail_reset) lsl tail_reset in
n
(* [append bs1 bs2] builds a fresh vector holding the sides of [bs1]
   followed by those of [bs2].  The result keeps [bs1.off mod 63] as its
   offset. *)
let append bs1 bs2 =
let len = bs1.len + bs2.len in
let off = bs1.off mod 63 in
let ioff1 = bs1.off / 63 in
(* Index of the vector element holding the last bit of [bs1]. *)
let ilast1 = (bs1.off + bs1.len - 1) / 63 in
let ns = (off + len + 62) / 63 in
let vec =
Vector.mk @@
Array.init ns (fun i ->
let i1 = ioff1 + i in
let n1 =
match compare i1 ilast1 with
| -1 -> Vector.safe_get_int bs1.vec i1
| 0 ->
let n1 = Vector.safe_get_int bs1.vec i1 in
(* Clear the bits that come after the end of [bs1]. *)
let shift = (ilast1 + 1) * 63 - (bs1.off + bs1.len) in
if shift = 0 then n1 else (n1 lsr shift) lsl shift
| _ -> 0
in
(* Bits of [bs2] re-aligned so that they start where [bs1] ends. *)
let n2 =
get_i63 bs2 (i1 * 63 - (bs1.off + bs1.len - bs2.off))
in
n1 + n2)
in
mk ~off ~len vec
(* [concat bss] appends all elements in order; [empty] for the empty list.
   Not tail-recursive. *)
let rec concat = function
| [] -> empty
| bs::bss -> append bs @@ concat bss
(* [normalize bs] copies the window into a fresh vector with offset 0 and
   every bit outside the window cleared; [empty] when [bs.len = 0]. *)
let normalize bs =
let off = 0 in
let len = bs.len in
if len = 0 then empty
else
let ns = (len + 62) / 63 in
let vec = Vector.mk @@ Array.init ns (fun i ->
let off = i * 63 + bs.off in
get_i63 bs off)
in
mk ~off ~len vec
(* [equal bs1 bs2] compares the sides of the two windows.  Physical
   identity and shared vector/offset are tried first; otherwise both are
   normalized and their vectors compared structurally. *)
let equal bs1 bs2 =
if bs1 == bs2 then true
else if bs1.len <> bs2.len then false
else
if bs1.vec == bs2.vec && bs1.off = bs2.off then true
else
let bs1 = normalize bs1 in
let bs2 = normalize bs2 in
bs1.vec = bs2.vec
(* [equal_list] is [equal] lifted to lists of the same length. *)
let rec equal_list bss1 bss2 =
match bss1, bss2 with
| [], [] -> true
| bs1::bss1, bs2::bss2 when equal bs1 bs2 -> equal_list bss1 bss2
| _ -> false
(* [to_string s] renders the sides as a string of ['L'] and ['R']. *)
let to_string s = String.concat "" (List.map string_of_side @@ to_sides s)
let pp ppf s = Format.fprintf ppf "%s" (to_string s)
(* [compare bs1 bs2] orders by the lists of sides.  When both share vector
   and offset one is a prefix of the other, so comparing lengths suffices. *)
let compare bs1 bs2 =
if bs1 == bs2 then 0
else if bs1.vec == bs2.vec && bs1.off = bs2.off then compare bs1.len bs2.len
else Stdlib.compare (to_sides bs1) (to_sides bs2)
(* [pp_debug] prints [off], [len] and every bit of the whole underlying
   vector, not only the window. *)
let pp_debug ppf s =
let s' = mk ~off:0 ~len:(Array.length (s.vec :> int array) * 63) s.vec in
Format.fprintf ppf "off=%d len=%d %s" s.off s.len (to_string s')
(* [common_prefix seg1 seg2] returns
   [(common prefix, rest of seg1, rest of seg2)]. *)
let common_prefix seg1 seg2 =
if seg1 == seg2 then seg1, empty, empty
else
(* Same vector and same start: the shorter one is the common prefix. *)
if seg1.vec == seg2.vec && seg1.off = seg2.off then
if seg1.len = seg2.len then seg1, empty, empty
else
let len = Int.min seg1.len seg2.len in
if len = 0 then empty, seg1, seg2
else
if seg1.len < seg2.len then
seg1, empty, mk ~off:(seg2.off+len) ~len:(seg2.len - len) seg2.vec
else
seg2, mk ~off:(seg1.off + len) ~len:(seg1.len - len) seg1.vec, empty
else
(* General case: walk 63 bits at a time in [seg1]'s coordinates, reading
   [seg2] shifted so that both windows start at the same position. *)
let rec cp_aux i =
let off1 = i * 63 in
let off2 = - seg1.off + seg2.off + i * 63 in
(* Bits of each segment remaining from position [i * 63] on, both
   expressed in [seg1]'s coordinates. *)
let nbits1 = seg1.off + seg1.len - i * 63 in
let nbits2 = seg1.off + seg2.len - i * 63 in
let i1 = get_i63 seg1 off1 in
let i2 = get_i63 seg2 off2 in
let ncommonbits = Int63.common_prefix i1 i2 in
if nbits1 >= 0 && nbits2 >= 0 && ncommonbits = 63 then
cp_aux (i+1)
else
let total_commonbits = i * 63 + Int.min ncommonbits (Int.min nbits1 nbits2) - seg1.off in
(if total_commonbits = 0 then empty
else if seg1.len < seg2.len
then mk ~off:seg1.off ~len:total_commonbits seg1.vec
else mk ~off:seg2.off ~len:total_commonbits seg2.vec),
(let len = seg1.len - total_commonbits in
if len = 0 then empty
else
mk ~off:(seg1.off+total_commonbits) ~len seg1.vec),
(let len = seg2.len - total_commonbits in
if len = 0 then empty
else
mk ~off:(seg2.off+total_commonbits) ~len seg2.vec)
in
cp_aux (seg1.off / 63)
end
(* Everything defined in [Bits] is re-exported at the top level. *)
include Bits
(* Alias for the segment representation. *)
type segment = Bits.t
(* A "fat" path: a list mixing single sides with whole segments. *)
type fat = [`Left | `Right | `Segment of t] list
(** [unfat fs] flattens a fat path into a single segment: every maximal run
    of [`Left]/[`Right] tags is packed with [of_sides], [`Segment] items are
    taken as they are, and all pieces are joined with [concat]. *)
let unfat fs =
  let to_side = function
    | `Left -> Left
    | `Right -> Right
    | _ -> assert false
  in
  (* Consumes the side tags at the head of the list and returns the segment
     built from them together with the untouched remainder. *)
  let rec take_sides rev_run = function
    | (`Left | `Right as s) :: rest -> take_sides (to_side s :: rev_run) rest
    | rest -> (of_sides (List.rev rev_run), rest)
  in
  let rec make_segs = function
    | [] -> []
    | `Segment seg :: rest -> seg :: make_segs rest
    | (`Left | `Right) :: _ as items ->
        let seg, rest = take_sides [] items in
        seg :: make_segs rest
  in
  concat (make_segs fs)
(** [string_of_segments segs] renders the list as ["[s1; s2; ...]"], showing
    an empty segment as ["<empty>"]. *)
let string_of_segments segs =
  let show seg = if is_empty seg then "<empty>" else to_string seg in
  let body = String.concat "; " (List.map show segs) in
  "[" ^ body ^ "]"
(** Formatter counterpart of [string_of_segments]. *)
let pp_segments ppf segs =
  segs |> string_of_segments |> Format.fprintf ppf "%s"
(** [of_string s] parses a string made only of ['L'] and ['R'] characters
    into a segment; [None] if any other character occurs. *)
let of_string s =
  (* Walk from the last character to the first so that consing yields the
     sides in their original order. *)
  let rec scan idx sides =
    if idx < 0 then Some (of_sides sides)
    else
      match String.unsafe_get s idx with
      | 'L' -> scan (idx - 1) (Left :: sides)
      | 'R' -> scan (idx - 1) (Right :: sides)
      | _ -> None
  in
  scan (String.length s - 1) []
(** [gen_unsafe_of_encoding off blen s] builds a segment of [blen] bits from
    the bytes of [s] starting at byte offset [off], most significant bit of
    each byte first.

    Exactly [(blen + 7) / 8] bytes are read.  If [s] is too short for that,
    a diagnostic is printed on stderr and [Assert_failure] is raised.  Bits
    of the last byte beyond [blen] are cleared before being stored.

    "Unsafe" because the content of [s] is not validated in any other way. *)
let gen_unsafe_of_encoding off blen s =
  (* Number of bytes that carry the [blen] bits. *)
  let nbytes = (blen + 7) / 8 in
  if not (String.length s >= off + nbytes) then begin
    Format.eprintf "%d %d (off=%d)@." blen (String.length s) off;
    assert false
  end;
  (* First byte position that must NOT be read.  It has to match the range
     validated above; reading one byte further would run past the input. *)
  let outpos = off + nbytes in
  (* The last byte holds [((blen - 1) mod 8) + 1] meaningful bits; this mask
     keeps exactly those (the whole byte when [blen] is a multiple of 8). *)
  let last_mask = 0xff lsl (7 - ((blen + 7) mod 8)) in
  (* [curi] accumulates [curbits] bits (fewer than 63) of the word being
     built; completed 63-bit words are pushed on [acc] in reverse order. *)
  let rec enc_aux acc curi curbits pos =
    match Stdlib.compare pos outpos with
    | 0 ->
        (* Flush the partial word, left-aligned. *)
        Array.of_list @@ List.rev ((curi lsl (63 - curbits)) :: acc)
    | -1 ->
        let c = Char.code @@ String.unsafe_get s pos in
        let c = if pos + 1 <> outpos then c else c land last_mask in
        if curbits + 8 < 63 then
          enc_aux acc ((curi lsl 8) + c) (curbits + 8) (pos + 1)
        else begin
          (* The byte straddles a word boundary: its top [usebits] bits
             complete the current word, the rest starts the next one. *)
          let usebits = 63 - curbits in
          let curbits = 8 - usebits in
          let acc = (curi lsl usebits) lor (c lsr curbits) :: acc in
          let curi = c land (0xff lsr usebits) in
          enc_aux acc curi curbits (pos + 1)
        end
    | _ -> assert false
  in
  let vec = Vector.mk @@ enc_aux [] 0 0 off in
  mk ~off:0 ~len:blen vec
(** [unsafe_of_encoding blen s] is [gen_unsafe_of_encoding] reading from the
    very beginning of [s]. *)
let unsafe_of_encoding blen s = gen_unsafe_of_encoding 0 blen s
(** [to_encoding seg] returns [(length in bits, bytes)], where the bytes hold
    the sides of [seg] packed most significant bit first in
    [(seg.len + 7) / 8] bytes. *)
let to_encoding seg =
  let nbytes = (seg.len + 7) / 8 in
  let buf = Buffer.create nbytes in
  (* Each round fetches 63 bits but only emits the top 56 of them, i.e.
     seven whole bytes; surplus bytes are trimmed at the end. *)
  let chunk = ref 0 in
  while !chunk * 56 < seg.len do
    let word = get_i63 seg (seg.off + (!chunk * 56)) in
    Buffer.add_char buf (Char.chr (word lsr 55));
    List.iter
      (fun shift -> Buffer.add_char buf (Char.chr ((word lsr shift) land 0xff)))
      [ 47; 39; 31; 23; 15; 7 ];
    incr chunk
  done;
  (seg.len, Buffer.sub buf 0 nbytes)
(** Self-delimiting byte encoding of segments: the packed bits are followed
    by a single [1] bit and then zero padding, so the length in bits can be
    recovered from the position of the last set bit. *)
module Serialization = struct
  (** [fix_tail len bytes] sets, in the last byte of [bytes], the marker bit
      located [len mod 8] positions below the most significant bit, then
      returns [bytes] as a string (without copying). *)
  let fix_tail len bytes =
    let last = Bytes.length bytes - 1 in
    let marker = 128 lsr (len mod 8) in
    let patched = Char.code (Bytes.unsafe_get bytes last) lor marker in
    Bytes.unsafe_set bytes last (Char.chr patched);
    Bytes.unsafe_to_string bytes

  (** [encode seg] serialises [seg] into [seg.len / 8 + 1] bytes. *)
  let encode seg =
    let slen = (seg.len / 8) + 1 in
    let buf = Buffer.create slen in
    let add_byte n = Buffer.add_char buf (Char.chr n) in
    (* Seven bytes (56 bits) are emitted per 63-bit fetch. *)
    let chunk = ref 0 in
    while !chunk * 56 < seg.len do
      let word = get_i63 seg (seg.off + (!chunk * 56)) in
      add_byte (word lsr 55);
      List.iter
        (fun shift -> add_byte ((word lsr shift) land 0xff))
        [ 47; 39; 31; 23; 15; 7 ];
      incr chunk
    done;
    (* Make sure a byte exists to receive the marker when the data ends on a
       byte boundary. *)
    if seg.len mod 8 = 0 then Buffer.add_char buf '\000';
    fix_tail seg.len (Bytes.unsafe_of_string (Buffer.sub buf 0 slen))

  (** [decode_slice_exn (s, off, len)] decodes the [len] bytes of [s] that
      start at [off].
      @raise Assert_failure if the slice does not fit in [s] or contains no
      set bit at all. *)
  let decode_slice_exn (s, off, len) =
    assert (String.length s >= off + len);
    (* Index, relative to [off], of the last byte that is not zero. *)
    let rec last_nonzero i =
      if i < 0 then assert false
      else if String.unsafe_get s (off + i) = '\x00' then last_nonzero (i - 1)
      else i
    in
    let nz = last_nonzero (len - 1) in
    assert (nz >= 0);
    let last_c = Char.code (String.unsafe_get s (off + nz)) in
    assert (last_c <> 0);
    (* Distance of the lowest set bit (the marker) from the top of the
       byte: 7 for bit value 1, 6 for 2, ..., 0 for 128. *)
    let rec marker_pos k =
      if k = 0 then 0
      else if last_c land (1 lsl (7 - k)) <> 0 then k
      else marker_pos (k - 1)
    in
    let last_one = marker_pos 7 in
    let seglen = (nz * 8) + last_one in
    gen_unsafe_of_encoding off seglen s

  (** [decode_exn s] decodes the whole string. *)
  let decode_exn s = decode_slice_exn (s, 0, String.length s)

  (** [decode s] is [decode_exn s] with any exception turned into [None]. *)
  let decode s =
    match decode_exn s with
    | seg -> Some seg
    | exception _ -> None

  (** [decode_list_slice (s, off)] reads length-prefixed segments from [s]
      starting at [off] until a zero length byte.  Returns the segments and
      the position just after that terminator, or [None] on malformed or
      truncated input. *)
  let decode_list_slice (s, off) =
    let total = String.length s in
    let rec loop acc pos =
      if pos >= total then None
      else
        match Char.code (String.unsafe_get s pos) with
        | 0 -> Some (List.rev acc, pos + 1)
        | l -> (
            match decode_slice_exn (s, pos + 1, l) with
            | seg -> loop (seg :: acc) (pos + 1 + l)
            | exception _ -> None)
    in
    loop [] off

  (** [decode_list s] decodes a list that must span the whole of [s]. *)
  let decode_list s =
    match decode_list_slice (s, 0) with
    | Some (segs, stop) when stop = String.length s -> Some segs
    | Some _ | None -> None

  (** [encode_list ts] writes every segment as one length byte followed by
      its encoding, then a terminating zero byte.
      @raise Assert_failure if an encoded segment is 256 bytes or longer. *)
  let encode_list ts =
    let buf = Buffer.create (List.length ts * 256) in
    let add_one t =
      let s = encode t in
      let len = String.length s in
      assert (0 <= len && len < 256);
      Buffer.add_char buf (Char.chr len);
      Buffer.add_string buf s
    in
    List.iter add_one ts;
    Buffer.add_char buf '\000';
    Buffer.contents buf
end
(* Checked once at module initialisation: the byte bound must not exceed
   255. *)
let () = assert (max_serialization_bytes <= 255)

(** [Data_encoding] codec for segments: the [Serialization] byte format
    carried in a bytes field bounded by [max_serialization_bytes].  Decoding
    goes through [Serialization.decode_exn] and may therefore raise. *)
let encoding =
  let open Data_encoding in
  let serialize seg = seg |> Serialization.encode |> Bytes.of_string in
  let deserialize raw = raw |> Bytes.to_string |> Serialization.decode_exn in
  conv serialize deserialize (Bounded.bytes max_serialization_bytes)
(** Incremental builder for a list of segments.  Finished segments are kept
    in reverse order in [rev_segs]; the segment under construction is kept
    as a reversed fat list in [rev_last]. *)
module Segs = struct
  type t' =
    { rev_last : fat
    ; rev_segs : segment list
    }

  type t = t' option

  (* [None]: nothing has been started yet. *)
  let empty = None

  (* A started builder with no content. *)
  let empty' = Some { rev_segs = []; rev_last = [] }

  (** [add_side t side] adds one side to the segment under construction.
      @raise Assert_failure if [t] is [None]. *)
  let add_side t side =
    match t with
    | None -> assert false
    | Some ({ rev_last; _ } as cur) ->
        let tag = match side with Left -> `Left | Right -> `Right in
        Some { cur with rev_last = tag :: rev_last }

  (** [append_seg t seg] adds a whole segment to the one under construction.
      @raise Assert_failure if [t] is [None]. *)
  let append_seg t seg =
    match t with
    | None -> assert false
    | Some ({ rev_last; _ } as cur) ->
        Some { cur with rev_last = `Segment seg :: rev_last }

  (** [append_sides t sides] adds the given sides, in order.
      @raise Assert_failure if [t] is [None]. *)
  let append_sides t sides =
    match t with
    | None -> assert false
    | Some cur ->
        let push acc = function
          | Left -> `Left :: acc
          | Right -> `Right :: acc
        in
        Some { cur with rev_last = List.fold_left push cur.rev_last sides }

  (** [push_bud t] closes the segment under construction and starts a new,
      empty one.  A [None] or completely empty builder yields an empty
      started builder.
      @raise Assert_failure if segments were already closed but the current
      one is empty. *)
  let push_bud = function
    | None -> Some { rev_segs = []; rev_last = [] }
    | Some { rev_last = []; rev_segs = [] } as untouched -> untouched
    | Some { rev_last; rev_segs } ->
        assert (rev_last <> []);
        Some { rev_segs = unfat (List.rev rev_last) :: rev_segs; rev_last = [] }

  (** [to_segments t] lists all segments in order, the one under
      construction last; [[]] for [None]. *)
  let to_segments = function
    | None -> []
    | Some { rev_last; rev_segs } ->
        List.rev (unfat (List.rev rev_last) :: rev_segs)

  (** [to_string t] prints the segments separated by ["/"]. *)
  let to_string t =
    (* [to_string] on the right is the segment printer of the enclosing
       scope, since this definition is not recursive. *)
    to_segments t |> List.map to_string |> String.concat "/"

  (** [of_segments segs] rebuilds a builder whose segment under
      construction is the last element of [segs]; [empty] for [[]]. *)
  let of_segments segs =
    match List.rev segs with
    | [] -> empty
    | final :: earlier ->
        Some { rev_segs = earlier; rev_last = [ `Segment final ] }

  (** [last t] is the fat list of the segment under construction, in
      forward order. *)
  let last = function
    | Some { rev_last; _ } -> Some (List.rev rev_last)
    | None -> None
end
(** Conversion between OCaml strings and segments, going through the binary
    [Data_encoding] representation of the string. *)
module StringEnc = struct
  (** [of_char c] is the eight sides of [c], most significant bit first. *)
  let of_char c =
    let code = Char.code c in
    List.map
      (fun mask -> if code land mask = 0 then Left else Right)
      [ 128; 64; 32; 16; 8; 4; 2; 1 ]

  (** [encode s] is the segment holding every bit of the binary encoding of
      [s].
      @raise Assert_failure if the binary encoding fails. *)
  let encode s =
    (* Expand the bytes from last to first, prepending each group of eight
       sides so that the final list is in reading order. *)
    let sides_of_binary raw =
      let rec prepend idx acc =
        if idx < 0 then acc
        else prepend (idx - 1) (of_char (String.unsafe_get raw idx) @ acc)
      in
      prepend (String.length raw - 1) []
    in
    let open Data_encoding in
    match Binary.to_bytes Data_encoding.Encoding.string s with
    | Error _ -> assert false
    | Ok b -> of_sides (sides_of_binary (Bytes.to_string b))

  (** [decode seg] is the inverse of [encode]: [None] when the number of
      sides is not a multiple of eight or the bytes are not a valid binary
      encoding of a string. *)
  let decode seg =
    let buf = Buffer.create 10 in
    let weight w = function Left -> 0 | Right -> w in
    let rec fill = function
      | [] -> Some (Buffer.contents buf)
      | b7 :: b6 :: b5 :: b4 :: b3 :: b2 :: b1 :: b0 :: rest ->
          let code =
            weight 128 b7 + weight 64 b6 + weight 32 b5 + weight 16 b4
            + weight 8 b3 + weight 4 b2 + weight 2 b1 + weight 1 b0
          in
          Buffer.add_char buf (Char.chr code);
          fill rest
      | _ -> None
    in
    match fill (to_sides seg) with
    | None -> None
    | Some s -> (
        match
          Data_encoding.Binary.of_string Data_encoding.Encoding.string s
        with
        | Ok x -> Some x
        | Error _ -> None)
end
(* Aliases that re-expose the three implementation modules [Int63],
   [Vector] and [Bits] under a single [Internal] namespace. *)
module Internal = struct
module Int63 = Int63
module Vector = Vector
module Bits = Bits
end