Source file segment_int_array.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
(*****************************************************************************)
(*                                                                           *)
(* Open Source License                                                       *)
(* Copyright (c) 2022 DaiLambda, Inc. <contact@dailambda.jp>                 *)
(*                                                                           *)
(* Permission is hereby granted, free of charge, to any person obtaining a   *)
(* copy of this software and associated documentation files (the "Software"),*)
(* to deal in the Software without restriction, including without limitation *)
(* the rights to use, copy, modify, merge, publish, distribute, sublicense,  *)
(* and/or sell copies of the Software, and to permit persons to whom the     *)
(* Software is furnished to do so, subject to the following conditions:      *)
(*                                                                           *)
(* The above copyright notice and this permission notice shall be included   *)
(* in all copies or substantial portions of the Software.                    *)
(*                                                                           *)
(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)
(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  *)
(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL   *)
(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)
(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING   *)
(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER       *)
(* DEALINGS IN THE SOFTWARE.                                                 *)
(*                                                                           *)
(*****************************************************************************)

(*  For hashing and on disk:

        |<- nbits/8 + 1 bytes ---->|
        |<- segment bits ->|10{0,7}|

        Segment length <= (Limit.max_hash_postfix_bytes * 8 - 1) bits = 2039 bits.

    In Extender format on disk:

                         |<-- 1 Plebeia cell (32bytes) -->|
                         |<-27bytes->|
        |<- ncells * 32 + 27 bytes ->|<-8bits->|<-4bytes->|
        +----------------------------|---------+
        |<-- segment bits -->|10{0,7}|ncells|01|
                                     |6bits |

        The longest segment fits in one Plebeia cell: 27 * 8 - 1
        127cells + 27bytes are enough to carry the longest segment.

    In memory for fast comparison:

        (nbits : int) and

        |<- (nbits+7)/8 bytes --->|
        +-------------------------+
        |<- segment bits ->|0{0,7}|

        String size <= Limit.max_hash_postfix_bytes
*)

open Utils

(* Longest segment, in bits: all the bits of [Limit.max_hash_postfix_bytes]
   bytes but one (the layout described above ends the bits with a marker). *)
let max_length = Limit.max_hash_postfix_bytes * 8 - 1
(* Longest segment, in bits, that fits in the 27 byte area of one cell
   (see the Extender format above). *)
let max_short_segment_length = 27 * 8 - 1
(* Byte bound used for the serialized form of a segment (see [encoding]). *)
let max_serialization_bytes = Limit.max_hash_postfix_bytes

(* One position of a segment: either [Left] or [Right]. *)
type side = Left | Right

(* ["L"] for [Left], ["R"] for [Right]. *)
let string_of_side side =
  match side with
  | Left -> "L"
  | Right -> "R"

(* The letters of [sides], in order and without any separator. *)
let string_of_sides sides =
  sides |> List.map string_of_side |> String.concat ""

module Int63 = struct
  (* [of_sides sides] packs at most 63 sides from the front of [sides] into
     one integer, first side in the most significant bit ([Left] = 0,
     [Right] = 1), unused low bits being 0.

     Returns (encoded int63, sides not consumed, number of sides consumed). *)
  let of_sides sides =
    (* [room]: bits still free in the word, [word]: bits packed so far *)
    let rec pack room word rest =
      match rest with
      | [] -> word lsl room, [], 63 - room
      | _ :: _ when room = 0 -> word, rest, 63
      | Left :: tl -> pack (room - 1) (word lsl 1) tl
      | Right :: tl -> pack (room - 1) ((word lsl 1) + 1) tl
    in
    pack 63 0 sides

  (* [to_sides i nsides] decodes the [nsides] most significant bits of the
     63 bit word [i] into sides, most significant bit first. *)
  let to_sides i nsides =
    (* The wanted bits are moved to the bottom of the word, then peeled off
       from the least significant one, which builds the list back to front. *)
    let rec unpack collected remaining word =
      if remaining = 0 then collected
      else
        let side = if word land 1 = 0 then Left else Right in
        unpack (side :: collected) (remaining - 1) (word lsr 1)
    in
    unpack [] nsides (i lsr (63 - nsides))

  (* Length of the common prefix of two 63 bit words, by a de Bruijn
     multiplication and a table lookup.

     http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup
     http://combos.org/bruijn
  *)

  (* One of the de Bruijn sequences B(2,6) (binary, 6 chars to cover 64),
     obtained at http://combos.org/bruijn
  *)
  let db_sequence =
    0b0000001000011000101000111001001011001101001111010101110110111111

  (* Top 6 bits of the product.  57 = 63 - 6: we only have 63 bits. *)
  let db_map n = (n * db_sequence) lsr 57

  (* For each [bit] in 0..63, slot [db_map (1 lsl bit)] holds [63 - bit].
     Building the table fails if two bits share a slot. *)
  let db_table_for_common_prefix =
    let unset = -255 in
    let table = Array.make 64 unset in
    for bit = 0 to 63 do
      let slot = db_map (1 lsl bit) in
      if table.(slot) <> unset then assert false;
      table.(slot) <- 63 - bit
    done;
    table

  (* [common_prefix i1 i2] is the number of leading bits (out of 63) on which
     [i1] and [i2] agree; 63 when they are equal. *)
  let common_prefix i1 i2 =
    match i1 lxor i2 with
    | 0 -> 63
    | diff ->
        (* Copy the highest set bit of [diff] into every lower position:
           the result is 0{i}1{63-i}, only 63 variations. *)
        let smear shift bits = bits lor (bits lsr shift) in
        let filled =
          diff |> smear 1 |> smear 2 |> smear 4 |> smear 8 |> smear 16
          |> smear 32
        in
        (* [filled + 1] has a single bit set (or is 0 when all the 63 bits
           of [filled] are set), which is what the table is indexed by.
           Note that we have only 63 bits, not 64!  The page quoted above
           shows an algorithm without the +1; no 63 bit version of it was
           found. *)
        Array.unsafe_get db_table_for_common_prefix (db_map (filled + 1))
end

(* Counters updated by [Bits.Type.mk] ([nts]) and [Vector.mk] ([nints]) when
   [count_segments] is set, and decreased again by GC finalisers.
   [nints] grows by the array length plus one for each counted vector. *)
let nts = ref 0
let nints = ref 0

(* Current values of the two counters: [(!nts, !nints)]. *)
let stat () = !nts, !nints

(* Whether the counters above are maintained; taken from [Envconf]. *)
let count_segments = Envconf.count_segments

(* Array of 63 bit words holding sides, 63 per word, the first side of a word
   in its most significant bit. *)
module Vector : sig
  type t = private int array

  (* Wraps an array; registers it in [nints] when [count_segments] is set. *)
  val mk : int array -> t

  (* Side at bit position [pos] of the vector.  No boundary check. *)
  val unsafe_get_side : t -> int -> side

  (* Word at index [i], or 0 when [i] is outside of the vector. *)
  val safe_get_int : t -> int -> int
end = struct
  type t = int array

  let mk words =
    let nwords = Array.length words in
    (* [Gc.finalise] cannot be used for [||], hence the length test *)
    if count_segments && nwords > 0 then begin
      (* Do not use it in production.  This slows down a lot! *)
      nints := !nints + nwords + 1 (* for vector *);
      Stdlib.Gc.finalise
        (fun dead -> nints := !nints - Array.length dead - 1)
        words
    end;
    words

  let unsafe_get_side vec pos =
    (* assert (0 <= pos && pos < Array.length vec * 63); *)
    let word = Array.unsafe_get vec (pos / 63) in
    let shift = 62 - (pos mod 63) in
    match (word lsr shift) land 1 with
    | 0 -> Left
    | _ -> Right

  let safe_get_int a i =
    if 0 <= i && i < Array.length a then Array.unsafe_get a i else 0
end

module Bits = struct

  (* A segment is the window of [len] sides starting at bit [off] of [vec].
     The record is private: values are only built by [mk]. *)
  module Type : sig
    (* 4 words = 32 bytes *)
    type t = private { off : int; len : int; vec : Vector.t }
    val mk : off:int -> len:int -> Vector.t -> t
  end = struct
    type t = { off : int; len : int; vec : Vector.t }

    (* Builds the record; when [count_segments] is set it is also counted in
       [nts] until the GC finalises it. *)
    let mk ~off ~len vec =
      let seg = { off; len; vec } in
      if count_segments then begin
        incr nts;
        Stdlib.Gc.finalise (fun _ -> decr nts) seg
      end;
      seg
  end

  include Type

  (* [of_sides sides] builds a segment at offset 0 holding [sides], packed
     63 per word by [Int63.of_sides]. *)
  let of_sides sides =
    (* [rev_words]: words built so far, last one first; [nbits]: sides
       consumed so far *)
    let rec pack rev_words nbits pending =
      let word, rest, used = Int63.of_sides pending in
      let rev_words = word :: rev_words in
      let nbits = nbits + used in
      match rest with
      | [] ->
          mk ~off:0 ~len:nbits
            (Vector.mk (Array.of_list (List.rev rev_words)))
      | _ :: _ -> pack rev_words nbits rest
    in
    pack [] 0 sides

  (* [to_sides bits] lists the [bits.len] sides of [bits], in order.

     A segment without sides is answered immediately: its vector may hold no
     word at all (this is the case of [empty]) and the words are fetched with
     [Array.unsafe_get], which must not be given an index outside of the
     array. *)
  let to_sides bits =
    if bits.len <= 0 then []
    else
      let vec = bits.vec in
      (* [off]: position of the next side, counted from the start of word [i];
         [len]: number of sides still to produce;
         [acc]: chunks of sides already produced, last chunk first. *)
      let rec bits_to_sides_aux acc off len i =
        if off >= 63 then
          (* skip the whole words in front of the segment *)
          let n63 = off / 63 in
          bits_to_sides_aux acc (off - 63 * n63) len (i+n63)
        else
          let n = Array.unsafe_get (vec :> int array) i in
          (* bring the wanted sides to the top of the word, then decode at
             most what is left of this word *)
          let sides = Int63.to_sides (n lsl off) (Int.min len (63 - off)) in
          let acc = sides :: acc in
          let len = len - (63 - off) in
          if len <= 0 then acc
          else bits_to_sides_aux acc 0 len (i+1)
      in
      List.concat (List.rev (bits_to_sides_aux [] bits.off bits.len 0))

  (* Side at position [pos] of the segment.  No boundary check. *)
  let unsafe_get_side { off; vec; _ } pos =
    Vector.unsafe_get_side vec (off + pos)

  (* The segment of length 0, over a vector without any word. *)
  let empty = mk ~off:0 ~len:0 (Vector.mk [||])

  (* Whether the segment has no side. *)
  let is_empty bits = bits.len = 0

  (* [cut seg] splits [seg] into its first side and the rest, which shares
     the vector of [seg]; [None] when [seg] has no side. *)
  let cut { off; len; vec } =
    if len = 0 then None
    else
      let head = Vector.unsafe_get_side vec off in
      let tail =
        if len = 1 then empty else mk ~off:(off + 1) ~len:(len - 1) vec
      in
      Some (head, tail)

  (* [get_side seg i] is the [i]-th side of [seg] (from 0), or [None] when
     [i] is not a position of [seg]. *)
  let get_side { off; len; vec } i =
    if 0 <= i && i < len then Some (Vector.unsafe_get_side vec (off + i))
    else None

  (* [drop n seg] is [seg] without its first [n] sides, sharing its vector;
     [empty] when [seg] has no more than [n] sides. *)
  let drop n seg =
    if seg.len <= n then empty
    else mk ~off:(seg.off + n) ~len:(seg.len - n) seg.vec

  (* Number of sides of the segment. *)
  let length seg = seg.len

  (* [get_i63 bs off] is the 63 bit word made of the bits of [bs.vec] at
     positions [off] .. [off + 62], where the positions outside of the segment
     (before [bs.off] or from [bs.off + bs.len] on) are reset to 0, i.e.
     [Left].  Positions outside of the vector itself read as 0 too, and [off]
     may be negative.  The result is 0 when the window does not meet the
     segment at all. *)
  let get_i63 bs off =
    (* maybe the head and tail n need to be 0 reset *)
    (* number of leading bits of the window that come before the segment *)
    let head_reset = Int.max 0 (bs.off - off)
      (* |<-- bs.off --------------->|
         |<--- off --------->|
      *)
    in
    if head_reset >= 63 then 0
    else
      (* number of trailing bits of the window that come after the segment *)
      let tail_reset = Int.max 0 (off + 63 - (bs.off + bs.len))
        (* |<-- bs.off + bs.len -->|
           |<----------- off+63 ----------->|
        *)
      in
      if tail_reset >= 63 then 0
      else
        (*
                         nleft      nright
           |<---63--->|<---63--->|<---63--->|
           |<------ off ---->|<---63--->|
           |<-  off ->|<---63--->|     (when shift=0)
        *)
        (* Beware, off can be negative!  The division must round towards
           minus infinity so that [shift] below stays in 0..62. *)
        let off_div_63 = if off >= 0 then off / 63 else - ((-off + 62) / 63) in
        (* for the best performance, shift must be 0 *)
        let shift = off - off_div_63 * 63 in
        assert (shift >= 0);
        (* the window starts [shift] bits inside word [off_div_63] ... *)
        let nleft0 = Vector.safe_get_int bs.vec (off_div_63) in
        let nleft = nleft0 lsl shift in
        (* ... and is completed by the first [shift] bits of the next word *)
        let nright =
          if shift = 0 then 0
          else
            let nright0 = Vector.safe_get_int bs.vec (off_div_63+1) in
            nright0 lsr (63 - shift)
        in
        (* the two parts do not overlap, so adding them combines them *)
        let n = nleft + nright in
        (* Format.eprintf "bres %s@." (string_of_sides (Int63.to_sides n 63)); *)
        (* clear the bits outside of the segment by shifting them out *)
        let n = if head_reset = 0 then n else (n lsl head_reset) lsr head_reset in
        let n = if tail_reset = 0 then n else (n lsr tail_reset) lsl tail_reset in
        n

  (* [append bs1 bs2] is a fresh segment holding the sides of [bs1] followed
     by the sides of [bs2].

     The new vector keeps the alignment of [bs1]: the result starts at bit
     [bs1.off mod 63] of its first word, so the words of [bs1] are copied as
     they are, except for the last one whose bits after the end of [bs1] are
     cleared.  The bits of [bs2] are fetched with [get_i63], which shifts
     them to the right place and returns 0 outside of [bs2]. *)
  let append bs1 bs2 =
    (* based on bs1 *)
    (* XXX if bs2 is longer, we should be based on bs2 *)
    let len = bs1.len + bs2.len in
    let off = bs1.off mod 63 in
    (* indexes, in [bs1.vec], of the first and of the last word of [bs1] *)
    let ioff1 = bs1.off / 63 in
    let ilast1 = (bs1.off + bs1.len - 1) / 63 in
    let ns = (off + len + 62) / 63 in
    (*
       bs1 |<---- bs1.off ---->|<--- bs1.len --->|
                             bs2   |<- bs2.off ->|<--- bs2.len ---->|

       A bit position of [bs1.vec] minus [delta] is the position of the same
       bit of the result in [bs2.vec].
    *)
    let delta = bs1.off + bs1.len - bs2.off in
    let word i =
      let i1 = ioff1 + i in
      let n1 =
        (* The three cases are told apart with [<] and [=]: the result of
           [compare] is only specified up to its sign. *)
        if i1 < ilast1 then Vector.safe_get_int bs1.vec i1
        else if i1 = ilast1 then begin
          (* need to reset the tail *)
          let n1 = Vector.safe_get_int bs1.vec i1 in
          let shift = (ilast1 + 1) * 63 - (bs1.off + bs1.len) in
          if shift = 0 then n1 else (n1 lsr shift) lsl shift
        end
        else 0
      in
      let n2 = get_i63 bs2 (i1 * 63 - delta) in
      (* [n1] and [n2] have no set bit in common past [bs1.off] *)
      n1 + n2
    in
    mk ~off ~len (Vector.mk (Array.init ns word))

  (* [concat bss] appends the segments of [bss] in order, starting from the
     right: [concat [a; b]] is [append a (append b empty)]. *)
  let concat bss = List.fold_right append bss empty

  (* [normalize bs] is a copy of [bs] at offset 0 over a fresh vector whose
     bits after the end of the segment are all 0 (see [get_i63]); [empty]
     for a segment of length 0. *)
  let normalize bs =
    (* XXX if already normalized, return it immediately *)
    (* XXX should replace it? *)
    match bs.len with
    | 0 -> empty
    | len ->
        let nwords = (len + 62) / 63 in
        let word i = get_i63 bs (bs.off + (i * 63)) in
        mk ~off:0 ~len (Vector.mk (Array.init nwords word))

  (* [equal bs1 bs2] tells whether the two segments hold the same sides.
     Physically equal segments, and same length windows at the same offset
     of the same vector, are accepted without looking at the bits; otherwise
     the normalized vectors are compared. *)
  let equal bs1 bs2 =
    bs1 == bs2
    || (bs1.len = bs2.len
        && ((bs1.vec == bs2.vec && bs1.off = bs2.off)
            || (let v1 = (normalize bs1).vec in
                let v2 = (normalize bs2).vec in
                v1 = v2)))

  (* [equal_list bss1 bss2] holds when both lists have the same length and
     their segments are pairwise [equal]. *)
  let rec equal_list bss1 bss2 =
    match bss1, bss2 with
    | [], [] -> true
    | x :: xs, y :: ys -> equal x y && equal_list xs ys
    | [], _ :: _ | _ :: _, [] -> false

  (* The sides of the segment as a string of 'L' and 'R'. *)
  let to_string s = string_of_sides (to_sides s)

  (* Prints [to_string s] on [ppf]. *)
  let pp ppf s = Format.pp_print_string ppf (to_string s)

  (* Total order on segments: the order of their lists of sides.  Two windows
     starting at the same offset of the same vector are ordered by length
     alone, one being a prefix of the other. *)
  let compare bs1 bs2 =
    let same_start = bs1.vec == bs2.vec && bs1.off = bs2.off in
    match bs1 == bs2, same_start with
    | true, _ -> 0
    | false, true -> compare bs1.len bs2.len
    | false, false -> Stdlib.compare (to_sides bs1) (to_sides bs2)

  (* Debug printer: the offset and the length of [s], then every bit of its
     vector, including the ones outside of the segment. *)
  let pp_debug ppf s =
    let nwords = Array.length (s.vec :> int array) in
    let whole = mk ~off:0 ~len:(nwords * 63) s.vec in
    Format.fprintf ppf "off=%d len=%d %s" s.off s.len (to_string whole)

  (* [common_prefix seg1 seg2] is [(prefix, rest1, rest2)] where [prefix] is
     the longest common prefix of the two segments and [rest1], [rest2] are
     what remains of [seg1] and [seg2] after it.  The results are windows on
     the vectors of the arguments (or [empty]); no bit is copied. *)
  let common_prefix seg1 seg2 =
    if seg1 == seg2 then seg1, empty, empty
    else
    if seg1.vec == seg2.vec && seg1.off = seg2.off then
      (* Same start in the same vector: the shorter one is the prefix. *)
      (* this case (seg1.vec==seg2.vec) is not well tested *)
      if seg1.len = seg2.len then seg1, empty, empty
      else
        let len = Int.min seg1.len seg2.len in
        if len = 0 then empty, seg1, seg2
        else
          if seg1.len < seg2.len then
            seg1, empty, mk ~off:(seg2.off+len) ~len:(seg2.len - len) seg2.vec
          else
            seg2, mk ~off:(seg1.off + len) ~len:(seg1.len - len) seg1.vec, empty
      else
        (* General case: compare 63 bits at a time.  Positions are expressed
           in the coordinates of [seg1.vec]; [i] is a word index of
           [seg1.vec]. *)
        let rec cp_aux i =
          let off1 = i * 63 in
          (* the same window, in the coordinates of [seg2.vec] *)
          let off2 = - seg1.off + seg2.off + i * 63 in
          (* bits between the start of this window and the end of each
             segment; [seg2] ends at [seg1.off + seg2.len] in the coordinates
             of [seg1.vec] *)
          let nbits1 = seg1.off + seg1.len - i * 63 in
          let nbits2 = seg1.off + seg2.len - i * 63 in
          (* [get_i63] clears the bits outside of each segment, so the bits
             in front of the segments always compare equal *)
          let i1 = get_i63 seg1 off1 in
          let i2 = get_i63 seg2 off2 in
          let ncommonbits = Int63.common_prefix i1 i2 in
          if nbits1 >= 0 && nbits2 >= 0 && ncommonbits = 63 then
            cp_aux (i+1)
          else
            (* the common bits cannot go past the end of either segment;
               subtracting [seg1.off] removes the bits in front of [seg1] *)
            let total_commonbits = i * 63 + Int.min ncommonbits (Int.min nbits1 nbits2) - seg1.off in
            (if total_commonbits = 0 then empty
             else if seg1.len < seg2.len
             then mk ~off:seg1.off ~len:total_commonbits seg1.vec
             else mk ~off:seg2.off ~len:total_commonbits seg2.vec),
            (let len = seg1.len - total_commonbits in
             if len = 0 then empty
             else
               mk ~off:(seg1.off+total_commonbits) ~len seg1.vec),
            (let len = seg2.len - total_commonbits in
             if len = 0 then empty
             else
               mk ~off:(seg2.off+total_commonbits) ~len seg2.vec)
        in
        (* start at the word of [seg1.vec] holding the first bit of [seg1] *)
        cp_aux (seg1.off / 63)
end

include Bits

(* Another name for [Bits.t]. *)
type segment = Bits.t

(* A segment described piece by piece: single sides and whole segments, to be
   joined in order by [unfat]. *)
type fat = [`Left | `Right | `Segment of t] list

(* [unfat fs] joins the pieces of [fs] into one segment.  Each maximal run of
   [`Left] / [`Right] is first packed into a segment with [of_sides], then
   all the segments are concatenated in order. *)
let unfat fs =
  (* Moves the sides in front of [items] to [rev_sides] (last side first);
     returns the segment of these sides and the items that follow them. *)
  let rec take_sides rev_sides items =
    match items with
    | `Left :: rest -> take_sides (Left :: rev_sides) rest
    | `Right :: rest -> take_sides (Right :: rev_sides) rest
    | [] | `Segment _ :: _ -> of_sides (List.rev rev_sides), items
  in
  let rec to_segs items =
    match items with
    | [] -> []
    | `Segment seg :: rest -> seg :: to_segs rest
    | (`Left | `Right) :: _ ->
        let seg, rest = take_sides [] items in
        seg :: to_segs rest
  in
  concat (to_segs fs)

(* The segments between square brackets, separated by "; "; an empty segment
   is shown as "<empty>". *)
let string_of_segments segs =
  let show seg = if is_empty seg then "<empty>" else to_string seg in
  let body = segs |> List.map show |> String.concat "; " in
  "[" ^ body ^ "]"

(* Prints [string_of_segments segs] on [ppf]. *)
let pp_segments ppf segs =
  Format.pp_print_string ppf (string_of_segments segs)

(* [of_string s] reads a segment written with 'L' and 'R' only; [None] as
   soon as another character is met.  The empty string gives a segment
   without sides. *)
let of_string s =
  (* The string is read from its end, so the sides are consed in order. *)
  let rec scan sides i =
    if i < 0 then Some (of_sides sides)
    else
      match String.unsafe_get s i with
      | 'L' -> scan (Left :: sides) (i - 1)
      | 'R' -> scan (Right :: sides) (i - 1)
      | _ -> None
  in
  scan [] (String.length s - 1)

(* [gen_unsafe_of_encoding off blen s] builds the segment of [blen] sides
   whose bits are stored in [s] from byte [off] on, the first side in the
   most significant bit of the first byte.  The bytes are repacked into
   63 bit words.

   [s] must hold at least [(blen + 7) / 8] bytes from [off]; otherwise a
   diagnostic is printed on stderr and the assertion fails.  Nothing else is
   checked, hence "unsafe". *)
let gen_unsafe_of_encoding off blen s =
  let slen = String.length s in
  if not (slen >= off + (blen + 7) / 8) then begin
    Format.eprintf "%d %d (off=%d)@." blen slen off;
    assert false
  end;
  (* The loop below consumes one byte more than the [(blen + 7) / 8] bytes
     checked above.  When that byte lies past the end of [s] it is taken as
     0 rather than fetched out of bounds. *)
  let byte_at pos =
    if pos < slen then Char.code (String.unsafe_get s pos) else 0
  in
  let outpos = off + (blen + 7) / 8 + 1 in
  (* [acc]: completed words, last one first;
     [curi]: the [curbits] bits of the word being filled. *)
  let rec enc_aux acc curi curbits pos =
    (* [<] and [=] rather than a match on the result of [compare], which is
       only specified up to its sign *)
    if pos = outpos then
      (* left-align what remains and emit it as the last word *)
      Array.of_list @@ List.rev ((curi lsl (63 - curbits)) :: acc)
    else if pos < outpos then begin
      let c = byte_at pos in
      (* clean the tail  (serialization put the tail) *)
      let c =
        if pos + 1 <> outpos then c
        else
          (* last byte *)
          c land (0xff lsl (7 - blen mod 8))
      in
      if curbits + 8 < 63 then
        enc_aux acc (curi lsl 8 + c) (curbits + 8) (pos+1)
      else begin
        (* the byte straddles two words: its first [usebits] bits complete
           the current word, the others start the next one *)
        let usebits = 63 - curbits in
        let curbits = 8 - usebits in
        let acc = (curi lsl usebits) lor (c lsr curbits) :: acc in
        let curi = c land (0xff lsr usebits) in
        enc_aux acc curi curbits (pos+1)
      end
    end
    else assert false
  in
  let vec = Vector.mk @@ enc_aux [] 0 0 off in
  mk ~off:0 ~len:blen vec

(* [unsafe_of_encoding blen s]: [gen_unsafe_of_encoding] reading [s] from its
   first byte. *)
let unsafe_of_encoding = gen_unsafe_of_encoding 0

(* [to_encoding seg] is [(seg.len, s)] where [s] holds the bits of [seg] in
   [(seg.len + 7) / 8] bytes, first side in the most significant bit of the
   first byte; the bits after the last side are 0 (see [get_i63]).

   Maybe we should use byte aligned int56. *)
let to_encoding seg =
  let nbytes = (seg.len + 7) / 8 in
  let buf = Buffer.create nbytes in
  let put byte = Buffer.add_char buf (Char.chr byte) in
  (* Each round writes 56 bits: the 7 top bytes of a 63 bit window. *)
  let rec emit chunk =
    if chunk * 56 < seg.len then begin
      let word = get_i63 seg (seg.off + (chunk * 56)) in
      put (word lsr 55);
      List.iter
        (fun shift -> put ((word lsr shift) land 0xff))
        [47; 39; 31; 23; 15; 7];
      emit (chunk + 1)
    end
  in
  emit 0;
  (* the last round may have written more bytes than needed *)
  seg.len, Buffer.sub buf 0 nbytes

module Serialization = struct
  (* How to stored on disk *)

  (* [fix_tail len bytes] sets, in the last byte of [bytes], the bit that
     follows the [len mod 8] first ones: the end marker of a serialized
     segment of [len] bits.  [bytes] is modified in place and returned as a
     string without copy, so the caller must not use it afterwards. *)
  let fix_tail len bytes =
    let last = Bytes.length bytes - 1 in
    let marker = 128 lsr (len mod 8) in
    let patched = Char.code (Bytes.unsafe_get bytes last) lor marker in
    Bytes.unsafe_set bytes last (Char.chr patched);
    Bytes.unsafe_to_string bytes

  (* [encode seg] is the on-disk form of [seg]: its bits followed by the end
     marker (a 1 bit, then 0s up to the byte boundary), in
     [seg.len / 8 + 1] bytes.

     XXX dupe! (same loop as [to_encoding]) *)
  let encode seg =
    let nbytes = (seg.len / 8) + 1 in
    let buf = Buffer.create nbytes in
    (* Each round writes 56 bits: the 7 top bytes of a 63 bit window. *)
    let rec emit chunk =
      if chunk * 56 < seg.len then begin
        let word = get_i63 seg (seg.off + (chunk * 56)) in
        for k = 6 downto 0 do
          (* the mask is a no-op for k = 6: only 8 bits are left there *)
          Buffer.add_char buf (Char.chr ((word lsr (7 + (8 * k))) land 0xff))
        done;
        emit (chunk + 1)
      end
    in
    emit 0;
    (* room for the marker when the bits fill their bytes completely *)
    if seg.len mod 8 = 0 then Buffer.add_char buf '\000';
    fix_tail seg.len (Bytes.unsafe_of_string (Buffer.sub buf 0 nbytes))

  (* [decode_slice_exn (s, off, len)] decodes the segment serialized in the
     [len] bytes of [s] starting at [off].  Fast conversion of LR bytes.

       |<-----------  len ----------->|
       |xxxxxxxxxxxxxxxxxxx10..0|0...0|
       | 0  | ..         |  nz  |
                           ^
                           |
                           +------ last_one

     The segment stops just before the last 1 bit of the slice.  Assertions
     fail when the slice does not fit in [s] or holds no 1 bit at all. *)
  let decode_slice_exn (s, off, len) =
    assert (String.length s >= off + len);
    (* index, inside the slice, of its last non zero byte *)
    let rec last_nonzero i =
      if i < 0 then assert false
      else if String.unsafe_get s (off + i) = '\x00' then last_nonzero (i - 1)
      else i
    in
    let nz = last_nonzero (len - 1) in
    assert (nz >= 0);
    let last_c = Char.code (String.unsafe_get s (off + nz)) in
    assert (last_c <> 0); (* XXX proper error? *)
    (* rank of the lowest 1 bit of [last_c], the least significant bit being
       0; found within 8 steps since [last_c] is a non zero byte *)
    let rec lowest_one k =
      if last_c land (1 lsl k) <> 0 then k else lowest_one (k + 1)
    in
    (* number of bits of the last byte that come before the marker *)
    let last_one = 7 - lowest_one 0 in
    gen_unsafe_of_encoding off ((nz * 8) + last_one) s

  (* Decodes a segment serialized in the whole of [s]; fails as
     [decode_slice_exn] does. *)
  let decode_exn s = decode_slice_exn (s, 0, String.length s)

  (* Same as [decode_exn], any exception being turned into [None]. *)
  let decode s =
    match decode_exn s with
    | seg -> Some seg
    | exception _ -> None

  (* [decode_list_slice (s, off)] reads, from byte [off] of [s], a sequence of
     segments each preceded by its byte length, up to a length byte of 0.
     Returns the segments and the position right after the final 0; [None]
     when [s] ends too early or a segment cannot be decoded. *)
  let decode_list_slice (s, off) =
    let total = String.length s in
    let rec loop rev_segs pos =
      if pos >= total then None (* overrun *)
      else
        match Char.code (String.unsafe_get s pos) with
        | 0 -> Some (List.rev rev_segs, pos + 1)
        | l ->
            (match decode_slice_exn (s, pos + 1, l) with
             | seg -> loop (seg :: rev_segs) (pos + 1 + l)
             | exception _ -> None)
    in
    loop [] off

  (* [decode_list s] decodes a list of segments that must span the whole of
     [s]; [None] on a decoding failure or when bytes are left over. *)
  let decode_list s =
    match decode_list_slice (s, 0) with
    | Some (segs, consumed) ->
        if consumed = String.length s then Some segs else None
    | None -> None

  (* [encode_list ts] writes each segment as one length byte followed by its
     [encode]d form, then a final 0 byte.  An assertion fails if an encoded
     segment does not fit in 255 bytes. *)
  let encode_list ts =
    let buf = Buffer.create (List.length ts * 256) in
    let add seg =
      let s = encode seg in
      (* the length must fit in the single byte written in front of [s] *)
      let len = String.length s in
      assert (0 <= len && len < 256);
      Buffer.add_char buf (Char.chr len);
      Buffer.add_string buf s
    in
    List.iter add ts;
    Buffer.add_char buf '\000';
    Buffer.contents buf

end

(* Checked when the module is initialised: [Serialization.encode_list] stores
   the byte length of each encoded segment in a single byte. *)
let () = assert (max_serialization_bytes <= 255)

(* [Data_encoding] encoding of a segment: the bytes of
   [Serialization.encode], bounded by [max_serialization_bytes].
   Reading goes through [Serialization.decode_exn], so it fails the same way
   on bytes that are not a serialized segment. *)
let encoding =
  let open Data_encoding in
  conv
    (fun s -> Bytes.of_string @@ Serialization.encode s)
    (fun b -> Serialization.decode_exn @@ Bytes.to_string b)
    (Bounded.bytes max_serialization_bytes)

(* A list of segments that grows at its end: the last segment is kept as a
   reversed list of pieces until [push_bud] closes it. *)
module Segs = struct
  (* growing segments at the end *)
  type t' =
    { rev_last : fat (* reversed! *)
    ; rev_segs : segment list
    }

  type t = t' option

  (* No segment at all: [to_segments empty] is []. *)
  let empty = None

  (* Note: this is not truly the empty segments but
     a singleton of an empty segment.
  *)
  let empty' = Some { rev_segs = []; rev_last = [] } (* '/' *)

  (* Adds one side at the end of the last segment.  Fails on [empty]. *)
  let add_side t side =
    let tagged = match side with Left -> `Left | Right -> `Right in
    match t with
    | None -> assert false
    | Some segs -> Some { segs with rev_last = tagged :: segs.rev_last }

  (* Adds a whole segment at the end of the last segment.  Fails on
     [empty]. *)
  let append_seg t seg =
    match t with
    | None -> assert false
    | Some { rev_last; rev_segs } ->
        Some { rev_segs; rev_last = `Segment seg :: rev_last }

  (* Adds sides, in order, at the end of the last segment.  Fails on
     [empty]. *)
  let append_sides t sides =
    match t with
    | None -> assert false
    | Some segs ->
        let push acc side =
          match side with
          | Left -> `Left :: acc
          | Right -> `Right :: acc
        in
        Some { segs with rev_last = List.fold_left push segs.rev_last sides }

  (* Closes the last segment and starts a new, empty one. *)
  let push_bud t =
    match t with
    | None -> Some { rev_segs = []; rev_last = [] }
    | Some ({ rev_last = []; rev_segs = [] } as segs) ->
        (* root bud never changes the segs *)
        Some segs
    | Some { rev_last; rev_segs } ->
        assert ( rev_last <> [] );
        (* XXX Should encode ? *)
        Some { rev_segs = unfat (List.rev rev_last) :: rev_segs; rev_last = [] }

  (* The segments in order, the last one being built from its pieces. *)
  let to_segments t =
    match t with
    | None -> []
    | Some { rev_last; rev_segs } ->
        List.rev (unfat (List.rev rev_last) :: rev_segs)

  (* The segments printed as 'L' / 'R' strings, joined by "/". *)
  let to_string t =
    to_segments t |> List.map to_string |> String.concat "/"

  (* Inverse of [to_segments]: the last segment of the list becomes the one
     that can still grow. *)
  let of_segments segs =
    match List.rev segs with
    | [] -> empty
    | newest :: older ->
        Some { rev_segs = older; rev_last = [ `Segment newest ] }

  (* The pieces of the last segment, in order; [None] on [empty]. *)
  let last t =
    match t with
    | Some { rev_last; _ } -> Some (List.rev rev_last)
    | None -> None
end

(* Strings stored as segments, 8 sides per byte of their [Data_encoding]
   binary form. *)
module StringEnc = struct
  (* Only for testing purpose *)

  (* The 8 bits of [c] as sides, most significant bit first
     ([Left] = 0, [Right] = 1). *)
  let of_char c =
    let code = Char.code c in
    List.map
      (fun mask -> if code land mask = 0 then Left else Right)
      [ 128; 64; 32; 16; 8; 4; 2; 1 ]

  (* [encode s] is the segment made of the bits of the binary form of [s]. *)
  let encode s =
    let open Data_encoding in
    match Binary.to_bytes Data_encoding.Encoding.string s with
    | Error _ -> assert false
    | Ok b ->
        let raw = Bytes.to_string b in
        (* from the last byte to the first, so that the sides end up in
           order *)
        let rec sides_upto acc = function
          | -1 -> acc
          | i -> sides_upto (of_char (String.unsafe_get raw i) @ acc) (i-1)
        in
        of_sides @@ sides_upto [] (String.length raw - 1)

  (* Inverse of [encode]; [None] when the length of [seg] is not a multiple
     of 8 or its bytes are not the binary form of a string. *)
  let decode seg =
    let buf = Buffer.create 10 in
    let shift_in code side =
      match side with
      | Left -> code * 2
      | Right -> (code * 2) + 1
    in
    (* turns each group of 8 sides into one byte of [buf] *)
    let rec fill = function
      | [] -> Some (Buffer.contents buf)
      | b7::b6::b5::b4::b3::b2::b1::b0::rest ->
          let code =
            List.fold_left shift_in 0 [ b7; b6; b5; b4; b3; b2; b1; b0 ]
          in
          Buffer.add_char buf (Char.chr code);
          fill rest
      | _ -> None
    in
    match fill (to_sides seg) with
    | None -> None
    | Some s ->
        (match
           Data_encoding.Binary.of_string Data_encoding.Encoding.string s
         with
         | Ok x -> Some x
         | Error _ -> None)
end

(* Gives access to the implementation modules of this file under a single
   name. *)
module Internal = struct
  module Int63 = Int63
  module Vector = Vector
  module Bits = Bits
end