123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267(**************************************************************************)(* This file is part of the Codex semantics library. *)(* *)(* Copyright (C) 2013-2025 *)(* CEA (Commissariat à l'énergie atomique et aux énergies *)(* alternatives) *)(* *)(* you can redistribute it and/or modify it under the terms of the GNU *)(* Lesser General Public License as published by the Free Software *)(* Foundation, version 2.1. *)(* *)(* It is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *)(* GNU Lesser General Public License for more details. *)(* *)(* See the GNU Lesser General Public License version 2.1 *)(* for more details (enclosed in the file LICENSE). *)(* *)(**************************************************************************)exceptionUncompressable(** The storage format is
{v
+------------------------------------+------------+
| Aligned data (int32, 64, bytes...) | extra bits |
+------------------------------------+------------+
0 size-1
v}
- Aligned data is in order of insertion
- Extra bits is in the reverse order of insertion, grouped by bytes.
So the last byte contains [b0, b1, ... b7] where [b0] is the first pushed
boolean, [b1] the second and so on.
[b0] is the least significant bit, [b1] is shifted by 1, [b2] by 2, etc...
The byte before last is [b8, ... b15] and so on.
If the total number of booleans isn't a multiple of eight, the extra bits
is padded with 0 on the left.
This essentially allows a double stack, growing from the front for aligned
data, and from the rear for booleans, until the middle point, whose position
is unknown. *)(** {1 Compress} *)(******************************************************************************)(** Implementation as a byte list - easy, but lots of allocations *)(* module ByteList = struct
type compress = {
mutable aligned_data: bytes list;
mutable extra_bits: bool list;
}
let make _ = { aligned_data = []; extra_bits = [] }
let write_bytes x bytes = x.aligned_data <- bytes :: x.aligned_data
let write_int8 x char =
let bytes = Bytes.create 1 in
Bytes.set bytes 0 char;
write_bytes x bytes
let write_int32 x int32 =
let bytes = Bytes.create 4 in
Bytes.set_int32_ne bytes 0 int32;
write_bytes x bytes
let write_int64 x int64 =
let bytes = Bytes.create 8 in
Bytes.set_int64_ne bytes 0 int64;
write_bytes x bytes
let write_bool x b = x.extra_bits <- b :: x.extra_bits
let byte_of_bits x =
let (_, offset, value, char_list) = List.fold_left (fun (pos, offset, value, char_list) b ->
let value = if b then value lor (1 lsl offset) else value in
if offset = 7
then (pos+1, 0, 0, char_of_int value :: char_list)
else (pos, offset+1, value, char_list))
(0, 0, 0, []) (List.rev x) in
(if offset != 0 then char_of_int value :: char_list else char_list)
|> List.to_seq
|> Bytes.of_seq
let to_bytes { aligned_data; extra_bits } =
let extra_bits = byte_of_bits extra_bits in
Bytes.concat Bytes.empty (List.rev_append aligned_data [extra_bits])
end *)(** Vector/Dynarrat-like byte structure: aligned data is resized as stuff is inserted *)typecompress={mutablepos:int;mutablealigned_data:bytes;mutablebit_pos:int;mutableextra_bits:intlist;(* This is really a list of char, but since bitwise operation aren't available on char,
and a char takes the same space as an int, we only convert to char at the last minute. *)}letmaken={pos=0;aligned_data=Bytes.create(n+1);bit_pos=8;extra_bits=[]}letresizexn=letlen=Bytes.lengthx.aligned_datainiflen<nthenx.aligned_data<-Bytes.extendx.aligned_data0(max(n-len)len)letwrite_bytesxbytes=letlen=Bytes.lengthbytesinletnew_pos=x.pos+leninresizexnew_pos;Bytes.blitbytes0x.aligned_datax.poslen;x.pos<-new_posletwrite_int8xchar=letnew_pos=x.pos+1inresizexnew_pos;Bytes.setx.aligned_datax.poschar;x.pos<-new_posletwrite_int32xi32=letnew_pos=x.pos+4inresizexnew_pos;Bytes.set_int32_nex.aligned_datax.posi32;x.pos<-new_posletwrite_int64xi64=letnew_pos=x.pos+8inresizexnew_pos;Bytes.set_int64_nex.aligned_datax.posi64;x.pos<-new_posletwrite_boolxb=ifx.bit_pos=8then(x.bit_pos<-1;x.extra_bits<-Bool.to_intb::x.extra_bits)elsematchx.extra_bitswith|[]->failwith"Invalid compress"|i::is->x.extra_bits<-(ilor(Bool.to_intb)lslx.bit_pos)::is;x.bit_pos<-x.bit_pos+1letto_bytesx=letlen_extra=List.lengthx.extra_bitsinletremaining_size=Bytes.lengthx.aligned_data-x.posinifremaining_size!=len_extrathenx.aligned_data<-Bytes.extendx.aligned_data0(len_extra-remaining_size);List.iteri(funic->Bytes.setx.aligned_data(x.pos+i)(char_of_intc))x.extra_bits;x.aligned_data(** {2 Compound operations} *)(********************************************************************)letwrite_optionwrite_valuexopt=matchoptwith|None->write_boolxfalse|Somevalue->write_boolxtrue;write_valuexvalueletwrite_eitherwrite_leftwrite_rightcompress=function|Either.Leftl->write_boolcompressfalse;write_leftcompressl|Either.Rightr->write_boolcompresstrue;write_rightcompressrletwrite_z~signedcompressz=letfits_32=ifsignedthenZ.fits_int32zelseZ.fits_int32_unsignedzinwrite_boolcompressfits_32;iffits_32thenwrite_int32compress(ifsignedthenZ.to_int32zelseZ.to_int32_unsignedz)elsetrywrite_int64compress(ifsignedthenZ.to_int64zelseZ.to_int64_unsignedz)withZ.Overflow->raiseUncompressableletfits_31int=-0x40_00_00_00<=int&&int<=0x3f_ff_ff_ffletmark_i32i32=ifSys.big_endianthenInt32.logori32Int32.min_intelseInt32.shift_lefti321|>Int32.add1l(* Return true if the mark is 0, false if it isn't *)letunmark_i32i32=ifSys.big_endianthen((* get top bit to check mark, then shift left then right to recover top bit sign *)Int32.logandi32Int32.max_int|>Int32.equal0l,leti32=Int32.shift_lefti321inInt32.shift_righti321)else(Int32.logandi321l|>Int32.equal0l,Int32.shift_righti321)letunmark_i64i64=ifSys.big_endianthenleti64=Int64.shift_lefti641inInt64.shift_righti641elseInt64.shift_righti641letmark_i64i64=ifSys.big_endiantheni64elseInt64.shift_lefti641letwrite_intcompressint=(* If fits 31 bits, ensure lowest bit is 1 *)iffits_31intthenInt32.of_intint|>mark_i32|>write_int32compresselseInt64.of_intint|>mark_i64|>write_int64compress(** {1 Decompress} *)(******************************************************************************)typedecompress={bytes:bytes;mutablealigned_pos:int;mutableextra_bits_pos:int;}letof_bytesbytes={bytes;aligned_pos=0;extra_bits_pos=0}letread_bytesxn=letpos=x.aligned_posinx.aligned_pos<-pos+n;Bytes.subx.bytesposnletread_int8x=letpos=x.aligned_posinx.aligned_pos<-pos+1;Bytes.getx.bytesposletread_int32x=letpos=x.aligned_posinx.aligned_pos<-pos+4;Bytes.get_int32_nex.bytesposletread_int64x=letpos=x.aligned_posinx.aligned_pos<-pos+8;Bytes.get_int64_nex.bytesposletread_boolx=letpos=x.extra_bits_posinx.extra_bits_pos<-x.extra_bits_pos+1;letchar=Bytes.getx.bytes(Bytes.lengthx.bytes-1-pos/8)inint_of_charcharland(1lsl(posmod8))<>0(** {2 Compound operations} *)(********************************************************************)letread_optionread_valuex=ifread_boolxthenSome(read_valuex)elseNoneletread_eitherread_leftread_rightx=ifread_boolxthenEither.Right(read_rightx)elseEither.Left(read_leftx)letread_z~signedx=ifread_boolxthenread_int32x|>(ifsignedthenZ.of_int32elseZ.of_int32_unsigned)elseread_int64x|>(ifsignedthenZ.of_int64elseZ.of_int64_unsigned)letread_intx=leti32=read_int32xinletis_64,i32=unmark_i32i32inifis_64then(* undo read, then read int64 *)let()=x.aligned_pos<-x.aligned_pos-4inread_int64x|>unmark_i64|>Int64.to_intelseInt32.to_inti32