Source file immutable_chunked_byte_vector.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
module String = StdLabels.String
module Bytes = StdLabels.Bytes
exception Bounds
exception SizeOverflow
let reraise = function
| Lazy_vector.Bounds -> raise Bounds
| Lazy_vector.SizeOverflow -> raise SizeOverflow
| exn -> raise exn
module Chunk = struct
type t = string
(** Number of bits in an address for the chunk offset *)
let offset_bits = 9
(** Size of a chunk in bytes - with 9 bits of address space the
chunk is 512 bytes *)
let size = Int64.shift_left 1L offset_bits
(** The same size but of type int, for internal usage. *)
let size_int : int = Int.shift_left 1 offset_bits
(** Get the chunk index for an address. *)
let index address = Int64.shift_right address offset_bits
(** Get the offset within its chunk for a given address. *)
let offset address = Int64.(logand address (sub size 1L))
(** Get the address from a page index and an offset. *)
let address ~index ~offset = Int64.(add (shift_left index offset_bits) offset)
let alloc () = String.make size_int (Char.chr 0)
let blit ~(src : bytes) ~(src_pos : int) ~(dst : t) ~(dst_pos : int64)
~(len : int) : t =
let bytes_len = Bytes.length src in
if
len <= 0
|| src_pos + len > bytes_len
|| Int64.add dst_pos (Int64.of_int len) > size
then raise Bounds
else if len = size_int then
Bytes.unsafe_to_string @@ Bytes.sub src ~pos:src_pos ~len
else
let new_chunk_bytes = String.to_bytes dst in
Bytes.blit
~src
~src_pos
~dst:new_chunk_bytes
~dst_pos:(Int64.to_int dst_pos)
~len ;
Bytes.unsafe_to_string new_chunk_bytes
let of_bytes_sub bytes ~(pos : int) ~(len : int) =
if len > size_int then raise Bounds
else if len = size_int then
let copied_sub = Bytes.sub bytes ~pos ~len in
Bytes.unsafe_to_string copied_sub
else blit ~src:bytes ~src_pos:pos ~dst:(alloc ()) ~dst_pos:0L ~len
let of_bytes bytes =
of_bytes_sub bytes ~pos:0 ~len:(Int.min (Bytes.length bytes) size_int)
let to_bytes = String.to_bytes
let num_needed length =
if Int64.compare length 0L > 0 then
Int64.(div (pred length) size |> succ)
else 0L
let inclusive_boundaries chunk_id =
( address ~index:chunk_id ~offset:0L,
address ~index:chunk_id ~offset:(Int64.sub size 1L) )
let set_char (s : t) (offset : int64) (chr : char) =
let copied_bytes = String.to_bytes s in
Bytes.set copied_bytes (Int64.to_int offset) chr ;
Bytes.unsafe_to_string copied_bytes
let encoding =
let open Tezos_tree_encoding in
conv of_bytes to_bytes (raw [])
end
module Vector = Lazy_vector.Int64Vector
type t = {length : int64; chunks : Chunk.t Vector.t}
let def_get_chunk _ = Lwt.return (Chunk.alloc ())
let set_chunk (vec : t) (chunk_id : int64) (chunk : Chunk.t) =
let new_chunks = Vector.set chunk_id chunk vec.chunks in
{vec with chunks = new_chunks}
let create ?origin ?get_chunk length =
let chunks =
Vector.create ?origin ?produce_value:get_chunk (Chunk.num_needed length)
in
{length; chunks}
let origin vector = Vector.origin vector.chunks
let grow vector size_delta =
if size_delta > 0L then
let new_size = Int64.add vector.length size_delta in
let new_chunks = Chunk.num_needed new_size in
let current_chunks = Vector.num_elements vector.chunks in
let chunk_count_delta = Int64.sub new_chunks current_chunks in
if Int64.compare chunk_count_delta 0L > 0 then
{length = new_size; chunks = Vector.grow chunk_count_delta vector.chunks}
else {vector with length = new_size}
else vector
let allocate length = grow (create 0L) length
let of_bytes bytes =
let length = Int64.of_int (Bytes.length bytes) in
let rec set_chunks (chunk_id : int64) vec =
let chunk_left, _ = Chunk.inclusive_boundaries chunk_id in
if chunk_left >= length then vec
else
let suffix_len = Int64.to_int @@ Int64.sub length chunk_left in
let len = Int.min Chunk.size_int suffix_len in
let chunk =
Chunk.of_bytes_sub bytes ~pos:(Int64.to_int chunk_left) ~len
in
let new_vec = set_chunk vec chunk_id chunk in
(set_chunks [@tailcall]) (Int64.add chunk_id 1L) new_vec
in
set_chunks 0L (allocate length)
let of_string str = of_bytes @@ Bytes.unsafe_of_string str
let length vector = vector.length
let get_chunk index {chunks; _} =
Lwt.catch
(fun () -> Vector.get index chunks)
(function
| Lazy_vector.Bounds as exn -> reraise exn | _ -> def_get_chunk ())
let load_byte vector address =
let open Lwt.Syntax in
if address >= vector.length || address < 0L then raise Bounds ;
let+ chunk = get_chunk (Chunk.index address) vector in
let offset = Int64.to_int @@ Chunk.offset address in
Char.code @@ String.get chunk offset
let load_bytes vector offset length =
let open Lwt.Syntax in
let end_offset = Int64.pred @@ Int64.add offset length in
if
offset < 0L || length < 0L
|| end_offset >= vector.length
|| vector.length > Int64.of_int Sys.max_string_length
then raise Bounds ;
let accum = Bytes.make (Int64.to_int length) (Char.chr 0) in
let rec copy_chunks (vector_pos : int64) =
if vector_pos > end_offset then Lwt.return_unit
else
let chunk_id = Chunk.index vector_pos in
let left, right = Chunk.inclusive_boundaries chunk_id in
let* chunk = get_chunk chunk_id vector in
let l_chunk_offset, r_chunk_offset =
if left == vector_pos && right <= end_offset then (0, Chunk.size_int - 1)
else
( Int64.to_int @@ Chunk.offset @@ Int64.max left vector_pos,
Int64.to_int @@ Chunk.offset @@ Int64.min right end_offset )
in
let sub_chunk_len = r_chunk_offset - l_chunk_offset + 1 in
let accum_pos = Int64.to_int @@ Int64.sub vector_pos offset in
String.blit
~src:chunk
~src_pos:l_chunk_offset
~dst:accum
~dst_pos:accum_pos
~len:sub_chunk_len ;
(copy_chunks [@tailcall])
(Int64.add vector_pos @@ Int64.of_int sub_chunk_len)
in
let+ () = copy_chunks offset in
accum
let store_byte vector address byte =
let open Lwt.Syntax in
if address >= vector.length || address < 0L then raise Bounds ;
let+ chunk = get_chunk (Chunk.index address) vector in
let offset = Chunk.offset address in
let new_chunk = Chunk.set_char chunk offset (Char.chr byte) in
set_chunk vector (Chunk.index address) new_chunk
let store_bytes vector offset bytes =
let open Lwt.Syntax in
let length = Int64.of_int @@ Bytes.length bytes in
let end_offset = Int64.pred @@ Int64.add offset length in
if
offset < 0L
|| end_offset >= vector.length
|| vector.length > Int64.of_int Sys.max_string_length
then raise Bounds ;
let rec set_chunks (bytes_pos : int) vec =
if Int64.of_int bytes_pos >= length then Lwt.return vec
else
let offseted_pos = Int64.add offset (Int64.of_int bytes_pos) in
let chunk_id = Chunk.index offseted_pos in
let _, chunk_right = Chunk.inclusive_boundaries chunk_id in
let* chunk = get_chunk chunk_id vec in
let l_range = Chunk.offset offseted_pos in
let r_range =
if end_offset >= chunk_right then
Chunk.offset chunk_right
else
Chunk.offset end_offset
in
let len = Int64.succ (Int64.sub r_range l_range) |> Int64.to_int in
let new_chunk =
Chunk.blit
~src:bytes
~src_pos:bytes_pos
~dst:chunk
~dst_pos:l_range
~len
in
let new_vec = set_chunk vec chunk_id new_chunk in
(set_chunks [@tailcall]) (bytes_pos + len) new_vec
in
set_chunks 0 vector
let to_bytes vector = load_bytes vector 0L vector.length
let to_string vector =
let open Lwt.Syntax in
let+ buffer = to_bytes vector in
Bytes.to_string buffer
let loaded_chunks vector = Vector.loaded_bindings vector.chunks
module Enc_intf = struct
type nonrec t = t
type chunk = Chunk.t
let origin = origin
let loaded_chunks = loaded_chunks
let length = length
let create = create
end
module Encoding = Tezos_tree_encoding.CBV_encoding.Make (Enc_intf)
let encoding = Encoding.cbv Chunk.encoding