Source file chunked_byte_vector.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
open Bigarray
exception Bounds
exception SizeOverflow
let reraise = function
| Lazy_vector.Bounds -> raise Bounds
| Lazy_vector.SizeOverflow -> raise SizeOverflow
| exn -> raise exn
module Array1_64 = struct
let create kind layout n =
if n < 0L || n > Int64.of_int max_int then
raise (Invalid_argument "Bigarray.Array1_64.create") ;
Array1.create kind layout (Int64.to_int n)
let index_of_int64 i =
if i < 0L || i > Int64.of_int max_int then -1 else Int64.to_int i
let get a i = Array1.get a (index_of_int64 i)
let set a i x = Array1.set a (index_of_int64 i) x
end
module Chunk = struct
type t = (int, int8_unsigned_elt, c_layout) Array1.t
(** Number of bits in an address for the chunk offset *)
let offset_bits = 9
(** Size of a chunk in bytes - with 9 bits of address space the
chunk is 512 bytes *)
let size = Int64.shift_left 1L offset_bits
(** Get the chunk index for an address. *)
let index address = Int64.shift_right address offset_bits
(** Get the offset within its chunk for a given address. *)
let offset address = Int64.(logand address (sub size 1L))
(** Get the address from a page index and an offset. *)
let address ~index ~offset = Int64.(add (shift_left index offset_bits) offset)
let alloc () =
let chunk = Array1_64.create Int8_unsigned C_layout size in
Array1.fill chunk 0 ;
chunk
let of_bytes bytes =
let chunk = alloc () in
for i = 0 to Int.max (Int64.to_int size) (Bytes.length bytes) - 1 do
Array1.set chunk i (Char.code (Bytes.get bytes i))
done ;
chunk
let to_bytes chunk =
let len = Array1.size_in_bytes chunk in
Bytes.init len (fun i -> Char.chr @@ Array1.get chunk i)
let num_needed length =
if Int64.compare length 0L > 0 then
Int64.(div (pred length) size |> succ)
else 0L
end
module Vector = Lazy_vector.Mutable.Int64Vector
type t = {mutable length : int64; chunks : Chunk.t Vector.t}
let def_get_chunk _ = Lwt.return (Chunk.alloc ())
let create ?origin ?get_chunk length =
let chunks =
Vector.create ?origin ?produce_value:get_chunk (Chunk.num_needed length)
in
{length; chunks}
let origin vector = Vector.origin vector.chunks
let grow vector size_delta =
if 0L < size_delta then (
let new_size = Int64.add vector.length size_delta in
let new_chunks = Chunk.num_needed new_size in
let current_chunks = Vector.num_elements vector.chunks in
let chunk_count_delta = Int64.sub new_chunks current_chunks in
if Int64.compare chunk_count_delta 0L > 0 then
Vector.grow chunk_count_delta vector.chunks ;
vector.length <- new_size)
let allocate length =
let res = create 0L in
grow res length ;
res
let length vector = vector.length
let get_chunk index {chunks; _} =
Lwt.catch
(fun () -> Vector.get index chunks)
(function
| Lazy_vector.Bounds as exn -> reraise exn | _ -> def_get_chunk ())
let set_chunk index chunk {chunks; _} =
try Vector.set index chunk chunks with exn -> reraise exn
let load_byte vector address =
let open Lwt.Syntax in
if Int64.compare address vector.length >= 0 then raise Bounds ;
let+ chunk = get_chunk (Chunk.index address) vector in
Array1_64.get chunk (Chunk.offset address)
let load_bytes vector start length =
let open Lwt.Syntax in
let end_offset = Int64.pred @@ Int64.add start length in
if
start < 0L || length < 0L || end_offset > vector.length
|| vector.length > Int64.of_int Sys.max_string_length
then raise Bounds ;
if length = 0L then Lwt.return Bytes.empty
else
let buffer = Bytes.create @@ Int64.to_int length in
let rec copy chunk offset length dest_offset =
if length > 0L then (
Array1.get chunk offset |> Char.chr |> Bytes.set buffer dest_offset ;
(copy [@tailcall])
chunk
(Int.succ offset)
(Int64.pred length)
(Int.succ dest_offset))
else ()
in
let rec go offset length =
if length > 0L then (
let chunk_index = Chunk.index offset in
let chunk_offset = Chunk.offset offset in
let chunk_length = Int64.(min (sub Chunk.size chunk_offset) length) in
let* chunk = get_chunk chunk_index vector in
copy
chunk
(Int64.to_int chunk_offset)
chunk_length
Int64.(sub offset start |> to_int) ;
(go [@tailcall])
(Int64.add offset chunk_length)
(Int64.sub length chunk_length))
else Lwt.return_unit
in
let+ () = go start length in
buffer
let store_byte vector address byte =
let open Lwt.Syntax in
if Int64.compare address vector.length >= 0 then raise Bounds ;
let+ chunk = get_chunk (Chunk.index address) vector in
Array1_64.set chunk (Chunk.offset address) byte ;
Vector.set (Chunk.index address) chunk vector.chunks
let store_bytes vector address bytes =
List.init (Bytes.length bytes) (fun i ->
let c = Bytes.get bytes i in
store_byte vector Int64.(of_int i |> add address) (Char.code c))
|> Lwt.join
let of_string str =
let len = String.length str in
let vector = create (Int64.of_int len) in
let _ =
List.init
(Vector.num_elements vector.chunks |> Int64.to_int)
(fun index ->
let index = Int64.of_int index in
let chunk = Chunk.alloc () in
let _ =
List.init (Chunk.size |> Int64.to_int) (fun offset ->
let offset = Int64.of_int offset in
let address = Chunk.address ~index ~offset |> Int64.to_int in
if address < len then
let c = String.get str address in
Array1_64.set chunk offset (Char.code c))
in
set_chunk index chunk vector)
in
vector
let of_bytes bytes =
let len = Bytes.length bytes in
let vector = create (Int64.of_int len) in
let _ =
List.init
(Vector.num_elements vector.chunks |> Int64.to_int)
(fun index ->
let index = Int64.of_int index in
let chunk = Chunk.alloc () in
let _ =
List.init (Chunk.size |> Int64.to_int) (fun offset ->
let offset = Int64.of_int offset in
let address = Chunk.address ~index ~offset |> Int64.to_int in
if address < len then
let c = Bytes.get bytes address in
Array1_64.set chunk offset (Char.code c))
in
set_chunk index chunk vector)
in
vector
let to_bytes vector = load_bytes vector 0L vector.length
let to_string vector =
let open Lwt.Syntax in
let+ buffer = to_bytes vector in
Bytes.to_string buffer
let loaded_chunks vector =
Vector.Vector.loaded_bindings (Vector.snapshot vector.chunks)