PackedIntArray.ml1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211(******************************************************************************) (* *) (* Menhir *) (* *) (* Copyright Inria. All rights reserved. This file is distributed under *) (* the terms of the GNU Library General Public License version 2, with a *) (* special exception on linking, as described in the file LICENSE. *) (* *) (******************************************************************************) (* A packed integer array is represented as a pair of an integer [k] and a string [s]. The integer [k] is the number of bits per integer that we use. The string [s] is just an array of bits, which is read in 8-bit chunks. *) (* The ocaml programming language treats string literals and array literals in slightly different ways: the former are statically allocated, while the latter are dynamically allocated. (This is rather arbitrary.) In the context of Menhir's table-based back-end, where compact, immutable integer arrays are needed, ocaml strings are preferable to ocaml arrays. *) type t = int * string (* The magnitude [k] of an integer [v] is the number of bits required to represent [v]. It is rounded up to the nearest power of two, so that [k] divides [Sys.word_size]. *) let magnitude (v : int) = if v < 0 then Sys.word_size else let rec check k max = (* [max] equals [2^k] *) if (max <= 0) || (v < max) then k (* if [max] just overflew, then [v] requires a full ocaml integer, and [k] is the number of bits in an ocaml integer plus one, that is, [Sys.word_size]. *) else check (2 * k) (max * max) in check 1 2 (* [pack a] turns an array of integers into a packed integer array. *) (* Because the sign bit is the most significant bit, the magnitude of any negative number is the word size. In other words, [pack] does not achieve any space savings as soon as [a] contains any negative numbers, even if they are ``small''. *) let pack (a : int array) : t = let m = Array.length a in (* Compute the maximum magnitude of the array elements. This tells us how many bits per element we are going to use. *) let k = Array.fold_left (fun k v -> max k (magnitude v) ) 1 a in (* Because access to ocaml strings is performed on an 8-bit basis, two cases arise. If [k] is less than 8, then we can pack multiple array entries into a single character. If [k] is greater than 8, then we must use multiple characters to represent a single array entry. *) if k <= 8 then begin (* [w] is the number of array entries that we pack in a character. *) assert (8 mod k = 0); let w = 8 / k in (* [n] is the length of the string that we allocate. *) let n = if m mod w = 0 then m / w else m / w + 1 in let s = Bytes.create n in (* Define a reader for the source array. The reader might run off the end if [w] does not divide [m]. *) let i = ref 0 in let next () = let ii = !i in if ii = m then 0 (* ran off the end, pad with zeroes *) else let v = a.(ii) in i := ii + 1; v in (* Fill up the string. *) for j = 0 to n - 1 do let c = ref 0 in for _x = 1 to w do c := (!c lsl k) lor next() done; Bytes.set s j (Char.chr !c) done; (* Done. *) k, Bytes.unsafe_to_string s end else begin (* k > 8 *) (* [w] is the number of characters that we use to encode an array entry. *) assert (k mod 8 = 0); let w = k / 8 in (* [n] is the length of the string that we allocate. *) let n = m * w in let s = Bytes.create n in (* Fill up the string. *) for i = 0 to m - 1 do let v = ref a.(i) in for x = 1 to w do Bytes.set s ((i + 1) * w - x) (Char.chr (!v land 255)); v := !v lsr 8 done done; (* Done. *) k, Bytes.unsafe_to_string s end (* Access to a string. *) let[@inline] read (s : string) (i : int) : int = Char.code (String.unsafe_get s i) let[@inline] get1 (s : string) (i : int) : int = let c = read s (i lsr 3) in let c = c lsr ((lnot i) land 0b111) in let c = c land 0b1 in c let[@inline] get2 (s : string) (i : int) : int = let c = read s (i lsr 2) in let c = c lsr (2 * ((lnot i) land 0b11)) in let c = c land 0b11 in c let[@inline] get4 (s : string) (i : int) : int = let c = read s (i lsr 1) in let c = c lsr (4 * ((lnot i) land 0b1)) in let c = c land 0b1111 in c let get8 = read let[@inline] get16 (s : string) (i : int) : int = let j = 2 * i in (read s j) lsl 8 + read s (j + 1) let[@inline] get32 (s : string) (i : int) : int = let j = 4 * i in (((read s j lsl 8) + read s (j + 1)) lsl 8 + read s (j + 2)) lsl 8 + read s (j + 3) (* [get] is now commented out, as it is no longer used. Only its specialized variants are used. This is in principle faster (though we have not found a significant difference in practice). (* [get t i] returns the integer stored in the packed array [t] at index [i]. *) (* Together, [pack] and [get] satisfy the following property: if the index [i] is within bounds, then [get (pack a) i] equals [a.(i)]. *) let get ((k, s) : t) (i : int) : int = match k with | 1 -> get1 s i | 2 -> get2 s i | 4 -> get4 s i | 8 -> get8 s i | 16 -> get16 s i | _ -> assert (k = 32); (* 64 bits unlikely, not supported *) get32 s i *)