Source file custom_section.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
(*****************************************************************************)
(*                                                                           *)
(* Open Source License                                                       *)
(* Copyright (c) 2023 Nomadic Labs <contact@nomadic-labs.com>                *)
(*                                                                           *)
(* Permission is hereby granted, free of charge, to any person obtaining a   *)
(* copy of this software and associated documentation files (the "Software"),*)
(* to deal in the Software without restriction, including without limitation *)
(* the rights to use, copy, modify, merge, publish, distribute, sublicense,  *)
(* and/or sell copies of the Software, and to permit persons to whom the     *)
(* Software is furnished to do so, subject to the following conditions:      *)
(*                                                                           *)
(* The above copyright notice and this permission notice shall be included   *)
(* in all copies or substantial portions of the Software.                    *)
(*                                                                           *)
(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)
(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  *)
(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL   *)
(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)
(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING   *)
(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER       *)
(* DEALINGS IN THE SOFTWARE.                                                 *)
(*                                                                           *)
(*****************************************************************************)

(**
   This files implements the parsing of custom subsection, especially the `name`
   custom section (see
   https://webassembly.github.io/spec/core/appendix/custom.html#name-section).

   The `name` section has the following format:
   [h] [len] [vec_len:n] ([index] [name_len] [name])^n
   where
   - [h] is a tag encoded in a single byte (`1` for the functions subsection)
   - [len] is a variable-length unsigned 32bits integer (`vu32`), which is
     the length of the subsection
   - [vec_len] (`vu32`) encoding the number of values in the vector
   then for each value of the vector:
     - [index] (`vu32`) encoding the function representation
     - [name_len] (`vu32`) encoding the length in bytes of the name
     - [name] (`utf8`) bytes of length `name_len` encoding an utf8
       representation of the symbol

*)

(* Adapted from {Tezos_lib_webassembly.Decode} *)
let rec vuN n bytes index =
  let b, next_index = (String.get bytes index |> Char.code, succ index) in
  assert (n >= 7 || b land 0x7f < 1 lsl n) ;
  let x = Int64.of_int (b land 0x7f) in
  if b land 0x80 = 0 then (x, next_index)
  else
    let v, next_index = vuN (n - 7) bytes next_index in
    (Int64.(logor x (shift_left v 7)), next_index)

let vu32 bytes index =
  let value, next_index = vuN 32 bytes index in
  (Int64.to_int32 value, next_index)

(** [parse_subsection_header bytes index] reads the tag for the subsection and
    its length, and returns the next index to continue reading. Returns `None`
    if there are not at least 2 bytes to read. *)
let parse_subsection_header bytes start =
  (* At least two string: one for the header, and at least one for the length of
     the subsection. *)
  if String.length bytes < start + 2 then None
  else
    let len, next_index = vu32 bytes (start + 1) in
    Some (String.get bytes 0, len, next_index)

let u32_to_int u =
  match Int32.unsigned_to_int u with None -> assert false | Some i -> i

(** [get_function_name_section_indexes bytes] returns the starting index of the
    `functions` subsection and its length. *)
let get_function_name_section_indexes bytes =
  let rec parse next_index =
    match parse_subsection_header bytes next_index with
    | None -> None
    | Some ('\001', len, next_index) -> Some (next_index, len)
    | Some (_, len, next_index) -> parse (next_index + u32_to_int len)
  in
  parse 0

(** [parse_nameassoc bytes index] parses a `(index, name)` encoded value and
    returns the index to continue the reading. *)
let parse_nameassoc bytes start =
  let idx, next_index = vu32 bytes start in
  let name_len, start_index = vu32 bytes next_index in
  let name_len = u32_to_int name_len in
  let buffer = Buffer.create name_len in
  let rec decode string index =
    if index >= name_len + start_index then index
    else
      let uchar = String.get_utf_8_uchar string index in
      (if Uchar.utf_decode_is_valid uchar then
       let u = Uchar.utf_decode_uchar uchar in
       if Uchar.is_char u then Buffer.add_char buffer (Uchar.to_char u)) ;
      decode string (index + Uchar.utf_decode_length uchar)
  in
  let index = decode bytes start_index in
  let name = Buffer.contents buffer in
  ((idx, name), index)

module FuncMap = Map.Make (Int32)

(** [parse_vec bytes start parse_value] parses an encoded vector and its values
    with [parse_value]. *)
let parse_vec bytes start parse_value =
  let len, next_index = vu32 bytes start in
  let len = u32_to_int len in
  let rec parse_values index nth acc =
    if nth >= len then acc
    else
      let value, next_index = parse_value bytes index in
      parse_values next_index (succ nth) (Seq.cons value acc)
  in
  parse_values next_index 0 Seq.empty

(** [parse_function_subsection bytes] parse and returns the `functions`
    subsection, as described by the reference documentation. *)
let parse_function_subsection subsection =
  match get_function_name_section_indexes subsection with
  | None -> FuncMap.empty
  | Some (start, _len) ->
      parse_vec subsection start parse_nameassoc |> FuncMap.of_seq

(** [pp_function_subsection ppf map] pretty-prints the parsed functions
    subsection. *)
let pp_function_subsection ppf map =
  let pp_assoc ppf (idx, name) =
    Format.fprintf ppf " - func[%ld] <%s>" idx name
  in
  FuncMap.to_seq map
  |> Format.pp_print_seq
       ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n")
       pp_assoc
       ppf

let parse_custom_sections name module_ =
  let open Lwt_syntax in
  let bytes = Tezos_lazy_containers.Chunked_byte_vector.of_string module_ in
  let+ custom =
    Tezos_webassembly_interpreter.Decode.decode_custom "name" ~name ~bytes
  in
  let functions_section =
    List.map parse_function_subsection custom
    |> List.fold_left (FuncMap.merge (fun _ -> Option.either)) FuncMap.empty
  in
  functions_section