(*****************************************************************************)
(*                                                                           *)
(* Open Source License                                                       *)
(* Copyright (c) 2023 Nomadic Labs <contact@nomadic-labs.com>                *)
(*                                                                           *)
(* Permission is hereby granted, free of charge, to any person obtaining a   *)
(* copy of this software and associated documentation files (the "Software"),*)
(* to deal in the Software without restriction, including without limitation *)
(* the rights to use, copy, modify, merge, publish, distribute, sublicense,  *)
(* and/or sell copies of the Software, and to permit persons to whom the     *)
(* Software is furnished to do so, subject to the following conditions:      *)
(*                                                                           *)
(* The above copyright notice and this permission notice shall be included   *)
(* in all copies or substantial portions of the Software.                    *)
(*                                                                           *)
(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)
(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,  *)
(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL   *)
(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)
(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING   *)
(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER       *)
(* DEALINGS IN THE SOFTWARE.                                                 *)
(*                                                                           *)
(*****************************************************************************)

(**
   This file implements the parsing of custom subsections, especially the
   `name` custom section (see
   https://webassembly.github.io/spec/core/appendix/custom.html#name-section).

   The `name` section has the following format:
   [h] [len] [vec_len:n] ([index] [name_len] [name])^n
   where
   - [h] is a tag encoded in a single byte (`1` for the functions subsection)
   - [len] is a variable-length unsigned 32-bit integer (`vu32`), which is
     the length of the subsection
   - [vec_len] (`vu32`) encodes the number of values in the vector

   then for each value of the vector:
   - [index] (`vu32`) encodes the index of the function
   - [name_len] (`vu32`) encodes the length in bytes of the name
   - [name] (`utf8`) is [name_len] bytes holding a UTF-8 representation of
     the symbol
*)

(* Adapted from {Tezos_lib_webassembly.Decode} *)

(** [vuN n bytes index] decodes an unsigned LEB128 integer of at most [n] bits
    from [bytes], starting at [index]. Each byte contributes its 7 low bits,
    least significant group first; the high bit tells whether another byte
    follows. Returns the decoded value together with the index of the first
    byte after the encoding.

    @raise Assert_failure if the encoding uses more bytes than an [n]-bit
    integer allows, or if its last byte carries bits beyond the [n]-th.
    @raise Invalid_argument if the encoding runs past the end of [bytes]. *)
let rec vuN n bytes index =
  (* Reject over-long encodings: without this guard [n] would become negative
     on the next byte and [1 lsl n] below would be unspecified. *)
  assert (n > 0) ;
  let b = Char.code (String.get bytes index) in
  let next_index = succ index in
  (* Once fewer than 7 bits remain, the payload of this byte must fit them. *)
  assert (n >= 7 || b land 0x7f < 1 lsl n) ;
  let x = Int64.of_int (b land 0x7f) in
  if b land 0x80 = 0 then (x, next_index)
  else
    (* Continuation bit set: the following bytes hold the higher-order bits. *)
    let v, next_index = vuN (n - 7) bytes next_index in
    (Int64.(logor x (shift_left v 7)), next_index)

(** [vu32 bytes index] decodes a `vu32` from [bytes] at [index] and returns it
    with the index following the encoding. The value is stored in an [int32]
    as an unsigned bit pattern, so large values read back as negative unless
    they are converted with {!u32_to_int}. Raises the same exceptions as
    {!vuN}. *)
let vu32 bytes index =
  let value, next_index = vuN 32 bytes index in
  (Int64.to_int32 value, next_index)

(** [parse_subsection_header bytes start] reads the tag of the subsection
    starting at [start] and its length, and returns them with the next index to
    continue reading from (the first byte of the subsection payload). Returns
    `None` if there are not at least 2 bytes to read from [start]. *)
let parse_subsection_header bytes start =
  (* At least two bytes: one for the tag, and at least one for the length of
     the subsection. *)
  if String.length bytes < start + 2 then None
  else
    let len, next_index = vu32 bytes (start + 1) in
    (* The tag is the first byte of the subsection being read, that is the
       byte at [start], not the first byte of [bytes]. *)
    Some (String.get bytes start, len, next_index)

(** [u32_to_int u] interprets [u] as an unsigned 32-bit integer and converts it
    to a native [int].

    @raise Assert_failure if the value does not fit in an [int]. *)
let u32_to_int u =
  match Int32.unsigned_to_int u with None -> assert false | Some i -> i

(** [get_function_name_section_indexes bytes] returns the starting index of the
    payload of the `functions` subsection (tag `1`) and its length, or `None`
    if no such subsection is found. Subsections with another tag are skipped
    using their declared length. *)
let get_function_name_section_indexes bytes =
  let rec parse next_index =
    match parse_subsection_header bytes next_index with
    | None -> None
    | Some ('\001', len, next_index) -> Some (next_index, len)
    | Some (_, len, next_index) -> parse (next_index + u32_to_int len)
  in
  parse 0

(** [parse_nameassoc bytes start] parses an `(index, name)` encoded value
    starting at [start] and returns it with the index to continue the reading
    from.

    The name is decoded as UTF-8. Only valid code points below 256 (those for
    which [Uchar.is_char] holds) are kept, each as a single byte; other code
    points and invalid sequences are skipped. *)
let parse_nameassoc bytes start =
  let idx, next_index = vu32 bytes start in
  let name_len, start_index = vu32 bytes next_index in
  let name_len = u32_to_int name_len in
  let end_index = start_index + name_len in
  let buffer = Buffer.create name_len in
  let rec decode index =
    if index >= end_index then index
    else
      let decoded = String.get_utf_8_uchar bytes index in
      (if Uchar.utf_decode_is_valid decoded then
         let u = Uchar.utf_decode_uchar decoded in
         if Uchar.is_char u then Buffer.add_char buffer (Uchar.to_char u)) ;
      (* Always advance by the number of bytes consumed by the decoder, even
         for invalid sequences, so that the loop terminates. *)
      decode (index + Uchar.utf_decode_length decoded)
  in
  let index = decode start_index in
  let name = Buffer.contents buffer in
  ((idx, name), index)

(** Maps keyed by function index. *)
module FuncMap = Map.Make (Int32)

(** [parse_vec bytes start parse_value] parses an encoded vector starting at
    [start]: a `vu32` number of values followed by the values themselves, each
    read with [parse_value bytes index], which must return the value and the
    index following it.

    Values are prepended to the result as they are read, so the returned
    sequence lists them in reverse order of appearance. *)
let parse_vec bytes start parse_value =
  let len, next_index = vu32 bytes start in
  let len = u32_to_int len in
  let rec parse_values index nth acc =
    if nth >= len then acc
    else
      let value, next_index = parse_value bytes index in
      parse_values next_index (succ nth) (Seq.cons value acc)
  in
  parse_values next_index 0 Seq.empty

(** [parse_function_subsection subsection] parses and returns the `functions`
    subsection, as described by the reference documentation, as a map from
    function index to function name. Returns an empty map if [subsection]
    contains no `functions` subsection. *)
let parse_function_subsection subsection =
  match get_function_name_section_indexes subsection with
  | None -> FuncMap.empty
  | Some (start, _len) ->
      parse_vec subsection start parse_nameassoc |> FuncMap.of_seq

(** [pp_function_subsection ppf map] pretty-prints the parsed functions
    subsection, one ` - func[index] <name>` entry per line. *)
let pp_function_subsection ppf map =
  let pp_assoc ppf (idx, name) =
    Format.fprintf ppf " - func[%ld] <%s>" idx name
  in
  FuncMap.to_seq map
  |> Format.pp_print_seq
       ~pp_sep:(fun ppf () -> Format.fprintf ppf "\n")
       pp_assoc
       ppf

(** [parse_custom_sections name module_] decodes the `name` custom sections of
    the module whose contents are [module_], parses the `functions` subsection
    of each of them, and merges the results into a single map from function
    index to function name.

    NOTE(review): [decode_custom] is defined outside this file; it is assumed
    to return the list of payloads of the matching custom sections, and [name]
    is simply forwarded to it. [Option.either] also comes from outside this
    file; presumably it keeps its first argument when present, so that the
    binding from the earliest section wins. To be confirmed. *)
let parse_custom_sections name module_ =
  let open Lwt_syntax in
  let bytes = Tezos_lazy_containers.Chunked_byte_vector.of_string module_ in
  let+ custom =
    Tezos_webassembly_interpreter.Decode.decode_custom "name" ~name ~bytes
  in
  List.map parse_function_subsection custom
  |> List.fold_left (FuncMap.merge (fun _ -> Option.either)) FuncMap.empty