123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505(*****************************************************************************)(* *)(* Open Source License *)(* Copyright (c) 2020 Nomadic Labs <contact@nomadic-labs.com> *)(* *)(* Permission is hereby granted, free of charge, to any person obtaining a *)(* copy of this software and associated documentation files (the "Software"),*)(* to deal in the Software without restriction, including without limitation *)(* the rights to use, copy, modify, merge, publish, distribute, sublicense, *)(* and/or sell copies of the Software, and to permit persons to whom the *)(* Software is furnished to do so, subject to the following conditions: *)(* *)(* The above copyright notice and this permission notice shall be included *)(* in all copies or substantial portions of the Software. *)(* *)(* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR*)(* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *)(* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *)(* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER*)(* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING *)(* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER *)(* DEALINGS IN THE SOFTWARE. *)(* *)(*****************************************************************************)typejsonm_lexeme=[`Null|`Boolofbool|`Stringofstring|`Floatoffloat|`Nameofstring|`As|`Ae|`Os|`Oe]letstring_of_floatf=let(fract,intr)=modffiniffract=0.0thenFormat.asprintf"%.0f"intrelseFormat.asprintf"%g"fletstring_needs_escaping_atindexs=letexceptionAtofintintryfori=indextoString.lengths-1domatchs.[i]with|'\"'|'\n'|'\r'|'\b'|'\t'|'\\'|'\x00'..'\x1F'->raise(Ati)|_->()done;-1withAti->iletdo_escape_strings=letbuff=Buffer.create(String.lengths)infori=0toString.lengths-1domatchs.[i]with|'\"'->Buffer.add_stringbuff"\\\""|'\n'->Buffer.add_stringbuff"\\n"|'\r'->Buffer.add_stringbuff"\\r"|'\b'->Buffer.add_stringbuff"\\b"|'\t'->Buffer.add_stringbuff"\\t"|'\\'->Buffer.add_stringbuff"\\\\"|'\x00'..'\x1F'asc->Format.kasprintf(Buffer.add_stringbuff)"\\u%04x"(Char.codec)|c->Buffer.add_charbuffcdone;Buffer.contentsbuffletescape_strings=ifstring_needs_escaping_at0s>=0thendo_escape_stringselses(** small_string_seq_of_jsonm_lexeme_seq: converts a seq of lexeme into a naive
seq of small strings. This may or may not be appripriate depending on the
way the resulting seq is consumed. *)letsmall_string_seq_of_jsonm_lexeme_seq~newline(s:jsonm_lexemeSeq.t):stringSeq.t=letrecsseqfirstdepthseq()=matchseq()with|Seq.Nil->assert(depth=0);ifnewlinethenSeq.Cons("\n",Seq.empty)elseSeq.Nil|Seq.Cons(`Null,seq)->lettail=Seq.Cons("null",sseqfalsedepthseq)inif(notfirst)&&depth>0thenSeq.Cons(",",fun()->tail)elsetail|Seq.Cons(`Booltrue,seq)->lettail=Seq.Cons("true",sseqfalsedepthseq)inif(notfirst)&&depth>0thenSeq.Cons(",",fun()->tail)elsetail|Seq.Cons(`Boolfalse,seq)->lettail=Seq.Cons("false",sseqfalsedepthseq)inif(notfirst)&&depth>0thenSeq.Cons(",",fun()->tail)elsetail|Seq.Cons(`As,seq)->lettail=Seq.Cons("[",sseqtrue(depth+1)seq)inif(notfirst)&&depth>0thenSeq.Cons(",",fun()->tail)elsetail|Seq.Cons(`Ae,seq)->Seq.Cons("]",sseqfalse(depth-1)seq)|Seq.Cons(`Os,seq)->lettail=Seq.Cons("{",sseqtrue(depth+1)seq)inif(notfirst)&&depth>0thenSeq.Cons(",",fun()->tail)elsetail|Seq.Cons(`Oe,seq)->Seq.Cons("}",sseqfalse(depth-1)seq)|Seq.Cons(`Strings,seq)->lettail=Seq.Cons("\"",fun()->Seq.Cons(escape_strings,fun()->Seq.Cons("\"",sseqfalsedepthseq)))inif(notfirst)&&depth>0thenSeq.Cons(",",fun()->tail)elsetail|Seq.Cons(`Floatf,seq)->letf=string_of_floatfinlettail=Seq.Cons(f,sseqfalsedepthseq)inif(notfirst)&&depth>0thenSeq.Cons(",",fun()->tail)elsetail|Seq.Cons(`Namen,seq)->lettail=Seq.Cons("\"",fun()->Seq.Cons(escape_stringn,fun()->Seq.Cons("\"",fun()->Seq.Cons(":",sseqtruedepthseq))))inif(notfirst)&&depth>0thenSeq.Cons(",",fun()->tail)elsetailinsseqfalse0sletdump_unescaped_stringchunk_size_hintbuffstrindexseq=letrecauxbytes_left_in_buffindex=ifbytes_left_in_buff>String.lengthstr-indexthen(Buffer.add_substringbuffstrindex(String.lengthstr-index);seq())else(Buffer.add_substringbuffstrindexbytes_left_in_buff;lets=Buffer.contentsbuffinBuffer.clearbuff;Seq.Cons(s,fun()->auxchunk_size_hint(index+bytes_left_in_buff)))inaux(chunk_size_hint-Buffer.lengthbuff)index(* This is written with the assumption that there aren't many characters that
need escaping: a few here and there, maybe just a couple of newlines in a
block of text.
Breaking this assumption does not cause errors. However, the performances
might degrade somewhat. *)letdump_escaped_stringchunk_size_hintbuffstrseq=letrecaux_outerbytes_left_in_buffindex=letnext_escape=string_needs_escaping_atindexstrinifbytes_left_in_buff<=6then(lets=Buffer.contentsbuffinBuffer.clearbuff;Seq.Cons(s,fun()->aux_outerchunk_size_hintindex))elseifnext_escape<0then(* string does not need escaping: dump the rest of the string as is *)dump_unescaped_stringchunk_size_hintbuffstrindexseqelseifnext_escape=0then((* index is at character that needs escaping *)(matchstr.[index]with|'\"'->Buffer.add_stringbuff"\\\""|'\n'->Buffer.add_stringbuff"\\n"|'\r'->Buffer.add_stringbuff"\\r"|'\b'->Buffer.add_stringbuff"\\b"|'\t'->Buffer.add_stringbuff"\\t"|'\\'->Buffer.add_stringbuff"\\\\"|'\x00'..'\x1F'asc->Format.kasprintf(Buffer.add_stringbuff)"\\u%04x"(Char.codec)|c->Buffer.add_charbuffc);aux_outer(chunk_size_hint-Buffer.lengthbuff)(index+1))else(* string needs escaping but later: write the non-empty non-escaped
prefix and loop back *)letto_write_unescaped=next_escape-indexinifbytes_left_in_buff>to_write_unescapedthen(Buffer.add_substringbuffstrindexto_write_unescaped;aux_outer(bytes_left_in_buff-to_write_unescaped)(index+to_write_unescaped))elseletrecaux_innerbytes_left_in_buffindexto_write_unescapedcontinue=ifbytes_left_in_buff<to_write_unescapedthen(Buffer.add_substringbuffstrindexbytes_left_in_buff;lets=Buffer.contentsbuffinSeq.Cons(s,fun()->aux_innerchunk_size_hint(index+bytes_left_in_buff)(to_write_unescaped-bytes_left_in_buff)continue))else(Buffer.add_substringbuffstrindexto_write_unescaped;continue(index+to_write_unescaped))inaux_innerbytes_left_in_buffindexto_write_unescaped(funindex->aux_outer(chunk_size_hint-Buffer.lengthbuff)index)inaux_outer(chunk_size_hint-Buffer.lengthbuff)0letdump_string_literalchunk_size_hintbuffliteralseq=Buffer.add_charbuff'"';dump_escaped_stringchunk_size_hintbuffliteral(fun()->Buffer.add_charbuff'"';seq())letstring_seq_of_jsonm_lexeme_seq~newline~chunk_size_hint(s:jsonm_lexemeSeq.t):stringSeq.t=(* we need chunk_size_hint to be reasonably high to accommodate all small
literals *)letchunk_size_hint=minchunk_size_hint16in(* we occasionally print several characters before checking the length of the
buffer (e.g., in the case of a key-value name with non-printable character
at the end: 6 for one hex-encoded non-printable character in a string + 1
for the string closing double-quote + 1 for the key-value colon separator
= 8 in total) (e.g., [Float.pred 0.] is ["-4.94066e-324"] which is 12
characters long).
So we allocate just above chunk_size +8 to avoid the need to resize. *)letbuff_size=chunk_size_hint+16in(* single buffer for the whole serialisation *)letbuff=Buffer.createbuff_sizeinletrecsseqfirstdepthseq()=ifBuffer.lengthbuff>=buff_sizethen((* emit buffer content if we have reached the chunk size *)letb=Buffer.contentsbuffinBuffer.clearbuff;Seq.Cons(b,fun()->(sseq[@ocaml.tailcall])firstdepthseq()))elsematchseq()with(* termination *)|Seq.Nil->assert(depth=0);ifnewlinethenBuffer.add_charbuff'\n';(* value terminator: newline *)ifBuffer.lengthbuff=0then(* corner case: we just flushed and haven't added a newline *)Seq.Nilelseletb=Buffer.contentsbuffinBuffer.clearbuff;Seq.Cons(b,Seq.empty)(* fixed length, small lexemes *)|Seq.Cons(`Null,seq)->(* if we are inside an object/array (i.e., depth is > 0) and we are
not the first item (i.e., first is false) then we put a delimiter
character. *)if(notfirst)&&depth>0thenBuffer.add_charbuff',';(* then the value *)Buffer.add_stringbuff"null";(* and we continue with the rest. Note that depth is unchanged
but first is false whatever it's original value (because whatever
follows _follows_) *)(sseq[@ocaml.tailcall])falsedepthseq()|Seq.Cons(`Booltrue,seq)->if(notfirst)&&depth>0thenBuffer.add_charbuff',';Buffer.add_stringbuff"true";(sseq[@ocaml.tailcall])falsedepthseq()|Seq.Cons(`Boolfalse,seq)->if(notfirst)&&depth>0thenBuffer.add_charbuff',';Buffer.add_stringbuff"false";(sseq[@ocaml.tailcall])falsedepthseq()|Seq.Cons(`As,seq)->if(notfirst)&&depth>0thenBuffer.add_charbuff',';Buffer.add_charbuff'[';(* We increase the depth and mark the next value as being the first
value of an array. *)(sseq[@ocaml.tailcall])true(depth+1)seq()|Seq.Cons(`Ae,seq)->assert(depth>0);Buffer.add_charbuff']';(sseq[@ocaml.tailcall])false(depth-1)seq()|Seq.Cons(`Os,seq)->if(notfirst)&&depth>0thenBuffer.add_charbuff',';Buffer.add_charbuff'{';(sseq[@ocaml.tailcall])true(depth+1)seq()|Seq.Cons(`Oe,seq)->assert(depth>0);Buffer.add_charbuff'}';(sseq[@ocaml.tailcall])false(depth-1)seq()|Seq.Cons(`Strings,seq)->if(notfirst)&&depth>0thenBuffer.add_charbuff',';(* we delegate string literals to [dump_string_literal]. Note that we
pass the rest of the sequence as a kind of continuation. This is
because [dump_string_literal] may fill up the buffer and then some
(depending on the size of the literal) and so it needs to be able
to stick a few things in front. *)dump_string_literalchunk_size_hintbuffs(fun()->(sseq[@ocaml.tailcall])falsedepthseq())|Seq.Cons(`Floatf,seq)->if(notfirst)&&depth>0thenBuffer.add_charbuff',';letf=string_of_floatfinBuffer.add_stringbufff;(sseq[@ocaml.tailcall])falsedepthseq()|Seq.Cons(`Namen,seq)->if(notfirst)&&depth>0thenBuffer.add_charbuff',';dump_string_literalchunk_size_hintbuffn(fun()->(* set first to true to avoid printing of separator *)Buffer.add_charbuff':';(sseq[@ocaml.tailcall])truedepthseq())insseqfalse0sletbiseq_escaped_string_contentbufferoffsetsk=letcan_be_written=Bytes.lengthbuffer-offsetinifString.lengths+1>can_be_written+Bytes.lengthbufferthenif(* large string and.. *)(* TODO: present the string as a sequence of of blit instructions with
increasing offsets *)offset<Bytes.lengthbuffer/2then((* ..and the current buffer is almost empty:
dump as much as we can on the current buffer to avoid sending a
small buffer in the seq, and then use the rest of the string as its
own buffer *)Bytes.blit_strings0bufferoffsetcan_be_written;letoffset=offset+can_be_writteninassert(offset=Bytes.lengthbuffer);Seq.Cons((buffer,0,offset),fun()->lets=Bytes.unsafe_of_stringsinSeq.Cons((s,can_be_written,Bytes.lengths-can_be_written),fun()->k0)))else(* ..and the buffer is reasonably full:
put the current buffer in the seq and then the string as a single
chunk *)Seq.Cons((buffer,0,offset),fun()->lets=Bytes.unsafe_of_stringsinSeq.Cons((s,0,Bytes.lengths),fun()->k0))elseifString.lengths+1<=can_be_writtenthen((* we [+ 1] to account for the closing quote that will be added by [k] *)(* small string: we dump it on the buffer and continue *)Bytes.blit_strings0bufferoffset(String.lengths);letoffset=offset+String.lengthsinkoffset)else((* medium string: we blit two parts onto the buffer *)Bytes.blit_strings0bufferoffsetcan_be_written;letoffset=offset+can_be_writteninassert(offset=Bytes.lengthbuffer);Seq.Cons((buffer,0,offset),fun()->letremain_to_be_written=String.lengths-can_be_writteninBytes.blit_stringscan_be_writtenbuffer0remain_to_be_written;letoffset=remain_to_be_writteninkoffset))letbiseq_string_literalbufferoffsetsk=Bytes.setbufferoffset'"';letoffset=offset+1inletfirst_escape=string_needs_escaping_at0siniffirst_escape<0thenbiseq_escaped_string_contentbufferoffsets(funoffset->Bytes.setbufferoffset'"';k(offset+1))else(* NOTE: offset can't be 0 because we just wrote '"', also the string cannot
be empty because this is matched-for earlier *)(* TODO: optimise by escaping using the available buffer *)lets=do_escape_stringsinbiseq_escaped_string_contentbufferoffsets(funoffset->Bytes.setbufferoffset'"';k(offset+1))letblit_instructions_seq_of_jsonm_lexeme_seq~newline~bufferlexeme_seq=letbuffer_size=Bytes.lengthbufferinifbuffer_size<32thenraise(Invalid_argument"Data_encoding.blit_instructions_seq_of_jsonm_lexeme_seq");letflush_at=buffer_size-16inlet[@ocaml.inline]sepfirstdepthoffset=(* if we are inside an object/array (i.e., depth is > 0) and we are
not the first item (i.e., first is false) then we put a delimiter
character. *)if(notfirst)&&depth>0then(Bytes.setbufferoffset',';offset+1)elseoffsetinletrecbiseqfirstdepthoffsetseq()=ifoffset>=flush_atthen(* emit buffer content if we have reached the chunk size *)Seq.Cons((buffer,0,offset),fun()->(biseq[@ocaml.tailcall])firstdepth0seq())elsematchseq()with(* termination *)|Seq.Nil->assert(depth=0);letoffset=ifnewlinethen(Bytes.setbufferoffset'\n';offset+1)elseoffsetinifoffset=0then(* corner case: we just flushed (and haven't added a newline) *)Seq.NilelseSeq.Cons((buffer,0,offset),fun()->Seq.Nil)(* fixed length, small lexemes *)|Seq.Cons(`Null,seq)->letoffset=sepfirstdepthoffsetinBytes.blit_string"null"0bufferoffset4;letoffset=offset+4in(biseq[@ocaml.tailcall])falsedepthoffsetseq()|Seq.Cons(`Booltrue,seq)->letoffset=sepfirstdepthoffsetinBytes.blit_string"true"0bufferoffset4;letoffset=offset+4in(biseq[@ocaml.tailcall])falsedepthoffsetseq()|Seq.Cons(`Boolfalse,seq)->letoffset=sepfirstdepthoffsetinBytes.blit_string"false"0bufferoffset5;letoffset=offset+5in(biseq[@ocaml.tailcall])falsedepthoffsetseq()|Seq.Cons(`As,seq)->letoffset=sepfirstdepthoffsetinBytes.setbufferoffset'[';letoffset=offset+1in(biseq[@ocaml.tailcall])true(depth+1)offsetseq()|Seq.Cons(`Ae,seq)->Bytes.setbufferoffset']';letoffset=offset+1in(biseq[@ocaml.tailcall])false(depth-1)offsetseq()|Seq.Cons(`Os,seq)->letoffset=sepfirstdepthoffsetinBytes.setbufferoffset'{';letoffset=offset+1in(biseq[@ocaml.tailcall])true(depth+1)offsetseq()|Seq.Cons(`Oe,seq)->Bytes.setbufferoffset'}';letoffset=offset+1in(biseq[@ocaml.tailcall])false(depth-1)offsetseq()|Seq.Cons(`String"",seq)->letoffset=sepfirstdepthoffsetinBytes.blit_string"\"\""0bufferoffset2;letoffset=offset+2in(biseq[@ocaml.tailcall])falsedepthoffsetseq()|Seq.Cons(`Strings,seq)->letoffset=sepfirstdepthoffsetin(* we delegate string literals to [dump_string_literal]. Note that we
pass the rest of the sequence as a kind of continuation. This is
because [dump_string_literal] may fill up the buffer and then some
(depending on the size of the literal) and so it needs to be able
to stick a few things in front. *)biseq_string_literalbufferoffsets(funoffset->(biseq[@ocaml.tailcall])falsedepthoffsetseq())|Seq.Cons(`Floatf,seq)->letoffset=sepfirstdepthoffsetinletf=string_of_floatfinbiseq_escaped_string_contentbufferoffsetf(funoffset->(biseq[@ocaml.tailcall])falsedepthoffsetseq())|Seq.Cons(`Namen,seq)->letoffset=sepfirstdepthoffsetinbiseq_string_literalbufferoffsetn(funoffset->ifoffset=buffer_sizethenSeq.Cons((buffer,0,offset),fun()->Bytes.setbuffer0':';letoffset=1in(* set first to true to avoid printing of separator *)(biseq[@ocaml.tailcall])truedepthoffsetseq())else((* set first to true to avoid printing of separator *)Bytes.setbufferoffset':';letoffset=offset+1in(biseq[@ocaml.tailcall])truedepthoffsetseq()))inbiseqfalse00lexeme_seq