123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602open!CoreopenWriter_intf(** We want to be able to write event arguments without allocating, which requires users
specify the argument types they will pass up front. To avoid allocating a record to
store those counts we pre-compile the fields of the event header which have to do
with argument counts and total size into an immediate value. *)moduleHeader_template=structtypet=intletnone=0letcreate?(int64s=0)?(int32s=0)?(floats=0)?(strings=0)()=letnum_args=int64s+floats+int32s+stringsinletarg_words=(int64s*2)+(floats*2)+int32s+stringsin(* This also guards [arg_words] since it has a much larger bound *)ifnum_args>15thenfailwithf"%i is over the 15 event argument limit"num_args();(arg_wordslsl4)lor(num_argslsl20);;letadd_sizetwords=t+(wordslsl4)(* isolate the rsize field, which is a word count shifted to the left by 4 bits,
we want the word count multiplied by 8, which is equivalent to the word count
shifted left by 3 (2**3=8), so we just need to shift right by one. *)letbyte_sizet=(tland0xFFF0)lsr1(* Because of the two bitfields for total size and argument count, we can effectively
treat the full [Header_template] as the sum of integers representing the arguments
we've comitted to. We can subtract integers representing those individual arguments
to remove them from the template, and if we reach zero then we've subtracted
compatible arguments. Except for issues involving overflow between the two fields,
which are unlikely to happen accidentally in practice, and this is only used by
a check to try to avoid writing invalid traces. See the comment for [pending_args]
inside [flush]. *)let[@inline]remove_argst?int64s?int32s?floats?strings()=t-create?int64s?int32s?floats?strings();;(* [pending_args] below is a trick to check that we've written arguments matching the
signature we gave to the event writer function.
If we set [pending_args] to the header when we write an event, and then use
[remove_args] every time we write an argument, then if [pending_args] ends up being
zero then the written arguments match the header. *)letcheck_nonet=ift<>nonethenift<nonethenfailwith"too many args written for arg type signature"elsefailwith"not enough args written for arg type signature";;end(** In the public API it makes more sense for it to be named [Arg_types] since that's
all the functionality which is exposed *)moduleArg_types=Header_templatetypet={mutablebuf:(read_write,Iobuf.seek)Iobuf.t;mutablenotified_lo:int;destination:(moduleDestination);mutablenext_thread_id:int;mutablenext_string_id:int;mutablenum_temp_strs:int;mutablepending_args:Header_template.t;mutableword_to_flush:int;mutablepending_word:bool}moduleTick_translation=Writer_intf.Tick_translationlet[@inline]write_int64ti=Iobuf.Fill.int64_let.bufilet[@inline]write_int64_tti=Iobuf.Fill.int64_t_let.bufi(* Due to the zero-alloc approach to writing arguments, some checking and writing needs
to be delayed until all arguments have been written, which should be before the next
event is written or the file is closed. *)letflusht=Header_template.check_nonet.pending_args;ift.pending_wordthen(write_int64tt.word_to_flush;t.pending_word<-false);;letnotify_writest=(* We don't notify on every write, just update on how much we've written since we last
called [D.wrote_bytes]. *)letbuf_lo=Iobuf.Expert.lot.bufinletpartially_written=buf_lo-t.notified_loinlet(moduleD:Destination)=t.destinationinD.wrote_bytespartially_written;t.notified_lo<-buf_lo;;let[@cold]switch_bufferst~ensure_capacity=notify_writest;let(moduleD:Destination)=t.destinationinletbuf=D.next_buf~ensure_capacityint.buf<-buf;t.notified_lo<-Iobuf.Expert.lobuf;letbuf_len=Iobuf.lengtht.bufinifbuf_len<ensure_capacitythenfailwithf"new buffer too small: %i bytes < %i requested"buf_lenensure_capacity();;(* In probes we never leave events with a pending_word and use a PPX to ensure arguments
are written correctly. So skip the flush for performance *)let[@inline]ensure_capacity_no_flushtamount=ifIobuf.lengtht.buf<amountthenswitch_bufferst~ensure_capacity:amount;;(* Everything that writes uses this call to allocate space beforehand, and should use one
call to allocate all the space it needs, both for efficiency and so that no events are
cut in half when buffers are dropped in any future shared memory transport. *)letensure_capacitytamount=flusht;ensure_capacity_no_flushtamount;;(* Because the format guarantees aligned 64-bit words, some things need to be padded to
8 bytes. This is an efficient expression for doing that. *)letpadding_to_wordx=-xland(8-1)(* many size fields in FTF are based on number of words, since the format is based on
everything being aligned 64-bit words. *)letround_words_forbytes=(bytes+8-1)/8letprovider_name="jane_tracing"letwrite_string_streamts=letlen=String.lengthsinletpadding=padding_to_wordleninensure_capacityt(len+padding);Iobuf.Fill.stringot.bufs;(* Pad with zero bytes *)Iobuf.memsett.buf~pos:0~len:paddingChar.min_value;Iobuf.advancet.bufpadding;;moduleString_id=structtypet=int[@@derivingequal]letempty=0letprocess=1letfirst_temp=2letmax_value=(1lsl15)-1letmax_number_of_temp_string_slots=max_value-first_temp+1endletset_string_slott~string_ids=letstr_len=String.lengthsin(* maximum string length defined in spec, somewhat less than 2**15 *)ifstr_len>=32000thenfailwithf"string too long for FTF trace: %i is over the limit of 32kb"str_len();(* String record *)letrtype=2inletrsize=1+round_words_forstr_leninensure_capacityt(rsize*8);write_int64t(rtypelor(rsizelsl4)lor(string_idlsl16)lor(str_lenlsl32));write_string_streamts;;letset_temp_string_slott~slots=ifslot>=t.num_temp_strsthenfailwithf"temp string slot over the limit: %i >= %i"slott.num_temp_strs();letstring_id=slot+String_id.first_tempinset_string_slott~string_ids;string_id;;letintern_stringts=(* This is an easy mistake to make, so give a more specific error message *)ift.pending_args<>0thenfailwith"can't intern strings while you still need to write arguments";letstring_id=t.next_string_idinifstring_id>String_id.max_valuethenfailwith"ran out of FTF string IDs";t.next_string_id<-t.next_string_id+1;set_string_slott~string_ids;string_id;;letnum_temp_strst=t.num_temp_strsletwrite_headert=ensure_capacityt8;(* Magic number record *)write_int64t0x0016547846040010;(* Provider info metadata *)letrtype=0inletname_len=String.lengthprovider_nameinletrsize=1+round_words_forname_leninletmtype=1inletprovider_id=0inensure_capacityt(rsize*8);write_int64t(rtypelor(rsizelsl4)lor(mtypelsl16)lor(provider_idlsl20)lor(name_lenlsl52));write_string_streamtprovider_name;(* Provider section metadata *)letrtype=0inletrsize=1inletmtype=2inensure_capacityt(rsize*8);write_int64t(rtypelor(rsizelsl4)lor(mtypelsl16)lor(provider_idlsl20));(* String constants used internally *)set_string_slott~string_id:String_id.process"process";();;letwrite_tick_initializationt(tick_translation:Tick_translation.t)=letrtype=1inletrsize=4inensure_capacityt(rsize*8);write_int64t(rtypelor(rsizelsl4));write_int64ttick_translation.ticks_per_second;write_int64ttick_translation.base_ticks;write_int64t(Time_ns.to_int_ns_since_epochtick_translation.base_time);;moduleThread_id=structtypet=intletfirst=1(* 0 means inline so 1 is first valid value *)endletset_thread_slott~slot~pid~tid=letthread_id=slot+Thread_id.firstinifthread_id>=1lsl8||thread_id<=0thenfailwithf"thread slot outside of valid range [0,254]: %i"slot();(* Thread record *)letrtype=3inletrsize=3inensure_capacityt(rsize*8);write_int64t(rtypelor(rsizelsl4)lor(thread_idlsl16));write_int64tpid;write_int64ttid;thread_id;;letset_process_namet~pid~name=(* Kernel object record *)letrtype=7inletrsize=2inletnum_args=0inletobj_type=1(* process *)inensure_capacityt(rsize*8);write_int64t(rtypelor(rsizelsl4)lor(obj_typelsl16)lor(namelsl24)lor(num_argslsl40));write_int64tpid;();;letset_thread_namet~pid~tid~name=(* Kernel object record *)letrtype=7inletarg_size=2inletrsize=2(* header *)+arg_sizeinletnum_args=1inletobj_type=2(* thread *)inensure_capacityt(rsize*8);write_int64t(rtypelor(rsizelsl4)lor(obj_typelsl16)lor(namelsl24)lor(num_argslsl40));write_int64ttid;(* Perfetto requires the thread to have an argument specifying the process ID *)letarg_type=8(* kernel object ID *)inletarg_name=String_id.processinwrite_int64t(arg_typelor(arg_sizelsl4)lor(arg_namelsl16));write_int64tpid;();;type'aevent_writer=t->arg_types:Arg_types.t->thread:Thread_id.t->category:String_id.t->name:String_id.t->ticks:int->'alet[@inline]event_header~counts~event_type~thread~category~name=Int64.(4L(* rtype *)lorof_intcountslor(of_intevent_typelsl16)lor(of_intthreadlsl24)lor(of_intcategorylsl32)lor(of_intnamelsl48));;moduleEvent_type=structtypet=intletinstant=0letcounter=1letduration_begin=2letduration_end=3letduration_complete=4letflow_begin=8letflow_step=9letflow_end=10endletwrite_eventt~event_type~extra_words~arg_types~thread~category~name~ticks=(* Event record *)letcounts=Header_template.add_sizearg_types(2+extra_words)inensure_capacityt(Header_template.byte_sizecounts);t.pending_args<-arg_types;letheader=event_header~counts~event_type~thread~category~nameinwrite_int64_ttheader;write_int64tticks;();;(* I believe using currying for these would allocate or involve additional cost. *)letwrite_instantt~arg_types~thread~category~name~ticks=(* The [let writer] style avoids ocamlformat splitting these over a million lines.
I checked under flambda it generates the same code as a single call. *)letwriter=write_eventt~event_type:Event_type.instant~extra_words:0inwriter~arg_types~thread~category~name~ticks;;letwrite_countert~arg_types~thread~category~name~ticks~counter_id=letwriter=write_eventt~event_type:Event_type.counter~extra_words:1inwriter~arg_types~thread~category~name~ticks;t.word_to_flush<-counter_id;t.pending_word<-true;;letwrite_duration_begint~arg_types~thread~category~name~ticks=letwriter=write_eventt~event_type:Event_type.duration_begin~extra_words:0inwriter~arg_types~thread~category~name~ticks;;letwrite_duration_endt~arg_types~thread~category~name~ticks=letwriter=write_eventt~event_type:Event_type.duration_end~extra_words:0inwriter~arg_types~thread~category~name~ticks;;letwrite_duration_completet~arg_types~thread~category~name~ticks~ticks_end=ifticks_end<ticksthenfailwithf"duration_complete event must have start tick (%i) greater than end tick (%i)"ticksticks_end();letwriter=write_eventt~event_type:Event_type.duration_complete~extra_words:1inwriter~arg_types~thread~category~name~ticks;t.word_to_flush<-ticks_end;t.pending_word<-true;;(* Flow events in the Fuchsia Trace Format are kind of weird in that they have a name,
category and arguments. These are all just ignored by Perfetto and have no good way of
being represented in its data model. The fact that these fields are in the FTF data
model is probably a legacy of attempted consistency in the Chromium JSON format. We
just set all these fields to dummy values. *)letwrite_flow_begint~thread~ticks~flow_id=write_eventt~event_type:Event_type.flow_begin~extra_words:1~arg_types:Arg_types.none~thread~category:String_id.empty~name:String_id.empty~ticks;write_int64tflow_id;;letwrite_flow_stept~thread~ticks~flow_id=write_eventt~event_type:Event_type.flow_step~extra_words:1~arg_types:Arg_types.none~thread~category:String_id.empty~name:String_id.empty~ticks;write_int64tflow_id;;letwrite_flow_endt~thread~ticks~flow_id=write_eventt~event_type:Event_type.flow_end~extra_words:1~arg_types:Arg_types.none~thread~category:String_id.empty~name:String_id.empty~ticks;write_int64tflow_id;;moduleWrite_arg_unchecked=struct(* None of the argument writers allocate capacity, the event does that. *)letstringt~namevalue=letatype=6inletasize=1inwrite_int64t(atypelor(asizelsl4)lor(namelsl16)lor(valuelsl32));;letint32t~namevalue=letatype=1Linletasize=1Lin(* int32 arguments can use the most significant bit, so we need to use Int64.t
and we also need to be careful to truncate the int32 properly. *)write_int64_ttInt64.(atypelor(asizelsl4)lor(of_intnamelsl16)(* because we use Int64 this also truncates to 32 bits *)lor(of_intvaluelsl32));;letint64t~namevalue=letatype=3inletasize=2inwrite_int64t(atypelor(asizelsl4)lor(namelsl16));write_int64tvalue;;letfloatt~namevalue=letatype=5inletasize=2inwrite_int64t(atypelor(asizelsl4)lor(namelsl16));write_int64_tt(Int64.bits_of_floatvalue);;endmoduleWrite_arg=structletstringt~namevalue=t.pending_args<-Header_template.remove_argst.pending_args~strings:1();Write_arg_unchecked.stringt~namevalue;;letint32t~namevalue=t.pending_args<-Header_template.remove_argst.pending_args~int32s:1();Write_arg_unchecked.int32t~namevalue;;letint64t~namevalue=t.pending_args<-Header_template.remove_argst.pending_args~int64s:1();Write_arg_unchecked.int64t~namevalue;;letfloatt~namevalue=t.pending_args<-Header_template.remove_argst.pending_args~floats:1();Write_arg_unchecked.floatt~namevalue;;endmoduleExpert=structmoduletypeDestination=Destinationletcreate?(num_temp_strs=100)~destination()=ifnum_temp_strs>String_id.max_number_of_temp_string_slotsthenfailwith"num_temp_strs too large";(* If [num_temp_strs] is set to [String_id.max_number_of_temp_string_slots],
[first_real_string] will be one greater than [String_id.max_value]. *)letfirst_real_string=String_id.first_temp+num_temp_strsinlet(moduleD:Destination)=destinationinletensure_capacity=8inletbuf=D.next_buf~ensure_capacityinlett={buf;destination;next_thread_id=Thread_id.first;next_string_id=first_real_string;num_temp_strs;pending_args=Header_template.none;word_to_flush=0;notified_lo=Iobuf.Expert.lobuf;pending_word=false}inwrite_headert;t;;letset_string_slott~slots=letfirst_non_temp_slot=String_id.first_temp+t.num_temp_strsinifslot>=first_non_temp_slotthenfailwith"Cannot call [Expert.set_string_slot] with a slot that is not a temp string slot";ifslot<=0thenfailwithf"string slot must be positive: slot %i <= 0"slot();ifslot=String_id.processthen(ifnotString.(s="process")thenfailwith"tried to overwrite the slot for the process string")elseset_string_slott~string_id:slots;slot;;letforce_switch_bufferst=flusht;switch_bufferst~ensure_capacity:1;;letflush_and_notifyt=flusht;notify_writest;;typeheader=Int64.tmoduleEvent_type=Event_type(* See [Header_template.byte_size] comment, this is the same but with Int64 operations *)let[@inline]header_byte_sizeheader=Int64.((headerland0xFFF0L)lsr1)|>Int64.to_int_trunc;;letprecompute_header~event_type~extra_words~arg_types~thread~category~name=letcounts=Header_template.add_sizearg_types(2+extra_words)inletheader=(event_header[@inlined])~counts~event_type~thread~category~namein(* we're going to unsafely write 16 bytes so validate this ahead of time using the
same function we'll use when writing. *)assert(header_byte_sizeheader>=16);header;;let[@inline]int64_of_tscticks=Time_stamp_counter.to_int63ticks|>Int63.to_int64let[@inline]write_from_header_and_get_tsct~header=(* Using [unsafe_set_int64_t_le] makes the assembly produced by this function
much simpler, with the writes getting completely inlined and only one conditional
branch for capacity checking.
The benchmark does show a 1.5x-2x slowdown for using safe set calls (3-6ns/event).
Safety proof sketch:
- By assert in [precompute_header] and abstraction of the type,
[header_byte_size header] >= 16 = bytes we write unsafely
- By [ensure_capacity], we know [Iobuf.length t.buf >= 16]
(this is either checked by the conditional or the check after [switch_buffers])
- By the definition of [Iobuf.length = hi - lo] we now have [hi - lo >= 16] and so
[hi >= lo + 16]
- By the invariant of [Iobuf] that [hi <= Bigstring.length (Iobuf.Expert.buf b)],
substitution and transitivity we have [Bigstring.length bstr >= lo + 16]
- We write 8 bytes at [pos = lo] and [pos = lo + 8], thus
we never write beyond [lo + 16].
- By another invariant of [Iobuf] we have [lo >= 0]
- By transitivity since we only write bytes at offsets x such that
[lo <= x < lo+16], given the above we have [0 <= x < Bigstring.length bstr] so
our writes are in bounds.
- Since [final_pos = lo + 16] and [lo+16<=hi] our [set_lo] maintains the [Iobuf]
invariant that [lo <= hi]. This function doesn't rely on [lo <= hi] but other
functions might.*)letbyte_size=header_byte_sizeheaderinensure_capacity_no_flushtbyte_size;letpos=Iobuf.Expert.lot.bufinletbstr=Iobuf.Expert.buft.bufinletfinal_pos=pos+16inIobuf.Expert.set_lot.buffinal_pos;Bigstring.unsafe_set_int64_t_lebstr~posheader;letpos=pos+8inletticks=Time_stamp_counter.now()inBigstring.unsafe_set_int64_t_lebstr~pos(int64_of_tscticks);ticks;;letwrite_from_header_with_tsct~header=ignore(write_from_header_and_get_tsct~header:Time_stamp_counter.t);;letwrite_tsctticks=write_int64_tt(int64_of_tscticks)moduleWrite_arg_unchecked=Write_arg_uncheckedendletcreate_for_file?num_temp_strs~filename()=letdestination=Destinations.file_destination~filename()inExpert.create?num_temp_strs~destination();;letcloset=Expert.flush_and_notifyt;(* Make buffer have zero length so further writes will ask for a new buffer and throw
an exception. The [close] function should do that but we don't want to rely on it. *)Iobuf.resizet.buf~len:0;(* Now that it's safer, close the underlying file *)let(moduleD:Destination)=t.destinationinD.close();;