123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119typet={total:int;qc_pass:int;(** [not_passing_quality_controls] returns [false],
assumed for all other counts *)single_reads:int;(** [has_multiple_segments] returns [false] *)read_pairs:int;(** [has_multiple_segments] and [first_segment] *)mapped_reads:int;(** [!segment_unmapped] and [!secondary_alignment]
and [!supplementary_alignment] *)mapped_pairs:int;(** [has_multiple_segments] and [first_segment]
and [each_segment_properly_aligned]
and [!secondary_alignment]
and [!supplementary_alignment] *)}[@@derivingsexp]letzero={total=0;qc_pass=0;single_reads=0;read_pairs=0;mapped_reads=0;mapped_pairs=0;}letincr_ifbi=ifbtheni+1elseiletupdate_gensflags=letopenSam.Flagsinlettotal=s.total+1inifnot_passing_quality_controlsflagsthen{swithtotal}elseletqc_pass=s.qc_pass+1inletsingle_reads=incr_if(not(has_multiple_segmentsflags))s.single_readsinletpair_witness=has_multiple_segmentsflags&&first_segmentflagsinletread_pairs=incr_ifpair_witnesss.read_pairsinletmain_mapping=not(segment_unmappedflags||secondary_alignmentflags||supplementary_alignmentflags)inletmapped_reads=incr_ifmain_mappings.mapped_readsinletmapped_pairs=incr_if(main_mapping&&pair_witness)s.mapped_pairsin{total;qc_pass;single_reads;read_pairs;mapped_reads;mapped_pairs}letupdatesal=update_gensal.Sam.flagsletupdate0sal=letopenOr_error.Monad_infixinBam.Alignment0.flagsal>>|update_gensmoduleFragment_length_histogram=structtypet={min_mapq:int;counts:intAccu.Counter.t;}letcreate?(min_mapq=0)()={min_mapq;counts=Accu.Counter.create();}letwith_mapped_pair~min_mapqalf=letopenOr_error.Monad_infixinBam.Alignment0.flagsal>>=funfl->letmulti_segment=Sam.Flags.has_multiple_segmentsflinleteach_segment_properly_aligned=Sam.Flags.each_segment_properly_alignedflinletsegment_unmapped=Sam.Flags.segment_unmappedflinletqc_ok=matchBam.Alignment0.mapqalwith|Somemapq->mapq>=min_mapq|None->falseinletmapped_fragment=multi_segment&¬segment_unmapped&&each_segment_properly_alignedinifmapped_fragment&&qc_okthenf()elseOk()letupdate0{min_mapq;counts}al=with_mapped_pair~min_mapqal(fun()->letlength=matchBam.Alignment0.tlenalwith|Somel->Int.absl|_->assertfalse(* both reads are mapped so there should be a length *)inAccu.Counter.tickcountslength;Ok())endmoduleChr_histogram=structtypet={min_mapq:int;bam_header:Bam.Header.t;counts:stringAccu.Counter.t;}letcreate?(min_mapq=0)bam_header={min_mapq;bam_header;counts=Accu.Counter.create();}letupdate0{min_mapq;counts;bam_header}al=Fragment_length_histogram.with_mapped_pair~min_mapqal(fun()->matchBam.Alignment0.rnamealbam_headerwith|Ok(Somechr)->Accu.Counter.tickcountschr;Ok()|OkNone->Ok()|Error_ase->e)end