1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
type t = {
total : int ;
qc_pass : int ; (** [not_passing_quality_controls] returns [false],
assumed for all other counts *)
single_reads : int ; (** [has_multiple_segments] returns [false] *)
read_pairs : int ; (** [has_multiple_segments] and [first_segment] *)
mapped_reads : int ; (** [!segment_unmapped] and [!secondary_alignment]
and [!supplementary_alignment] *)
mapped_pairs : int ; (** [has_multiple_segments] and [first_segment]
and [each_segment_properly_aligned]
and [!secondary_alignment]
and [!supplementary_alignment] *)
}
[@@deriving sexp]
let zero = {
total = 0 ;
qc_pass = 0 ;
single_reads = 0 ;
read_pairs = 0 ;
mapped_reads = 0 ;
mapped_pairs = 0 ;
}
let incr_if b i = if b then i + 1 else i
let update_gen s flags =
let open Sam.Flags in
let total = s.total + 1 in
if not_passing_quality_controls flags then
{ s with total }
else
let qc_pass = s.qc_pass + 1 in
let single_reads = incr_if (not (has_multiple_segments flags)) s.single_reads in
let pair_witness = has_multiple_segments flags && first_segment flags in
let read_pairs = incr_if pair_witness s.read_pairs in
let main_mapping =
not (segment_unmapped flags ||
secondary_alignment flags ||
supplementary_alignment flags)
in
let mapped_reads = incr_if main_mapping s.mapped_reads in
let mapped_pairs = incr_if (main_mapping && pair_witness) s.mapped_pairs in
{ total ; qc_pass ; single_reads ; read_pairs ; mapped_reads ; mapped_pairs }
let update s al =
update_gen s al.Sam.flags
let update0 s al =
let open Or_error.Monad_infix in
Bam.Alignment0.flags al
>>| update_gen s
module Fragment_length_histogram = struct
type t = {
min_mapq : int ;
counts : int Accu.Counter.t ;
}
let create ?(min_mapq = 0) () = {
min_mapq ;
counts = Accu.Counter.create () ;
}
let with_mapped_pair ~min_mapq al f =
let open Or_error.Monad_infix in
Bam.Alignment0.flags al >>= fun fl ->
let multi_segment = Sam.Flags.has_multiple_segments fl in
let each_segment_properly_aligned = Sam.Flags.each_segment_properly_aligned fl in
let segment_unmapped = Sam.Flags.segment_unmapped fl in
let qc_ok = match Bam.Alignment0.mapq al with
| Some mapq -> mapq >= min_mapq
| None -> false
in
let mapped_fragment =
multi_segment && not segment_unmapped && each_segment_properly_aligned
in
if mapped_fragment && qc_ok then f () else Ok ()
let update0 { min_mapq ; counts } al =
with_mapped_pair ~min_mapq al (fun () ->
let length = match Bam.Alignment0.tlen al with
| Some l -> Int.abs l
| _ -> assert false
in
Accu.Counter.tick counts length ;
Ok ()
)
end
module Chr_histogram = struct
type t = {
min_mapq : int ;
bam_header : Bam.Header.t ;
counts : string Accu.Counter.t ;
}
let create ?(min_mapq = 0) = {
min_mapq ;
bam_header ;
counts = Accu.Counter.create () ;
}
let update0 { min_mapq ; counts ; } al =
Fragment_length_histogram.with_mapped_pair ~min_mapq al (fun () ->
match Bam.Alignment0.rname al bam_header with
| Ok (Some chr) ->
Accu.Counter.tick counts chr ;
Ok ()
| Ok None -> Ok ()
| Error _ as e -> e
)
end