Source file fasta_in_channel.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
open! Base

module In_channel = Stdio.In_channel
module U = Utils

type t = In_channel.t

let peek_char chan =
  match In_channel.input_char chan with
  | None -> None
  | Some c ->
      let pos = In_channel.pos chan in
      let () = assert (Int64.(pos > of_int 0)) in
      let () = In_channel.seek chan Int64.(pos - of_int 1) in
      Some c

let rewind ?(pos = Int64.of_int 0) chan = In_channel.seek chan pos

let equal c1 c2 = In_channel.equal c1 c2

exception Exn of string [@@deriving sexp]

let clean_sequence s = String.filter s ~f:(fun c -> Char.(c <> ' '))

let of_in_channel chan = chan
let to_in_channel chan = chan

let create_exn fname = In_channel.create fname
let create fname = U.try1 create_exn fname

let close_exn chan = In_channel.close chan
let close chan = U.try1 close_exn chan

let stdin_exn () = In_channel.stdin
let stdin () = U.try0 stdin_exn

let input_record_exn chan =
  let rec loop record =
    match (peek_char chan, record) with
    | None, None -> None
    | None, Some record' -> Some record'
    | Some '>', None ->
        let line = In_channel.input_line_exn ~fix_win_eol:true chan in
        loop (Some (Fasta_record.of_header_exn line))
    | Some '>', Some record' -> Some record'
    | Some _, None ->
        raise (Exn "Not at a header line, but not currently in a sequence")
    | Some _, Some record' ->
        let line = In_channel.input_line_exn ~fix_win_eol:true chan in
        let seq_part = clean_sequence line in
        let new_seq = Fasta_record.seq record' ^ seq_part in
        loop (Some (Fasta_record.with_seq new_seq record'))
  in
  loop None

let input_record chan = U.try1 input_record_exn chan

let fold_records_exn chan ~init ~f =
  let rec loop acc record =
    match record with
    | None -> acc
    | Some record' -> loop (f acc record') (input_record_exn chan)
  in
  loop init (input_record_exn chan)

let fold_records chan ~init ~f = U.try_fold fold_records_exn chan ~init ~f

let records_exn chan =
  List.rev
    (fold_records_exn chan ~init:[] ~f:(fun records record -> record :: records))

let records chan = U.try1 records_exn chan

let foldi_records_exn chan ~init ~f =
  snd
    (fold_records_exn chan ~init:(0, init) ~f:(fun (i, acc) record ->
         (i + 1, f i acc record)))

let foldi_records chan ~init ~f = U.try_fold foldi_records_exn chan ~init ~f

let iter_records_exn chan ~f =
  fold_records_exn chan ~init:() ~f:(fun () record -> f record)
let iter_records chan ~f =
  fold_records chan ~init:() ~f:(fun () record -> f record)

let iteri_records_exn chan ~f =
  foldi_records_exn chan ~init:() ~f:(fun i () record -> f i record)
let iteri_records chan ~f = U.try_map iteri_records_exn chan ~f

let with_file_exn fname ~f = In_channel.with_file fname ~f
let with_file fname ~f = U.try_map with_file_exn fname ~f

(* These are the with file versions of the above. *)

let with_file_records_exn fname = with_file_exn fname ~f:records_exn
let with_file_records fname = U.try1 with_file_records_exn fname

let with_file_fold_records_exn fname ~init ~f =
  with_file_exn fname ~f:(fun chan -> fold_records_exn chan ~init ~f)
let with_file_fold_records fname ~init ~f =
  U.try_fold with_file_fold_records_exn fname ~init ~f

let with_file_foldi_records_exn fname ~init ~f =
  with_file_exn fname ~f:(fun chan -> foldi_records_exn chan ~init ~f)
let with_file_foldi_records fname ~init ~f =
  U.try_fold with_file_foldi_records_exn fname ~init ~f

let with_file_iter_records_exn fname ~f =
  with_file_exn fname ~f:(fun chan -> iter_records_exn chan ~f)
let with_file_iter_records fname ~f =
  U.try_map with_file_iter_records_exn fname ~f

let with_file_iteri_records_exn fname ~f =
  with_file_exn fname ~f:(fun chan -> iteri_records_exn chan ~f)
let with_file_iteri_records fname ~f =
  U.try_map with_file_iteri_records_exn fname ~f

(* Sequence generating functions are a little bit different. *)

let record_sequence_exn chan =
  Sequence.unfold ~init:chan ~f:(fun ch ->
      Option.map (input_record_exn ch) ~f:(fun record -> (record, ch)))

let record_sequence chan =
  Sequence.unfold ~init:(Some chan) ~f:(fun chan' ->
      match chan' with
      (* None means the sequence is over. *)
      | None -> None
      | Some chan'' -> (
          match input_record chan'' with
          (* Some Error seems weird, but we need to yield an Error so the caller
             can handle it, then we need to trigger one more yield iteration to
             end the sequence next time with the None channel. *)
          | Error err -> Some (Error err, None)
          | Ok record -> (
              match record with
              (* None needed here to end the Sequence. *)
              | None -> None
              | Some record' -> Some (Or_error.return record', Some chan''))))