1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
open CFStream
type idata = {mean:float; stdv:float; npixels:int}
type irow = {xcoord:int; ycoord:int; idata:idata}
type isection = irow list
let icolumns = ["X";"Y";"MEAN";"STDV";"NPIXELS"]
let isection_name = "INTENSITY"
let inum_values = List.length
let ifold f init l = List.fold_left ~f ~init l
let iiter f l = List.iter ~f l
type t = isection
exception Bad of string
let raise_bad msg = raise (Bad msg)
module Tbl = struct
module HT = Caml.Hashtbl
let of_cel1 (cel:t) : (int * int, idata) HT.t =
let tbl = HT.create (inum_values cel) in
let f r = HT.add tbl (r.xcoord,r.ycoord) r.idata in
iiter f cel; tbl
let of_cel2 (cel:t) : (int * int, float) HT.t =
let tbl = HT.create (inum_values cel) in
let f r = HT.add tbl (r.xcoord,r.ycoord) r.idata.mean in
iiter f cel; tbl
let find tbl (x,y) =
try HT.find tbl (x,y)
with Caml.Not_found ->
failwith (Msg.err (sprintf "CEL file does not have values for probe \
at position x = %d, y = %d" x y))
end
let data bpmap cels =
let cels = List.map ~f:Tbl.of_cel1 cels in
let f ans r =
let datl =
List.map cels ~f:(fun cel ->
Tbl.find cel r.Bpmap.pmcoord, Tbl.find cel r.Bpmap.mmcoord) in
(r.Bpmap.probe, datl) :: ans
in
Bpmap.fold f [] bpmap
let pm_mm bpmap cels =
let cels = List.map ~f:Tbl.of_cel2 cels in
let f ans r =
let datl =
List.map cels ~f:(fun cel ->
(Tbl.find cel r.Bpmap.pmcoord) -. (Tbl.find cel r.Bpmap.mmcoord)) in
(r.Bpmap.probe, datl) :: ans
in
Bpmap.fold f [] bpmap
let pm bpmap cels =
let cels = List.map ~f:Tbl.of_cel2 cels in
let f ans r =
let datl = List.map cels ~f:(fun cel -> Tbl.find cel r.Bpmap.pmcoord) in
(r.Bpmap.probe, datl) :: ans
in
Bpmap.fold f [] bpmap
let mm bpmap cels =
let cels = List.map ~f:Tbl.of_cel2 cels in
let f ans r =
let datl = List.map ~f:(fun cel -> Tbl.find cel r.Bpmap.mmcoord) cels in
(r.Bpmap.probe, datl) :: ans
in
Bpmap.fold f [] bpmap
module Parser = struct
(** parse string of form "\[sss\]" *)
let section_name s =
let s = String.strip s in
let l = String.length s in
if l < 2 || not Char.(s.[0] = '[' && s.[l-1] = ']')
then None
else Some (String.slice s 1 (l-1))
let line_is_section sec_name l =
match section_name l with
| None -> false
| Some s -> String.(s = sec_name)
let intensity_row s =
let to_int s = Int.of_string (String.strip s) in
let to_float s = Float.of_string (String.strip s) in
match String.split s ~on:'\t' with
| [xcoord; ycoord; mean; stdv; npixels] ->
{
xcoord = to_int xcoord;
ycoord = to_int ycoord;
idata =
{
mean = to_float mean;
stdv = to_float stdv;
npixels = to_int npixels
}
}
| _ -> raise_bad "expecting 5 columns"
(** lines should point to beginning of intensity section,
upon return lines will point to first blank line after data rows *)
let intensity_section lines =
assert (
match Stream.peek lines with
| None -> false
| Some l -> line_is_section isection_name l
);
Stream.junk lines;
let sl = String.split (Stream.next_exn lines) ~on:'=' in
let num_cells = int_of_string (String.strip (List.nth_exn sl 1)) in
let sl = String.split (Stream.next_exn lines) ~on:'=' in
let sl = String.split (List.nth_exn sl 1) ~on:'\t' in
let sl = List.map ~f:String.strip sl in
let () =
if Poly.(sl <> icolumns) then
raise_bad "intensity section column names incorrect" in
let lines =
Stream.take_while
~f:(fun s -> not (String.for_all s ~f:Char.is_whitespace)) lines in
let lines = Stream.map ~f:intensity_row lines in
let ans = Stream.to_list lines in
let count = List.length ans in
if count = num_cells then ans
else raise_bad (sprintf "expected %d intensity rows but found %d" num_cells count)
let cel file =
let of_channel cin =
let lines = Lines.of_channel cin in
let err msg = Msg.err ~pos:(Pos.make ~source:file ~line:(Stream.count lines) ()) msg in
try
Stream.drop_while ~f:(fun (s : Lines.item) -> not (line_is_section isection_name (s :> string))) lines;
if Stream.is_empty lines
then failwith (isection_name ^ " section not found");
intensity_section (Stream.map lines ~f:(fun (x : Lines.item) -> (x :> string)))
with
Failure msg | Bad msg -> raise_bad (err msg)
in
In_channel.with_file file ~f:of_channel
end
let of_file = Parser.cel
let of_file_opt file = try Some (of_file file) with Bad _ -> None