1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
type t = Float.Array.t
let get t n =
let x = Float.Array.get t n in
if Float.is_infinite x then None else Some x
let of_feature_values (t : Parser.t) features : t =
let n = int_of_string t.learner.learner_model_param.num_feature in
let a = Float.Array.make n Float.infinity in
let h = Hashtbl.create 10 in
Array.iteri (fun i s -> Hashtbl.add h s i) t.learner.feature_names;
List.iter
(fun (s, f) ->
let i =
match Hashtbl.find_opt h s with
| None -> invalid_arg (Printf.sprintf "Feature %s not found" s)
| Some i -> i
in
Float.Array.set a i f)
features;
a
let of_csv_file t filename =
let cin = open_in filename in
Fun.protect
~finally:(fun () -> close_in cin)
(fun () ->
let cin = Csv.of_channel ~has_header:true cin in
let acc =
Csv.Rows.fold_left
~f:(fun acc row ->
let row = Csv.Row.to_assoc row in
let row = List.tl row in
let row = List.map (fun (s, f) -> (s, Float.of_string f)) row in
of_feature_values t row :: acc)
~init:[] cin
in
List.rev acc)
let filename =
let cin = open_in filename in
Fun.protect
~finally:(fun () -> close_in cin)
(fun () ->
let cin = Csv.of_channel ~has_header:false cin in
let acc =
Csv.Rows.fold_left
~f:(fun acc row ->
let row = Csv.Row.to_list row in
let row = List.map Float.of_string row in
Float.Array.of_list row :: acc)
~init:[] cin
in
List.rev acc)
let of_svm_file (t : Parser.t) filename =
let cin = open_in filename in
let n = int_of_string t.learner.learner_model_param.num_feature in
let rec aux acc =
match input_line cin with
| exception End_of_file -> acc
| s -> (
match String.split_on_char ' ' s with
| _output :: features ->
let a = Float.Array.make n Float.infinity in
List.iter
(fun feature ->
match String.split_on_char ':' feature with
| [ id; v ] ->
let id = int_of_string id - 1 in
Float.Array.set a id (Float.of_string v)
| _ ->
invalid_arg
(Printf.sprintf "invalid svm in file %s: %s" filename feature))
features;
aux (a :: acc)
| [] -> invalid_arg ("invalid svm file " ^ filename))
in
List.rev (aux [])
let of_filename (t : Parser.t) filename =
match Filename.extension filename with
| ".csv" ->
if Array.length t.learner.feature_names = 0
then of_csv_file_no_header filename
else of_csv_file t filename
| ".svm" -> of_svm_file t filename
| s ->
invalid_arg
(Printf.sprintf "Unknown extension %s known are .csv and .svm" s)