1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
open Core
module Csv = Csvlib.Csv
(** A CSV whose rows have each been split into a pair of string-keyed maps.

    - [kv_maps]: one [(key_map, value_map)] pair per row. [of_mapped_csv]
      puts the cells of the designated key columns in the first map and all
      remaining cells in the second.
    - [header_map]: integer-indexed header names, carried over unchanged from
      the [Mapped_csv] the value was built from (presumably column position
      to column name; confirm against [Mapped_csv]). *)
type t =
  { kv_maps : (string String.Map.t * string String.Map.t) list
  ; header_map : string Int.Map.t
  }
[@@deriving fields ~getters]
(** [create kv_maps header_map] packs the per-row (key map, value map) pairs
    and the header map into a [t]. No validation is performed. *)
let create kv_maps header_map = { kv_maps; header_map }
(** [to_mapped_csv split_csv] recombines every (key map, value map) pair of
    [split_csv] into a single row map and passes the resulting rows, together
    with the header map, to [Mapped_csv.create].

    Rows are handed over in the reverse of their order in
    [split_csv.kv_maps].

    @raise Invalid_argument if some column is bound in both maps of a pair. *)
let to_mapped_csv split_csv =
  (* Union of the two halves of one row; a column bound on both sides is an
     error rather than something to resolve silently. *)
  let join_row (key_map, value_map) =
    Map.merge key_map value_map ~f:(fun ~key:_ presence ->
      match presence with
      | `Left cell | `Right cell -> Some cell
      | `Both _ -> raise (Invalid_argument "Column appears in both key and value maps"))
  in
  let row_maps = List.rev_map split_csv.kv_maps ~f:join_row in
  Mapped_csv.create row_maps split_csv.header_map
;;
(** [of_mapped_csv mapped_csv ~key_spec] splits every row map of [mapped_csv]
    into a (key map, value map) pair: bindings whose column name belongs to
    the set designated by [key_spec] go into the key map, all others into the
    value map. The header map of [mapped_csv] is carried over unchanged.

    Rows are accumulated by consing, so [kv_maps] of the result is in the
    reverse of the order returned by [Mapped_csv.row_maps].

    @raise Invalid_argument
      if [key_spec] designates no columns, if a designated column is not among
      the headers of [mapped_csv], or if the number of designated columns
      equals the number of distinct headers (nothing would be left as a
      value). *)
let of_mapped_csv mapped_csv ~(key_spec : Key_specifier.t) =
  let module SM = String.Map in
  let module SS = String.Set in
  let column_set = Key_specifier.to_string_set key_spec in
  let validate_key_spec () =
    if Set.is_empty column_set
    then raise (Invalid_argument "No key columns specified")
    else (
      (* Collect the header names (the data of the header map) into a set so
         that membership and cardinality can be checked. *)
      let header_set =
        Map.fold
          (Mapped_csv.header_map mapped_csv : _ Int.Map.t)
          ~init:SS.empty
          ~f:(fun ~key:_ ~data:header output_set -> Set.add output_set header)
      in
      Set.iter column_set ~f:(fun col ->
        if not (Set.mem header_set col)
        then raise (Invalid_argument (sprintf "Key column %s does not exist" col)));
      (* Every key column is a header at this point, so equal sizes means
         every column is a key column. *)
      if Set.length column_set = Set.length header_set
      then raise (Invalid_argument "All columns are marked as key columns.")
      else ())
  in
  validate_key_spec ();
  (* Partition one row's bindings by whether the column is a key column. *)
  let get_key_and_value_for_row_map row_map =
    Map.fold row_map ~init:(SM.empty, SM.empty) ~f:(fun ~key ~data (key_map, val_map) ->
      if Set.mem column_set key
      then Map.set key_map ~key ~data, val_map
      else key_map, Map.set val_map ~key ~data)
  in
  create
    (List.fold (Mapped_csv.row_maps mapped_csv) ~init:[] ~f:(fun accum row_map ->
       get_key_and_value_for_row_map row_map :: accum))
    (Mapped_csv.header_map mapped_csv)
;;
(** [by_key ?header ~key csvs] converts each CSV in [csvs] to a [Mapped_csv]
    and splits it into key and value maps with [of_mapped_csv].

    @param header
      when [true] (the default), [key] is parsed with
      [Key_specifier.specifier_of_string] and the CSVs are used as given.
      When [false], [key] is parsed with
      [Key_specifier.int_specifier_of_string] and a synthetic first row
      ["1"; "2"; ...; "n"] is prepended to each CSV, where [n] is
      [Csv.columns csv].
    @param key textual key-column specification.
    @raise Invalid_argument
      when [of_mapped_csv] rejects the key specification for one of the
      CSVs. *)
let by_key ?(header = true) ~key csvs =
  let col_spec =
    if header
    then Key_specifier.specifier_of_string key
    else Key_specifier.int_specifier_of_string key
  in
  (* Prepend a header row of 1-based column numbers so that header-less input
     can go through the same header-keyed path. *)
  let insert_numerical_header csv =
    let num_columns = Csv.columns csv in
    (* Counts down from [num_columns] while consing, which yields ascending
       order: ["1"; ...; "n"]. *)
    let rec header_row num_columns accum =
      if num_columns = 0
      then accum
      else header_row (num_columns - 1) (string_of_int num_columns :: accum)
    in
    header_row num_columns [] :: csv
  in
  let csvs = if header then csvs else List.map csvs ~f:insert_numerical_header in
  List.map csvs ~f:(fun csv -> of_mapped_csv (Mapped_csv.of_csv csv) ~key_spec:col_spec)
;;
(** [by_key_from_files ?header ~key files] reads each element of [files] with
    [Csv.load] and passes the results to [by_key] with the same [header]
    (default [true]) and [key]. *)
let by_key_from_files ?(header = true) ~key files =
  by_key ~header ~key (List.map files ~f:Csv.load)
;;