1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
module Row = struct
let module_error e = Error (`table_row e)
type item_type = [`type_int | `type_float | `type_string ] [@@deriving sexp]
type t_type = item_type array [@@deriving sexp]
type item = [`int of int | `float of float | `string of string ] [@@deriving sexp]
type t = item array [@@deriving sexp]
module Tags = struct
type t = [
| `separator of char
| `strict_about of [ `row_length | `cell_type ]
| `format of item_type array
] list
[@@deriving sexp]
let separators tags =
List.filter_map tags ~f:(function
| `separator c -> Some c
| _ -> None)
let strict_row_length tags =
List.exists tags ~f:(function `strict_about `row_length -> true | _ -> false)
let strict_cell_type tags =
List.exists tags ~f:(function `strict_about `cell_type -> true | _ -> false)
let format tags =
List.find_map tags ~f:(function `format f -> Some f | _ -> None)
let default = [ `separator '\t' ]
let default_extension tags =
match separators tags with
| '\t' :: _ -> "tsv"
| ',' :: _ -> "csv"
| _ -> "table"
let to_string t = sexp_of_t t |> Sexplib.Sexp.to_string
let of_string s =
try Ok (Sexplib.Sexp.of_string s |> t_of_sexp)
with e -> module_error (`tags_of_string e)
end
module Error = struct
type line_parsing =
[ `wrong_format of
[ `column_number
| `float_of_string of string
| `int_of_string of string ] * t_type * string ]
[@@deriving sexp]
type t = line_parsing [@@deriving sexp]
end
let of_line ?(separators=[' '; '\t']) ?(strict_row_length=false)
?(strict_cell_type=false) ?format line =
let l = (line : Line.t :> string) in
let module With_exns = struct
exception Int_of_string of string
exception Float_of_string of string
let int s =
try Int.of_string s with _ -> raise (Int_of_string s)
let float s =
try Float.of_string s with _ -> raise (Float_of_string s)
let of_line ~format l =
let tokens =
String.split_on_chars ~on:separators l |> List.filter ~f:String.((<>) "")
|> Array.of_list in
begin match format with
| None ->
Ok (Array.map tokens ~f:(fun s -> `string s))
| Some format ->
begin try
if strict_row_length && Array.length format > Array.length tokens
then Error (`wrong_format (`column_number, format, l))
else begin
let row =
Array.mapi tokens ~f:(fun i tok ->
let typ =
if strict_cell_type then format.(i)
else (try format.(i) with _ -> `type_string) in
match typ with
| `type_int -> `int (int tok)
| `type_float -> `float (float tok)
| `type_string -> `string tok) in
Ok row
end
with
| Invalid_argument _ ->
Error (`wrong_format (`column_number, format, l))
| Int_of_string s ->
Error (`wrong_format (`int_of_string s, format, l))
| Float_of_string s ->
Error (`wrong_format (`float_of_string s, format, l))
end
end
end in
(With_exns.of_line ~format l : (t, _) Result.t)
let to_line ~sep t =
let item_to_string = function
| `int i -> Int.to_string i
| `float f -> sprintf "%g" f
| `string s -> s in
Line.of_string_unsafe
(String.concat_array ~sep (Array.map t ~f:item_to_string))
module Transform = struct
let line_to_item ?(tags: Tags.t = Tags.default) () =
let separators = Tags.separators tags in
let strict_row_length = Tags.strict_row_length tags in
let strict_cell_type = Tags.strict_cell_type tags in
let format = Tags.format tags in
Tfxm.on_output
~f:begin fun s ->
of_line ~separators ~strict_row_length ~strict_cell_type ?format
(s : Lines.item)
|> begin function
| Ok o -> Ok o
| Error e -> Error (`table_row (e : Error.line_parsing))
end
end
(Tfxm.identity ())
let item_to_line ?(tags: Tags.t = Tags.default) () =
let sep =
Tags.separators tags |> List.hd |> Option.value ~default:'\t'
|> Char.to_string in
Tfxm.on_output ~f:(to_line ~sep)
(Tfxm.identity ())
end
end