Source file talon_json.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
open Talon
open Yojson.Basic
(** [value_to_json col idx] is the JSON encoding of the cell at row [idx] of
    [col].

    - Floating-point cells become [`Float], except NaN which becomes [`Null].
    - 4-, 8- and 16-bit, quantized, [Int] and [Int32] integer cells become
      [`Int].
    - [Int64] and [NativeInt] cells become decimal [`String]s.
    - Complex cells become a [`String] formatted with ["%g+%gi"].
    - Boolean tensor cells become [`Bool].
    - Nullable string / boolean columns map [Some v] to [`String] / [`Bool]
      and [None] to [`Null].

    The function is staged: all per-column work (the [Nx.to_array] call) runs
    as soon as [col] is supplied, and the returned closure only indexes the
    resulting array. Binding [let cell = value_to_json col] therefore lets a
    caller read many rows while converting the tensor once. A full application
    [value_to_json col idx] gives the same result as reading a single cell.

    @raise Invalid_argument if [idx] is out of bounds for the column. *)
let value_to_json col =
  (* NaN is written as JSON null rather than as a number. *)
  let float_json (x : float) =
    if classify_float x = FP_nan then `Null else `Float x
  in
  let float_cells (cells : float array) idx = float_json cells.(idx) in
  let int_cells (cells : int array) idx = `Int cells.(idx) in
  let complex_cells (cells : Complex.t array) idx =
    let { Complex.re; im } = cells.(idx) in
    `String (Printf.sprintf "%g+%gi" re im)
  in
  match col with
  | Col.P (dtype, tensor, _) -> (
      (* Each constructor keeps its own branch: the element type of
         [Nx.to_array tensor] is only known once [dtype] has been matched. *)
      match dtype with
      | Nx.Float32 -> float_cells (Nx.to_array tensor)
      | Nx.Float64 -> float_cells (Nx.to_array tensor)
      | Nx.Float16 -> float_cells (Nx.to_array tensor)
      | Nx.BFloat16 -> float_cells (Nx.to_array tensor)
      | Nx.Int8 -> int_cells (Nx.to_array tensor)
      | Nx.UInt8 -> int_cells (Nx.to_array tensor)
      | Nx.Int16 -> int_cells (Nx.to_array tensor)
      | Nx.UInt16 -> int_cells (Nx.to_array tensor)
      | Nx.Int32 ->
          let cells : int32 array = Nx.to_array tensor in
          (fun idx -> `Int (Int32.to_int cells.(idx)))
      | Nx.Int64 ->
          (* Emitted as a string; presumably to avoid losing digits in JSON
             consumers with narrower integers — TODO confirm. *)
          let cells : int64 array = Nx.to_array tensor in
          (fun idx -> `String (Int64.to_string cells.(idx)))
      | Nx.Int -> int_cells (Nx.to_array tensor)
      | Nx.NativeInt ->
          let cells : nativeint array = Nx.to_array tensor in
          (fun idx -> `String (Nativeint.to_string cells.(idx)))
      | Nx.Complex32 -> complex_cells (Nx.to_array tensor)
      | Nx.Complex64 -> complex_cells (Nx.to_array tensor)
      | Nx.Bool ->
          let cells : bool array = Nx.to_array tensor in
          (fun idx -> `Bool cells.(idx))
      | Nx.Int4 -> int_cells (Nx.to_array tensor)
      | Nx.UInt4 -> int_cells (Nx.to_array tensor)
      | Nx.Float8_e4m3 -> float_cells (Nx.to_array tensor)
      | Nx.Float8_e5m2 -> float_cells (Nx.to_array tensor)
      | Nx.Complex16 -> complex_cells (Nx.to_array tensor)
      | Nx.QInt8 -> int_cells (Nx.to_array tensor)
      | Nx.QUInt8 -> int_cells (Nx.to_array tensor))
  | Col.S cells ->
      (fun idx ->
        match cells.(idx) with Some s -> `String s | None -> `Null)
  | Col.B cells ->
      (fun idx -> match cells.(idx) with Some b -> `Bool b | None -> `Null)
(** [to_string ?orient df] serializes the dataframe [df] to a JSON string.

    - [`Records] (the default) produces an array with one object per row,
      each object mapping column names to that row's cell values.
    - [`Columns] produces a single object mapping each column name to the
      array of that column's values.

    Cells are encoded with [value_to_json]. Columns appear in the order given
    by [column_names df]. *)
let to_string ?(orient = `Records) df =
  let n_rows = num_rows df in
  (* Resolve every column once, up front, instead of once per cell. The
     reader is [value_to_json] partially applied to the column, so whatever
     it does on receiving the column is not repeated for each row. *)
  let readers =
    List.map
      (fun col_name -> (col_name, value_to_json (get_column_exn df col_name)))
      (column_names df)
  in
  match orient with
  | `Records ->
      let record row =
        `Assoc (List.map (fun (col_name, cell) -> (col_name, cell row)) readers)
      in
      Yojson.Basic.to_string (`List (List.init n_rows record))
  | `Columns ->
      let column (col_name, cell) = (col_name, `List (List.init n_rows cell)) in
      Yojson.Basic.to_string (`Assoc (List.map column readers))
(** [detect_json_dtype values] picks the column type used to load a list of
    JSON scalars. [`Null] entries are ignored for detection.

    - no non-null value: [`String]
    - only booleans: [`Bool]
    - only integers that all fit in 32 bits: [`Int32]
    - only integers, at least one outside the 32-bit range: [`String], because
      storing such a value as an [int32] would silently wrap it, while the
      string form keeps every digit
    - a mix of integers and floats, or only floats: [`Float32]
    - anything else: [`String] *)
let detect_json_dtype values =
  let is_bool = function `Bool _ -> true | _ -> false in
  let is_int = function `Int _ -> true | _ -> false in
  (* Round-tripping through [Int32] is exact only for values in range. *)
  let is_int32 = function
    | `Int i -> Int32.to_int (Int32.of_int i) = i
    | _ -> false
  in
  let is_number = function `Float _ | `Int _ -> true | _ -> false in
  match List.filter (function `Null -> false | _ -> true) values with
  | [] -> `String
  | present ->
      if List.for_all is_bool present then `Bool
      else if List.for_all is_int present then
        if List.for_all is_int32 present then `Int32 else `String
      else if List.for_all is_number present then `Float32
      else `String
(* Builds one nullable column from the JSON values of a single column.

   The column type comes from [detect_json_dtype]. Any value that does not
   match the decoder for that type becomes a missing cell: this covers [`Null]
   everywhere and, for string columns, nested arrays and objects. In string
   columns, numbers and booleans are rendered with [string_of_int],
   [string_of_float] and [string_of_bool]. *)
let column_of_json_values values =
  let decode_all decode = Array.of_list (List.map decode values) in
  match detect_json_dtype values with
  | `Float32 ->
      Col.float32_opt
        (decode_all (function
          | `Float f -> Some f
          | `Int i -> Some (float_of_int i)
          | _ -> None))
  | `Int32 ->
      Col.int32_opt
        (decode_all (function `Int i -> Some (Int32.of_int i) | _ -> None))
  | `Bool -> Col.bool_opt (decode_all (function `Bool b -> Some b | _ -> None))
  | `String ->
      Col.string_opt
        (decode_all (function
          | `String s -> Some s
          | `Int i -> Some (string_of_int i)
          | `Float f -> Some (string_of_float f)
          | `Bool b -> Some (string_of_bool b)
          | _ -> None))

(** [from_string ?orient json_str] parses [json_str] into a dataframe.

    - [`Records] (the default) expects an array of objects, one per row.
      Column names and their order come from the {e first} object only. A key
      missing from a later object gives a null cell, keys that appear only in
      later objects are ignored, and a later element that is not an object
      gives a null in every column.
    - [`Columns] expects an object mapping each column name to an array of
      values.

    An empty array / empty object gives [empty]. Column types are inferred per
    column with [detect_json_dtype].

    @raise Failure
      if the top-level value has the wrong shape for [orient], if the first
      record is not an object, or if a [`Columns] field is not an array.
      Parse errors raised by [Yojson.Basic.from_string] are not caught. *)
let from_string ?(orient = `Records) json_str =
  (* Qualified on purpose: this definition shadows Yojson's [from_string]. *)
  let json = Yojson.Basic.from_string json_str in
  match orient with
  | `Records -> (
      match json with
      | `List [] -> empty
      | `List (first :: _ as records) ->
          let col_names =
            match first with
            | `Assoc fields -> List.map fst fields
            | _ -> failwith "Invalid JSON: expected object in records array"
          in
          let column_named col_name =
            let cell_of_record = function
              | `Assoc fields -> (
                  match List.assoc_opt col_name fields with
                  | Some value -> value
                  | None -> `Null)
              | _ -> `Null
            in
            (col_name, column_of_json_values (List.map cell_of_record records))
          in
          create (List.map column_named col_names)
      | _ -> failwith "Invalid JSON: expected array for records orientation")
  | `Columns -> (
      match json with
      | `Assoc [] -> empty
      | `Assoc fields ->
          let column_of_field (col_name, values) =
            match values with
            | `List vals -> (col_name, column_of_json_values vals)
            | _ ->
                failwith
                  (Printf.sprintf "Invalid JSON: column %s is not an array"
                     col_name)
          in
          create (List.map column_of_field fields)
      | _ -> failwith "Invalid JSON: expected object for columns orientation")
(** [to_file ?orient df file] writes the JSON serialization of [df], as
    produced by [to_string ?orient df], to [file]. The file is created if
    needed and truncated otherwise. The channel is closed even when writing
    fails.

    @raise Sys_error if [file] cannot be opened or written. *)
let to_file ?orient df file =
  (* Serialize before opening so a serialization failure never truncates
     [file]. *)
  let json_string = to_string ?orient df in
  let oc = open_out file in
  Fun.protect
    ~finally:(fun () -> close_out_noerr oc)
    (fun () ->
      output_string oc json_string;
      (* Close explicitly so a failing flush is reported to the caller; the
         [finally] close is then a no-op on the already closed channel. *)
      close_out oc)
(** [from_file ?orient file] reads the whole of [file] and parses it with
    [from_string ?orient]. The channel is closed even when reading fails.

    @raise Sys_error if [file] cannot be opened or read.
    @raise End_of_file if the file shrinks while it is being read.

    Any exception raised by [from_string] is propagated unchanged. *)
let from_file ?orient file =
  (* Binary mode keeps [in_channel_length] consistent with the number of
     bytes actually read on platforms that translate newlines in text mode. *)
  let ic = open_in_bin file in
  let contents =
    Fun.protect
      ~finally:(fun () -> close_in_noerr ic)
      (fun () -> really_input_string ic (in_channel_length ic))
  in
  from_string ?orient contents