1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
open Core
let contains xs x =
try
ignore (List.find_exn xs ~f:(fun y -> String.equal x y));
true
with
| _ -> false
;;
let rec has_duplicates = function
| [] -> None
| x :: xs -> if contains xs x then Some x else has_duplicates xs
;;
module Query : sig
val is_a_record : Sexp.t -> bool
val record
: view_atoms_as_strings:bool
-> warn_on_missing_fields:bool
-> string list
-> Sexp.t
-> Csv_record.t
end = struct
open Syntax
let is_a_record =
let pair = and_ [ Index 0; Index 1; Not (Index 2) ] in
let q =
and_ [ Not Atomic; pipe [ Each; and_ [ pair; Test (pipe [ Index 0; Atomic ]) ] ] ]
in
fun sexp ->
match Lazy_list.decons (Semantics.query q sexp) with
| None -> false
| Some _ -> true
;;
let =
let q = pipe [ Each; Index 0 ] in
let strip = function
| Sexp.Atom x -> x
| Sexp.List _ -> failwith "failed to ensure that header is called on a record"
in
fun sexp ->
let hs = Lazy_list.to_list (Lazy_list.map ~f:strip (Semantics.query q sexp)) in
match has_duplicates hs with
| Some x -> failwith ("duplicate field name '" ^ x ^ "'")
| None -> hs
;;
let record ~view_atoms_as_strings ~warn_on_missing_fields fields =
let q = cat (List.map fields ~f:(fun f -> Wrap (Field f))) in
let coerce (f, results) =
match results with
| Sexp.List [ x ] ->
(match x with
| Sexp.Atom str when view_atoms_as_strings -> str
| _ -> Sexp.to_string x)
| Sexp.List (x :: _) ->
eprintf "multiple values for field %s. Arbitrarily picking the first one.\n" f;
Sexp.to_string x
| _ ->
if warn_on_missing_fields then eprintf "missing value for field %s\n" f;
""
in
fun sexp ->
List.map
~f:coerce
(List.zip_exn fields (Lazy_list.to_list (Semantics.query q sexp)))
;;
end
let ~two_pass_processing sexps =
if two_pass_processing
then (
let fields =
let q = String.Hash_queue.create () in
Lazy_list.iter sexps ~f:(fun sexp ->
if Query.is_a_record sexp
then
List.iter (Query.header sexp) ~f:(fun field ->
match Hash_queue.enqueue_back q field () with
| `Ok | `Key_already_present -> ()));
Hash_queue.keys q
in
if List.is_empty fields then None else Some fields)
else (
match Lazy_list.decons sexps with
| Some (x, _) ->
if not (Query.is_a_record x) then failwith "first element is not a record\n";
Some (Query.header x)
| _ -> None)
;;
let csv_of_sexp ~view_atoms_as_strings ~two_pass_processing sexps =
match header ~two_pass_processing sexps with
| Some ->
let warn_on_missing_fields =
not two_pass_processing
in
let = Query.record ~view_atoms_as_strings ~warn_on_missing_fields header in
Lazy_list.cons header (Lazy_list.map ~f:extract sexps)
| None -> Lazy_list.empty ()
;;