1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
open Core
open Async
(** Selection of the CSV fields that the grep regular expression is applied to. *)
module Target_fields = struct
  module T = struct
    (** [All] selects every field of a row; [Field_names names] selects only the fields
        whose header name is a member of [names]. *)
    type t =
      | All
      | Field_names of String.Set.t
    [@@deriving compare, sexp_of]
  end

  include T

  include struct
    open Command.Param

    (* Command-line argument type: a comma-separated list of field names (whitespace
       stripped, empty input allowed). An empty list is read as [All]. *)
    let arg_type =
      let of_names = function
        | [] -> All
        | names -> Field_names (String.Set.of_list names)
      in
      Arg_type.map
        (Arg_type.comma_separated ~allow_empty:true ~strip_whitespace:true string)
        ~f:of_names
    ;;

    (* The [-grep-fields] flag (alias [--grep-fields]); yields [All] when not given. *)
    let param =
      flag
        ~aliases:[ "--grep-fields" ]
        "-grep-fields"
        (optional_with_default All arg_type)
        ~doc:"_ comma separated fieldnames to grep in, defaults to all"
    ;;
  end
end
(* Header row of the input currently being processed. It is filled in as a side effect
   of reading the input and read back when the header row is written to the output.
   NOTE(review): the binding name was missing in this copy of the file; [the_headers]
   is restored from its uses ([the_headers := ...] and [!the_headers]) further down. *)
let the_headers : string list ref = ref []
(* Header specification handed to [Delimited.Read.pipe_of_reader] (as [~header]) in
   [run]. The transform leaves the header row unchanged; its only purpose is to record
   the header in [the_headers] so the row can later be echoed to the output.
   NOTE(review): the binding name and the lambda parameter were missing in this copy
   of the file; [header] and [headers] are restored from their later uses. *)
let header =
  `Transform
    (fun headers ->
      the_headers := headers;
      headers)
;;
(** [run ?separator ?skip_lines ~invert ~always_print_header ~grep_fields ~regexp file]
    greps the rows of a CSV input and writes the selected rows to stdout.

    - [separator] is forwarded as [?sep] to both the delimited reader and writer.
    - [skip_lines] is forwarded to the delimited reader.
    - A row matches when [regexp] matches at least one of its fields that is selected by
      [grep_fields]. The row is written when [matches <> invert].
    - The header row is written once, just before the first data row is examined
      (whether or not that row ends up being written). When the input has no data rows
      at all, the header is written only if [always_print_header] is [true].
    - [file] selects the input: an in-memory CSV, stdin (also [File "-"]), or a path.

    The result is the [Deferred.Or_error.t] produced by [Deferred.Or_error.try_with]
    around the whole read/filter/write pipeline. *)
let run ?separator ?skip_lines ~invert ~always_print_header ~grep_fields ~regexp file =
  (* Rows parsed from an [Async] reader; the top-level [header] transform records the
     header row in [the_headers] as a side effect of parsing. *)
  let delimited_reader_pipe reader =
    Delimited.Read.pipe_of_reader
      Delimited.Read.Row.builder
      ?skip_lines
      ~header
      ?sep:separator
      reader
  in
  (* Rows built from an already-parsed CSV value. *)
  let csv_pipe ({ header; lines } : Csv_common.t) =
    (* The [`Transform] hook only runs for reader-based input, so record the header
       here as well; otherwise an empty header row would be written for [Csv] input. *)
    the_headers := header;
    (* Shadow the header list with the name -> column-index table that
       [Delimited.Read.Row.create] is given. *)
    let header =
      List.mapi header ~f:(fun i header -> header, i)
      |> Hashtbl.of_alist_exn (module String)
    in
    Pipe.of_list
      (List.map lines ~f:(fun line ->
         Delimited.Read.Row.create header (Array.of_list line)))
  in
  let run rows_pipe =
    Deferred.Or_error.try_with
      ~run:`Schedule
      ~rest:`Log
      (fun () ->
        let writer =
          Delimited.Write.Expert.By_row.of_writer_and_close
            ?sep:separator
            (Lazy.force Writer.stdout)
        in
        (* The fold state tracks whether the header row has been written yet. *)
        Pipe.fold rows_pipe ~init:`Haven't_printed_header ~f:(fun is_header_printed row ->
          (match is_header_printed with
           | `Header_printed -> Deferred.return `Header_printed
           | `Haven't_printed_header ->
             Pipe.write writer !the_headers >>| fun () -> `Header_printed)
          >>= fun is_header_printed ->
          let matches_grep =
            Delimited.Read.Row.fold row ~init:false ~f:(fun print_it ~header ~data ->
              (* Once one field has matched, the remaining fields are not tested. *)
              print_it
              ||
              let possible_to_check =
                match grep_fields with
                | Target_fields.All -> true
                | Target_fields.Field_names field_name_set ->
                  Set.mem field_name_set header
              in
              if possible_to_check then Re2.matches regexp data else false)
          in
          (if Bool.( <> ) matches_grep invert
           then Pipe.write writer (Delimited.Read.Row.to_list row)
           else Deferred.unit)
          >>| fun () -> is_header_printed)
        >>= function
        | `Haven't_printed_header ->
          (* No data rows were seen, so the header has not been written yet. *)
          if always_print_header then Pipe.write writer !the_headers else Deferred.unit
        | `Header_printed -> Deferred.unit)
  in
  match (file : Csv_common.Or_file.t) with
  | Csv csv -> run (csv_pipe csv)
  | Stdin | File "-" -> run (delimited_reader_pipe (Lazy.force Reader.stdin))
  | File x -> Reader.with_file x ~f:(fun reader -> run (delimited_reader_pipe reader))
;;