1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
(** {1 Small Parser for IRC Logs} *)
(** An iterator in continuation-passing style: a function that receives a
    callback of type ['a -> unit] and returns [unit]. *)
type 'a sequence = ('a -> unit) -> unit
(** One parsed log line. All three fields are plain strings, filled in by
    [parse_record] from the capture groups of the format regex. *)
type log_record = {
author: string; (** capture group 2, trimmed and passed through [norm_author] *)
time: string; (** capture group 1, trimmed; kept as raw text *)
msg: string; (** capture group 3, kept verbatim *)
}
(** [string_of_record r] renders [r] as a single-line string listing the
    author, time and message fields, in that order. *)
let string_of_record { author; time; msg } =
  Printf.sprintf "{author=%s, time=%s, msg=%s}" author time msg
(** [pp_record out r] prints [r] on the formatter [out], using the same
    layout as [string_of_record]. *)
let pp_record out { author; time; msg } =
  Format.fprintf out "{author=%s, time=%s, msg=%s}" author time msg
(** Compiled POSIX regex for one line of an irssi log.
    Group 1 is a (possibly empty) run of digits and colons, group 2 is the
    text between the angle brackets, group 3 is everything after the
    closing bracket and one space.

    NOTE(review): group 1 must be immediately followed by the opening angle
    bracket. If the log has a space between the timestamp and the bracket,
    group 1 would presumably match the empty string - confirm against real
    irssi logs. *)
let re_irssi = Re.Posix.re "([0-9:]*)<([^>]*)> (.*)" |> Re.compile
(** Compiled POSIX regex for one line of a weechat log: three
    tab-separated parts. Group 1 is a (possibly empty) run of digits,
    spaces and colons, group 2 is a run of characters other than a closing
    angle bracket, group 3 is the rest of the line.

    NOTE(review): the character class of group 1 contains no dash, so a
    dated timestamp such as 2018-01-01 12:00:00 would presumably be
    captured only from after its last dash - confirm against real logs. *)
let re_weechat = Re.compile (Re.Posix.re "([0-9 :]*)\t([^>]*)\t(.*)")
(** Supported log formats. Each constructor selects the regex used to
    split a log line (see [re_of_fmt]). *)
type fmt =
  | Irssi
  | Weechat
(** [re_of_fmt fmt] is the compiled regex matching one line of a log
    written in format [fmt]. *)
let re_of_fmt fmt =
  match fmt with
  | Weechat -> re_weechat
  | Irssi -> re_irssi
(** [fmt_of_string name] parses a format name. Only the exact lowercase
    names [irssi] and [weechat] are accepted.
    @raise Invalid_argument on any other string. *)
let fmt_of_string name =
  match name with
  | "weechat" -> Weechat
  | "irssi" -> Irssi
  | other -> invalid_arg ("unknown Irclog.fmt: " ^ other)
(** [string_of_fmt fmt] is the lowercase name of [fmt]; the result is
    accepted back by [fmt_of_string]. *)
let string_of_fmt fmt =
  match fmt with
  | Weechat -> "weechat"
  | Irssi -> "irssi"
(** Names of all known formats, irssi first and weechat second. *)
let fmt_l = [ string_of_fmt Irssi; string_of_fmt Weechat ]
(** [seq_lines_ ic yield] reads [ic] line by line with [input_line] and
    calls [yield] on each line, in order, until [End_of_file] is raised by
    the read. The channel is not closed here. *)
let rec seq_lines_ ic yield =
  match input_line ic with
  | exception End_of_file -> ()
  | line ->
    yield line;
    seq_lines_ ic yield
(** [norm_author s] drops one leading [+] or [@] character from [s], if
    present. Any other string, including the empty one, is returned
    unchanged. At most one character is removed. *)
let norm_author s =
  let len = String.length s in
  if len > 0 && (s.[0] = '+' || s.[0] = '@')
  then String.sub s 1 (len - 1)
  else s
(** [parse_record fmt s] tries to read the log line [s] in format [fmt].

    Returns [None] when the regex for [fmt] does not match [s], or when the
    format is [Weechat] and the normalized author is one of [--], [<--] or
    [-->]. Otherwise returns a record with the trimmed time, the trimmed
    and normalized author, and the message exactly as captured. *)
let parse_record fmt s =
  match Re.exec_opt (re_of_fmt fmt) s with
  | None -> None
  | Some groups ->
    let time = String.trim (Re.Group.get groups 1) in
    let author = norm_author (String.trim (Re.Group.get groups 2)) in
    let msg = Re.Group.get groups 3 in
    (* These author values are skipped for weechat only; presumably they
       are pseudo-nicks of join/part/notice lines - TODO confirm. *)
    let is_weechat_marker =
      match author, fmt with
      | ("--" | "<--" | "-->"), Weechat -> true
      | _ -> false
    in
    if is_weechat_marker then None else Some { author; time; msg }
(** [seq_record_ fmt ic yield] reads every line of [ic] and calls [yield]
    on each record that [parse_record fmt] accepts. Lines that do not
    parse are silently skipped. *)
let seq_record_ fmt ic yield =
  let on_line line =
    match parse_record fmt line with
    | Some record -> yield record
    | None -> ()
  in
  seq_lines_ ic on_line
(** [iter_file fmt file yield] opens [file] through [CCIO.with_in] and
    calls [yield] on every record of format [fmt] found in it. *)
let iter_file fmt file yield =
  let read_all ic = seq_record_ fmt ic yield in
  CCIO.with_in file read_all
(** [seq_files_ dir yield] walks [dir] recursively and calls [yield] on the
    path of every entry that is not a directory. Paths are built with
    [Filename.concat] from [dir], so they are relative exactly when [dir]
    is. The entries [.] and [..] are skipped.

    The directory handle is released by the [~h] handler given to
    [CCFun.finally1]. It stays open while sub-directories are being
    visited. *)
let rec seq_files_ dir yield =
  let handle = Unix.opendir dir in
  let visit entry =
    match entry with
    | "." | ".." -> ()
    | _ ->
      let path = Filename.concat dir entry in
      if Sys.is_directory path then seq_files_ path yield else yield path
  in
  (* [Unix.readdir] signals the end of the directory with [End_of_file]. *)
  let rec walk h =
    match Unix.readdir h with
    | exception End_of_file -> ()
    | entry ->
      visit entry;
      walk h
  in
  CCFun.finally1 ~h:(fun () -> Unix.closedir handle) walk handle
(** [iter_dir fmt dir yield] parses every file found under [dir] by
    [seq_files_] as a log in format [fmt], and calls [yield] on each pair
    of file path and record. *)
let iter_dir fmt dir yield =
  let visit_file file =
    let emit record = yield (file, record) in
    CCIO.with_in file (fun ic -> seq_record_ fmt ic emit)
  in
  seq_files_ dir visit_file
(** [iter_file_or_dir fmt s] is an iterator over the records of format
    [fmt] found at path [s]. When [s] is a directory, the records of every
    file under it are produced in turn; otherwise [s] itself is read as a
    single log file.

    The directory test runs as soon as [fmt] and [s] are supplied, before
    the resulting iterator is consumed. *)
let iter_file_or_dir fmt s =
  match Sys.is_directory s with
  | false -> iter_file fmt s
  | true -> Iter.flat_map (iter_file fmt) (seq_files_ s)