Source file options.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
module Stable0 = struct
  open! Core.Core_stable

  module Encoding = struct
    module V1 = struct
      type t =
        | Latin1
        | Utf8
      [@@deriving bin_io, compare, hash, sexp]

      let%expect_test _ =
        print_endline [%bin_digest: t];
        [%expect {| bf67b13f243e7b82146959041854651d |}]
      ;;
    end
  end

  (* This [Serialization.t] is the serialization of [t] and it's slightly
     different from [t]:
     - Serialization.t has [case_insensitive] instead of [case_sensitive], since
       Re2.Options.default has [case_sensitive] as the only field that is [true].
       By using [case_insensitive] we have the nice property where default representation
       has all bool as false (and an empty sexp).
     - [max_mem] is stripped (and populated with default) during
       serialisation, since I don't think it makes sense to serialise this
     - it seems that some parameters in Re2.Options.t have implied values,
       so there might be room for improvement of this [t] (at the cost of
       more complex code here); for example posix_syntax=false implies that
       some of the other parameters are actually ignored
  *)
  module V2 = struct
    module Serialization = struct
      type t =
        { case_insensitive : bool [@sexp.bool]
        ; dot_nl : bool [@sexp.bool]
        ; encoding : Encoding.V1.t
                     [@sexp.default Encoding.V1.Utf8] [@sexp_drop_default.compare]
        ; literal : bool [@sexp.bool]
        ; log_errors : bool [@sexp.bool]
        ; longest_match : bool [@sexp.bool]
        ; never_capture : bool [@sexp.bool]
        ; never_nl : bool [@sexp.bool]
        ; one_line : bool [@sexp.bool]
        ; perl_classes : bool [@sexp.bool]
        ; posix_syntax : bool [@sexp.bool]
        ; word_boundary : bool [@sexp.bool]
        }
      [@@deriving bin_io, compare, hash, sexp]

      let%expect_test _ =
        print_endline [%bin_digest: t];
        [%expect {| 7e4458318a614214b63cb4b98577c10a |}]
      ;;
    end
  end
end

open! Core

module Encoding = struct
  type t = Stable0.Encoding.V1.t =
    | Latin1
    | Utf8
  [@@deriving compare, equal, sexp_of]

  module C_repr = struct
    type t = int [@@deriving compare, sexp_of]

    let equal = Int.( = )

    (* would use [@@deriving equal], but equal_int is not in scope *)

    external get_latin1 : unit -> int = "mlre2__options__encoding__get_latin1" [@@noalloc]
    external get_utf8 : unit -> int = "mlre2__options__encoding__get_utf8" [@@noalloc]

    let utf8 = get_utf8 ()
    let latin1 = get_latin1 ()
  end

  let to_c_repr = function
    | Latin1 -> C_repr.latin1
    | Utf8 -> C_repr.utf8
  ;;

  let of_c_repr c_repr =
    if C_repr.equal c_repr C_repr.utf8
    then Utf8
    else if C_repr.equal c_repr C_repr.latin1
    then Latin1
    else raise_s [%message "Unexpected Encoding.C_repr" ~_:(c_repr : C_repr.t)]
  ;;
end

type t =
  { case_sensitive : bool
  ; dot_nl : bool
  ; encoding : Encoding.t
  ; literal : bool
  ; log_errors : bool
  ; longest_match : bool
  ; max_mem : int
  ; never_capture : bool
  ; never_nl : bool
  ; one_line : bool
  ; perl_classes : bool
  ; posix_syntax : bool
  ; word_boundary : bool
  }
[@@deriving compare, fields, sexp_of]

module C_repr = struct
  type t

  (*$ Re2_options_cinaps.print_c_repr_external_bindings () *)
  external case_sensitive : t -> bool = "mlre2__options__case_sensitive" [@@noalloc]

  external set_case_sensitive : t -> bool -> unit = "mlre2__options__set_case_sensitive"
  [@@noalloc]

  external dot_nl : t -> bool = "mlre2__options__dot_nl" [@@noalloc]
  external set_dot_nl : t -> bool -> unit = "mlre2__options__set_dot_nl" [@@noalloc]
  external encoding : t -> Encoding.C_repr.t = "mlre2__options__encoding" [@@noalloc]

  external set_encoding : t -> Encoding.C_repr.t -> unit = "mlre2__options__set_encoding"
  [@@noalloc]

  external literal : t -> bool = "mlre2__options__literal" [@@noalloc]
  external set_literal : t -> bool -> unit = "mlre2__options__set_literal" [@@noalloc]
  external log_errors : t -> bool = "mlre2__options__log_errors" [@@noalloc]

  external set_log_errors : t -> bool -> unit = "mlre2__options__set_log_errors"
  [@@noalloc]

  external longest_match : t -> bool = "mlre2__options__longest_match" [@@noalloc]

  external set_longest_match : t -> bool -> unit = "mlre2__options__set_longest_match"
  [@@noalloc]

  external max_mem : t -> int = "mlre2__options__max_mem" [@@noalloc]
  external set_max_mem : t -> int -> unit = "mlre2__options__set_max_mem" [@@noalloc]
  external never_capture : t -> bool = "mlre2__options__never_capture" [@@noalloc]

  external set_never_capture : t -> bool -> unit = "mlre2__options__set_never_capture"
  [@@noalloc]

  external never_nl : t -> bool = "mlre2__options__never_nl" [@@noalloc]
  external set_never_nl : t -> bool -> unit = "mlre2__options__set_never_nl" [@@noalloc]
  external one_line : t -> bool = "mlre2__options__one_line" [@@noalloc]
  external set_one_line : t -> bool -> unit = "mlre2__options__set_one_line" [@@noalloc]
  external perl_classes : t -> bool = "mlre2__options__perl_classes" [@@noalloc]

  external set_perl_classes : t -> bool -> unit = "mlre2__options__set_perl_classes"
  [@@noalloc]

  external posix_syntax : t -> bool = "mlre2__options__posix_syntax" [@@noalloc]

  external set_posix_syntax : t -> bool -> unit = "mlre2__options__set_posix_syntax"
  [@@noalloc]

  external word_boundary : t -> bool = "mlre2__options__word_boundary" [@@noalloc]

  external set_word_boundary : t -> bool -> unit = "mlre2__options__set_word_boundary"
  [@@noalloc]
  (*$*)

  external create_quiet : unit -> t = "mlre2__options__create_quiet"
end

let to_c_repr t =
  let c_repr = C_repr.create_quiet () in
  let f set _field _t value = set c_repr value in
  Fields.Direct.iter
    t (*$ Re2_options_cinaps.print_to_c_repr_fields () *)
    ~case_sensitive:(f C_repr.set_case_sensitive)
    ~dot_nl:(f C_repr.set_dot_nl)
    ~encoding:
      (f (fun c_repr value -> C_repr.set_encoding c_repr (Encoding.to_c_repr value)))
    ~literal:(f C_repr.set_literal)
    ~log_errors:(f C_repr.set_log_errors)
    ~longest_match:(f C_repr.set_longest_match)
    ~max_mem:(f C_repr.set_max_mem)
    ~never_capture:(f C_repr.set_never_capture)
    ~never_nl:(f C_repr.set_never_nl)
    ~one_line:(f C_repr.set_one_line)
    ~perl_classes:(f C_repr.set_perl_classes)
    ~posix_syntax:(f C_repr.set_posix_syntax)
    ~word_boundary:(f C_repr.set_word_boundary)
  (*$*);
  c_repr
;;

let of_c_repr =
  let f get _field () = get, () in
  Fields.make_creator (*$ Re2_options_cinaps.print_of_c_repr_fields () *)
    ~case_sensitive:(f C_repr.case_sensitive)
    ~dot_nl:(f C_repr.dot_nl)
    ~encoding:(f (fun c_repr -> Encoding.of_c_repr (C_repr.encoding c_repr)))
    ~literal:(f C_repr.literal)
    ~log_errors:(f C_repr.log_errors)
    ~longest_match:(f C_repr.longest_match)
    ~max_mem:(f C_repr.max_mem)
    ~never_capture:(f C_repr.never_capture)
    ~never_nl:(f C_repr.never_nl)
    ~one_line:(f C_repr.one_line)
    ~perl_classes:(f C_repr.perl_classes)
    ~posix_syntax:(f C_repr.posix_syntax)
    ~word_boundary:(f C_repr.word_boundary) (*$*)
    ()
  |> fst
;;

let default = C_repr.create_quiet () |> of_c_repr
let latin1 = { default with encoding = Latin1 }
let noisy = { default with log_errors = true }
let posix = { default with longest_match = true; posix_syntax = true }
let default_max_mem = max_mem default

module Private = struct
  module C_repr = C_repr

  let of_c_repr = of_c_repr
  let to_c_repr = to_c_repr
end

module Stable = struct
  include Stable0

  module V2 = struct
    module Serialization = V2.Serialization

    type nonrec t = t =
      { case_sensitive : bool
      ; dot_nl : bool
      ; encoding : Encoding.V1.t
      ; literal : bool
      ; log_errors : bool
      ; longest_match : bool
      ; max_mem : int
      ; never_capture : bool
      ; never_nl : bool
      ; one_line : bool
      ; perl_classes : bool
      ; posix_syntax : bool
      ; word_boundary : bool
      }
    [@@deriving compare, hash]

    let to_serialization
          { case_sensitive
          ; dot_nl
          ; encoding
          ; literal
          ; log_errors
          ; longest_match
          ; max_mem = _
          ; never_capture
          ; never_nl
          ; one_line
          ; perl_classes
          ; posix_syntax
          ; word_boundary
          }
      : Serialization.t
      =
      { case_insensitive = not case_sensitive
      ; dot_nl
      ; encoding
      ; literal
      ; log_errors
      ; longest_match
      ; never_capture
      ; never_nl
      ; one_line
      ; perl_classes
      ; posix_syntax
      ; word_boundary
      }
    ;;

    let of_serialization
          ({ case_insensitive
           ; dot_nl
           ; encoding
           ; literal
           ; log_errors
           ; longest_match
           ; never_capture
           ; never_nl
           ; one_line
           ; perl_classes
           ; posix_syntax
           ; word_boundary
           } :
             Serialization.t)
      =
      { case_sensitive = not case_insensitive
      ; dot_nl
      ; encoding
      ; literal
      ; log_errors
      ; longest_match
      ; max_mem = default_max_mem
      ; never_capture
      ; never_nl
      ; one_line
      ; perl_classes
      ; posix_syntax
      ; word_boundary
      }
    ;;

    let sexp_of_t t = Serialization.sexp_of_t (to_serialization t)
    let t_of_sexp sexp = of_serialization (Serialization.t_of_sexp sexp)
    let default () = to_serialization default
    let is_default t = [%compare.equal: Serialization.t] (to_serialization t) (default ())

    include
      Core.Binable.Of_binable_without_uuid [@alert "-legacy"]
        (Serialization)
        (struct
          type nonrec t = t

          let to_binable = to_serialization
          let of_binable = of_serialization
        end)

    (* This check verifies the default value produces '()',
       acknowledging that the fields that we believe are default in C code, are
       coded as default in the sexp as well. If this changes, a new stable type
       should be created *)
    let%expect_test _ =
      [%sexp_of: Serialization.t] (default ()) |> print_s;
      [%expect {| () |}]
    ;;
  end
end