Source file piqi_getopt.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
(*
   Copyright 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2017 Anton Lavrik

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*)


(*
 * Interpreting command-line arguments as Piq data
 *)


(* @doc

   Piqi getopt uses different option syntax than Posix/GNU getopt, because their
   syntax is way too relaxed and imprecise. These are examples of GNU getopt
   options and their possible meanings:

        --c-long=10 // c-long = 10
        -c 10 // c, 10
        -c10 // c = 10
        -ac10 // a, c = 10
        -ca10 // c = a10

   In Piqi getopt, both short and long options are supported. Both type of
   options must be seprated from a value by whitespace, e.g.

        -c 10
        --c-long 10

   Short options start with '-' character followed by one or more letters. In
   the latter case, each letter is treated as if it was specified separaterly.
   For example,

        -abc 10

        is equivalent to

        -a -b -c 10

   '-' followed by a <number> is normally treated as a negative number, e.g.

        -10
        -0.nan
        -0.0
        -0.infinity

    Words will be treated either as Piq strings or binaries or words, depending
    on the expected type. Examples of words:

        a

        foo

    Strings or binaries can be specified explicitly using Piq string syntax.

        '"a"'

        '"foo\u0000"'

        '"\x00\n\r"'

    Lists can be specified using regular Piq syntax, but '[' and ']' characters
    can be specified as separate arguments and not as a part of other arguments.
    Examples:

        []

        [ a b ] // this is correct
        [a b]   // this is incorrect

        [ a b 10 -1 ]

        [ a b [ c d ] ]


    Values for the arguments that start with '@' character will be loaded from a
    file which names follows the '@' character. For example:

        @foo  // string or binary value will be loaded from file "foo"

TODO:   @-    // string or binary value will be loaded from stdin
*)


module C = Piqi_common
open C


(*
 * Set "alt-name" fields for Piqi options and fields based on "getopt-name"
 * fields provided by user in the Piqi spec.
 *
 * "alt-name" field is specific to the library implementation while
 * "getopt-name" field is a part of public Piqi specification.
 *)

let check_getopt_letter s =
  let error err =
    error s ("invalid getopt-letter " ^ U.quote s ^ ": " ^ err)
  in
  (* NOTE: getopt-letter is a Piq word and, therefore, it can't be empty -- so
   * there's no need to check for that *)

  if String.length s > 1
  then error "must contain exactly one letter";

  match s.[0] with
    | 'a'..'z' | 'A'..'Z' -> ()
    | c -> error "must be lower- or upper-case alphabet letter"


let getopt_name_field x =
  let open Field in
  let letter = x.getopt_letter in
  match letter with
    | None -> ()
    | Some n ->
        check_getopt_letter n;
        x.piq_alias <- letter


let getopt_name_option x =
  let open Option in
  let letter = x.getopt_letter in
  match letter with
    | None -> ()
    | Some n ->
        check_getopt_letter n;
        x.piq_alias <- letter


(* name fields and options *)
let getopt_name_record x =
   List.iter getopt_name_field x.R.field

let getopt_name_variant x =
   List.iter getopt_name_option x.V.option

let getopt_name_enum x =
   List.iter getopt_name_option x.E.option

let getopt_name_typedef = function
  | `record x -> getopt_name_record x
  | `variant x -> getopt_name_variant x
  | `enum x -> getopt_name_enum x
  | _ -> ()


let getopt_name_defs defs =
    (* name fields and options *)
    List.iter getopt_name_typedef defs


let getopt_name_piqi _idtable (piqi:T.piqi) =
  let open P in
  getopt_name_defs piqi.resolved_typedef


(* NOTE: this function is called only in case if a getopt-related operation is
 * performed (e.g. "piqi getopt" or "piqi call". We don't need this startup
 * overhead otherwise *)
let init () =
  trace "init getopt\n";
  Piqi.register_processing_hook getopt_name_piqi


(**)


(* fake filename for error reporting *)
let getopt_filename = "argv"


let error s =
  (* using fake location here, the actual location (i.e. the index of the
   * argument) will be correctly provided by the exception handler below *)
  let loc = (0,0) in
  raise (Piq_lexer.Error (s, loc))


let parse_string_arg s =
  let lexbuf = Piq_lexer.init_from_string s in
  let token () =
    try
      Piq_lexer.token lexbuf
    with
      Piq_lexer.Error (err, _loc) -> error (err ^ ": " ^ s)
  in
  let res = token () in
  match res with
    | Piq_lexer.String _ ->
        (* there must be no other literal after the string *)
        if token() = Piq_lexer.EOF
        then res
        else
          (* s is alread quoted *)
          error ("trailing characters after string: " ^ s)
    | _ ->
        assert false (* something that starts with '"' have to be a string *)


let parse_word_arg s =
  if Piq_lexer.is_valid_word s
  then
    Piq_lexer.Word s
  else
    (* Raw string -- just a sequence of bytes: may be parsed as binary or utf8
     * string *)
    Piq_lexer.Raw_string s


let parse_name_arg s =
  (* cut the leading '-' and check if what we got is a valid Piq name *)
  let n = String.sub s 1 (String.length s - 1) in
  if Piqi_name.is_valid_name n ~allow:"."
  then (
    let s = Bytes.of_string s in
    Bytes.set s 0 '.'; (* replace '-' with '.' to turn it into a Piq name *)
    Piq_lexer.Name (Bytes.unsafe_to_string s)
  )
  else error ("invalid name: " ^ U.quote s)


let read_file filename =
  let ch = open_in_bin filename in
  let len = in_channel_length ch in
  let buf = Buffer.create len in
  Buffer.add_channel buf ch len;
  close_in ch;
  Buffer.contents buf


let read_file filename =
  try read_file filename
  with Sys_error s ->
    error ("error reading file argument: " ^ s)


let parse_arg s =
  let len = String.length s in

  match s with
    (* NOTE: we don't support '(' and ')' and '[]' is handeled separately below *)
    | "[" -> Piq_lexer.Lbr
    | "]" -> Piq_lexer.Rbr
    | s when s.[0] = '"' -> parse_string_arg s
    | s when s.[0] = '@' ->
        let filename = String.sub s 1 (len - 1) in
        let content = read_file filename in
        (* Raw string -- just a sequence of bytes: may be parsed as either
         * binary or utf8 string *)
        Piq_lexer.Raw_string content

    (* parsing long options starting with "--"
     *
     * NOTE: it is safe to check s.[1] because a single '-' case is eliminated
     * in the calling function *)
    | s when s.[0] = '-' && s.[1] = '-' ->
        let name = String.sub s 1 (len - 1) in (* skip first '-' *)
        parse_name_arg name

    | s when s.[0] = '.' ->
        parse_name_arg s (* XXX: allowing Piq -style names *)

    (* XXX: support typenames and, possibly, other literals? *)
    | s ->
        parse_word_arg s


let parse_argv start =
  let error i err =
    C.error_at (getopt_filename, 0, i) err
  in
  let make_token i tok =
    (* 1-based token position in the argv starting from the position after "--" *)
    let loc = (0, i - start + 1) in
    (tok, loc)
  in
  let parse_make_arg i x =
    let tok =
      try parse_arg x
      with Piq_lexer.Error (err, _loc) -> error i err
    in
    make_token i tok
  in
  let parse_letter_args i s =
    let len = String.length s in
    let rec aux j =
      if j = len
      then [] (* end of string *)
      else
        let c = s.[j] in
        match c with
          (* only letters are allowed as single-letter options *)
          | 'a'..'z' | 'A'..'Z' ->
              (* creating Piq name: '.' followed by the letter *)
              let word = Bytes.create 2 in
              Bytes.set word 0 '.'; Bytes.set word 1 c;
              let tok = Piq_lexer.Name (Bytes.unsafe_to_string word) in
              (make_token i tok) :: (aux (j+1))
          | _ ->
              error i ("invalid single-letter argument: " ^ Char.escaped c)
    in
    aux 1 (* start at position 1 skipping the leading '-' *)
  in
  let len = Array.length Sys.argv in
  let rec aux i =
    if i >= len
    then [make_token i Piq_lexer.EOF]
    else
      let a = Sys.argv.(i) in
      match a with
        | "" ->
            error i "empty argument"

        | "-" | "--" ->
            error i ("invalid argument: " ^ a)

        | "[]" -> (* split it into two tokens '[' and ']' *)
            (parse_make_arg i "[") :: (parse_make_arg i "]") :: (aux (i+1))

        (* After skipping negative integers, and those arguments that start with
         * '--', we end up having '-' followed by one or more characters. We
         * treat those characters as single-letter arguments.
         *
         * NOTE: it is safe to check s.[1] because a single '-' case is
         * eliminated above *)
        | s when s.[0] = '-' && s.[1] <> '-' && (s.[1] < '0' || s.[1] > '9') ->
            (parse_letter_args i s) @ (aux (i+1))

        | s ->
            (parse_make_arg i s) :: (aux (i+1))
  in
  aux start


(* index of the "--" element in argv array *)
let argv_start_index = ref 0


(* find the position of the first argument after "--" *)
let rest_fun arg =
  if !argv_start_index = 0 (* first argument after first occurrence of "--" *)
  then argv_start_index := !Arg.current + 1
  else ()


let arg__rest =
    "--", Arg.Rest rest_fun,
    "separator between piqi command-line arguments and data arguments"


let getopt_piq () :piq_ast list =
  let start =
    if !argv_start_index = 0 (* "--" is not present in the list of arguments *)
    then Array.length Sys.argv
    else !argv_start_index
  in
  let tokens = parse_argv start in
  let piq_parser = Piq_parser.init_from_token_list getopt_filename tokens in
  let piq_ast_list =
    U.with_bool Config.piq_relaxed_parsing true
    (fun () -> Piq_parser.read_all piq_parser)
  in
  piq_ast_list


let parse_args (piqtype: T.piqtype) (args: piq_ast list) :Piqobj.obj =
  let ast =
    match args with
      | [x] when not (C.is_container_type piqtype) ->  (* scalar type? *)
          x
      | l ->
          let res = `list l in
          (* set the location *)
          let loc = (getopt_filename, 0, 1) in
          Piqloc.addlocret loc res
  in
  let ast = Piq_parser.expand ast in
  let piqobj = U.with_bool Config.piq_relaxed_parsing true
    (fun () -> Piqobj_of_piq.parse_obj piqtype ast)
  in
  piqobj