(* Source file: preprocess.ml *)
(****************************************************************************)
(*     Sail                                                                 *)
(*                                                                          *)
(*  Sail and the Sail architecture models here, comprising all files and    *)
(*  directories except the ASL-derived Sail code in the aarch64 directory,  *)
(*  are subject to the BSD two-clause licence below.                        *)
(*                                                                          *)
(*  The ASL derived parts of the ARMv8.3 specification in                   *)
(*  aarch64/no_vector and aarch64/full are copyright ARM Ltd.               *)
(*                                                                          *)
(*  Copyright (c) 2013-2021                                                 *)
(*    Kathyrn Gray                                                          *)
(*    Shaked Flur                                                           *)
(*    Stephen Kell                                                          *)
(*    Gabriel Kerneis                                                       *)
(*    Robert Norton-Wright                                                  *)
(*    Christopher Pulte                                                     *)
(*    Peter Sewell                                                          *)
(*    Alasdair Armstrong                                                    *)
(*    Brian Campbell                                                        *)
(*    Thomas Bauereiss                                                      *)
(*    Anthony Fox                                                           *)
(*    Jon French                                                            *)
(*    Dominic Mulligan                                                      *)
(*    Stephen Kell                                                          *)
(*    Mark Wassell                                                          *)
(*    Alastair Reid (Arm Ltd)                                               *)
(*                                                                          *)
(*  All rights reserved.                                                    *)
(*                                                                          *)
(*  This work was partially supported by EPSRC grant EP/K008528/1 <a        *)
(*  href="http://www.cl.cam.ac.uk/users/pes20/rems">REMS: Rigorous          *)
(*  Engineering for Mainstream Systems</a>, an ARM iCASE award, EPSRC IAA   *)
(*  KTF funding, and donations from Arm.  This project has received         *)
(*  funding from the European Research Council (ERC) under the European     *)
(*  Union’s Horizon 2020 research and innovation programme (grant           *)
(*  agreement No 789108, ELVER).                                            *)
(*                                                                          *)
(*  This software was developed by SRI International and the University of  *)
(*  Cambridge Computer Laboratory (Department of Computer Science and       *)
(*  Technology) under DARPA/AFRL contracts FA8650-18-C-7809 ("CIFV")        *)
(*  and FA8750-10-C-0237 ("CTSRD").                                         *)
(*                                                                          *)
(*  SPDX-License-Identifier: BSD-2-Clause                                   *)
(****************************************************************************)

open Parse_ast

(* Simple preprocessor features for conditional file loading *)
(* Sets of strings; used in this file to hold the names of the defined
   preprocessor symbols. *)
module StringSet = Set.Make (String)

(* Adjust a pragma location so it doesn't end after the newline. *)
let pragma_loc l =
  let open Lexing in
  Reporting.map_loc_range
    (fun p1 p2 ->
      if p1.pos_lnum + 1 = p2.pos_lnum then (
        let p2 = { p2 with pos_cnum = p2.pos_cnum - 1; pos_bol = p1.pos_bol; pos_lnum = p1.pos_lnum } in
        (p1, p2)
      )
      else (p1, p2)
    )
    l

(* The symbols that are defined before any file has been preprocessed.
   clear_symbols restores the symbol set to exactly this value. *)
let default_symbols =
  StringSet.empty
  |> StringSet.add "FEATURE_IMPLICITS"
  |> StringSet.add "FEATURE_CONSTANT_TYPES"
  |> StringSet.add "FEATURE_BITVECTOR_TYPE"
  |> StringSet.add "FEATURE_UNION_BARRIER"
  |> StringSet.add "FEATURE_STRICT_VAR"
  |> StringSet.add "FEATURE_STRICT_BITVECTOR"

(* The mutable set of currently defined preprocessor symbols. It starts out as
   default_symbols and is updated by the functions, interactive commands and
   directive handling found in this file. *)
let symbols = ref default_symbols

(* [have_symbol symbol] is true exactly when [symbol] is a member of the
   current symbol set. *)
let have_symbol symbol =
  let defined = !symbols in
  StringSet.mem symbol defined

(* Reset the symbol set to default_symbols, discarding every symbol that was
   added (and restoring every default that was removed) since start-up. *)
let clear_symbols () = symbols := default_symbols

(* [add_symbol str] adds [str] to the current symbol set. Adding a symbol that
   is already present leaves the set unchanged. *)
let add_symbol str =
  let updated = StringSet.add str !symbols in
  symbols := updated

(* Register the interactive commands that inspect and modify the symbol set:
   define_symbol, undef_symbol and symbols. Registration happens once, when
   this module is initialised, in the order written below. *)
let () =
  let open Interactive in
  (* define_symbol <symbol>: add the symbol to the set. *)
  let define_action = ArgString ("symbol", fun symbol -> ActionUnit (fun _ -> add_symbol symbol)) in
  register_command ~name:"define_symbol" ~help:"Define preprocessor symbol" define_action;

  (* undef_symbol <symbol>: remove the symbol from the set. *)
  let undefine symbol = symbols := StringSet.remove symbol !symbols in
  let undef_action = ArgString ("symbol", fun symbol -> ActionUnit (fun _ -> undefine symbol)) in
  register_command ~name:"undef_symbol" ~help:"Undefine preprocessor symbol" undef_action;

  (* symbols: print every defined symbol, one per line. *)
  let print_action = ActionUnit (fun _ -> List.iter print_endline (StringSet.elements !symbols)) in
  register_command ~name:"symbols" ~help:"Print defined preprocessor symbols" print_action

(* [cond_pragma l defs] splits the definitions that follow a conditional
   directive ($ifdef, $ifndef or $iftarget) located at [l].

   It returns [(then_defs, else_defs, rest)]:
   - [then_defs] are the definitions up to the $else (or $endif) that belongs
     to this conditional,
   - [else_defs] are the definitions between that $else and the $endif,
   - [rest] is everything after the $endif.

   Nested conditionals are tracked with a depth counter and are copied into
   the branch they occur in unchanged, including their own $else and $endif
   directives, so they can be processed later.

   Raises a general Reporting error at [pragma_loc l] if [defs] runs out
   before the closing $endif is found. *)
let cond_pragma l defs =
  (* [depth] counts the nested conditionals currently open, [in_then] tells
     which branch is being collected; both branches are accumulated reversed. *)
  let rec scan depth in_then thens elses = function
    | [] -> raise (Reporting.err_general (pragma_loc l) "$ifdef, $ifndef, or $iftarget never ended by $endif")
    | DEF_aux (DEF_pragma ("endif", _), _) :: rest when depth = 0 -> (List.rev thens, List.rev elses, rest)
    | DEF_aux (DEF_pragma ("else", _), _) :: rest when depth = 0 -> scan depth false thens elses rest
    | def :: rest ->
        let depth =
          match def with
          | DEF_aux (DEF_pragma (("ifdef" | "ifndef" | "iftarget"), _), _) -> depth + 1
          | DEF_aux (DEF_pragma ("endif", _), _) -> depth - 1
          | _ -> depth
        in
        if in_then then scan depth in_then (def :: thens) elses rest
        else scan depth in_then thens (def :: elses) rest
  in
  scan 0 true [] [] defs

(* [wrap_include l file defs] brackets the definitions obtained from an
   included file with an include_start and an include_end pragma. Both
   markers carry [Pragma_line (file, 1)] and the location [l]. An empty
   list of definitions is returned as is, without any markers. *)
let wrap_include l file defs =
  match defs with
  | [] -> []
  | _ :: _ ->
      let marker name = DEF_aux (DEF_pragma (name, Pragma_line (file, 1)), l) in
      (marker "include_start" :: defs) @ [marker "include_end"]

(* Describes where the option arguments currently being parsed come from, so
   that get_argv_position can compute a source position for them:
   - Argv_actual: get_argv_position derives the position from Arg.current and
     Sail_file.argv.
   - Argv_unknown: no position can be reported.
   - Argv_inline (current, p, offsets): the arguments were split out of a
     string by create_argv_array. [current] is the argument counter given to
     create_argv_array, [p] is the base position, and [offsets] holds the
     character offset of each non-empty argument relative to [p]. *)
type argv_offsets = Argv_actual | Argv_unknown | Argv_inline of int ref * Lexing.position * int list

(* The argument source currently in effect. It starts as Argv_actual;
   create_argv_array overwrites it and hands back a closure that restores the
   value it had before. *)
let inline_argv = ref Argv_actual

(* [create_argv_array ~offset ~current l str] splits the argument string [str]
   into its space-separated arguments and updates [inline_argv] so that
   get_argv_position can locate them.

   - [offset] is added to the start column of [l] to form the base position
     from which the argument offsets are measured.
   - [current] is the counter stored in the Argv_inline state.
   - [l] is the location the string came from.

   Returns [(args, reset)], where [reset ()] puts [inline_argv] back to the
   value it had when this function was called.

   If [l] cannot be simplified to a concrete position, [inline_argv] becomes
   Argv_unknown and the string is split on runs of spaces. *)
let create_argv_array ~offset ~current l str =
  let saved = !inline_argv in
  let reset () = inline_argv := saved in
  match Reporting.simp_loc l with
  | Some (p, _) ->
      (* Walk the pieces, tracking the column at which each one starts. An
         empty piece comes from a repeated space and only advances the column
         by one. *)
      let locate column piece =
        match piece with
        | "" -> (column + 1, None)
        | _ -> (column + String.length piece + 1, Some (column, piece))
      in
      let _, located = Util.fold_left_map locate 0 (Util.split_on_char ' ' str) in
      let offsets, words = List.split (Util.option_these located) in
      let base = { p with pos_cnum = p.pos_cnum + offset } in
      inline_argv := Argv_inline (current, base, offsets);
      (words, reset)
  | None ->
      inline_argv := Argv_unknown;
      (Str.split (Str.regexp " +") str, reset)

(* [get_argv_position ~plus] returns a position for the argument currently
   being parsed, or [None] when no position is available.

   - Argv_unknown: always [None].
   - Argv_actual: the position is the start of line
     [!Arg.current + plus + 1] of Sail_file.argv, with file name ARGV;
     [None] if Sail_file.bol_of_lnum has no such line.
   - Argv_inline: the base position shifted by the character offset stored at
     index [!current]; [None] if there is no offset at that index.

   NOTE(review): [plus] is not consulted in the Argv_inline case. Confirm
   against the callers whether that is intended. *)
let get_argv_position ~plus =
  let open Util.Option_monad in
  match !inline_argv with
  | Argv_inline (current, base, offsets) ->
      let index = !current in
      let* delta = List.nth_opt offsets index in
      Some { base with Lexing.pos_cnum = base.Lexing.pos_cnum + delta }
  | Argv_actual ->
      let line = !Arg.current + plus + 1 in
      let* line_start = Sail_file.bol_of_lnum line Sail_file.argv in
      Some { Lexing.pos_fname = "ARGV"; pos_lnum = line; pos_bol = line_start; pos_cnum = line_start }
  | Argv_unknown -> None

(* [preprocess dir target opts defs] expands the preprocessor directives found
   in the list of parsed definitions [defs] and returns the resulting list.

   - [dir] is handed to Reporting.get_sail_dir to locate the library
     directory used by angle-bracket includes.
   - [target] is the optional target name that $iftarget compares against.
   - [opts] is the Arg specification list used to parse $option directives.

   Directives handled here:
   - $define adds a symbol to the symbol set and is dropped from the output.
   - $include_error raises a general Reporting error carrying the message,
     annotated with the chain of files that led to the include.
   - $option parses its arguments with [opts]; the pragma itself is kept.
   - $ifdef, $ifndef and $iftarget keep one of their two branches.
   - $include parses the named file, preprocesses it recursively and splices
     the result in, wrapped by wrap_include. A malformed argument produces a
     warning and the directive is skipped.
   - $suppress_warnings silences warnings for the file containing it.
   - file_start and file_end pragmas are dropped.
   - Any other pragma is kept; a warning is issued unless its name is listed
     by Pragma.all or contains a hash character.

   Outcome definitions have their inner definitions preprocessed as well, and
   a default order definition defines the symbol _DEFAULT_ORDER_SET together
   with _DEFAULT_INC or _DEFAULT_DEC where applicable.

   Errors are raised as Reporting exceptions; this includes invalid $option
   flags, $include_error, and a conditional without a closing $endif. *)
let preprocess dir target opts =
  (* [includes] is the stack of (file, location) pairs of the $include
     directives currently being expanded, innermost first. [acc] is the
     output built so far, in reverse order. *)
  let rec aux includes acc = function
    | [] -> List.rev acc
    | DEF_aux (DEF_pragma ("define", Pragma_line (symbol, _)), _) :: defs ->
        add_symbol symbol;
        aux includes acc defs
    | DEF_aux (DEF_pragma ("include_error", Pragma_line (message, _)), l) :: defs -> begin
        match List.rev includes with
        | [] -> raise (Reporting.err_general (pragma_loc l) message)
        | (include_root, l) :: ls ->
            (* Wrap the message once per remaining include, outermost first,
               and report the whole chain at the outermost include. *)
            let open Error_format in
            let b = Buffer.create 20 in
            let _, message =
              List.fold_left
                (fun (includer, msg) (file, l) -> (file, Location ("", Some ("included by " ^ includer), l, msg)))
                (include_root, Line message) ls
            in
            format_message message (buffer_formatter b);
            raise (Reporting.err_general l (Buffer.contents b))
      end
    | (DEF_aux (DEF_pragma ("option", Pragma_line (command, ltrim)), l) as opt_pragma) :: defs ->
        let l = pragma_loc l in
        let first_line err_msg =
          match String.split_on_char '\n' err_msg with line :: _ -> "\n" ^ line | [] -> ("" [@coverage off])
          (* Don't expect this should ever happen, but we are fine if it does *)
        in
        let current = ref 0 in
        let args, reset = create_argv_array ~offset:(ltrim + 7) ~current l command in
        let file_arg file =
          raise (Reporting.err_general l ("Anonymous argument '" ^ file ^ "' cannot be passed via $option directive"))
        in
        (* Restore inline_argv even when parsing fails, so that an error in
           one directive does not leave stale argument state behind. *)
        Fun.protect ~finally:reset (fun () ->
            try Arg.parse_argv ~current (Array.of_list ("sail" :: args)) opts file_arg "" with
            | Arg.Help _ -> raise (Reporting.err_general l "-help flag passed to $option directive")
            | Arg.Bad msg ->
                raise (Reporting.err_general l ("Invalid flag passed to $option directive" ^ first_line msg))
        );
        aux includes (opt_pragma :: acc) defs
    | DEF_aux (DEF_pragma ("ifndef", Pragma_line (symbol, _)), l) :: defs ->
        let then_defs, else_defs, defs = cond_pragma l defs in
        if not (StringSet.mem symbol !symbols) then aux includes acc (then_defs @ defs)
        else aux includes acc (else_defs @ defs)
    | DEF_aux (DEF_pragma ("ifdef", Pragma_line (symbol, _)), l) :: defs ->
        let then_defs, else_defs, defs = cond_pragma l defs in
        if StringSet.mem symbol !symbols then aux includes acc (then_defs @ defs)
        else aux includes acc (else_defs @ defs)
    | DEF_aux (DEF_pragma ("iftarget", Pragma_line (t, _)), l) :: defs ->
        let then_defs, else_defs, defs = cond_pragma l defs in
        begin
          match target with
          | Some t' when t = t' -> aux includes acc (then_defs @ defs)
          | _ -> aux includes acc (else_defs @ defs)
        end
    | DEF_aux (DEF_pragma ("include", Pragma_line (file, _)), l) :: defs ->
        let len = String.length file in
        if len = 0 then (
          Reporting.warn "" (pragma_loc l) "Skipping bad $include. No file argument.";
          aux includes acc defs
        )
        (* The length check keeps an argument made of a single quote character
           from reaching String.sub with a negative length. *)
        else if len >= 2 && file.[0] = '"' && file.[len - 1] = '"' then (
          let relative =
            match l with
            | Parse_ast.Range (pos, _) -> Filename.dirname Lexing.(pos.pos_fname)
            | _ ->
                Reporting.unreachable (pragma_loc l) __POS__
                  "Couldn't figure out relative path for $include. This really shouldn't ever happen."
          in
          let file = String.sub file 1 (len - 2) in
          let include_file = Filename.concat relative file in
          let include_defs =
            Initial_check.parse_file ~loc:l include_file |> snd |> aux ((file, pragma_loc l) :: includes) []
          in
          aux includes (List.rev (wrap_include l include_file include_defs) @ acc) defs
        )
        else if len >= 2 && file.[0] = '<' && file.[len - 1] = '>' then (
          let lib_file = String.sub file 1 (len - 2) in
          let sail_dir = Reporting.get_sail_dir dir in
          let file = Filename.concat sail_dir ("lib/" ^ lib_file) in
          let include_defs =
            Initial_check.parse_file ~loc:l file |> snd |> aux ((lib_file, pragma_loc l) :: includes) []
          in
          aux includes (List.rev (wrap_include l file include_defs) @ acc) defs
        )
        else (
          let help = "Make sure the filename is surrounded by quotes or angle brackets" in
          Reporting.warn "" (pragma_loc l) ("Skipping bad $include " ^ file ^ ". " ^ help);
          aux includes acc defs
        )
    | DEF_aux (DEF_pragma ("suppress_warnings", _), l) :: defs ->
        begin
          match Reporting.simp_loc l with
          | None -> () (* This shouldn't happen, but if it does just continue *)
          | Some (p, _) -> Reporting.suppress_warnings_for_file p.pos_fname
        end;
        aux includes acc defs
    (* Filter file_start and file_end out of the AST so when we
       round-trip files through the compiler we don't end up with
       incorrect start/end annotations *)
    | (DEF_aux (DEF_pragma ("file_start", _), _) | DEF_aux (DEF_pragma ("file_end", _), _)) :: defs ->
        aux includes acc defs
    | (DEF_aux (DEF_pragma (p, _), l) as pragma_def) :: defs ->
        if not (StringSet.mem p (Pragma.all ()) || String.contains p '#') then
          Reporting.warn "" (pragma_loc l) ("Unrecognised directive: " ^ p);
        aux includes (pragma_def :: acc) defs
    | DEF_aux (DEF_outcome (outcome_spec, inner_defs), l) :: defs ->
        aux includes (DEF_aux (DEF_outcome (outcome_spec, aux includes [] inner_defs), l) :: acc) defs
    | (DEF_aux (DEF_default (DT_aux (DT_order (_, ATyp_aux (atyp, _)), _)), l) as def) :: defs -> begin
        symbols := StringSet.add "_DEFAULT_ORDER_SET" !symbols;
        match atyp with
        | Parse_ast.ATyp_inc ->
            symbols := StringSet.add "_DEFAULT_INC" !symbols;
            aux includes (def :: acc) defs
        | Parse_ast.ATyp_dec ->
            symbols := StringSet.add "_DEFAULT_DEC" !symbols;
            aux includes (def :: acc) defs
        | _ -> aux includes (def :: acc) defs
      end
    | def :: defs -> aux includes (def :: acc) defs
  in
  aux [] []