Source file parse_js.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
(* Js_of_ocaml compiler
 * Copyright (C) 2013 Hugo Heuzard
 *)
(* Yoann Padioleau
 *
 * Copyright (C) 2010 Facebook
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * version 2.1 as published by the Free Software Foundation, with the
 * special exception on linking described in file license.txt.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the file
 * license.txt for more details.
 *)

open! Stdlib

module Lexer = struct
  type t = Lexing.lexbuf

  let of_file file : t =
    let ic = open_in file in
    let lexbuf = Lexing.from_channel ic in
    { lexbuf with lex_curr_p = { lexbuf.lex_curr_p with pos_fname = file } }

  let of_channel ci : t = Lexing.from_channel ci

  let of_lexbuf lexbuf : t = lexbuf
end

exception Parsing_error of Parse_info.t

let parse_aux the_parser lexbuf =
  let init = the_parser lexbuf.Lexing.lex_start_p in
  let reset lexbuf =
    lexbuf.Lexing.lex_curr_p <- lexbuf.Lexing.lex_start_p;
    lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_start_pos
  in
  let fol prev (_, tok_pi) =
    match prev with
    | [] -> true
    | (_, p_pi) :: _ -> p_pi.Parse_info.line <> tok_pi.Parse_info.line
  in
  let rec loop_error prev checkpoint =
    let module I = Js_parser.MenhirInterpreter in
    match checkpoint with
    | I.InputNeeded _env ->
        let checkpoint =
          I.offer
            checkpoint
            (Js_token.EOF, lexbuf.Lexing.lex_curr_p, lexbuf.Lexing.lex_curr_p)
        in
        loop_error prev checkpoint
    | I.Shifting _ | I.AboutToReduce _ -> loop_error prev (I.resume checkpoint)
    | I.Accepted _ -> assert false
    | I.Rejected -> `Error prev
    | I.HandlingError _ -> loop_error prev (I.resume checkpoint)
  in
  let parse_annot s =
    match String.drop_prefix ~prefix:"//" s with
    | None -> None
    | Some s -> (
        let buf = Lexing.from_string s in
        try
          match Annot_parser.annot Annot_lexer.main buf with
          | `Requires l -> Some (`Requires l)
          | `Provides (n, k, ka) -> Some (`Provides (n, k, ka))
          | `Version l -> Some (`Version l)
          | `Weakdef -> Some `Weakdef
          | `Always -> Some `Always
          | `If name -> Some (`If name)
          | `Ifnot name -> Some (`Ifnot name)
          | `Alias name -> Some (`Alias name)
        with
        | Not_found -> None
        | _ -> None)
  in
  let rec loop prev prev_with_comment (last_checkpoint, checkpoint) =
    let module I = Js_parser.MenhirInterpreter in
    match checkpoint with
    | I.InputNeeded _env ->
        let inputneeded = checkpoint in
        let token, prev_with_comment =
          match prev with
          | ((Js_token.EOF, _) as prev) :: _ -> prev, prev_with_comment
          | _ ->
              let rec read_one prev_with_comment (lexbuf : Lexing.lexbuf) =
                match Js_lexer.main lexbuf with
                | TCommentLineDirective _ as tok ->
                    let pi = Parse_info.t_of_pos lexbuf.lex_start_p in
                    read_one ((tok, pi) :: prev_with_comment) lexbuf
                | TComment s as tok ->
                    let pi = Parse_info.t_of_pos lexbuf.lex_start_p in
                    if fol prev_with_comment (tok, pi)
                    then
                      match parse_annot s with
                      | None -> read_one ((tok, pi) :: prev_with_comment) lexbuf
                      | Some annot ->
                          let tok = Js_token.TAnnot (s, annot) in
                          (tok, pi), prev_with_comment
                    else read_one ((tok, pi) :: prev_with_comment) lexbuf
                | TAnnot _ -> assert false
                | t ->
                    let pi = Parse_info.t_of_pos lexbuf.lex_start_p in
                    (t, pi), prev_with_comment
              in
              let t, prev_with_comment = read_one prev_with_comment lexbuf in
              let t, pi =
                match prev, t with
                (* restricted productions
                 * 7.9.1 - 3
                 * When, as the program is parsed from left to right, a token is encountered
                 * that is allowed by some production of the grammar, but the production
                 * is a restricted production and the token would be the first token for a
                 * terminal or nonterminal immediately following the annotation [no LineTerminator here]
                 * within the restricted production (and therefore such a token is called a restricted token),
                 * and the restricted token is separated from the previous token by at least
                 * one LineTerminator, then a semicolon is automatically inserted before the
                 * restricted token. *)
                | ( ((T_RETURN | T_CONTINUE | T_BREAK | T_THROW), _) :: _
                  , (((T_SEMICOLON | T_VIRTUAL_SEMICOLON), _) as t) ) -> t
                | ((T_RETURN | T_CONTINUE | T_BREAK | T_THROW), _) :: _, t when fol prev t
                  ->
                    reset lexbuf;
                    T_VIRTUAL_SEMICOLON, Parse_info.zero
                (* The practical effect of these restricted productions is as follows:
                 * When a ++ or -- token is encountered where the parser would treat it
                 * as a postfix operator, and at least one LineTerminator occurred between
                 * the preceding token and the ++ or -- token, then a semicolon is automatically
                 * inserted before the ++ or -- token. *)
                | _, ((T_DECR, pi) as tok) when not (fol prev tok) ->
                    Js_token.T_DECR_NB, pi
                | _, ((T_INCR, pi) as tok) when not (fol prev tok) ->
                    Js_token.T_INCR_NB, pi
                | _, (((T_DIV | T_DIV_ASSIGN), _) as tok) ->
                    if I.acceptable checkpoint (fst tok) lexbuf.Lexing.lex_start_p
                    then tok
                    else (
                      reset lexbuf;
                      let t = Js_lexer.main_regexp lexbuf in
                      let pi = Parse_info.t_of_pos lexbuf.lex_start_p in
                      t, pi)
                | _, t -> t
              in
              (t, pi), prev_with_comment
        in
        let last_checkpoint = prev, prev_with_comment, inputneeded in
        let checkpoint =
          I.offer
            checkpoint
            (fst token, lexbuf.Lexing.lex_start_p, lexbuf.Lexing.lex_curr_p)
        in
        loop (token :: prev) (token :: prev_with_comment) (last_checkpoint, checkpoint)
    | I.Shifting _ | I.AboutToReduce _ ->
        loop prev prev_with_comment (last_checkpoint, I.resume checkpoint)
    | I.Accepted v -> `Ok (v, prev_with_comment)
    | I.Rejected -> `Error prev
    | I.HandlingError _ -> (
        (* 7.9.1 - 1 *)
        (* When, as the program is parsed from left to right, a token (called the offending token)
           is encountered that is not allowed by any production of the grammar, then a semicolon
           is automatically inserted before the offending token if one or more of the following
           conditions is true:
           - The offending token is }.
           - The offending token is separated from the previous
             token by at least one LineTerminator. *)

        (* 7.9.1 - 2 *)
        (* When, as the program is parsed from left to right, the end of the input stream of tokens *)
        (* is encountered and the parser is unable to parse the input token stream as a single *)
        (* complete ECMAScript Program, then a semicolon is automatically inserted at the end *)
        let insert_virtual_semmit =
          match prev with
          | [] | (T_VIRTUAL_SEMICOLON, _) :: _ -> false
          | (T_RCURLY, _) :: _ -> true
          | (EOF, _) :: _ -> true
          | offending :: before :: _ when fol [ before ] offending -> true
          | _ -> false
        in
        let drop_annot_or_error () =
          match prev with
          | (TAnnot (s, _), i) :: _ ->
              let prev, prev_with_comment, checkpoint = last_checkpoint in
              let t = Js_token.TComment s in
              loop prev ((t, i) :: prev_with_comment) (last_checkpoint, checkpoint)
          | _ -> loop_error prev (I.resume checkpoint)
        in
        match insert_virtual_semmit with
        | false -> drop_annot_or_error ()
        | true ->
            let prev, prev_with_comment, checkpoint = last_checkpoint in
            if I.acceptable
                 checkpoint
                 Js_token.T_VIRTUAL_SEMICOLON
                 lexbuf.Lexing.lex_curr_p
            then (
              reset lexbuf;
              let t = Js_token.T_VIRTUAL_SEMICOLON, Parse_info.zero in
              let checkpoint =
                I.offer
                  checkpoint
                  (fst t, lexbuf.Lexing.lex_curr_p, lexbuf.Lexing.lex_curr_p)
              in
              loop (t :: prev) (t :: prev_with_comment) (last_checkpoint, checkpoint))
            else drop_annot_or_error ())
  in
  match loop [] [] (([], [], init), init) with
  | `Ok x -> x
  | `Error tok ->
      let pi =
        match tok with
        | [] -> Parse_info.zero
        | (_, pi) :: _ -> pi
      in
      raise (Parsing_error pi)

let parse' lex =
  let p, t_rev = parse_aux Js_parser.Incremental.program lex in
  p, List.rev t_rev

let parse lex =
  let p, _ = parse_aux Js_parser.Incremental.program lex in
  List.map p ~f:(fun (c, _) -> c)

let parse_expr lex =
  let expr, _ = parse_aux Js_parser.Incremental.standalone_expression lex in
  expr