Source file cparser.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
(* Parser for directives in C-like syntax, rewriting them into extensions,
   like ones we would get from parsing OCaml file.
*)

module Unshadow = struct
  module Parser = Parser
end

open Ppxlib
open Unshadow
module Parsing = Stdlib.Parsing

type lexer = Lexing.lexbuf -> Parser.token

(* +---------------------------------------------------------------+
   | Parsing of directives                                         |
   +---------------------------------------------------------------+ *)

let located x lexbuf = { Location.txt = x; loc = Location.of_lexbuf lexbuf }

let parse parsing_fun lexer lexbuf =
  try parsing_fun lexer lexbuf with
  | Parsing.Parse_error | Syntaxerr.Escape_error ->
    let loc = Location.of_lexbuf lexbuf in
    raise (Syntaxerr.Error (Syntaxerr.Other loc))
;;

let fetch_directive_argument (lexer : lexer) lexbuf =
  let rec loop acc (brackets : Parser.token list) =
    match lexer lexbuf, brackets with
    | EOF, _ | EOL, [] -> located Parser.EOF lexbuf :: acc
    | (EOL | COMMENT _), _ -> loop acc brackets
    | token, _ ->
      let acc = located token lexbuf :: acc in
      (match token, brackets with
       | BEGIN, _ -> loop acc (END :: brackets)
       | DO, _ -> loop acc (DONE :: brackets)
       | LPAREN, _ -> loop acc (RPAREN :: brackets)
       | LBRACE, _ -> loop acc (RBRACE :: brackets)
       | LBRACELESS, _ -> loop acc (GREATERRBRACE :: brackets)
       | LBRACKETLESS, _ -> loop acc (GREATERRBRACKET :: brackets)
       | LBRACKETBAR, _ -> loop acc (BARRBRACKET :: brackets)
       | ( ( LBRACKET
           | LBRACKETGREATER
           | LBRACKETPERCENT
           | LBRACKETPERCENTPERCENT
           | LBRACKETAT
           | LBRACKETATAT
           | LBRACKETATATAT )
         , _ ) -> loop acc (RBRACKET :: brackets)
       | _, closing :: brackets when token = closing -> loop acc brackets
       | _ -> loop acc brackets)
  in
  let start_pos = Lexing.lexeme_end_p lexbuf in
  match loop [] [] |> List.rev with
  | [] -> None
  | tokens ->
    let tokens = ref tokens in
    let fake_lexer (lexbuf : Lexing.lexbuf) : Parser.token =
      match !tokens with
      | [] -> EOF
      | token :: rest ->
        tokens := rest;
        lexbuf.lex_start_p <- token.loc.loc_start;
        lexbuf.lex_curr_p <- token.loc.loc_end;
        token.txt
    in
    let fake_lexbuf = Lexing.from_function (fun _ _ -> assert false) in
    fake_lexbuf.lex_curr_p <- start_pos;
    (match
       Parse.Of_ocaml.copy_structure (parse Parser.implementation fake_lexer fake_lexbuf)
     with
     | [] -> None
     | [ st ] ->
       assert_no_attributes_in#structure_item st;
       Some st
     | _ :: st :: _ ->
       Location.raise_errorf ~loc:st.pstr_loc "optcomp: too many structure items")
;;

let parse_directive (lexer : lexer) lexbuf : 'a Token.t =
  let token = located (lexer lexbuf) lexbuf in
  let arg = fetch_directive_argument lexer lexbuf in
  let loc = { token.loc with loc_end = Lexing.lexeme_end_p lexbuf } in
  let payload =
    match arg with
    | Some st_item -> PStr [ st_item ]
    | None -> PStr []
  in
  match token.txt with
  | IF -> Token.make_directive "if" loc payload
  | ELSE -> Token.make_directive "else" loc payload
  | LIDENT s -> Token.make_directive s loc payload
  | _ -> Location.raise_errorf ~loc "optcomp: unknown token"
;;

let parse_loop lexbuf =
  let is_beginning_of_line lexbuf =
    let pos = Lexing.lexeme_start_p lexbuf in
    pos.pos_cnum = pos.pos_bol
  in
  let rec parse_loop_aux acc =
    match Lexer.token_with_comments lexbuf with
    | HASH when is_beginning_of_line lexbuf ->
      let acc = parse_directive Lexer.token_with_comments lexbuf :: acc in
      parse_loop_aux acc
    | EOF -> acc
    | _ -> parse_loop_aux acc
  in
  List.rev (parse_loop_aux [])
;;