Source file lexer.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
open Base
open Liquid_syntax
open Tools
open Syntax
open Keyword_lexer

(* [lex_bool text] lexes a leading boolean literal ("true"/"false").
   Returns [Some (token, rest)] with the literal stripped from [text],
   or [None] when [text] starts with neither literal. *)
let lex_bool text =
  let lex_literal (literal, value) =
    if starts_with text literal then
      Some (LexValue (LexBool value), remove_prefix text literal)
    else
      None
  in
  match lex_literal ("true", true) with
  | Some _ as hit -> hit
  | None -> lex_literal ("false", false)


(* [lex_digit_group text] returns the longest run of ASCII digits at the
   start of [text] ("" when the first character is not a digit).

   Fixes two issues with the previous version:
   - it called [String.sub t ~pos:(List.length acc) ~len:1] without a
     bounds check, raising [Invalid_argument] whenever the digit run
     extended to the end of the string (e.g. on input "123");
   - it accumulated one-character strings with [acc @ [chunk]] (O(n^2))
     and re-joined them, instead of taking a single substring. *)
let lex_digit_group text =
  let len = String.length text in
  (* Index of the first non-digit character at or after [i]. *)
  let rec digits_end i =
    if i >= len then i
    else
      match String.sub text ~pos:i ~len:1 with
      | "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ->
        digits_end (i + 1)
      | _ -> i
  in
  String.sub text ~pos:0 ~len:(digits_end 0)

(* [lex_number text] lexes a leading (optionally negative) decimal number.
   Accepts "12", "-12", "12.5"; on "12." the dot is consumed but only the
   integer part becomes the number (the rest starts after the dot). *)
let lex_number text =
  let to_num v t = Some (LexValue (LexNumber (Float.of_string v)), t) in

  (* Peel an optional leading minus sign. *)
  let (sign, unsigned) =
    if starts_with text "-" then ("-", remove_prefix text "-")
    else ("", text)
  in

  match lex_digit_group unsigned with
  | "" -> None
  | whole ->
    let signed_whole = sign ^ whole in
    let after_whole = remove_prefix unsigned whole in
    if not (starts_with after_whole ".") then
      to_num signed_whole after_whole
    else begin
      let after_dot = remove_prefix after_whole "." in
      match lex_digit_group after_dot with
      | "" ->
        (* Dot with no digits after it: number ends at the whole part,
           remainder begins just past the dot (matches prior behavior). *)
        to_num signed_whole after_dot
      | frac ->
        to_num (signed_whole ^ "." ^ frac) (remove_prefix after_dot frac)
    end

(* [has_prefix_or_fail text prefix func] strips [prefix] from [text] and
   hands the remainder to [func]; yields [None] when the prefix is absent. *)
let has_prefix_or_fail text prefix func =
  match starts_with text prefix with
  | true -> func (remove_prefix text prefix)
  | false -> None

(* [lex_range text] lexes a literal range such as "(1..5)" into
   [LexValue (LexRange (start, stop))]. Only bare non-negative digit
   groups are accepted on each side; no whitespace is tolerated. *)
let lex_range text =
  let dotdot = ".." in
  (* Parse the closing number and ")" once "start.." has been consumed. *)
  let parse_stop start_digits after_dots =
    match lex_digit_group after_dots with
    | "" -> None
    | stop_digits ->
      let tail = remove_prefix after_dots stop_digits in
      if starts_with tail ")" then
        let range =
          LexValue
            (LexRange (Int.of_string start_digits, Int.of_string stop_digits))
        in
        Some (range, remove_prefix tail ")")
      else
        None
  in
  (* Parse the opening number once "(" has been consumed. *)
  let parse_start after_paren =
    match lex_digit_group after_paren with
    | "" -> None
    | start_digits ->
      let tail = remove_prefix after_paren start_digits in
      has_prefix_or_fail tail dotdot (parse_stop start_digits)
  in
  has_prefix_or_fail text "(" parse_start


(* [lex_delimited_string delim escaped_delim text] lexes a string literal
   at the start of [text] wrapped in [delim] (e.g. "\""), where
   [escaped_delim] (e.g. "\\\"") does not terminate the literal.
   Returns [Some (LexValue (LexString body), rest)] or [None] when [text]
   does not start with [delim]. The lexed body keeps escape sequences
   verbatim, which keeps the consumed-prefix length arithmetic consistent. *)
let lex_delimited_string delim escaped_delim text =
  if starts_with text delim then
    let d_len = String.length escaped_delim in
    (* Folder for [unfold]: [index] is an offset where [index+1] points at
       the next unread character (the +1 skips the opening delimiter).
       NOTE(review): no bounds check — an unterminated literal would push
       [String.sub] past the end and raise; presumably inputs are always
       well-formed by this point — TODO confirm. *)
    let folder acc index =
      match String.sub text ~pos:(index+1) ~len:d_len with
      | e when e = escaped_delim -> Next (acc ^ escaped_delim, index + d_len)
      | other -> (
        (* NOTE(review): string [=] here relies on a polymorphic/string
           equality being in scope despite [open Base] shadowing ([Tools]
           presumably re-exports one) — verify. *)
        match first_letter other with
        | e when e = delim -> Stop acc
        | other_letter -> Next (acc ^ other_letter, index + 1)
      )
    in

    let string_literal = unfold "" 0 folder in
    (* Reconstruct the full consumed span (quotes included) so the
       remainder can be computed by prefix removal. *)
    let complete_literal = delim ^ string_literal ^ delim in

    Some (LexValue (LexString string_literal), remove_prefix text complete_literal)
  else
    None

(* [lex_string text] lexes a leading string literal, accepting either
   double- or single-quoted forms (double quotes tried first). A text can
   start with at most one quote kind, so trying them in sequence is
   equivalent to inspecting both results. *)
let lex_string text =
  match lex_delimited_string "\"" "\\\"" text with
  | Some _ as double_quoted -> double_quoted
  | None -> lex_delimited_string "\'" "\\\'" text


(* [lex_id text] lexes a leading identifier such as "user.name" or
   "items[\"first\"]" into [LexValue (LexId pieces)] where [pieces] are the
   dot-separated path segments. Bracket access (["x"] / ['x'] / [x]) is
   normalized to dot notation before splitting. *)
let lex_id text =
  (* Leading letter/underscore, then word chars, '-', '.', or a bracketed
     group with optional quotes. *)
  let id_exp = ~/"^[a-zA-Z_](?:(?:[a-zA-Z0-9_\\-\\.]|((\\[(\"|\')?.+)(\"|\')?\\]))+)?" in
  if Re2.matches id_exp text then
    let literal = Re2.find_first_exn id_exp text in
    let bracket_group_exp = ~/"\\[(?:\"|')?(.+?)(?:\"|')?\\]" in
    let wo_bg =
      match Re2.find_all bracket_group_exp text with
      | Ok mats ->
        (* Rewrite each bracket group found in the matched literal into
           ".segment": drop "[\"" ... "\"]" (2 chars each side) for quoted
           groups, "[" ... "]" (1 char each side) otherwise.
           NOTE(review): [find_all] scans all of [text], not just
           [literal]; matches past the identifier simply fail to replace
           anything in [acc] — presumably harmless, but verify. *)
        let folder acc m =
          let is_string = starts_with m "[\"" || starts_with m "['" in
          let pos = if is_string then 2 else 1 in
          let dot_notation = "." ^ String.sub m ~pos:pos ~len:(String.length m - (pos * 2)) in
          String.substr_replace_first acc ~pattern:m ~with_:dot_notation
        in
        List.fold mats ~init:literal ~f:folder
      | Error _ -> literal in

    (* Split the normalized identifier into its path segments. *)
    let pieces = String.split wo_bg ~on:'.' in
    Some (LexValue (LexId pieces), remove_prefix text literal)
  else
    None


(* [first_successful text lexers] applies each lexer to [text] in order
   and returns the first [Some] result, or [None] when every lexer fails. *)
let rec first_successful text lexers =
  match lexers with
  | [] -> None
  | lexer :: rest ->
    (match lexer text with
     | Some result -> Some result
     | None -> first_successful text rest)

(* [lex_token text] lexes one token from the front of [text]. Earlier
   lexers in the list take precedence over later ones. *)
let lex_token text =
  first_successful text
    [ lex_keyword; lex_range; lex_bool; lex_string; lex_number; lex_id ]


(* [lex_block_token_chunk (chunk2, chunk3) acc index] advances the block
   tokenizer one step at position [index], given the next 2- and 3-character
   slices of the input. Priority: 3-char whitespace-control tokens (e.g.
   "{%-"), then 2-char tokens (e.g. "{%"), then plain text accumulation. *)
let lex_block_token_chunk (chunk2, chunk3) acc index =
  if is_block_token_whitespace_string chunk3 then
    Next (acc @ [block_token_of_string chunk3], index+(String.length chunk3))
  else if is_block_token_string chunk2 then
    Next (acc @ [block_token_of_string chunk2], index+(String.length chunk2))
  else
    (* Not at a token boundary: inspect the most recently emitted tokens
       (head of the reversed accumulator). *)
    match List.rev acc with
    | (RawText "liquid") :: StatementStart _ :: hds ->
      (* A statement opener immediately followed by the word "liquid"
         collapses into [LiquidStart]; the character at [index] (the one
         that ended the word) is consumed without being emitted.
         NOTE(review): this only fires when "liquid" directly abuts the
         opener (no space), since a space would already be part of the
         accumulated RawText — confirm intended. *)
      Next (List.rev hds @ [LiquidStart], index+1)
    | (RawText tl) :: hds ->
      (* Extend the current raw-text run by one character. *)
      Next (List.rev hds @ [RawText (tl ^ first_letter chunk2)], index+1)
    | _ ->
      (* Start a new raw-text run. *)
      Next (acc @ [RawText (first_letter chunk2)], index+1)

(* [lex_block_tokens text] splits a template into block-level tokens:
   raw blocks, statement/expression delimiters, and runs of raw text.

   Fixes an off-by-one in the previous version: the guard
   [index + 3 < String.length text] stopped the scan while 1-3 characters
   of input remained, silently dropping trailing text (a template ending
   exactly in "}}" lost its closing token). We now clamp the chunk
   lengths to the remaining input instead; short chunks cannot match the
   2/3-character delimiter tokens (assumes [is_block_token_string] /
   [is_block_token_whitespace_string] only match their exact token
   strings — TODO confirm) and fall through to raw-text accumulation. *)
let lex_block_tokens text =
  let len = String.length text in
  let folder acc index =
    if index >= len then
      Stop acc
    else
      let curr = String.sub text ~pos:index ~len:(len - index) in
      if Preprocessor.is_raw curr then
        (* {% raw %} ... {% endraw %}: emit the body verbatim and skip
           past the whole raw span. *)
        let raw_text = Preprocessor.until_end_raw curr in
        let raw_body = Preprocessor.trim_raw_tags raw_text in
        Next (acc @ [RawText raw_body], index + (String.length raw_text))
      else
        let chunk2 = String.sub text ~pos:index ~len:(min 2 (len - index)) in
        let chunk3 = String.sub text ~pos:index ~len:(min 3 (len - index)) in
        lex_block_token_chunk (chunk2, chunk3) acc index
  in

  unfold [] 0 folder

(* [lex_line_tokens text] lexes the body of a tag/expression into line
   tokens, dropping [Space] tokens and marking newlines with an [EOS].
   A trailing space is appended so the final token is not cut short. *)
let lex_line_tokens text =
  let padded = text ^ " " in
  let folder acc index =
    let remaining =
      String.sub padded ~pos:index ~len:(String.length padded - index)
    in
    (* New index = how much of [padded] has been consumed so far. *)
    let consumed rest = String.length padded - String.length rest in
    match lex_token remaining with
    | Some (Newline, rest) -> Next (acc @ [EOS; Newline], consumed rest)
    | Some (token, rest) -> Next (acc @ [token], consumed rest)
    | _ -> Stop acc
  in

  unfold [] 0 folder |> List.filter ~f:((!=) Space)

(* [echo_to_expression tokens] rewrites "echo <string|id>" token pairs
   into the equivalent [LexExpression] forms; all other tokens pass
   through unchanged.

   Performance fix: the previous version appended to the accumulator with
   [acc @ [x]] on every token (O(n^2)); this version conses and reverses
   once at the end (O(n)), producing the same list. *)
let echo_to_expression tokens =
  let rec walk acc pool =
    match pool with
    | LexValue (LexId ["echo"]) :: LexValue (LexString t) :: tl ->
      walk (LexExpression [LexText t] :: acc) tl
    | LexValue (LexId ["echo"]) :: LexValue (LexId id) :: tl ->
      walk (LexExpression [LexValue (LexId id)] :: acc) tl
    | hd :: tl -> walk (hd :: acc) tl
    | [] -> List.rev acc
  in
  walk [] tokens


(* [lex_all_tokens block_tokens] turns block-level tokens into the final
   token stream: statement bodies become line tokens terminated by [EOS],
   expression bodies become [LexExpression]s, "liquid" tag bodies are
   stripped of comments and lexed inline, and bare raw text passes through
   as [LexText]. Finally, "echo" pairs are folded into expressions. *)
let lex_all_tokens (block_tokens: block_token list) =
  let total = List.length block_tokens in
  let folder acc index =
    if index > total then
      Stop acc
    else
      match List.sub block_tokens ~pos:index ~len:(total - index) with
      | StatementStart _ :: RawText body :: StatementEnd _ :: _ ->
        Next (acc @ lex_line_tokens body @ [EOS], index + 3)
      | ExpressionStart _ :: RawText body :: ExpressionEnd _ :: _ ->
        Next (acc @ [LexExpression (lex_line_tokens body)], index + 3)
      | LiquidStart :: RawText body :: StatementEnd _ :: _ ->
        let inline_tokens =
          lex_line_tokens (Preprocessor.remove_liquid_comments body)
        in
        Next (acc @ inline_tokens, index + 3)
      | RawText other :: _ ->
        (* Historical note: this once also emitted a [Newline]. *)
        Next (acc @ [LexText other], index + 1)
      | _ -> Stop acc
  in

  unfold [] 0 folder |> echo_to_expression


(* [lex text] runs the full pipeline: block tokenization followed by
   line-level lexing. *)
let lex text = lex_all_tokens (lex_block_tokens text)