1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
open Base
open Liquid_syntax
open Tools
open Syntax
open Keyword_lexer
let lex_bool text =
let literal_true = "true" in
let literal_false = "false" in
if starts_with text literal_true then
Some (LexValue (LexBool true), remove_prefix text literal_true)
else if starts_with text literal_false then
Some (LexValue (LexBool false), remove_prefix text literal_false)
else
None
let lex_digit_group text =
let rec lex_digit_group_aux t acc =
let chunk = String.sub t ~pos:(List.length acc) ~len:1 in
match chunk with
| "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ->
lex_digit_group_aux t (acc @ [chunk])
| _ -> acc
in
lex_digit_group_aux text [] |> join
let lex_number text =
let to_num v t = Some (LexValue (LexNumber (Float.of_string v)), t) in
let (neg_literal, t_text) =
if starts_with text "-" then
("-", remove_prefix text "-")
else ("", text) in
match lex_digit_group t_text with
| "" -> None
| first_group -> begin
let t_first_group = neg_literal ^ first_group in
let decimal_part = remove_prefix t_text first_group in
if starts_with decimal_part "." then
let second_group_part = remove_prefix decimal_part "." in
match lex_digit_group second_group_part with
| "" -> to_num t_first_group second_group_part
| second_group -> to_num (t_first_group ^ "." ^ second_group) (remove_prefix second_group_part second_group)
else
to_num t_first_group decimal_part
end
let has_prefix_or_fail text prefix func =
if starts_with text prefix then
remove_prefix text prefix |> func
else
None
let lex_range text =
let (popen, pclose) = "(", ")" in
let dotdot = ".." in
let lex_first_group wo_paren =
match lex_digit_group wo_paren with
| "" -> None
| first_number ->
let after_first = remove_prefix wo_paren first_number in
let lex_second_group wo_dot =
match lex_digit_group wo_dot with
| "" -> None
| second_number ->
let after_second = remove_prefix wo_dot second_number in
if starts_with after_second pclose then
let range = LexValue (LexRange (Int.of_string first_number, Int.of_string second_number)) in
Some (range, (remove_prefix after_second pclose))
else
None
in
has_prefix_or_fail after_first dotdot lex_second_group
in
has_prefix_or_fail text popen lex_first_group
let lex_delimited_string delim escaped_delim text =
if starts_with text delim then
let d_len = String.length escaped_delim in
let folder acc index =
match String.sub text ~pos:(index+1) ~len:d_len with
| e when e = escaped_delim -> Next (acc ^ escaped_delim, index + d_len)
| other -> (
match first_letter other with
| e when e = delim -> Stop acc
| other_letter -> Next (acc ^ other_letter, index + 1)
)
in
let string_literal = unfold "" 0 folder in
let complete_literal = delim ^ string_literal ^ delim in
Some (LexValue (LexString string_literal), remove_prefix text complete_literal)
else
None
let lex_string text =
let double_quote = lex_delimited_string "\"" "\\\"" in
let single_quote = lex_delimited_string "\'" "\\\'" in
match (double_quote text, single_quote text) with
| (Some (r, rest), _) -> Some (r, rest)
| (_, Some (r, rest)) -> Some (r, rest)
| _ -> None
let lex_id text =
let id_exp = ~/"^[a-zA-Z_](?:(?:[a-zA-Z0-9_\\-\\.]|((\\[(\"|\')?.+)(\"|\')?\\]))+)?" in
if Re2.matches id_exp text then
let literal = Re2.find_first_exn id_exp text in
let bracket_group_exp = ~/"\\[(?:\"|')?(.+?)(?:\"|')?\\]" in
let wo_bg =
match Re2.find_all bracket_group_exp text with
| Ok mats ->
let folder acc m =
let is_string = starts_with m "[\"" || starts_with m "['" in
let pos = if is_string then 2 else 1 in
let dot_notation = "." ^ String.sub m ~pos:pos ~len:(String.length m - (pos * 2)) in
String.substr_replace_first acc ~pattern:m ~with_:dot_notation
in
List.fold mats ~init:literal ~f:folder
| Error _ -> literal in
let pieces = String.split wo_bg ~on:'.' in
Some (LexValue (LexId pieces), remove_prefix text literal)
else
None
let rec first_successful text =
function
| lexer :: other_lexers -> (
match lexer text with
| Some p -> Some p
| _ -> first_successful text other_lexers
)
| _ -> None
let lex_token text =
let lexers = [lex_keyword; lex_range; lex_bool; lex_string; lex_number; lex_id] in
first_successful text lexers
let lex_block_token_chunk (chunk2, chunk3) acc index =
if is_block_token_whitespace_string chunk3 then
Next (acc @ [block_token_of_string chunk3], index+(String.length chunk3))
else if is_block_token_string chunk2 then
Next (acc @ [block_token_of_string chunk2], index+(String.length chunk2))
else
match List.rev acc with
| (RawText "liquid") :: StatementStart _ :: hds ->
Next (List.rev hds @ [LiquidStart], index+1)
| (RawText tl) :: hds ->
Next (List.rev hds @ [RawText (tl ^ first_letter chunk2)], index+1)
| _ ->
Next (acc @ [RawText (first_letter chunk2)], index+1)
let lex_block_tokens text =
let folder acc index =
let curr = String.sub text ~pos:index ~len:(String.length text - index) in
if Preprocessor.is_raw curr then
let raw_text = Preprocessor.until_end_raw curr in
let raw_body = Preprocessor.trim_raw_tags raw_text in
Next (acc @ [RawText raw_body], index+(String.length raw_text))
else if index + 3 < (String.length text) then
let chunk2 = String.sub text ~pos:index ~len:2 in
let chunk3 = String.sub text ~pos:index ~len:3 in
lex_block_token_chunk (chunk2, chunk3) acc index
else
Stop acc
in
unfold [] 0 folder
let lex_line_tokens text =
let t_text = text ^ " " in
let folder acc index =
let chunk = String.sub t_text ~pos:index ~len:(String.length t_text - index) in
let sub_rest rest = String.length t_text - String.length rest in
match lex_token chunk with
| Some (Newline, rest) -> Next (acc @ [EOS; Newline], sub_rest rest)
| Some (t, rest) -> Next (acc @ [t], sub_rest rest)
| _ -> Stop acc
in
let raw_list = unfold [] 0 folder in
let wo_spaces = List.filter raw_list ~f:((!=) Space) in
wo_spaces
let echo_to_expression tokens =
let rec aux acc pool =
match pool with
| LexValue (LexId ["echo"]) :: LexValue (LexString t) :: tl -> aux (acc @ [LexExpression [LexText t]]) tl
| LexValue (LexId ["echo"]) :: LexValue (LexId id) :: tl -> aux (acc @ [LexExpression [LexValue (LexId id)]]) tl
| hd :: tl -> aux (acc @ [hd]) tl
| [] -> acc
in aux [] tokens
let lex_all_tokens (block_tokens: block_token list) =
let folder acc index =
let max = List.length block_tokens in
if index > max then
Stop (acc)
else begin
let sub = List.sub block_tokens ~pos:index ~len:(max - index) in
match sub with
| StatementStart _ :: RawText(body) :: StatementEnd _ :: _ ->
Next (acc @ lex_line_tokens body @ [EOS], index+3)
| ExpressionStart _ :: RawText(body) :: ExpressionEnd _ :: _ ->
Next (acc @ [LexExpression (lex_line_tokens body)], index+3)
| LiquidStart :: RawText(body) :: StatementEnd _ :: _ ->
let liq = body
|> Preprocessor.remove_liquid_comments
|> lex_line_tokens in
Next (acc @ liq, index+3)
| RawText (other) :: _ ->
Next (acc @ [LexText other], index + 1)
| _ -> Stop (acc)
end
in
let base_lex = unfold [] 0 folder in
base_lex |> echo_to_expression
let lex text =
text
|> lex_block_tokens
|> lex_all_tokens