Source file parse_with_lexer.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
(* Local alias for [Fmlib_std.Interfaces.ANY]. In this file it is the
 * signature required of the [State], [Token], [Final] and [Semantic] functor
 * arguments, which are only ever used through their type [t]. *)
module type ANY = Fmlib_std.Interfaces.ANY


(** Drivers which feed the items of an input source into a consumer of an
    arbitrary type ['a].

    In this file the three leading function arguments are always supplied as
    [needs_more], [put] and [put_end] of the combined parser, in that order:
    a predicate on the consumer, a step accepting one item of type [ch], and
    a step signalling the end of input. *)
module type RUN =
sig
    (** Type of the items handed to the consumer. *)
    type ch

    (** Run on a string with an additional [int] argument and an [int] in the
        result (presumably the start position and the position reached --
        confirm against the implementation). *)
    val string_at:
        ('a -> bool) -> (ch -> 'a -> 'a) -> ('a -> 'a)
        -> int -> string -> 'a -> int * 'a

    (** Run on a string and return the resulting consumer. *)
    val string:
        ('a -> bool) -> (ch -> 'a -> 'a) -> ('a -> 'a)
        -> string -> 'a -> 'a

    (** Run on an input channel and return the resulting consumer. *)
    val channel:
        ('a -> bool) -> (ch -> 'a -> 'a) -> ('a -> 'a)
        -> in_channel -> 'a -> 'a
end



(* Runner whose items are plain [char]s: everything defined at the top level
 * of [Run_on], together with the item type [ch]. *)
module Run_char =
struct
    include Run_on
    type ch = char
end



(* Runner whose items are of type [Utf8.Decoder.t]: the result of applying
 * [Run_on.Make] to [Utf8.Decoder], together with the item type [ch]. *)
module Run_utf8 =
struct
    include Run_on.Make (Utf8.Decoder)
    type ch = Utf8.Decoder.t
end



(** Combine a lexer [Lex] and a token parser [Parse] into one parser which
    consumes items of type [Run.ch].

    Every item is pushed into the lexer. Whenever the lexer has succeeded
    without consuming the end of input, its result (a token together with its
    range) is pushed into the parser and the lexer is restarted. [Run]
    provides the drivers behind [run_on_string], [run_on_string_at] and
    [run_on_channel]. *)
module Make_basic
        (Run: RUN)
        (State: ANY)
        (Token: ANY)
        (Final: ANY)
        (Semantic: ANY)
        (Lex: Interfaces.LEXER
            with type final = Position.range * Token.t
             and type token = Run.ch)
        (Parse: Interfaces.FULL_PARSER
            with type state    = State.t
             and type token    = Position.range * Token.t
             and type expect   = string * Indent.expectation option
             and type final    = Final.t
             and type semantic = Semantic.t)
=
struct
    type token    = Run.ch
    type item     = token
    type final    = Final.t
    type expect   = string * Indent.expectation option
    type semantic = Semantic.t
    type state    = State.t

    (* The combined parser: the current lexer paired with the current token
     * parser. *)
    type t = {
        lex:   Lex.t;
        parse: Parse.t;
    }


    (* Pair up a lexer and a token parser. *)
    let make (lex: Lex.t) (parse: Parse.t): t =
        {lex; parse}


    (* Keep the lexer of [p] and continue with the token parser [parse],
     * after [Parse.transfer_lookahead] has been applied to the old and the
     * new token parser. *)
    let make_next (p: t) (parse: Parse.t): t =
        {p with parse = Parse.transfer_lookahead p.parse parse}


    (* The lexer component. *)
    let lex ({lex; _}: t): Lex.t =
        lex


    (* The token parser component. *)
    let parse ({parse; _}: t): Parse.t =
        parse


    (* More input is needed only if both the lexer and the token parser need
     * more. *)
    let needs_more ({lex; parse}: t): bool =
        Lex.needs_more lex && Parse.needs_more parse


    (* Success, end of parsing and consumption of the end of input are
     * decided by the token parser alone. *)
    let has_succeeded ({parse; _}: t): bool =
        Parse.has_succeeded parse

    let has_ended ({parse; _}: t): bool =
        Parse.has_ended parse

    let has_consumed_end ({parse; _}: t): bool =
        Parse.has_consumed_end parse


    (* While the token parser still needs more tokens, the lexer's status is
     * reported; otherwise the token parser's status is reported. *)
    let has_failed_syntax ({lex; parse}: t): bool =
        match Parse.needs_more parse with
        | true  -> Lex.has_failed_syntax lex
        | false -> Parse.has_failed_syntax parse


    let has_failed_semantic ({parse; _}: t): bool =
        Parse.has_failed_semantic parse


    (* Final value of the token parser.
     * Precondition (asserted): [has_succeeded p]. *)
    let final (p: t): Final.t =
        assert (has_succeeded p);
        Parse.final p.parse


    (* Failed expectations, taken from the same component which
     * [has_failed_syntax] consults.
     * Precondition (asserted): [has_failed_syntax p]. *)
    let failed_expectations (p: t): expect list =
        assert (has_failed_syntax p);
        match Parse.needs_more p.parse with
        | true  -> Lex.failed_expectations p.lex
        | false -> Parse.failed_expectations p.parse


    (* Semantic error of the token parser.
     * Precondition (asserted): [has_failed_semantic p]. *)
    let failed_semantic (p: t): Semantic.t =
        assert (has_failed_semantic p);
        Parse.failed_semantic p.parse


    (* First position of the range of the token parser's first lookahead
     * token; without a lookahead token, the position of the lexer. *)
    let position (p: t): Position.t =
        match Parse.first_lookahead_token p.parse with
        | Some ((first, _), _) -> first
        | None                 -> Lex.position p.lex


    (* Range of the token parser's first lookahead token; without a lookahead
     * token, the empty range at the position of the lexer. *)
    let range (p: t): Position.range =
        match Parse.first_lookahead_token p.parse with
        | Some (token_range, _) ->
            token_range
        | None ->
            let here = Lex.position p.lex in
            here, here


    (* User state of the token parser. *)
    let state ({parse; _}: t): State.t =
        Parse.state parse


    (* Move finished tokens from the lexer to the token parser: as long as
     * the lexer has succeeded without consuming the end of input, put its
     * final value into the token parser and restart the lexer. *)
    let rec check_token (p: t): t =
        if not (Lex.has_succeeded p.lex) || Lex.has_consumed_end p.lex then
            p
        else
            let token = Lex.final p.lex in
            check_token {
                lex   = Lex.restart p.lex;
                parse = Parse.put token p.parse;
            }


    (* Push one item into the lexer and forward any finished token. *)
    let put (c: Run.ch) (p: t): t =
        let lex = Lex.put c p.lex in
        check_token {p with lex}


    (* Signal the end of input to the lexer, forward any finished token and,
     * if the lexer has succeeded, signal the end to the token parser. *)
    let put_end (p: t): t =
        let flushed =
            check_token {p with lex = Lex.put_end p.lex}
        in
        let lexer_done = Lex.has_succeeded flushed.lex in
        assert (not lexer_done || Lex.has_consumed_end flushed.lex);
        match lexer_done with
        | true ->
            (* A lexer which has succeeded here has consumed the end of
             * input, i.e. it has encountered the end token. *)
            {flushed with parse = Parse.put_end flushed.parse}
        | false ->
            flushed


    (* Drivers of [Run] specialised to the combined parser. *)
    let run_on_string    = Run.string     needs_more put put_end
    let run_on_string_at = Run.string_at  needs_more put put_end
    let run_on_channel   = Run.channel    needs_more put put_end
end






(** [Make_basic] specialised to [Run_char]: the lexer consumes plain
    [char]s. *)
module Make
        (State: ANY) (Token: ANY) (Final: ANY) (Semantic: ANY)
        (Lex: Interfaces.LEXER
            with type final = Position.range * Token.t
             and type token = char)
        (Parse: Interfaces.FULL_PARSER
            with type state    = State.t
             and type token    = Position.range * Token.t
             and type expect   = string * Indent.expectation option
             and type final    = Final.t
             and type semantic = Semantic.t)
    =
    Make_basic
        (Run_char)
        (State) (Token) (Final) (Semantic)
        (Lex) (Parse)






(** [Make_basic] specialised to [Run_utf8]: the lexer consumes items of type
    [Utf8.Decoder.t]. *)
module Make_utf8
        (State: ANY) (Token: ANY) (Final: ANY) (Semantic: ANY)
        (Lex: Interfaces.LEXER
            with type final = Position.range * Token.t
             and type token = Utf8.Decoder.t)
        (Parse: Interfaces.FULL_PARSER
            with type state    = State.t
             and type token    = Position.range * Token.t
             and type expect   = string * Indent.expectation option
             and type final    = Final.t
             and type semantic = Semantic.t)
    =
    Make_basic
        (Run_utf8)
        (State) (Token) (Final) (Semantic)
        (Lex) (Parse)