Source file indent.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
type expectation =
    | Indent of int
    | Align of int
    | Align_between of int * int


type violation = expectation




let group
        (lst: ('a * expectation option) list)
    : (expectation option * 'a list) list
    =
    let rec grp lst =
        match lst with
        | [] ->
            []
        | (a, ea) :: lst ->
            match grp lst with
            | [] ->
                [ea, [a]]
            | (e, elst) :: lst ->
                if ea = e then begin
                    (e, a :: elst) :: lst
                end
                else begin
                    (ea, [a]) :: (e, elst) :: lst
                end
    in
    grp lst



(* Indentation set of a construct:

   It consists of a lower bound, an optional upper bound and an alignment flag.

   The upper bound is valid only if the alignment flag is set.

   The indentation of a construct is defined as the start position its first
   token.

   Tokens can appear anywhere starting from the lower bound as long as the
   alignment flag is not set. If the alignment flag is set, then a token has to
   appear between the lower bound and the optional upper bound.

   A token appearing within the allowed indentation set when the alignment flag
   is set, restricts the allowed indentation set to exactly the position of the
   start of the token and clears the alignment flag. I.e. all subsequent token
   belonging to the same parent can appear anywhere starting from the lower
   bound of the indentation set.

*)


type t = {
    lb: int;          (* lower bound of the indentation set *)
    ub: int option;   (* upper bound of the indentation set *)
    abs: bool;        (* alignment flag; if set, then we are waiting for the
                         first token which has to be in the indentation set. *)
}




let expectation (ind: t): expectation option =
    let expect () =
        if ind.lb = 0 then
            None
        else
            Some (Indent ind.lb)
    in
    match ind.ub with
    | None ->
        expect ()
    | Some ub ->
        if not ind.abs then
            expect ()
        else if ind.lb = ub then
            Some (Align ub)
        else
            Some (Align_between (ind.lb, ub))



let initial: t = {
    lb = 0;
    ub = None;
    abs = false
}




let check_position (pos: int) (ind: t): expectation option =
    let check_indent () =
        if ind.lb <= pos then
            None
        else
            Some (Indent ind.lb)
    in
    match ind.ub with
    | None ->
        check_indent ()
    | Some ub ->
        if not ind.abs then
            check_indent ()
        else if ind.lb <= pos && pos <= ub then
            None
        else if ind.lb = ub then
            Some (Align ind.lb)
        else
            Some (Align_between (ind.lb, ub))





let token (pos: int) (ind: t): t =
    assert (check_position pos ind = None);
    if not ind.abs then
        (* Normal case. Token are all wrapped with '>=' i.e. they can appear to
           the right of the indentation set of the parent. However, if the token
           appears within the indentation set of the parent, then it restricts
           the upper bound of the indentation set of the parent. A parent can
           by definition never be indented more than all its tokens. *)
        match ind.ub with
        | Some ub when ub <= pos ->
            ind
        | _ ->
            {ind with ub = Some pos}
    else
        (* First token of an aligned parent. Indentation set consists of exactly
           the token position. *)
        {
            lb = pos;
            ub = Some pos;
            abs = false
        }




let align (ind: t): t =
    {
        ind with
        abs = true
    }


let left_align (ind: t): t =
    {
        ind with
        ub = Some ind.lb;
        abs = true;
    }


let end_align (ind0: t) (ind: t): t =
    if not ind.abs then
        (* flag is cleared, the aligned sequence is not empty. *)
        ind
    else
        (* the aligned sequence is empty and therefore must not have any effect
         *)
        {ind with abs = ind0.abs}



let start_indent (i: int) (ind: t): t =
    assert (0 <= i);
    if ind.abs then
        (* No effect on aligned structures which have not yet received a first
           token. *)
        ind
    else
        {
            lb  = ind.lb + i;
            ub  = None;
            abs = false;
        }


let end_indent (i: int) (ind0: t) (ind: t): t =
    if ind0.abs then
        ind
    else
        match ind.ub, ind0.ub with
        | None, _ ->
            (* Not yet received any token. *)
            ind0
        | Some ub, None ->
            (* Received some token, but parent not yet. *)
            assert (ind0.lb + i <= ub);
            {ind0 with ub = ind.ub}
        | Some ub, Some ub0 ->
            assert (ind0.lb + i <= ub);
            {ind0 with ub = Some (min ub0 (ub - i))}