Source file Explicator.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
open Bwd
open Bwd.Infix
open Explication
include ExplicatorSigs
let to_start_of_line (pos : Range.position) = {pos with offset = pos.start_of_line}
let default_blend ~(priority : _ -> int) t1 t2 = if priority t2 <= priority t1 then t2 else t1
module Make (Tag : Tag) = struct
type position = Range.position
(** [find_eol_traditional pos] finds the position of the next ['\n']. If the end of source is reached before ['\n'], then the position of the end of the source is returned. *)
let find_eol_traditional ~source ~eof next =
let rec go i =
if i >= eof then
eof, 0
else
match SourceReader.unsafe_get source i with
| '\n' -> i, 1
| '\r' ->
if i+1 < eof && SourceReader.unsafe_get source (i+1) = '\n'
then i, 2
else i, 1
| _ ->
go (i+1)
in
go next
(** [find_eol_unicode pos] finds the position of the next ['\n']. If the end of source is reached before ['\n'], then the position of the end of the source is returned. *)
let find_eol_unicode ~source ~eof next =
let rec go i =
if i >= eof then
eof, 0
else
match SourceReader.unsafe_get source i with
| '\n' | '\x0b' | '\x0c' -> i, 1
| '\r' ->
if i+1 < eof && SourceReader.unsafe_get source (i+1) = '\n'
then i, 2
else i, 1
| '\xc2' ->
if i+1 < eof && SourceReader.unsafe_get source (i+1) = '\x85'
then i, 2
else go (i+1)
| '\xe2' ->
if i+2 < eof && SourceReader.unsafe_get source (i+1) = '\x80' &&
(let c2 = SourceReader.unsafe_get source (i+2) in c2 = '\xa8' || c2 = '\xa9')
then i, 3
else go (i+1)
| _ ->
go (i+1)
in
go next
(** Skip the ['\n'] character, assuming that [eol] is not the end of source *)
let eol_to_next_line shift (pos : position) : position =
{ source = pos.source;
offset = pos.offset + shift;
start_of_line = pos.offset + shift;
line_num = pos.line_num + 1 }
let read_between ~source begin_ end_ : string =
String.init (end_ - begin_) @@ fun i ->
SourceReader.unsafe_get source (begin_ + i)
type explicator_state =
{ lines : Tag.t line bwd
; segments : Tag.t segment bwd
; remaining_tagged_lines : (Tag.t * int) list
; current_tag : Tag.t option
; cursor : Range.position
; eol : int
; eol_shift : int
; line_num : int
}
exception Unexpected_end_of_source of Range.position
exception Unexpected_line_num_increment of Range.position
exception Unexpected_newline of Range.position
exception Unexpected_position_in_newline of Range.position
module F = Flattener.Make(Tag)
let explicate_block ~line_breaking (b : Tag.t Flattener.block) : Tag.t block =
let find_eol = match line_breaking with `Unicode -> find_eol_unicode | `Traditional -> find_eol_traditional in
match b.tagged_positions with
| [] -> invalid_arg "explicate_block: empty block"
| ((_, ploc) :: _) as ps ->
let source = SourceReader.load ploc.source in
let eof = SourceReader.length source in
let[@tailcall] rec go state : (Tag.t option * Range.position) list -> _ =
function
| (ptag,ploc)::ps when state.cursor.line_num = ploc.line_num ->
if ploc.offset > eof then raise @@ Unexpected_end_of_source ploc;
if ploc.offset > state.eol then raise @@ Unexpected_newline ploc;
if ploc.offset = state.cursor.offset then
go {state with cursor = ploc; current_tag = ptag} ps
else
let segments =
state.segments <:
(state.current_tag, read_between ~source state.cursor.offset ploc.offset)
in
go { state with segments; cursor = ploc; current_tag = ptag } ps
| ps ->
let lines, remaining_tagged_lines =
let segments =
if state.cursor.offset < state.eol then
state.segments
<: (state.current_tag, read_between ~source state.cursor.offset state.eol)
else if state.cursor.offset = eof && Option.is_some state.current_tag then
state.segments
<: (state.current_tag, "‹EOF›")
else
state.segments
in
let tagged_lines, remaining_tagged_lines = Utils.span (fun (_, i) -> i = state.line_num) state.remaining_tagged_lines in
(state.lines <: {segments = Bwd.to_list segments; tags = List.map fst tagged_lines}), remaining_tagged_lines
in
match ps with
| [] ->
assert (state.line_num = b.end_line_num);
lines
| (_, ploc) :: _ ->
if ploc.offset > eof then raise @@ Unexpected_end_of_source ploc;
if ploc.offset <= state.eol then raise @@ Unexpected_line_num_increment ploc;
if ploc.offset < state.eol + state.eol_shift then raise @@ Unexpected_position_in_newline ploc;
let cursor =
eol_to_next_line state.eol_shift {state.cursor with offset = state.eol}
in
let eol, eol_shift = find_eol ~source ~eof (state.eol + state.eol_shift) in
go {lines; segments=Emp; current_tag = state.current_tag; cursor; eol; eol_shift; remaining_tagged_lines; line_num = state.line_num + 1} ps
in
let begin_pos = to_start_of_line ploc in
let eol, eol_shift = find_eol ~source ~eof ploc.offset in
let lines =
go
{ lines = Emp
; segments = Emp
; remaining_tagged_lines = b.tagged_lines
; current_tag = None
; cursor = begin_pos
; eol
; eol_shift
; line_num = b.begin_line_num
}
ps
in
{ begin_line_num = b.begin_line_num
; end_line_num = b.end_line_num
; lines = Bwd.to_list @@ lines
}
let[@inline] explicate_blocks ~line_breaking = List.map (explicate_block ~line_breaking)
let[@inline] explicate_part ~line_breaking (source, bs) : Tag.t part =
{ source; blocks = explicate_blocks ~line_breaking bs }
let explicate ?(line_breaking=`Traditional) ?(block_splitting_threshold=5) ?(blend=default_blend ~priority:Tag.priority) ranges =
List.map (explicate_part ~line_breaking) @@ F.flatten ~block_splitting_threshold ~blend ranges
end