Source file collation_mapping.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
type t = Uchar.t -> node
and node = Reject | Accept of Collation_element.t array * t
let rec unstash xs node =
(match xs with
| [] -> node
| x :: xs' -> unstash xs' (Seq.Cons (x, fun () -> node)))
let emit_array arr cont =
let rec loop i () =
if i = Array.length arr then cont () else
Seq.Cons (arr.(i), loop (i + 1))
in
loop 0
let run mapping =
let
rec pluck ccc useq_stash useq_node candidate state =
(match useq_node with
| Seq.Nil ->
emit_array candidate (start (unstash useq_stash useq_node)) ()
| Seq.Cons (ch', useq') ->
let ccc' = Uucp_ext.canonical_combining_class ch' in
if ccc' = 0 then
emit_array candidate (start (unstash useq_stash useq_node)) ()
else
if ccc' <= ccc then
pluck ccc (ch' :: useq_stash) (useq' ()) candidate state
else
(match state ch' with
| Accept (candidate', state') ->
pluck ccc' useq_stash (useq' ()) candidate' state'
| Reject ->
pluck (max ccc ccc')
(ch' :: useq_stash) (useq' ()) candidate state))
and extend useq_node candidate state =
(match useq_node with
| Seq.Nil ->
emit_array candidate (start Seq.Nil) ()
| Seq.Cons (ch, useq') ->
(match state ch with
| Reject ->
let ccc = Uucp_ext.canonical_combining_class ch in
if ccc <> 0 then
pluck ccc [ch] (useq' ()) candidate state
else
emit_array candidate (start useq_node) ()
| Accept (candidate', state') ->
extend (useq' ()) candidate' state'))
and start useq_node () =
(match useq_node with
| Seq.Nil -> Seq.Nil
| Seq.Cons (ch, useq') ->
(match mapping ch with
| Reject ->
Printf.ksprintf failwith
"Invalid collation mapping, got stuck on codepoint %#x."
(Uchar.to_int ch)
| Accept (candidate', state') ->
extend (useq' ()) candidate' state'))
in
fun useq -> start (useq ())