Source file uuseg_string.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
type 'a folder = 'a -> string -> 'a
let fold dec_uchar enc_uchar seg f acc0 s =
let flush_segment buf acc =
let segment = Buffer.contents buf in
Buffer.clear buf; if segment = "" then acc else f acc segment
in
let rec add buf acc segmenter v = match Uuseg.add segmenter v with
| `Uchar u -> enc_uchar buf u; add buf acc segmenter `Await
| `Boundary -> add buf (flush_segment buf acc) segmenter `Await
| `Await | `End -> acc
in
let rec loop buf acc s i max segmenter =
if i > max then flush_segment buf (add buf acc segmenter `End) else
let dec = dec_uchar s i in
let acc = add buf acc segmenter (`Uchar (Uchar.utf_decode_uchar dec)) in
loop buf acc s (i + Uchar.utf_decode_length dec) max segmenter
in
let buf = Buffer.create 42 in
let segmenter = Uuseg.create seg in
loop buf acc0 s 0 (String.length s - 1) segmenter
let fold_utf_8 seg f acc0 s =
fold String.get_utf_8_uchar Buffer.add_utf_8_uchar seg f acc0 s
let fold_utf_16be seg f acc0 s =
fold String.get_utf_16be_uchar Buffer.add_utf_16be_uchar seg f acc0 s
let fold_utf_16le seg f acc0 s =
fold String.get_utf_16le_uchar Buffer.add_utf_16le_uchar seg f acc0 s
let pp_utf_8 ppf s =
let flush buf =
let gc = Buffer.contents buf in
if gc = "" then () else (Format.fprintf ppf "@<1>%s" gc; Buffer.clear buf)
in
let rec add buf segmenter v = match Uuseg.add segmenter v with
| `Uchar u -> Buffer.add_utf_8_uchar buf u; add buf segmenter `Await
| `Boundary -> flush buf; add buf segmenter `Await
| `Await | `End -> ()
in
let rec loop buf s i max segmenter =
if i > max then (add buf segmenter `End; flush buf) else
let dec = String.get_utf_8_uchar s i in
add buf segmenter (`Uchar (Uchar.utf_decode_uchar dec));
loop buf s (i + Uchar.utf_decode_length dec) max segmenter
in
let buf = Buffer.create 10 in
let segmenter = Uuseg.create `Grapheme_cluster in
loop buf s 0 (String.length s - 1) segmenter
let pp_utf_8_text ~only_mandatory ppf s =
let b = Buffer.create 10 in
let buf_buf = ref None in
let buf_flush () =
let gc = Buffer.contents b in
if gc = "" then () else (Format.fprintf ppf "@<1>%s" gc; Buffer.clear b)
in
let buf_add u = match !buf_buf with
| None -> buf_buf := Some u
| Some last ->
match Uchar.to_int last with
| 0x000D when Uchar.to_int u = 0x000A -> buf_buf := Some u
| _ -> Buffer.add_utf_8_uchar b last; buf_buf := Some u
in
let buf_cut mandatory =
let bbuf = !buf_buf in
buf_buf := None;
match bbuf with
| None -> buf_flush (); Format.pp_print_cut ppf ()
| Some u when mandatory && Uucp.White.is_white_space u ->
buf_flush (); Format.pp_force_newline ppf ()
| Some u when mandatory ->
Buffer.add_utf_8_uchar b u; buf_flush (); Format.pp_force_newline ppf ()
| Some u when Uucp.White.is_white_space u ->
buf_flush (); Format.pp_print_break ppf 1 0;
| Some u ->
Buffer.add_utf_8_uchar b u; buf_flush (); Format.pp_print_cut ppf ()
in
let gseg = Uuseg.create `Grapheme_cluster in
let lseg = Uuseg.create `Line_break in
let rec line_add a = match Uuseg.add lseg a with
| `Uchar u -> buf_add u; line_add `Await
| `Boundary ->
let m = Uuseg.mandatory lseg in
if (only_mandatory && m) || (not only_mandatory) then buf_cut m;
line_add `Await
| `Await | `End -> ()
in
let rec add a = match Uuseg.add gseg a with
| `Uchar _ as a -> line_add a; add `Await
| `Boundary -> buf_flush (); add `Await
| `Await -> ()
| `End -> line_add `End; ()
in
let rec loop s i max =
if i > max then add `End else
let dec = String.get_utf_8_uchar s i in
add (`Uchar (Uchar.utf_decode_uchar dec));
loop s (i + Uchar.utf_decode_length dec) max
in
loop s 0 (String.length s - 1)
let pp_utf_8_lines = pp_utf_8_text ~only_mandatory:true
let pp_utf_8_text = pp_utf_8_text ~only_mandatory:false