123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113(*---------------------------------------------------------------------------
Copyright (c) 2014 The uuseg programmers. All rights reserved.
SPDX-License-Identifier: ISC
---------------------------------------------------------------------------*)letunicode_version=Uucp.unicode_version(* Segmenters *)type'asegmenter={id:'aUuseg_base.Type.Id.t;name:string;create:unit->'a;copy:'a->'a;equal:'a->'a->bool;mandatory:'a->bool;add:'a->[`UcharofUchar.t|`Await|`End]->[`Boundary|`UcharofUchar.t|`Await|`End]}typecustom=C:'asegmenter->customtypeboundary=[`Grapheme_cluster|`Word|`Sentence|`Line_break|`Customofcustom]letpp_boundaryppfb=match(b:>boundary)with|`Grapheme_cluster->Format.fprintfppf"`Grapheme_cluster"|`Word->Format.fprintfppf"`Word"|`Sentence->Format.fprintfppf"`Sentence"|`Line_break->Format.fprintfppf"`Line_break"|`Custom(Cs)->Format.fprintfppf"`Custom %s"s.name(* Built-in segmenters *)letmandatory_default_=trueletgrapheme_cluster=C{id=Uuseg_base.Type.Id.make();name="Uuseg.grapheme_cluster";create=Uuseg_grapheme_cluster.create;copy=Uuseg_grapheme_cluster.copy;equal=Uuseg_grapheme_cluster.equal;mandatory=mandatory_default;add=Uuseg_grapheme_cluster.add;}letword=C{id=Uuseg_base.Type.Id.make();name="Uuseg.word";create=Uuseg_word.create;copy=Uuseg_word.copy;equal=Uuseg_word.equal;mandatory=mandatory_default;add=Uuseg_word.add;}letsentence=C{id=Uuseg_base.Type.Id.make();name="Uuseg.sentence";create=Uuseg_sentence.create;copy=Uuseg_sentence.copy;equal=Uuseg_sentence.equal;mandatory=mandatory_default;add=Uuseg_sentence.add;}letline_break=C{id=Uuseg_base.Type.Id.make();name="Uuseg.line_break";create=Uuseg_line_break.create;copy=Uuseg_line_break.copy;equal=Uuseg_line_break.equal;mandatory=Uuseg_line_break.mandatory;add=Uuseg_line_break.add;}(* Generic segmenter inteface *)typet=Seg:boundary*'a*'asegmenter->ttyperet=Uuseg_base.retletcreateboundary=let(Cseg)=matchboundarywith|`Grapheme_cluster->grapheme_cluster|`Word->word|`Sentence->sentence|`Line_break->line_break|`Customc->cinSeg((boundary:>boundary),seg.create(),seg)letboundary(Seg(boundary,_,_))=boundaryletadd(Seg(_,s,seg))add=seg.addsaddletmandatory(Seg(_,s,seg))=seg.mandatorysletcopy(Seg(b,s,seg))=Seg(b,seg.copys,seg)letequal(Seg(b0,s0,seg0))(Seg(b1,s1,seg1))=matchb0,b1with|`Custom_,_|_,`Custom_->invalid_arg"Cannot test custom segmenters for equality"|_->matchUuseg_base.Type.Id.provably_equalseg0.idseg1.idwith|None->false|SomeUuseg_base.Type.Equal->seg0.equals0s1letpp_ret=Uuseg_base.pp_ret(* Custom segmenters *)letcustom?(mandatory=mandatory_default)~name~create~copy~add()=(* N.B. when we require > 5.1 we can replace Uuseg_base.Type by
Stdlib.Type and open up equality testing for custom *)letid=Uuseg_base.Type.Id.make()inletequal__=assertfalseinC{id;name;create;copy;equal;mandatory;add}leterr_exp_await=Uuseg_base.err_exp_awaitleterr_ended=Uuseg_base.err_ended