(*---------------------------------------------------------------------------
Copyright (c) 2014 The uucp programmers. All rights reserved.
SPDX-License-Identifier: ISC
---------------------------------------------------------------------------*)

include Uucp_break_base

(* Low-level access to the break properties.

   For every property this module offers:
   - a raw lookup that reads the integer stored for a scalar value in the
     corresponding map of [Uucp_break_data];
   - the property's max value, re-exported from the included base module;
   - the property's of_byte array, re-exported under an of_int name. *)
module Low = struct
  let line u =
    Uchar.to_int u |> Uucp_tmapbyte.get Uucp_break_data.line_break_map

  let line_max = line_max
  let line_of_int = line_of_byte

  let grapheme_cluster u =
    Uchar.to_int u
    |> Uucp_tmapbyte.get Uucp_break_data.grapheme_cluster_break_map

  let grapheme_cluster_max = grapheme_cluster_max
  let grapheme_cluster_of_int = grapheme_cluster_of_byte

  let word u =
    Uchar.to_int u |> Uucp_tmapbyte.get Uucp_break_data.word_break_map

  let word_max = word_max
  let word_of_int = word_of_byte

  let sentence u =
    Uchar.to_int u |> Uucp_tmapbyte.get Uucp_break_data.sentence_break_map

  let sentence_max = sentence_max
  let sentence_of_int = sentence_of_byte

  let indic_conjunct_break u =
    Uchar.to_int u
    |> Uucp_tmapbyte.get Uucp_break_data.indic_conjunct_break_map

  let indic_conjunct_break_max = indic_conjunct_break_max
  let indic_conjunct_break_of_int = indic_conjunct_break_of_byte
end

(* High-level lookups: the integer produced by the matching [Low] function
   is used as an index into the matching of_int array.

   NOTE(review): [Array.unsafe_get] performs no bounds check, so these
   presumably rely on the data maps only ever yielding valid indices;
   that cannot be confirmed from this file. *)

let line u =
  Low.line u |> Array.unsafe_get Low.line_of_int

let grapheme_cluster u =
  Low.grapheme_cluster u |> Array.unsafe_get Low.grapheme_cluster_of_int

let word u =
  Low.word u |> Array.unsafe_get Low.word_of_int

let sentence u =
  Low.sentence u |> Array.unsafe_get Low.sentence_of_int

let indic_conjunct_break u =
  Low.indic_conjunct_break u
  |> Array.unsafe_get Low.indic_conjunct_break_of_int

(* East asian width of [u], read from [Uucp_break_data.east_asian_width_map]. *)
let east_asian_width u =
  Uchar.to_int u |> Uucp_rmap.get Uucp_break_data.east_asian_width_map

(* Width hint for [u]: one of -1, 0, 1 or 2.

   The tests below are ordered; the first one that applies decides the
   result. In particular the non-spacing test runs before the wide test. *)
let tty_width_hint u =
  let cp = Uchar.to_int u in
  (* U+0000 is actually safe to (non-)render. *)
  if cp = 0 then 0
  (* C0 or DELETE and C1 (general category Cc) is non-sensical. *)
  else if cp <= 0x001F || (0x007F <= cp && cp <= 0x009F) then -1
  (* Euro-centric fast path (blocks ASCII - Modifier Letters).
     Notably includes one Cf character, U+00AD (Soft hyphen). *)
  else if cp <= 0x02FF then 1
  else begin
    match Uucp__gc.general_category u with
    (* Non-spacing. *)
    | `Mn | `Me | `Cf -> 0
    | _ ->
        begin match east_asian_width u with
        (* Wide east-asian; intersects non-spacing, which was tested first. *)
        | `W | `F -> 2
        (* or else. Notably includes Zl (U+2028) and Zp (U+2029). *)
        | _ -> 1
        end
  end