(* UTF-8 text, represented as a plain [string].  Values built through [of_string] or
   [of_uchar_list] have been checked with [invariant]; the traversal functions below
   raise on byte sequences that [Uutf] reports as malformed. *)
open! Core

type t = string [@@deriving compare, sexp_of]

(* Folds [f] over the uchars of [t], also passing the byte offset at which each uchar
   starts.  Raises (with the offending bytes and position) as soon as [Uutf] reports a
   malformed sequence. *)
let fold_with_start_pos t ~init ~f =
  let require_uchar pos = function
    | `Malformed s -> raise_s [%message "Not UTF-8" ~_:(s : string) (pos : int)]
    | `Uchar uchar -> uchar
  in
  Uutf.String.fold_utf_8 (fun init pos x -> f init pos (require_uchar pos x)) init t
;;

(* Checks that [t] decodes as UTF-8 by walking every uchar and discarding the result. *)
let invariant t =
  Invariant.invariant [%here] t [%sexp_of: t] (fun () ->
    fold_with_start_pos t ~init:() ~f:(fun () (_ : int) (_ : Uchar.t) -> ()))
;;

(* Container interface whose elements are uchars (not bytes); everything is derived from
   [fold]. *)
include Container.Make0 (struct
    type nonrec t = t

    module Elt = Uchar

    let fold t ~init ~f =
      fold_with_start_pos t ~init ~f:(fun init _pos uchar -> f init uchar)
    ;;

    let iter = `Define_using_fold
    let length = `Define_using_fold
  end)

let concat ?sep ts = String.concat ts ?sep
let is_empty = String.is_empty

(* Validates [t] with [invariant] and returns it unchanged. *)
let of_string t =
  invariant t;
  t
;;

(* Splits on an ASCII separator only.  Bytes >= 128 are rejected because they can occur
   inside a multi-byte UTF-8 sequence. *)
let split str ~on =
  match on with
  | '\000' .. '\127' -> String.split str ~on
  | '\128' .. '\255' ->
    raise_s [%sexp "Utf8_text.split: can't split on a non-ascii char", (on : char)]
;;

let to_string = String.to_string

(* Every uchar is counted as one column wide. *)
let assumed_width_per_uchar = 1
let width t = sum (module Int) t ~f:(const assumed_width_per_uchar)

(* Length in bytes (as opposed to [length], which counts uchars). *)
let bytes = String.length
let space_uchar = Uchar.of_scalar_exn (Char.to_int ' ')

(* [chunks_of t ~width ~prefer_split_on_spaces] cuts [t] into consecutive substrings of at
   most [width] uchars each; concatenating the result gives back [t].

   - [""] yields [[ "" ]] whatever [width] is.
   - With [~prefer_split_on_spaces:false] every chunk except possibly the last holds
     exactly [width] uchars.
   - With [~prefer_split_on_spaces:true] a chunk is cut just after the last space among
     its first [width] uchars when there is one, and after [width] uchars otherwise.  The
     space stays at the end of the chunk it terminates.

   Raises [Invalid_argument] if [t] is non-empty and [width <= 0].  Without this check the
   [~prefer_split_on_spaces:true] path would never make progress: it would keep taking
   zero uchars per chunk and recurse forever. *)
let chunks_of t ~width ~prefer_split_on_spaces =
  match t with
  | "" -> [ "" ]
  | _ ->
    if width <= 0
    then
      invalid_arg
        ("Utf8_text.chunks_of: expected width > 0, got " ^ Int.to_string width);
    (* We identify uchars by the byte positions after their last bytes: collect every
       start offset, append the total byte length, and drop the leading 0. *)
    let uchar_ends_before_pos =
      Uutf.String.fold_utf_8 (fun acc start_pos _ -> start_pos :: acc) [] t
      |> List.cons (String.length t)
      |> List.rev
      |> List.tl_exn
    in
    let chunks =
      match prefer_split_on_spaces with
      | false ->
        assert (assumed_width_per_uchar = 1);
        uchar_ends_before_pos |> List.chunks_of ~length:width
      | true ->
        let get_num_uchars_in_chunk =
          (* End positions of all spaces.  A space is one byte in UTF-8, so adding
             [assumed_width_per_uchar] (= 1) to its start offset gives its end offset. *)
          let ends_of_spaces =
            fold_with_start_pos t ~init:[] ~f:(fun acc start_pos uchar ->
              match Uchar.equal space_uchar uchar with
              | true -> (start_pos + assumed_width_per_uchar) :: acc
              | false -> acc)
            |> Set.of_list (module Int)
          in
          (* Look backwards through the next [width] uchars for the last space; the index
             found is the number of uchars that follow it within the window. *)
          fun uchars_left ->
            List.take uchars_left width
            |> List.rev
            |> List.findi ~f:(fun _ pos -> Set.mem ends_of_spaces pos)
            |> Option.map ~f:(fun (uchars_after_last_space, _) ->
              width - uchars_after_last_space)
            |> Option.value ~default:width
        in
        let rec chunks_split_on_spaces chunks num_uchars_left = function
          | [] -> List.rev chunks
          | _ :: _ as uchars_left ->
            (match num_uchars_left * assumed_width_per_uchar <= width with
             | true ->
               (* Everything that remains fits in one final chunk. *)
               chunks_split_on_spaces (uchars_left :: chunks) 0 []
             | false ->
               let num_uchars_in_chunk = get_num_uchars_in_chunk uchars_left in
               let chunk, rest = List.split_n uchars_left num_uchars_in_chunk in
               let num_uchars_left = num_uchars_left - num_uchars_in_chunk in
               chunks_split_on_spaces (chunk :: chunks) num_uchars_left rest)
        in
        let num_uchars = List.length uchar_ends_before_pos in
        chunks_split_on_spaces [] num_uchars uchar_ends_before_pos
    in
    (* Each chunk is a non-empty list of end offsets; its last element is where the chunk
       ends, and the previous chunk's end is where it starts. *)
    let chunk_ends_before_pos = chunks |> List.map ~f:List.last_exn |> Sequence.of_list in
    chunk_ends_before_pos
    |> Sequence.unfold_with ~init:0 ~f:(fun start_at end_before ->
      Yield (String.sub t ~pos:start_at ~len:(end_before - start_at), end_before))
    |> Sequence.to_list
;;

(* Encodes [uchars] as UTF-8 and passes the result through [of_string]. *)
let of_uchar_list uchars =
  let buf = Buffer.create 8 (* arbitrary small number *) in
  List.iter uchars ~f:(Uutf.Buffer.add_utf_8 buf);
  of_string (Buffer.contents buf)
;;

(* Quickcheck support: generate a list of uchars (ints filtered through
   [Uchar.of_scalar]) and convert with [of_uchar_list] / [to_list]. *)
include
  Quickcheckable.Of_quickcheckable
    (struct
      module Uchar = struct
        type t = Uchar.t

        include
          Quickcheckable.Of_quickcheckable_filtered
            (Int)
            (struct
              type nonrec t = t

              let of_quickcheckable = Uchar.of_scalar
              let to_quickcheckable = Uchar.to_scalar
            end)
      end

      type t = Uchar.t list [@@deriving quickcheck]
    end)
    (struct
      type nonrec t = t

      let of_quickcheckable = of_uchar_list
      let to_quickcheckable = to_list
    end)

(* Like [iter], but also passes the 0-based uchar index to [f]. *)
let iteri t ~f =
  ignore
    (fold t ~init:0 ~f:(fun i uchar ->
       f i uchar;
       i + 1)
     : int)
;;