123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272open!Core(* Invariants:
- [Append (x, y)] must have both [x] and [y] non-empty (complexity analysis
of [to_string] relies on it).
- Overall length is less than [String.max_length] (so [to_string] can work, at least in
principle). *)moduleTree=structtypet=|Baseofstring|Appendoft*tletrecunrolltaux=matchtwith|Basex->x,aux|Append(x,y)->unrollx(y::aux);;letto_char_sequencet=letf(((x,xs)asxxs),xpos):_Sequence.Step.t=ifxpos<String.lengthxthenYield(x.[xpos],(xxs,xpos+1))else(matchxswith|[]->Done|y::ys->Skip(unrollyys,0))inSequence.unfold_step~init:(unrollt[],0)~f;;leteither_is_prefix_of_othert1t2=Sequence.for_all(Sequence.zip(to_char_sequencet1)(to_char_sequencet2))~f:(fun(x,y)->Char.equalxy);;endtypet={len:int;tree:Tree.t}letof_strings={len=String.lengths;tree=Bases}letempty=of_string""letlengtht=t.lenletis_emptyt=lengtht=0moduleTo_string=struct(* [todo_right] avoids stack overflow (some usage patterns result in highly
unbalanced trees, so the naive recursive approach doesn't work). However we can
avoid that allocation by using the process stack when the depth appears small, as
inspired by [Base.List.map]. This is sufficient to make the common case do
zero minor-heap allocations.
Using unsafe blitting substantially improves performance, but depends on the
correctness of the [len] field to avoid memory corruption. To be precise, if [len] is
too small the code may (but also may not, if you're lucky) write past the bounds of
the buffer. If [len] is too large, we always write in-bounds, but will leave some of
the buffer uninitialized. In either case if we don't corrupt memory (if we do all
bets are off) then the assert at the end should fail, so we won't actually return the
bad data to the caller. *)letrecunsafe_blit_allocate_tailcall~dst~todo_right~lefttree:int=match(tree:Tree.t)with|Append(t1,t2)->unsafe_blit_allocate_tailcall~dst~todo_right:(t2::todo_right)~leftt1|Bases->letleft=letlen_s=String.lengthsinBytes.From_string.unsafe_blit~src:s~src_pos:0~dst~dst_pos:left~len:len_s;left+len_sin(matchtodo_rightwith|[]->left|tree::todo_right->unsafe_blit_allocate_tailcall~dst~todo_right~lefttree);;(* We call this function when we're recursing into a left subtree but we don't know the
size of the right subtree, so we don't know how much of the buffer we'll write into.
*)letrecunsafe_blit_fast_partial~dst~left~depthtree:int=match(tree:Tree.t)with|Append(t1,t2)->letleft=(* Only check the [depth] when we plan to increase it. *)ifdepth>1000thenunsafe_blit_allocate_tailcall~dst~todo_right:[]~leftt1elseunsafe_blit_fast_partial~dst~left~depth:(depth+1)t1inunsafe_blit_fast_partial~dst~left~deptht2|Bases->letlen_s=String.lengthsinBytes.From_string.unsafe_blit~src:s~src_pos:0~dst~dst_pos:left~len:len_s;left+len_s;;(* We call this function when we know both bounds of the data we'll write. *)letrecunsafe_blit_fast_entire_range~dst~left~righttree=match(tree:Tree.t)with|Append(t1,Bases2)->(* Optimization: preserve tailcall by blitting from the right. We can only do this
when we know [right], so we can't do it once we recurse into the left of
[Append (Append _, Append _)] *)letlen_s=String.lengths2inletright=right-len_sinBytes.From_string.unsafe_blit~src:s2~src_pos:0~dst~dst_pos:right~len:len_s;unsafe_blit_fast_entire_range~dst~left~rightt1|Append(t1,t2)->letleft=unsafe_blit_fast_partial~dst~left~depth:1t1inunsafe_blit_fast_entire_range~dst~left~rightt2|Bases->letlen_s=String.lengthsin(* This assert is not expensive since it can occur only once per [to_string] *)assert(left+len_s=right);Bytes.From_string.unsafe_blit~src:s~src_pos:0~dst~dst_pos:left~len:len_s;;letto_string{len;tree}=matchtreewith|Bases->s|Append_->letbuf=Bytes.createleninunsafe_blit_fast_entire_range~dst:buf~left:0~right:lentree;Bytes.unsafe_to_string~no_mutation_while_string_reachable:buf;;endletto_string=To_string.to_stringletto_char_sequencet=Tree.to_char_sequencet.treeincludeSexpable.Of_stringable(structtypenonrect=tletto_string=to_stringletof_string=of_stringend)(* We could loosen the [String.max_length] length restriction, since people can still read
an arbitrary-length sequence out of [to_char_sequence]. I choose not to do this because
I think [to_string] will be the more popular choice, and I'd prefer for it not to be
able to raise. If someone else chooses differently, we'll likely still want to check
against [Int.max_value]. *)let(^)ab=ifis_emptyathenbelseifis_emptybthenaelseifString.max_length-a.len<b.lenthenError.raise_s[%message"Rope.(a ^ b) would be longer than String.max_length"(lengtha:int)(lengthb:int)(String.max_length:int)]else{len=a.len+b.len;tree=Append(a.tree,b.tree)};;letconcat?(sep=empty)ts=List.reducets~f:(funxy->x^sep^y)|>Option.value~default:empty;;letconcat_array?(sep=empty)ts=Array.reducets~f:(funxy->x^sep^y)|>Option.value~default:empty;;letrecadd_to_buffer_internalbuffertodo:Tree.t->_=function|Append(s1,s2)->add_to_buffer_internalbuffer(s2::todo)s1|Bases->Buffer.add_stringbuffers;(matchtodowith|[]->()|x::xs->add_to_buffer_internalbufferxsx);;letadd_to_buffer{len=_;tree}buffer=add_to_buffer_internalbuffer[]treeletis_prefixt~prefix=prefix.len<=t.len&&Tree.either_is_prefix_of_othert.treeprefix.tree;;letequalab=a.len=b.len&&Tree.either_is_prefix_of_othera.treeb.treeletquickcheck_generator=Quickcheck.Generator.weighted_union[1.,Quickcheck.Generator.singletonempty;(100.,Quickcheck.Generator.recursive_union[Quickcheck.Generator.mapString.gen_nonempty~f:of_string]~f:(funt->[Quickcheck.Generator.map2tt~f:(^);Quickcheck.Generator.map2tString.gen_nonempty~f:(funleftright->left^of_stringright);Quickcheck.Generator.map2String.gen_nonemptyt~f:(funleftright->of_stringleft^right)]))];;moduleT_deriving_hash=structtypenonrect=tlethash_fold_tacct=String.hash_fold_tacc(to_stringt)lethasht=String.hash(to_stringt)endletquickcheck_observer=Quickcheck.Observer.of_hash(moduleT_deriving_hash)letquickcheck_shrinker=letof_treetree={len=(letrecgotodototalt=match(t:Tree.t)with|Append(t1,t2)->go(t1::todo)totalt2|Bases->lettotal=String.lengths+totalin(matchtodowith|[]->total|t::todo->gotodototalt)ingo[]0tree);tree}inQuickcheck.Shrinker.create(funt->matcht.treewith|Basestring->Sequence.map~f:of_string(Quickcheck.Shrinker.shrinkString.quickcheck_shrinkerstring)|Append(left,right)->Sequence.of_list[of_treeleft;of_treeright]);;moduleFor_testing=structmoduleTree=structtypet=Tree.t=|Baseofstring|Appendoft*t[@@derivingsexp_of]endtypenonrect=t={len:int;tree:Tree.t}[@@derivingsexp_of]letnum_bases{len=_;tree}=letrecgotodoacc=function|Tree.Append(t1,t2)->go(t2::todo)acct1|Base_->letacc=acc+1in(matchtodowith|[]->acc|t::ts->gotsacct)ingo[]0tree;;letto_string_tailcall({len;tree}ast)=matchtreewith|Bases->s|Append_->letbuf=Bytes.createleninletleft=To_string.unsafe_blit_allocate_tailcall~dst:buf~todo_right:[]~left:0t.treeinassert(left=len);Bytes.unsafe_to_string~no_mutation_while_string_reachable:buf;;end