open Core_kernel
open Core_kernel.Int.Replace_polymorphic_compare

let ( <|> ) ar (i, j) = if j <= i then [||] else Array.slice ar i j

module Ordered_sequence : sig
  type elt = int * int [@@deriving compare]

  (* A [t] has its second coordinates in increasing order *)
  type t = private elt array [@@deriving sexp_of]

  val create : (int * int) list -> t
  val is_empty : t -> bool
end = struct
  type elt = int * int [@@deriving sexp_of]

  let compare_elt =
    Comparable.lexicographic
      [ (fun (_, y0) (_, y1) -> Int.compare y0 y1)
      ; (fun (x0, _) (x1, _) -> Int.compare x0 x1)
      ]
  ;;

  type t = elt array [@@deriving sexp_of]

  let create l =
    let t = Array.of_list l in
    Array.sort t ~compare:compare_elt;
    t
  ;;

  let is_empty = Array.is_empty
end
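(* For example, [Ordered_sequence.create [ 3, 2; 1, 1; 2, 1 ]] sorts primarily by the
   second coordinate and breaks ties on the first, yielding [| 1, 1; 2, 1; 3, 2 |]. *)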
(* This is an implementation of the patience sorting algorithm as explained at
   http://en.wikipedia.org/wiki/Patience_sorting *)
module Patience : sig
  val longest_increasing_subsequence : Ordered_sequence.t -> (int * int) list
end = struct
  module Pile = struct
    type 'a t = 'a Stack.t

    let create x =
      let t = Stack.create () in
      Stack.push t x;
      t
    ;;

    let top t = Stack.top t |> Option.value_exn
    let put_on_top t x = Stack.push t x
  end

  module Piles = struct
    type 'a t = 'a Pile.t Deque.t

    let empty () : 'a t = Deque.create ~never_shrink:true ()

    let get_ith_pile t i dir =
      let get index offset =
        Option.bind (index t) ~f:(fun index -> Deque.get_opt t (index + offset))
      in
      match dir with
      | `From_left -> get Deque.front_index i
      | `From_right -> get Deque.back_index (-i)
    ;;

    let new_rightmost_pile t pile = Deque.enqueue_back t pile
  end

  module Backpointers = struct
    (* in the terminology of the Wikipedia article, this corresponds to a card together
       with its backpointers *)
    type 'a tag = 'a t

    and 'a t =
      { value : 'a
      ; tag : 'a tag option
      }

    let to_list t =
      let rec to_list acc t =
        match t.tag with
        | None -> t.value :: acc
        | Some t' -> to_list (t.value :: acc) t'
      in
      to_list [] t
    ;;
  end

  module Play_patience : sig
    val play_patience
      :  Ordered_sequence.t
      -> get_tag:
           (pile_opt:int option
            -> piles:Ordered_sequence.elt Backpointers.t Piles.t
            -> Ordered_sequence.elt Backpointers.tag option)
      -> Ordered_sequence.elt Backpointers.t Piles.t
  end = struct
    let optimized_findi_from_left piles x =
      (* first see if any work *)
      let last_pile = Piles.get_ith_pile piles 0 `From_right in
      (* [x_pile] is a dummy pile just used for comparisons *)
      let x_pile = Pile.create { Backpointers.value = x, 0; tag = None } in
      let compare_top_values pile1 pile2 =
        let top pile = fst (Pile.top pile).Backpointers.value in
        Int.compare (top pile1) (top pile2)
      in
      let%bind.Option last_pile = last_pile in
      if compare_top_values last_pile x_pile < 0
      then None
      else
        (* do binary search *)
        Deque.binary_search
          piles
          `First_strictly_greater_than
          x_pile
          ~compare:compare_top_values
    ;;

    (* [play_patience ar ~get_tag] plays patience with the greedy algorithm as described
       in the Wikipedia article, taking [ar] to be the deck of cards. It returns the
       resulting [Piles.t]. Before putting an element of [ar] in a pile, it tags it using
       [get_tag]. [get_tag] takes as its arguments the full [Piles.t] in its current
       state, and also the specific [Pile.t] that the element of [ar] is being added to. *)
    let play_patience ar ~get_tag =
      let ar = (ar : Ordered_sequence.t :> Ordered_sequence.elt array) in
      if Array.length ar = 0 then raise (Invalid_argument "Patience_diff.play_patience");
      let piles = Piles.empty () in
      Array.iter ar ~f:(fun x ->
        let pile_opt = optimized_findi_from_left piles (fst x) in
        let tagged_x = { Backpointers.value = x; tag = get_tag ~pile_opt ~piles } in
        match pile_opt with
        | None -> Piles.new_rightmost_pile piles (Pile.create tagged_x)
        | Some i ->
          let pile = Deque.get piles i in
          Pile.put_on_top pile tagged_x);
      piles
    ;;
  end

  let longest_increasing_subsequence ar =
    if Ordered_sequence.is_empty ar
    then []
    else
      let module P = Play_patience in
      let get_tag ~pile_opt ~piles =
        match pile_opt with
        | None -> Piles.get_ith_pile piles 0 `From_right |> Option.map ~f:Pile.top
        | Some i ->
          if i = 0
          then None
          else
            Piles.get_ith_pile piles (i - 1) `From_left
            |> Option.value_exn
            |> Pile.top
            |> Option.some
      in
      let piles = P.play_patience ar ~get_tag in
      Piles.get_ith_pile piles 0 `From_right
      |> Option.value_exn
      |> Pile.top
      |> Backpointers.to_list
  ;;
end

let compare_int_pair = Tuple.T2.compare ~cmp1:Int.compare ~cmp2:Int.compare

let _longest_increasing_subsequence ar =
  let ar = (ar : Ordered_sequence.t :> (int * int) array) in
  let len = Array.length ar in
  if len <= 1
  then Array.to_list ar
  else (
    let maxlen = ref 0 in
    let m = Array.create ~len:(len + 1) (-1) in
    let pred = Array.create ~len:(len + 1) (-1) in
    for i = 0 to len - 1 do
      let p =
        Array.binary_search
          ~compare:Ordered_sequence.compare_elt
          ar
          `First_greater_than_or_equal_to
          ar.(i)
          ~len:(max (!maxlen - 1) 0)
          ~pos:1
        |> Option.value ~default:0
      in
      pred.(i) <- m.(p);
      if p = !maxlen || compare_int_pair ar.(i) ar.(p + 1) < 0
      then (
        m.(p + 1) <- i;
        if p + 1 > !maxlen then maxlen := p + 1)
    done;
    let rec loop ac p = if p = -1 then ac else loop (ar.(p) :: ac) pred.(p) in
    loop [] m.(!maxlen))
;;
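(* A worked example for the longest-increasing-subsequence functions above: given the
   ordered sequence [ 2, 0; 5, 1; 6, 2; 3, 3; 0, 4; 4, 5; 1, 6 ] (second coordinates
   already increasing), the longest subsequences that increase in both coordinates have
   length 3; [ 2, 0; 3, 3; 4, 5 ] is one of them. The tests at the bottom of this file
   only check that the returned list is increasing in both coordinates, since several
   subsequences of maximal length may exist. *)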
module Matching_block = struct
  module Stable = struct
    module V1 = struct
      type t =
        { prev_start : int
        ; next_start : int
        ; length : int
        }
      [@@deriving sexp, bin_io]
    end
  end

  include Stable.V1
end

module Range = struct
  module Stable = struct
    module V1 = struct
      type 'a t =
        | Same of ('a * 'a) array
        | Prev of 'a array
        | Next of 'a array
        | Replace of 'a array * 'a array
        | Unified of 'a array
      [@@deriving sexp, bin_io]
    end
  end

  include Stable.V1

  let all_same ranges =
    List.for_all ranges ~f:(fun range ->
      match range with
      | Same _ -> true
      | _ -> false)
  ;;

  let prev_only ranges =
    let f = function
      | Replace (l_range, _) -> [ Prev l_range ]
      | Next _ -> []
      | range -> [ range ]
    in
    List.concat_map ranges ~f
  ;;

  let next_only ranges =
    let f = function
      | Replace (_, r_range) -> [ Next r_range ]
      | Prev _ -> []
      | range -> [ range ]
    in
    List.concat_map ranges ~f
  ;;

  let prev_size = function
    | Unified lines | Replace (lines, _) | Prev lines -> Array.length lines
    | Same lines -> Array.length lines
    | Next _ -> 0
  ;;

  let next_size = function
    | Unified lines | Replace (_, lines) | Next lines -> Array.length lines
    | Same lines -> Array.length lines
    | Prev _ -> 0
  ;;
end

module Hunk = struct
  module Stable = struct
    module V1 = struct
      type 'a t =
        { prev_start : int
        ; prev_size : int
        ; next_start : int
        ; next_size : int
        ; ranges : 'a Range.Stable.V1.t list
        }
      [@@deriving fields, sexp, bin_io]
    end
  end

  include Stable.V1

  let _invariant t =
    Invariant.invariant [%here] t [%sexp_of: _ t] (fun () ->
      [%test_result: int]
        (List.sum (module Int) t.ranges ~f:Range.prev_size)
        ~expect:t.prev_size
        ~message:"prev_size";
      [%test_result: int]
        (List.sum (module Int) t.ranges ~f:Range.next_size)
        ~expect:t.next_size
        ~message:"next_size")
  ;;

  (* Does the nitty gritty of turning indexes into
     line numbers and reversing the ranges, returning a nice new hunk *)
  let create prev_start prev_stop next_start next_stop ranges =
    { prev_start = prev_start + 1
    ; prev_size = prev_stop - prev_start
    ; next_start = next_start + 1
    ; next_size = next_stop - next_start
    ; ranges = List.rev ranges
    }
  ;;

  let all_same hunk = Range.all_same hunk.ranges
  let concat_map t ~f = { t with ranges = List.concat_map t.ranges ~f }
end

module Hunks = struct
  module Stable = struct
    module V1 = struct
      type 'a t = 'a Hunk.Stable.V1.t list [@@deriving sexp, bin_io]
    end
  end

  include Stable.V1

  let concat_map_ranges hunks ~f = List.map hunks ~f:(Hunk.concat_map ~f)

  let unified hunks =
    let f : 'a Range.t -> 'a Range.t list = function
      | Replace (l_range, r_range) -> [ Prev l_range; Next r_range ]
      | range -> [ range ]
    in
    concat_map_ranges hunks ~f
  ;;

  let ranges hunks = List.concat_map hunks ~f:(fun hunk -> hunk.Hunk.ranges)
end

module type S = sig
  type elt

  val get_matching_blocks
    :  transform:('a -> elt)
    -> ?big_enough:int
    -> prev:'a array
    -> next:'a array
    -> Matching_block.t list

  val matches : elt array -> elt array -> (int * int) list
  val match_ratio : elt array -> elt array -> float

  val get_hunks
    :  transform:('a -> elt)
    -> context:int
    -> ?big_enough:int
    -> prev:'a array
    -> next:'a array
    -> 'a Hunk.t list

  type 'a segment =
    | Same of 'a array
    | Different of 'a array array

  type 'a merged_array = 'a segment list

  val merge : elt array array -> elt merged_array
end

(* Configurable parameters for [semantic_cleanup] and [unique_lcs], all chosen based
   on empirical observation. *)

(* This function is called on the edge case of semantic cleanup, when there's a change
   that's exactly the same length as the size of the match. If the insert on the next
   side is a LOT larger than the match, it should be semantically cleaned up, but
   most of the time it should be left alone. *)
let should_discard_if_other_side_equal ~big_enough = 100 / big_enough
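(* For instance, with [big_enough = 3] the threshold is [100 / 3 = 33]: a match flanked
   by a change of exactly its own length on one side is only discarded when the change
   on the other side exceeds the match by at least 33 tokens (see [should_discard_match]
   below). *)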
(* These are the numerator and denominator of the cutoff for aborting the patience diff
   algorithm in [unique_lcs]. (This will result in us using [Plain_diff] instead.)
   Lowering [switch_to_plain_diff_numerator] / [switch_to_plain_diff_denominator]
   makes us switch to plain diff less often. The range of this cutoff is from 0 to 1,
   where 0 means we never switch and 1 means we always switch. *)
let switch_to_plain_diff_numerator = 1
let switch_to_plain_diff_denominator = 10
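(* With the default cutoff of 1/10: if only 5 unique matching pairs survive out of an
   intersection of 80 shared tokens, then [5 * 10 < 80 * 1], so [unique_lcs] reports
   [`Not_enough_unique_tokens] and [matches] falls back to [Plain_diff]. *)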
module Make (Elt : Hashtbl.Key) = struct
  module Table = Hashtbl.Make (Elt)

  type elt = Elt.t

  (* This is an implementation of the patience diff algorithm by Bram Cohen as seen in
     Bazaar version 1.14.1 *)
  module Line_metadata = struct
    type t =
      | Unique_in_a of { index_in_a : int }
      | Unique_in_a_b of
          { index_in_a : int
          ; index_in_b : int
          }
      | Not_unique of { occurrences_in_a : int }
  end

  let unique_lcs (alpha, alo, ahi) (bravo, blo, bhi) =
    (* Create a hashtable which takes elements of a to their index in a iff they're
       unique. If an element is not unique, it takes it to its frequency in a. *)
    let unique : (elt, Line_metadata.t) Table.hashtbl =
      Table.create ~size:(Int.min (ahi - alo) (bhi - blo)) ()
    in
    for x's_pos_in_a = alo to ahi - 1 do
      let x = alpha.(x's_pos_in_a) in
      match Hashtbl.find unique x with
      | None ->
        Hashtbl.set unique ~key:x ~data:(Unique_in_a { index_in_a = x's_pos_in_a })
      | Some (Unique_in_a _) ->
        Hashtbl.set unique ~key:x ~data:(Not_unique { occurrences_in_a = 2 })
      | Some (Not_unique { occurrences_in_a = n }) ->
        Hashtbl.set unique ~key:x ~data:(Not_unique { occurrences_in_a = n + 1 })
      (* This case doesn't occur until the second pass through [unique] *)
      | Some (Unique_in_a_b _) -> assert false
    done;
    (* [num_pairs] is the size of the list we use for Longest Increasing Subsequence.
       [intersection_size] is the number of tokens in the intersection of the two
       sequences, with multiplicity, and is an upper bound on the size of the LCS. *)
    let num_pairs = ref 0 in
    let intersection_size = ref 0 in
    for x's_pos_in_b = blo to bhi - 1 do
      let x = bravo.(x's_pos_in_b) in
      Hashtbl.find unique x
      |> Option.iter ~f:(fun pos ->
        match pos with
        | Not_unique { occurrences_in_a = n } ->
          if n > 0
          then (
            Hashtbl.set unique ~key:x ~data:(Not_unique { occurrences_in_a = n - 1 });
            incr intersection_size)
        | Unique_in_a { index_in_a = x's_pos_in_a } ->
          incr num_pairs;
          incr intersection_size;
          Hashtbl.set
            unique
            ~key:x
            ~data:
              (Unique_in_a_b { index_in_a = x's_pos_in_a; index_in_b = x's_pos_in_b })
        | Unique_in_a_b _ ->
          decr num_pairs;
          Hashtbl.set unique ~key:x ~data:(Not_unique { occurrences_in_a = 0 }))
    done;
    (* If we're ignoring almost all of the text when we perform the patience
       diff algorithm, it will often give bad results. *)
    if !num_pairs * switch_to_plain_diff_denominator
       < !intersection_size * switch_to_plain_diff_numerator
    then `Not_enough_unique_tokens
    else (
      let a_b =
        let unique =
          Hashtbl.filter_map unique ~f:(function
            | Not_unique _ | Unique_in_a _ -> None
            | Unique_in_a_b { index_in_a = i_a; index_in_b = i_b } -> Some (i_a, i_b))
        in
        Ordered_sequence.create (Hashtbl.data unique)
      in
      `Computed_lcs (Patience.longest_increasing_subsequence a_b))
  ;;
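  (* A small worked example: comparing the token sequences a b c b and b c a, only [a]
     and [c] are unique in both, giving candidate pairs (0, 2) and (2, 1) (index in the
     first sequence, index in the second). No increasing subsequence contains both, so
     the computed LCS over unique tokens has length 1. Here [num_pairs = 2] and
     [intersection_size = 3], well above the 1/10 cutoff, so we do not fall back to
     [Plain_diff]. *)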
  (* [matches a b] returns a list of pairs (i,j) such that a.(i) = b.(j) and such that
     the list is strictly increasing in both its first and second coordinates.
     This is done by first applying unique_lcs to find matches from a to b among those
     elements which are unique in both a and b, and then recursively applying [matches] to
     each subinterval determined by those matches. The uniqueness requirement is waived
     for blocks of matching lines at the beginning or end.
     I couldn't figure out how to do this efficiently in a functional way, so
     this is pretty much a straight translation of the original Python code. *)
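  (* For example, with [Make (Int)] (as used in the tests at the bottom of this file),
     [matches [| 0; 1; 2 |] [| 1; 2; 3 |]] is [ 1, 0; 2, 1 ]: element 1 of the first
     array lines up with element 0 of the second, and element 2 with element 1. *)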
  let matches alpha bravo =
    let matches_ref_length = ref 0 in
    let matches_ref = ref [] in
    let add_match m =
      incr matches_ref_length;
      matches_ref := m :: !matches_ref
    in
    let rec recurse_matches alo blo ahi bhi =
      (* printf "alo %d blo %d ahi %d bhi %d\n%!" alo blo ahi bhi; *)
      let old_length = !matches_ref_length in
      if not (alo >= ahi || blo >= bhi)
      then
        if Elt.compare alpha.(alo) bravo.(blo) = 0
        then (
          let alo = ref alo in
          let blo = ref blo in
          while !alo < ahi && !blo < bhi && Elt.compare alpha.(!alo) bravo.(!blo) = 0 do
            add_match (!alo, !blo);
            incr alo;
            incr blo
          done;
          recurse_matches !alo !blo ahi bhi)
        else if Elt.compare alpha.(ahi - 1) bravo.(bhi - 1) = 0
        then (
          let nahi = ref (ahi - 1) in
          let nbhi = ref (bhi - 1) in
          while
            !nahi > alo
            && !nbhi > blo
            && Elt.compare alpha.(!nahi - 1) bravo.(!nbhi - 1) = 0
          do
            decr nahi;
            decr nbhi
          done;
          recurse_matches alo blo !nahi !nbhi;
          for i = 0 to ahi - !nahi - 1 do
            add_match (!nahi + i, !nbhi + i)
          done)
        else (
          let last_a_pos = ref (alo - 1) in
          let last_b_pos = ref (blo - 1) in
          let plain_diff () =
            Plain_diff.iter_matches
              ~hashable:(module Elt)
              (Array.sub alpha ~pos:alo ~len:(ahi - alo))
              (Array.sub bravo ~pos:blo ~len:(bhi - blo))
              ~f:(fun (i1, i2) -> add_match (alo + i1, blo + i2))
          in
          match unique_lcs (alpha, alo, ahi) (bravo, blo, bhi) with
          | `Not_enough_unique_tokens -> plain_diff ()
          | `Computed_lcs lcs ->
            lcs
            |> List.iter ~f:(fun (apos, bpos) ->
              if !last_a_pos + 1 <> apos || !last_b_pos + 1 <> bpos
              then recurse_matches (!last_a_pos + 1) (!last_b_pos + 1) apos bpos;
              last_a_pos := apos;
              last_b_pos := bpos;
              add_match (apos, bpos));
            if !matches_ref_length > old_length
               (* Did unique_lcs find anything at all? *)
            then recurse_matches (!last_a_pos + 1) (!last_b_pos + 1) ahi bhi
            else plain_diff ())
    in
    recurse_matches 0 0 (Array.length alpha) (Array.length bravo);
    List.rev !matches_ref
  ;;

  let collapse_sequences matches =
    let collapsed = ref [] in
    let start_a = ref None in
    let start_b = ref None in
    let length = ref 0 in
    List.iter matches ~f:(fun (i_a, i_b) ->
      match !start_a, !start_b with
      | Some start_a_val, Some start_b_val
        when i_a = start_a_val + !length && i_b = start_b_val + !length -> incr length
      | _ ->
        (match !start_a, !start_b with
         | Some start_a_val, Some start_b_val ->
           let matching_block =
             { Matching_block.prev_start = start_a_val
             ; next_start = start_b_val
             ; length = !length
             }
           in
           collapsed := matching_block :: !collapsed
         | _ -> ());
        start_a := Some i_a;
        start_b := Some i_b;
        length := 1);
    (match !start_a, !start_b with
     | Some start_a_val, Some start_b_val when !length <> 0 ->
       let matching_block =
         { Matching_block.prev_start = start_a_val
         ; next_start = start_b_val
         ; length = !length
         }
       in
       collapsed := matching_block :: !collapsed
     | _ -> ());
    List.rev !collapsed
  ;;
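  (* For example, [collapse_sequences [ 0, 0; 1, 1; 2, 2; 5, 7 ]] collapses the first
     three pairs into one block, producing
     [ { prev_start = 0; next_start = 0; length = 3 }
     ; { prev_start = 5; next_start = 7; length = 1 } ]. *)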
  (* Given that there's an insert/delete of size [left_change] to the left, and
     an insert/delete of size [right_change] to the right, should we keep
     this block of length [block_len] in our list of matches, or discard it? *)
  let should_discard_match ~big_enough ~left_change ~right_change ~block_len =
    (* Throw away if its effective length is too small,
       relative to its surrounding inserts / deletes. *)
    block_len < big_enough
    && ((left_change > block_len && right_change > block_len)
        || (left_change >= block_len + should_discard_if_other_side_equal ~big_enough
            && right_change = block_len)
        || (right_change >= block_len + should_discard_if_other_side_equal ~big_enough
            && left_change = block_len))
  ;;

  let change_between
    (left_matching_block : Matching_block.t)
    (right_matching_block : Matching_block.t)
    =
    max
      (right_matching_block.prev_start - left_matching_block.prev_start)
      (right_matching_block.next_start - left_matching_block.next_start)
    - left_matching_block.length
  ;;

  (* See the "Semantic Chaff" section of https://neil.fraser.name/writing/diff/ *)
  let basic_semantic_cleanup ~big_enough matching_blocks =
    if big_enough <= 1
    then matching_blocks
    else (
      match matching_blocks with
      | [] -> []
      | first_block :: other_blocks ->
        let final_ans, final_pending =
          List.fold
            other_blocks
            ~init:([], first_block)
            ~f:(fun (ans, pending) current_block ->
              let rec loop ans pending =
                match ans with
                | [] -> ans, pending
                | hd :: tl ->
                  if should_discard_match
                       ~big_enough
                       ~left_change:(change_between hd pending)
                       ~right_change:(change_between pending current_block)
                       ~block_len:pending.length
                  then loop tl hd
                  else ans, pending
              in
              let updated_ans, updated_pending = loop ans pending in
              updated_pending :: updated_ans, current_block)
        in
        List.rev (final_pending :: final_ans))
  ;;
  (* Attempts to eliminate the "tunnel vision" problem described in the
     "Semantic Chaff" section of https://neil.fraser.name/writing/diff/.
     To do this, we go through each pair of consecutive matches
     and pretend to combine them into one match. If that match would
     be deleted by [basic_semantic_cleanup], we delete both. *)
  let advanced_semantic_cleanup ~big_enough matching_blocks =
    if big_enough <= 1
    then matching_blocks
    else (
      match matching_blocks with
      | [] -> []
      | [ first_block ] -> [ first_block ]
      | first_block :: second_block :: other_blocks ->
        let final_ans, final_pendingA, final_pendingB =
          List.fold
            other_blocks
            ~init:([], first_block, second_block)
            ~f:(fun (ans, pendingA, pendingB) current_block ->
              let rec loop ans pendingA pendingB =
                match ans with
                | [] -> ans, pendingA, pendingB
                | hd :: tl ->
                  if should_discard_match
                       ~big_enough
                       ~left_change:(change_between hd pendingA)
                       ~right_change:(change_between pendingB current_block)
                       ~block_len:
                         (pendingB.length
                          + min
                              (pendingB.prev_start - pendingA.prev_start)
                              (pendingB.next_start - pendingA.next_start))
                  then loop tl hd pendingA
                  else ans, pendingA, pendingB
              in
              let updated_ans, updated_pendingA, updated_pendingB =
                loop ans pendingA pendingB
              in
              updated_pendingA :: updated_ans, updated_pendingB, current_block)
        in
        List.rev (final_pendingB :: final_pendingA :: final_ans)
        (* The loop above only deleted the second element of each pair we're supposed to
           delete. This call to [basic_semantic_cleanup] is guaranteed to finish the job
           by deleting the remaining element of those pairs. *)
        |> basic_semantic_cleanup ~big_enough)
  ;;

  (* Goal: eliminate small, semantically meaningless matches. *)
  let semantic_cleanup ~big_enough matching_blocks =
    basic_semantic_cleanup ~big_enough matching_blocks
    |> advanced_semantic_cleanup ~big_enough
  ;;
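  (* To make the discard rule concrete: with [big_enough = 3], a match of length 1 with
     changes of size 2 on both sides is discarded (it is shorter than [big_enough] and
     both neighbouring changes exceed it), while the same match with a change of size 1
     on one side is kept. *)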
  (* When we have a choice, we'd prefer one block of equality to two.
     For example, instead of A <insert>B A</insert> C D E F, we prefer
     <insert>A B</insert> A C D E F. There are two reasons:
     (1) A is usually something like "let", and so the second version is more
     semantically accurate.
     (2) Semantic cleanup may delete the lone A match, but it will not delete
     the A C D E F match. So by moving the A match, we've also saved it. *)
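  (* Concretely: with [prev] = A C D E F and [next] = A B A C D E F, the raw matches
     [ { prev_start = 0; next_start = 0; length = 1 }
     ; { prev_start = 1; next_start = 3; length = 4 } ]
     are combined into the single block { prev_start = 0; next_start = 2; length = 5 },
     so the diff becomes <insert>A B</insert> followed by one five-token match. *)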
  let combine_equalities ~prev ~next ~matches =
    match matches with
    | [] -> []
    | first_block :: tl ->
      List.fold tl ~init:([], first_block) ~f:(fun (ans, pending) block ->
        let rec loop ans ~(pending : Matching_block.t) ~(new_block : Matching_block.t) =
          if pending.length = 0
          then ans, pending, new_block
          else (
            let advance_in_prev =
              Elt.compare
                prev.(pending.prev_start + pending.length - 1)
                prev.(new_block.prev_start - 1)
              = 0
            in
            let advance_in_next =
              Elt.compare
                next.(pending.next_start + pending.length - 1)
                next.(new_block.next_start - 1)
              = 0
            in
            if advance_in_prev && advance_in_next
            then
              loop
                ans
                ~pending:
                  { prev_start = pending.prev_start
                  ; next_start = pending.next_start
                  ; length = pending.length - 1
                  }
                ~new_block:
                  { prev_start = new_block.prev_start - 1
                  ; next_start = new_block.next_start - 1
                  ; length = new_block.length + 1
                  }
            else ans, pending, new_block)
        in
        let updated_ans, updated_pending, updated_new_block =
          loop ans ~pending ~new_block:block
        in
        (* In the original Google heuristic, we would either move all or none
           of pending. But because it might start with an unmatched `Newline(0, None),
           we are fine with moving all but one token of it. *)
        if updated_pending.length = 0 || updated_pending.length = 1
        then (
          let new_ans =
            if updated_pending.length = 0
            then updated_ans
            else updated_pending :: updated_ans
          in
          new_ans, updated_new_block)
        else (* Do nothing *) pending :: ans, block)
      |> fun (ans, pending) -> List.rev (pending :: ans)
  ;;

  let get_matching_blocks ~transform ?(big_enough = 1) ~prev ~next =
    let prev = Array.map prev ~f:transform in
    let next = Array.map next ~f:transform in
    let matches = matches prev next |> collapse_sequences in
    let matches = combine_equalities ~prev ~next ~matches in
    let last_match =
      { Matching_block.prev_start = Array.length prev
      ; next_start = Array.length next
      ; length = 0
      }
    in
    List.append matches [ last_match ] |> semantic_cleanup ~big_enough
  ;;
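  (* A rough sketch of the output shape: with [Make (Int)] and the default [big_enough]
     of 1 (which disables semantic cleanup),
     [get_matching_blocks ~transform:Fn.id ~prev:[| 0; 1; 2 |] ~next:[| 1; 2; 3 |]]
     yields
     [ { prev_start = 1; next_start = 0; length = 2 }
     ; { prev_start = 3; next_start = 3; length = 0 } ];
     a final zero-length block marks the end of both arrays. *)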
  let get_ranges_rev ~transform ~big_enough ~prev ~next =
    let rec aux (matching_blocks : Matching_block.t list) i j l : _ Range.t list =
      match matching_blocks with
      | current_block :: remaining_blocks ->
        let prev_index, next_index, size =
          current_block.prev_start, current_block.next_start, current_block.length
        in
        (* Throw away crossover matches *)
        if prev_index < i || next_index < j
        then aux remaining_blocks i j l
        else (
          let range_opt : _ Range.t option =
            if i < prev_index && j < next_index
            then (
              let prev_range = prev <|> (i, prev_index) in
              let next_range = next <|> (j, next_index) in
              Some (Replace (prev_range, next_range)))
            else if i < prev_index
            then (
              let prev_range = prev <|> (i, prev_index) in
              Some (Prev prev_range))
            else if j < next_index
            then (
              let next_range = next <|> (j, next_index) in
              Some (Next next_range))
            else None
          in
          let l =
            match range_opt with
            | Some range -> range :: l
            | None -> l
          in
          let prev_stop, next_stop = prev_index + size, next_index + size in
          let l =
            if size = 0
            then l
            else (
              let prev_range = prev <|> (prev_index, prev_stop) in
              let next_range = next <|> (next_index, next_stop) in
              let range = Array.map2_exn prev_range next_range ~f:(fun x y -> x, y) in
              Same range :: l)
          in
          aux remaining_blocks prev_stop next_stop l)
      | [] -> List.rev l
    in
    let matching_blocks = get_matching_blocks ~transform ~big_enough ~prev ~next in
    aux matching_blocks 0 0 []
  ;;

  let get_hunks ~transform ~context ?(big_enough = 1) ~prev ~next =
    let ranges = get_ranges_rev ~transform ~big_enough ~prev ~next in
    let a = prev in
    let b = next in
    if context < 0
    then (
      let singleton_hunk =
        Hunk.create 0 (Array.length a) 0 (Array.length b) (List.rev ranges)
      in
      [ singleton_hunk ])
    else (
      let rec aux ranges_remaining curr_ranges alo ahi blo bhi acc_hunks =
        match (ranges_remaining : _ Range.t list) with
        | [] ->
          (* Finish the last hunk *)
          let new_hunk = Hunk.create alo ahi blo bhi curr_ranges in
          (* Add it to the accumulator *)
          let acc_hunks = new_hunk :: acc_hunks in
          (* Finished! Return the accumulator *)
          List.rev acc_hunks
        | [ Same range ] ->
          (* If the last range is a Same, we might need to crop to context. *)
          let stop = min (Array.length range) context in
          let new_range = Range.Same (range <|> (0, stop)) in
          let curr_ranges = new_range :: curr_ranges in
          (* Finish the current hunk *)
          let ahi = ahi + stop in
          let bhi = bhi + stop in
          let new_hunk = Hunk.create alo ahi blo bhi curr_ranges in
          (* Add it to the accumulator *)
          let acc_hunks = new_hunk :: acc_hunks in
          (* Finished! Return the accumulator *)
          List.rev acc_hunks
        | Same range :: rest ->
          let size = Array.length range in
          if size > context * 2
          then (
            (* If this Same range is sufficiently large, split off a new hunk *)
            let new_range = Range.Same (range <|> (0, context)) in
            let curr_ranges = new_range :: curr_ranges in
            (* Advance both hi's by context *)
            let ahi = ahi + context in
            let bhi = bhi + context in
            (* Finish the current hunk *)
            let new_hunk = Hunk.create alo ahi blo bhi curr_ranges in
            (* Add it to the accumulator *)
            let acc_hunks = new_hunk :: acc_hunks in
            (* Calculate ranges for the next hunk *)
            let alo = ahi + size - (2 * context) in
            let ahi = alo in
            let blo = bhi + size - (2 * context) in
            let bhi = blo in
            (* Push the remainder of the Equal range back onto the remaining_ranges *)
            let rest = Range.Same (range <|> (size - context, size)) :: rest in
            aux rest [] alo ahi blo bhi acc_hunks)
          else (
            (* Otherwise, this range is small enough that it qualifies as context for
               both the previous and forthcoming range, so simply add it to
               curr_ranges untouched *)
            let curr_ranges = Range.Same range :: curr_ranges in
            let ahi = ahi + size in
            let bhi = bhi + size in
            aux rest curr_ranges alo ahi blo bhi acc_hunks)
        | range :: rest ->
          (* Any range that isn't an Equal is important and not just context, so keep
             it in curr_ranges *)
          let curr_ranges = range :: curr_ranges in
          (* rest could be anything, so extract hunk_info from range *)
          let ahi, bhi =
            match range with
            | Same _ ->
              (* We eliminate the possibility of a Same above *)
              assert false
            | Unified _ ->
              (* get_ranges_rev never returns a Unified range *)
              assert false
            | Next range ->
              let stop = bhi + Array.length range in
              ahi, stop
            | Prev range ->
              let stop = ahi + Array.length range in
              stop, bhi
            | Replace (a_range, b_range) ->
              let prev_stop = ahi + Array.length a_range in
              let next_stop = bhi + Array.length b_range in
              prev_stop, next_stop
          in
          aux rest curr_ranges alo ahi blo bhi acc_hunks
      in
      let ranges, alo, ahi, blo, bhi =
        match ranges with
        (* If the first range is an Equal, shave off the front of the range, according to
           context. Keep it on the ranges list so hunk construction can see where the
           range begins *)
        | Same range :: rest ->
          let stop = Array.length range in
          let start = max 0 (stop - context) in
          let new_range = Range.Same (range <|> (start, stop)) in
          new_range :: rest, start, start, start, start
        | rest -> rest, 0, 0, 0, 0
      in
      aux ranges [] alo ahi blo bhi [])
  ;;

  let match_ratio a b =
    (matches a b |> List.length |> ( * ) 2 |> float)
    /. (Array.length a + Array.length b |> float)
  ;;

  let collapse_multi_sequences matches =
    let collapsed = ref [] in
    let value_exn x = Option.value_exn x in
    if List.is_empty matches
    then []
    else (
      let start = Array.create ~len:(List.length (List.hd_exn matches)) None in
      let length = ref 0 in
      List.iter matches ~f:(fun il ->
        if Array.for_all start ~f:Option.is_some
           && List.mapi il ~f:(fun i x -> x = value_exn start.(i) + !length)
              |> List.for_all ~f:(fun x -> x)
        then incr length
        else (
          if Array.for_all start ~f:Option.is_some
          then
            collapsed
            := (Array.map start ~f:value_exn |> Array.to_list, !length) :: !collapsed;
          List.iteri il ~f:(fun i x -> start.(i) <- Some x);
          length := 1));
      if Array.for_all start ~f:Option.is_some && !length <> 0
      then
        collapsed
        := (Array.map start ~f:value_exn |> Array.to_list, !length) :: !collapsed;
      List.rev !collapsed)
  ;;

  type 'a segment =
    | Same of 'a array
    | Different of 'a array array

  type 'a merged_array = 'a segment list

  let array_mapi2 ar1 ar2 ~f =
    Array.zip_exn ar1 ar2 |> Array.mapi ~f:(fun i (x, y) -> f i x y)
  ;;

  let merge ar =
    if Array.length ar = 0
    then []
    else if Array.length ar = 1
    then [ Same ar.(0) ]
    else (
      let matches's = Array.map (ar <|> (1, Array.length ar)) ~f:(matches ar.(0)) in
      let len = Array.length ar in
      let hashtbl = Int.Table.create () ~size:0 in
      Array.iteri matches's ~f:(fun i matches ->
        List.iter matches ~f:(fun (a, b) ->
          match Hashtbl.find hashtbl a with
          | None -> Hashtbl.set hashtbl ~key:a ~data:[ i, b ]
          | Some l -> Hashtbl.set hashtbl ~key:a ~data:((i, b) :: l)));
      let list =
        Hashtbl.to_alist hashtbl
        |> List.filter_map ~f:(fun (a, l) ->
          if List.length l = len - 1
          then Some (a :: (List.sort l ~compare:compare_int_pair |> List.map ~f:snd))
          else None)
        |> List.sort ~compare:(List.compare Int.compare)
      in
      let matching_blocks = collapse_multi_sequences list in
      let last_pos = Array.create ~len:(Array.length ar) 0 in
      let merged_array = ref [] in
      List.iter matching_blocks ~f:(fun (l, len) ->
        let ar' = Array.of_list l in
        if Array.compare Int.compare last_pos ar' <> 0
        then
          merged_array
          := Different (array_mapi2 last_pos ar' ~f:(fun i n m -> ar.(i) <|> (n, m)))
             :: !merged_array;
        merged_array := Same (ar.(0) <|> (ar'.(0), ar'.(0) + len)) :: !merged_array;
        Array.iteri last_pos ~f:(fun i _ -> last_pos.(i) <- ar'.(i) + len));
      List.rev !merged_array)
  ;;
end

let%test_module _ =
  (module struct
    module P = Make (Int)

    let%test_unit _ =
      let check a b ~expect = [%test_result: (int * int) list] (P.matches a b) ~expect in
      check [||] [||] ~expect:[];
      check [| 0 |] [| 0 |] ~expect:[ 0, 0 ];
      check [| 0; 1; 1; 2 |] [| 3; 1; 4; 5 |] ~expect:[ 1, 1 ]
    ;;

    (* Needs the plain diff section *)
    let rec is_increasing a = function
      | [] -> true
      | hd :: tl -> Int.compare a hd <= 0 && is_increasing hd tl
    ;;

    let check_lis a =
      let b = Patience.longest_increasing_subsequence (Ordered_sequence.create a) in
      if is_increasing (-1) (List.map b ~f:fst) && is_increasing (-1) (List.map b ~f:snd)
      then ()
      else
        failwiths
          "invariant failure"
          (a, b)
          [%sexp_of: (int * int) list * (int * int) list]
    ;;

    let%test_unit _ = check_lis [ 2, 0; 5, 1; 6, 2; 3, 3; 0, 4; 4, 5; 1, 6 ]
    let%test_unit _ = check_lis [ 0, 0; 2, 0; 5, 1; 6, 2; 3, 3; 0, 4; 4, 5; 1, 6 ]
    let%test_unit _ = check_lis [ 5, 1; 6, 2; 3, 3; 0, 4; 4, 5; 1, 6 ]

    let%test_unit _ =
      let check a b =
        let matches = P.matches a b in
        if is_increasing (-1) (List.map matches ~f:fst)
           && is_increasing (-1) (List.map matches ~f:snd)
        then ()
        else
          failwiths
            "invariant failure"
            (a, b, matches)
            [%sexp_of: int array * int array * (int * int) list]
      in
      check [| 0; 1; 2; 3; 4; 5; 6 |] [| 2; 5; 6; 3; 0; 4; 1 |]
    ;;
  end)
;;

module String = Make (String)

module Stable = struct
  module Matching_block = Matching_block.Stable
  module Range = Range.Stable
  module Hunk = Hunk.Stable
  module Hunks = Hunks.Stable
end
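(* A usage sketch (an illustration, not part of the library): assuming this file is
   exposed as a module named [Patience_diff], diffing two files line by line with the
   [String] instantiation above would look roughly like

   {[
     let hunks =
       Patience_diff.String.get_hunks
         ~transform:Fn.id
         ~context:3
         ~prev:prev_lines
         ~next:next_lines
     in
     List.iter hunks ~f:(fun hunk -> ... (* render [hunk.ranges] *))
   ]}

   where [prev_lines] and [next_lines] are [string array]s supplied by the caller, and
   each hunk carries up to 3 lines of [Same] context around its changes. *)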