123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724open!Coreopen!ImportincludePatdiff_core_intfincludestructopenConfigurationletdefault_context=default_contextletdefault_line_big_enough=default_line_big_enoughletdefault_word_big_enough=default_word_big_enoughend(* Strip whitespace from a string by stripping and replacing with spaces *)letws_rex=Re.compileRe.(rep1space)letws_rex_anchored=Re.compileRe.(seq[bol;repspace;eol])letws_sub=" "letremove_wss=String.strip(Re.replace_stringws_rexs~by:ws_sub)letis_ws=Re.execpws_rex_anchored(* This regular expression describes the delimiters on which to split the string *)letwords_rex=letopenReinletdelim=set{|"{}[]#,.;()_|}inletpunct=rep1(set{|=`+-/!@$%^&*:|<>|})inletspace=rep1spacein(* We don't want to split up ANSI color sequences, so let's make sure they get through
intact. *)letansi_sgr_sequence=letesc=char'\027'inseq[esc;char'[';rep(alt[char';';digit]);char'm']incompile(alt[delim;punct;space;ansi_sgr_sequence]);;(* Split a string into a list of string options delimited by words_rex
(delimiters included) *)letsplits~keep_ws=lets=ifkeep_wsthenselseString.rstripsinifString.is_emptys&&keep_wsthen[""]elseRe.split_fullwords_rexs|>List.filter_map~f:(funtoken->letstring=matchtokenwith|`Delimd->Re.Group.getd0|`Textt->tinifString.is_emptystringthenNoneelseSomestring);;(* This function ensures that the tokens passed to Patience diff do not include
whitespace. Whitespace is appended to words, and then removed by [~transform] later
on. The point is to make the semantic cleanup go well -- we don't want whitespace
matches to "count" as part of the length of a match. *)letwhitespace_ignorant_splits=ifString.is_emptysthen[]else(letistexts=not(Re.execpws_rexs)insplits~keep_ws:false|>List.group~break:(funsplit_result1_->istextsplit_result1)|>List.map~f:String.concat);;includestructlet%expect_test_=print_s([%sexp_of:stringlist](split~keep_ws:true""));[%expect{| ("") |}];;endmoduleMake(Output_impls:Output_impls)=structmoduleOutput_ops=structmoduleRule=structletapplytext~rule~output~refined=let(moduleO)=Output_impls.implementationoutputinO.Rule.applytext~rule~refined;;endmoduleRules=structletto_string(rules:Format.Rules.t)output:stringPatience_diff.Range.t->stringPatience_diff.Range.t=letapplytext~rule~refined=Rule.applytext~rule~output~refinedinfunction|Samear->letformatted_ar=Array.mapar~f:(fun(x,y)->letapp=apply~rule:rules.line_same~refined:falseinappx,appy)inSameformatted_ar|Nextar->Next(Array.mapar~f:(apply~refined:false~rule:rules.line_next))|Prevar->Prev(Array.mapar~f:(apply~refined:false~rule:rules.line_prev))|Unifiedar->Unified(Array.mapar~f:(apply~refined:true~rule:rules.line_unified))|Replace(ar1,ar2)->letar1=Array.mapar1~f:(apply~refined:true~rule:rules.line_prev)inletar2=Array.mapar2~f:(apply~refined:true~rule:rules.line_next)inReplace(ar1,ar2);;letmap_ranges(hunks:_Patience_diff.Hunk.tlist)~f=List.maphunks~f:(funhunk->{hunkwithranges=List.maphunk.ranges~f});;letapplyhunks~rules~output=map_rangeshunks~f:(to_stringrulesoutput)endletprint~print_global_header~file_names~rules~output~print~location_stylehunks=letformatted_hunks=Rules.apply~rules~outputhunksinlet(moduleO)=Output_impls.implementationoutputinO.print~print_global_header~file_names~rules~print~location_styleformatted_hunks;;endletdiff~context~line_big_enough~keep_ws~prev~next=lettransform=ifkeep_wsthenFn.idelseremove_wsinPatience_diff.String.get_hunks~transform~context~big_enough:line_big_enough~prev~next();;typeword_or_newline=[`Newlineofint*stringoption(* (number of newlines, subsequent_whitespace) *)|`Wordofstring][@@derivingsexp_of](* Splits an array of lines into an array of pieces (`Newlines and R.Words) *)letexplodear~keep_ws=letwords=Array.to_listarinletwords=ifkeep_wsthenList.mapwords~f:(split~keep_ws)elseList.mapwords~f:whitespace_ignorant_splitinletto_wordsl=List.mapl~f:(funs->`Words)in(*
[`Newline of (int * string option)]
can be thought of as:
[`Newline of
([`How_many_consecutive_newlines of int]
* [`Some_subsequent_whitespace of string
|`Empty_string
])]
This representation is used to try to collapse consecutive whitespace as tightly as
possible, but it's not a great abstraction, so some consecutive whitespace does not
get collapsed.
*)letwords=List.concat_mapwords~f:(funx->matchxwith|hd::tl->ifkeep_ws&&(not(String.is_emptyhd))&&is_wshdthen`Newline(1,Somehd)::to_wordstlelse`Newline(1,None)::`Wordhd::to_wordstl|[]->[`Newline(1,None)])inletwords=List.fold_rightwords~init:[]~f:(funxacc->(* look back at what we've accumulated so far to see if there's any whitespace that
can be collapsed. *)matchaccwith|`Words::tl->x::`Words::tl|`Newline(i,None)::tl->(matchxwith|`Words->`Words::`Newline(i,None)::tl|`Newline(j,opt)->(* collapse the whitespace from each [`Newline] by summing
how_many_consecutive_newlines from each (i+j) *)`Newline(i+j,opt)::tl)|`Newline(i,Somes1)::tl->(matchxwith|`Words2->`Words2::`Newline(i,Somes1)::tl|`Newline(j,opt)->(* collapse the whitespace from each [`Newline] by concatenating any
subsequent_whitespace (opt ^ s1) and summing how_many_consecutive_newlines
(i+j) from each. *)lets1=Option.valueopt~default:""^s1in`Newline(i+j,Somes1)::tl)|[]->[x])in(* Throw away the very first `Newline *)letwords=matchwordswith|`Newline(i,opt)::tl->`Newline(i-1,opt)::tl|`Word_::_|[]->raise_s[%message"Expected words to start with a `Newline."(words:word_or_newlinelist)]in(* Append a newline to the end, if this array has any words *)letwords=matchwordswith|[]->[]|[`Newline(0,None)]->[]|list->List.appendlist[`Newline(1,None)]inArray.of_listwords;;(* Takes hunks of `Words and `Newlines and collapses them back into lines,
* formatting appropriately. *)letcollapseranges~rule_same~rule_prev~rule_next~kind~output=(* flag indicates what kind of range is currently being collapsed *)letflag=ref`Samein(* segment is the current series of words being processed. *)letsegment=ref[]in(* line is the current series of formatted segments *)letline=ref[]in(* lines is the return array *)letlines=ref[]inletapply~rule=function|""->""|s->Output_ops.Rule.applys~rule~output~refined:falsein(*
* Finish the current segment by applying the appropriate format
* and popping it on to the end of the current line
*)letfinish_segment()=letrule=match!flagwith|`Same->rule_same|`Prev->rule_prev|`Next->rule_nextinletformatted_segment=List.rev!segment|>String.concat|>apply~ruleinline:=formatted_segment::!line;segment:=[]in(*
* Finish the current segment, apply the reset rule to the line,
* and pop the finished line onto the return array
*)letnewlinei=for_=1toidofinish_segment();lines:=String.concat(List.rev!line)::!lines;line:=[]doneinletfrange=(* Extract the array, set flag appropriately, *)letar=match(range:_Patience_diff.Range.t)with|Samear->flag:=`Same;(* R.Same ar is an array of tuples. The first tuple is an
* element from the old file, the second tuple, an element
* from the new file. Depending on what kind of collapse
* this is, we want only one or the other. *)letf=matchkindwith|`Prev_only->fst|`Next_only->snd|`Unified->sndinArray.mapar~f|Prevar->flag:=`Prev;ar|Nextar->flag:=`Next;ar|Replace_|Unified_->(* When calling collapse, we always call
* Patience_diff.unified first, which removes all R.Replaces
* and R.Unifieds. *)assertfalsein(* Iterate through the elements of the range, appending each `Word to
* segment and calling newline on each `Newline
*)Array.iterar~f:(function|`Newline(i,None)->newlinei|`Newline(i,Somes)->newlinei;segment:=s::!segment|`Words->segment:=s::!segment);finish_segment()inList.iterranges~f;(match!linewith|[]|[""]->()|line->letline=String.concat(List.revline)inifis_wslinethen(* This branch was unreachable in our regression tests, but I can't prove it's
unreachable in all cases. Rather than raise in production, let's drop this
whitespace. *)()elseraise_s[%message"Invariant violated: [collapse] got a line not terminated with a newline"(line:string)]);Array.of_list(List.rev!lines);;(* Get the hunks from two arrays of pieces (`Words and `Newlines) *)letdiff_pieces~prev_pieces~next_pieces~keep_ws~word_big_enough=letcontext=-1inlettransform=ifkeep_wsthenfunction|`Words->s|`Newline(lines,trailing_whitespace)->Option.foldtrailing_whitespace~init:(String.makelines'\n')~f:String.(^)elsefunction|`Words->remove_wss|`Newline(0,_)->""|`Newline(_,_)->" "inPatience_diff.String.get_hunks~transform~context~big_enough:word_big_enough~prev:prev_pieces~next:next_pieces();;letranges_are_just_whitespace(ranges:_Patience_diff.Range.tlist)=List.for_allranges~f:(function|Prevpiece_array|Nextpiece_array->Array.for_allpiece_array~f:(function|`Words->String.is_empty(remove_wss)|`Newline_->true)|_->true);;(* Interleaves the display of minus lines and plus lines so that equal words are presented
close together. There is some heuristic for when we think doing this improves the
diff. *)letsplit_for_readabilityrangelist=letans:_Patience_diff.Range.tlistlistref=ref[]inletpending_ranges:_Patience_diff.Range.tlistref=ref[]inletappend_rangerange=pending_ranges:=range::!pending_rangesinList.iterrangelist~f:(funrange->letsplit_was_executed=match(range:_Patience_diff.Range.t)with|Next_|Prev_|Replace_|Unified_->false|Sameseq->letfirst_newline=Array.find_mapiseq~f:(funi->function|`Word_,_|_,`Word_|`Newline(0,_),_|_,`Newline(0,_)->None|`Newlinefirst_nlA,`Newlinefirst_nlB->Some(i,first_nlA,first_nlB))in(matchfirst_newlinewith|None->false|Some(i,first_nlA,first_nlB)->ifArray.lengthseq-i<=Configuration.too_short_to_splitthenfalseelse(append_range(Same(Array.subseq~pos:0~len:i));(* A non-zero `Newline is required for [collapse] to work properly. *)append_range(Same[|`Newline(1,None),`Newline(1,None)|]);ans:=List.rev!pending_ranges::!ans;pending_ranges:=[];letsuf=Array.subseq~pos:i~len:(Array.lengthseq-i)inletdecr_first(x,y)=x-1,yinsuf.(0)<-`Newline(decr_firstfirst_nlA),`Newline(decr_firstfirst_nlB);append_range(Samesuf);true))inifnotsplit_was_executedthenappend_rangerange);List.rev(match!pending_rangeswith|[]->!ans|_::_asranges->List.revranges::!ans);;(* Refines the diff, splitting the lines into smaller arrays and diffing them, then
collapsing them back into their initial lines after applying a format. *)letrefine~(rules:Format.Rules.t)~produce_unified_lines~output~keep_ws~split_long_lines~interleave~word_big_enough(hunks:stringPatience_diff.Hunk.tlist)=letrule_prev=rules.word_previnletrule_next=rules.word_nextinletcollapse=collapse~rule_prev~rule_next~outputinlet()=matchoutputwith|Ansi|Html->()|Ascii->ifproduce_unified_linesthenfailwith"produce_unified_lines is not supported in Ascii mode"inletconsole_width=lazy(matchOutput_impls.console_width()with|Error_->80|Okwidth->width)inletrefine_range:_Patience_diff.Range.t->_Patience_diff.Range.tlist=function|Nextawhen(notkeep_ws)&&Array.for_alla~f:is_ws->[Same(Array.zip_exnaa)]|Prevawhen(notkeep_ws)&&Array.for_alla~f:is_ws->[]|(Next_|Prev_|Same_|Unified_)asrange->[range]|Replace(prev_ar,next_ar)->(* Explode the arrays *)letprev_pieces=explodeprev_ar~keep_wsinletnext_pieces=explodenext_ar~keep_wsin(* Diff the pieces *)letsub_diff=diff_pieces~prev_pieces~next_pieces~keep_ws~word_big_enoughin(* Smash the hunks' ranges all together *)letsub_diff=Patience_diff.Hunks.rangessub_diffin(* Break it up where lines are too long *)letsub_diff_pieces=ifnotsplit_long_linesthen[sub_diff]else(letmax_len=Int.max20(forceconsole_width-2)in(* Accumulates the total length of the line so far, summing lengths
of word tokens but resetting when newlines are hit *)letget_new_len_so_far~len_so_fartokens_arr=Array.fold~init:len_so_fartokens_arr~f:(funlen_so_fartoken->matchtokenwith|`Newline_->0|`Wordword->len_so_far+String.lengthword)in(* Iteratively split long lines up.
Produces a list of "range lists", where each range list should be displayed
all together in one unbroken piece before being followed by the next range
list, etc. *)letrecsplit_lineslen_so_farsub_diffrangeaccumrangelistaccum=matchsub_diffwith|[]->(matchrangeaccumwith|[]->List.revrangelistaccum|_->List.rev(List.revrangeaccum::rangelistaccum))(* More tokens ranges left to process *)|range::rest->(match(range:_Patience_diff.Range.t)with|Sametokenpairs_arr->letrange_of_tokenstokenpairs=Patience_diff.Range.Same(Array.of_listtokenpairs)in(* Keep taking tokens until we exceed max_len or hit a newline.
Returns (new len_so_far, new range, remaining tokens)*)letrectake_until_maxlen_so_fartokenpairsaccum=matchtokenpairswith|[]->len_so_far,range_of_tokens(List.revaccum),[]|((token,_)astokenpair)::rest->(matchtokenwith|`Newline_->0,range_of_tokens(List.rev(tokenpair::accum)),rest|`Wordword->letwordlen=String.lengthwordinifwordlen+len_so_far>max_len&&len_so_far>0then0,range_of_tokens(List.revaccum),tokenpairselsetake_until_max(wordlen+len_so_far)rest(tokenpair::accum))inletmake_newline()=Patience_diff.Range.Same[|`Newline(1,None),`Newline(1,None)|]in(* Keep taking ranges until all tokens exhausted.
Returns (new len_so_far, range list) *)letrectake_ranges_until_exhaustedlen_so_fartokenpairsaccum=matchtokenpairswith|[]->len_so_far,List.revaccum|_->letnew_len_so_far,new_range,new_tokenpairs=take_until_maxlen_so_fartokenpairs[]inletnew_accum=`Rangenew_range::accumin(* If there are token pairs left, that means we hit the max_len,
so add a break at this point *)letnew_accum=matchnew_tokenpairswith|_::_->`Break::`Range(make_newline())::new_accum|[]->new_accumintake_ranges_until_exhaustednew_len_so_farnew_tokenpairsnew_accuminletnew_len_so_far,new_ranges=take_ranges_until_exhaustedlen_so_far(Array.to_listtokenpairs_arr)[]in(* Update rangeaccum and rangelistaccum according to the `Ranges and
`Breaks. `Ranges accumulate on to the existing range list to be
displayed contiguously, `Breaks start a new range list. *)letrangeaccum,rangelistaccum=List.foldnew_ranges~init:(rangeaccum,rangelistaccum)~f:(fun(rangeaccum,rangelistaccum)r->matchrwith|`Break->[],List.revrangeaccum::rangelistaccum|`Ranger->r::rangeaccum,rangelistaccum)insplit_linesnew_len_so_farrestrangeaccumrangelistaccum|Nexttokens_arr|Prevtokens_arr->letnew_len_so_far=get_new_len_so_far~len_so_fartokens_arrinsplit_linesnew_len_so_farrest(range::rangeaccum)rangelistaccum|Replace(prev_arr,next_arr)->letnew_len_so_far=Int.max(get_new_len_so_far~len_so_farprev_arr)(get_new_len_so_far~len_so_farnext_arr)insplit_linesnew_len_so_farrest(range::rangeaccum)rangelistaccum|Unified_->assertfalse)insplit_lines0sub_diff[][])inletsub_diff_pieces=ifinterleavethenList.concat_mapsub_diff_pieces~f:split_for_readabilityelsesub_diff_piecesinList.concat_mapsub_diff_pieces~f:(funsub_diff->letsub_prev=Patience_diff.Range.prev_onlysub_diffinletsub_next=Patience_diff.Range.next_onlysub_diffinletall_sameranges=List.for_allranges~f:(funrange->match(range:_Patience_diff.Range.t)with|Same_->true|Preva|Nexta->ifkeep_wsthenfalseelseArray.for_alla~f:(function|`Newline_->true|`Word_->false)|_->false)inletprev_all_same=all_samesub_previnletnext_all_same=all_samesub_nextinletproduce_unified_lines=produce_unified_lines&&(((not(ranges_are_just_whitespacesub_prev))&&next_all_same)||((not(ranges_are_just_whitespacesub_next))&&prev_all_same))in(* Collapse the pieces back into lines *)letprev_next_pairs=matchprev_all_same,next_all_samewith|true,true->letkind=`Next_onlyinletrule_same=rules.word_same_unifiedinletnext_ar=collapsesub_next~rule_same~kindin[next_ar,next_ar]|false,true->letkind=`Prev_onlyinletrule_same=ifproduce_unified_linesthenrules.word_same_unifiedelserules.word_same_previnletprev_ar=collapsesub_prev~rule_same~kindinletkind=`Next_onlyinletrule_same=rules.word_same_nextinletnext_ar=collapsesub_next~rule_same~kindin[prev_ar,next_ar]|true,false->letkind=`Next_onlyinletrule_same=ifproduce_unified_linesthenrules.word_same_unifiedelserules.word_same_nextinletnext_ar=collapsesub_next~rule_same~kindinletkind=`Prev_onlyinletrule_same=rules.word_same_previnletprev_ar=collapsesub_prev~rule_same~kindin[prev_ar,next_ar]|false,false->letkind=`Prev_onlyinletrule_same=rules.word_same_previnletprev_ar=collapsesub_prev~rule_same~kindinletkind=`Next_onlyinletrule_same=rules.word_same_nextinletnext_ar=collapsesub_next~rule_same~kindin[prev_ar,next_ar]inList.mapprev_next_pairs~f:(fun(prev_ar,next_ar)->letrange:_Patience_diff.Range.t=matchprev_all_same,next_all_samewith|true,true->Same(Array.mapnext_ar~f:(funx->x,x))|_->(matchprev_ar,next_arwith(* Ugly hack that takes care of empty files *)|[|""|],next_ar->Replace([||],next_ar)|prev_ar,[|""|]->Replace(prev_ar,[||])|prev_ar,next_ar->(matchproduce_unified_lines,prev_all_same,next_all_samewith|true,true,false->Unifiednext_ar|true,false,true->Unifiedprev_ar|false,_,_|_,false,false->Replace(prev_ar,next_ar)|_->assertfalse))inrange))inhunks|>List.map~f:(funhunk->{hunkwithranges=List.concat_maphunk.ranges~f:refine_range})|>List.filter~f:(not<<Patience_diff.Hunk.all_same);;letprint~file_names~rules~output~location_stylehunks=Output_ops.printhunks~rules~output~file_names~print:(Printf.printf"%s\n")~location_style~print_global_header:true;;letoutput_to_string?(print_global_header=false)~file_names~rules~output~location_stylehunks=letbuf=Queue.create()inOutput_ops.printhunks~file_names~location_style~output~print_global_header~print:(Queue.enqueuebuf)~rules;String.concat(Queue.to_listbuf)~sep:"\n";;letiter_ansi~rules~f_hunk_break~f_linehunks=lethunks=Output_ops.Rules.applyhunks~rules~output:AnsiinHunks.iter~f_hunk_break~f_linehunks;;letpatdiff?(context=Configuration.default_context)?(keep_ws=false)?(rules=Format.Rules.default)?(output=Output.Ansi)?(produce_unified_lines=true)?(split_long_lines=true)?print_global_header?(location_style=Format.Location_style.Diff)?(interleave=true)?(line_big_enough=Configuration.default_line_big_enough)?(word_big_enough=Configuration.default_word_big_enough)~(prev:Diff_input.t)~(next:Diff_input.t)()=letkeep_ws=keep_ws||Should_keep_whitespace.for_diff~prev~nextinlethunks=diff~context~keep_ws~line_big_enough~prev:(List.to_array(String.split_linesprev.text))~next:(List.to_array(String.split_linesnext.text))|>refine~rules~produce_unified_lines~output~keep_ws~split_long_lines~interleave~word_big_enoughinoutput_to_string?print_global_header~file_names:(Fakeprev.name,Fakenext.name)~rules~output~location_stylehunks;;endmoduleWithout_unix=Make(structletconsole_width()=Ok80letimplementation:Output.t->(moduleOutput.S)=function|Ansi->(moduleAnsi_output)|Ascii->(moduleAscii_output)|Html->(moduleHtml_output.Without_mtime);;end)modulePrivate=structmoduleMake=Makeend