123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430external(|>):'a->('a->'b)->'b="%revapply";;external(@@):('a->'b)->'a->'b="%apply"(* Separators. *)moduleBreak=struct(* A break can be a whitespace or a newline if the text has to be splited. *)typet=|Space|Newlineend(* The internal representation of a document and the engine. *)moduleAtom=struct(* An atom is the low-level tree describing a document. *)typet=|Stringofstring*int*int(* A non-breaking string. It should be newlines free. Represented as a
sub-string of an other string, with an offset and a length. *)|BreakofBreak.t(* A separator. *)|GroupOneofbool*tlist(* A list of atoms. Only the necessary number of breaks are splited.
The boolean is true if nesting is activated. *)|GroupAllofbool*tlist(* A list of atoms. No or all the breaks are splited.
The boolean is true if nesting is activated. *)|Indentofint*t(* Indents by [n] tabulations the atom. Can be negative. *)(* If we overflow a line. *)exceptionOverflow(* Print "at best" an atom [a] for a line width [width] and tabulation width [tab].
[i] is the indentation level, [p] the current column position (in number
of spaces), [last_break] the last break printed if any (so we can collapse
spaces). It returns the same atom where spaces have been evaluated to
newlines, the new current column position and the last break printed if any.
Must succeed (no uncaught [Overflow] exception). *)letreceval(width:int)(tab:int)(i:int)(a:t)(p:int)(last_break:Break.toption):t*int*Break.toption=matchawith|String(_,_,l)->(a,(iflast_break=SomeBreak.Newlinethenp+i+lelsep+l),None)|BreakBreak.Space->iflast_break=Nonethen(a,p+1,SomeBreak.Space)else(a,p,last_break)|BreakBreak.Newline->(a,0,SomeBreak.Newline)|GroupOne(can_nest,_as)->let(_as,p,last_break)=try_eval_list_onewidthtabi_asplast_breakfalsecan_nestfalsein(GroupOne(can_nest,_as),p,last_break)|GroupAll(can_nest,_as)->let(_as,p,last_break)=trylet(p,last_break)=try_eval_list_flatwidthtab(i+tab)_asplast_breakin(_as,p,last_break)with|Overflow->eval_list_allwidthitab_asplast_breakcan_nestin(GroupAll(can_nest,_as),p,last_break)|Indent(n,a)->let(a,p,last_break)=evalwidthtab(i+n*tab)aplast_breakin(Indent(n,a),p,last_break)(* Try to print an atom without evaluating the spaces. May raise [Overflow] if we
overflow the line [width]. *)andtry_eval_flat(width:int)(tab:int)(i:int)(a:t)(p:int)(last_break:Break.toption):int*Break.toption=lettry_return(p,last_break)=ifp>widththenraiseOverflowelse(p,last_break)inmatchawith|String(_,_,l)->try_return((iflast_break=SomeBreak.Newlinethenp+i+lelsep+l),None)|BreakBreak.Space->iflast_break=Nonethentry_return(p+1,SomeBreak.Space)elsetry_return(p,last_break)|BreakBreak.Newline->raiseOverflow|GroupOne(can_nest,_as)->let(p,last_break)=try_eval_list_flatwidthtab(i+tab)_asplast_breakin(p,last_break)|GroupAll(can_nest,_as)->let(p,last_break)=try_eval_list_flatwidthtab(i+tab)_asplast_breakin(p,last_break)|Indent(_,a)->try_eval_flatwidthtabiaplast_break(* Like [try_eval_flat] but for a list of atoms. *)andtry_eval_list_flat(width:int)(tab:int)(i:int)(_as:tlist)(p:int)(last_break:Break.toption):int*Break.toption=match_aswith|[]->(p,last_break)|a::_as->let(p,last_break)=try_eval_flatwidthtabiaplast_breakinlet(p,last_break)=try_eval_list_flatwidthtabi_asplast_breakin(p,last_break)(* Eval "at best" a list of atoms using the "split only when necessary" policy. The [can_fail]
flag controls if we can raise an [Overflow], the [can_nest] if we can nest when we break,
[in_nest] if we have already nested. *)andtry_eval_list_one(width:int)(tab:int)(i:int)(_as:tlist)(p:int)(last_break:Break.toption)(can_fail:bool)(can_nest:bool)(in_nest:bool):tlist*int*Break.toption=match_aswith|[]->(_as,p,last_break)|BreakBreak.Space::_as->iflast_break=Nonethen(* If it is not possible in flat mode, switch back to "at best". *)(trylet(_as,p,last_break)=try_eval_list_onewidthtabi_as(p+1)(SomeBreak.Space)truecan_nestin_nestin(BreakBreak.Space::_as,p,last_break)with|Overflow->letdo_indent=can_nest&¬in_nestinlet(_as,p,last_break)=try_eval_list_onewidthtab(ifdo_indenttheni+tabelsei)_as0(SomeBreak.Newline)falsecan_nestcan_nestinifdo_indentthen([BreakBreak.Newline;Indent(1,GroupOne(false,_as))],p,last_break)else(BreakBreak.Newline::_as,p,last_break))elsetry_eval_list_onewidthtabi_asplast_breakcan_failcan_nestin_nest|BreakBreak.Newline::_as->let(_as,p,last_break)=(* If there is an explicit newline we always undo the nesting. *)ifin_nestthentry_eval_list_onewidthtab(i-tab)_as0(SomeBreak.Newline)falsecan_nestfalseelsetry_eval_list_onewidthtabi_as0(SomeBreak.Newline)falsecan_nestfalseinifin_nestthen([BreakBreak.Newline;Indent(-1,GroupOne(false,_as))],p,last_break)else(BreakBreak.Newline::_as,p,last_break)|a::_as->let(a,p,last_break)=(* If [Overflow] is possible we try in flat mode, else "at best". *)ifcan_failthenlet(p,last_break)=try_eval_flatwidthtabiaplast_breakin(a,p,last_break)elseevalwidthtabiaplast_breakinlet(_as,p,last_break)=try_eval_list_onewidthtabi_asplast_breakcan_failcan_nestin_nestin(a::_as,p,last_break)(* Eval "at best" a list of atoms splitting all the spaces. The flag [can_nest]
sets if we indent when we break lines. *)andeval_list_all(width:int)(tab:int)(i:int)(_as:tlist)(p:int)(last_break:Break.toption)(can_nest:bool):tlist*int*Break.toption=match_aswith|[]->(_as,p,last_break)|BreakBreak.Space::_as->iflast_break=Nonethen(let(_as,p,last_break)=eval_list_allwidthtab(ifcan_nesttheni+tabelsei)_as0(SomeBreak.Newline)falseinifcan_nestthen([BreakBreak.Newline;Indent(1,GroupAll(false,_as))],p,last_break)else(BreakBreak.Newline::_as,p,last_break))elseeval_list_allwidthtabi_asplast_breakcan_nest|a::_as->let(a,p,last_break)=evalwidthtabiaplast_breakinlet(_as,p,last_break)=eval_list_allwidthtabi_asplast_breakcan_nestin(a::_as,p,last_break)(* Evaluate the breaks with a maximal [width] per line and a tabulation width [tab]. *)letrender(width:int)(tab:int)(_as:tlist):t=let(a,_,_)=evalwidthtab0(GroupOne(false,_as))0(SomeBreak.Newline)ina(* A buffer eating trailing spaces. *)moduleNonTrailingBuffer=structtypet={add_char:char->unit;add_string:string->unit;add_sub_string:string->int->int->unit;mutablenb_spaces:int}(* A new buffer. *)letmake(add_char:char->unit)(add_string:string->unit)(add_sub_string:string->int->int->unit):t={add_char=add_char;add_string=add_string;add_sub_string=add_sub_string;nb_spaces=0(* A number of spaces we may print if they are not trailing. *)}(* Forget previous spaces which appear to be trailing. *)letforget_spaces(b:t):unit=b.nb_spaces<-0(* Spaces are not trailing: print all of them. *)letflush_spaces(b:t):unit=b.add_string(String.makeb.nb_spaces' ');forget_spacesb(* Indent by [i] spaces. By convention, indentation spaces are always
printed, even one an empty line, to mark the indentation level. *)letindent(b:t)(i:int):unit=forget_spacesb;b.add_string(String.makei' ')(* Print a sub-string. *)letsub_string(b:t)(s:string)(o:int)(l:int):unit=flush_spacesb;b.add_sub_stringsol(* Add one space in the buffer. *)letspace(b:t):unit=b.nb_spaces<-b.nb_spaces+1(* Print a newline, with no trailing space before it. *)letnewline(b:t):unit=forget_spacesb;b.add_char'\n'end(* Write to something, given the [add_char] and [add_string] functions. *)letto_something(tab:int)(add_char:char->unit)(add_string:string->unit)(add_sub_string:string->int->int->unit)(a:t):unit=letopenNonTrailingBufferinletb=makeadd_charadd_stringadd_sub_stringinletrecauxai(last_break:Break.toption):Break.toption=matchawith|String(s,o,l)->(*Printf.printf "<%d, %b>" i (last_break = Some Break.Newline);*)iflast_break=SomeBreak.Newlinethenindentbi;sub_stringbsol;None|BreakBreak.Space->iflast_break=Nonethen(spaceb;SomeBreak.Space)elselast_break|BreakBreak.Newline->iflast_break=SomeBreak.Newlinethenindentbi;newlineb;SomeBreak.Newline|GroupOne(_,_as)|GroupAll(_,_as)->letlast_break=reflast_breakin_as|>List.iter(funa->last_break:=auxai!last_break);!last_break|Indent(n,a)->auxa(i+n*tab)last_breakinignore(auxa0(SomeBreak.Newline))end(* A document is a binary tree of atoms so that concatenation happens in O(1). *)typet=|Empty|LeafofAtom.t|Nodeoft*tletempty:t=Emptyletstring(s:string):t=ifs=""thenemptyelseLeaf(Atom.String(s,0,String.lengths))let(!^)=stringletsub_string(s:string)(o:int)(l:int):t=ifl=0thenemptyelseLeaf(Atom.String(s,o,l))letspace:t=Leaf(Atom.BreakBreak.Space)letnewline:t=Leaf(Atom.BreakBreak.Newline)letappend(d1:t)(d2:t):t=Node(d1,d2)let(^-^)=appendletconcat_with_space(d1:t)(d2:t):t=d1^-^space^-^d2let(^^)=concat_with_space(* Convert a document, which is a tree of atoms, to a list of atoms. In O(n). *)letto_atoms(d:t):Atom.tlist=letrecaux(d:t)(l:Atom.tlist):Atom.tlist=matchdwith|Empty->l|Leafa->a::l|Node(d1,d2)->auxd1(auxd2l)inauxd[]letrecindent(d:t):t=matchdwith|Empty->Empty|Leafa->Leaf(Atom.Indent(1,a))|Node(d1,d2)->Node(indentd1,indentd2)letnest(d:t):t=Leaf(Atom.GroupOne(true,to_atomsd))letnest_all(d:t):t=Leaf(Atom.GroupAll(true,to_atomsd))letgroup(d:t):t=Leaf(Atom.GroupOne(false,to_atomsd))letgroup_all(d:t):t=Leaf(Atom.GroupAll(false,to_atomsd))letparens(d:t):t=!^"("^-^d^-^!^")"letbraces(d:t):t=!^"{"^-^d^-^!^"}"letbrakets(d:t):t=!^"["^-^d^-^!^"]"letangle_brakets(d:t):t=!^"<"^-^d^-^!^">"letsingle_quotes(d:t):t=!^"'"^-^d^-^!^"'"letdouble_quotes(d:t):t=!^"\""^-^d^-^!^"\""letconcat(ds:tlist):t=List.fold_leftappendemptydsletseparate(separator:t)(ds:tlist):t=letrecauxds=matchdswith|[]->empty|d::ds->separator^-^d^-^auxdsinmatchdswith|[]->empty|d::ds->d^-^auxds(* Split a non-unicode string in a list of offsets / lengths according to a predicate [f]. *)letrecsplit(s:string)(f:char->bool)(o:int)(l:int):(int*int)list=ifo+l=String.lengthsthen[(o,l)]elseiffs.[o+l]then(o,l)::splitsf(o+l+1)0elsesplitsfo(l+1)letwords(s:string):t=group@@separatespace@@List.map(fun(o,l)->sub_stringsol)(splits(func->c=' '||c='\n'||c='\t')00)letlines(s:string):t=separatenewline@@List.map(fun(o,l)->sub_stringsol)(splits(func->c='\n')00)moduleOCaml=structletunit(_:unit):t=!^"()"letbool(b:bool):t=!^(string_of_boolb)letint(i:int):t=!^(string_of_inti)letfloat(f:float):t=!^(string_of_floatf)letstring(s:string):t=double_quotes(!^(String.escapeds))letoption(d:'a->t)(o:'aoption):t=matchowith|None->!^"None"|Somex->nest(!^"Some"^^dx)letlist(d:'a->t)(l:'alist):t=brakets@@nest_all(space^^separate(!^";"^^space)(List.mapdl)^^space)lettuple(ds:tlist):t=parens@@nest@@separate(!^","^^space)dsendmoduleDebug=struct(* Pretty-print an atom. *)letrecpp_atom(a:Atom.t):t=matchawith|Atom.String(s,o,l)->OCaml.string(String.subsol)|Atom.BreakBreak.Space->!^"Space"|Atom.BreakBreak.Newline->!^"Newline"|Atom.GroupOne(can_nest,_as)->nest(!^"GroupOne"^^parens(OCaml.boolcan_nest^-^!^","^^pp_atoms_as))|Atom.GroupAll(can_nest,_as)->nest(!^"GroupAll"^^parens(OCaml.boolcan_nest^-^!^","^^pp_atoms_as))|Atom.Indent(n,a)->nest(!^"Indent"^^parens(OCaml.intn^-^!^","^^pp_atoma))(* Pretty-print a list of atoms. *)andpp_atoms(_as:Atom.tlist):t=group_all(separate(!^","^^space)(List.mappp_atom_as))letpp_document(d:t):t=OCaml.listpp_atom(to_atomsd)letpp_document_after_rendering(width:int)(tab:int)(d:t):t=pp_atom@@Atom.renderwidthtab@@to_atomsdendletto_something(width:int)(tab:int)(add_char:char->unit)(add_string:string->unit)(add_sub_string:string->int->int->unit)(d:t):unit=Atom.to_somethingtabadd_charadd_stringadd_sub_string@@Atom.renderwidthtab@@to_atomsdletto_buffer(width:int)(tab:int)(b:Buffer.t)(d:t):unit=to_somethingwidthtab(Buffer.add_charb)(Buffer.add_stringb)(Buffer.add_substringb)dletto_string(width:int)(tab:int)(d:t):string=letb=Buffer.create10into_bufferwidthtabbd;Buffer.contentsbletto_out_channel(width:int)(tab:int)(c:out_channel)(d:t):unit=letoutput_sub_string(s:string)(o:int)(l:int):unit=output_stringc(String.subsol)into_somethingwidthtab(output_charc)(output_stringc)output_sub_stringdletto_stdout(width:int)(tab:int)(d:t):unit=to_out_channelwidthtabstdoutd