123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685(**************************************************************************)(* *)(* PPrint *)(* *)(* François Pottier, Inria Paris *)(* Nicolas Pouillard *)(* *)(* Copyright 2007-2019 Inria. All rights reserved. This file is *)(* distributed under the terms of the GNU Library General Public *)(* License, with an exception, as described in the file LICENSE. *)(**************************************************************************)(** A point is a pair of a line number and a column number. *)typepoint=int*int(** A range is a pair of points. *)typerange=point*point(* ------------------------------------------------------------------------- *)(* A type of integers with infinity. *)typerequirement=int(* with infinity *)(* Infinity is encoded as [max_int]. *)letinfinity:requirement=max_int(* Addition of integers with infinity. *)let(++)(x:requirement)(y:requirement):requirement=ifx=infinity||y=infinitytheninfinityelsex+y(* Comparison between an integer with infinity and a normal integer. *)let(<==)(x:requirement)(y:int)=x<=y(* ------------------------------------------------------------------------- *)(* A uniform interface for output channels. *)classtypeoutput=object(** [char c] sends the character [c] to the output channel. *)methodchar:char->unit(** [substring s ofs len] sends the substring of [s] delimited by the
offset [ofs] and the length [len] to the output channel. *)methodsubstring:string->int(* offset *)->int(* length *)->unitend(* Three kinds of output channels are wrapped so as to satisfy the above
interface: OCaml output channels, OCaml memory buffers, and OCaml
formatters. *)classchannel_outputchannel=objectmethodchar=output_charchannelmethodsubstring=output_substringchannel(* We used to use [output], but, as of OCaml 4.02 and with -safe-string
enabled, the type of [output] has changed: this function now expects
an argument of type [bytes]. The new function [output_substring] must
be used instead. Furthermore, as of OCaml 4.06, -safe-string is enabled
by default. In summary, we require OCaml 4.02, use [output_substring],
and enable -safe-string. *)endclassbuffer_outputbuffer=objectmethodchar=Buffer.add_charbuffermethodsubstring=Buffer.add_substringbufferendclassformatter_outputfmt=objectmethodchar=function|'\n'->Format.pp_force_newlinefmt()|' '->Format.pp_print_spacefmt()|c->Format.pp_print_charfmtcmethodsubstringstrofslen=Format.pp_print_textfmt(ifofs=0&&len=String.lengthstrthenstrelseString.substrofslen)end(* ------------------------------------------------------------------------- *)(** The rendering engine maintains the following internal state. Its structure
is subject to change in future versions of the library. Nevertheless, it is
exposed to the user who wishes to define custom documents. *)typestate={width:int;(** The line width. This parameter is fixed throughout the execution of
the renderer. *)ribbon:int;(** The ribbon width. This parameter is fixed throughout the execution of
the renderer. *)mutablelast_indent:int;(** The number of blanks that were printed at the beginning of the current
line. This field is updated (only) when a hardline is emitted. It is
used (only) to determine whether the ribbon width constraint is
respected. *)mutableline:int;(** The current line. This field is updated (only) when a hardline is
emitted. It is not used by the pretty-printing engine itself. *)mutablecolumn:int;(** The current column. This field must be updated whenever something is
sent to the output channel. It is used (only) to determine whether the
width constraint is respected. *)}(* ------------------------------------------------------------------------- *)(* [initial rfrac width] creates a fresh initial state. *)letinitialrfracwidth={width=width;ribbon=max0(minwidth(truncate(float_of_intwidth*.rfrac)));last_indent=0;line=0;column=0}(* ------------------------------------------------------------------------- *)(** A custom document is defined by implementing the following methods. *)classtypecustom=object(** A custom document must publish the width (i.e., the number of columns)
that it would like to occupy if it is printed on a single line (that is,
in flattening mode). The special value [infinity] means that this
document cannot be printed on a single line; this value causes any
groups that contain this document to be dissolved. This method should
in principle work in constant time. *)methodrequirement:requirement(** The method [pretty] is used by the main rendering algorithm. It has
access to the output channel and to the algorithm's internal state, as
described above. In addition, it receives the current indentation level
and the current flattening mode (on or off). If flattening mode is on,
then the document must be printed on a single line, in a manner that is
consistent with the requirement that was published ahead of time. If
flattening mode is off, then there is no such obligation. The state must
be updated in a manner that is consistent with what is sent to the
output channel. *)methodpretty:output->state->int->bool->unit(** The method [compact] is used by the compact rendering algorithm. It has
access to the output channel only. *)methodcompact:output->unitend(* ------------------------------------------------------------------------- *)(* Here is the algebraic data type of documents. It is analogous to Daan
Leijen's version, but the binary constructor [Union] is replaced with
the unary constructor [Group], and the constant [Line] is replaced with
more general constructions, namely [IfFlat], which provides alternative
forms depending on the current flattening mode, and [HardLine], which
represents a newline character, and causes a failure in flattening mode. *)typedocument=(* [Empty] is the empty document. *)|Empty(* [Char c] is a document that consists of the single character [c]. We
enforce the invariant that [c] is not a newline character. *)|Charofchar(* [String s] is a document that consists of just the string [s]. We
assume, but do not check, that this string does not contain a newline
character. [String] is a special case of [FancyString], which takes up
less space in memory. *)|Stringofstring(* [FancyString (s, ofs, len, apparent_length)] is a (portion of a) string
that may contain fancy characters: color escape characters, UTF-8 or
multi-byte characters, etc. Thus, the apparent length (which corresponds
to what will be visible on screen) differs from the length (which is a
number of bytes, and is reported by [String.length]). We assume, but do
not check, that fancystrings do not contain a newline character. *)|FancyStringofstring*int*int*int(* [Blank n] is a document that consists of [n] blank characters. *)|Blankofint(* When in flattening mode, [IfFlat (d1, d2)] turns into the document
[d1]. When not in flattening mode, it turns into the document [d2]. *)|IfFlatofdocument*document(* When in flattening mode, [HardLine] causes a failure, which requires
backtracking all the way until the stack is empty. When not in flattening
mode, it represents a newline character, followed with an appropriate
number of indentation. A common way of using [HardLine] is to only use it
directly within the right branch of an [IfFlat] construct. *)|HardLine(* The following constructors store their space requirement. This is the
document's apparent length, if printed in flattening mode. This
information is computed in a bottom-up manner when the document is
constructed. *)(* In other words, the space requirement is the number of columns that the
document needs in order to fit on a single line. We express this value in
the set of `integers extended with infinity', and use the value
[infinity] to indicate that the document cannot be printed on a single
line. *)(* Storing this information at [Group] nodes is crucial, as it allows us to
avoid backtracking and buffering. *)(* Storing this information at other nodes allows the function [requirement]
to operate in constant time. This means that the bottom-up computation of
requirements takes linear time. *)(* [Cat (req, doc1, doc2)] is the concatenation of the documents [doc1] and
[doc2]. The space requirement [req] is the sum of the requirements of
[doc1] and [doc2]. *)|Catofrequirement*document*document(* [Nest (req, j, doc)] is the document [doc], in which the indentation
level has been increased by [j], that is, in which [j] blanks have been
inserted after every newline character. The space requirement [req] is
the same as the requirement of [doc]. *)|Nestofrequirement*int*document(* [Group (req, doc)] represents an alternative: it is either a flattened
form of [doc], in which occurrences of [Group] disappear and occurrences
of [IfFlat] resolve to their left branch, or [doc] itself. The space
requirement [req] is the same as the requirement of [doc]. *)|Groupofrequirement*document(* [Align (req, doc)] increases the indentation level to reach the current
column. Thus, the document [doc] is rendered within a box whose upper
left corner is the current position. The space requirement [req] is the
same as the requirement of [doc]. *)|Alignofrequirement*document(* [Range (req, hook, doc)] is printed like [doc]. After it is printed, the
function [hook] is applied to the range that is occupied by [doc] in the
output. *)|Rangeofrequirement*(range->unit)*document(* [Custom (req, f)] is a document whose appearance is user-defined. *)|Customofcustom(* ------------------------------------------------------------------------- *)(* Retrieving or computing the space requirement of a document. *)letrecrequirement=function|Empty->0|Char_->1|Strings->String.lengths|FancyString(_,_,_,len)|Blanklen->len|IfFlat(doc1,_)->(* In flattening mode, the requirement of [ifflat x y] is just the
requirement of its flat version, [x]. *)(* The smart constructor [ifflat] ensures that [IfFlat] is never nested
in the left-hand side of [IfFlat], so this recursive call is not a
problem; the function [requirement] has constant time complexity. *)requirementdoc1|HardLine->(* A hard line cannot be printed in flattening mode. *)infinity|Cat(req,_,_)|Nest(req,_,_)|Group(req,_)|Align(req,_)|Range(req,_,_)->(* These nodes store their requirement -- which is computed when the
node is constructed -- so as to allow us to answer in constant time
here. *)req|Customc->c#requirement(* ------------------------------------------------------------------------- *)(* The above algebraic data type is not exposed to the user. Instead, we
expose the following functions. These functions construct a raw document
and compute its requirement, so as to obtain a document. *)letempty=Emptyletcharc=assert(c<>'\n');Charcletspace=char' 'letstrings=Stringsletfancysubstringsofslenapparent_length=iflen=0thenemptyelseFancyString(s,ofs,len,apparent_length)letsubstringsofslen=fancysubstringsofslenlenletfancystringsapparent_length=fancysubstrings0(String.lengths)apparent_length(* The following function was stolen from [Batteries]. *)letutf8_lengths=letreclength_auxsci=ifi>=String.lengthsthencelseletn=Char.code(String.unsafe_getsi)inletk=ifn<0x80then1elseifn<0xe0then2elseifn<0xf0then3else4inlength_auxs(c+1)(i+k)inlength_auxs00letutf8strings=fancystrings(utf8_lengths)lethardline=HardLineletblankn=matchnwith|0->empty|1->space|_->Blanknletifflatdoc1doc2=(* Avoid nesting [IfFlat] in the left-hand side of [IfFlat], as this
is redundant. *)matchdoc1with|IfFlat(doc1,_)|doc1->IfFlat(doc1,doc2)letinternal_breaki=ifflat(blanki)hardlineletbreak0=internal_break0letbreak1=internal_break1letbreaki=matchiwith|0->break0|1->break1|_->internal_breakilet(^^)xy=matchx,ywith|Empty,_->y|_,Empty->x|_,_->Cat(requirementx++requirementy,x,y)letnestix=assert(i>=0);Nest(requirementx,i,x)letgroupx=letreq=requirementxin(* Minor optimisation: an infinite requirement dissolves a group. *)ifreq=infinitythenxelseGroup(req,x)letalignx=Align(requirementx,x)letrangehookx=Range(requirementx,hook,x)letcustomc=(* Sanity check. *)assert(c#requirement>=0);Customc(* ------------------------------------------------------------------------- *)(* Printing blank space (indentation characters). *)letblank_length=80letblank_buffer=String.makeblank_length' 'letrecblanksoutputn=ifn<=0then()elseifn<=blank_lengththenoutput#substringblank_buffer0nelsebeginoutput#substringblank_buffer0blank_length;blanksoutput(n-blank_length)end(* ------------------------------------------------------------------------- *)(* This function expresses the following invariant: if we are in flattening
mode, then we must be within bounds, i.e. the width and ribbon width
constraints must be respected. *)letokstateflatten:bool=notflatten||state.column<=state.width&&state.column<=state.last_indent+state.ribbon(* ------------------------------------------------------------------------- *)(* The pretty rendering engine. *)(* The renderer is supposed to behave exactly like Daan Leijen's, although its
implementation is quite radically different, and simpler. Our documents are
constructed eagerly, as opposed to lazily. This means that we pay a large
space overhead, but in return, we get the ability of computing information
bottom-up, as described above, which allows to render documents without
backtracking or buffering. *)(* The [state] record is never copied; it is just threaded through. In
addition to it, the parameters [indent] and [flatten] influence the
manner in which the document is rendered. *)(* The code is written in tail-recursive style, so as to avoid running out of
stack space if the document is very deep. Each [KCons] cell in a
continuation represents a pending call to [pretty]. Each [KRange] cell
represents a pending call to a user-provided range hook. *)typecont=|KNil|KConsofint*bool*document*cont|KRangeof(range->unit)*point*contletrecpretty(output:output)(state:state)(indent:int)(flatten:bool)(doc:document)(cont:cont):unit=matchdocwith|Empty->continueoutputstatecont|Charc->output#charc;state.column<-state.column+1;(* assert (ok state flatten); *)continueoutputstatecont|Strings->letlen=String.lengthsinoutput#substrings0len;state.column<-state.column+len;(* assert (ok state flatten); *)continueoutputstatecont|FancyString(s,ofs,len,apparent_length)->output#substringsofslen;state.column<-state.column+apparent_length;(* assert (ok state flatten); *)continueoutputstatecont|Blankn->blanksoutputn;state.column<-state.column+n;(* assert (ok state flatten); *)continueoutputstatecont|HardLine->(* We cannot be in flattening mode, because a hard line has an [infinity]
requirement, and we attempt to render a group in flattening mode only
if this group's requirement is met. *)assert(notflatten);(* Emit a hardline. *)output#char'\n';blanksoutputindent;state.line<-state.line+1;state.column<-indent;state.last_indent<-indent;(* Continue. *)continueoutputstatecont|IfFlat(doc1,doc2)->(* Pick an appropriate sub-document, based on the current flattening
mode. *)prettyoutputstateindentflatten(ifflattenthendoc1elsedoc2)cont|Cat(_,doc1,doc2)->(* Push the second document onto the continuation. *)prettyoutputstateindentflattendoc1(KCons(indent,flatten,doc2,cont))|Nest(_,j,doc)->prettyoutputstate(indent+j)flattendoccont|Group(req,doc)->(* If we already are in flattening mode, stay in flattening mode; we
are committed to it. If we are not already in flattening mode, we
have a choice of entering flattening mode. We enter this mode only
if we know that this group fits on this line without violating the
width or ribbon width constraints. Thus, we never backtrack. *)letflatten=flatten||letcolumn=state.column++reqincolumn<==state.width&&column<==state.last_indent+state.ribboninprettyoutputstateindentflattendoccont|Align(_,doc)->(* The effect of this combinator is to set [indent] to [state.column].
Usually [indent] is equal to [state.last_indent], hence setting it
to [state.column] increases it. However, if [nest] has been used
since the current line began, then this could cause [indent] to
decrease. *)(* assert (state.column > state.last_indent); *)prettyoutputstatestate.columnflattendoccont|Range(_,hook,doc)->letstart:point=(state.line,state.column)inprettyoutputstateindentflattendoc(KRange(hook,start,cont))|Customc->(* Invoke the document's custom rendering function. *)c#prettyoutputstateindentflatten;(* Sanity check. *)assert(okstateflatten);(* Continue. *)continueoutputstatecontandcontinueoutputstate=function|KNil->()|KCons(indent,flatten,doc,cont)->prettyoutputstateindentflattendoccont|KRange(hook,start,cont)->letfinish:point=(state.line,state.column)inhook(start,finish);continueoutputstatecont(* Publish a version of [pretty] that does not take an explicit continuation.
This function may be used by authors of custom documents. We do not expose
the internal [pretty] -- the one that takes a continuation -- because we
wish to simplify the user's life. The price to pay is that calls that go
through a custom document cannot be tail calls. *)letprettyoutputstateindentflattendoc=prettyoutputstateindentflattendocKNil(* ------------------------------------------------------------------------- *)(* The compact rendering algorithm. *)letreccompactoutputdoccont=matchdocwith|Empty->continueoutputcont|Charc->output#charc;continueoutputcont|Strings->letlen=String.lengthsinoutput#substrings0len;continueoutputcont|FancyString(s,ofs,len,_apparent_length)->output#substringsofslen;continueoutputcont|Blankn->blanksoutputn;continueoutputcont|HardLine->output#char'\n';continueoutputcont|Cat(_,doc1,doc2)->compactoutputdoc1(doc2::cont)|IfFlat(doc,_)|Nest(_,_,doc)|Group(_,doc)|Align(_,doc)|Range(_,_,doc)->compactoutputdoccont|Customc->(* Invoke the document's custom rendering function. *)c#compactoutput;continueoutputcontandcontinueoutputcont=matchcontwith|[]->()|doc::cont->compactoutputdoccontletcompactoutputdoc=compactoutputdoc[](* ------------------------------------------------------------------------- *)(* We now instantiate the renderers for the three kinds of output channels. *)(* This is just boilerplate. *)moduleMakeRenderer(X:sigtypechannelvaloutput:channel->outputend)=structtypechannel=X.channeltypedummy=documenttypedocument=dummyletprettyrfracwidthchanneldoc=pretty(X.outputchannel)(initialrfracwidth)0falsedocletcompactchanneldoc=compact(X.outputchannel)docendmoduleToChannel=MakeRenderer(structtypechannel=out_channelletoutput=newchannel_outputend)moduleToBuffer=MakeRenderer(structtypechannel=Buffer.tletoutput=newbuffer_outputend)moduleToFormatter=MakeRenderer(structtypechannel=Format.formatterletoutput=newformatter_outputend)