(************************************************************************)
(*         *   The Coq Proof Assistant / The Coq Development Team       *)
(*  v      *         Copyright INRIA, CNRS and contributors             *)
(* <O___,, * (see version control and CREDITS file for authors & dates) *)
(*   \VV/  **************************************************************)
(*    //   *    This file is distributed under the terms of the         *)
(*         *     GNU Lesser General Public License Version 2.1          *)
(*         *     (see LICENSE file for the text of the license)         *)
(************************************************************************)

open Pp
open Util
open Tok

module Stream = Gramlib.Stream

(* Dictionaries: trees annotated with string options, each node being a map
   from chars to dictionaries (the subtrees). A trie, in other words. *)

module CharOrd = struct type t = char let compare : char -> char -> int = compare end
module CharMap = Map.Make (CharOrd)

type starts_quotation = NoQuotation | Quotation

type ttree = {
  node : (string * starts_quotation) option;
  branch : ttree CharMap.t;
}

let empty_keyword_state = { node = None; branch = CharMap.empty }

let ttree_add ttree (str, quot) =
  let rec insert tt i =
    if i == String.length str then
      { node = Some (str, quot); branch = tt.branch }
    else
      let c = str.[i] in
      let br =
        match try Some (CharMap.find c tt.branch) with Not_found -> None with
        | Some tt' ->
          CharMap.add c (insert tt' (i + 1)) (CharMap.remove c tt.branch)
        | None ->
          let tt' = { node = None; branch = CharMap.empty } in
          CharMap.add c (insert tt' (i + 1)) tt.branch
      in
      { node = tt.node; branch = br }
  in
  insert ttree 0
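
(* For illustration, a minimal sketch of how keywords share trie nodes:
   registering "in" and "if" makes the root branch map 'i' to an inner node
   with [node = None], whose own branch maps 'n' and 'f' to leaves carrying
   [Some ("in", NoQuotation)] and [Some ("if", NoQuotation)]:

     let tt =
       ttree_add (ttree_add empty_keyword_state ("in", NoQuotation))
         ("if", NoQuotation)

   Looking a word up is done with [ttree_find] below. *)
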
(* Search a string in a dictionary: raises [Not_found]
   if the word is not present. *)
let ttree_find ttree str =
  let rec proc_rec tt i =
    if i == String.length str then tt
    else proc_rec (CharMap.find str.[i] tt.branch) (i + 1)
  in
  proc_rec ttree 0

let ttree_elements ttree =
  let rec elts tt accu =
    let accu = match tt.node with
      | None -> accu
      | Some (s, _) -> CString.Set.add s accu
    in
    CharMap.fold (fun _ tt accu -> elts tt accu) tt.branch accu
  in
  elts ttree CString.Set.empty

(* Errors occurring while lexing (explained as "Lexer error: ...") *)

module Error = struct

  type t =
    | Illegal_character
    | Unterminated_comment
    | Unterminated_string
    | Undefined_token
    | Bad_token of string

  exception E of t

  let to_string x =
    "Syntax Error: Lexer: " ^
    (match x with
     | Illegal_character -> "Illegal character"
     | Unterminated_comment -> "Unterminated comment"
     | Unterminated_string -> "Unterminated string"
     | Undefined_token -> "Undefined token"
     | Bad_token tok -> Format.sprintf "Bad token %S" tok)

end
open Error

let err loc str = Loc.raise ~loc (Error.E str)

let bad_token str = raise (Error.E (Bad_token str))

(* Update a loc without allocating an intermediate pair *)
let set_loc_pos loc bp ep =
  Loc.sub loc (bp - loc.Loc.bp) (ep - bp)

(* Increase line number by 1 and update position of beginning of line *)
let bump_loc_line loc bol_pos =
  Loc.{ loc with
        line_nb      = loc.line_nb + 1;
        line_nb_last = loc.line_nb + 1;
        bol_pos;
        bol_pos_last = bol_pos;
      }

(* Same as [bump_loc_line], but for the last line in location *)
(* For an obscure reason, camlp5 does not give an easy way to set line_nb_stop,
   so we have to resort to a hack merging two locations. *)
(* Warning: [bump_loc_line_last] changes the end position. You may need to call
   [set_loc_pos] to fix it. *)
let bump_loc_line_last loc bol_pos =
  let open Loc in
  let loc' =
    { loc with
      line_nb      = loc.line_nb_last + 1;
      line_nb_last = loc.line_nb_last + 1;
      bol_pos;
      bol_pos_last = bol_pos;
      bp = loc.bp + 1;
      ep = loc.ep + 1;
    }
  in
  Loc.merge loc loc'

(* For some reason, the [Ploc.after] function of Camlp5 does not update line
   numbers, so we define our own function that does it. *)
let after loc =
  Loc.{ loc with
        line_nb = loc.line_nb_last;
        bol_pos = loc.bol_pos_last;
        bp = loc.ep;
      }

(** Lexer conventions on tokens *)

type token_kind =
  | Utf8Token of (Unicode.status * int)
  | EmptyStream

let error_utf8 loc cs =
  let bp = Stream.count cs in
  Stream.junk () cs; (* consume the char so we do not read it and fail again *)
  let loc = set_loc_pos loc bp (bp + 1) in
  err loc Illegal_character

let check_utf8_trailing_byte loc cs c =
  if not (Int.equal (Char.code c land 0xC0) 0x80) then error_utf8 loc cs

(* Recognize utf8 blocks (of length at most 4 bytes) *)
(* but don't certify full utf8 compliance (e.g. no emptiness check) *)
let lookup_utf8_char loc nj cs =
  match try Some (List.nth (Stream.npeek () (nj + 1) cs) nj) with Failure _ -> None with
  | None -> []
  | Some c1 ->
    match c1 with
    | '\x00'..'\x7F' -> [c1]
    | c1 ->
      let c1 = Char.code c1 in
      if Int.equal (c1 land 0x40) 0 || Int.equal (c1 land 0x38) 0x38 then error_utf8 loc cs
      else if Int.equal (c1 land 0x20) 0 then
        match List.skipn nj (Stream.npeek () (nj + 2) cs) with
        | [_; c2] as l ->
          check_utf8_trailing_byte loc cs c2; l
        | _ -> error_utf8 loc cs
      else if Int.equal (c1 land 0x10) 0 then
        match List.skipn nj (Stream.npeek () (nj + 3) cs) with
        | [_; c2; c3] as l ->
          check_utf8_trailing_byte loc cs c2;
          check_utf8_trailing_byte loc cs c3; l
        | _ -> error_utf8 loc cs
      else
        match List.skipn nj (Stream.npeek () (nj + 4) cs) with
        | [_; c2; c3; c4] as l ->
          check_utf8_trailing_byte loc cs c2;
          check_utf8_trailing_byte loc cs c3;
          check_utf8_trailing_byte loc cs c4; l
        | _ -> error_utf8 loc cs

let status_of_utf8 = function
  | [] -> EmptyStream
  | l ->
    let n, unicode = match l with
      | [c1] -> 1, Char.code c1
      | [c1; c2] ->
        2,
        (Char.code c1 land 0x1F) lsl 6 +
        (Char.code c2 land 0x3F)
      | [c1; c2; c3] ->
        3,
        (Char.code c1 land 0x0F) lsl 12 +
        (Char.code c2 land 0x3F) lsl 6 +
        (Char.code c3 land 0x3F)
      | [c1; c2; c3; c4] ->
        4,
        (Char.code c1 land 0x07) lsl 18 +
        (Char.code c2 land 0x3F) lsl 12 +
        (Char.code c3 land 0x3F) lsl 6 +
        (Char.code c4 land 0x3F)
      | _ -> assert false
    in
    Utf8Token (Unicode.classify unicode, n)

let lookup_utf8 loc cs = status_of_utf8 (lookup_utf8_char loc 0 cs)

let is_letter l = match status_of_utf8 l with
  | EmptyStream -> false
  | Utf8Token (st, _) -> Unicode.is_letter st

let unlocated f x =
  let dummy_loc = Loc.(initial ToplevelInput) in
  f dummy_loc x
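
(* For illustration, a worked instance of the decoding performed by
   [status_of_utf8]: the two-byte sequence 0xCE 0xBB (U+03BB, greek small
   letter lambda) gives
     (0xCE land 0x1F) lsl 6 + (0xBB land 0x3F) = 0x380 + 0x3B = 0x3BB
   and therefore [Utf8Token (Unicode.classify 0x3BB, 2)]. *)
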
(** FIXME: should we still unloc the exception? *)
(* try f x with Loc.Exc_located (_, exc) -> raise exc *)

let check_keyword str =
  let rec loop_symb s = match Stream.peek () s with
    | Some (' ' | '\n' | '\r' | '\t') -> Stream.junk () s; bad_token str
    | _ ->
      match unlocated lookup_utf8 s with
      | Utf8Token (_, n) -> Stream.njunk () n s; loop_symb s
      | EmptyStream -> ()
  in
  loop_symb (Stream.of_string str)

let check_ident str =
  let rec loop_id intail s = match unlocated lookup_utf8 s with
    | Utf8Token (st, n) when not intail && Unicode.is_valid_ident_initial st ->
      Stream.njunk () n s; loop_id true s
    | Utf8Token (st, n) when intail && Unicode.is_valid_ident_trailing st ->
      Stream.njunk () n s; loop_id true s
    | EmptyStream -> ()
    | Utf8Token _ -> bad_token str
  in
  loop_id false (Stream.of_string str)

let is_ident str =
  try let _ = check_ident str in true with Error.E _ -> false

let is_keyword ttree s =
  try match (ttree_find ttree s).node with None -> false | Some _ -> true
  with Not_found -> false

let add_keyword ?(quotation=NoQuotation) ttree str =
  if not (is_keyword ttree str) then
    begin
      check_keyword str;
      ttree_add ttree (str, quotation)
    end
  else ttree

let add_keyword_tok : type c. _ -> c Tok.p -> _ = fun ttree -> function
  | PKEYWORD s -> add_keyword ~quotation:NoQuotation ttree s
  | PQUOTATION s -> add_keyword ~quotation:Quotation ttree s
  | _ -> ttree

let keywords = ttree_elements

(* Freeze and unfreeze the state of the lexer *)
type keyword_state = ttree

(* The string buffering machinery *)

let buff = ref (Bytes.create 80)

let store len x =
  let open Bytes in
  if len >= length !buff then buff := cat !buff (create (length !buff));
  set !buff len x;
  succ len

let rec nstore n len cs =
  if n > 0 then nstore (n - 1) (store len (Stream.next () cs)) cs else len

let get_buff len = Bytes.sub_string !buff 0 len
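
(* For illustration, one way a keyword table can be built with [add_keyword]
   above (the name [my_keywords] is purely illustrative):

     let my_keywords =
       List.fold_left (fun tbl s -> add_keyword tbl s)
         empty_keyword_state [":="; "=>"; "fun"]

   [add_keyword] runs [check_keyword] first, so a candidate containing a
   blank such as "fun " raises [Error.E (Bad_token _)]. *)
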
(* The classical lexer: idents, numbers, quoted strings, comments *)

let warn_unrecognized_unicode =
  CWarnings.create ~name:"unrecognized-unicode" ~category:CWarnings.CoreCategories.parsing
    (fun (u, id) ->
       strbrk (Printf.sprintf "Not considering unicode character \"%s\" of unknown \
                               lexical status as part of identifier \"%s\"." u id))

let rec ident_tail loc len s = match lookup_utf8 loc s with
  | Utf8Token (st, n) when Unicode.is_valid_ident_trailing st ->
    ident_tail loc (nstore n len s) s
  | Utf8Token (st, n) when Unicode.is_unknown st ->
    let id = get_buff len in
    let u = String.concat "" (List.map (String.make 1) (Stream.npeek () n s)) in
    warn_unrecognized_unicode ~loc (u, id); len
  | _ -> len

let warn_comment_terminator_in_string =
  CWarnings.create ~name:"comment-terminator-in-string" ~category:CWarnings.CoreCategories.parsing
    (fun () ->
       (strbrk
          "Not interpreting \"*)\" as the end of current \
           non-terminated comment because it occurs in a \
           non-terminated string of the comment."))

(* If the string being lexed is in a comment, [comm_level] is Some i with i the
   current level of comment nesting. Otherwise, [comm_level] is None. *)
let rec string loc ~comm_level bp len s = match Stream.peek () s with
  | Some '"' ->
    Stream.junk () s;
    let esc =
      match Stream.peek () s with
      | Some '"' -> Stream.junk () s; true
      | _ -> false
    in
    if esc then string loc ~comm_level bp (store len '"') s else (loc, len)
  | Some '(' ->
    Stream.junk () s;
    (fun s -> match Stream.peek () s with
       | Some '*' ->
         Stream.junk () s;
         let comm_level = Option.map succ comm_level in
         string loc ~comm_level bp (store (store len '(') '*') s
       | _ ->
         string loc ~comm_level bp (store len '(') s) s
  | Some '*' ->
    Stream.junk () s;
    (fun s -> match Stream.peek () s with
       | Some ')' ->
         Stream.junk () s;
         let () = match comm_level with
           | Some 0 -> warn_comment_terminator_in_string ~loc ()
           | _ -> ()
         in
         let comm_level = Option.map pred comm_level in
         string loc ~comm_level bp (store (store len '*') ')') s
       | _ ->
         string loc ~comm_level bp (store len '*') s) s
  | Some ('\n' as c) ->
    Stream.junk () s;
    let ep = Stream.count s in
    (* If we are parsing a comment, the string is not part of a token, so we
       update the first line of the location. Otherwise, we update the last
       line. *)
    let loc =
      if Option.has_some comm_level then bump_loc_line loc ep
      else bump_loc_line_last loc ep
    in
    string loc ~comm_level bp (store len c) s
  | Some c ->
    Stream.junk () s;
    string loc ~comm_level bp (store len c) s
  | _ ->
    let () = if not (Stream.is_empty () s) then raise Stream.Failure in
    let ep = Stream.count s in
    let loc = set_loc_pos loc bp ep in
    err loc Unterminated_string

(* Utilities for comments in beautify *)
let comment_begin = ref None
let comm_loc bp = match !comment_begin with
  | None -> comment_begin := Some bp
  | _ -> ()

let comments = ref []
let current_comment = Buffer.create 8192

let real_push_char c = Buffer.add_char current_comment c

let push_string s = Buffer.add_string current_comment s

let dbg = CDebug.create ~name:"comment-lexing" ()

let comment_stop ep =
  let current_s = Buffer.contents current_comment in
  (if !Flags.record_comments && Buffer.length current_comment > 0 then
     let bp = match !comment_begin with
       | Some bp -> bp
       | None ->
         Feedback.msg_debug
           (str "No begin location for comment '"
            ++ str current_s ++ str "' ending at "
            ++ int ep);
         ep - 1
     in
     dbg Pp.(fun () ->
         str "comment at chars " ++ int bp ++ str "-" ++ int ep
         ++ str ":" ++ fnl () ++ str current_s);
     comments := ((bp, ep), current_s) :: !comments);
  Buffer.clear current_comment;
  comment_begin := None

let rec comment loc bp s =
  let bp2 = Stream.count s in
  match Stream.peek () s with
    Some '(' ->
    Stream.junk () s;
    let loc =
      try
        match Stream.peek () s with
          Some '*' ->
          Stream.junk () s;
          push_string "(*"; comment loc bp s
        | _ -> push_string "("; loc
      with Stream.Failure -> raise (Gramlib.Grammar.Error "")
    in
    comment loc bp s
  | Some '*' ->
    Stream.junk () s;
    begin
      try
        match Stream.peek () s with
          Some ')' -> Stream.junk () s; push_string "*)"; loc
        | _ -> real_push_char '*'; comment loc bp s
      with Stream.Failure -> raise (Gramlib.Grammar.Error "")
    end
  | Some '"' ->
    Stream.junk () s;
    let loc, len = string loc ~comm_level:(Some 0) bp2 0 s in
    push_string "\""; push_string (get_buff len); push_string "\"";
    comment loc bp s
  | _ ->
    match Stream.is_empty () s with
    | true ->
      let ep = Stream.count s in
      let loc = set_loc_pos loc bp ep in
      err loc Unterminated_comment
    | false ->
      match Stream.peek () s with
        Some ('\n' as z) ->
        Stream.junk () s;
        let ep = Stream.count s in
        real_push_char z;
        comment (bump_loc_line loc ep) bp s
      | Some z -> Stream.junk () s; real_push_char z; comment loc bp s
      | _ -> raise Stream.Failure

(* Parse a special token, using the [token_tree] *)
(* Below, [last] is the last valid token found in the table *)
(* [nj] is the number of bytes read since then and *)
(* these bytes form a complete utf-8 sequence *)
(* [tt] is the tree after having taken into account these [nj] bytes *)
let update_longest_valid_token last nj tt cs =
  match tt.node with
  | Some _ as last' ->
    (* [last] extended with the [nj] bytes constitutes a valid token *)
    (* we update cs, last and nj *)
    Stream.njunk () nj cs;
    0, last'
  | None ->
    (* [last] extended with the [nj] bytes does not form a full valid token *)
    nj, last

(* try to progress by peeking the next utf-8 lexeme *)
(* and retain the longest valid special token obtained *)
let rec progress_further loc last nj last_is_letter tt cs =
  match lookup_utf8_char loc nj cs with
  | [] -> snd (update_longest_valid_token last nj tt cs)
  | l -> progress_utf8 loc last nj last_is_letter tt cs l

(* under the same assumptions as [update_longest_valid_token], *)
(* read the [n] bytes of the current utf-8 lexeme whose first byte is [c] *)
and progress_utf8 loc last nj last_is_letter tt cs l =
  let is_letter' = is_letter l in
  (* compute longest match before considering utf8 block [l] *)
  (* not allowing update if in the middle of an ident part *)
  let nj, last =
    if last_is_letter && is_letter' then nj, last
    else update_longest_valid_token last nj tt cs
  in
  try
    (* descend in tree according to current utf8 block [l] *)
    let tt = List.fold_left (fun tt c -> CharMap.find c tt.branch) tt l in
    progress_further loc last (nj + List.length l) is_letter' tt cs
  with Not_found -> last
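
(* For illustration of the longest-match rule implemented above: if both "-"
   and "->" are registered, then on the input "->x" the walk descends through
   '-' and '>' (both reaching nodes that carry a token), fails on 'x', and
   returns the longest match "->"; on "-x" it returns "-". *)
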
let blank_or_eof cs =
  match Stream.peek () cs with
  | None -> true
  | Some (' ' | '\t' | '\n' | '\r') -> true
  | _ -> false

type marker = Delimited of int * char list * char list | ImmediateAsciiIdent

let peek_marker_len b e s =
  let rec peek n = match Stream.nth () n s with
    | c -> if c = b then peek (n + 1) else n, List.make n b, List.make n e
    | exception Stream.Failure -> n, List.make n b, List.make n e
  in
  let len, start, stop = peek 0 in
  if len = 0 then raise Stream.Failure
  else Delimited (len, start, stop)

let peek_marker s =
  match Stream.nth () 0 s with
  | '(' -> peek_marker_len '(' ')' s
  | '[' -> peek_marker_len '[' ']' s
  | '{' -> peek_marker_len '{' '}' s
  | ('a'..'z' | 'A'..'Z' | '_') -> ImmediateAsciiIdent
  | _ -> raise Stream.Failure

let parse_quotation loc bp s =
  match peek_marker s with
  | ImmediateAsciiIdent ->
    let c = Stream.next () s in
    let len =
      try ident_tail loc (store 0 c) s with
        Stream.Failure -> raise (Gramlib.Grammar.Error "")
    in
    get_buff len, set_loc_pos loc bp (Stream.count s)
  | Delimited (lenmarker, bmarker, emarker) ->
    let dot_gobbling =
      (* only quotations starting with two curly braces can gobble sentences *)
      match bmarker with
      | '{' :: '{' :: _ -> true
      | _ -> false
    in
    let b = Buffer.create 80 in
    let commit1 c = Buffer.add_char b c; Stream.junk () s in
    let commit l = List.iter commit1 l in
    let rec quotation loc depth =
      match Stream.npeek () lenmarker s with
      | l when l = bmarker ->
        commit l;
        quotation loc (depth + 1)
      | l when l = emarker ->
        commit l;
        if depth > 1 then quotation loc (depth - 1) else loc
      | '\n' :: cs ->
        commit1 '\n';
        let loc = bump_loc_line_last loc (Stream.count s) in
        quotation loc depth
      | '.' :: _ ->
        commit1 '.';
        if not dot_gobbling && blank_or_eof s then raise Stream.Failure;
        quotation loc depth
      | c :: cs ->
        commit1 c;
        quotation loc depth
      | [] -> raise Stream.Failure
    in
    let loc = quotation loc 0 in
    Buffer.contents b, set_loc_pos loc bp (Stream.count s)

let peek_string v s =
  let l = String.length v in
  let rec aux i =
    if Int.equal i l then true
    else
      let l' = Stream.npeek () (i + 1) s in
      match List.nth_opt l' i with
      | Some c -> Char.equal c v.[i] && aux (i + 1)
      | None -> false (* EOF *)
  in
  aux 0

let find_keyword ttree loc id bp s =
  if peek_string ":{{" s then
    begin
      (* "xxx:{{" always starts a sentence-gobbling quotation, whether registered or not *)
      Stream.junk () s;
      let txt, loc = parse_quotation loc bp s in
      QUOTATION (id ^ ":", txt), loc
    end
  else
    let tt = ttree_find ttree id in
    match progress_further loc tt.node 0 true tt s with
    | None -> raise Not_found
    | Some (c, NoQuotation) ->
      let ep = Stream.count s in
      KEYWORD c, set_loc_pos loc bp ep
    | Some (c, Quotation) ->
      let txt, loc = parse_quotation loc bp s in
      QUOTATION (c, txt), loc

let process_sequence loc bp c cs =
  let rec aux n cs =
    match Stream.peek () cs with
    | Some c' when c == c' -> Stream.junk () cs; aux (n + 1) cs
    | _ -> BULLET (String.make n c), set_loc_pos loc bp (Stream.count cs)
  in
  aux 1 cs

(* Must be a special token *)
let process_chars ~diff_mode ttree loc bp l cs =
  let t = progress_utf8 loc None (- (List.length l)) false ttree cs l in
  let ep = Stream.count cs in
  match t with
  | Some (t, NoQuotation) -> (KEYWORD t, set_loc_pos loc bp ep)
  | Some (c, Quotation) ->
    let txt, loc = parse_quotation loc bp cs in
    QUOTATION (c, txt), loc
  | None ->
    if diff_mode then
      begin
        let s = String.concat "" (List.map (String.make 1) l) in
        IDENT s, set_loc_pos loc bp ep
      end
    else
      begin
        let loc = set_loc_pos loc bp ep in
        err loc Undefined_token
      end

(* Parse what follows a dot *)
let parse_after_dot ~diff_mode ttree loc c bp s =
  match lookup_utf8 loc s with
  | Utf8Token (st, n) when Unicode.is_valid_ident_initial st ->
    let len = ident_tail loc (nstore n 0 s) s in
    let field = get_buff len in
    begin
      try find_keyword ttree loc ("." ^ field) bp s
      with Not_found ->
        let ep = Stream.count s in
        FIELD field, set_loc_pos loc bp ep
    end
  | Utf8Token _ | EmptyStream ->
    process_chars ~diff_mode ttree loc bp [c] s

(* Parse what follows a question mark *)
let parse_after_qmark ~diff_mode ttree loc bp s =
  match lookup_utf8 loc s with
  | Utf8Token (st, _) when Unicode.is_valid_ident_initial st -> LEFTQMARK
  | EmptyStream -> KEYWORD "?"
  | Utf8Token _ -> fst (process_chars ~diff_mode ttree loc bp ['?'] s)
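
(* For illustration: for a keyword registered with [~quotation:Quotation],
   say a hypothetical "myquot:", [find_keyword] delegates to
   [parse_quotation], so an input such as
     myquot:(foo 1)
   is lexed as the single token [QUOTATION ("myquot:", "(foo 1)")], the
   delimiter being the run of opening parentheses, brackets or braces counted
   by [peek_marker_len]. Inputs of the form xxx:{{ ... }} are always lexed as
   sentence-gobbling quotations, whether or not xxx: is registered. *)
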
(* Parse a token in a char stream *)

(* [between_commands] is used to parse bullets and { and } differently
   depending on the context. *)
let between_commands = ref true

let rec next_token ~diff_mode ttree loc s =
  let bp = Stream.count s in
  match Stream.peek () s with
  | Some '\n' ->
    Stream.junk () s;
    let ep = Stream.count s in
    next_token ~diff_mode ttree (bump_loc_line loc ep) s
  | Some (' ' | '\t' | '\r') ->
    Stream.junk () s;
    next_token ~diff_mode ttree loc s
  | Some ('.' as c) ->
    Stream.junk () s;
    let t, newloc =
      try parse_after_dot ~diff_mode ttree loc c bp s with
        Stream.Failure -> raise (Gramlib.Grammar.Error "")
    in
    between_commands := false;
    (* We enforce that "." should either be part of a larger keyword,
for instance ".(", or followed by a blank or eof. *)let()=matchtwith|KEYWORD("."|"...")->ifnot(blank_or_eofs)thenbeginletep=Stream.countsinerr(set_loc_poslocbp(ep+1))Undefined_tokenend;between_commands:=true;|_->()int,newloc|Some('-'|'+'|'*'asc)->Stream.junk()s;lett,new_between_commands=if!between_commandsthenprocess_sequencelocbpcs,trueelseprocess_chars~diff_modettreelocbp[c]s,falseinbetween_commands:=new_between_commands;t|Some'?'->Stream.junk()s;letep=Stream.countsinlett=parse_after_qmark~diff_modettreelocbpsinbetween_commands:=false;(t,set_loc_poslocbpep)|Some('a'..'z'|'A'..'Z'|'_'asc)->Stream.junk()s;letlen=tryident_tailloc(store0c)swithStream.Failure->raise(Gramlib.Grammar.Error"")inletid=get_buffleninbetween_commands:=false;begintryfind_keywordttreelocidbpswithNot_found->letep=Stream.countsinIDENTid,set_loc_poslocbpepend|Some('0'..'9')->letn=NumTok.Unsigned.parsesinbetween_commands:=false;begintryfind_keywordttreeloc(NumTok.Unsigned.sprintn)bpswithNot_found->letep=Stream.countsinNUMBERn,set_loc_poslocbpepend|Some'\"'->Stream.junk()s;let(loc,len)=trystringloc~comm_level:Nonebp0swithStream.Failure->raise(Gramlib.Grammar.Error"")inletep=Stream.countsinbetween_commands:=false;letstr=get_buffleninbegintryfind_keywordttreeloc(CString.quote_coq_stringstr)bpswithNot_found->(STRINGstr,set_loc_poslocbpep)end|Some('('asc)->Stream.junk()s;begintrymatchStream.peek()swith|Some'*'whendiff_mode->Stream.junk()s;letep=Stream.countsin(IDENT"(*",set_loc_poslocbpep)|Some'*'->Stream.junk()s;comm_locbp;push_string"(*";letloc=commentlocbpsincomment_stop(Stream.counts);next_token~diff_modettreelocs|_->lett=process_chars~diff_modettreelocbp[c]sinbetween_commands:=false;twithStream.Failure->raise(Gramlib.Grammar.Error"")end|Some('{'|'}'asc)->Stream.junk()s;letep=Stream.countsinlett,new_between_commands=if!between_commandsthen(KEYWORD(String.make1c),set_loc_poslocbpep),trueelseprocess_chars~diff_modettreelocbp[c]s,falseinbetween_commands:=new_between_commands;t|_->letl=lookup_utf8_charloc0sinmatchstatus_of_utf8lwith|Utf8Token(st,n)whenUnicode.is_valid_ident_initialst->letlen=ident_tailloc(nstoren0s)sinletid=get_buffleninbetween_commands:=false;begintryfind_keywordttreelocidbpswithNot_found->letep=Stream.countsinIDENTid,set_loc_poslocbpepend|Utf8Token(_,n)->Stream.njunk()ns;lett=process_chars~diff_modettreelocbplsinbetween_commands:=false;t|EmptyStream->between_commands:=false;(EOI,set_loc_poslocbp(bp+1))(* Reify exceptions for next_token, it would be really nice if we
(* Reify exceptions for next_token; it would be really nice if we
   could actually delimit what this function raises, but for now it is Error.E *)
let next_token ~diff_mode ttree loc s : (Tok.t * Loc.t, Exninfo.iexn) Result.t =
  let f (diff_mode, ttree, loc, s) = next_token ~diff_mode ttree loc s in
  CErrors.to_result ~f (diff_mode, ttree, loc, s)

(** {6 The lexer of Coq} *)

module MakeLexer (Diff : sig val mode : bool end) : Gramlib.Plexing.S
  with type keyword_state = keyword_state
   and type te = Tok.t
   and type 'c pattern = 'c Tok.p =
struct
  type nonrec keyword_state = keyword_state
  type te = Tok.t
  type 'c pattern = 'c Tok.p
  let tok_pattern_eq = Tok.equal_p
  let tok_pattern_strings = Tok.pattern_strings
  let tok_func ?(loc=Loc.(initial ToplevelInput)) cs =
    let cur_loc = ref loc in
    Gramlib.LStream.from ~loc
      (fun ttree ->
         match next_token ~diff_mode:Diff.mode ttree !cur_loc cs with
         | Ok (tok, loc) ->
           cur_loc := after loc;
           Some (tok, loc)
         | Error (exn, info) ->
           (* If the error contains the location of the failing token,
              we still need to update [cur_loc] so as to resume parsing
              correctly after the error. See
              https://github.com/ejgallego/coq-lsp/issues/633 for an
              example. *)
           let loc = Loc.get_loc info in
           Option.iter (fun loc -> cur_loc := after loc) loc;
           Exninfo.iraise (exn, info))
  let tok_match = Tok.match_pattern
  let tok_text = Tok.token_text

  (* The state of the lexer visible from outside *)
  module State = struct

    type t = int option * string * bool * ((int * int) * string) list

    let init () = (None, "", true, [])
    let set (o, s, b, c) =
      comment_begin := o;
      Buffer.clear current_comment;
      Buffer.add_string current_comment s;
      between_commands := b;
      comments := c
    let get () =
      (!comment_begin, Buffer.contents current_comment, !between_commands, !comments)
    let drop () = set (init ())
    let get_comments (_, _, _, c) = c

  end

end

module Lexer = MakeLexer (struct let mode = false end)

module LexerDiff = MakeLexer (struct let mode = true end)

(** Terminal symbols interpretation *)

let is_ident_not_keyword ttree s = is_ident s && not (is_keyword ttree s)

let strip s =
  let len =
    let rec loop i len =
      if Int.equal i (String.length s) then len
      else if s.[i] == ' ' then loop (i + 1) len
      else loop (i + 1) (len + 1)
    in
    loop 0 0
  in
  if len == String.length s then s
  else
    let s' = Bytes.create len in
    let rec loop i i' =
      if i == String.length s then s'
      else if s.[i] == ' ' then loop (i + 1) i'
      else begin Bytes.set s' i' s.[i]; loop (i + 1) (i' + 1) end
    in
    Bytes.to_string (loop 0 0)

let terminal ttree s =
  let s = strip s in
  let () = match s with "" -> failwith "empty token." | _ -> () in
  if is_ident_not_keyword ttree s then PIDENT (Some s)
  else PKEYWORD s

(* Precondition: the input is a number (cf. [NumTok.t]) *)
let terminal_number s = match NumTok.Unsigned.parse_string s with
  | Some n -> PNUMBER (Some n)
  | None -> failwith "number token expected."
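
(* For illustration, how [terminal] interprets literal grammar strings
   (writing [kw_state] for whichever keyword table is in effect):

     terminal kw_state "fun"    (* PKEYWORD "fun" if "fun" is a keyword,
                                   PIDENT (Some "fun") otherwise *)
     terminal kw_state " : = "  (* spaces are removed by [strip], so this
                                   yields the pattern for ":=" *)
*)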