123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793openImportopenPpx_sexp_conv_libmodulePublic=structtypestack=|Empty|Openofstack|SexpofSexp.t*stackletempty_stack=Emptytypestack_cst=(* at top-level *)|Empty(* after the given sexp or comment *)|T_or_commentofCst.t_or_comment*stack_cst(* after the opening paren *)|OpenofPositions.pos*stack_cst(* Similar to [ignoring] below, In_sexp_comment only indicates if the next
s-expression is to be commented out, but if we are nested below parens below an
sexp comment, the stack would look like [Open (.., In_sexp_comment ..)]. *)|In_sexp_commentof{hash_semi_pos:Positions.pos;rev_comments:Cst.commentlist;stack:stack_cst}letempty_stack_cst=Emptytypestate_cst={token_buffer:Buffer.t;(* Starting positions of the current token *)mutabletoken_start_pos:Positions.pos}type('u,'s)kind=|Positions:(Positions.Builder.t,unit)kind|Sexp:(unit,stack)kind|Sexp_with_positions:(Positions.Builder.t,stack)kind|Cst:(state_cst,stack_cst)kindtype('u,'s)state={mutableautomaton_state:int;kind:('u,'s)kind;mutabledepth:int;(* Number of opened #| when parsing a block comment *)mutableblock_comment_depth:int;(* Stack of ignoring depths; the current depth is pushed
each time a #; comment is entered. *)mutableignoring_stack:intlist;(* When parsing an escape sequence of the form "\\NNN" or "\\XX", this accumulates
the computed number *)mutableescaped_value:int;(* Buffer for accumulating atoms *)atom_buffer:Buffer.t;user_state:'u;mode:('u,'s)mode;mutablefull_sexps:int;mutableoffset:int(* global offset *);mutableline_number:int;mutablebol_offset:int(* offset of beginning of line *)}and('u,'s)mode=|Single|Many|Eagerof{got_sexp:('u,'s)state->'s->'s;mutableno_sexp_is_error:bool}letinitial_user_state:typeus.(u,s)kind->Positions.pos->u=funkindinitial_pos->matchkindwith|Positions->Positions.Builder.create~initial_pos()|Sexp->()|Sexp_with_positions->Positions.Builder.create~initial_pos()|Cst->(* [token_start_pos] is set to a dummy location here. It is properly set when we
start to capture a token from the input *){token_buffer=Buffer.create128;token_start_pos=Positions.beginning_of_file}(* these magic numbers are checked in gen_parser_automaton.ml:
let () = assert (initial = 0)
let () = assert (to_int Error = 1) *)letinitial_state=0leterror_state=1letnew_state?(initial_pos=Positions.beginning_of_file)modekind={kind=kind;depth=0;automaton_state=initial_state;block_comment_depth=0;ignoring_stack=[];escaped_value=0;atom_buffer=Buffer.create128;user_state=initial_user_statekindinitial_pos;mode=mode;full_sexps=0;offset=initial_pos.offset;line_number=initial_pos.line;bol_offset=initial_pos.offset-initial_pos.col}letmodet=t.modeletpositionst=Positions.Builder.contentst.user_stateletatom_buffert=t.atom_bufferletoffsetstate=state.offsetletlinestate=state.line_numberletcolumnstate=state.offset-state.bol_offsetletpositiont={Positions.col=columnt;line=linet;offset=offsett}letreset_user_state:typeus.(u,s)state->unit=funt->matcht.kindwith|Positions->Positions.Builder.resett.user_state(positiont)|Sexp->()|Sexp_with_positions->Positions.Builder.resett.user_state(positiont)|Cst->Buffer.cleart.user_state.token_bufferletreset?(pos=Positions.beginning_of_file)t=t.depth<-0;t.automaton_state<-initial_state;t.block_comment_depth<-0;t.ignoring_stack<-[];t.escaped_value<-0;t.full_sexps<-0;t.offset<-pos.offset;t.line_number<-pos.line;t.bol_offset<-pos.offset-pos.col;reset_user_statet;Buffer.cleart.atom_buffertypecontext=Sexp_comment|Sexpletis_ignoringstate=matchstate.ignoring_stackwith|_::_->true|[]->falseletis_not_ignoringstate=not(is_ignoringstate)letcontextstate=ifis_not_ignoringstatethenSexpelseSexp_commentlethas_unclosed_parenstate=state.depth>0letset_error_statestate=state.automaton_state<-error_statemoduleOld_parser_cont_state=structtypet=|Parsing_toplevel_whitespace|Parsing_nested_whitespace|Parsing_atom|Parsing_list|Parsing_sexp_comment|Parsing_block_comment[@@deriving_inlinesexp_of]letsexp_of_t=(function|Parsing_toplevel_whitespace->Ppx_sexp_conv_lib.Sexp.Atom"Parsing_toplevel_whitespace"|Parsing_nested_whitespace->Ppx_sexp_conv_lib.Sexp.Atom"Parsing_nested_whitespace"|Parsing_atom->Ppx_sexp_conv_lib.Sexp.Atom"Parsing_atom"|Parsing_list->Ppx_sexp_conv_lib.Sexp.Atom"Parsing_list"|Parsing_sexp_comment->Ppx_sexp_conv_lib.Sexp.Atom"Parsing_sexp_comment"|Parsing_block_comment->Ppx_sexp_conv_lib.Sexp.Atom"Parsing_block_comment":t->Ppx_sexp_conv_lib.Sexp.t)[@@@end]endmoduleError=structtypet={position:Positions.pos;message:string;old_parser_exn:[`Parse_error|`Failure]}letsexp_of_t{position;message;old_parser_exn=_}:Sexp.t=List[List[Atom"position";Positions.sexp_of_posposition];List[Atom"message";sexp_of_stringmessage]]letpositiont=t.positionletmessaget=t.messageletold_parser_exnt=t.old_parser_exnendexceptionParse_errorofError.t[@@deriving_inlinesexp]let()=Ppx_sexp_conv_lib.Conv.Exn_converter.add([%extension_constructorParse_error])(function|Parse_errorv0->letv0=Error.sexp_of_tv0inPpx_sexp_conv_lib.Sexp.List[Ppx_sexp_conv_lib.Sexp.Atom"parser_automaton_internal.ml.Public.Parse_error";v0]|_->assertfalse)[@@@end]letsexp_of_stack:stack->Sexp.t=function|Sexp(sexp,Empty)->sexp|_->failwith"Parser_automaton.sexp_of_stack"letsexps_of_stack=letrecloopacc:stack->Sexp.tlist=function|Empty->acc|Open_->failwith"Parser_automaton.sexps_of_stack"|Sexp(sexp,stack)->loop(sexp::acc)stackinfunstack->loop[]stackletsexps_cst_of_stack=letrecloopacc(stack:stack_cst)=matchstackwith|Empty->acc|T_or_comment(t,stack)->loop(t::acc)stack|Open_|In_sexp_comment_->failwith"Parser_automaton.sexps_cst_of_stack"infunstack->loop[]stackletautomaton_statestate=state.automaton_stateendopenPublicmoduleError=structincludeErrormoduleReason=struct(* To be kept in sync with the Error module in gen/gen_parser_automaton.ml *)typet=|Unexpected_char_parsing_hex_escape|Unexpected_char_parsing_dec_escape|Unterminated_quoted_string|Unterminated_block_comment|Escape_sequence_out_of_range|Unclosed_paren|Too_many_sexps|Closed_paren_without_opened|Comment_token_in_unquoted_atom|Sexp_comment_without_sexp|Unexpected_character_after_cr|No_sexp_found_in_input|Automaton_in_error_stateendletraise:typeab.(a,b)state->at_eof:bool->Reason.t->_=funstate~at_eofreason->set_error_statestate;letmessage=(* These messages where choosen such that we can build the various Sexplib parsing
functions on top of Parsexp and keep the same exceptions.
At the time of writing this, a simple layer on top of parsexp to implement the
sexplib API is passing all the sexplib tests.
Note that parsexp matches the semantic of Sexp.parse which is slightly
different from the ocamllex/ocamlyacc based parser of Sexplib. The latter one
is less tested and assumed to be less used. *)matchreasonwith|Unexpected_char_parsing_hex_escape->"unterminated hexadecimal escape sequence"|Unexpected_char_parsing_dec_escape->"unterminated decimal escape sequence"|Unterminated_quoted_string->"unterminated quoted string"|Unterminated_block_comment->"unterminated block comment"|Escape_sequence_out_of_range->"escape sequence in quoted string out of range"|Unclosed_paren->"unclosed parentheses at end of input"|Too_many_sexps->"s-expression followed by data"|Closed_paren_without_opened->"unexpected character: ')'"|Comment_token_in_unquoted_atom->ifString.equal(Buffer.contentsstate.atom_buffer)"|"then"illegal end of comment"else"comment tokens in unquoted atom"|Sexp_comment_without_sexp->"unterminated sexp comment"|Unexpected_character_after_cr->ifat_eofthen"unexpected end of input after carriage return"else"unexpected character after carriage return"|No_sexp_found_in_input->"no s-expression found in input"|Automaton_in_error_state->failwith"Parsexp.Parser_automaton: parser is dead"inletold_parser_exn=matchreason,at_eofwith|Too_many_sexps,_|_,true->`Failure|Comment_token_in_unquoted_atom,_whenString.equal(Buffer.contentsstate.atom_buffer)"|"->`Failure|_->`Parse_errorinletposition:Positions.pos={line=state.line_number;col=state.offset-state.bol_offset;offset=state.offset}inraise(Parse_error{position;message;old_parser_exn})endtypenonreccontext=context=Sexp_comment|Sexpletcontext=contexttype('u,'s)action=('u,'s)state->char->'s->'stype('u,'s)epsilon_action=('u,'s)state->'s->'sletcurrent_pos?(delta=0)state:Positions.pos=letoffset=state.offset+deltain{line=state.line_number;col=offset-state.bol_offset;offset=offset}letset_automaton_statestatex=state.automaton_state<-xletadvancestate=state.offset<-state.offset+1letadvance_eol:typeus.(u,s)state->unit=funstate->letnewline_offset=state.offsetinstate.offset<-newline_offset+1;state.bol_offset<-state.offset;state.line_number<-state.line_number+1;matchstate.kindwith|Positions->Positions.Builder.add_newlinestate.user_state~offset:newline_offset|Sexp_with_positions->Positions.Builder.add_newlinestate.user_state~offset:newline_offset|_->()letblock_comment_depthstate=state.block_comment_depthletadd_token_char:typeus.(u,s)action=funstatecharstack->matchstate.kindwith|Cst->Buffer.add_charstate.user_state.token_bufferchar;stack|_->stackletadd_atom_charstatecstack=Buffer.add_charstate.atom_bufferc;stackletadd_quoted_atom_charstatecstack=Buffer.add_charstate.atom_bufferc;add_token_charstatecstackletcheck_new_sexp_allowedstate=letis_single=matchstate.modewithSingle->true|_->falseinifis_single&&state.full_sexps>0&&is_not_ignoringstatethenError.raisestate~at_eof:falseToo_many_sexpsletadd_posstate~delta=Positions.Builder.addstate.user_state~offset:(state.offset+delta)letadd_first_char:typeus.(u,s)action=funstatecharstack->check_new_sexp_allowedstate;Buffer.add_charstate.atom_bufferchar;(* For non-quoted atoms, we save both positions at the end. We can always determine the
start position from the end position and the atom length for non-quoted atoms.
Doing it this way allows us to detect single characater atoms for which we need to
save the position twice. *)stackleteps_add_first_char_hash:typeus.(u,s)epsilon_action=funstatestack->check_new_sexp_allowedstate;Buffer.add_charstate.atom_buffer'#';stackletstart_quoted_string:typeus.(u,s)action=funstate_charstack->check_new_sexp_allowedstate;matchstate.kindwith|Positions->ifis_not_ignoringstatethenadd_posstate~delta:0;stack|Sexp_with_positions->ifis_not_ignoringstatethenadd_posstate~delta:0;stack|Cst->state.user_state.token_start_pos<-current_posstate;Buffer.add_charstate.user_state.token_buffer'"';stack|Sexp->stackletadd_escapedstatecstack=letc'=matchcwith|'n'->'\n'|'r'->'\r'|'b'->'\b'|'t'->'\t'|'\\'|'\''|'"'->c|_->Buffer.add_charstate.atom_buffer'\\';cinBuffer.add_charstate.atom_bufferc';add_token_charstatecstackleteps_add_escaped_crstatestack=Buffer.add_charstate.atom_buffer'\r';stackletdec_valc=Char.codec-Char.code'0'lethex_valc=matchcwith|'0'..'9'->Char.codec-Char.code'0'|'a'..'f'->Char.codec-Char.code'a'+10|_->Char.codec-Char.code'A'+10letadd_dec_escape_charstatecstack=state.escaped_value<-state.escaped_value*10+dec_valc;add_token_charstatecstackletadd_last_dec_escape_charstatecstack=letvalue=state.escaped_value*10+dec_valcinstate.escaped_value<-0;ifvalue>255thenError.raisestate~at_eof:falseEscape_sequence_out_of_range;Buffer.add_charstate.atom_buffer(Char.chrvalue);add_token_charstatecstackletcomment_add_last_dec_escape_charstatecstack=letvalue=state.escaped_value*10+dec_valcinstate.escaped_value<-0;ifvalue>255thenError.raisestate~at_eof:falseEscape_sequence_out_of_range;add_token_charstatecstackletadd_hex_escape_charstatecstack=state.escaped_value<-(state.escaped_valuelsl4)lorhex_valc;add_token_charstatecstackletadd_last_hex_escape_charstatecstack=letvalue=(state.escaped_valuelsl4)lorhex_valcinstate.escaped_value<-0;Buffer.add_charstate.atom_buffer(Char.chrvalue);add_token_charstatecstackletopening:typeus.(u,s)state->char->s->s=funstate_charstack->check_new_sexp_allowedstate;state.depth<-state.depth+1;matchstate.kindwith|Positions->ifis_not_ignoringstatethenadd_posstate~delta:0;stack|Sexp->ifis_not_ignoringstatethenOpenstackelsestack|Sexp_with_positions->ifis_not_ignoringstatethenbeginadd_posstate~delta:0;Openstackendelsestack|Cst->Open(current_posstate,stack)letdo_reset_positionsstate=Positions.Builder.resetstate.user_state{line=state.line_number;col=state.offset-state.bol_offset;offset=state.offset}letreset_positions:typeus.(u,s)state->unit=funstate->matchstate.kindwith|Positions->do_reset_positionsstate|Sexp_with_positions->do_reset_positionsstate|Sexp->()|Cst->()lettoplevel_sexp_or_comment_addedstatestack~delta=matchstate.modewith|Single|Many->stack|Eager{got_sexp=f;_}->(* Modify the offset so that [f] get a state pointing to the end of the current
s-expression *)letsaved_offset=state.offsetinstate.offset<-state.offset+delta;letsaved_full_sexps=state.full_sexpsinmatchfstatestackwith|exceptione->set_error_statestate;raisee|stack->(* This assert is not a full protection against the user mutating the state but
it should catch most cases. *)assert(state.offset=saved_offset+delta&&state.full_sexps=saved_full_sexps);state.offset<-saved_offset;reset_positionsstate;stackletis_top_levelstate=is_not_ignoringstate&&state.depth=0letcomment_added_assuming_cststatestack~delta=ifis_top_levelstatethentoplevel_sexp_or_comment_addedstatestack~deltaelsestackletmaybe_pop_ignoring_stackstate=matchstate.ignoring_stackwith|inner_comment_depth::_tlwheninner_comment_depth>state.depth->Error.raisestate~at_eof:falseSexp_comment_without_sexp;|inner_comment_depth::tlwheninner_comment_depth=state.depth->state.ignoring_stack<-tl;true|_->falseletsexp_added:typeus.(u,s)state->s->delta:int->s=funstatestack~delta->letis_comment=maybe_pop_ignoring_stackstateinifis_top_levelstatethenbeginifnotis_commentthenstate.full_sexps<-state.full_sexps+1;ifnotis_comment||(matchstate.kindwithCst->true|_->false)thentoplevel_sexp_or_comment_addedstatestack~deltaelsestackendelsestackletrecmake_listacc:stack->stack=function|Empty->assertfalse|Openstack->Sexp(Listacc,stack)|Sexp(sexp,stack)->make_list(sexp::acc)stackletadd_comment_to_stack_cstcomment(stack:stack_cst):stack_cst=matchstackwith|In_sexp_commentr->In_sexp_comment{rwithrev_comments=comment::r.rev_comments}|_->T_or_comment(Commentcomment,stack)letadd_sexp_to_stack_cstsexp:stack_cst->stack_cst=function|In_sexp_comment{hash_semi_pos;rev_comments;stack}->letcomment:Cst.comment=Sexp_comment{hash_semi_pos;comments=List.revrev_comments;sexp}inadd_comment_to_stack_cstcommentstack|stack->T_or_comment(Sexpsexp,stack)letrecmake_list_cstend_posacc:stack_cst->stack_cst=function|T_or_comment(t,stack)->make_list_cstend_pos(t::acc)stack|Open(start_pos,stack)->letsexp:Cst.t=List{loc={start_pos;end_pos};elements=acc}inadd_sexp_to_stack_cstsexpstack|Empty|In_sexp_comment_->assertfalseletclosing:typeus.(u,s)state->char->s->s=funstate_charstack->ifstate.depth>0thenbeginletstack=matchstate.kindwith|Positions->(* Note we store end positions as inclusive in [Positions.t], so we use [delta:0],
while in the [Cst] case we save directly the final ranges, so we use
[delta:1]. *)ifis_not_ignoringstatethenadd_posstate~delta:0;stack|Sexp->ifis_not_ignoringstatethenmake_list[]stackelsestack|Sexp_with_positions->ifis_not_ignoringstatethenbeginadd_posstate~delta:0;make_list[]stackendelsestack|Cst->make_list_cst(current_posstate~delta:1)[]stackinstate.depth<-state.depth-1;sexp_addedstatestack~delta:1endelseError.raisestate~at_eof:falseClosed_paren_without_openedletmake_loc?(delta=0)state:Positions.range={start_pos=state.user_state.token_start_pos;end_pos=current_posstate~delta}(* This is always called on the position exactly following the last character of a
non-quoted atom *)letadd_non_quoted_atom_posstate~atom=letlen=String.lengthatominiflen=1thenPositions.Builder.add_twicestate.user_state~offset:(state.offset-1)elsebeginadd_posstate~delta:(-len);add_posstate~delta:(-1);endleteps_push_atom:typeus.(u,s)epsilon_action=funstatestack->letstr=Buffer.contentsstate.atom_bufferinBuffer.clearstate.atom_buffer;letstack=matchstate.kindwith|Positions->ifis_not_ignoringstatethenadd_non_quoted_atom_posstate~atom:str;stack|Sexp->ifis_not_ignoringstatethenSexp(Atomstr,stack)elsestack|Sexp_with_positions->ifis_not_ignoringstatethenbeginadd_non_quoted_atom_posstate~atom:str;Sexp(Atomstr,stack)endelsestack|Cst->letloc:Positions.range={start_pos=current_posstate~delta:(-(String.lengthstr));end_pos=current_posstate~delta:0}inletsexp:Cst.t=Atom{loc;atom=str;unescaped=Somestr}inadd_sexp_to_stack_cstsexpstackinsexp_addedstatestack~delta:0letpush_quoted_atom:typeus.(u,s)action=funstate_charstack->letstr=Buffer.contentsstate.atom_bufferinBuffer.clearstate.atom_buffer;letstack=matchstate.kindwith|Positions->ifis_not_ignoringstatethenadd_posstate~delta:0;stack|Sexp->ifis_not_ignoringstatethenSexp(Atomstr,stack)elsestack|Sexp_with_positions->ifis_not_ignoringstatethenbeginadd_posstate~delta:0;Sexp(Atomstr,stack)endelsestack|Cst->letbuf=state.user_state.token_bufferinBuffer.add_charbuf'"';lets=Buffer.contentsbufinBuffer.clearbuf;letsexp:Cst.t=Atom{loc=make_locstate~delta:1;atom=str;unescaped=Somes}inadd_sexp_to_stack_cstsexpstackinsexp_addedstatestack~delta:1letstart_sexp_comment:typeus.(u,s)action=funstate_charstack->state.ignoring_stack<-state.depth::state.ignoring_stack;matchstate.kindwith|Cst->In_sexp_comment{hash_semi_pos=current_posstate~delta:(-1);rev_comments=[];stack}|_->stackletstart_block_comment:typeus.(u,s)state->char->s->s=funstatecharstack->state.block_comment_depth<-state.block_comment_depth+1;matchstate.kindwith|Positions->stack|Sexp->stack|Sexp_with_positions->stack|Cst->ifstate.block_comment_depth=1thenbeginstate.user_state.token_start_pos<-current_posstate~delta:(-1);Buffer.add_charstate.user_state.token_buffer'#'end;Buffer.add_charstate.user_state.token_bufferchar;stackletend_block_comment:typeus.(u,s)state->char->s->s=funstatecharstack->state.block_comment_depth<-state.block_comment_depth-1;matchstate.kindwith|Positions->stack|Sexp->stack|Sexp_with_positions->stack|Cst->letbuf=state.user_state.token_bufferinBuffer.add_charbufchar;ifstate.block_comment_depth=0thenbeginlets=Buffer.contentsbufinBuffer.clearbuf;letcomment:Cst.comment=Plain_comment{loc=make_locstate~delta:1;comment=s}inletstack=add_comment_to_stack_cstcommentstackincomment_added_assuming_cststatestack~delta:1endelsestackletstart_line_comment:typeus.(u,s)action=funstatecharstack->matchstate.kindwith|Cst->state.user_state.token_start_pos<-current_posstate;Buffer.add_charstate.user_state.token_bufferchar;stack|_->stackletend_line_comment:typeus.(u,s)epsilon_action=funstatestack->matchstate.kindwith|Positions->stack|Sexp->stack|Sexp_with_positions->stack|Cst->letbuf=state.user_state.token_bufferinlets=Buffer.contentsbufinBuffer.clearbuf;letcomment:Cst.comment=Plain_comment{loc=make_locstate;comment=s}inletstack=add_comment_to_stack_cstcommentstackincomment_added_assuming_cststatestack~delta:0leteps_eoi_check:typeus.(u,s)epsilon_action=funstatestack->ifstate.depth>0thenError.raisestate~at_eof:trueUnclosed_paren;ifis_ignoringstatethenError.raisestate~at_eof:trueSexp_comment_without_sexp;ifstate.full_sexps=0then(matchstate.modewith|Many|Eager{no_sexp_is_error=false;_}->()|Single|Eager{no_sexp_is_error=true;_}->Error.raisestate~at_eof:trueNo_sexp_found_in_input);stack