123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701(* Extensible lexer: one can define new terminals or extend existing ones but not redefine nor remove them. *)includeAutomaton(*let soc c = let x = " " in x.[0] <- c; x*)letany_char=(0,255)letinterval_diff(a1,b1)(a2,b2)=ifa1<a2thenifb1<=b2then[a1,min(a2-1)b1]else[(a1,a2-1);(b2+1,b1)]elseifb1<=b2then[]else[maxa1(b2+1),b1]letdiffcs1cs2=letauxcs1ci2=List.fold_left(funlci1->(interval_diffci1ci2)@l)[]cs1inList.fold_leftauxcs1cs2letinterval_inter(a1,b1)(a2,b2)=ifb1<a2||b2<a1thenNoneelseSome(maxa1a2,minb1b2)letregexp_of_strings=letrecauxil=ifi=-1thenlelseaux(i-1)((RE_Chars.[i])::l)inletlen=String.lengthsiniflen=0thenfailwith"Lexer generator: empty string in regular expression"elseRE_Seq(aux(len-1)[])letnorm_cscs=List.map(fun(a,b)->leta,b=int_of_chara,int_of_charbinminab,maxab)csletcheck_cscs=List.map(fun(a,b)->ifa>bthenfailwith"check_cs")csletdisjointcs=matchcswith[]|[_]->cs|_->letcs=List.sort(fun(a,_)(b,_)->compareab)csinletrecauxhlaccu=matchlwith|[]->h::accu|((a2,b2)ash2)::t->leta1,b1=hinifb1>=a2-1thenaux(a1,maxb1b2)taccuelseauxh2t(h::accu)inmatchcswithh::t->List.rev(auxht[])|_->assertfalseletstr_int_listl=letl=List.map(funi->string_of_inti)linString.concat" "lletlist_of_sets=Int_set.fold(funxl->x::l)s[]letstr_int_sets=str_int_list(list_of_sets)letstr_intc_listl=String.concat" "(List.map(fun(a,b)->(string_of_inta)^"-"^(string_of_intb))l)letstr_trans(l,n)=Printf.sprintf"\n <%d> %s"n.id(str_intc_listl)letstr_epsn=Printf.sprintf"\n <%d>"n.idletstr_trans_listl=String.concat""(List.mapstr_transl)letstr_eps_listl=String.concat""(List.mapstr_epsl)letprint_nodes=Printf.fprintf!log_channel"State [%d]\n trans=%s\n eps=%s\n matched=%s\n\n"s.id(str_trans_lists.trans)(str_eps_lists.eps)(str_int_sets.matched)letprnn=Printf.fprintf!log_channel"Node [%d]\n trans=%s\n eps=%s\n matched=%s\n\n"n.id(str_trans_listn.trans)(str_int_list(List.map(funn->n.id)n.eps))(str_int_setn.matched);nletprint_nfas=letrecauxsvisited=ifnot(Int_set.mems.idvisited)then(print_nodes;letvisited=List.fold_left(funvisited(_,n)->auxnvisited)(Int_set.adds.idvisited)s.transinList.fold_left(funvisitedn->auxnvisited)visiteds.eps)elsevisitedinlet_=auxsInt_set.emptyin()letbuild_nfatable=letrecaux=function|RE_Charc->letc=int_of_charcin(funsuccid->(*prn*){id=id;trans=[[c,c],succ];eps=[];matched=Int_set.empty},id+1)|RE_Char_setcs->letcs=disjoint(norm_cscs)in(funsuccid->(*prn*){id=id;trans=[cs,succ];eps=[];matched=Int_set.empty},id+1)|RE_Char_set_exclucs->letcs=disjoint(diff[any_char](norm_cscs))in(funsuccid->(*prn*){id=id;trans=[cs,succ];eps=[];matched=Int_set.empty},id+1)|RE_Strings->aux(regexp_of_strings)|RE_Altrl->letfl=aux_listrlin(funsuccid->letnl,id=List.fold_left(fun(nl,id)f->letn,id=fsuccidinn::nl,id)([],id)flin(*prn*){id=id;trans=[];eps=nl;matched=Int_set.empty},id+1)|RE_Seqrl->letfl=aux_listrlin(funsuccid->List.fold_left(fun(n,id)f->fnid)(succ,id)fl)|RE_Starr->letf=auxrin(funsuccid->letn={id=id;trans=[];eps=[];matched=Int_set.empty}inlets,id1=fn(id+1)inn.eps<-[s;succ];(*prn*)n,id1)|RE_Plusr->letf=auxrin(funsuccid->letn={id=id;trans=[];eps=[];matched=Int_set.empty}inlets,id1=fn(id+1)inn.eps<-[s;succ];(*prn n;*)(*prn*)s,id1)|RE_Optionr->letf=auxrin(funsuccid->lets,id=fsuccidin(*prn*){id=id;trans=[];eps=[s;succ];matched=Int_set.empty},id+1)|RE_Names->Hashtbl.findtables|RE_Eof_char->(funsuccid->(*prn*){id=id;trans=[[256,256],succ];eps=[];matched=Int_set.empty},id+1)andaux_listrl=List.fold_left(funflr->(auxr)::fl)[]rlinfunctionr->auxrletmake_nfartableregexp_idid=letf=build_nfartableinf((*prn*){id=id;trans=[];eps=[];matched=Int_set.addregexp_idInt_set.empty})(id+1)letcompile_regexp_declrl=lettable=Hashtbl.create(List.lengthrl)in(*print_endline "regexp_decl:\n";*)List.iter(fun(name,r)->(*Printf.printf "%s : %s\n" name (print_pretty_regexp r);*)letf=build_nfatablerin(*let n, _ =
f { id = 0; trans = []; eps = [];
matched = Int_set.add (-1) Int_set.empty } 1
in
print_nfa n;*)Hashtbl.addtablenamef)rl;(*print_newline ();*)tableletprint_tltl=Printf.printf"disjoint_tl called, trans list:\n";List.iter(fun(cil,n)->Printf.printf"node id: %d\n"n.id;Printf.printf" %s\n"(str_intc_listcil))tlletprint_disjoint_tl_resres=Printf.printf"disjoint_tl result:\n";List.iter(fun((a,b),nl)->Printf.printf"character interval: %d-%d\n"ab;Printf.printf" %s\n"(str_int_list(List.map(funn->n.id)nl)))resmoduleOrdered_node=structtypet=nodeletcomparen1n2=Stdlib.comparen1.idn2.idendmoduleNode_set=Set.Make(Ordered_node)letlist_of_nsns=Node_set.fold(funnl->n::l)ns[]letci_begin=Array.make257[]letci_end=Array.make257[]letdisjoint_tltlscount=List.iter(fun(il,n)->List.iter(fun(a,b)->ci_begin.(a)<-n::ci_begin.(a);ci_end.(b)<-n::ci_end.(b))il)tl;letrecaux1ires=ifi=257thenreselsematchci_begin.(i)with|[]->aux1(i+1)res|l->letnew_ns=List.fold_left(funnsn->scount.(n.id)<-scount.(n.id)+1;ifscount.(n.id)=1thenNode_set.addnnselsens)Node_set.emptylinaux2iinew_nsresandaux2iinfnsres=matchci_end.(i)with|[]->aux3(i+1)infnsres|l->letnew_ns=List.fold_left(funnsn->scount.(n.id)<-scount.(n.id)-1;ifscount.(n.id)=0thenNode_set.removennselsens)nslinletnl=list_of_nsnsinifi=256then((inf,i),nl)::reselseifNode_set.is_emptynew_nsthenaux1(i+1)(((inf,i),nl)::res)elseaux3(i+1)(i+1)new_ns(((inf,i),nl)::res)andaux3iinfnsres=matchci_begin.(i)with|[]->aux2iinfnsres|l->letnew_ns=List.fold_left(funnsn->scount.(n.id)<-scount.(n.id)+1;ifscount.(n.id)=1thenNode_set.addnnselsens)nslinifinf=ithenaux2iinew_nsreselseaux2iinew_ns(((inf,i-1),list_of_nsns)::res)inletres=aux10[]inList.iter(fun(il,_)->List.iter(fun(a,b)->ci_begin.(a)<-[];ci_end.(b)<-[])il)tl;resmoduleOrdered_int_set=structtypet=Int_set.tletcompare=Int_set.compareendmoduleState_map=Map.Make(Ordered_int_set)letprint_node_listsl=Printf.fprintf!log_channel"States list:\n\n";List.iterprint_nodeslletunion_matchednlfilter_matched=List.fold_left(funsn->Int_set.unions(filter_matchedn))Int_set.emptynlletmake_dfastate_count_arrayfilter_matched=letrecmake_state(next,sl,sm,id)((ci,nl),ids)=trylets=State_map.findidssmin([ci],s)::next,sl,sm,idwithNot_found->lets={id=id;trans=[];eps=[];matched=union_matchednlfilter_matched}inletnew_next,sl,sm,id=letsm=State_map.addidsssminmake_nextnlslsm(id+1)ins.trans<-new_next;([ci],s)::next,s::sl,sm,idandmake_nextnlslsmid=(*Printf.printf "make_next pour %d\n" (id-1); print_node_list nl;*)letl=letdtlres=(disjoint_tl(List.concat(List.map(funn->n.trans)nl))state_count_array)in(*print_disjoint_tl_res dtlres;*)List.map(fun(ci,nl)->letnl,ids=epsilon_closurenlin(ci,nl),ids)dtlresinletres=List.fold_leftmake_state([],sl,sm,id)lin(*(print_endline "make_next ends";
let a,_,_,_ = res in
Printf.printf "result = %s\n" (str_trans_list a));*)resandepsilon_closurenl=letrecaux(accu,id_set)n=ifInt_set.memn.idid_setthenaccu,id_setelseList.fold_leftaux(n::accu,Int_set.addn.idid_set)n.epsinList.fold_leftaux([],Int_set.empty)nlinfunctionnfa_start_list->(*Printf.printf "nfa_start_list length = %d\n"
(List.length nfa_start_list);*)letnl,ids=epsilon_closurenfa_start_listinletmatched=union_matchednlfilter_matchedinletstart={id=0;trans=[];eps=[];matched=matched}inletnext,sl,_,snb=letsmap=State_map.addidsstartState_map.emptyinmake_nextnl[start]smap1instart.trans<-next;start,sl,snbletwrite_interval(a,b)idec_tablenext_id=forj=i*257+atoi*257+bdodec_table.(j)<-next_iddoneletprint_dec_tabledt=Printf.fprintf!log_channel"Transition table\n";fori=0toArray.lengthdt-1doPrintf.fprintf!log_channel" (%d,%d):%d\n"(i/257)(imod257)dt.(i)done;Printf.fprintf!log_channel"\n"letprint_finalf=Printf.fprintf!log_channel"Final table\n";fori=0toArray.lengthf-1doPrintf.fprintf!log_channel" state %d:%s\n"i(str_int_listf.(i))done;Printf.fprintf!log_channel"\n\n"letmake_lexerbuild_nfa_table=function[]->letdummy_node={id=0;trans=[];eps=[];matched=Int_set.empty}in{tbl_trans=[||];tbl_final=[||];tbl_notrans=[||]},dummy_node|rl->(*Printf.printf "rl length = %d\n" (List.length rl);*)(*print_endline "main lexer:\n";*)letnfa_list,_,nfa_state_nb=List.fold_left(fun(nfa_l,regexp_id,node_id)regexp->letnfa,node_id=make_nfabuild_nfa_tableregexpregexp_idnode_idin(*Printf.printf "%d : %s\n" regexp_id (print_pretty_regexp regexp);
print_nfa nfa;*)nfa::nfa_l,(regexp_id+1),node_id)([],0,0)rlinletstate_count_array=Array.makenfa_state_nb0inletstart,sl,_=make_dfastate_count_array(funn->n.matched)nfa_listinif!dypgen_verbose>4thenprint_node_listsl;letstate_nb=List.lengthslinletdec_table=Array.make(state_nb*257)(-1)in(* This is the decision table, 256 indices for characters and
the last one for eof. *)letfinal=Array.makestate_nb[]inletnotrans=Array.makestate_nbfalseinlet_=List.iter(funn->(matchn.transwith[]->notrans.(n.id)<-true|trans_l->List.iter(function([ci],n1)->write_intervalcin.iddec_tablen1.id|_->assertfalse)trans_l);final.(n.id)<-List.sortStdlib.compare(list_of_setn.matched)(* Is it necessary to sort the list or doesn't
list_of_set already do it? *))slinif!dypgen_verbose>4then(print_dec_tabledec_table;print_finalfinal);{tbl_trans=dec_table;tbl_final=final;tbl_notrans=notrans},startletextend_lexermain_lexer_startregexp_listbuild_nfa_tablenode_nbregexp_nb=letaux_nfa(nfa_l,regexp_id,node_id)regexp=letnfa,node_id=make_nfabuild_nfa_tableregexpregexp_idnode_idinnfa::nfa_l,(regexp_id+1),node_idinletnfa_list,fst_regexp_id,nfa_state_nb=List.fold_leftaux_nfa([main_lexer_start],0,node_nb)(fstregexp_list)inletnfa_list,_,nfa_state_nb=List.fold_leftaux_nfa(nfa_list,regexp_nb+fst_regexp_id,node_nb+nfa_state_nb)(sndregexp_list)inletstate_count_array=Array.makenfa_state_nb0inletfilter_matchedn=ifn.id<node_nbthenInt_set.fold(funis->Int_set.add(i+fst_regexp_id)s)n.matchedInt_set.emptyelsen.matchedinletstart,sl,_=make_dfastate_count_arrayfilter_matchednfa_listinif!dypgen_verbose>4thenprint_node_listsl;letstate_nb=List.lengthslinletdec_table=Array.make(state_nb*257)(-1)in(* This is the decision table, 256 indices for characters and
the last one for eof. *)letfinal=Array.makestate_nb[]inletnotrans=Array.makestate_nbfalseinlet_=List.iter(funn->(matchn.transwith[]->notrans.(n.id)<-true|trans_l->List.iter(function([ci],n1)->write_intervalcin.iddec_tablen1.id|_->assertfalse)trans_l);final.(n.id)<-List.sortStdlib.compare(list_of_setn.matched)(* Is it necessary to sort the list or doesn't
list_of_set already do it? *))slinif!dypgen_verbose>4then(print_dec_tabledec_table;print_finalfinal);{tbl_trans=dec_table;tbl_final=final;tbl_notrans=notrans},startopenLexingletlexemedyplexbuf=Lexing.lexemedyplexbuf.lb_lexbufletlexeme_chardyplexbufi=Lexing.lexeme_chardyplexbuf.lb_lexbufiletlexeme_startdyplexbuf=Lexing.lexeme_startdyplexbuf.lb_lexbufletlexeme_enddyplexbuf=Lexing.lexeme_enddyplexbuf.lb_lexbufletlexeme_start_pdyplexbuf=Lexing.lexeme_start_pdyplexbuf.lb_lexbufletlexeme_end_pdyplexbuf=Lexing.lexeme_end_pdyplexbuf.lb_lexbufletflush_inputdyplexbuf=Lexing.flush_inputdyplexbuf.lb_lexbufletlex_engineis_main_lexertbl_list(lexbuf:Lexing.lexbuf)reset_start_pos=(*let input_string = dyplexbuf.lb_string in*)ifreset_start_posthen(lexbuf.lex_start_pos<-lexbuf.lex_curr_pos;lexbuf.lex_start_p<-lexbuf.lex_curr_p);(*let curr_pos = lexbuf.lex_curr_pos in*)if!dypgen_verbose>4then(Printf.printf"lex_engine begins: curr_pos = %d\n"lexbuf.lex_curr_pos);letadd_final_p,add_final=ifis_main_lexerthen(funab->a::b),(funab->a::b)else(funa_->[[List.hda]]),(fun(a,b)_->[a,[List.hdb]])inletlex_nb=List.lengthtbl_listinletrecauxstate_listfinalvalid_lex=letaux_final(final_p,valid_lex,matched)tblstate=ifstate=-1then[-1]::final_p,valid_lex,matchedelseletfinal_p,matched=matchtbl.tbl_final.(state)with|[]->[-1]::final_p,matched(* -1 is useful as a placeholder when the list will be read
in the function select_token *)|lf->if!dypgen_verbose>4then(Printf.fprintf!log_channel"add_final_p : %s\n"(str_int_listlf));add_final_plffinal_p,trueinletvalid_lex=if(trytbl.tbl_notrans.(state)with_->false)thenvalid_lex-1elsevalid_lexinfinal_p,valid_lex,matchedinletfinal_p,valid_lex,matched=List.fold_left2aux_final([],valid_lex,false)tbl_liststate_listinletfinal=ifmatchedthenletabs_curr_pos=lexbuf.lex_abs_pos+lexbuf.lex_curr_posinadd_final(abs_curr_pos,final_p)final(* The position here is useful information: in lex_token
lex_curr_p.pos_cnum will be set to the position recorded here. *)elsefinalinifvalid_lex=0thenmatchfinalwith_::_->final|[]->failwith("lexing: empty token")elseletc=letb=(lexbuf.lex_curr_pos=lexbuf.lex_buffer_len)inifbthenlexbuf.refill_bufflexbuf;ifb&&lexbuf.lex_eof_reachedthen256elseletp=lexbuf.lex_curr_posinlexbuf.lex_curr_pos<-p+1;if!dypgen_verbose>4then(Printf.fprintf!log_channel"lex_engine reads: `%c'\n"(Bytes.getlexbuf.lex_bufferp));tryChar.code(Bytes.getlexbuf.lex_bufferp)withInvalid_argument_->(Printf.printf"%d, %d, %s, %d, %d\n"lexbuf.lex_curr_poslexbuf.lex_buffer_len(string_of_boolreset_start_pos)p(Bytes.lengthlexbuf.lex_buffer);raise(Invalid_argument("index out of bounds")))inletaux_lex(new_state_list,valid_lex)tblstate=ifstate=-1then((*Printf.printf "next_state = -1\n";*)(-1)::new_state_list,valid_lex)elseletnext_state=tbl.tbl_trans.(state*257+c)inif!dypgen_verbose>4then(Printf.fprintf!log_channel"next_state = %d\n"next_state);letvalid_lex=if(try(next_state=-1&¬tbl.tbl_notrans.(state))with_->false)thenvalid_lex-1elsevalid_lexinnext_state::new_state_list,valid_lexinletnew_state_list,valid_lex=List.fold_left2aux_lex([],valid_lex)tbl_liststate_listinletnew_state_list=List.revnew_state_listinifvalid_lex=0thenmatchfinalwith_::_->final|[]->failwith("lexing: empty token")elseauxnew_state_listfinalvalid_lexinletl0=List.map(fun_->0)tbl_listinauxl0[]lex_nbletlexlexer_nameargldyplexbuf=lettable=Hashtbl.finddyplexbuf.lb_aux_lex.aux_lexer_tablelexer_nameinmatchlex_enginefalse[table]dyplexbuf.lb_lexbuftruewith[p,[[final]]]->(*dyplexbuf.lb_curr_p <- { dyplexbuf.lb_curr_p with Lexing.pos_cnum = p };*)dyplexbuf.lb_lexbuf.lex_curr_p<-{dyplexbuf.lb_lexbuf.lex_curr_pwithLexing.pos_cnum=p};dyplexbuf.lb_lexbuf.lex_curr_pos<-p-dyplexbuf.lb_lexbuf.lex_abs_pos;(*Printf.printf "lex: curr_pos = %d\n" p;*)letaction=(Hashtbl.finddyplexbuf.lb_aux_lex.aux_lexer_actionslexer_name).(final)inactionargldyplexbuf|_->assertfalseletzero_position={Lexing.pos_fname="";Lexing.pos_lnum=0;Lexing.pos_bol=0;Lexing.pos_cnum=0;}(*let make_lexbuf pp str = {
lb_curr_p = zero_position;
lb_start_p = zero_position;
lb_string = str;
lb_aux_lex = (fst pp).aux_lexer }*)letfrom_stringppstr={lb_lexbuf=Lexing.from_stringstr;lb_aux_lex=pp.pp_dev.aux_lexer}letfrom_channelppic={lb_lexbuf=Lexing.from_channelic;lb_aux_lex=pp.pp_dev.aux_lexer}letfrom_functionppf={lb_lexbuf=Lexing.from_functionf;lb_aux_lex=pp.pp_dev.aux_lexer}letdyplex_lexbuf_positiondyplexbuf=dyplexbuf.lb_lexbuf.lex_start_p,dyplexbuf.lb_lexbuf.lex_curr_pletstd_lexbufdyplexbuf=dyplexbuf.lb_lexbufletset_newlinedyplexbuf=letl=std_lexbufdyplexbufinletpos=l.lex_curr_pinletnpos={poswithpos_lnum=pos.pos_lnum+1;pos_bol=pos.pos_cnum}inl.lex_curr_p<-nposletset_fnamedyplexbuffname=letl=std_lexbufdyplexbufinletpos=l.lex_curr_pinletnpos={poswithpos_fname=fname}inl.lex_curr_p<-npos;letpos=l.lex_start_pinletnpos={poswithpos_fname=fname}inl.lex_start_p<-npos