123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411(***********************************************************************)(* *)(* ocamlbuild *)(* *)(* Nicolas Pouillard, Berke Durak, projet Gallium, INRIA Rocquencourt *)(* *)(* Copyright 2007 Institut National de Recherche en Informatique et *)(* en Automatique. All rights reserved. This file is distributed *)(* under the terms of the GNU Library General Public License, with *)(* the special exception on linking described in file ../LICENSE. *)(* *)(***********************************************************************)(* Original author: Berke Durak *)(* Glob *)openFormula;;includeAst;;openLexer;;letsf=Printf.sprintf;;letbrute_limit=10;;(*** string_of_token *)letstring_of_token=function|ATOM_->"ATOM"|AND->"AND"|OR->"OR"|NOT->"NOT"|LPAR->"LPAR"|RPAR->"RPAR"|TRUE->"TRUE"|FALSE->"FALSE"|EOF->"EOF";;(* ***)(*** match_character_class *)letmatch_character_classclc=Formula.evalbeginfunction(c1,c2)->c1<=c&&c<=c2endcl;;(* ***)(*** NFA *)moduleNFA=structtypetransition=|QCLASSofcharacter_class|QEPSILON;;moduleIS=Set.Make(structtypet=intletcompare(x:t)y=comparexyend);;moduleISM=Map.Make(structtypet=IS.tletcompare=IS.compareend);;typemachine={mc_qi:IS.t;mc_table:(character_class*IS.t)listarray;mc_qf:int;mc_power_table:(char,IS.tISM.t)Hashtbl.t}(*** build' *)letbuild'p=letcount=ref0inlettransitions=ref[]inletepsilons:(int*int)listref=ref[]inletstate()=letid=!countinincrcount;idinlet(-->)q1tq2=matchtwith|QEPSILON->epsilons:=(q1,q2)::!epsilons;q1|QCLASScl->transitions:=(q1,cl,q2)::!transitions;q1in(* Build the transitions corresponding to the given pattern and arriving
* on state qf. Return the original state. *)letrecloopqf=function|Epsilon->qf|Wordu->letm=String.lengthuinletq0=state()inletrecloopqi=ifi=mthenq0elsebeginletq'=ifi=m-1thenqfelsestate()inlet_=(q-->QCLASS(Atom(u.[i],u.[i])))q'inloopq'(i+1)endinloopq00|Classcl->letq1=state()in(q1-->QCLASScl)qf|Starp->(* The fucking Kleene star *)letq2=state()inletq1=loopq2pin(* q1 -{p}-> q2 *)let_=(q1-->QEPSILON)qfinlet_=(q2-->QEPSILON)q1inlet_=(q2-->QEPSILON)q1inq1|Concat(p1,p2)->letq12=state()inletq1=loopq12p1in(* q1 -{p1}-> q12 *)letq2=loopqfp2in(* q2 -{p2}-> qf *)let_=(q12-->QEPSILON)q2inq1|Unionpl->letqi=state()inList.iterbeginfunp->letq=loopqfpin(* q -{p2}-> qf *)let_=(qi-->QEPSILON)qin(* qi -{}---> q *)()endpl;qiinletqf=state()inletqi=loopqfpinletm=!countin(* Compute epsilon closure *)letgraph=Array.makemIS.emptyinList.iterbeginfun(q,q')->graph.(q)<-IS.addq'graph.(q)end!epsilons;letclosure=Array.makemIS.emptyinletrectransitivepast=function|[]->past|q::future->letpast'=IS.addqpastinletfuture'=IS.foldbeginfunq'future'->(* q -{}--> q' *)ifIS.memq'past'thenfuture'elseq'::future'endgraph.(q)futureintransitivepast'future'infori=0tom-1doclosure.(i)<-transitiveIS.empty[i](* O(n^2), I know *)done;(* Finally, build the table *)lettable=Array.makem[]inList.iterbeginfun(q,t,q')->table.(q)<-(t,closure.(q'))::table.(q)end!transitions;(graph,closure,{mc_qi=closure.(qi);mc_table=table;mc_qf=qf;mc_power_table=Hashtbl.create37});;letbuildx=let(_,_,machine)=build'xinmachine;;(* ***)(*** run *)letrun?(trace=false)machineu=letm=String.lengthuinletapplyqsc=trylett=Hashtbl.findmachine.mc_power_tablecinISM.findqstwith|Not_found->letqs'=IS.foldbeginfunqqs'->List.fold_leftbeginfunqs'(cl,qs'')->ifmatch_character_classclcthenIS.unionqs'qs''elseqs'endqs'machine.mc_table.(q)endqsIS.emptyinlett=tryHashtbl.findmachine.mc_power_tablecwith|Not_found->ISM.emptyinHashtbl.replacemachine.mc_power_tablec(ISM.addqsqs't);qs'inletrecloopqsi=ifIS.is_emptyqsthenfalseelsebeginifi=mthenIS.memmachine.mc_qfqselsebeginletc=u.[i]iniftracethenbeginPrintf.printf"%d %C {"ic;IS.iter(funq->Printf.printf" %d"q)qs;Printf.printf" }\n%!"end;letqs'=applyqscinloopqs'(i+1)endendinloopmachine.mc_qi0;;(* ***)end;;(* ***)(*** Brute *)moduleBrute=structexceptionToo_hard;;(*** match_pattern *)letmatch_patterncounterpu=letm=String.lengthuin(* [loop i n p] returns [true] iff the word [u.(i .. i + n - 1)] is in the
* language generated by the pattern [p].
* We must have 0 <= i and i + n <= m *)letrecloop(i,n,p)=assert(0<=i&&0<=n&&i+n<=m);incrcounter;if!counter>=brute_limitthenraiseToo_hard;matchpwith|Wordv->String.lengthv=n&&beginletreccheckj=j=n||(v.[j]=u.[i+j]&&check(j+1))incheck0end|Epsilon->n=0|Star(ClassTrue)->true|Star(Classcl)->letreccheckk=ifk=nthentrueelse(match_character_classclu.[i+k])&&check(k+1)incheck0|Star_->raiseToo_hard|Classcl->n=1&&match_character_classclu.[i]|Concat(p1,p2)->letrecscanj=j<=n&&((loop(i,j,p1)&&loop(i+j,n-j,p2))||scan(j+1))inscan0|Unionpl->List.exists(funp'->loop(i,n,p'))plinloop(0,m,p);;(* ***)end;;(* ***)(*** fast_pattern_contents, fast_pattern, globber *)typefast_pattern_contents=|Bruteofintref*pattern|MachineofNFA.machine;;typefast_pattern=fast_pattern_contentsref;;typeglobber=fast_patternatomFormula.t;;(* ***)(*** fast_pattern_of_pattern *)letfast_pattern_of_patternp=ref(Brute(ref0,p));;(* ***)(*** add_dir *)letadd_dirdirx=matchdirwith|None->x|Some(dir)->matchxwith|Constant(s)->Constant(Filename.concatdirs)|Pattern(p)->Pattern(Concat(Word(Filename.concatdir""),p));;(* ***)(*** add_ast_dir *)letadd_ast_dirdirx=matchdirwith|None->x|Somedir->letslash=Class(Atom('/','/'))inletany=ClassTrueinletq=Union[Epsilon;Concat(slash,Starany)]in(* ( /** )? *)And[Atom(Pattern(ref(Brute(ref0,Concat(Worddir,q)))));x];;(* ***)(*** parse *)letparse?diru=letl=Lexing.from_stringuinlettok=refNoneinletf=fun()->match!tokwith|None->tokenl|Somex->tok:=None;xinletgt=match!tokwith|None->tok:=Somet|Somet'->raise(Parse_error(sf"Trying to unput token %s while %s is active"(string_of_tokent)(string_of_tokent')))inletreadx=lety=f()inifx=ythen()elseraise(Parse_error(sf"Unexpected token, expecting %s, got %s"(string_of_tokenx)(string_of_tokeny)))inletrecatomizercontinuation=matchf()with|NOT->atomizer(funx->continuation(Notx))|ATOMx->beginleta=matchadd_dirdirxwith|Constantu->Constantu|Patternp->Pattern(fast_pattern_of_patternp)incontinuation(Atoma)end|TRUE->continuationTrue|FALSE->continuationFalse|LPAR->lety=parse_s()inreadRPAR;continuationy|t->raise(Parse_error(sf"Unexpected token %s in atomizer"(string_of_tokent)))andparse_s1x=matchf()with|OR->lety=parse_s()inOr[x;y]|AND->parse_tx|t->gt;xandparse_t1xy=matchf()with|OR->letz=parse_s()inOr[And[x;y];z]|AND->parse_t(And[x;y])|t->gt;And[x;y]andparse_s()=atomizerparse_s1andparse_tx=atomizer(parse_t1x)inletx=parse_s()inreadEOF;add_ast_dirdirx;;(* ***)(*** eval *)letevalgu=Formula.evalbeginfunction|Constantv->u=v|Patternkind->match!kindwith|Brute(count,p)->beginletdo_nfa()=letm=NFA.buildpinkind:=Machinem;NFA.runmuinif!count>=brute_limitthendo_nfa()elsetryBrute.match_patterncountpuwith|Brute.Too_hard->do_nfa()end|Machinem->NFA.runmuendg(* ***)(*** Debug *)(*let (Atom(Pattern x)) = parse "<{a,b}>";;
#install_printer IS.print;;
#install_printer ISM.print;;
let (graph, closure, machine) = build' x;;*)(* ***)