123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352(*
RE - A regular expression library
Copyright (C) 2001 Jerome Vouillon
email: Jerome.Vouillon@pps.jussieu.fr
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation, with
linking exception; either version 2.1 of the License, or (at
your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*)moduleRe=CoreexceptionParse_errortypeenclosed=|Charofchar|Rangeofchar*chartypepiece=|Exactlyofchar|Any_ofofenclosedlist|Any_butofenclosedlist|One|Many|ManyManytypet=piecelistletof_string~double_asterisks:t=leti=ref0inletl=String.lengthsinleteos()=!i=linletreadc=letr=not(eos())&&s.[!i]=cinifrthenincri;rin(**
[read_ahead pattern] will attempt to read [pattern] and will return [true] if it was successful.
If it fails, it will return [false] and not increment the read index.
*)letread_aheadpattern=letpattern_len=String.lengthpatternin(* if the pattern we are looking for exeeds the remaining length of s, return false immediately *)if!i+pattern_len>=lthenfalseelsetryforj=0topattern_len-1doletfound=not(eos())&&s.[!i+j]=pattern.[j]inifnotfoundthenraise_notraceExit;done;i:=!i+pattern_len;truewith|Exit->falseinletchar()=ignore(read'\\':bool);ifeos()thenraiseParse_error;letr=s.[!i]inincri;rinletenclosed():enclosedlist=letrecloops=(* This returns the list in reverse order, but order isn't important anyway *)ifs<>[]&&read']'thenselseletc=char()inifnot(read'-')thenloop(Charc::s)elseifread']'thenCharc::Char'-'::selseletc'=char()inloop(Range(c,c')::s)inloop[]inletpiece()=ifdouble_asterisk&&read_ahead"/**"&¬(eos())thenManyManyelseifread'*'thenifdouble_asterisk&&read'*'thenManyManyelseManyelseifread'?'thenOneelseifnot(read'[')thenExactly(char())elseifread'^'||read'!'thenAny_but(enclosed())elseAny_of(enclosed())inletreclooppieces=ifeos()thenList.revpieceselseloop(piece()::pieces)inloop[]letmulll'=List.flatten(List.map(funs->List.map(funs'->s^s')l')l)letexplodestr=letl=String.lengthstrinletrecexplinnersiaccbeg=ifi>=lthenbeginifinnerthenraiseParse_error;(mulbeg[String.substrs(i-s)],i)endelsematchstr.[i]with|'\\'->explinners(i+2)accbeg|'{'->let(t,i')=expltrue(i+1)(i+1)[][""]inexplinneri'i'acc(mulbeg(mul[String.substrs(i-s)]t))|','wheninner->explinner(i+1)(i+1)(mulbeg[String.substrs(i-s)]@acc)[""]|'}'wheninner->(mulbeg[String.substrs(i-s)]@acc,i+1)|_->explinners(i+1)accbeginList.rev(fst(explfalse00[][""]))moduleState=structtypet={re_pieces:Re.tlist;(* last piece at head of list. *)remaining:piecelist;(* last piece at tail of list. *)am_at_start_of_pattern:bool;(* true at start of pattern *)am_at_start_of_component:bool;(* true at start of pattern or immediately
after '/' *)pathname:bool;match_backslashes:bool;period:bool;}letcreate~period~pathname~match_backslashesremaining={re_pieces=[];am_at_start_of_pattern=true;am_at_start_of_component=true;pathname;match_backslashes;period;remaining;}letexplicit_periodt=t.period&&(t.am_at_start_of_pattern||(t.am_at_start_of_component&&t.pathname))letexplicit_slasht=t.pathnameletslashest=ift.match_backslashesthen['/';'\\']else['/']letappend?(am_at_start_of_component=false)tpiece={twithre_pieces=piece::t.re_pieces;am_at_start_of_pattern=false;am_at_start_of_component;}letto_ret=Re.seq(List.revt.re_pieces)letnextt=matcht.remainingwith|[]->None|piece::remaining->Some(piece,{twithremaining})endletone~explicit_slash~slashes~explicit_period=Re.compl(List.concat[ifexplicit_slashthenList.mapRe.charslasheselse[];ifexplicit_periodthen[Re.char'.']else[];])letenclosedenclosed=matchenclosedwith|Charc->Re.charc|Range(low,high)->Re.rglowhighletenclosed_set~explicit_slash~slashes~explicit_periodkindset=letset=List.mapenclosedsetinletenclosure=matchkindwith|`Any_of->Re.altset|`Any_but->Re.complsetinRe.inter[enclosure;one~explicit_slash~slashes~explicit_period]letexactlystatec=letslashes=State.slashesstateinletam_at_start_of_component=List.memcslashesinletchars=ifam_at_start_of_componentthenslasheselse[c]inState.appendstate(Re.alt(List.mapRe.charchars))~am_at_start_of_componentletmany_manystate=letexplicit_period=state.State.period&&state.State.pathnameinletfirst_explicit_period=State.explicit_periodstateinletslashes=State.slashesstateinletmatch_component~explicit_period=Re.seq[one~explicit_slash:true~slashes~explicit_period;Re.rep(one~explicit_slash:true~slashes~explicit_period:false);]in(* We must match components individually when [period] flag is set,
making sure to not match ["foo/.bar"]. *)State.appendstate(Re.seq[Re.opt(match_component~explicit_period:first_explicit_period);Re.rep(Re.seq[Re.alt(List.mapRe.charslashes);Re.opt(match_component~explicit_period);]);])letmany(state:State.t)=letexplicit_slash=State.explicit_slashstateinletexplicit_period=State.explicit_periodstateinletslashes=State.slashesstatein(* Whether we must explicitly match period depends on the surrounding characters, but
slashes are easy to explicit match. This conditional splits out some simple cases.
*)ifnotexplicit_periodthenbeginState.appendstate(Re.rep(one~explicit_slash~slashes~explicit_period))endelseifnotexplicit_slashthenbegin(* In this state, we explicitly match periods only at the very beginning *)State.appendstate(Re.opt(Re.seq[one~explicit_slash:false~slashes~explicit_period;Re.rep(one~explicit_slash:false~slashes~explicit_period:false);]))endelsebeginletnot_empty=Re.seq[one~explicit_slash:true~slashes~explicit_period:true;Re.rep(one~explicit_slash:true~slashes~explicit_period:false);]in(* [maybe_empty] is the default translation of Many, except in some special cases.
*)letmaybe_empty=Re.optnot_emptyinletenclosed_setstatekindset=State.appendstate(Re.alt[enclosed_setkindset~explicit_slash:true~slashes~explicit_period:true;Re.seq[not_empty;(* Since [not_empty] matched, subsequent dots are not leading. *)enclosed_setkindset~explicit_slash:true~slashes~explicit_period:false;];])inletreclookaheadstate=matchState.nextstatewith|None->State.appendstatemaybe_empty(* glob ** === glob * . *)|Some(Many,state)->lookaheadstate|Some(Exactlyc,state)->letstate=State.appendstate(ifc='.'thennot_emptyelsemaybe_empty)inexactlystatec(* glob *? === glob ?* *)|Some(One,state)->State.appendstatenot_empty|Some(Any_ofenclosed,state)->enclosed_setstate`Any_ofenclosed|Some(Any_butenclosed,state)->enclosed_setstate`Any_butenclosed(* * then ** === ** *)|Some(ManyMany,state)->many_manystateinlookaheadstateendletpiecestatepiece=letexplicit_slash=State.explicit_slashstateinletexplicit_period=State.explicit_periodstateinletslashes=State.slashesstateinmatchpiecewith|One->State.appendstate(one~explicit_slash~slashes~explicit_period)|Many->manystate|Any_ofenclosed->State.appendstate(enclosed_set`Any_of~explicit_slash~slashes~explicit_periodenclosed)|Any_butenclosed->State.appendstate(enclosed_set`Any_but~explicit_slash~slashes~explicit_periodenclosed)|Exactlyc->exactlystatec|ManyMany->many_manystateletglob~pathname~match_backslashes~periodglob=letrecloopstate=matchState.nextstatewith|None->State.to_restate|Some(p,state)->loop(piecestatep)inloop(State.create~pathname~match_backslashes~periodglob)letglob?(anchored=false)?(pathname=true)?(match_backslashes=false)?(period=true)?(expand_braces=false)?(double_asterisk=true)s=letto_res=letre=glob~pathname~match_backslashes~period(of_string~double_asterisks)inifanchoredthenRe.whole_stringreelsereinifexpand_bracesthenRe.alt(List.mapto_re(explodes))elseto_resletglob'?anchoredperiods=glob?anchored~periodsletglobx?anchoreds=glob?anchored~expand_braces:truesletglobx'?anchoredperiods=glob?anchored~expand_braces:true~periods