123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321(*
RE - A regular expression library
Copyright (C) 2001 Jerome Vouillon
email: Jerome.Vouillon@pps.jussieu.fr
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation, with
linking exception; either version 2.1 of the License, or (at
your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*)moduleRe=CoreexceptionParse_errortypeenclosed=|Charofchar|Rangeofchar*chartypepiece=|Exactlyofchar|Any_ofofenclosedlist|Any_butofenclosedlist|One|Many|ManyManytypet=piecelistletof_string~double_asterisks:t=letbuf=Parse_buffer.createsinleteos()=Parse_buffer.eosbufinletreadc=Parse_buffer.acceptbufcinletchar()=ignore(read'\\':bool);ifeos()thenraiseParse_error;Parse_buffer.getbufinletenclosed():enclosedlist=letrecloops=(* This returns the list in reverse order, but order isn't important
anyway *)ifs<>[]&&read']'thenselse(letc=char()inifnot(read'-')thenloop(Charc::s)elseifread']'thenCharc::Char'-'::selse(letc'=char()inloop(Range(c,c')::s)))inloop[]inletpieceacc=ifdouble_asterisk&&Parse_buffer.accept_sbuf"/**"thenManyMany::(ifeos()thenExactly'/'::accelseacc)elseifread'*'then(ifdouble_asterisk&&read'*'thenManyManyelseMany)::accelseifread'?'thenOne::accelseifnot(read'[')thenExactly(char())::accelseifread'^'||read'!'thenAny_but(enclosed())::accelseAny_of(enclosed())::accinletreclooppieces=ifeos()thenList.revpieceselseloop(piecepieces)inloop[];;letmulll'=List.flatten(List.map(funs->List.map(funs'->s^s')l')l)letexplodestr=letl=String.lengthstrinletrecexplinnersiaccbeg=ifi>=lthen(ifinnerthenraiseParse_error;mulbeg[String.substrs(i-s)],i)else(matchstr.[i]with|'\\'->explinners(i+2)accbeg|'{'->lett,i'=expltrue(i+1)(i+1)[][""]inexplinneri'i'acc(mulbeg(mul[String.substrs(i-s)]t))|','wheninner->explinner(i+1)(i+1)(mulbeg[String.substrs(i-s)]@acc)[""]|'}'wheninner->mulbeg[String.substrs(i-s)]@acc,i+1|_->explinners(i+1)accbeg)inList.rev(fst(explfalse00[][""]));;moduleState=structtypet={re_pieces:Re.tlist(* last piece at head of list. *);remaining:piecelist(* last piece at tail of list. *);am_at_start_of_pattern:bool(* true at start of pattern *);am_at_start_of_component:bool(* true at start of pattern or immediately
after '/' *);pathname:bool;match_backslashes:bool;period:bool}letcreate~period~pathname~match_backslashesremaining={re_pieces=[];am_at_start_of_pattern=true;am_at_start_of_component=true;pathname;match_backslashes;period;remaining};;letexplicit_periodt=t.period&&(t.am_at_start_of_pattern||(t.am_at_start_of_component&&t.pathname));;letexplicit_slasht=t.pathnameletslashest=ift.match_backslashesthen['/';'\\']else['/']letappend?(am_at_start_of_component=false)tpiece={twithre_pieces=piece::t.re_pieces;am_at_start_of_pattern=false;am_at_start_of_component};;letto_ret=Re.seq(List.revt.re_pieces)letnextt=matcht.remainingwith|[]->None|piece::remaining->Some(piece,{twithremaining});;endletone~explicit_slash~slashes~explicit_period=Re.compl(List.concat[(ifexplicit_slashthenList.mapRe.charslasheselse[]);(ifexplicit_periodthen[Re.char'.']else[])]);;letenclosedenclosed=matchenclosedwith|Charc->Re.charc|Range(low,high)->Re.rglowhigh;;letenclosed_set~explicit_slash~slashes~explicit_periodkindset=letset=List.mapenclosedsetinletenclosure=matchkindwith|`Any_of->Re.altset|`Any_but->Re.complsetinRe.inter[enclosure;one~explicit_slash~slashes~explicit_period];;letexactlystatec=letslashes=State.slashesstateinletam_at_start_of_component=List.memcslashesinletchars=ifam_at_start_of_componentthenslasheselse[c]inState.appendstate(Re.alt(List.mapRe.charchars))~am_at_start_of_component;;letmany_manystate=letexplicit_period=state.State.period&&state.State.pathnameinletfirst_explicit_period=State.explicit_periodstateinletslashes=State.slashesstateinletmatch_component~explicit_period=Re.seq[one~explicit_slash:true~slashes~explicit_period;Re.rep(one~explicit_slash:true~slashes~explicit_period:false)]in(* We must match components individually when [period] flag is set,
making sure to not match ["foo/.bar"]. *)State.appendstate(Re.seq[Re.opt(match_component~explicit_period:first_explicit_period);Re.rep(Re.seq[Re.alt(List.mapRe.charslashes);Re.opt(match_component~explicit_period)])]);;letmany(state:State.t)=letexplicit_slash=State.explicit_slashstateinletexplicit_period=State.explicit_periodstateinletslashes=State.slashesstatein(* Whether we must explicitly match period depends on the surrounding
characters, but slashes are easy to explicit match. This conditional
splits out some simple cases. *)ifnotexplicit_periodthenState.appendstate(Re.rep(one~explicit_slash~slashes~explicit_period))elseifnotexplicit_slashthen(* In this state, we explicitly match periods only at the very beginning *)State.appendstate(Re.opt(Re.seq[one~explicit_slash:false~slashes~explicit_period;Re.rep(one~explicit_slash:false~slashes~explicit_period:false)]))else(letnot_empty=Re.seq[one~explicit_slash:true~slashes~explicit_period:true;Re.rep(one~explicit_slash:true~slashes~explicit_period:false)]in(* [maybe_empty] is the default translation of Many, except in some special
cases. *)letmaybe_empty=Re.optnot_emptyinletenclosed_setstatekindset=State.appendstate(Re.alt[enclosed_setkindset~explicit_slash:true~slashes~explicit_period:true;Re.seq[not_empty;(* Since [not_empty] matched, subsequent dots are not leading. *)enclosed_setkindset~explicit_slash:true~slashes~explicit_period:false]])inletreclookaheadstate=matchState.nextstatewith|None->State.appendstatemaybe_empty(* glob ** === glob * . *)|Some(Many,state)->lookaheadstate|Some(Exactlyc,state)->letstate=State.appendstate(ifc='.'thennot_emptyelsemaybe_empty)inexactlystatec(* glob *? === glob ?* *)|Some(One,state)->State.appendstatenot_empty|Some(Any_ofenclosed,state)->enclosed_setstate`Any_ofenclosed|Some(Any_butenclosed,state)->enclosed_setstate`Any_butenclosed(* * then ** === ** *)|Some(ManyMany,state)->many_manystateinlookaheadstate);;letpiecestatepiece=letexplicit_slash=State.explicit_slashstateinletexplicit_period=State.explicit_periodstateinletslashes=State.slashesstateinmatchpiecewith|One->State.appendstate(one~explicit_slash~slashes~explicit_period)|Many->manystate|Any_ofenclosed->State.appendstate(enclosed_set`Any_of~explicit_slash~slashes~explicit_periodenclosed)|Any_butenclosed->State.appendstate(enclosed_set`Any_but~explicit_slash~slashes~explicit_periodenclosed)|Exactlyc->exactlystatec|ManyMany->many_manystate;;letglob~pathname~match_backslashes~periodglob=letrecloopstate=matchState.nextstatewith|None->State.to_restate|Some(p,state)->loop(piecestatep)inloop(State.create~pathname~match_backslashes~periodglob);;letglob?(anchored=false)?(pathname=true)?(match_backslashes=false)?(period=true)?(expand_braces=false)?(double_asterisk=true)s=letto_res=letre=glob~pathname~match_backslashes~period(of_string~double_asterisks)inifanchoredthenRe.whole_stringreelsereinifexpand_bracesthenRe.alt(List.mapto_re(explodes))elseto_res;;letglob'?anchoredperiods=glob?anchored~periodsletglobx?anchoreds=glob?anchored~expand_braces:truesletglobx'?anchoredperiods=glob?anchored~expand_braces:true~periods