(*
RE - A regular expression library
Copyright (C) 2001 Jerome Vouillon
email: Jerome.Vouillon@pps.jussieu.fr
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation, with
linking exception; either version 2.1 of the License, or (at
your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*)

open Import

(* Predefined character classes. Every value below wraps a character set
   taken from [Cset] into a regular expression through [Ast.cset]. *)
include struct
  let cset = Ast.cset

  (* Wraps the set built by [Cset.csingle] for the character [c]. *)
  let char c = Cset.csingle c |> cset

  (* Wraps the set built by [Cset.cseq] from the two given characters;
     presumably an inclusive range -- TODO confirm against [Cset]. *)
  let rg lo hi = Cset.cseq lo hi |> cset

  let any = cset Cset.cany
  let notnl = cset Cset.notnl
  let lower = cset Cset.lower
  let upper = cset Cset.upper
  let alpha = cset Cset.alpha
  let digit = cset Cset.cdigit
  let alnum = cset Cset.alnum
  let wordc = cset Cset.wordc
  let ascii = cset Cset.ascii
  let blank = cset Cset.blank
  let cntrl = cset Cset.cntrl
  let graph = cset Cset.graph
  let print = cset Cset.print
  let punct = cset Cset.punct
  let space = cset Cset.space
  let xdigit = cset Cset.xdigit
end

include Ast.Export

(* Shared entry point of all the [exec*] functions: forwards every argument
   to [Compile.match_str]. [pos] defaults to 0 and [len] to -1 (the meaning
   of a negative length is decided by [Compile.match_str]; presumably
   it stands for the rest of the string -- TODO confirm). *)
let exec_internal ?(pos = 0) ?(len = -1) ~partial ~groups re s =
  Compile.match_str ~pos ~len ~groups ~partial re s

(* Runs a complete (non partial) match with groups enabled and returns the
   matched groups. Raises [Not_found] for any other outcome. *)
let exec ?pos ?len re s =
  match exec_internal ?pos ?len ~groups:true ~partial:false re s with
  | Match substr -> substr
  | Running _ | Failed -> raise Not_found

(* Same as [exec], but reports the absence of a match with [None] instead of
   raising. *)
let exec_opt ?pos ?len re s =
  match exec_internal ?pos ?len ~groups:true ~partial:false re s with
  | Match substr -> Some substr
  | Running _ | Failed -> None

(* Predicate version: [true] exactly when the outcome is [Match]. Groups are
   not requested since they are discarded. *)
let execp ?pos ?len re s =
  match exec_internal ?pos ?len ~groups:false ~partial:false re s with
  | Match _ -> true
  | Running _ | Failed -> false

(* Partial matching without groups. Maps the three outcomes of
   [exec_internal] onto [`Full], [`Partial] and [`Mismatch]. *)
let exec_partial ?pos ?len re s =
  match exec_internal ?pos ?len ~groups:false ~partial:true re s with
  | Match _ -> `Full
  | Running _ -> `Partial
  | Failed -> `Mismatch

(* Partial matching with groups. [`Full] carries the matched groups and
   [`Partial] carries the [no_match_starts_before] field of the [Running]
   outcome. *)
let exec_partial_detailed ?pos ?len re s =
  match exec_internal ?pos ?len ~groups:true ~partial:true re s with
  | Match group -> `Full group
  | Running { no_match_starts_before } -> `Partial no_match_starts_before
  | Failed -> `Mismatch

(* Marks, re-exported from [Pmark], together with queries on the mark set
   stored in the [pmarks] field of a group. *)
module Mark = struct
  type t = Pmark.t

  (* Membership of [mark] in the marks recorded by [group]. *)
  let test (group : Group.t) mark = Pmark.Set.mem mark group.pmarks

  (* The whole mark set recorded by [group]. *)
  let all (group : Group.t) = group.pmarks

  module Set = Pmark.Set

  let equal = Pmark.equal
  let compare = Pmark.compare
end

type split_token =
  [ `Text of string
  | `Delim of Group.t
  ]

(* Generator flavour of the [Search] functions: each result is a closure
   that hands out the next element on every call. *)
module Gen = struct
  type 'a gen = unit -> 'a option

  (* Turns a sequence into a generator. The unconsumed tail is kept in a
     reference, so each call forces exactly one more node of [s]. Once the
     sequence reports [Seq.Nil] the reference is left untouched, so later
     calls force that same tail again. *)
  let gen_of_seq (s : 'a Seq.t) : 'a gen =
    let rest = ref s in
    let next () =
      match !rest () with
      | Seq.Nil -> None
      | Seq.Cons (item, tail) ->
        rest := tail;
        Some item
    in
    next

  let split ?pos ?len re s : _ gen = gen_of_seq (Search.split ?pos ?len re s)

  let split_full ?pos ?len re s : _ gen =
    gen_of_seq (Search.split_full ?pos ?len re s)

  let all ?pos ?len re s = gen_of_seq (Search.all ?pos ?len re s)
  let matches ?pos ?len re s = gen_of_seq (Search.matches ?pos ?len re s)
end

module Group = Group

(** {2 Deprecated functions} 
*)letsplit_full_seq=Search.split_fullletsplit_seq=Search.splitletmatches_seq=Search.matchesletall_seq=Search.alltype'agen='aGen.genletall_gen=Gen.allletmatches_gen=Gen.matchesletsplit_gen=Gen.splitletsplit_full_gen=Gen.split_fulltypesubstrings=Group.tletget=Group.getletget_ofs=Group.offsetletget_all=Group.allletget_all_ofs=Group.all_offsetlettest=Group.testtypemarkid=Mark.tletmarked=Mark.testletmark_set=Mark.all(**********************************)(*
Information about the previous character:
- does not exist
- is a letter
- is not a letter
- is a newline
- is last newline
Beginning of word:
- previous is not a letter or does not exist
- current is a letter or does not exist
End of word:
- previous is a letter or does not exist
- current is not a letter or does not exist
Beginning of line:
- previous is a newline or does not exist
Beginning of buffer:
- previous does not exist
End of buffer:
- current does not exist
End of line:
- current is a newline or does not exist
*)(*
Rep: e = T,e | ()
- semantics of the comma (shortest/longest/first)
- semantics of the union (greedy/non-greedy)
Bounded repetition
a{0,3} = (a,(a,a?)?)?
*)

type groups = Group.t

(* List flavour of the [Search] functions: each one runs the corresponding
   sequence to completion and returns its elements as a list. *)
module List = struct
  (* Collects the elements of [s] in their original order. The fold builds
     the list back to front, hence the final reversal. [List] here is the
     list module already in scope, not the module being defined. *)
  let list_of_seq (s : 'a Seq.t) : 'a list =
    let prepend acc item = item :: acc in
    List.rev (Seq.fold_left prepend [] s)

  let all ?pos ?len re s = list_of_seq (Search.all ?pos ?len re s)
  let matches ?pos ?len re s = list_of_seq (Search.matches ?pos ?len re s)
  let split_full ?pos ?len re s = list_of_seq (Search.split_full ?pos ?len re s)
  let split ?pos ?len re s = list_of_seq (Search.split ?pos ?len re s)
  let split_delim ?pos ?len re s = list_of_seq (Search.split_delim ?pos ?len re s)
end

(* The list-returning functions are also available at top level. *)
include List

(* Re-exports from [Compile]: the compiled regular expression type and the
   functions operating on it. *)
type re = Compile.re

let compile = Compile.compile
let pp_re = Compile.pp_re
let print_re = Compile.print_re
let group_names = Compile.group_names
let group_count = Compile.group_count

(* From here on [Seq] names the search functions rather than the standard
   sequence module. *)
module Seq = Search