123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155(*
RE - A regular expression library
Copyright (C) 2001 Jerome Vouillon
email: Jerome.Vouillon@pps.jussieu.fr
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation, with
linking exception; either version 2.1 of the License, or (at
your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*)(*
What we could (should?) do:
- a* ==> longest ((shortest (no_group a)* ), a | ()) (!!!)
- abc understood as (ab)c
- "((a?)|b)" against "ab" should not bind the first subpattern to anything
Note that it should be possible to handle "(((ab)c)d)e" efficiently
*)moduleRe=CoreexceptionParse_error=Parse_buffer.Parse_errorexceptionNot_supportedletparsenewlines=letbuf=Parse_buffer.createsinletaccept=Parse_buffer.acceptbufinleteos()=Parse_buffer.eosbufinlettestc=Parse_buffer.testbufcinletunget()=Parse_buffer.ungetbufinletget()=Parse_buffer.getbufinletrecregexp()=regexp'(branch())andregexp'left=ifaccept'|'thenregexp'(Re.alt[left;branch()])elseleftandbranch()=branch'[]andbranch'left=ifeos()||test'|'||test')'thenRe.seq(List.revleft)elsebranch'(piece()::left)andpiece()=letr=atom()inifaccept'*'thenRe.rep(Re.nestr)elseifaccept'+'thenRe.rep1(Re.nestr)elseifaccept'?'thenRe.optrelseifaccept'{'then(matchParse_buffer.integerbufwith|Somei->letj=ifaccept','thenParse_buffer.integerbufelseSomeiinifnot(accept'}')thenraiseParse_error;(matchjwith|Somejwhenj<i->raiseParse_error|_->());Re.repn(Re.nestr)ij|None->unget();r)elserandatom()=ifaccept'.'thenifnewlinethenRe.notnlelseRe.anyelseifaccept'('then(letr=regexp()inifnot(accept')')thenraiseParse_error;Re.groupr)elseifaccept'^'thenifnewlinethenRe.bolelseRe.boselseifaccept'$'thenifnewlinethenRe.eolelseRe.eoselseifaccept'['thenifaccept'^'thenRe.diff(Re.compl(bracket[]))(Re.char'\n')elseRe.alt(bracket[])elseifaccept'\\'then(ifeos()thenraiseParse_error;matchget()with|('|'|'('|')'|'*'|'+'|'?'|'['|'.'|'^'|'$'|'{'|'\\')asc->Re.charc|_->raiseParse_error)else(ifeos()thenraiseParse_error;matchget()with|'*'|'+'|'?'|'{'|'\\'->raiseParse_error|c->Re.charc)andbrackets=ifs<>[]&&accept']'thenselse(matchchar()with|`Setst->bracket(st::s)|`Charc->ifaccept'-'thenifaccept']'thenRe.charc::Re.char'-'::selsebracket(matchchar()with|`Charc'->Re.rgcc'::s|`Setst'->Re.charc::Re.char'-'::st'::s)elsebracket(Re.charc::s))andchar()=ifeos()thenraiseParse_error;letc=get()inifc='['then(matchPosix_class.parsebufwith|Someset->`Setset|None->ifaccept'.'then(ifeos()thenraiseParse_error;letc=get()inifnot(accept'.')thenraiseNot_supported;ifnot(accept']')thenraiseParse_error;`Charc)else`Charc)else`Charcinletres=regexp()inifnot(eos())thenraiseParse_error;res;;typeopt=[`ICase|`NoSub|`Newline]letre?(opts=[])s=letr=parse(List.memq`Newlineopts)sinletr=ifList.memq`ICaseoptsthenRe.no_caserelserinletr=ifList.memq`NoSuboptsthenRe.no_grouprelserinr;;letcompilere=Re.compile(Re.longestre)letcompile_pat?(opts=[])s=compile(re~optss)