open Vangstrom

(* [debug] is true when the DEBUG_COMBY environment variable is set (to any
   value). When true, [Make.regexp] traces its progress with Format.printf. *)
let debug =
  match Sys.getenv "DEBUG_COMBY" with
  | exception Not_found -> false
  | _ -> true

(* Minimal interface a regular expression engine must provide to be usable
   with [Make]. Lookups that can fail return an option. *)
module type Regexp_engine_intf = sig
  (* A compiled regular expression. *)
  type t

  (* The result of a successful match. *)
  type substrings

  (* [make pattern] compiles [pattern]. *)
  val make : string -> t

  (* [get_substring s i] is the [i]th substring of the match [s], if any. *)
  val get_substring : substrings -> int -> string option

  (* [get_all_substrings s] is every substring of the match [s]. *)
  val get_all_substrings : substrings -> string array

  (* [exec ~rex ~pos bytes] runs [rex] against [bytes] starting at [pos] and
     returns [None] when there is no match. *)
  val exec : rex:t -> pos:int -> Bytes.t -> substrings option
end

(* A byte buffer together with the position that its first byte corresponds
   to; see [Make.match_regexp], which subtracts [buffer_pos] from the position
   it is given. *)
type t =
  { buffer_pos : int
  ; buffer : bytes
  }

(* I think I should just implement the analog of string_ for regex with some
   bounded buffer size. *)

(* Builds the regexp helpers and the [regexp] parser on top of a concrete
   regular expression engine. *)
module Make (Regexp : Regexp_engine_intf) = struct
  (* https://sourcegraph.com/github.com/comby-tools/mparser/-/blob/src/mParser_Char_Stream.ml#L231:8 *)

  (* [match_regexp s pos rex] runs [rex] against [s.buffer], translating [pos]
     into an offset relative to [s.buffer_pos]. *)
  let match_regexp s pos rex = Regexp.exec ~rex ~pos:(pos - s.buffer_pos) s.buffer

  (* [make_regexp pat] compiles [pat] with the underlying engine. *)
  let make_regexp pat = Regexp.make pat

  (* TODO: tests and blit thing below *)

  (* FIXME: size. about advance => want to use internal unsafe_apply_opt
     actually. cf. string_ in angstrom.ml. instead, trying "do peek, then
     advance/commit." *)

  (* [regexp rex] is a parser that runs [rex] at offset 0 of the bytes that are
     currently peekable. On a match it advances past substring 0 of the match
     and returns it; otherwise it fails with "No match". *)
  let regexp rex =
    (* Why do Unsafe if I can just do peek_string? => So I don't allocate on
       copy of buffer. *)
    (* But it looks like we can't avoid allocation in converting bigstringaf
       to bytes *)
    (* First peek: compute how many bytes the buffer holds from the current
       offset onwards, so the second peek can expose all of them. *)
    Unsafe.peek 1 (fun buffer ~off ~len:_ -> Bigstringaf.length buffer - off)
    >>= fun n ->
    Unsafe.peek n (fun buffer ~off ~len ->
      (* This still does a copy :( *)
      let bytes = Bytes.create len in
      Bigstringaf.unsafe_blit_to_bytes buffer ~src_off:off bytes ~dst_off:0 ~len;
      if debug
      then Format.printf "Matching regex against string: %S@." @@ Bytes.to_string bytes;
      match Regexp.exec ~rex ~pos:0 bytes with
      | None ->
        if debug then Format.printf "None (1)@.";
        None
      | Some substrings ->
        (* Substring 0 is what gets consumed and returned. *)
        (match Regexp.get_substring substrings 0 with
         | None ->
           if debug then Format.printf "None (2)@.";
           None
         | Some result ->
           if debug then Format.printf "Matchy Matchy (3)@.";
           Some (result, String.length result)))
    >>= function
    | Some (result, n) ->
      (* if empty string matches, this hole like for optionals (x?), advance 1. *)
      (* we want to advance one so parsing can continue, but if we advance 1
         here we will think that the match context is at least length 1 and
         not 0 if this hole is the only thing defining the match context *)
      (* let n = if n > 0 then n else 1 in
         advance n >>= fun () -> *)
      if debug then Format.printf "Result indeed: %S len %d@." result n;
      (* Peeking does not consume input, so commit the matched length here. *)
      advance n >>= fun () -> return result
    | None -> fail "No match"
end

(* Regexp parser backed by the Pcre library; patterns are compiled with the
   ANCHORED flag. *)
module PCRE = struct
  module Engine : Regexp_engine_intf = struct
    type t = Pcre.regexp
    type substrings = Pcre.substrings

    let compile_flags = Pcre.cflags [ `ANCHORED ]
    let make pattern = Pcre.regexp ~iflags:compile_flags pattern

    (* Not_found and Invalid_argument from Pcre are mapped to [None]. *)
    let get_substring s idx =
      match Pcre.get_substring s idx with
      | result -> Some result
      | exception Not_found | exception Invalid_argument _ -> None

    let get_all_substrings s = Pcre.get_substrings s

    (* The bytes are viewed as a string without copying; Not_found (no match)
       is mapped to [None]. *)
    let exec ~rex ~pos b =
      match Pcre.exec ~pos ~rex (Bytes.unsafe_to_string b) with
      | result -> Some result
      | exception Not_found -> None
  end

  include Make (Engine)
end

(* Regexp parser backed by the Re library using Perl syntax; patterns are
   compiled with the Anchored option. *)
module RE = struct
  module Engine : Regexp_engine_intf = struct
    type t = Re.re
    type substrings = Re.substrings

    let compile_flags = [ `Anchored ]
    let make pattern = Re.Perl.(compile (re ~opts:compile_flags pattern))

    (* Not_found from Re is mapped to [None]. *)
    let get_substring s idx =
      match Re.get s idx with
      | result -> Some result
      | exception Not_found -> None

    let get_all_substrings s = Re.get_all s

    (* The bytes are viewed as a string without copying; Not_found (no match)
       is mapped to [None]. *)
    let exec ~rex ~pos b =
      match Re.exec ~pos rex (Bytes.unsafe_to_string b) with
      | result -> Some result
      | exception Not_found -> None
  end

  include Make (Engine)
end