(*********************************************************************************)
(*                OCaml-IRI                                                      *)
(*                                                                               *)
(*    Copyright (C) 2016 Institut National de Recherche en Informatique          *)
(*    et en Automatique. All rights reserved.                                    *)
(*                                                                               *)
(*    This program is free software; you can redistribute it and/or modify       *)
(*    it under the terms of the GNU Lesser General Public License version        *)
(*    3 as published by the Free Software Foundation.                            *)
(*                                                                               *)
(*    This program is distributed in the hope that it will be useful,            *)
(*    but WITHOUT ANY WARRANTY; without even the implied warranty of             *)
(*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the               *)
(*    GNU Library General Public License for more details.                       *)
(*                                                                               *)
(*    You should have received a copy of the GNU Lesser General Public           *)
(*    License along with this program; if not, write to the Free Software        *)
(*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA                   *)
(*    02111-1307 USA                                                             *)
(*                                                                               *)
(*    Contact: Maxence.Guesdon@inria.fr                                          *)
(*                                                                               *)
(*                                                                               *)
(*********************************************************************************)

(* Core rules from ABNF http://tools.ietf.org/html/rfc2234 *)

let digit = [%sedlex.regexp? '0'..'9']
let alpha = [%sedlex.regexp? 'a'..'z' | 'A'..'Z']
let bit = [%sedlex.regexp? '0' | '1']
let char = [%sedlex.regexp? 0x01 .. 0x7F] (* any 7-bit US-ASCII character, excluding NUL *)
let cr = [%sedlex.regexp? 0x0D] (* carriage return *)
let lf = [%sedlex.regexp? 0x0A] (* line feed *)
let crlf = [%sedlex.regexp? cr, lf] (* Internet standard newline *)
let ctl = [%sedlex.regexp? 0x00 .. 0x1F | 0x7F] (* controls *)
let dquote = [%sedlex.regexp? '"'] (* Double Quote, \x22 *)
let hexdig = [%sedlex.regexp? digit | 'A'..'F' | 'a'..'f'] (* we tolerate lowercase *)
let htab = [%sedlex.regexp? 0x09] (* horizontal tab *)
let sp = [%sedlex.regexp? ' '] (* space, \x20 *)
let wsp = [%sedlex.regexp? sp | htab] (* white space *)

(* linear white space (past newline).
   RFC 2234 defines LWSP as zero or more of: WSP, or CRLF followed by WSP.
   The alternation must therefore be between wsp and the sequence (crlf, wsp),
   not between wsp and crlf. *)
let lwsp = [%sedlex.regexp? Star (wsp | (crlf, wsp))]
let octect = [%sedlex.regexp? 0x00 .. 0xFF] (* 8 bits of data *)
let vchar = [%sedlex.regexp? 0x21 .. 0x7E] (* visible (printing) characters *)

(* tools to handle locations in lexbuf *)

(* Build a Lexing.position from its components; file defaults to the
   empty string. *)
let pos ?(file="") ~line ~bol ~char () =
  Lexing.{
    pos_lnum = line ;
    pos_bol = bol ;
    pos_cnum = char ;
    pos_fname = file ;
  }

(* A location is a pair of start and stop positions. *)
type loc = { loc_start: Lexing.position; loc_stop: Lexing.position }

(* A value together with an optional location. *)
type 'a with_loc = 'a * loc option

(* Payload of the Error exception: where it happened and a message. *)
type error = loc * string
exception Error of error

(* Raise Error at the given location; msg defaults to a generic
   parse error message. *)
let error ?(msg="Parse error") loc = raise (Error (loc, msg))

(* Render a location as text: an optional file prefix (omitted when the
   file name is empty), the start line, and the start column computed as
   pos_cnum - pos_bol. When the span is longer than one character the end
   column is appended and the word is pluralised. If start and stop are in
   different files the length is taken to be 1. *)
let string_of_loc loc =
  let open Lexing in
  let start = loc.loc_start in
  let stop = loc.loc_stop in
  let line = start.pos_lnum in
  let char = start.pos_cnum - start.pos_bol in
  let len =
    if start.pos_fname = stop.pos_fname then
      stop.pos_cnum - start.pos_cnum
    else
      1
  in
  let file = start.pos_fname in
  Printf.sprintf "%sline %d, character%s %d%s"
    (match file with
     | "" -> ""
     | _ -> Printf.sprintf "File %S, " file)
    line
    (if len > 1 then "s" else "")
    char
    (if len > 1 then Printf.sprintf "-%d" (char + len) else "")

(* Like Printf.sprintf, but when a location is given the formatted message
   is prefixed by that location, a colon and a newline. *)
let loc_sprintf loc fmt =
  match loc with
  | None -> Printf.sprintf fmt
  | Some loc ->
      Printf.ksprintf
        (fun s -> Printf.sprintf "%s:\n%s" (string_of_loc loc) s)
        fmt

(* Render an error payload as the location, a colon, and the message. *)
let string_of_error (loc, str) =
  Printf.sprintf "%s: %s" (string_of_loc loc) str

(* Build a location from its two positions. *)
let loc loc_start loc_stop = { loc_start ; loc_stop }

(* Location starting at pos and spanning len characters. *)
let loc_of_pos pos len =
  { loc_start = pos ;
    loc_stop = Lexing.{ pos with pos_cnum = pos.pos_cnum + len } ;
  }

(* Raise Error on the single character located at pos. *)
let error_pos ?msg pos = error ?msg (loc_of_pos pos 1)

let nl_char = Uchar.of_char '\n'

(* Advance pos over the UTF-8 string str. Every decoded character moves
   pos_cnum forward by one; a newline additionally increments pos_lnum and
   resets pos_bol. Malformed UTF-8 raises Error at the current position.
   NOTE(review): pos_bol is set to the offset of the newline itself, not of
   the character after it, so columns on following lines come out one higher
   than on the first line - confirm whether this is intended. *)
let update_pos pos str =
  let open Lexing in
  let f pos _i = function
  | `Malformed msg -> error ~msg (loc_of_pos pos 1)
  | `Uchar c when Uchar.equal c nl_char ->
      let bol = pos.pos_cnum in
      { pos with
        pos_lnum = pos.pos_lnum + 1 ;
        pos_bol = bol ;
        pos_cnum = pos.pos_cnum + 1 ;
      }
  | _ -> { pos with pos_cnum = pos.pos_cnum + 1 }
  in
  Uutf.String.fold_utf_8 f pos str

(* Current lexeme as a UTF-8 string; a Sedlexing.MalFormed failure is
   turned into Error located at pos. *)
let lexeme pos lexbuf =
  try Sedlexing.Utf8.lexeme lexbuf
  with Sedlexing.MalFormed ->
    error_pos ~msg: "Malformed character in lexeme" pos

(* Position obtained by advancing pos over the current lexeme. *)
let upd pos lexbuf = update_pos pos (lexeme pos lexbuf)

(* rules from IRI RFC *)

let ucschar = [%sedlex.regexp?
    0xA0 .. 0xD7FF
  | 0xF900 .. 0xFDCF
  | 0xFDF0 .. 0xFFEF
  | 0x10000 .. 0x1FFFD
  | 0x20000 .. 0x2FFFD
  | 0x30000 .. 0x3FFFD
  | 0x40000 .. 0x4FFFD
  | 0x50000 .. 0x5FFFD
  | 0x60000 .. 0x6FFFD
  | 0x70000 .. 0x7FFFD
  | 0x80000 .. 0x8FFFD
  | 0x90000 .. 0x9FFFD
  | 0xA0000 .. 0xAFFFD
  | 0xB0000 .. 0xBFFFD
  | 0xC0000 .. 0xCFFFD
  | 0xD0000 .. 0xDFFFD
  | 0xE1000 .. 0xEFFFD
  ]

let iprivate = [%sedlex.regexp?
    0xE000 .. 0xF8FF
  | 0xF0000 .. 0xFFFFD
  | 0x100000 .. 0x10FFFD
  ]

let iunreserved = [%sedlex.regexp? alpha | digit | Chars "-._~" | ucschar]
let pct_encoded = [%sedlex.regexp? '%', hexdig, hexdig]
let gen_delims = [%sedlex.regexp? Chars ":/?#[]@"]
let sub_delims = [%sedlex.regexp? Chars "!$&'()*+,;="]
let iuserinfo = [%sedlex.regexp? Star (iunreserved | pct_encoded | sub_delims | ':')]
let unreserved = [%sedlex.regexp? alpha | digit | Chars "-._~"]

(* decimal number in the range 0-255, without leading zeros *)
let dec_octet = [%sedlex.regexp?
    digit
  | ('1'..'9', digit)
  | ('1', digit, digit)
  | ('2', '0'..'4', digit)
  | ("25", '0'..'5')
  ]

let ipv4address = [%sedlex.regexp?
    dec_octet, '.', dec_octet, '.', dec_octet, '.', dec_octet]

let ipvfuture = [%sedlex.regexp?
    'v', Plus (hexdig), '.', Plus (unreserved | sub_delims | ':')]

(* one to four hexadecimal digits *)
let h16 = [%sedlex.regexp?
    hexdig
  | (hexdig, hexdig)
  | (hexdig, hexdig, hexdig)
  | (hexdig, hexdig, hexdig, hexdig)
  ]

let ls32 = [%sedlex.regexp? (h16, ':', h16) | ipv4address]

(* IPv6address from RFC 3986, section 3.2.2, one alternative per line of the
   ABNF. In the ABNF the groups standing before the double colon are optional
   as a whole, so the entire prefix is wrapped in Opt; this is what lets
   compressed forms with nothing before the double colon, such as ::1 or ::,
   be recognised. *)
let ipv6address = [%sedlex.regexp?
    (h16, ':', h16, ':', h16, ':', h16, ':', h16, ':', h16, ':', ls32)
  | ("::", h16, ':', h16, ':', h16, ':', h16, ':', h16, ':', ls32)
  | (Opt h16,
     "::", h16, ':', h16, ':', h16, ':', h16, ':', ls32)
  | (Opt (Opt (h16, ':'), h16),
     "::", h16, ':', h16, ':', h16, ':', ls32)
  | (Opt (Opt (h16, ':'), Opt (h16, ':'), h16),
     "::", h16, ':', h16, ':', ls32)
  | (Opt (Opt (h16, ':'), Opt (h16, ':'), Opt (h16, ':'), h16),
     "::", h16, ':', ls32)
  | (Opt (Opt (h16, ':'), Opt (h16, ':'), Opt (h16, ':'), Opt (h16, ':'), h16),
     "::", ls32)
  | (Opt (Opt (h16, ':'), Opt (h16, ':'), Opt (h16, ':'), Opt (h16, ':'),
          Opt (h16, ':'), h16),
     "::", h16)
  | (Opt (Opt (h16, ':'), Opt (h16, ':'), Opt (h16, ':'), Opt (h16, ':'),
          Opt (h16, ':'), Opt (h16, ':'), h16),
     "::")
  ]

let ip_literal = [%sedlex.regexp? '[', (ipv6address | ipvfuture), ']']
let ireg_name = [%sedlex.regexp? Star (iunreserved | pct_encoded | sub_delims)]
let ihost = [%sedlex.regexp? ip_literal | ipv4address | ireg_name]
let port = [%sedlex.regexp? Star (digit)]
let ipchar_noat = [%sedlex.regexp? iunreserved | pct_encoded | sub_delims | ':']
let ipchar = [%sedlex.regexp? ipchar_noat | '@']

open Iri_types

(* Try to read a fragment at the current point of lexbuf.
   Returns the updated position and Some text (without the leading hash sign)
   when a fragment is present. A hash sign immediately followed by a character
   that is not allowed in a fragment raises Error at pos. Otherwise the lexbuf
   is rolled back and (pos, None) is returned. *)
let fragment_opt pos lexbuf =
  match%sedlex lexbuf with
  | '#', Star (ipchar | '/' | '?') ->
      let str = lexeme pos lexbuf in
      let len = String.length str in
      let pos = upd pos lexbuf in
      (* drop the leading hash sign *)
      (pos, Some (String.sub str 1 (len - 1)))
  | '#', any -> error_pos pos
  | _ ->
      Sedlexing.rollback lexbuf ;
      (pos, None)

(* Try to read a query at the current point of lexbuf.
   Same contract as fragment_opt, with a question mark as the introducer and
   iprivate characters additionally allowed in the body. *)
let query_opt pos lexbuf =
  match%sedlex lexbuf with
  | '?', Star (ipchar | iprivate | '/' | '?') ->
      let str = lexeme pos lexbuf in
      let len = String.length str in
      let pos = upd pos lexbuf in
      (* drop the leading question mark *)
      (pos, Some (String.sub str 1 (len - 1)))
  | '?', any -> error_pos pos
  | _ ->
      Sedlexing.rollback lexbuf ;
      (pos, None)

(* Read as many slash-introduced segments as possible. Each segment is stored
   without its leading slash (so it may be the empty string). acc holds the
   segments read so far in reverse order. Returns the updated position and the
   segments in reading order; the lexbuf is rolled back when no further
   segment is found. *)
let rec isegment_list acc pos lexbuf =
  match%sedlex lexbuf with
  | '/', Star (ipchar) ->
      let str = lexeme pos lexbuf in
      let len = String.length str in
      let pos = upd pos lexbuf in
      isegment_list ((String.sub str 1 (len - 1)) :: acc) pos lexbuf
  | _ ->
      Sedlexing.rollback lexbuf ;
      (pos, List.rev acc)

(* Read a possibly empty sequence of slash-introduced segments and return it
   wrapped in the Absolute constructor, together with the updated position. *)
let ipath_abempty pos lexbuf =
  let (pos, path) = isegment_list [] pos lexbuf in
  (pos, Absolute path)

let iauthority pos lexbuf =
  match%sedlex