123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201(**************************************************************************)(* *)(* OCaml *)(* *)(* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)(* *)(* Copyright 1996 Institut National de Recherche en Informatique et *)(* en Automatique. *)(* *)(* All rights reserved. This file is distributed under the terms of *)(* the GNU Lesser General Public License version 2.1, with the *)(* special exception on linking described in the file LICENSE. *)(* *)(**************************************************************************)typetoken=Kwdofstring|Identofstring|Intofint|Floatoffloat|Stringofstring|Charofchar(* The string buffering machinery *)letinitial_buffer=Bytes.create32letbuffer=refinitial_bufferletbufpos=ref0letreset_buffer()=buffer:=initial_buffer;bufpos:=0letstorec=if!bufpos>=Bytes.length!bufferthenbeginletnewbuffer=Bytes.create(2*!bufpos)inBytes.blit!buffer0newbuffer0!bufpos;buffer:=newbufferend;Bytes.set!buffer!bufposc;incrbufposletget_string()=lets=Bytes.sub_string!buffer0!bufposinbuffer:=initial_buffer;s(* The lexer *)letmake_lexerkeywords=letkwd_table=Hashtbl.create17inList.iter(funs->Hashtbl.addkwd_tables(Kwds))keywords;letident_or_keywordid=tryHashtbl.findkwd_tableidwithNot_found->Identidandkeyword_or_errorc=lets=String.make1cintryHashtbl.findkwd_tableswithNot_found->raise(Stream.Error("Illegal character "^s))inletrecnext_token(strm__:_Stream.t)=matchStream.peekstrm__withSome(' '|'\010'|'\013'|'\009'|'\026'|'\012')->Stream.junkstrm__;next_tokenstrm__|Some('A'..'Z'|'a'..'z'|'_'|'\192'..'\255'asc)->Stream.junkstrm__;lets=strm__inreset_buffer();storec;idents|Some('!'|'%'|'&'|'$'|'#'|'+'|'/'|':'|'<'|'='|'>'|'?'|'@'|'\\'|'~'|'^'|'|'|'*'asc)->Stream.junkstrm__;lets=strm__inreset_buffer();storec;ident2s|Some('0'..'9'asc)->Stream.junkstrm__;lets=strm__inreset_buffer();storec;numbers|Some'\''->Stream.junkstrm__;letc=trycharstrm__withStream.Failure->raise(Stream.Error"")inbeginmatchStream.peekstrm__withSome'\''->Stream.junkstrm__;Some(Charc)|_->raise(Stream.Error"")end|Some'\"'->Stream.junkstrm__;lets=strm__inreset_buffer();Some(String(strings))|Some'-'->Stream.junkstrm__;neg_numberstrm__|Some'('->Stream.junkstrm__;maybe_commentstrm__|Somec->Stream.junkstrm__;Some(keyword_or_errorc)|_->Noneandident(strm__:_Stream.t)=matchStream.peekstrm__withSome('A'..'Z'|'a'..'z'|'\192'..'\255'|'0'..'9'|'_'|'\''asc)->Stream.junkstrm__;lets=strm__instorec;idents|_->Some(ident_or_keyword(get_string()))andident2(strm__:_Stream.t)=matchStream.peekstrm__withSome('!'|'%'|'&'|'$'|'#'|'+'|'-'|'/'|':'|'<'|'='|'>'|'?'|'@'|'\\'|'~'|'^'|'|'|'*'asc)->Stream.junkstrm__;lets=strm__instorec;ident2s|_->Some(ident_or_keyword(get_string()))andneg_number(strm__:_Stream.t)=matchStream.peekstrm__withSome('0'..'9'asc)->Stream.junkstrm__;lets=strm__inreset_buffer();store'-';storec;numbers|_->lets=strm__inreset_buffer();store'-';ident2sandnumber(strm__:_Stream.t)=matchStream.peekstrm__withSome('0'..'9'asc)->Stream.junkstrm__;lets=strm__instorec;numbers|Some'.'->Stream.junkstrm__;lets=strm__instore'.';decimal_parts|Some('e'|'E')->Stream.junkstrm__;lets=strm__instore'E';exponent_parts|_->Some(Int(int_of_string(get_string())))anddecimal_part(strm__:_Stream.t)=matchStream.peekstrm__withSome('0'..'9'asc)->Stream.junkstrm__;lets=strm__instorec;decimal_parts|Some('e'|'E')->Stream.junkstrm__;lets=strm__instore'E';exponent_parts|_->Some(Float(float_of_string(get_string())))andexponent_part(strm__:_Stream.t)=matchStream.peekstrm__withSome('+'|'-'asc)->Stream.junkstrm__;lets=strm__instorec;end_exponent_parts|_->end_exponent_partstrm__andend_exponent_part(strm__:_Stream.t)=matchStream.peekstrm__withSome('0'..'9'asc)->Stream.junkstrm__;lets=strm__instorec;end_exponent_parts|_->Some(Float(float_of_string(get_string())))andstring(strm__:_Stream.t)=matchStream.peekstrm__withSome'\"'->Stream.junkstrm__;get_string()|Some'\\'->Stream.junkstrm__;letc=tryescapestrm__withStream.Failure->raise(Stream.Error"")inlets=strm__instorec;strings|Somec->Stream.junkstrm__;lets=strm__instorec;strings|_->raiseStream.Failureandchar(strm__:_Stream.t)=matchStream.peekstrm__withSome'\\'->Stream.junkstrm__;begintryescapestrm__withStream.Failure->raise(Stream.Error"")end|Somec->Stream.junkstrm__;c|_->raiseStream.Failureandescape(strm__:_Stream.t)=matchStream.peekstrm__withSome'n'->Stream.junkstrm__;'\n'|Some'r'->Stream.junkstrm__;'\r'|Some't'->Stream.junkstrm__;'\t'|Some('0'..'9'asc1)->Stream.junkstrm__;beginmatchStream.peekstrm__withSome('0'..'9'asc2)->Stream.junkstrm__;beginmatchStream.peekstrm__withSome('0'..'9'asc3)->Stream.junkstrm__;Char.chr((Char.codec1-48)*100+(Char.codec2-48)*10+(Char.codec3-48))|_->raise(Stream.Error"")end|_->raise(Stream.Error"")end|Somec->Stream.junkstrm__;c|_->raiseStream.Failureandmaybe_comment(strm__:_Stream.t)=matchStream.peekstrm__withSome'*'->Stream.junkstrm__;lets=strm__incomments;next_tokens|_->Some(keyword_or_error'(')andcomment(strm__:_Stream.t)=matchStream.peekstrm__withSome'('->Stream.junkstrm__;maybe_nested_commentstrm__|Some'*'->Stream.junkstrm__;maybe_end_commentstrm__|Some_->Stream.junkstrm__;commentstrm__|_->raiseStream.Failureandmaybe_nested_comment(strm__:_Stream.t)=matchStream.peekstrm__withSome'*'->Stream.junkstrm__;lets=strm__incomments;comments|Some_->Stream.junkstrm__;commentstrm__|_->raiseStream.Failureandmaybe_end_comment(strm__:_Stream.t)=matchStream.peekstrm__withSome')'->Stream.junkstrm__;()|Some'*'->Stream.junkstrm__;maybe_end_commentstrm__|Some_->Stream.junkstrm__;commentstrm__|_->raiseStream.Failureinfuninput->Stream.from(fun_count->next_tokeninput)