(*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 *)

open Js_token

(* Lexing modes; the top of [Lex_env.lex_mode_stack] selects which rule [lex] runs. *)
module Lex_mode = struct
  type t =
    | NORMAL
    | BACKQUOTE
    | REGEXP
end

(* Errors the lexer can record, with their user-facing messages. *)
module Parse_error = struct
  type t =
    | Unexpected of string
    | IllegalUnicodeEscape
    | InvalidSciBigInt
    | InvalidFloatBigInt
    | UnterminatedRegExp

  let to_string = function
    | Unexpected unexpected -> Printf.sprintf "Unexpected %s" unexpected
    | IllegalUnicodeEscape -> "Illegal Unicode escape"
    | InvalidSciBigInt -> "A bigint literal cannot use exponential notation"
    | InvalidFloatBigInt -> "A bigint literal must be an integer"
    | UnterminatedRegExp -> "Invalid regular expression: missing /"
end

(* Lexer environment: the sedlex buffer, the errors accumulated since the last
   emitted token, the mode stack and the location of the last emitted token. *)
module Lex_env = struct
  type lex_state = { lex_errors_acc : (Loc.t * Parse_error.t) list } [@@ocaml.unboxed]

  type t =
    { lex_lb : Sedlexing.lexbuf
    ; lex_state : lex_state
    ; lex_mode_stack : Lex_mode.t list
    ; lex_last_loc : Loc.t ref
    }
  [@@ocaml.warning "-69"]

  let empty_lex_state = { lex_errors_acc = [] }

  let create lex_lb =
    { lex_lb
    ; lex_state = empty_lex_state
    ; lex_mode_stack = [ Lex_mode.NORMAL ]
    ; lex_last_loc = ref (Loc.create Lexing.dummy_pos Lexing.dummy_pos)
    }
end

(* Push [mode] on top of the mode stack. *)
let push_mode env mode =
  { env with Lex_env.lex_mode_stack = mode :: env.Lex_env.lex_mode_stack }

(* Drop the top of the mode stack; an empty stack stays empty. *)
let pop_mode env =
  { env with
    Lex_env.lex_mode_stack =
      (match env.Lex_env.lex_mode_stack with
      | [] -> []
      | _ :: xs -> xs)
  }

(* What [lex] hands back for each token: the token, its location and the
   errors recorded while lexing it. *)
module Lex_result = struct
  type t =
    { lex_token : Js_token.t
    ; lex_loc : Loc.t
    ; lex_errors : (Loc.t * Parse_error.t) list
    }
  [@@ocaml.warning "-69"]

  let token result = result.lex_token

  let loc result = result.lex_loc

  let errors result = result.lex_errors
end

let lexeme = Sedlexing.Utf8.lexeme

(* Append the current lexeme (UTF-8 encoded) to buffer [b]. *)
let lexeme_to_buffer lexbuf b = Buffer.add_string b (Sedlexing.Utf8.lexeme lexbuf)

let letter = [%sedlex.regexp? 'a' .. 'z' | 'A' .. 'Z' | '$']

let id_letter = [%sedlex.regexp? letter | '_']

let digit = [%sedlex.regexp? '0' .. '9']

let digit_non_zero = [%sedlex.regexp? '1' .. '9']

let decintlit = [%sedlex.regexp? '0' | '1' .. '9', Star digit]

(* DecimalIntegerLiteral *)

let alphanumeric = [%sedlex.regexp? digit | letter]

let word = [%sedlex.regexp? letter, Star alphanumeric]

let hex_digit = [%sedlex.regexp? digit | 'a' .. 'f' | 'A' .. 'F']

let non_hex_letter = [%sedlex.regexp? 'g' .. 'z' | 'G' .. 'Z' | '$']

let bin_digit = [%sedlex.regexp? '0' | '1']

let oct_digit = [%sedlex.regexp? '0' .. '7']

(* This regex could be simplified to (digit Star (digit OR '_' digit))
 * That makes the underscore and failure cases faster, and the base case take x2-3 the steps
 * As the codebase contains more base cases than underscored or errors, prefer this version *)
let underscored_bin =
  [%sedlex.regexp? Plus bin_digit | bin_digit, Star (bin_digit | '_', bin_digit)]

let underscored_oct =
  [%sedlex.regexp? Plus oct_digit | oct_digit, Star (oct_digit | '_', oct_digit)]

let underscored_hex =
  [%sedlex.regexp? Plus hex_digit | hex_digit, Star (hex_digit | '_', hex_digit)]

let underscored_digit =
  [%sedlex.regexp? Plus digit | digit_non_zero, Star (digit | '_', digit)]

let underscored_decimal = [%sedlex.regexp? Plus digit | digit, Star (digit | '_', digit)]

(* Different ways you can write a number *)

let binnumber = [%sedlex.regexp? '0', ('B' | 'b'), underscored_bin]

let octnumber = [%sedlex.regexp? '0', ('O' | 'o'), underscored_oct]

let legacyoctnumber = [%sedlex.regexp? '0', Plus oct_digit]

(* no underscores allowed *)

let legacynonoctnumber = [%sedlex.regexp? '0', Star oct_digit, '8' .. '9', Star digit]

let hexnumber = [%sedlex.regexp? '0', ('X' | 'x'), underscored_hex]

let scinumber =
  [%sedlex.regexp?
    ( (decintlit, Opt ('.', Opt underscored_decimal) | '.', underscored_decimal)
    , ('e' | 'E')
    , Opt ('-' | '+')
    , underscored_digit )]

let integer = [%sedlex.regexp? underscored_digit]

let floatnumber = [%sedlex.regexp? Opt underscored_digit, '.', underscored_decimal]

let binbigint = [%sedlex.regexp? binnumber, 'n']

let octbigint = [%sedlex.regexp? octnumber, 'n']

let hexbigint = [%sedlex.regexp? hexnumber, 'n']

let wholebigint = [%sedlex.regexp? underscored_digit, 'n']

(* https://tc39.github.io/ecma262/#sec-white-space *)
let whitespace =
  [%sedlex.regexp?
    ( 0x0009
    | 0x000B
    | 0x000C
    | 0x0020
    | 0x00A0
    | 0xfeff
    | 0x1680
    | 0x2000 .. 0x200a
    | 0x202f
    | 0x205f
    | 0x3000 )]

(* minus sign in front of negative numbers
   (only for types! regular numbers use T_MINUS!) *)
let neg = [%sedlex.regexp? '-', Star whitespace]

let line_terminator_sequence = [%sedlex.regexp? '\n' | '\r' | "\r\n" | 0x2028 | 0x2029]

let line_terminator_sequence_start = [%sedlex.regexp? '\n' | '\r' | 0x2028 | 0x2029]

let hex_quad = [%sedlex.regexp? hex_digit, hex_digit, hex_digit, hex_digit]

let unicode_escape = [%sedlex.regexp? "\\u", hex_quad]

let codepoint_escape = [%sedlex.regexp? "\\u{", Plus hex_digit, '}']

let js_id_start = [%sedlex.regexp? '$' | '_' | id_start]

let js_id_continue = [%sedlex.regexp? '$' | '_' | id_continue | 0x200C | 0x200D]

let js_id_start_with_escape =
  [%sedlex.regexp? js_id_start | unicode_escape | codepoint_escape]

let js_id_continue_with_escape =
  [%sedlex.regexp? js_id_continue | unicode_escape | codepoint_escape]

exception Not_an_ident

(* [is_basic_ident s] is [true] when [s] is a non-empty ASCII identifier: its
   first byte is a letter, '_' or '$' and every following byte is one of those
   or a decimal digit.  The empty string is rejected. *)
let is_basic_ident =
  (* Per-byte class table: 1 = may start an identifier, 2 = digit, 0 = other. *)
  let l =
    Array.init 256 (fun i ->
        let c = Char.chr i in
        match c with
        | 'a' .. 'z' | 'A' .. 'Z' | '_' | '$' -> 1
        | '0' .. '9' -> 2
        | _ -> 0)
  in
  fun s ->
    (* Without this guard the loop below never runs for "" and the function
       would answer [true]. *)
    String.length s > 0
    &&
    (try
       for i = 0 to String.length s - 1 do
         let code = l.(Char.code s.[i]) in
         if i = 0
         then (if code <> 1 then raise Not_an_ident)
         else if code < 1
         then raise Not_an_ident
       done;
       true
     with Not_an_ident -> false)

(* [is_valid_identifier_name s] accepts basic ASCII identifiers via the fast
   path, otherwise matches the whole of [s] against
   [js_id_start, Star js_id_continue]. *)
let is_valid_identifier_name s =
  is_basic_ident s
  ||
  let lexbuf = Sedlexing.Utf8.from_string s in
  match%sedlex lexbuf with
  | js_id_start, Star js_id_continue, eof -> true
  | _ -> false

(* Location spanning the current lexeme of [lexbuf]. *)
let loc_of_lexbuf _env (lexbuf : Sedlexing.lexbuf) =
  let start_offset, stop_offset = Sedlexing.lexing_positions lexbuf in
  Loc.create start_offset stop_offset

(* Record [err] at [loc]; errors are accumulated most-recent-first. *)
let lex_error (env : Lex_env.t) loc err : Lex_env.t =
  let lex_errors_acc = (loc, err) :: env.lex_state.lex_errors_acc in
  { env with lex_state = { lex_errors_acc } }

(* Record an [Unexpected] error; an empty [reason] becomes "token ILLEGAL". *)
let illegal (env : Lex_env.t) (loc : Loc.t) reason =
  let reason =
    match reason with
    | "" -> "token ILLEGAL"
    | s -> s
  in
  lex_error env loc (Parse_error.Unexpected reason)

(* Numeric value of the hexadecimal digits [hex] of a unicode escape.
   [int_of_string] raises [Failure] when the digits do not fit in an [int] and
   may yield a negative number for values above [max_int]; both cases are
   mapped to [max_int] so that callers' range checks reject them instead of
   crashing or silently accepting them. *)
let hex_escape_code hex =
  match int_of_string_opt ("0x" ^ hex) with
  | Some code when code >= 0 -> code
  | Some _ | None -> max_int

(* [decode_identifier env loc raw] replaces the [\uXXXX] and [\u{...}] escapes
   of [raw] by the UTF-8 encoding of the designated code points and returns the
   updated environment together with the decoded text.  Every problem is
   recorded as [IllegalUnicodeEscape] at [loc]. *)
let decode_identifier =
  let sub_lexeme lexbuf trim_start trim_end =
    Sedlexing.Utf8.sub_lexeme
      lexbuf
      trim_start
      (Sedlexing.lexeme_length lexbuf - trim_start - trim_end)
  in
  (* [\uXXXX]: skip the two leading characters. *)
  let unicode_escape_code lexbuf = hex_escape_code (sub_lexeme lexbuf 2 0) in
  (* [\u{...}]: skip the three leading characters and the closing brace. *)
  let codepoint_escape_code lexbuf = hex_escape_code (sub_lexeme lexbuf 3 1) in
  let is_high_surrogate c = 0xD800 <= c && c <= 0xDBFF in
  let is_low_surrogate c = 0xDC00 <= c && c <= 0xDFFF in
  let combine_surrogate hi lo =
    (((hi land 0x3FF) lsl 10) lor (lo land 0x3FF)) + 0x10000
  in
  (* Called right after a high surrogate escape [lead].  An error is recorded
     unconditionally; a following low surrogate escape is still combined with
     [lead] so that the decoded text contains the intended code point. *)
  let low_surrogate env loc buf lexbuf lead =
    let env = lex_error env loc Parse_error.IllegalUnicodeEscape in
    let pair trail =
      if is_low_surrogate trail
      then (
        let code = combine_surrogate lead trail in
        Buffer.add_utf_8_uchar buf (Uchar.of_int code);
        env)
      else lex_error env loc Parse_error.IllegalUnicodeEscape
    in
    match%sedlex lexbuf with
    | unicode_escape -> pair (unicode_escape_code lexbuf)
    | codepoint_escape -> pair (codepoint_escape_code lexbuf)
    | _ -> lex_error env loc Parse_error.IllegalUnicodeEscape
  in
  (* Append the code point [code].  [Uchar.of_int] raises [Invalid_argument] on
     anything that is not a Unicode scalar value, so such codes are reported and
     replaced by U+FFFD instead of being converted. *)
  let add_scalar env loc buf code =
    if Uchar.is_valid code
    then (
      Buffer.add_utf_8_uchar buf (Uchar.of_int code);
      env)
    else (
      Buffer.add_utf_8_uchar buf Uchar.rep;
      lex_error env loc Parse_error.IllegalUnicodeEscape)
  in
  let rec id_char env loc buf lexbuf =
    match%sedlex lexbuf with
    | unicode_escape ->
        let code = unicode_escape_code lexbuf in
        let env =
          if is_high_surrogate code
          then low_surrogate env loc buf lexbuf code
          else add_scalar env loc buf code
        in
        id_char env loc buf lexbuf
    | codepoint_escape ->
        let code = codepoint_escape_code lexbuf in
        let env =
          if is_high_surrogate code
          then low_surrogate env loc buf lexbuf code
          else add_scalar env loc buf code
        in
        id_char env loc buf lexbuf
    | eof -> env, Buffer.contents buf
    (* match multi-char substrings that don't contain the start chars of the above patterns *)
    | Plus (Compl (eof | "\\")) | any ->
        lexeme_to_buffer lexbuf buf;
        id_char env loc buf lexbuf
    | _ -> failwith "unreachable id_char"
  in
  fun env loc raw ->
    let lexbuf = Sedlexing.Utf8.from_string raw in
    let buf = Buffer.create (String.length raw) in
    id_char env loc buf lexbuf

(* Record a "recovery" error for the current lexeme, roll the buffer back and
   let [f] lex again from the start of that lexeme. *)
let recover env lexbuf ~f =
  let env = illegal env (loc_of_lexbuf env lexbuf) "recovery" in
  Sedlexing.rollback lexbuf;
  f env lexbuf

(* Outcome of one lexing step: a token, a comment, or nothing to emit yet. *)
type result =
  | Token of Lex_env.t * Js_token.t
  | Comment of Lex_env.t * string
  | Continue of Lex_env.t

(* Register the line breaks of the current lexeme with [Sedlexing.new_line].
   A lone '\r', U+2028 and U+2029 each trigger one call; '\n' (alone or after
   '\r') triggers none here.
   NOTE(review): presumably sedlex already accounts for '\n' itself - confirm. *)
let newline lexbuf =
  let start = Sedlexing.lexeme_start lexbuf in
  let stop = Sedlexing.lexeme_end lexbuf in
  let len = stop - start in
  (* [pending] is set after a '\r' whose line break has not been registered yet. *)
  let pending = ref false in
  for i = 0 to len - 1 do
    match Uchar.to_int (Sedlexing.lexeme_char lexbuf i) with
    | 0x000d -> pending := true
    | 0x000a -> pending := false
    | 0x2028 | 0x2029 ->
        if !pending
        then (
          pending := false;
          Sedlexing.new_line lexbuf);
        Sedlexing.new_line lexbuf
    | _ ->
        if !pending
        then (
          pending := false;
          Sedlexing.new_line lexbuf)
  done;
  if !pending then Sedlexing.new_line lexbuf

(* Body of a [/* ... */] comment, accumulated in [buf] up to and including the
   closing delimiter.  When the default case is reached an error is recorded. *)
let rec comment env buf lexbuf =
  match%sedlex lexbuf with
  | line_terminator_sequence ->
      newline lexbuf;
      lexeme_to_buffer lexbuf buf;
      comment env buf lexbuf
  | "*/" ->
      lexeme_to_buffer lexbuf buf;
      env
  | "*-/" ->
      Buffer.add_string buf "*-/";
      comment env buf lexbuf
  (* match multi-char substrings that don't contain the start chars of the above patterns *)
  | Plus (Compl (line_terminator_sequence_start | '*')) | any ->
      lexeme_to_buffer lexbuf buf;
      comment env buf lexbuf
  | _ ->
      let env = illegal env (loc_of_lexbuf env lexbuf) "" in
      env

(* Consume input up to, but not including, the next line terminator or eof. *)
let drop_line env =
  let lexbuf = env.Lex_env.lex_lb in
  match%sedlex lexbuf with
  | Star (Compl (eof | line_terminator_sequence_start)) -> ()
  | _ -> assert false

(* Body of a [//] comment, accumulated in [buf].  The terminating line break is
   rolled back so that the caller's rule lexes it. *)
let rec line_comment env buf lexbuf =
  match%sedlex lexbuf with
  | eof -> env
  | line_terminator_sequence ->
      Sedlexing.rollback lexbuf;
      env
  (* match multi-char substrings that don't contain the start chars of the above patterns *)
  | Plus (Compl (eof | line_terminator_sequence_start)) | any ->
      lexeme_to_buffer lexbuf buf;
      line_comment env buf lexbuf
  | _ -> failwith "unreachable line_comment"

(* Lex what follows a backslash in a string or template and return it verbatim
   (without the backslash).  Unless [accept_invalid] is set, malformed escapes
   and [\u{...}] values above 0x10FFFF are recorded as errors. *)
let string_escape ~accept_invalid env lexbuf =
  match%sedlex lexbuf with
  | eof | '\\' ->
      let str = lexeme lexbuf in
      env, str
  | 'x', hex_digit, hex_digit ->
      let str = lexeme lexbuf in
      (* 0xAB *)
      env, str
  | '0' .. '7', '0' .. '7', '0' .. '7' ->
      let str = lexeme lexbuf in
      env, str
  | '0' .. '7', '0' .. '7' ->
      let str = lexeme lexbuf in
      (* 0o01 *)
      env, str
  | '0' -> env, "0"
  | 'b' -> env, "b"
  | 'f' -> env, "f"
  | 'n' -> env, "n"
  | 'r' -> env, "r"
  | 't' -> env, "t"
  | 'v' -> env, "v"
  | '0' .. '7' ->
      let str = lexeme lexbuf in
      (* 0o1 *)
      env, str
  | 'u', hex_quad ->
      let str = lexeme lexbuf in
      env, str
  | "u{", Plus hex_digit, '}' ->
      let str = lexeme lexbuf in
      (* Strip the leading "u{" and the trailing "}". *)
      let hex = String.sub str 2 (String.length str - 3) in
      (* Digit runs too long for an [int] come back as [max_int], hence out of range. *)
      let code = hex_escape_code hex in
      (* 11.8.4.1 *)
      let env =
        if code > 0x10FFFF && not accept_invalid
        then illegal env (loc_of_lexbuf env lexbuf) "unicode escape out of range"
        else env
      in
      env, str
  | 'u' | 'x' | '0' .. '7' ->
      let str = lexeme lexbuf in
      let env =
        if accept_invalid then env else illegal env (loc_of_lexbuf env lexbuf) ""
      in
      env, str
  | line_terminator_sequence ->
      newline lexbuf;
      let str = lexeme lexbuf in
      env, str
  | any ->
      let str = lexeme lexbuf in
      env, str
  | _ -> failwith "unreachable string_escape"

(* Really simple version of string lexing. Just try to find beginning and end of
 * string. We can inspect the string later to find invalid escapes, etc *)
let rec string_quote env q buf lexbuf =
  match%sedlex lexbuf with
  | "'" | '"' ->
      let q' = lexeme lexbuf in
      if q = q'
      then env
      else (
        (* The other kind of quote is ordinary content. *)
        Buffer.add_string buf q';
        string_quote env q buf lexbuf)
  | '\\', line_terminator_sequence ->
      (* Line continuation: nothing is added to the buffer. *)
      newline lexbuf;
      string_quote env q buf lexbuf
  | '\\' ->
      let env, str = string_escape ~accept_invalid:false env lexbuf in
      (* The backslash is kept for every escape except an escaped quote. *)
      (match str with
      | "'" | "\"" -> ()
      | _ -> Buffer.add_string buf "\\");
      Buffer.add_string buf str;
      string_quote env q buf lexbuf
  | '\n' ->
      let x = lexeme lexbuf in
      Buffer.add_string buf x;
      let env = illegal env (loc_of_lexbuf env lexbuf) "" in
      string_quote env q buf lexbuf
  (* env, end_pos_of_lexbuf env lexbuf *)
  | eof ->
      let x = lexeme lexbuf in
      Buffer.add_string buf x;
      let env = illegal env (loc_of_lexbuf env lexbuf) "" in
      env
  (* match multi-char substrings that don't contain the start chars of the above patterns *)
  | Plus (Compl ("'" | '"' | '\\' | '\n' | eof)) | any ->
      lexeme_to_buffer lexbuf buf;
      string_quote env q buf lexbuf
  | _ -> failwith "unreachable string_quote"

(* Rule used in NORMAL mode. *)
let token (env : Lex_env.t) lexbuf : result =
  match%sedlex lexbuf with
  | line_terminator_sequence ->
      newline lexbuf;
      Continue env
  | Plus whitespace -> Continue env
  | "/*" ->
      let buf = Buffer.create 127 in
      lexeme_to_buffer lexbuf buf;
      let env = comment env buf lexbuf in
      Comment (env, Buffer.contents buf)
  | "//" ->
      let buf = Buffer.create 127 in
      lexeme_to_buffer lexbuf buf;
      let env = line_comment env buf lexbuf in
      Comment (env, Buffer.contents buf)
  (* Support for the shebang at the beginning of a file. It is treated like a
   * comment at the beginning or an error elsewhere *)
  | "#!" ->
      if Sedlexing.lexeme_start lexbuf = 0
      then
        let env = line_comment env (Buffer.create 127) lexbuf in
        Continue env
      else Token (env, T_ERROR "#!")
  (* Values *)
  | "'" | '"' ->
      let quote = lexeme lexbuf in
      let p1 = Sedlexing.lexeme_start lexbuf in
      let buf = Buffer.create 127 in
      let env = string_quote env quote buf lexbuf in
      let p2 = Sedlexing.lexeme_end lexbuf in
      Token
        ( env
        , T_STRING (Stdlib.Utf8_string.of_string_exn (Buffer.contents buf), p2 - p1 - 1)
        )
  | '`' ->
      let env = push_mode env BACKQUOTE in
      Token (env, T_BACKQUOTE)
  | binbigint, word ->
      (* Numbers cannot be immediately followed by words *)
      recover env lexbuf ~f:(fun env lexbuf ->
          match%sedlex lexbuf with
          | binbigint -> Token (env, T_BIGINT (BIG_BINARY, lexeme lexbuf))
          | _ -> failwith "unreachable token bigint")
  | binbigint -> Token (env, T_BIGINT (BIG_BINARY, lexeme lexbuf))
  | binnumber, (letter | '2' .. '9'), Star alphanumeric ->
      (* Numbers cannot be immediately followed by words *)
      recover env lexbuf ~f:(fun env lexbuf ->
          match%sedlex lexbuf with
          | binnumber -> Token (env, T_NUMBER (BINARY, lexeme lexbuf))
          | _ -> failwith "unreachable token bignumber")
  | binnumber -> Token (env, T_NUMBER (BINARY, lexeme lexbuf))
  | octbigint, word ->
      (* Numbers cannot be immediately followed by words *)
      recover env lexbuf ~f:(fun env lexbuf ->
          match%sedlex lexbuf with
          | octbigint -> Token (env, T_BIGINT (BIG_OCTAL, lexeme lexbuf))
          | _ -> failwith "unreachable token octbigint")
  | octbigint -> Token (env, T_BIGINT (BIG_OCTAL, lexeme lexbuf))
  | octnumber, (letter | '8' .. '9'), Star alphanumeric ->
      (* Numbers cannot be immediately followed by words *)
      recover env lexbuf ~f:(fun env lexbuf ->
          match%sedlex lexbuf with
          | octnumber -> Token (env, T_NUMBER (OCTAL, lexeme lexbuf))
          | _ -> failwith "unreachable token octnumber")
  | octnumber -> Token (env, T_NUMBER (OCTAL, lexeme lexbuf))
  | legacynonoctnumber, word ->
      (* Numbers cannot be immediately followed by words *)
      recover env lexbuf ~f:(fun env lexbuf ->
          match%sedlex lexbuf with
          | legacynonoctnumber -> Token (env, T_NUMBER (LEGACY_NON_OCTAL, lexeme lexbuf))
          | _ -> failwith "unreachable token legacynonoctnumber")
  | legacynonoctnumber -> Token (env, T_NUMBER (LEGACY_NON_OCTAL, lexeme lexbuf))
  | legacyoctnumber, (letter | '8' .. '9'), Star alphanumeric ->
      (* Numbers cannot be immediately followed by words *)
      recover env lexbuf ~f:(fun env lexbuf ->
          match%sedlex lexbuf with
          | legacyoctnumber -> Token (env, T_NUMBER (LEGACY_OCTAL, lexeme lexbuf))
          | _ -> failwith "unreachable token legacyoctnumber")
  | legacyoctnumber -> Token (env, T_NUMBER (LEGACY_OCTAL, lexeme lexbuf))
  | hexbigint, word ->
      (* Numbers cannot be immediately followed by words *)
      recover env lexbuf ~f:(fun env lexbuf ->
          match%sedlex lexbuf with
          | hexbigint -> Token (env, T_BIGINT (BIG_NORMAL, lexeme lexbuf))
          | _ -> failwith "unreachable token hexbigint")
  | hexbigint -> Token (env, T_BIGINT (BIG_NORMAL, lexeme lexbuf))
  | hexnumber, non_hex_letter, Star alphanumeric ->
      (* Numbers cannot be immediately followed by words *)
      recover env lexbuf ~f:(fun env lexbuf ->
          match%sedlex lexbuf with
          | hexnumber -> Token (env, T_NUMBER (NORMAL, lexeme lexbuf))
          | _ -> failwith "unreachable token hexnumber")
  | hexnumber -> Token (env, T_NUMBER (NORMAL, lexeme lexbuf))
  | scinumber, word ->
      (* Numbers cannot be immediately followed by words *)
      recover env lexbuf ~f:(fun env lexbuf ->
          match%sedlex lexbuf with
          | scinumber -> Token (env, T_NUMBER (NORMAL, lexeme lexbuf))
          | _ -> failwith "unreachable token scinumber")
  | scinumber -> Token (env, T_NUMBER (NORMAL, lexeme lexbuf))
  | wholebigint, word ->
      (* Numbers cannot be immediately followed by words *)
      recover env lexbuf ~f:(fun env lexbuf ->
          match%sedlex lexbuf with
          | wholebigint -> Token (env, T_BIGINT (BIG_NORMAL, lexeme lexbuf))
          | _ -> failwith "unreachable token wholebigint")
  | wholebigint -> Token (env, T_BIGINT (BIG_NORMAL, lexeme lexbuf))
  | integer, word ->
      (* Numbers cannot be immediately followed by words *)
      recover env lexbuf ~f:(fun env lexbuf ->
          match%sedlex lexbuf with
          | integer -> Token (env, T_NUMBER (NORMAL, lexeme lexbuf))
          | _ -> failwith "unreachable token wholenumber")
  | integer, '.', word -> (
      (* An integer followed by [.word]: re-lex the integer alone, without
         recording an error. *)
      Sedlexing.rollback lexbuf;
      match%sedlex lexbuf with
      | integer -> Token (env, T_NUMBER (NORMAL, lexeme lexbuf))
      | _ -> failwith "unreachable token wholenumber")
  | floatnumber, word ->
      (* Numbers cannot be immediately followed by words *)
      recover env lexbuf ~f:(fun env lexbuf ->
          match%sedlex lexbuf with
          | floatnumber -> Token (env, T_NUMBER (NORMAL, lexeme lexbuf))
          | _ -> failwith "unreachable token wholenumber")
  | integer, Opt '.' | floatnumber -> Token (env, T_NUMBER (NORMAL, lexeme lexbuf))
  (* Syntax *)
  | "{" ->
      let env = push_mode env NORMAL in
      Token (env, T_LCURLY)
  | "}" ->
      let env = pop_mode env in
      Token (env, T_RCURLY)
  | "(" -> Token (env, T_LPAREN)
  | ")" -> Token (env, T_RPAREN)
  | "[" -> Token (env, T_LBRACKET)
  | "]" -> Token (env, T_RBRACKET)
  | "..." -> Token (env, T_ELLIPSIS)
  | "." -> Token (env, T_PERIOD)
  | ";" -> Token (env, T_SEMICOLON)
  | "," -> Token (env, T_COMMA)
  | ":" -> Token (env, T_COLON)
  | "?.", digit -> (
      (* "?." followed by a digit: only the "?" is emitted. *)
      Sedlexing.rollback lexbuf;
      match%sedlex lexbuf with
      | "?" -> Token (env, T_PLING)
      | _ -> failwith "unreachable, expected ?")
  | "?." -> Token (env, T_PLING_PERIOD)
  | "??" -> Token (env, T_PLING_PLING)
  | "?" -> Token (env, T_PLING)
  | "&&" -> Token (env, T_AND)
  | "||" -> Token (env, T_OR)
  | "===" -> Token (env, T_STRICT_EQUAL)
  | "!==" -> Token (env, T_STRICT_NOT_EQUAL)
  | "<=" -> Token (env, T_LESS_THAN_EQUAL)
  | ">=" -> Token (env, T_GREATER_THAN_EQUAL)
  | "==" -> Token (env, T_EQUAL)
  | "!=" -> Token (env, T_NOT_EQUAL)
  | "++" -> Token (env, T_INCR)
  | "--" -> Token (env, T_DECR)
  | "<<=" -> Token (env, T_LSHIFT_ASSIGN)
  | "<<" -> Token (env, T_LSHIFT)
  | ">>=" -> Token (env, T_RSHIFT_ASSIGN)
  | ">>>=" -> Token (env, T_RSHIFT3_ASSIGN)
  | ">>>" -> Token (env, T_RSHIFT3)
  | ">>" -> Token (env, T_RSHIFT)
  | "+=" -> Token (env, T_PLUS_ASSIGN)
  | "-=" -> Token (env, T_MINUS_ASSIGN)
  | "*=" -> Token (env, T_MULT_ASSIGN)
  | "**=" -> Token (env, T_EXP_ASSIGN)
  | "%=" -> Token (env, T_MOD_ASSIGN)
  | "&=" -> Token (env, T_BIT_AND_ASSIGN)
  | "|=" -> Token (env, T_BIT_OR_ASSIGN)
  | "^=" -> Token (env, T_BIT_XOR_ASSIGN)
  | "??=" -> Token (env, T_NULLISH_ASSIGN)
  | "&&=" -> Token (env, T_AND_ASSIGN)
  | "||=" -> Token (env, T_OR_ASSIGN)
  | "<" -> Token (env, T_LESS_THAN)
  | ">" -> Token (env, T_GREATER_THAN)
  | "+" -> Token (env, T_PLUS)
  | "-" -> Token (env, T_MINUS)
  | "*" -> Token (env, T_MULT)
  | "**" -> Token (env, T_EXP)
  | "%" -> Token (env, T_MOD)
  | "|" -> Token (env, T_BIT_OR)
  | "&" -> Token (env, T_BIT_AND)
  | "^" -> Token (env, T_BIT_XOR)
  | "!" -> Token (env, T_NOT)
  | "~" -> Token (env, T_BIT_NOT)
  | "=" -> Token (env, T_ASSIGN)
  | "=>" -> Token (env, T_ARROW)
  | "/=" -> Token (env, T_DIV_ASSIGN)
  | "/" -> Token (env, T_DIV)
  | "@" -> Token (env, T_AT)
  | "#" -> Token (env, T_POUND)
  (* To reason about its correctness:
     1. all tokens are still matched
     2. tokens like opaque, opaquex are matched correctly
       the most fragile case is `opaquex` (matched with `opaque,x` instead)
     3. \a is disallowed
     4. a世界 recognized
  *)
  | js_id_start_with_escape, Star js_id_continue_with_escape -> (
      let raw = Sedlexing.Utf8.lexeme lexbuf in
      match Js_token.is_keyword raw with
      | Some t -> Token (env, t)
      | None ->
          if is_basic_ident raw
          then Token (env, T_IDENTIFIER (Stdlib.Utf8_string.of_string_exn raw, raw))
          else
            let env, decoded = decode_identifier env (loc_of_lexbuf env lexbuf) raw in
            let env =
              match Js_token.is_keyword decoded with
              | None -> (
                  match is_valid_identifier_name decoded with
                  | true -> env
                  | false ->
                      illegal
                        env
                        (loc_of_lexbuf env lexbuf)
                        (Printf.sprintf "%S is not a valid identifier" decoded))
              | Some _ ->
                  (* accept keyword as ident if escaped *)
                  env
            in
            Token (env, T_IDENTIFIER (Stdlib.Utf8_string.of_string_exn decoded, raw)))
  | eof -> Token (env, T_EOF)
  | any ->
      let env = illegal env (loc_of_lexbuf env lexbuf) "" in
      Token (env, T_ERROR (lexeme lexbuf))
  | _ -> failwith "unreachable token"

(* Contents of a regexp character class, after the opening '[' has been
   consumed, up to and including the closing ']'.  A line terminator records
   [UnterminatedRegExp]. *)
let rec regexp_class env buf lexbuf =
  match%sedlex lexbuf with
  | eof -> env
  | "\\\\" ->
      Buffer.add_string buf "\\\\";
      regexp_class env buf lexbuf
  | '\\', ']' ->
      Buffer.add_char buf '\\';
      Buffer.add_char buf ']';
      regexp_class env buf lexbuf
  | ']' ->
      Buffer.add_char buf ']';
      env
  | line_terminator_sequence ->
      newline lexbuf;
      let loc = loc_of_lexbuf env lexbuf in
      let env = lex_error env loc Parse_error.UnterminatedRegExp in
      env
  (* match multi-char substrings that don't contain the start chars of the above patterns *)
  | Plus (Compl (eof | '\\' | ']' | line_terminator_sequence_start)) | any ->
      let str = lexeme lexbuf in
      Buffer.add_string buf str;
      regexp_class env buf lexbuf
  | _ -> failwith "unreachable regexp_class"

(* Body of a regexp literal, after the opening '/'.  The pattern is accumulated
   in [buf]; the result is the environment and the flags ("" when there are
   none or when the literal is unterminated). *)
let rec regexp_body env buf lexbuf =
  match%sedlex lexbuf with
  | eof ->
      let loc = loc_of_lexbuf env lexbuf in
      let env = lex_error env loc Parse_error.UnterminatedRegExp in
      env, ""
  | '\\', line_terminator_sequence ->
      newline lexbuf;
      let loc = loc_of_lexbuf env lexbuf in
      let env = lex_error env loc Parse_error.UnterminatedRegExp in
      env, ""
  | '\\', any ->
      let s = lexeme lexbuf in
      Buffer.add_string buf s;
      regexp_body env buf lexbuf
  | '/', Plus id_letter ->
      let flags =
        let str = lexeme lexbuf in
        (* Drop the leading '/'. *)
        String.sub str 1 (String.length str - 1)
      in
      env, flags
  | '/' -> env, ""
  | '[' ->
      Buffer.add_char buf '[';
      let env = regexp_class env buf lexbuf in
      regexp_body env buf lexbuf
  | line_terminator_sequence ->
      newline lexbuf;
      let loc = loc_of_lexbuf env lexbuf in
      let env = lex_error env loc Parse_error.UnterminatedRegExp in
      env, ""
  (* match multi-char substrings that don't contain the start chars of the above patterns *)
  | Plus (Compl (eof | '\\' | '/' | '[' | line_terminator_sequence_start)) | any ->
      let str = lexeme lexbuf in
      Buffer.add_string buf str;
      regexp_body env buf lexbuf
  | _ -> failwith "unreachable regexp_body"

(* Rule used in REGEXP mode. *)
let regexp env lexbuf =
  match%sedlex lexbuf with
  | eof -> Token (env, T_EOF)
  | line_terminator_sequence ->
      newline lexbuf;
      Continue env
  | Plus whitespace -> Continue env
  | "//" ->
      let buf = Buffer.create 127 in
      lexeme_to_buffer lexbuf buf;
      let env = line_comment env buf lexbuf in
      Comment (env, Buffer.contents buf)
  | "/*" ->
      let buf = Buffer.create 127 in
      lexeme_to_buffer lexbuf buf;
      let env = comment env buf lexbuf in
      Comment (env, Buffer.contents buf)
  | '/' ->
      let buf = Buffer.create 127 in
      let env, flags = regexp_body env buf lexbuf in
      Token
        (env, T_REGEXP (Stdlib.Utf8_string.of_string_exn (Buffer.contents buf), flags))
  | any ->
      let env = illegal env (loc_of_lexbuf env lexbuf) "" in
      Token (env, T_ERROR (lexeme lexbuf))
  | _ -> failwith "unreachable regexp"

(*****************************************************************************)
(* Rule backquote *)
(*****************************************************************************)
let backquote env lexbuf =
  match%sedlex lexbuf with
  | '`' ->
      let env = pop_mode env in
      Token (env, T_BACKQUOTE)
  | "${" ->
      let env = push_mode env NORMAL in
      Token (env, T_DOLLARCURLY)
  | Plus (Compl ('`' | '$' | '\\')) -> Token (env, T_ENCAPSED_STRING (lexeme lexbuf))
  | '$' -> Token (env, T_ENCAPSED_STRING (lexeme lexbuf))
  | '\\' ->
      let buf = Buffer.create 127 in
      Buffer.add_char buf '\\';
      let env, str = string_escape ~accept_invalid:true env lexbuf in
      Buffer.add_string buf str;
      Token (env, T_ENCAPSED_STRING (Buffer.contents buf))
  | eof -> Token (env, T_EOF)
  | _ ->
      let env = illegal env (loc_of_lexbuf env lexbuf) "" in
      Token (env, T_ERROR (lexeme lexbuf))

(* Turn a raw rule [f] into a function from an environment to the next
   [Lex_result.t]: [Continue] results are skipped, and each returned token or
   comment carries its location and the errors accumulated so far. *)
let wrap f =
  let f env =
    let start = Sedlexing.lexing_position_start env.Lex_env.lex_lb in
    let t = f env env.Lex_env.lex_lb in
    let stop = Sedlexing.lexing_position_curr env.Lex_env.lex_lb in
    t, Loc.create ~last_line:(Loc.line_end' !(env.lex_last_loc)) start stop
  in
  let rec helper comments env =
    Sedlexing.start env.Lex_env.lex_lb;
    let res, lex_loc = f env in
    match res with
    | Token (env, t) ->
        env.lex_last_loc := lex_loc;
        let lex_token = t in
        let lex_errors_acc = env.lex_state.lex_errors_acc in
        if lex_errors_acc = []
        then env, { Lex_result.lex_token; lex_loc; lex_errors = [] }
        else
          (* Errors were accumulated most-recent-first; report them in source
             order and start the next token with a clean state. *)
          ( { env with lex_state = Lex_env.empty_lex_state }
          , { Lex_result.lex_token; lex_loc; lex_errors = List.rev lex_errors_acc } )
    | Comment (env, comment) ->
        env.lex_last_loc := lex_loc;
        let lex_errors_acc = env.lex_state.lex_errors_acc in
        (* NOTE(review): unlike the [Token] case the accumulated errors are not
           cleared here, so they are attached to the next token as well -
           confirm this is intended. *)
        ( env
        , { Lex_result.lex_token = TComment comment
          ; lex_loc
          ; lex_errors = List.rev lex_errors_acc
          } )
    | Continue env -> helper comments env
  in
  fun env -> helper [] env

let regexp = wrap regexp

let token = wrap token

let backquote = wrap backquote

(* Lex the next token with the rule selected by the top of the mode stack; an
   empty stack behaves like NORMAL. *)
let lex env =
  match env.Lex_env.lex_mode_stack with
  | Lex_mode.NORMAL :: _ | [] -> token env
  | Lex_mode.BACKQUOTE :: _ -> backquote env
  | Lex_mode.REGEXP :: _ -> regexp env