(* This file is part of the Catala compiler, a specification language for tax and social benefits
computation rules. Copyright (C) 2020 Inria, contributors: Denis Merigoux
<denis.merigoux@inria.fr>, Emile Rolley <emile.rolley@tuta.io>
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
or implied. See the License for the specific language governing permissions and limitations under
the License. *)

open Tokens
open Sedlexing
open Utils
module R = Re.Pcre

(* Calculates the precedence according to a {!val: matched_regex} of the form '[#]+'.

   @note -1 because a [LAW_HEADING] starts with at least one "#", and the number of '#' remaining
   corresponds to the precedence. *)
let calc_precedence (matched_regex : string) : int = String.length matched_regex - 1

(* Gets the [LAW_HEADING] token from the current {!val: lexbuf} *)
let get_law_heading (lexbuf : lexbuf) : token =
  let extract_article_title =
    R.regexp "([#]+)\\s*([^\\|]+)(\\|([^\\|]+)|)(\\|\\s*([0-9]{4}\\-[0-9]{2}\\-[0-9]{2})|)"
  in
  let get_substring =
    R.get_substring (R.exec ~rex:extract_article_title (Utf8.lexeme lexbuf))
  in
  let title = String.trim (get_substring 2) in
  let article_id = try Some (String.trim (get_substring 4)) with Not_found -> None in
  let article_expiration_date =
    try Some (String.trim (get_substring 6)) with Not_found -> None
  in
  let precedence = calc_precedence (String.trim (get_substring 1)) in
  LAW_HEADING (title, article_id, article_expiration_date, precedence)
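
(* Illustrative example (comment only, not compiled): feeding [get_law_heading] a heading such as
   "## Article 1 | LEGIFRANCETEXT000012345678 | 2021-01-01" (the identifier and date are made up)
   is expected to produce

   {[
     LAW_HEADING ("Article 1", Some "LEGIFRANCETEXT000012345678", Some "2021-01-01", 1)
   ]}

   where the precedence [1] is [calc_precedence "##"]. When the optional "|"-separated identifier
   or expiration date is absent, the corresponding field is [None]. *)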

type lexing_context = Law | Code | Directive | Directive_args

(** Mutable reference used by the lexer as its state, to distinguish whether it is currently
    lexing law text, code, or a directive. *)
let context : lexing_context ref = ref Law

(** Mutable buffer that accumulates the string representation of the body of code being lexed.
    This string representation is used in the literate programming backends to faithfully capture
    the spacing pattern of the original program. *)
let code_buffer : Buffer.t = Buffer.create 4000

(** Updates {!val:code_buffer} with the current lexeme *)
let update_acc (lexbuf : lexbuf) : unit = Buffer.add_string code_buffer (Utf8.lexeme lexbuf)
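
(* Illustrative sketch (comment only, not compiled): a localised code-lexing rule is expected to
   call [update_acc] on every lexeme it consumes, so that [code_buffer] ends up holding the code
   block verbatim, e.g.

   {[
     (* hypothetical branch of a [match%sedlex lexbuf with ...] in a localised [lex_code] *)
     | "scope" ->
         update_acc lexbuf;
         SCOPE
   ]}

   The literate programming backends can then read the accumulated text back with
   [Buffer.contents code_buffer] and reset it with [Buffer.clear code_buffer] between code
   blocks. *)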

(** Error-generating helper *)
let raise_lexer_error (loc : Pos.t) (token : string) =
  Errors.raise_spanned_error
    (Printf.sprintf "Parsing error after token \"%s\": what comes after is unknown" token)
    loc

(** Associative list matching each punctuation string part of the Catala syntax with its
    {!module: Surface.Parser} token. Same for all the input languages (English, French, etc.) *)
let token_list_language_agnostic : (string * token) list =
  [
    (".", DOT);
    ("<=", LESSER_EQUAL);
    (">=", GREATER_EQUAL);
    (">", GREATER);
    ("!=", NOT_EQUAL);
    ("=", EQUAL);
    ("(", LPAREN);
    (")", RPAREN);
    ("{", LBRACKET);
    ("}", RBRACKET);
    ("[", LSQUARE);
    ("]", RSQUARE);
    ("+", PLUS);
    ("-", MINUS);
    ("*", MULT);
    ("/", DIV);
    ("|", VERTICAL);
    (":", COLON);
    (";", SEMICOLON);
    ("--", ALT);
    ("++", PLUSPLUS);
  ]
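
(* Illustrative sketch (comment only, not compiled): a lexer implementing the [LocalisedLexer]
   signature below is expected to expose a [token_list] that extends this language-agnostic list
   with its own keywords, along the lines of

   {[
     (* the keyword spellings shown here are hypothetical *)
     let token_list : (string * token) list =
       [ ("scope", SCOPE); ("data", DATA); ("definition", DEFINITION) ]
       @ token_list_language_agnostic
   ]} *)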

module type LocalisedLexer = sig
  val token_list : (string * Tokens.token) list
  (** Same as {!val: token_list_language_agnostic}, but with tokens specialized to a given
      language. *)

  val lex_builtin : string -> Ast.builtin_expression option
  (** Simple lexer for builtins *)

  val lex_code : Sedlexing.lexbuf -> Tokens.token
  (** Main lexing function used in code blocks *)

  val lex_law : Sedlexing.lexbuf -> Tokens.token
  (** Main lexing function used outside code blocks *)

  val lexer : Sedlexing.lexbuf -> Tokens.token
  (** Entry point of the lexer, dispatches to {!val: lex_code} or {!val: lex_law} depending on the
      current {!val: Surface.Lexer_common.context}. *)
end
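
(* Illustrative sketch (comment only, not compiled): an implementation of [lexer] is expected to
   dispatch on the current {!val: context}; the exact handling of the directive contexts shown
   below is an assumption, not a transcription of an actual localised lexer.

   {[
     let lexer (lexbuf : lexbuf) : token =
       match !context with
       | Law -> lex_law lexbuf
       | Code | Directive | Directive_args -> lex_code lexbuf
   ]}

   [lex_law] is in turn expected to switch [context] to [Code] when it reaches the marker that
   opens a code block, and [lex_code] to switch it back to [Law] when the block ends. *)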