123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129(*********************************************************************************)(* Higlo *)(* *)(* Copyright (C) 2014-2021 Institut National de Recherche en Informatique *)(* et en Automatique. All rights reserved. *)(* *)(* This program is free software; you can redistribute it and/or modify *)(* it under the terms of the GNU Lesser General Public License version *)(* 3 as published by the Free Software Foundation. *)(* *)(* This program is distributed in the hope that it will be useful, *)(* but WITHOUT ANY WARRANTY; without even the implied warranty of *)(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *)(* GNU Library General Public License for more details. *)(* *)(* You should have received a copy of the GNU Lesser General Public *)(* License along with this program; if not, write to the Free Software *)(* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA *)(* 02111-1307 USA *)(* *)(* Contact: Maxence.Guesdon@inria.fr *)(* *)(* *)(*********************************************************************************)(** *)typetoken=|Bcommentofstring(** block comment *)|Constantofstring|Directiveofstring|Escapeofstring(** Escape sequence like [\123] *)|Idofstring|Keywordofint*string|Lcommentofstring(** one line comment *)|Numericofstring|Stringofstring|Symbolofint*string|Textofstring(** Used for everything else *)letstring_of_token=function|Bcomments->Printf.sprintf"Bcomment(%S)"s|Constants->Printf.sprintf"Constant(%S)"s|Directives->Printf.sprintf"Directive(%S)"s|Escapes->Printf.sprintf"Escape(%S)"s|Ids->Printf.sprintf"Id(%S)"s|Keyword(n,s)->Printf.sprintf"Keyword(%d, %S)"ns|Lcomments->Printf.sprintf"Lcomment(%S)"s|Numerics->Printf.sprintf"Numeric(%S)"s|Strings->Printf.sprintf"String(%S)"s|Symbol(n,s)->Printf.sprintf"Symbol(%d, %S)"ns|Texts->Printf.sprintf"Text(%S)"smoduleSmap=Map.Make(String)typeerror=|Unknown_langofstring|Lex_errorofLocation.t*stringexceptionErroroferrorletstring_of_error=function|Unknown_langs->Printf.sprintf"Unknown language %S"s|Lex_error(loc,s)->letb=Buffer.create256inletfmt=Format.formatter_of_bufferbinLocation.print_locfmtloc;Format.pp_print_flushfmt();letmsg=Printf.sprintf"Lexing error at %s: %s"(Buffer.contentsb)sinmsgletppfmte=Format.pp_print_stringfmt(string_of_errore)let()=Printexc.register_printer(functionErrore->Some(string_of_errore)|_->None)typelexer=Sedlexing.lexbuf->tokenlistletlangs=refSmap.emptyletget_lexerlang=trySmap.findlang!langswithNot_found->raise(Error(Unknown_langlang));;letregister_langnamef=langs:=Smap.addnamef!langs;;letparse?(raise_exn=false)~langs=letlexer=get_lexerlanginletlexbuf=Sedlexing.Utf8.from_stringsinletrecmerge_text_tokensacctext_acc=function|[]->letl=matchtext_accwith|[]->acc|l->(Text(String.concat""(List.revl)))::accinList.revl|Texts::q->merge_text_tokensacc(s::text_acc)q|t::q->lett1=matchtext_accwith|[]->None|l->Some(Text(String.concat""(List.revl)))inletacc=matcht1with|None->t::acc|Somet1->t::t1::accinmerge_text_tokensacc[]qinletreciteracc=matchlexerlexbufwith|[]->List.revacc|tokens->iter((List.revtokens)@acc)intrylettokens=iter[]inmerge_text_tokens[][]tokenswithe->ifraise_exnthen(matchewith|Failures->let(loc_start,loc_end)=Sedlexing.lexing_positionslexbufinletloc={Location.loc_start;loc_end;loc_ghost=false}inraise(Error(Lex_error(loc,s)))|e->raisee)else[Texts];;