(* Yoann Padioleau
 *
 * Copyright (C) 2010 Facebook
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * version 2.1 as published by the Free Software Foundation, with the
 * special exception on linking described in file license.txt.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
 * license.txt for more details.
 *)
open Common

(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)
(*
 * Some helpers for the different lexers and parsers in pfff.
 * The main types are:
 * ('token_location' < 'token_origin' < 'token_mutable') * token_kind
 *
 *)

(*****************************************************************************)
(* Types *)
(*****************************************************************************)

(* Currently, lexing.ml in the standard OCaml libray does not handle
 * the line number position.
 * Even if there are certain fields in the lexing structure, they are not
 * maintained by the lexing engine so the following code does not work:
 *
 *   let pos = Lexing.lexeme_end_p lexbuf in
 *   sprintf "at file %s, line %d, char %d" pos.pos_fname pos.pos_lnum
 *     (pos.pos_cnum - pos.pos_bol) in
 *
 * Hence those types and functions below to overcome the previous limitation,
 * (see especially complete_token_location_large()).
 *)
type token_location = {
  str: string;     (* the content of the token itself *)
  charpos: int;    (* byte offset from the start of the file *)
  line: int;
  column: int;
  file: filename;
}
(* with tarzan *)

(* Sentinel location: all positions are -1 and the file is empty. *)
let fake_token_location = {
  charpos = -1; str = ""; line = -1; column = -1; file = "";
}

(* Location of the very first character of [file] (line numbers start at 1,
 * columns and charpos at 0). *)
let first_loc_of_file file = {
  charpos = 0; str = ""; line = 1; column = 0; file = file;
}

type token_origin =
  (* Present both in the AST and list of tokens *)
  | OriginTok of token_location

  (* Present only in the AST and generated after parsing. Can be used
   * when building some extra AST elements. *)
  | FakeTokStr of
      string (* to help the generic pretty printer *) *
      (* Sometimes we generate fake tokens close to existing
       * origin tokens. This can be useful when have to give an error
       * message that involves a fakeToken. The int is a kind of
       * virtual position, an offset. See compare_pos below.
       *)
      (token_location * int) option

  (* In the case of a XHP file, we could preprocess it and incorporate
   * the tokens of the preprocessed code with the tokens from
   * the original file. We want to mark those "expanded" tokens
   * with a special tag so that if someone do some transformation on
   * those expanded tokens they will get a warning (because we may have
   * trouble back-propagating the transformation back to the original file).
   *)
  | ExpandedTok of
      (* refers to the preprocessed file, e.g. /tmp/pp-xxxx.pphp *)
      token_location *
      (* kind of virtual position. This info refers to the last token
       * before a serie of expanded tokens and the int is an offset.
       * The goal is to be able to compare the position of tokens
       * between then, even for expanded tokens. See compare_pos
       * below.
       *)
      token_location * int

  (* The Ab constructor is (ab)used to call '=' to compare
   * big AST portions. Indeed as we keep the token information in the AST,
   * if we have an expression in the code like "1+1" and want to test if
   * it's equal to another code like "1+1" located elsewhere, then
   * the Pervasives.'=' of OCaml will not return true because
   * when it recursively goes down to compare the leaf of the AST, that is
   * the token_location, there will be some differences of positions. If instead
   * all leaves use Ab, then there is no position information and we can
   * use '='. See also the 'al_info' function below.
   *
   * Ab means AbstractLineTok. I Use a short name to not
   * polluate in debug mode.
   *)
  | Ab
(* with tarzan *)

type token_mutable = {
  (* contains among other things the position of the token through
   * the token_location embedded inside the token_origin type.
   *)
  token: token_origin;
  mutable transfo: transformation;
  (* less: mutable comments: ...; *)
}

(* poor's man refactoring *)
and transformation =
  | NoTransfo
  | Remove
  | AddBefore of add
  | AddAfter of add
  | Replace of add
  | AddArgsBefore of string list

and add =
  | AddStr of string
  | AddNewlineAndIdent
(* with tarzan *)

(* Synthesize a token. *)
let fake_info str : token_mutable = {
  token = FakeTokStr (str, None);
  transfo = NoTransfo;
}

type token_kind =
  (* for the fuzzy parser and sgrep/spatch fuzzy AST *)
  | LPar | RPar
  | LBrace | RBrace
  | LBracket | RBracket
  | LAngle | RAngle
  (* for the unparser helpers in spatch, and to filter
   * irrelevant tokens in the fuzzy parser
   *)
  | Esthet of esthet
  (* mostly for the lexer helpers, and for fuzzy parser *)
  (* less: want to factorize all those TH.is_eof to use that?
   * but extra cost? same for TH.is_comment?
   * todo: could maybe get rid of that now that we don't really use
   * berkeley DB and prefer Prolog, and so we don't need a sentinel
   * ast elements to associate the comments with it
   *)
  | Eof
  | Other

and esthet =
  | Comment
  | Newline
  | Space

(* shortcut *)
type t = token_mutable
type info_ = t

(* Per-file statistics accumulated while parsing. *)
type parsing_stat = {
  filename: Common.filename;
  mutable correct: int;
  mutable bad: int;
  (* used only for cpp for now *)
  mutable have_timeout: bool;
  (* by our cpp commentizer *)
  mutable commentized: int;
  (* if want to know exactly what was passed through, uncomment:
   *
   * mutable passing_through_lines: int;
   *
   * it differs from bad by starting from the error to
   * the synchro point instead of starting from start of
   * function to end of function.
   *)
  (* for instance to report most problematic macros when parse c/c++ *)
  mutable problematic_lines:
    (string list (* ident in error line *) * int (* line_error *)) list;
}

let default_stat file = {
  filename = file;
  have_timeout = false;
  correct = 0;
  bad = 0;
  commentized = 0;
  problematic_lines = [];
}

(* Many parsers need to interact with the lexer, or use tricks around
 * the stream of tokens, or do some error recovery, or just need to
 * pass certain tokens (like the comments token) which requires
 * to have access to this stream of remaining tokens.
 * The token_state type helps.
 *)
type 'tok tokens_state = {
  mutable rest: 'tok list;
  mutable current: 'tok;
  (* it's passed since last "checkpoint", not passed from the beginning *)
  mutable passed: 'tok list;
  (* if want to do some lalr(k) hacking ... cf yacfe.
   * mutable passed_clean : 'tok list;
   * mutable rest_clean : 'tok list;
   *)
}

(* note: raises Failure (via List.hd) if [toks] is empty *)
let mk_tokens_state toks = {
  rest = toks;
  current = (List.hd toks);
  passed = [];
  (* passed_clean = [];
   * rest_clean = (toks +> List.filter TH.is_not_comment);
   *)
}

(*****************************************************************************)
(* Lexer helpers *)
(*****************************************************************************)

(* (lexeme, start offset) of the current token in [lexbuf] *)
let lexbuf_to_strpos lexbuf =
  (Lexing.lexeme lexbuf, Lexing.lexeme_start lexbuf)

(* Build a token from a lexeme string and its byte offset; line/column/file
 * are left unset and filled in later. *)
let tokinfo_str_pos str pos =
  {
    token = OriginTok {
      charpos = pos;
      str = str;
      (* info filled in a post-lexing phase, see complete_token_location_large*)
      line = -1;
      column = -1;
      file = "";
    };
    transfo = NoTransfo;
  }

(* pad: hack around ocamllex to emulate the yyless() of flex. The semantic
 * is not exactly the same than yyless(), so I use yyback() instead.
 * http://my.safaribooksonline.com/book/programming/flex/9780596805418/a-reference-for-flex-specifications/yyless
 *)
let yyback n lexbuf =
  lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - n;
  let currp = lexbuf.Lexing.lex_curr_p in
  lexbuf.Lexing.lex_curr_p <- { currp with
    Lexing.pos_cnum = currp.Lexing.pos_cnum - n;
  }

(*****************************************************************************)
(* Errors *)
(*****************************************************************************)

(* this can be used in the different lexer/parsers in pfff *)
exception Lexical_error of string * t
exception Parsing_error of t
exception Ast_builder_error of string * t
exception Other_error of string * t

let tokinfo lexbuf =
  tokinfo_str_pos (Lexing.lexeme lexbuf) (Lexing.lexeme_start lexbuf)

(* Report a lexical error: either raise Lexical_error or just log it,
 * depending on the Flag_parsing settings. *)
let lexical_error s lexbuf =
  if !Flag_parsing.exn_when_lexical_error
  then raise (Lexical_error (s, tokinfo lexbuf))
  else
    if !Flag_parsing.verbose_lexing
    then pr2_once ("LEXER: " ^ s)
    else ()

(*****************************************************************************)
(* Misc *)
(*****************************************************************************)

(*
val rewrap_token_location : token_location.token_location -> info -> info
let rewrap_token_location pi ii =
  {ii with pinfo =
    (match ii.pinfo with
    | OriginTok _oldpi -> OriginTok pi
    | FakeTokStr _  | Ab | ExpandedTok _ ->
        failwith "rewrap_parseinfo: no OriginTok"
    )
  }
*)

(* Extract the token_location carried by a token.
 * @raise Failure for positionless tokens (Ab, anchor-less FakeTokStr). *)
let token_location_of_info ii =
  match ii.token with
  | OriginTok pinfo -> pinfo
  (* TODO ? dangerous ? *)
  | ExpandedTok (pinfo_pp, _pinfo_orig, _offset) -> pinfo_pp
  | FakeTokStr (_, (Some (pi, _))) -> pi
  | FakeTokStr (_, None)
  | Ab
    -> failwith "token_location_of_info: no OriginTok"

(* for error reporting *)
(*
let string_of_token_location x =
  spf "%s at %s:%d:%d" x.str x.file x.line x.column
*)
let string_of_token_location x =
  spf "%s:%d:%d" x.file x.line x.column

let string_of_info x =
  string_of_token_location (token_location_of_info x)

let str_of_info  ii = (token_location_of_info ii).str
let file_of_info ii = (token_location_of_info ii).file
let line_of_info ii = (token_location_of_info ii).line
let col_of_info  ii = (token_location_of_info ii).column

(* todo: return a Real | Virt position ? *)
let pos_of_info  ii = (token_location_of_info ii).charpos

let pinfo_of_info ii = ii.token

let is_origintok ii =
  match ii.token with
  | OriginTok _ -> true
  | _ -> false

(*
let opos_of_info ii =
  PI.get_orig_info (function x -> x.PI.charpos) ii

val pos_of_tok : Parser_cpp.token -> int
val str_of_tok : Parser_cpp.token -> string
val file_of_tok : Parser_cpp.token -> Common.filename

let pos_of_tok x =  Ast.opos_of_info (info_of_tok x)
let str_of_tok x =  Ast.str_of_info (info_of_tok x)
let file_of_tok x = Ast.file_of_info (info_of_tok x)
let pinfo_of_tok x = Ast.pinfo_of_info (info_of_tok x)

val is_origin : Parser_cpp.token -> bool
val is_expanded : Parser_cpp.token -> bool
val is_fake : Parser_cpp.token -> bool
val is_abstract : Parser_cpp.token -> bool

let is_origin x =
  match pinfo_of_tok x with Parse_info.OriginTok _ -> true | _ -> false
let is_expanded x =
  match pinfo_of_tok x with Parse_info.ExpandedTok _ -> true | _ -> false
let is_fake x =
  match pinfo_of_tok x with Parse_info.FakeTokStr _ -> true | _ -> false
let is_abstract x =
  match pinfo_of_tok x with Parse_info.Ab -> true | _ -> false
*)

(* info about the current location *)
(*
let get_pi = function
  | OriginTok pi -> pi
  | ExpandedTok (_,pi,_) -> pi
  | FakeTokStr (_,(Some (pi,_))) -> pi
  | FakeTokStr (_,None) ->
      failwith "FakeTokStr None"
  | Ab ->
      failwith "Ab"
*)

(* original info *)
let get_original_token_location = function
  | OriginTok pi -> pi
  | ExpandedTok (pi, _, _) -> pi
  | FakeTokStr (_, _) -> failwith "no position information"
  | Ab -> failwith "Ab"

(* used by token_helpers *)
(*
let get_info f ii =
  match ii.token with
  | OriginTok pi -> f pi
  | ExpandedTok (_,pi,_) -> f pi
  | FakeTokStr (_,Some (pi,_)) -> f pi
  | FakeTokStr (_,None) ->
      failwith "FakeTokStr None"
  | Ab ->
      failwith "Ab"
*)

(*
let get_orig_info f ii =
  match ii.token with
  | OriginTok pi -> f pi
  | ExpandedTok (pi,_, _) -> f pi
  | FakeTokStr (_,Some (pi,_)) -> f pi
  | FakeTokStr (_,None ) ->
      failwith "FakeTokStr None"
  | Ab ->
      failwith "Ab"
*)

(* not used but used to be useful in coccinelle *)
type posrv =
  | Real of token_location
  | Virt of
      token_location (* last real info before expanded tok *) *
      int (* virtual offset *)

(* Compare the positions of two tokens, expanded tokens included.
 * Returns -1, 0 or 1.
 * @raise Failure on Ab or FakeTokStr tokens. *)
let compare_pos ii1 ii2 =
  let get_pos = function
    | OriginTok pi -> Real pi
    (* todo? I have this for lang_php/
     * | FakeTokStr (s, Some (pi_orig, offset)) ->
     *     Virt (pi_orig, offset)
     *)
    | FakeTokStr _
    | Ab
      -> failwith "get_pos: Ab or FakeTok"
    | ExpandedTok (_pi_pp, pi_orig, offset) ->
        Virt (pi_orig, offset)
  in
  let pos1 = get_pos (pinfo_of_info ii1) in
  let pos2 = get_pos (pinfo_of_info ii2) in
  match (pos1, pos2) with
  | (Real p1, Real p2) ->
      compare p1.charpos p2.charpos
  (* a virtual position sorts strictly after the real token it anchors to *)
  | (Virt (p1, _), Real p2) ->
      if (compare p1.charpos p2.charpos) =|= (-1) then (-1) else 1
  | (Real p1, Virt (p2, _)) ->
      if (compare p1.charpos p2.charpos) =|= 1 then 1 else (-1)
  | (Virt (p1, o1), Virt (p2, o2)) ->
      let poi1 = p1.charpos in
      let poi2 = p2.charpos in
      (match compare poi1 poi2 with
       | -1 -> -1
       (* same anchor: fall back on the virtual offsets *)
       | 0 -> compare o1 o2
       | 1 -> 1
       | _ -> raise Impossible
      )

(* Return the (leftmost, rightmost) tokens of [xs] by position.
 * @raise Failure on an empty list. *)
let min_max_ii_by_pos xs =
  match xs with
  | [] -> failwith "empty list, max_min_ii_by_pos"
  | [x] -> (x, x)
  | x :: xs ->
      let pos_leq p1 p2 = (compare_pos p1 p2) =|= (-1) in
      xs |> List.fold_left (fun (minii, maxii) e ->
        let maxii' = if pos_leq maxii e then e else maxii in
        let minii' = if pos_leq e minii then e else minii in
        minii', maxii'
      ) (x, x)

(*
let mk_info_item2 ~info_of_tok toks =
  let buf = Buffer.create 100 in
  let s =
    (* old: get_slice_file filename (line1, line2) *)
    begin
      toks +> List.iter (fun tok ->
        let info = info_of_tok tok in
        match info.token with
        | OriginTok _
        | ExpandedTok _ ->
            Buffer.add_string buf (str_of_info info)
        (* the virtual semicolon *)
        | FakeTokStr _ ->
            ()
        | Ab -> raise Impossible
      );
      Buffer.contents buf
    end
  in
  (s, toks)

let mk_info_item_DEPRECATED ~info_of_tok a =
  Common.profile_code "Parsing.mk_info_item"
    (fun () -> mk_info_item2 ~info_of_tok a)
*)

(*
I used to have:
 type program2 = toplevel2 list
  (* the token list contains also the comment-tokens *)
  and toplevel2 = Ast_php.toplevel * Parser_php.token list
 type program_with_comments = program2

 and a function below called distribute_info_items_toplevel that
 would distribute the list of tokens to each toplevel entity.

 This was when I was storing parts of AST in berkeley DB and when
 I wanted to get some information about an entity (a function, a class)
 I wanted to get the list also of tokens associated with that entity.

 Now I just have
  type program_and_tokens = Ast_php.program * Parser_php.token list
 because I don't use berkeley DB. I use codegraph and an entity_finder
 we just focus on use/def and does not store huge asts on disk.

let rec distribute_info_items_toplevel2 xs toks filename =
  match xs with
  | [] -> raise Impossible
  | [Ast_php.FinalDef e] ->
      (* assert (null toks) ??? no cos can have whitespace tokens *)
      let info_item = toks in
      [Ast_php.FinalDef e, info_item]
  | ast::xs ->
      (match ast with
      | Ast_js.St (Ast_js.Nop None) ->
          distribute_info_items_toplevel2 xs toks filename
      | _ ->
          let ii = Lib_parsing_php.ii_of_any (Ast.Toplevel ast) in
          (* ugly: I use a fakeInfo for lambda f_name, so I have
           * have to filter the abstract info here
           *)
          let ii = List.filter PI.is_origintok ii in
          let (min, max) = PI.min_max_ii_by_pos ii in

          let toks_before_max, toks_after =
            (* on very huge file, this function was previously segmentation fault
             * in native mode because span was not tail call
             *)
            Common.profile_code "spanning tokens" (fun () ->
              toks +> Common2.span_tail_call (fun tok ->
                match PI.compare_pos (TH.info_of_tok tok) max with
                | -1 | 0 -> true
                | 1 -> false
                | _ -> raise Impossible
              ))
          in
          let info_item = toks_before_max in
          (ast, info_item)::distribute_info_items_toplevel2 xs toks_after filename

let distribute_info_items_toplevel a b c =
  Common.profile_code "distribute_info_items" (fun () ->
    distribute_info_items_toplevel2 a b c
  )
*)

(* Replace the string content of an origin token.
 * note: the [s] bound in the FakeTokStr pattern shadows the argument,
 * so fake tokens are returned unchanged.
 * @raise Failure on ExpandedTok. *)
let rewrap_str s ii =
  { ii with token =
    (match ii.token with
     | OriginTok pi -> OriginTok { pi with str = s; }
     | FakeTokStr (s, info) -> FakeTokStr (s, info)
     | Ab -> Ab
     | ExpandedTok _ ->
         (* ExpandedTok ({ pi with Common.str = s;},vpi) *)
         failwith "rewrap_str: ExpandedTok not allowed here"
    )
  }

(* Append [s] to the string content of [ii]. *)
let tok_add_s s ii =
  rewrap_str ((str_of_info ii) ^ s) ii

(*****************************************************************************)
(* Error location report *)
(*****************************************************************************)

(* A changen is a stand-in for a file for the underlying code. We use
 * channels in the underlying parsing code as this avoids loading
 * potentially very large source files directly into memory before we
 * even parse them, but this makes it difficult to parse small chunks of
 * code. The changen works around this problem by providing a channel,
 * size and source for underlying data. This allows us to wrap a string
 * in a channel, or pass a file, depending on our needs.
 *)
type changen = unit -> (in_channel * int * Common.filename)

(* Many functions in parse_php were implemented in terms of files and
 * are now adapted to work in terms of changens. However, we wish to
 * provide the original API to users. This wraps changen-based functions
 * and makes them operate on filenames again.
 *)
let file_wrap_changen : (changen -> 'a) -> (Common.filename -> 'a) = fun f ->
  (fun file ->
    f (fun () -> (open_in file, Common2.filesize file, file)))

(*
let full_charpos_to_pos_from_changen changen =
  let (chan, chansize, _) = changen () in
  let size = (chansize + 2) in
  let arr = Array.create size  (0,0) in
  let charpos   = ref 0 in
  let line  = ref 0 in
  let rec full_charpos_to_pos_aux () =
    try
      let s = (input_line chan) in
      incr line;
      (* '... +1 do'  cos input_line dont return the trailing \n *)
      for i = 0 to (String.length s - 1) + 1 do
        arr.(!charpos + i) <- (!line, i);
      done;
      charpos := !charpos + String.length s + 1;
      full_charpos_to_pos_aux();
    with End_of_file ->
      for i = !charpos to Array.length arr - 1 do
        arr.(i) <- (!line, 0);
      done;
      ();
  in
  begin
    full_charpos_to_pos_aux ();
    close_in chan;
    arr
  end

let full_charpos_to_pos2 = file_wrap_changen full_charpos_to_pos_from_changen

let full_charpos_to_pos a =
  profile_code "Common.full_charpos_to_pos" (fun () -> full_charpos_to_pos2 a)
*)

(*
let test_charpos file =
  full_charpos_to_pos file +> Common2.dump +> pr2
*)

(*
let complete_token_location filename table x =
  { x with
    file = filename;
    line   = fst (table.(x.charpos));
    column = snd (table.(x.charpos));
  }
*)

(* Build a (charpos -> line * column) lookup function for the data behind
 * [changen]. Uses two int Bigarrays rather than an array of pairs to keep
 * the table unboxed for large files. Consumes and closes the channel. *)
let full_charpos_to_pos_large_from_changen = fun changen ->
  let (chan, chansize, _) = changen () in
  let size = (chansize + 2) in
  (* old: let arr = Array.create size  (0,0) in *)
  let arr1 = Bigarray.Array1.create Bigarray.int Bigarray.c_layout size in
  let arr2 = Bigarray.Array1.create Bigarray.int Bigarray.c_layout size in
  Bigarray.Array1.fill arr1 0;
  Bigarray.Array1.fill arr2 0;
  let charpos = ref 0 in
  let line = ref 0 in
  let full_charpos_to_pos_aux () =
    try
      while true do begin
        let s = (input_line chan) in
        incr line;
        (* '... +1 do'  cos input_line dont return the trailing \n *)
        for i = 0 to (String.length s - 1) + 1 do
          (* old: arr.(!charpos + i) <- (!line, i); *)
          arr1.{!charpos + i} <- (!line);
          arr2.{!charpos + i} <- i;
        done;
        charpos := !charpos + String.length s + 1;
      end done
    with End_of_file ->
      for i = !charpos to (* old: Array.length arr *)
                          Bigarray.Array1.dim arr1 - 1 do
        (* old: arr.(i) <- (!line, 0); *)
        arr1.{i} <- !line;
        arr2.{i} <- 0;
      done;
      ();
  in
  begin
    full_charpos_to_pos_aux ();
    close_in chan;
    (fun i -> arr1.{i}, arr2.{i})
  end

let full_charpos_to_pos_large2 =
  file_wrap_changen full_charpos_to_pos_large_from_changen

let full_charpos_to_pos_large a =
  profile_code "Common.full_charpos_to_pos_large"
    (fun () -> full_charpos_to_pos_large2 a)

(* Fill in the line/column/file fields of [x] using a lookup [table]
 * produced by full_charpos_to_pos_large. *)
let complete_token_location_large filename table x =
  { x with
    file = filename;
    line   = fst (table (x.charpos));
    column = snd (table (x.charpos));
  }

(*---------------------------------------------------------------------------*)
(* return line x col x str_line  from a charpos. This function is quite
 * expensive so don't use it to get the line x col from every token in
 * a file. Instead use full_charpos_to_pos.
 *)
let (info_from_charpos2 : int -> filename -> (int * int * string)) =
 fun charpos filename ->
  (* Currently lexing.ml does not handle the line number position.
   * Even if there is some fields in the lexing structure, they are not
   * maintained by the lexing engine :( So the following code does not work:
   *   let pos = Lexing.lexeme_end_p lexbuf in
   *   sprintf "at file %s, line %d, char %d" pos.pos_fname pos.pos_lnum
   *      (pos.pos_cnum - pos.pos_bol) in
   * Hence this function to overcome the previous limitation.
   *)
  let chan = open_in filename in
  let linen  = ref 0 in
  let posl   = ref 0 in
  let rec charpos_to_pos_aux last_valid =
    let s =
      try Some (input_line chan)
      with End_of_file when charpos =|= last_valid -> None
    in
    incr linen;
    match s with
    | Some s ->
        let s = s ^ "\n" in
        if (!posl + String.length s > charpos)
        then begin
          close_in chan;
          (!linen, charpos - !posl, s)
        end
        else begin
          posl := !posl + String.length s;
          charpos_to_pos_aux !posl;
        end
    | None -> (!linen, charpos - !posl, "\n")
  in
  let res = charpos_to_pos_aux 0 in
  close_in chan;
  res

let info_from_charpos a b =
  profile_code "Common.info_from_charpos" (fun () -> info_from_charpos2 a b)

(* Decalage is here to handle stuff such as cpp which include file and who
 * can make shift.
 *)
let (error_messagebis : filename -> (string * int) -> int -> string) =
 fun filename (lexeme, lexstart) decalage ->
  let charpos = lexstart + decalage in
  let tok = lexeme in
  let (line, pos, linecontent) = info_from_charpos charpos filename in
  let s = Common2.chop linecontent in
  let s =
    (* this happens in Javascript for minified files *)
    if String.length s > 200
    then (String.sub s 0 100) ^ " (TOO LONG, SHORTEN!)..."
    else s
  in
  spf "File \"%s\", line %d, column %d, charpos = %d
around = '%s', whole content = %s" filename line pos charpos tok s

let error_message = fun filename (lexeme, lexstart) ->
  try error_messagebis filename (lexeme, lexstart) 0
  with End_of_file ->
    ("PB in Common.error_message, position " ^ i_to_s lexstart ^
     " given out of file:" ^ filename)

let error_message_token_location = fun info ->
  let filename = info.file in
  let lexeme = info.str in
  let lexstart = info.charpos in
  try error_messagebis filename (lexeme, lexstart) 0
  with End_of_file ->
    ("PB in Common.error_message, position " ^ i_to_s lexstart ^
     " given out of file:" ^ filename)

let error_message_info info =
  let pinfo = token_location_of_info info in
  error_message_token_location pinfo

(*
let error_message_short = fun filename (lexeme, lexstart) ->
  try
    let charpos = lexstart in
    let (line, pos, linecontent) = info_from_charpos charpos filename in
    spf "File \"%s\", line %d"  filename line
  with End_of_file ->
    begin
      ("PB in Common.error_message, position " ^ i_to_s lexstart ^
       " given out of file:" ^ filename);
    end
*)

(* Print the lines in [start_line, end_line] of [filelines], flagging
 * [line_error] specially; long lines are truncated. *)
let print_bad line_error (start_line, end_line) filelines =
  begin
    pr2 ("badcount: " ^ i_to_s (end_line - start_line));
    for i = start_line to end_line do
      let s = filelines.(i) in
      let line =
        (* this happens in Javascript for minified files *)
        if String.length s > 200
        then (String.sub s 0 100) ^ " (TOO LONG, SHORTEN!)..."
        else s
      in
      if i =|= line_error
      then pr2 ("BAD:!!!!!" ^ " " ^ line)
      else pr2 ("bad:" ^ " " ^ line)
    done
  end

(*****************************************************************************)
(* Parsing statistics *)
(*****************************************************************************)

(* todo: stat per dir ?  give in terms of func_or_decl numbers:
 * nbfunc_or_decl pbs / nbfunc_or_decl total ?/
 *
 * note: cela dit si y'a des fichiers avec des #ifdef dont on connait pas les
 * valeurs alors on parsera correctement tout le fichier et pourtant y'aura
 * aucune def  et donc aucune couverture en fait.
 * ==> TODO evaluer les parties non parsé ?
 *)
let print_parsing_stat_list ?(verbose=false) statxs =
  (* old:
   * let total = List.length statxs in
   * let perfect =
   *   statxs
   *   +> List.filter (function
   *     | {bad = n; _} when n = 0 -> true
   *     | _ -> false)
   *   +> List.length
   * in
   *
   * pr2 "\n\n\n---------------------------------------------------------------";
   * pr2 (
   * (spf "NB total files = %d; " total) ^
   * (spf "perfect = %d; " perfect) ^
   * (spf "=========> %d" ((100 * perfect) / total)) ^ "%"
   * );
   * let good = statxs +> List.fold_left (fun acc {correct = x; _} -> acc+x) 0 in
   * let bad  = statxs +> List.fold_left (fun acc {bad = x; _} -> acc+x) 0  in
   * let gf, badf = float_of_int good, float_of_int bad in
   * pr2 (
   * (spf "nb good = %d,  nb bad = %d " good bad) ^
   * (spf "=========> %f"  (100.0 *. (gf /. (gf +. badf))) ^ "%"
   * )
   * )
   *)
  let total = (List.length statxs) in
  let perfect =
    statxs
    |> List.filter (function
      | {have_timeout = false; bad = 0; _} -> true
      | _ -> false)
    |> List.length
  in

  if verbose then begin
    pr "\n\n\n---------------------------------------------------------------";
    pr "pbs with files:";
    statxs
    |> List.filter (function
      | {have_timeout = true; _} -> true
      | {bad = n; _} when n > 0 -> true
      | _ -> false)
    |> List.iter (function
        {filename = file; have_timeout = timeout; bad = n; _} ->
          pr (file ^ " " ^ (if timeout then "TIMEOUT" else i_to_s n));
      );

    pr "\n\n\n";
    pr "files with lots of tokens passed/commentized:";
    let threshold_passed = 100 in
    statxs
    |> List.filter (function
      | {commentized = n; _} when n > threshold_passed -> true
      | _ -> false)
    |> List.iter (function
        {filename = file; commentized = n; _} ->
          pr (file ^ " " ^ (i_to_s n));
      );
    pr "\n\n\n";
  end;

  let good = statxs |> List.fold_left (fun acc {correct = x; _} -> acc + x) 0 in
  let bad  = statxs |> List.fold_left (fun acc {bad = x; _} -> acc + x) 0 in
  let passed =
    statxs |> List.fold_left (fun acc {commentized = x; _} -> acc + x) 0 in
  let total_lines = good + bad in

  pr "---------------------------------------------------------------";
  pr (
    (spf "NB total files = %d; " total) ^
    (spf "NB total lines = %d; " total_lines) ^
    (spf "perfect = %d; " perfect) ^
    (spf "pbs = %d; "
      (statxs
       |> List.filter (function {bad = n; _} when n > 0 -> true | _ -> false)
       |> List.length)) ^
    (spf "timeout = %d; "
      (statxs
       |> List.filter (function {have_timeout = true; _} -> true | _ -> false)
       |> List.length)) ^
    (spf "=========> %d" ((100 * perfect) / total)) ^ "%"
  );
  let gf, badf = float_of_int good, float_of_int bad in
  let passedf = float_of_int passed in
  pr (
    (spf "nb good = %d, nb passed = %d " good passed) ^
    (spf "=========> %f" (100.0 *. (passedf /. gf)) ^ "%")
  );
  pr (
    (spf "nb good = %d, nb bad = %d " good bad) ^
    (spf "=========> %f" (100.0 *. (gf /. (gf +. badf))) ^ "%")
  )

(*****************************************************************************)
(* Most problematic tokens *)
(*****************************************************************************)

(* inspired by a comment by a reviewer of my CC'09 paper
 *)
let lines_around_error_line ~context (file, line) =
  let arr = Common2.cat_array file in
  let startl = max 0 (line - context) in
  let endl = min (Array.length arr) (line + context) in
  let res = ref [] in
  for i = startl to endl - 1 do
    Common.push arr.(i) res
  done;
  List.rev !res

(* Aggregate problematic_lines over all stats and print the 10 tokens that
 * appear in the most parsing errors, with example context lines. *)
let print_recurring_problematic_tokens xs =
  let h = Hashtbl.create 101 in
  xs |> List.iter (fun x ->
    let file = x.filename in
    x.problematic_lines |> List.iter (fun (xs, line_error) ->
      xs |> List.iter (fun s ->
        Common2.hupdate_default s
          (fun (old, example) -> old + 1, example)
          (fun () -> 0, (file, line_error))
          h;
      )));
  Common2.pr2_xxxxxxxxxxxxxxxxx ();
  pr2 ("maybe 10 most problematic tokens");
  Common2.pr2_xxxxxxxxxxxxxxxxx ();
  Common.hash_to_list h
  |> List.sort (fun (_k1, (v1, _)) (_k2, (v2, _)) -> compare v2 v1)
  |> Common.take_safe 10
  |> List.iter (fun (k, (i, (file_ex, line_ex))) ->
    pr2 (spf "%s: present in %d parsing errors" k i);
    pr2 ("example: ");
    let lines = lines_around_error_line ~context:2 (file_ex, line_ex) in
    lines |> List.iter (fun s -> pr2 (" " ^ s));
  );
  Common2.pr2_xxxxxxxxxxxxxxxxx ();
  ()