123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799(* Yoann Padioleau
*
* Copyright (C) 2010 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)openCommonmoduleJson_out=Json_iomoduleJson_in=Json_io(*****************************************************************************)(* Prelude *)(*****************************************************************************)(*
* The goal of this module is to provide a data-structure to represent
* code "layers" (a.k.a. code "aspects"). The idea is to imitate google
* earth layers (e.g. the wikipedia layer, panoramio layer, etc), but
* for code. One can have a deadcode layer, a test coverage layer,
* and then can display those layers or not on an existing codebase in
* codemap. The layer is basically some mapping from files to a
* set of lines with a specific color code.
*
*
* A few design choices:
*
* - one could store such information directly into database_xxx.ml
* and have pfff_db compute such information (for instance each function
* could have a set of properties like unit_test, or dead) but this
* would force people to build their own db to visualize the results.
* One could compute this information in database_light_xxx.ml, but this
* will augment the size of the light db slowing down the codemap launch
* even when the people don't use the layers. So it's more flexible to just
* separate layer_code.ml from database_code.ml and have multiple persistent
* files for each information. Also it's quite convenient to have
* utilities like sgrep to be easily extendable to transform a query result
* into a layer.
*
* - How to represent a layer at the macro and micro level in codemap ?
*
* At the micro-level one has just to display the line with the
* requested color. At the macro-level have to either do a majority
* scheme or mixing scheme where for instance draw half of the
* treemap rectangle in red and the other in green.
*
* Because different layers could have different composition needs
* it is simpler to just have the layer say how it should be displayed
* at the macro_level. See the 'macro_level' field below.
*
* - how to have a layer data-structure that can cope with many
* needs ?
*
* Here are some examples of layers and how they are "encoded" by the
* 'layer' type below:
*
* * deadcode (dead function, dead class, dead statement, dead assignnements)
*
* How? dead lines in red color. At the macro_level one can give
* a grey_xxx color with a percentage (e.g. grey53).
*
* * test coverage (static or dynamic)
*
* How? covered lines in green, not covered in red ? Also
* convey a GreyLevel visualization by setting the 'macro_level' field.
*
* * age of file
*
* How? 2010 in green, 2009 in yelow, 2008 in red and so on.
* At the macro_level can do a mix of colors.
*
* * bad smells
*
* How? each bad smell could have a different color and macro_level
* showing a percentage of the rectangle with the right color
* for each smells in the file.
*
* * security patterns (bad smells)
*
* * activity ?
*
* How whow add and delete information ?
* At the micro_level can't show the delete, but at macro_level
* could divide the treemap_rectangle in 2 where percentage of
* add and delete, and also maybe white to show the amount of add
* and delete. Could also use my big circle scheme.
* How link to commit message ? TODO
*
*
* later:
* - could associate more than just a color, e.g. a commit message when want
* to display a version-control layer, or some filling-patterns in
* addition to the color.
* - Could have better precision than the line.
*
* history:
* - I was writing some treemap generator specific for the deadcode
* analysis, the static coverage, the dynamic coverage, and the activity
* in a file (see treemap_php.ml). I was also offering different
* way to visualize the result (DegradeArchiColor | GreyLevel | YesNo).
* It was working fine but there was no easy way to combine 2
* visualisations, like the age "layer" and the "deadcode" layer
* to see correlations. Also adding simple layers like
* visualizing all calls to HTML() or XHP was requiring to
* write another treemap generator. To be more generic and flexible require
* a real 'layer' type.
*)(*****************************************************************************)(* Type *)(*****************************************************************************)typecolor=string(* Simple_color.emacs_color *)(* note: the filenames must be in readable format so layer files can be reused
* by multiple users.
*
* alternatives:
* - could have line range ? useful for layer matching lots of
* consecutive lines in a file ?
* - todo? have more precision than just the line ? precise pos range ?
*
* - could for the lines instead of a 'kind' to have a 'count',
* and then some mappings from range of values to a color.
* For instance on a coverage layer one could say that from X to Y
* then choose this color, from Y to Z another color.
* But can emulate that by having a "coverage1", "coverage2"
* kind with the current scheme.
*
* - have a macro_level_composing_scheme: Majority | Mixed
* that is then interpreted in codemap instead of forcing
* the layer creator to specific how to show the micro_level
* data at the macro_level.
*)typelayer={title:string;description:string;files:(filename*file_info)list;kinds:(kind*color)list;}andfile_info={micro_level:(int(* line *)*kind)list;(* The list can be empty in which case codemap can use
* the micro_level information and show a mix of colors.
*
* The list can have just one element too and have a kind
* different than the one used in the micro_level. For instance
* for the coverage one can have red/green at micro_level
* and grey_xxx at macro_level.
*)macro_level:(kind*float(* percentage of rectangle *))list;}(* ugly: because of the ugly way Ocaml.json_of_v currently works
* the kind can not start with a uppercase
*)andkind=string(* with tarzan *)(* The filenames in the index are in absolute path format. That way they
* can be used from codemap in hashtbl and compared to the
* current file.
*)typelayers_with_index={root:Common.dirname;layers:(layer*bool(* is active *))list;micro_index:(filename,(int,color)Hashtbl.t)Hashtbl.t;macro_index:(filename,(float*color)list)Hashtbl.t;}(*****************************************************************************)(* Reusable properties *)(*****************************************************************************)letred_green_properties=["ok","green";"bad","red";"no_info","white";]letheat_map_properties=["cover 100%","red3";"cover 90%","red1";"cover 80%","orange";"cover 70%","yellow";"cover 60%","YellowGreen";"cover 50%","green";"cover 40%","cyan";"cover 30%","cyan3";"cover 20%","DeepSkyBlue1";"cover 10%","blue";(* Should we use a dark blue for 0, as it is the case usually with
* heatmaps? The picture can become very blue then.
* Do not use white though because draw_macrolevel use white when nothing
* was found so we want to differentiate such cases
*)"cover 0%","blue4";(* alternative: snow4 *)(* when we zoom on a file we just show red/green coverage, no heat color *)"ok","green";"bad","red";"base","azure4";"no_info","white";](*****************************************************************************)(* Multi layers indexing *)(*****************************************************************************)(* Am I reinventing database indexing ? Should use a real database
* to store layer information so one can then just use SQL to
* fastly get all the information relevant to a file and a line ?
* I doubt MySQL can be as fast and light as my JSON + hashtbl indexing.
*)letbuild_index_of_layers~rootlayers=lethmicro=Common2.hash_with_default(fun()->Hashtbl.create101)inlethmacro=Common2.hash_with_default(fun()->[])inlayers|>List.filter(fun(_layer,active)->active)|>List.iter(fun(layer,_active)->lethkind=Common.hash_of_listlayer.kindsinlayer.files|>List.iter(fun(file,finfo)->letfile=Filename.concatrootfilein(* todo? v is supposed to be a float representing a percentage of
* the rectangle but below we will add the macro info of multiple
* layers together which mean the float may not represent percentage
* anynore. They still represent a part of the file though.
* The caller would have to first recompute the sum of all those
* floats to recompute the actual multi-layer percentage.
*)letcolor_macro_level=finfo.macro_level|>Common.map_filter(fun(kind,v)->(* some sanity checking *)trySome(v,Hashtbl.findhkindkind)withNot_found->(* I was originally doing a failwith, but it can be convenient
* to be able to filter kinds in codemap by just editing the
* JSON file and removing certain kind definitions
*)pr2_once(spf"PB: kind %s was not defined"kind);None)inhmacro#updatefile(funold->color_macro_level@old);finfo.micro_level|>List.iter(fun(line,kind)->tryletcolor=Hashtbl.findhkindkindinhmicro#updatefile(funoldh->(* We add so the same line could be assigned multiple colors.
* The order of the layer could determine which color should
* have priority.
*)Hashtbl.addoldhlinecolor;oldh)withNot_found->pr2_once(spf"PB: kind %s was not defined"kind);)););{layers=layers;root=root;macro_index=hmacro#to_h;micro_index=hmicro#to_h;}(*****************************************************************************)(* Layers helpers *)(*****************************************************************************)lethas_active_layerslayers=layers.layers|>List.mapsnd|>Common2.or_list(*****************************************************************************)(* Meta *)(*****************************************************************************)(* generated by ocamltarzan *)letvof_emacs_colors=Ocaml.vof_stringsletvof_filenames=Ocaml.vof_stringsletrecvof_layer{title=v_title;description=v_description;files=v_files;kinds=v_kinds}=letbnds=[]inletarg=Ocaml.vof_list(fun(v1,v2)->letv1=vof_kindv1andv2=vof_emacs_colorv2inOcaml.VTuple[v1;v2])v_kindsinletbnd=("kinds",arg)inletbnds=bnd::bndsinletarg=Ocaml.vof_list(fun(v1,v2)->letv1=vof_filenamev1andv2=vof_file_infov2inOcaml.VTuple[v1;v2])v_filesinletbnd=("files",arg)inletbnds=bnd::bndsinletarg=Ocaml.vof_stringv_descriptioninletbnd=("description",arg)inletbnds=bnd::bndsinletarg=Ocaml.vof_stringv_titleinletbnd=("title",arg)inletbnds=bnd::bndsinOcaml.VDictbndsandvof_file_info{micro_level=v_micro_level;macro_level=v_macro_level}=letbnds=[]inletarg=Ocaml.vof_list(fun(v1,v2)->letv1=vof_kindv1andv2=Ocaml.vof_floatv2inOcaml.VTuple[v1;v2])v_macro_levelinletbnd=("macro_level",arg)inletbnds=bnd::bndsinletarg=Ocaml.vof_list(fun(v1,v2)->letv1=Ocaml.vof_intv1andv2=vof_kindv2inOcaml.VTuple[v1;v2])v_micro_levelinletbnd=("micro_level",arg)inletbnds=bnd::bndsinOcaml.VDictbndsandvof_kindv=Ocaml.vof_stringv(*****************************************************************************)(* Ocaml.v -> layer *)(*****************************************************************************)letemacs_color_ofvv=Ocaml.string_ofvvletfilename_ofvv=Ocaml.string_ofvvletrecord_check_extra_fields=reftruemoduleOcamlx=structopenOcamlmoduleJ=Json_type(*
let stag_incorrect_n_args _loc tag _v =
failwith ("stag_incorrect_n_args on: " ^ tag)
*)(*
let unexpected_stag loc v =
failwith ("unexpected_stag:")
*)(*
let record_only_pairs_expected loc v =
failwith ("record_only_pairs_expected:")
*)letrecord_duplicate_fields_loc_dup_flds_v=failwith("record_duplicate_fields:")letrecord_extra_fields_loc_flds_v=failwith("record_extra_fields:")letrecord_undefined_elements_loc_v_xs=failwith("record_undefined_elements:")letrecord_list_instead_atom_loc_v=failwith("record_list_instead_atom:")lettuple_of_size_n_expected_locnv=failwith(spf"tuple_of_size_n_expected: %d, got %s"n(Common2.dumpv))letrecjson_of_vv=matchvwith|VStrings->J.Strings|VSum((s,vs))->J.Array((J.Strings)::(List.mapjson_of_vvs))|VTuplexs->J.Array(xs|>List.mapjson_of_v)|VDictxs->J.Object(xs|>List.map(fun(s,v)->s,json_of_vv))|VListxs->J.Array(xs|>List.mapjson_of_v)|VNone->J.Null|VSomev->J.Array[J.String"Some";json_of_vv]|VRefv->J.Array[J.String"Ref";json_of_vv]|VUnit->J.Null(* ? *)|VBoolb->J.Boolb(* Note that 'Inf' can be used as a constructor but is also recognized
* by float_of_string as a float (infinity), so when I was implementing
* this code by reverse engineering the generated sexp, it was important
* to guard certain code.
*)|VFloatf->J.Floatf|VCharc->J.String(Common2.string_of_charc)|VInti->J.Inti|VTODO_v1->J.String"VTODO"|VVar_v1->failwith"json_of_v: VVar not handled"|VArrow_v1->failwith"json_of_v: VArrow not handled"(*
* Assumes the json was generated via 'ocamltarzan -choice json_of', which
* have certain conventions on how to encode variants for instance.
*)letrec(v_of_json:Json_type.json_type->v)=funj->matchjwith|J.Strings->VStrings|J.Inti->VInti|J.Floatf->VFloatf|J.Boolb->VBoolb|J.Null->raiseTodo(* Arrays are used for represent constructors or regular list. Have to
* go sligtly deeper to disambiguate.
*)|J.Arrayxs->(matchxswith(* VERY VERY UGLY. It is legitimate to have for instance tuples
* of strings where the first element is a string that happen to
* look like a constructor. With this ugly code we currently
* not handle that :(
*
* update: in the layer json file, one can have a filename
* like Makefile and we don't want it to be a constructor ...
* so for now I just generate constructors strings like
* __Pass so we know it comes from an ocaml constructor.
*)|(J.Strings)::xswhens=~"^__\\([A-Z][A-Za-z_]*\\)$"->letconstructor=Common.matched1sinVSum(constructor,List.mapv_of_jsonxs)|ys->VList(ys|>List.mapv_of_json))|J.Objectflds->VDict(flds|>List.map(fun(s,fld)->s,v_of_jsonfld))letsave_jsonfilejson=lets=Json_out.string_of_jsonjsoninCommon.write_file~filesend(* I have not yet an ocamltarzan script for the of_json ... but I have one
* for of_v, so have to pass through OCaml.v ... ugly
*)letreclayer_ofv__=let_loc="Xxx.layer"infunction|(Ocaml.VDictfield_sexpsassexp)->lettitle_field=refNoneanddescription_field=refNoneandfiles_field=refNoneandkinds_field=refNoneandduplicates=ref[]andextra=ref[]inletreciter=(function|(field_name,field_sexp)::tail->((matchfield_namewith|"title"->(match!title_fieldwith|None->letfvalue=Ocaml.string_ofvfield_sexpintitle_field:=Somefvalue|Some_->duplicates:=field_name::!duplicates)|"description"->(match!description_fieldwith|None->letfvalue=Ocaml.string_ofvfield_sexpindescription_field:=Somefvalue|Some_->duplicates:=field_name::!duplicates)|"files"->(match!files_fieldwith|None->letfvalue=Ocaml.list_ofv(function|Ocaml.VList([v1;v2])->letv1=filename_ofvv1andv2=file_info_ofvv2in(v1,v2)|sexp->Ocamlx.tuple_of_size_n_expected_loc2sexp)field_sexpinfiles_field:=Somefvalue|Some_->duplicates:=field_name::!duplicates)|"kinds"->(match!kinds_fieldwith|None->letfvalue=Ocaml.list_ofv(function|Ocaml.VList([v1;v2])->letv1=kind_ofvv1andv2=emacs_color_ofvv2in(v1,v2)|sexp->Ocamlx.tuple_of_size_n_expected_loc2sexp)field_sexpinkinds_field:=Somefvalue|Some_->duplicates:=field_name::!duplicates)|_->if!record_check_extra_fieldsthenextra:=field_name::!extraelse());itertail)|[]->())in(iterfield_sexps;if!duplicates<>[]thenOcamlx.record_duplicate_fields_loc!duplicatessexpelseif!extra<>[]thenOcamlx.record_extra_fields_loc!extrasexpelse(match((!title_field),(!description_field),(!files_field),(!kinds_field))with|(Sometitle_value,Somedescription_value,Somefiles_value,Somekinds_value)->{title=title_value;description=description_value;files=files_value;kinds=kinds_value;}|_->Ocamlx.record_undefined_elements_locsexp[((!title_field=None),"title");((!description_field=None),"description");((!files_field=None),"files");((!kinds_field=None),"kinds")]))|sexp->Ocamlx.record_list_instead_atom_locsexpandlayer_ofvsexp=layer_ofv__sexpandfile_info_ofv__=let_loc="Xxx.file_info"infunction|(Ocaml.VDictfield_sexpsassexp)->letmicro_level_field=refNoneandmacro_level_field=refNoneandduplicates=ref[]andextra=ref[]inletreciter=(function|(field_name,field_sexp)::tail->((matchfield_namewith|"micro_level"->(match!micro_level_fieldwith|None->letfvalue=Ocaml.list_ofv(function|Ocaml.VList([v1;v2])->letv1=Ocaml.int_ofvv1andv2=kind_ofvv2in(v1,v2)|sexp->Ocamlx.tuple_of_size_n_expected_loc2sexp)field_sexpinmicro_level_field:=Somefvalue|Some_->duplicates:=field_name::!duplicates)|"macro_level"->(match!macro_level_fieldwith|None->letfvalue=Ocaml.list_ofv(function|Ocaml.VList([v1;v2])->letv1=kind_ofvv1andv2=Ocaml.float_ofvv2in(v1,v2)|sexp->Ocamlx.tuple_of_size_n_expected_loc2sexp)field_sexpinmacro_level_field:=Somefvalue|Some_->duplicates:=field_name::!duplicates)|_->if!record_check_extra_fieldsthenextra:=field_name::!extraelse());itertail)|[]->())in(iterfield_sexps;if!duplicates<>[]thenOcamlx.record_duplicate_fields_loc!duplicatessexpelseif!extra<>[]thenOcamlx.record_extra_fields_loc!extrasexpelse(match((!micro_level_field),(!macro_level_field))with|(Somemicro_level_value,Somemacro_level_value)->{micro_level=micro_level_value;macro_level=macro_level_value;}|_->Ocamlx.record_undefined_elements_locsexp[((!micro_level_field=None),"micro_level");((!macro_level_field=None),"macro_level")]))|sexp->Ocamlx.record_list_instead_atom_locsexpandfile_info_ofvsexp=file_info_ofv__sexpandkind_ofv__=let_loc="Xxx.kind"infunsexp->Ocaml.string_ofvsexpandkind_ofvsexp=kind_ofv__sexp(*****************************************************************************)(* Json *)(*****************************************************************************)letjson_of_layerlayer=layer|>vof_layer|>Ocamlx.json_of_vletlayer_of_jsonjson=json|>Ocamlx.v_of_json|>layer_ofv(*****************************************************************************)(* Load/Save *)(*****************************************************************************)(* we allow to save in JSON format because it may be useful to let
* the user edit the layer file, for instance to adjust the colors.
*)letload_layerfile=(* pr2 (spf "loading layer: %s" file); *)ifFile_type.is_json_filenamefilethenJson_in.load_jsonfile|>layer_of_jsonelseCommon2.get_valuefileletsave_layerlayerfile=ifFile_type.is_json_filenamefile(* layer +> vof_layer +> Ocaml.string_of_v +> Common.write_file ~file *)thenlayer|>json_of_layer|>Ocamlx.save_jsonfileelseCommon2.write_valuelayerfile(*****************************************************************************)(* Layer builder helper *)(*****************************************************************************)(* Simple layer builder - group by file, by line, by property.
* The layer can also be used to summarize statistics per dirs and
* subdirs and so on.
*)letsimple_layer_of_parse_infos~root~title?(description="")xskinds=letranks_kinds=kinds|>List.map(fun(k,_color)->k)|>Common.index_list_1|>Common.hash_of_listin(* group by file, group by line, uniq categ *)letfiles_and_lines=xs|>List.map(fun(tok,kind)->letfile=Parse_info.file_of_infotokinletline=Parse_info.line_of_infotokinletfile'=Common2.relative_to_absolutefileinCommon.readable~rootfile',(line,kind))inlet(group_by_file:(Common.filename*(int*kind)list)list)=Common.group_assoc_bykey_efffiles_and_linesin{title=title;description=description;kinds=kinds;files=group_by_file|>List.map(fun(file,lines_and_kinds)->let(group_by_line:(int*kindlist)list)=Common.group_assoc_bykey_efflines_and_kindsinletall_kinds_in_file=group_by_line|>List.mapsnd|>List.flatten|>Common2.uniqin(file,{micro_level=group_by_line|>List.map(fun(line,kinds)->letkinds=Common2.uniqkindsin(* many kinds om same line, keep highest prio *)matchkindswith|[]->raiseImpossible|[x]->line,x|_->letsorted=kinds|>List.map(funx->x,Hashtbl.findranks_kindsx)|>Common.sort_by_val_lowfirstinline,List.hdsorted|>fst);macro_level=(* we could give a percentage per kind but right now
* we instead give a priority based on the rank of the kinds
* in the kind list
*)all_kinds_in_file|>List.map(funkind->(kind,1./.(float_of_int(Hashtbl.findranks_kindskind))))}));}(* old: superseded by Layer_code.layer.files and file_info
* type stat_per_file =
* (string (* a property *), int list (* lines *)) Common.assoc
*
* type stats =
* (Common.filename, stat_per_file) Hashtbl.t
*
*
* old:
* let (print_statistics: stats -> unit) = fun h ->
* let xxs = Common.hash_to_list h in
* pr2_gen (xxs);
* ()
*
* let gen_security_layer xs =
* let _root = Common.common_prefix_of_files_or_dirs xs in
* let files = Lib_parsing_php.find_php_files_of_dir_or_files xs in
*
* let h = Hashtbl.create 101 in
*
* files +> Common.index_list_and_total +> List.iter (fun (file, i, total) ->
* pr2 (spf "processing: %s (%d/%d)" file i total);
* let ast = Parse_php.parse_program file in
* let stat_file = stat_of_program ast in
* Hashtbl.add h file stat_file
* );
* Common.write_value h "/tmp/bigh";
* print_statistics h
*)(* Generates a layer_red_green<output> and layer_heatmap<output> file.
* Take a list of files with a percentage and possibly micro_level
* information.
*)(*
let layer_red_green_and_heatmap ~root ~output xs =
raise Todo
*)(*****************************************************************************)(* Layer stat *)(*****************************************************************************)(* todo? could be useful also to show # of files involved instead of
* just the line count.
*)letstat_of_layerlayer=leth=Common2.hash_with_default(fun()->0)inlayer.kinds|>List.iter(fun(kind,_color)->h#addkind0);layer.files|>List.iter(fun(_file,finfo)->finfo.micro_level|>List.iter(fun(_line,kind)->h#updatekind(funold->old+1)));h#to_listletfilter_layerflayer={layerwithfiles=layer.files|>List.filter(fun(file,_)->ffile);}