123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646(* Yoann Padioleau
*
* Copyright (C) 2012, 2014 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)openCommonmoduleE=Entity_codemoduleG=Graph_codemoduleG2=Graph_code_optiopenDependencies_matrix_codemoduleDM=Dependencies_matrix_code(*****************************************************************************)(* Prelude *)(*****************************************************************************)(*
* Module to create a Dependency Structure Matrix (DSM) based on
* a code graph.
* See http://en.wikipedia.org/wiki/Design_structure_matrix
* See also main_codegraph.ml
*
* history:
* - naive version
* - projection cache, memoize the projection of a node: given a deep node,
* what is the node present in the matrix that "represents" this deep node
* - full matrix pre-computation optimisation
* - compute lazily deep rows using only a subset of the edges
* - graph code opti, because using arrays is far more efficient than
* hashtbl and/or memoized hashtbl (hmmm)
* - remove full matrix, not anymore needed
* - better layout algorithm, minimize more backward dependencies
* - packing in "..." intermediate directories
* - TODO even better layout algorithm, hill climbing
*)(*****************************************************************************)(* Types *)(*****************************************************************************)(* Phantom types for safer array access between the graph_opti, dm *)type'aidx=inttypeidmtypeigopti(* old: type ifull, but full matrix concept is not needed anymore *)(*****************************************************************************)(* Helpers *)(*****************************************************************************)lethashtbl_find_nodehn=tryHashtbl.findhnwithNot_found->(* pr2 (spf "PB: %s" (G.string_of_node n));*)(* raise Not_found *)failwith(spf"Not_found: %s"(G.string_of_noden))lethashtbl_findhn=tryHashtbl.findhnwithNot_found->pr2_gen("PB:",n);raiseNot_found(*****************************************************************************)(* Building a matrix (given a specific config) *)(*****************************************************************************)letbuild_with_tree2treegopti=(* todo? when we expand do we create a line for the expanded? if no
* then it will have no projection so the test below is not enough.
* but may make sense to create a line for it which corresponds to
* the difference with the children so for all edges that link
* directly to this one?
*
*)letnodes=final_nodes_of_treetreeinletn=List.lengthnodesinletn_nodes=G2.nb_nodesgoptiinletname_to_idm=Hashtbl.create(n/2)inletidm_to_name=Array.maken("",E.Dir)inletigopti_to_idm=Array.maken_nodes(-1)inlet(i:idmidxref)=ref0innodes|>List.iter(funnode->Hashtbl.addname_to_idmnode!i;idm_to_name.(!i)<-node;igopti_to_idm.(hashtbl_find_nodegopti.G2.name_to_inode)<-!i;incri;);letdm={matrix=Common2.make_matrix_init~nrow:n~ncolumn:n(fun_i_j->0);name_to_i=name_to_idm;i_to_name=idm_to_name;config=tree;}inlet(projected_parent_of_igopti:idmidxarray)=Array.maken_nodes(-1)inlet(iroot:igoptiidx)=hashtbl_find_nodegopti.G2.name_to_iG.rootinletrecdepthparentigopti=letchildren=gopti.G2.has_children.(igopti)inletidm=igopti_to_idm.(igopti)inletproject=ifidm=-1thenparentelseidminprojected_parent_of_igopti.(igopti)<-project;children|>List.iter(depthproject);indepth(-1)iroot;gopti.G2.use|>Array.iteri(funixs->letparent_i=projected_parent_of_igopti.(i)inxs|>List.iter(funj->letparent_j=projected_parent_of_igopti.(j)in(* It's possible we operate on a slice of the original dsm,
* for instance when we focus on a node, in which case
* the projection of an edge can not project on anything
* in the current matrix.
*)ifparent_i<>-1&&parent_j<>-1thendm.matrix.(parent_i).(parent_j)<-dm.matrix.(parent_i).(parent_j)+1));dmletbuild_with_treeab=Common.profile_code"DM.build_with_tree"(fun()->build_with_tree2ab)(*****************************************************************************)(* Ordering the rows/columns helpers *)(*****************************************************************************)letformulax=assert(x>0);(* 1 + (int_of_float (log10 (float_of_int x))) *)xletcount_columnjm=letn=Array.lengthminletcnt=ref0infori=0ton-1doifm.(i).(j)>0&&i<>jthencnt:=!cnt+formula(m.(i).(j))done;!cntletis_empty_columnnmdm=count_column(hashtbl_find_nodedm.name_to_in)m=0letcount_rowim=letn=Array.lengthminletcnt=ref0inforj=0ton-1doifm.(i).(j)>0&&i<>jthencnt:=!cnt+formula(m.(i).(j))done;!cntletis_empty_rownmdm=count_row(hashtbl_find_nodedm.name_to_in)m=0letempty_all_cells_relevant_to_nodemdmn=leti=hashtbl_find_nodedm.name_to_ininletn=Array.lengthminforx=0ton-1dom.(i).(x)<-0;m.(x).(i)<-0;done(*****************************************************************************)(* Hill climbing! *)(*****************************************************************************)letreduced_matrixnodesdm=letn=List.lengthnodesinletm=Common2.make_matrix_init~nrow:n~ncolumn:n(fun_i_j->0)inleta=Array.of_listnodesinfori=0ton-1doforj=0ton-1doletni=a.(i)inletnj=a.(j)inletxi=hashtbl_find_nodedm.name_to_iniinletxj=hashtbl_find_nodedm.name_to_injinifi<>jthenbeginm.(i).(j)<-dm.matrix.(xi).(xj);enddonedone;a,mletscore_upper_trianglemdm=DM.score_upper_triangle{dmwithmatrix=m}[](* less: there has to be a more efficient way ... *)letswitchk1k2(a,m)=leta'=Array.copyainletm'=Array.mapArray.copyminletn=Array.lengthainletfidx=match()with|_whenidx=k1->k2|_whenidx=k2->k1|_->idxinfori=0ton-1doa'.(i)<-a.(fi)done;fori=0ton-1doforj=0ton-1dom'.(i).(j)<-m.(fi).(fj)donedone;a',m'(* less: simulated anneahiling? *)lethill_climbingnodesdm=leta,m=reduced_matrixnodesdminletn=Array.lengthainletcurrent_score=score_upper_trianglemdminpr2(spf"current score = %d"current_score);letrecaux(a,m)current_scorei~jump=letj=i+jumpinifj>=nthenifjump=(Array.lengthm-1)then(a,m)elseaux(a,m)current_score0~jump:(jump+1)elselet(a1,m1)=switchij(a,m)inletnew_score=score_upper_trianglem1dminifnew_score<current_scorethenbeginpr2(spf" %s <-> %s, before = %d, after = %d (jmp=%d)"(G.string_of_nodea.(i))(G.string_of_nodea.(j))current_scorenew_scorejump);aux(a1,m1)new_score0~jump:1endelseaux(a,m)current_score(i+1)~jumpinlet(a,_m)=aux(a,m)current_score0~jump:1inArray.to_lista(*****************************************************************************)(* Ordering the rows/columns heuristics *)(*****************************************************************************)letsort_by_count_rows_low_firstxsmdm=xs|>List.map(funn->n,count_row(hashtbl_find_nodedm.name_to_in)m)|>Common.sort_by_val_lowfirst|>List.mapfst(*
let sort_by_count_columns_high_first xs m dm =
xs +> List.map (fun n -> n, count_column (hashtbl_find_node dm.name_to_i n) m)
+> Common.sort_by_val_highfirst
+> List.map fst
*)(* todo: alternatives while discussing with matthieu
* - find the first row, which should be the lines with the smallest
* sum (as it will be part of the big sum of the upper triangular),
* remove then this line, and iterate
* - find the first row by doing the sum of (cell line / sum cell column)
* which is a bit equivalent to normalize, to do sum of percentage.
*)letsort_by_count_rows_low_columns_high_firstxsmdm=xs|>List.map(funn->letidx=hashtbl_find_nodedm.name_to_ininleth=float_of_int(count_rowidxm)/.(1.+.float_of_int(count_columnidxm))inn,h)|>Common.sort_by_val_lowfirst|>List.mapfst(*
* See http://dsmweb.org/dsmweb/en/understand-dsm/technical-dsm-tutorial/partitioning.html
*
* note: sorting by the number of cells which you depend on is not
* enough. For instance let's say X depends on 3 cells, A, B, Y and Y
* depends on 4: A, B, C, D. One could consider to put X
* upper in the matrix, but because X depends on Y, it's better to put
* Y upper.
*
* todo: optimize? we redo some computations ... should memoize
* more? or just invert rows/columns in the original matrix?
*)letpartition_matrixnodesdm=letm=dm.matrix|>Array.mapArray.copyinletn=Array.lengthminfori=0ton-1dom.(i).(i)<-0done;letleft=ref[]inletright=ref[]inletrecstep1nodes=(* "1. Identify system elements (or tasks) that can be determined (or
* executed) without input from the rest of the elements in the matrix.
* Those elements can easily be identified by observing an empty column
* in the DSM. Place those elements to the left of the DSM. Once an
* element is rearranged, it is removed from the DSM (with all its
* corresponding marks) and step 1 is repeated on the remaining
* elements."
*)letelts_with_empty_columns,rest=nodes|>List.partition(funnode->is_empty_columnnodemdm)inletxs=sort_by_count_rows_low_firstelts_with_empty_columnsdm.matrixdminxs|>List.iter(empty_all_cells_relevant_to_nodemdm);right:=xs@!right;(* pr2 (spf "step1: %s" (Common2.dump xs)); *)ifnullxsthenrestelsestep1restandstep2nodes=(* "2.Identify system elements (or tasks) that deliver no
* information to other elements in the matrix. Those elements can
* easily be identified by observing an empty row in the DSM. Place
* those elements to the right of the DSM. Once an element is
* rearranged, it is removed from the DSM (with all its corresponding
* marks) and step 2 is repeated on the remaining elements."
*)letelts_with_empty_lines,rest=nodes|>List.partition(funnode->is_empty_rownodemdm)in(* I use dm.matrix here and not the current matrix m because I want
* to sort by looking globally at whether this item uses very few things.
* todo: maybe don't need this 'm dm' to pass around to all those sort_xxx
* functions. Just pass dm.
*)letxs=sort_by_count_rows_low_firstelts_with_empty_linesdm.matrixdminxs|>List.iter(empty_all_cells_relevant_to_nodemdm);(* pr2 (spf "step2: %s" (Common2.dump xs)); *)left:=!left@xs;ifnullxsthenstep1restelsestep2restinletrest=step2nodesinifnullrestthen!left@!rightelsebegin(*
pr2 "CYCLE";
pr2_gen rest;
*)letrest=sort_by_count_rows_low_columns_high_firstrestmdminletrest=hill_climbingrestdmin!left@rest@!rightend(* to debug the heuristics *)letinfo_ordersdm=dm.matrix|>Array.mapi(funi_->letnrow=(count_rowidm.matrix)inletncol=(count_columnidm.matrix)inleth=float_of_intnrow/.(1.+.float_of_intncol)inh,(spf"%-20s: count lines = %d, count columns = %d, H = %.2f"(fst(dm.i_to_name.(i)))nrowncolh))|>Array.to_list|>Common.sort_by_key_lowfirst|>List.iter(fun(_,s)->pr2s)(*****************************************************************************)(* Manual ordering *)(*****************************************************************************)letoptional_manual_reordering(s,_node_kind)nodesconstraints_opt=matchconstraints_optwith|None->nodes|Someh->ifHashtbl.memhsthenbeginletxs=hashtbl_findhsinlethorder=xs|>Common.index_list_1|>Common.hash_of_listinletcurrent=ref0inletnodes_with_order=nodes|>List.map(fun(s,node_kind)->matchCommon2.hfind_optionshorderwith|None->pr2(spf"INFO_TXT: could not find %s in constraint set"s);(s,node_kind),!current|Somen->current:=n;(s,node_kind),n)inCommon.sort_by_val_lowfirstnodes_with_order|>List.mapfstendelsebeginpr2(spf"didn't find entry in constraints for %s"s);nodesend(*****************************************************************************)(* Create fake "a/b/..." directories *)(*****************************************************************************)(* design decisions, when should we pack?
* - in an adhoc manner in adjust_graph.txt
* - in the graph lazily while building the final config
* - in the graph lazily as a preprocessing phase on a full config
* - in an offline phase that packs everything?
* - in the UI?
*
* Packing lazily is good but it does not necessaraly work well with
* the Focus because depending on our focus, we may have want different
* packings. Also it makes it a bit hard to use cg from the command line
* in a subdirectory. A solution could be to restart from a fresh gopti for
* for each new focus.
*
* Packing in the UI would be more flexible for the Focus,
* but we need lots of extra logic whereas just abusing the Has and
* reorganize the graph makes things (at first) easier.
*
* There are some issues also on how packing and layering impact each other.
* We may not want to pack things that affect a lot the number of
* backward references once packed.
*)letthreshold_pack=ref30(* Optionaly put less "relevant" entries under an extra "..." fake dir.
*
* We used to do this phase in build() at the same time we were building
* the new config. But doing too many things at the same time was complicated
* so better to separate a bit things in a preprocessing phase
* where here we just adjust gopti.
*
* Moreover we wanted the heuristics to order and to pack to use different
* schemes. For packing we want to put under "..." entries considered
* irrelevant when looked globally, that is not look only at the
* column count in the submatrix but in the whole matrix.
* For instance if a/b/c/d is used a lot from e/, but not used
* that much internally inside a/b/c, we still don't want
* to put it under a extra "..." because this directory is globally
* very important.
*
* Moreover when in Focused mode, the children of a node
* are actually not the full set of children, and so we could pack
* things under a "..." that are incomplete? Hmmm at the same time
* it can be good to do some specialized packing based on a Focus.
*)letadjust_gopti_if_needed_lazilytreegopti=letgopti=refgoptiinletrecaux(tree:tree)(brothers:Graph_code.nodelist)=matchtreewith|Node(n,xs)->ifnullxsthenNode(n,[])else(* less: use the full list of children of n? xs can be a subset
* because in a focused generated config
*)ifList.lengthxs<=!threshold_packthenNode(n,xs|>List.map(fun(Node(n1,xs1))->letmore_brothers=xs|>Common.map_filter(fun(Node(n2,_))->ifn1<>n2thenSomen2elseNone)inaux(Node(n1,xs1))(brothers@more_brothers)))elsebeginletchildren_nodes=xs|>List.map(fun(Node(n,_))->n)inletconfig=(Node(n,(xs|>List.map(fun(Node(n,_))->Node(n,[])))@(brothers|>List.map(funn->Node(n,[])))))inletdm=build_with_treeconfig!goptiinletscore=children_nodes|>List.map(funn->letidx=hashtbl_find_nodedm.name_to_ininletm=dm.matrixinn,count_columnidxm+count_rowidxm(* + m.(idx).(idx) / 3 *))|>Common.sort_by_val_highfirst|>List.mapfstin(* minus one because after the packing we will have
* threshold_pack - 1 + the new entry = threshold_pack
* and so we will not loop again and again.
*)let(ok,to_pack)=Common2.splitAt(!threshold_pack-1)scorein(* pr2 (spf "REPACKING: TO_PACK = %s, TO_KEEP = %s"
(Common.dump to_pack) (Common.dump ok)); *)letnew_gopti,dotdotdot_entry=Graph_code_opti.adjust_graph_pack_some_children_under_dotdotdotnto_pack!goptiingopti:=new_gopti;Node(n,(ok@[dotdotdot_entry])|>List.map(funn->(* todo: grab the children of n in the original config? *)Node(n,[])))endinletadjusted_tree=auxtree[]in!gopti,adjusted_tree(*****************************************************************************)(* Building the matrix *)(*****************************************************************************)(* The tree passed is a configuration one would like to explore. Note
* that after a focus, the children of a node in this tree may not contain
* all the original children of this node.
*)letbuildtreeconstraints_optgopti=letgopti,tree=adjust_gopti_if_needed_lazilytreegoptiin(* let's compute a better reordered tree *)letrecauxtree=matchtreewith|Node(n,xs)->ifnullxsthenNode(n,[])elsebeginletconfig_depth1=Node(n,xs|>List.map(function(Node(n2,_))->(Node(n2,[]))))inletchildren_nodes=xs|>List.map(function(Node(n2,_))->n2)inleth_children_of_children_nodes=xs|>List.map(function(Node(n2,xs))->n2,xs)|>Common.hash_of_listin(* first draft *)letdm=build_with_treeconfig_depth1goptiin(* Now we need to reorder to minimize the number of dependencies in
* the top right corner of the matrix (of the submatrix actually)
*)letnodes_reordered=partition_matrixchildren_nodesdminletnodes_reordered=optional_manual_reorderingnnodes_reorderedconstraints_optinNode(n,nodes_reordered|>List.map(funn2->letxs=tryHashtbl.findh_children_of_children_nodesn2(* probably one of the newly created "..." child *)withNot_found->[]in(* recurse *)aux(Node(n2,xs))))endinletordered_config=auxtreeinbuild_with_treeordered_configgopti,gopti(*****************************************************************************)(* Path manipulation *)(*****************************************************************************)(* less: could be put in dependencies_matrix_code.ml *)letput_expand_just_before_last_focus_if_not_childrennxsg=letrecauxxs=matchxswith|[]->[Expandn]|x::xs->(matchxwith|Expand_->x::auxxs|Focus(n2,_style)->letchildren=Graph_code_opti.all_childrenn2ginifnot(List.memnchildren)then(Expandn)::x::xselsex::auxxs)inauxxsletfix_pathpathg=letrecauxaccxs=matchxswith|[]->acc|x::xs->(matchxwith|Focus_->aux(acc@[x])xs|Expand(n)->aux(put_expand_just_before_last_focus_if_not_childrennaccg)xs)inaux[]pathletconfig_of_path(path:config_path)gopti=letpath=fix_pathpathgoptiinletinitial_config=basic_config_optigoptiin(* pr2_gen path; *)path|>List.fold_left(fun(config,gopti)e->matchewith|Expandnode->expand_node_optinodeconfiggopti,gopti|Focus(node,kind)->letdm,gopti=buildconfigNonegoptiinfocus_on_nodenodekindconfigdm,gopti)(initial_config,gopti)