123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869(* Copyright (C) 2019, Francois Berenger
Yamanishi laboratory,
Department of Bioscience and Bioinformatics,
Faculty of Computer Science and Systems Engineering,
Kyushu Institute of Technology,
680-4 Kawazu, Iizuka, Fukuoka, 820-8502, Japan. *)(* mini molecule module *)moduleA=BatArraymoduleIntSet=BatSet.IntmoduleL=BatListmoduleStringMap=BatMap.Stringtypet={name:string;graph:Node.tarray;diameter:int;matrix:intarrayarray}letget_namem=m.nameletget_graphm=m.graphletnb_atomsm=A.lengthm.graphletcreatenamegraphdiametermatrix={name;graph;diameter;matrix}letget_typ(m:t)(i:int)=Node.get_typm.graph.(i)letget_succs(m:t)(i:int)=Node.get_succsm.graph.(i)(* list (sorted-uniq-counted) atom types of all atoms
at given distance from center atom *)lettypes_at_distance(center:int)(curr_height:int)(mol:t)=letmatrix_line=mol.matrix.(center)inletunsorted=A.fold_lefti(funaccix->ifx=curr_heightthen(get_typmoli)::accelseacc)[]matrix_linein(* layer at 'curr_height' *)(curr_height,Utls.list_uniq_countunsorted)letencode(max_height:int)(mol:t):(Atom_env.t*int)list=(* compute atom envs. of given atom up to maximum height allowed *)(* we cannot go deeper than 'maxi' on this molecule *)letmaxi=minmax_heightmol.diameterinletencode_atom(n_i:int):Atom_env.t=letdepths=L.range0`Tomaxiinletlayers=L.map(funheight->types_at_distancen_iheightmol)depthsin(* non empty layers *)L.filter(fun(_h,typs)->typs<>[])layersinletnb_atoms=A.lengthmol.graphinletatom_indexes=L.range0`To(nb_atoms-1)in(* canonicalize the encoding of the molecule by sorting its atom envs
and counting duplicates *)letatom_envs=L.mapencode_atomatom_indexesinUtls.list_uniq_countatom_envs