(* Periodic table helpers: chemical symbols, atomic numbers and the prime
   number assigned to each chemical element observed in ChEMBL. *)

module A = BatArray
module Ht = BatHashtbl
module L = BatList
module Log = Dolog.Log

(* Statistics for ChEMBL-34:
#atom_count symbol prime
61440436 H 2
53818425 C 3
8561288 O 5
8478199 N 7
1040693 F 11
932402 S 13
584961 Cl 17
122574 Br 19
55866 P 23
20896 I 29
15555 Na 31
5937 B 37
3924 Si 41
3432 Se 43
1557 K 47
555 Li 53
281 As 59
220 Te 61
165 Zn 67
155 Ca 71
130 Mg 73
120 Al 79
42 Ag 83
18 Sr 89
12 Rb 97
12 Ba 101
8 Cs 103
6 At 107
5 Bi 109
4 Xe 113
4 Ra 127
2 Kr 131
2 Be 137
1 He 139
1 Ga 149
*)

(* (symbol, prime) assignment, in the order of the statistics above.
   Single source of truth: both lookup tables below are built from it,
   so they cannot get out of sync. *)
let symbol_prime_pairs = [
  ("H", 2); ("C", 3); ("O", 5); ("N", 7); ("F", 11); ("S", 13);
  ("Cl", 17); ("Br", 19); ("P", 23); ("I", 29); ("Na", 31); ("B", 37);
  ("Si", 41); ("Se", 43); ("K", 47); ("Li", 53); ("As", 59); ("Te", 61);
  ("Zn", 67); ("Ca", 71); ("Mg", 73); ("Al", 79); ("Ag", 83); ("Sr", 89);
  ("Rb", 97); ("Ba", 101); ("Cs", 103); ("At", 107); ("Bi", 109);
  ("Xe", 113); ("Ra", 127); ("Kr", 131); ("Be", 137); ("He", 139);
  ("Ga", 149)
]

(* chemical symbol -> assigned prime *)
let symbol2prime = Ht.of_list symbol_prime_pairs

(* assigned prime -> chemical symbol (inverse of [symbol2prime]) *)
let prime2symbol =
  Ht.of_list (L.map (fun (symbol, prime) -> (prime, symbol)) symbol_prime_pairs)

(* Enough primes to cover the whole periodic table
$> primes 2 | head -118 | tr '\n' ';'
*)
let all_primes = [|
  2; 3; 5; 7; 11; 13; 17; 19; 23; 29; 31; 37; 41; 43; 47; 53; 59; 61; 67; 71;
  73; 79; 83; 89; 97; 101; 103; 107; 109; 113; 127; 131; 137; 139; 149; 151;
  157; 163; 167; 173; 179; 181; 191; 193; 197; 199; 211; 223; 227; 229; 233;
  239; 241; 251; 257; 263; 269; 271; 277; 281; 283; 293; 307; 311; 313; 317;
  331; 337; 347; 349; 353; 359; 367; 373; 379; 383; 389; 397; 401; 409; 419;
  421; 431; 433; 439; 443; 449; 457; 461; 463; 467; 479; 487; 491; 499; 503;
  509; 521; 523; 541; 547; 557; 563; 569; 571; 577; 587; 593; 599; 601; 607;
  613; 617; 619; 631; 641; 643; 647
|]

(* Prime assigned to chemical symbol [s].
   Logs a fatal error and exits with code 1 if [s] has no assigned prime
   (only the symbols listed in [symbol_prime_pairs] have one). *)
let prime_for_symbol (s: string): int =
  try Ht.find symbol2prime s
  with Not_found ->
    (Log.fatal "Ptable.prime_for_symbol: no prime assigned to %s" s;
     exit 1)

(* Chemical symbol to which prime [p] was assigned.
   Logs a fatal error and exits with code 1 if [p] is not an assigned prime. *)
let symbol_for_prime (p: int): string =
  try Ht.find prime2symbol p
  with Not_found ->
    (Log.fatal "Ptable.symbol_for_prime: no symbol assigned to %d" p;
     exit 1)

let max_atomic_number = 118

(* the first atomic number (0) is FAKE but necessary for tabulation *)
let anums = A.of_list (L.range 0 `To max_atomic_number)

(* Matches any single chemical symbol. Two-letter symbols are listed before
   one-letter ones so that, e.g., Cl is tried before C. *)
let elements_regexp =
  Str.regexp
    ("He\\|Li\\|Be\\|Ne\\|Na\\|Mg\\|Al\\|Si\\|Cl\\|Ar\\|Ca\\|Sc\\|Ti\\|Cr\\|Mn\\|Fe\\|Co\\|Ni\\|Cu\\|Zn\\|" ^
     "Ga\\|Ge\\|As\\|Se\\|Br\\|Kr\\|Rb\\|Sr\\|Zr\\|Nb\\|Mo\\|Tc\\|Ru\\|Rh\\|Pd\\|Ag\\|Cd\\|In\\|Sn\\|Sb\\|" ^
     "Te\\|Xe\\|Cs\\|Ba\\|La\\|Ce\\|Pr\\|Nd\\|Pm\\|Sm\\|Eu\\|Gd\\|Tb\\|Dy\\|Ho\\|Er\\|Tm\\|Yb\\|Lu\\|Hf\\|" ^
     "Ta\\|Re\\|Os\\|Ir\\|Pt\\|Au\\|Hg\\|Tl\\|Pb\\|Bi\\|Po\\|At\\|Rn\\|Fr\\|Ra\\|Ac\\|Th\\|Pa\\|Np\\|Pu\\|" ^
     "Am\\|Cm\\|Bk\\|Cf\\|Es\\|Fm\\|Md\\|No\\|Lr\\|Rf\\|Db\\|Sg\\|Bh\\|Hs\\|Mt\\|Ds\\|Rg\\|Cn\\|Nh\\|Fl\\|" ^
     "Mc\\|Lv\\|Ts\\|Og\\|H\\|B\\|C\\|N\\|O\\|F\\|P\\|S\\|K\\|V\\|Y\\|I\\|W\\|U")

(* chemical symbols; 1st elt. is also for tabulation reasons only *)
let symbols = [|
  "";
  "H"; "He"; "Li"; "Be"; "B"; "C"; "N"; "O"; "F"; "Ne";
  "Na"; "Mg"; "Al"; "Si"; "P"; "S"; "Cl"; "Ar"; "K"; "Ca";
  "Sc"; "Ti"; "V"; "Cr"; "Mn"; "Fe"; "Co"; "Ni"; "Cu"; "Zn";
  "Ga"; "Ge"; "As"; "Se"; "Br"; "Kr"; "Rb"; "Sr"; "Y"; "Zr";
  "Nb"; "Mo"; "Tc"; "Ru"; "Rh"; "Pd"; "Ag"; "Cd"; "In"; "Sn";
  "Sb"; "Te"; "I"; "Xe"; "Cs"; "Ba"; "La"; "Ce"; "Pr"; "Nd";
  "Pm"; "Sm"; "Eu"; "Gd"; "Tb"; "Dy"; "Ho"; "Er"; "Tm"; "Yb";
  "Lu"; "Hf"; "Ta"; "W"; "Re"; "Os"; "Ir"; "Pt"; "Au"; "Hg";
  "Tl"; "Pb"; "Bi"; "Po"; "At"; "Rn"; "Fr"; "Ra"; "Ac"; "Th";
  "Pa"; "U"; "Np"; "Pu"; "Am"; "Cm"; "Bk"; "Cf"; "Es"; "Fm";
  "Md"; "No"; "Lr"; "Rf"; "Db"; "Sg"; "Bh"; "Hs"; "Mt"; "Ds";
  "Rg"; "Cn"; "Nh"; "Fl"; "Mc"; "Lv"; "Ts"; "Og"
|]

(* Chemical symbol of atomic number [a].
   Logs a fatal error and exits with code 1 unless
   1 <= a <= max_atomic_number. Negative values are rejected here too;
   otherwise they would reach the array access and raise Invalid_argument. *)
let symbol_of_anum a =
  if a < 1 || a > max_atomic_number then
    (Log.fatal "Ptable.symbol_of_anum: no such anum: %d" a;
     exit 1)
  else
    symbols.(a)

(* chemical symbol -> atomic number; derived from [symbols] so that the two
   tables always agree. Index 0 (the fake element) is skipped. *)
let symbol2anum =
  let ht = Ht.create max_atomic_number in
  A.iteri (fun anum symbol -> if anum > 0 then Ht.add ht symbol anum) symbols;
  ht

(* Atomic number of chemical symbol [s].
   Logs a fatal error and exits with code 1 if [s] is not a known symbol. *)
let anum_of_symbol s =
  try Ht.find symbol2anum s
  with Not_found ->
    (Log.fatal "Ptable.anum_of_symbol: no such chemical element: %s" s;
     exit 1)

(* Prime assigned to the element of atomic number [a].
   Exits (via the two functions it composes) if [a] is out of range or if
   that element has no assigned prime. *)
let prime_for_anum a =
  prime_for_symbol (symbol_of_anum a)