1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
module Log = Dolog.Log
module A = BatArray
module Fp = Fingerprint
module L = BatList
module IntMap = BatMap.Int
module IntSet = MyIntSet
let count_set_bits (bitv: Bitv.t): float =
float (Bitv.pop bitv)
let tversky_f alpha a b c =
let beta = 1.0 -. alpha in
c /. ((alpha *. a) +. (beta *. b) +. c)
let tversky_i alpha a b c =
tversky_f alpha (float a) (float b) (float c)
let tanimoto_intset (query: Fp.t) (cand: Fp.t): float =
IntSet.tanimoto (Fp.get_ints query) (Fp.get_ints cand)
let fp_tanimoto_score (query: Fp.t) (cand: Fp.t): float =
let bitv_tanimoto (fpA: Bitv.t) (fpB: Bitv.t): float =
count_set_bits (Bitv.bw_and fpA fpB) /.
count_set_bits (Bitv.bw_or fpA fpB) in
match query with
| Fp.MACCS _
| Fp.PUBCH _
| Fp.ECFP4 _ -> bitv_tanimoto (Fp.get_bits query) (Fp.get_bits cand)
| Fp.MOP2D _ -> tanimoto_intset query cand
let fp_tversky_score (alpha: float) (query: Fp.t) (cand: Fp.t): float =
let bitv_tversky (fpA: Bitv.t) (fpB: Bitv.t): float =
let c = Bitv.pop (Bitv.bw_and fpA fpB) in
let a = (Bitv.pop fpA) - c in
let b = (Bitv.pop fpB) - c in
tversky_i alpha a b c in
bitv_tversky (Fp.get_bits query) (Fp.get_bits cand)
let fp_tanimoto_dist (query: Fp.t) (cand: Fp.t): float =
1.0 -. (fp_tanimoto_score query cand)
let get_fp_score: Flags.score -> Fp.t -> Fp.t -> float = function
| Flags.Tanimoto -> fp_tanimoto_score
| Flags.Tversky alpha -> fp_tversky_score alpha
let tanimoto_et_al (xs: float array) (ys: float array): float * float * float =
let xys = ref 0.0 in
let x2s = ref 0.0 in
let y2s = ref 0.0 in
A.iter2 (fun x y ->
let xy = x *. y in
let x2 = x *. x in
let y2 = y *. y in
xys := !xys +. xy;
x2s := !x2s +. x2;
y2s := !y2s +. y2
) xs ys;
(!xys, !x2s, !y2s)
let array_tanimoto (xs: float array) (ys: float array): float =
let xys, x2s, y2s = tanimoto_et_al xs ys in
xys /. (x2s +. y2s -. xys)
let array_tversky (alpha: float) (xs: float array) (ys: float array): float =
let c, x2s, y2s = tanimoto_et_al xs ys in
let a = x2s -. c in
let b = y2s -. c in
tversky_f alpha a b c
let tanimoto (cons: float array) (cand: Fp.t): float =
let tanimoto_bits (cons: float array) (cand: Bitv.t): float =
let res = MyUtils.bitv_to_floats cand in
assert(A.length cons = A.length res);
array_tanimoto cons res in
tanimoto_bits cons (Fp.get_bits cand)
let tanimoto_intmap (cons: float IntMap.t) (cand': Fp.t): float =
let cand = Fp.get_ints cand' in
let key_values = IntMap.bindings cons in
let x2s = L.fold_left (fun acc (_k, v) -> acc +. v *. v) 0.0 key_values in
let y2s = float (IntSet.sum cand) in
let xys =
L.fold_left (fun acc (k, v) ->
if IntSet.mem k cand then
acc +. v
else
acc
) 0.0 key_values
in
xys /. (x2s +. y2s -. xys)
let tversky (alpha: float) (cons: float array) (cand: Fp.t): float =
let tversky_bits (cons: float array) (cand: Bitv.t): float =
let res = MyUtils.bitv_to_floats cand in
assert(A.length cons = A.length res);
array_tversky alpha cons res in
tversky_bits cons (Fp.get_bits cand)
let get_score (flag: Flags.score): (float array -> Fp.t -> float) =
match flag with
| Flags.Tanimoto -> tanimoto
| Flags.Tversky alpha -> tversky alpha