1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
module L = BatList
module Float = BatFloat
(** [trapezoid_surface x1 x2 y1 y2] is the area of the trapezoid whose
    parallel sides have lengths [y1] and [y2] and are [|x1 - x2|] apart:
    the horizontal extent multiplied by the mean of the two heights.
    The order of [x1] and [x2] does not matter. *)
let trapezoid_surface x1 x2 y1 y2 =
  let mean_height = (y1 +. y2) *. 0.5 in
  let width = abs_float (x1 -. x2) in
  width *. mean_height
(** [rank_order_by_score score_labels] returns the entries of
    [score_labels] sorted by decreasing score. Each entry is a 4-tuple
    whose second component is the (float) score; the other components
    are ignored by the ordering. *)
let rank_order_by_score score_labels =
  (* Arguments are swapped in the comparison so that higher scores
     come first. *)
  let by_decreasing_score (_, left, _, _) (_, right, _, _) =
    Float.compare right left
  in
  L.sort by_decreasing_score score_labels
(** [cumulated_number_actives high_scores_first] walks the entries in
    list order and returns, for each position, the number of entries
    seen so far (current one included) whose label (fourth component)
    is [true]. The result has the same length as the input. *)
let cumulated_number_actives high_scores_first =
  let _, reversed_counts =
    L.fold_left
      (fun (seen, acc) (_, _, _, is_active) ->
         let seen = if is_active then seen + 1 else seen in
         (seen, seen :: acc))
      (0, [])
      high_scores_first
  in
  (* Counts were accumulated back to front; restore input order. *)
  L.rev reversed_counts
(** [auc score_labels] sorts the entries by decreasing score and sweeps
    them once, counting entries labelled [true] ([tp]) and [false]
    ([fp]). Every time the score differs from the previous entry's
    score, the trapezoid between the last recorded [(fp, tp)] point and
    the current one is added to the running area, so entries with tied
    scores contribute a single trapezoid. A last trapezoid closes the
    curve, and the total is divided by [fp *. tp] (final counts), i.e.
    the area under the ROC curve.

    Returns [(area, cum_curve)] where [cum_curve] is
    [cumulated_number_actives] applied to the sorted entries.

    Note: if there is no [true]-labelled or no [false]-labelled entry,
    the divisor is zero and the resulting area is not a finite number. *)
let auc score_labels =
  let high_scores_first = rank_order_by_score score_labels in
  let rec sweep fp tp fp_prev tp_prev area (last_score : float) = function
    | [] ->
      (* Close the curve with the pending trapezoid, then normalize. *)
      (area +. trapezoid_surface fp fp_prev tp tp_prev) /. (fp *. tp)
    | (_, score, _, is_active) :: rest ->
      let area, last_score, fp_prev, tp_prev =
        if score = last_score then
          (* Same score as the previous entry: still inside a tie group. *)
          (area, last_score, fp_prev, tp_prev)
        else
          (* New score: bank the area accumulated since the last point. *)
          (area +. trapezoid_surface fp fp_prev tp tp_prev, score, fp, tp)
      in
      if is_active then
        sweep fp (tp +. 1.) fp_prev tp_prev area last_score rest
      else
        sweep (fp +. 1.) tp fp_prev tp_prev area last_score rest
  in
  (* [neg_infinity] as initial score forces a (zero-area) trapezoid on
     the first entry with any larger score. *)
  let area = sweep 0. 0. 0. 0. 0. neg_infinity high_scores_first in
  (area, cumulated_number_actives high_scores_first)
(** [actives_rate score_labels] returns [(n, rate)] where [n] is the
    number of entries and [rate] is the fraction of them whose label
    (fourth component) is [true].

    Note: for an empty list the rate is [0. /. 0.], i.e. not a number. *)
let actives_rate score_labels =
  let tally (actives, inactives) (_, _, _, is_active) =
    if is_active then (succ actives, inactives)
    else (actives, succ inactives)
  in
  let actives, inactives = L.fold_left tally (0, 0) score_labels in
  let total = actives + inactives in
  (total, float_of_int actives /. float_of_int total)
(** [enr_rate p score_labels] compares the rate of actives among the
    top-scoring entries with the rate of actives in the whole list.

    [p] is multiplied by the number of entries and rounded to an integer
    (via [Float.round_to_int]) to obtain [top_n], the number of
    best-scoring entries kept.

    Returns [(top_n, top_actives_rate, rand_actives_rate, enr_rate)]
    where [rand_actives_rate] is the actives rate over all entries,
    [top_actives_rate] the actives rate over the [top_n] best-scoring
    ones, and [enr_rate] their ratio [top /. rand].

    Note: the rates come from [actives_rate]; no guard is applied here
    against an empty selection or a zero overall rate. *)
let enr_rate p score_labels =
  let total, baseline_rate = actives_rate score_labels in
  let selected = Float.round_to_int (p *. float total) in
  let _, selected_rate =
    score_labels
    |> rank_order_by_score
    |> L.take selected
    |> actives_rate
  in
  (selected, selected_rate, baseline_rate, selected_rate /. baseline_rate)
(** [only_ER p score_labels] is the fourth component (the enrichment
    ratio) of [enr_rate p score_labels]; the other components are
    discarded. *)
let only_ER p score_labels =
  match enr_rate p score_labels with
  | _, _, _, ratio -> ratio
(** [power_metric cutoff scores_tot] computes [tpr /. (tpr +. fpr)] over
    the top-scoring part of [scores_tot], where
    - the selection size is [MyUtils.round (cutoff *. size)] entries
      (presumably rounding to the nearest whole number; confirm against
      [MyUtils]),
    - [tpr] is the number of actives in the selection divided by the
      total number of actives,
    - [fpr] is the number of non-actives in the selection divided by the
      total number of non-actives.
    Actives are the entries satisfying [Score_label.is_active].

    Assertions (raise [Assert_failure] unless disabled at compile time):
    [cutoff] must be in (0, 1], and the selection must contain at least
    one entry.

    Note: there is no guard against a list with no active or no
    non-active entry; the corresponding divisor is then zero. *)
let power_metric (cutoff: float) (scores_tot: Score_label.t list): float =
  let count_actives scores =
    L.fold_left
      (fun n score -> if Score_label.is_active score then n + 1 else n)
      0
      scores
  in
  assert (0.0 < cutoff && cutoff <= 1.0);
  let total = float (L.length scores_tot) in
  (* Kept as a float: it is reused below in the false-positive count. *)
  let selected = MyUtils.round (cutoff *. total) in
  let selected_count = int_of_float selected in
  assert (selected_count >= 1);
  let selected_actives =
    scores_tot
    |> rank_order_by_score
    |> L.take selected_count
    |> count_actives
    |> float
  in
  let total_actives = float (count_actives scores_tot) in
  let tpr = selected_actives /. total_actives in
  let fpr = (selected -. selected_actives) /. (total -. total_actives) in
  tpr /. (tpr +. fpr)