1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
type t = { inverted : bool; times_per_domain : float array array; runs : int }
let record ~budgetf ~n_domains ?(ensure_multi_domain = true)
?(domain_local_await = `Busy_wait) ?(n_warmups = 3) ?(n_runs_min = 7)
?(before = Fun.id) ~init ~work ?(after = Fun.id) () =
let barrier_before = Barrier.make n_domains in
let barrier_init = Barrier.make n_domains in
let barrier_work = Barrier.make n_domains in
let barrier_after = Barrier.make n_domains in
let results =
Array.init n_domains @@ fun _ ->
Stack.create () |> Multicore_magic.copy_as_padded
in
let budget_used = ref false |> Multicore_magic.copy_as_padded in
let runs = ref 0 |> Multicore_magic.copy_as_padded in
let exit = ref false in
let extra_domain =
if n_domains = 1 && ensure_multi_domain then
Some
( Domain.spawn @@ fun () ->
while not !exit do
Domain.cpu_relax ()
done )
else None
in
Gc.full_major ();
let budget_start = Mtime_clock.elapsed () in
let with_busy_wait () =
let open struct
type state = Init | Released | Awaiting of { mutable released : bool }
end in
let state = Atomic.make Init in
let release () =
if Multicore_magic.fenceless_get state != Released then
match Atomic.exchange state Released with
| Awaiting r -> r.released <- true
| _ -> ()
in
let await () =
if Multicore_magic.fenceless_get state != Released then
let awaiting = Awaiting { released = false } in
if Atomic.compare_and_set state Init awaiting then
match awaiting with
| Awaiting r ->
while not r.released do
Domain.cpu_relax ()
done
| _ -> ()
in
Domain_local_await.{ release; await }
in
let main domain_i =
let benchmark () =
for _ = 1 to n_warmups do
Barrier.await barrier_before;
if domain_i = 0 then begin
before ();
Gc.major ()
end;
Barrier.await barrier_init;
let state = init domain_i in
Barrier.await barrier_work;
work domain_i state;
Barrier.await barrier_after;
if domain_i = 0 then after ()
done;
while !runs < n_runs_min || not !budget_used do
Barrier.await barrier_before;
if domain_i = 0 then begin
before ();
if
let budget_stop = Mtime_clock.elapsed () in
let elapsedf =
Mtime.Span.to_float_ns
(Mtime.Span.abs_diff budget_stop budget_start)
*. (1. /. 1_000_000_000.0)
in
budgetf < elapsedf
then budget_used := true;
incr runs;
Gc.major ()
end;
Barrier.await barrier_init;
let state = init domain_i in
Barrier.await barrier_work;
let start = Mtime_clock.elapsed () in
work domain_i state;
let stop = Mtime_clock.elapsed () in
Barrier.await barrier_after;
if domain_i = 0 then after ();
Stack.push
(Mtime.Span.to_float_ns (Mtime.Span.abs_diff stop start)
*. (1. /. 1_000_000_000.0))
results.(domain_i)
done
in
match domain_local_await with
| `Busy_wait ->
Domain_local_await.using ~prepare_for_await:with_busy_wait
~while_running:benchmark
| `Neglect -> benchmark ()
in
let domains =
Array.init (n_domains - 1) @@ fun domain_i ->
Domain.spawn @@ fun () -> main (domain_i + 1)
in
main 0;
Array.iter Domain.join domains;
exit := true;
Option.iter Domain.join extra_domain;
let times_per_domain =
Array.init (Array.length results) @@ fun i ->
Stack.to_seq results.(i) |> Array.of_seq
in
{ inverted = false; times_per_domain; runs = !runs }
let average { inverted; times_per_domain; runs } =
let domains = Array.length times_per_domain in
let n = Array.length times_per_domain.(0) in
let times = Array.create_float n in
for run_i = 0 to n - 1 do
times.(run_i) <- 0.0;
for domain_i = 0 to domains - 1 do
times.(run_i) <- times.(run_i) +. times_per_domain.(domain_i).(run_i)
done;
times.(run_i) <- times.(run_i) /. Float.of_int domains
done;
{ inverted; times_per_domain = [| times |]; runs }
let invert { inverted; times_per_domain; runs } =
{
inverted = not inverted;
times_per_domain =
Array.map (Array.map (fun v -> 1.0 /. v)) times_per_domain;
runs;
}
module Stats = struct
type t = {
mean : float;
median : float;
sd : float;
inverted : bool;
best : float;
runs : int;
}
let scale factor t =
{
t with
mean = t.mean *. factor;
median = t.median *. factor;
sd = t.sd *. factor;
best = t.best *. factor;
}
let mean_of times =
Array.fold_left ( +. ) 0.0 times /. Float.of_int (Array.length times)
let sd_of times mean =
Float.sqrt
(mean_of
(Array.map
(fun v ->
let d = v -. mean in
d *. d)
times))
let median_of times =
Array.sort Float.compare times;
let n = Array.length times in
if n land 1 = 0 then (times.((n asr 1) - 1) +. times.(n asr 1)) /. 2.0
else times.(n asr 1)
let of_times { inverted; times_per_domain; runs } =
let domains = Array.length times_per_domain in
let n = Array.length times_per_domain.(0) in
let times = Array.create_float n in
for run_i = 0 to n - 1 do
times.(run_i) <- 0.0;
for domain_i = 0 to domains - 1 do
times.(run_i) <- times.(run_i) +. times_per_domain.(domain_i).(run_i)
done
done;
let mean = mean_of times in
let sd = sd_of times mean in
let median = median_of times in
let best =
if inverted then Array.fold_left Float.max Float.min_float times
else Array.fold_left Float.min Float.max_float times
in
{ mean; sd; median; inverted; best; runs }
let to_nonbreaking s =
s |> String.split_on_char ' '
|> String.concat " "
let to_json ~name ~description ~units t =
let trend =
if t.inverted then `String "higher-is-better"
else `String "lower-is-better"
in
[
`Assoc
[
("name", `String (to_nonbreaking name));
("value", `Float t.median);
("units", `String units);
("trend", trend);
("description", `String description);
("#best", `Float t.best);
("#mean", `Float t.mean);
("#median", `Float t.median);
("#sd", `Float t.sd);
("#runs", `Int t.runs);
];
]
end
let to_thruput_metrics ~n ~singular ?(plural = singular ^ "s") ~config
?(unit_of_time = `ns) ?(unit_of_rate = `M) times =
List.concat
[
times |> Stats.of_times
|> Stats.scale (Unit_of_time.to_multiplier unit_of_time /. Float.of_int n)
|> Stats.to_json
~name:(Printf.sprintf "time per %s/%s" singular config)
~description:(Printf.sprintf "Time to process one %s" singular)
~units:(Unit_of_time.to_mnemonic unit_of_time);
times |> average |> invert |> Stats.of_times
|> Stats.scale (Float.of_int n /. Unit_of_rate.to_divisor unit_of_rate)
|> Stats.to_json
~name:(Printf.sprintf "%s over time/%s" plural config)
~description:(Printf.sprintf "Total number of %s processed" plural)
~units:(Unit_of_rate.to_mnemonic unit_of_rate);
]