Source file qcow_recycler.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
(* Log source used by every message emitted from this module.  Its level
   is set to [Info] at creation time. *)
let src =
  let qcow_src = Logs.Src.create "qcow" ~doc:"qcow2-formatted BLOCK device" in
  let () = Logs.Src.set_level qcow_src (Some Logs.Info) in
  qcow_src
(* Logging functions bound to the [src] log source defined above. *)
module Log = (val Logs.src_log src : Logs.LOG)
open Qcow_types
(* Infix shift helpers on int64 values.
   NOTE: from this point on [( |> )] is bound to [Int64.shift_right]; it is
   NOT the usual reverse-application (pipeline) operator, so nothing below
   may use it to chain function calls. *)
let ( <| ) = Int64.shift_left
let ( |> ) = Int64.shift_right
(* Short aliases for the sibling qcow modules used in the functor below. *)
module Cache = Qcow_cache
module Error = Qcow_error
module Locks = Qcow_locks
module Metadata = Qcow_metadata
module Physical = Qcow_physical
module Make(B: Qcow_s.RESIZABLE_BLOCK)(Time: Mirage_time.S) = struct
(* State of one block recycler instance. *)
type t = {
(* Block device holding the image: every read, write, flush and resize
issued by this module goes through it. *)
base: B.t;
(* Sector size in bytes, used to turn byte offsets into sector numbers. *)
sector_size: int;
(* A cluster is [1 lsl cluster_bits] bytes long. *)
cluster_bits: int;
(* Cluster map; [None] until [set_cluster_map] is called.  The operations
below assert that it has been set. *)
mutable cluster_map: Qcow_cluster_map.t option;
(* Cluster cache; within this module it is only consulted for a debug
assertion after a cluster copy. *)
cache: Cache.t;
(* Cluster read/write locks and the metadata lock. *)
locks: Locks.t;
(* Handle used to update metadata clusters when references are rewritten. *)
metadata: Metadata.t;
(* Zero-filled buffer that [erase] writes over clusters. *)
zero_buffer: Cstruct.t;
(* Replaced by [start_background_thread]; initially an already-resolved
promise. *)
mutable background_thread: unit Lwt.t;
(* Flag checked by the background flusher. *)
mutable need_to_flush: bool;
(* Condition signalled whenever [need_to_flush] is set, to wake the
background flusher. *)
need_to_flush_c: unit Lwt_condition.t;
(* Held for the whole of [flush] and while [update_references] rewrites a
metadata cluster, so the two never run concurrently. *)
flush_m: Lwt_mutex.t;
(* When true, the leaked-block check runs before a compaction starts. *)
runtime_asserts: bool;
}
(* [create ~base ~sector_size ~cluster_bits ~cache ~locks ~metadata
   ~runtime_asserts] builds a recycler that has no cluster map attached,
   no flush pending and an already-resolved background thread.  A 256-page
   buffer is allocated and zero-filled up front for later use by [erase]. *)
let create ~base ~sector_size ~cluster_bits ~cache ~locks ~metadata ~runtime_asserts =
  let zero_buffer = Io_page.to_cstruct (Io_page.get 256) in
  Cstruct.memset zero_buffer 0;
  {
    base;
    sector_size;
    cluster_bits;
    cluster_map = None;
    cache;
    locks;
    metadata;
    zero_buffer;
    background_thread = Lwt.return_unit;
    need_to_flush = false;
    need_to_flush_c = Lwt_condition.create ();
    flush_m = Lwt_mutex.create ();
    runtime_asserts;
  }
(* [set_cluster_map t cluster_map] attaches [cluster_map] to [t], replacing
   any map set earlier. *)
let set_cluster_map t cluster_map =
  let attached = Some cluster_map in
  t.cluster_map <- attached
(* [allocate t n] tries to take [n] clusters out of the Available set of
   the cluster map.  On success the taken clusters are moved from the
   [Available] state to the [Roots] state and returned as [Some set];
   if the Available set cannot supply them the result is [None].
   Fails with an assertion if no cluster map has been set. *)
let allocate t n =
  let cluster_map = match t.cluster_map with
    | None -> assert false
    | Some m -> m in
  let available = Qcow_cluster_map.Available.get cluster_map in
  match Cluster.IntervalSet.take available n with
  | None -> None
  | Some (taken, _free) ->
    Log.debug (fun f -> f "Allocated %s clusters from free list: %s"
      (Cluster.to_string n)
      (Sexplib.Sexp.to_string_hum ~indent:2 (Cluster.IntervalSet.sexp_of_t taken))
    );
    Qcow_cluster_map.set_cluster_state cluster_map taken
      Qcow_cluster_map.Available Qcow_cluster_map.Roots;
    Some taken
(* [copy_already_locked t src dst] reads the whole cluster [src] from the
   base device and writes it over cluster [dst].  As the name says, the
   caller is expected to hold the locks on both clusters already; no lock
   is taken here.

   Returns [Ok ()] on success.  [`Unimplemented], [`Disconnected] and
   [`Is_read_only] device errors are returned as [Error]; any other device
   error fails the promise with [Failure].

   After a successful write, the destination is checked: it must not be
   cached, it must be in the Copies set of the cluster map and it must not
   itself be the source of a move.  A violation is logged and then aborts
   with an assertion failure.  Also asserts that a cluster map is set. *)
let copy_already_locked t src dst =
  let src = Cluster.to_int64 src and dst = Cluster.to_int64 dst in
  let cluster_map = match t.cluster_map with
    | Some x -> x
    | None -> assert false in
  Log.debug (fun f -> f "Copy cluster %Ld to %Ld" src dst);
  (* Number of pages needed to hold one cluster.  The constant 12 is
     presumably log2 of the Io_page page size (4096 bytes) -- TODO confirm.
     When a cluster is smaller than one page the shift count would be
     negative, for which [lsl] gives an unspecified result, so use a single
     page in that case. *)
  let npages =
    if t.cluster_bits <= 12 then 1 else 1 lsl (t.cluster_bits - 12) in
  let pages = Io_page.(to_cstruct @@ get npages) in
  (* Exactly one cluster worth of bytes, even if the pages are larger. *)
  let cluster = Cstruct.sub pages 0 (1 lsl t.cluster_bits) in
  let sectors_per_cluster = Int64.(div (1L <| t.cluster_bits) (of_int t.sector_size)) in
  let src_sector = Int64.mul src sectors_per_cluster in
  let dst_sector = Int64.mul dst sectors_per_cluster in
  let open Lwt.Infix in
  B.read t.base src_sector [ cluster ]
  >>= function
  | Error `Unimplemented -> Lwt.return (Error `Unimplemented)
  | Error `Disconnected -> Lwt.return (Error `Disconnected)
  | Error e -> Format.kasprintf Lwt.fail_with "Unknown error: %a" B.pp_error e
  | Ok () ->
    B.write t.base dst_sector [ cluster ]
    >>= function
    | Error `Unimplemented -> Lwt.return (Error `Unimplemented)
    | Error `Disconnected -> Lwt.return (Error `Disconnected)
    | Error `Is_read_only -> Lwt.return (Error `Is_read_only)
    | Error e -> Format.kasprintf Lwt.fail_with "Unknown error: %a" B.pp_write_error e
    | Ok () ->
      let dst' = Cluster.of_int64 dst in
      Cache.Debug.assert_not_cached t.cache dst';
      (* The destination must be registered as a copy target. *)
      if not @@ Qcow_cluster_map.Copies.mem cluster_map dst' then begin
        Log.err (fun f -> f "Copy cluster %Ld to %Ld: but %Ld is not Junk" src dst dst);
        Qcow_cluster_map.Debug.assert_no_leaked_blocks cluster_map;
        assert false
      end;
      (* The destination must not be in the middle of being moved itself. *)
      if Qcow_cluster_map.is_moving cluster_map dst' then begin
        Log.err (fun f -> f "Copy cluster from %Ld to %Ld: but %Ld is also moving" src dst dst);
        Qcow_cluster_map.Debug.assert_no_leaked_blocks cluster_map;
        assert false
      end;
      Lwt.return (Ok ())
(* [copy t src dst] copies cluster [src] over cluster [dst] while holding
   the read lock on [src] and the write lock on [dst].  The read lock is
   taken first, the write lock second.  The result is that of
   [copy_already_locked]. *)
let copy t src dst =
  let copy_with_dst_locked () =
    Locks.Write.with_lock t.locks dst
      (fun () -> copy_already_locked t src dst) in
  Locks.Read.with_lock t.locks src copy_with_dst_locked
(* [move t move] performs the data-copy step of a single cluster move.

   With the read lock on the source and the write lock on the destination
   held, it first checks that the source is still registered as moving.
   If it is not, the move is treated as cancelled and nothing is copied.
   Otherwise the cluster is copied and the move is put in the [Copied]
   state.

   Returns [Ok ()] both when the copy happened and when the move turned
   out to be cancelled; device errors are reported as for
   [copy_already_locked].  Asserts that a cluster map is set. *)
let move t move =
  let cluster_map = match t.cluster_map with
    | None -> assert false
    | Some m -> m in
  let src = move.Qcow_cluster_map.Move.src in
  let dst = move.Qcow_cluster_map.Move.dst in
  Log.debug (fun f -> f "move %s -> %s" (Cluster.to_string src) (Cluster.to_string dst));
  let open Lwt.Infix in
  let copy_if_still_moving () =
    if Qcow_cluster_map.is_moving cluster_map src then begin
      copy_already_locked t src dst
      >>= function
      | Error `Unimplemented -> Lwt.return (Error `Unimplemented)
      | Error `Disconnected -> Lwt.return (Error `Disconnected)
      | Error `Is_read_only -> Lwt.return (Error `Is_read_only)
      | Error e -> Format.kasprintf Lwt.fail_with "Unknown error: %a" B.pp_write_error e
      | Ok () ->
        Qcow_cluster_map.set_move_state cluster_map move Qcow_cluster_map.Copied;
        Lwt.return (Ok ())
    end else begin
      Log.info (fun f -> f "Copy of cluster %s prevented: move operation cancelled" (Cluster.to_string src));
      Lwt.return (Ok ())
    end in
  Locks.Read.with_lock t.locks src
    (fun () -> Locks.Write.with_lock t.locks dst copy_if_still_moving)
(* [move_all ?progress_cb t moves] runs [move] on every element of [moves],
   one after the other and in list order, stopping at the first error.

   [progress_cb ~percent] is called after each successful move with the
   percentage of moves completed so far, so the call made after the last
   move reports 100.  It is never called for an empty list.  The default
   callback does nothing.

   Returns [Ok ()] when every move succeeded, otherwise the first
   [Error]. *)
let move_all ?(progress_cb = fun ~percent:_ -> ()) t moves =
  let total = List.length moves in
  let rec loop completed = function
    | [] -> Lwt.return (Ok ())
    | m :: ms ->
      let open Lwt.Infix in
      move t m
      >>= function
      | Error e -> Lwt.return_error e
      | Ok () ->
        (* Count the move that has just finished before reporting; [total]
           is at least 1 here because the list is non-empty. *)
        let completed = completed + 1 in
        progress_cb ~percent:((100 * completed) / total);
        loop completed ms in
  loop 0 moves
(* [erase t remaining] overwrites every cluster in the interval set
   [remaining] with zeroes on the base device.

   Each interval is written as a sequence of chunks taken from
   [t.zero_buffer]; a chunk is never longer than that buffer.  Chunking is
   done in bytes rather than in whole clusters so that progress is made
   even when a single cluster is larger than the zero buffer.

   Returns [Ok ()] once every interval has been written, or the first
   write error, after which no further interval is touched. *)
let erase t remaining =
  let open Lwt.Infix in
  let intervals = Cluster.IntervalSet.fold (fun i acc -> i :: acc) remaining [] in
  let buffer_bytes = Int64.of_int (Cstruct.len t.zero_buffer) in
  (* An empty buffer could never make progress. *)
  assert (buffer_bytes > 0L);
  let sector_size = Int64.of_int t.sector_size in
  (* Write [len] zero bytes at byte offset [offset]; [len] must fit in the
     zero buffer. *)
  let write_zeroes offset len =
    assert (len <= buffer_bytes);
    let buf = Cstruct.sub t.zero_buffer 0 (Int64.to_int len) in
    let sector = Int64.div offset sector_size in
    B.write t.base sector [ buf ] in
  (* Zero [len] bytes starting at [offset], one buffer-sized chunk at a
     time, with a final shorter chunk for whatever is left. *)
  let rec loop offset len =
    if len = 0L then Lwt.return (Ok ())
    else if len > buffer_bytes then begin
      write_zeroes offset buffer_bytes
      >>= function
      | Error e -> Lwt.return (Error e)
      | Ok () -> loop (Int64.add offset buffer_bytes) (Int64.sub len buffer_bytes)
    end else begin
      write_zeroes offset len
    end in
  Lwt_list.fold_left_s
    (fun acc i -> match acc with
      | Error e -> Lwt.return (Error e)
      | Ok () ->
        let x, y = Cluster.IntervalSet.Interval.(x i, y i) in
        let x = Cluster.to_int64 x and y = Cluster.to_int64 y in
        (* Interval bounds are inclusive. *)
        let n = Int64.(succ @@ sub y x) in
        Log.debug (fun f -> f "erasing %Ld clusters (%Ld -> %Ld)" n x y);
        loop (x <| t.cluster_bits) (n <| t.cluster_bits)
    ) (Ok ()) intervals
(* [update_references t] rewrites the metadata references of every move
that has reached the [Flushed] state, so that they point at the move
destination instead of the move source.

Outline of the steps below:
- group the Flushed moves by the metadata cluster which currently holds
the reference to their source; a move whose source has no reference in
the cluster map is left out;
- for each such metadata cluster, try to take its write lock without
waiting; if the lock is unavailable, cancel all the moves of that group;
- otherwise, with [t.flush_m] held, update the cluster through
[Metadata.update], marking every rewritten move as [Referenced];
- finally set [t.need_to_flush] and signal the background flusher.

Returns [Ok n] where [n] is the number of references rewritten, or the
first [Error] produced while updating.  Aborts with an assertion failure
if no cluster map is set or if an existing reference does not point at
the expected source cluster. *)
let update_references t =
let cluster_map = match t.cluster_map with
| None -> assert false
| Some x -> x in
let open Qcow_cluster_map in
(* Map from referencing metadata cluster to the list of Flushed moves whose
source is referenced from that cluster. *)
let flushed' =
Cluster.Map.fold (fun src move acc ->
assert (src = move.Qcow_cluster_map.move.Qcow_cluster_map.Move.src);
match move.state with
| Flushed ->
begin match Qcow_cluster_map.find cluster_map src with
| exception Not_found -> acc
| ref_cluster, _ ->
let existing =
if Cluster.Map.mem ref_cluster acc
then Cluster.Map.find ref_cluster acc
else [] in
Cluster.Map.add ref_cluster (move :: existing) acc
end
| _ -> acc
) (moves cluster_map) Cluster.Map.empty in
let flushed = Cluster.Map.bindings flushed' in
let nr_updated = ref 0L in
let open Lwt.Infix in
let client = Locks.Client.make
(fun () -> "Rewriting references after a block copy") in
(* The fold accumulator [subst] maps the source of each move rewritten so
far to its destination. *)
Lwt_list.fold_left_s
(fun acc (ref_cluster', moves) -> match acc with
| Error e -> Lwt.return (Error e)
| Ok subst ->
(* If the metadata cluster was itself relocated earlier in this pass, use
its new location. *)
let ref_cluster = try Cluster.Map.find ref_cluster' subst with Not_found -> ref_cluster' in
begin match Locks.Write.try_lock ~client t.locks ref_cluster with
| None ->
(* The metadata cluster is locked by someone else: give up on these moves
rather than wait. *)
List.iter (fun ({ move = { Move.src; dst }; _ }) ->
Log.debug (fun f -> f "Not rewriting reference in %s from %s to %s: metadata cluster is locked"
(Cluster.to_string ref_cluster)
(Cluster.to_string src) (Cluster.to_string dst)
);
cancel_move cluster_map src
) moves;
Lwt.return (Ok subst)
| Some lock ->
(* The lock is released by the finaliser below, whether the update succeeds
or fails. *)
Lwt.finalize
(fun () ->
Lwt_mutex.with_lock t.flush_m
(fun () ->
Metadata.update ~client t.metadata ref_cluster
(fun c ->
Log.info (fun f -> f "Updating %d references in cluster %s" (List.length moves) (Cluster.to_string ref_cluster));
let addresses = Metadata.Physical.of_contents c in
try
let result = List.fold_left
(fun acc ({ move = { Move.src; dst }; _ } as move) -> match acc with
| Error e -> Error e
| Ok subst ->
(* Look the reference up again: it may have changed since the moves were
grouped. *)
begin match Qcow_cluster_map.find cluster_map src with
| exception Not_found ->
Log.warn (fun f -> f "Not copying cluster %s to %s: %s has been discarded"
(Cluster.to_string src) (Cluster.to_string dst) (Cluster.to_string src)
);
Ok subst
| ref_cluster', ref_cluster_within ->
if ref_cluster' <> ref_cluster then begin
(* The reference now lives in a different metadata cluster: skip it. *)
Log.info (fun f -> f "Reference to %s moved from %s:%d to %s:%d"
(Cluster.to_string src) (Cluster.to_string ref_cluster) ref_cluster_within
(Cluster.to_string ref_cluster') ref_cluster_within
);
Ok subst
end else if not(Cluster.Map.mem src (Qcow_cluster_map.moves cluster_map)) then begin
(* The move is no longer registered in the cluster map: skip it. *)
Log.debug (fun f -> f "Not rewriting reference in %s :%d from %s to %s: move as been cancelled"
(Cluster.to_string ref_cluster) ref_cluster_within
(Cluster.to_string src) (Cluster.to_string dst)
);
Ok subst
end else begin
let old_reference = Metadata.Physical.get addresses ref_cluster_within in
let old_cluster = Qcow_physical.cluster ~cluster_bits:t.cluster_bits old_reference in
(* Sanity check: the entry about to be overwritten must point at [src]. *)
if old_cluster <> src then begin
Log.err (fun f -> f "Rewriting reference in %s :%d from %s to %s, old reference actually pointing to %s"
(Cluster.to_string ref_cluster) ref_cluster_within
(Cluster.to_string src) (Cluster.to_string dst)
(Cluster.to_string old_cluster)
);
assert false
end;
Log.debug (fun f -> f "Rewriting reference in %s :%d from %s to %s"
(Cluster.to_string ref_cluster) ref_cluster_within
(Cluster.to_string src) (Cluster.to_string dst)
);
(* Build the new reference at the byte offset of [dst], carrying over the
mutable and compressed flags of the old reference. *)
let dst' = Cluster.to_int dst lsl t.cluster_bits in
let new_reference = Qcow_physical.make ~is_mutable:(Qcow_physical.is_mutable old_reference) ~is_compressed:(Qcow_physical.is_compressed old_reference) dst' in
set_move_state cluster_map move.move Referenced;
Metadata.Physical.set addresses ref_cluster_within new_reference;
nr_updated := Int64.succ !nr_updated;
Ok (Cluster.Map.add src dst subst)
end
end
) (Ok subst) moves in
match result with
| Error e -> Lwt.return (Error e)
| Ok subst ->
(* For an immovable referencing cluster (the log message below calls it
L1), the substitutions are also applied to the cluster map. *)
if Qcow_cluster_map.is_immovable cluster_map ref_cluster then begin
Log.info (fun f -> f "Cluster %s is L1: we must remap L2 references" (Cluster.to_string ref_cluster));
Qcow_cluster_map.update_references cluster_map subst
end;
Lwt.return (Ok subst)
with Error.Duplicate_reference((c, w), (c', w'), (target: int64)) as e ->
(* Log all the Flushed moves, run the duplicate-reference debug helper and
the leak check, then fail with the original exception. *)
Log.err (fun f -> f "Duplicate_reference during update_references of %s"
(String.concat ", " @@ List.map Qcow_cluster_map.string_of_move @@ List.concat @@ List.map snd flushed)
);
let open Error.Lwt_write_error.Infix in
Qcow_debug.on_duplicate_reference t.metadata cluster_map ~cluster_bits:t.cluster_bits (c, w) (c', w') target
>>= fun () ->
Qcow_cluster_map.Debug.assert_no_leaked_blocks cluster_map;
Lwt.fail e
| e ->
(* Any other exception: run the leak check, then re-raise unchanged. *)
Qcow_cluster_map.Debug.assert_no_leaked_blocks cluster_map;
raise e
)
)
) (fun () ->
Locks.unlock lock;
Lwt.return_unit
)
end
) (Ok Cluster.Map.empty) flushed
>>= function
| Ok _subst ->
(* Request a flush from the background flusher. *)
t.need_to_flush <- true;
Lwt_condition.signal t.need_to_flush_c ();
Lwt.return (Ok !nr_updated)
| Error e -> Lwt.return (Error e)
(* [flush t] flushes the base device and then advances the recycler state
   that depended on that flush, all while holding [t.flush_m].

   The set of erased clusters and the table of moves are sampled BEFORE
   the device flush is issued; only those sampled entries are advanced
   afterwards:
   - a [Copied] move becomes [Flushed];
   - a [Referenced] move is completed;
   - [Copying] and [Flushed] moves are left alone;
   - the sampled erased clusters go from [Erased] to [Available].

   Returns [Ok ()] on success, or the device flush error, in which case no
   state is changed.  Asserts that a cluster map is set. *)
let flush t =
  let open Qcow_cluster_map in
  let cluster_map = match t.cluster_map with
    | Some m -> m
    | None -> assert false in
  let open Lwt.Infix in
  Lwt_mutex.with_lock t.flush_m
    (fun () ->
      let erased = Qcow_cluster_map.Erased.get cluster_map in
      let moves = Qcow_cluster_map.moves cluster_map in
      (* Advance one sampled move and keep the two counters up to date. *)
      let advance _ (move: move) (nr_flushed, nr_completed) =
        match move.state with
        | Copied ->
          Qcow_cluster_map.set_move_state cluster_map move.move Flushed;
          nr_flushed + 1, nr_completed
        | Referenced ->
          Qcow_cluster_map.complete_move cluster_map move.move;
          nr_flushed, nr_completed + 1
        | Copying | Flushed ->
          nr_flushed, nr_completed in
      B.flush t.base
      >>= function
      | Error e -> Lwt.return (Error e)
      | Ok () ->
        let nr_flushed, nr_completed = Cluster.Map.fold advance moves (0, 0) in
        let nr_erased = Cluster.to_int (Cluster.IntervalSet.cardinal erased) in
        Qcow_cluster_map.set_cluster_state cluster_map erased Erased Available;
        let something_changed =
          nr_flushed <> 0 || nr_completed <> 0 || nr_erased <> 0 in
        if something_changed then begin
          Log.info (fun f -> f "block recycler: %d cluster copies flushed; %d cluster copies complete; %d clusters erased"
            nr_flushed nr_completed nr_erased);
          Log.info (fun f -> f "block recycler: flush: %s" (Qcow_cluster_map.to_summary_string cluster_map));
        end;
        Lwt.return (Ok ())
    )
(* [start_background_thread t ~keep_erased ?compact_after_unmaps ()] starts
   two asynchronous loops (via [Lwt.async]) and returns immediately.

   - The background flusher waits until [t.need_to_flush] is set, clears
     it, sleeps 5 seconds and calls [flush].  It stops for good after the
     first failed flush.
   - The worker loop repeatedly picks the most urgent job and runs it.

   Jobs, from most to least urgent:
   + `Erase n: fewer than [keep_erased] clusters are erased or available
     and some junk exists; [n] junk clusters are zeroed and marked Erased.
   + `Update_references: at least one move is in the Flushed state.
   + `Junk (compaction): [compact_after_unmaps] is [Some x], more than
     [x] clusters are junk and no move is in progress.  The loop first
     waits until the junk count is unchanged across a 5 second sleep, then
     starts moves, copies them and resizes the device.
   + `Resize: the last used block is lower than when last checked.
   When there is nothing to do the loop waits on the cluster map.

   Errors in the worker loop are raised as promise failures inside the
   [Lwt.async] thread.  [t.background_thread] is set to a task whose
   resolver is discarded; cancelling it only logs a message.
   Asserts that a cluster map is set. *)
let start_background_thread t ~keep_erased ?compact_after_unmaps () =
  let th, _ = Lwt.task () in
  Lwt.on_cancel th
    (fun () ->
      Log.info (fun f -> f "cancellation of block recycler not implemented");
    );
  let cluster_map = match t.cluster_map with
    | Some x -> x
    | None -> assert false in
  Log.info (fun f -> f "block recycler starting with keep_erased = %Ld" keep_erased);
  let open Lwt.Infix in
  let rec background_flusher () =
    (* Block until someone has asked for a flush. *)
    let rec wait () = match t.need_to_flush with
      | true -> Lwt.return_unit
      | false ->
        Lwt_condition.wait t.need_to_flush_c
        >>= fun () ->
        wait () in
    wait ()
    >>= fun () ->
    t.need_to_flush <- false;
    (* Requests arriving during this sleep are covered by the same flush. *)
    Time.sleep_ns 5_000_000_000L
    >>= fun () ->
    Log.info (fun f -> f "block recycler: triggering background flush: %s" (Qcow_cluster_map.to_summary_string cluster_map));
    flush t
    >>= function
    | Error _ ->
      (* The flusher is not restarted after a failure. *)
      Log.err (fun f -> f "block recycler: flush failed");
      Lwt.return_unit
    | Ok () ->
      background_flusher () in
  Lwt.async background_flusher;
  (* Last block seen by [wait_for_work], used to detect that the used part
     of the file has shrunk. *)
  let last_block = ref (Qcow_cluster_map.get_last_block cluster_map) in
  let rec wait_for_work () =
    let junk = Qcow_cluster_map.Junk.get cluster_map in
    let nr_junk = Cluster.to_int64 @@ Cluster.IntervalSet.cardinal junk in
    let erased = Qcow_cluster_map.Erased.get cluster_map in
    let nr_erased = Cluster.to_int64 @@ Cluster.IntervalSet.cardinal erased in
    let available = Qcow_cluster_map.Available.get cluster_map in
    let nr_available = Cluster.to_int64 @@ Cluster.IntervalSet.cardinal available in
    let total_erased = Int64.add nr_erased nr_available in
    (* Top up the pool of erased clusters, limited by how much junk there is. *)
    let highest_priority =
      if total_erased < keep_erased && nr_junk > 0L then begin
        let n = Cluster.of_int64 @@ min nr_junk (Int64.sub keep_erased total_erased) in
        if Cluster.IntervalSet.cardinal junk < n
        then None
        else Some (`Erase n)
      end else None in
    let moves = Qcow_cluster_map.moves cluster_map in
    (* Any move in the Flushed state is waiting for its reference rewrite. *)
    let middle_priority =
      let flushed =
        Cluster.Map.fold (fun _src move acc ->
            match move.Qcow_cluster_map.state with
            | Qcow_cluster_map.Flushed -> true
            | _ -> acc
          ) moves false in
      if flushed then Some `Update_references else None in
    begin match highest_priority, middle_priority, compact_after_unmaps with
      | Some x, _, _ -> Lwt.return (Some x)
      | _, Some x, _ -> Lwt.return (Some x)
      | None, _, Some x when x < nr_junk ->
        if not(Cluster.Map.is_empty moves) then begin
          (* A compaction is already under way: let it finish first. *)
          Lwt.return None
        end else begin
          Log.info (fun f -> f "Discards (%Ld) over threshold (%Ld): waiting for discards to finish before beginning compaction" nr_junk x);
          (* Poll every 5 seconds until the junk count stops changing;
             progress is logged on every 60th poll. *)
          let rec wait nr_junk n =
            Time.sleep_ns 5_000_000_000L
            >>= fun () ->
            let nr_junk' = Cluster.to_int64 @@ Cluster.IntervalSet.cardinal @@ Qcow_cluster_map.Junk.get cluster_map in
            if nr_junk = nr_junk' then begin
              Log.info (fun f -> f "Discards have finished, %Ld clusters have been discarded" nr_junk);
              Lwt.return ()
            end else begin
              if (n mod 60 = 0) then Log.info (fun f -> f "Total discards %Ld, still waiting" nr_junk');
              wait nr_junk' (n + 1)
            end in
          wait nr_junk 0
          >>= fun () ->
          Lwt.return (Some `Junk)
        end
      | _ ->
        let last_block' = Qcow_cluster_map.get_last_block cluster_map in
        let result =
          if last_block' < !last_block then Some `Resize else None in
        last_block := last_block';
        Lwt.return result
    end >>= function
    | None ->
      Qcow_cluster_map.wait cluster_map
      >>= fun () ->
      wait_for_work ()
    | Some work ->
      Lwt.return work in
  (* Shrink (or grow) the device so that it ends right after the last used
     block, with the metadata lock held. *)
  let resize () =
    Locks.with_metadata_lock t.locks
      (fun () ->
        let new_last_block = 1 + (Cluster.to_int @@ Qcow_cluster_map.get_last_block cluster_map) in
        Log.info (fun f -> f "block recycler: resize to %d clusters" new_last_block);
        let new_size = Physical.make (new_last_block lsl t.cluster_bits) in
        let sector = Physical.sector ~sector_size:t.sector_size new_size in
        let cluster = Physical.cluster ~cluster_bits:t.cluster_bits new_size in
        (* The cluster map is resized before the device itself. *)
        Qcow_cluster_map.resize cluster_map cluster;
        B.resize t.base sector
        >>= function
        | Error _ -> Lwt.fail_with "resize"
        | Ok () ->
          (* Report the sector count that was actually passed to [B.resize]. *)
          Log.debug (fun f -> f "Resized device to %Ld sectors of size %d" sector t.sector_size);
          Lwt.return_unit
      ) in
  let rec loop () =
    (* Every iteration asks the background flusher for a flush. *)
    t.need_to_flush <- true;
    Lwt_condition.signal t.need_to_flush_c ();
    wait_for_work ()
    >>= function
    | `Erase n ->
      begin match Cluster.IntervalSet.take (Qcow_cluster_map.Junk.get cluster_map) n with
        | None -> loop ()
        | Some (to_erase, _) ->
          Log.debug (fun f -> f "block recycler: should erase %s clusters" (Cluster.to_string @@ Cluster.IntervalSet.cardinal to_erase));
          (* Hold the clusters in the Roots state while they are being
             zeroed; put them back in Junk if zeroing fails. *)
          Qcow_cluster_map.(set_cluster_state cluster_map to_erase Junk Roots);
          Lwt.catch
            (fun () ->
              erase t to_erase
              >>= function
              | Error e -> Format.kasprintf Lwt.fail_with "%a" B.pp_write_error e
              | Ok () ->
                Qcow_cluster_map.(set_cluster_state cluster_map to_erase Roots Erased);
                Lwt.return_unit
            ) (fun e ->
              Qcow_cluster_map.(set_cluster_state cluster_map to_erase Roots Junk);
              Lwt.fail e
            )
          >>= fun () ->
          loop ()
      end
    | `Junk ->
      if t.runtime_asserts then Qcow_cluster_map.Debug.assert_no_leaked_blocks cluster_map;
      assert(Cluster.Map.is_empty @@ Qcow_cluster_map.moves cluster_map);
      let junk = Qcow_cluster_map.Junk.get cluster_map in
      let nr_junk = Cluster.to_int64 @@ Cluster.IntervalSet.cardinal junk in
      let moves = Qcow_cluster_map.start_moves cluster_map in
      Log.info (fun f -> f "block recycler: %Ld clusters are junk, %d moves are possible" nr_junk (List.length moves));
      Qcow_error.Lwt_write_error.or_fail_with @@ move_all t moves
      >>= fun () ->
      resize ()
      >>= fun () ->
      loop ()
    | `Update_references ->
      Log.info (fun f -> f "block recycler: need to update references to blocks");
      begin update_references t
        >>= function
        | Error (`Msg x) -> Lwt.fail_with x
        | Error `Unimplemented -> Lwt.fail_with "Unimplemented"
        | Error `Disconnected -> Lwt.fail_with "Disconnected"
        | Error `Is_read_only -> Lwt.fail_with "Is_read_only"
        | Ok nr_updated ->
          Log.info (fun f -> f "block recycler: %Ld block references updated" nr_updated);
          loop ()
      end
    | `Resize ->
      resize ()
      >>= fun () ->
      loop ()
  in
  Lwt.async loop;
  t.background_thread <- th
end