Source file irmin_chunk.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
open! Import
let src = Logs.Src.create "irmin.chunk" ~doc:"Irmin chunks"
module Log = (val Logs.src_log src : Logs.LOG)
module Conf = struct
let min_size =
Irmin.Private.Conf.key ~doc:"Minimal chunk size" "min-size"
Irmin.Private.Conf.int 4000
let chunk_size =
Irmin.Private.Conf.key ~doc:"Size of chunk" "size" Irmin.Private.Conf.int
4096
let chunking =
let pp ppf = function
| `Max -> Fmt.pf ppf "max"
| `Best_fit -> Fmt.pf ppf "best-fit"
in
let of_string = function
| "max" -> Ok `Max
| "best-fit" -> Ok `Best_fit
| e ->
Error
(`Msg
(e ^ " is not a valid chunking algorithm. Use 'max' or 'best-fit'"))
in
Irmin.Private.Conf.key ~doc:"Chunking algorithm" "chunking" (of_string, pp)
`Best_fit
end
let chunk_size = Conf.chunk_size
let err_too_small ~min size =
Printf.ksprintf invalid_arg
"Chunks of %d bytes are too small. Size should at least be %d bytes." size
min
let config ?(config = Irmin.Private.Conf.empty) ?size ?min_size
?(chunking = `Best_fit) () =
let module C = Irmin.Private.Conf in
let min_size =
match min_size with None -> C.default Conf.min_size | Some v -> v
in
let size =
match size with
| None -> C.default Conf.chunk_size
| Some v -> if v < min_size then err_too_small ~min:min_size v else v
in
let add x y c = C.add c x y in
config
|> add Conf.min_size min_size
|> add Conf.chunk_size size
|> add Conf.chunking chunking
module Chunk (K : Irmin.Hash.S) = struct
type v = Data of string | Index of K.t list
let v =
let open Irmin.Type in
variant "chunk" (fun d i -> function
| Data data -> d data | Index index -> i index)
|~ case1 "Data" string (fun d -> Data d)
|~ case1 "Index" (list ~len:`Int16 K.t) (fun i -> Index i)
|> sealv
type t = { len : int; v : v }
let size_of = Irmin.Type.unstage (Irmin.Type.size_of v)
let to_bin_string = Irmin.Type.unstage (Irmin.Type.to_bin_string v)
let decode_bin = Irmin.Type.unstage (Irmin.Type.decode_bin v)
let encode_bin = Irmin.Type.unstage (Irmin.Type.encode_bin v)
let size_of_v t =
match size_of t with Some n -> n | None -> String.length (to_bin_string t)
let = size_of_v (Data "")
let = size_of_v (Index [])
let of_string b =
let len = String.length b in
let n, v = decode_bin b 0 in
if len = n then { len; v }
else Fmt.invalid_arg "invalid length: got %d, expecting %d" n len
let to_string t =
let buf = Bytes.make t.len '\000' in
let b = Buffer.create t.len in
encode_bin t.v (Buffer.add_string b);
let s = Buffer.contents b in
Bytes.blit_string s 0 buf 0 (String.length s);
Bytes.unsafe_to_string buf
let t = Irmin.Type.(map string) of_string to_string
end
module Content_addressable
(S : Irmin.APPEND_ONLY_STORE_MAKER)
(K : Irmin.Hash.S)
(V : Irmin.Type.S) =
struct
module Chunk = Chunk (K)
module AO = S (K) (Chunk)
module CA = Irmin.Content_addressable (S) (K) (Chunk)
type key = CA.key
let pp_key = Irmin.Type.pp K.t
type value = V.t
type 'a t = {
chunking : [ `Max | `Best_fit ];
db : 'a CA.t;
chunk_size : int;
max_children : int;
max_data : int;
}
let index t i =
let v = Chunk.Index i in
match t.chunking with
| `Max -> { Chunk.v; len = t.chunk_size }
| `Best_fit -> { Chunk.v; len = Chunk.size_of_v v }
let data t s =
let v = Chunk.Data s in
match t.chunking with
| `Max -> { Chunk.v; len = t.chunk_size }
| `Best_fit -> { Chunk.v; len = Chunk.size_of_v v }
module Tree = struct
let find_leaves t root =
let rec aux acc { Chunk.v; _ } =
match v with
| Chunk.Data d -> Lwt.return (d :: acc)
| Chunk.Index i ->
Lwt_list.fold_left_s
(fun acc key ->
CA.find t.db key >>= function
| None -> Lwt.return acc
| Some v -> aux acc v)
acc i
in
aux [] root >|= List.rev
let list_partition n l =
let rec aux done_ i acc = function
| [] -> List.rev (List.rev acc :: done_)
| h :: t ->
if i >= n then aux (List.rev acc :: done_) 1 [ h ] t
else aux done_ (i + 1) (h :: acc) t
in
aux [] 0 [] l
let add t ~key l =
let rec aux = function
| [] -> invalid_arg "Irmin_chunk.Tree.add"
| [ k ] -> Lwt.return k
| l -> (
let n =
if List.length l >= t.max_children then t.max_children
else List.length l
in
match list_partition n l with
| [ i ] -> AO.add t.db key (index t i) >|= fun () -> key
| l -> Lwt_list.map_p (fun i -> CA.add t.db (index t i)) l >>= aux)
in
aux l
end
let v config =
let module C = Irmin.Private.Conf in
let chunk_size = C.get config Conf.chunk_size in
let max_data = chunk_size - Chunk.size_of_data_header in
let max_children =
(chunk_size - Chunk.size_of_index_header) / K.hash_size
in
let chunking = C.get config Conf.chunking in
(if max_children <= 1 then
let min = Chunk.size_of_index_header + (K.hash_size * 2) in
err_too_small ~min chunk_size);
Log.debug (fun l ->
l "config: chunk-size=%d digest-size=%d max-data=%d max-children=%d"
chunk_size K.hash_size max_data max_children);
let+ db = CA.v config in
{ chunking; db; chunk_size; max_children; max_data }
let close _ = Lwt.return_unit
let clear t = CA.clear t.db
let batch t f = CA.batch t.db (fun db -> f { t with db })
let find_leaves t key =
AO.find t.db key >>= function
| None -> Lwt.return_none
| Some x -> Tree.find_leaves t x >|= Option.some
let equal_hash = Irmin.Type.(unstage (equal K.t))
let of_bin_string = Irmin.Type.(unstage (of_bin_string V.t))
let to_bin_string = Irmin.Type.(unstage (to_bin_string V.t))
let pre_hash = Irmin.Type.(unstage (pre_hash V.t))
let check_hash k v =
let k' = K.hash (pre_hash v) in
if equal_hash k k' then Lwt.return_unit
else
Fmt.kstr Lwt.fail_invalid_arg "corrupted value: got %a, expecting %a"
pp_key k' pp_key k
let find t key =
find_leaves t key >>= function
| None -> Lwt.return_none
| Some bufs -> (
let buf = String.concat "" bufs in
match of_bin_string buf with
| Ok va -> check_hash key va >|= fun () -> Some va
| Error _ -> Lwt.return_none)
let list_range ~init ~stop ~step =
let rec aux acc n =
if n >= stop then List.rev acc else aux (n :: acc) (n + step)
in
aux [] init
let unsafe_add_buffer t key buf =
let len = String.length buf in
if len <= t.max_data then
AO.add t.db key (data t buf) >|= fun () ->
Log.debug (fun l -> l "add -> %a (no split)" pp_key key)
else
let offs = list_range ~init:0 ~stop:len ~step:t.max_data in
let aux off =
let len = min t.max_data (String.length buf - off) in
let payload = String.sub buf off len in
CA.add t.db (data t payload)
in
let+ k = Lwt_list.map_s aux offs >>= Tree.add ~key t in
Log.debug (fun l -> l "add -> %a (split)" pp_key k)
let add t v =
let buf = to_bin_string v in
let key = K.hash (pre_hash v) in
let+ () = unsafe_add_buffer t key buf in
key
let unsafe_add t key v =
let buf = to_bin_string v in
unsafe_add_buffer t key buf
let mem t key = CA.mem t.db key
end