Source file fit.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
(* Christian Lindig <lindig@gmail.com>
 *)

open Angstrom

(** [defer finally work] runs [work ()] and guarantees that [finally ()] is
    executed afterwards, whether [work] returns or raises. *)
let defer finally work = Fun.protect ~finally work

(** [a & b] is the bitwise and of [a] and [b]. This shadows the deprecated
    boolean operator [&]; [&&] remains the logical and. *)
let ( & ) a b = Int.logand a b

(** [x >> n] shifts [x] right by [n] bits, preserving the sign bit. *)
let ( >> ) x n = Int.shift_right x n
(** [fail fmt ...] is a parser that fails with the message obtained by
    formatting the arguments according to [fmt]. Shadows the parser [fail]
    that takes a plain string. *)
let fail fmt = Printf.ksprintf (fun msg -> fail msg) fmt

(** [failwith fmt ...] raises [Failure] with the message obtained by
    formatting the arguments according to [fmt]. Shadows the standard
    [failwith] that takes a plain string. *)
let failwith fmt = Printf.ksprintf (fun msg -> failwith msg) fmt

(** Unqualified shorthand for [Printf.sprintf]. *)
let sprintf fmt = Printf.sprintf fmt

(** Byte order of the multi-byte values in a record. It is announced by each
    record definition and selects between the [BE] and [LE] parsers when
    values are read. *)
type arch = BE  (** Big Endian *) | LE  (** Little Endian *)

(** Maps with integer keys. Used for the table of record definitions seen so
    far while parsing and for the table of known message names. *)
module Dict = Map.Make (Int)

module Type = struct
  (** A FIT file holds records of values. Each value has a type and a record is
      defined by the type of each value. This structure is defined in the FIT
      file itself. This module captures this type structure. *)

  (** An integer can be signed or unsigned *)
  type sign = Signed | Unsigned

  (** An invalid integer value is either denoted as zero (ZZ) or FF, depending
      on its type *)
  type invalid = ZZ | FF

  (** base types - a value has one of these types *)
  type base =
    | Enum
    | Bytes
    | String
    | Int of sign * int * invalid
        (** signedness, width in bits (8, 16, 32 or 64), invalid marker *)
    | Float of int  (** either 32 or 64 bits *)

  type field = {
      slot : int  (** position within record - defines purpose *)
    ; size : int  (** in bytes *)
    ; ty : base  (** representation *)
  }
  (** A record is composed of fields. A field has a size and type. The slot is a
      number that defines its purpose, like heart rate, and this meaning is
      assigned in the protocol and not in the binary stream itself *)

  type record = {
      msg : int
    ; arch : arch
    ; fields : field list
    ; dev_fields : int  (** total size in bytes of dev fields *)
  }
  (** A record is comprised of fields; the overall purpose of the record is
      captured by the msg number; the binary format of data in the fields
      respects the arch architecture. [dev_fields] are additional fields which
      we don't decode but just skip over *)

  (** [sum xs] adds up a list of integers *)
  let sum = List.fold_left ( + ) 0

  (** [size f] is the size in bytes of field [f] *)
  let size { size; _ } = size

  (** [total fs] is the combined size in bytes of the fields [fs] *)
  let total fs = fs |> List.map size |> sum

  (** [json r] represents the record definition [r] as a JSON value; meant
      for debugging *)
  let json { msg; arch; fields; dev_fields } =
    let t = function
      | Enum -> `String "enum"
      | Bytes -> `String "bytes"
      | String -> `String "string"
      | Int (Signed, bits, _) -> `String (sprintf "int%d" bits)
      | Int (Unsigned, bits, _) -> `String (sprintf "uint%d" bits)
      | Float bits -> `String (sprintf "float%d" bits)
    in
    let f { slot; size; ty } =
      `Assoc [ ("slot", `Int slot); ("size", `Int size); ("type", t ty) ]
    in
    `Assoc
      [
        ("msg", `Int msg)
      ; ("arch", `String (match arch with LE -> "LE" | BE -> "BE"))
      ; ("fields", `List (List.map f fields))
      ; ("dev_fields", `Int dev_fields)
      ; ("size", `Int (total fields + dev_fields))
      ]

  (** [base_of_code n] maps a base type number to its [base] type, or [None]
      when no type is assigned to [n]. The 64-bit integer types (14 to 16) are
      recognised so that a file using them can still be read; there is no
      dedicated reader for them, so their values are skipped when records are
      read. *)
  let base_of_code = function
    | 0 -> Some Enum
    | 1 -> Some (Int (Signed, 8, FF))
    | 2 -> Some (Int (Unsigned, 8, FF))
    | 3 -> Some (Int (Signed, 16, FF))
    | 4 -> Some (Int (Unsigned, 16, FF))
    | 5 -> Some (Int (Signed, 32, FF))
    | 6 -> Some (Int (Unsigned, 32, FF))
    | 7 -> Some String
    | 8 -> Some (Float 32)
    | 9 -> Some (Float 64)
    | 10 -> Some (Int (Unsigned, 8, ZZ))
    | 11 -> Some (Int (Unsigned, 16, ZZ))
    | 12 -> Some (Int (Unsigned, 32, ZZ))
    | 13 -> Some Bytes
    | 14 -> Some (Int (Signed, 64, FF))
    | 15 -> Some (Int (Unsigned, 64, FF))
    | 16 -> Some (Int (Unsigned, 64, ZZ))
    | _ -> None

  (** parse a [field] definition from a FIT file: three bytes holding the
      slot, the size in bytes, and the base type. An unassigned base type
      makes the parser fail rather than raise an exception. *)
  let field =
    let* slot = any_uint8 in
    let* size = any_uint8 in
    let* code = any_uint8 in
    (* the base type number occupies the low five bits of the byte; the
       remaining bits are flags that are not needed here *)
    match base_of_code (code & 0b1_1111) with
    | Some ty -> return { slot; size; ty }
    | None -> fail "unknown field base type 0x%02x" code

  (** parse the definition of a developer field and return its size in bytes.
      Such a definition is three bytes long like a regular field definition
      but its third byte is a developer data index rather than a base type,
      so it must not be decoded by [field]. Only the size is needed because
      developer fields are skipped. *)
  let dev_field_size =
    let* _number = any_uint8 in
    let* size = any_uint8 in
    let* _index = any_uint8 in
    return size

  (** parse a [record] definition. We know ahead of time if the record
      definition may contain development field definitions ([dev] is true),
      which we then have to read as well. Field counts are single unsigned
      bytes. *)
  let record ~dev =
    let* arch =
      int8 0 *> any_int8 >>= function
      | 0 -> return LE
      | 1 -> return BE
      | n -> fail "expected 0 or 1 in byte for endianness, found %d" n
    in
    let* msg = match arch with LE -> LE.any_uint16 | BE -> BE.any_uint16 in
    let* n = any_uint8 in
    let* fields = count n field in
    let* dev_fields =
      if dev then any_uint8 >>= fun n -> count n dev_field_size >>| sum
      else return 0
    in
    return { msg; arch; fields; dev_fields }
end

(** The values read from the header at the start of a FIT file. [protocol] is
    the byte that follows the header size, [profile] the 16-bit little endian
    value after it, and [length] the 32-bit little endian value after that.
    [length] is the number of bytes, counted from the end of the header, that
    are read as blocks. *)
type header = { protocol : int; profile : int; length : int }

(** A [value] represents a datum read from a FIT file. *)
type value =
  | Enum of int  (** an enumeration, read as one unsigned byte *)
  | String of string
      (** the bytes of a string or byte field exactly as found in the file *)
  | Int of int  (** an integer of any width and signedness *)
  | Float of float  (** a 32 or 64 bit floating point number *)
  | Unknown
      (** the value was marked as invalid in the file, or it is of a type
          that is not decoded *)

(** [to_string v] renders [v] for diagnostics as the name of its constructor
    followed by the payload in parentheses. *)
let to_string = function
  | Unknown -> "unknown"
  | Int i -> sprintf "int(%d)" i
  | Float f -> sprintf "float(%f)" f
  | Enum d -> sprintf "enum(%d)" d
  | String s -> sprintf "string(%s)" s

type record = { msg : int; fields : (int * value) list }
(** A [record] is a record of values read from a FIT file; [msg] is the
    message number taken from the definition the record was read with. Each
    value is in a slot, which is reported as [int] value, and [fields] is
    sorted by slot. Slots are not consecutive and are part of the FIT
    protocol. *)

type t = { header : header; records : record list }
(** [t] represents the contents of a FIT file: its header and its records in
    the order in which they appear in the file *)

(** [base arch ty] reads the next value from the stream and returns it as a
    [value]. The field definition [ty] is known because the caller knows the
    types of all values from the record definition; the current endianness
    [arch] is known as well.

    A value equal to the "invalid" marker of its type is returned as
    [Unknown]. The marker is all bits set for unsigned integers (zero for the
    "z" variants) and the largest positive value for signed integers: 0x7F,
    0x7FFF and 0x7FFFFFFF. Floats that are NaN or infinite are [Unknown] too.
    A type without a dedicated reader is skipped and reported as [Unknown].
    In every case exactly [ty.size] bytes are consumed, provided the reader
    did not already consume more. *)
let base arch ty =
  let float x = return (if Float.is_finite x then Float x else Unknown) in
  (* [int invalid x] is [Unknown] if [x] is the invalid marker of its type *)
  let int invalid x = return (if x = invalid then Unknown else Int x) in
  let int32 invalid x = int invalid (Int32.to_int x) in
  (* 32-bit values are read as signed [int32]; masking recovers the unsigned
     interpretation *)
  let uint32 invalid x = int invalid (Int32.to_int x land 0xffff_ffff) in
  let value =
    match (arch, ty.Type.ty) with
    | __, Type.Bytes -> take ty.Type.size >>| fun x -> String x
    | __, Type.String -> take ty.Type.size >>| fun x -> String x
    | __, Type.Enum -> any_uint8 >>| fun x -> Enum x
    | __, Type.Int (Signed, 8, FF) -> any_int8 >>= int 0x7f
    | BE, Type.Int (Signed, 16, FF) -> BE.any_int16 >>= int 0x7fff
    | LE, Type.Int (Signed, 16, FF) -> LE.any_int16 >>= int 0x7fff
    | BE, Type.Int (Signed, 32, FF) -> BE.any_int32 >>= int32 0x7fff_ffff
    | LE, Type.Int (Signed, 32, FF) -> LE.any_int32 >>= int32 0x7fff_ffff
    | __, Type.Int (Unsigned, 8, ZZ) -> any_uint8 >>= int 0
    | LE, Type.Int (Unsigned, 16, ZZ) -> LE.any_uint16 >>= int 0
    | BE, Type.Int (Unsigned, 16, ZZ) -> BE.any_uint16 >>= int 0
    | LE, Type.Int (Unsigned, 32, ZZ) -> LE.any_int32 >>= uint32 0
    | BE, Type.Int (Unsigned, 32, ZZ) -> BE.any_int32 >>= uint32 0
    | __, Type.Int (Unsigned, 8, FF) -> any_uint8 >>= int 0xff
    | LE, Type.Int (Unsigned, 16, FF) -> LE.any_uint16 >>= int 0xffff
    | BE, Type.Int (Unsigned, 16, FF) -> BE.any_uint16 >>= int 0xffff
    | LE, Type.Int (Unsigned, 32, FF) -> LE.any_int32 >>= uint32 0xffff_ffff
    | BE, Type.Int (Unsigned, 32, FF) -> BE.any_int32 >>= uint32 0xffff_ffff
    | BE, Type.Float 32 -> BE.any_float >>= float
    | LE, Type.Float 32 -> LE.any_float >>= float
    | BE, Type.Float 64 -> BE.any_double >>= float
    | LE, Type.Float 64 -> LE.any_double >>= float
    | __, _ -> advance ty.Type.size *> return Unknown
  in

  let* before = pos in
  let* v = value in
  let* after = pos in
  let consumed = after - before in
  if consumed < ty.Type.size then
    (* This works around inconsistent files where the actual size does
       not match the expected size *)
    advance (ty.Type.size - consumed) *> return v
  else return v

(** [record arch ty] reads one record of values whose shape is given by the
    record definition [ty]. The fields are read by [base] in the order in
    which the definition lists them; the developer fields that follow are
    skipped. The values of the resulting record are sorted by slot. *)
let record arch ty =
  let by_slot (a, _) (b, _) = Int.compare a b in
  let rec collect acc = function
    | field :: rest ->
        let* v = base arch field in
        collect ((field.Type.slot, v) :: acc) rest
    | [] -> return acc
  in
  let* values = collect [] ty.Type.fields in
  (* skip over developer fields *)
  let* () = advance ty.Type.dev_fields in
  return { msg = ty.Type.msg; fields = List.sort by_slot values }

module File = struct
  (** debugging aid: the dictionary of record definitions as JSON *)
  let _dump dict =
    Dict.bindings dict
    |> List.rev_map (fun (k, v) -> (string_of_int k, Type.json v))
    |> fun x -> `Assoc x

  (** parse the file header. Its first byte is the size of the header, which
      must be 12 or 14; a header of size 14 ends with two CRC bytes that are
      skipped. All values in the header are unsigned and are read as such. *)
  let header =
    any_uint8 >>= function
    | (12 | 14) as size ->
        let* protocol = any_uint8 in
        let* profile = LE.any_uint16 in
        let* length = LE.any_int32 in
        (* the length is read as a signed [int32]; masking recovers the
           unsigned value *)
        let length = Int32.to_int length land 0xffff_ffff in
        string ".FIT"
        *> advance (size - 12) (* skip CRC, if present *)
        *> return { protocol; profile; length }
    | n -> fail "found unexpected header of size %d" n

  (** [data (dict, rs) key] reads a block of values whose shape is defined by
      the type that [key] refers to. This type must have been read earlier
      and is looked up in [dict]; parsing fails if it is missing. The record
      read is added in front of [rs]. *)
  let data (dict, rs) key =
    match Dict.find_opt key dict with
    | Some ty -> record ty.Type.arch ty >>= fun r -> return (dict, r :: rs)
    | None ->
        pos >>= fun p ->
        fail "corrupted file? No type for key=%d offset=%d at %s" key p
          __LOC__

  (** [block (dict, rs)] reads the next block: one header byte followed by
      either the definition of a type, which is added to [dict], or a record
      of values, which is added in front of [rs]. *)
  let block (dict, rs) =
    pos >>= fun p ->
    any_uint8 >>= fun byte ->
    let key = byte & 0b0000_1111 in
    let tag = byte & 0b1111_0000 in
    match tag with
    | 0b0100_0000 ->
        (* This is a block that defines a type - add it to the dict *)
        Type.record ~dev:false >>= fun d -> return (Dict.add key d dict, rs)
    | 0b0110_0000 ->
        (* As before, but the definition includes developer fields *)
        Type.record ~dev:true >>= fun d -> return (Dict.add key d dict, rs)
    | 0b0000_0000 ->
        (* This is a block holding values *)
        data (dict, rs) key
    | _ when (tag & 0b1000_0000) <> 0 ->
        (* this is a compressed header for a value block that includes a
           timestamp. We ignore the timestamp and only read the other
           fields. The key is held in two bits of the tag. *)
        data (dict, rs) ((tag & 0b0110_0000) >> 5)
    | n -> fail "unexpected block with tag 0x%x at offset %d" n p

  (** [blocks xx finish] reads blocks until position [finish] is reached *)
  let rec blocks xx finish =
    pos >>= fun p ->
    if p >= finish then return xx else block xx >>= fun xx -> blocks xx finish

  (** parse a FIT file: the header followed by as many bytes of blocks as the
      header announces. Records are returned in file order. *)
  let read =
    let xx = (Dict.empty, []) in
    header >>= fun header ->
    pos >>= fun offset ->
    blocks xx (header.length + offset) >>= fun (_, records) ->
    return { header; records = List.rev records }
end

module MSG = struct
  (** Limited support for decoding records; the most common record is 20
      "record". *)

  (** [add map (k, v)] binds [k] to [v] in [map] *)
  let add map (k, v) = Dict.add k v map

  (** names of messages by message number *)
  let msgs =
    List.fold_left add Dict.empty
      [
        (0, "file_id"); (1, "capabilities"); (2, "device_settings")
      ; (3, "user_profile"); (4, "hrm_profile"); (5, "sdm_profile")
      ; (6, "bike_profile"); (7, "zones_target"); (8, "hr_zone")
      ; (9, "power_zone"); (10, "met_zone"); (12, "sport"); (15, "goal")
      ; (18, "session"); (19, "lap"); (20, "record"); (21, "event")
      ; (23, "device_info"); (26, "workout"); (27, "workout_step")
      ; (30, "weight_scale"); (31, "course"); (32, "course_point")
      ; (33, "totals"); (34, "activity"); (35, "software")
      ; (37, "file_capabilities"); (38, "mesg_capabilities")
      ; (39, "field_capabilities"); (49, "file_creator")
      ; (51, "blood_pressure")
      ]

  (** [lookup key] is the name of message [key], or [key] as a decimal string
      if it has no entry in [msgs] *)
  let lookup key =
    Dict.find_opt key msgs |> Option.value ~default:(string_of_int key)
end

module Decode = struct
  (* Values are scaled and need to be decoded. The scaling depends on the
     record and the slot of a value. Which scaling is used by which slot
     is defined in the protocol and can't be deduced from the FIT file
     alone. Every decoder raises [Failure] for a value it can't decode. *)

  (** [timestamp v] adds a fixed offset of 631065600 to the integer [v] *)
  let timestamp = function
    | Int n ->
        let offset = 631065600.0 in
        Int.to_float n +. offset
    | v -> failwith "%s: unexpected value: %s" __LOC__ (to_string v)

  (** [scale scale offset v] divides the number [v] by [scale] and then
      subtracts [offset] *)
  let scale scale offset v =
    let divisor = Float.of_int scale in
    let shift = Float.of_int offset in
    let apply x = (x /. divisor) -. shift in
    match v with
    | Int x -> apply (Float.of_int x)
    | Float x -> apply x
    | v -> failwith "%s: unexpected value: %s" __LOC__ (to_string v)

  (** [latlon v] multiplies the integer [v] by 180 and divides it by
      2147483648 *)
  let latlon = function
    | Int x ->
        let half_circle = 2147483648.0 in
        Int.to_float x *. 180.0 /. half_circle
    | v -> failwith "%s: unexpected value: %s" __LOC__ (to_string v)
end

module JSON = struct
  (** [timestamp v] is the RFC 3339 representation of the integer [v] after a
      fixed offset of 631065600 seconds is added, or [`Null] if [v] is not an
      integer or the result cannot be represented *)
  let timestamp = function
    | Int n -> (
        let offset = 631065600.0 in
        match Ptime.of_float_s (Int.to_float n +. offset) with
        | Some t -> `String (Ptime.to_rfc3339 t)
        | None -> `Null)
    | _ -> `Null

  (** like [Decode.scale] but [`Null] when decoding fails *)
  let scale scale offset v =
    match Decode.scale scale offset v with
    | x -> `Float x
    | exception _ -> `Null

  (** like [Decode.latlon] but [`Null] when decoding fails *)
  let latlon v =
    match Decode.latlon v with x -> `Float x | exception _ -> `Null

  (* For the most common record "20", here is the meaning of slots and
     how to decode the associated value. Slot 253 is a timestamp in any
     record. Every other value is reported as is under the number of its
     slot. *)
  let value msg pos v =
    let raw = function
      | Enum n -> `Int n
      | String s -> `String s
      | Int i -> `Int i
      | Float f -> if Float.is_nan f then `Null else `Float f
      | Unknown -> `Null
    in
    match (msg, pos) with
    | 20, 0 -> ("latitude", latlon v)
    | 20, 1 -> ("longitude", latlon v)
    | 20, 2 -> ("altitude", scale 5 500 v)
    | 20, 3 -> ("heartrate", scale 1 0 v)
    | 20, 4 -> ("cadence", scale 1 0 v)
    | 20, 5 -> ("distance", scale 100 0 v)
    | 20, 6 -> ("speed", scale 1000 0 v)
    | 20, 7 -> ("power", scale 1 0 v)
    | 20, 12 -> ("cycle_length", scale 100 0 v)
    | 20, 13 -> ("temperature", scale 1 0 v)
    | 20, 19 -> ("total_cycles", scale 1 0 v)
    | 20, 73 -> ("enhanced_speed", scale 1000 0 v)
    | _, 253 -> ("timestamp", timestamp v)
    | _, _ -> (string_of_int pos, raw v)

  (** [field msg (pos, v)] is [value msg pos v] *)
  let field msg (pos, v) = value msg pos v

  (** [record r] is a JSON object for [r]: the name of its message under
      "msg", followed by one member per field *)
  let record r =
    let name = ("msg", `String (MSG.lookup r.msg)) in
    `Assoc (name :: List.map (field r.msg) r.fields)
end

module Record = struct
  (** The messages with tag 20 (called "record") are at the heart of all FIT
      files as they contain the measurements. These records may contain
      different values and their presence cannot be expected. This module
      provides a representation for such records but covers only the most common
      values and is not comprehensive *)

  type t = {
      latitude : float option
    ; longitude : float option
    ; timestamp : float option
    ; altitude : float option
    ; heartrate : float option
    ; cadence : float option
    ; power : float option
    ; speed : float option
    ; distance : float option
    ; temperature : float option
    ; cycle_length : float option
  }

  (** [get slot fields decoder] decodes the value found in [slot], if any. If
      decoding fails, we record the field as not present *)
  let get slot fields decoder =
    match List.assoc_opt slot fields with
    | None -> None
    | Some raw -> (
        match decoder raw with x -> Some x | exception _ -> None)

  (** [record r] is the decoded form of [r] if it is a message 20, and [None]
      for any other message *)
  let record = function
    | { msg = 20; fields } -> (
        let angle slot = get slot fields Decode.latlon in
        let scaled slot scale offset =
          get slot fields (Decode.scale scale offset)
        in
        try
          Some
            {
              timestamp = get 253 fields Decode.timestamp
            ; latitude = angle 0
            ; longitude = angle 1
            ; altitude = scaled 2 5 500
            ; heartrate = scaled 3 1 0
            ; cadence = scaled 4 1 0
            ; distance = scaled 5 100 0
            ; speed = scaled 6 1000 0
            ; power = scaled 7 1 0
            ; cycle_length = scaled 12 100 0
            ; temperature = scaled 13 1 0
            }
        with _ -> None)
    | _ -> None
end

(** [to_json fit] is a JSON list with one object per record of [fit] *)
let to_json { records; _ } = `List (List.map JSON.record records)

(** [records fit] are the decoded message 20 records of [fit]; all other
    records are dropped *)
let records { records; _ } = List.filter_map Record.record records

(** parse just the header of a FIT file from the start of a string; input
    after the header is ignored *)
let header str = parse_string ~consume:Consume.Prefix File.header str

(** parse a string as a FIT file; input after the data announced by the
    header is ignored *)
let parse str = parse_string ~consume:Consume.Prefix File.read str

(** [read_file ~max_size path] is the content of the file at [path].

    The file is opened in binary mode: a FIT file is binary data and must
    not be subject to the newline translation that text mode performs on
    some platforms. The channel is closed in any case.

    @raise Failure if the file is larger than [max_size] bytes
    @raise Sys_error if the file cannot be opened or read *)
let read_file ~max_size path =
  let ic = open_in_bin path in
  defer (fun () -> close_in ic) @@ fun () ->
  let size = in_channel_length ic in
  if size > max_size then failwith "input file %s exceeds maximum size" path
  else really_input_string ic size

(** [read ?max_size path] reads and parses the FIT file at [path]. Files
    larger than [max_size] bytes (100 KiB by default) are rejected. Any
    exception raised along the way is reported as an [Error] that names
    [path]. *)
let read ?(max_size = 100 * 1024) path =
  match read_file ~max_size path |> parse with
  | result -> result
  | exception e ->
      Error (sprintf "Can't process %s: %s" path (Printexc.to_string e))