Source file profile_matrix.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
open Gg
open Core_kernel
open Misc
open Biotk_croquis
module type S = sig
type t = private float array array
val of_array : float array array -> t option
val flat : int -> t
val length : t -> int
val composition : t -> float array
val draw : t -> Croquis.Picture.t
val entropy : t -> float array
end
module type Alphabet = sig
type t
val all : t list
val card : int
val to_char : t -> char
val of_char : char -> t option
val of_char_exn : char -> t
val to_int : t -> int
end
module Make(A : Alphabet) = struct
type t = float array array
let length = Array.length
let flat n =
let eps = 1. /. float A.card in
Array.init n ~f:(fun _ ->
Array.create ~len:A.card eps
)
let of_array = function
| [||] -> None
| xs ->
if Array.for_all xs ~f:(fun x -> Array.length x = A.card)
then Some xs
else None
let composition profile =
let weights = Array.init A.card ~f:(fun j ->
sum (Array.length profile) ~f:(fun i -> profile.(i).(j))
) in
let total = Array.fold weights ~init:0. ~f:( +. ) in
Array.map weights ~f:(fun w -> w /. total)
let xlnx x =
if Float.(x = 0.) then 0. else x *. Float.log x /. Float.log 2.
let column_entropy p =
-. sum (Array.length p) ~f:(fun j -> xlnx p.(j))
let entropy = Array.map ~f:column_entropy
let max_entropy = Float.log (float A.card) /. Float.log 2.
let draw_y_scale () =
let open Croquis.Picture in
let text x y msg =
text ~x ~y msg ~font:Croquis.Font.dejavu_sans_mono_bold ~size:0.1
in
blend [
path [ 0., 0. ; 0., max_entropy ] ;
path [ -. 0.1, 0. ; 0., 0. ] ;
path [ -. 0.1, max_entropy /. 2. ; 0., max_entropy /. 2. ] ;
path [ -. 0.1, max_entropy ; 0., max_entropy ] ;
text (-. 0.2) 0. "0" ;
text (-. 0.2) (max_entropy /. 2.) (Float.to_string (max_entropy /. 2.)) ;
text (-. 0.2) max_entropy (Float.to_string max_entropy) ;
]
let draw t =
let open Croquis in
let open Picture in
let font = Font.dejavu_sans_mono_bold in
let letter =
List.map A.all ~f:(fun c ->
sprintf "%c" (Char.uppercase (A.to_char c))
)
|> Array.of_list
in
let color =
if A.card = 4 then
Color.[| red ; blue ; v_srgbi 0xFF 0xB3 0 ; v_srgbi 0 0x80 0 |]
else
Array.init A.card ~f:(fun i -> Color.gray (float i *. 127. /. float A.card))
in
let draw_letter ~x ~col ~sy l =
if Core_kernel.Float.(sy < 1e-6) then None
else
Some (
text ~valign:`base ~font l ~col ~x ~y:0.
|> scale ~center:`origin ~sy
)
in
let draw_col p_i =
let entropy = column_entropy p_i in
List.init A.card ~f:(fun j ->
draw_letter letter.(j) ~col:color.(j) ~x:0. ~sy:p_i.(j)
)
|> List.filter_opt
|> vstack ~align:`centered
|> reshape ~bbox:(Box2.v (V2.v 0. 0.) (V2.v 1. (max_entropy -. entropy)))
in
Array.map t ~f:draw_col
|> Array.to_list
|> List.cons (draw_y_scale ())
|> hstack ~align:`bottom
end
module DNA = struct
include Make(Nucleotide)
let reverse_complement mat =
let open Nucleotide in
let n = Array.length mat in
let f i c = mat.(n - i - 1).(to_int c) in
Array.init n ~f:(fun i ->
Nucleotide.[| f i t ; f i g ; f i c ; f i a |]
)
end