1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
module Rdkit : sig
type t
val of_pyobject : Pytypes.pyobject -> t
val to_pyobject : t -> Pytypes.pyobject
val __init__ : smi:string -> unit -> t
val type_atom : t -> i:int -> unit -> int array
val get_num_atoms : t -> unit -> int
val get_distance : t -> i:int -> j:int -> unit -> int
end = struct
let filter_opt l = List.filter_map Fun.id l
let py_module =
lazy
(let source =
{pyml_bindgen_string_literal|
import rdkit
from rdkit import Chem
def nb_heavy_atom_neighbors(a):
res = 0
for neighb in a.GetNeighbors():
if neighb.GetAtomicNum() > 1:
res += 1
return res
# return (#HA, #H)
def count_neighbors(a):
nb_heavy = nb_heavy_atom_neighbors(a)
nb_H = a.GetTotalNumHs()
return (nb_heavy, nb_H)
class Rdkit:
# this is needed because the OCaml side want to know how
# to get an object of type t
def __init__(self, smi):
self.mol = Chem.MolFromSmiles(smi)
self.mat = Chem.GetDistanceMatrix(self.mol)
# (atomic_num, #HA, #H, valence - #H, formal_charge)
def type_atom(self, i):
a = self.mol.GetAtomWithIdx(i)
anum = a.GetAtomicNum()
# do this on the ocaml side, since we get anum
# assert(anum > 1) # we want to consider only heavy atoms
nb_HA, nb_H = count_neighbors(a)
valence = a.GetTotalValence()
HA_used_val = valence - nb_H
formal_charge = a.GetFormalCharge()
return [anum, nb_HA, nb_H, HA_used_val, formal_charge]
def get_num_atoms(self):
return self.mol.GetNumAtoms()
# get the distance (in bonds) between a pair of atoms
def get_distance(self, i, j):
return int(self.mat[i][j])
|pyml_bindgen_string_literal}
in
let filename =
{pyml_bindgen_string_literal|rdkit_wrapper.py|pyml_bindgen_string_literal}
in
let bytecode = Py.compile ~filename ~source `Exec in
Py.Import.exec_code_module
{pyml_bindgen_string_literal|rdkit_wrapper|pyml_bindgen_string_literal}
bytecode)
let import_module () = Lazy.force py_module
type t = Pytypes.pyobject
let of_pyobject pyo = pyo
let to_pyobject x = x
let __init__ ~smi () =
let callable = Py.Module.get (import_module ()) "Rdkit" in
let kwargs = filter_opt [ Some ("smi", Py.String.of_string smi) ] in
of_pyobject @@ Py.Callable.to_function_with_keywords callable [||] kwargs
let type_atom t ~i () =
let callable = Py.Object.find_attr_string t "type_atom" in
let kwargs = filter_opt [ Some ("i", Py.Int.of_int i) ] in
Py.List.to_array_map Py.Int.to_int
@@ Py.Callable.to_function_with_keywords callable [||] kwargs
let get_num_atoms t () =
let callable = Py.Object.find_attr_string t "get_num_atoms" in
let kwargs = filter_opt [] in
Py.Int.to_int @@ Py.Callable.to_function_with_keywords callable [||] kwargs
let get_distance t ~i ~j () =
let callable = Py.Object.find_attr_string t "get_distance" in
let kwargs =
filter_opt [ Some ("i", Py.Int.of_int i); Some ("j", Py.Int.of_int j) ]
in
Py.Int.to_int @@ Py.Callable.to_function_with_keywords callable [||] kwargs
end