Source file fpMol.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
(* Copyright (C) 2020, Francois Berenger

   Yamanishi laboratory,
   Department of Bioscience and Bioinformatics,
   Faculty of Computer Science and Systems Engineering,
   Kyushu Institute of Technology,
   680-4 Kawazu, Iizuka, Fukuoka, 820-8502, Japan. *)

(* A fingerprint-encoded molecule *)

module A = Array
module Fp = Fingerprint
module Ht = Hashtbl
module L = MyList

(* One molecule: a name, its rank in the input file and its fingerprint. *)
type t = { name: string; (* molecule name *)
           index: int; (* position in input file *)
           fp: Fp.t } (* fingerprint of the molecule *)

(* [create name index bitstring] builds a molecule whose fingerprint is
   decoded from [bitstring] by [Fp.of_string]. *)
let create name index bitstring =
  let fp = Fp.of_string bitstring in
  { name; index; fp }

(* Read one molecule from a line of an FP file.
   The line is scanned with the format "%s@,%f,%s": a name (everything up
   to the first comma), a float which is parsed then ignored, and a
   bitstring. Returns the pair [(name, bitstring)].
   Raises [Failure] with the format, the reason and the offending line if
   the line cannot be scanned. *)
let read_one_mol line =
  let fail reason =
    failwith ("FpMol.read_one_mol: fmt: %s@,%f,%s err: " ^ reason ^
              " line: " ^ line) in
  match Scanf.sscanf line "%s@,%f,%s"
          (fun name _ic50 bitstring -> (name, bitstring)) with
  | name_and_bits -> name_and_bits
  | exception Scanf.Scan_failure msg -> fail msg
  (* sscanf raises End_of_file (not Scan_failure) when the line ends before
     all fields were read, e.g. an empty or truncated line; report it like
     any other parse error so it carries the line and cannot be mistaken
     for a normal end of input by a caller reading lines *)
  | exception End_of_file -> fail "unexpected end of line"

(* [parse_one index line] decodes [line] with [read_one_mol] and wraps the
   result into a molecule located at position [index]. *)
let parse_one index line =
  match read_one_mol line with
  | (mol_name, bits) -> create mol_name index bits

(* Apply [parse_one] to each line of file [fn], together with the index
   supplied by [Utls.mapi_on_lines_of_file]; the result is whatever that
   helper returns (presumably the collection of parsed molecules;
   confirm against Utls). *)
let molecules_of_file fn =
  Utls.mapi_on_lines_of_file fn (fun index line -> parse_one index line)

(* Distance between two molecules, computed by [Fp.distance] on their
   fingerprints. *)
let dist { fp = fp1; _ } { fp = fp2; _ } =
  Fp.distance fp1 fp2

(* Score of two molecules as computed by [Fp.tanimoto] on their
   fingerprints. *)
let tani { fp = fp1; _ } { fp = fp2; _ } =
  Fp.tanimoto fp1 fp2

(* Name of the molecule. *)
let get_name { name; _ } =
  name

(* Position of the molecule in its input file. *)
let get_index { index; _ } =
  index

(* Fingerprint of the molecule. *)
let get_fp { fp; _ } =
  fp

(* Number of features of the molecule's fingerprint, as reported by
   [Fp.nb_features]. *)
let nb_features { fp; _ } =
  Fp.nb_features fp

(* [mol_is_active line] is true iff [line] starts with "active". *)
let mol_is_active line =
  let prefix = "active" in
  BatString.starts_with line prefix

(* A molecule is considered active when its name satisfies
   [mol_is_active]. *)
let is_active { name; _ } =
  mol_is_active name

(* Copy of molecule [x] whose fingerprint went through
   [Fp.drop_features to_drop]; name and index are kept. *)
let drop_features to_drop x =
  let fp = Fp.drop_features to_drop x.fp in
  { x with fp }