Source file my1.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
(*********************************************************************************)
(*                OCaml-RDF                                                      *)
(*                                                                               *)
(*    Copyright (C) 2012-2021 Institut National de Recherche en Informatique     *)
(*    et en Automatique. All rights reserved.                                    *)
(*                                                                               *)
(*    This program is free software; you can redistribute it and/or modify       *)
(*    it under the terms of the GNU Lesser General Public License version        *)
(*    3 as published by the Free Software Foundation.                            *)
(*                                                                               *)
(*    This program is distributed in the hope that it will be useful,            *)
(*    but WITHOUT ANY WARRANTY; without even the implied warranty of             *)
(*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *)
(*    GNU General Public License for more details.                               *)
(*                                                                               *)
(*    You should have received a copy of the GNU General Public License          *)
(*    along with this program; if not, write to the Free Software                *)
(*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA                   *)
(*    02111-1307  USA                                                            *)
(*                                                                               *)
(*    Contact: Maxence.Guesdon@inria.fr                                          *)
(*                                                                               *)
(*********************************************************************************)

(** *)

open Rdf;;
open Term;;
open My;;

let table_options engine =
  " ENGINE="^engine^
    (if String.lowercase_ascii engine = "myisam" then " DELAY_KEY_WRITE=1" else "")^
    " MAX_ROWS=100000000 DEFAULT CHARSET=UTF8"
;;

let creation_queries =
  [
    "CREATE TABLE IF NOT EXISTS graphs (id integer AUTO_INCREMENT PRIMARY KEY NOT NULL, name text NOT NULL)" ;
    "CREATE TABLE IF NOT EXISTS bnodes (id bigint PRIMARY KEY NOT NULL, value text NOT NULL) AVG_ROW_LENGTH=33" ;
    "CREATE TABLE IF NOT EXISTS resources (id bigint PRIMARY KEY NOT NULL, value text NOT NULL) AVG_ROW_LENGTH=80";
    "CREATE TABLE IF NOT EXISTS literals (id bigint PRIMARY KEY NOT NULL, value longtext NOT NULL,
                                          language text, datatype text)  AVG_ROW_LENGTH=50" ;
  ]
;;

let prepared_term_of_hash = "term_of_hash";;

let hash_of_term dbd ?(add=false) term =
  let hash = Term.term_hash term in
  if add then
    begin
      let test_query =
        "SELECT COUNT(*) FROM " ^
          (match term with
             Iri _ -> "resources"
           | Literal _ -> "literals"
           | Blank_ _ | Blank -> "bnodes"
          ) ^
          " WHERE id=" ^
          (Int64.to_string hash)
      in
      match Mysql.fetch (exec_query dbd test_query) with
        Some [| Some s |] when int_of_string s = 0 ->
          let pre_query =
            match term with
              Iri iri ->
                "resources (id, value) values ("^
                  (Int64.to_string hash) ^
                  ", " ^ (quote_str (Iri.to_string iri)) ^ ")"
            | Literal lit ->
                "literals (id, value, language, datatype) values (" ^
                  (Int64.to_string hash) ^ ", " ^
                  (quote_str lit.lit_value) ^ ", " ^
                  (quote_str (Misc.string_of_opt lit.lit_language)) ^ ", " ^
                  (quote_str (Misc.string_of_opt (Misc.map_opt Iri.to_string lit.lit_type))) ^ ")"
            | Blank_ id ->
                "bnodes (id, value) values (" ^
                (Int64.to_string hash) ^ ", " ^
                  (quote_str (Term.string_of_blank_id id)) ^ ")"
            | Blank -> assert false
          in
          let query = "INSERT INTO " ^ pre_query (* ON DUPLICATE KEY UPDATE value=value*) in
          ignore(exec_query dbd query)
      | _ -> ()
    end;
  hash
;;

let init_db db engine =
  let table_options = table_options engine in
  let queries = List.map (fun q -> q^table_options) creation_queries in
  My.init_db db queries
;;

let to_iri = function
  Term.Iri iri -> iri
| t -> failwith ("Not a IRI:"^(Term.string_of_term t))
;;

let init_graph dbd engine name =
  let table = My.graph_table_of_graph_name dbd name in
  if not (My.table_exists dbd table) then
    begin
      let query =
        "CREATE TABLE IF NOT EXISTS "^table^" (\
         subject bigint NOT NULL, predicate bigint NOT NULL, \
         object bigint NOT NULL,\
         KEY SubjectPredicate (subject,predicate),\
         KEY PredicateObject (predicate,object),\
         KEY SubjectObject (subject,object)\
        ) "^(table_options engine)^" AVG_ROW_LENGTH=59"
      in
      ignore(My.exec_query dbd query);
(*
      let query = Printf.sprintf
        "ALTER TABLE %s ADD UNIQUE INDEX (subject, predicate, object)" table
      in
      ignore(exec_query dbd query)
*)
    end;
  let more = [
      prepared_term_of_hash,
      "SELECT NULL, value, NULL, NULL, NULL FROM resources where id=? LIMIT 1 UNION ALL \
      SELECT NULL, NULL, value, language, datatype FROM literals where id=? LIMIT 1 UNION ALL \
      SELECT value, NULL, NULL, NULL, NULL FROM bnodes where id=? LIMIT 1" ;
    ]
  in
  My.create_namespaces_table dbd table ;
  My.prepare_queries dbd ~more table;
  table
;;

let term_of_hash dbd hash =
  let s_hash = Int64.to_string hash in
  let res = My.exec_prepared dbd prepared_term_of_hash [ s_hash ; s_hash ; s_hash ] in
  let size = Mysql.size res in
  match Int64.compare size Int64.one with
    n when n > 0 ->
      let msg = "No term with hash \"" ^(Int64.to_string hash)^ "\"" in
      raise (Error msg)
  | 0 ->
      begin
        match Mysql.fetch res with
          None -> assert false (* already tested: there is at least one row *)
        | Some t ->
            match t with
              [| Some name ; None ; None ; None ; None |] ->
                Blank_ (Term.blank_id_of_string name)
            | [| None ; Some iri ; None ; None ; None |] ->
                Term.term_of_iri_string iri
            | [| None ; None ; Some value ; lang ; typ |] ->
                let typ = Misc.map_opt
                  Iri.of_string
                  (Misc.opt_of_string (Misc.string_of_opt typ))
                in
                Term.term_of_literal_string
                ?lang: (Misc.opt_of_string (Misc.string_of_opt lang))
                ?typ value
            | _ ->
                let msg = "Bad result for term with hash \"" ^ (Int64.to_string hash) ^"\"" in
                raise (Error msg)
      end
  | _ ->
      let msg = "More than one term found with hash \"" ^ (Int64.to_string hash) ^ "\"" in
      raise (Error msg)
;;

let query_hash_list g stmt params =
  let res = My.exec_prepared g.g_dbd stmt params in
  let f = function
  | [| Some hash |] -> Mysql.int642ml hash
  | _ -> raise (Error "Invalid result: NULL hash or too many fields")
  in
  Mysql.map res ~f
;;

let query_term_list g stmt params =
  let res = My.exec_prepared g.g_dbd stmt params in
  let f = function
  | [| Some hash |] ->
      term_of_hash g.g_dbd (Mysql.int642ml hash)
  | _ -> raise (Error "Invalid result: NULL hash or too many fields")
  in
  Mysql.map res ~f
;;

let query_hash_triple_list g where_clause =
  let query =
    "SELECT subject, predicate, object FROM "^g.g_table^" where " ^ where_clause (* removed DISTINCT *)
  in
  let res = exec_query g.g_dbd query in
  let f = function
  | [| Some sub ; Some pred ; Some obj |] ->
      (Mysql.int642ml sub,
       Mysql.int642ml pred,
       Mysql.int642ml obj
      )
  | _ -> raise (Error "Invalid result: NULL hash(es) or bad number of fields")
  in
  Mysql.map res ~f
;;

let query_triple_list g where_clause =
  let query =
    "SELECT subject, predicate, object FROM "^g.g_table^" where " ^ where_clause (* removed DISTINCT *)
  in
  let res = exec_query g.g_dbd query in
  let f = function
  | [| Some sub ; Some pred ; Some obj |] ->
      (term_of_hash g.g_dbd (Mysql.int642ml sub),
       to_iri (term_of_hash g.g_dbd (Mysql.int642ml pred)),
       term_of_hash g.g_dbd (Mysql.int642ml obj)
      )
  | _ -> raise (Error "Invalid result: NULL hash(es) or bad number of fields")
  in
  Mysql.map res ~f
;;

let open_graph ?(options=[]) name =
  let db = db_of_options options in
  let engine =
    try
      match List.assoc "engine" options with
        "" -> raise Not_found
      | s -> s
    with Not_found -> "InnoDB"
  in
  let engine = String.uppercase_ascii engine in
  let dbd = init_db db engine in
  let table_name = init_graph dbd engine name in
  { g_name = name ;
    g_table = table_name ;
    g_dbd = dbd ;
    g_in_transaction = false ;
    g_transactions = engine = "INNODB" ;
  }
;;

let add_triple g ~sub ~pred ~obj =
  let sub = hash_of_term g.g_dbd ~add:true sub in
  let pred = hash_of_term g.g_dbd ~add:true (Term.Iri pred) in
  let obj = hash_of_term g.g_dbd ~add:true obj in
  let params = [ Int64.to_string sub ; Int64.to_string pred ; Int64.to_string obj] in
  (* do not insert if already present *)
  let res = My.exec_prepared g.g_dbd My.prepared_count_triples params in
  match Mysql.fetch res with
    Some [| Some s |] when int_of_string s = 0 ->
      ignore(My.exec_prepared g.g_dbd My.prepared_insert_triple params)
  | _ -> ()
;;

let rem_triple g ~sub ~pred ~obj =
  let sub = hash_of_term g.g_dbd ~add:false sub in
  let pred = hash_of_term g.g_dbd ~add:false (Term.Iri pred) in
  let obj = hash_of_term g.g_dbd ~add:false obj in
  ignore(My.exec_prepared g.g_dbd
   My.prepared_delete_triple
   [ Int64.to_string sub; Int64.to_string pred; Int64.to_string obj]
  )
;;

let subjects_of g ~pred ~obj =
  query_term_list g My.prepared_subjects_of
  [ Int64.to_string (hash_of_term g.g_dbd (Term.Iri pred)) ;
    Int64.to_string (hash_of_term g.g_dbd obj) ]
;;

let predicates_of g ~sub ~obj =
  List.map to_iri
    (query_term_list g My.prepared_predicates_of
     [ Int64.to_string (hash_of_term g.g_dbd sub) ;
       Int64.to_string (hash_of_term g.g_dbd obj) ]
    )
;;

let objects_of g ~sub ~pred =
  query_term_list g My.prepared_objects_of
  [ Int64.to_string (hash_of_term g.g_dbd sub) ;
    Int64.to_string (hash_of_term g.g_dbd (Term.Iri pred)) ]
;;

let mk_where_clause ?sub ?pred ?obj g =
  let mk_cond field = function
    None -> []
  | Some term ->
      [field ^"="^(Int64.to_string (hash_of_term g.g_dbd term))]
  in
  match sub, pred, obj with
    None, None, None -> "TRUE"
  | _ ->
      let pred_cond =
        match pred with
          None -> []
        | Some p -> ["predicate="^(Int64.to_string (hash_of_term g.g_dbd (Term.Iri p)))]
      in
      let l =
        (mk_cond "subject" sub) @
        pred_cond @
        (mk_cond "object" obj)
      in
      String.concat " AND " l
;;

let mk_hash_where_clause ?sub ?pred ?obj g =
  let mk_cond field = function
    None -> []
  | Some term ->
      [field ^"="^(Int64.to_string term)]
  in
  match sub, pred, obj with
    None, None, None -> "TRUE"
  | _ ->
      let l =
        (mk_cond "subject" sub) @
        (mk_cond "predicate" pred) @
        (mk_cond "object" obj)
      in
      String.concat " AND " l
;;

let find ?sub ?pred ?obj g =
  let clause = mk_where_clause ?sub ?pred ?obj g in
  query_triple_list g clause
;;

let exists = My.exists mk_where_clause;;

let subjects g = query_term_list g My.prepared_subject [];;
let predicates g = List.map to_iri (query_term_list g My.prepared_predicate []);;
let objects g = query_term_list g My.prepared_object [];;

module MyBGP =
  struct
    type term = Int64.t
    type g = t
    let term g t = hash_of_term g.g_dbd ~add: false t
    let rdfterm g t = term_of_hash g.g_dbd t
    let compare _ = Int64.compare
    let subjects g = query_hash_list g My.prepared_subject []
    let objects g = query_hash_list g My.prepared_object []
    let find ?sub ?pred ?obj g =
       let clause = mk_hash_where_clause ?sub ?pred ?obj g in
       query_hash_triple_list g clause
  end

module Mysql =
  struct
    let name = "mysql"
    type g = t
    type error = string
    exception Error = Error
    let string_of_error = My.string_of_error

    let graph_name g = g.g_name
    let graph_size g = My.graph_size g

    let open_graph = open_graph

    let add_triple = add_triple
    let rem_triple = rem_triple

    let add_triple_t g (sub, pred, obj) = add_triple g ~sub ~pred ~obj
    let rem_triple_t g (sub, pred, obj) = rem_triple g ~sub ~pred ~obj

    let subjects_of = subjects_of
    let predicates_of = predicates_of
    let objects_of = objects_of

    let find = find
    let exists = exists
    let exists_t (sub, pred, obj) g = exists ~sub ~pred ~obj g

    let subjects = subjects
    let predicates = predicates
    let objects = objects
    let folder _ = None

    let transaction_start = My.transaction_start
    let transaction_commit = My.transaction_commit
    let transaction_rollback = My.transaction_rollback

    let new_blank_id = My.new_blank_id

    let namespaces = My.namespaces
    let add_namespace = My.add_namespace
    let rem_namespace = My.rem_namespace
    let set_namespaces = My.set_namespaces

    module BGP = MyBGP
  end;;

Graph.add_storage (module Mysql : Graph.Storage);;

(*
let output_times file map =
  let oc = open_out file in
  let total = SMap.fold
  (fun q (t,cpt) acc ->
     Printf.fprintf oc "%f;%f;%d;%s\n"
     (t /. (float cpt)) t cpt q;
     t +. acc
    )
    map 0.
  in
  Printf.fprintf oc "Total=%f\n" total;
  close_out oc
;;

let _ = Stdlib.at_exit
  (fun () ->
     output_times "rdf_my_prep_times.log" !prep_times;
     output_times "rdf_my_q_times.log" !q_times
  )
;;
*)