Source file deserialize.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
(*********************************************************************************)
(*                OCaml-RDF                                                      *)
(*                                                                               *)
(*    Copyright (C) 2012-2024 Institut National de Recherche en Informatique     *)
(*    et en Automatique. All rights reserved.                                    *)
(*                                                                               *)
(*    This program is free software; you can redistribute it and/or modify       *)
(*    it under the terms of the GNU Lesser General Public License version        *)
(*    3 as published by the Free Software Foundation.                            *)
(*                                                                               *)
(*    This program is distributed in the hope that it will be useful,            *)
(*    but WITHOUT ANY WARRANTY; without even the implied warranty of             *)
(*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *)
(*    GNU General Public License for more details.                               *)
(*                                                                               *)
(*    You should have received a copy of the GNU General Public License          *)
(*    along with this program; if not, write to the Free Software                *)
(*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA                   *)
(*    02111-1307  USA                                                            *)
(*                                                                               *)
(*    Contact: Maxence.Guesdon@inria.fr                                          *)
(*                                                                               *)
(*********************************************************************************)

(** Deserialization from JSON-LD. *)

module L = Log
open Rdf
module Log = L
open J
open T

(** Bound (10 to the power 21) from which an integral JSON number is no
    longer serialized as an [xsd:integer] but as an [xsd:double]. *)
let max_xsd_int = 10. ** 21.

(** Internal control-flow exception: raised while converting a value object
    to signal that no RDF term can be produced for it. *)
exception Null

(** [canonize_double_string str] rewrites a number in exponent notation
    (typically the output of a ["%E"]-like format) towards the canonical
    lexical form of an [xsd:double]:
    - the exponent marker is always an upper-case [E];
    - a ['+'] sign and leading zeros are removed from the exponent, a
      ['-'] sign is kept, and a zero exponent is written ["0"];
    - trailing zeros of the fractional part of the mantissa are removed,
      keeping at least one digit after the decimal point.

    [str] is returned unchanged when it contains no exponent marker or
    when the marker is its first character. A mantissa without decimal
    point is left as is, since stripping its trailing zeros would change
    the value. *)
let canonize_double_string str =
  let marker =
    match String.index_opt str 'E' with
    | Some _ as found -> found
    | None -> String.index_opt str 'e'
  in
  match marker with
  | None | Some 0 -> str
  | Some p ->
      let len = String.length str in
      let exponent =
        let start = p + 1 in
        let negative = start < len && str.[start] = '-' in
        let digits_from =
          if start < len && (str.[start] = '+' || str.[start] = '-')
          then start + 1
          else start
        in
        (* skip leading zeros of the exponent *)
        let i = ref digits_from in
        while !i < len && str.[!i] = '0' do incr i done ;
        match String.sub str !i (len - !i) with
        | "" -> "0" (* missing or all-zero exponent, whatever its sign *)
        | digits -> if negative then "-" ^ digits else digits
      in
      let mantissa =
        let m = String.sub str 0 p in
        match String.index_opt m '.' with
        | None -> m
        | Some dot ->
            (* never go below the first digit following the point *)
            let stop = ref (p - 1) in
            while !stop > dot + 1 && m.[!stop] = '0' do decr stop done ;
            if !stop = dot then
              m ^ "0" (* "1." => "1.0" *)
            else
              String.sub m 0 (!stop + 1)
      in
      Printf.sprintf "%sE%s" mantissa exponent

(** [datatype_of_json json] extracts a datatype IRI from a ["@type"] value.
    - The string ["@json"] maps to [Rdf.Rdf_.json].
    - Any other string is parsed with [iri_of_string]; the result is [None]
      when parsing raises or when the IRI is relative.
    - For a non-empty list, only the first element is considered.
    - Anything else gives [None]. *)
let rec datatype_of_json json =
  match json.data with
  | `List (first :: _) -> datatype_of_json first
  | `String "@json" -> Some Rdf.Rdf_.json
  | `String s ->
      let parsed = try Some (iri_of_string s) with _ -> None in
      (match parsed with
       | None -> None
       | Some iri -> if Iri.is_relative iri then None else Some iri
      )
  | _ -> None

(* https://www.w3.org/TR/json-ld11-api/#list-to-rdf-conversion *)
(** [list_to_rdf options g json] converts the items of a JSON list to an
    RDF collection. The result is the term heading the collection
    ([rdf:nil] when [json] is an empty list or is not a list) and the
    triples describing the collection cells and their items.
    One blank node per item is requested from [g]. *)
let rec list_to_rdf options g json =
  Log.debug (fun m -> m "list_to_rdf: json=%s"
     (J.to_string ~minify:true json));
  let empty_collection = Rdf.(Term.Iri Rdf_.nil, []) in
  match json.data with
  | `List items -> (* 1) to 4) *)
      (* Items are handled from the last one to the first one, so that each
         cell can refer to the already built tail of the collection. *)
      let add_cell (tail, acc) item =
        let converted = object_to_rdf options g item in
        (* The spec algorithm is applied strictly: when the item cannot be
           converted, its cell is still created, without rdf:first. *)
        let cell = Rdf.Term.blank_ (g.Rdf.Graph.new_blank_id()) in
        let acc = (cell, Rdf.Rdf_.rest, tail) :: acc in
        match converted with
        | None -> (cell, acc)
        | Some (term, triples) ->
            Log.debug (fun m -> m "list_to_rdf: term=%s"
               (Rdf.Term.string_of_term term));
            (cell, ((cell, Rdf.Rdf_.first, term) :: triples) @ acc)
      in
      List.fold_left add_cell empty_collection (List.rev items)
  | _ -> empty_collection

(* https://www.w3.org/TR/json-ld11-api/#object-to-rdf-conversion *)
(* [object_to_rdf options g item] converts a node object, list object or
   value object to an RDF term. It returns [Some (term, triples)], where
   [triples] are additional triples required by [term] (collection cells,
   compound literal description), or [None] when no term can be produced
   (ill-formed or relative IRI, invalid language tag, missing "@value",
   unusable "@type", non-string value which is neither a boolean nor a
   number).
   Numbers are serialized as follows:
   - a number with a fractional part, or whose absolute value is at least
     [max_xsd_int], or whose datatype is xsd:double, is printed with 15
     fractional digits in exponent notation; the string is passed to
     [canonize_double_string] when the resulting datatype is xsd:double;
   - other numbers are printed as integers, without going through a
     native [int] (which would overflow before [max_xsd_int]).
   [options.rdf_direction] selects how "@direction" is represented. *)
and object_to_rdf options g item =
  Log.debug (fun m -> m "object_to_rdf item=%s" (J.to_string ~minify:true item));
  match is_node_object item with
  | true -> (* 1) and 2) *)
      (match J.(item -?> "@id") with
       | None -> assert false
       | Some v ->
           match v.data with
           | `String str when str_is_blank_node_id str ->
               (* remove "_:" *)
               let str = String.sub str 2 (String.length str - 2) in
               Some ((Rdf.Term.blank str), [])
           | `String str ->
               (match iri_of_string str with
                | exception _ -> None
                | iri when Iri.is_relative iri -> None
                | iri -> Some (Rdf.Term.Iri (Iri.normalize iri), []))
           | _ -> None
      )
  | false ->
      match is_list_object item with
      | true -> (* 3) *)
          (match J.(item -?> "@list") with
           | None -> assert false
           | Some v -> Some (list_to_rdf options g v)
          )
      | false ->
          try
            (* 7) *)
            let language = match J.(item -?> "@language") with
              | None -> None
              | Some { data = `String s} ->
                      if not (Rdf.Lang.is_valid_language_tag s) then
                        raise Null
                      else
                        Some s
              | _ -> raise Null
            in
            (* 4) *)
            let value, dt =
              match J.(item -?> "@value") with
              | None -> raise Null
              | Some value ->
                  (* 5) *)
                  let datatype =
                    match Option.map datatype_of_json J.(item -?> "@type") with
                    | Some None -> (* 6) *) raise Null
                    | None -> None
                    | Some (Some iri) -> Some iri
                  in
                  (* 8) *)
                  match datatype with
                  | Some iri when Iri.equal iri Rdf.Rdf_.json ->
                      J.string (J.to_string value), iri
                  | _ ->
                      match value.data with
                      | `Bool b -> (* 9) *)
                          let str = if b then "true" else "false" in
                          let dt =
                            match datatype with
                            | Some iri -> iri
                            | None -> Rdf.Rdf_.xsd_boolean
                          in
                          (J.string str, dt)
                      | `Float f -> (* 10) *)
                          (* the bound applies to the absolute value, so that
                             large negative numbers are doubles too *)
                          if not (Float.is_integer f) || Float.abs f >= max_xsd_int ||
                            Option.compare Iri.compare datatype (Some Rdf.Rdf_.xsd_double) = 0
                          then
                            let dt = match datatype with
                              | None -> Rdf.Rdf_.xsd_double
                              | Some dt -> dt
                            in
                            (* 15 fractional digits, as in the round-tripping
                               rules of the spec; the default precision of
                               "%E" would keep only 6 of them *)
                            let str = Printf.sprintf "%.15E" f in
                            let str = if Iri.equal dt Rdf.Rdf_.xsd_double
                               then canonize_double_string str
                               else str
                            in
                            (J.string str, dt)
                          else (* 11) *)
                            (* [f] is integral and below max_xsd_int in
                               absolute value, but may not fit a native int:
                               print it directly from the float. Zero is
                               special-cased to avoid printing "-0". *)
                            let v =
                              if f = 0. then "0" else Printf.sprintf "%.0f" f
                            in
                            let dt = match datatype with
                              | None -> Rdf.Rdf_.xsd_integer
                              | Some dt -> dt
                            in
                            (J.string v, dt)
                      | _ -> (* 12) *)
                          match datatype with
                          | None when language <> None -> value, Rdf.Rdf_.dt_langString
                          | None -> value, Rdf.Rdf_.xsd_string
                          | Some dt -> value, dt
            in
            (* at this point only a string can be used as lexical form *)
            let value_str = match value.data with
              | `String str -> str
              | _ ->
                  Log.warn (fun m -> m "object to rdf: invalid value %s" (J.to_string value));
                  raise Null
            in
            let (lit, triples) = match J.(item -?> "@direction"), options.rdf_direction with
              | Some { data = `String dir }, Some rdf_dir -> (* 13) *)
                  (* 13.1) *)
                  let language = match language with
                    | None -> ""
                    | Some str -> Rdf.Utf8.utf8_lowercase str
                  in
                  (match rdf_dir with
                   | I18n_datatype -> (* 13.2) *)
                       let dt = Printf.sprintf "%s%s_%s" T.i18n_ns language dir in
                       let typ = iri_of_string dt in
                       let lit = Rdf.Term.term_of_literal_string ~typ value_str in
                       lit, []
                   | Compound_literal -> (* 13.3) *)
                       (* 13.3.1) what the spec calls "literal" is a blank node here *)
                       let bn = Rdf.Term.blank_ (g.Rdf.Graph.new_blank_id ()) in
                       (* 13.3.2) *)
                       let triple =(bn, Rdf.Rdf_.value, Rdf.Term.term_of_literal_string value_str) in
                       (* 13.3.3) *)
                       let triples = triple ::
                         (match J.(item -?> "@language") with
                          | None -> []
                          | _ -> [ bn, Rdf.Rdf_.language, Rdf.Term.term_of_literal_string language])
                       in
                       (* 13.3.4) *)
                       let triples =
                         (bn, Rdf.Rdf_.direction, Rdf.Term.term_of_literal_string dir) ::
                           triples
                       in
                       (bn, triples)
                  )
              | _ -> (* 14) *)
                  let lit = Rdf.Term.term_of_literal_string ~typ:dt ?lang:language value_str in
                  lit, []
            in
            Some (lit, triples)
          with
            Null -> None
              (*
                 let term_of_json json =
                 match json.data with
                 | `Null -> None
                 | `Bool b -> Some (Rdf.Term.term_of_bool b)
                 | `Float f -> Some (Rdf.Term.term_of_double f)
                 | `List _ -> None
                 | `Obj _ -> None
                 | `String str when str_is_blank_node_id str ->
                 Some (Rdf.Term.blank str)
                 | `String str ->
                 match iri_of_string
              *)

(* https://www.w3.org/TR/json-ld11-api/#deserialize-json-ld-to-rdf-algorithm *)
(** [jsonld_to_rdf options node_map ds] adds to dataset [ds] the triples
    described by [node_map]. The ["@default"] graph of the node map goes to
    [ds.default]; each graph of [node_map.graphs] is added through
    [ds.add], and skipped with a warning when [ds.add] is [None].

    Nodes whose subject is neither a blank node identifier nor an absolute
    IRI are ignored. For each property of a node:
    - ["@type"] values give [rdf:type] triples; values which are not
      strings, not parsable as IRI or relative IRIs are skipped with a
      warning;
    - other keywords, blank node identifiers and properties which are not
      absolute IRIs are ignored;
    - each value of any other property is converted with [object_to_rdf]
      and added, together with its additional triples, when the conversion
      succeeds. *)
let jsonld_to_rdf =
  (* Add the triple (sub, pred, o) for the term o obtained from [json],
     if any, then the triples coming with o. *)
  let add_triple options ds_g sub pred json =
    Log.debug (fun m -> m "jsonld_to_rdf/add_triple: subject=%s, pred=%s, json=%s"
       (Rdf.Term.string_of_term sub) (Iri.to_string pred)
         (J.to_string ~minify:true json)
    );
    match object_to_rdf options ds_g json with
    | None -> Log.debug (fun m -> m "object_to_rdf returned None")
    | Some (obj, triples) ->
        ds_g.Rdf.Graph.add_triple ~sub ~pred ~obj;
        List.iter (fun (sub,pred,obj) -> ds_g.add_triple ~sub ~pred ~obj) triples
  in
  (* Handle all the values of property [prop] of subject [sub]. *)
  let add_prop options ds_g sub prop values =
    Log.debug (fun m -> m "jsonld_to_rdf/add_prop: subject=%s, prop=%s"
       (Rdf.Term.string_of_term sub) prop);
    match prop with
    | "@type" -> (* 1.3.2.1) *)
        List.iter
        (fun v ->
           match v.data with
           | `String str ->
               let obj =
                 if str_is_blank_node_id str then
                   let str = remove_blank_id_prefix str in
                   Some (Rdf.Term.(Blank_ (blank_id_of_string str)))
                 else
                   match iri_of_string str with
                   | iri when Iri.is_relative iri ->
                       (* as for subjects and properties, a relative IRI
                          is not well-formed *)
                       Log.warn (fun m -> m "invalid @type value %s" (J.to_string ~minify:true v));
                       None
                   | iri -> Some (Rdf.Term.Iri (Iri.normalize iri))
                   | exception _ ->
                       Log.warn (fun m -> m "invalid @type value %s" (J.to_string ~minify:true v));
                       None
               in
               (match obj with
                | None -> ()
                | Some obj ->
                    ds_g.Rdf.Graph.add_triple ~sub ~pred:Rdf.Rdf_.type_ ~obj
               )
           | _ -> Log.warn (fun m -> m "invalid @type value %s" (J.to_string ~minify:true v))
        ) values
    | _ when str_is_kw prop -> (* 1.3.2.2) *)
        ()
    | _ when str_is_blank_node_id prop -> (* 1.3.2.3) *)
        (* we do not handle generalized RDF graphs *)
        ()
    | _ ->
        match iri_of_string prop with
        | exception _ -> (* 1.3.2.4) *) ()
        | iri when Iri.is_relative iri -> (* 1.3.2.4) *) ()
        | iri -> (* 1.3.2.5) *)
            List.iter (add_triple options ds_g sub (Iri.normalize iri)) values
  in
  (* Handle the node identified by [subject]; [props] is a reference to the
     map from property names to their values. *)
  let add_node options ds_g subject props =
    Log.debug (fun m -> m "jsonld_to_rdf/add_node: subject=%s, props=%a" subject Flatten.pp_node props);
    let props = !props in
    let sub =
      if str_is_blank_node_id subject then
        let subject = remove_blank_id_prefix subject in
        Some (Rdf.Term.blank subject)
      else
        match iri_of_string subject with
        | exception _ -> (* 1.3.1) *)
            Log.warn (fun m -> m "invalid subject %S" subject);
            None
        | iri when Iri.is_relative iri -> (* 1.3.1: "not well-formed" includes relative IRIs *)
            None
        | iri -> Some (Rdf.Term.Iri (Iri.normalize iri))
    in
    match sub with
    | None -> ()
    | Some sub -> (* 1.3.2) *) SMap.iter (add_prop options ds_g sub) props
  in
  (* Add all the nodes of node map graph [g] to RDF graph [ds_g]. *)
  let add_to_graph options ds_g g =
    (* 1.3) *)
    SMap.iter (add_node options ds_g) g.Flatten.nodes
  in
  fun options (node_map:Flatten.node_map) (ds:Rdf.Ds.dataset) ->
    match Flatten.get_graph node_map "@default" with
    | None -> assert false
    | Some graph ->
        add_to_graph options ds.default graph ;

        Ds.NameMap.iter (fun name g ->
           match ds.add with
           | None -> Log.warn (fun m -> m "Could not add graph %a to dataset" Ds.pp_name name)
           | Some f ->
               let ds_g = f ~name g.Flatten.rdf_g in
               add_to_graph options ds_g g
        )
          node_map.graphs


(** [to_rdf options json g] deserializes JSON-LD document [json] to RDF.
    The document is expanded, using the name of graph [g] as base IRI and
    the context built from [options], then a node map is generated and its
    contents are added by {!jsonld_to_rdf} to the dataset created by
    [Rdf.Ds.mem_dataset g].

    The promise resolves to [(ds, root)]. [root] is the IRI found in the
    ["@id"] of the expanded document, when this document is an object or a
    list containing exactly one object and this ["@id"] is a string
    accepted by [Iri.of_string]; it is [None] otherwise. *)
let to_rdf options json g =
  let base_url = g.Rdf.Graph.name() in
  let%lwt ctx = Expand.ctx_of_options options base_url in
  let%lwt json = Expand.expansion options ctx None json base_url in
  let root =
    (* the single top-level object of the expanded document, if any *)
    let top_object =
      match json.data with
      | `Obj map -> Some map
      | `List [ { data = `Obj map } ] -> Some map
      | _ -> None
    in
    match top_object with
    | None -> None
    | Some map ->
        match J.map_get map "@id" with
        | Some { data = `String str } ->
            (match Iri.of_string str with
             | iri -> Some iri
             | exception _ -> None)
        | _ -> None
  in
  Log.debug (fun m -> m "expanded json: %s" (J.to_string json));
  let node_map = Flatten.init_node_map g in
  let () = Flatten.node_map_generation node_map json in
  Log.debug (fun m -> m "node_map_generation => %a" Flatten.pp_node_map node_map);
  let ds = Rdf.Ds.mem_dataset g in
  let () = jsonld_to_rdf options node_map ds in
  Lwt.return (ds, root)