1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
(** *)
open Graph;;
open Term;;
(** {2 Using trees for XML docs}
Code taken from Xmlm examples.
Thanks to Xmlm, namespaces are already handled by the parser :-)
*)
type tree = E of Xmlm.tag * tree list | D of string
let in_tree i =
let el tag childs = E (tag, childs) in
let data d = D d in
try
Xmlm.input_doc_tree ~el ~data i
with
Xmlm.Error ((line, col), error) ->
let msg = Printf.sprintf "Line %d, column %d: %s"
line col (Xmlm.error_message error)
in
failwith msg
let out_tree o t =
let frag = function
| E (tag, childs) -> `El (tag, childs)
| D d -> `Data d
in
Xmlm.output_doc_tree frag o t
let apply_namespaces = Dot.apply_namespaces;;
let output_doc_tree ns ?(decl=true) dest tree =
let map (pref, s) =
match pref with
"" -> apply_namespaces ns s
| _ -> (pref, s)
in
let tree =
match tree with
D _ -> tree
| E ((tag,atts),subs) ->
let atts = List.fold_left
(fun acc (((pref,s),v) as att) ->
if pref = Xmlm.ns_xmlns then acc else att :: acc
)
[]
atts
in
let ns_atts = List.map (fun (pref,iri) -> ((Xmlm.ns_xmlns, pref), iri)) ns in
E ((tag, ns_atts @ atts), subs)
in
let ns_prefix s = Some s in
let output = Xmlm.make_output ~ns_prefix ~decl dest in
let frag = function
| D d -> `Data d
| E (((pref,s),atts), childs) ->
let (pref, s) = map (pref, s) in
let atts = List.map
(fun ((pref,s),v) -> (map (pref, s), v)) atts
in
`El (((pref,s),atts), childs)
in
Xmlm.output_doc_tree frag output (None, tree);
;;
let string_of_xmls namespaces trees =
try
let b = Buffer.create 256 in
List.iter (output_doc_tree namespaces ~decl: false (`Buffer b)) trees;
Buffer.contents b
with
Xmlm.Error ((line, col), error) ->
let msg = Printf.sprintf "Line %d, column %d: %s"
line col (Xmlm.error_message error)
in
failwith msg
;;
let xml_of_string str =
try
let i = Xmlm.make_input ~strip: true (`String (0, str)) in
let (_,tree) = in_tree i in
tree
with
Xmlm.Error ((line, col), error) ->
let msg = Printf.sprintf "Line %d, column %d: %s\n%s"
line col (Xmlm.error_message error) str
in
failwith msg
;;
let xmls_of_string str =
let str = "<foo__>"^str^"</foo__>" in
match xml_of_string str with
E ((("","foo__"),_),subs) -> subs
| _ -> assert false
;;
let get_first_child xml tag =
match xml with
D _ -> None
| E ((_,_),subs) ->
try Some (List.find (function E ((t,_),_) -> t = tag | _ -> false) subs)
with Not_found -> None
;;
let is_element iri (pref,loc) =
let iri2 = Iri.of_string (pref^loc) in
let b = Iri.compare iri iri2 = 0 in
b
;;
(** {2 Input} *)
module SMap = Types.SMap;;
module Irimap = Iri.Map
type state =
{ subject : Term.term option ;
predicate : Iri.t option ;
xml_base : Iri.t ;
xml_lang : string option ;
datatype : Iri.t option ;
namespaces : string Irimap.t ;
}
type global_state =
{
blanks : Term.blank_id SMap.t ;
gnamespaces : string Irimap.t ;
}
exception Invalid_rdf of string
let error s = raise (Invalid_rdf s);;
let () = Printexc.register_printer
(function
| Invalid_rdf str ->
Some (Printf.sprintf "Invalid RDF: %s" str)
| _ -> None)
let get_att att l = try Some (List.assoc att l) with Not_found -> None;;
let get_att_iri =
let rec iter pred = function
[] -> None
| (x,v) :: q ->
if pred x then Some v else iter pred q
in
fun iri l -> iter (is_element iri) l
;;
let abs_iri state iri =
let iri =
match Iri.is_relative iri with
false -> iri
| true ->
Iri.resolve ~base: state.xml_base iri
in
iri
let set_xml_base state = function
D _ -> state
| E ((_,atts),_) ->
match get_att (Xmlm.ns_xml, "base") atts with
None -> state
| Some s ->
let r = Iri.of_string s in
let xml_base = abs_iri state r in
{ state with xml_base }
;;
let set_xml_lang state = function
D _ -> state
| E ((_,atts),_) ->
match get_att (Xmlm.ns_xml, "lang") atts with
None -> state
| Some s ->
{ state with xml_lang = Some s }
;;
let set_namespaces gstate state = function
D _ -> (gstate, state)
| E ((_,atts),_) ->
let f (gstate, state) ((pref,s),v) =
if pref = Xmlm.ns_xmlns then
begin
let iri = Iri.of_string v in
let gstate = { gstate with gnamespaces = Irimap.add iri s gstate.gnamespaces } in
let state = { state with namespaces = Irimap.add iri s state.namespaces } in
(gstate, state)
end
else
(gstate, state)
in
List.fold_left f (gstate, state) atts
;;
let update_state gstate state t =
set_namespaces gstate (set_xml_lang (set_xml_base state t) t) t;;
let get_blank_node g gstate id =
try (Blank_ (SMap.find id gstate.blanks), gstate)
with Not_found ->
let bid = g.new_blank_id () in
let gstate = { gstate with blanks = SMap.add id bid gstate.blanks } in
(Blank_ bid, gstate)
let rec input_node g state gstate t =
let (gstate, state) = update_state gstate state t in
match t with
D s when state.predicate = None ->
let msg = Printf.sprintf "Found (Data %S) with no current predicate." s in
error msg
| D s ->
let obj = Term.term_of_literal_string ?typ: state.datatype ?lang: state.xml_lang s in
let sub = match state.subject with None -> assert false | Some s -> s in
let pred = match state.predicate with None -> assert false | Some u -> u in
g.add_triple ~sub ~pred ~obj;
gstate
| E (((pref,s), atts), children) ->
let (node, gstate) =
match get_att_iri Rdf_.about atts with
Some s -> (Iri (abs_iri state (Iri.of_string s)), gstate)
| None ->
match get_att_iri Rdf_.id atts with
Some id -> (Iri (Iri.of_string ((Iri.to_string state.xml_base)^"#"^id)), gstate)
| None ->
match get_att_iri Rdf_.nodeID atts with
Some id -> get_blank_node g gstate id
| None -> (Blank_ (g.new_blank_id ()), gstate)
in
begin
match state.subject, state.predicate with
Some sub, Some pred ->
g.add_triple ~sub ~pred ~obj: node
| _ -> ()
end;
let state = { state with subject = Some node ; predicate = None } in
if not (is_element Rdf_.description (pref,s)) then
begin
let type_iri = Iri.of_string (pref^s) in
g.add_triple ~sub: node ~pred: Rdf_.type_ ~obj: (Iri type_iri)
end;
let f ((pref, s), v) =
if pref <> Xmlm.ns_xml && pref <> Xmlm.ns_xmlns then
begin
let iri_prop = Iri.of_string (pref^s) in
if not (List.exists (Iri.equal iri_prop) [ Rdf_.about ; Rdf_.id; Rdf_.nodeID ]) then
begin
let obj = Term.term_of_literal_string ?lang: state.xml_lang v in
g.add_triple ~sub: node ~pred: iri_prop ~obj
end
end
in
List.iter f atts;
let (gstate, _) = List.fold_left (input_prop g state) (gstate, 1) children in
gstate
and input_prop g state (gstate, li) t =
let (gstate, state) = update_state gstate state t in
match t with
D s ->
let msg = Printf.sprintf "Found (Data %S) but expected a property node." s in
error msg
| E (((pref,s),atts),children) ->
let sub = match state.subject with None -> assert false | Some sub -> sub in
let prop_iri = Iri.of_string (pref^s) in
let (prop_iri, li) =
if Iri.equal prop_iri Rdf_.li then
(Rdf_.n li, li + 1)
else
(prop_iri, li)
in
match get_att_iri Rdf_.resource atts with
Some s ->
let iri = Iri.of_string s in
let obj = Iri (abs_iri state iri) in
g.add_triple ~sub ~pred: prop_iri ~obj ;
(gstate, li)
| None ->
match get_att_iri Rdf_.nodeID atts with
Some id ->
let (obj, gstate) = get_blank_node g gstate id in
g.add_triple ~sub ~pred: prop_iri ~obj ;
(gstate, li)
| None ->
match get_att_iri Rdf_.parseType atts with
Some "Literal" ->
let xml = string_of_xmls
(Irimap.fold (fun iri s acc -> (s, Iri.to_string iri) :: acc) state.namespaces [])
children
in
let obj = Term.term_of_literal_string ~typ: Rdf_.dt_XMLLiteral xml in
g.add_triple ~sub ~pred: prop_iri ~obj;
(gstate, li)
| Some "Resource" ->
begin
let node = Blank_ (g.new_blank_id ()) in
g.add_triple ~sub ~pred: prop_iri ~obj: node ;
let state = { state with subject = Some node ; predicate = None } in
List.fold_left (input_prop g state) (gstate, 1) children
end
| Some "Collection" ->
begin
let rec f (gstate, previous) = function
[] -> assert false
| first :: rest ->
let state = { state with
subject = Some previous ;
predicate = Some Rdf_.first }
in
let gstate = input_node g state gstate first in
match rest with
[] -> g.add_triple ~sub: previous
~pred: Rdf_.rest ~obj: (Iri Rdf_.nil);
(gstate, previous)
| _ ->
let blank = Term.Blank_ (g.new_blank_id ()) in
g.add_triple ~sub: previous ~pred: Rdf_.rest ~obj: blank;
f (gstate, blank) rest
in
let gstate =
match children with
[] -> gstate
| _ ->
let blank = Term.Blank_ (g.new_blank_id ()) in
g.add_triple ~sub ~pred: prop_iri ~obj: blank;
fst (f (gstate, blank) children)
in
(gstate, li)
end
| Some s -> error (Printf.sprintf "Unknown parseType %S" s)
| None ->
match get_att_iri Rdf_.datatype atts, children with
| Some s, [D lit] ->
let typ = abs_iri state (Iri.of_string s) in
let obj = Term.term_of_literal_string ~typ ?lang: state.xml_lang lit in
g.add_triple ~sub ~pred: prop_iri ~obj;
(gstate, li)
| Some s, _ ->
let msg = Printf.sprintf "Property %S with datatype %S but no data"
(Iri.to_string prop_iri) s
in
error msg
| None, _ ->
let pred ((pref,s),v) =
pref <> Xmlm.ns_xml && pref <> Xmlm.ns_xmlns &&
(let iri = Iri.of_string (pref^s) in not (Iri.equal iri Rdf_.id))
in
match List.filter pred atts with
[] ->
let state = { state with predicate = Some prop_iri } in
let gstate = List.fold_left (input_node g state) gstate children in
(gstate, li)
| l ->
let node = Blank_ (g.new_blank_id ()) in
g.add_triple ~sub ~pred: prop_iri ~obj: node ;
let f ((pref,s),lit) =
let obj = Term.term_of_literal_string ?lang: state.xml_lang lit in
let iri_prop = Iri.of_string (pref^s) in
g.add_triple ~sub: node ~pred: iri_prop ~obj
in
List.iter f l;
(gstate, li)
;;
let input_tree g ?(base=g.Graph.name()) t =
let state = {
subject = None ; predicate = None ;
xml_base = base ; xml_lang = None ;
datatype = None ; namespaces = Irimap.empty ;
}
in
let gstate = { gnamespaces = Irimap.empty ; blanks = SMap.empty } in
let (gstate, state) = update_state gstate state t in
let gstate =
match t with
D _ -> assert false
| E ((e,_),children) when is_element Rdf_._RDF e ->
List.fold_left (input_node g state) gstate children
| t -> input_node g state gstate t
in
let add_ns iri prefix = g.add_namespace iri prefix in
Irimap.iter add_ns gstate.gnamespaces
;;
let from_input g ?base i =
let (_, tree) = in_tree i in
input_tree g ?base tree
;;
let from_xml = input_tree;;
let from_string g ?base s =
let i = Xmlm.make_input ~strip: true (`String (0, s)) in
from_input g ?base i
;;
let from_file g ?base file =
let ic = open_in file in
let i = Xmlm.make_input ~strip: true (`Channel ic) in
let (_,tree) =
try let t = in_tree i in close_in ic; t
with e -> close_in ic; raise e
in
input_tree g ?base tree
;;
(** {2 Output} *)
let output ?(compact=true) g =
let xml_prop pred_iri obj =
let (atts, children) =
match obj with
| Iri iri -> ([("", Iri.to_string Rdf_.resource), Iri.to_string iri], [])
| Blank_ id -> ([("", Iri.to_string Rdf_.nodeID), Term.string_of_blank_id id], [])
| Blank -> assert false
| Literal lit ->
let (atts, subs) =
match lit.lit_type with
None -> ([], [D lit.lit_value])
| Some iri when Iri.equal iri Rdf_.dt_XMLLiteral ->
let subs = xmls_of_string lit.lit_value in
(
[("",Iri.to_string Rdf_.parseType), "Literal"],
subs
)
| Some iri ->
(
[("",Iri.to_string Rdf_.datatype), Iri.to_string iri],
[D lit.lit_value]
)
in
let atts = atts @
(match lit.lit_language with
None -> []
| Some lang -> [(Xmlm.ns_xml, "lang"), lang])
in
(atts, subs)
in
E ((("",Iri.to_string pred_iri),atts),children)
in
let subject_atts = function
| Iri iri -> [("", Iri.to_string Rdf_.about), Iri.to_string iri]
| Blank_ id -> [("", Iri.to_string Rdf_.nodeID), Term.string_of_blank_id id]
| Blank -> assert false
| Literal _ -> assert false
in
let fold_props map =
let f iri set acc =
let fo obj acc =
let n = xml_prop iri obj in
n :: acc
in
Term.TSet.fold fo set acc
in
Iri.Map.fold f map []
in
let xmls =
match g.folder () with
| Some map when compact ->
let f sub map acc =
let xml_props = fold_props map in
let atts = subject_atts sub in
let node = E ((("",Iri.to_string Rdf_.description), atts), xml_props) in
node :: acc
in
Term.TMap.fold f map []
| _ ->
let f_triple acc (sub, pred, obj) =
let atts = subject_atts sub in
let xml_prop = xml_prop pred obj in
let node = E ((("",Iri.to_string Rdf_.description), atts), [xml_prop]) in
node :: acc
in
List.fold_left f_triple [] (g.find ())
in
E ((("", Iri.to_string Rdf_._RDF),[]), xmls)
let to_ ?compact ?namespaces g dest =
let namespaces = Dot.build_namespaces ?namespaces g in
try
let tree = output ?compact g in
output_doc_tree namespaces ~decl: true dest tree
with
Xmlm.Error ((line, col), error) ->
let msg = Printf.sprintf "Line %d, column %d: %s"
line col (Xmlm.error_message error)
in
failwith msg
;;
let to_string ?compact ?namespaces g =
let buf = Buffer.create 256 in
let dest = `Buffer buf in
to_ ?compact ?namespaces g dest;
Buffer.contents buf
;;
let to_file ?compact ?namespaces g file =
let oc = open_out file in
try
to_ ?compact ?namespaces g (`Channel oc);
close_out oc
with e ->
close_out oc ; raise e
;;