Source file cut.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
(*********************************************************************************)
(*                Stog                                                           *)
(*                                                                               *)
(*    Copyright (C) 2012-2024 INRIA All rights reserved.                         *)
(*    Author: Maxence Guesdon, INRIA Saclay                                      *)
(*                                                                               *)
(*    This program is free software; you can redistribute it and/or modify       *)
(*    it under the terms of the GNU General Public License as                    *)
(*    published by the Free Software Foundation, version 3 of the License.       *)
(*                                                                               *)
(*    This program is distributed in the hope that it will be useful,            *)
(*    but WITHOUT ANY WARRANTY; without even the implied warranty of             *)
(*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the               *)
(*    GNU General Public License for more details.                               *)
(*                                                                               *)
(*    You should have received a copy of the GNU General Public                  *)
(*    License along with this program; if not, write to the Free Software        *)
(*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA                   *)
(*    02111-1307  USA                                                            *)
(*                                                                               *)
(*    As a special exception, you have permission to link this program           *)
(*    with the OCaml compiler and distribute executables, as long as you         *)
(*    follow the requirements of the GNU GPL in regard to all of the             *)
(*    software in the executable aside from the OCaml compiler.                  *)
(*                                                                               *)
(*    Contact: Maxence.Guesdon@inria.fr                                          *)
(*                                                                               *)
(*********************************************************************************)

(** *)

open Types

module XR = Xtmpl.Rewrite
module Xml = Xtmpl.Xml

module Sset = Types.Str_set;;
module Nmap = Xml.Name_map;;

module PMap = Path.Map

type cutpoint =
  {
    cut_tag : string * string ;
    cut_doc_type : string ;
    cut_path_sep : string ;
    cut_insert_link : bool ;
    cut_use_parent_path : bool ;
  }
;;

(* since we iter in documents with List.fold_right to keep documents in
  order, we encounter next document of a cutpoint A before cutpoint A. *)
type links = {
  by_doc : (Path.path option * Path.path option) Path.Map.t ;
  next_by_cp : Path.path Nmap.t ;
}

let cutpoint_of_atts doc atts =
  let typ = XR.opt_att_cdata atts ~def: doc.doc_type ("","type") in
  let tag =
    match XR.get_att_cdata atts ("","tag") with
      None -> failwith "Missing 'tag' attribute for <cut-doc> node"
    | Some s ->
        match Stog_base.Misc.split_string s [':'] with
          [] | [_] -> ("", s)
        | h :: q -> (h, String.concat ":" q)
  in
  let sep = XR.opt_att_cdata atts ~def: "-" ("", Tags.path_sep) in
  let insert_link = not (XR.opt_att_cdata atts ~def: "true" ("","insert-link") = "false") in
  let use_parent_path = not (XR.opt_att_cdata atts ~def: "true" ("","use-parent-path") = "false") in
  { cut_tag = tag ; cut_doc_type = typ ;
    cut_path_sep = sep ; cut_insert_link = insert_link ;
    cut_use_parent_path = use_parent_path ;
  }
;;

let new_doc_in_cutpoint cut_tag links doc =
  let next_path =
    try Some (Nmap.find cut_tag links.next_by_cp)
    with Not_found -> None
  in
  let links =
    match next_path with
      None -> links
    | Some next_path ->
        let next =
          try snd (PMap.find next_path links.by_doc)
          with Not_found -> None
        in
        let by_doc = PMap.add next_path
          (Some doc.doc_path, next) links.by_doc
        in
        let prev =
          try fst (PMap.find doc.doc_path links.by_doc)
          with Not_found -> None
        in
        let by_doc = PMap.add doc.doc_path
          (prev, Some next_path) by_doc
        in
        { links with by_doc }
  in
  let next_by_cp = Nmap.add cut_tag doc.doc_path links.next_by_cp in
  { links with next_by_cp }
;;

let add_doc links stog doc =
  let (prev, next) =
    try PMap.find doc.doc_path links.by_doc
    with Not_found -> None, None
  in
  let doc =
    match prev with
      None -> doc
    | Some prev_path ->
        let def = (("", Tags.previous_path), XR.atts_empty,
           [XR.cdata (Path.to_string prev_path)])
        in
        { doc with doc_defs = def :: doc.doc_defs }
    in
  let doc =
    match next with
      None -> doc
    | Some next_path ->
        let def = (("", Tags.next_path), XR.atts_empty,
           [XR.cdata (Path.to_string next_path)])
        in
        { doc with doc_defs = def :: doc.doc_defs }
  in
  Types.add_doc stog doc
;;

let mk_path use_parent_path path sep id =
  let path_s = Path.to_string path in
  match Stog_base.Misc.filename_extension path_s with
    "" ->
      let msg =  "To be cut, " ^path_s ^ " should have an extension (e.g. \".html\")" in
      failwith msg
  | ext ->
      let p =
        if use_parent_path then
          (Filename.chop_extension path_s) ^ sep ^ id
        else
          "/" ^ id
      in
      let p = p ^ "." ^ ext in
      Path.of_string p
;;

let cut_docs =
  let id_set =
    let rec iter set = function
      XR.D _ | XR.C _ | XR.PI _ -> set
    | XR.E { XR.atts ; subs } ->
        let set =
          match XR.get_att_cdata atts ("", "id") with
            None
          | Some "" -> set
          | Some id -> Sset.add id set
        in
        List.fold_left iter set subs
    in
    fun doc ->
      let xmls =
        match doc.doc_out with
          None -> doc.doc_body
        | Some xmls -> xmls
      in
      List.fold_left iter Sset.empty xmls
  in
  let string_of_tag = function
    ("",t) -> "<"^t^">"
  | (n, t) -> "<"^n^":"^t^">"
  in
  let set_id_map stog path atts new_path with_id =
    if path <> new_path then
      match XR.get_att_cdata atts ("","id") with
        None -> stog
      | Some id ->
          let new_id = if with_id then Some id else None in
          Types.id_map_add stog path id new_path new_id
    else
      stog
  in
  let rec iter doc new_path cutpoints links stog new_docs xml =
    match xml with
    | XR.D _ | XR.C _ | XR.PI _ -> (stog, [xml], new_docs, links)
    | XR.E { XR.name = ("","cut-doc") ; atts ; subs } ->
        let cutpoints = (cutpoint_of_atts doc atts) :: cutpoints in
        let (stog, xmls, new_docs, links2) = List.fold_right
          (fold doc new_path cutpoints) subs (stog, [], new_docs, links)
        in
        let links = { links2 with next_by_cp = links.next_by_cp } in
        (stog, xmls, new_docs, links)

    | XR.E ({ XR.name ; atts ; subs } as node) ->
        let cp_opt =
          try Some (List.find (fun cp -> cp.cut_tag = name) cutpoints)
          with Not_found -> None
        in
        match cp_opt with
          None ->
            (* not a cut point *)
            let (stog, subs, new_docs, links) = List.fold_right
              (fold doc new_path cutpoints) subs (stog, [], new_docs, links)
            in
            (stog, [XR.E { node with XR.subs }], new_docs, links)
        | Some cp ->
            try
              let title =
                match XR.get_att_cdata atts ("","title") with
                  None ->
                    Log.warn
                      (fun m -> m "Missing title on cutpoint; not cutting node %s"
                         (string_of_tag name));
                    raise Not_found
                | Some s -> s
              in
              let id =
                match XR.get_att_cdata atts ("","id") with
                  None ->
                    Log.warn
                      (fun m -> m "Missing id on cutpoint; not cutting node %s"
                         (string_of_tag name));
                    raise Not_found
                | Some s -> s
              in
              let new_path = mk_path cp.cut_use_parent_path new_path cp.cut_path_sep id in
              let stog = set_id_map stog doc.doc_path atts new_path false in
              let (stog, xmls, new_docs, links2) =
                List.fold_right (fold doc new_path cutpoints)
                  subs (stog, [], new_docs, links)
              in
              let links = { links2 with next_by_cp = links.next_by_cp } in
              let doc =
                { doc with
                  doc_path = new_path ;
                  doc_parent = Some doc.doc_path ;
                  doc_type = cp.cut_doc_type ;
                  doc_title = title ;
                  doc_body = xmls ;
                  doc_out = None ;
                }
              in
              let links = new_doc_in_cutpoint cp.cut_tag links doc in
              let xml =
                if cp.cut_insert_link then
                  [ XR.node ("","div")
                     ~atts: (XR.atts_one ("","class") [XR.cdata ("cutlink "^(snd name))])
                     [XR.node ("","doc")
                        ~atts: (XR.atts_one ("","href")
                         [XR.cdata (Path.to_string new_path)])
                          []
                      ]
                  ]
                else
                  []
              in
              (stog, xml, doc :: new_docs, links)
            with
              Not_found ->
                (* not enough information to cut *)
                let (stog, xmls, new_docs, links) =
                  List.fold_right (fold doc new_path cutpoints)
                    subs (stog, [], new_docs, links)
                in
                (stog, xmls, new_docs, links)

  and fold doc new_path cutpoints xml (stog, xmls, new_docs, links) =
    let (stog, xmls2, new_docs, links) =
      iter doc new_path cutpoints links stog new_docs xml
    in
    (stog, xmls2 @ xmls, new_docs, links)
  in
  let cut_doc stog doc =
    let links = { by_doc = PMap.empty ; next_by_cp = Nmap.empty } in
    match doc.doc_out with
      None -> (stog, doc, [], links)
    | Some body ->
        let (stog, body, new_docs, links) =
          List.fold_right (fold doc doc.doc_path [])
            body (stog, [], [], links)
        in
        let children =
          match new_docs with
            [] -> doc.doc_children
          | _ -> doc.doc_children @ (List.map (fun doc -> doc.doc_path) new_docs)
        in
        (stog,
         { doc with doc_out = Some body ; doc_children = children },
         new_docs, links)
  in
  let add_id_mappings stog src_path dst_path set =
    Sset.fold
      (fun id stog -> Types.id_map_add stog src_path id dst_path (Some id))
      set stog
  in
  let set_doc_id_mappings orig_path all_ids stog doc =
    let ids = id_set doc in
    let stog = add_id_mappings stog orig_path doc.doc_path ids in
    add_id_mappings stog doc.doc_path orig_path (Sset.diff all_ids ids)
  in
  let f_doc env doc_id stog =
    let doc = Types.doc stog doc_id in
    let (stog, doc2, new_docs, links) = cut_doc stog doc in
    match new_docs with
      [] ->
        (* no new documents means the original document was not modified either *)
        stog
    | _ ->
        let all_ids = id_set doc in
        let stog =
          List.fold_left (set_doc_id_mappings doc.doc_path all_ids)
          stog new_docs
        in
        let stog = Types.set_doc stog doc_id doc2 in
        let stog = List.fold_left (add_doc links) stog new_docs in
        stog
  in
  fun env stog docs ->
    Types.Doc_set.fold (f_doc env) docs stog
;;