Source file xml.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
(*********************************************************************************)
(*                Higlo                                                          *)
(*                                                                               *)
(*    Copyright (C) 2014-2021 Institut National de Recherche en Informatique     *)
(*    et en Automatique. All rights reserved.                                    *)
(*                                                                               *)
(*    This program is free software; you can redistribute it and/or modify       *)
(*    it under the terms of the GNU Lesser General Public License version        *)
(*    3 as published by the Free Software Foundation.                            *)
(*                                                                               *)
(*    This program is distributed in the hope that it will be useful,            *)
(*    but WITHOUT ANY WARRANTY; without even the implied warranty of             *)
(*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *)
(*    GNU Library General Public License for more details.                       *)
(*                                                                               *)
(*    You should have received a copy of the GNU Lesser General Public           *)
(*    License along with this program; if not, write to the Free Software        *)
(*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA                   *)
(*    02111-1307  USA                                                            *)
(*                                                                               *)
(*    Contact: Maxence.Guesdon@inria.fr                                          *)
(*                                                                               *)
(*                                                                               *)
(*********************************************************************************)

open Lang

let lexeme lb = Sedlexing.(Utf8.lexeme lb, lexeme_length lb);;
let sedlexeme = Sedlexing.Utf8.lexeme;;

let space = [%sedlex.regexp? Plus(' ' | '\n' | '\t' | '\r')]

let digit = [%sedlex.regexp? '0'..'9']
let capchar = [%sedlex.regexp? 'A'..'Z']
let lowchar = [%sedlex.regexp? 'a'..'z']
let idchar =  [%sedlex.regexp? lowchar | capchar | '_' | '-' | ':' | digit]

let entity = [%sedlex.regexp? '&', Plus(Compl('&'|';')), ';']

let tag_start = [%sedlex.regexp? '<', Opt('/'), Plus(idchar)]
let tag_end = [%sedlex.regexp? Opt('/'), '>']

let string = [%sedlex.regexp? '"', Star(Compl('"')), '"']

let comment = [%sedlex.regexp? "<!--", Star(Compl(0x3E) | (Compl('-'),'>')), "-->"]

let id = [%sedlex.regexp? Plus(idchar)]

let rec main lexbuf = match%sedlex lexbuf with
| space -> [Text (lexeme lexbuf)]
| comment -> [Bcomment (lexeme lexbuf)]
| entity -> [Keyword(1, lexeme lexbuf)]
| tag_start ->
  let t = Keyword(0, lexeme lexbuf) in
  t :: (tag lexbuf)
| any -> [Text (lexeme lexbuf)]
| eof -> []
| _ -> failwith "Invalid state"

and tag lexbuf = match%sedlex lexbuf with
| id -> let t = Id (lexeme lexbuf) in t :: tag lexbuf
| string -> let t = String (lexeme lexbuf) in t :: tag lexbuf
| tag_end -> [Keyword(0, lexeme lexbuf)]
| eof -> []
| any -> let t = Text (lexeme lexbuf) in t :: tag lexbuf
| _ -> failwith "Invalid state"

let () = Lang.register_lang "xml" main;;