Source: character_intf.ml (p.fmlib_parse.0.5.11.doc.src.fmlib

(** Signature of a character parser: a full parser (see
    [Interfaces.FULL_PARSER]) whose expectations are pairs of a description
    string and an optional indentation expectation, extended with position
    queries and functions to run the parser on strings and channels. *)
module type CHARACTER_PARSER =
sig
    include Interfaces.FULL_PARSER
        with type expect = string * Indent.expectation option
    (** @inline *)


    (** {1 Position Information} *)

    val position: t -> Position.t
    (** [position p] The current position in the input stream.

        Can be called at any time.
    *)


    val line: t -> int
    (** [line p] The current line in the input stream.

        Can be called at any time.
    *)


    val column: t -> int
    (** [column p] The current column in the input stream.

        Can be called at any time.
    *)


    val byte_column: t -> int
    (** [byte_column p] The current byte column in the input stream.

        Can be called at any time.
    *)


    (** {1 Run the Parser on Streams} *)

    val run_on_string: string -> t -> t
    (** [run_on_string str p] Run the parser [p] on the string [str]. *)


    val run_on_string_at: int -> string -> t -> int * t
    (** [run_on_string_at start str p] Run the parser [p] on the string [str]
        starting at index [start].

        Return the parser and the index next to be pushed in.
    *)


    val run_on_channel: in_channel -> t -> t
    (** [run_on_channel ic p] Run the parser [p] on input channel [ic]. *)
end




(** Combinator to explicitly expect the end of the token stream. Only needed
    for partial parsers. *)
module type END_OF_INPUT_COMBINATOR =
sig
    type _ t

    val expect_end: 'a -> 'a t
    (** [expect_end a] Expect the end of token stream.

        In case of success return [a].

        In case of failure return the syntax error with the expectation "end
        of input".

        {b CAUTION}: There is usually no need to use this combinator! This
        combinator is needed only for partial parsers.

        {b Never ever} backtrack over this combinator.
    *)
end




(** Combinators to parse base64 encodings. *)
module type BASE_64_COMBINATORS =
sig
    type _ t

    val base64: (string -> 'r) -> (string -> 'r -> 'r) -> 'r t
    (** [base64 start next] Parse a base64 encoding into an object of type
        ['r].

        A base64 encoding is a sequence of zero or more base64 characters
        (A-Za-z0-9+/) grouped into sequences of 4 characters and optionally
        padded with the character [=]. Each group of 2-4 base64 characters is
        decoded into a string of 1-3 bytes.

        [start] gets the first 1-3 bytes and [next] gets all subsequent 1-3
        bytes until the end of the encoding is reached.
    *)


    val string_of_base64: string t
    (** Parse a base64 encoding and decode it into a string. *)
end




(** Combinator to build a lexer from a whitespace combinator and a token
    combinator. *)
module type LEXER_COMBINATOR =
sig
    type _ t

    val lexer: 'a t -> 'tok -> 'tok t -> (Position.range * 'tok) t
    (** [lexer whitespace end_token tok]

        A lexer combinator.

        - The [whitespace] combinator recognizes a possibly empty sequence of
          whitespace (usually blanks, tabs, newlines, comments, ...).

        - [end_token] is a token which the lexer returns when it has
          successfully consumed the end of input.

        - [tok] is a combinator recognizing tokens (usually
          [tok1 </> tok2 </> ... </> tokn]).

        The lexer combinator recognizes tokens in an input stream of the form

        {v
            WS Token WS Token .... WS EOF
        v}

        Note: If a combinator fails to recognize a token after having consumed
        some input, then the subsequent combinators are not used anymore as
        alternatives. Therefore if there are tokens which can begin with the
        same prefix, then it is necessary to make the recognition of the
        common prefixes backtrackable in all but the last combinator
        recognizing a token with the same prefix.

        The same applies to whitespace if part of the whitespace can begin
        like a token.

        Examples:

        - comment: "// ...."

        - division operator: "/"

        In this case the recognition at least of the first slash of the
        comment has to be backtrackable.
    *)
end




(** Combinators giving access to position information. *)
module type LOCATION_COMBINATORS =
sig
    type _ t

    val located: 'a t -> 'a Located.t t
    (** [located p] Parse [p] and return its result with its start and end
        position.

        Note: If [p] removes whitespace at the end, the returned end position
        is at the end of the whitespace. This is not what you usually want.
        Therefore first parse the essential part located and then remove the
        whitespace.
    *)


    val position: Position.t t
    (** The current position in the file. *)
end




(** Combinators to express indentation and alignment requirements. *)
module type INDENTATION_COMBINATORS =
sig
    type _ t

    (** The indentation of a normal construct is the indentation of its
        leftmost token. The indentation of a vertically aligned construct is
        the indentation of its first token.
    *)

    val indent: int -> 'a t -> 'a t
    (** [indent i p] Indent [p] by [i] columns relative to its parent.

        Precondition: [0 <= i]

        The indentation of [p] is defined by the indentation of its first
        token. The first token has to be indented at least [i] columns
        relative to the parent of [p]. After the first token of [p] has been
        parsed successfully, all subsequent tokens must have at least the
        same indentation.

        Note: Indentation of [p] relative to its parent only makes sense, if
        the first token of [p] is not the first token of its parent! I.e. the
        parent of [p] should have consumed at least one token before the
        parsing of [p] starts.
    *)


    (** CAUTION WITH ALIGNMENT !!

        If you want to align a certain number of constructs vertically it is
        {e mandatory} to indent the whole block of constructs. Do not indent
        the individual items to be aligned. Indent the whole block.

        Reason: The parent of the block usually has already consumed some
        token and the indentation of a construct is the position of the
        leftmost token. If you don't indent the aligned block, then it will
        be aligned with the leftmost token of the parent construct. This is
        usually not intended and a common pitfall. Any indentation e.g. zero
        indentation is ok.
    *)

    val align: 'a t -> 'a t
    (** [align p]

        Use the start position of the first token of [p] to align it with
        other constructs. If [p] does not consume any token, then [align p]
        has no effect.

        Alignment makes sense if there are at least two combinators which
        are aligned and indented. E.g. suppose there are two combinators [p]
        and [q]. Then we can form

        {[
            indent 1 (
                let* a = align p in
                let* b = align q in
                return (a,b)
            )
        ]}

        This combinator parses [p] whose first token has to be indented at
        least one column relative to its parent. And then it parses [q] whose
        first token must be aligned with the first token of [p].

        The indentation decouples the alignment of [p] and [q] with other
        aligned siblings or parents. [indent 0 ...] can be used to make the
        indentation optional.
    *)


    val left_align: 'a t -> 'a t
    (** [left_align p]

        Align a construct described by [p] at its leftmost possible column.

        If a whole block of constructs have to be vertically left aligned,
        then it is important that at least the first construct is left
        aligned. The subsequent constructs will be aligned exactly
        vertically. For the subsequent constructs [left_align] has the same
        effect as {!align}.
    *)


    val detach: 'a t -> 'a t
    (** [detach p] Parse [p] without any indentation and alignment
        restrictions.

        Detachment is needed to parse whitespace. The whitespace at the
        beginning of a line never satisfies any nontrivial indentation or
        alignment requirements.
    *)
end




(** Elementary combinators recognizing characters, strings, letters, digits
    and words. *)
module type CHARACTER_COMBINATORS =
sig
    type _ t

    val charp: (char -> bool) -> string -> char t
    (** [charp p expect] Parse a character which satisfies the predicate [p].

        In case of failure, report the failed expectation [expect].
    *)


    val range: char -> char -> char t
    (** [range c1 c2] Parses a character in the range between [c1] and [c2],
        i.e. a character [c] which satisfies [c1 <= c && c <= c2]. *)


    val char: char -> char t
    (** [char c] Parse the character [c]. *)


    val one_of_chars: string -> string -> char t
    (** [one_of_chars str expect]

        Parse one of the characters in the string [str]. In case of failure,
        report the failed expectation [expect].
    *)


    val string: string -> string t
    (** [string str] Parse the string [str]. *)


    val uppercase_letter: char t
    (** Parse an uppercase letter. *)


    val lowercase_letter: char t
    (** Parse a lowercase letter. *)


    val letter: char t
    (** Parse a letter. *)


    val digit_char: char t
    (** Parse a digit [0..9] and return it as character. *)


    val digit: int t
    (** Parse a digit and return it as number. *)


    val word: (char -> bool) -> (char -> bool) -> string -> string t
    (** [word first inner error]

        Parse a word which starts with a character satisfying the predicate
        [first] followed by zero or more characters satisfying the predicate
        [inner]. In case of failure add the expectation [error].
    *)


    val hex_uppercase: int t
    (** Equivalent to [range 'A' 'F'] and then converted to the corresponding
        number between [10] and [15]. *)


    val hex_lowercase: int t
    (** Equivalent to [range 'a' 'f'] and then converted to the corresponding
        number between [10] and [15]. *)


    val hex_digit: int t
    (** Parse a hexadecimal digit and return the corresponding number between
        [0] and [15]. *)
end




(** Functions turning a final combinator into a parser, either for a complete
    input stream ([make]) or for a part of it ([make_partial]). *)
module type MAKE_FINAL_COMBINATORS =
sig
    type _ t
    type state
    type final
    type parser

    val make: state -> final t -> parser
    (** [make state c]

        Make a parser which starts in state [state] and parses a construct
        defined by the combinator [c]. The token stream must be ended by
        [put_end], otherwise the parse won't succeed.

        {b CAUTION}: [c] must not be a combinator containing [expect_end].
        Moreover it must not have been constructed by {!lexer}.
    *)


    val make_partial: Position.t -> state -> final t -> parser
    (** [make_partial pos state c]

        Make a parser which analyzes a part of the input stream.

        The parser starts at position [pos] in state [state] and parses a
        construct defined by the combinator [c]. The parser can succeed even
        if no end token has been pushed into the parser.
    *)
end