module Ast = Ast

(* odoc uses an ocamllex lexer. The "engine" for such lexers is the standard
   [Lexing] module.

   As the [Lexing] module reads the input, it keeps track of only the byte
   offset into the input. It is normally the job of each particular lexer
   implementation to decide which character sequences count as newlines, and
   keep track of line/column locations. This is usually done by writing
   several extra regular expressions, and calling [Lexing.new_line] at the
   right time. Keeping track of newlines like this makes the odoc lexer
   somewhat too difficult to read, however. To factor the aspect of keeping
   track of newlines fully out of the odoc lexer, instead of having it keep
   track of newlines as it's scanning the input, the input is pre-scanned
   before feeding it into the lexer. A table of all the newlines is assembled,
   and used to convert offsets into line/column pairs after the lexer emits
   tokens.

   [offset_to_location ~input ~comment_location offset] converts the byte
   [offset], relative to the beginning of a comment, into a location, relative
   to the beginning of the file containing the comment. [input] is the comment
   text, and [comment_location] is the location of the comment within its
   file.

   The function is meant to be partially applied to its first two arguments,
   at which point it creates the table described above. The remaining
   function is
   then passed to the lexer, so it can apply the table to its emitted
   tokens. *)
let offset_to_location :
    input:string -> comment_location:Lexing.position ->
      (int -> Odoc_model.Location_.point) =
    fun ~input ~comment_location ->

  let rec find_newlines line_number input_index newlines_accumulator =
    if input_index >= String.length input then
      newlines_accumulator
    else
      (* This is good enough to detect CR-LF also. *)
      if input.[input_index] = '\n' then
        find_newlines
          (line_number + 1)
          (input_index + 1)
          ((line_number + 1, input_index + 1)::newlines_accumulator)
      else
        find_newlines line_number (input_index + 1) newlines_accumulator
  in

  let reversed_newlines : (int * int) list =
    find_newlines 1 0 [(1, 0)] in

  fun byte_offset ->
    let rec scan_to_last_newline reversed_newlines_prefix =
      match reversed_newlines_prefix with
      | [] ->
        assert false
      | (line_in_comment, line_start_offset)::prefix ->
        if line_start_offset > byte_offset then
          scan_to_last_newline prefix
        else
          let column_in_comment = byte_offset - line_start_offset in
          let line_in_file =
            line_in_comment + comment_location.Lexing.pos_lnum - 1 in
          let column_in_file =
            if line_in_comment = 1 then
              column_in_comment
                + comment_location.Lexing.pos_cnum
                - comment_location.Lexing.pos_bol
            else
              column_in_comment
          in
          {Odoc_model.Location_.line = line_in_file; column = column_in_file}
    in
    scan_to_last_newline reversed_newlines

let make_parser ~location ~text parse =
  Odoc_model.Error.accumulate_warnings begin fun warnings ->
    let token_stream =
      let lexbuf = Lexing.from_string text in
      let offset_to_location =
        offset_to_location ~input:text ~comment_location:location in
      let input : Lexer.input =
        {
          file = location.Lexing.pos_fname;
          offset_to_location;
          warnings;
          lexbuf;
        }
      in
      Stream.from (fun _token_index -> Some (Lexer.token input lexbuf))
    in
    parse warnings token_stream
  end

let parse_comment_raw ~location ~text =
  make_parser ~location ~text Syntax.parse

let parse_comment ~sections_allowed ~containing_definition ~location ~text =
  make_parser ~location ~text (fun warnings token_stream ->
    Syntax.parse warnings token_stream
    |> Semantics.ast_to_comment
         warnings ~sections_allowed ~parent_of_sections:containing_definition)
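The pre-scanned newline table described in the header comment can be illustrated in isolation. The sketch below uses illustrative names (`newline_table`, `point_of_offset`) that are not part of odoc's API; it builds the same reversed list of (line number, line start offset) pairs and performs the same backwards scan, minus the comment-location adjustment:

```ocaml
(* Minimal sketch of the newline-table technique. Illustrative only;
   the real implementation lives in [offset_to_location] above. *)

(* One pass over the input builds a reversed list of
   (line_number, line_start_offset) pairs, one per line. *)
let newline_table input =
  let rec scan line index acc =
    if index >= String.length input then acc
    else if input.[index] = '\n' then
      scan (line + 1) (index + 1) ((line + 1, index + 1) :: acc)
    else
      scan line (index + 1) acc
  in
  scan 1 0 [(1, 0)]

(* Because the table is reversed, the first entry whose start offset is
   <= the target offset is the line containing that offset; the column
   is the distance from the line's start. *)
let point_of_offset table offset =
  let rec find = function
    | [] -> assert false
    | (line, start) :: rest ->
      if start > offset then find rest else (line, offset - start)
  in
  find table

let () =
  let table = newline_table "ab\ncd\nef" in
  assert (point_of_offset table 0 = (1, 0));
  assert (point_of_offset table 4 = (2, 1));
  assert (point_of_offset table 7 = (3, 1))
```

Building the table once and reusing it for every token is what motivates the partial application of `offset_to_location`: the table construction runs on the first two arguments, and the returned closure only does the cheap lookup.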
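The `Stream.from` call in `make_parser` turns the lexer into an on-demand token stream: each `Stream.next` invokes the lexer once. A minimal sketch of that pattern, with a stand-in tokenizer in place of `Lexer.token` (note that the `Stream` module was deprecated in OCaml 4.14 and removed in 5.0):

```ocaml
(* Sketch of driving a tokenizer through Stream.from, as make_parser
   does with Lexer.token. [tokens_of_string] is an illustrative name;
   here each "token" is just one character of the input. *)
let tokens_of_string text =
  let pos = ref 0 in
  Stream.from (fun _token_index ->
    if !pos >= String.length text then
      None  (* None ends the stream *)
    else begin
      let c = text.[!pos] in
      incr pos;
      Some c
    end)

let () =
  let s = tokens_of_string "ab" in
  assert (Stream.next s = 'a');
  assert (Stream.next s = 'b')
```

The real code always returns `Some (Lexer.token input lexbuf)` because the odoc lexer signals end of input with its own end-of-comment token rather than by terminating the stream.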