From 86ec559f913c389e8dc055b494630f21a45e039b Mon Sep 17 00:00:00 2001 From: Sébastien Dailly Date: Thu, 4 Feb 2021 21:14:01 +0100 Subject: css_merge application --- css/lib/lex_buffer.ml | 114 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100755 css/lib/lex_buffer.ml (limited to 'css/lib/lex_buffer.ml') diff --git a/css/lib/lex_buffer.ml b/css/lib/lex_buffer.ml new file mode 100755 index 0000000..0416e53 --- /dev/null +++ b/css/lib/lex_buffer.ml @@ -0,0 +1,114 @@ +(* Based on + * https://github.com/smolkaj/ocaml-parsing/blob/master/src/LexBuffer.ml *) + +(** A custom lexbuffer that automatically keeps track of the source location. + This module is a thin wrapper arounds sedlexing's default buffer, which does + not provide this functionality. *) + +type t = { + buf : Sedlexing.lexbuf; + mutable pos : Lexing.position; + mutable pos_mark : Lexing.position; + mutable last_char : int option; + mutable last_char_mark : int option; +} +(** the lex buffer type *) + +let of_sedlex ?(file = "") ?pos buf = + let pos = + match pos with + | None -> + { + Lexing.pos_fname = file; + pos_lnum = 1; + (* line number *) + pos_bol = 0; + (* offset of beginning of current line *) + pos_cnum = 0 (* total offset *); + } + | Some p -> p + in + { buf; pos; pos_mark = pos; last_char = None; last_char_mark = None } + +let of_ascii_string ?pos s = of_sedlex ?pos (Sedlexing.Latin1.from_string s) + +let of_ascii_file file = + let chan = open_in file in + of_sedlex ~file (Sedlexing.Latin1.from_channel chan) + +(** The next four functions are used by sedlex internally. + See https://www.lexifi.com/sedlex/libdoc/Sedlexing.html. *) +let mark lexbuf p = + lexbuf.pos_mark <- lexbuf.pos; + lexbuf.last_char_mark <- lexbuf.last_char; + Sedlexing.mark lexbuf.buf p + +let backtrack lexbuf = + lexbuf.pos <- lexbuf.pos_mark; + lexbuf.last_char <- lexbuf.last_char_mark; + Sedlexing.backtrack lexbuf.buf + +let start lexbuf = + lexbuf.pos_mark <- lexbuf.pos; + lexbuf.last_char_mark <- lexbuf.last_char; + Sedlexing.start lexbuf.buf + +(** location of next character *) +let next_loc lexbuf = { lexbuf.pos with pos_cnum = lexbuf.pos.pos_cnum + 1 } + +let cr = Char.code '\r' + +(** next character *) +let next lexbuf = + let c = Sedlexing.next lexbuf.buf in + let pos = next_loc lexbuf in + let ch = + match c with + | None -> None + | Some c -> ( try Some (Uchar.to_char c) with Invalid_argument _ -> None ) + in + ( match ch with + | Some '\r' -> + lexbuf.pos <- + { pos with pos_bol = pos.pos_cnum - 1; pos_lnum = pos.pos_lnum + 1 } + | Some '\n' when not (lexbuf.last_char = Some cr) -> + lexbuf.pos <- + { pos with pos_bol = pos.pos_cnum - 1; pos_lnum = pos.pos_lnum + 1 } + | Some '\n' -> () + | _ -> lexbuf.pos <- pos ); + ( match c with + | None -> lexbuf.last_char <- None + | Some c -> lexbuf.last_char <- Some (Uchar.to_int c) ); + c + +let raw lexbuf = Sedlexing.lexeme lexbuf.buf + +let latin1 ?(skip = 0) ?(drop = 0) lexbuf = + let len = Sedlexing.lexeme_length lexbuf.buf - skip - drop in + Sedlexing.Latin1.sub_lexeme lexbuf.buf skip len + +let utf8 ?(skip = 0) ?(drop = 0) lexbuf = + let len = Sedlexing.lexeme_length lexbuf.buf - skip - drop in + Sedlexing.Utf8.sub_lexeme lexbuf.buf skip len + +let container_lnum_ref = ref 0 + +let fix_loc loc = + let fix_pos pos = + (* It looks like lex_buffer.ml returns a position with 2 extra + * chars for parsed lines after the first one. Bug? *) + let pos_cnum = + if pos.Lexing.pos_lnum > !container_lnum_ref then pos.Lexing.pos_cnum - 2 + else pos.Lexing.pos_cnum + in + { pos with Lexing.pos_cnum } + in + let loc_start = fix_pos loc.Location.loc_start in + let loc_end = fix_pos loc.Location.loc_end in + { loc with Location.loc_start; loc_end } + +let make_loc ?(loc_ghost = false) start_pos end_pos : Location.t = + { Location.loc_start = start_pos; loc_end = end_pos; loc_ghost } + +let make_loc_and_fix ?(loc_ghost = false) start_pos end_pos : Location.t = + make_loc ~loc_ghost start_pos end_pos |> fix_loc -- cgit v1.2.3