diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/dune | 3 | ||||
-rw-r--r-- | lib/lexer.ml | 46 | ||||
-rw-r--r-- | lib/lexer.mli | 4 |
3 files changed, 48 insertions, 5 deletions
@@ -1,8 +1,9 @@ (library (name qparser) (libraries - qsp_syntax + str menhirLib + qsp_syntax ) (preprocess (pps sedlex.ppx diff --git a/lib/lexer.ml b/lib/lexer.ml index 324171f..1a2d788 100644 --- a/lib/lexer.ml +++ b/lib/lexer.ml @@ -6,10 +6,14 @@ open Tokens exception UnclosedQuote of { content : string; line : int } exception LexError of Lexing.position * string +exception EOF let pp_pos out { Lexing.pos_lnum; pos_cnum; pos_bol; _ } = Format.fprintf out "line %d:%d" pos_lnum (pos_cnum - pos_bol) +(* Extract the location name from the pattern *) +let location_name = Str.regexp {|.* \(.*\)|} + module type Encoding = sig val lexeme : Sedlexing.lexbuf -> string end @@ -140,12 +144,28 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token = fun (module E : Encoding) lexbuf -> match%sedlex lexbuf with | 0Xfeff -> + Sedlexing.start lexbuf; (* Ignore the BOM *) token (module E) lexbuf | '#', Star space, location -> + let _start_pos, end_pos = Sedlexing.lexing_positions lexbuf in + + (* Extract the location name *) let ident = E.lexeme lexbuf in + let () = + match Str.string_match location_name ident 0 with + | false -> () + | true -> Sedlexing.set_filename lexbuf (Str.matched_group 1 ident) + in + + (* Restart the line number (new location here) *) + Sedlexing.start lexbuf; + Sedlexing.set_position lexbuf { end_pos with Lexing.pos_lnum = 1 }; LOCATION_START ident + | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> + Bucket.remove is_expression lexbuf; + LOCATION_END | Plus digit -> INTEGER (E.lexeme lexbuf) | '+' -> PLUS | '-' -> MINUS @@ -158,7 +178,6 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token = (* We are leaving the block, the comment will be handled again *) decr_level lexbuf; COLUMN - | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> LOCATION_END | '[' -> L_BRACKET | ']' -> R_BRACKET | '(' -> @@ -170,9 +189,6 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token = | '<' -> LT | '>' -> GT | coma -> COMA - | eof -> - Bucket.remove is_expression lexbuf; - EOF | '=' -> incr_level lexbuf; EQUAL @@ -191,6 +207,7 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token = | '\'' -> LITERAL (wait_balance read_quoted_string lexbuf) | '"' -> LITERAL (wait_balance read_dquoted_string lexbuf) | '{' -> LITERAL (wait_balance (read_long_string 0) lexbuf) + | eof -> raise EOF | _ -> let position = fst @@ Sedlexing.lexing_positions lexbuf in let tok = E.lexeme lexbuf in @@ -201,6 +218,27 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token = raise @@ LexError (position, msg) +let rec discard lexbuf = + match%sedlex lexbuf with + | '\'' -> + ignore (wait_balance read_quoted_string lexbuf); + discard lexbuf + | '"' -> + ignore (wait_balance read_dquoted_string lexbuf); + discard lexbuf + | '{' -> + ignore (wait_balance (read_long_string 0) lexbuf); + discard lexbuf + | eof -> raise EOF + | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> + Bucket.remove is_expression lexbuf; + () + | '!' -> + ignore @@ skip_comment lexbuf; + discard lexbuf + | any -> discard lexbuf + | _ -> raise EOF + (** Tokenizer for menhir *) let lexer : (module Encoding) -> diff --git a/lib/lexer.mli b/lib/lexer.mli index 41c7c11..0a8ec12 100644 --- a/lib/lexer.mli +++ b/lib/lexer.mli @@ -2,9 +2,13 @@ module type Encoding = sig val lexeme : Sedlexing.lexbuf -> string end +exception EOF + val lexer : (module Encoding) -> Sedlexing.lexbuf -> unit -> Tokens.token * Lexing.position * Lexing.position (** Apply the lexer to the source *) + +val discard : Sedlexing.lexbuf -> unit |