aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/dune3
-rw-r--r--lib/lexer.ml46
-rw-r--r--lib/lexer.mli4
3 files changed, 48 insertions, 5 deletions
diff --git a/lib/dune b/lib/dune
index 5717f90..f62c90e 100644
--- a/lib/dune
+++ b/lib/dune
@@ -1,8 +1,9 @@
(library
(name qparser)
(libraries
- qsp_syntax
+ str
menhirLib
+ qsp_syntax
)
(preprocess (pps
sedlex.ppx
diff --git a/lib/lexer.ml b/lib/lexer.ml
index 324171f..1a2d788 100644
--- a/lib/lexer.ml
+++ b/lib/lexer.ml
@@ -6,10 +6,14 @@ open Tokens
exception UnclosedQuote of { content : string; line : int }
exception LexError of Lexing.position * string
+exception EOF
let pp_pos out { Lexing.pos_lnum; pos_cnum; pos_bol; _ } =
Format.fprintf out "line %d:%d" pos_lnum (pos_cnum - pos_bol)
+(* Extract the location name from the pattern *)
+let location_name = Str.regexp {|.* \(.*\)|}
+
module type Encoding = sig
val lexeme : Sedlexing.lexbuf -> string
end
@@ -140,12 +144,28 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token =
fun (module E : Encoding) lexbuf ->
match%sedlex lexbuf with
| 0Xfeff ->
+ Sedlexing.start lexbuf;
(* Ignore the BOM *)
token (module E) lexbuf
| '#', Star space, location ->
+ let _start_pos, end_pos = Sedlexing.lexing_positions lexbuf in
+
+ (* Extract the location name *)
let ident = E.lexeme lexbuf in
+ let () =
+ match Str.string_match location_name ident 0 with
+ | false -> ()
+ | true -> Sedlexing.set_filename lexbuf (Str.matched_group 1 ident)
+ in
+
+ (* Restart the line number (new location here) *)
+ Sedlexing.start lexbuf;
+ Sedlexing.set_position lexbuf { end_pos with Lexing.pos_lnum = 1 };
LOCATION_START ident
+ | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) ->
+ Bucket.remove is_expression lexbuf;
+ LOCATION_END
| Plus digit -> INTEGER (E.lexeme lexbuf)
| '+' -> PLUS
| '-' -> MINUS
@@ -158,7 +178,6 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token =
(* We are leaving the block, the comment will be handled again *)
decr_level lexbuf;
COLUMN
- | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> LOCATION_END
| '[' -> L_BRACKET
| ']' -> R_BRACKET
| '(' ->
@@ -170,9 +189,6 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token =
| '<' -> LT
| '>' -> GT
| coma -> COMA
- | eof ->
- Bucket.remove is_expression lexbuf;
- EOF
| '=' ->
incr_level lexbuf;
EQUAL
@@ -191,6 +207,7 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token =
| '\'' -> LITERAL (wait_balance read_quoted_string lexbuf)
| '"' -> LITERAL (wait_balance read_dquoted_string lexbuf)
| '{' -> LITERAL (wait_balance (read_long_string 0) lexbuf)
+ | eof -> raise EOF
| _ ->
let position = fst @@ Sedlexing.lexing_positions lexbuf in
let tok = E.lexeme lexbuf in
@@ -201,6 +218,27 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token =
raise @@ LexError (position, msg)
+let rec discard lexbuf =
+ match%sedlex lexbuf with
+ | '\'' ->
+ ignore (wait_balance read_quoted_string lexbuf);
+ discard lexbuf
+ | '"' ->
+ ignore (wait_balance read_dquoted_string lexbuf);
+ discard lexbuf
+ | '{' ->
+ ignore (wait_balance (read_long_string 0) lexbuf);
+ discard lexbuf
+ | eof -> raise EOF
+ | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) ->
+ Bucket.remove is_expression lexbuf;
+ ()
+ | '!' ->
+ ignore @@ skip_comment lexbuf;
+ discard lexbuf
+ | any -> discard lexbuf
+ | _ -> raise EOF
+
(** Tokenizer for menhir *)
let lexer :
(module Encoding) ->
diff --git a/lib/lexer.mli b/lib/lexer.mli
index 41c7c11..0a8ec12 100644
--- a/lib/lexer.mli
+++ b/lib/lexer.mli
@@ -2,9 +2,13 @@ module type Encoding = sig
val lexeme : Sedlexing.lexbuf -> string
end
+exception EOF
+
val lexer :
(module Encoding) ->
Sedlexing.lexbuf ->
unit ->
Tokens.token * Lexing.position * Lexing.position
(** Apply the lexer to the source *)
+
+val discard : Sedlexing.lexbuf -> unit