aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--bin/qsp_parser.ml23
-rw-r--r--lib/analyzer.ml15
-rw-r--r--lib/analyzer.mli1
-rw-r--r--lib/interpreter.ml32
-rw-r--r--lib/lexbuf.ml14
-rw-r--r--lib/lexbuf.mli8
-rw-r--r--lib/lexer.ml119
-rw-r--r--lib/lexer.mli10
-rw-r--r--test/qsp_parser_test.ml7
9 files changed, 112 insertions, 117 deletions
diff --git a/bin/qsp_parser.ml b/bin/qsp_parser.ml
index 1d846e0..3980ad5 100644
--- a/bin/qsp_parser.ml
+++ b/bin/qsp_parser.ml
@@ -41,12 +41,9 @@ let filter_report : filters -> Report.t list -> Report.t -> Report.t list =
(** Read the source file until getting a report (the whole location has been
read properly), or until the first syntax error.
*)
-let parse_location :
- (module Qparser.Lexer.Encoding) -> Sedlexing.lexbuf -> filters -> unit =
- fun encoding lexbuf filters ->
- let result =
- Qparser.Analyzer.parse (module Qsp_syntax.Type_of) encoding lexbuf
- in
+let parse_location : Sedlexing.lexbuf -> filters -> unit =
+ fun lexbuf filters ->
+ let result = Qparser.Analyzer.parse (module Qsp_syntax.Type_of) lexbuf in
let result =
Result.map
@@ -78,23 +75,17 @@ let () =
let ic = Stdlib.open_in file_name in
(*let lexer = Lexing.from_channel ~with_positions:true ic in*)
- let lexer, mod_ =
+ let lexer =
match Filename.extension file_name with
- | ".qsrc" ->
- ( Sedlexing.Utf8.from_channel ic,
- (module Sedlexing.Utf8 : Qparser.Lexer.Encoding) )
- | ".txt" ->
- ( Sedlexing.Utf16.from_channel ic (Some Little_endian),
- (module struct
- let lexeme lexbuf = Sedlexing.Utf8.lexeme lexbuf
- end : Qparser.Lexer.Encoding) )
+ | ".qsrc" -> Sedlexing.Utf8.from_channel ic
+ | ".txt" -> Sedlexing.Utf16.from_channel ic (Some Little_endian)
| _ -> raise (Failure "unknown extension")
in
let () =
try
while true do
- parse_location mod_ lexer filters
+ parse_location lexer filters
done
with Qparser.Lexer.EOF -> ()
in
diff --git a/lib/analyzer.ml b/lib/analyzer.ml
index a6f5e51..547b3da 100644
--- a/lib/analyzer.ml
+++ b/lib/analyzer.ml
@@ -5,18 +5,19 @@
*)
let parse :
(module Qsp_syntax.S.Analyzer with type Location.repr = 'a) ->
- (module Encoding.S) ->
Sedlexing.lexbuf ->
('a, Qsp_syntax.Report.t) Result.t =
- fun (type a) (module S : Qsp_syntax.S.Analyzer with type Location.repr = a)
- (module E : Encoding.S) ->
+ fun (type a) (module S : Qsp_syntax.S.Analyzer with type Location.repr = a) ->
let module Parser = Parser.Make (S) in
let module IncrementalParser =
Interpreter.Interpreter (Parser.MenhirInterpreter) in
fun lexbuf ->
- IncrementalParser.of_lexbuf lexbuf
- (Lexer.lexer (module E))
- Parser.Incremental.main
+ let l = Lexbuf.from_lexbuf lexbuf in
+ let lexer = Lexbuf.tokenize Lexer.token l in
+
+ let init = Parser.Incremental.main (fst (Lexbuf.positions l)) in
+
+ IncrementalParser.of_lexbuf lexer l init
|> Result.map_error (fun e ->
let message =
match e.IncrementalParser.code with
@@ -38,5 +39,5 @@ let parse :
(* Discard the remaining file to read. The parser is now in a blank
state, it does not make sense to keep feeding it with the new
tokens. *)
- Lexer.discard lexbuf;
+ Lexer.discard l;
report)
diff --git a/lib/analyzer.mli b/lib/analyzer.mli
index e7efdb0..d79ea76 100644
--- a/lib/analyzer.mli
+++ b/lib/analyzer.mli
@@ -1,6 +1,5 @@
val parse :
(module Qsp_syntax.S.Analyzer with type Location.repr = 'a) ->
- (module Encoding.S) ->
Sedlexing.lexbuf ->
('a, Qsp_syntax.Report.t) Result.t
(** Read the source and build a analyzis over it.
diff --git a/lib/interpreter.ml b/lib/interpreter.ml
index d3b24c9..b719600 100644
--- a/lib/interpreter.ml
+++ b/lib/interpreter.ml
@@ -27,47 +27,41 @@ struct
let range_message (start_pos, end_pos) : error_code -> error =
fun code -> { code; start_pos; end_pos }
- let get_parse_error lexbuf env : error =
+ let get_parse_error : Lexbuf.t -> 'a MI.env -> error =
+ fun buffer env ->
match MI.stack env with
| (lazy Nil) ->
(* The parser is in its initial state. We should not get an
error here *)
- let positions = Sedlexing.lexing_positions lexbuf in
+ let positions = Lexbuf.positions buffer in
+
range_message positions UnrecoverableError
| (lazy (Cons (MI.Element (state, _, start_pos, end_pos), _))) ->
range_message (start_pos, end_pos) (MenhirCode (MI.number state))
let rec _parse :
- (Sedlexing.lexbuf -> unit -> step) ->
- Sedlexing.lexbuf ->
- 'a MI.checkpoint ->
- ('a, error) Result.t =
- fun get_token (lexbuf : Sedlexing.lexbuf) (checkpoint : 'a MI.checkpoint) ->
+ Lexbuf.t -> (unit -> step) -> 'a MI.checkpoint -> ('a, error) Result.t =
+ fun buffer get_token (checkpoint : 'a MI.checkpoint) ->
match checkpoint with
| MI.InputNeeded _env ->
- let token, startp, endp = get_token lexbuf () in
+ let token, startp, endp = get_token () in
let checkpoint = MI.offer checkpoint (token, startp, endp) in
- _parse get_token lexbuf checkpoint
+ _parse buffer get_token checkpoint
| MI.Shifting _ | MI.AboutToReduce _ ->
let checkpoint = MI.resume checkpoint in
- _parse get_token lexbuf checkpoint
+ _parse buffer get_token checkpoint
| MI.HandlingError _env ->
- let err = get_parse_error lexbuf _env in
+ let err = get_parse_error buffer _env in
Error err
| MI.Accepted v -> Ok v
| MI.Rejected ->
- let positions = Sedlexing.lexing_positions lexbuf in
+ let positions = Lexbuf.positions buffer in
let err = range_message positions InvalidSyntax in
Error err
type 'a builder = Lexing.position -> 'a MI.checkpoint
let of_lexbuf :
- Sedlexing.lexbuf ->
- (Sedlexing.lexbuf -> unit -> step) ->
- 'a builder ->
- ('a, error) result =
- fun lexbuf lexer f ->
- let init = f (fst (Sedlexing.lexing_positions lexbuf)) in
- _parse lexer lexbuf init
+ (unit -> step) -> Lexbuf.t -> 'a MI.checkpoint -> ('a, error) result =
+ fun lexer buffer init -> _parse buffer lexer init
end
diff --git a/lib/lexbuf.ml b/lib/lexbuf.ml
new file mode 100644
index 0000000..2ed9099
--- /dev/null
+++ b/lib/lexbuf.ml
@@ -0,0 +1,14 @@
+type t = Sedlexing.lexbuf
+
+let buffer : t -> Sedlexing.lexbuf = fun t -> t
+let start : t -> unit = fun t -> Sedlexing.start t
+
+let positions : t -> Lexing.position * Lexing.position =
+ fun t -> Sedlexing.lexing_positions t
+
+let content : t -> string = fun t -> Sedlexing.Utf8.lexeme t
+let from_lexbuf : Sedlexing.lexbuf -> t = fun t -> t
+
+let tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
+ =
+ fun f lexbuf -> Sedlexing.with_tokenizer f lexbuf
diff --git a/lib/lexbuf.mli b/lib/lexbuf.mli
new file mode 100644
index 0000000..cf93c7e
--- /dev/null
+++ b/lib/lexbuf.mli
@@ -0,0 +1,8 @@
+type t
+
+val start : t -> unit
+val buffer : t -> Sedlexing.lexbuf
+val positions : t -> Lexing.position * Lexing.position
+val content : t -> string
+val from_lexbuf : Sedlexing.lexbuf -> t
+val tokenize : (t -> 'a) -> t -> unit -> 'a * Lexing.position * Lexing.position
diff --git a/lib/lexer.ml b/lib/lexer.ml
index 1a2d788..a91bfdb 100644
--- a/lib/lexer.ml
+++ b/lib/lexer.ml
@@ -57,12 +57,14 @@ let incr_level lexbuf =
| None -> Bucket.add is_expression lexbuf 1
| Some v -> Bucket.add is_expression lexbuf (v + 1)
-let wait_balance rule lexbuf =
- try[@warning "-52"] rule (Buffer.create 17) lexbuf
+let wait_balance : (Buffer.t -> Lexbuf.t -> 'a) -> Lexbuf.t -> 'a =
+ fun rule lexbuf ->
+ try[@warning "-52"]
+ let token = rule (Buffer.create 256) lexbuf in
+ token
with Failure "lexing: empty token" ->
- let position, _ = Sedlexing.lexing_positions lexbuf in
- let line = position.Lexing.pos_lnum
- and content = Sedlexing.lexeme lexbuf |> Idents.of_uchars in
+ let position, _ = Lexbuf.positions lexbuf in
+ let line = position.Lexing.pos_lnum and content = Lexbuf.content lexbuf in
(raise (UnclosedQuote { line; content }) [@warning "+52"])
let space = [%sedlex.regexp? ' ' | '\t']
@@ -76,82 +78,87 @@ let location_ident = [%sedlex.regexp? letters | digit]
let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^']
let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident]
-let rec read_long_string level buf lexbuf =
+let rec read_long_string level buf buffer =
+ let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| '{' ->
Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_long_string (level + 1) buf lexbuf
+ read_long_string (level + 1) buf buffer
| '}' -> (
match level with
| 0 -> Buffer.contents buf
| _ ->
Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_long_string (level - 1) buf lexbuf)
+ read_long_string (level - 1) buf buffer)
| eol ->
- Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_long_string level buf lexbuf
+ Buffer.add_string buf (Lexbuf.content buffer);
+ read_long_string level buf buffer
| any ->
- Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_long_string level buf lexbuf
+ Buffer.add_string buf (Lexbuf.content buffer);
+ read_long_string level buf buffer
| _ -> raise Not_found
-let rec read_dquoted_string buf lexbuf =
+let rec read_dquoted_string buf buffer =
+ let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| "\"\"" ->
Buffer.add_char buf '"';
- read_dquoted_string buf lexbuf
+ read_dquoted_string buf buffer
| '"' -> Buffer.contents buf
| any ->
Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_dquoted_string buf lexbuf
+ read_dquoted_string buf buffer
| _ -> raise Not_found
-let rec read_quoted_string buf lexbuf =
+let rec read_quoted_string buf buffer =
+ let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| "''" ->
Buffer.add_char buf '\'';
- read_quoted_string buf lexbuf
+ read_quoted_string buf buffer
| '\'' -> Buffer.contents buf
| eol ->
- Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_quoted_string buf lexbuf
+ Buffer.add_string buf (Lexbuf.content buffer);
+ read_quoted_string buf buffer
| any ->
- Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_quoted_string buf lexbuf
+ Buffer.add_string buf (Lexbuf.content buffer);
+ read_quoted_string buf buffer
| _ -> raise Not_found
-let rec skip_comment lexbuf =
+let rec skip_comment buffer =
+ let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| '{' ->
- let _ = wait_balance (read_long_string 0) lexbuf in
- skip_comment lexbuf
+ let _ = wait_balance (read_long_string 0) buffer in
+ skip_comment buffer
| '\'' ->
- let _ = wait_balance read_quoted_string lexbuf in
- skip_comment lexbuf
+ let _ = wait_balance read_quoted_string buffer in
+ skip_comment buffer
| '"' ->
- let _ = wait_balance read_dquoted_string lexbuf in
- skip_comment lexbuf
+ let _ = wait_balance read_dquoted_string buffer in
+ skip_comment buffer
| eol ->
(* Ugly hack used in order to put the eol in the front of the next
parsing. *)
Sedlexing.rollback lexbuf;
COMMENT
- | any -> skip_comment lexbuf
+ | any -> skip_comment buffer
| _ -> raise Not_found
(** Main lexer *)
-let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token =
- fun (module E : Encoding) lexbuf ->
+let rec token : Lexbuf.t -> token =
+ fun buffer ->
+ let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| 0Xfeff ->
- Sedlexing.start lexbuf;
+ Lexbuf.start buffer;
(* Ignore the BOM *)
- token (module E) lexbuf
+ token buffer
| '#', Star space, location ->
let _start_pos, end_pos = Sedlexing.lexing_positions lexbuf in
(* Extract the location name *)
- let ident = E.lexeme lexbuf in
+ let ident = Lexbuf.content buffer in
let () =
match Str.string_match location_name ident 0 with
| false -> ()
@@ -166,7 +173,7 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token =
| '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) ->
Bucket.remove is_expression lexbuf;
LOCATION_END
- | Plus digit -> INTEGER (E.lexeme lexbuf)
+ | Plus digit -> INTEGER (Lexbuf.content buffer)
| '+' -> PLUS
| '-' -> MINUS
| "+=" -> INCR
@@ -202,15 +209,15 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token =
| '!' -> (
match Bucket.find is_expression lexbuf with
| Some i when i <> 0 -> EXCLAMATION
- | _ -> skip_comment lexbuf)
- | spaces -> token (module E) lexbuf
- | '\'' -> LITERAL (wait_balance read_quoted_string lexbuf)
- | '"' -> LITERAL (wait_balance read_dquoted_string lexbuf)
- | '{' -> LITERAL (wait_balance (read_long_string 0) lexbuf)
+ | _ -> skip_comment buffer)
+ | spaces -> token buffer
+ | '\'' -> LITERAL (wait_balance read_quoted_string buffer)
+ | '"' -> LITERAL (wait_balance read_dquoted_string buffer)
+ | '{' -> LITERAL (wait_balance (read_long_string 0) buffer)
| eof -> raise EOF
| _ ->
let position = fst @@ Sedlexing.lexing_positions lexbuf in
- let tok = E.lexeme lexbuf in
+ let tok = Lexbuf.content buffer in
let msg =
Format.asprintf "Unexpected character %S at %a" tok pp_pos position
@@ -218,32 +225,24 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token =
raise @@ LexError (position, msg)
-let rec discard lexbuf =
+let rec discard buffer =
+ let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| '\'' ->
- ignore (wait_balance read_quoted_string lexbuf);
- discard lexbuf
+ ignore (wait_balance read_quoted_string buffer);
+ discard buffer
| '"' ->
- ignore (wait_balance read_dquoted_string lexbuf);
- discard lexbuf
+ ignore (wait_balance read_dquoted_string buffer);
+ discard buffer
| '{' ->
- ignore (wait_balance (read_long_string 0) lexbuf);
- discard lexbuf
+ ignore (wait_balance (read_long_string 0) buffer);
+ discard buffer
| eof -> raise EOF
| '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) ->
Bucket.remove is_expression lexbuf;
()
| '!' ->
- ignore @@ skip_comment lexbuf;
- discard lexbuf
- | any -> discard lexbuf
+ ignore @@ skip_comment buffer;
+ discard buffer
+ | any -> discard buffer
| _ -> raise EOF
-
-(** Tokenizer for menhir *)
-let lexer :
- (module Encoding) ->
- Sedlexing.lexbuf ->
- unit ->
- token * Lexing.position * Lexing.position =
- fun (module E : Encoding.S) lexbuf ->
- Sedlexing.with_tokenizer (token (module E)) lexbuf
diff --git a/lib/lexer.mli b/lib/lexer.mli
index 0a8ec12..65e9fa0 100644
--- a/lib/lexer.mli
+++ b/lib/lexer.mli
@@ -4,11 +4,5 @@ end
exception EOF
-val lexer :
- (module Encoding) ->
- Sedlexing.lexbuf ->
- unit ->
- Tokens.token * Lexing.position * Lexing.position
-(** Apply the lexer to the source *)
-
-val discard : Sedlexing.lexbuf -> unit
+val token : Lexbuf.t -> Tokens.token
+val discard : Lexbuf.t -> unit
diff --git a/test/qsp_parser_test.ml b/test/qsp_parser_test.ml
index 3e4a96b..6ea9eab 100644
--- a/test/qsp_parser_test.ml
+++ b/test/qsp_parser_test.ml
@@ -12,12 +12,7 @@ let parse : string -> T.pos location =
fun content ->
let lexing = Sedlexing.Latin1.from_string content in
- match
- Qparser.Analyzer.parse
- (module Qsp_syntax.Tree)
- (module Sedlexing.Utf8)
- lexing
- with
+ match Qparser.Analyzer.parse (module Qsp_syntax.Tree) lexing with
| Ok e -> e
| Error e ->
let msg = Format.asprintf "%a" Qsp_syntax.Report.pp e in