aboutsummaryrefslogtreecommitdiff
path: root/lib/lexer.ml
diff options
context:
space:
mode:
Diffstat (limited to 'lib/lexer.ml')
-rw-r--r--lib/lexer.ml119
1 files changed, 59 insertions, 60 deletions
diff --git a/lib/lexer.ml b/lib/lexer.ml
index 1a2d788..a91bfdb 100644
--- a/lib/lexer.ml
+++ b/lib/lexer.ml
@@ -57,12 +57,14 @@ let incr_level lexbuf =
| None -> Bucket.add is_expression lexbuf 1
| Some v -> Bucket.add is_expression lexbuf (v + 1)
-let wait_balance rule lexbuf =
- try[@warning "-52"] rule (Buffer.create 17) lexbuf
+let wait_balance : (Buffer.t -> Lexbuf.t -> 'a) -> Lexbuf.t -> 'a =
+ fun rule lexbuf ->
+ try[@warning "-52"]
+ let token = rule (Buffer.create 256) lexbuf in
+ token
with Failure "lexing: empty token" ->
- let position, _ = Sedlexing.lexing_positions lexbuf in
- let line = position.Lexing.pos_lnum
- and content = Sedlexing.lexeme lexbuf |> Idents.of_uchars in
+ let position, _ = Lexbuf.positions lexbuf in
+ let line = position.Lexing.pos_lnum and content = Lexbuf.content lexbuf in
(raise (UnclosedQuote { line; content }) [@warning "+52"])
let space = [%sedlex.regexp? ' ' | '\t']
@@ -76,82 +78,87 @@ let location_ident = [%sedlex.regexp? letters | digit]
let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^']
let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident]
-let rec read_long_string level buf lexbuf =
+let rec read_long_string level buf buffer =
+ let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| '{' ->
Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_long_string (level + 1) buf lexbuf
+ read_long_string (level + 1) buf buffer
| '}' -> (
match level with
| 0 -> Buffer.contents buf
| _ ->
Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_long_string (level - 1) buf lexbuf)
+ read_long_string (level - 1) buf buffer)
| eol ->
- Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_long_string level buf lexbuf
+ Buffer.add_string buf (Lexbuf.content buffer);
+ read_long_string level buf buffer
| any ->
- Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_long_string level buf lexbuf
+ Buffer.add_string buf (Lexbuf.content buffer);
+ read_long_string level buf buffer
| _ -> raise Not_found
-let rec read_dquoted_string buf lexbuf =
+let rec read_dquoted_string buf buffer =
+ let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| "\"\"" ->
Buffer.add_char buf '"';
- read_dquoted_string buf lexbuf
+ read_dquoted_string buf buffer
| '"' -> Buffer.contents buf
| any ->
Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_dquoted_string buf lexbuf
+ read_dquoted_string buf buffer
| _ -> raise Not_found
-let rec read_quoted_string buf lexbuf =
+let rec read_quoted_string buf buffer =
+ let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| "''" ->
Buffer.add_char buf '\'';
- read_quoted_string buf lexbuf
+ read_quoted_string buf buffer
| '\'' -> Buffer.contents buf
| eol ->
- Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_quoted_string buf lexbuf
+ Buffer.add_string buf (Lexbuf.content buffer);
+ read_quoted_string buf buffer
| any ->
- Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf);
- read_quoted_string buf lexbuf
+ Buffer.add_string buf (Lexbuf.content buffer);
+ read_quoted_string buf buffer
| _ -> raise Not_found
-let rec skip_comment lexbuf =
+let rec skip_comment buffer =
+ let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| '{' ->
- let _ = wait_balance (read_long_string 0) lexbuf in
- skip_comment lexbuf
+ let _ = wait_balance (read_long_string 0) buffer in
+ skip_comment buffer
| '\'' ->
- let _ = wait_balance read_quoted_string lexbuf in
- skip_comment lexbuf
+ let _ = wait_balance read_quoted_string buffer in
+ skip_comment buffer
| '"' ->
- let _ = wait_balance read_dquoted_string lexbuf in
- skip_comment lexbuf
+ let _ = wait_balance read_dquoted_string buffer in
+ skip_comment buffer
| eol ->
(* Ugly hack used in order to put the eol in the front of the next
parsing. *)
Sedlexing.rollback lexbuf;
COMMENT
- | any -> skip_comment lexbuf
+ | any -> skip_comment buffer
| _ -> raise Not_found
(** Main lexer *)
-let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token =
- fun (module E : Encoding) lexbuf ->
+let rec token : Lexbuf.t -> token =
+ fun buffer ->
+ let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| 0Xfeff ->
- Sedlexing.start lexbuf;
+ Lexbuf.start buffer;
(* Ignore the BOM *)
- token (module E) lexbuf
+ token buffer
| '#', Star space, location ->
let _start_pos, end_pos = Sedlexing.lexing_positions lexbuf in
(* Extract the location name *)
- let ident = E.lexeme lexbuf in
+ let ident = Lexbuf.content buffer in
let () =
match Str.string_match location_name ident 0 with
| false -> ()
@@ -166,7 +173,7 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token =
| '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) ->
Bucket.remove is_expression lexbuf;
LOCATION_END
- | Plus digit -> INTEGER (E.lexeme lexbuf)
+ | Plus digit -> INTEGER (Lexbuf.content buffer)
| '+' -> PLUS
| '-' -> MINUS
| "+=" -> INCR
@@ -202,15 +209,15 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token =
| '!' -> (
match Bucket.find is_expression lexbuf with
| Some i when i <> 0 -> EXCLAMATION
- | _ -> skip_comment lexbuf)
- | spaces -> token (module E) lexbuf
- | '\'' -> LITERAL (wait_balance read_quoted_string lexbuf)
- | '"' -> LITERAL (wait_balance read_dquoted_string lexbuf)
- | '{' -> LITERAL (wait_balance (read_long_string 0) lexbuf)
+ | _ -> skip_comment buffer)
+ | spaces -> token buffer
+ | '\'' -> LITERAL (wait_balance read_quoted_string buffer)
+ | '"' -> LITERAL (wait_balance read_dquoted_string buffer)
+ | '{' -> LITERAL (wait_balance (read_long_string 0) buffer)
| eof -> raise EOF
| _ ->
let position = fst @@ Sedlexing.lexing_positions lexbuf in
- let tok = E.lexeme lexbuf in
+ let tok = Lexbuf.content buffer in
let msg =
Format.asprintf "Unexpected character %S at %a" tok pp_pos position
@@ -218,32 +225,24 @@ let rec token : (module Encoding.S) -> Sedlexing.lexbuf -> token =
raise @@ LexError (position, msg)
-let rec discard lexbuf =
+let rec discard buffer =
+ let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| '\'' ->
- ignore (wait_balance read_quoted_string lexbuf);
- discard lexbuf
+ ignore (wait_balance read_quoted_string buffer);
+ discard buffer
| '"' ->
- ignore (wait_balance read_dquoted_string lexbuf);
- discard lexbuf
+ ignore (wait_balance read_dquoted_string buffer);
+ discard buffer
| '{' ->
- ignore (wait_balance (read_long_string 0) lexbuf);
- discard lexbuf
+ ignore (wait_balance (read_long_string 0) buffer);
+ discard buffer
| eof -> raise EOF
| '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) ->
Bucket.remove is_expression lexbuf;
()
| '!' ->
- ignore @@ skip_comment lexbuf;
- discard lexbuf
- | any -> discard lexbuf
+ ignore @@ skip_comment buffer;
+ discard buffer
+ | any -> discard buffer
| _ -> raise EOF
-
-(** Tokenizer for menhir *)
-let lexer :
- (module Encoding) ->
- Sedlexing.lexbuf ->
- unit ->
- token * Lexing.position * Lexing.position =
- fun (module E : Encoding.S) lexbuf ->
- Sedlexing.with_tokenizer (token (module E)) lexbuf