aboutsummaryrefslogtreecommitdiff
path: root/lib/lexer.ml
diff options
context:
space:
mode:
Diffstat (limited to 'lib/lexer.ml')
-rw-r--r--lib/lexer.ml73
1 files changed, 17 insertions, 56 deletions
diff --git a/lib/lexer.ml b/lib/lexer.ml
index 7014d85..c643577 100644
--- a/lib/lexer.ml
+++ b/lib/lexer.ml
@@ -14,49 +14,16 @@ let pp_pos out { Lexing.pos_lnum; pos_cnum; pos_bol; _ } =
(* Extract the location name from the pattern *)
let location_name = Str.regexp {|.* \(.*\)|}
-module type Encoding = sig
- val lexeme : Sedlexing.lexbuf -> string
-end
-
-(* The comment system is terrible. The same symbol can be used for :
- - starting a comment
- - inequality operation
- In order to manage this, I try to identify the context in a very basic way,
- using a flag True False for determining the token to send.
-*)
-module Bucket = Ephemeron.K1.Bucket
-
-type bucket = (Sedlexing.lexbuf, int) Bucket.t
-
-let is_expression : bucket = Bucket.make ()
-
-let incr_level lexbuf =
- match Bucket.find is_expression lexbuf with
- | None -> Bucket.add is_expression lexbuf 1
- | Some v -> Bucket.add is_expression lexbuf (v + 1)
-
-let decr_level lexbuf =
- match Bucket.find is_expression lexbuf with
- | None -> ()
- | Some v ->
- if v > 1 then Bucket.add is_expression lexbuf (v - 1)
- else Bucket.remove is_expression lexbuf
-
-let build_ident lexbuf =
- let id =
- Sedlexing.lexeme lexbuf |> Idents.of_uchars |> String.uppercase_ascii
- in
+let build_ident buffer =
+ let id = Lexbuf.content buffer |> String.uppercase_ascii in
try
let value = Hashtbl.find Idents.keyword_table id in
- let _ = match value with IF | ELIF -> incr_level lexbuf | _ -> () in
+ let _ =
+ match value with IF | ELIF -> Lexbuf.incr_level buffer | _ -> ()
+ in
value
with Not_found -> IDENT id
-let incr_level lexbuf =
- match Bucket.find is_expression lexbuf with
- | None -> Bucket.add is_expression lexbuf 1
- | Some v -> Bucket.add is_expression lexbuf (v + 1)
-
let wait_balance : (Buffer.t -> Lexbuf.t -> 'a) -> Lexbuf.t -> 'a =
fun rule lexbuf ->
let _, position = Lexbuf.positions lexbuf in
@@ -154,12 +121,9 @@ let rec token : Lexbuf.t -> token =
let lexbuf = Lexbuf.buffer buffer in
match%sedlex lexbuf with
| 0Xfeff ->
- Lexbuf.start buffer;
(* Ignore the BOM *)
token buffer
| '#', Star space, location ->
- let _start_pos, end_pos = Sedlexing.lexing_positions lexbuf in
-
(* Extract the location name *)
let ident = Lexbuf.content buffer in
let () =
@@ -169,12 +133,11 @@ let rec token : Lexbuf.t -> token =
in
(* Restart the line number (new location here) *)
- Sedlexing.start lexbuf;
- Sedlexing.set_position lexbuf { end_pos with Lexing.pos_lnum = 1 };
+ Lexbuf.start buffer;
LOCATION_START ident
| '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) ->
- Bucket.remove is_expression lexbuf;
+ Lexbuf.reset_level buffer;
LOCATION_END
| Plus digit -> INTEGER (Lexbuf.content buffer)
| '+' -> PLUS
@@ -186,33 +149,31 @@ let rec token : Lexbuf.t -> token =
| '*' -> STAR
| ':' ->
(* We are leaving the block, the comment will be handled again *)
- decr_level lexbuf;
+ Lexbuf.decr_level buffer;
+
COLUMN
| '[' -> L_BRACKET
| ']' -> R_BRACKET
| '(' ->
- incr_level lexbuf;
+ Lexbuf.incr_level buffer;
L_PAREN
| ')' ->
- decr_level lexbuf;
+ Lexbuf.decr_level buffer;
R_PAREN
| '<' -> LT
| '>' -> GT
| coma -> COMA
| '=' ->
- incr_level lexbuf;
+ Lexbuf.incr_level buffer;
EQUAL
- | ident -> build_ident lexbuf
+ | ident -> build_ident buffer
| eol ->
- Bucket.add is_expression lexbuf 0;
+ Lexbuf.reset_level buffer;
EOL
| '&' ->
- Bucket.add is_expression lexbuf 0;
+ Lexbuf.reset_level buffer;
AMPERSAND
- | '!' -> (
- match Bucket.find is_expression lexbuf with
- | Some i when i <> 0 -> EXCLAMATION
- | _ -> skip_comment buffer)
+ | '!' -> if Lexbuf.level buffer > 0 then EXCLAMATION else skip_comment buffer
| spaces -> token buffer
| '\'' -> LITERAL (wait_balance read_quoted_string buffer)
| '"' -> LITERAL (wait_balance read_dquoted_string buffer)
@@ -242,7 +203,7 @@ let rec discard buffer =
discard buffer
| eof -> raise EOF
| '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) ->
- Bucket.remove is_expression lexbuf;
+ Lexbuf.reset_level buffer;
()
| '!' ->
ignore @@ skip_comment buffer;