diff options
author | Chimrod <> | 2023-10-06 08:35:56 +0200 |
---|---|---|
committer | Chimrod <> | 2023-10-06 08:35:56 +0200 |
commit | 97ab5c9a21166f0bffee482210d69877fd6809fa (patch) | |
tree | d1fa44000fa07631edc8924a90020f2cfe637263 /lib/lexer.ml | |
parent | 40f4dbe7844725e0ab07f03f25c35f55b4699b46 (diff) |
Moved qparser and syntax in the library folder
Diffstat (limited to 'lib/lexer.ml')
-rw-r--r-- | lib/lexer.ml | 212 |
1 files changed, 0 insertions, 212 deletions
diff --git a/lib/lexer.ml b/lib/lexer.ml deleted file mode 100644 index c643577..0000000 --- a/lib/lexer.ml +++ /dev/null @@ -1,212 +0,0 @@ -(** - Lexer using sedlex - *) - -open Tokens - -exception UnclosedQuote of { content : string; line : int } -exception LexError of Lexing.position * string -exception EOF - -let pp_pos out { Lexing.pos_lnum; pos_cnum; pos_bol; _ } = - Format.fprintf out "line %d:%d" pos_lnum (pos_cnum - pos_bol) - -(* Extract the location name from the pattern *) -let location_name = Str.regexp {|.* \(.*\)|} - -let build_ident buffer = - let id = Lexbuf.content buffer |> String.uppercase_ascii in - try - let value = Hashtbl.find Idents.keyword_table id in - let _ = - match value with IF | ELIF -> Lexbuf.incr_level buffer | _ -> () - in - value - with Not_found -> IDENT id - -let wait_balance : (Buffer.t -> Lexbuf.t -> 'a) -> Lexbuf.t -> 'a = - fun rule lexbuf -> - let _, position = Lexbuf.positions lexbuf in - Lexbuf.set_start_position lexbuf position; - - try[@warning "-52"] - let token = rule (Buffer.create 256) lexbuf in - token - with Failure "lexing: empty token" -> - let position, _ = Lexbuf.positions lexbuf in - let line = position.Lexing.pos_lnum and content = Lexbuf.content lexbuf in - (raise (UnclosedQuote { line; content }) [@warning "+52"]) - -let space = [%sedlex.regexp? ' ' | '\t'] -let eol = [%sedlex.regexp? '\r' | '\n' | "\r\n"] -let coma = [%sedlex.regexp? ','] -let digit = [%sedlex.regexp? '0' .. '9'] -let letters = [%sedlex.regexp? 'a' .. 'z' | 'A' .. 'Z' | '_'] -let spaces = [%sedlex.regexp? Plus space] -let ident = [%sedlex.regexp? ('$' | letters), Star (digit | letters)] -let location_ident = [%sedlex.regexp? letters | digit] -let location_prefix = [%sedlex.regexp? '!' | '$' | '#' | '^'] -let location = [%sedlex.regexp? Opt location_prefix, Plus location_ident] - -let rec read_long_string level buf buffer = - let lexbuf = Lexbuf.buffer buffer in - match%sedlex lexbuf with - | '{' -> - Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); - read_long_string (level + 1) buf buffer - | '}' -> ( - match level with - | 0 -> Buffer.contents buf - | _ -> - Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); - read_long_string (level - 1) buf buffer) - | eol -> - Buffer.add_string buf (Lexbuf.content buffer); - read_long_string level buf buffer - | any -> - Buffer.add_string buf (Lexbuf.content buffer); - read_long_string level buf buffer - | _ -> raise Not_found - -let rec read_dquoted_string buf buffer = - let lexbuf = Lexbuf.buffer buffer in - match%sedlex lexbuf with - | "\"\"" -> - Buffer.add_char buf '"'; - read_dquoted_string buf buffer - | '"' -> Buffer.contents buf - | any -> - Buffer.add_string buf (Sedlexing.Utf8.lexeme lexbuf); - read_dquoted_string buf buffer - | _ -> raise Not_found - -let rec read_quoted_string buf buffer = - let lexbuf = Lexbuf.buffer buffer in - match%sedlex lexbuf with - | "''" -> - Buffer.add_char buf '\''; - read_quoted_string buf buffer - | '\'' -> Buffer.contents buf - | eol -> - Buffer.add_string buf (Lexbuf.content buffer); - read_quoted_string buf buffer - | any -> - Buffer.add_string buf (Lexbuf.content buffer); - read_quoted_string buf buffer - | _ -> raise Not_found - -let rec skip_comment buffer = - let lexbuf = Lexbuf.buffer buffer in - match%sedlex lexbuf with - | '{' -> - let _ = wait_balance (read_long_string 0) buffer in - skip_comment buffer - | '\'' -> - let _ = wait_balance read_quoted_string buffer in - skip_comment buffer - | '"' -> - let _ = wait_balance read_dquoted_string buffer in - skip_comment buffer - | eol -> - (* Ugly hack used in order to put the eol in the front of the next - parsing. *) - Sedlexing.rollback lexbuf; - COMMENT - | any -> skip_comment buffer - | _ -> raise Not_found - -(** Main lexer *) -let rec token : Lexbuf.t -> token = - fun buffer -> - let lexbuf = Lexbuf.buffer buffer in - match%sedlex lexbuf with - | 0Xfeff -> - (* Ignore the BOM *) - token buffer - | '#', Star space, location -> - (* Extract the location name *) - let ident = Lexbuf.content buffer in - let () = - match Str.string_match location_name ident 0 with - | false -> () - | true -> Sedlexing.set_filename lexbuf (Str.matched_group 1 ident) - in - - (* Restart the line number (new location here) *) - Lexbuf.start buffer; - - LOCATION_START ident - | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> - Lexbuf.reset_level buffer; - LOCATION_END - | Plus digit -> INTEGER (Lexbuf.content buffer) - | '+' -> PLUS - | '-' -> MINUS - | "+=" -> INCR - | "-=" -> DECR - | "*=" -> MULT_EQUAL - | '/' -> DIV - | '*' -> STAR - | ':' -> - (* We are leaving the block, the comment will be handled again *) - Lexbuf.decr_level buffer; - - COLUMN - | '[' -> L_BRACKET - | ']' -> R_BRACKET - | '(' -> - Lexbuf.incr_level buffer; - L_PAREN - | ')' -> - Lexbuf.decr_level buffer; - R_PAREN - | '<' -> LT - | '>' -> GT - | coma -> COMA - | '=' -> - Lexbuf.incr_level buffer; - EQUAL - | ident -> build_ident buffer - | eol -> - Lexbuf.reset_level buffer; - EOL - | '&' -> - Lexbuf.reset_level buffer; - AMPERSAND - | '!' -> if Lexbuf.level buffer > 0 then EXCLAMATION else skip_comment buffer - | spaces -> token buffer - | '\'' -> LITERAL (wait_balance read_quoted_string buffer) - | '"' -> LITERAL (wait_balance read_dquoted_string buffer) - | '{' -> LITERAL (wait_balance (read_long_string 0) buffer) - | eof -> raise EOF - | _ -> - let position = fst @@ Sedlexing.lexing_positions lexbuf in - let tok = Lexbuf.content buffer in - - let msg = - Format.asprintf "Unexpected character %S at %a" tok pp_pos position - in - - raise @@ LexError (position, msg) - -let rec discard buffer = - let lexbuf = Lexbuf.buffer buffer in - match%sedlex lexbuf with - | '\'' -> - ignore (wait_balance read_quoted_string buffer); - discard buffer - | '"' -> - ignore (wait_balance read_dquoted_string buffer); - discard buffer - | '{' -> - ignore (wait_balance (read_long_string 0) buffer); - discard buffer - | eof -> raise EOF - | '-', Plus '-', Star (Sub (any, ('\r' | '\n'))) -> - Lexbuf.reset_level buffer; - () - | '!' -> - ignore @@ skip_comment buffer; - discard buffer - | any -> discard buffer - | _ -> raise EOF |