diff options
author | Sébastien Dailly <sebastien@chimrod.com> | 2021-02-04 21:14:01 +0100 |
---|---|---|
committer | Sébastien Dailly <sebastien@dailly.me> | 2022-02-07 14:37:57 +0100 |
commit | 86ec559f913c389e8dc055b494630f21a45e039b (patch) | |
tree | 822341b481695c9bf8b39f8b8fcbdeef56e629d6 /css/lib | |
parent | 03f8a08fe2dde9db9fb656dbea2e5494b67236ad (diff) |
css_merge application
Diffstat (limited to 'css/lib')
-rwxr-xr-x | css/lib/dune | 9 | ||||
-rwxr-xr-x | css/lib/lex_buffer.ml | 114 | ||||
-rwxr-xr-x | css/lib/lexer.ml | 351 | ||||
-rwxr-xr-x | css/lib/location.ml | 19 | ||||
-rwxr-xr-x | css/lib/menhir_parser.mly | 196 | ||||
-rwxr-xr-x | css/lib/parser.ml | 5 | ||||
-rwxr-xr-x | css/lib/parser.mli | 8 | ||||
-rwxr-xr-x | css/lib/types.ml | 76 | ||||
-rwxr-xr-x | css/lib/types.mli | 68 |
9 files changed, 846 insertions, 0 deletions
diff --git a/css/lib/dune b/css/lib/dune new file mode 100755 index 0000000..1b84385 --- /dev/null +++ b/css/lib/dune @@ -0,0 +1,9 @@ +(menhir + (modules menhir_parser) + (flags --explain)) + +(library + (name Css) + (libraries sedlex menhirLib) + (preprocess (pps sedlex.ppx))) + diff --git a/css/lib/lex_buffer.ml b/css/lib/lex_buffer.ml new file mode 100755 index 0000000..0416e53 --- /dev/null +++ b/css/lib/lex_buffer.ml @@ -0,0 +1,114 @@ +(* Based on + * https://github.com/smolkaj/ocaml-parsing/blob/master/src/LexBuffer.ml *) + +(** A custom lexbuffer that automatically keeps track of the source location. + This module is a thin wrapper arounds sedlexing's default buffer, which does + not provide this functionality. *) + +type t = { + buf : Sedlexing.lexbuf; + mutable pos : Lexing.position; + mutable pos_mark : Lexing.position; + mutable last_char : int option; + mutable last_char_mark : int option; +} +(** the lex buffer type *) + +let of_sedlex ?(file = "<n/a>") ?pos buf = + let pos = + match pos with + | None -> + { + Lexing.pos_fname = file; + pos_lnum = 1; + (* line number *) + pos_bol = 0; + (* offset of beginning of current line *) + pos_cnum = 0 (* total offset *); + } + | Some p -> p + in + { buf; pos; pos_mark = pos; last_char = None; last_char_mark = None } + +let of_ascii_string ?pos s = of_sedlex ?pos (Sedlexing.Latin1.from_string s) + +let of_ascii_file file = + let chan = open_in file in + of_sedlex ~file (Sedlexing.Latin1.from_channel chan) + +(** The next four functions are used by sedlex internally. + See https://www.lexifi.com/sedlex/libdoc/Sedlexing.html. *) +let mark lexbuf p = + lexbuf.pos_mark <- lexbuf.pos; + lexbuf.last_char_mark <- lexbuf.last_char; + Sedlexing.mark lexbuf.buf p + +let backtrack lexbuf = + lexbuf.pos <- lexbuf.pos_mark; + lexbuf.last_char <- lexbuf.last_char_mark; + Sedlexing.backtrack lexbuf.buf + +let start lexbuf = + lexbuf.pos_mark <- lexbuf.pos; + lexbuf.last_char_mark <- lexbuf.last_char; + Sedlexing.start lexbuf.buf + +(** location of next character *) +let next_loc lexbuf = { lexbuf.pos with pos_cnum = lexbuf.pos.pos_cnum + 1 } + +let cr = Char.code '\r' + +(** next character *) +let next lexbuf = + let c = Sedlexing.next lexbuf.buf in + let pos = next_loc lexbuf in + let ch = + match c with + | None -> None + | Some c -> ( try Some (Uchar.to_char c) with Invalid_argument _ -> None ) + in + ( match ch with + | Some '\r' -> + lexbuf.pos <- + { pos with pos_bol = pos.pos_cnum - 1; pos_lnum = pos.pos_lnum + 1 } + | Some '\n' when not (lexbuf.last_char = Some cr) -> + lexbuf.pos <- + { pos with pos_bol = pos.pos_cnum - 1; pos_lnum = pos.pos_lnum + 1 } + | Some '\n' -> () + | _ -> lexbuf.pos <- pos ); + ( match c with + | None -> lexbuf.last_char <- None + | Some c -> lexbuf.last_char <- Some (Uchar.to_int c) ); + c + +let raw lexbuf = Sedlexing.lexeme lexbuf.buf + +let latin1 ?(skip = 0) ?(drop = 0) lexbuf = + let len = Sedlexing.lexeme_length lexbuf.buf - skip - drop in + Sedlexing.Latin1.sub_lexeme lexbuf.buf skip len + +let utf8 ?(skip = 0) ?(drop = 0) lexbuf = + let len = Sedlexing.lexeme_length lexbuf.buf - skip - drop in + Sedlexing.Utf8.sub_lexeme lexbuf.buf skip len + +let container_lnum_ref = ref 0 + +let fix_loc loc = + let fix_pos pos = + (* It looks like lex_buffer.ml returns a position with 2 extra + * chars for parsed lines after the first one. Bug? *) + let pos_cnum = + if pos.Lexing.pos_lnum > !container_lnum_ref then pos.Lexing.pos_cnum - 2 + else pos.Lexing.pos_cnum + in + { pos with Lexing.pos_cnum } + in + let loc_start = fix_pos loc.Location.loc_start in + let loc_end = fix_pos loc.Location.loc_end in + { loc with Location.loc_start; loc_end } + +let make_loc ?(loc_ghost = false) start_pos end_pos : Location.t = + { Location.loc_start = start_pos; loc_end = end_pos; loc_ghost } + +let make_loc_and_fix ?(loc_ghost = false) start_pos end_pos : Location.t = + make_loc ~loc_ghost start_pos end_pos |> fix_loc diff --git a/css/lib/lexer.ml b/css/lib/lexer.ml new file mode 100755 index 0000000..9185acc --- /dev/null +++ b/css/lib/lexer.ml @@ -0,0 +1,351 @@ +(** CSS lexer. + * Reference: + * https://www.w3.org/TR/css-syntax-3/ + * https://github.com/yahoo/css-js/blob/master/src/l/css.3.l *) + +module Sedlexing = Lex_buffer + +exception LexingError of (Lexing.position * string) +(** Signals a lexing error at the provided source location. *) + +exception + ParseError of (Menhir_parser.token * Lexing.position * Lexing.position) +(** Signals a parsing error at the provided token and its start and end + * locations. *) + +exception GrammarError of (string * Location.t) +(** Signals a grammar error at the provided location. *) + +let position_to_string pos = + Printf.sprintf "[%d,%d+%d]" pos.Lexing.pos_lnum pos.Lexing.pos_bol + (pos.Lexing.pos_cnum - pos.Lexing.pos_bol) + +let location_to_string loc = + Printf.sprintf "%s..%s" + (position_to_string loc.Location.loc_start) + (position_to_string loc.Location.loc_end) + +let dimension_to_string = function + | Types.Length -> "length" + | Angle -> "angle" + | Time -> "time" + | Frequency -> "frequency" + +let token_to_string = function + | Menhir_parser.EOF -> "EOF" + | LEFT_BRACE -> "{" + | RIGHT_BRACE -> "}" + | LEFT_PAREN -> "(" + | RIGHT_PAREN -> ")" + | LEFT_BRACKET -> "[" + | RIGHT_BRACKET -> "]" + | COLON -> ":" + | DOT -> "." + (* Whitespaces are detected only in selectors, before ":", ".", and "#", to + * disambiguate between "p :first-child" and "p:first-child", these + * whitespaces are replaced with "*" *) + | WHITESPACE -> "*" + | SEMI_COLON -> ";" + | PERCENTAGE -> "%" + | IMPORTANT -> "!important" + | IDENT s -> "IDENT(" ^ s ^ ")" + | STRING s -> "STRING(" ^ s ^ ")" + | URI s -> "URI(" ^ s ^ ")" + | OPERATOR s -> "OPERATOR(" ^ s ^ ")" + | DELIM s -> "DELIM(" ^ s ^ ")" + | NESTED_AT_RULE s -> "NESTED_AT_RULE(" ^ s ^ ")" + | AT_RULE_WITHOUT_BODY s -> "AT_RULE_WITHOUT_BODY(" ^ s ^ ")" + | AT_RULE s -> "AT_RULE(" ^ s ^ ")" + | FUNCTION s -> "FUNCTION(" ^ s ^ ")" + | HASH s -> "HASH(" ^ s ^ ")" + | NUMBER s -> "NUMBER(" ^ s ^ ")" + | UNICODE_RANGE s -> "UNICODE_RANGE(" ^ s ^ ")" + | FLOAT_DIMENSION (n, s, d) -> + "FLOAT_DIMENSION(" ^ n ^ ", " ^ s ^ ", " ^ dimension_to_string d ^ ")" + | DIMENSION (n, d) -> "DIMENSION(" ^ n ^ ", " ^ d ^ ")" + +let () = + Location.register_error_of_exn (function + | LexingError (pos, msg) -> + let loc = Lex_buffer.make_loc_and_fix pos pos in + Some (Location.error ~loc msg) + | ParseError (token, start_pos, end_pos) -> + let loc = Lex_buffer.make_loc_and_fix start_pos end_pos in + let msg = + Printf.sprintf "Parse error while reading token '%s'" + (token_to_string token) + in + Some (Location.error ~loc msg) + | GrammarError (msg, loc) -> Some (Location.error ~loc msg) + | _ -> None) + +(* Regexes *) +let newline = [%sedlex.regexp? '\n' | "\r\n" | '\r' | '\012'] + +let white_space = [%sedlex.regexp? " " | '\t' | newline] + +let ws = [%sedlex.regexp? Star white_space] + +let hex_digit = [%sedlex.regexp? '0' .. '9' | 'a' .. 'f' | 'A' .. 'F'] + +let digit = [%sedlex.regexp? '0' .. '9'] + +let non_ascii = [%sedlex.regexp? '\160' .. '\255'] + +let up_to_6_hex_digits = [%sedlex.regexp? Rep (hex_digit, 1 .. 6)] + +let unicode = [%sedlex.regexp? '\\', up_to_6_hex_digits, Opt white_space] + +let unicode_range = + [%sedlex.regexp? + ( Rep ((hex_digit | '?'), 1 .. 6) + | up_to_6_hex_digits, '-', up_to_6_hex_digits )] + +let escape = + [%sedlex.regexp? unicode | '\\', Compl ('\r' | '\n' | '\012' | hex_digit)] + +let ident_start = + [%sedlex.regexp? '_' | 'a' .. 'z' | 'A' .. 'Z' | non_ascii | escape] + +let ident_char = + [%sedlex.regexp? + '_' | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | non_ascii | escape] + +let ident = [%sedlex.regexp? (Opt '-', ident_start | '-', '-'), Star ident_char] + +let string_quote = + [%sedlex.regexp? + '"', Star (Compl ('\n' | '\r' | '\012' | '"') | '\\', newline | escape), '"'] + +let string_apos = + [%sedlex.regexp? + ( '\'', + Star (Compl ('\n' | '\r' | '\012' | '\'') | '\\', newline | escape), + '\'' )] + +let string = [%sedlex.regexp? string_quote | string_apos] + +let name = [%sedlex.regexp? Plus ident_char] + +let number = + [%sedlex.regexp? + ( ( Opt ('+' | '-'), + Plus digit, + Opt ('.', Plus digit), + Opt (('e' | 'E'), ('+' | '-'), Plus digit) ) + | ( Opt ('+' | '-'), + '.', + Plus digit, + Opt (('e' | 'E'), ('+' | '-'), Plus digit) ) )] + +let non_printable = + [%sedlex.regexp? '\x00' .. '\x08' | '\x0B' | '\x0E' .. '\x1F' | '\x7F'] + +let url_unquoted = + [%sedlex.regexp? + Star (Compl ('"' | '\'' | '(' | ')' | '\\' | non_printable) | escape)] + +let url = [%sedlex.regexp? url_unquoted | string] + +let operator = [%sedlex.regexp? "~=" | "|=" | "^=" | "$=" | "*=" | "||"] + +let at_rule = [%sedlex.regexp? "@", ident] + +let at_rule_without_body = + [%sedlex.regexp? "@", ("charset" | "import" | "namespace")] + +let vendor_prefix = [%sedlex.regexp? "-webkit-" | "-moz-" | "-o-" | "-ms-"] + +let nested_at_rule = + [%sedlex.regexp? + ( "@", + ( "document" | "keyframes" + | vendor_prefix, "keyframes" + | "media" | "supports" | "scope" ) )] + +let _a = [%sedlex.regexp? 'A' | 'a'] + +let _b = [%sedlex.regexp? 'B' | 'b'] + +let _c = [%sedlex.regexp? 'C' | 'c'] + +let _d = [%sedlex.regexp? 'D' | 'd'] + +let _e = [%sedlex.regexp? 'e' | 'E'] + +let _f = [%sedlex.regexp? 'F' | 'f'] + +let _g = [%sedlex.regexp? 'G' | 'g'] + +let _h = [%sedlex.regexp? 'H' | 'h'] + +let _i = [%sedlex.regexp? 'I' | 'i'] + +let _j = [%sedlex.regexp? 'J' | 'j'] + +let _k = [%sedlex.regexp? 'K' | 'k'] + +let _l = [%sedlex.regexp? 'L' | 'l'] + +let _m = [%sedlex.regexp? 'M' | 'm'] + +let _n = [%sedlex.regexp? 'N' | 'n'] + +let _o = [%sedlex.regexp? 'O' | 'o'] + +let _p = [%sedlex.regexp? 'P' | 'p'] + +let _q = [%sedlex.regexp? 'Q' | 'q'] + +let _r = [%sedlex.regexp? 'R' | 'r'] + +let _s = [%sedlex.regexp? 'S' | 's'] + +let _t = [%sedlex.regexp? 'T' | 't'] + +let _u = [%sedlex.regexp? 'U' | 'u'] + +let _v = [%sedlex.regexp? 'V' | 'v'] + +let _w = [%sedlex.regexp? 'W' | 'w'] + +let _x = [%sedlex.regexp? 'X' | 'x'] + +let _y = [%sedlex.regexp? 'Y' | 'y'] + +let _z = [%sedlex.regexp? 'Z' | 'z'] + +let important = [%sedlex.regexp? "!", ws, _i, _m, _p, _o, _r, _t, _a, _n, _t] + +let length = + [%sedlex.regexp? + ( _c, _a, _p + | _c, _h + | _e, _m + | _e, _x + | _i, _c + | _l, _h + | _r, _e, _m + | _r, _l, _h + | _v, _h + | _v, _w + | _v, _i + | _v, _b + | _v, _m, _i, _n + | _v, _m, _a, _x + | _c, _m + | _m, _m + | _q + | _i, _n + | _p, _c + | _p, _t + | _p, _x )] + +let angle = + [%sedlex.regexp? _d, _e, _g | _g, _r, _a, _d | _r, _a, _d | _t, _u, _r, _n] + +let time = [%sedlex.regexp? _s | _m, _s] + +let frequency = [%sedlex.regexp? _h, _z | _k, _h, _z] + +(* Returns true if white spaces were discarded *) +let discard_comments_and_white_spaces buf = + let rec discard_white_spaces buf spaces_detected = + match%sedlex buf with + | Plus white_space -> discard_white_spaces buf true + | "/*" -> discard_comments buf spaces_detected + | _ -> spaces_detected + and discard_comments buf spaces_detected = + match%sedlex buf with + | eof -> + raise (LexingError (buf.Lex_buffer.pos, "Unterminated comment at EOF")) + | "*/" -> discard_white_spaces buf spaces_detected + | any -> discard_comments buf spaces_detected + | _ -> assert false + in + discard_white_spaces buf false + +let rec get_next_tokens buf spaces_detected = + let open Menhir_parser in + match%sedlex buf with + | eof -> [ EOF ] + | ';' -> [ SEMI_COLON ] + | '}' -> [ RIGHT_BRACE ] + | '{' -> [ LEFT_BRACE ] + | ':' -> if spaces_detected then [ WHITESPACE; COLON ] else [ COLON ] + | '.' -> if spaces_detected then [ WHITESPACE; DOT ] else [ DOT ] + | '(' -> [ LEFT_PAREN ] + | ')' -> [ RIGHT_PAREN ] + | '[' -> [ LEFT_BRACKET ] + | ']' -> [ RIGHT_BRACKET ] + | '%' -> [ PERCENTAGE ] + | operator -> [ OPERATOR (Lex_buffer.latin1 buf) ] + | string -> [ STRING (Lex_buffer.latin1 ~skip:1 ~drop:1 buf) ] + | "url(" -> [ get_url "" buf ] + | important -> [ IMPORTANT ] + | nested_at_rule -> [ NESTED_AT_RULE (Lex_buffer.latin1 ~skip:1 buf) ] + | at_rule_without_body -> + [ AT_RULE_WITHOUT_BODY (Lex_buffer.latin1 ~skip:1 buf) ] + | at_rule -> [ AT_RULE (Lex_buffer.latin1 ~skip:1 buf) ] + (* NOTE: should be placed above ident, otherwise pattern with + * '-[0-9a-z]{1,6}' cannot be matched *) + | _u, '+', unicode_range -> [ UNICODE_RANGE (Lex_buffer.latin1 buf) ] + | ident, '(' -> [ FUNCTION (Lex_buffer.latin1 ~drop:1 buf) ] + | ident -> [ IDENT (Lex_buffer.latin1 buf) ] + | '#', name -> + if spaces_detected then + [ WHITESPACE; HASH (Lex_buffer.latin1 ~skip:1 buf) ] + else [ HASH (Lex_buffer.latin1 ~skip:1 buf) ] + | number -> [ get_dimension (Lex_buffer.latin1 buf) buf ] + | any -> [ DELIM (Lex_buffer.latin1 buf) ] + | _ -> assert false + +and get_dimension n buf = + match%sedlex buf with + | length -> FLOAT_DIMENSION (n, Lex_buffer.latin1 buf, Types.Length) + | angle -> FLOAT_DIMENSION (n, Lex_buffer.latin1 buf, Types.Angle) + | time -> FLOAT_DIMENSION (n, Lex_buffer.latin1 buf, Types.Time) + | frequency -> FLOAT_DIMENSION (n, Lex_buffer.latin1 buf, Types.Frequency) + | ident -> DIMENSION (n, Lex_buffer.latin1 buf) + | _ -> NUMBER n + +and get_url url buf = + match%sedlex buf with + | ws -> get_url url buf + | url -> get_url (Lex_buffer.latin1 buf) buf + | ")" -> URI url + | eof -> raise (LexingError (buf.Lex_buffer.pos, "Incomplete URI")) + | any -> + raise + (LexingError + ( buf.Lex_buffer.pos, + "Unexpected token: " ^ Lex_buffer.latin1 buf ^ " parsing an URI" )) + | _ -> assert false + +let token_queue = Queue.create () + +let queue_next_tokens_with_location buf = + let spaces_detected = discard_comments_and_white_spaces buf in + let loc_start = Lex_buffer.next_loc buf in + let tokens = get_next_tokens buf spaces_detected in + let loc_end = Lex_buffer.next_loc buf in + List.iter (fun t -> Queue.add (t, loc_start, loc_end) token_queue) tokens + +let parse buf p = + let last_token = + ref (Menhir_parser.EOF, Lexing.dummy_pos, Lexing.dummy_pos) + in + let next_token () = + if Queue.is_empty token_queue then queue_next_tokens_with_location buf; + last_token := Queue.take token_queue; + !last_token + in + try MenhirLib.Convert.Simplified.traditional2revised p next_token with + | LexingError _ as e -> raise e + | _ -> raise (ParseError !last_token) + +let parse_string ?container_lnum ?pos s p = + (match container_lnum with + | None -> () + | Some lnum -> Lex_buffer.container_lnum_ref := lnum); + parse (Lex_buffer.of_ascii_string ?pos s) p diff --git a/css/lib/location.ml b/css/lib/location.ml new file mode 100755 index 0000000..23e9c49 --- /dev/null +++ b/css/lib/location.ml @@ -0,0 +1,19 @@ +type t = { + + loc_start : Lexing.position; + loc_end : Lexing.position; + loc_ghost : bool; +} + +let none = + { loc_start = Lexing.dummy_pos + ; loc_end = Lexing.dummy_pos + ; loc_ghost = true + } + +let register_error_of_exn _ = () + +let error ~loc ?(sub=[]) _ = + let _ = loc + and _ = sub in + () diff --git a/css/lib/menhir_parser.mly b/css/lib/menhir_parser.mly new file mode 100755 index 0000000..fb5a1cf --- /dev/null +++ b/css/lib/menhir_parser.mly @@ -0,0 +1,196 @@ +%{ + +(* Workaround for this dune bug: https://github.com/ocaml/dune/issues/2450 *) +module Css = struct end + +open Types + +%} + +%token EOF +%token LEFT_BRACE +%token RIGHT_BRACE +%token LEFT_PAREN +%token RIGHT_PAREN +%token LEFT_BRACKET +%token RIGHT_BRACKET +%token COLON +%token DOT +(* Whitespaces are detected only in selectors, before ":", ".", and "#", to + * disambiguate between "p :first-child" and "p:first-child", these + * whitespaces are replaced with "*" *) +%token WHITESPACE +%token SEMI_COLON +%token PERCENTAGE +%token IMPORTANT +%token <string> IDENT +%token <string> STRING +%token <string> URI +%token <string> OPERATOR +%token <string> DELIM +%token <string> NESTED_AT_RULE +%token <string> AT_RULE_WITHOUT_BODY +%token <string> AT_RULE +%token <string> FUNCTION +%token <string> HASH +%token <string> NUMBER +%token <string> UNICODE_RANGE +%token <string * string * Types.dimension> FLOAT_DIMENSION +%token <string * string> DIMENSION + +%start <Types.Stylesheet.t> stylesheet +%start <Types.Declaration_list.t> declaration_list + +%% + +stylesheet: + s = stylesheet_without_eof; EOF { s } + ; + +stylesheet_without_eof: + rs = list(rule) { (rs, Lex_buffer.make_loc_and_fix $startpos $endpos) } + ; + +declaration_list: + ds = declarations_with_loc; EOF { ds } + ; + +rule: + | r = at_rule { Rule.At_rule r } + | r = style_rule { Rule.Style_rule r } + ; + +at_rule: + | name = AT_RULE_WITHOUT_BODY; xs = prelude_with_loc; SEMI_COLON { + { At_rule.name = (name, Lex_buffer.make_loc_and_fix $startpos(name) $endpos(name)); + prelude = xs; + block = Brace_block.Empty; + loc = Lex_buffer.make_loc_and_fix $startpos $endpos; + } + } + | name = NESTED_AT_RULE; xs = prelude_with_loc; LEFT_BRACE; s = stylesheet_without_eof; RIGHT_BRACE { + { At_rule.name = (name, Lex_buffer.make_loc_and_fix $startpos(name) $endpos(name)); + prelude = xs; + block = Brace_block.Stylesheet s; + loc = Lex_buffer.make_loc_and_fix $startpos $endpos; + } + } + | name = AT_RULE; xs = prelude_with_loc; LEFT_BRACE; ds = declarations_with_loc; RIGHT_BRACE { + { At_rule.name = (name, Lex_buffer.make_loc_and_fix $startpos(name) $endpos(name)); + prelude = xs; + block = Brace_block.Declaration_list ds; + loc = Lex_buffer.make_loc_and_fix $startpos $endpos; + } + } + ; + +style_rule: + | xs = prelude_with_loc; LEFT_BRACE; RIGHT_BRACE { + { Style_rule.prelude = xs; + block = [], Location.none; + loc = Lex_buffer.make_loc_and_fix $startpos $endpos; + } + } + | xs = prelude_with_loc; LEFT_BRACE; ds = declarations_with_loc; RIGHT_BRACE { + { Style_rule.prelude = xs; + block = ds; + loc = Lex_buffer.make_loc_and_fix $startpos $endpos; + } + } + ; + +prelude_with_loc: + xs = prelude { (xs, Lex_buffer.make_loc_and_fix $startpos $endpos) } + ; + +prelude: + xs = list(component_value_with_loc_in_prelude) { xs } + ; + +declarations_with_loc: + | ds = declarations { (ds, Lex_buffer.make_loc_and_fix ~loc_ghost:true $startpos $endpos) } + ; + +declarations: + | ds = declarations_without_ending_semi_colon { List.rev ds } + | ds = declarations_without_ending_semi_colon; SEMI_COLON { List.rev ds } + ; + +declarations_without_ending_semi_colon: + | d = declaration_or_at_rule { [d] } + | ds = declarations_without_ending_semi_colon; SEMI_COLON; d = declaration_or_at_rule { d :: ds } + ; + +declaration_or_at_rule: + | d = declaration { Declaration_list.Declaration d } + | r = at_rule { Declaration_list.At_rule r } + ; + +declaration: + n = IDENT; option(WHITESPACE); COLON; v = list(component_value_with_loc); i = boption(IMPORTANT) { + { Declaration.name = (n, Lex_buffer.make_loc_and_fix $startpos(n) $endpos(n)); + value = (v, Lex_buffer.make_loc_and_fix $startpos(v) $endpos(v)); + important = (i, Lex_buffer.make_loc_and_fix $startpos(i) $endpos(i)); + loc = Lex_buffer.make_loc_and_fix $startpos $endpos; + } + } + ; + +paren_block: + LEFT_PAREN; xs = list(component_value_with_loc); RIGHT_PAREN { xs } + ; + +bracket_block: + LEFT_BRACKET; xs = list(component_value_with_loc); RIGHT_BRACKET { xs } + ; + +component_value_with_loc: + | c = component_value { (c, Lex_buffer.make_loc_and_fix $startpos $endpos) } + +component_value: + | b = paren_block { Component_value.Paren_block b } + | b = bracket_block { Component_value.Bracket_block b } + | n = NUMBER; PERCENTAGE { Component_value.Percentage n } + | i = IDENT { Component_value.Ident i } + | s = STRING { Component_value.String s } + | u = URI { Component_value.Uri u } + | o = OPERATOR { Component_value.Operator o } + | d = DELIM { Component_value.Delim d } + | option(WHITESPACE); COLON { Component_value.Delim ":" } + | option(WHITESPACE); DOT { Component_value.Delim "." } + | f = FUNCTION; xs = list(component_value_with_loc); RIGHT_PAREN { + Component_value.Function ((f, Lex_buffer.make_loc_and_fix $startpos(f) $endpos(f)), + (xs, Lex_buffer.make_loc_and_fix $startpos(xs) $endpos(xs))) + } + | option(WHITESPACE); h = HASH { Component_value.Hash h } + | n = NUMBER { Component_value.Number n } + | r = UNICODE_RANGE { Component_value.Unicode_range r } + | d = FLOAT_DIMENSION { Component_value.Float_dimension d } + | d = DIMENSION { Component_value.Dimension d } + ; + +component_value_with_loc_in_prelude: + | c = component_value_in_prelude { (c, Lex_buffer.make_loc_and_fix $startpos $endpos) } + +component_value_in_prelude: + | b = paren_block { Component_value.Paren_block b } + | b = bracket_block { Component_value.Bracket_block b } + | n = NUMBER; PERCENTAGE { Component_value.Percentage n } + | i = IDENT { Component_value.Ident i } + | s = STRING { Component_value.String s } + | u = URI { Component_value.Uri u } + | o = OPERATOR { Component_value.Operator o } + | d = DELIM { Component_value.Delim d } + | WHITESPACE { Component_value.Delim "*" } + | COLON { Component_value.Delim ":" } + | DOT { Component_value.Delim "." } + | f = FUNCTION; xs = list(component_value_with_loc); RIGHT_PAREN { + Component_value.Function ((f, Lex_buffer.make_loc_and_fix $startpos(f) $endpos(f)), + (xs, Lex_buffer.make_loc_and_fix $startpos(xs) $endpos(xs))) + } + | h = HASH { Component_value.Hash h } + | n = NUMBER { Component_value.Number n } + | r = UNICODE_RANGE { Component_value.Unicode_range r } + | d = FLOAT_DIMENSION { Component_value.Float_dimension d } + | d = DIMENSION { Component_value.Dimension d } + ; diff --git a/css/lib/parser.ml b/css/lib/parser.ml new file mode 100755 index 0000000..1b4a5e7 --- /dev/null +++ b/css/lib/parser.ml @@ -0,0 +1,5 @@ +let parse_stylesheet ?container_lnum ?pos css = + Lexer.parse_string ?container_lnum ?pos css Menhir_parser.stylesheet + +let parse_declaration_list ?container_lnum ?pos css = + Lexer.parse_string ?container_lnum ?pos css Menhir_parser.declaration_list diff --git a/css/lib/parser.mli b/css/lib/parser.mli new file mode 100755 index 0000000..3ddb9ae --- /dev/null +++ b/css/lib/parser.mli @@ -0,0 +1,8 @@ +val parse_stylesheet : + ?container_lnum:int -> ?pos:Lexing.position -> string -> Types.Stylesheet.t + +val parse_declaration_list : + ?container_lnum:int -> + ?pos:Lexing.position -> + string -> + Types.Declaration_list.t diff --git a/css/lib/types.ml b/css/lib/types.ml new file mode 100755 index 0000000..8767fdf --- /dev/null +++ b/css/lib/types.ml @@ -0,0 +1,76 @@ +type 'a with_loc = 'a * Location.t + +type dimension = Length | Angle | Time | Frequency + +module rec Component_value : sig + type t = + | Paren_block of t with_loc list + | Bracket_block of t with_loc list + | Percentage of string + | Ident of string + | String of string + | Uri of string + | Operator of string + | Delim of string + | Function of string with_loc * t with_loc list with_loc + | Hash of string + | Number of string + | Unicode_range of string + | Float_dimension of (string * string * dimension) + | Dimension of (string * string) +end = + Component_value + +and Brace_block : sig + type t = + | Empty + | Declaration_list of Declaration_list.t + | Stylesheet of Stylesheet.t +end = + Brace_block + +and At_rule : sig + type t = { + name : string with_loc; + prelude : Component_value.t with_loc list with_loc; + block : Brace_block.t; + loc : Location.t; + } +end = + At_rule + +and Declaration : sig + type t = { + name : string with_loc; + value : Component_value.t with_loc list with_loc; + important : bool with_loc; + loc : Location.t; + } +end = + Declaration + +and Declaration_list : sig + type kind = Declaration of Declaration.t | At_rule of At_rule.t + + type t = kind list with_loc +end = + Declaration_list + +and Style_rule : sig + type t = { + prelude : Component_value.t with_loc list with_loc; + block : Declaration_list.t; + loc : Location.t; + } +end = + Style_rule + +and Rule : sig + type t = Style_rule of Style_rule.t | At_rule of At_rule.t +end = + Rule + +and Stylesheet : sig + type t = Rule.t list with_loc +end = + Stylesheet diff --git a/css/lib/types.mli b/css/lib/types.mli new file mode 100755 index 0000000..94b0ea1 --- /dev/null +++ b/css/lib/types.mli @@ -0,0 +1,68 @@ +type 'a with_loc = 'a * Location.t + +type dimension = Length | Angle | Time | Frequency + +module rec Component_value : sig + type t = + | Paren_block of t with_loc list + | Bracket_block of t with_loc list + | Percentage of string + | Ident of string + | String of string + | Uri of string + | Operator of string + | Delim of string + | Function of string with_loc * t with_loc list with_loc + | Hash of string + | Number of string + | Unicode_range of string + | Float_dimension of (string * string * dimension) + | Dimension of (string * string) +end + +and Brace_block : sig + type t = + | Empty + | Declaration_list of Declaration_list.t + | Stylesheet of Stylesheet.t +end + +and At_rule : sig + type t = { + name : string with_loc; + prelude : Component_value.t with_loc list with_loc; + block : Brace_block.t; + loc : Location.t; + } +end + +and Declaration : sig + type t = { + name : string with_loc; + value : Component_value.t with_loc list with_loc; + important : bool with_loc; + loc : Location.t; + } +end + +and Declaration_list : sig + type kind = Declaration of Declaration.t | At_rule of At_rule.t + + type t = kind list with_loc +end + +and Style_rule : sig + type t = { + prelude : Component_value.t with_loc list with_loc; + block : Declaration_list.t; + loc : Location.t; + } +end + +and Rule : sig + type t = Style_rule of Style_rule.t | At_rule of At_rule.t +end + +and Stylesheet : sig + type t = Rule.t list with_loc +end |