Prefer character sets over backtracking in the EDN parser.

This commit is contained in:
Kevin Mehall 2018-03-05 16:15:59 -08:00 committed by Richard Newman
parent 30bf827d16
commit c117f1e958

View file

@@ -77,7 +77,7 @@ alphanumeric = [0-9a-zA-Z]
 octaldigit = [0-7]
 validbase = [3][0-6] / [12][0-9] / [2-9]
 hex = [0-9a-fA-F]
-sign = "-" / "+"
+sign = [+-]
 pub bigint -> ValueAndSpan =
 start:#position b:$( sign? digit+ ) "N" end:#position {
@@ -208,8 +208,8 @@ namespace_separator = "/"
 // TODO: Be more picky here
 // Keywords follow the rules of symbols, except they can (and must) begin with :
 // e.g. :fred or :my/fred. See https://github.com/edn-format/edn#keywords
-symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>]
-symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [-*!_?$%&=<>]
+symbol_char_initial = [a-zA-Z0-9*!_?$%&=<>]
+symbol_char_subsequent = [a-zA-Z0-9*!_?$%&=<>-]
 symbol_namespace = symbol_char_initial symbol_char_subsequent* (namespace_divider symbol_char_subsequent+)*
 symbol_name = ( symbol_char_initial+ symbol_char_subsequent* )
@@ -286,7 +286,7 @@ pub value -> ValueAndSpan =
 // Clojure (and thus EDN) regards commas as whitespace, and thus the two-element vectors [1 2] and
 // [1,,,,2] are equivalent, as are the maps {:a 1, :b 2} and {:a 1 :b 2}.
-whitespace = (" " / "\r" / "\n" / "\t" / ",")
-comment = ";" [^\r\n]* ("\r" / "\n")?
+whitespace = [ \r\n\t,]
+comment = ";" [^\r\n]* [\r\n]?
 __ = (whitespace / comment)*