Prefer character sets over backtracking in the EDN parser.

2018-03-05 16:15:59 -08:00 · 2018-03-05 16:15:59 -08:00 · c117f1e958
commit c117f1e958
parent 30bf827d16
1 changed file with 5 additions and 5 deletions
--- a/edn/src/edn.rustpeg
+++ b/edn/src/edn.rustpeg
@@ -77,7 +77,7 @@ alphanumeric = [0-9a-zA-Z]
 octaldigit = [0-7]
 validbase = [3][0-6] / [12][0-9] / [2-9]
 hex = [0-9a-fA-F]
-sign = "-" / "+"
+sign = [+-]

 pub bigint -> ValueAndSpan =
    start:#position b:$( sign? digit+ ) "N" end:#position {
@@ -208,8 +208,8 @@ namespace_separator = "/"
 // TODO: Be more picky here
 // Keywords follow the rules of symbols, except they can (and must) begin with :
 // e.g. :fred or :my/fred. See https://github.com/edn-format/edn#keywords
-symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>]
-symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [-*!_?$%&=<>]
+symbol_char_initial = [a-zA-Z0-9*!_?$%&=<>]
+symbol_char_subsequent = [a-zA-Z0-9*!_?$%&=<>-]

 symbol_namespace = symbol_char_initial symbol_char_subsequent* (namespace_divider symbol_char_subsequent+)*
 symbol_name = ( symbol_char_initial+ symbol_char_subsequent* )
@@ -286,7 +286,7 @@ pub value -> ValueAndSpan =

 // Clojure (and thus EDN) regards commas as whitespace, and thus the two-element vectors [1 2] and
 // [1,,,,2] are equivalent, as are the maps {:a 1, :b 2} and {:a 1 :b 2}.
-whitespace = (" " / "\r" / "\n" / "\t" / ",")
-comment = ";" [^\r\n]* ("\r" / "\n")?
+whitespace = [ \r\n\t,]
+comment = ";" [^\r\n]* [\r\n]?

 __ = (whitespace / comment)*