From c117f1e958959295b3347c740e9b6dc34516c727 Mon Sep 17 00:00:00 2001 From: Kevin Mehall Date: Mon, 5 Mar 2018 16:15:59 -0800 Subject: [PATCH] Prefer character sets over backtracking in the EDN parser. --- edn/src/edn.rustpeg | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/edn/src/edn.rustpeg b/edn/src/edn.rustpeg index d26b77c8..51f76310 100644 --- a/edn/src/edn.rustpeg +++ b/edn/src/edn.rustpeg @@ -77,7 +77,7 @@ alphanumeric = [0-9a-zA-Z] octaldigit = [0-7] validbase = [3][0-6] / [12][0-9] / [2-9] hex = [0-9a-fA-F] -sign = "-" / "+" +sign = [+-] pub bigint -> ValueAndSpan = start:#position b:$( sign? digit+ ) "N" end:#position { @@ -208,8 +208,8 @@ namespace_separator = "/" // TODO: Be more picky here // Keywords follow the rules of symbols, except they can (and must) begin with : // e.g. :fred or :my/fred. See https://github.com/edn-format/edn#keywords -symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>] -symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [-*!_?$%&=<>] +symbol_char_initial = [a-zA-Z0-9*!_?$%&=<>] +symbol_char_subsequent = [a-zA-Z0-9*!_?$%&=<>-] symbol_namespace = symbol_char_initial symbol_char_subsequent* (namespace_divider symbol_char_subsequent+)* symbol_name = ( symbol_char_initial+ symbol_char_subsequent* ) @@ -286,7 +286,7 @@ pub value -> ValueAndSpan = // Clojure (and thus EDN) regards commas as whitespace, and thus the two-element vectors [1 2] and // [1,,,,2] are equivalent, as are the maps {:a 1, :b 2} and {:a 1 :b 2}. -whitespace = (" " / "\r" / "\n" / "\t" / ",") -comment = ";" [^\r\n]* ("\r" / "\n")? +whitespace = [ \r\n\t,] +comment = ";" [^\r\n]* [\r\n]? __ = (whitespace / comment)*