Consolidate edn peg rules to better parse keywords and symbols, r=ncalexan. Fixes #219

2017-02-03 09:08:24 +01:00 · 2017-02-03 09:08:24 +01:00 · c038c11017
commit c038c11017
parent 5b770a54cd 9ee0ac8e00
4 changed files with 79 additions and 69 deletions
--- a/edn/Cargo.toml
+++ b/edn/Cargo.toml
@ -14,4 +14,4 @@ num = "0.1.35"
 ordered-float = "0.4.0"
 [build-dependencies]
-peg = "0.4"
+peg = "0.5.1"
--- a/edn/src/edn.rustpeg
+++ b/edn/src/edn.rustpeg
@ -27,28 +27,25 @@ use types::Value;
 // TODO: Support tagged elements
 // TODO: Support discard
-#[export]
+pub nil -> Value =
-nil -> Value = "nil" {
+    "nil" { Value::Nil }
    Value::Nil
 }
-#[export]
+pub boolean -> Value =
 boolean -> Value =
    "true" { Value::Boolean(true) } /
    "false" { Value::Boolean(false) }
 // A single ASCII decimal digit.
 digit = [0-9]
 // A leading sign character; the numeric rules below use it as `sign?`, so it is optional there.
 sign = "-" / "+"
-#[export]
+pub bigint -> Value =
-bigint -> Value = b:$( sign? digit+ ) "N" {
+    b:$( sign? digit+ ) "N" {
        Value::BigInteger(b.parse::<BigInt>().unwrap())
-}
+    }
-#[export]
+pub integer -> Value =
-integer -> Value = i:$( sign? digit+ ) {
+    i:$( sign? digit+ ) {
        Value::Integer(i.parse::<i64>().unwrap())
-}
+    }
 // Fractional form with no exponent: optional sign, digits, ".", digits (e.g. -1.5).
 frac =     sign? digit+ "." digit+
 // Exponent form with no fractional part: optional sign, digits, "e" or "E",
 // optional exponent sign, digits (e.g. 1e10 or 2E-3).
 exp =      sign? digit+            ("e" / "E") sign? digit+
@ -56,10 +53,10 @@ frac_exp = sign? digit+ "." digit+ ("e" / "E") sign? digit+
 // The order here is important - frac_exp must come before (exp / frac) or the
 // parser assumes exp or frac when the float is really a frac_exp and fails
-#[export]
+pub float -> Value =
-float -> Value = f:$( frac_exp / exp / frac ) {
+    f:$( frac_exp / exp / frac ) {
        Value::Float(OrderedFloat(f.parse::<f64>().unwrap()))
-}
+    }
 // TODO: \newline, \return, \space and \tab
 special_char = quote / tab
@ -67,80 +64,74 @@ quote = "\\\""
 tab = "\\tab"
 char = [^"] / special_char
-#[export]
+pub text -> Value =
-text -> Value = "\"" t:$(  char* ) "\"" {
+    "\"" t:$( char* ) "\"" {
        Value::Text(t.to_string())
-}
+    }
 namespace_divider = "."
 namespace_separator = "/"
 // TODO: Be more picky here
 // Keywords follow the rules of symbols, except they can (and must) begin with :
 // e.g. :fred or :my/fred. See https://github.com/edn-format/edn#keywords
 // Character class for the leading character(s) of a namespace or name.
 // Note that "-" is not in this class.
 symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>]
 // Character class for the characters that follow: the same set as above plus "-".
 symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [-*!_?$%&=<>]
-symbol_namespace = symbol_char_initial+ (namespace_divider symbol_char_subsequent+)*
+symbol_namespace = symbol_char_initial symbol_char_subsequent* (namespace_divider symbol_char_subsequent+)*
 symbol_name = ( symbol_char_initial+ / "." ) ( symbol_char_subsequent* / "." )
 keyword_prefix = ":"
-// TODO: More chars here?
+pub symbol -> Value =
-keyword_namespace_char = [a-z] / [A-Z] / [0-9]
+    ns:( sns:$(symbol_namespace) namespace_separator {
-keyword_namespace = keyword_namespace_char+ (namespace_divider keyword_namespace_char+)*
+        sns
-
+    })? n:$(symbol_name) {
 keyword_name_char = [a-z] / [A-Z] / [0-9] / "."
 keyword_name = keyword_name_char+
 #[export]
 symbol -> Value
    = ns:( sns:$(symbol_namespace) namespace_separator { sns })? n:$(symbol_name) {
        types::to_symbol(ns, n)
    }
-#[export]
+pub keyword -> Value =
-keyword -> Value
+    keyword_prefix ns:( sns:$(symbol_namespace) namespace_separator {
-    = keyword_prefix ns:( kns:$(keyword_namespace) namespace_separator { kns })? n:$(keyword_name) {
+        sns
    })? n:$(symbol_name) {
        types::to_keyword(ns, n)
    }
-#[export]
+pub list -> Value =
-list -> Value = "(" v:(value)* ")" {
+    "(" v:(value)* ")" {
        Value::List(LinkedList::from_iter(v))
-}
+    }
-#[export]
+pub vector -> Value =
-vector -> Value = "[" v:(value)* "]" {
+    "[" v:(value)* "]" {
        Value::Vector(v)
-}
+    }
-#[export]
+pub set -> Value =
-set -> Value = "#{" v:(value)* "}" {
+    "#{" v:(value)* "}" {
        Value::Set(BTreeSet::from_iter(v))
-}
+    }
-pair -> (Value, Value) = k:(value) v:(value) {
+pair -> (Value, Value) =
    k:(value) v:(value) {
        (k, v)
-}
+    }
-#[export]
+pub map -> Value =
-map -> Value = "{" v:(pair)* "}" {
+    "{" v:(pair)* "}" {
        Value::Map(BTreeMap::from_iter(v))
-}
+    }
 // It's important that float comes before integer or the parser assumes that
 // floats are integers and fails to parse
-#[export]
+pub value -> Value =
-value -> Value
+    __ v:(nil / boolean / float / bigint / integer / text / keyword / symbol / list / vector / map / set) __ {
    = __ v:(nil / boolean / float / bigint / integer / text /
      keyword / symbol /
      list / vector / map / set) __ {
        v
-}
+    }
 // Clojure (and thus EDN) regards commas as whitespace, and thus the two-element vectors [1 2] and
 // [1,,,,2] are equivalent, as are the maps {:a 1, :b 2} and {:a 1 :b 2}.
 whitespace = (" " / "\r" / "\n" / "\t" / ",")
 // A comment starts at ";" and runs up to the next "\r" or "\n". It consumes at most one
 // such terminator, and the terminator is optional, so a comment may also end at end of input.
 comment = ";" [^\r\n]* ("\r" / "\n")?
 // Any run (possibly empty) of whitespace characters and comments. The `value` rule matches
 // this on both sides of each value.
 __ = (whitespace / comment)*
--- a/edn/src/types.rs
+++ b/edn/src/types.rs
@ -50,7 +50,7 @@ impl Display for Value {
    // See https://github.com/mozilla/mentat/issues/232
    fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result {
        match *self {
-            Nil => write!(f, "null"),
+            Nil => write!(f, "nil"),
            Boolean(v) => write!(f, "{}", v),
            Integer(v) => write!(f, "{}", v),
            BigInteger(ref v) => write!(f, "{}N", v),
@ -280,7 +280,7 @@ mod test {
    #[test]
    fn test_print_edn() {
-        assert_eq!("[ 1 2 ( 3.14 ) #{ 4N } { :foo/bar 42 } [ ] :five :six/seven eight nine/ten true false null ]",
+        assert_eq!("[ 1 2 ( 3.14 ) #{ 4N } { :foo/bar 42 } [ ] :five :six/seven eight nine/ten true false nil ]",
            Value::Vector(vec![
                Value::Integer(1),
                Value::Integer(2),
--- a/edn/tests/tests.rs
+++ b/edn/tests/tests.rs
@ -105,17 +105,36 @@ fn test_text() {
 fn test_symbol() {
    assert_eq!(symbol("$").unwrap(), s_plain("$"));
    assert_eq!(symbol(".").unwrap(), s_plain("."));
-    //assert_eq!(symbol("r_r").unwrap(), s_plain("r_r"));
+
-    //assert_eq!(symbol("$symbol").unwrap(), s_plain("$symbol"));
+    assert_eq!(symbol("hello/world").unwrap(), s_ns("hello", "world"));
-    //assert_eq!(symbol("hello").unwrap(), s_plain("hello"));
+    assert_eq!(symbol("foo-bar/baz-boz").unwrap(), s_ns("foo-bar", "baz-boz"));
    assert_eq!(symbol("foo-bar/baz_boz").unwrap(), s_ns("foo-bar", "baz_boz"));
    assert_eq!(symbol("foo_bar/baz-boz").unwrap(), s_ns("foo_bar", "baz-boz"));
    assert_eq!(symbol("foo_bar/baz_boz").unwrap(), s_ns("foo_bar", "baz_boz"));
    assert_eq!(symbol("symbol").unwrap(), s_plain("symbol"));
    assert_eq!(symbol("hello").unwrap(), s_plain("hello"));
    assert_eq!(symbol("foo-bar").unwrap(), s_plain("foo-bar"));
    assert_eq!(symbol("foo_bar").unwrap(), s_plain("foo_bar"));
 }
 // Exercises the `keyword` parser on namespaced and plain keywords, plus two inputs
 // that are expected to be rejected.
 #[test]
 fn test_keyword() {
    // Namespaced keywords: "-" and "_" are accepted in both the namespace and the name.
    assert_eq!(keyword(":hello/world").unwrap(), k_ns("hello", "world"));
    assert_eq!(keyword(":foo-bar/baz-boz").unwrap(), k_ns("foo-bar", "baz-boz"));
    assert_eq!(keyword(":foo-bar/baz_boz").unwrap(), k_ns("foo-bar", "baz_boz"));
    assert_eq!(keyword(":foo_bar/baz-boz").unwrap(), k_ns("foo_bar", "baz-boz"));
    assert_eq!(keyword(":foo_bar/baz_boz").unwrap(), k_ns("foo_bar", "baz_boz"));
    // Plain keywords with no namespace part.
    assert_eq!(keyword(":symbol").unwrap(), k_plain("symbol"));
    assert_eq!(keyword(":hello").unwrap(), k_plain("hello"));
    assert_eq!(keyword(":foo-bar").unwrap(), k_plain("foo-bar"));
    assert_eq!(keyword(":foo_bar").unwrap(), k_plain("foo_bar"));
    // A bare ":" with no name, and a namespace followed by "/" but no name, must fail to parse.
    assert!(keyword(":").is_err());
    assert!(keyword(":foo/").is_err());
 }
 #[test]