Consolidate edn peg rules to better parse keywords and symbols, r=ncalexan. Fixes #219
This commit is contained in:
commit
c038c11017
4 changed files with 79 additions and 69 deletions
|
@ -14,4 +14,4 @@ num = "0.1.35"
|
||||||
ordered-float = "0.4.0"
|
ordered-float = "0.4.0"
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
peg = "0.4"
|
peg = "0.5.1"
|
||||||
|
|
|
@ -27,28 +27,25 @@ use types::Value;
|
||||||
// TODO: Support tagged elements
|
// TODO: Support tagged elements
|
||||||
// TODO: Support discard
|
// TODO: Support discard
|
||||||
|
|
||||||
#[export]
|
pub nil -> Value =
|
||||||
nil -> Value = "nil" {
|
"nil" { Value::Nil }
|
||||||
Value::Nil
|
|
||||||
}
|
|
||||||
|
|
||||||
#[export]
|
pub boolean -> Value =
|
||||||
boolean -> Value =
|
|
||||||
"true" { Value::Boolean(true) } /
|
"true" { Value::Boolean(true) } /
|
||||||
"false" { Value::Boolean(false) }
|
"false" { Value::Boolean(false) }
|
||||||
|
|
||||||
digit = [0-9]
|
digit = [0-9]
|
||||||
sign = "-" / "+"
|
sign = "-" / "+"
|
||||||
|
|
||||||
#[export]
|
pub bigint -> Value =
|
||||||
bigint -> Value = b:$( sign? digit+ ) "N" {
|
b:$( sign? digit+ ) "N" {
|
||||||
Value::BigInteger(b.parse::<BigInt>().unwrap())
|
Value::BigInteger(b.parse::<BigInt>().unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[export]
|
pub integer -> Value =
|
||||||
integer -> Value = i:$( sign? digit+ ) {
|
i:$( sign? digit+ ) {
|
||||||
Value::Integer(i.parse::<i64>().unwrap())
|
Value::Integer(i.parse::<i64>().unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
frac = sign? digit+ "." digit+
|
frac = sign? digit+ "." digit+
|
||||||
exp = sign? digit+ ("e" / "E") sign? digit+
|
exp = sign? digit+ ("e" / "E") sign? digit+
|
||||||
|
@ -56,10 +53,10 @@ frac_exp = sign? digit+ "." digit+ ("e" / "E") sign? digit+
|
||||||
|
|
||||||
// The order here is important - frac_exp must come before (exp / frac) or the
|
// The order here is important - frac_exp must come before (exp / frac) or the
|
||||||
// parser assumes exp or frac when the float is really a frac_exp and fails
|
// parser assumes exp or frac when the float is really a frac_exp and fails
|
||||||
#[export]
|
pub float -> Value =
|
||||||
float -> Value = f:$( frac_exp / exp / frac ) {
|
f:$( frac_exp / exp / frac ) {
|
||||||
Value::Float(OrderedFloat(f.parse::<f64>().unwrap()))
|
Value::Float(OrderedFloat(f.parse::<f64>().unwrap()))
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: \newline, \return, \space and \tab
|
// TODO: \newline, \return, \space and \tab
|
||||||
special_char = quote / tab
|
special_char = quote / tab
|
||||||
|
@ -67,80 +64,74 @@ quote = "\\\""
|
||||||
tab = "\\tab"
|
tab = "\\tab"
|
||||||
char = [^"] / special_char
|
char = [^"] / special_char
|
||||||
|
|
||||||
#[export]
|
pub text -> Value =
|
||||||
text -> Value = "\"" t:$( char* ) "\"" {
|
"\"" t:$( char* ) "\"" {
|
||||||
Value::Text(t.to_string())
|
Value::Text(t.to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace_divider = "."
|
namespace_divider = "."
|
||||||
namespace_separator = "/"
|
namespace_separator = "/"
|
||||||
|
|
||||||
// TODO: Be more picky here
|
// TODO: Be more picky here
|
||||||
|
// Keywords follow the rules of symbols, except they can (and must) begin with :
|
||||||
|
// e.g. :fred or :my/fred. See https://github.com/edn-format/edn#keywords
|
||||||
symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>]
|
symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>]
|
||||||
symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [-*!_?$%&=<>]
|
symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [-*!_?$%&=<>]
|
||||||
|
|
||||||
symbol_namespace = symbol_char_initial+ (namespace_divider symbol_char_subsequent+)*
|
symbol_namespace = symbol_char_initial symbol_char_subsequent* (namespace_divider symbol_char_subsequent+)*
|
||||||
symbol_name = ( symbol_char_initial+ / "." ) ( symbol_char_subsequent* / "." )
|
symbol_name = ( symbol_char_initial+ / "." ) ( symbol_char_subsequent* / "." )
|
||||||
|
|
||||||
keyword_prefix = ":"
|
keyword_prefix = ":"
|
||||||
|
|
||||||
// TODO: More chars here?
|
pub symbol -> Value =
|
||||||
keyword_namespace_char = [a-z] / [A-Z] / [0-9]
|
ns:( sns:$(symbol_namespace) namespace_separator {
|
||||||
keyword_namespace = keyword_namespace_char+ (namespace_divider keyword_namespace_char+)*
|
sns
|
||||||
|
})? n:$(symbol_name) {
|
||||||
keyword_name_char = [a-z] / [A-Z] / [0-9] / "."
|
|
||||||
keyword_name = keyword_name_char+
|
|
||||||
|
|
||||||
#[export]
|
|
||||||
symbol -> Value
|
|
||||||
= ns:( sns:$(symbol_namespace) namespace_separator { sns })? n:$(symbol_name) {
|
|
||||||
types::to_symbol(ns, n)
|
types::to_symbol(ns, n)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[export]
|
pub keyword -> Value =
|
||||||
keyword -> Value
|
keyword_prefix ns:( sns:$(symbol_namespace) namespace_separator {
|
||||||
= keyword_prefix ns:( kns:$(keyword_namespace) namespace_separator { kns })? n:$(keyword_name) {
|
sns
|
||||||
|
})? n:$(symbol_name) {
|
||||||
types::to_keyword(ns, n)
|
types::to_keyword(ns, n)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[export]
|
pub list -> Value =
|
||||||
list -> Value = "(" v:(value)* ")" {
|
"(" v:(value)* ")" {
|
||||||
Value::List(LinkedList::from_iter(v))
|
Value::List(LinkedList::from_iter(v))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[export]
|
pub vector -> Value =
|
||||||
vector -> Value = "[" v:(value)* "]" {
|
"[" v:(value)* "]" {
|
||||||
Value::Vector(v)
|
Value::Vector(v)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[export]
|
pub set -> Value =
|
||||||
set -> Value = "#{" v:(value)* "}" {
|
"#{" v:(value)* "}" {
|
||||||
Value::Set(BTreeSet::from_iter(v))
|
Value::Set(BTreeSet::from_iter(v))
|
||||||
}
|
}
|
||||||
|
|
||||||
pair -> (Value, Value) = k:(value) v:(value) {
|
pair -> (Value, Value) =
|
||||||
|
k:(value) v:(value) {
|
||||||
(k, v)
|
(k, v)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[export]
|
pub map -> Value =
|
||||||
map -> Value = "{" v:(pair)* "}" {
|
"{" v:(pair)* "}" {
|
||||||
Value::Map(BTreeMap::from_iter(v))
|
Value::Map(BTreeMap::from_iter(v))
|
||||||
}
|
}
|
||||||
|
|
||||||
// It's important that float comes before integer or the parser assumes that
|
// It's important that float comes before integer or the parser assumes that
|
||||||
// floats are integers and fails to parse
|
// floats are integers and fails to parse
|
||||||
#[export]
|
pub value -> Value =
|
||||||
value -> Value
|
__ v:(nil / boolean / float / bigint / integer / text / keyword / symbol / list / vector / map / set) __ {
|
||||||
= __ v:(nil / boolean / float / bigint / integer / text /
|
|
||||||
keyword / symbol /
|
|
||||||
list / vector / map / set) __ {
|
|
||||||
v
|
v
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clojure (and thus EDN) regards commas as whitespace, and thus the two-element vectors [1 2] and
|
// Clojure (and thus EDN) regards commas as whitespace, and thus the two-element vectors [1 2] and
|
||||||
// [1,,,,2] are equivalent, as are the maps {:a 1, :b 2} and {:a 1 :b 2}.
|
// [1,,,,2] are equivalent, as are the maps {:a 1, :b 2} and {:a 1 :b 2}.
|
||||||
whitespace = (" " / "\r" / "\n" / "\t" / ",")
|
whitespace = (" " / "\r" / "\n" / "\t" / ",")
|
||||||
|
|
||||||
comment = ";" [^\r\n]* ("\r" / "\n")?
|
comment = ";" [^\r\n]* ("\r" / "\n")?
|
||||||
|
|
||||||
__ = (whitespace / comment)*
|
__ = (whitespace / comment)*
|
||||||
|
|
|
@ -50,7 +50,7 @@ impl Display for Value {
|
||||||
// See https://github.com/mozilla/mentat/issues/232
|
// See https://github.com/mozilla/mentat/issues/232
|
||||||
fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result {
|
fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result {
|
||||||
match *self {
|
match *self {
|
||||||
Nil => write!(f, "null"),
|
Nil => write!(f, "nil"),
|
||||||
Boolean(v) => write!(f, "{}", v),
|
Boolean(v) => write!(f, "{}", v),
|
||||||
Integer(v) => write!(f, "{}", v),
|
Integer(v) => write!(f, "{}", v),
|
||||||
BigInteger(ref v) => write!(f, "{}N", v),
|
BigInteger(ref v) => write!(f, "{}N", v),
|
||||||
|
@ -280,7 +280,7 @@ mod test {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_print_edn() {
|
fn test_print_edn() {
|
||||||
assert_eq!("[ 1 2 ( 3.14 ) #{ 4N } { :foo/bar 42 } [ ] :five :six/seven eight nine/ten true false null ]",
|
assert_eq!("[ 1 2 ( 3.14 ) #{ 4N } { :foo/bar 42 } [ ] :five :six/seven eight nine/ten true false nil ]",
|
||||||
Value::Vector(vec![
|
Value::Vector(vec![
|
||||||
Value::Integer(1),
|
Value::Integer(1),
|
||||||
Value::Integer(2),
|
Value::Integer(2),
|
||||||
|
|
|
@ -105,17 +105,36 @@ fn test_text() {
|
||||||
fn test_symbol() {
|
fn test_symbol() {
|
||||||
assert_eq!(symbol("$").unwrap(), s_plain("$"));
|
assert_eq!(symbol("$").unwrap(), s_plain("$"));
|
||||||
assert_eq!(symbol(".").unwrap(), s_plain("."));
|
assert_eq!(symbol(".").unwrap(), s_plain("."));
|
||||||
//assert_eq!(symbol("r_r").unwrap(), s_plain("r_r"));
|
|
||||||
//assert_eq!(symbol("$symbol").unwrap(), s_plain("$symbol"));
|
assert_eq!(symbol("hello/world").unwrap(), s_ns("hello", "world"));
|
||||||
//assert_eq!(symbol("hello").unwrap(), s_plain("hello"));
|
assert_eq!(symbol("foo-bar/baz-boz").unwrap(), s_ns("foo-bar", "baz-boz"));
|
||||||
|
|
||||||
|
assert_eq!(symbol("foo-bar/baz_boz").unwrap(), s_ns("foo-bar", "baz_boz"));
|
||||||
|
assert_eq!(symbol("foo_bar/baz-boz").unwrap(), s_ns("foo_bar", "baz-boz"));
|
||||||
|
assert_eq!(symbol("foo_bar/baz_boz").unwrap(), s_ns("foo_bar", "baz_boz"));
|
||||||
|
|
||||||
|
assert_eq!(symbol("symbol").unwrap(), s_plain("symbol"));
|
||||||
|
assert_eq!(symbol("hello").unwrap(), s_plain("hello"));
|
||||||
|
assert_eq!(symbol("foo-bar").unwrap(), s_plain("foo-bar"));
|
||||||
|
assert_eq!(symbol("foo_bar").unwrap(), s_plain("foo_bar"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_keyword() {
|
fn test_keyword() {
|
||||||
assert_eq!(keyword(":hello/world").unwrap(), k_ns("hello", "world"));
|
assert_eq!(keyword(":hello/world").unwrap(), k_ns("hello", "world"));
|
||||||
|
assert_eq!(keyword(":foo-bar/baz-boz").unwrap(), k_ns("foo-bar", "baz-boz"));
|
||||||
|
|
||||||
|
assert_eq!(keyword(":foo-bar/baz_boz").unwrap(), k_ns("foo-bar", "baz_boz"));
|
||||||
|
assert_eq!(keyword(":foo_bar/baz-boz").unwrap(), k_ns("foo_bar", "baz-boz"));
|
||||||
|
assert_eq!(keyword(":foo_bar/baz_boz").unwrap(), k_ns("foo_bar", "baz_boz"));
|
||||||
|
|
||||||
assert_eq!(keyword(":symbol").unwrap(), k_plain("symbol"));
|
assert_eq!(keyword(":symbol").unwrap(), k_plain("symbol"));
|
||||||
assert_eq!(keyword(":hello").unwrap(), k_plain("hello"));
|
assert_eq!(keyword(":hello").unwrap(), k_plain("hello"));
|
||||||
|
assert_eq!(keyword(":foo-bar").unwrap(), k_plain("foo-bar"));
|
||||||
|
assert_eq!(keyword(":foo_bar").unwrap(), k_plain("foo_bar"));
|
||||||
|
|
||||||
|
assert!(keyword(":").is_err());
|
||||||
|
assert!(keyword(":foo/").is_err());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Reference in a new issue