mentat/edn/src/edn.rustpeg

/* vim: set filetype=rust.rustpeg */

// Copyright 2016 Mozilla
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
// this file except in compliance with the License. You may obtain a copy of the
// License at http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

use std::collections::{BTreeSet, BTreeMap, LinkedList};
use std::iter::FromIterator;
use std::f64::{NAN, INFINITY, NEG_INFINITY};

use num::BigInt;
use ordered_float::OrderedFloat;
use types;
use types::Value;

// Goal: Be able to parse https://github.com/edn-format/edn
// Also extensible to help parse http://docs.datomic.com/query.html

// Debugging hint: test using `cargo test --features peg/trace -- --nocapture`
// to trace where the parser is failing

// TODO: Support tagged elements
// TODO: Support discard

// Parses the literal `nil` into `Value::Nil`.
//
// The negative lookahead refuses the match when `nil` is only the prefix of a
// longer symbol (e.g. `nilfoo` or `nil/foo`). Without it, such input inside a
// collection is split into `nil` followed by a second, unrelated value.
pub nil -> Value =
    "nil" !(symbol_char_subsequent / namespace_divider / namespace_separator) { Value::Nil }

// Parses the tagged literal `#f NaN` (exactly one space between the tag and
// `NaN`) into a floating-point not-a-number value.
pub nan -> Value =
    "#f NaN" {
        let not_a_number = OrderedFloat(NAN);
        Value::Float(not_a_number)
    }

// Parses the tagged literals for positive and negative infinity.
//
// The sign is mandatory: `+` yields positive infinity, `-` yields negative
// infinity. Whitespace between the `#f` tag and the sign is optional, so both
// the compact form (`#f+Infinity`) and the spaced form used by the `nan` rule
// (`#f +Infinity`) are accepted.
pub infinity -> Value =
    "#f" whitespace* s:$(sign) "Infinity" {
        Value::Float(OrderedFloat(if s == "+" { INFINITY } else { NEG_INFINITY }))
    }

// Parses the literals `true` and `false` into `Value::Boolean`.
//
// Each alternative carries a negative lookahead so that a longer symbol which
// merely starts with one of the literals (e.g. `truex`, `false/alarm`) is not
// split into a boolean followed by a second value.
pub boolean -> Value =
    "true" !(symbol_char_subsequent / namespace_divider / namespace_separator) { Value::Boolean(true) } /
    "false" !(symbol_char_subsequent / namespace_divider / namespace_separator) { Value::Boolean(false) }

// A single ASCII decimal digit.
digit = [0-9]
// A numeric sign character; the numeric rules below use it as `sign?`.
sign = "-" / "+"

// Parses an arbitrary-precision integer: an optionally signed run of digits
// immediately followed by the suffix `N`. Only the sign and digits are
// captured; the suffix is matched but not handed to the BigInt parser.
pub bigint -> Value =
    digits:$( sign? digit+ ) "N" {
        let parsed: BigInt = digits.parse().unwrap();
        Value::BigInteger(parsed)
    }

// Parses an optionally signed run of digits as a 64-bit signed integer.
//
// The grammar alone does not bound the number of digits, so the text may not
// fit in an i64. The conditional action (`{? ... }`) turns that overflow into
// an ordinary rule failure instead of panicking inside the parser; the string
// in `Err` is what the parser reports as the expected input.
pub integer -> Value =
    i:$( sign? digit+ ) {?
        i.parse::<i64>()
            .map(Value::Integer)
            .map_err(|_| "integer that fits in 64 bits")
    }

// The three accepted shapes of a floating-point literal. Every shape requires
// at least one digit before the "." or exponent marker, and at least one digit
// after it.
//   frac:     digits "." digits                 e.g. 1.5
//   exp:      digits exponent                   e.g. 1e5, 1E-5
//   frac_exp: digits "." digits exponent        e.g. 1.5e+5
frac =     sign? digit+ "." digit+
exp =      sign? digit+            ("e" / "E") sign? digit+
frac_exp = sign? digit+ "." digit+ ("e" / "E") sign? digit+

// Parses a floating-point literal into `Value::Float`.
//
// Alternatives are tried in order and the first match wins, so the longest
// shape (frac_exp) has to be listed first; otherwise `frac` or `exp` would
// match a prefix of a frac_exp literal and leave the rest unparsed.
pub float -> Value =
    literal:$( frac_exp / exp / frac ) {
        let parsed: f64 = literal.parse().unwrap();
        Value::Float(OrderedFloat(parsed))
    }

// Characters allowed between the double quotes of a string.
//
// Recognised escape sequences are `\\`, `\"` and `\tab`; any other backslash
// is treated as an ordinary character.
// TODO: \newline, \return and \space
//
// Escapes must be tried BEFORE the catch-all `[^"]`. Choice is ordered, and
// `[^"]` happily consumes a lone backslash, which would leave the quote of a
// `\"` sequence to be taken as the end of the string. `\\` has to be an escape
// of its own so that a string ending in an escaped backslash still terminates.
special_char = "\\\\" / quote / tab
quote = "\\\""
tab = "\\tab"
char = special_char / [^"]

// Parses a double-quoted string into `Value::Text`. The text between the
// quotes is stored exactly as written in the input; escape sequences are not
// translated.
pub text -> Value =
    "\"" body:$( char* ) "\"" {
        let owned = String::from(body);
        Value::Text(owned)
    }

// Separates the segments inside a namespace (the "." in `foo.bar/baz`).
namespace_divider = "."
// Separates the namespace from the name (the "/" in `foo.bar/baz`).
namespace_separator = "/"

// TODO: Be more picky here
// Keywords follow the rules of symbols, except they can (and must) begin with :
// e.g. :fred or :my/fred. See https://github.com/edn-format/edn#keywords
//
// symbol_char_initial: characters that may start a symbol or namespace.
// symbol_char_subsequent: the same set plus "-", allowed after the first
// character.
symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>]
symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [-*!_?$%&=<>]

// A namespace: one initial character, any number of subsequent characters,
// then zero or more "."-prefixed segments of at least one subsequent character.
symbol_namespace = symbol_char_initial symbol_char_subsequent* (namespace_divider symbol_char_subsequent+)*
// A name: either a run of initial characters or a single ".", followed by any
// number of subsequent characters.
// NOTE(review): `symbol_char_subsequent*` can match the empty string and so
// never fails, which makes the trailing `/ "."` alternative unreachable —
// confirm what was intended.
symbol_name = ( symbol_char_initial+ / "." ) ( symbol_char_subsequent* / "." )

// The leading ":" that distinguishes a keyword from a symbol.
keyword_prefix = ":"

// Parses a symbol, optionally qualified as `namespace/name`.
//
// The namespace group is optional: when no "/" follows a candidate namespace
// the group fails as a whole, the parser backtracks, and the same text is
// matched as the plain name instead. Both captured slices are handed to
// `types::to_symbol`, which builds the resulting value.
pub symbol -> Value =
    namespace:(
        prefix:$(symbol_namespace) namespace_separator { prefix }
    )?
    name:$(symbol_name) {
        types::to_symbol(namespace, name)
    }

// Parses a keyword: a ":" followed by the same `namespace/name` or bare `name`
// shape that the `symbol` rule accepts. The ":" itself is not captured; the
// optional namespace and the name are handed to `types::to_keyword`.
pub keyword -> Value =
    keyword_prefix
    namespace:(
        prefix:$(symbol_namespace) namespace_separator { prefix }
    )?
    name:$(symbol_name) {
        types::to_keyword(namespace, name)
    }

// Parses a parenthesised sequence of zero or more values into `Value::List`,
// preserving the order in which the elements appear.
pub list -> Value =
    "(" items:(value)* ")" {
        let linked: LinkedList<Value> = items.into_iter().collect();
        Value::List(linked)
    }

// Parses a square-bracketed sequence of zero or more values into
// `Value::Vector`. The parsed elements are used as-is, in input order.
pub vector -> Value =
    "[" elements:(value)* "]" {
        Value::Vector(elements)
    }

// Parses `#{ ... }` with zero or more values into `Value::Set`. The elements
// are collected into a BTreeSet, so duplicates collapse and input order is not
// retained.
pub set -> Value =
    "#{" members:(value)* "}" {
        let unique = BTreeSet::from_iter(members);
        Value::Set(unique)
    }

// One map entry: two consecutive values, returned as a (key, value) tuple.
pair -> (Value, Value) =
    key:value val:value {
        (key, val)
    }

// Parses `{ ... }` containing zero or more key/value pairs into `Value::Map`.
// Entries are collected into a BTreeMap, so when a key is repeated the later
// entry replaces the earlier one.
pub map -> Value =
    "{" entries:(pair)* "}" {
        let table = BTreeMap::from_iter(entries);
        Value::Map(table)
    }

// Parses any single EDN value, with optional whitespace/comments on both sides.
//
// Alternatives are tried in order and the first match wins, so ordering
// matters: float must precede integer (an integer would match the leading
// digits of a float and stop), and bigint must precede integer for the same
// reason (the digits would match and the `N` suffix would be left over).
pub value -> Value =
    __ parsed:(
        nil / nan / infinity / boolean /
        float / bigint / integer /
        text / keyword / symbol /
        list / vector / map / set
    ) __ {
        parsed
    }

// Clojure (and thus EDN) regards commas as whitespace, and thus the two-element vectors [1 2] and
// [1,,,,2] are equivalent, as are the maps {:a 1, :b 2} and {:a 1 :b 2}.
whitespace = (" " / "\r" / "\n" / "\t" / ",")
// A comment starts at ";" and runs to the end of the line. The terminating
// "\r" or "\n" is optional so that a comment may also end at end of input.
comment = ";" [^\r\n]* ("\r" / "\n")?

// Skippable filler: any mix of whitespace and comments, possibly empty.
__ = (whitespace / comment)*
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00			`/* vim: set filetype=rust.rustpeg */`

			`// Copyright 2016 Mozilla`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License"); you may not use`
			`// this file except in compliance with the License. You may obtain a copy of the`
			`// License at http://www.apache.org/licenses/LICENSE-2.0`
			`// Unless required by applicable law or agreed to in writing, software distributed`
			`// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR`
			`// CONDITIONS OF ANY KIND, either express or implied. See the License for the`
			`// specific language governing permissions and limitations under the License.`

			`use std::collections::{BTreeSet, BTreeMap, LinkedList};`
			`use std::iter::FromIterator;`
Parse and display EDN values for NaN, +Infinity and -Infinity. Fixes #232 (#238) r=victorporof 2017-02-03 18:14:23 +00:00			`use std::f64::{NAN, INFINITY, NEG_INFINITY};`
Read EDN keywords and symbols as rich types. Fixes #154. r=nalexander 2017-01-11 21:51:34 +00:00
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00			`use num::BigInt;`
			`use ordered_float::OrderedFloat;`
Read EDN keywords and symbols as rich types. Fixes #154. r=nalexander 2017-01-11 21:51:34 +00:00			`use types;`
			`use types::Value;`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00
			`// Goal: Be able to parse https://github.com/edn-format/edn`
			`// Also extensible to help parse http://docs.datomic.com/query.html`

			// Debugging hint: test using `cargo test --features peg/trace -- --nocapture`
			`// to trace where the parser is failing`

			`// TODO: Support tagged elements`
			`// TODO: Support discard`

Update rustpeg to latest version and follow new syntax and formatting rules Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:36:53 +00:00			`pub nil -> Value =`
			`"nil" { Value::Nil }`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00
Parse and display EDN values for NaN, +Infinity and -Infinity. Fixes #232 (#238) r=victorporof 2017-02-03 18:14:23 +00:00			`pub nan -> Value =`
			`"#f NaN" { Value::Float(OrderedFloat(NAN)) }`

			`pub infinity -> Value =`
			`"#f" s:$(sign) "Infinity" {`
			`Value::Float(OrderedFloat(if s == "+" { INFINITY } else { NEG_INFINITY }))`
			`}`

Update rustpeg to latest version and follow new syntax and formatting rules Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:36:53 +00:00			`pub boolean -> Value =`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00			`"true" { Value::Boolean(true) } /`
			`"false" { Value::Boolean(false) }`

			`digit = [0-9]`
			`sign = "-" / "+"`

Update rustpeg to latest version and follow new syntax and formatting rules Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:36:53 +00:00			`pub bigint -> Value =`
			`b:$( sign? digit+ ) "N" {`
			`Value::BigInteger(b.parse::<BigInt>().unwrap())`
			`}`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00
Update rustpeg to latest version and follow new syntax and formatting rules Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:36:53 +00:00			`pub integer -> Value =`
			`i:$( sign? digit+ ) {`
			`Value::Integer(i.parse::<i64>().unwrap())`
			`}`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00
			`frac = sign? digit+ "." digit+`
			`exp = sign? digit+ ("e" / "E") sign? digit+`
			`frac_exp = sign? digit+ "." digit+ ("e" / "E") sign? digit+`

			`// The order here is important - frac_exp must come before (exp / frac) or the`
			`// parser assumes exp or frac when the float is really a frac_exp and fails`
Update rustpeg to latest version and follow new syntax and formatting rules Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:36:53 +00:00			`pub float -> Value =`
			`f:$( frac_exp / exp / frac ) {`
			`Value::Float(OrderedFloat(f.parse::<f64>().unwrap()))`
			`}`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00
			`// TODO: \newline, \return, \space and \tab`
			`special_char = quote / tab`
			`quote = "\\\""`
			`tab = "\\tab"`
			`char = [^"] / special_char`

Update rustpeg to latest version and follow new syntax and formatting rules Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:36:53 +00:00			`pub text -> Value =`
			`"\"" t:$( char* ) "\"" {`
			`Value::Text(t.to_string())`
			`}`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00
Read EDN keywords and symbols as rich types. Fixes #154. r=nalexander 2017-01-11 21:51:34 +00:00			`namespace_divider = "."`
			`namespace_separator = "/"`

Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00			`// TODO: Be more picky here`
Unify and generalize keywords and symbols parsing Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:52:34 +00:00			`// Keywords follow the rules of symbols, except they can (and must) begin with :`
			`// e.g. :fred or :my/fred. See https://github.com/edn-format/edn#keywords`
Read EDN keywords and symbols as rich types. Fixes #154. r=nalexander 2017-01-11 21:51:34 +00:00			`symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>]`
			`symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [-*!_?$%&=<>]`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00
Unify and generalize keywords and symbols parsing Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:52:34 +00:00			`symbol_namespace = symbol_char_initial symbol_char_subsequent* (namespace_divider symbol_char_subsequent+)*`
Read EDN keywords and symbols as rich types. Fixes #154. r=nalexander 2017-01-11 21:51:34 +00:00			`symbol_name = ( symbol_char_initial+ / "." ) ( symbol_char_subsequent* / "." )`

			`keyword_prefix = ":"`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00
Update rustpeg to latest version and follow new syntax and formatting rules Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:36:53 +00:00			`pub symbol -> Value =`
Unify and generalize keywords and symbols parsing Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:52:34 +00:00			`ns:( sns:$(symbol_namespace) namespace_separator {`
			`sns`
			`})? n:$(symbol_name) {`
Read EDN keywords and symbols as rich types. Fixes #154. r=nalexander 2017-01-11 21:51:34 +00:00			`types::to_symbol(ns, n)`
			`}`

Update rustpeg to latest version and follow new syntax and formatting rules Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:36:53 +00:00			`pub keyword -> Value =`
Unify and generalize keywords and symbols parsing Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:52:34 +00:00			`keyword_prefix ns:( sns:$(symbol_namespace) namespace_separator {`
			`sns`
			`})? n:$(symbol_name) {`
Read EDN keywords and symbols as rich types. Fixes #154. r=nalexander 2017-01-11 21:51:34 +00:00			`types::to_keyword(ns, n)`
			`}`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00
Update rustpeg to latest version and follow new syntax and formatting rules Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:36:53 +00:00			`pub list -> Value =`
			`"(" v:(value)* ")" {`
			`Value::List(LinkedList::from_iter(v))`
			`}`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00
Update rustpeg to latest version and follow new syntax and formatting rules Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:36:53 +00:00			`pub vector -> Value =`
			`"[" v:(value)* "]" {`
			`Value::Vector(v)`
			`}`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00
Update rustpeg to latest version and follow new syntax and formatting rules Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:36:53 +00:00			`pub set -> Value =`
			`"#{" v:(value)* "}" {`
			`Value::Set(BTreeSet::from_iter(v))`
			`}`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00
Update rustpeg to latest version and follow new syntax and formatting rules Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:36:53 +00:00			`pair -> (Value, Value) =`
			`k:(value) v:(value) {`
			`(k, v)`
			`}`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00
Update rustpeg to latest version and follow new syntax and formatting rules Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:36:53 +00:00			`pub map -> Value =`
			`"{" v:(pair)* "}" {`
			`Value::Map(BTreeMap::from_iter(v))`
			`}`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00
			`// It's important that float comes before integer or the parser assumes that`
			`// floats are integers and fails to parse`
Update rustpeg to latest version and follow new syntax and formatting rules Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:36:53 +00:00			`pub value -> Value =`
Parse and display EDN values for NaN, +Infinity and -Infinity. Fixes #232 (#238) r=victorporof 2017-02-03 18:14:23 +00:00			`__ v:(nil / nan / infinity / boolean / float / bigint / integer / text / keyword / symbol / list / vector / map / set) __ {`
Update rustpeg to latest version and follow new syntax and formatting rules Signed-off-by: Victor Porof <vporof@mozilla.com> 2017-02-02 10:36:53 +00:00			`v`
			`}`
edn: Bound values by optional whitespace; treat comma as whitespace. 2017-01-17 19:26:45 +00:00
			`// Clojure (and thus EDN) regards commas as whitespace, and thus the two-element vectors [1 2] and`
			`// [1,,,,2] are equivalent, as are the maps {:a 1, :b 2} and {:a 1 :b 2}.`
			`whitespace = (" " / "\r" / "\n" / "\t" / ",")`
edn: Allow comments. EDN supports only one type of comment: initiated by ; and lasting until the end of the current line or the end of the input stream. 2017-01-17 19:25:31 +00:00			`comment = ";" [^\r\n]* ("\r" / "\n")?`
Implement a basic EDN parser. (#149) r=rnewman,bgrins,nalexander The parser mostly works and has a decent test suite. It parses all the queries issued by the Tofino UAS, with some caveats. Known flaws: * No support for tagged elements, comments, discarded elements or "'". * Incomplete support for escaped characters in strings and the range of characters that are allowed in keywords and symbols. * Possible whitespace handling problems. 2017-01-06 15:15:57 +00:00
edn: Allow comments. EDN supports only one type of comment: initiated by ; and lasting until the end of the current line or the end of the input stream. 2017-01-17 19:25:31 +00:00			`__ = (whitespace / comment)*`