2017-01-06 15:15:57 +00:00
|
|
|
/* vim: set filetype=rust.rustpeg */
|
|
|
|
|
|
|
|
// Copyright 2016 Mozilla
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use
|
|
|
|
// this file except in compliance with the License. You may obtain a copy of the
|
|
|
|
// License at http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
// Unless required by applicable law or agreed to in writing, software distributed
|
|
|
|
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
|
|
|
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
|
|
|
// specific language governing permissions and limitations under the License.
|
|
|
|
|
|
|
|
use std::collections::{BTreeSet, BTreeMap, LinkedList};
|
|
|
|
use std::iter::FromIterator;
|
2017-02-03 18:14:23 +00:00
|
|
|
use std::f64::{NAN, INFINITY, NEG_INFINITY};
|
2017-01-11 21:51:34 +00:00
|
|
|
|
2017-04-29 03:11:55 +00:00
|
|
|
use chrono::{
|
|
|
|
DateTime,
|
|
|
|
TimeZone,
|
2017-11-21 16:24:08 +00:00
|
|
|
Utc
|
2017-04-29 03:11:55 +00:00
|
|
|
};
|
2017-01-06 15:15:57 +00:00
|
|
|
use num::BigInt;
|
|
|
|
use ordered_float::OrderedFloat;
|
2017-04-29 03:11:55 +00:00
|
|
|
use uuid::Uuid;
|
2017-02-14 15:43:32 +00:00
|
|
|
|
|
|
|
use types::{SpannedValue, Span, ValueAndSpan};
|
2017-01-06 15:15:57 +00:00
|
|
|
|
|
|
|
// Goal: Be able to parse https://github.com/edn-format/edn
|
|
|
|
// Also extensible to help parse http://docs.datomic.com/query.html
|
|
|
|
|
|
|
|
// Debugging hint: test using `cargo test --features peg/trace -- --nocapture`
|
|
|
|
// to trace where the parser is failing
|
|
|
|
|
|
|
|
// TODO: Support tagged elements
|
|
|
|
// TODO: Support discard
|
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// Matches the literal `nil` and yields `SpannedValue::Nil`.
pub nil -> SpannedValue = "nil" { SpannedValue::Nil }
|
|
|
|
// Matches `#f`, one or more whitespace characters, then `NaN`, and yields a float holding NaN.
pub nan -> SpannedValue = "#f" whitespace+ "NaN" { SpannedValue::Float(OrderedFloat(NAN)) }
|
2017-01-06 15:15:57 +00:00
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// Matches `#f +Infinity` or `#f -Infinity`. The sign is mandatory: "+" selects positive
// infinity and anything else `sign` can match (i.e. "-") selects negative infinity.
pub infinity -> SpannedValue = "#f" whitespace+ polarity:$(sign) "Infinity" {
    let magnitude = match polarity {
        "+" => INFINITY,
        _ => NEG_INFINITY,
    };
    SpannedValue::Float(OrderedFloat(magnitude))
}
|
2017-02-03 18:14:23 +00:00
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// Matches the literals `true` and `false`; the captured text decides the boolean value.
pub boolean -> SpannedValue = literal:$("true" / "false") {
    SpannedValue::Boolean(literal == "true")
}
|
2017-01-06 15:15:57 +00:00
|
|
|
|
|
|
|
// A single ASCII decimal digit.
digit = [0-9]
|
2017-02-11 00:03:35 +00:00
|
|
|
// A single ASCII letter or decimal digit; `basedinteger` uses this for its digits.
alphanumeric = [0-9a-zA-Z]
|
|
|
|
// A single octal digit (0 through 7).
octaldigit = [0-7]
|
|
|
|
// A radix from 2 to 36. The two-digit alternatives come first so that, e.g., "36" is not
// consumed as just "3".
validbase = [3][0-6] / [12][0-9] / [2-9]
|
|
|
|
// A single hexadecimal digit, in either case.
hex = [0-9a-fA-F]
|
2018-03-06 04:33:51 +00:00
|
|
|
// An explicit plus or minus sign.
sign = [+-]
|
2017-01-06 15:15:57 +00:00
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// An arbitrary-precision integer: an optionally signed run of decimal digits followed by
// the suffix "N". Only the digits (and sign) are handed to the BigInt parser.
pub bigint -> SpannedValue = digits:$( sign? digit+ ) "N" {
    let parsed: BigInt = digits.parse().unwrap();
    SpannedValue::BigInteger(parsed)
}
|
|
|
|
// An octal integer: a leading "0" followed by one or more octal digits.
// The digits are interpreted in base 8. If the value does not fit in an i64 the rule
// fails (via the `{? }` conditional action) instead of panicking on the conversion.
pub octalinteger -> SpannedValue = "0" i:$( octaldigit+ ) {?
    i64::from_str_radix(i, 8)
        .map(SpannedValue::Integer)
        .map_err(|_| "octal integer within i64 range")
}
|
|
|
|
// A hexadecimal integer: the prefix "0x" followed by one or more hex digits.
// If the value does not fit in an i64 the rule fails (via the `{? }` conditional action)
// instead of panicking on the conversion.
pub hexinteger -> SpannedValue = "0x" i:$( hex+ ) {?
    i64::from_str_radix(i, 16)
        .map(SpannedValue::Integer)
        .map_err(|_| "hexadecimal integer within i64 range")
}
|
2017-01-06 15:15:57 +00:00
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// An integer in an explicit radix, written `<base>r<digits>`, e.g. `2r1010` or `36rZZ`.
// `validbase` restricts the radix to 2..36. The digits are matched loosely as
// alphanumerics, so they may be invalid for the chosen radix (e.g. `2r9`) or overflow an
// i64; in both cases the rule fails (via the `{? }` conditional action) instead of panicking.
pub basedinteger -> SpannedValue = b:$( validbase ) "r" i:$( alphanumeric+ ) {?
    b.parse::<u32>()
        .map_err(|_| "valid radix")
        .and_then(|radix| {
            i64::from_str_radix(i, radix)
                .map_err(|_| "digits valid for the radix and within i64 range")
        })
        .map(SpannedValue::Integer)
}
|
2017-02-11 00:03:35 +00:00
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// A decimal integer: an optionally signed run of digits that is NOT followed by "." or an
// exponent marker (the negative lookahead leaves such input for `float`).
// If the digits do not fit in an i64 the rule fails (via the `{? }` conditional action)
// instead of panicking on the conversion.
pub integer -> SpannedValue = i:$( sign? digit+ ) !("." / ([eE])) {?
    i.parse::<i64>()
        .map(SpannedValue::Integer)
        .map_err(|_| "integer within i64 range")
}
|
2017-02-11 00:03:35 +00:00
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// A floating-point literal: optional sign, integer digits, an optional fractional part
// and an optional exponent. The whole matched text is parsed as an f64.
pub float -> SpannedValue = literal:$( sign? digit+ ("." digit+)? ([eE] sign? digit+)? ) {
    let parsed: f64 = literal.parse().unwrap();
    SpannedValue::Float(OrderedFloat(parsed))
}
|
2017-01-06 15:15:57 +00:00
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// Any numeric literal. Alternatives are tried in order: the forms with a distinguishing
// suffix or prefix ("N", "<base>r", "0x", leading "0") come before plain integers, and
// floats are tried last.
number -> SpannedValue = ( bigint / basedinteger / hexinteger / octalinteger / integer / float )
|
2017-01-06 15:15:57 +00:00
|
|
|
|
2018-03-15 14:13:27 +00:00
|
|
|
// TODO: standalone characters: \<char>, \newline, \return, \space and \tab.
|
|
|
|
|
|
|
|
// A backslash escape inside a string literal, decoded to the character it stands for:
// `\n`, `\t` and `\r` become newline, tab and carriage return, while `\\` and `\"` yield
// the backslash or double quote itself.
string_special_char -> &'input str = "\\" c:$([\\"ntr]) {
    match c {
        "n" => "\n",
        "t" => "\t",
        "r" => "\r",
        literal => literal,
    }
}
|
|
|
|
// A maximal run of characters that are neither a double quote nor a backslash.
string_normal_chars -> &'input str = $([^"\\]+)
|
|
|
|
|
|
|
|
// This is what we need to do in order to unescape. We can't just match the entire string slice:
|
|
|
|
// we get a Vec<&str> from rust-peg, where some of the parts might be unescaped special characters,
|
|
|
|
// and we join it together to form an output string.
|
|
|
|
// E.g., input = r#"\"foo\\\\bar\""#
|
|
|
|
// output = [quote, "foo", backslash, "bar", quote]
|
|
|
|
// result = r#""foo\\bar""#
|
|
|
|
// For the typical case, string_normal_chars will match multiple, leading to a single-element vec.
|
|
|
|
// A double-quoted string. The body is collected as a sequence of slices (escape results
// and unescaped runs) which are concatenated into the resulting text.
pub text -> SpannedValue =
    "\"" pieces:((string_special_char / string_normal_chars)*) "\"" {
        let joined: String = pieces.concat();
        SpannedValue::Text(joined)
    }
|
2017-04-29 03:11:55 +00:00
|
|
|
|
|
|
|
// RFC 3339 timestamps. #inst "1985-04-12T23:20:50.52Z"
|
|
|
|
// We accept an arbitrary depth of decimals.
|
|
|
|
// Note that we discard the timezone information -- all times are translated to UTC.
|
2018-03-06 04:33:51 +00:00
|
|
|
// An RFC 3339 timestamp tagged with `#inst`, e.g. #inst "1985-04-12T23:20:50.52Z".
// The grammar only checks the overall shape; `DateTime::parse_from_rfc3339` performs the
// real validation and a rejected string makes the rule fail. The result is converted to UTC.
//
// The zone designator is grouped explicitly. "/" binds more loosely than sequencing, so
// without the parentheses a numeric offset would be an alternative to the *entire*
// timestamp rather than to "Z", and timestamps such as "1985-04-12T23:20:50+01:00" would
// never match.
inst_string -> DateTime<Utc> =
    "#inst" whitespace+ "\"" d:$( [0-9]*<4> "-" [0-2][0-9] "-" [0-3][0-9]
              "T"
              [0-2][0-9] ":" [0-5][0-9] ":" [0-6][0-9]
              ("." [0-9]+)?
              ("Z" / (("+" / "-") [0-2][0-9] ":" [0-5][0-9]))
            )
    "\"" {?
        DateTime::parse_from_rfc3339(d)
            .map(|t| t.with_timezone(&Utc))
            .map_err(|_| "invalid datetime")  // Oh, rustpeg.
    }
|
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// An instant written as `#instmicros` followed by a run of decimal digits, interpreted as
// microseconds since the Unix epoch.
// The rule fails (via the `{? }` conditional action) instead of panicking when the digits
// overflow an i64 or when chrono cannot represent the resulting timestamp.
inst_micros -> DateTime<Utc> =
    "#instmicros" whitespace+ d:$( digit+ ) {?
        d.parse::<i64>()
            .ok()
            .and_then(|micros| {
                let seconds = micros / 1_000_000;
                // `digit+` admits no sign, so the remainder is already in 0..1_000_000.
                let nanos = (micros % 1_000_000) as u32 * 1_000;
                Utc.timestamp_opt(seconds, nanos).single()
            })
            .ok_or("microsecond timestamp within the representable range")
    }
|
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// An instant written as `#instmillis` followed by a run of decimal digits, interpreted as
// milliseconds since the Unix epoch.
// The rule fails (via the `{? }` conditional action) instead of panicking when the digits
// overflow an i64 or when chrono cannot represent the resulting timestamp.
inst_millis -> DateTime<Utc> =
    "#instmillis" whitespace+ d:$( digit+ ) {?
        d.parse::<i64>()
            .ok()
            .and_then(|millis| {
                let seconds = millis / 1_000;
                // `digit+` admits no sign, so the remainder is already in 0..1_000.
                let nanos = (millis % 1_000) as u32 * 1_000_000;
                Utc.timestamp_opt(seconds, nanos).single()
            })
            .ok_or("millisecond timestamp within the representable range")
    }
|
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// Any of the three instant notations, wrapped as `SpannedValue::Instant`.
inst -> SpannedValue =
    instant:(inst_millis / inst_micros / inst_string) {
        SpannedValue::Instant(instant)
    }
|
2017-04-29 03:11:55 +00:00
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// A double-quoted UUID in the canonical 8-4-4-4-12 hyphenated form.
// Hex digits are accepted in either case. Because the grammar has already checked the
// shape, `Uuid::parse_str` is expected to succeed on the captured text.
uuid_string -> Uuid =
    "\"" u:$( [a-fA-F0-9]*<8> "-" [a-fA-F0-9]*<4> "-" [a-fA-F0-9]*<4> "-" [a-fA-F0-9]*<4> "-" [a-fA-F0-9]*<12> ) "\"" {
        Uuid::parse_str(u).expect("this is a valid UUID string")
    }
|
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// A tagged UUID: `#uuid`, one or more whitespace characters, then a quoted UUID string.
pub uuid -> SpannedValue =
    "#uuid" whitespace+ id:uuid_string {
        SpannedValue::Uuid(id)
    }
|
2017-04-29 03:11:55 +00:00
|
|
|
|
2017-01-11 21:51:34 +00:00
|
|
|
// Separates the dotted segments inside a namespace (see `symbol_namespace`).
namespace_divider = "."
|
|
|
|
// Separates a namespace from the name that follows it, as in `my/fred`.
namespace_separator = "/"
|
|
|
|
|
2017-01-06 15:15:57 +00:00
|
|
|
// TODO: Be more picky here
|
2017-02-02 10:52:34 +00:00
|
|
|
// Keywords follow the rules of symbols, except they can (and must) begin with :
|
|
|
|
// e.g. :fred or :my/fred. See https://github.com/edn-format/edn#keywords
|
2018-03-06 04:33:51 +00:00
|
|
|
// Characters allowed at the start of a symbol name or namespace: ASCII letters, digits,
// and the punctuation `*!_?$%&=<>`. Note that "-" is not in this set.
symbol_char_initial = [a-zA-Z0-9*!_?$%&=<>]
|
|
|
|
// Characters allowed after the first one: everything in `symbol_char_initial`, plus "-".
symbol_char_subsequent = [a-zA-Z0-9*!_?$%&=<>-]
|
2017-01-06 15:15:57 +00:00
|
|
|
|
2017-02-02 10:52:34 +00:00
|
|
|
// A namespace: one initial character, any number of subsequent characters, then zero or
// more further segments, each introduced by "." and containing at least one character.
symbol_namespace = symbol_char_initial symbol_char_subsequent* (namespace_divider symbol_char_subsequent+)*
|
2017-03-06 22:55:14 +00:00
|
|
|
// A bare name: one or more initial characters followed by any number of subsequent characters.
symbol_name = ( symbol_char_initial+ symbol_char_subsequent* )
|
|
|
|
// A name usable in a symbol: an ordinary `symbol_name`, or one of the special names "..."
// and ".". "..." is listed before "." so the longer form is tried first.
plain_symbol_name = symbol_name / "..." / "."
|
2017-01-11 21:51:34 +00:00
|
|
|
|
|
|
|
// The leading colon that marks a keyword.
keyword_prefix = ":"
|
2017-01-06 15:15:57 +00:00
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// A symbol, optionally namespaced: `name` or `namespace/name`.
// The namespace (without the trailing "/") and the name are captured as text and handed
// to `SpannedValue::from_symbol`.
pub symbol -> SpannedValue =
    namespace:( prefix:$(symbol_namespace) namespace_separator { prefix } )?
    name:$(plain_symbol_name) {
        SpannedValue::from_symbol(namespace, name)
    }
|
2017-02-14 15:43:32 +00:00
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// A keyword: a colon followed by an optionally namespaced name, e.g. `:fred` or `:my/fred`.
// The namespace (without the trailing "/") and the name are captured as text and handed
// to `SpannedValue::from_keyword`.
pub keyword -> SpannedValue =
    keyword_prefix
    namespace:( prefix:$(symbol_namespace) namespace_separator { prefix } )?
    name:$(symbol_name) {
        SpannedValue::from_keyword(namespace, name)
    }
|
2017-02-14 15:43:32 +00:00
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// A parenthesised sequence of values, collected into a linked list.
pub list -> SpannedValue =
    "(" __ elements:(value)* __ ")" {
        let items = LinkedList::from_iter(elements);
        SpannedValue::List(items)
    }
|
2017-02-14 15:43:32 +00:00
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// A square-bracketed sequence of values, kept in the order parsed.
pub vector -> SpannedValue =
    "[" __ elements:(value)* __ "]" {
        SpannedValue::Vector(elements)
    }
|
2017-02-14 15:43:32 +00:00
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// A `#{ ... }` sequence of values, collected into an ordered set.
pub set -> SpannedValue =
    "#{" __ elements:(value)* __ "}" {
        let members = BTreeSet::from_iter(elements);
        SpannedValue::Set(members)
    }
|
2017-02-14 15:43:32 +00:00
|
|
|
|
|
|
|
// One map entry: two consecutive values, returned as a (key, value) tuple.
pair -> (ValueAndSpan, ValueAndSpan) =
    key:(value) val:(value) { (key, val) }
|
2017-01-06 15:15:57 +00:00
|
|
|
|
2018-03-06 04:33:51 +00:00
|
|
|
// A brace-delimited sequence of key/value pairs, collected into an ordered map.
pub map -> SpannedValue =
    "{" __ entries:(pair)* __ "}" {
        let table = BTreeMap::from_iter(entries);
        SpannedValue::Map(table)
    }
|
2017-01-06 15:15:57 +00:00
|
|
|
|
|
|
|
// Within `number`, `integer` is tried before `float`. The negative lookahead on `integer`
// (no "." or exponent marker may follow the digits) is what keeps a float from being
// read as an integer.
|
2017-02-14 15:43:32 +00:00
|
|
|
// Any single EDN value, with surrounding whitespace and comments skipped.
// The input positions just before and just after the value itself (excluding the
// surrounding filler) are recorded as its span.
pub value -> ValueAndSpan =
    __ begin:#position
       parsed:( nil / nan / infinity / boolean / number / inst / uuid / text
              / keyword / symbol / list / vector / map / set )
       finish:#position __ {
        ValueAndSpan {
            inner: parsed,
            span: Span::new(begin, finish)
        }
    }
|
2017-01-17 19:26:45 +00:00
|
|
|
|
|
|
|
// Clojure (and thus EDN) regards commas as whitespace, and thus the two-element vectors [1 2] and
|
|
|
|
// [1,,,,2] are equivalent, as are the maps {:a 1, :b 2} and {:a 1 :b 2}.
|
2018-03-06 04:33:51 +00:00
|
|
|
// One whitespace character: space, carriage return, line feed, tab, or comma.
whitespace = [ \r\n\t,]
|
|
|
|
// A line comment: ";" through the end of the line, also consuming one line-terminator
// character if present.
comment = ";" [^\r\n]* [\r\n]?
|
2017-01-06 15:15:57 +00:00
|
|
|
|
2017-01-17 19:25:31 +00:00
|
|
|
// Optional filler between tokens: any mix of whitespace characters and comments, possibly empty.
__ = (whitespace / comment)*
|