/* vim: set filetype=rust.rustpeg */ // Copyright 2016 Mozilla // // Licensed under the Apache License, Version 2.0 (the "License"); you may not use // this file except in compliance with the License. You may obtain a copy of the // License at http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software distributed // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. use std::collections::{BTreeSet, BTreeMap, LinkedList}; use std::iter::FromIterator; use std::f64::{NAN, INFINITY, NEG_INFINITY}; use num::BigInt; use ordered_float::OrderedFloat; use types::{SpannedValue, Span, ValueAndSpan}; // Goal: Be able to parse https://github.com/edn-format/edn // Also extensible to help parse http://docs.datomic.com/query.html // Debugging hint: test using `cargo test --features peg/trace -- --nocapture` // to trace where the parser is failing // TODO: Support tagged elements // TODO: Support discard pub nil -> ValueAndSpan = start:#position "nil" end:#position { ValueAndSpan { inner: SpannedValue::Nil, span: Span(start, end) } } pub nan -> ValueAndSpan = start:#position "#f" whitespace+ "NaN" end:#position { ValueAndSpan { inner: SpannedValue::Float(OrderedFloat(NAN)), span: Span(start, end) } } pub infinity -> ValueAndSpan = start:#position "#f" whitespace+ s:$(sign) "Infinity" end:#position { ValueAndSpan { inner: SpannedValue::Float(OrderedFloat(if s == "+" { INFINITY } else { NEG_INFINITY })), span: Span(start, end) } } pub boolean -> ValueAndSpan = start:#position "true" end:#position { ValueAndSpan { inner: SpannedValue::Boolean(true), span: Span(start, end) } } / start:#position "false" end:#position { ValueAndSpan { inner: SpannedValue::Boolean(false), span: Span(start, end) } } digit = [0-9] alphanumeric = [0-9a-zA-Z] octaldigit = [0-7] validbase = [3][0-6] / [12][0-9] / [2-9] hex = [0-9a-fA-F] sign = "-" / "+" pub bigint -> ValueAndSpan = start:#position b:$( sign? digit+ ) "N" end:#position { ValueAndSpan { inner: SpannedValue::BigInteger(b.parse::().unwrap()), span: Span(start, end) } } pub octalinteger -> ValueAndSpan = start:#position "0" i:$( octaldigit+ ) end:#position { ValueAndSpan { inner: SpannedValue::Integer(i64::from_str_radix(i, 8).unwrap()), span: Span(start, end) } } pub hexinteger -> ValueAndSpan = start:#position "0x" i:$( hex+ ) end:#position { ValueAndSpan { inner: SpannedValue::Integer(i64::from_str_radix(i, 16).unwrap()), span: Span(start, end) } } pub basedinteger -> ValueAndSpan = // Only allow values 2-36 start:#position b:$( validbase ) "r" i:$( alphanumeric+ ) end:#position { ValueAndSpan { inner: SpannedValue::Integer(i64::from_str_radix(i, b.parse::().unwrap()).unwrap()), span: Span(start, end) } } pub integer -> ValueAndSpan = start:#position i:$( sign? digit+ ) end:#position { ValueAndSpan { inner: SpannedValue::Integer(i.parse::().unwrap()), span: Span(start, end) } } frac = sign? digit+ "." digit+ exp = sign? digit+ ("e" / "E") sign? digit+ frac_exp = sign? digit+ "." digit+ ("e" / "E") sign? digit+ // The order here is important - frac_exp must come before (exp / frac) or the // parser assumes exp or frac when the float is really a frac_exp and fails pub float -> ValueAndSpan = start:#position f:$( frac_exp / exp / frac ) end:#position { ValueAndSpan { inner: SpannedValue::Float(OrderedFloat(f.parse::().unwrap())), span: Span(start, end) } } // TODO: \newline, \return, \space and \tab special_char = quote / tab quote = "\\\"" tab = "\\tab" char = [^"] / special_char pub text -> ValueAndSpan = start:#position "\"" t:$( char* ) "\"" end:#position { ValueAndSpan { inner: SpannedValue::Text(t.to_string()), span: Span(start, end) } } namespace_divider = "." namespace_separator = "/" // TODO: Be more picky here // Keywords follow the rules of symbols, except they can (and must) begin with : // e.g. :fred or :my/fred. See https://github.com/edn-format/edn#keywords symbol_char_initial = [a-z] / [A-Z] / [0-9] / [*!_?$%&=<>] symbol_char_subsequent = [a-z] / [A-Z] / [0-9] / [-*!_?$%&=<>] symbol_namespace = symbol_char_initial symbol_char_subsequent* (namespace_divider symbol_char_subsequent+)* symbol_name = ( symbol_char_initial+ / "." ) ( symbol_char_subsequent* / "." ) keyword_prefix = ":" pub symbol -> ValueAndSpan = start:#position ns:( sns:$(symbol_namespace) namespace_separator { sns })? n:$(symbol_name) end:#position { ValueAndSpan { inner: SpannedValue::from_symbol(ns, n), span: Span(start, end) } } pub keyword -> ValueAndSpan = start:#position keyword_prefix ns:( sns:$(symbol_namespace) namespace_separator { sns })? n:$(symbol_name) end:#position { ValueAndSpan { inner: SpannedValue::from_keyword(ns, n), span: Span(start, end) } } pub list -> ValueAndSpan = start:#position "(" __ v:(value)* __ ")" end:#position { ValueAndSpan { inner: SpannedValue::List(LinkedList::from_iter(v)), span: Span(start, end) } } pub vector -> ValueAndSpan = start:#position "[" __ v:(value)* __ "]" end:#position { ValueAndSpan { inner: SpannedValue::Vector(v), span: Span(start, end) } } pub set -> ValueAndSpan = start:#position "#{" __ v:(value)* __ "}" end:#position { ValueAndSpan { inner: SpannedValue::Set(BTreeSet::from_iter(v)), span: Span(start, end) } } pair -> (ValueAndSpan, ValueAndSpan) = k:(value) v:(value) { (k, v) } pub map -> ValueAndSpan = start:#position "{" __ v:(pair)* __ "}" end:#position { ValueAndSpan { inner: SpannedValue::Map(BTreeMap::from_iter(v)), span: Span(start, end) } } // It's important that float comes before integer or the parser assumes that // floats are integers and fails to parse pub value -> ValueAndSpan = __ v:(nil / nan / infinity / boolean / float / octalinteger / hexinteger / basedinteger / bigint / integer / text / keyword / symbol / list / vector / map / set) __ { v } // Clojure (and thus EDN) regards commas as whitespace, and thus the two-element vectors [1 2] and // [1,,,,2] are equivalent, as are the maps {:a 1, :b 2} and {:a 1 :b 2}. whitespace = (" " / "\r" / "\n" / "\t" / ",") comment = ";" [^\r\n]* ("\r" / "\n")? __ = (whitespace / comment)*