// Copyright 2016 Mozilla // // Licensed under the Apache License, Version 2.0 (the "License"); you may not use // this file except in compliance with the License. You may obtain a copy of the // License at http://www.apache.org/licenses/LICENSE-2.0 // Unless required by applicable law or agreed to in writing, software distributed // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. use std; use std::fmt::{ Debug, Display, Formatter, }; use std::cmp::Ordering; use combine::{ ConsumedResult, ParseError, Parser, ParseResult, StreamOnce, many, many1, parser, satisfy, satisfy_map, }; use combine::primitives; // To not shadow Error. use combine::primitives::{ Consumed, FastResult, }; use combine::combinator::{ Expected, FnParser, }; use edn; /// A wrapper to let us order `edn::Span` in whatever way is appropriate for parsing with `combine`. #[derive(Clone, Copy, Debug)] pub struct SpanPosition(edn::Span); impl Display for SpanPosition { fn fmt(&self, f: &mut Formatter) -> ::std::fmt::Result { self.0.fmt(f) } } impl PartialEq for SpanPosition { fn eq(&self, other: &Self) -> bool { self.cmp(other) == Ordering::Equal } } impl Eq for SpanPosition { } impl PartialOrd for SpanPosition { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for SpanPosition { fn cmp(&self, other: &Self) -> Ordering { (self.0).0.cmp(&(other.0).0) } } /// An iterator specifically for iterating `edn::ValueAndSpan` instances in various ways. /// /// Enumerating each iteration type allows us to have a single `combine::Stream` implementation /// yielding `ValueAndSpan` items, which allows us to yield uniform `combine::ParseError` types from /// disparate parsers. #[derive(Clone)] pub enum IntoIter { Empty(std::iter::Empty), Atom(std::iter::Once), Vector(std::vec::IntoIter), List(std::collections::linked_list::IntoIter), /// Iterates via a single `flat_map` [k1, v1, k2, v2, ...]. Map(std::vec::IntoIter), // TODO: Support Set and Map more naturally. This is significantly more work because the // existing BTreeSet and BTreeMap iterators do not implement Clone, and implementing Clone for // them is involved. Since we don't really need to parse sets and maps at this time, this will // do for now. } impl Iterator for IntoIter { type Item = edn::ValueAndSpan; fn next(&mut self) -> Option { match *self { IntoIter::Empty(ref mut i) => i.next(), IntoIter::Atom(ref mut i) => i.next(), IntoIter::Vector(ref mut i) => i.next(), IntoIter::List(ref mut i) => i.next(), IntoIter::Map(ref mut i) => i.next(), } } } /// A single `combine::Stream` implementation iterating `edn::ValueAndSpan` instances. Equivalent /// to `combine::IteratorStream` as produced by `combine::from_iter`, but specialized to /// `edn::ValueAndSpan`. #[derive(Clone)] pub struct Stream(IntoIter, SpanPosition); /// Things specific to parsing with `combine` and our `Stream` that need a trait to live outside of /// the `edn` crate. pub trait Item: Clone + PartialEq + Sized { /// Position could be specialized to `SpanPosition`. type Position: Clone + Ord + std::fmt::Display; /// A slight generalization of `combine::Positioner` that allows to set the position based on /// the `edn::ValueAndSpan` being iterated. fn start(&self) -> Self::Position; fn update_position(&self, &mut Self::Position); fn into_child_stream_iter(self) -> IntoIter; fn into_child_stream(self) -> Stream; fn into_atom_stream_iter(self) -> IntoIter; fn into_atom_stream(self) -> Stream; } impl Item for edn::ValueAndSpan { type Position = SpanPosition; fn start(&self) -> Self::Position { SpanPosition(self.span.clone()) } fn update_position(&self, position: &mut Self::Position) { *position = SpanPosition(self.span.clone()) } fn into_child_stream_iter(self) -> IntoIter { match self.inner { edn::SpannedValue::Vector(values) => IntoIter::Vector(values.into_iter()), edn::SpannedValue::List(values) => IntoIter::List(values.into_iter()), // Parsing pairs with `combine` is tricky; parsing sequences is easy. edn::SpannedValue::Map(map) => IntoIter::Map(map.into_iter().flat_map(|(a, v)| std::iter::once(a).chain(std::iter::once(v))).collect::>().into_iter()), _ => IntoIter::Empty(std::iter::empty()), } } fn into_child_stream(self) -> Stream { let span = self.span.clone(); Stream(self.into_child_stream_iter(), SpanPosition(span)) } fn into_atom_stream_iter(self) -> IntoIter { IntoIter::Atom(std::iter::once(self)) } fn into_atom_stream(self) -> Stream { let span = self.span.clone(); Stream(self.into_atom_stream_iter(), SpanPosition(span)) } } /// `OfExactly` and `of_exactly` allow us to express nested parsers naturally. /// /// For example, `vector().of_exactly(many(list()))` parses a vector-of-lists, like [(1 2) (:a :b) ("test") ()]. /// /// The "outer" parser `P` and the "nested" parser `N` must be compatible: `P` must produce an /// output `edn::ValueAndSpan` which can itself be turned into a stream of child elements; and `N` /// must accept the resulting input `Stream`. This compatibility allows us to lift errors from the /// nested parser to the outer parser, which is part of what has made parsing `&'a [edn::Value]` /// difficult. #[derive(Clone)] pub struct OfExactly(P, N); impl Parser for OfExactly where P: Parser, N: Parser, { type Input = P::Input; type Output = O; #[inline] fn parse_lazy(&mut self, input: Self::Input) -> ConsumedResult { use self::FastResult::*; match self.0.parse_lazy(input) { ConsumedOk((outer_value, outer_input)) => { match self.1.parse_lazy(outer_value.into_child_stream()) { ConsumedOk((inner_value, mut inner_input)) | EmptyOk((inner_value, mut inner_input)) => { match inner_input.uncons() { Err(ref err) if *err == primitives::Error::end_of_input() => ConsumedOk((inner_value, outer_input)), _ => EmptyErr(ParseError::empty(inner_input.position())), } }, // TODO: Improve the error output to reference the nested value (or span) in // some way. This seems surprisingly difficult to do, so we just surface the // inner error message right now. See also the comment below. EmptyErr(e) | ConsumedErr(e) => ConsumedErr(e), } }, EmptyOk((outer_value, outer_input)) => { match self.1.parse_lazy(outer_value.into_child_stream()) { ConsumedOk((inner_value, mut inner_input)) | EmptyOk((inner_value, mut inner_input)) => { match inner_input.uncons() { Err(ref err) if *err == primitives::Error::end_of_input() => EmptyOk((inner_value, outer_input)), _ => EmptyErr(ParseError::empty(inner_input.position())), } }, // TODO: Improve the error output. See the comment above. EmptyErr(e) | ConsumedErr(e) => EmptyErr(e), } }, ConsumedErr(e) => ConsumedErr(e), EmptyErr(e) => EmptyErr(e), } } fn add_error(&mut self, errors: &mut ParseError) { self.0.add_error(errors); } } #[inline(always)] pub fn of_exactly(p: P, n: N) -> OfExactly where P: Parser, N: Parser, { OfExactly(p, n) } /// We need a trait to define `Parser.of` and have it live outside of the `combine` crate. pub trait OfExactlyParsing: Parser + Sized { fn of_exactly(self, n: N) -> OfExactly where Self: Sized, N: Parser; } impl

OfExactlyParsing for P where P: Parser { fn of_exactly(self, n: N) -> OfExactly where N: Parser { of_exactly(self, n) } } /// Equivalent to `combine::IteratorStream`. impl StreamOnce for Stream { type Item = edn::ValueAndSpan; type Range = edn::ValueAndSpan; type Position = SpanPosition; #[inline] fn uncons(&mut self) -> std::result::Result> { match self.0.next() { Some(x) => { x.update_position(&mut self.1); Ok(x) }, None => Err(primitives::Error::end_of_input()), } } #[inline(always)] fn position(&self) -> Self::Position { self.1.clone() } } /// Shorthands, just enough to convert the `mentat_db` crate for now. Written using `Box` for now: /// it's simple and we can address allocation issues if and when they surface. pub fn vector() -> Box> { satisfy(|v: edn::ValueAndSpan| v.inner.is_vector()).boxed() } pub fn list() -> Box> { satisfy(|v: edn::ValueAndSpan| v.inner.is_list()).boxed() } pub fn map() -> Box> { satisfy(|v: edn::ValueAndSpan| v.inner.is_map()).boxed() } pub fn seq() -> Box> { satisfy(|v: edn::ValueAndSpan| v.inner.is_list() || v.inner.is_vector()).boxed() } pub fn integer() -> Box> { satisfy_map(|v: edn::ValueAndSpan| v.inner.as_integer()).boxed() } pub fn namespaced_keyword() -> Box> { satisfy_map(|v: edn::ValueAndSpan| v.inner.as_namespaced_keyword().cloned()).boxed() } /// Like `combine::token()`, but compare an `edn::Value` to an `edn::ValueAndSpan`. pub fn value(value: edn::Value) -> Box> { // TODO: make this comparison faster. Right now, we drop all the spans; if we walked the value // trees together, we could avoid creating garbage. satisfy(move |v: edn::ValueAndSpan| value == v.inner.into()).boxed() } fn keyword_map_(input: Stream) -> ParseResult { // One run is a keyword followed by one or more non-keywords. let run = (satisfy(|v: edn::ValueAndSpan| v.inner.is_keyword()), many1(satisfy(|v: edn::ValueAndSpan| !v.inner.is_keyword())) .map(|vs: Vec| { // TODO: extract "spanning". let beg = vs.first().unwrap().span.0; let end = vs.last().unwrap().span.1; edn::ValueAndSpan { inner: edn::SpannedValue::Vector(vs), span: edn::Span(beg, end), } })); let mut runs = vector().of_exactly(many::, _>(run)); let (data, input) = try!(runs.parse_lazy(input).into()); let mut m: std::collections::BTreeMap = std::collections::BTreeMap::default(); for (k, vs) in data { if m.insert(k, vs).is_some() { // TODO: improve this message. return Err(Consumed::Empty(ParseError::from_errors(input.into_inner().position(), Vec::new()))) } } let map = edn::ValueAndSpan { inner: edn::SpannedValue::Map(m), span: edn::Span(0, 0), // TODO: fix this. }; Ok((map, input)) } /// Turn a vector of keywords and non-keyword values into a map. As an example, turn /// ```edn /// [:keyword1 value1 value2 ... :keyword2 value3 value4 ...] /// ``` /// into /// ```edn /// {:keyword1 [value1 value2 ...] :keyword2 [value3 value4 ...]} /// ```. pub fn keyword_map() -> Expected ParseResult>> { // The `as` work arounds https://github.com/rust-lang/rust/issues/20178. parser(keyword_map_ as fn(Stream) -> ParseResult).expected("keyword map") } /// Generate a `satisfy` expression that matches a `PlainSymbol` value with the given name. /// /// We do this rather than using `combine::token` so that we don't need to allocate a new `String` /// inside a `PlainSymbol` inside a `SpannedValue` inside a `ValueAndSpan` just to match input. #[macro_export] macro_rules! def_matches_plain_symbol { ( $parser: ident, $name: ident, $input: expr ) => { def_parser!($parser, $name, edn::ValueAndSpan, { satisfy(|v: edn::ValueAndSpan| { match v.inner { edn::SpannedValue::PlainSymbol(ref s) => s.0.as_str() == $input, _ => false, } }) }); } } /// Generate a `satisfy` expression that matches a `Keyword` value with the given name. /// /// We do this rather than using `combine::token` to save allocations. #[macro_export] macro_rules! def_matches_keyword { ( $parser: ident, $name: ident, $input: expr ) => { def_parser!($parser, $name, edn::ValueAndSpan, { satisfy(|v: edn::ValueAndSpan| { match v.inner { edn::SpannedValue::Keyword(ref s) => s.0.as_str() == $input, _ => false, } }) }); } } /// Generate a `satisfy` expression that matches a `NamespacedKeyword` value with the given /// namespace and name. /// /// We do this rather than using `combine::token` to save allocations. #[macro_export] macro_rules! def_matches_namespaced_keyword { ( $parser: ident, $name: ident, $input_namespace: expr, $input_name: expr ) => { def_parser!($parser, $name, edn::ValueAndSpan, { satisfy(|v: edn::ValueAndSpan| { match v.inner { edn::SpannedValue::NamespacedKeyword(ref s) => s.namespace.as_str() == $input_namespace && s.name.as_str() == $input_name, _ => false, } }) }); } } #[cfg(test)] mod tests { use combine::{eof}; use super::*; /// Take a string `input` and a string `expected` and ensure that `input` parses to an /// `edn::Value` keyword map equivalent to the `edn::Value` that `expected` parses to. macro_rules! assert_keyword_map_eq { ( $input: expr, $expected: expr ) => {{ let input = edn::parse::value($input).expect("to be able to parse input EDN"); let expected = $expected.map(|e| { edn::parse::value(e).expect("to be able to parse expected EDN").without_spans() }); let mut par = keyword_map().map(|x| x.without_spans()).skip(eof()); let result = par.parse(input.into_atom_stream()).map(|x| x.0); assert_eq!(result.ok(), expected); }} } #[test] fn test_keyword_map() { assert_keyword_map_eq!( "[:foo 1 2 3 :bar 4]", Some("{:foo [1 2 3] :bar [4]}")); // Trailing keywords aren't allowed. assert_keyword_map_eq!( "[:foo]", None); assert_keyword_map_eq!( "[:foo 2 :bar]", None); // Duplicate keywords aren't allowed. assert_keyword_map_eq!( "[:foo 2 :foo 1]", None); // Starting with anything but a keyword isn't allowed. assert_keyword_map_eq!( "[2 :foo 1]", None); // Consecutive keywords aren't allowed. assert_keyword_map_eq!( "[:foo :bar 1]", None); // Empty lists return an empty map. assert_keyword_map_eq!( "[]", Some("{}")); } }